function data = csv2cell(varargin)
% CSV2CELL - parses a Windows CSV file into an NxM cell array, where N is
% the number of lines in the CSV text and M is the number of fields in the
% longest line of the CSV file. Lines are delimited by carriage returns
% and/or newlines.
%
% Usage:
%   data = csv2cell(text)                 % parse CSV text
%   data = csv2cell(text, 'text')         % same, declaring text input
%   data = csv2cell(filename, 'fromfile') % read the file, then parse it
%
% A Windows CSV file format allows for commas (,) and double quotes (") to
% be contained within fields of the CSV file. Regular fields are just text
% separated by commas (e.g. foo,bar,hello world). Fields containing commas
% or double quotes are surrounded by double quotes (e.g.
% foo,bar,"item1,item2,item3",hello world). In the previous example,
% "item1,item2,item3" is one field in the CSV text. For double quotes to be
% represented, they are written in pairs in the file, and contained within
% a quoted field, (e.g. foo,"this field contains ""quotes""",bar). Spaces
% within fields (even leading and trailing) are preserved.
%
% All fields from the CSV file are returned as strings. If the CSV text
% contains lines with different numbers of fields, then the "missing"
% fields will appear as empty arrays, [], in the returned data. You can
% easily convert the data you expect to be numeric using str2num() and
% num2cell().
%
% Errors are raised when the number of arguments is not 1 or 2, when the
% second argument is neither 'fromfile' nor 'text', or when the file named
% with 'fromfile' cannot be opened.
%
% Examples:
%  >> csv2cell('foo.csv','fromfile') % loads and parses entire file
%  >> csv2cell(',,,') % returns cell array {'','','',''}
%  >> csv2cell(',,,','text') % same as above, declaring text input
%  >> csv2cell(sprintf('%s\r\n%s',...
%     '"Ten Thousand",10000,,"10,000","""It''s ""10 Grand"", baby",10k',...
%     ',foo,bar,soo'))
%  ans =
%     'Ten Thousand'    '10000'    ''       '10,000'    [1x22 char]    '10k'
%     ''                'foo'      'bar'    'soo'                []       []
%  >> % note the two empty [] cells, because the second line has two fewer
%  >> % fields than the first. The empty field '' at the beginning of the
%  >> % second line is due to the leading comma on that line, which is
%  >> % correct behavior. A trailing comma will do the same to the end of a
%  >> % line.
%
% Limitations/Exceptions:
%   * This code is untested on large files. It may take a long time due to
%   variables growing inside loops (yes, poor practice, but easy coding).
%   * This code has been minimally tested to work with a variety of weird
%   Excel files that I have.
%   * Behavior with improperly formatted CSV files is untested.
%   * Technically, CSV files from Excel always separate lines with the pair
%   of characters \r\n. This parser will also separate lines that have only
%   \r or \n as line terminators.
%   * Line separation is the first operation. I don't think the Excel CSV
%   format has any allowance for newlines or carriage returns within
%   fields. If it does, then this parser does not support it: the text is
%   split into lines before any quote handling takes place.
%
% Copyright 2008 Arthur Hebert

% Process arguments
if nargin == 1
    text = varargin{1};
elseif nargin == 2
    switch varargin{2}
        case 'fromfile'
            filename = varargin{1};
            fid = fopen(filename);
            if fid < 0
                % fopen signals failure with a negative identifier; report
                % the file name instead of failing later inside fread.
                error('Could not open file %s', filename)
            end
            text = char(fread(fid))';
            fclose(fid);
        case 'text'
            text = varargin{1};
        otherwise
            error('Invalid 2nd argument %s. Valid options are ''fromfile'' and ''text''',varargin{2})
    end
else
    error('CSV2CELL requires 1 or 2 arguments.')
end


% First split it into lines
lines = regexp(text,'(\r\n|[\r\n])','split'); % lines should now be a cell array of text split by newlines

% a character is either a delimiter or a field
inField = true;
inQuoteField = false;
% if inField && ~inQuoteField --> then we're in a raw field

skipNext = false;
data = {};
field = '';
for lineNumber = 1:length(lines)
    nChars = length(lines{lineNumber}); % number of characters in this line
    fieldNumber = 1;
    if ~inQuoteField
        % A line whose last field was quoted leaves inField false. Every
        % line starts inside its first field, so restore the flag here;
        % otherwise a leading comma on this line would be treated as the
        % delimiter following that earlier quoted field and the empty
        % first field of this row would be lost. An unterminated quote
        % (malformed input) is left untouched.
        inField = true;
    end
    for charNumber = 1:nChars
        if skipNext
            % second half of an escaped "" pair, already consumed
            skipNext = false;
            continue
        end
        thisChar = lines{lineNumber}(charNumber);
        if thisChar == ','
            if inField
                if inQuoteField % this comma is part of the field
                    field(end+1) = thisChar;
                else % this comma is the delimiter marking the end of the field
                    data{lineNumber,fieldNumber} = field;
                    field = '';
                    fieldNumber = fieldNumber + 1;
                end
            else % we are not currently in a field -- this is the start of a new delimiter
                inField = true;
            end
            if charNumber == nChars % this is a hanging comma, indicating the last field is blank
                data{lineNumber,fieldNumber} = '';
                field = '';
                fieldNumber = fieldNumber + 1;
            end
        elseif thisChar == '"'
            if inField
                if inQuoteField
                    if charNumber == nChars % it's the last character, so this must be the closing delimiter?
                        inField = false;
                        inQuoteField = false;
                        data{lineNumber,fieldNumber} = field;
                        field = '';
                        fieldNumber = fieldNumber + 1;
                    else
                        if lines{lineNumber}(charNumber+1) == '"' % this is translated to be a double quote in the field
                            field(end+1) = '"';
                            skipNext = true;
                        else % this " is the delimiter ending this field
                            data{lineNumber,fieldNumber} = field;
                            field = '';
                            inField = false;
                            inQuoteField = false;
                            fieldNumber = fieldNumber + 1;
                        end
                    end
                else % this is a delimiter and we are in a new quote field
                    inQuoteField = true;
                end
            else % we are not in a field. This must be an opening quote for the first field?
                inField = true;
                inQuoteField = true;
            end
        else % any other character ought to be added to field
            field(end+1) = thisChar;
            if charNumber == nChars
                data{lineNumber,fieldNumber} = field;
                field = '';
                fieldNumber = fieldNumber + 1;
            elseif charNumber == 1 % we are starting a new raw field
                inField = true;
            end
        end
    end
end