camir-aes2014: core/tools/csv2cell.m annotate

annotate core/tools/csv2cell.m @ 0:e9a9cd732c1e tip

first hg version after svn

author	wolffd
date	Tue, 10 Feb 2015 15:05:51 +0000
parents
children

rev	line source
wolffd@0	1 function data = csv2cell(varargin)
wolffd@0	2 % CSV2CELL - parses a Windows CSV file into an NxM cell array, where N is
wolffd@0	3 % the number of lines in the CSV text and M is the number of fields in the
wolffd@0	4 % longest line of the CSV file. Lines are delimited by carriage returns
wolffd@0	5 % and/or newlines.
wolffd@0	6 %
wolffd@0	7 % A Windows CSV file format allows for commas (,) and double quotes (") to
wolffd@0	8 % be contained within fields of the CSV file. Regular fields are just text
wolffd@0	9 % separated by commas (e.g. foo,bar,hello world). Fields containing commas
wolffd@0	10 % or double quotes are surrounded by double quotes (e.g.
wolffd@0	11 % foo,bar,"item1,item2,item3",hello world). In the previous example,
wolffd@0	12 % "item1,item2,item3" is one field in the CSV text. For double quotes to be
wolffd@0	13 % represented, they are written in pairs in the file, and contained within
wolffd@0	14 % a quoted field, (e.g. foo,"this field contains ""quotes""",bar). Spaces
wolffd@0	15 % within fields (even leading and trailing) are preserved.
wolffd@0	16 %
wolffd@0	17 % All fields from the CSV file are returned as strings. If the CSV text
wolffd@0	18 % contains lines with different numbers of fields, then the "missing"
wolffd@0	19 % fields with appear as empty arrays, [], in the returned data. You can
wolffd@0	20 % easily convert the data you expect to be numeric using str2num() and
wolffd@0	21 % num2cell().
wolffd@0	22 %
wolffd@0	23 % Examples:
wolffd@0	24 % >> csv2cell('foo.csv','fromfile') % loads and parses entire file
wolffd@0	25 % >> csv2cell(',,,') % returns cell array {'','','',''}
wolffd@0	26 % >> csv2cell(',,,','text') % same as above, declaring text input
wolffd@0	27 % >> csv2cell(sprintf('%s\r\n%s',...
wolffd@0	28 % '"Ten Thousand",10000,,"10,000","""It''s ""10 Grand"", baby",10k',...
wolffd@0	29 % ',foo,bar,soo'))
wolffd@0	30 % ans =
wolffd@0	31 % 'Ten Thousand' '10000' '' '10,000' [1x22 char] '10k'
wolffd@0	32 % '' 'foo' 'bar' 'soo' [] []
wolffd@0	33 % >> % note the two empty [] cells, because the second line has two fewer
wolffd@0	34 % >> % fields than the first. The empty field '' at the beginning of the
wolffd@0	35 % >> % second line is due to the leading comma on that line, which is
wolffd@0	36 % >> % correct behavior. A trailing comma will do the same to the end of a
wolffd@0	37 % >> % line.
wolffd@0	38 %
wolffd@0	39 % Limitations/Exceptions:
wolffd@0	40 % * This code is untested on large files. It may take a long time due to
wolffd@0	41 % variables growing inside loops (yes, poor practice, but easy coding).
wolffd@0	42 % * This code has been minimally tested to work with a variety of weird
wolffd@0	43 % Excel files that I have.
wolffd@0	44 % * Behavior with improperly formatted CSV files is untested.
wolffd@0	45 % * Technically, CSV files from Excel always separate lines with the pair
wolffd@0	46 % of characters \r\n. This parser will also separate lines that have only
wolffd@0	47 % \r or \n as line terminators.
wolffd@0	48 % * Line separation is the first operation. I don't think the Excel CSV
wolffd@0	49 % format has any allowance for newlines or carriage returns within
wolffd@0	50 % fields. If it does, then this parser does not support it and would not
wolffd@0	51 % return bad output.
wolffd@0	52 %
wolffd@0	53 % Copyright 2008 Arthur Hebert
wolffd@0	54
wolffd@0	55 % Process arguments
wolffd@0	56 if nargin == 1
wolffd@0	57 text = varargin{1};
wolffd@0	58 elseif nargin == 2
wolffd@0	59 switch varargin{2}
wolffd@0	60 case 'fromfile'
wolffd@0	61 filename = varargin{1};
wolffd@0	62 fid = fopen(filename);
wolffd@0	63 text = char(fread(fid))';
wolffd@0	64 fclose(fid);
wolffd@0	65 case 'text'
wolffd@0	66 text = varargin{1};
wolffd@0	67 otherwise
wolffd@0	68 error('Invalid 2nd argument %s. Valid options are ''fromfile'' and ''text''',varargin{2})
wolffd@0	69 end
wolffd@0	70 else
wolffd@0	71 error('CSV2CELL requires 1 or 2 arguments.')
wolffd@0	72 end
wolffd@0	73
wolffd@0	74
wolffd@0	75 % First split it into lines
wolffd@0	76 lines = regexp(text,'(\r\n\|[\r\n])','split'); % lines should now be a cell array of text split by newlines
wolffd@0	77
wolffd@0	78 % a character is either a delimiter or a field
wolffd@0	79 inField = true;
wolffd@0	80 inQuoteField = false;
wolffd@0	81 % if inField && ~inQuoteField --> then we're in a raw field
wolffd@0	82
wolffd@0	83 skipNext = false;
wolffd@0	84 data = {};
wolffd@0	85 field = '';
wolffd@0	86 for lineNumber = 1:length(lines)
wolffd@0	87 nChars = length(lines{lineNumber}); % number of characters in this line
wolffd@0	88 fieldNumber = 1;
wolffd@0	89 for charNumber = 1:nChars
wolffd@0	90 if skipNext
wolffd@0	91 skipNext = false;
wolffd@0	92 continue
wolffd@0	93 end
wolffd@0	94 thisChar = lines{lineNumber}(charNumber);
wolffd@0	95 if thisChar == ','
wolffd@0	96 if inField
wolffd@0	97 if inQuoteField % this comma is part of the field
wolffd@0	98 field(end+1) = thisChar;
wolffd@0	99 else % this comma is the delimiter marking the end of the field
wolffd@0	100 data{lineNumber,fieldNumber} = field;
wolffd@0	101 field = '';
wolffd@0	102 fieldNumber = fieldNumber + 1;
wolffd@0	103 end
wolffd@0	104 else % we are not currently in a field -- this is the start of a new delimiter
wolffd@0	105 inField = true;
wolffd@0	106 end
wolffd@0	107 if charNumber == nChars % this is a hanging comma, indicating the last field is blank
wolffd@0	108 data{lineNumber,fieldNumber} = '';
wolffd@0	109 field = '';
wolffd@0	110 fieldNumber = fieldNumber + 1;
wolffd@0	111 end
wolffd@0	112 elseif thisChar == '"'
wolffd@0	113 if inField
wolffd@0	114 if inQuoteField
wolffd@0	115 if charNumber == nChars % it's the last character, so this must be the closing delimiter?
wolffd@0	116 inField = false;
wolffd@0	117 inQuoteField = false;
wolffd@0	118 data{lineNumber,fieldNumber} = field;
wolffd@0	119 field = '';
wolffd@0	120 fieldNumber = fieldNumber + 1;
wolffd@0	121 else
wolffd@0	122 if lines{lineNumber}(charNumber+1) == '"' % this is translated to be a double quote in the field
wolffd@0	123 field(end+1) = '"';
wolffd@0	124 skipNext = true;
wolffd@0	125 else % this " is the delimiter ending this field
wolffd@0	126 data{lineNumber,fieldNumber} = field;
wolffd@0	127 field = '';
wolffd@0	128 inField = false;
wolffd@0	129 inQuoteField = false;
wolffd@0	130 fieldNumber = fieldNumber + 1;
wolffd@0	131 end
wolffd@0	132 end
wolffd@0	133 else % this is a delimiter and we are in a new quote field
wolffd@0	134 inQuoteField = true;
wolffd@0	135 end
wolffd@0	136 else % we are not in a field. This must be an opening quote for the first field?
wolffd@0	137 inField = true;
wolffd@0	138 inQuoteField = true;
wolffd@0	139 end
wolffd@0	140 else % any other character ought to be added to field
wolffd@0	141 field(end+1) = thisChar;
wolffd@0	142 if charNumber == nChars
wolffd@0	143 data{lineNumber,fieldNumber} = field;
wolffd@0	144 field = '';
wolffd@0	145 fieldNumber = fieldNumber + 1;
wolffd@0	146 elseif charNumber == 1 % we are starting a new raw field
wolffd@0	147 inField = true;
wolffd@0	148 end
wolffd@0	149 end
wolffd@0	150 end
wolffd@0	151 end
wolffd@0	152

Mercurial > hg > camir-aes2014

annotate core/tools/csv2cell.m @ 0:e9a9cd732c1e tip