%[ANR,SNR,STR] = ASORT(INP,'OPT',...);
% S             = ASORT(INP,'OPT',...);
%                 to sort alphanumeric strings numerically if
%                 they contain one properly formatted number
%                 otherwise, ascii dictionary sorting is applied
%
% INP   unsorted input:
%       - a char array
%       - a cell array of strings
% OPT   options
%  -s   - sorting option
%         '-s','ascend'                                 [def]
%         '-s','descend'
%  -st  - force output form S                           [def: nargout dependent]
%  -t   - replace matching template(s) with one space
%         prior to sorting
%         '-t','template'
%         '-t',{'template1','template2',...}
%  -w   - remove space(s) prior to sorting
%
%         NOTE  -t/-w options are processed in the
%               order that they appear in
%               the command line
%
%  -v   - verbose output                                [def: quiet]
%  -d   - debug mode
%         save additional output in S
%         .c:   lex parser input
%         .t:   lex parser table
%         .n:   lex parser output
%         .d:   numbers read from .n
%
% ANR   numerically sorted alphanumeric strings         [eg, 'f.-1.5e+2x.x']
%       - contain one number that can be read by
%         <strread> | <sscanf>
% SNR   ascii dict sorted alphanumeric strings
% http://www.mathworks.com/matlabcentral/fileexchange/loadFile.do?objectId=7212#
%
%       - contain more than one number                  [eg, 'f.-1.5e +2.x']
%       - contain incomplete|ambiguous numbers          [eg, 'f.-1.5e+2.x']
% STR   ascii dict sorted strings
%       - contain no numbers                            [eg, 'a test']
%
% S     structure with fields
%       .anr
%       .snr
%       .str

% created:
%       us      03-Mar-2002
% modified:
%       us      30-Mar-2005 11:57:07    / TMW R14.sp2

%--------------------------------------------------------------------------------
function varargout=asort(inp,varargin)
% - main entry point
%   splits the input strings into three groups
%     anr: strings holding exactly one readable number, sorted by that number
%     snr: strings holding numbers that cannot be used, ascii sorted
%     str: strings holding no number, ascii sorted
%   and returns them as a structure or as up to three separate outputs

varargout(1:nargout)={[]};
if ~nargin
    help(mfilename);
    return;
end

% - common parameters/options
n=[];
ds=[];
anr={};
snr={};
str={};
smod='ascend';  % sorting option
tmpl={};        % template(s)
sflg=false;     % output mode: structure
tflg=false;     % remove template(s): false or position of the last '-t'
dflg=false;     % debug mode
vflg=false;     % verbose output
wflg=false;     % remove spaces: false or position of the last '-w'

if nargin > 1
    % options taking a value use the last occurrence and are only
    % accepted if a value actually follows the option
    ix=find(strcmp('-s',varargin));
    if ~isempty(ix) && nargin > ix(end)+1
        smod=varargin{ix(end)+1};
    end
    ix=find(strcmp('-t',varargin));
    if ~isempty(ix) && nargin > ix(end)+1
        tflg=ix(end);
        tmpl=varargin{ix(end)+1};
    end
    if any(strcmp('-d',varargin))
        dflg=true;
    end
    if any(strcmp('-st',varargin))
        sflg=true;
    end
    if any(strcmp('-v',varargin))
        vflg=true;
    end
    ix=find(strcmp('-w',varargin));
    if ~isempty(ix)
        wflg=ix(end);
    end
end
% spec numbers
ntmpl={
    ' inf '
    '+inf '
    '-inf '
    ' nan '
    '+nan '
    '-nan '
    };
% spec chars
ctmpl={
    '.' % decimal point
    'd' % exponent
    'e' % exponent
    };

% - the unsorted input is the fallback output for all early returns below
if nargout <= 3
    varargout{1}=inp;
else
    disp(sprintf('ASORT> too many output args [%-1d/%-1d]\n',nargout,3));
    help(mfilename);
    return;
end
if isempty(inp)
    disp(sprintf('ASORT> input is empty'));
    return;
end

ti=clock;
winp=whos('inp');   % class/size/bytes of the input as passed by the caller
switch winp.class
    case 'cell'
        if ~iscellstr(inp)
            disp(sprintf('ASORT> cell is not an array of strings'));
            return;
        end
        % note: no sorting here, it is done below once the
        %       templates/spaces have been removed
    case 'char'
        % one cell per row of the char array
        inp=cstr(inp);
    otherwise
        disp(sprintf('ASORT> does not sort input of class <%s>',winp.class));
        return;
end

% - ascii dictionary sort of the (cleaned) input
inp=inp(:);
inp=setinp(inp,tmpl,[tflg wflg]);
[ins,inx]=sort(inp);
if strcmp(smod,'descend')
    inx=inx(end:-1:1);
end
ins=inp(inx);
c=lower(char(ins));     % lower case: 'E'/'D'/'INF'/'NAN' match the templates
wins=whos('c');
cr=size(c,1);

% - LEXICAL PARSER
%--------------------------------------------------------------------------------
% - extend input on either side for search
c=[' '*ones(cr,2) c ' '*ones(cr,2)];

% - search for valid alphanumeric items in strings
%   t is a logical table of the same size as c
%   true marks a character that belongs to a number
% numbers/signs
t=(c>='0'&c<='9');
t=t|c=='-';
t=t|c=='+';
[tr,tc]=size(t);
% decimal points
% note: valid numbers with dec points must follow these templates
%       nr.nr
%       sign.nr
%       nr.
%       .nr
ix1=    t(:,1:end-2) & ...
        ~isletter(c(:,1:end-2)) & ...
        c(:,2:end-1)=='.';
t(:,2:end-1)=t(:,2:end-1)|ix1;
ix1=    (t(:,3:end) & ...
        (~isletter(c(:,3:end)) & ...
        ~isletter(c(:,1:end-2))) | ...
        (c(:,3:end)=='e' | ...
        c(:,3:end)=='d')) & ...
        c(:,2:end-1)=='.';
t(:,2:end-1)=t(:,2:end-1)|ix1;
% t(:,3:end)=t(:,3:end)|ix1;
% signs
%   drop all signs, then re-admit those directly followed by a marked char
t(c=='-')=false;
t(c=='+')=false;
ix1=    t(:,3:end) & ...
        (c(:,2:end-1)=='-' | ...
        c(:,2:end-1)=='+');
t(:,2:end-1)=t(:,2:end-1)|ix1;
% exponents
%   an 'e'/'d' directly preceded by a marked char
ix1=    t(:,1:end-2) & ...
        (c(:,2:end-1)=='e' | ...
        c(:,2:end-1)=='d');
t(:,2:end-1)=t(:,2:end-1)|ix1;
% spec numbers
%   search in the row-wise flattened text; t is transposed so that its
%   linear indices agree with the positions found in the flattened text
c=reshape(c.',1,[]);
t=t';
ic=[];
for j=1:numel(ntmpl)
    ic=[ic,strfind(c,ntmpl{j})];
end
ic=sort(ic);
for i=1:numel(ic)
    ix=ic(i)+0:ic(i)+4;     % all templates are five chars long
    t(ix)=true;
end
t=t';
c=reshape(c.',[tc,tr]).';
t(c==' ')=false;
%--------------------------------------------------------------------------------

% - only allow one number per string
il=~any(t,2);                               % rows without any number char
ib=strfind(reshape(t.',1,[]),[0 1]);        % begin of each marked run
if ~isempty(ib)
    ixe=cell(3,1);
    n=reshape(char(t.*c).',1,[]);           % marked chars only, row-wise
    % rows with more than one occurrence of the same spec char
    for i=1:numel(ctmpl)
        id=strfind(n,ctmpl{i});
        if ~isempty(id)
            [dum,dum,ixu{i},ixe{i}]=dupinx(id,tc);
        end
    end
    in=false(tr,1);
    im=in;
    % must check for anomalous cases like <'.d'>
    %   rows must hold at least one digit or inf/nan ...
    id=sort(...
        [find(n>='0' & n<='9'),...
        strfind(n,'inf'),...
        strfind(n,'nan')]);
    % [ibu,ibd,ixbu,ixe{i+1}]=dupinx(id,tc);
    [ibu,ibd,ixbu,ixbd]=dupinx(id,tc);
    in(ixbu)=true;
    in(ixbd)=true;
    %   ... and exactly one marked run
    [ibu,ibd,ixbu,ixbd]=dupinx(ib,tc);
    im(ixbu)=true;
    in=in&im;
    in([ixe{:}])=false;
    il=~any(t,2);
    ia=~(in|il);                            % rows with unusable number(s)

    % - read valid strings
    n=t(in,:).*c(in,:);
    n(n==0)=' ';
    n=char(n);
    dn=strread(n.','%n');
    if numel(dn) ~= numel(find(in))
        % the reader did not return one number per row:
        % hand back the parser state instead of a result
        %disp(sprintf('ASORT> unexpected fatal error reading input!'));
        if nargout
            s.c=c;
            s.t=t;
            s.n=n;
            s.d=dn;
            varargout{1}=s;
        end
        return;
    end

    % - sort numbers
    [ds,dx]=sort(dn,1,smod);
    in=find(in);
    anr=ins(in(dx));
    snr=ins(ia);
end
str=ins(il);
to=clock;

% - prepare output
if nargout < 3 || sflg
    s.magic='ASORT';
    s.ver='30-Mar-2005 11:57:07';
    s.time=datestr(clock);
    s.runtime=etime(to,ti);
    s.input_class=winp.class;
    s.input_msize=winp.size;
    s.input_bytes=winp.bytes;
    s.strng_class=wins.class;
    s.strng_msize=wins.size;
    s.strng_bytes=wins.bytes;
    s.anr=anr;
    s.snr=snr;
    s.str=str;
    if dflg
        s.c=c;
        s.t=t;
        s.n=n;
        s.d=ds;
    end
    varargout{1}=s;
else
    s={anr,snr,str};
    for i=1:nargout
        varargout{i}=s{i};
    end
end

if vflg
    % side-by-side table: input / ascii sort / num sort / numbers read
    inp=cstr(inp);
    an=[{'--- NUMERICAL'}; anr];
    as=[{'--- ASCII NUMBERS'}; snr];
    at=[{'--- ASCII STRINGS'}; str];
    nn=[{'--- NUMBERS'}; num2cell(ds)];
    ag={' ';' ';' '};
    u=[{'INPUT'}; inp;ag];
    v=[{'ASCII SORT'}; ins;ag];
    w=[{'NUM SORT'}; an;as;at];
    x=[{'NUM READ'}; nn;as;at];
    w=[u,v,w,x];
    disp(w);
end

return;
%--------------------------------------------------------------------------------
function c=cstr(s)
% - convert a char array into a column cell array with one cell per row
%   any other input is returned unchanged
%   bottleneck waiting for a good replacement
%   it consumes ~75% of <asort>'s processing time!

c=s;
if ischar(s)
    sr=size(s,1);
    c=cell(sr,1);
    for i=1:sr
        c{i}=s(i,:);    % no deblanking!
    end
end
return;
%--------------------------------------------------------------------------------
function [idu,idd,ixu,ixd]=dupinx(ix,nc)
% - check for more than one entry/row in a matrix of column size <nc>
%   ix holds sorted linear indices into the row-wise flattened matrix
%   unique    indices: idu / ixu   rows holding exactly one entry
%   duplicate indices: idd / ixd   rows holding more than one entry
%   idu/idd are logical masks into ix, ixu/ixd are row numbers

if isempty(ix)
    idu=[];
    idd=[];
    ixu=[];
    ixd=[];
    return;
end
% row of each index
% note: (ix-1) keeps an index in the last column in its own row
%       the former fix(ix/nc)+1 moved it to the next row; asort never hit
%       this because the last columns of its tables are padding
id=fix((ix-1)/nc)+1;
idi=diff(id)~=0;
ide=[true idi];     % entry is the first one of its row
idb=[idi true];     % entry is the last one of its row
idu=idb & ide;
idd=idb==1 & ide==0;
ixu=id(idu);
ixd=id(idd);
return;
%--------------------------------------------------------------------------------
function inp=setinp(inp,tmpl,flg)
% - remove space(s) and/or templates
%   inp   cell array of strings
%   tmpl  template string or cell array of template strings
%   flg   [tflg wflg]: position of the '-t' / '-w' option in the
%         command line, 0|false if the option was not selected
%   the selected actions run in the order of their command line position

if isempty(inp) || ~any(flg)
    return;
end

% only loop over selected options
% note: looping over a zero entry matched the OTHER, unselected flag,
%       hence, '-t' alone also removed all spaces
for k=sort(flg(flg>0))
    if k == flg(1)
        % replace each template with one space
        if ischar(tmpl)
            tmpl={tmpl};
        end
        for j=1:numel(tmpl)
            inp=strrep(inp,tmpl{j},' ');
        end
    elseif k == flg(2)
        % remove all spaces
        inp=strrep(inp,' ','');
    end
end
return;
%--------------------------------------------------------------------------------