%[ANR,SNR,STR] = ASORT(INP,'OPT',...);
% S             = ASORT(INP,'OPT',...);
%                 to sort alphanumeric strings numerically if
%                 they contain one properly formatted number
%                 otherwise, ascii dictionary sorting is applied
%
% INP   unsorted input:
%       - a char array
%       - a cell array of strings
% OPT   options
%  -s   - sorting option
%         '-s','ascend'                         [def]
%         '-s','descend'
% -st   - force output form S                   [def: nargout dependent]
%  -t   - replace matching template(s) with one space
%         prior to sorting
%         '-t','template'
%         '-t',{'template1','template2',...}
%  -w   - remove space(s) prior to sorting
%
%         NOTE  -t/-w options are processed in the
%               order that they appear in
%               the command line
%
%  -v   - verbose output                        [def: quiet]
%  -d   - debug mode
%         save additional output in S
%         .c:   lex parser input
%         .t:   lex parser table
%         .n:   lex parser output
%         .d:   numbers read from .n
%
% ANR   numerically sorted alphanumeric strings [eg, 'f.-1.5e+2x.x']
%       - contain one number that can be read by
%         <strread>
% SNR   ascii dict sorted alphanumeric strings
% http://www.mathworks.com/matlabcentral/fileexchange/loadFile.do?objectId=7212#
%
%       - contain more than one number          [eg, 'f.-1.5e +2.x']
%       - contain incomplete|ambiguous numbers  [eg, 'f.-1.5e+2.x']
% STR   ascii dict sorted strings
%       - contain no numbers                    [eg, 'a test']
%
% S     structure with fields
%       .anr
%       .snr
%       .str

% created:
%       us      03-Mar-2002
% modified:
%       us      30-Mar-2005 11:57:07    / TMW R14.sp2

%--------------------------------------------------------------------------------
function varargout=asort(inp,varargin)

% - entry point
%   splits the input strings into three groups and sorts each of them:
%     anr: strings holding exactly one readable number -> sorted by that number
%     snr: strings holding several/ambiguous numbers   -> ascii sorted
%     str: strings holding no number                   -> ascii sorted
%   returns either the three cell arrays or (nargout < 3 or '-st') one
%   structure holding them together with some bookkeeping fields

    varargout(1:nargout)={[]};
    if ~nargin
        help(mfilename);
        return;
    end

% - common parameters/options
    n=[];
    ds=[];
    anr={};
    snr={};
    str={};
    smod='ascend';  % sorting option
    tmpl={};        % template(s)
    sflg=false;     % output mode: structure
    tflg=false;     % remove template(s); holds the position of '-t' in varargin if given
    dflg=false;     % debug mode
    vflg=false;     % verbose output
    wflg=false;     % remove spaces;      holds the position of '-w' in varargin if given

    if nargin > 1
        % options taking a value are only honoured if a value follows them
        ix=find(strcmp('-s',varargin));
        if ~isempty(ix) && nargin > ix(end)+1
            smod=varargin{ix(end)+1};
        end
        ix=find(strcmp('-t',varargin));
        if ~isempty(ix) && nargin > ix(end)+1
            tflg=ix(end);
            tmpl=varargin{ix(end)+1};
        end
        if find(strcmp('-d',varargin));
            dflg=true;
        end
        if find(strcmp('-st',varargin));
            sflg=true;
        end
        if find(strcmp('-v',varargin));
            vflg=true;
        end
        ix=find(strcmp('-w',varargin));
        if ~isempty(ix)
            wflg=ix(end);
        end
    end
% spec numbers
    ntmpl={
        ' inf '
        '+inf '
        '-inf '
        ' nan '
        '+nan '
        '-nan '
    };
% spec chars
    ctmpl={
        '.'     % decimal point
        'd'     % exponent
        'e'     % exponent
    };

    if nargout <= 3
        % default result (also used by the early returns below): the input itself
        varargout{1}=inp;
    else
        disp(sprintf('ASORT> too many output args [%-1d/%-1d]\n',nargout,3));
        help(mfilename);
        return;
    end
    if isempty(inp)
        disp(sprintf('ASORT> input is empty'));
        return;
    end

    ti=clock;
    winp=whos('inp');
    switch winp.class
    case 'cell'
        if ~iscellstr(inp)
            disp(sprintf('ASORT> cell is not an array of strings'));
            return;
        end
    case 'char'
        % one cell per row, trailing blanks are kept
        inp=cstr(inp);
    otherwise
        disp(sprintf('ASORT> does not sort input of class <%s>',winp.class));
        return;
    end

    inp=inp(:);
    inp=setinp(inp,tmpl,[tflg wflg]);
    % ascii dictionary sort; only the permutation is needed here
    [ins,inx]=sort(inp);
    if strcmp(smod,'descend')
        inx=inx(end:-1:1);
    end
    ins=inp(inx);
    c=lower(char(ins));
    wins=whos('c');
    [cr,cc]=size(c);

% - LEXICAL PARSER
%--------------------------------------------------------------------------------
% - extend input on either side for search
    c=[' '*ones(cr,2) c ' '*ones(cr,2)];

% - search for valid alphanumeric items in strings
%   t is a logical table of the same size as c marking chars that belong
%   to a number
%   numbers/signs
    t=(c>='0'&c<='9');
    t=t|c=='-';
    t=t|c=='+';
    [tr,tc]=size(t);
%   decimal points
%   note: valid numbers with dec points must follow these templates
%       nr.nr
%       sign.nr
%       nr.
%       .nr
    ix1=     t(:,1:end-2) & ...
            ~isletter(c(:,1:end-2)) & ...
             c(:,2:end-1)=='.';
    t(:,2:end-1)=t(:,2:end-1)|ix1;
    ix1=    (t(:,3:end) & ...
            (~isletter(c(:,3:end)) & ...
            ~isletter(c(:,1:end-2))) | ...
            (c(:,3:end)=='e' | ...
             c(:,3:end)=='d')) & ...
             c(:,2:end-1)=='.';
    t(:,2:end-1)=t(:,2:end-1)|ix1;
%   t(:,3:end)=t(:,3:end)|ix1;
%   signs
%   a sign only counts if it is immediately followed by a marked char
    t(c=='-')=false;
    t(c=='+')=false;
    ix1=     t(:,3:end) & ...
            (c(:,2:end-1)=='-' | ...
             c(:,2:end-1)=='+');
    t(:,2:end-1)=t(:,2:end-1)|ix1;
%   exponents
%   an 'e'/'d' only counts if it is immediately preceded by a marked char
    ix1=     t(:,1:end-2) & ...
            (c(:,2:end-1)=='e' | ...
             c(:,2:end-1)=='d');
    t(:,2:end-1)=t(:,2:end-1)|ix1;
%   spec numbers
%   search the row-wise flattened text for the 5-char inf/nan templates
    c=reshape(c.',1,[]);
    t=t';
    ic=[];
    for j=1:numel(ntmpl)
        ic=[ic,strfind(c,ntmpl{j})];
    end
    ic=sort(ic);
    for i=1:numel(ic)
        ix=ic(i)+0:ic(i)+4;
        t(ix)=true;
    end
    t=t';
    c=reshape(c.',[tc,tr]).';
    t(c==' ')=false;
%--------------------------------------------------------------------------------

% - only allow one number per string
    il=~any(t,2);
    % ib: linear (row-wise) positions where a marked run starts
    ib=strfind(reshape(t.',1,[]),[0 1]);
    if ~isempty(ib)
        ixe=cell(3,1);
        n=reshape(char(t.*c).',1,[]);
        % rows holding more than one '.', 'd' or 'e' inside marked chars
        for i=1:numel(ctmpl)
            id=strfind(n,ctmpl{i});
            if ~isempty(id)
                [dum,dum,dum,ixe{i}]=dupinx(id,tc);
            end
        end
        in=false(tr,1);
        im=in;
% must check for anomalous cases like <'.d'>
        id=sort(...
            [find(n>='0' & n<='9'),...
             strfind(n,'inf'),...
             strfind(n,'nan')]);
%       [ibu,ibd,ixbu,ixe{i+1}]=dupinx(id,tc);
        % in: rows holding at least one digit/inf/nan
        [ibu,ibd,ixbu,ixbd]=dupinx(id,tc);
        in(ixbu)=true;
        in(ixbd)=true;
        % im: rows holding exactly one marked run
        [ibu,ibd,ixbu,ixbd]=dupinx(ib,tc);
        im(ixbu)=true;
        in=in&im;
        in([ixe{:}])=false;
        il=~any(t,2);
        ia=~(in|il);

% - read valid strings
        n=t(in,:).*c(in,:);
        n(n==0)=' ';
        n=char(n);
        % NOTE(review): <strread> is flagged as not recommended in current
        % MATLAB releases; kept to preserve the number syntax it accepts
        dn=strread(n.','%n');
        if numel(dn) ~= numel(find(in))
            %disp(sprintf('ASORT> unexpected fatal error reading input!'));
            if nargout
                s.c=c;
                s.t=t;
                s.n=n;
                s.d=dn;
                varargout{1}=s;
            end
            return;
        end

% - sort numbers
        [ds,dx]=sort(dn,1,smod);
        in=find(in);
        anr=ins(in(dx));
        snr=ins(ia);
    end
    str=ins(il);
    to=clock;

% - prepare output
    if nargout < 3 || sflg
        s.magic='ASORT';
        s.ver='30-Mar-2005 11:57:07';
        s.time=datestr(clock);
        s.runtime=etime(to,ti);
        s.input_class=winp.class;
        s.input_msize=winp.size;
        s.input_bytes=winp.bytes;
        s.strng_class=wins.class;
        s.strng_msize=wins.size;
        s.strng_bytes=wins.bytes;
        s.anr=anr;
        s.snr=snr;
        s.str=str;
        if dflg
            s.c=c;
            s.t=t;
            s.n=n;
            s.d=ds;
        end
        varargout{1}=s;
    else
        s={anr,snr,str};
        for i=1:nargout
            varargout{i}=s{i};
        end
    end

    if vflg
        % four-column table: input / ascii sort / final sort / numbers read
        inp=cstr(inp);
        an=[{'--- NUMERICAL'};      anr];
        as=[{'--- ASCII NUMBERS'};  snr];
        at=[{'--- ASCII STRINGS'};  str];
        nn=[{'--- NUMBERS'};        num2cell(ds)];
        ag={' ';' ';' '};
        u=[{'INPUT'};               inp;ag];
        v=[{'ASCII SORT'};          ins;ag];
        w=[{'NUM SORT'};            an;as;at];
        x=[{'NUM READ'};            nn;as;at];
        w=[u,v,w,x];
        disp(w);
    end

    return;
%--------------------------------------------------------------------------------
function c=cstr(s)
% - convert a char array into a column cell array holding one row per cell
%   anything that is not a char array is returned unchanged
%   note: the author flagged this loop as the main bottleneck

    c=s;
    if ischar(s)
        sr=size(s,1);
        c=cell(sr,1);
        for i=1:sr
            c{i}=s(i,:);    % no deblanking!
        end
    end
    return;
%--------------------------------------------------------------------------------
function [idu,idd,ixu,ixd]=dupinx(ix,nc)
% - check for more than one entry/row in a matrix of column size <nc>
%   ix      sorted linear indices into the row-wise flattened matrix
%   unique    indices:  idu / ixu   rows hit exactly once
%   duplicate indices:  idd / ixd   rows hit more than once (reported once)
%   idu/idd are logical masks over ix, ixu/ixd the matching row numbers
%
%   note: fix(ix/nc)+1 yields the row only for indices that are not in
%         the last column; asort pads each row with blanks that are never
%         marked, so its indices never fall there

    if isempty(ix)
        idu=[];
        idd=[];
        ixu=[];
        ixd=[];
        return;
    end
    id=fix(ix/nc)+1;
    idi=diff(id)~=0;
    ide=[true idi];     % first entry of each row run
    idb=[idi true];     % last  entry of each row run
    idu=idb & ide;
    idd=idb==1 & ide==0;
    ixu=id(idu);
    ixd=id(idd);
    return;
%--------------------------------------------------------------------------------
function inp=setinp(inp,tmpl,flg)
% - remove space(s) and/or templates
%   inp     cell array of strings
%   tmpl    template string or cell array of template strings
%   flg     [tpos wpos]: position of the '-t' / '-w' option in the command
%           line, 0|false if the option was not given
%   the requested operations are applied in command line order
%
%   only options with a position > 0 are processed; iterating over a zero
%   entry used to match the flag of the option that was NOT given and,
%   with '-t' alone, stripped all spaces as if '-w' had been requested

    if isempty(inp) || ~any(flg)
        return;
    end
    if ischar(tmpl)
        tmpl={tmpl};
    end

    for pos=sort(flg(flg>0))
        if pos == flg(1)
            % '-t': replace each template with one space
            for k=1:numel(tmpl)
                inp=strrep(inp,tmpl{k},' ');
            end
        else
            % '-w': remove all spaces
            inp=strrep(inp,' ','');
        end
    end
    return;
%--------------------------------------------------------------------------------