%[ANR,SNR,STR] = ASORT(INP,'OPT',...);
% S            = ASORT(INP,'OPT',...);
%   to sort alphanumeric strings numerically if
%   they contain one properly formatted number
%   otherwise, ascii dictionary sorting is applied
%
% INP   unsorted input:
%       - a char array
%       - a cell array of strings
% OPT   options
%  -s   - sorting option
%         '-s','ascend'                       [def]
%         '-s','descend'
%  -st  - force output form S                 [def: nargout dependent]
%         (S is also returned whenever fewer than three
%          output arguments are requested)
%  -t   - replace matching template(s) with one space
%         prior to sorting
%         '-t','template'
%         '-t',{'template1','template2',...}
%  -w   - remove space(s) prior to sorting
%
%       NOTE    -t/-w options are processed in the
%               order that they appear in
%               the command line
%
%  -v   - verbose output                      [def: quiet]
%  -d   - debug mode
%         save additional output in S
%         .c:   lex parser input
%         .t:   lex parser table
%         .n:   lex parser output
%         .d:   numbers read from .n
%
% ANR   numerically sorted alphanumeric strings [eg, 'f.-1.5e+2x.x']
%       - contain one number that can be read by STRREAD
% SNR   ascii dict sorted alphanumeric strings
%       - contain more than one number          [eg, 'f.-1.5e +2.x']
%       - contain incomplete|ambiguous numbers  [eg, 'f.-1.5e+2.x']
% STR   ascii dict sorted strings
%       - contain no numbers                    [eg, 'a test']
%
% S     structure with fields
%       .anr
%       .snr
%       .str
%
% URL   http://www.mathworks.com/matlabcentral/fileexchange/loadFile.do?objectId=7212#

% created:
%   us  03-Mar-2002
% modified:
%   us  30-Mar-2005 11:57:07    / TMW R14.sp2

%--------------------------------------------------------------------------------
function varargout=asort(inp,varargin)
% - main entry point: parses the options, runs the lexical parser that
%   marks the number-forming characters of every string, and splits the
%   (ascii-sorted) input into ANR / SNR / STR as described in the help text

varargout(1:nargout)={[]};
if ~nargin
    help(mfilename);
    return;
end

% - common parameters/options
n=[];
ds=[];
anr={};
snr={};
str={};
smod='ascend';  % sorting option
tmpl={};        % template(s)
sflg=false;     % output mode: structure
tflg=false;     % remove template(s): position of '-t' in varargin, or false
dflg=false;     % debug mode
vflg=false;     % verbose output
wflg=false;     % remove spaces: position of '-w' in varargin, or false

if nargin > 1
    % options taking a value need one more argument after the option name
    ix=find(strcmp('-s',varargin));
    if ~isempty(ix) && nargin > ix(end)+1
        smod=varargin{ix(end)+1};
    end
    ix=find(strcmp('-t',varargin));
    if ~isempty(ix) && nargin > ix(end)+1
        tflg=ix(end);
        tmpl=varargin{ix(end)+1};
    end
    if any(strcmp('-d',varargin))
        dflg=true;
    end
    if any(strcmp('-st',varargin))
        sflg=true;
    end
    if any(strcmp('-v',varargin))
        vflg=true;
    end
    ix=find(strcmp('-w',varargin));
    if ~isempty(ix)
        wflg=ix(end);
    end
end

% spec numbers (each template is exactly 5 chars, see the marking loop below)
ntmpl={
    ' inf '
    '+inf '
    '-inf '
    ' nan '
    '+nan '
    '-nan '
};
% spec chars
ctmpl={
    '.'     % decimal point
    'd'     % exponent
    'e'     % exponent
};

if nargout <= 3
    % default return value for all early exits below: the untouched input
    varargout{1}=inp;
else
    disp(sprintf('ASORT> too many output args [%-1d/%-1d]\n',nargout,3));
    help(mfilename);
    return;
end
if isempty(inp)
    disp(sprintf('ASORT> input is empty'));
    return;
end

ti=clock;
winp=whos('inp');   % class/size/bytes of the original input (reported in S)
switch winp.class
    case 'cell'
        if ~iscellstr(inp)
            disp(sprintf('ASORT> cell is not an array of strings'));
            return;
        end
    case 'char'
        inp=cstr(inp);
    otherwise
        disp(sprintf('ASORT> does not sort input of class <%s>',winp.class));
        return;
end

% - ascii dictionary sort of the (optionally preprocessed) strings
inp=inp(:);
inp=setinp(inp,tmpl,[tflg wflg]);
[ins,inx]=sort(inp);
if strcmp(smod,'descend')
    ins=ins(end:-1:1,:);
    inx=inx(end:-1:1);
end
ins=inp(inx);
c=lower(char(ins));
wins=whos('c');     % class/size/bytes of the char matrix (reported in S)
[cr,cc]=size(c);

% - LEXICAL PARSER
%--------------------------------------------------------------------------------
% - extend input on either side for search
c=[' '*ones(cr,2) c ' '*ones(cr,2)];

% - search for valid alphanumeric items in strings
%   t is a logical table of the same size as c marking number characters
%   numbers/signs
t=(c>='0'&c<='9');
t=t|c=='-';
t=t|c=='+';
[tr,tc]=size(t);
%   decimal points
%   note: valid numbers with dec points must follow these templates
%       nr.nr
%       sign.nr
%       nr.
%       .nr
ix1=    t(:,1:end-2) & ...
        ~isletter(c(:,1:end-2)) & ...
        c(:,2:end-1)=='.';
t(:,2:end-1)=t(:,2:end-1)|ix1;
ix1=    (t(:,3:end) & ...
        (~isletter(c(:,3:end)) & ...
        ~isletter(c(:,1:end-2))) | ...
        (c(:,3:end)=='e' | ...
        c(:,3:end)=='d')) & ...
        c(:,2:end-1)=='.';
t(:,2:end-1)=t(:,2:end-1)|ix1;
%   t(:,3:end)=t(:,3:end)|ix1;
%   signs: only keep a sign that is directly followed by a marked char
t(c=='-')=false;
t(c=='+')=false;
ix1=    t(:,3:end) & ...
        (c(:,2:end-1)=='-' | ...
        c(:,2:end-1)=='+');
t(:,2:end-1)=t(:,2:end-1)|ix1;
%   exponents: 'e'/'d' directly preceded by a marked char
ix1=    t(:,1:end-2) & ...
        (c(:,2:end-1)=='e' | ...
        c(:,2:end-1)=='d');
t(:,2:end-1)=t(:,2:end-1)|ix1;
%   spec numbers: searched in the row-wise flattened text
c=reshape(c.',1,[]);
t=t';
ic=[];
for j=1:numel(ntmpl)
    ic=[ic,strfind(c,ntmpl{j})];
end
ic=sort(ic);
for i=1:numel(ic)
    ix=ic(i)+0:ic(i)+4;
    t(ix)=true;
end
t=t';
c=reshape(c.',[tc,tr]).';
t(c==' ')=false;
%--------------------------------------------------------------------------------

% - only allow one number per string
il=~any(t,2);                               % rows without any number char
ib=strfind(reshape(t.',1,[]),[0 1]);        % start of each marked run
if ~isempty(ib)
    ixe=cell(numel(ctmpl),1);
    n=reshape(char(t.*c).',1,[]);
    % rows holding more than one '.', 'd' or 'e' inside marked text
    for i=1:numel(ctmpl)
        id=strfind(n,ctmpl{i});
        if ~isempty(id)
            [dum,dum,ixu{i},ixe{i}]=dupinx(id,tc);
        end
    end
    in=false(tr,1);
    im=in;
    %   must check for anomalous cases like <'.d'>
    id=sort(...
        [find(n>='0' & n<='9'),...
        strfind(n,'inf'),...
        strfind(n,'nan')]);
    %   [ibu,ibd,ixbu,ixe{i+1}]=dupinx(id,tc);
    [ibu,ibd,ixbu,ixbd]=dupinx(id,tc);
    in(ixbu)=true;                          % rows with a digit/inf/nan
    in(ixbd)=true;
    [ibu,ibd,ixbu,ixbd]=dupinx(ib,tc);
    im(ixbu)=true;                          % rows with exactly one marked run
    in=in&im;
    in([ixe{:}])=false;
    il=~any(t,2);
    ia=~(in|il);                            % rows with numbers that are not readable

    % - read valid strings
    n=t(in,:).*c(in,:);
    n(n==0)=' ';
    n=char(n);
    % NOTE(review): STRREAD is flagged as not recommended in newer MATLAB
    % releases; kept here to preserve the exact number parsing
    dn=strread(n.','%n');
    if numel(dn) ~= numel(find(in))
        %disp(sprintf('ASORT> unexpected fatal error reading input!'));
        if nargout
            s.c=c;
            s.t=t;
            s.n=n;
            s.d=dn;
            varargout{1}=s;
        end
        return;
    end

    % - sort numbers
    [ds,dx]=sort(dn,1,smod);
    in=find(in);
    anr=ins(in(dx));
    snr=ins(ia);
end
str=ins(il);
to=clock;

% - prepare output
if nargout < 3 || sflg
    s.magic='ASORT';
    s.ver='30-Mar-2005 11:57:07';
    s.time=datestr(clock);
    s.runtime=etime(to,ti);
    s.input_class=winp.class;
    s.input_msize=winp.size;
    s.input_bytes=winp.bytes;
    s.strng_class=wins.class;
    s.strng_msize=wins.size;
    s.strng_bytes=wins.bytes;
    s.anr=anr;
    s.snr=snr;
    s.str=str;
    if dflg
        s.c=c;
        s.t=t;
        s.n=n;
        s.d=ds;
    end
    varargout{1}=s;
else
    s={anr,snr,str};
    for i=1:nargout
        varargout{i}=s{i};
    end
end

if vflg
    inp=cstr(inp);
    an=[{'--- NUMERICAL'}; anr];
    as=[{'--- ASCII NUMBERS'}; snr];
    at=[{'--- ASCII STRINGS'}; str];
    nn=[{'--- NUMBERS'}; num2cell(ds)];
    ag={' ';' ';' '};
    u=[{'INPUT'}; inp;ag];
    v=[{'ASCII SORT'}; ins;ag];
    w=[{'NUM SORT'}; an;as;at];
    x=[{'NUM READ'}; nn;as;at];
    w=[u,v,w,x];
    disp(w);
end

return;
%--------------------------------------------------------------------------------
function c=cstr(s)
% - convert a char array into a column cell array with one row per cell
%   (trailing blanks are kept); any other input is returned unchanged
% - bottleneck waiting for a good replacement
%   it consumes ~75% of ASORT's processing time!

c=s;
if ischar(s)
    sr=size(s,1);
    c=cell(sr,1);
    for i=1:sr
        c{i}=s(i,:);    % no deblanking!
    end
end
return;
%--------------------------------------------------------------------------------
function [idu,idd,ixu,ixd]=dupinx(ix,nc)
% - check for more than one entry/row in a matrix of column size nc
%   ix are sorted linear indices into the row-wise flattened matrix
%   unique    indices: idu / ixu    (rows hit exactly once)
%   duplicate indices: idd / ixd    (rows hit more than once; flagged
%                                    at the last hit of the row)
%   idu/idd are logical masks over ix, ixu/ixd the matching row numbers

if isempty(ix)
    idu=[];
    idd=[];
    ixu=[];
    ixd=[];
    return;
end
% row of each index; (ix-1) keeps an index in the last column in its own row
id=fix((ix-1)/nc)+1;
idi=diff(id)~=0;
ide=[true idi];     % first hit of a row
idb=[idi true];     % last hit of a row
idu=idb & ide;
idd=idb==1 & ide==0;
ixu=id(idu);
ixd=id(idd);
return;
%--------------------------------------------------------------------------------
function inp=setinp(inp,tmpl,flg)
% - remove space(s) and/or templates
%   flg = [tflg wflg] holds the command line positions of the -t and -w
%   options (0/false if an option was not given); the requested steps are
%   applied in the order in which the options appeared

if isempty(inp) || ~any(flg)
    return;
end
if ischar(tmpl)
    tmpl={tmpl};
end

% only visit options that were really given: an unset flag is 0 and must
% not be mistaken for a match with the other (also unset) flag
for pos=sort(flg(flg~=0))
    if pos==flg(1)
        % -t: replace each template with one space
        for k=1:numel(tmpl)
            inp=strrep(inp,tmpl{k},' ');
        end
    elseif pos==flg(2)
        % -w: remove all spaces
        inp=strrep(inp,' ','');
    end
end
return;
%--------------------------------------------------------------------------------