annotate _FullBNT/KPMtools/asort.m @ 9:4ea6619cb3f5 tip

removed log files
author matthiasm
date Fri, 11 Apr 2014 15:55:11 +0100
parents b5b38998ef3b
children
rev   line source
matthiasm@8 1 %[ANR,SNR,STR] = ASORT(INP,'OPT',...);
matthiasm@8 2 % S = ASORT(INP,'OPT',...);
matthiasm@8 3 % to sort alphanumeric strings numerically if
matthiasm@8 4 % they contain one properly formatted number
matthiasm@8 5 % otherwise, ascii dictionary sorting is applied
matthiasm@8 6 %
matthiasm@8 7 % INP unsorted input:
matthiasm@8 8 % - a char array
matthiasm@8 9 % - a cell array of strings
matthiasm@8 10 % OPT options
matthiasm@8 11 % -s - sorting option
matthiasm@8 12 % '-s','ascend' [def]
matthiasm@8 13 % '-s','descend'
matthiasm@8 14 % -st - force output form S [def: nargout dependent]
matthiasm@8 15 % -t - replace matching template(s) with one space
matthiasm@8 16 % prior to sorting
matthiasm@8 17 % '-t','template'
matthiasm@8 18 % '-t',{'template1','template2',...}
matthiasm@8 19 % -w - remove space(s) prior to sorting
matthiasm@8 20 %
matthiasm@8 21 % NOTE -t/-w options are processed in the
matthiasm@8 22 % order that they appear in
matthiasm@8 23 % the command line
matthiasm@8 24 %
matthiasm@8 25 % -v - verbose output [def: quiet]
matthiasm@8 26 % -d - debug mode
matthiasm@8 27 % save additional output in S
matthiasm@8 28 % .c: lex parser input
matthiasm@8 29 % .t: lex parser table
matthiasm@8 30 % .n: lex parser output
matthiasm@8 31 % .d: numbers read from .n
matthiasm@8 32 %
matthiasm@8 33 % ANR numerically sorted alphanumeric strings [eg, 'f.-1.5e+2x.x']
matthiasm@8 34 % - contain one number that can be read by
matthiasm@8 35 % <strread> | <sscanf>
matthiasm@8 36 % SNR ascii dict sorted alphanumeric strings
matthiasm@8 37 % http://www.mathworks.com/matlabcentral/fileexchange/loadFile.do?objectId=7212#
matthiasm@8 38 %
matthiasm@8 39 % - contain more than one number [eg, 'f.-1.5e +2.x']
matthiasm@8 40 % - contain incomplete|ambiguous numbers [eg, 'f.-1.5e+2.x']
matthiasm@8 41 % STR ascii dict sorted strings
matthiasm@8 42 % - contain no numbers [eg, 'a test']
matthiasm@8 43 %
matthiasm@8 44 % S structure with fields
matthiasm@8 45 % .anr
matthiasm@8 46 % .snr
matthiasm@8 47 % .str
matthiasm@8 48
matthiasm@8 49 % created:
matthiasm@8 50 % us 03-Mar-2002
matthiasm@8 51 % modified:
matthiasm@8 52 % us 30-Mar-2005 11:57:07 / TMW R14.sp2
matthiasm@8 53
matthiasm@8 54 %--------------------------------------------------------------------------------
function varargout=asort(inp,varargin)

% - implementation outline
%   1. parse the option list in <varargin>
%   2. turn the input into a column cell array of strings, apply the
%      -t/-w clean-up (<setinp>) and sort it with <sort>
%   3. LEXICAL PARSER: build a logical table <t>, same size as the padded
%      lower-case char matrix <c>, flagging the characters that belong to
%      a number
%   4. rows with exactly one number token are read with <strread> and
%      sorted numerically (ANR); rows with flagged characters that do not
%      qualify go to SNR; rows without any flagged character go to STR
%   5. output is the structure S unless exactly three outputs are
%      requested and -st is not set

varargout(1:nargout)={[]};
if ~nargin
    help(mfilename);
    return;
end

% - common parameters/options
n=[];
ds=[];
anr={};
snr={};
str={};
smod='ascend';  % sorting option
tmpl={};        % template(s)
sflg=false;     % output mode: structure
tflg=false;     % remove template(s): position of -t in <varargin> if set
dflg=false;     % debug mode
vflg=false;     % verbose output
wflg=false;     % remove spaces: position of -w in <varargin> if set

if nargin > 1
    % options taking a value: the last occurrence wins, and the option
    % is ignored if no value follows it
    ix=find(strcmp('-s',varargin));
    if ~isempty(ix) && nargin > ix(end)+1
        smod=varargin{ix(end)+1};
    end
    ix=find(strcmp('-t',varargin));
    if ~isempty(ix) && nargin > ix(end)+1
        tflg=ix(end);
        tmpl=varargin{ix(end)+1};
    end
    % plain switches
    if any(strcmp('-d',varargin))
        dflg=true;
    end
    if any(strcmp('-st',varargin))
        sflg=true;
    end
    if any(strcmp('-v',varargin))
        vflg=true;
    end
    ix=find(strcmp('-w',varargin));
    if ~isempty(ix)
        wflg=ix(end);
    end
end
% spec numbers
ntmpl={
    ' inf '
    '+inf '
    '-inf '
    ' nan '
    '+nan '
    '-nan '
    };
% spec chars
ctmpl={
    '.' % decimal point
    'd' % exponent
    'e' % exponent
    };

% - until sorting succeeded the first output is the unchanged input
if nargout <= 3
    varargout{1}=inp;
else
    disp(sprintf('ASORT> too many output args [%-1d/%-1d]\n',nargout,3));
    help(mfilename);
    return;
end
if isempty(inp)
    disp(sprintf('ASORT> input is empty'));
    return;
end

ti=clock;
winp=whos('inp');   % class/size/bytes of the input as passed in
switch winp.class
case 'cell'
    if ~iscellstr(inp)
        disp(sprintf('ASORT> cell is not an array of strings'));
        return;
    end
    % reshaping and sorting are done once below, after <setinp>
case 'char'
%   [ins,inx]=sortrows(inp);
    inp=cstr(inp);
otherwise
    disp(sprintf('ASORT> does not sort input of class <%s>',winp.class));
    return;
end

% - ascii dictionary sort of the cleaned-up strings
inp=inp(:);
inp=setinp(inp,tmpl,[tflg wflg]);
[ins,inx]=sort(inp);
if strcmp(smod,'descend')
    inx=inx(end:-1:1);
end
ins=inp(inx);
c=lower(char(ins));
wins=whos('c');     % must be taken before <c> is padded
cr=size(c,1);

% - LEXICAL PARSER
%--------------------------------------------------------------------------------
% - extend input on either side for search
c=[' '*ones(cr,2) c ' '*ones(cr,2)];

% - search for valid alphanumeric items in strings
% numbers/signs
t=(c>='0'&c<='9');
t=t|c=='-';
t=t|c=='+';
[tr,tc]=size(t);
% decimal points
% note: valid numbers with dec points must follow these templates
% nr.nr
% sign.nr
% nr.<SPACE>
% <SPACE>.nr
% '.' preceded by a flagged character
ix1= t(:,1:end-2) & ...
    ~isletter(c(:,1:end-2)) & ...
    c(:,2:end-1)=='.';
t(:,2:end-1)=t(:,2:end-1)|ix1;
% '.' followed by a flagged character with no letter on either side,
% or followed by an exponent character
ix1= (t(:,3:end) & ...
    (~isletter(c(:,3:end)) & ...
    ~isletter(c(:,1:end-2))) | ...
    (c(:,3:end)=='e' | ...
    c(:,3:end)=='d')) & ...
    c(:,2:end-1)=='.';
t(:,2:end-1)=t(:,2:end-1)|ix1;
% t(:,3:end)=t(:,3:end)|ix1;
% signs: only kept if the next character is flagged
t(c=='-')=false;
t(c=='+')=false;
ix1= t(:,3:end) & ...
    (c(:,2:end-1)=='-' | ...
    c(:,2:end-1)=='+');
t(:,2:end-1)=t(:,2:end-1)|ix1;
% exponents: only kept if the previous character is flagged
ix1= t(:,1:end-2) & ...
    (c(:,2:end-1)=='e' | ...
    c(:,2:end-1)=='d');
t(:,2:end-1)=t(:,2:end-1)|ix1;
% spec numbers
% <c> is flattened row by row and <t> transposed, so that a linear index
% found in <c> addresses the same character in <t>
c=reshape(c.',1,[]);
t=t';
ic=[];
for j=1:numel(ntmpl)
    ic=[ic,strfind(c,ntmpl{j})];
end
ic=sort(ic);
for i=1:numel(ic)
    ix=ic(i)+0:ic(i)+4;
    t(ix)=true;
end
t=t';
c=reshape(c.',[tc,tr]).';
t(c==' ')=false;
%--------------------------------------------------------------------------------

% - only allow one number per string
il=~any(t,2);                           % rows without any flagged character
ib=strfind(reshape(t.',1,[]),[0 1]);    % start of each run of flagged characters
if ~isempty(ib)
    ixe=cell(numel(ctmpl),1);
    n=reshape(char(t.*c).',1,[]);       % flagged characters only, flattened
    % rows holding a spec char more than once
    for i=1:numel(ctmpl)
        id=strfind(n,ctmpl{i});
        if ~isempty(id)
            [dum,dum,dum,ixe{i}]=dupinx(id,tc);
        end
    end
    in=false(tr,1);
    im=in;
% must check for anomalous cases like <'.d'>
    id=sort(...
        [find(n>='0' & n<='9'),...
        strfind(n,'inf'),...
        strfind(n,'nan')]);
%   [ibu,ibd,ixbu,ixe{i+1}]=dupinx(id,tc);
    % in: rows with at least one digit or spec number
    [ibu,ibd,ixbu,ixbd]=dupinx(id,tc);
    in(ixbu)=true;
    in(ixbd)=true;
    % im: rows with exactly one run of flagged characters
    [ibu,ibd,ixbu,ixbd]=dupinx(ib,tc);
    im(ixbu)=true;
    in=in&im;
    in([ixe{:}])=false;
    ia=~(in|il);                        % flagged characters, but no valid number

% - read valid strings
    n=t(in,:).*c(in,:);
    n(n==0)=' ';
    n=char(n);
    dn=strread(n.','%n');
    if numel(dn) ~= nnz(in)
        % not exactly one number was read per row: give up and hand the
        % parser state back to the caller
        %disp(sprintf('ASORT> unexpected fatal error reading input!'));
        if nargout
            s.c=c;
            s.t=t;
            s.n=n;
            s.d=dn;
            varargout{1}=s;
        end
        return;
    end

% - sort numbers
    [ds,dx]=sort(dn,1,smod);
    in=find(in);
    anr=ins(in(dx));
    snr=ins(ia);
end
str=ins(il);
to=clock;

% - prepare output
if nargout < 3 || sflg
    s.magic='ASORT';
    s.ver='30-Mar-2005 11:57:07';
    s.time=datestr(clock);
    s.runtime=etime(to,ti);
    s.input_class=winp.class;
    s.input_msize=winp.size;
    s.input_bytes=winp.bytes;
    s.strng_class=wins.class;
    s.strng_msize=wins.size;
    s.strng_bytes=wins.bytes;
    s.anr=anr;
    s.snr=snr;
    s.str=str;
    if dflg
        s.c=c;
        s.t=t;
        s.n=n;
        s.d=ds;
    end
    varargout{1}=s;
else
    s={anr,snr,str};
    for i=1:nargout
        varargout{i}=s{i};
    end
end

% - side-by-side listing: input, ascii sort, numerical sort, numbers read
if vflg
    inp=cstr(inp);
    an=[{'--- NUMERICAL'}; anr];
    as=[{'--- ASCII NUMBERS'}; snr];
    at=[{'--- ASCII STRINGS'}; str];
    nn=[{'--- NUMBERS'}; num2cell(ds)];
    ag={' ';' ';' '};       % pads the first two columns to the height of the others
    u=[{'INPUT'}; inp;ag];
    v=[{'ASCII SORT'}; ins;ag];
    w=[{'NUM SORT'}; an;as;at];
    x=[{'NUM READ'}; nn;as;at];
    w=[u,v,w,x];
    disp(w);
end

return;
matthiasm@8 318 %--------------------------------------------------------------------------------
function c=cstr(s)
% - split a char array into a column cell array holding one row per cell
%   trailing blanks of a row are kept (unlike <cellstr>)
%   any input that is not a char array is handed back untouched
%   note: reported to consume ~75% of <asort>'s processing time

if ~ischar(s)
    c=s;
    return;
end

nrow=size(s,1);
c=cell(nrow,1);
for k=1:nrow
    c{k}=s(k,:);    % no deblanking!
end
return;
matthiasm@8 332 %--------------------------------------------------------------------------------
function [idu,idd,ixu,ixd]=dupinx(ix,nc)
% - check for more than one entry/row in a matrix of column size <nc>
%
% ix    sorted row vector of 1-based linear indices into the matrix
%       flattened row by row (ie, reshape(m.',1,[]))
% nc    number of columns of the matrix
%
% idu   logical mask over <ix>: entry is the only one in its row
% idd   logical mask over <ix>: entry is the last of several in its row
% ixu   row numbers that hold exactly one entry
% ixd   row numbers that hold more than one entry
%
% all outputs are [] if <ix> is empty

if isempty(ix)
    idu=[];
    idd=[];
    ixu=[];
    ixd=[];
    return;
end

% row number of each entry
% (ix-1) keeps an entry in the last column (ix == k*nc) in row k
% instead of pushing it into row k+1
row=fix((ix-1)/nc)+1;
brk=diff(row)~=0;       % true where the row changes between neighbours
isfirst=[true brk];     % entry opens a row
islast=[brk true];      % entry closes a row
idu=isfirst & islast;
idd=islast & ~isfirst;
ixu=row(idu);
ixd=row(idd);
return;
matthiasm@8 354 %--------------------------------------------------------------------------------
function inp=setinp(inp,tmpl,flg)
% - remove space(s) and/or templates
%
% inp   strings to clean up; handed to <strrep> as is
% tmpl  template(s) to replace with one space:
%       a char array or a cell array of char arrays
% flg   [tflg wflg]
%       tflg: position of the -t option, 0|false if not set
%       wflg: position of the -w option, 0|false if not set
%
% the two steps run in ascending order of their position, ie, in the
% order the options were given; a step whose flag is not set is skipped

if isempty(inp) || ~any(flg)
    return;
end

if ischar(tmpl)
    tmpl={tmpl};
end

% an unset flag is 0 and must not take part in the dispatch: it would
% match the step whose flag is unset and run it (eg, -t without -w
% used to strip all spaces)
for pos=sort(flg(flg~=0))
    if pos == flg(1)
        % replace each template with one space
        for k=1:numel(tmpl)
            inp=strrep(inp,tmpl{k},' ');
        end
    elseif pos == flg(2)
        % remove all spaces
        inp=strrep(inp,' ','');
    end
end
return;
matthiasm@8 376 %--------------------------------------------------------------------------------