annotate toolboxes/FullBNT-1.0.7/KPMtools/asort.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
wolffd@0 1 %[ANR,SNR,STR] = ASORT(INP,'OPT',...);
wolffd@0 2 % S = ASORT(INP,'OPT',...);
wolffd@0 3 % to sort alphanumeric strings numerically if
wolffd@0 4 % they contain one properly formatted number
wolffd@0 5 % otherwise, ascii dictionary sorting is applied
wolffd@0 6 %
wolffd@0 7 % INP unsorted input:
wolffd@0 8 % - a char array
wolffd@0 9 % - a cell array of strings
wolffd@0 10 % OPT options
wolffd@0 11 % -s - sorting option
wolffd@0 12 % '-s','ascend' [def]
wolffd@0 13 % '-s','descend'
wolffd@0 14 % -st - force output form S [def: nargout dependent]
wolffd@0 15 % -t - replace matching template(s) with one space
wolffd@0 16 % prior to sorting
wolffd@0 17 % '-t','template'
wolffd@0 18 % '-t',{'template1','template2',...}
wolffd@0 19 % -w - remove space(s) prior to sorting
wolffd@0 20 %
wolffd@0 21 % NOTE -t/-w options are processed in the
wolffd@0 22 % order that they appear in
wolffd@0 23 % the command line
wolffd@0 24 %
wolffd@0 25 % -v - verbose output [def: quiet]
wolffd@0 26 % -d - debug mode
wolffd@0 27 % save additional output in S
wolffd@0 28 % .c: lex parser input
wolffd@0 29 % .t: lex parser table
wolffd@0 30 % .n: lex parser output
wolffd@0 31 % .d: numbers read from .n
wolffd@0 32 %
wolffd@0 33 % ANR numerically sorted alphanumeric strings [eg, 'f.-1.5e+2x.x']
wolffd@0 34 % - contain one number that can be read by
wolffd@0 35 % <strread> | <sscanf>
wolffd@0 36 % SNR ascii dict sorted alphanumeric strings
wolffd@0 37 % http://www.mathworks.com/matlabcentral/fileexchange/loadFile.do?objectId=7212#
wolffd@0 38 %
wolffd@0 39 % - contain more than one number [eg, 'f.-1.5e +2.x']
wolffd@0 40 % - contain incomplete|ambiguous numbers [eg, 'f.-1.5e+2.x']
wolffd@0 41 % STR ascii dict sorted strings
wolffd@0 42 % - contain no numbers [eg, 'a test']
wolffd@0 43 %
wolffd@0 44 % S structure with fields
wolffd@0 45 % .anr
wolffd@0 46 % .snr
wolffd@0 47 % .str
wolffd@0 48
wolffd@0 49 % created:
wolffd@0 50 % us 03-Mar-2002
wolffd@0 51 % modified:
wolffd@0 52 % us 30-Mar-2005 11:57:07 / TMW R14.sp2
wolffd@0 53
wolffd@0 54 %--------------------------------------------------------------------------------
wolffd@0 55 function varargout=asort(inp,varargin)
wolffd@0 56 % - main routine: ascii-sorts INP, then splits the sorted strings into ANR (read as one number), SNR (number characters present but not read as a number) and STR (no number characters)
wolffd@0 57 varargout(1:nargout)={[]}; % every requested output defaults to []
wolffd@0 58 if ~nargin % called without any argument: show the help text and quit
wolffd@0 59 help(mfilename);
wolffd@0 60 return;
wolffd@0 61 end
wolffd@0 62
wolffd@0 63 % - common parameters/options
wolffd@0 64 n=[]; % lex parser output (stored as S.n in debug mode)
wolffd@0 65 ds=[]; % sorted numbers (stored as S.d in debug mode)
wolffd@0 66 anr={}; % result: numerically sorted strings
wolffd@0 67 snr={}; % result: ascii sorted strings with unreadable numbers
wolffd@0 68 str={}; % result: ascii sorted strings without numbers
wolffd@0 69 smod='ascend'; % sorting option
wolffd@0 70 tmpl={}; % template(s)
wolffd@0 71 sflg=false; % output mode: structure
wolffd@0 72 tflg=false; % remove template(s); becomes the position of the last '-t' in varargin when given
wolffd@0 73 dflg=false; % debug mode
wolffd@0 74 vflg=false; % verbose output
wolffd@0 75 wflg=false; % remove spaces; becomes the position of the last '-w' in varargin when given
wolffd@0 76 % - parse the option list; if an option is repeated its last occurrence is used
wolffd@0 77 if nargin > 1
wolffd@0 78 ix=find(strcmp('-s',varargin));
wolffd@0 79 if ~isempty(ix) && nargin > ix(end)+1 % a value must follow the flag
wolffd@0 80 smod=varargin{ix(end)+1};
wolffd@0 81 end
wolffd@0 82 ix=find(strcmp('-t',varargin));
wolffd@0 83 if ~isempty(ix) && nargin > ix(end)+1 % a value must follow the flag
wolffd@0 84 tflg=ix(end); % position is kept so that setinp can apply -t/-w in command-line order
wolffd@0 85 tmpl=varargin{ix(end)+1};
wolffd@0 86 end
wolffd@0 87 if find(strcmp('-d',varargin));
wolffd@0 88 dflg=true;
wolffd@0 89 end
wolffd@0 90 if find(strcmp('-st',varargin));
wolffd@0 91 sflg=true;
wolffd@0 92 end
wolffd@0 93 if find(strcmp('-v',varargin));
wolffd@0 94 vflg=true;
wolffd@0 95 end
wolffd@0 96 ix=find(strcmp('-w',varargin));
wolffd@0 97 if ~isempty(ix)
wolffd@0 98 wflg=ix(end); % position is kept so that setinp can apply -t/-w in command-line order
wolffd@0 99 end
wolffd@0 100 end
wolffd@0 101 % spec numbers: 5-character tokens that the lexical parser marks as numbers
wolffd@0 102 ntmpl={
wolffd@0 103 ' inf '
wolffd@0 104 '+inf '
wolffd@0 105 '-inf '
wolffd@0 106 ' nan '
wolffd@0 107 '+nan '
wolffd@0 108 '-nan '
wolffd@0 109 };
wolffd@0 110 % spec chars: a string in which one of them is marked more than once is not read as a number
wolffd@0 111 ctmpl={
wolffd@0 112 '.' % decimal point
wolffd@0 113 'd' % exponent
wolffd@0 114 'e' % exponent
wolffd@0 115 };
wolffd@0 116 % - check the number of outputs and reject empty input
wolffd@0 117 if nargout <= 3
wolffd@0 118 varargout{1}=inp; % first output echoes the input until a result is assigned further down
wolffd@0 119 else
wolffd@0 120 disp(sprintf('ASORT> too many output args [%-1d/%-1d]\n',nargout,3));
wolffd@0 121 help(mfilename);
wolffd@0 122 return;
wolffd@0 123 end
wolffd@0 124 if isempty(inp)
wolffd@0 125 disp(sprintf('ASORT> input is empty'));
wolffd@0 126 return;
wolffd@0 127 end
wolffd@0 128 % - check the input class and bring the input into cell form
wolffd@0 129 ti=clock; % start of timing (reported as S.runtime)
wolffd@0 130 winp=whos('inp'); % class/size/bytes of the input (reported in S)
wolffd@0 131 switch winp.class
wolffd@0 132 case 'cell'
wolffd@0 133 if ~iscellstr(inp)
wolffd@0 134 disp(sprintf('ASORT> cell is not an array of strings'));
wolffd@0 135 return;
wolffd@0 136 end
wolffd@0 137 inp=inp(:);
wolffd@0 138 [ins,inx]=sort(inp); % NOTE(review): ins/inx are overwritten by the sort that follows setinp below
wolffd@0 139 case 'char'
wolffd@0 140 % [ins,inx]=sortrows(inp);
wolffd@0 141 inp=cstr(inp); % char matrix -> one cell per row
wolffd@0 142 otherwise
wolffd@0 143 disp(sprintf('ASORT> does not sort input of class <%s>',winp.class));
wolffd@0 144 return;
wolffd@0 145 end
wolffd@0 146 % - optional -t/-w preprocessing, then ascii dictionary sort
wolffd@0 147 inp=inp(:);
wolffd@0 148 inp=setinp(inp,tmpl,[tflg wflg]);
wolffd@0 149 [ins,inx]=sort(inp);
wolffd@0 150 if strcmp(smod,'descend') % descending order is obtained by reversing the ascending sort
wolffd@0 151 ins=ins(end:-1:1,:);
wolffd@0 152 inx=inx(end:-1:1);
wolffd@0 153 end
wolffd@0 154 ins=inp(inx); % the ascii sorted strings
wolffd@0 155 c=lower(char(ins)); % lower-case char matrix built from the sorted strings
wolffd@0 156 wins=whos('c'); % class/size/bytes of the char matrix (reported in S)
wolffd@0 157 [cr,cc]=size(c);
wolffd@0 158
wolffd@0 159 % - LEXICAL PARSER
wolffd@0 160 %--------------------------------------------------------------------------------
wolffd@0 161 % - extend input on either side for search
wolffd@0 162 c=[' '*ones(cr,2) c ' '*ones(cr,2)]; % two blank columns on each side
wolffd@0 163
wolffd@0 164 % - search for valid alphanumeric items in strings
wolffd@0 165 % numbers/signs
wolffd@0 166 t=(c>='0'&c<='9'); % t: mask of the characters taken to be part of a number
wolffd@0 167 t=t|c=='-';
wolffd@0 168 t=t|c=='+';
wolffd@0 169 [tr,tc]=size(t);
wolffd@0 170 % decimal points
wolffd@0 171 % note: valid numbers with dec points must follow these templates
wolffd@0 172 % nr.nr
wolffd@0 173 % sign.nr
wolffd@0 174 % nr.<SPACE>
wolffd@0 175 % <SPACE>.nr
wolffd@0 176 ix1= t(:,1:end-2) & ...
wolffd@0 177 ~isletter(c(:,1:end-2)) & ...
wolffd@0 178 c(:,2:end-1)=='.';
wolffd@0 179 t(:,2:end-1)=t(:,2:end-1)|ix1; % marks a '.' that directly follows a digit or sign
wolffd@0 180 ix1= (t(:,3:end) & ...
wolffd@0 181 (~isletter(c(:,3:end)) & ...
wolffd@0 182 ~isletter(c(:,1:end-2))) | ...
wolffd@0 183 (c(:,3:end)=='e' | ...
wolffd@0 184 c(:,3:end)=='d')) & ...
wolffd@0 185 c(:,2:end-1)=='.';
wolffd@0 186 t(:,2:end-1)=t(:,2:end-1)|ix1; % marks a '.' directly before a marked character (no letter on either side), or directly before 'e'/'d'
wolffd@0 187 % t(:,3:end)=t(:,3:end)|ix1;
wolffd@0 188 % signs: all signs are unmarked first, then a sign is re-marked only if the next character is marked
wolffd@0 189 t(c=='-')=false;
wolffd@0 190 t(c=='+')=false;
wolffd@0 191 ix1= t(:,3:end) & ...
wolffd@0 192 (c(:,2:end-1)=='-' | ...
wolffd@0 193 c(:,2:end-1)=='+');
wolffd@0 194 t(:,2:end-1)=t(:,2:end-1)|ix1;
wolffd@0 195 % exponents: 'e'/'d' is marked if the character directly before it is marked
wolffd@0 196 ix1= t(:,1:end-2) & ...
wolffd@0 197 (c(:,2:end-1)=='e' | ...
wolffd@0 198 c(:,2:end-1)=='d');
wolffd@0 199 t(:,2:end-1)=t(:,2:end-1)|ix1;
wolffd@0 200 % spec numbers: c is flattened row by row (t is transposed to match) so that strfind scans all strings in one pass; both are restored afterwards
wolffd@0 201 c=reshape(c.',1,[]);
wolffd@0 202 t=t';
wolffd@0 203 ic=[]; % start positions of all spec number tokens found
wolffd@0 204 for j=1:numel(ntmpl)
wolffd@0 205 ic=[ic,strfind(c,ntmpl{j})];
wolffd@0 206 end
wolffd@0 207 ic=sort(ic);
wolffd@0 208 for i=1:numel(ic)
wolffd@0 209 ix=ic(i)+0:ic(i)+4; % the 5 characters of the matched token
wolffd@0 210 t(ix)=true;
wolffd@0 211 end
wolffd@0 212 t=t';
wolffd@0 213 c=reshape(c.',[tc,tr]).';
wolffd@0 214 t(c==' ')=false; % blanks are never part of a number
wolffd@0 215 %--------------------------------------------------------------------------------
wolffd@0 216
wolffd@0 217 % - only allow one number per string
wolffd@0 218 il=~any(t,2); % rows without any marked character
wolffd@0 219 ib=strfind(reshape(t.',1,[]),[0 1]);
wolffd@0 220 if ~isempty(ib) % ib: position just before each run of marked characters (row-by-row linear index)
wolffd@0 221 ixe=cell(3,1); % ixe{i}: rows in which ctmpl{i} is marked more than once
wolffd@0 222 n=reshape(char(t.*c).',1,[]);
wolffd@0 223 for i=1:numel(ctmpl) % n holds the marked characters only, flattened row by row
wolffd@0 224 id=strfind(n,ctmpl{i});
wolffd@0 225 if ~isempty(id)
wolffd@0 226 [dum,dum,ixu{i},ixe{i}]=dupinx(id,tc);
wolffd@0 227 end
wolffd@0 228 end
wolffd@0 229 in=false(tr,1); % in: rows that will be read as numbers
wolffd@0 230 im=in; % im: rows with exactly one run of marked characters
wolffd@0 231 % must check for anomalous cases like <'.d'>
wolffd@0 232 id=sort(...
wolffd@0 233 [find(n>='0' & n<='9'),...
wolffd@0 234 strfind(n,'inf'),...
wolffd@0 235 strfind(n,'nan')]);
wolffd@0 236 % [ibu,ibd,ixbu,ixe{i+1}]=dupinx(id,tc);
wolffd@0 237 [ibu,ibd,ixbu,ixbd]=dupinx(id,tc);
wolffd@0 238 in(ixbu)=true; % rows with exactly one digit/inf/nan hit ...
wolffd@0 239 in(ixbd)=true; % ... and rows with several: the row holds at least one digit/inf/nan
wolffd@0 240 [ibu,ibd,ixbu,ixbd]=dupinx(ib,tc);
wolffd@0 241 im(ixbu)=true;
wolffd@0 242 in=in&im;
wolffd@0 243 in([ixe{:}])=false; % reject rows with a repeated '.', 'd' or 'e'
wolffd@0 244 il=~any(t,2);
wolffd@0 245 ia=~(in|il); % rows with marked characters that are not read as a number
wolffd@0 246
wolffd@0 247 % - read valid strings: unmarked characters are blanked, then all rows are read in one <strread> call
wolffd@0 248 n=t(in,:).*c(in,:);
wolffd@0 249 n(n==0)=' ';
wolffd@0 250 n=char(n);
wolffd@0 251 dn=strread(n.','%n');
wolffd@0 252 if numel(dn) ~= numel(find(in)) % count mismatch: stop and hand back the parser data as first output
wolffd@0 253 %disp(sprintf('ASORT> unexpected fatal error reading input!'));
wolffd@0 254 if nargout
wolffd@0 255 s.c=c;
wolffd@0 256 s.t=t;
wolffd@0 257 s.n=n;
wolffd@0 258 s.d=dn;
wolffd@0 259 varargout{1}=s;
wolffd@0 260 end
wolffd@0 261 return;
wolffd@0 262 end
wolffd@0 263
wolffd@0 264 % - sort numbers
wolffd@0 265 [ds,dx]=sort(dn,1,smod);
wolffd@0 266 in=find(in);
wolffd@0 267 anr=ins(in(dx)); % strings ordered by the number read from them
wolffd@0 268 snr=ins(ia);
wolffd@0 269 end
wolffd@0 270 str=ins(il);
wolffd@0 271 to=clock; % end of timing
wolffd@0 272
wolffd@0 273 % - prepare output
wolffd@0 274 if nargout < 3 || sflg % structure output: fewer than 3 outputs requested, or forced by -st
wolffd@0 275 s.magic='ASORT';
wolffd@0 276 s.ver='30-Mar-2005 11:57:07';
wolffd@0 277 s.time=datestr(clock);
wolffd@0 278 s.runtime=etime(to,ti);
wolffd@0 279 s.input_class=winp.class;
wolffd@0 280 s.input_msize=winp.size;
wolffd@0 281 s.input_bytes=winp.bytes;
wolffd@0 282 s.strng_class=wins.class;
wolffd@0 283 s.strng_msize=wins.size;
wolffd@0 284 s.strng_bytes=wins.bytes;
wolffd@0 285 s.anr=anr;
wolffd@0 286 s.snr=snr;
wolffd@0 287 s.str=str;
wolffd@0 288 if dflg % debug mode: add the parser data
wolffd@0 289 s.c=c;
wolffd@0 290 s.t=t;
wolffd@0 291 s.n=n;
wolffd@0 292 s.d=ds;
wolffd@0 293 end
wolffd@0 294 varargout{1}=s;
wolffd@0 295 else
wolffd@0 296 s={anr,snr,str}; % separate outputs in the order ANR, SNR, STR
wolffd@0 297 for i=1:nargout
wolffd@0 298 varargout{i}=s{i};
wolffd@0 299 end
wolffd@0 300 end
wolffd@0 301 % - verbose mode: display a table with the columns INPUT, ASCII SORT, NUM SORT and NUM READ
wolffd@0 302 if vflg
wolffd@0 303 inp=cstr(inp);
wolffd@0 304 an=[{'--- NUMERICAL'}; anr];
wolffd@0 305 as=[{'--- ASCII NUMBERS'}; snr];
wolffd@0 306 at=[{'--- ASCII STRINGS'}; str];
wolffd@0 307 nn=[{'--- NUMBERS'}; num2cell(ds)];
wolffd@0 308 ag={' ';' ';' '}; % three filler rows so that all four columns have the same height
wolffd@0 309 u=[{'INPUT'}; inp;ag];
wolffd@0 310 v=[{'ASCII SORT'}; ins;ag];
wolffd@0 311 w=[{'NUM SORT'}; an;as;at];
wolffd@0 312 x=[{'NUM READ'}; nn;as;at];
wolffd@0 313 w=[u,v,w,x];
wolffd@0 314 disp(w);
wolffd@0 315 end
wolffd@0 316
wolffd@0 317 return;
wolffd@0 318 %--------------------------------------------------------------------------------
wolffd@0 319 function c=cstr(s)
wolffd@0 320 % - turn a char matrix into a column cell array holding one row per cell;
wolffd@0 321 %   rows are copied as they are (no deblanking), any other input is returned
wolffd@0 322 %   untouched. flagged by the original author as the bottleneck of <asort>
wolffd@0 323 if ~ischar(s)
wolffd@0 324     c=s;
wolffd@0 325     return;
wolffd@0 326 end
wolffd@0 327 nrow=size(s,1);
wolffd@0 328 c=cell(nrow,1);
wolffd@0 329 for r=1:nrow
wolffd@0 330     c{r}=s(r,:);
wolffd@0 331 end
wolffd@0 332 %--------------------------------------------------------------------------------
wolffd@0 333 function [idu,idd,ixu,ixd]=dupinx(ix,nc)
wolffd@0 334 % - group linear indices <ix> by the row they fall in, for a matrix with <nc>
wolffd@0 335 %   columns that was flattened row by row
wolffd@0 336 %   ix   1-based linear indices (row vector); entries of one row must be adjacent
wolffd@0 337 %   idu  mask over ix: the only entry of its row          / ixu: those rows
wolffd@0 338 %   idd  mask over ix: last of several entries in one row / ixd: those rows
wolffd@0 339 if isempty(ix)
wolffd@0 340     [idu,idd,ixu,ixd]=deal([]);
wolffd@0 341     return;
wolffd@0 342 end
wolffd@0 343 % row of each index; using (ix-1) keeps an index in the last column (a
wolffd@0 344 % multiple of nc) in its own row instead of assigning it to the next one
wolffd@0 345 id=fix((ix-1)/nc)+1;
wolffd@0 346 idi=diff(id)~=0;     % true where the following entry lies in another row
wolffd@0 347 ide=[true idi];      % entry opens a run of entries in the same row
wolffd@0 348 idb=[idi true];      % entry closes a run of entries in the same row
wolffd@0 349 idu=idb & ide;
wolffd@0 350 idd=idb & ~ide;
wolffd@0 351 ixu=id(idu);
wolffd@0 352 ixd=id(idd);
wolffd@0 353 return;
wolffd@0 354 %--------------------------------------------------------------------------------
wolffd@0 355 function inp=setinp(inp,tmpl,flg)
wolffd@0 356 % - replace template(s) by one space and/or remove space(s) prior to sorting
wolffd@0 357 %   inp   cell array of strings (returned unchanged if empty or if no flag is set)
wolffd@0 358 %   tmpl  template string or cell array of template strings
wolffd@0 359 %   flg   [tflg wflg]: command-line position of '-t' / '-w', 0 = option not given
wolffd@0 360 if isempty(inp) || ~any(flg)
wolffd@0 361     return;
wolffd@0 362 end
wolffd@0 363 % visit only the options that were given, in command-line order; an unset
wolffd@0 364 % flag (0) must be skipped, otherwise it would trigger the other option
wolffd@0 365 for pos=sort(flg(flg~=0))
wolffd@0 366     if pos == flg(1)                     % '-t': templates -> one space
wolffd@0 367         if ischar(tmpl)
wolffd@0 368             tmpl={tmpl};
wolffd@0 369         end
wolffd@0 370         for k=1:numel(tmpl)
wolffd@0 371             inp=strrep(inp,tmpl{k},' ');
wolffd@0 372         end
wolffd@0 373     else                                 % '-w': remove all spaces
wolffd@0 374         inp=strrep(inp,' ','');
wolffd@0 375     end
wolffd@0 376 end
wolffd@0 376 %--------------------------------------------------------------------------------