annotate toolboxes/FullBNT-1.0.7/KPMtools/asort.m @ 0:cc4b1211e677 tip

initial commit to HG from Changeset: 646 (e263d8a21543) added further path and more save "camirversion.m"
author Daniel Wolff
date Fri, 19 Aug 2016 13:07:06 +0200
parents
children
rev   line source
Daniel@0 1 %[ANR,SNR,STR] = ASORT(INP,'OPT',...);
Daniel@0 2 % S = ASORT(INP,'OPT',...);
Daniel@0 3 % sorts alphanumeric strings numerically if
Daniel@0 4 % they contain exactly one properly formatted number;
Daniel@0 5 % otherwise, ascii dictionary sorting is applied
Daniel@0 6 %
Daniel@0 7 % INP unsorted input:
Daniel@0 8 % - a char array
Daniel@0 9 % - a cell array of strings
Daniel@0 10 % OPT options
Daniel@0 11 % -s - sorting option
Daniel@0 12 % '-s','ascend' [def]
Daniel@0 13 % '-s','descend'
Daniel@0 14 % -st - force output form S [def: nargout dependent]
Daniel@0 15 % -t - replace matching template(s) with one space
Daniel@0 16 % prior to sorting
Daniel@0 17 % '-t','template'
Daniel@0 18 % '-t',{'template1','template2',...}
Daniel@0 19 % -w - remove space(s) prior to sorting
Daniel@0 20 %
Daniel@0 21 % NOTE -t/-w options are processed in the
Daniel@0 22 % order that they appear in
Daniel@0 23 % the command line
Daniel@0 24 %
Daniel@0 25 % -v - verbose output [def: quiet]
Daniel@0 26 % -d - debug mode
Daniel@0 27 % save additional output in S
Daniel@0 28 % .c: lex parser input
Daniel@0 29 % .t: lex parser table
Daniel@0 30 % .n: lex parser output
Daniel@0 31 % .d: numbers read from .n
Daniel@0 32 %
Daniel@0 33 % ANR numerically sorted alphanumeric strings [eg, 'f.-1.5e+2x.x']
Daniel@0 34 % - contain one number that can be read by
Daniel@0 35 % <strread> | <sscanf>
Daniel@0 36 % SNR ascii dict sorted alphanumeric strings
Daniel@0 37 % http://www.mathworks.com/matlabcentral/fileexchange/loadFile.do?objectId=7212#
Daniel@0 38 %
Daniel@0 39 % - contain more than one number [eg, 'f.-1.5e +2.x']
Daniel@0 40 % - contain incomplete|ambiguous numbers [eg, 'f.-1.5e+2.x']
Daniel@0 41 % STR ascii dict sorted strings
Daniel@0 42 % - contain no numbers [eg, 'a test']
Daniel@0 43 %
Daniel@0 44 % S structure with fields
Daniel@0 45 % .anr
Daniel@0 46 % .snr
Daniel@0 47 % .str
Daniel@0 48
Daniel@0 49 % created:
Daniel@0 50 % us 03-Mar-2002
Daniel@0 51 % modified:
Daniel@0 52 % us 30-Mar-2005 11:57:07 / TMW R14.sp2
Daniel@0 53
Daniel@0 54 %--------------------------------------------------------------------------------
function varargout=asort(inp,varargin)

% - main routine (the user help is the comment block at the top of the file)
%   processing steps
%	1 parse the option list
%	2 validate <inp> and turn it into a column cell array of strings
%	3 apply the -t/-w preprocessing (<setinp>) and ascii-sort the strings
%	4 lexical parser: build a logical mask <t> of the chars in the padded,
%	  lower-cased char matrix <c> that may belong to a number
%	5 classify the rows
%		in: exactly one number candidate -> read and sorted by value (anr)
%		ia: number-like chars, but not accepted as one number	    (snr)
%		il: no number-like chars at all				    (str)
%	6 assemble the output: structure S or the {anr,snr,str} list
%   returns
%	nargout < 3 or -st : one structure S
%	nargout = 3	   : anr, snr, str
%	early exits leave <inp> (or, on a read mismatch, the debug structure)
%	in the first output

	varargout(1:nargout)={[]};
	if ~nargin
		help(mfilename);
		return;
	end

% - common parameters/options
	n=[];
	ds=[];
	anr={};
	snr={};
	str={};
	smod='ascend';	% sorting option
	tmpl={};	% template(s)
	sflg=false;	% output mode: structure
	tflg=false;	% remove template(s): false | position of last '-t' in varargin
	dflg=false;	% debug mode
	vflg=false;	% verbose output
	wflg=false;	% remove spaces: false | position of last '-w' in varargin

	if nargin > 1
		% options taking a value are only accepted if a value follows them
		ix=find(strcmp('-s',varargin));
		if ~isempty(ix) && nargin > ix(end)+1
			smod=varargin{ix(end)+1};
		end
		ix=find(strcmp('-t',varargin));
		if ~isempty(ix) && nargin > ix(end)+1
			tflg=ix(end);
			tmpl=varargin{ix(end)+1};
		end
		dflg=any(strcmp('-d',varargin));
		sflg=any(strcmp('-st',varargin));
		vflg=any(strcmp('-v',varargin));
		ix=find(strcmp('-w',varargin));
		if ~isempty(ix)
			wflg=ix(end);
		end
	end
% spec numbers
	ntmpl={
		' inf '
		'+inf '
		'-inf '
		' nan '
		'+nan '
		'-nan '
	};
% spec chars
	ctmpl={
		'.'	% decimal point
		'd'	% exponent
		'e'	% exponent
	};

	if nargout <= 3
		varargout{1}=inp;
	else
		disp(sprintf('ASORT> too many output args [%-1d/%-1d]\n',nargout,3));
		help(mfilename);
		return;
	end
	if isempty(inp)
		disp(sprintf('ASORT> input is empty'));
		return;
	end

	ti=clock;
	winp=whos('inp');	% class/size/bytes of the input as passed by the caller
	switch winp.class
	case 'cell'
		if ~iscellstr(inp)
			disp(sprintf('ASORT> cell is not an array of strings'));
			return;
		end
	case 'char'
		inp=cstr(inp);
	otherwise
		disp(sprintf('ASORT> does not sort input of class <%s>',winp.class));
		return;
	end

% - preprocess and ascii-sort
%   note: the strings are sorted once, after -t/-w preprocessing; only the
%	  permutation <inx> is needed because <ins> is rebuilt from it
	inp=inp(:);
	inp=setinp(inp,tmpl,[tflg wflg]);
	[dum,inx]=sort(inp);
	if strcmp(smod,'descend')
		inx=inx(end:-1:1);
	end
	ins=inp(inx);
	c=lower(char(ins));
	wins=whos('c');
	[cr,cc]=size(c);

% - LEXICAL PARSER
%--------------------------------------------------------------------------------
% - extend input on either side for search
	c=[' '*ones(cr,2) c ' '*ones(cr,2)];

% - search for valid alphanumeric items in strings
%   numbers/signs
	t=(c>='0'&c<='9');
	t=t|c=='-';
	t=t|c=='+';
	[tr,tc]=size(t);
%   decimal points
%   note: valid numbers with dec points must follow these templates
%	nr.nr
%	sign.nr
%	nr.<SPACE>
%	<SPACE>.nr
	ix1=	 t(:,1:end-2) & ...
		~isletter(c(:,1:end-2)) & ...
		 c(:,2:end-1)=='.';
	t(:,2:end-1)=t(:,2:end-1)|ix1;
	ix1=	(t(:,3:end) & ...
		(~isletter(c(:,3:end)) & ...
		~isletter(c(:,1:end-2))) | ...
		(c(:,3:end)=='e' | ...
		 c(:,3:end)=='d')) & ...
		 c(:,2:end-1)=='.';
	t(:,2:end-1)=t(:,2:end-1)|ix1;
%   signs
%   a sign is only kept if the char to its right is already flagged
	t(c=='-')=false;
	t(c=='+')=false;
	ix1=	 t(:,3:end) & ...
		(c(:,2:end-1)=='-' | ...
		 c(:,2:end-1)=='+');
	t(:,2:end-1)=t(:,2:end-1)|ix1;
%   exponents
%   an <e>|<d> is only flagged if the char to its left is already flagged
	ix1=	 t(:,1:end-2) & ...
		(c(:,2:end-1)=='e' | ...
		 c(:,2:end-1)=='d');
	t(:,2:end-1)=t(:,2:end-1)|ix1;
%   spec numbers
%   <c> is flattened row by row and <t> transposed so that the linear
%   indices returned by <strfind> address the same chars in both
	c=reshape(c.',1,[]);
	t=t';
	ic=[];
	for j=1:numel(ntmpl)
		ic=[ic,strfind(c,ntmpl{j})];
	end
	ic=sort(ic);
	for i=1:numel(ic)
		ix=ic(i)+0:ic(i)+4;	% the five chars of the template
		t(ix)=true;
	end
	t=t';
	c=reshape(c.',[tc,tr]).';
	t(c==' ')=false;
%--------------------------------------------------------------------------------

% - only allow one number per string
	il=~any(t,2);				% rows without any flagged char
	ib=strfind(reshape(t.',1,[]),[0 1]);	% starts of flagged runs
	if ~isempty(ib)
		ixe=cell(numel(ctmpl),1);
		n=reshape(char(t.*c).',1,[]);	% flagged chars only, row by row
		for i=1:numel(ctmpl)
			id=strfind(n,ctmpl{i});
			if ~isempty(id)
				% ixe{i}: rows holding this spec char more than once
				[dum,dum,ixu{i},ixe{i}]=dupinx(id,tc);
			end
		end
		in=false(tr,1);
		im=in;
%   must check for anomalous cases like <'.d'>
		id=sort(...
			[find(n>='0' & n<='9'),...
			 strfind(n,'inf'),...
			 strfind(n,'nan')]);
		[ibu,ibd,ixbu,ixbd]=dupinx(id,tc);
		in(ixbu)=true;			% rows with at least one digit|inf|nan
		in(ixbd)=true;
		[ibu,ibd,ixbu,ixbd]=dupinx(ib,tc);
		im(ixbu)=true;			% rows with exactly one flagged run
		in=in&im;
		in([ixe{:}])=false;		% repeated <.>|<d>|<e>: not one number
		ia=~(in|il);

% - read valid strings
		n=t(in,:).*c(in,:);
		n(n==0)=' ';
		n=char(n);
		dn=strread(n.','%n');
		if numel(dn) ~= numel(find(in))
			% the parser and <strread> disagree on the number count:
			% quit quietly and hand back the parser state if an
			% output is requested
			if nargout
				s.c=c;
				s.t=t;
				s.n=n;
				s.d=dn;
				varargout{1}=s;
			end
			return;
		end

% - sort numbers
		[ds,dx]=sort(dn,1,smod);
		in=find(in);
		anr=ins(in(dx));
		snr=ins(ia);
	end
	str=ins(il);
	to=clock;

% - prepare output
	if nargout < 3 || sflg
		s.magic='ASORT';
		s.ver='30-Mar-2005 11:57:07';
		s.time=datestr(clock);
		s.runtime=etime(to,ti);
		s.input_class=winp.class;
		s.input_msize=winp.size;
		s.input_bytes=winp.bytes;
		s.strng_class=wins.class;
		s.strng_msize=wins.size;
		s.strng_bytes=wins.bytes;
		s.anr=anr;
		s.snr=snr;
		s.str=str;
		if dflg
			s.c=c;
			s.t=t;
			s.n=n;
			s.d=ds;
		end
		varargout{1}=s;
	else
		s={anr,snr,str};
		for i=1:nargout
			varargout{i}=s{i};
		end
	end

% - verbose output: table with the columns
%   input | ascii sort | num sort | numbers read
	if vflg
		inp=cstr(inp);
		an=[{'--- NUMERICAL'}; anr];
		as=[{'--- ASCII NUMBERS'}; snr];
		at=[{'--- ASCII STRINGS'}; str];
		nn=[{'--- NUMBERS'}; num2cell(ds)];
		ag={' ';' ';' '};
		u=[{'INPUT'}; inp;ag];
		v=[{'ASCII SORT'}; ins;ag];
		w=[{'NUM SORT'}; an;as;at];
		x=[{'NUM READ'}; nn;as;at];
		w=[u,v,w,x];
		disp(w);
	end

	return;
Daniel@0 318 %--------------------------------------------------------------------------------
function c=cstr(s)
% - turn a char array into a column cell array holding one row per cell
%   rows are copied as they are: trailing blanks are kept
%   anything that is not a char array is handed back untouched
%   note: the original author flags this routine as <asort>'s bottleneck

	if ~ischar(s)
		c=s;
		return;
	end

	nrow=size(s,1);
	c=cell(nrow,1);
	for k=1:nrow
		c{k}=s(k,:);	% no deblanking!
	end
	return;
Daniel@0 332 %--------------------------------------------------------------------------------
function [idu,idd,ixu,ixd]=dupinx(ix,nc)
% - check for more than one entry/row in a matrix of column size <nc>
%	ix	1-based linear indices into the matrix flattened row by row;
%		must be in ascending order, because entries of one row are
%		recognised as a run of equal row numbers
%	nc	number of columns of the matrix
%   unique indices:	idu / ixu
%	idu	logical mask over <ix>: the entry is the only one in its row
%	ixu	row numbers of these entries
%   duplicate indices:	idd / ixd
%	idd	logical mask over <ix>: the entry is the last one of a row
%		that holds several entries
%	ixd	row numbers of these rows (each listed once)
%   all outputs are [] if <ix> is empty

	idu=[];
	idd=[];
	ixu=[];
	ixd=[];
	if isempty(ix)
		return;
	end

	ix=ix(:).';			% the run detection below needs a row vector
% - row number of each index
%   note: <ix-1> keeps an index in the last column (ix == k*nc) in row k
%	  rather than pushing it into row k+1
	id=fix((ix-1)/nc)+1;
	idi=diff(id)~=0;
	ide=[true idi];			% entry starts a run of equal rows
	idb=[idi true];			% entry ends a run of equal rows
	idu=idb & ide;			% run of length one
	idd=idb & ~ide;			% end of a run longer than one
	ixu=id(idu);
	ixd=id(idd);
	return;
Daniel@0 354 %--------------------------------------------------------------------------------
function inp=setinp(inp,tmpl,flg)
% - remove space(s) and/or templates
%	inp	string(s) to clean; returned unchanged if empty or if no
%		option is set
%	tmpl	template string or cell array of template strings;
%		each match is replaced with one space
%	flg	[tflg wflg]
%		tflg: position of the template option in the caller's
%		      option list, 0|false if not requested
%		wflg: position of the space-removal option in the caller's
%		      option list, 0|false if not requested
%   the requested steps are run in ascending order of their positions

	if isempty(inp) || ~any(flg)
		return;
	end
	if ischar(tmpl)
		tmpl={tmpl};
	end

% - only loop over options that were requested
%   note: an unset option has position 0; if it took part in the loop it
%	  would select the branch of whichever flag is 0, eg, all spaces
%	  would be removed although only templates were requested
	for pos=sort(flg(flg~=0))
		if pos == flg(1)
			for k=1:numel(tmpl)
				inp=strrep(inp,tmpl{k},' ');
			end
		elseif pos == flg(2)
			inp=strrep(inp,' ','');
		end
	end
	return;
Daniel@0 376 %--------------------------------------------------------------------------------