comparison toolboxes/FullBNT-1.0.7/KPMtools/asort.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e9a9cd732c1e
1 %[ANR,SNR,STR] = ASORT(INP,'OPT',...);
2 % S = ASORT(INP,'OPT',...);
3 % to sort alphanumeric strings numerically if
4 % they contain one properly formatted number
5 % otherwise, ascii dictionary sorting is applied
6 %
7 % INP unsorted input:
8 % - a char array
9 % - a cell array of strings
10 % OPT options
11 % -s - sorting option
12 % '-s','ascend' [def]
13 % '-s','descend'
14 % -st - force output form S [def: nargout dependent]
15 % -t - replace matching template(s) with one space
16 % prior to sorting
17 % '-t','template'
18 % '-t',{'template1','template2',...}
19 % -w - remove space(s) prior to sorting
20 %
21 % NOTE -t/-w options are processed in the
22 % order that they appear in
23 % the command line
24 %
25 % -v - verbose output [def: quiet]
26 % -d - debug mode
27 % save additional output in S
28 % .c: lex parser input
29 % .t: lex parser table
30 % .n: lex parser output
31 % .d: numbers read from .n
32 %
33 % ANR numerically sorted alphanumeric strings [eg, 'f.-1.5e+2x.x']
34 % - contain one number that can be read by
35 % <strread> | <sscanf>
36 % SNR ascii dict sorted alphanumeric strings
37 % http://www.mathworks.com/matlabcentral/fileexchange/loadFile.do?objectId=7212#
38 %
39 % - contain more than one number [eg, 'f.-1.5e +2.x']
40 % - contain incomplete|ambiguous numbers [eg, 'f.-1.5e+2.x']
41 % STR ascii dict sorted strings
42 % - contain no numbers [eg, 'a test']
43 %
44 % S structure with fields
45 % .anr
46 % .snr
47 % .str
48
49 % created:
50 % us 03-Mar-2002
51 % modified:
52 % us 30-Mar-2005 11:57:07 / TMW R14.sp2
53
54 %--------------------------------------------------------------------------------
55 function varargout=asort(inp,varargin)
% main entry point: splits the input strings into three groups
%   anr - strings holding exactly one readable number, sorted by that number
%   snr - strings holding number characters that do not form one valid number, ascii sorted
%   str - strings without number characters, ascii sorted
% and returns them as up to three outputs or as fields of one structure
56
57 varargout(1:nargout)={[]}; % pre-fill every requested output with []
58 if ~nargin
59 help(mfilename); % called without any input: show the help text
60 return;
61 end
62
63 % - common parameters/options
64 n=[]; % text of the extracted numbers (debug field .n)
65 ds=[]; % numerically sorted numbers (debug field .d)
66 anr={}; % result: numerically sorted strings
67 snr={}; % result: ascii sorted strings with unusable numbers
68 str={}; % result: ascii sorted strings without numbers
69 smod='ascend'; % sorting option
70 tmpl={}; % template(s)
71 sflg=false; % output mode: structure
72 tflg=false; % remove template(s)
73 dflg=false; % debug mode
74 vflg=false; % verbose output
75 wflg=false; % remove spaces
76
% - option parsing: for every option the LAST occurrence in varargin wins
%   <nargin > ix(end)+1> makes sure a value follows a '-s'/'-t' option
77 if nargin > 1
78 ix=find(strcmp('-s',varargin));
79 if ~isempty(ix) && nargin > ix(end)+1
80 smod=varargin{ix(end)+1};
81 end
82 ix=find(strcmp('-t',varargin));
83 if ~isempty(ix) && nargin > ix(end)+1
84 tflg=ix(end); % keep the POSITION of '-t': <setinp> orders -t/-w by it
85 tmpl=varargin{ix(end)+1};
86 end
87 if find(strcmp('-d',varargin));
88 dflg=true;
89 end
90 if find(strcmp('-st',varargin));
91 sflg=true;
92 end
93 if find(strcmp('-v',varargin));
94 vflg=true;
95 end
96 ix=find(strcmp('-w',varargin));
97 if ~isempty(ix)
98 wflg=ix(end); % keep the POSITION of '-w' (see tflg)
99 end
100 end
101 % spec numbers
%   five-character templates searched in the blank-padded, lower-cased text
102 ntmpl={
103 ' inf '
104 '+inf '
105 '-inf '
106 ' nan '
107 '+nan '
108 '-nan '
109 };
110 % spec chars
%   a row holding one of these more than once is not read as a number
111 ctmpl={
112 '.' % decimal point
113 'd' % exponent
114 'e' % exponent
115 };
116
% - at most three outputs; until sorting succeeds the first output is
%   the unchanged input
117 if nargout <= 3
118 varargout{1}=inp;
119 else
120 disp(sprintf('ASORT> too many output args [%-1d/%-1d]\n',nargout,3));
121 help(mfilename);
122 return;
123 end
124 if isempty(inp)
125 disp(sprintf('ASORT> input is empty'));
126 return;
127 end
128
129 ti=clock; % start time for the <runtime> field
130 winp=whos('inp'); % class/size/bytes of the input for the output structure
131 switch winp.class
132 case 'cell'
133 if ~iscellstr(inp)
134 disp(sprintf('ASORT> cell is not an array of strings'));
135 return;
136 end
137 inp=inp(:);
138 [ins,inx]=sort(inp); % recomputed below after <setinp> preprocessing
139 case 'char'
140 % [ins,inx]=sortrows(inp);
141 inp=cstr(inp); % one cell per row, trailing blanks kept
142 otherwise
143 disp(sprintf('ASORT> does not sort input of class <%s>',winp.class));
144 return;
145 end
146
% - preprocess (-t/-w) and ascii sort; <ins> is the ascii sorted input
147 inp=inp(:);
148 inp=setinp(inp,tmpl,[tflg wflg]);
149 [ins,inx]=sort(inp);
150 if strcmp(smod,'descend')
151 ins=ins(end:-1:1,:);
152 inx=inx(end:-1:1);
153 end
154 ins=inp(inx);
155 c=lower(char(ins)); % lower-case char matrix, one string per row
156 wins=whos('c'); % class/size/bytes of the char matrix for the output structure
157 [cr,cc]=size(c);
158
159 % - LEXICAL PARSER
%   builds the logical mask <t> (same size as <c>): true where a
%   character is taken as part of a number
160 %--------------------------------------------------------------------------------
161 % - extend input on either side for search
%   two blank columns left and right, so the column-shifted comparisons
%   below (1:end-2 / 2:end-1 / 3:end) can look at both neighbours
162 c=[' '*ones(cr,2) c ' '*ones(cr,2)];
163
164 % - search for valid alphanumeric items in strings
165 % numbers/signs
166 t=(c>='0'&c<='9');
167 t=t|c=='-';
168 t=t|c=='+';
169 [tr,tc]=size(t);
170 % decimal points
171 % note: valid numbers with dec points must follow these templates
172 % nr.nr
173 % sign.nr
174 % nr.<SPACE>
175 % <SPACE>.nr
176 ix1= t(:,1:end-2) & ...
177 ~isletter(c(:,1:end-2)) & ...
178 c(:,2:end-1)=='.';
179 t(:,2:end-1)=t(:,2:end-1)|ix1;
180 ix1= (t(:,3:end) & ...
181 (~isletter(c(:,3:end)) & ...
182 ~isletter(c(:,1:end-2))) | ...
183 (c(:,3:end)=='e' | ...
184 c(:,3:end)=='d')) & ...
185 c(:,2:end-1)=='.';
186 t(:,2:end-1)=t(:,2:end-1)|ix1;
187 % t(:,3:end)=t(:,3:end)|ix1;
188 % signs
%   all signs are dropped first, then re-accepted only where the
%   character to the right is already part of a number
189 t(c=='-')=false;
190 t(c=='+')=false;
191 ix1= t(:,3:end) & ...
192 (c(:,2:end-1)=='-' | ...
193 c(:,2:end-1)=='+');
194 t(:,2:end-1)=t(:,2:end-1)|ix1;
195 % exponents
%   'e'/'d' is accepted where the character to the left is part of a number
196 ix1= t(:,1:end-2) & ...
197 (c(:,2:end-1)=='e' | ...
198 c(:,2:end-1)=='d');
199 t(:,2:end-1)=t(:,2:end-1)|ix1;
200 % spec numbers
%   <c> is flattened row by row and <t> is transposed so that one linear
%   index addresses the same character in both; the five characters of
%   every template hit are marked, then both are restored to tr x tc
201 c=reshape(c.',1,[]);
202 t=t';
203 ic=[];
204 for j=1:numel(ntmpl)
205 ic=[ic,strfind(c,ntmpl{j})];
206 end
207 ic=sort(ic);
208 for i=1:numel(ic)
209 ix=ic(i)+0:ic(i)+4;
210 t(ix)=true;
211 end
212 t=t';
213 c=reshape(c.',[tc,tr]).';
214 t(c==' ')=false; % blanks are never part of a number
215 %--------------------------------------------------------------------------------
216
217 % - only allow one number per string
218 il=~any(t,2); % rows without any number character
219 ib=strfind(reshape(t.',1,[]),[0 1]); % 0->1 transitions of the row-wise flattened mask
220 if ~isempty(ib)
221 ixe=cell(3,1); % per spec char: rows in which it occurs more than once
222 n=reshape(char(t.*c).',1,[]); % masked text, row-wise flattened; unmasked chars become char(0)
223 for i=1:numel(ctmpl)
224 id=strfind(n,ctmpl{i});
225 if ~isempty(id)
226 [dum,dum,ixu{i},ixe{i}]=dupinx(id,tc);
227 end
228 end
229 in=false(tr,1); % rows to be read as a number
230 im=in; % rows with exactly one 0->1 transition
231 % must check for anomalous cases like <'.d'>
%   a row qualifies only if its masked text holds a digit or inf/nan
232 id=sort(...
233 [find(n>='0' & n<='9'),...
234 strfind(n,'inf'),...
235 strfind(n,'nan')]);
236 % [ibu,ibd,ixbu,ixe{i+1}]=dupinx(id,tc);
237 [ibu,ibd,ixbu,ixbd]=dupinx(id,tc);
238 in(ixbu)=true;
239 in(ixbd)=true;
240 [ibu,ibd,ixbu,ixbd]=dupinx(ib,tc);
241 im(ixbu)=true;
242 in=in&im;
243 in([ixe{:}])=false; % drop rows with a repeated '.', 'd' or 'e'
244 il=~any(t,2);
245 ia=~(in|il); % rows with number characters that are not read as a number
246
247 % - read valid strings
%   keep the masked characters only, blank out the rest and read one
%   number per selected row
248 n=t(in,:).*c(in,:);
249 n(n==0)=' ';
250 n=char(n);
251 dn=strread(n.','%n');
% the reader did not return exactly one number per selected row:
% give up without a message; the first output, if requested, becomes
% a structure holding the parser state
252 if numel(dn) ~= numel(find(in))
253 %disp(sprintf('ASORT> unexpected fatal error reading input!'));
254 if nargout
255 s.c=c;
256 s.t=t;
257 s.n=n;
258 s.d=dn;
259 varargout{1}=s;
260 end
261 return;
262 end
263
264 % - sort numbers
265 [ds,dx]=sort(dn,1,smod); % numeric sort in the requested direction
266 in=find(in);
267 anr=ins(in(dx));
268 snr=ins(ia);
269 end
270 str=ins(il);
271 to=clock; % end time for the <runtime> field
272
273 % - prepare output
%   a structure is returned for fewer than three outputs or with '-st',
%   otherwise the three cell arrays anr/snr/str
274 if nargout < 3 || sflg
275 s.magic='ASORT';
276 s.ver='30-Mar-2005 11:57:07';
277 s.time=datestr(clock);
278 s.runtime=etime(to,ti);
279 s.input_class=winp.class;
280 s.input_msize=winp.size;
281 s.input_bytes=winp.bytes;
282 s.strng_class=wins.class;
283 s.strng_msize=wins.size;
284 s.strng_bytes=wins.bytes;
285 s.anr=anr;
286 s.snr=snr;
287 s.str=str;
288 if dflg
289 s.c=c;
290 s.t=t;
291 s.n=n;
292 s.d=ds;
293 end
294 varargout{1}=s;
295 else
296 s={anr,snr,str};
297 for i=1:nargout
298 varargout{i}=s{i};
299 end
300 end
301
% - verbose: display a four-column table
%   INPUT | ASCII SORT | NUM SORT | NUM READ
302 if vflg
303 inp=cstr(inp);
304 an=[{'--- NUMERICAL'}; anr];
305 as=[{'--- ASCII NUMBERS'}; snr];
306 at=[{'--- ASCII STRINGS'}; str];
307 nn=[{'--- NUMBERS'}; num2cell(ds)];
308 ag={' ';' ';' '}; % three filler rows: the first two columns lack the three group headers
309 u=[{'INPUT'}; inp;ag];
310 v=[{'ASCII SORT'}; ins;ag];
311 w=[{'NUM SORT'}; an;as;at];
312 x=[{'NUM READ'}; nn;as;at];
313 w=[u,v,w,x];
314 disp(w);
315 end
316
317 return;
318 %--------------------------------------------------------------------------------
function c=cstr(s)
% - turn the rows of a char array into a column cell array of strings
%   each row is copied as is (no deblanking)
%   any non-char input is handed back unchanged
% - flagged by the original author as the bottleneck of <asort>
%   (~75% of its processing time), pending a good <cellstr> replacement

if ~ischar(s)
    c=s;
    return;
end

nrow=size(s,1);
c=cell(nrow,1);
for r=1:nrow
    c{r}=s(r,:);	% keep trailing blanks
end
return;
332 %--------------------------------------------------------------------------------
function [idu,idd,ixu,ixd]=dupinx(ix,nc)
% - check for more than one entry/row in a matrix of column size <nc>
%
%   ix   row vector of 1-based linear indices into the matrix flattened
%        row by row; neighbouring entries are compared, so the indices
%        are expected in ascending order
%   nc   number of columns of the matrix
%
%   idu  logical mask over <ix>: the only entry in its row
%   idd  logical mask over <ix>: the last of several entries in its row
%   ixu  row numbers holding exactly one entry
%   ixd  row numbers holding more than one entry
%   all outputs are [] if <ix> is empty

if isempty(ix)
    idu=[];
    idd=[];
    ixu=[];
    ixd=[];
    return;
end

% row of each index
% the -1 keeps an index in the last column (ix == k*nc) in row k;
% without it such an index was counted as part of row k+1
id=fix((ix-1)/nc)+1;
idi=diff(id)~=0;	% true where the row changes between neighbours
ide=[true idi];		% first entry of each row
idb=[idi true];		% last entry of each row
idu=idb & ide;		% first and last at once: single entry
idd=idb & ~ide;		% last but not first: row has several entries
ixu=id(idu);
ixd=id(idd);
return;
354 %--------------------------------------------------------------------------------
function inp=setinp(inp,tmpl,flg)
% - remove space(s) and/or templates
%
%   inp   string or cell array of strings to preprocess
%   tmpl  template string or cell array of template strings
%   flg   [tflg wflg]
%         tflg: 0 = keep templates, otherwise the command line position
%               of the template option
%         wflg: 0 = keep spaces, otherwise the command line position
%               of the remove-space option
%
%   the enabled steps run in ascending order of their positions
%   - template step: every occurrence of a template becomes one space
%   - space step:    every space is removed
%   <inp> is returned unchanged if it is empty or no flag is set

if isempty(inp) || ~any(flg)
    return;
end
if ischar(tmpl)
    tmpl={tmpl};
end

tpos=flg(1);
wpos=flg(2);
% only walk through the options that were actually given:
% a disabled option has flag 0 and must not trigger its step
% (a 0 in the loop used to match the disabled option's case, so the
%  template option alone also removed all spaces)
for pos=sort(flg(flg~=0))
    if pos == tpos
        for k=1:numel(tmpl)
            inp=strrep(inp,tmpl{k},' ');
        end
    elseif pos == wpos
        inp=strrep(inp,' ','');
    end
end
return;
376 %--------------------------------------------------------------------------------