Mercurial > hg > camir-aes2014
comparison toolboxes/FullBNT-1.0.7/KPMtools/asort.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e9a9cd732c1e |
---|---|
1 %[ANR,SNR,STR] = ASORT(INP,'OPT',...); | |
2 % S = ASORT(INP,'OPT',...); | |
3 % to sort alphanumeric strings numerically if | |
4 % they contain one properly formatted number | |
5 % otherwise, ascii dictionary sorting is applied | |
6 % | |
7 % INP unsorted input: | |
8 % - a char array | |
9 % - a cell array of strings | |
10 % OPT options | |
11 % -s - sorting option | |
12 % '-s','ascend' [def] | |
13 % '-s','descend' | |
14 % -st - force output form S [def: nargout dependent] | |
15 % -t - replace matching template(s) with one space | |
16 % prior to sorting | |
17 % '-t','template' | |
18 % '-t',{'template1','template2',...} | |
19 % -w - remove space(s) prior to sorting | |
20 % | |
21 % NOTE -t/-w options are processed in the | |
22 % order that they appear in | |
23 % the command line | |
24 % | |
25 % -v - verbose output [def: quiet] | |
26 % -d - debug mode | |
27 % save additional output in S | |
28 % .c: lex parser input | |
29 % .t: lex parser table | |
30 % .n: lex parser output | |
31 % .d: numbers read from .n | |
32 % | |
33 % ANR numerically sorted alphanumeric strings [eg, 'f.-1.5e+2x.x'] | |
34 % - contain one number that can be read by | |
35 % <strread> | <sscanf> | |
36 % SNR ascii dict sorted alphanumeric strings | |
37 % http://www.mathworks.com/matlabcentral/fileexchange/loadFile.do?objectId=7212# | |
38 % | |
39 % - contain more than one number [eg, 'f.-1.5e +2.x'] | |
40 % - contain incomplete|ambiguous numbers [eg, 'f.-1.5e+2.x'] | |
41 % STR ascii dict sorted strings | |
42 % - contain no numbers [eg, 'a test'] | |
43 % | |
44 % S structure with fields | |
45 % .anr | |
46 % .snr | |
47 % .str | |
48 | |
49 % created: | |
50 % us 03-Mar-2002 | |
51 % modified: | |
52 % us 30-Mar-2005 11:57:07 / TMW R14.sp2 | |
53 | |
54 %-------------------------------------------------------------------------------- | |
55 function varargout=asort(inp,varargin) | |
% ASORT main entry point.
%   1) parses the option list in varargin,
%   2) ASCII-sorts the input strings (after optional template/space removal),
%   3) builds a logical mask <t> over the character matrix <c> marking the
%      characters that belong to a number (the "lexical parser"),
%   4) splits the sorted strings into
%        anr: rows holding exactly one readable number (sorted numerically)
%        snr: rows holding number characters that fail the one-number test
%        str: rows holding no number characters
%   5) returns either a structure or up to three cell arrays.
56 | |
% - pre-fill every requested output with [] so that early returns are safe
57 varargout(1:nargout)={[]}; | |
58 if ~nargin | |
59 help(mfilename); | |
60 return; | |
61 end | |
62 | |
63 % - common parameters/options | |
64 n=[]; | |
65 ds=[]; | |
66 anr={}; | |
67 snr={}; | |
68 str={}; | |
69 smod='ascend'; % sorting option | |
70 tmpl={}; % template(s) | |
71 sflg=false; % output mode: structure | |
72 tflg=false; % remove template(s) | |
73 dflg=false; % debug mode | |
74 vflg=false; % verbose output | |
75 wflg=false; % remove spaces | |
76 | |
% - option scan
%   for repeated '-s'/'-t'/'-w' only the LAST occurrence [ix(end)] is used
%   '-s' and '-t' are honoured only if a value argument follows them
%   tflg/wflg store the argument POSITION rather than plain true;
%   <setinp> uses these positions to apply -t/-w in command line order
77 if nargin > 1 | |
78 ix=find(strcmp('-s',varargin)); | |
79 if ~isempty(ix) && nargin > ix(end)+1 | |
80 smod=varargin{ix(end)+1}; | |
81 end | |
82 ix=find(strcmp('-t',varargin)); | |
83 if ~isempty(ix) && nargin > ix(end)+1 | |
84 tflg=ix(end); | |
85 tmpl=varargin{ix(end)+1}; | |
86 end | |
87 if find(strcmp('-d',varargin)); | |
88 dflg=true; | |
89 end | |
90 if find(strcmp('-st',varargin)); | |
91 sflg=true; | |
92 end | |
93 if find(strcmp('-v',varargin)); | |
94 vflg=true; | |
95 end | |
96 ix=find(strcmp('-w',varargin)); | |
97 if ~isempty(ix) | |
98 wflg=ix(end); | |
99 end | |
100 end | |
% - each special number template is 5 characters long; they are searched
%   in the lower-cased text, and the 5 matched characters are marked below
101 % spec numbers | |
102 ntmpl={ | |
103 ' inf ' | |
104 '+inf ' | |
105 '-inf ' | |
106 ' nan ' | |
107 '+nan ' | |
108 '-nan ' | |
109 }; | |
% - characters that may occur at most once inside the masked number text;
%   rows where one of them occurs more than once are rejected [see <ixe>]
110 % spec chars | |
111 ctmpl={ | |
112 '.' % decimal point | |
113 'd' % exponent | |
114 'e' % exponent | |
115 }; | |
116 | |
% - from here on the first output defaults to the unmodified input, which
%   is what the early returns below hand back
117 if nargout <= 3 | |
118 varargout{1}=inp; | |
119 else | |
120 disp(sprintf('ASORT> too many output args [%-1d/%-1d]\n',nargout,3)); | |
121 help(mfilename); | |
122 return; | |
123 end | |
124 if isempty(inp) | |
125 disp(sprintf('ASORT> input is empty')); | |
126 return; | |
127 end | |
128 | |
% - ti: start time for the <runtime> field of the output structure
% - winp: class/size/bytes of the input as reported by <whos>
129 ti=clock; | |
130 winp=whos('inp'); | |
131 switch winp.class | |
132 case 'cell' | |
133 if ~iscellstr(inp) | |
134 disp(sprintf('ASORT> cell is not an array of strings')); | |
135 return; | |
136 end | |
137 inp=inp(:); | |
% NOTE(review): <ins>/<inx> computed on the next line are overwritten by
%   the sort that follows <setinp> below - this call looks redundant
138 [ins,inx]=sort(inp); | |
139 case 'char' | |
140 % [ins,inx]=sortrows(inp); | |
141 inp=cstr(inp); | |
142 otherwise | |
143 disp(sprintf('ASORT> does not sort input of class <%s>',winp.class)); | |
144 return; | |
145 end | |
146 | |
% - apply -t/-w preprocessing, then ASCII sort
%   for 'descend' the ascending sort index is simply reversed
%   NOTE(review): the reversed <ins> is immediately recomputed as inp(inx),
%   so only the reversal of <inx> has an effect
147 inp=inp(:); | |
148 inp=setinp(inp,tmpl,[tflg wflg]); | |
149 [ins,inx]=sort(inp); | |
150 if strcmp(smod,'descend') | |
151 ins=ins(end:-1:1,:); | |
152 inx=inx(end:-1:1); | |
153 end | |
154 ins=inp(inx); | |
% - c: lower-cased char matrix of the sorted strings [one string per row]
% - wins: class/size/bytes of <c> for the output structure
155 c=lower(char(ins)); | |
156 wins=whos('c'); | |
157 [cr,cc]=size(c); | |
158 | |
159 % - LEXICAL PARSER | |
160 %-------------------------------------------------------------------------------- | |
161 % - extend input on either side for search | |
% two blank columns are added on the left and on the right; the tests
% below compare each column with its left/right neighbour via the shifted
% windows 1:end-2 / 2:end-1 / 3:end
162 c=[' '*ones(cr,2) c ' '*ones(cr,2)]; | |
163 | |
164 % - search for valid alphanumeric items in strings | |
165 % numbers/signs | |
% t: logical mask, same size as <c>, true where a character is currently
%    considered part of a number; it starts with digits and signs
166 t=(c>='0'&c<='9'); | |
167 t=t|c=='-'; | |
168 t=t|c=='+'; | |
169 [tr,tc]=size(t); | |
170 % decimal points | |
171 % note: valid numbers with dec points must follow these templates | |
172 % nr.nr | |
173 % sign.nr | |
174 % nr.<SPACE> | |
175 % <SPACE>.nr | |
% first pass: mark a '.' whose LEFT neighbour is marked and is not a letter
176 ix1= t(:,1:end-2) & ... | |
177 ~isletter(c(:,1:end-2)) & ... | |
178 c(:,2:end-1)=='.'; | |
179 t(:,2:end-1)=t(:,2:end-1)|ix1; | |
% second pass: mark a '.' if
%   - its RIGHT neighbour is marked and neither neighbour is a letter, or
%   - its RIGHT neighbour is 'e' or 'd'
180 ix1= (t(:,3:end) & ... | |
181 (~isletter(c(:,3:end)) & ... | |
182 ~isletter(c(:,1:end-2))) | ... | |
183 (c(:,3:end)=='e' | ... | |
184 c(:,3:end)=='d')) & ... | |
185 c(:,2:end-1)=='.'; | |
186 t(:,2:end-1)=t(:,2:end-1)|ix1; | |
187 % t(:,3:end)=t(:,3:end)|ix1; | |
188 % signs | |
% all signs are unmarked first; a sign is re-marked only if the character
% to its right is marked
189 t(c=='-')=false; | |
190 t(c=='+')=false; | |
191 ix1= t(:,3:end) & ... | |
192 (c(:,2:end-1)=='-' | ... | |
193 c(:,2:end-1)=='+'); | |
194 t(:,2:end-1)=t(:,2:end-1)|ix1; | |
195 % exponents | |
% an 'e'/'d' is marked if the character to its left is marked
196 ix1= t(:,1:end-2) & ... | |
197 (c(:,2:end-1)=='e' | ... | |
198 c(:,2:end-1)=='d'); | |
199 t(:,2:end-1)=t(:,2:end-1)|ix1; | |
200 % spec numbers | |
% <c> is flattened row by row into one long string and <t> is transposed,
% so that linear indices into both refer to the same character; every
% match of a template in <ntmpl> marks its 5 characters; afterwards both
% are restored to their matrix layout and blanks are unmarked again
201 c=reshape(c.',1,[]); | |
202 t=t'; | |
203 ic=[]; | |
204 for j=1:numel(ntmpl) | |
205 ic=[ic,strfind(c,ntmpl{j})]; | |
206 end | |
207 ic=sort(ic); | |
208 for i=1:numel(ic) | |
209 ix=ic(i)+0:ic(i)+4; | |
210 t(ix)=true; | |
211 end | |
212 t=t'; | |
213 c=reshape(c.',[tc,tr]).'; | |
214 t(c==' ')=false; | |
215 %-------------------------------------------------------------------------------- | |
216 | |
217 % - only allow one number per string | |
% il: rows without any marked character
% ib: positions [row-wise flattened mask] of each 0->1 transition,
%     ie, the position just before the start of every run of marked chars
218 il=~any(t,2); | |
219 ib=strfind(reshape(t.',1,[]),[0 1]); | |
220 if ~isempty(ib) | |
221 ixe=cell(3,1); | |
% n: row-wise flattened text in which unmarked characters are char(0)
222 n=reshape(char(t.*c).',1,[]); | |
% ixe{i}: rows in which the special character ctmpl{i} occurs more than
%         once within the marked text [4th output of <dupinx>]
223 for i=1:numel(ctmpl) | |
224 id=strfind(n,ctmpl{i}); | |
225 if ~isempty(id) | |
226 [dum,dum,ixu{i},ixe{i}]=dupinx(id,tc); | |
227 end | |
228 end | |
229 in=false(tr,1); | |
230 im=in; | |
231 % must check for anomalous cases like <'.d'> | |
% in: rows whose marked text holds at least one digit, 'inf' or 'nan'
232 id=sort(... | |
233 [find(n>='0' & n<='9'),... | |
234 strfind(n,'inf'),... | |
235 strfind(n,'nan')]); | |
236 % [ibu,ibd,ixbu,ixe{i+1}]=dupinx(id,tc); | |
237 [ibu,ibd,ixbu,ixbd]=dupinx(id,tc); | |
238 in(ixbu)=true; | |
239 in(ixbd)=true; | |
% im: rows with exactly ONE run of marked characters
% a row is kept in <in> only if it passes both tests and has no duplicated
% special character; ia collects the rows with marked characters that
% were rejected
240 [ibu,ibd,ixbu,ixbd]=dupinx(ib,tc); | |
241 im(ixbu)=true; | |
242 in=in&im; | |
243 in([ixe{:}])=false; | |
244 il=~any(t,2); | |
245 ia=~(in|il); | |
246 | |
247 % - read valid strings | |
% unmarked characters of the accepted rows are blanked; the transpose in
% n.' makes <strread> see the text row by row
248 n=t(in,:).*c(in,:); | |
249 n(n==0)=' '; | |
250 n=char(n); | |
251 dn=strread(n.','%n'); | |
% sanity check: exactly one number must have been read per accepted row;
% otherwise return early - if outputs were requested, the first output
% becomes a structure with the parser state [the message is disabled]
252 if numel(dn) ~= numel(find(in)) | |
253 %disp(sprintf('ASORT> unexpected fatal error reading input!')); | |
254 if nargout | |
255 s.c=c; | |
256 s.t=t; | |
257 s.n=n; | |
258 s.d=dn; | |
259 varargout{1}=s; | |
260 end | |
261 return; | |
262 end | |
263 | |
264 % - sort numbers | |
% the numbers are sorted according to <smod> and the order is mapped back
% to the accepted strings; rejected number-like rows keep their ASCII order
265 [ds,dx]=sort(dn,1,smod); | |
266 in=find(in); | |
267 anr=ins(in(dx)); | |
268 snr=ins(ia); | |
269 end | |
270 str=ins(il); | |
271 to=clock; | |
272 | |
273 % - prepare output | |
% structure output if fewer than three outputs are requested or '-st' was
% given; otherwise the three cell arrays are returned separately
274 if nargout < 3 || sflg | |
275 s.magic='ASORT'; | |
276 s.ver='30-Mar-2005 11:57:07'; | |
277 s.time=datestr(clock); | |
278 s.runtime=etime(to,ti); | |
279 s.input_class=winp.class; | |
280 s.input_msize=winp.size; | |
281 s.input_bytes=winp.bytes; | |
282 s.strng_class=wins.class; | |
283 s.strng_msize=wins.size; | |
284 s.strng_bytes=wins.bytes; | |
285 s.anr=anr; | |
286 s.snr=snr; | |
287 s.str=str; | |
288 if dflg | |
289 s.c=c; | |
290 s.t=t; | |
291 s.n=n; | |
292 s.d=ds; | |
293 end | |
294 varargout{1}=s; | |
295 else | |
296 s={anr,snr,str}; | |
297 for i=1:nargout | |
298 varargout{i}=s{i}; | |
299 end | |
300 end | |
301 | |
% - verbose mode: display a four column cell table
%   INPUT | ASCII SORT | NUM SORT | NUM READ
%   the first two columns are padded with three blank cells <ag> to match
%   the three section headers contained in the last two columns
302 if vflg | |
303 inp=cstr(inp); | |
304 an=[{'--- NUMERICAL'}; anr]; | |
305 as=[{'--- ASCII NUMBERS'}; snr]; | |
306 at=[{'--- ASCII STRINGS'}; str]; | |
307 nn=[{'--- NUMBERS'}; num2cell(ds)]; | |
308 ag={' ';' ';' '}; | |
309 u=[{'INPUT'}; inp;ag]; | |
310 v=[{'ASCII SORT'}; ins;ag]; | |
311 w=[{'NUM SORT'}; an;as;at]; | |
312 x=[{'NUM READ'}; nn;as;at]; | |
313 w=[u,v,w,x]; | |
314 disp(w); | |
315 end | |
316 | |
317 return; | |
318 %-------------------------------------------------------------------------------- | |
function c=cstr(s)
% CSTR	convert a char matrix into a column cell array of its rows
%
%	s	any input
%	c	- if <s> is a char array: size(s,1)-by-1 cell holding one
%		  row of <s> per cell; trailing blanks are KEPT
%		  [this is why <cellstr> is not used here]
%		- otherwise: <s> itself, unchanged

% - anything that is not a char array passes straight through
	if	~ischar(s)
		c=s;
		return;
	end

% - copy row by row without deblanking
	nrow=size(s,1);
	c=cell(nrow,1);
	for	row=1:nrow
		c{row}=s(row,:);
	end
332 %-------------------------------------------------------------------------------- | |
function [idu,idd,ixu,ixd]=dupinx(ix,nc)
% DUPINX	check for more than one entry/row in a matrix of column size <nc>
%
%	ix	ascending 1-based linear indices into a matrix that was
%		flattened ROW BY ROW [eg, output of <strfind>/<find> on
%		reshape(m.',1,[])]
%	nc	number of columns of that matrix
%
%	idu	logical mask over <ix>: entry is the only one in its row
%	idd	logical mask over <ix>: entry is the last one of a row
%		that holds more than one entry
%	ixu	row numbers that hold exactly one entry
%	ixd	row numbers that hold more than one entry
%
%	all outputs are [] if <ix> is empty

	if	isempty(ix)
		idu=[];
		idd=[];
		ixu=[];
		ixd=[];
		return;
	end

% - the run detection below concatenates horizontally
	ix=ix(:).';

% - row number of each index
%   note: the offset must be removed BEFORE the division; fix(ix/nc)+1
%         would assign an entry in the last column [ix == k*nc] to the
%         following row
	id=fix((ix-1)/nc)+1;

% - mark beginning/end of each run of equal row numbers
	idi=diff(id)~=0;
	ide=[true idi];		% entry starts a run
	idb=[idi true];		% entry ends   a run

	idu=idb & ide;		% run of length 1:  unique
	idd=idb & ~ide;		% end of a longer run: duplicate
	ixu=id(idu);
	ixd=id(idd);
354 %-------------------------------------------------------------------------------- | |
function inp=setinp(inp,tmpl,flg)
% SETINP	remove space(s) and/or replace template(s) prior to sorting
%
%	inp	cell array of strings [or a string]
%	tmpl	template string or cell array of template strings;
%		each occurrence is replaced by ONE space
%	flg	[tpos wpos]
%		tpos: command line position of the -t option, 0/false = off
%		wpos: command line position of the -w option, 0/false = off
%		the two steps are applied in ascending order of position
%
%	inp	is returned unchanged if it is empty or if both flags are off

	if	isempty(inp) || ~any(flg)
		return;
	end
	if	ischar(tmpl)
		tmpl={tmpl};
	end

	tpos=flg(1);
	wpos=flg(2);

% - only visit options that were actually requested
%   note: a position of 0 means <option not given>; iterating over it
%         would match the unset flag and, eg, strip all spaces although
%         only -t was selected
	act=sort(flg(flg>0));
	for	pos=act(:).'
		if	pos==tpos
% - replace template(s) with one space
			for	k=1:numel(tmpl)
				inp=strrep(inp,tmpl{k},' ');
			end
		elseif	pos==wpos
% - remove all spaces
			inp=strrep(inp,' ','');
		end
	end
376 %-------------------------------------------------------------------------------- |