diff toolboxes/FullBNT-1.0.7/KPMtools/asort.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/toolboxes/FullBNT-1.0.7/KPMtools/asort.m	Tue Feb 10 15:05:51 2015 +0000
@@ -0,0 +1,376 @@
+%[ANR,SNR,STR]	=  ASORT(INP,'OPT',...);
+% S		=  ASORT(INP,'OPT',...);
+%		   to sort alphanumeric strings numerically if
+%		   they contain one properly formatted number
+%		   otherwise, ascii dictionary sorting is applied
+%
+% INP	unsorted input:
+%	- a char array
+%	- a cell array of strings
+% OPT	options
+%  -s	- sorting option
+%	  '-s','ascend'					[def]
+%	  '-s','descend'
+%  -st	- force output form S				[def: nargout dependent]
+%  -t	- replace matching template(s) with one space
+%	  prior to sorting
+%	  '-t','template'
+%	  '-t',{'template1','template2',...}
+%  -w	- remove space(s) prior to sorting
+%
+%	  NOTE	-t/-w options are processed in the
+%		      order that they appear in
+%		      the command line
+%
+%  -v	- verbose output				[def: quiet]
+%  -d	- debug mode
+%	  save additional output in S
+%	  .c:	lex parser input
+%	  .t:	lex parser table
+%	  .n:	lex parser output
+%	  .d:	numbers read from .n
+%
+% ANR	numerically sorted alphanumeric strings		[eg, 'f.-1.5e+2x.x']
+%	- contain one number that can be read by
+%	  <strread> | <sscanf>
+% SNR	ascii dict  sorted alphanumeric strings
+% http://www.mathworks.com/matlabcentral/fileexchange/loadFile.do?objectId=7212#
+%
+%	- contain more than one number			[eg, 'f.-1.5e +2.x']
+%	- contain incomplete|ambiguous numbers		[eg, 'f.-1.5e+2.x']
+% STR	ascii dict  sorted strings
+%	- contain no numbers				[eg, 'a test']
+%
+% S	structure with fields
+%	.anr
+%	.snr
+%	.str
+
+% created:
+%	us	03-Mar-2002
+% modified:
+%	us	30-Mar-2005 11:57:07 	/ TMW R14.sp2
+
+%--------------------------------------------------------------------------------
+function	varargout=asort(inp,varargin)
+% - main function: splits the input strings into
+%   anr: strings holding exactly one readable number (sorted by that number)
+%   snr: strings holding several or ambiguous numbers (ascii sorted)
+%   str: strings holding no number (ascii sorted)
+%   and returns them as separate outputs or as fields of one structure
+
+% - pre-fill every requested output with [] so that all early returns
+%   below leave <varargout> fully defined
+varargout(1:nargout)={[]};
+% - no input at all: show the help text
+if	~nargin
+	help(mfilename);
+	return;
+end
+
+% - common parameters/options
+n=[];		% lex parser output (kept for debug output)
+ds=[];		% sorted numbers
+anr={};
+snr={};
+str={};
+smod='ascend';	% sorting option
+tmpl={};	% template(s)
+sflg=false;	% output  mode: structure
+tflg=false;	% remove  template(s)
+dflg=false;	% debug   mode
+vflg=false;	% verbose output
+wflg=false;	% remove  spaces
+
+% - option parser
+%   if an option is repeated, its last occurrence is used [ix(end)]
+%   -s/-t take a value: <nargin > ix(end)+1> makes sure that an
+%   argument follows the option
+%   tflg/wflg do not hold true/false but the position of the option
+%   in <varargin>; <setinp> uses the positions to process -t/-w in
+%   command line order
+if	nargin > 1
+	ix=find(strcmp('-s',varargin));
+	if	~isempty(ix) && nargin > ix(end)+1
+		smod=varargin{ix(end)+1};
+	end
+	ix=find(strcmp('-t',varargin));
+	if	~isempty(ix) && nargin > ix(end)+1
+		tflg=ix(end);
+		tmpl=varargin{ix(end)+1};
+	end
+	if	find(strcmp('-d',varargin));
+		dflg=true;
+	end
+	if	find(strcmp('-st',varargin));
+		sflg=true;
+	end
+	if	find(strcmp('-v',varargin));
+		vflg=true;
+	end
+	ix=find(strcmp('-w',varargin));
+	if	~isempty(ix)
+		wflg=ix(end);
+	end
+end
+%   spec numbers
+%   five-character templates searched in the lower-cased, flattened
+%   input: inf/nan preceded by a space or a sign and followed by a space
+ntmpl={
+	' inf '
+	'+inf '
+	'-inf '
+	' nan '
+	'+nan '
+	'-nan '
+	};
+%   spec chars
+%   each of these may occur only once within a valid number
+ctmpl={
+	'.'	% decimal point
+	'd'	% exponent
+	'e'	% exponent
+	};
+
+% - the default output is the unchanged input; it is what the caller gets
+%   from the early returns below
+%   more than three outputs are rejected
+if	nargout <= 3
+	varargout{1}=inp;
+else
+	disp(sprintf('ASORT> too many output args [%-1d/%-1d]\n',nargout,3));
+	help(mfilename);
+	return;
+end
+if	isempty(inp)
+	disp(sprintf('ASORT> input is empty'));
+	return;
+end
+
+% - start time, used for the <runtime> field of the output structure
+ti=clock;
+% - class/size/bytes of the input, reported in the output structure
+winp=whos('inp');
+switch	winp.class
+	case	'cell'
+		if	~iscellstr(inp)
+			disp(sprintf('ASORT> cell is not an array of strings'));
+			return;
+		end
+		inp=inp(:);
+		% note: this result is overwritten by the sort that
+		%       follows <setinp> below
+		[ins,inx]=sort(inp);
+	case	'char'
+		%		[ins,inx]=sortrows(inp);
+		% one cell per row of the char array
+		inp=cstr(inp);
+	otherwise
+		disp(sprintf('ASORT> does not sort input of class <%s>',winp.class));
+		return;
+end
+
+% - apply the -t/-w options, then sort in ascii dictionary order
+inp=inp(:);
+inp=setinp(inp,tmpl,[tflg wflg]);
+[ins,inx]=sort(inp);
+if	strcmp(smod,'descend')
+	% note: the flipped <ins> is rebuilt from the flipped <inx>
+	%       right after this block
+	ins=ins(end:-1:1,:);
+	inx=inx(end:-1:1);
+end
+ins=inp(inx);
+% - lower-case char matrix of the sorted strings, one string per row
+c=lower(char(ins));
+wins=whos('c');
+[cr,cc]=size(c);
+
+% - LEXICAL PARSER
+%   builds the logical table <t> of the same size as <c>
+%   t(i,j) is true if c(i,j) is considered part of a number
+%--------------------------------------------------------------------------------
+% - extend input on either side for search
+%   two spaces are added left and right so that the shifted slices used
+%   below [1:end-2 = left neighbour, 2:end-1 = char, 3:end = right
+%   neighbour] exist for every original char
+c=[' '*ones(cr,2) c ' '*ones(cr,2)];
+
+% - search for valid alphanumeric items in strings
+%   numbers/signs
+t=(c>='0'&c<='9');
+t=t|c=='-';
+t=t|c=='+';
+[tr,tc]=size(t);
+%   decimal points
+%   note: valid numbers with dec points must follow these templates
+%         nr.nr
+%	  sign.nr
+%         nr.<SPACE>
+%         <SPACE>.nr
+%   first pass: a '.' whose left neighbour is marked and is not a letter
+ix1=	 t(:,1:end-2) & ...
+	~isletter(c(:,1:end-2)) & ...
+	c(:,2:end-1)=='.';
+t(:,2:end-1)=t(:,2:end-1)|ix1;
+%   second pass: a '.' whose right neighbour is marked while neither
+%   neighbour is a letter, or whose right neighbour is 'e'/'d'
+%   [& binds tighter than |]
+ix1=	(t(:,3:end) & ...
+	(~isletter(c(:,3:end)) & ...
+	~isletter(c(:,1:end-2))) | ...
+	(c(:,3:end)=='e' | ...
+	c(:,3:end)=='d')) & ...
+	c(:,2:end-1)=='.';
+t(:,2:end-1)=t(:,2:end-1)|ix1;
+%		t(:,3:end)=t(:,3:end)|ix1;
+%   signs
+%   all signs are unmarked first; a sign is marked again only if its
+%   right neighbour is marked
+t(c=='-')=false;
+t(c=='+')=false;
+ix1=	 t(:,3:end) & ...
+	(c(:,2:end-1)=='-' | ...
+	c(:,2:end-1)=='+');
+t(:,2:end-1)=t(:,2:end-1)|ix1;
+%   exponents
+%   an 'e'/'d' is marked if its left neighbour is marked
+ix1=	 t(:,1:end-2) & ...
+	(c(:,2:end-1)=='e' | ...
+	c(:,2:end-1)=='d');
+t(:,2:end-1)=t(:,2:end-1)|ix1;
+%   spec numbers
+%   <c> is flattened row by row and <t> is transposed so that a linear
+%   index into <t> addresses the same char as an index into the
+%   flattened <c>; all five chars of each template hit are marked
+c=reshape(c.',1,[]);
+t=t';
+ic=[];
+for	j=1:numel(ntmpl)
+	ic=[ic,strfind(c,ntmpl{j})];
+end
+ic=sort(ic);
+for	i=1:numel(ic)
+	ix=ic(i)+0:ic(i)+4;
+	t(ix)=true;
+end
+%   restore the original orientation of <t> and <c>
+t=t';
+c=reshape(c.',[tc,tr]).';
+%   spaces never belong to a number
+t(c==' ')=false;
+%--------------------------------------------------------------------------------
+
+% - only allow one number per string
+%   il: rows without any marked char
+%   ib: positions in the flattened table where a marked run starts
+%       [unmarked char followed by a marked char]
+il=~any(t,2);
+ib=strfind(reshape(t.',1,[]),[0 1]);
+if	~isempty(ib)
+	ixe=cell(3,1);
+	% flattened string that keeps the marked chars only
+	% [unmarked chars turn into char(0)]
+	n=reshape(char(t.*c).',1,[]);
+	% ixe{i}: rows in which the i-th spec char occurs more than once
+	% among the marked chars; <ixu> is not used any further
+	for	i=1:numel(ctmpl)
+		id=strfind(n,ctmpl{i});
+		if	~isempty(id)
+			[dum,dum,ixu{i},ixe{i}]=dupinx(id,tc);
+		end
+	end
+	in=false(tr,1);
+	im=in;
+	%   must check for anomalous cases like <'.d'>
+	%   in: rows whose marked chars hold at least one digit, inf, or nan
+	id=sort(...
+		[find(n>='0' & n<='9'),...
+		strfind(n,'inf'),...
+		strfind(n,'nan')]);
+	%		[ibu,ibd,ixbu,ixe{i+1}]=dupinx(id,tc);
+	[ibu,ibd,ixbu,ixbd]=dupinx(id,tc);
+	in(ixbu)=true;
+	in(ixbd)=true;
+	%   im: rows with exactly one marked run
+	[ibu,ibd,ixbu,ixbd]=dupinx(ib,tc);
+	im(ixbu)=true;
+	in=in&im;
+	%   rows with a repeated spec char are not valid numbers
+	in([ixe{:}])=false;
+	%   il: rows without marked chars
+	%   ia: rows with marked chars that do not form one valid number
+	il=~any(t,2);
+	ia=~(in|il);
+
+	% - read valid strings
+	%   unmarked chars are blanked so that each row holds one number
+	%   only; <n.'> makes <strread> see the chars row by row
+	n=t(in,:).*c(in,:);
+	n(n==0)=' ';
+	n=char(n);
+	dn=strread(n.','%n');
+	% - the number of values read must match the number of valid rows
+	%   if it does not, stop here: the parser data is returned as the
+	%   first output [if any output is requested], all other outputs
+	%   keep their initial value []
+	if	numel(dn) ~= numel(find(in))
+		%disp(sprintf('ASORT> unexpected fatal error reading input!'));
+		if	nargout
+			s.c=c;
+			s.t=t;
+			s.n=n;
+			s.d=dn;
+			varargout{1}=s;
+		end
+		return;
+	end
+
+	% - sort numbers
+	%   anr: strings of the valid rows in the order of their numbers
+	%   snr: strings of the rows with several/ambiguous numbers
+	[ds,dx]=sort(dn,1,smod);
+	in=find(in);
+	anr=ins(in(dx));
+	snr=ins(ia);
+end
+% - strings without numbers
+str=ins(il);
+to=clock;
+
+% - prepare output
+%   a structure is returned if fewer than three outputs are requested
+%   or if the option -st is set; otherwise anr/snr/str are returned as
+%   separate outputs
+if	nargout < 3 || sflg
+	s.magic='ASORT';
+	s.ver='30-Mar-2005 11:57:07';
+	s.time=datestr(clock);
+	s.runtime=etime(to,ti);
+	s.input_class=winp.class;
+	s.input_msize=winp.size;
+	s.input_bytes=winp.bytes;
+	s.strng_class=wins.class;
+	s.strng_msize=wins.size;
+	s.strng_bytes=wins.bytes;
+	s.anr=anr;
+	s.snr=snr;
+	s.str=str;
+	% debug mode: add the parser input/table/output and the numbers
+	if	dflg
+		s.c=c;
+		s.t=t;
+		s.n=n;
+		s.d=ds;
+	end
+	varargout{1}=s;
+else
+	s={anr,snr,str};
+	for	i=1:nargout
+		varargout{i}=s{i};
+	end
+end
+
+% - verbose output: display a four column table
+%   input | ascii sorted input | result with strings | result with numbers
+%   <ag> pads the first two columns by the three header rows that the
+%   last two columns contain in addition
+if	vflg
+	inp=cstr(inp);
+	an=[{'--- NUMERICAL'};		anr];
+	as=[{'--- ASCII NUMBERS'};	snr];
+	at=[{'--- ASCII STRINGS'};	str];
+	nn=[{'--- NUMBERS'};		num2cell(ds)];
+	ag={' ';' ';' '};
+	u=[{'INPUT'};			inp;ag];
+	v=[{'ASCII SORT'};		ins;ag];
+	w=[{'NUM SORT'};		an;as;at];
+	x=[{'NUM READ'};		nn;as;at];
+	w=[u,v,w,x];
+	disp(w);
+end
+
+return;
+%--------------------------------------------------------------------------------
+function	c=cstr(s)
+% - convert a char array into a column cell array with one cell per row
+%   rows are copied as they are: trailing blanks are kept
+%   any input that is not a char array is returned unchanged
+%   note: written as a plain loop, which the original author measured
+%         as the main bottleneck of <asort> [~75% of its runtime]
+
+if	~ischar(s)
+	c=s;
+	return;
+end
+
+nrow=size(s,1);
+c=cell(nrow,1);
+for	k=1:nrow
+	c{k}=s(k,:);	% full row, no deblanking!
+end
+return;
+%--------------------------------------------------------------------------------
+function	[idu,idd,ixu,ixd]=dupinx(ix,nc)
+% - check for more than one entry/row in a matrix of column size <nc>
+%   ix	linear indices into the matrix flattened row by row
+%	entries of one row are expected to be adjacent [eg, sorted]
+%   nc	number of columns of the matrix
+%
+%   idu	logical mask over <ix>: the only entry of its row
+%   idd	logical mask over <ix>: last entry of a row with several entries
+%   ixu	row numbers that hold exactly one entry
+%   ixd	row numbers that hold more than one entry
+%	all outputs are [] if <ix> is empty
+
+if	isempty(ix)
+	idu=[];
+	idd=[];
+	ixu=[];
+	ixd=[];
+	return;
+end
+
+% - the run detection below concatenates row vectors
+ix=ix(:).';
+% - row number of each entry
+%   note: <ix-1> keeps an entry in the last column [ix == k*nc] in
+%         row k; <fix(ix/nc)+1> would move it to row k+1
+id=fix((ix-1)/nc)+1;
+% - runs of equal row numbers
+idi=diff(id)~=0;	% row changes between neighbouring entries
+ide=[true idi];		% entry starts a run
+idb=[idi true];		% entry ends   a run
+idu=idb & ide;		% run of length one:       unique
+idd=idb & ~ide;		% end of a longer run:     duplicate
+ixu=id(idu);
+ixd=id(idd);
+return;
+%--------------------------------------------------------------------------------
+function	inp=setinp(inp,tmpl,flg)
+% - remove space(s) and/or templates
+%   inp		string or cell array of strings
+%   tmpl	template or cell array of templates
+%		each match is replaced by one space
+%   flg		[tflg wflg]
+%		tflg: position of the template option, 0 = not requested
+%		wflg: position of the space    option, 0 = not requested
+%		requested options are processed in the order of their
+%		positions
+%   the input is returned unchanged if it is empty or if no option is
+%   requested
+
+if	isempty(inp) || ~any(flg)
+	return;
+end
+
+if	ischar(tmpl)
+	tmpl={tmpl};
+end
+
+% - only look at options that were requested
+%   note: a zero entry must not take part in the loop: it would compare
+%         equal to the flag of the option that is NOT set and run that
+%         option [eg, strip all spaces although only templates were
+%         requested]
+flg=flg(:).';
+for	pos=sort(flg(flg~=0))
+	if	pos == flg(1)
+		% templates -> one space
+		for	k=1:numel(tmpl)
+			inp=strrep(inp,tmpl{k},' ');
+		end
+	else
+		% remove spaces
+		inp=strrep(inp,' ','');
+	end
+end
+return;
+%--------------------------------------------------------------------------------