view toolboxes/FullBNT-1.0.7/KPMtools/asort.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line source
%[ANR,SNR,STR]	=  ASORT(INP,'OPT',...);
% S		=  ASORT(INP,'OPT',...);
%		   to sort alphanumeric strings numerically if
%		   they contain one properly formatted number
%		   otherwise, ascii dictionary sorting is applied
%
% INP	unsorted input:
%	- a char array
%	- a cell array of strings
% OPT	options
%  -s	- sorting option
%	  '-s','ascend'					[def]
%	  '-s','descend'
%  -st	- force output form S				[def: nargout dependent]
%  -t	- replace matching template(s) with one space
%	  prior to sorting
%	  '-t','template'
%	  '-t',{'template1','template2',...}
%  -w	- remove space(s) prior to sorting
%
%	  NOTE	-t/-w options are processed in the
%		      order that they appear in
%		      the command line
%
%  -v	- verbose output				[def: quiet]
%  -d	- debug mode
%	  save additional output in S
%	  .c:	lex parser input
%	  .t:	lex parser table
%	  .n:	lex parser output
%	  .d:	numbers read from .n
%
% ANR	numerically sorted alphanumeric strings		[eg, 'f.-1.5e+2x.x']
%	- contain one number that can be read by
%	  <strread> | <sscanf>
% SNR	ascii dict  sorted alphanumeric strings
% http://www.mathworks.com/matlabcentral/fileexchange/loadFile.do?objectId=7212#
%
%	- contain more than one number			[eg, 'f.-1.5e +2.x']
%	- contain incomplete|ambiguous numbers		[eg, 'f.-1.5e+2.x']
% STR	ascii dict  sorted strings
%	- contain no numbers				[eg, 'a test']
%
% S	structure with fields
%	.anr
%	.snr
%	.str

% created:
%	us	03-Mar-2002
% modified:
%	us	30-Mar-2005 11:57:07 	/ TMW R14.sp2

%--------------------------------------------------------------------------------
function	varargout=asort(inp,varargin)
% - main routine
%   splits the (ascii sorted) input strings into three groups
%	anr: strings holding exactly one readable number, sorted by that number
%	snr: strings holding number-like characters that do not qualify for anr
%	str: strings without any number-like characters
%   and returns them either as three outputs or as fields of one structure

% - preset every requested output to [] so that each early <return>
%   below leaves all outputs defined
varargout(1:nargout)={[]};
% - no input at all: show the help text
if	~nargin
	help(mfilename);
	return;
end

% - common parameters/options
n=[];
ds=[];
anr={};
snr={};
str={};
smod='ascend';	% sorting option
tmpl={};	% template(s)
sflg=false;	% output  mode: structure
tflg=false;	% remove  template(s)
dflg=false;	% debug   mode
vflg=false;	% verbose output
wflg=false;	% remove  spaces

% - option parsing
%   for options that take a value (-s/-t), the last occurrence wins and
%   <nargin > ix(end)+1> makes sure that a value actually follows it
%   (nargin counts <inp> as well, hence the offset)
%   note: tflg/wflg do not hold <true> but the POSITION of the option in
%         varargin; <setinp> uses these positions to apply -t/-w in
%         command line order
if	nargin > 1
	ix=find(strcmp('-s',varargin));
	if	~isempty(ix) && nargin > ix(end)+1
		smod=varargin{ix(end)+1};
	end
	ix=find(strcmp('-t',varargin));
	if	~isempty(ix) && nargin > ix(end)+1
		tflg=ix(end);
		tmpl=varargin{ix(end)+1};
	end
	if	find(strcmp('-d',varargin));
		dflg=true;
	end
	if	find(strcmp('-st',varargin));
		sflg=true;
	end
	if	find(strcmp('-v',varargin));
		vflg=true;
	end
	ix=find(strcmp('-w',varargin));
	if	~isempty(ix)
		wflg=ix(end);
	end
end
%   spec numbers
%   each template is five chars long, which matches the five-char span
%   marked per hit in the <spec numbers> step of the lexical parser
ntmpl={
	' inf '
	'+inf '
	'-inf '
	' nan '
	'+nan '
	'-nan '
	};
%   spec chars
%   a row whose marked characters hold one of these more than once is
%   excluded from the numerically sorted group (see ixe below)
ctmpl={
	'.'	% decimal point
	'd'	% exponent
	'e'	% exponent
	};

% - with a valid number of outputs the first output defaults to the
%   unchanged input, which is what the early returns below hand back
if	nargout <= 3
	varargout{1}=inp;
else
	disp(sprintf('ASORT> too many output args [%-1d/%-1d]\n',nargout,3));
	help(mfilename);
	return;
end
if	isempty(inp)
	disp(sprintf('ASORT> input is empty'));
	return;
end

% - ti: start time for the <runtime> field of the output structure
%   winp: class/size/bytes of the input as reported by <whos>
ti=clock;
winp=whos('inp');
switch	winp.class
	case	'cell'
		if	~iscellstr(inp)
			disp(sprintf('ASORT> cell is not an array of strings'));
			return;
		end
		inp=inp(:);
		% NOTE(review): ins/inx are computed again after <setinp>
		% below, so this sort looks redundant
		[ins,inx]=sort(inp);
	case	'char'
		%		[ins,inx]=sortrows(inp);
		% one cell per row of the char array (no deblanking, see cstr)
		inp=cstr(inp);
	otherwise
		disp(sprintf('ASORT> does not sort input of class <%s>',winp.class));
		return;
end

% - apply the -t/-w preprocessing, then sort in ascii dictionary order
inp=inp(:);
inp=setinp(inp,tmpl,[tflg wflg]);
[ins,inx]=sort(inp);
if	strcmp(smod,'descend')
	% the reversed <ins> is overwritten by <ins=inp(inx)> right below;
	% only the reversed index <inx> takes effect
	ins=ins(end:-1:1,:);
	inx=inx(end:-1:1);
end
ins=inp(inx);
% - c: lower case char matrix of the sorted strings (one string per row)
%   wins: class/size/bytes of that matrix for the output structure
c=lower(char(ins));
wins=whos('c');
[cr,cc]=size(c);

% - LEXICAL PARSER
%   builds a logical mask <t> of the same size as <c> that marks all
%   characters considered part of a number
%   the shifted windows (:,1:end-2) / (:,2:end-1) / (:,3:end) address the
%   left neighbour / the character itself / the right neighbour
%--------------------------------------------------------------------------------
% - extend input on either side for search
c=[' '*ones(cr,2) c ' '*ones(cr,2)];

% - search for valid alphanumeric items in strings
%   numbers/signs
t=(c>='0'&c<='9');
t=t|c=='-';
t=t|c=='+';
[tr,tc]=size(t);
%   decimal points
%   note: valid numbers with dec points must follow these templates
%         nr.nr
%	  sign.nr
%         nr.<SPACE>
%         <SPACE>.nr
%   1st pass: a '.' whose left neighbour is marked and is not a letter
ix1=	 t(:,1:end-2) & ...
	~isletter(c(:,1:end-2)) & ...
	c(:,2:end-1)=='.';
t(:,2:end-1)=t(:,2:end-1)|ix1;
%   2nd pass: a '.' whose right neighbour is marked while neither
%   neighbour is a letter, or whose right neighbour is 'e'/'d'
%   (& binds tighter than |, so the trailing <=='.'> test applies to both)
ix1=	(t(:,3:end) & ...
	(~isletter(c(:,3:end)) & ...
	~isletter(c(:,1:end-2))) | ...
	(c(:,3:end)=='e' | ...
	c(:,3:end)=='d')) & ...
	c(:,2:end-1)=='.';
t(:,2:end-1)=t(:,2:end-1)|ix1;
%		t(:,3:end)=t(:,3:end)|ix1;
%   signs
%   all signs are unmarked first, then a sign is marked again only if
%   the character to its right is marked
t(c=='-')=false;
t(c=='+')=false;
ix1=	 t(:,3:end) & ...
	(c(:,2:end-1)=='-' | ...
	c(:,2:end-1)=='+');
t(:,2:end-1)=t(:,2:end-1)|ix1;
%   exponents
%   an 'e'/'d' is marked if the character to its left is marked
ix1=	 t(:,1:end-2) & ...
	(c(:,2:end-1)=='e' | ...
	c(:,2:end-1)=='d');
t(:,2:end-1)=t(:,2:end-1)|ix1;
%   spec numbers
%   <c> is flattened row by row and <t> is transposed so that linear
%   indices into both refer to the same character; for each template hit
%   the five characters starting at the hit are marked, then both
%   matrices are brought back to their [tr x tc] layout
c=reshape(c.',1,[]);
t=t';
ic=[];
for	j=1:numel(ntmpl)
	ic=[ic,strfind(c,ntmpl{j})];
end
ic=sort(ic);
for	i=1:numel(ic)
	ix=ic(i)+0:ic(i)+4;
	t(ix)=true;
end
t=t';
c=reshape(c.',[tc,tr]).';
%   spaces never belong to a number
t(c==' ')=false;
%--------------------------------------------------------------------------------

% - only allow one number per string
%   il: rows without any marked character
%   ib: positions in the row-wise flattened mask where it goes from 0 to 1
il=~any(t,2);
ib=strfind(reshape(t.',1,[]),[0 1]);
if	~isempty(ib)
	ixe=cell(3,1);
	% n: row-wise flattened marked characters, unmarked ones are char(0)
	n=reshape(char(t.*c).',1,[]);
	% ixe{i}: rows in which the spec char ctmpl{i} is marked more than once
	for	i=1:numel(ctmpl)
		id=strfind(n,ctmpl{i});
		if	~isempty(id)
			[dum,dum,ixu{i},ixe{i}]=dupinx(id,tc);
		end
	end
	in=false(tr,1);
	im=in;
	%   must check for anomalous cases like <'.d'>
	%   id: positions of all marked digits and of 'inf'/'nan'
	id=sort(...
		[find(n>='0' & n<='9'),...
		strfind(n,'inf'),...
		strfind(n,'nan')]);
	%		[ibu,ibd,ixbu,ixe{i+1}]=dupinx(id,tc);
	% in: rows with at least one digit/inf/nan (one hit or several)
	[ibu,ibd,ixbu,ixbd]=dupinx(id,tc);
	in(ixbu)=true;
	in(ixbd)=true;
	% im: rows with exactly one 0->1 transition in the mask
	[ibu,ibd,ixbu,ixbd]=dupinx(ib,tc);
	im(ixbu)=true;
	% in: rows accepted for numerical sorting
	% ia: rows with marked characters that were not accepted
	in=in&im;
	in([ixe{:}])=false;
	il=~any(t,2);
	ia=~(in|il);

	% - read valid strings
	%   keep the marked characters only, blank out everything else
	n=t(in,:).*c(in,:);
	n(n==0)=' ';
	n=char(n);
	% presumably <strread> scans the transposed matrix column by column,
	% ie, one original row after the other - verify against its doc
	dn=strread(n.','%n');
	% - exactly one number per accepted row is expected
	%   NOTE(review): on a mismatch the function returns without any
	%   message; with outputs requested the first one is a structure
	%   holding the parser state only, otherwise it stays the raw input
	if	numel(dn) ~= numel(find(in))
		%disp(sprintf('ASORT> unexpected fatal error reading input!'));
		if	nargout
			s.c=c;
			s.t=t;
			s.n=n;
			s.d=dn;
			varargout{1}=s;
		end
		return;
	end

	% - sort numbers
	%   ds: sorted numbers, dx: their order within the accepted rows
	[ds,dx]=sort(dn,1,smod);
	in=find(in);
	anr=ins(in(dx));
	snr=ins(ia);
end
str=ins(il);
% - to: end time for the <runtime> field
to=clock;

% - prepare output
%   a structure is returned if fewer than three outputs are requested or
%   if -st was given; otherwise anr/snr/str are returned in this order
if	nargout < 3 || sflg
	s.magic='ASORT';
	s.ver='30-Mar-2005 11:57:07';
	s.time=datestr(clock);
	s.runtime=etime(to,ti);
	s.input_class=winp.class;
	s.input_msize=winp.size;
	s.input_bytes=winp.bytes;
	s.strng_class=wins.class;
	s.strng_msize=wins.size;
	s.strng_bytes=wins.bytes;
	s.anr=anr;
	s.snr=snr;
	s.str=str;
	% debug mode: add the parser state
	if	dflg
		s.c=c;
		s.t=t;
		s.n=n;
		s.d=ds;
	end
	varargout{1}=s;
else
	s={anr,snr,str};
	for	i=1:nargout
		varargout{i}=s{i};
	end
end

% - verbose output
%   displays a four column cell table:
%   input | ascii sorted input | grouped result | grouped result with the
%   numbers read in place of the numerically sorted strings
%   <ag> pads the first two columns by the three group header rows
if	vflg
	inp=cstr(inp);
	an=[{'--- NUMERICAL'};		anr];
	as=[{'--- ASCII NUMBERS'};	snr];
	at=[{'--- ASCII STRINGS'};	str];
	nn=[{'--- NUMBERS'};		num2cell(ds)];
	ag={' ';' ';' '};
	u=[{'INPUT'};			inp;ag];
	v=[{'ASCII SORT'};		ins;ag];
	w=[{'NUM SORT'};		an;as;at];
	x=[{'NUM READ'};		nn;as;at];
	w=[u,v,w,x];
	disp(w);
end

return;
%--------------------------------------------------------------------------------
function	c=cstr(s)
% - convert a char array into a column cell array with one row per cell
%   rows are copied as they are (trailing blanks are kept)
%   any input that is not a char array is handed back unchanged

if	~ischar(s)
	c=s;
	return;
end

nrow=size(s,1);
c=cell(nrow,1);
for	r=1:nrow
	c{r}=s(r,:);	% full row including padding
end
return;
%--------------------------------------------------------------------------------
function	[idu,idd,ixu,ixd]=dupinx(ix,nc)
% - check for more than one entry/row in a matrix of column size <nc>
%   ix	: sorted row vector of linear indices into the row-wise
%	  flattened matrix (1-based)
%   nc	: number of columns of that matrix
%
%   unique    indices:	idu / ixu
%   duplicate indices:	idd / ixd
%	idu	logical mask over <ix>: entry is the only one in its row
%	idd	logical mask over <ix>: entry is the last one of a row
%		that holds several entries
%	ixu	row numbers with exactly one entry
%	ixd	row numbers with more than one entry

if	isempty(ix)
	idu=[];
	idd=[];
	ixu=[];
	ixd=[];
	return;
end
% - row number of each index
%   (ix-1) keeps an index in the last column (ix == k*nc) in row k
%   instead of pushing it into row k+1
id=fix((ix-1)/nc)+1;
% - idi(k) is true if entries k and k+1 are in different rows
idi=diff(id)~=0;
ide=[true idi];		% entry is the first one of its row
idb=[idi true];		% entry is the last  one of its row
idu=idb & ide;
idd=idb==1 & ide==0;
ixu=id(idu);
ixd=id(idd);
return;
%--------------------------------------------------------------------------------
function	inp=setinp(inp,tmpl,flg)
% - remove space(s) and/or templates
%   inp	: cell array of strings (or char string) to be processed
%   tmpl	: template string or cell array of template strings;
%	  each match is replaced by one space
%   flg	: [tpos wpos]
%	  position of the template / space option in the command line,
%	  0|false if the option was not given
%	  the options are applied in ascending order of their position
%
%   note: only options that were given (position ~= 0) are processed;
%         iterating over a zero entry as well would match the unset
%         option in the dispatch below and, eg, strip all spaces
%         although only templates were requested

if	isempty(inp) || ~any(flg)
	return;
end

if	ischar(tmpl)
	tmpl={tmpl};
end
tpos=flg(1);
wpos=flg(2);

for	pos=sort(flg(flg~=0))
	if	pos == tpos
		% templates -> one space each
		for	k=1:numel(tmpl)
			inp=strrep(inp,tmpl{k},' ');
		end
	elseif	pos == wpos
		% remove all spaces
		inp=strrep(inp,' ','');
	end
end
return;
%--------------------------------------------------------------------------------