camir-aes2014: toolboxes/MIRtoolbox1.3.2/somtoolbox/som

annotate toolboxes/MIRtoolbox1.3.2/somtoolbox/som_drmake.m @ 0:e9a9cd732c1e tip

first hg version after svn

author	wolffd
date	Tue, 10 Feb 2015 15:05:51 +0000
parents
children

rev	line source
function [sR,best,sig,Cm] = som_drmake(D,inds1,inds2,sigmea,nanis)

% SOM_DRMAKE Make descriptive rules for given group within the given data.
%
% [sR,best,sig,Cm] = som_drmake(D,[inds1],[inds2],[sigmea],[nanis])
%
%  D        (struct) map or data struct
%           (matrix) the data, of size [dlen x dim]
%  [inds1]  (vector) indices belonging to the group
%                    (the whole data set by default)
%  [inds2]  (vector) indices belonging to the contrast group
%                    (the rest of the data set by default)
%  [sigmea] (string) significance measure: 'accuracy',
%                    'mutuconf' (default), or 'accuracyI'.
%                    (See definitions below).
%  [nanis]  (scalar) value given for NaNs: 0 (=FALSE, default),
%                    1 (=TRUE) or NaN (=ignored)
%
%  sR      (struct array) best rule for each component. Each
%                    struct has the following fields:
%    .type     (string) 'som_rule'
%    .name     (string) name of the component
%    .low      (scalar) the low end of the rule range
%    .high     (scalar) the high end of the rule range
%    .nanis    (scalar) how NaNs are handled: NaN, 0 or 1
%    .lowstr   (string) initialized to ''
%    .highstr  (string) initialized to ''
%
%  best    (vector) indices of rules which make the best combined rule
%  sig     (vector) significance measure values for each rule, and for
%                   the combined rule (last element)
%  Cm      (matrix) A matrix of vectorized confusion matrices for each
%                   rule, and for the combined rule (last row):
%                   [a, c, b, d] (see below).
%
% For each component, the rule sR.low <= x < sR.high is found which
% optimizes the given significance measure. The confusion matrix below
% between the given grouping (G: group - not G: contrast group) and
% rule (R: true or false) is used to determine the significance values:
%
%          G    not G
%        ---------------    accuracy  = (a+d) / (a+b+c+d)
% true   |  a  |   b   |
%        |--------------    mutuconf  =  a*a  / ((a+b)(a+c))
% false  |  c  |   d   |
%        ---------------    accuracyI =   a   / (a+b+c)
%
% See also  SOM_DREVAL, SOM_DRTABLE.

% Contributed to SOM Toolbox 2.0, January 7th, 2002 by Juha Vesanto
% Copyright (c) by Juha Vesanto
% http://www.cis.hut.fi/projects/somtoolbox/

% Version 2.0beta juuso 070102

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% input arguments

if isstruct(D),
  switch D.type,
   case 'som_data', cn = D.comp_names; D = D.data;
   case 'som_map',  cn = D.comp_names; D = D.codebook;
   otherwise,
    % fail here with a clear message instead of later on an undefined cn
    error(['som_drmake: unsupported struct type ''' D.type '''.']);
  end
else
  % plain matrix: generate default component names
  cn = cell(size(D,2),1);
  for i=1:size(D,2), cn{i} = sprintf('Variable%d',i); end
end

[dlen,dim] = size(D);
if nargin<2 | isempty(inds1), inds1 = 1:dlen; end
if nargin<3 | isempty(inds2), i = ones(dlen,1); i(inds1) = 0; inds2 = find(i); end
if nargin<4 | isempty(sigmea), sigmea = 'mutuconf'; end
if nargin<5 | isempty(nanis), nanis = 0; end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% action

% rows 1..dim hold the single-variable rules, row dim+1 the combined rule
sig = zeros(dim+1,1);
Cm  = zeros(dim+1,4);

sR1tmp = struct('type','som_rule','name','','low',-Inf,'high',Inf,'nanis',nanis,'lowstr','','highstr','');
sR = sR1tmp;

% single variable rules
for i=1:dim,

  % bin edges: midpoints between the 10 bin centers HIST places on
  % [min,max], with open-ended first and last bins
  mi = min(D(:,i));
  ma = max(D(:,i));
  [histcount,bins] = hist([mi,ma],10);
  if size(bins,1)>1, bins = bins'; end
  edges = [-Inf, (bins(1:end-1)+bins(2:end))/2, Inf];

  % find the rule for this variable
  [low,high,s,cm] = onevar_descrule(D(inds1,i),D(inds2,i),sigmea,nanis,edges);
  sR1 = sR1tmp;
  sR1.name = cn{i};
  sR1.low = low;
  sR1.high = high;
  sR(i) = sR1;
  sig(i) = s;
  Cm(i,:) = cm;

end

% find combined rule: starting from the most significant single rule,
% greedily AND in the other rules (in decreasing order of significance)
% whenever the conjunction improves the significance.
% Only the first dim entries are ranked; the last entry of sig is the
% still-empty slot of the combined rule and must not appear in 'order'.
[dummy,order] = sort(-sig(1:dim));
maxsig = sig(order(1)); bestcm = Cm(order(1),:);
best = order(1);
for i=2:dim,
  com = [best, order(i)];
  [s,cm] = som_dreval(sR(com),D(:,com),sigmea,inds1,inds2,'and');
  if s>maxsig, best = com; maxsig = s; bestcm = cm; end
end
sig(end) = maxsig;
% store the confusion matrix of the best combination, not the one of
% the last candidate that happened to be evaluated
Cm(end,:) = bestcm;

return;
wolffd@0	115
wolffd@0	116 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%55
wolffd@0	117 %% descriptive rules
wolffd@0	118
function [low,high,sig,cm] = onevar_descrule(x,y,sigmea,nanis,edges)

% Given a set of bin edges, find the range of bins with best significance.
%
%  x       (vector) data values in the group
%  y       (vector) data values in the contrast group
%  sigmea  (string) significance measure, passed on to SOM_DRSIGNIF
%  nanis   (scalar) how to handle NaNs: 0 (=FALSE), 1 (=TRUE)
%                   or NaN (=ignored)
%  edges   (vector) bin edges, as used by HISTC
%
%  low     (scalar) lower edge of the best range
%  high    (scalar) upper edge of the best range
%  sig     (scalar) significance value of the best range
%  cm      (vector) confusion matrix of the best range,
%                   [true in x, false in x, true in y, false in y]

% histogram counts
if isnan(nanis), x = x(~isnan(x)); y = y(~isnan(y)); end
xcount = histc(x,edges);
ycount = histc(y,edges);
% drop the last HISTC bin; force column orientation for the algebra below
xcount = xcount(1:end-1); xcount = xcount(:);
ycount = ycount(1:end-1); ycount = ycount(:);
xnan = sum(isnan(x));
ynan = sum(isnan(y));

% find number of true items in both groups in all possible ranges:
% T(i,j) is the number of items in bins i..j (i<=j)
Tx = range_counts(xcount);
Ty = range_counts(ycount);
[i,j] = find(Tx+Ty);          % only ranges containing at least one item
k = sub2ind(size(Tx),i,j);
V = [i, j, Tx(k), Ty(k)];

% NaN items count as TRUE nanis times. When nanis is NaN they have
% already been removed above, and NaN*0 would turn every count into NaN,
% so use weight 0 in that case.
if isnan(nanis), nanw = 0; else nanw = nanis; end
tix = V(:,3) + nanw*xnan;
tiy = V(:,4) + nanw*ynan;

% select the best range
nix = length(x);
niy = length(y);
Cm = [tix,nix-tix,tiy,niy-tiy];
[s,k] = max(som_drsignif(sigmea,Cm));

% output
low = edges(V(k,1));
high = edges(V(k,2)+1);
sig = s;
cm = Cm(k,:);

return;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function T = range_counts(c)

% For a column vector of bin counts c, return the upper triangular
% matrix T with T(i,j) = sum(c(i:j)) for i<=j, and zeros below the
% diagonal. Computed with exact cumulative sums.

n = length(c);
upto = cumsum(c);             % upto(j)   = items in bins 1..j
before = upto - c;            % before(i) = items in bins 1..i-1
T = triu(repmat(upto',[n 1]) - repmat(before,[1 n]),0);

return;
wolffd@0	167

Mercurial > hg > camir-aes2014

annotate toolboxes/MIRtoolbox1.3.2/somtoolbox/som_drmake.m @ 0:e9a9cd732c1e tip