Mercurial > hg > camir-aes2014
comparison toolboxes/MIRtoolbox1.3.2/somtoolbox/som_drmake.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e9a9cd732c1e |
---|---|
function [sR,best,sig,Cm] = som_drmake(D,inds1,inds2,sigmea,nanis)

% SOM_DRMAKE Make descriptive rules for given group within the given data.
%
% sR = som_drmake(D,[inds1],[inds2],[sigmea],[nanis])
%
%  D        (struct) map or data struct
%           (matrix) the data, of size [dlen x dim]
%  [inds1]  (vector) indices belonging to the group
%                    (the whole data set by default)
%  [inds2]  (vector) indices belonging to the contrast group
%                    (the rest of the data set by default)
%  [sigmea] (string) significance measure: 'accuracy',
%                    'mutuconf' (default), or 'accuracyI'.
%                    (See definitions below).
%  [nanis]  (scalar) value given for NaNs: 0 (=FALSE, default),
%                    1 (=TRUE) or NaN (=ignored)
%
%  sR      (struct array) best rule for each component. Each
%                    struct has the following fields:
%    .type     (string) 'som_rule'
%    .name     (string) name of the component
%    .low      (scalar) the low end of the rule range
%    .high     (scalar) the high end of the rule range
%    .nanis    (scalar) how NaNs are handled: NaN, 0 or 1
%    .lowstr   (string) initialized to '' by this function
%    .highstr  (string) initialized to '' by this function
%
%  best    (vector) indices of rules which make the best combined rule
%  sig     (vector) significance measure values for each rule, and for
%                   the combined rule (last element)
%  Cm      (matrix) A matrix of vectorized confusion matrices for each rule,
%                   and for the combined rule (last row): [a, c, b, d]
%                   (see below).
%
% For each component, such a rule sR.low <= x < sR.high is found
% which optimizes the given significance measure. The confusion
% matrix below between the given grouping (G: group - not G: contrast group)
% and rule (R: true or false) is used to determine the significance values:
%
%                G    not G
%             ---------------          accuracy  = (a+d) / (a+b+c+d)
%       true  |  a  |   b   |
%             |--------------          mutuconf  = a*a / ((a+b)(a+c))
%       false |  c  |   d   |
%             ---------------          accuracyI = a / (a+b+c)
%
% See also  SOM_DREVAL, SOM_DRTABLE.

% Contributed to SOM Toolbox 2.0, January 7th, 2002 by Juha Vesanto
% Copyright (c) by Juha Vesanto
% http://www.cis.hut.fi/projects/somtoolbox/

% Version 2.0beta juuso 070102

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% input arguments

if isstruct(D),
  switch D.type,
   case 'som_data', cn = D.comp_names; D = D.data;
   case 'som_map',  cn = D.comp_names; D = D.codebook;
   otherwise,
    % without this branch cn stays undefined and D stays a struct, which
    % only fails later with an unrelated-looking error
    error('som_drmake: D must be a data matrix, a som_data struct or a som_map struct.');
  end
else
  cn = cell(size(D,2),1);
  for i=1:size(D,2), cn{i} = sprintf('Variable%d',i); end
end

[dlen,dim] = size(D);
if nargin<2 || isempty(inds1), inds1 = 1:dlen; end
if nargin<3 || isempty(inds2),
  % contrast group = everything that is not in the group
  mask = ones(dlen,1); mask(inds1) = 0; inds2 = find(mask);
end
if nargin<4 || isempty(sigmea), sigmea = 'mutuconf'; end
if nargin<5 || isempty(nanis),  nanis = 0; end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% make the rules

% the last element/row is reserved for the combined rule
sig = zeros(dim+1,1);
Cm  = zeros(dim+1,4);

sR1tmp = struct('type','som_rule','name','','low',-Inf,'high',Inf,'nanis',nanis,'lowstr','','highstr','');
sR = sR1tmp;

% single variable rules
for i=1:dim,

  % bin edges: midpoints between the 10 bin centers that HIST places
  % between the minimum and maximum of the component, padded with -Inf/Inf
  mi = min(D(:,i));
  ma = max(D(:,i));
  [histcount,bins] = hist([mi,ma],10);
  if size(bins,1)>1, bins = bins'; end
  edges = [-Inf, (bins(1:end-1)+bins(2:end))/2, Inf];

  % find the rule for this variable
  [low,high,s,cm] = onevar_descrule(D(inds1,i),D(inds2,i),sigmea,nanis,edges);
  sR1 = sR1tmp;
  sR1.name = cn{i};
  sR1.low = low;
  sR1.high = high;
  sR(i) = sR1;
  sig(i) = s;
  Cm(i,:) = cm;

end

% find combined rule: greedily AND single rules together, in order of
% decreasing significance, keeping an addition only if it improves the
% significance of the combination.
% Only the first dim entries are ranked: sig(dim+1) is the still-empty slot
% of the combined rule and must never be used as an index into sR.
[dummy,order] = sort(-sig(1:dim));
maxsig = sig(order(1)); bestcm = Cm(order(1),:);
best = order(1);
for i=2:dim,
  com = [best, order(i)];
  [s,cm,truex,truey] = som_dreval(sR(com),D(:,com),sigmea,inds1,inds2,'and');
  if s>maxsig, best = com; maxsig = s; bestcm = cm; end
end
sig(end) = maxsig;
% store the confusion matrix of the best combination, not that of the
% last candidate tried (which is also undefined when dim == 1)
Cm(end,:) = bestcm;

return;
115 | |
116 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%55 | |
117 %% descriptive rules | |
118 | |
function [low,high,sig,cm] = onevar_descrule(x,y,sigmea,nanis,edges)

% Given a set of bin edges, find the contiguous range of bins with the
% best significance.
%
%  x       data values in cluster (the group)
%  y       data values not in cluster (the contrast group)
%  sigmea  significance measure, passed on to SOM_DRSIGNIF
%  nanis   how to handle NaNs: 0 (NaN never satisfies the rule),
%          1 (NaN always satisfies the rule) or NaN (NaNs are left out)
%  edges   bin edges; bin k holds values with edges(k) <= v < edges(k+1)
%
%  low     lower edge of the selected range
%  high    upper edge of the selected range
%  sig     significance of the selected range
%  cm      its vectorized confusion matrix [a, c, b, d]

% NaN handling. When NaNs are ignored they are dropped from both groups and
% must contribute nothing to the counts; multiplying by nanis itself would
% give NaN*0 = NaN and poison every count.
if isnan(nanis),
  x = x(~isnan(x)); y = y(~isnan(y));
  nanweight = 0;
else
  nanweight = nanis;
end
xnan = sum(isnan(x(:)));
ynan = sum(isnan(y(:)));

% histogram counts
xcount = onevar_bincounts(x,edges);
ycount = onevar_bincounts(y,edges);

% number of true items in both groups in all possible bin ranges i..j
n  = length(xcount);
Tx = onevar_rangesums(xcount);
Ty = onevar_rangesums(ycount);

% candidate ranges: those that contain at least one item
[i,j] = find(Tx+Ty);
if isempty(i),
  % no non-NaN data at all: fall back to the full range so that the
  % outputs are still well defined
  i = 1; j = n;
end
k = sub2ind(size(Tx),i,j);
tix = Tx(k) + nanweight*xnan;
tiy = Ty(k) + nanweight*ynan;
tix = tix(:); tiy = tiy(:);

% select the best range
nix = numel(x);
niy = numel(y);
cands = [tix,nix-tix,tiy,niy-tiy];   % one [a, c, b, d] row per candidate
% NOTE(review): assumes SOM_DRSIGNIF returns one value per row of cands
[s,b] = max(som_drsignif(sigmea,cands));

% output
low  = edges(i(b));
high = edges(j(b)+1);
sig  = s;
cm   = cands(b,:);

return;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function counts = onevar_bincounts(v,edges)

% Number of values of v in each bin, always as a column vector with
% length(edges)-1 elements, also when v is empty, a scalar or a row vector.

nbins  = length(edges)-1;
counts = zeros(nbins,1);
if ~isempty(v),
  c = histc(v(:),edges(:));
  c = c(:);
  % the last element of HISTC output counts values equal to edges(end);
  % it is not a bin and is dropped
  counts = c(1:nbins);
end

return;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function T = onevar_rangesums(counts)

% T(i,j) = sum(counts(i:j)) for i <= j, and 0 below the diagonal.
% Computed from cumulative sums with plain additions, so the result is
% exact for integer counts and cannot overflow.

n = length(counts);
upto   = cumsum(counts);    % upto(j)   = sum(counts(1:j))
before = upto - counts;     % before(i) = sum(counts(1:i-1))
T = triu(repmat(upto',[n 1]) - repmat(before,[1 n]),0);

return;
167 |