wolffd@0
|
function [sC,old2new,newi] = som_clset(sC,action,par1,par2)

% SOM_CLSET Create and/or set values in the som_clustering struct.
%
%  [sC,old2new,newi] = som_clset(sC,action,par1,par2)
%
% first argument
%   sC       (struct) a som_clustering struct
%   Z        (matrix) size nb-1 x 3, as given by LINKAGE function
%   base     (vector) size dlen x 1, a partitioning of the data
%                     (NaN entries are kept as NaN, i.e. ignored data)
%
% actions (optional; when omitted the struct is only created/returned)
%   'remove'          removes the indicated clusters (par1: vector)
%   'add'             add a cluster by making a combination of the indicated
%                     clusters (par1: vector)
%   %'move'           moves a child cluster (par1: scalar) from a parent to another
%   %                 (par2: vector 1 x 2)            -- not implemented, no-op
%   'merge'           like 'add', followed by removing the indicated clusters (par1: vector)
%   %'split'          the indicated cluster (par1: scalar) is partitioned into indicated
%   %                 parts (par2: vector), which are then added, while the indicated cluster
%   %                 (par1) is removed                -- not implemented, no-op
%   'coord'           sets the coordinates of base clusters (par1: matrix nb x *), and
%                     recalculates coordinates of the derived clusters (by averaging base cluster
%                     coordinates)
%   'color'           sets the colors of base clusters (par1: matrix nb x 3), and recalculates
%                     colors of the derived clusters (as averages of base cluster colors)
%
% outputs
%   sC       (struct) the (updated) som_clustering struct
%   old2new  (vector) maps old cluster indices (or, for a partition vector
%                     input, the original labels) to the new cluster indices;
%                     0 marks an index that no longer exists
%   newi     (scalar) index of the cluster created by 'add' / 'merge'
%                     (0 if no cluster could be added), [] for other actions
%
% sC
%   .type     (string) 'som_clustering'
%   .name     (string) Identifier for the clustering.
%   .nb       (scalar) Number of base clusters in the clustering.
%   .base     (vector) Size dlen x 1, the basic groups of data
%                      forming the base clusters, e.g. as a result
%                      of partitive clustering. Allowed values are
%                       1:nb   indicating the base cluster
%                              to which the data belongs to.
%                       NaN    indicating that the data has
%                              been ignored in the clustering
%   .nc       (scalar) Number of clusters in the clustering (nb + derived clusters).
%   .children (cellarray) size nc x 1, each cell gives the list of indices
%                      of child clusters for the cluster
%   .parent   (vector) size nc x 1, the index of parent of each cluster
%                      (or zero if the cluster does not have a parent)
%   .coord    (matrix) size nc x *, visualization coordinates for each cluster
%                      By default the coordinates are set so that
%                      the base clusters are ordered on a line, and the
%                      position of each combined cluster is average of
%                      the base clusters that constitute it.
%   .color    (matrix) size nc x 3, color for each cluster.
%                      By default the colors are set so that the
%                      base clusters are ordered on a line,
%                      and then colors are assigned from the 'hsv'
%                      colormap to the base clusters. The color
%                      of each combined cluster is average as above.
%   .cldist   (string) Default cluster distance function.

newi = [];
% allow "create only" calls such as sC = som_clset(Z)
if nargin < 2, action = ''; end

if isstruct(sC),
  % it should be a som_clustering struct
  old2new = 1:sC.nc;
elseif size(sC,2)==3,
  % assume it is a cluster hierarchy matrix Z
  sC = Z2sC(sC);
  old2new = 1:sC.nc;
else
  % assume it is a partitioning vector
  base = sC(:);
  valid = isfinite(base);
  u = unique(base(valid));
  % label -> consecutive base cluster index (0 for labels not in use)
  old2new = full(sparse(u,1,1:length(u)));
  % only finite labels can be used as indices; NaN entries stay NaN
  base(valid) = old2new(base(valid));
  sC = part2sC(base);
end

switch action,
 case 'remove',
  for i=1:length(par1),
    target = old2new(par1(i));
    if target < 1, continue; end  % already removed / never existed
    [sC,o2n] = removecluster(sC,target);
    old2new = composemap(o2n,old2new);
  end
 case 'add',
  [sC,old2new,newi] = addmergedcluster(sC,par1);
 case 'move',
  % not implemented yet
 case 'split',
  % not implemented yet
 case 'merge',
  [sC,old2new,newi] = addmergedcluster(sC,par1);
  for i=1:length(par1),
    target = old2new(par1(i));
    if target < 1, continue; end
    [sC,o2n] = removecluster(sC,target);
    old2new = composemap(o2n,old2new);
    % the freshly added cluster may shift when an earlier index is removed
    if newi > 0, newi = o2n(newi); end
  end
 case 'color',
  sC.color = derivative_average(sC,par1);
 case 'coord',
  sC.coord = derivative_average(sC,par1);
end

return;

function map = composemap(o2n,map)
% COMPOSEMAP Push the live (nonzero) entries of MAP through O2N.
% Zero entries denote indices that no longer exist and must stay zero,
% because 0 is not a valid subscript.

  live = map > 0;
  map(live) = o2n(map(live));
  return;
|
wolffd@0
|
97
|
wolffd@0
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% subfunctions
|
wolffd@0
|
100
|
wolffd@0
|
function sC = clstruct(nb,nc)
% CLSTRUCT Build an empty som_clustering struct with nb base clusters
% and nc clusters in total. Parents are all zero, child lists are empty,
% coordinates (nc x 2) and colors (nc x 3) are zero-filled.

  sC = struct('type','som_clustering');
  sC.name     = '';
  sC.base     = 1:nb;
  sC.nb       = nb;
  sC.nc       = nc;
  sC.parent   = zeros(nc,1);
  sC.children = repmat({[]},nc,1);
  sC.coord    = zeros(nc,2);
  sC.color    = zeros(nc,3);
  sC.cldist   = 'centroid';
  return;
|
wolffd@0
|
112
|
wolffd@0
|
function Z = sC2Z(sC,height)
% SC2Z Build an (nb-1) x 3 hierarchy matrix from a som_clustering struct.
%
%  sC      (struct) som_clustering struct (uses .parent, .children, .nb, .nc)
%  height  (string) how the third column is computed:
%                   'level'  (default) maximum depth minus depth of the cluster
%                   'level2' 1 + maximum height of the children (leaves = 0)
%                   anything else leaves all heights at zero
%
% Clusters with more than two children are written as a chain of rows.
% NOTE(review): the first two columns hold sC cluster indices (and row
% numbers for the chained rows), which does not look like the nb+row
% numbering used by LINKAGE - confirm before feeding Z to DENDROGRAM.

  if nargin<2, height = 'level'; end

  root = find(sC.parent==0);

  % breadth-first traversal, prepending each visited cluster, so that
  % every cluster comes before its parent in 'order'
  order = [root];
  ch = sC.children{root};   % brace indexing: the list itself, not a cell
  while any(ch),
    i = ch(1);
    order = [ch(1), order];
    ch = [ch(2:end), sC.children{i}];
  end

  he = zeros(sC.nc,1);
  if strcmp(height,'level'),
    % depth of each cluster below the root ...
    ch = sC.children{root};
    while any(ch),
      i = ch(1); he(i) = he(sC.parent(i))+1;
      ch = [ch(2:end), sC.children{i}];
    end
    % ... turned upside down so that the deepest clusters get height 0
    he = max(he)-he;
  elseif strcmp(height,'level2'),
    for i=order,
      if any(sC.children{i}), he(i) = max(he(sC.children{i}))+1; end
    end
  else
    %he = som_cldist ( between children )
  end

  % fill Z from the last row (the root) upwards
  Z = zeros(sC.nb-1,3);
  i = sC.nb-1;
  inds = root;
  while i>0,
    ch = sC.children{inds(1)}; h = he(inds(1)); inds = [inds(2:end), ch];
    if length(ch)>=2,
      for k=1:length(ch)-2, Z(i,:) = [i-1, ch(k), h]; i = i - 1; end
      Z(i,:) = [ch(end-1) ch(end) h]; i = i - 1;
    end
  end
  return;
|
wolffd@0
|
147
|
wolffd@0
|
function sC = Z2sC(Z)
% Z2SC Convert a hierarchy matrix Z (size nb-1 x 3, as given by LINKAGE)
% into a som_clustering struct with nb base clusters and 2*nb-1 clusters.
% Row j of Z is taken to create cluster nb+j from the clusters listed in
% its first two columns. Coordinates and colors are assigned according to
% the left-to-right order of the leaves in the resulting tree.

  nb = size(Z,1)+1;
  nc = 2*nb-1;
  sC = clstruct(nb,nc);
  sC.base = 1:nb;
  for i=1:nc,
    j = find(Z(:,1)==i | Z(:,2)==i);
    % the root is not mentioned in any row: it keeps parent 0
    if isempty(j), continue; end
    sC.parent(i) = nb+j(1);
    sC.children{sC.parent(i)}(end+1) = i;
  end

  % coords and color: repeatedly replace the first non-leaf in 'order'
  % by its children until only base clusters remain
  order = nc;
  nonleaves = 1;
  while any(nonleaves),
    j = nonleaves(1);
    ch = sC.children{order(j)};
    if j==1, oleft = []; else oleft = order(1:(j-1)); end
    if j==length(order), oright = []; else oright = order((j+1):length(order)); end
    order = [oleft, ch, oright];
    nonleaves = find(order>nb);
  end
  % co(k) = position of base cluster k on the line
  [dummy,co] = sort(order);
  sC.coord = derivative_average(sC,co');
  H = hsv(nb+1);
  sC.color = derivative_average(sC,H(co,:));
  return;
|
wolffd@0
|
175
|
wolffd@0
|
function sC = part2sC(part)
% PART2SC Wrap a partitioning vector into a two-level som_clustering
% struct: one base cluster per label 1..max(part), all of them children
% of a single root cluster placed last.

  nbase = max(part);
  iroot = nbase+1;

  sC = clstruct(nbase,iroot);
  sC.base = part;

  % every base cluster hangs directly under the root
  leaves = 1:nbase;
  sC.children{iroot} = leaves;
  sC.parent(leaves) = iroot;

  % base clusters on a line; the root gets the averages
  sC.coord = derivative_average(sC,leaves');
  palette = hsv(nbase+1);
  sC.color = derivative_average(sC,palette(leaves,:));
  return;
|
wolffd@0
|
189
|
wolffd@0
|
function [sC,old2new] = removecluster(sC,ind)
% REMOVECLUSTER Remove derived cluster IND from the hierarchy.
%
% The children of IND are attached to the parent of IND, and all cluster
% indices above IND shift down by one. Root clusters (parent 0) and
% clusters without children (base clusters) are never removed; in that
% case sC is returned unchanged.
%
%  old2new  (vector) 1 x (old nc), new index of each old cluster,
%                    0 for the removed one; identity if nothing was removed

  old2new = 1:sC.nc;
  parent_ind = sC.parent(ind);
  ch = sC.children{ind};
  if ~parent_ind,
    % trying to remove root cluster - no go
    return;
  elseif ~any(ch),
    % trying to remove a base cluster - no go
    return;
  end

  % ok, proceed
  old2new = [1:ind-1, 0, ind:sC.nc-1];

  % update parent and child fields
  sC.parent(ch) = parent_ind;
  siblings = sC.children{parent_ind};
  sC.children{parent_ind} = setdiff([siblings(:)', ch(:)'],ind);

  % remove old cluster
  keep = [1:ind-1, ind+1:sC.nc];
  sC.parent = sC.parent(keep);
  sC.children = sC.children(keep);
  sC.color = sC.color(keep,:);
  sC.coord = sC.coord(keep,:);
  sC.nc = sC.nc-1;

  % update old indices to new indices; a parent of 0 means "no parent"
  % and must not be used as a subscript
  hasparent = sC.parent > 0;
  sC.parent(hasparent) = old2new(sC.parent(hasparent));
  for i=1:sC.nc, sC.children{i} = old2new(sC.children{i}); end
  return;
|
wolffd@0
|
219
|
wolffd@0
|
function [sC,old2new,inew] = addmergedcluster(sC,inds)
% ADDMERGEDCLUSTER Insert a new cluster that combines sibling clusters INDS.
%
% The new cluster takes the index of the common parent of INDS; the parent
% and every cluster after it shift up by one. Nothing is changed (inew = 0,
% old2new = identity) when INDS is empty, contains a root cluster, the
% clusters are not siblings, or INDS already covers all children of the
% parent.
%
%  old2new  (vector) 1 x (old nc), new index of each old cluster
%  inew     (scalar) index of the added cluster, 0 if none was added

  old2new = 1:sC.nc;
  inew = 0;
  if isempty(inds),
    % nothing to merge - no go
    return;
  end
  p_inds = sC.parent(inds);
  if ~all(p_inds(1)==p_inds),
    % clusters are not siblings - no go
    return;
  end
  parent_ind = p_inds(1);
  if parent_ind == 0,
    % a root cluster has no parent under which to merge - no go
    return;
  end
  if isempty(setdiff(sC.children{parent_ind},inds)),
    % such a merged cluster exists already
    return;
  end

  % ok, proceed
  inew = parent_ind;
  old2new = [1:inew-1,inew+1:sC.nc+1];

  % add the new cluster (=copy of parent_ind)
  j = [1:inew,inew:sC.nc];
  sC.parent = sC.parent(j);
  sC.children = sC.children(j);
  sC.color = sC.color(j,:);
  sC.coord = sC.coord(j,:);
  sC.nc = sC.nc+1;

  % update old indices to new indices; a parent of 0 means "no parent"
  % and must not be used as a subscript
  hasparent = sC.parent > 0;
  sC.parent(hasparent) = old2new(sC.parent(hasparent));
  for i=1:sC.nc, sC.children{i} = old2new(sC.children{i}); end
  inds = old2new(inds);
  parent_ind = old2new(parent_ind);

  % update parent, child, color and coord fields
  sC.parent(inds) = inew;
  sC.parent(inew) = parent_ind;
  sC.children{inew} = inds;
  sC.children{parent_ind} = [setdiff(sC.children{parent_ind}, inds), inew];
  b = baseind(sC,inew);
  % average over rows explicitly: with a single base cluster a plain
  % mean() would average across the columns instead
  sC.color(inew,:) = mean(sC.color(b,:),1);
  sC.coord(inew,:) = mean(sC.coord(b,:),1);
  return;
|
wolffd@0
|
259
|
wolffd@0
|
function C = derivative_average(sC,Cbase)
% DERIVATIVE_AVERAGE Extend per-base-cluster values to all clusters.
%
%  Cbase  (matrix) size nb x dim, one row per base cluster
%  C      (matrix) size nc x dim; row i is the average of the Cbase rows
%                  of the base clusters returned by baseind(sC,i)
%
% Raises an error if Cbase does not have sC.nb rows.

  [n dim] = size(Cbase);
  if n ~= sC.nb, error('Color / Coord matrix should have nb rows'); end
  C = zeros(sC.nc,dim);
  for i=1:sC.nc,
    % average along rows explicitly: for a base cluster only one row is
    % selected and a plain mean() would average across its columns
    C(i,:) = mean(Cbase(baseind(sC,i),:),1);
  end
  return;
|
wolffd@0
|
267
|
wolffd@0
|
function bi = baseind(sC,ind)
% BASEIND Indices of the base clusters contained in cluster(s) IND.
%
% Walks the child lists starting from IND (breadth first) and keeps the
% visited indices that are base clusters, i.e. those <= sC.nb. Returns a
% row vector; a base cluster yields itself.

  bi = ind(:)';
  i = 1;
  while i<=length(bi),
    kids = sC.children{bi(i)};
    bi = [bi, kids(:)'];
    i = i+1;   % advance to the next unexpanded cluster
  end
  bi = bi(bi<=sC.nb);
  return;
|
wolffd@0
|
275
|
wolffd@0
|
276
|
wolffd@0
|
277
|
wolffd@0
|
278
|