annotate toolboxes/MIRtoolbox1.3.2/somtoolbox/som_clset.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
function [sC,old2new,newi] = som_clset(sC,action,par1,par2)

% SOM_CLSET Create and/or set values in the som_clustering struct.
%
% [sC,old2new,newi] = som_clset(sC,action,par1,par2)
%
% first argument
%   sC       (struct) a som_clustering struct
%   Z        (matrix) size nb-1 x 3, as given by LINKAGE function
%   base     (vector) size dlen x 1, a partitioning of the data
%
% actions (optional; when omitted the struct is only created/returned)
%   'remove' removes the indicated clusters (par1: vector)
%   'add'    adds a cluster by making a combination of the indicated
%            clusters (par1: vector)
%   'move'   not implemented: would move a child cluster (par1: scalar)
%            from a parent to another (par2: vector 1 x 2)
%   'merge'  like 'add', followed by removing the indicated clusters
%            (par1: vector)
%   'split'  not implemented: the indicated cluster (par1: scalar) would be
%            partitioned into indicated parts (par2: vector), which are
%            then added, while the indicated cluster (par1) is removed
%   'coord'  sets the coordinates of base clusters (par1: matrix nb x *),
%            and recalculates coordinates of the derived clusters (by
%            averaging base cluster coordinates)
%   'color'  sets the colors of base clusters (par1: matrix nb x 3), and
%            recalculates colors of the derived clusters (as averages of
%            base cluster colors)
%
% outputs
%   sC       (struct) the created / modified som_clustering struct
%   old2new  (vector) for each cluster index before the action, the index
%            of the same cluster after the action (0 = cluster was removed).
%            For a partitioning vector input it maps the original labels
%            to base cluster indices instead.
%   newi     (scalar) index of the cluster created by 'add' / 'merge'
%            (0 if none was created), [] for the other actions
%
% sC
%   .type     (string) 'som_clustering'
%   .name     (string) Identifier for the clustering.
%   .nb       (scalar) Number of base clusters in the clustering.
%   .base     (vector) Size dlen x 1, the basic groups of data
%                      forming the base clusters, e.g. as a result
%                      of partitive clustering. Allowed values are
%                       1:nb   indicating the base cluster
%                              to which the data belongs to.
%                       NaN    indicating that the data has
%                              been ignored in the clustering
%   .nc       (scalar) Number of clusters in the clustering
%                      (nb + derived clusters).
%   .children (cellarray) size nc x 1, each cell gives the list of indices
%                      of child clusters for the cluster
%   .parent   (vector) size nc x 1, the index of parent of each cluster
%                      (or zero if the cluster does not have a parent)
%   .coord    (matrix) size nc x *, visualization coordinates for each
%                      cluster. By default the coordinates are set so that
%                      the base clusters are ordered on a line, and the
%                      position of each combined cluster is average of
%                      the base clusters that constitute it.
%   .color    (matrix) size nc x 3, color for each cluster.
%                      By default the colors are set so that the
%                      base clusters are ordered on a line,
%                      and then colors are assigned from the 'hsv'
%                      colormap to the base clusters. The color
%                      of each combined cluster is average as above.
%   .cldist   (string) Default cluster distance function.

% the third output must always be defined, whatever the action
newi = [];

% allow pure construction: som_clset(Z) or som_clset(base)
if nargin < 2, action = ''; end

if isstruct(sC),
  % it should be a som_clustering struct
  old2new = 1:sC.nc;
elseif size(sC,2)==3,
  % assume it is a cluster hierarchy matrix Z
  sC = Z2sC(sC);
  old2new = 1:sC.nc;
else
  % assume it is a partitioning vector
  base  = sC(:);
  valid = isfinite(base);           % NaN = data ignored in the clustering
  u = unique(base(valid));
  % map the original labels to consecutive base cluster indices 1:nb
  old2new = full(sparse(u,1,1:length(u)));
  base(valid) = old2new(base(valid));
  sC = part2sC(base);
end

switch action,
 case 'remove',
  [sC,old2new] = removeclusters(sC,old2new,par1,[]);
 case 'add',
  [sC,old2new,newi] = addmergedcluster(sC,par1);
 case 'move',
  % not implemented yet
 case 'split',
  % not implemented yet
 case 'merge',
  [sC,old2new,newi] = addmergedcluster(sC,par1);
  % removing clusters shifts indices, so keep newi up to date as well
  [sC,old2new,newi] = removeclusters(sC,old2new,par1,newi);
 case 'color',
  sC.color = derivative_average(sC,par1);
 case 'coord',
  sC.coord = derivative_average(sC,par1);
end

return;

function [sC,old2new,track] = removeclusters(sC,old2new,inds,track)

% Removes the clusters listed in inds one at a time. inds are given in the
% numbering that old2new translates from; old2new and the indices in
% track are updated after every removal (0 marks a removed cluster).

for i=1:length(inds),
  k = old2new(inds(i));
  if k > 0,                         % skip clusters that are already gone
    [sC,o2n] = removecluster(sC,k);
    % never use the 0 markers as subscripts
    live = old2new > 0;
    old2new(live) = o2n(old2new(live));
    live = track > 0;
    track(live) = o2n(track(live));
  end
end
return;
wolffd@0 97
wolffd@0 98 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wolffd@0 99 %% subfunctions
wolffd@0 100
function sC = clstruct(nb,nc)

% Creates an empty som_clustering struct with nb base clusters and nc
% clusters in total: no parents, no children, zero coordinates (nc x 2)
% and zero colors (nc x 3).

sC.type     = 'som_clustering';
sC.name     = '';
sC.base     = 1:nb;
sC.nb       = nb;
sC.nc       = nc;
sC.parent   = zeros(nc,1);
sC.children = repmat({[]},nc,1);
sC.coord    = zeros(nc,2);
sC.color    = zeros(nc,3);
sC.cldist   = 'centroid';
return;
wolffd@0 112
function Z = sC2Z(sC,height)

% Converts a som_clustering struct into a (nb-1) x 3 matrix Z.
%
%  sC      (struct) som_clustering struct; uses .parent, .children,
%                   .nb and .nc
%  height  (string) how the third column of Z is computed:
%                    'level'  (default) max depth minus depth of the cluster
%                    'level2' 1 + the largest height among the children
%                    anything else: heights are left at zero (a distance
%                    based height is not implemented)
%
% NOTE(review): the first two columns hold sC cluster indices (and i-1 for
% clusters with more than two children); whether this matches the LINKAGE
% numbering convention has not been verified - confirm before relying on it.

if nargin<2, height = 'level'; end

% breadth-first traversal from the root; each visited cluster is put in
% front of the list, so children always come before their parents in order
root  = find(sC.parent==0);
order = [root];
ch    = sC.children{root};   % brace indexing: the content of the cell is needed
while any(ch), i = ch(1); order = [ch(1), order]; ch = [ch(2:end), sC.children{i}]; end

he = zeros(sC.nc,1);
if strcmp(height,'level'),
  % depth of each cluster below the root, then flipped so that the
  % deepest clusters get height 0
  ch = sC.children{root};
  while any(ch),
    i = ch(1); he(i) = he(sC.parent(i))+1;
    ch = [ch(2:end), sC.children{i}];
  end
  he = max(he)-he;
elseif strcmp(height,'level2'),
  % bottom-up: the order list guarantees the children are done first
  for i=order, if any(sC.children{i}), he(i) = max(he(sC.children{i}))+1; end, end
else
  %he = som_cldist ( between children )
end

% fill Z from the last row upwards, walking the tree breadth-first
Z    = zeros(sC.nb-1,3);
i    = sC.nb-1;
inds = root;
while i>0,
  ch = sC.children{inds(1)}; h = he(inds(1)); inds = [inds(2:end), ch];
  if length(ch)>=2,
    % a cluster with more than two children is chained into binary merges
    for k=1:length(ch)-2, Z(i,:) = [i-1, ch(k), h]; i = i - 1; end
    Z(i,:) = [ch(end-1) ch(end) h]; i = i - 1;
  end
end
return;
wolffd@0 147
function sC = Z2sC(Z)

% Builds a som_clustering struct from a cluster hierarchy matrix Z
% (size nb-1 x 3, as given by LINKAGE): row j of Z defines cluster nb+j as
% the combination of the clusters listed in its first two columns.
% Coordinates and colors of the base clusters follow their left-to-right
% order in the tree; derived clusters get the average of their base
% clusters.

nb = size(Z,1)+1;
nc = 2*nb-1;
sC = clstruct(nb,nc);
sC.base = [1:nb];
for i=1:nc,
  j = find(Z(:,1)==i | Z(:,2)==i);
  % the root cluster does not appear in Z: it keeps parent 0
  if ~isempty(j),
    sC.parent(i) = nb+j;
    sC.children{sC.parent(i)}(end+1) = i;
  end
end
% coords and color
% expand the root into its children repeatedly until only base clusters
% remain; the result is the order of the base clusters in the tree
order     = nc;
nonleaves = 1;
while any(nonleaves),
  j  = nonleaves(1);
  ch = sC.children{order(j)};
  if j==1, oleft = []; else oleft = order(1:(j-1)); end
  if j==length(order), oright = []; else oright = order((j+1):length(order)); end
  order = [oleft, ch, oright];
  nonleaves = find(order>nb);
end
[dummy,co] = sort(order);    % co(k) = position of base cluster k on the line
sC.coord = derivative_average(sC,co');
H = hsv(nb+1);
sC.color = derivative_average(sC,H(co,:));
return;
wolffd@0 175
function sC = part2sC(part)

% Builds a som_clustering struct from a partitioning vector: the base
% clusters are 1:max(part) and a single extra cluster is made the parent
% of all of them. Base clusters are placed at coordinates 1:nb and colored
% with the first nb rows of hsv(nb+1); the extra cluster gets the averages.

nbase   = max(part);
iroot   = nbase+1;
baseids = 1:nbase;

sC = clstruct(nbase,iroot);
sC.base = part;
sC.children{iroot} = baseids;
sC.parent(baseids) = iroot;

sC.coord = derivative_average(sC,baseids');
palette  = hsv(iroot);
sC.color = derivative_average(sC,palette(baseids,:));
return;
wolffd@0 189
function [sC,old2new] = removecluster(sC,ind)

% Removes the derived cluster ind from the clustering; its children are
% attached to its parent. The root cluster and base clusters (clusters
% without children) are never removed: in that case sC is returned
% unchanged.
%
%  old2new  (vector) 1 x nc (nc before the removal): new index of each
%           old cluster, 0 for the removed one; identity if nothing
%           was removed

old2new    = [1:sC.nc];
parent_ind = sC.parent(ind);
ch         = sC.children{ind};
if ~parent_ind,
  % trying to remove root cluster - no go
  return;
elseif ~any(ch),
  % trying to remove a base cluster - no go
  return;
else
  % ok, proceed
  old2new = [1:ind-1 0 ind:sC.nc-1];
  % update parent and child fields
  sC.parent(ch) = parent_ind;
  sC.children{parent_ind} = setdiff([sC.children{parent_ind}, ch],ind);
  % remove old cluster
  j = [1:ind-1, ind+1:sC.nc];
  sC.parent   = sC.parent(j);
  sC.children = sC.children(j);
  sC.color    = sC.color(j,:);
  sC.coord    = sC.coord(j,:);
  sC.nc       = sC.nc-1;
  % update old indices to new indices; the root has parent 0, which is
  % not a valid subscript and must stay 0, so only real parents are mapped
  % (assigning in place also keeps .parent an nc x 1 column)
  haspar = sC.parent > 0;
  sC.parent(haspar) = old2new(sC.parent(haspar));
  for i=1:sC.nc, sC.children{i} = old2new(sC.children{i}); end
end
return;
wolffd@0 219
function [sC,old2new,inew] = addmergedcluster(sC,inds)

% Adds a new cluster that combines the sibling clusters inds. The new
% cluster is inserted at the index of the common parent, so the parent and
% all clusters after it move one index up. Color and coordinates of the
% new cluster are the averages over its base clusters.
%
% Nothing is added (inew = 0, old2new = identity) when inds is empty, when
% the clusters do not share a parent, when they have no parent at all, or
% when they are all the children of their parent (such a cluster exists
% already).
%
%  old2new  (vector) 1 x nc (nc before the addition): new index of each
%           old cluster
%  inew     (scalar) index of the added cluster, 0 if none was added

old2new = [1:sC.nc];
inew    = 0;
if isempty(inds),
  % nothing to merge
  return;
end
p_inds = sC.parent(inds);
if ~all(p_inds(1)==p_inds),
  % clusters are not siblings - no go
  return;
end
parent_ind = p_inds(1);
if parent_ind==0,
  % the root cluster has no parent under which to add the new cluster
  return;
end
if isempty(setdiff(sC.children{parent_ind},inds)),
  % such a merged cluster exists already
  return;
else
  % ok, proceed
  inew    = parent_ind;
  old2new = [1:inew-1,inew+1:sC.nc+1];
  % add the new cluster (=copy of parent_ind)
  j = [1:inew,inew:sC.nc];
  sC.parent   = sC.parent(j);
  sC.children = sC.children(j);
  sC.color    = sC.color(j,:);
  sC.coord    = sC.coord(j,:);
  sC.nc       = sC.nc+1;
  % update old indices to new indices; parent 0 (root) is not a valid
  % subscript and must stay 0, so only real parents are mapped
  haspar = sC.parent > 0;
  sC.parent(haspar) = old2new(sC.parent(haspar));
  for i=1:sC.nc, sC.children{i} = old2new(sC.children{i}); end
  inds       = old2new(inds);
  parent_ind = old2new(parent_ind);
  % update parent, child, color and coord fields
  sC.parent(inds) = inew;
  sC.parent(inew) = parent_ind;
  sC.children{inew} = inds;
  sC.children{parent_ind} = [setdiff(sC.children{parent_ind}, inds), inew];
  b = baseind(sC,inew);
  % average over rows explicitly: with a single base cluster mean() would
  % otherwise average across the columns
  sC.color(inew,:) = mean(sC.color(b,:),1);
  sC.coord(inew,:) = mean(sC.coord(b,:),1);
end
return;
wolffd@0 259
function C = derivative_average(sC,Cbase)

% Extends per-base-cluster values to all clusters.
%
%  Cbase  (matrix) size nb x dim, one row of values (e.g. color or
%                  coordinates) for each base cluster
%  C      (matrix) size nc x dim, row i is the average of the Cbase rows
%                  of the base clusters belonging to cluster i
%
% Raises an error if Cbase does not have nb rows.

[n dim] = size(Cbase);
if n ~= sC.nb, error('Color / Coord matrix should have nb rows'); end
C = zeros(sC.nc,dim);
% average along dimension 1 explicitly: a base cluster selects a single
% row, and mean() of a row vector would average across its columns
for i=1:sC.nc, C(i,:) = mean(Cbase(baseind(sC,i),:),1); end
return;
wolffd@0 267
function bi = baseind(sC,ind)

% Returns the indices of the base clusters (index <= sC.nb) found in the
% subtree of cluster ind, ind itself included, in breadth-first order.

bi = [ind];
i  = 1;
while i<=length(bi),
  ch = sC.children{bi(i)};
  bi = [bi, ch(:)'];    % append the children of the i'th visited cluster
  i  = i+1;             % advance to the next cluster, otherwise the loop never ends
end
bi = bi(bi<=sC.nb);
return;
wolffd@0 275
wolffd@0 276
wolffd@0 277
wolffd@0 278