Mercurial > hg > camir-aes2014
diff toolboxes/MIRtoolbox1.3.2/somtoolbox/som_clset.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/toolboxes/MIRtoolbox1.3.2/somtoolbox/som_clset.m Tue Feb 10 15:05:51 2015 +0000 @@ -0,0 +1,278 @@ +function [sC,old2new,newi] = som_clset(sC,action,par1,par2) + +% SOM_CLSET Create and/or set values in the som_clustering struct. +% +% first argument +% sC (struct) a som_clustering struct +% Z (matrix) size nb-1 x 3, as given by LINKAGE function +% base (vector) size dlen x 1, a partitioning of the data +% +% actions +% 'remove' removes the indicated clusters (par1: vector) +% 'add' add a cluster by making a combination of the indicated +% clusters (par1: vector) +% %'move' moves a child cluster (par1: scalar) from a parent to another +% % (par2: vector 1 x 2) +% 'merge' like 'add', followed by removing the indicated clusters (par1: vector) +% %'split' the indicated cluster (par1: scalar) is partitioned into indicated +% % parts (par2: vector), which are then added, while the indicated cluster +% % (par1) is removed +% 'coord' sets the coordinates of base clusters (par1: matrix nb x *), and +% recalculates coordinates of the derived clusters (by averaging base cluster +% coordinates) +% 'color' sets the colors of base clusters (par1: matrix nb x 3), and recalculates +% colors of the derived clusters (as averages of base cluster colors) +% +% sC +% .type (string) 'som_clustering' +% .name (string) Identifier for the clustering. +% .nb (scalar) Number of base clusters in the clustering. +% .base (vector) Size dlen x 1, the basic groups of data +% forming the base clusters, e.g. as a result +% of partitive clustering. Allowed values are +% 1:nb indicating the base cluster +% to which the data belongs to. +% NaN indicating that the data has +% been ignored in the clustering +% .nc (scalar) Number of clusters in the clustering (nb + derived clusters). +% .children (cellarray) size nc x 1, each cell gives the list of indeces +% of child clusters for the cluster +% .parent (vector) size nc x 1, the index of parent of each cluster +% (or zero if the cluster does not have a parent) +% .coord (matrix) size nc x *, visualization coordinates for each cluster +% By default the coordinates are set so that +% the base clusters are ordered on a line, and the +% position of each combined cluster is average of +% the base clusters that constitute it. +% .color (matrix) size nc x 3, color for each cluster. +% By default the colors are set so that the +% base clusters are ordered on a line, +% and then colors are assigned from the 'hsv' +% colormap to the base clusters. The color +% of each combined cluster is average as above. +% .cldist (string) Default cluster distance function. + +inew = []; +if isstruct(sC), + % it should be a som_clustering struct + old2new = [1:sC.nc]; +elseif size(sC,2)==3, + % assume it is a cluster hierarchy matrix Z + sC = Z2sC(sC); + old2new = [1:sC.nc]; +else + % assume it is a partitioning vector + base = sC; + u = unique(base(isfinite(base))); + old2new = sparse(u,1,1:length(u)); + base = old2new(base); + sC = part2sC(base); +end + +switch action, +case 'remove', + for i=1:length(par1), + [sC,o2n] = removecluster(sC,old2new(par1(i))); + old2new = o2n(old2new); + end +case 'add', + [sC,old2new,inew] = addmergedcluster(sC,par1); +case 'move', + % not implemented yet +case 'split', + % not implemented yet +case 'merge', + [sC,old2new,inew] = addmergedcluster(sC,par1); + for i=1:length(par1), + [sC,o2n] = removecluster(sC,old2new(par1(i))); + old2new = o2n(old2new); + end +case 'color', + sC.color = derivative_average(sC,par1); +case 'coord', + sC.coord = derivative_average(sC,par1); +end + +return; + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% subfunctions + +function sC = clstruct(nb,nc) + + sC = struct('type','som_clustering',... + 'name','','base',[],'nb',nb,'nc',nc,... + 'parent',[],'children',[],'coord',[],'color',[],'cldist','centroid'); + sC.base = [1:nb]; + sC.parent = zeros(nc,1); + sC.children = cell(nc,1); sC.children(:) = {[]}; + sC.coord = zeros(nc,2); + sC.color = zeros(nc,3); + return; + +function Z = sC2Z(sC,height) + + if nargin<2, height = 'level'; end + + root = find(sC.parent==0); + order = [root]; + ch = sC.children(root); + while any(ch), i = ch(1); order = [ch(1), order]; ch = [ch(2:end), sC.children{i}]; end + + he = zeros(sC.nc,1); + if strcmp(height,'level'), + ch = sC.children{root}; + while any(ch), + i = ch(1); he(i) = he(sC.parent(i))+1; + ch = [ch(2:end), sC.children{i}]; + end + he = max(he)-he; + elseif strcmp(height,'level2'), + for i=order, if any(sC.children{i}), he(i) = max(he(sC.children{i}))+1; end, end + else + %he = som_cldist ( between children ) + end + + Z = zeros(sC.nb-1,3); + i = sC.nb-1; + inds = root; + while i>0, + ch = sC.children{inds(1)}; h = he(inds(1)); inds = [inds(2:end), ch]; + if length(ch)>=2, + for k=1:length(ch)-2, Z(i,:) = [i-1, ch(k), h]; i = i - 1; end + Z(i,:) = [ch(end-1) ch(end) h]; i = i - 1; + end + end + return; + +function sC = Z2sC(Z) + + nb = size(Z,1)+1; + nc = 2*nb-1; + sC = clstruct(nb,nc); + sC.base = [1:nb]; + for i=1:nc, + j = find(Z(:,1)==i | Z(:,2)==i); + sC.parent(i) = nb+j; + sC.children{sC.parent(i)}(end+1) = i; + end + % coords and color + order = nc; + nonleaves = 1; + while any(nonleaves), + j = nonleaves(1); + ch = sC.children{order(j)}; + if j==1, oleft = []; else oleft = order(1:(j-1)); end + if j==length(order), oright = []; else oright = order((j+1):length(order)); end + order = [oleft, ch, oright]; + nonleaves = find(order>nb); + end + [dummy,co] = sort(order); + sC.coord = derivative_average(sC,co'); + H = hsv(nb+1); + sC.color = derivative_average(sC,H(co,:)); + return; + +function sC = part2sC(part) + + nb = max(part); + nc = nb+1; + sC = clstruct(nb,nc); + sC.base = part; + sC.parent(1:nb) = nc; + sC.children{nc} = [1:nb]; + co = [1:nb]'; + sC.coord = derivative_average(sC,co); + H = hsv(nb+1); + sC.color = derivative_average(sC,H(1:nb,:)); + return; + +function [sC,old2new] = removecluster(sC,ind) + + old2new = [1:sC.nc]; + parent_ind = sC.parent(ind); + ch = sC.children{ind}; + if ~parent_ind, + % trying to remove root cluster - no go + return; + elseif ~any(ch), + % trying to remove a base cluster - no go + return; + else + % ok, proceed + old2new = [1:ind-1 0 ind:sC.nc-1]; + % update parent and child fields + sC.parent(ch) = parent_ind; + sC.children{parent_ind} = setdiff([sC.children{parent_ind}, ch],ind); + % remove old cluster + j = [1:ind-1, ind+1:sC.nc]; + sC.parent = sC.parent(j); + sC.children = sC.children(j); + sC.color = sC.color(j,:); + sC.coord = sC.coord(j,:); + sC.nc = sC.nc-1; + % update old indeces to new indices + sC.parent = old2new(sC.parent); + for i=1:sC.nc, sC.children{i} = old2new(sC.children{i}); end + end + return; + +function [sC,old2new,inew] = addmergedcluster(sC,inds) + + old2new = [1:sC.nc]; + inew = 0; + p_inds = sC.parent(inds); + if ~all(p_inds(1)==p_inds), + % clusters are not siblings - no go + return; + end + parent_ind = p_inds(1); + if isempty(setdiff(sC.children{parent_ind},inds)), + % such a merged cluster exists already + return; + else + % ok, proceed + inew = parent_ind; + old2new = [1:inew-1,inew+1:sC.nc+1]; + % add the new cluster (=copy of parent_ind) + j = [1:inew,inew:sC.nc]; + sC.parent = sC.parent(j); + sC.children = sC.children(j); + sC.color = sC.color(j,:); + sC.coord = sC.coord(j,:); + sC.nc = sC.nc+1; + % update old indeces to new indices + sC.parent = old2new(sC.parent); + for i=1:sC.nc, sC.children{i} = old2new(sC.children{i}); end + inds = old2new(inds); + parent_ind = old2new(parent_ind); + % update parent, child, color and coord fields + sC.parent(inds) = inew; + sC.parent(inew) = parent_ind; + sC.children{inew} = inds; + sC.children{parent_ind} = [setdiff(sC.children{parent_ind}, inds), inew]; + b = baseind(sC,inew); + sC.color(inew,:) = mean(sC.color(b,:)); + sC.coord(inew,:) = mean(sC.coord(b,:)); + end + return; + +function C = derivative_average(sC,Cbase) + + [n dim] = size(Cbase); + if n ~= sC.nb, error('Color / Coord matrix should have nb rows'); end + C = zeros(sC.nc,dim); + for i=1:sC.nc, C(i,:) = mean(Cbase(baseind(sC,i),:)); end + return; + +function bi = baseind(sC,ind) + + bi = [ind]; + i = 1; + while i<=length(bi), bi = [bi, sC.children{bi(i)}]; end + bi = bi(bi<=sC.nb); + return; + + + +