function [sC,old2new,newi] = som_clset(sC,action,par1,par2)

% SOM_CLSET Create and/or set values in the som_clustering struct.
%
%  [sC,old2new,newi] = som_clset(sC,action,par1,par2)
%
% first argument
%   sC       (struct) a som_clustering struct
%   Z        (matrix) size nb-1 x 3, as given by LINKAGE function
%   base     (vector) size dlen x 1, a partitioning of the data
%                     (NaN entries are kept as NaN, i.e. ignored data)
%
% actions
%   (none)   if ACTION is omitted, the struct is only created / returned
%   'remove' removes the indicated clusters (par1: vector); root and
%            base clusters are never removed
%   'add'    adds a cluster by making a combination of the indicated
%            sibling clusters (par1: vector)
%   'move'   not implemented: would move a child cluster (par1: scalar)
%            from a parent to another (par2: vector 1 x 2)
%   'merge'  like 'add', followed by removing the indicated clusters
%            (par1: vector)
%   'split'  not implemented: the indicated cluster (par1: scalar) would be
%            partitioned into indicated parts (par2: vector), which would
%            then be added, while the indicated cluster would be removed
%   'coord'  sets the coordinates of base clusters (par1: matrix nb x *),
%            and recalculates coordinates of the derived clusters (by
%            averaging base cluster coordinates)
%   'color'  sets the colors of base clusters (par1: matrix nb x 3), and
%            recalculates colors of the derived clusters (as averages of
%            base cluster colors)
%
% outputs
%   sC       (struct) the (updated) som_clustering struct
%   old2new  (vector) map from old cluster indices to new ones; 0 marks a
%                     removed cluster. For a partitioning vector input it
%                     is a sparse map from original labels to base cluster
%                     indices (unless overwritten by 'add' / 'merge').
%   newi     (scalar) index of the cluster created by 'add' / 'merge',
%                     0 if no cluster was created, [] for other actions
%
% sC
%   .type     (string) 'som_clustering'
%   .name     (string) Identifier for the clustering.
%   .nb       (scalar) Number of base clusters in the clustering.
%   .base     (vector) Size dlen x 1, the basic groups of data
%                      forming the base clusters, e.g. as a result
%                      of partitive clustering. Allowed values are
%                       1:nb   indicating the base cluster
%                              to which the data belongs
%                       NaN    indicating that the data has
%                              been ignored in the clustering
%   .nc       (scalar) Number of clusters in the clustering
%                      (nb + derived clusters).
%   .children (cellarray) size nc x 1, each cell gives the list of indices
%                      of child clusters for the cluster
%   .parent   (vector) size nc x 1, the index of parent of each cluster
%                      (or zero if the cluster does not have a parent)
%   .coord    (matrix) size nc x *, visualization coordinates for each
%                      cluster. By default the coordinates are set so that
%                      the base clusters are ordered on a line, and the
%                      position of each combined cluster is average of
%                      the base clusters that constitute it.
%   .color    (matrix) size nc x 3, color for each cluster.
%                      By default the colors are set so that the
%                      base clusters are ordered on a line,
%                      and then colors are assigned from the 'hsv'
%                      colormap to the base clusters. The color
%                      of each combined cluster is average as above.
%   .cldist   (string) Default cluster distance function.

% Without an action the function only builds / passes through the struct.
if nargin < 2, action = ''; end

inew = [];
if isstruct(sC),
  % it should be a som_clustering struct
  old2new = [1:sC.nc];
elseif size(sC,2)==3,
  % assume it is a cluster hierarchy matrix Z
  % NOTE(review): a 1 x 3 partitioning vector is indistinguishable from a
  % one-row Z here; pass partitions as column vectors.
  sC = Z2sC(sC);
  old2new = [1:sC.nc];
else
  % assume it is a partitioning vector
  base = sC;
  known = isfinite(base);
  u = unique(base(known));
  old2new = sparse(u,1,1:length(u));
  % relabel only the finite entries; NaN (ignored data) must not be used
  % as an index and stays NaN in the base partitioning
  newbase = NaN(size(base));
  newbase(known) = full(old2new(base(known)));
  sC = part2sC(newbase);
end

switch action,
 case 'remove',
  [sC,old2new] = removeclusters(sC,old2new,par1);
 case 'add',
  [sC,old2new,inew] = addmergedcluster(sC,par1);
 case 'move',
  % not implemented yet
 case 'split',
  % not implemented yet
 case 'merge',
  [sC,old2new,inew] = addmergedcluster(sC,par1);
  [sC,old2new] = removeclusters(sC,old2new,par1);
 case 'color',
  sC.color = derivative_average(sC,par1);
 case 'coord',
  sC.coord = derivative_average(sC,par1);
end

% the declared output is 'newi'; it must be assigned explicitly
newi = inew;

return;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% subfunctions

function [sC,old2new] = removeclusters(sC,old2new,inds)

% REMOVECLUSTERS Remove several clusters one by one, composing index maps.
%   old2new maps the caller's indices to current indices (0 = removed).

for i=1:length(inds),
  target = full(old2new(inds(i)));
  if target > 0,   % skip clusters which have already been removed
    [sC,o2n] = removecluster(sC,target);
    % compose the maps, leaving already-removed (zero) entries untouched:
    % zero is not a valid subscript into o2n
    nz = find(old2new);
    old2new(nz) = o2n(full(old2new(nz)));
  end
end
return;

function sC = clstruct(nb,nc)

% CLSTRUCT Create an empty som_clustering struct with nb base clusters
%   and nc clusters in total.

sC = struct('type','som_clustering',...
            'name','','base',[],'nb',nb,'nc',nc,...
            'parent',[],'children',[],'coord',[],'color',[],...
            'cldist','centroid');
sC.base = [1:nb];
sC.parent = zeros(nc,1);
sC.children = cell(nc,1); sC.children(:) = {[]};
sC.coord = zeros(nc,2);
sC.color = zeros(nc,3);
return;

function Z = sC2Z(sC,height)

% SC2Z Convert a som_clustering struct to a LINKAGE-style matrix Z.
%   height: 'level' (default) or 'level2' select how the third column is
%   derived from the depth of the clusters in the tree.
%   NOTE(review): this helper is not called from within this file, and
%   only the cell-indexing defect has been fixed; the construction of Z
%   below (e.g. the use of i-1 as a cluster index, and inds(1) when inds
%   runs empty) looks unfinished and should be verified before use.

if nargin<2, height = 'level'; end

root = find(sC.parent==0);
order = [root];
ch = sC.children{root};   % brace indexing: the content, not a 1x1 cell
while any(ch),
  i = ch(1); order = [ch(1), order]; ch = [ch(2:end), sC.children{i}];
end

he = zeros(sC.nc,1);
if strcmp(height,'level'),
  ch = sC.children{root};
  while any(ch),
    i = ch(1); he(i) = he(sC.parent(i))+1;
    ch = [ch(2:end), sC.children{i}];
  end
  he = max(he)-he;
elseif strcmp(height,'level2'),
  for i=order,
    if any(sC.children{i}), he(i) = max(he(sC.children{i}))+1; end
  end
else
  %he = som_cldist ( between children )
end

Z = zeros(sC.nb-1,3);
i = sC.nb-1;
inds = root;
while i>0,
  ch = sC.children{inds(1)}; h = he(inds(1)); inds = [inds(2:end), ch];
  if length(ch)>=2,
    for k=1:length(ch)-2, Z(i,:) = [i-1, ch(k), h]; i = i - 1; end
    Z(i,:) = [ch(end-1) ch(end) h]; i = i - 1;
  end
end
return;

function sC = Z2sC(Z)

% Z2SC Build a som_clustering struct from a LINKAGE-style matrix Z
%   (size nb-1 x 3): row j of Z forms cluster nb+j from the two clusters
%   listed in its first two columns.

nb = size(Z,1)+1;
nc = 2*nb-1;
sC = clstruct(nb,nc);
sC.base = [1:nb];
for i=1:nc,
  j = find(Z(:,1)==i | Z(:,2)==i);
  % the root cluster appears in no row of Z: it keeps parent 0
  if ~isempty(j),
    sC.parent(i) = nb+j(1);
    sC.children{sC.parent(i)}(end+1) = i;
  end
end
% coords and color: expand the root into an ordered list of leaves
order = nc;
nonleaves = find(order>nb);
while any(nonleaves),
  j = nonleaves(1);
  ch = sC.children{order(j)};
  if j==1, oleft = []; else oleft = order(1:(j-1)); end
  if j==length(order), oright = []; else oright = order((j+1):length(order)); end
  order = [oleft, ch, oright];
  nonleaves = find(order>nb);
end
[dummy,co] = sort(order);
sC.coord = derivative_average(sC,co');
H = hsv(nb+1);
sC.color = derivative_average(sC,H(co,:));
return;

function sC = part2sC(part)

% PART2SC Build a som_clustering struct from a partitioning vector with
%   labels 1:nb (NaN allowed): nb base clusters under a single root.

nb = max(part);
nc = nb+1;
sC = clstruct(nb,nc);
sC.base = part;
sC.parent(1:nb) = nc;
sC.children{nc} = [1:nb];
co = [1:nb]';
sC.coord = derivative_average(sC,co);
H = hsv(nb+1);
sC.color = derivative_average(sC,H(1:nb,:));
return;

function [sC,old2new] = removecluster(sC,ind)

% REMOVECLUSTER Remove one derived, non-root cluster; its children are
%   attached to its parent. old2new maps old indices to new (0 = removed).
%   Root and base clusters are left alone (identity map is returned).

old2new = [1:sC.nc];
parent_ind = sC.parent(ind);
ch = sC.children{ind};
if ~parent_ind,
  % trying to remove root cluster - no go
  return;
elseif ~any(ch),
  % trying to remove a base cluster - no go
  return;
else
  % ok, proceed
  old2new = [1:ind-1 0 ind:sC.nc-1];
  % update parent and child fields
  sC.parent(ch) = parent_ind;
  sC.children{parent_ind} = setdiff([sC.children{parent_ind}, ch],ind);
  % remove old cluster
  j = [1:ind-1, ind+1:sC.nc];
  sC.parent = sC.parent(j);
  sC.children = sC.children(j);
  sC.color = sC.color(j,:);
  sC.coord = sC.coord(j,:);
  sC.nc = sC.nc-1;
  % update old indices to new indices; the root has parent 0, which is
  % not a valid subscript, so only non-zero parents are remapped
  nz = find(sC.parent);
  sC.parent(nz) = old2new(sC.parent(nz));
  for i=1:sC.nc, sC.children{i} = old2new(sC.children{i}); end
end
return;

function [sC,old2new,inew] = addmergedcluster(sC,inds)

% ADDMERGEDCLUSTER Insert a new cluster combining the sibling clusters
%   inds. The new cluster takes the index of the old parent, which (with
%   all later clusters) is shifted up by one. Returns inew = 0 and an
%   identity map if nothing was added.

old2new = [1:sC.nc];
inew = 0;
inds = inds(:)';   % row vector, so that concatenations below are safe
if isempty(inds),
  % nothing to merge
  return;
end
p_inds = sC.parent(inds);
if ~all(p_inds(1)==p_inds),
  % clusters are not siblings - no go
  return;
end
parent_ind = p_inds(1);
if ~parent_ind,
  % the root cluster has no parent under which to merge - no go
  return;
end
if isempty(setdiff(sC.children{parent_ind},inds)),
  % such a merged cluster exists already
  return;
else
  % ok, proceed
  inew = parent_ind;
  old2new = [1:inew-1,inew+1:sC.nc+1];
  % add the new cluster (=copy of parent_ind)
  j = [1:inew,inew:sC.nc];
  sC.parent = sC.parent(j);
  sC.children = sC.children(j);
  sC.color = sC.color(j,:);
  sC.coord = sC.coord(j,:);
  sC.nc = sC.nc+1;
  % update old indices to new indices; skip the root's zero parent,
  % which is not a valid subscript
  nz = find(sC.parent);
  sC.parent(nz) = old2new(sC.parent(nz));
  for i=1:sC.nc, sC.children{i} = old2new(sC.children{i}); end
  inds = old2new(inds);
  parent_ind = old2new(parent_ind);
  % update parent, child, color and coord fields
  sC.parent(inds) = inew;
  sC.parent(inew) = parent_ind;
  sC.children{inew} = inds;
  sC.children{parent_ind} = [setdiff(sC.children{parent_ind}, inds), inew];
  b = baseind(sC,inew);
  % average over rows explicitly: mean() of a single row would otherwise
  % average across the columns
  sC.color(inew,:) = mean(sC.color(b,:),1);
  sC.coord(inew,:) = mean(sC.coord(b,:),1);
end
return;

function C = derivative_average(sC,Cbase)

% DERIVATIVE_AVERAGE Extend per-base-cluster values (nb x dim) to all
%   clusters (nc x dim): each cluster gets the average over the base
%   clusters it contains.

[n,dim] = size(Cbase);
if n ~= sC.nb, error('Color / Coord matrix should have nb rows'); end
C = zeros(sC.nc,dim);
for i=1:sC.nc,
  % dimension given explicitly so that a single row is returned as such
  C(i,:) = mean(Cbase(baseind(sC,i),:),1);
end
return;

function bi = baseind(sC,ind)

% BASEIND Indices of the base clusters contained in cluster(s) ind,
%   found by a breadth-first walk through the children lists.

bi = ind(:)';
i = 1;
while i<=length(bi),
  ch = sC.children{bi(i)};
  bi = [bi, ch(:)'];
  i = i + 1;   % advance to the next cluster in the queue
end
bi = bi(bi<=sC.nb);
return;