Daniel@0: function [sC,old2new,newi] = som_clset(sC,action,par1,par2) Daniel@0: Daniel@0: % SOM_CLSET Create and/or set values in the som_clustering struct. Daniel@0: % Daniel@0: % first argument Daniel@0: % sC (struct) a som_clustering struct Daniel@0: % Z (matrix) size nb-1 x 3, as given by LINKAGE function Daniel@0: % base (vector) size dlen x 1, a partitioning of the data Daniel@0: % Daniel@0: % actions Daniel@0: % 'remove' removes the indicated clusters (par1: vector) Daniel@0: % 'add' add a cluster by making a combination of the indicated Daniel@0: % clusters (par1: vector) Daniel@0: % %'move' moves a child cluster (par1: scalar) from a parent to another Daniel@0: % % (par2: vector 1 x 2) Daniel@0: % 'merge' like 'add', followed by removing the indicated clusters (par1: vector) Daniel@0: % %'split' the indicated cluster (par1: scalar) is partitioned into indicated Daniel@0: % % parts (par2: vector), which are then added, while the indicated cluster Daniel@0: % % (par1) is removed Daniel@0: % 'coord' sets the coordinates of base clusters (par1: matrix nb x *), and Daniel@0: % recalculates coordinates of the derived clusters (by averaging base cluster Daniel@0: % coordinates) Daniel@0: % 'color' sets the colors of base clusters (par1: matrix nb x 3), and recalculates Daniel@0: % colors of the derived clusters (as averages of base cluster colors) Daniel@0: % Daniel@0: % sC Daniel@0: % .type (string) 'som_clustering' Daniel@0: % .name (string) Identifier for the clustering. Daniel@0: % .nb (scalar) Number of base clusters in the clustering. Daniel@0: % .base (vector) Size dlen x 1, the basic groups of data Daniel@0: % forming the base clusters, e.g. as a result Daniel@0: % of partitive clustering. Allowed values are Daniel@0: % 1:nb indicating the base cluster Daniel@0: % to which the data belongs to. Daniel@0: % NaN indicating that the data has Daniel@0: % been ignored in the clustering Daniel@0: % .nc (scalar) Number of clusters in the clustering (nb + derived clusters). Daniel@0: % .children (cellarray) size nc x 1, each cell gives the list of indeces Daniel@0: % of child clusters for the cluster Daniel@0: % .parent (vector) size nc x 1, the index of parent of each cluster Daniel@0: % (or zero if the cluster does not have a parent) Daniel@0: % .coord (matrix) size nc x *, visualization coordinates for each cluster Daniel@0: % By default the coordinates are set so that Daniel@0: % the base clusters are ordered on a line, and the Daniel@0: % position of each combined cluster is average of Daniel@0: % the base clusters that constitute it. Daniel@0: % .color (matrix) size nc x 3, color for each cluster. Daniel@0: % By default the colors are set so that the Daniel@0: % base clusters are ordered on a line, Daniel@0: % and then colors are assigned from the 'hsv' Daniel@0: % colormap to the base clusters. The color Daniel@0: % of each combined cluster is average as above. Daniel@0: % .cldist (string) Default cluster distance function. Daniel@0: Daniel@0: inew = []; Daniel@0: if isstruct(sC), Daniel@0: % it should be a som_clustering struct Daniel@0: old2new = [1:sC.nc]; Daniel@0: elseif size(sC,2)==3, Daniel@0: % assume it is a cluster hierarchy matrix Z Daniel@0: sC = Z2sC(sC); Daniel@0: old2new = [1:sC.nc]; Daniel@0: else Daniel@0: % assume it is a partitioning vector Daniel@0: base = sC; Daniel@0: u = unique(base(isfinite(base))); Daniel@0: old2new = sparse(u,1,1:length(u)); Daniel@0: base = old2new(base); Daniel@0: sC = part2sC(base); Daniel@0: end Daniel@0: Daniel@0: switch action, Daniel@0: case 'remove', Daniel@0: for i=1:length(par1), Daniel@0: [sC,o2n] = removecluster(sC,old2new(par1(i))); Daniel@0: old2new = o2n(old2new); Daniel@0: end Daniel@0: case 'add', Daniel@0: [sC,old2new,inew] = addmergedcluster(sC,par1); Daniel@0: case 'move', Daniel@0: % not implemented yet Daniel@0: case 'split', Daniel@0: % not implemented yet Daniel@0: case 'merge', Daniel@0: [sC,old2new,inew] = addmergedcluster(sC,par1); Daniel@0: for i=1:length(par1), Daniel@0: [sC,o2n] = removecluster(sC,old2new(par1(i))); Daniel@0: old2new = o2n(old2new); Daniel@0: end Daniel@0: case 'color', Daniel@0: sC.color = derivative_average(sC,par1); Daniel@0: case 'coord', Daniel@0: sC.coord = derivative_average(sC,par1); Daniel@0: end Daniel@0: Daniel@0: return; Daniel@0: Daniel@0: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Daniel@0: %% subfunctions Daniel@0: Daniel@0: function sC = clstruct(nb,nc) Daniel@0: Daniel@0: sC = struct('type','som_clustering',... Daniel@0: 'name','','base',[],'nb',nb,'nc',nc,... Daniel@0: 'parent',[],'children',[],'coord',[],'color',[],'cldist','centroid'); Daniel@0: sC.base = [1:nb]; Daniel@0: sC.parent = zeros(nc,1); Daniel@0: sC.children = cell(nc,1); sC.children(:) = {[]}; Daniel@0: sC.coord = zeros(nc,2); Daniel@0: sC.color = zeros(nc,3); Daniel@0: return; Daniel@0: Daniel@0: function Z = sC2Z(sC,height) Daniel@0: Daniel@0: if nargin<2, height = 'level'; end Daniel@0: Daniel@0: root = find(sC.parent==0); Daniel@0: order = [root]; Daniel@0: ch = sC.children(root); Daniel@0: while any(ch), i = ch(1); order = [ch(1), order]; ch = [ch(2:end), sC.children{i}]; end Daniel@0: Daniel@0: he = zeros(sC.nc,1); Daniel@0: if strcmp(height,'level'), Daniel@0: ch = sC.children{root}; Daniel@0: while any(ch), Daniel@0: i = ch(1); he(i) = he(sC.parent(i))+1; Daniel@0: ch = [ch(2:end), sC.children{i}]; Daniel@0: end Daniel@0: he = max(he)-he; Daniel@0: elseif strcmp(height,'level2'), Daniel@0: for i=order, if any(sC.children{i}), he(i) = max(he(sC.children{i}))+1; end, end Daniel@0: else Daniel@0: %he = som_cldist ( between children ) Daniel@0: end Daniel@0: Daniel@0: Z = zeros(sC.nb-1,3); Daniel@0: i = sC.nb-1; Daniel@0: inds = root; Daniel@0: while i>0, Daniel@0: ch = sC.children{inds(1)}; h = he(inds(1)); inds = [inds(2:end), ch]; Daniel@0: if length(ch)>=2, Daniel@0: for k=1:length(ch)-2, Z(i,:) = [i-1, ch(k), h]; i = i - 1; end Daniel@0: Z(i,:) = [ch(end-1) ch(end) h]; i = i - 1; Daniel@0: end Daniel@0: end Daniel@0: return; Daniel@0: Daniel@0: function sC = Z2sC(Z) Daniel@0: Daniel@0: nb = size(Z,1)+1; Daniel@0: nc = 2*nb-1; Daniel@0: sC = clstruct(nb,nc); Daniel@0: sC.base = [1:nb]; Daniel@0: for i=1:nc, Daniel@0: j = find(Z(:,1)==i | Z(:,2)==i); Daniel@0: sC.parent(i) = nb+j; Daniel@0: sC.children{sC.parent(i)}(end+1) = i; Daniel@0: end Daniel@0: % coords and color Daniel@0: order = nc; Daniel@0: nonleaves = 1; Daniel@0: while any(nonleaves), Daniel@0: j = nonleaves(1); Daniel@0: ch = sC.children{order(j)}; Daniel@0: if j==1, oleft = []; else oleft = order(1:(j-1)); end Daniel@0: if j==length(order), oright = []; else oright = order((j+1):length(order)); end Daniel@0: order = [oleft, ch, oright]; Daniel@0: nonleaves = find(order>nb); Daniel@0: end Daniel@0: [dummy,co] = sort(order); Daniel@0: sC.coord = derivative_average(sC,co'); Daniel@0: H = hsv(nb+1); Daniel@0: sC.color = derivative_average(sC,H(co,:)); Daniel@0: return; Daniel@0: Daniel@0: function sC = part2sC(part) Daniel@0: Daniel@0: nb = max(part); Daniel@0: nc = nb+1; Daniel@0: sC = clstruct(nb,nc); Daniel@0: sC.base = part; Daniel@0: sC.parent(1:nb) = nc; Daniel@0: sC.children{nc} = [1:nb]; Daniel@0: co = [1:nb]'; Daniel@0: sC.coord = derivative_average(sC,co); Daniel@0: H = hsv(nb+1); Daniel@0: sC.color = derivative_average(sC,H(1:nb,:)); Daniel@0: return; Daniel@0: Daniel@0: function [sC,old2new] = removecluster(sC,ind) Daniel@0: Daniel@0: old2new = [1:sC.nc]; Daniel@0: parent_ind = sC.parent(ind); Daniel@0: ch = sC.children{ind}; Daniel@0: if ~parent_ind, Daniel@0: % trying to remove root cluster - no go Daniel@0: return; Daniel@0: elseif ~any(ch), Daniel@0: % trying to remove a base cluster - no go Daniel@0: return; Daniel@0: else Daniel@0: % ok, proceed Daniel@0: old2new = [1:ind-1 0 ind:sC.nc-1]; Daniel@0: % update parent and child fields Daniel@0: sC.parent(ch) = parent_ind; Daniel@0: sC.children{parent_ind} = setdiff([sC.children{parent_ind}, ch],ind); Daniel@0: % remove old cluster Daniel@0: j = [1:ind-1, ind+1:sC.nc]; Daniel@0: sC.parent = sC.parent(j); Daniel@0: sC.children = sC.children(j); Daniel@0: sC.color = sC.color(j,:); Daniel@0: sC.coord = sC.coord(j,:); Daniel@0: sC.nc = sC.nc-1; Daniel@0: % update old indeces to new indices Daniel@0: sC.parent = old2new(sC.parent); Daniel@0: for i=1:sC.nc, sC.children{i} = old2new(sC.children{i}); end Daniel@0: end Daniel@0: return; Daniel@0: Daniel@0: function [sC,old2new,inew] = addmergedcluster(sC,inds) Daniel@0: Daniel@0: old2new = [1:sC.nc]; Daniel@0: inew = 0; Daniel@0: p_inds = sC.parent(inds); Daniel@0: if ~all(p_inds(1)==p_inds), Daniel@0: % clusters are not siblings - no go Daniel@0: return; Daniel@0: end Daniel@0: parent_ind = p_inds(1); Daniel@0: if isempty(setdiff(sC.children{parent_ind},inds)), Daniel@0: % such a merged cluster exists already Daniel@0: return; Daniel@0: else Daniel@0: % ok, proceed Daniel@0: inew = parent_ind; Daniel@0: old2new = [1:inew-1,inew+1:sC.nc+1]; Daniel@0: % add the new cluster (=copy of parent_ind) Daniel@0: j = [1:inew,inew:sC.nc]; Daniel@0: sC.parent = sC.parent(j); Daniel@0: sC.children = sC.children(j); Daniel@0: sC.color = sC.color(j,:); Daniel@0: sC.coord = sC.coord(j,:); Daniel@0: sC.nc = sC.nc+1; Daniel@0: % update old indeces to new indices Daniel@0: sC.parent = old2new(sC.parent); Daniel@0: for i=1:sC.nc, sC.children{i} = old2new(sC.children{i}); end Daniel@0: inds = old2new(inds); Daniel@0: parent_ind = old2new(parent_ind); Daniel@0: % update parent, child, color and coord fields Daniel@0: sC.parent(inds) = inew; Daniel@0: sC.parent(inew) = parent_ind; Daniel@0: sC.children{inew} = inds; Daniel@0: sC.children{parent_ind} = [setdiff(sC.children{parent_ind}, inds), inew]; Daniel@0: b = baseind(sC,inew); Daniel@0: sC.color(inew,:) = mean(sC.color(b,:)); Daniel@0: sC.coord(inew,:) = mean(sC.coord(b,:)); Daniel@0: end Daniel@0: return; Daniel@0: Daniel@0: function C = derivative_average(sC,Cbase) Daniel@0: Daniel@0: [n dim] = size(Cbase); Daniel@0: if n ~= sC.nb, error('Color / Coord matrix should have nb rows'); end Daniel@0: C = zeros(sC.nc,dim); Daniel@0: for i=1:sC.nc, C(i,:) = mean(Cbase(baseind(sC,i),:)); end Daniel@0: return; Daniel@0: Daniel@0: function bi = baseind(sC,ind) Daniel@0: Daniel@0: bi = [ind]; Daniel@0: i = 1; Daniel@0: while i<=length(bi), bi = [bi, sC.children{bi(i)}]; end Daniel@0: bi = bi(bi<=sC.nb); Daniel@0: return; Daniel@0: Daniel@0: Daniel@0: Daniel@0: