view toolboxes/MIRtoolbox1.3.2/somtoolbox/som_clset.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line source
function [sC,old2new,newi] = som_clset(sC,action,par1,par2)

% SOM_CLSET Create and/or set values in the som_clustering struct.
%
%   first argument
%     sC       (struct) a som_clustering struct
%     Z        (matrix) size nb-1 x 3, as given by LINKAGE function
%     base     (vector) size dlen x 1, a partitioning of the data
%
%   actions    
%     'remove'           removes the indicated clusters (par1: vector)
%     'add'              add a cluster by making a combination of the indicated
%                        clusters (par1: vector)
%     %'move'             moves a child cluster (par1: scalar) from a parent to another
%     %                   (par2: vector 1 x 2)
%     'merge'            like 'add', followed by removing the indicated clusters (par1: vector)
%     %'split'            the indicated cluster (par1: scalar) is partitioned into indicated
%     %                   parts (par2: vector), which are then added, while the indicated cluster
%     %                   (par1) is removed
%     'coord'            sets the coordinates of base clusters (par1: matrix nb x *), and 
%                        recalculates coordinates of the derived clusters (by averaging base cluster
%                        coordinates)
%     'color'            sets the colors of base clusters (par1: matrix nb x 3), and recalculates
%                        colors of the derived clusters (as averages of base cluster colors)
%                        
%   sC
%     .type     (string) 'som_clustering'
%     .name     (string) Identifier for the clustering.
%     .nb       (scalar) Number of base clusters in the clustering.
%     .base     (vector) Size dlen x 1, the basic groups of data 
%                        forming the base clusters, e.g. as a result 
%                        of partitive clustering. Allowed values are 
%                         1:nb   indicating the base cluster
%                                to which the data belongs to. 
%                         NaN    indicating that the data has
%                                been ignored in the clustering                        
%     .nc       (scalar) Number of clusters in the clustering (nb + derived clusters).
%     .children (cellarray) size nc x 1, each cell gives the list of indeces
%                        of child clusters for the cluster
%     .parent   (vector) size nc x 1, the index of parent of each cluster 
%                        (or zero if the cluster does not have a parent)
%     .coord    (matrix) size nc x *, visualization coordinates for each cluster
%                        By default the coordinates are set so that 
%                        the base clusters are ordered on a line, and the
%                        position of each combined cluster is average of 
%                        the base clusters that constitute it.
%     .color    (matrix) size nc x 3, color for each cluster. 
%                        By default the colors are set so that the 
%                        base clusters are ordered on a line,
%                        and then colors are assigned from the 'hsv' 
%                        colormap to the base clusters. The color
%                        of each combined cluster is average as above.
%     .cldist   (string) Default cluster distance function.

inew = []; 
if isstruct(sC), 
    % it should be a som_clustering struct
    old2new = [1:sC.nc];
elseif size(sC,2)==3, 
    % assume it is a cluster hierarchy matrix Z 
    sC = Z2sC(sC); 
    old2new = [1:sC.nc];
else
    % assume it is a partitioning vector
    base = sC; 
    u = unique(base(isfinite(base)));
    old2new = sparse(u,1,1:length(u));
    base = old2new(base);
    sC = part2sC(base); 
end 

switch action, 
case 'remove',        
    for i=1:length(par1),         
        [sC,o2n] = removecluster(sC,old2new(par1(i)));
        old2new = o2n(old2new);
    end 
case 'add', 
    [sC,old2new,inew] = addmergedcluster(sC,par1);    
case 'move',
    % not implemented yet
case 'split', 
    % not implemented yet
case 'merge', 
    [sC,old2new,inew] = addmergedcluster(sC,par1);
    for i=1:length(par1), 
        [sC,o2n] = removecluster(sC,old2new(par1(i)));
        old2new = o2n(old2new);
    end 
case 'color', 
    sC.color = derivative_average(sC,par1);
case 'coord',
    sC.coord = derivative_average(sC,par1);
end 

return;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% subfunctions

function sC = clstruct(nb,nc)

    sC = struct('type','som_clustering',...
                'name','','base',[],'nb',nb,'nc',nc,...
                'parent',[],'children',[],'coord',[],'color',[],'cldist','centroid');
    sC.base = [1:nb]; 
    sC.parent = zeros(nc,1);
    sC.children = cell(nc,1); sC.children(:) = {[]}; 
    sC.coord = zeros(nc,2);
    sC.color = zeros(nc,3);
    return;

function Z = sC2Z(sC,height)

    if nargin<2, height = 'level'; end

    root   = find(sC.parent==0); 
    order  = [root]; 
    ch     = sC.children(root); 
    while any(ch), i = ch(1); order = [ch(1), order]; ch = [ch(2:end), sC.children{i}]; end 

    he = zeros(sC.nc,1); 
    if strcmp(height,'level'), 
        ch = sC.children{root}; 
        while any(ch),
            i = ch(1); he(i) = he(sC.parent(i))+1; 
            ch = [ch(2:end), sC.children{i}]; 
        end 
        he = max(he)-he; 
    elseif strcmp(height,'level2'), 
        for i=order, if any(sC.children{i}), he(i) = max(he(sC.children{i}))+1; end, end
    else
        %he = som_cldist ( between children )
    end 
    
    Z = zeros(sC.nb-1,3);    
    i = sC.nb-1; 
    inds = root; 
    while i>0, 
        ch = sC.children{inds(1)}; h = he(inds(1)); inds = [inds(2:end), ch]; 
        if length(ch)>=2,
            for k=1:length(ch)-2, Z(i,:) = [i-1, ch(k), h]; i = i - 1; end
            Z(i,:) = [ch(end-1) ch(end) h]; i = i - 1;             
        end 
    end 
    return;

function sC = Z2sC(Z)

    nb        = size(Z,1)+1;
    nc        = 2*nb-1;
    sC        = clstruct(nb,nc);
    sC.base   = [1:nb];
    for i=1:nc, 
        j = find(Z(:,1)==i | Z(:,2)==i); 
        sC.parent(i) = nb+j;
        sC.children{sC.parent(i)}(end+1) = i; 
    end 
    % coords and color
    order = nc; 
    nonleaves = 1; 
    while any(nonleaves), 
        j = nonleaves(1); 
        ch = sC.children{order(j)};
        if j==1, oleft = []; else oleft = order(1:(j-1)); end
        if j==length(order), oright = []; else oright = order((j+1):length(order)); end
        order = [oleft, ch, oright];
        nonleaves = find(order>nb); 
    end
    [dummy,co] = sort(order);     
    sC.coord   = derivative_average(sC,co');
    H          = hsv(nb+1);
    sC.color   = derivative_average(sC,H(co,:));    
    return;
    
function sC = part2sC(part)

    nb      = max(part); 
    nc      = nb+1; 
    sC      = clstruct(nb,nc);
    sC.base = part; 
    sC.parent(1:nb) = nc; 
    sC.children{nc} = [1:nb]; 
    co       = [1:nb]'; 
    sC.coord = derivative_average(sC,co);
    H        = hsv(nb+1);
    sC.color = derivative_average(sC,H(1:nb,:));
    return;

function [sC,old2new] = removecluster(sC,ind)
  
    old2new = [1:sC.nc]; 
    parent_ind = sC.parent(ind);
    ch = sC.children{ind};
    if ~parent_ind, 
        % trying to remove root cluster - no go
        return; 
    elseif ~any(ch), 
        % trying to remove a base cluster - no go
        return;
    else
        % ok, proceed
        old2new = [1:ind-1 0 ind:sC.nc-1];
        % update parent and child fields
        sC.parent(ch) = parent_ind;
        sC.children{parent_ind} = setdiff([sC.children{parent_ind}, ch],ind);
        % remove old cluster
        j = [1:ind-1, ind+1:sC.nc]; 
        sC.parent   = sC.parent(j);
        sC.children = sC.children(j);
        sC.color    = sC.color(j,:);
        sC.coord    = sC.coord(j,:);
        sC.nc       = sC.nc-1; 
        % update old indeces to new indices
        sC.parent = old2new(sC.parent);
        for i=1:sC.nc, sC.children{i} = old2new(sC.children{i}); end
    end     
    return;

function [sC,old2new,inew] = addmergedcluster(sC,inds)

    old2new    = [1:sC.nc]; 
    inew       = 0; 
    p_inds     = sC.parent(inds); 
    if ~all(p_inds(1)==p_inds),  
        % clusters are not siblings - no go
        return;
    end
    parent_ind = p_inds(1); 
    if isempty(setdiff(sC.children{parent_ind},inds)),  
        % such a merged cluster exists already
        return;     
    else
        % ok, proceed
        inew = parent_ind;
        old2new = [1:inew-1,inew+1:sC.nc+1];
        % add the new cluster (=copy of parent_ind) 
        j = [1:inew,inew:sC.nc];
        sC.parent   = sC.parent(j);
        sC.children = sC.children(j);
        sC.color    = sC.color(j,:);
        sC.coord    = sC.coord(j,:);
        sC.nc       = sC.nc+1;
        % update old indeces to new indices
        sC.parent = old2new(sC.parent);
        for i=1:sC.nc, sC.children{i} = old2new(sC.children{i}); end
        inds = old2new(inds);
        parent_ind = old2new(parent_ind);
        % update parent, child, color and coord fields
        sC.parent(inds)         = inew; 
        sC.parent(inew)         = parent_ind;
        sC.children{inew}       = inds; 
        sC.children{parent_ind} = [setdiff(sC.children{parent_ind}, inds), inew];
        b = baseind(sC,inew); 
        sC.color(inew,:)        = mean(sC.color(b,:));
        sC.coord(inew,:)        = mean(sC.coord(b,:));
    end    
    return;
    
function C = derivative_average(sC,Cbase)

    [n dim] = size(Cbase);
    if n ~= sC.nb, error('Color / Coord matrix should have nb rows'); end
    C = zeros(sC.nc,dim);     
    for i=1:sC.nc, C(i,:) = mean(Cbase(baseind(sC,i),:)); end   
    return;
    
function bi = baseind(sC,ind)

    bi = [ind]; 
    i = 1; 
    while i<=length(bi), bi = [bi, sC.children{bi(i)}]; end 
    bi = bi(bi<=sC.nb);
    return;