view toolboxes/MIRtoolbox1.3.2/somtoolbox/som_clstruct.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line source
function sC = som_clstruct(Z,varargin)

%SOM_CLSTRUCT Create a clustering struct or set its field values.
%
%  sC = som_clstruct(Z, [argID, value, ...]) 
%
%    Z  = linkage(pdist(sM.codebook));
%    sC = som_clstruct(Z); 
%    sC = som_clstruct(sC,'coord',som_vis_coords(lattice,msize));
%    sC = som_clstruct(sC,'color',som_colorcode(sM));
%    sC = som_clstruct(sC,'base',sC.base(som_bmus(sM,sD)));
%
%  Input and output arguments ([]'s are optional): 
%   Z         (matrix) size clen-1 x 3, where clen is the number of 
%                      base clusters. This is a clustering matrix 
%                      similar to that produced by LINKAGE in 
%                      Statistics Toolbox. See SOM_LINKAGE.
%             (struct) clustering struct (as produced by this function)
%   [argID,   (string) See below. Each pair is the fieldname and 
%    value]   (varies) the value to be given to that field.
%                      Recognized argIDs: 'tree', 'base', 'color', 
%                      'coord', 'height' and 'name'. Unrecognized 
%                      arguments are reported and ignored; a recognized
%                      argID without a following value is an error.
%
%   sC        (struct) clustering struct
% 
%   The clustering struct is based on the assumption that there 
%   is a base partitioning of the SOM (or data) which is saved in 
%   the .base field of the struct. Then a hierarchical clustering
%   is applied to this base partitioning. The results are saved to 
%   .tree field of the struct. Each cluster (base and combined)
%   also has three properties: height, coordinate and color, which 
%   are used in the visualizations. The fields of the struct are:
%     .type   (string) 'som_clustering'
%     .name   (string) Identifier for the clustering.
%     .tree   (matrix) Size clen-1 x 3, as argument Z above. 
%     .base   (vector) Size dlen x 1, the basic groups of data 
%                      forming the base clusters, e.g. as a result 
%                      of partitive clustering. Allowed values are 
%                       1:clen indicating the base cluster
%                              to which the data belongs. 
%                       NaN    indicating that the data has
%                              been ignored in the clustering
%                      By default [1:clen]. 
%     .height (vector) Size 2*clen-1 x 1, (clustering) height for each 
%                      cluster. By default 0 for each base cluster and
%                      .tree(:,3) for the others.
%     .coord  (matrix) Size 2*clen-1 x *, coordinate for each cluster, 
%                      By default the coordinates are set so that 
%                      the base clusters are ordered on a line, and the
%                      position of each combined cluster is average of 
%                      the base clusters that constitute it.
%     .color  (matrix) Size 2*clen-1 x 3, color for each cluster. 
%                      By default the colors are set so that the 
%                      base clusters are ordered on a line, like above,
%                      and then colors are assigned from the 'hsv' 
%                      colormap to the base clusters. The color
%                      of each combined cluster is average as above.
%
% Height, coord and color can also be specified in alternate forms:
%   'height' (vector) size 2*clen-1 x 1, if given explicitly
%                     size clen-1 x 1, specified heights of the 
%                          combined clusters (the base cluster heights
%                          are all = 0)
%                     size 0 x 0, default value is used
%   'coord'  (matrix) size 2*clen-1 x *, if given explicitly
%                     size clen x *, to give coordinates for base 
%                          clusters; the coordinate of combined clusters
%                          are averaged from these
%                     size dlen x *, to give coordinates of the 
%                          original data: the cluster coordinates are
%                          averaged from these based on base clusters
%                     size 0 x 0, default value is used
%   'color'  (matrix) as 'coord'
%
% Errors are raised if the base vector contains values outside 1:clen
% (NaNs excepted), or if the coord, color or height arguments have a 
% size that matches none of the forms listed above.
%
% See also  SOM_CLPLOT, SOM_CLVALIDITY, SOM_CLGET, SOM_CLLINKAGE.

% Copyright (c) 2000 by the SOM toolbox programming team.
% Contributed to SOM Toolbox on XXX by Juha Vesanto
% http://www.cis.hut.fi/projects/somtoolbox/

% Version 2.0beta juuso 180800

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% start either from the fields of an existing clustering struct, 
% or from empty values which are replaced by defaults further below
if isstruct(Z), 
  base = Z.base; 
  color = Z.color; 
  coord = Z.coord; 
  height = Z.height; 
  name = Z.name; 
  Z = Z.tree; 
else
  base  = []; 
  color = []; 
  coord = []; 
  height = []; 
  name = ''; 
end    
clen  = size(Z,1)+1; % each row of the tree merges two clusters

% parse the argID/value pairs
i=1; 
nargs = length(varargin); 
while i<=nargs, 
  argok = 1; 
  if ischar(varargin{i}), 
    argID = varargin{i}; 
    switch argID, 
     case {'tree','base','color','coord','height','name'}, 
      % every recognized argID consumes the following argument as its
      % value: fail with a clear message if that value is missing
      if i==nargs, 
        error(['(som_clstruct) Missing value for argument ''' argID '''.']); 
      end
      i=i+1; value = varargin{i}; 
      switch argID, 
       case 'tree',   Z = value; clen = size(Z,1)+1;
       case 'base',   base = value; 
       case 'color',  color = value; 
       case 'coord',  coord = value; 
       case 'height', height = value; 
       case 'name',   name = value; 
      end
     otherwise argok=0; 
    end
  else argok = 0; 
  end
  % varargin{i} is the (i+1)'th argument of the function call
  if ~argok, disp(['(som_clstruct) Ignoring invalid argument #' num2str(i+1)]); end
  i = i+1; 
end

if isempty(base), 
  dlen = clen; 
  base = 1:dlen; 
else
  dlen = length(base); 
  % the comparison must be made elementwise before ANY is applied; 
  % NaNs (= ignored data) pass because comparisons with NaN are false
  if any(base>clen | base<1), error('Incorrect base partition vector.'); end
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% analysis of hierarchy

% order of base clusters: start from the top cluster (index 2*clen-1) 
% and keep replacing the first combined cluster in the list with its 
% two children until only base clusters (indices <= clen) remain
order = 2*clen-1; 
nonleaves = find(order>clen); % empty if the tree has no combined clusters
while any(nonleaves), 
  j = nonleaves(1); 
  ch = Z(order(j)-clen,1:2); % combined cluster k is row k-clen of the tree
  if j==1, oleft = []; else oleft = order(1:(j-1)); end
  if j==length(order), oright = []; else oright = order((j+1):length(order)); end
  order = [oleft, ch, oright];
  nonleaves = find(order>clen); 
end

% base cluster indices for each non-base cluster; a child which is 
% itself a combined cluster is replaced by its own base clusters, 
% which have been resolved already since it is on an earlier row
basecl = cell(clen-1,1); 
for i=1:clen-1, 
  c1 = Z(i,1); if c1>clen, c1 = basecl{c1-clen}; end
  c2 = Z(i,2); if c2>clen, c2 = basecl{c2-clen}; end
  basecl{i} = [c1 c2];   
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% set coordinates, color and height and make the struct

% coordinates
if size(coord,1)==2*clen-1, % this is ok already
else
  if size(coord,1)==0, % the default: position of the cluster in the order
    [dummy,coord] = sort(order); 
    coord = coord'; 
  elseif size(coord,1)==dlen & dlen>clen, % coordinates given for original data
    codata = coord; 
    coord = zeros(clen,size(coord,2)); 
    for i=1:clen, coord(i,:) = mean(codata(find(base==i),:),1); end  
  end
  % at this point the coordinates of the base clusters should be known
  if size(coord,1)==clen, % average from base clusters
    coord = [coord; zeros(clen-1,size(coord,2))]; 
    for i=1:clen-1, coord(i+clen,:) = mean(coord(basecl{i},:),1); end
  else
    error('Incorrect coordinate matrix.'); 
  end
end

% color
if size(color,1)==2*clen-1, % this is ok already
else
  if size(color,1)==0, % the default: hsv colors along the order
    color(order,:) = hsv(length(order)); 
  elseif size(color,1)==dlen & dlen>clen, % colors given for original data
    codata = color; 
    color = zeros(clen,3); 
    for i=1:clen, color(i,:) = mean(codata(find(base==i),:),1); end  
  end
  % at this point the colors of the base clusters should be known
  if size(color,1)==clen, % average from base clusters
    color = [color; zeros(clen-1,3)]; 
    for i=1:clen-1, color(i+clen,:) = mean(color(basecl{i},:),1); end
  else
    error('Incorrect color matrix.'); 
  end
end

% height 
if isempty(height), 
  % the default: zero for base clusters, tree height for combined ones
  height = [zeros(clen,1); Z(:,3)]; 
elseif length(height)==clen-1, 
  % heights given only for the combined clusters: make a column 
  % vector of them and prepend zeros for the base clusters
  if size(height,2)==clen-1, height = height'; end
  height = [zeros(clen,1); height]; 
elseif length(height)~=2*clen-1, 
  error('Incorrect height vector.'); 
end

% make the struct
sC = struct('type','som_clustering',...
            'name',name,'base',base,'tree',Z,...
            'color',color,'coord',coord,'height',height); 
return; 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%