annotate toolboxes/MIRtoolbox1.3.2/somtoolbox/som_clstruct.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
function sC = som_clstruct(Z,varargin)

%SOM_CLSTRUCT Create a clustering struct or set its field values.
%
%  sC = som_clstruct(Z, [argID, value, ...])
%
%    Z  = linkage(pdist(sM.codebook));
%    sC = som_clstruct(Z);
%    sC = som_clstruct(sC,'coord',som_vis_coords(lattice,msize));
%    sC = som_clstruct(sC,'color',som_colorcode(sM));
%    sC = som_clstruct(sC,'base',sC.base(som_bmus(sM,sD)));
%
%  Input and output arguments ([]'s are optional):
%   Z         (matrix) size clen-1 x 3, where clen is the number of
%                      base clusters. This is a clustering matrix
%                      similar to that produced by LINKAGE in
%                      Statistical Toolbox. See SOM_LINKAGE.
%             (struct) clustering struct (as produced by this function)
%   [argID,   (string) See below. Each pair is the fieldname and
%    value]   (varies) the value to be given to that field.
%
%   sC        (struct) clustering struct
%
%  The clustering struct is based on the assumption that there
%  is a base partitioning of the SOM (or data) which is saved in
%  the .base field of the struct. Then a hierarchical clustering
%  is applied to this base partitioning. The results are saved to
%  the .tree field of the struct. Each cluster (base and combined)
%  also has three properties: height, coordinate and color, which
%  are used in the visualizations. The fields of the struct are:
%    .type   (string) 'som_clustering'
%    .name   (string) Identifier for the clustering.
%    .tree   (matrix) Size clen-1 x 3, as argument Z above.
%    .base   (vector) Size dlen x 1, the basic groups of data
%                     forming the base clusters, e.g. as a result
%                     of partitive clustering. Allowed values are
%                      1:clen indicating the base cluster
%                             to which the data belongs.
%                      NaN    indicating that the data has
%                             been ignored in the clustering
%                     By default [1:clen].
%    .height (vector) Size 2*clen-1 x 1, (clustering) height for each
%                     cluster. By default 0 for each base cluster and
%                     .tree(:,3) for the others.
%    .coord  (matrix) Size 2*clen-1 x *, coordinate for each cluster.
%                     By default the coordinates are set so that
%                     the base clusters are ordered on a line, and the
%                     position of each combined cluster is the average
%                     of the base clusters that constitute it.
%    .color  (matrix) Size 2*clen-1 x 3, color for each cluster.
%                     By default the colors are set so that the
%                     base clusters are ordered on a line, like above,
%                     and then colors are assigned from the 'hsv'
%                     colormap to the base clusters. The color
%                     of each combined cluster is averaged as above.
%
%  Height, coord and color can also be specified in alternate forms:
%    'height' (vector) size 2*clen-1 x 1, if given explicitly
%                      size clen-1 x 1, specified heights of the
%                         combined clusters (the base cluster heights
%                         are all = 0)
%                      size 0 x 0, default value is used
%    'coord'  (matrix) size 2*clen-1 x *, if given explicitly
%                      size clen x *, to give coordinates for base
%                         clusters; the coordinates of combined
%                         clusters are averaged from these
%                      size dlen x *, to give coordinates of the
%                         original data: the cluster coordinates are
%                         averaged from these based on base clusters
%                      size 0 x 0, default value is used
%    'color'  (matrix) as 'coord'
%
%  Errors are raised when
%    - a recognized argID is the last argument (its value is missing),
%    - the base vector contains a value outside 1:clen (NaN is allowed),
%    - the coord / color / height sizes match none of the forms above.
%  Unrecognized argIDs are reported with DISP and skipped.
%
%  See also SOM_CLPLOT, SOM_CLVALIDITY, SOM_CLGET, SOM_CLLINKAGE.

% Copyright (c) 2000 by the SOM toolbox programming team.
% Contributed to SOM Toolbox on XXX by Juha Vesanto
% http://www.cis.hut.fi/projects/somtoolbox/

% Version 2.0beta juuso 180800

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% input arguments

% Start either from the fields of an existing clustering struct or
% from empty values, which select the defaults further below.
if isstruct(Z),
  base   = Z.base;
  color  = Z.color;
  coord  = Z.coord;
  height = Z.height;
  name   = Z.name;
  Z      = Z.tree;
else
  base   = [];
  color  = [];
  coord  = [];
  height = [];
  name   = '';
end
clen = size(Z,1)+1;   % each row of the tree merges two clusters

% Parse the argID/value pairs.
fieldids = {'tree','base','color','coord','height','name'};
i = 1;
while i<=length(varargin),
  argok = 1;
  if ischar(varargin{i}) & any(strcmp(varargin{i},fieldids)),
    argID = varargin{i};
    % A recognized argID must be followed by its value; fail with a
    % clear message instead of an index-out-of-range error.
    if i==length(varargin),
      error(['(som_clstruct) Missing value for argument ''' argID '''.']);
    end
    i = i+1;
    value = varargin{i};
    switch argID,
     case 'tree',   Z = value; clen = size(Z,1)+1;
     case 'base',   base = value;
     case 'color',  color = value;
     case 'coord',  coord = value;
     case 'height', height = value;
     case 'name',   name = value;
    end
  else
    argok = 0;
  end
  % i+1 is the position in the full argument list (Z is argument #1)
  if ~argok, disp(['(som_clstruct) Ignoring invalid argument #' num2str(i+1)]); end
  i = i+1;
end

if isempty(base),
  dlen = clen;
  base = 1:dlen;
else
  dlen = length(base);
  % Compare element-wise: every label must lie in 1:clen. Comparisons
  % with NaN are false, so NaN ("ignored data") labels are accepted.
  if any(base(:)>clen) | any(base(:)<1),
    error('Incorrect base partition vector.');
  end
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% analysis of hierarchy

% Order of base clusters: start from the root cluster (index 2*clen-1)
% and repeatedly replace the first combined cluster in the list with its
% two children until only base clusters (indices <= clen) remain.
order = 2*clen-1;
nonleaves = find(order>clen);   % empty when the tree has no merges (clen==1)
while any(nonleaves),
  j  = nonleaves(1);
  ch = Z(order(j)-clen,1:2);    % children of combined cluster order(j)
  if j==1, oleft = []; else oleft = order(1:(j-1)); end
  if j==length(order), oright = []; else oright = order((j+1):length(order)); end
  order = [oleft, ch, oright];
  nonleaves = find(order>clen);
end

% Base cluster indices for each non-base cluster. Row i of the tree
% defines cluster clen+i; a child that is itself a combined cluster is
% replaced by the base clusters already collected for it.
basecl = cell(clen-1,1);
for i=1:clen-1,
  c1 = Z(i,1); if c1>clen, c1 = basecl{c1-clen}; end
  c2 = Z(i,2); if c2>clen, c2 = basecl{c2-clen}; end
  basecl{i} = [c1 c2];
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% set coordinates, color and height and make the struct

% coordinates
if size(coord,1)~=2*clen-1,   % otherwise given explicitly: ok already
  if size(coord,1)==0,        % the default: position along the line
    [dummy,coord] = sort(order);
    coord = coord';
  elseif size(coord,1)==dlen & dlen>clen,   % given for original data
    codata = coord;
    coord  = zeros(clen,size(coord,2));
    for i=1:clen, coord(i,:) = mean(codata(base==i,:),1); end
  end
  if size(coord,1)==clen,     % average from base clusters
    coord = [coord; zeros(clen-1,size(coord,2))];
    for i=1:clen-1, coord(i+clen,:) = mean(coord(basecl{i},:),1); end
  else
    error('Incorrect coordinate matrix.');
  end
end

% color
if size(color,1)~=2*clen-1,   % otherwise given explicitly: ok already
  if size(color,1)==0,        % the default: hsv colors along the line
    color(order,:) = hsv(length(order));
  elseif size(color,1)==dlen & dlen>clen,   % given for original data
    codata = color;
    color  = zeros(clen,3);
    for i=1:clen, color(i,:) = mean(codata(base==i,:),1); end
  end
  if size(color,1)==clen,     % average from base clusters
    color = [color; zeros(clen-1,3)];
    for i=1:clen-1, color(i+clen,:) = mean(color(basecl{i},:),1); end
  else
    error('Incorrect color matrix.');
  end
end

% height
if isempty(height),
  height = [zeros(clen,1); Z(:,3)];
elseif length(height)==clen-1,   % heights of the combined clusters only
  if size(height,2)==clen-1, height = height'; end
  height = [zeros(clen,1); height];
elseif length(height)~=2*clen-1,
  error('Incorrect height vector.');
end

% make the struct
sC = struct('type','som_clustering',...
            'name',name,'base',base,'tree',Z,...
            'color',color,'coord',coord,'height',height);
return;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%