comparison toolboxes/MIRtoolbox1.3.2/somtoolbox/som_clstruct.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e9a9cd732c1e
function sC = som_clstruct(Z,varargin)

%SOM_CLSTRUCT Create a clustering struct or set its field values.
%
%  sC = som_clstruct(Z, [argID, value, ...])
%
%    Z  = linkage(pdist(sM.codebook));
%    sC = som_clstruct(Z);
%    sC = som_clstruct(sC,'coord',som_vis_coords(lattice,msize));
%    sC = som_clstruct(sC,'color',som_colorcode(sM));
%    sC = som_clstruct(sC,'base',sC.base(som_bmus(sM,sD)));
%
%  Input and output arguments ([]'s are optional):
%   Z         (matrix) size clen-1 x 3, where clen is the number of
%                      base clusters. This is a clustering matrix
%                      similar to that produced by LINKAGE in
%                      Statistical Toolbox. See SOM_LINKAGE.
%                      An empty Z denotes a single base cluster.
%             (struct) clustering struct (as produced by this function)
%   [argID,   (string) See below. Each pair is the fieldname and
%    value]   (varies) the value to be given to that field.
%
%   sC        (struct) clustering struct
%
%  The clustering struct is based on the assumption that there
%  is a base partitioning of the SOM (or data) which is saved in
%  the .base field of the struct. Then a hierarchical clustering
%  is applied to this base partitioning. The results are saved to
%  .tree field of the struct. Each cluster (base and combined)
%  has also three properties: height, coordinate and color, which
%  are used in the visualizations. The fields of the struct are:
%    .type   (string) 'som_clustering'
%    .name   (string) Identifier for the clustering.
%    .tree   (matrix) Size clen-1 x 3, as argument Z above.
%    .base   (vector) Size dlen x 1, the basic groups of data
%                     forming the base clusters, e.g. as a result
%                     of partitive clustering. Allowed values are
%                      1:clen indicating the base cluster
%                             to which the data belongs to.
%                      NaN    indicating that the data has
%                             been ignored in the clustering
%                     By default [1:clen].
%    .height (vector) Size 2*clen-1 x 1, (clustering) height for each
%                     cluster. By default 0 for each base cluster and
%                     .tree(:,3) for the others.
%    .coord  (matrix) Size 2*clen-1 x *, coordinate for each cluster,
%                     By default the coordinates are set so that
%                     the base clusters are ordered on a line, and the
%                     position of each combined cluster is average of
%                     the base clusters that constitute it.
%    .color  (matrix) Size 2*clen-1 x 3, color for each cluster.
%                     By default the colors are set so that the
%                     base clusters are ordered on a line, like above,
%                     and then colors are assigned from the 'hsv'
%                     colormap to the base clusters. The color
%                     of each combined cluster is average as above.
%
%  Height, coord and color can also be specified in alternate forms:
%   'height' (vector) size 2*clen-1 x 1, if given explicitly
%                     size clen-1 x 1, specified heights of the
%                          combined clusters (the base cluster heights
%                          are all = 0)
%                     size 0 x 0, default value is used
%   'coord'  (matrix) size 2*clen-1 x *, if given explicitly
%                     size clen x *, to give coordinates for base
%                          clusters; the coordinate of combined clusters
%                          are averaged from these
%                     size dlen x *, to give coordinates of the
%                          original data: the cluster coordinates are
%                          averaged from these based on base clusters
%                     size 0 x 0, default value is used
%   'color'  (matrix) as 'coord'
%
%  An error is raised if an argID is given without a value, if the
%  base vector contains values outside 1:clen (NaNs are allowed), or
%  if the coord, color or height arguments have an unsupported size.
%
% See also SOM_CLPLOT, SOM_CLVALIDITY, SOM_CLGET, SOM_CLLINKAGE.

% Copyright (c) 2000 by the SOM toolbox programming team.
% Contributed to SOM Toolbox on XXX by Juha Vesanto
% http://www.cis.hut.fi/projects/somtoolbox/

% Version 2.0beta juuso 180800

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% input arguments

% start either from an existing clustering struct or from scratch
if isstruct(Z),
  base   = Z.base;
  color  = Z.color;
  coord  = Z.coord;
  height = Z.height;
  name   = Z.name;
  Z      = Z.tree;
else
  base   = [];
  color  = [];
  coord  = [];
  height = [];
  name   = '';
end

% parse the argID/value pairs
i=1;
while i<=length(varargin),
  argok = 1;
  if ischar(varargin{i}),
    argID = varargin{i};
    switch argID,
     case {'tree','base','color','coord','height','name'},
      % every recognized argID consumes the next argument as its value
      if i>=length(varargin),
        error(['(som_clstruct) Missing value for argument ''' argID '''.']);
      end
      i = i+1;
      value = varargin{i};
      switch argID,
       case 'tree',   Z = value;
       case 'base',   base = value;
       case 'color',  color = value;
       case 'coord',  coord = value;
       case 'height', height = value;
       case 'name',   name = value;
      end
     otherwise argok=0;
    end
  else argok = 0;
  end
  % i+1 because Z is the first argument of the function
  if ~argok, disp(['(som_clstruct) Ignoring invalid argument #' num2str(i+1)]); end
  i = i+1;
end

% an empty tree means a single base cluster; give it the right
% number of columns so that Z(:,3) below is valid
if isempty(Z), Z = zeros(0,3); end
clen = size(Z,1)+1;

if isempty(base),
  dlen = clen;
  base = 1:dlen;
else
  dlen = length(base);
  % elementwise range check; NaNs (ignored data) pass both comparisons
  if any(base(:)>clen) | any(base(:)<1),
    error('Incorrect base partition vector.');
  end
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% analysis of hierarchy

% order of base clusters: start from the root cluster (2*clen-1) and
% keep replacing the first combined cluster in the list with its two
% children until only base clusters (indices <= clen) remain
order = 2*clen-1;
nonleaves = find(order>clen);
while any(nonleaves),
  j = nonleaves(1);
  ch = Z(order(j)-clen,1:2);
  if j==1, oleft = []; else oleft = order(1:(j-1)); end
  if j==length(order), oright = []; else oright = order((j+1):length(order)); end
  order = [oleft, ch, oright];
  nonleaves = find(order>clen);
end

% base cluster indices for each non-base cluster; row i of Z defines
% cluster clen+i, and may refer to combined clusters of earlier rows
basecl = cell(clen-1,1);
for i=1:clen-1,
  c1 = Z(i,1); if c1>clen, c1 = basecl{c1-clen}; end
  c2 = Z(i,2); if c2>clen, c2 = basecl{c2-clen}; end
  basecl{i} = [c1 c2];
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% set coordinates, color and height and make the struct

% coordinates
if size(coord,1)==2*clen-1, % this is ok already
else
  if size(coord,1)==0, % the default: position of each base cluster in 'order'
    [dummy,coord] = sort(order);
    coord = coord';
  elseif size(coord,1)==dlen & dlen>clen, % coordinates given for original data
    codata = coord;
    coord = zeros(clen,size(coord,2));
    for i=1:clen, coord(i,:) = mean(codata(base==i,:),1); end
  end
  if size(coord,1)==clen, % average from base clusters
    coord = [coord; zeros(clen-1,size(coord,2))];
    for i=1:clen-1, coord(i+clen,:) = mean(coord(basecl{i},:),1); end
  else
    error('Incorrect coordinate matrix.');
  end
end

% color
if size(color,1)==2*clen-1, % this is ok already
else
  if size(color,1)==0, % the default: hsv colors along the base cluster order
    color(order,:) = hsv(length(order));
  elseif size(color,1)==dlen & dlen>clen, % colors given for original data
    codata = color;
    color = zeros(clen,3);
    for i=1:clen, color(i,:) = mean(codata(base==i,:),1); end
  end
  if size(color,1)==clen, % average from base clusters
    color = [color; zeros(clen-1,3)];
    for i=1:clen-1, color(i+clen,:) = mean(color(basecl{i},:),1); end
  else
    error('Incorrect color matrix.');
  end
end

% height
if isempty(height),
  height = [zeros(clen,1); Z(:,3)];
elseif length(height)==clen-1, % heights given for combined clusters only
  if size(height,2)==clen-1, height = height'; end
  height = [zeros(clen,1); height];
elseif length(height)~=2*clen-1,
  error('Incorrect height vector.');
end

% make the struct
sC = struct('type','som_clustering',...
            'name',name,'base',base,'tree',Z,...
            'color',color,'coord',coord,'height',height);
return;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
