wolffd@0
|
1
|
wolffd@0
|
2 %SOM_DEMO3 Self-organizing map visualization.
|
wolffd@0
|
3
|
wolffd@0
|
4 % Contributed to SOM Toolbox 2.0, February 11th, 2000 by Juha Vesanto
|
wolffd@0
|
5 % http://www.cis.hut.fi/projects/somtoolbox/
|
wolffd@0
|
6
|
wolffd@0
|
7 % Version 1.0beta juuso 071197
|
wolffd@0
|
8 % Version 2.0beta juuso 080200 070600
|
wolffd@0
|
9
|
wolffd@0
|
10 clf reset;
|
wolffd@0
|
11 figure(gcf)
|
wolffd@0
|
12 echo on
|
wolffd@0
|
13
|
wolffd@0
|
14
|
wolffd@0
|
15
|
wolffd@0
|
16
|
wolffd@0
|
17 clc
|
wolffd@0
|
18 % ==========================================================
|
wolffd@0
|
19 % SOM_DEMO3 - VISUALIZATION
|
wolffd@0
|
20 % ==========================================================
|
wolffd@0
|
21
|
wolffd@0
|
22 % som_show - Visualize map.
|
wolffd@0
|
23 % som_grid - Visualization with free coordinates.
|
wolffd@0
|
24 %
|
wolffd@0
|
25 % som_show_add - Add markers on som_show visualization.
|
wolffd@0
|
26 % som_show_clear - Remove markers from som_show visualization.
|
wolffd@0
|
27 % som_recolorbar - Refresh and rescale colorbars in som_show
|
wolffd@0
|
28 % visualization.
|
wolffd@0
|
29 %
|
wolffd@0
|
30 % som_cplane - Visualize component/color/U-matrix plane.
|
wolffd@0
|
31 % som_pieplane - Visualize prototype vectors as pie charts.
|
wolffd@0
|
32 % som_barplane - Visualize prototype vectors as bar charts.
|
wolffd@0
|
33 % som_plotplane - Visualize prototype vectors as line graphs.
|
wolffd@0
|
34 %
|
wolffd@0
|
35 % pcaproj - Projection to principal component space.
|
wolffd@0
|
36 % cca - Projection with Curvilinear Component Analysis.
|
wolffd@0
|
37 % sammon - Projection with Sammon's mapping.
|
wolffd@0
|
38 % som_umat - Calculate U-matrix.
|
wolffd@0
|
39 % som_colorcode - Color coding for the map.
|
wolffd@0
|
40 % som_normcolor - RGB values of indexed colors.
|
wolffd@0
|
41 % som_hits - Hit histograms for the map.
|
wolffd@0
|
42
|
wolffd@0
|
43 % The basic functions for SOM visualization are SOM_SHOW and
|
wolffd@0
|
44 % SOM_GRID. The SOM_SHOW has three auxiliary functions:
|
wolffd@0
|
45 % SOM_SHOW_ADD, SOM_SHOW_CLEAR and SOM_RECOLORBAR which are used
|
wolffd@0
|
46 % to add and remove markers and to control the colorbars.
|
wolffd@0
|
47 % SOM_SHOW actually uses SOM_CPLANE to make the visualizations.
|
wolffd@0
|
48 % Also SOM_{PIE,BAR,PLOT}PLANE can be used to visualize SOMs.
|
wolffd@0
|
49
|
wolffd@0
|
50 % The other functions listed above do not themselves visualize
|
wolffd@0
|
51 % anything, but their results are used in the visualizations.
|
wolffd@0
|
52
|
wolffd@0
|
53 % There's an important limitation that visualization functions have:
|
wolffd@0
|
54 % while the SOM Toolbox otherwise supports N-dimensional map grids,
|
wolffd@0
|
55 % visualization only works for 1- and 2-dimensional map grids!!!
|
wolffd@0
|
56
|
wolffd@0
|
57 pause % Strike any key to create demo data and map...
|
wolffd@0
|
58
|
wolffd@0
|
59
|
wolffd@0
|
60
|
wolffd@0
|
61
|
wolffd@0
|
62
|
wolffd@0
|
63 clc
|
wolffd@0
|
64 % DEMO DATA AND MAP
|
wolffd@0
|
65 % =================
|
wolffd@0
|
66
|
wolffd@0
|
67 % The data set constructed for this demo consists of random vectors
|
wolffd@0
|
68 % in three gaussian kernels the centers of which are at [0, 0, 0],
|
wolffd@0
|
69 % [3 3 3] and [9 0 0]. The map is trained using default parameters.
|
wolffd@0
|
70
|
wolffd@0
|
71 D1 = randn(100,3);
|
wolffd@0
|
72 D2 = randn(100,3) + 3;
|
wolffd@0
|
73 D3 = randn(100,3); D3(:,1) = D3(:,1) + 9;
|
wolffd@0
|
74
|
wolffd@0
|
75 sD = som_data_struct([D1; D2; D3],'name','Demo3 data',...
|
wolffd@0
|
76 'comp_names',{'X-coord','Y-coord','Z-coord'});
|
wolffd@0
|
77 sM = som_make(sD);
|
wolffd@0
|
78
|
wolffd@0
|
79 % Since the data (and thus the prototypes of the map) are
|
wolffd@0
|
80 % 3-dimensional, they can be directly plotted using PLOT3.
|
wolffd@0
|
81 % Below, the data is plotted using red 'o's and the map
|
wolffd@0
|
82 % prototype vectors with black '+'s.
|
wolffd@0
|
83
|
wolffd@0
|
84 plot3(sD.data(:,1),sD.data(:,2),sD.data(:,3),'ro',...
|
wolffd@0
|
85 sM.codebook(:,1),sM.codebook(:,2),sM.codebook(:,3),'k+')
|
wolffd@0
|
86 rotate3d on
|
wolffd@0
|
87
|
wolffd@0
|
88 % From the visualization it is pretty easy to see what the data is
|
wolffd@0
|
89 % like, and how the prototypes have been positioned. One can see
|
wolffd@0
|
90 % that there are three clusters, and that there are some prototype
|
wolffd@0
|
91 % vectors between the clusters, although there is actually no
|
wolffd@0
|
92 % data there. The map units corresponding to these prototypes
|
wolffd@0
|
93 % are called 'dead' or 'interpolative' map units.
|
wolffd@0
|
94
|
wolffd@0
|
95 pause % Strike any key to continue...
|
wolffd@0
|
96
|
wolffd@0
|
97
|
wolffd@0
|
98
|
wolffd@0
|
99 clc
|
wolffd@0
|
100 % VISUALIZATION OF MULTIDIMENSIONAL DATA
|
wolffd@0
|
101 % ======================================
|
wolffd@0
|
102
|
wolffd@0
|
103 % Usually visualization of data sets is not this straightforward,
|
wolffd@0
|
104 % since the dimensionality is much higher than three. In principle,
|
wolffd@0
|
105 % one can embed additional information to the visualization by
|
wolffd@0
|
106 % using properties other than position, for example color, size or
|
wolffd@0
|
107 % shape.
|
wolffd@0
|
108
|
wolffd@0
|
109 % Here the data set and map prototypes are plotted again, but
|
wolffd@0
|
110 % information of the cluster is shown using color: red for the
|
wolffd@0
|
111 % first cluster, green for the second and blue for the last.
|
wolffd@0
|
112
|
wolffd@0
|
113 plot3(sD.data(1:100,1),sD.data(1:100,2),sD.data(1:100,3),'ro',...
|
wolffd@0
|
114 sD.data(101:200,1),sD.data(101:200,2),sD.data(101:200,3),'go',...
|
wolffd@0
|
115 sD.data(201:300,1),sD.data(201:300,2),sD.data(201:300,3),'bo',...
|
wolffd@0
|
116 sM.codebook(:,1),sM.codebook(:,2),sM.codebook(:,3),'k+')
|
wolffd@0
|
117 rotate3d on
|
wolffd@0
|
118
|
wolffd@0
|
119 % However, this works only for relatively small dimensionality, say
|
wolffd@0
|
120 % less than 10. When the information is added this way, the
|
wolffd@0
|
121 % visualization becomes harder and harder to understand. Also, not
|
wolffd@0
|
122 % all properties are equal: the human visual system perceives
|
wolffd@0
|
123 % colors differently from position, not to mention the complex
|
wolffd@0
|
124 % rules governing perception of shape.
|
wolffd@0
|
125
|
wolffd@0
|
126 pause % Strike any key to learn about linking...
|
wolffd@0
|
127
|
wolffd@0
|
128
|
wolffd@0
|
129
|
wolffd@0
|
130
|
wolffd@0
|
131
|
wolffd@0
|
132 clc
|
wolffd@0
|
133 % LINKING MULTIPLE VISUALIZATIONS
|
wolffd@0
|
134 % ===============================
|
wolffd@0
|
135
|
wolffd@0
|
136 % The other option is to use *multiple visualizations*, so called
|
wolffd@0
|
137 % small multiples, instead of only one. The problem is then how to
|
wolffd@0
|
138 % link these visualizations together: one should be able to identify
|
wolffd@0
|
139 % the same object from the different visualizations.
|
wolffd@0
|
140
|
wolffd@0
|
141 % This could be done using, for example, color: each object has
|
wolffd@0
|
142 % the same color in each visualization. Another option is to use
|
wolffd@0
|
143 % similar position: each object has the same position in each
|
wolffd@0
|
144 % small multiple.
|
wolffd@0
|
145
|
wolffd@0
|
146 % For example, here are four subplots, one for each component and
|
wolffd@0
|
147 % one for cluster information, where color denotes the value and
|
wolffd@0
|
148 % position is used for linking. The 2D-position is derived by
|
wolffd@0
|
149 % projecting the data into the space spanned by its two greatest
|
wolffd@0
|
150 % eigenvectors.
|
wolffd@0
|
151
|
wolffd@0
|
152 [Pd,V,me] = pcaproj(sD.data,2); % project the data
|
wolffd@0
|
153 Pm = pcaproj(sM.codebook,V,me); % project the prototypes
|
wolffd@0
|
154 colormap(hot); % colormap used for values
|
wolffd@0
|
155
|
wolffd@0
|
156 echo off
|
wolffd@0
|
157 for c=1:3, % one subplot per data component (columns 1..3 of sD.data)
|
wolffd@0
|
158 subplot(2,2,c), cla, hold on
|
wolffd@0
|
159 som_grid('rect',[size(Pd,1) 1],'coord',Pd,'Line','none',... % data samples at their projected coordinates; grid length follows Pd instead of a fixed 300
|
wolffd@0
|
160 'MarkerColor',som_normcolor(sD.data(:,c))); % marker color derived from the value of component c
|
wolffd@0
|
161 som_grid(sM,'Coord',Pm,'Line','none','marker','+'); % map prototypes at their projected coordinates, drawn as '+'
|
wolffd@0
|
162 hold off, title(sD.comp_names{c}), xlabel('PC 1'), ylabel('PC 2');
|
wolffd@0
|
163 end
|
wolffd@0
|
164
|
wolffd@0
|
165 subplot(2,2,4), cla % fourth panel: color shows cluster membership instead of a component value
|
wolffd@0
|
166 plot(Pd(1:100,1),Pd(1:100,2),'ro',... % projected rows 1-100 (D1) as red circles
|
wolffd@0
|
167 Pd(101:200,1),Pd(101:200,2),'go',... % projected rows 101-200 (D2) as green circles
|
wolffd@0
|
168 Pd(201:300,1),Pd(201:300,2),'bo',... % projected rows 201-300 (D3) as blue circles
|
wolffd@0
|
169 Pm(:,1),Pm(:,2),'k+') % projected map prototypes as black '+'
|
wolffd@0
|
170 title('Cluster')
|
wolffd@0
|
171 echo on
|
wolffd@0
|
172
|
wolffd@0
|
173 pause % Strike any key to use color for linking...
|
wolffd@0
|
174
|
wolffd@0
|
175 % Here is another example, where color is used for linking. On the
|
wolffd@0
|
176 % top right triangle are the scatter plots of each variable without
|
wolffd@0
|
177 % color coding, and on the bottom left triangle with the color
|
wolffd@0
|
178 % coding. In the colored figures, each data sample can be
|
wolffd@0
|
179 % identified by a unique color. Well, almost identified: there are
|
wolffd@0
|
180 % quite a lot of samples with almost the same color. Color is not as
|
wolffd@0
|
181 % precise a linking method as position.
|
wolffd@0
|
182
|
wolffd@0
|
183 echo off
|
wolffd@0
|
184 Col = som_normcolor([1:size(sD.data,1)]',jet(size(sD.data,1))); % one color per data sample, taken along the jet colormap; sized from the data instead of a fixed 300
|
wolffd@0
|
185 k=1; % running subplot index in the 3x3 layout (advances with the inner loop over j)
|
wolffd@0
|
186 for i=1:3,
|
wolffd@0
|
187 for j=1:3,
|
wolffd@0
|
188 if i<j, i1=i; i2=j; else i1=j; i2=i; end % i1 <= i2: the smaller component index goes on the x-axis
|
wolffd@0
|
189 if i<j, % top right triangle: plain black scatter plot
|
wolffd@0
|
190 subplot(3,3,k); cla
|
wolffd@0
|
191 plot(sD.data(:,i1),sD.data(:,i2),'ko')
|
wolffd@0
|
192 xlabel(sD.comp_names{i1}), ylabel(sD.comp_names{i2})
|
wolffd@0
|
193 elseif i>j, % bottom left triangle: same scatter plot, each sample in its own color
|
wolffd@0
|
194 subplot(3,3,k); cla
|
wolffd@0
|
195 som_grid('rect',[size(sD.data,1) 1],'coord',sD.data(:,[i1 i2]),... % grid length follows the number of data rows
|
wolffd@0
|
196 'Line','none','MarkerColor',Col);
|
wolffd@0
|
197 xlabel(sD.comp_names{i1}), ylabel(sD.comp_names{i2})
|
wolffd@0
|
198 end % nothing is drawn for i==j
|
wolffd@0
|
199 k=k+1;
|
wolffd@0
|
200 end
|
wolffd@0
|
201 end
|
wolffd@0
|
202 echo on
|
wolffd@0
|
203
|
wolffd@0
|
204 pause % Strike any key to learn about data visualization using SOM...
|
wolffd@0
|
205
|
wolffd@0
|
206
|
wolffd@0
|
207 clc
|
wolffd@0
|
208 % DATA VISUALIZATION USING SOM
|
wolffd@0
|
209 % ============================
|
wolffd@0
|
210
|
wolffd@0
|
211 % The basic visualization functions and their usage have already
|
wolffd@0
|
212 % been introduced in SOM_DEMO2. In this demo, a more structured
|
wolffd@0
|
213 % presentation is given.
|
wolffd@0
|
214
|
wolffd@0
|
215 % Data visualization techniques using the SOM can be divided into
|
wolffd@0
|
216 % three categories based on their goal:
|
wolffd@0
|
217
|
wolffd@0
|
218 % 1. visualization of clusters and shape of the data:
|
wolffd@0
|
219 % projections, U-matrices and other distance matrices
|
wolffd@0
|
220 %
|
wolffd@0
|
221 % 2. visualization of components / variables:
|
wolffd@0
|
222 % component planes, scatter plots
|
wolffd@0
|
223 %
|
wolffd@0
|
224 % 3. visualization of data projections:
|
wolffd@0
|
225 % hit histograms, response surfaces
|
wolffd@0
|
226
|
wolffd@0
|
227 pause % Strike any key to visualize clusters with distance matrices...
|
wolffd@0
|
228
|
wolffd@0
|
229
|
wolffd@0
|
230
|
wolffd@0
|
231 clf
|
wolffd@0
|
232 clc
|
wolffd@0
|
233 % 1. VISUALIZATION OF CLUSTERS: DISTANCE MATRICES
|
wolffd@0
|
234 % ===============================================
|
wolffd@0
|
235
|
wolffd@0
|
236 % Distance matrices are typically used to show the cluster
|
wolffd@0
|
237 % structure of the SOM. They show distances between neighboring
|
wolffd@0
|
238 % units, and are thus closely related to single linkage clustering
|
wolffd@0
|
239 % techniques. The most widely used distance matrix technique is
|
wolffd@0
|
240 % the U-matrix.
|
wolffd@0
|
241
|
wolffd@0
|
242 % Here, the U-matrix of the map is shown (using all three
|
wolffd@0
|
243 % components in the distance calculation):
|
wolffd@0
|
244
|
wolffd@0
|
245 colormap(1-gray)
|
wolffd@0
|
246 som_show(sM,'umat','all');
|
wolffd@0
|
247
|
wolffd@0
|
248 pause % Strike any key to see more examples of distance matrices...
|
wolffd@0
|
249
|
wolffd@0
|
250 % The function SOM_UMAT can be used to calculate U-matrix. The
|
wolffd@0
|
251 % resulting matrix holds distances between neighboring map units,
|
wolffd@0
|
252 % as well as the median distance from each map unit to its
|
wolffd@0
|
253 % neighbors. These median distances corresponding to each map unit
|
wolffd@0
|
254 % can be easily extracted. The result is a distance matrix using
|
wolffd@0
|
255 % median distance.
|
wolffd@0
|
256
|
wolffd@0
|
257 U = som_umat(sM);
|
wolffd@0
|
258 Um = U(1:2:size(U,1),1:2:size(U,2));
|
wolffd@0
|
259
|
wolffd@0
|
260 % A related technique is to assign colors to the map units such
|
wolffd@0
|
261 % that similar map units get similar colors.
|
wolffd@0
|
262
|
wolffd@0
|
263 % Here, four clustering figures are shown:
|
wolffd@0
|
264 % - U-matrix
|
wolffd@0
|
265 % - median distance matrix (with grayscale)
|
wolffd@0
|
266 % - median distance matrix (with map unit size)
|
wolffd@0
|
267 % - similarity coloring, made by spreading a colormap
|
wolffd@0
|
268 % on top of the principal component projection of the
|
wolffd@0
|
269 % prototype vectors
|
wolffd@0
|
270
|
wolffd@0
|
271 subplot(2,2,1)
|
wolffd@0
|
272 h=som_cplane([sM.topol.lattice,'U'],sM.topol.msize, U(:));
|
wolffd@0
|
273 set(h,'Edgecolor','none'); title('U-matrix')
|
wolffd@0
|
274
|
wolffd@0
|
275 subplot(2,2,2)
|
wolffd@0
|
276 h=som_cplane(sM, Um(:));
|
wolffd@0
|
277 set(h,'Edgecolor','none'); title('D-matrix (grayscale)')
|
wolffd@0
|
278
|
wolffd@0
|
279 subplot(2,2,3)
|
wolffd@0
|
280 som_cplane(sM,'none',1-Um(:)/max(Um(:)))
|
wolffd@0
|
281 title('D-matrix (marker size)')
|
wolffd@0
|
282
|
wolffd@0
|
283 subplot(2,2,4)
|
wolffd@0
|
284 C = som_colorcode(Pm); % Pm is the PC-projection calculated earlier
|
wolffd@0
|
285 som_cplane(sM,C)
|
wolffd@0
|
286 title('Similarity coloring')
|
wolffd@0
|
287
|
wolffd@0
|
288 pause % Strike any key to visualize shape and clusters with projections...
|
wolffd@0
|
289
|
wolffd@0
|
290
|
wolffd@0
|
291
|
wolffd@0
|
292 clf
|
wolffd@0
|
293 clc
|
wolffd@0
|
294 % 1. VISUALIZATION OF CLUSTERS AND SHAPE: PROJECTIONS
|
wolffd@0
|
295 % ===================================================
|
wolffd@0
|
296
|
wolffd@0
|
297 % In vector projection, a set of high-dimensional data samples is
|
wolffd@0
|
298 % projected to a lower-dimensional space such that the distances between
|
wolffd@0
|
299 % data sample pairs are preserved as well as possible. Depending
|
wolffd@0
|
300 % on the technique, the projection may be either linear or
|
wolffd@0
|
301 % non-linear, and it may place special emphasis on preserving
|
wolffd@0
|
302 % local distances.
|
wolffd@0
|
303
|
wolffd@0
|
304 % For example SOM is a projection technique, since the prototypes
|
wolffd@0
|
305 % have well-defined positions on the 2-dimensional map grid. SOM as
|
wolffd@0
|
306 % a projection is however a very crude one. Other projection
|
wolffd@0
|
307 % techniques include the principal component projection used
|
wolffd@0
|
308 % earlier, Sammon's mapping and Curvilinear Component Analysis
|
wolffd@0
|
309 % (to name a few). These have been implemented in functions
|
wolffd@0
|
310 % PCAPROJ, SAMMON and CCA.
|
wolffd@0
|
311
|
wolffd@0
|
312 % Projecting the map prototype vectors and joining neighboring map
|
wolffd@0
|
313 % units with lines gives the SOM its characteristic net-like look.
|
wolffd@0
|
314 % The projection figures can be linked to the map planes using
|
wolffd@0
|
315 % color coding.
|
wolffd@0
|
316
|
wolffd@0
|
317 % Here is the distance matrix, color coding, a projection without
|
wolffd@0
|
318 % coloring and a projection with one. In the last projection,
|
wolffd@0
|
319 % the size of interpolating map units has been set to zero.
|
wolffd@0
|
320
|
wolffd@0
|
321 subplot(2,2,1)
|
wolffd@0
|
322 som_cplane(sM,Um(:));
|
wolffd@0
|
323 title('Distance matrix')
|
wolffd@0
|
324
|
wolffd@0
|
325 subplot(2,2,2)
|
wolffd@0
|
326 C = som_colorcode(sM,'rgb4');
|
wolffd@0
|
327 som_cplane(sM,C);
|
wolffd@0
|
328 title('Color code')
|
wolffd@0
|
329
|
wolffd@0
|
330 subplot(2,2,3)
|
wolffd@0
|
331 som_grid(sM,'Coord',Pm,'Linecolor','k');
|
wolffd@0
|
332 title('PC-projection')
|
wolffd@0
|
333
|
wolffd@0
|
334 subplot(2,2,4)
|
wolffd@0
|
335 h = som_hits(sM,sD); s=6*(h>0);
|
wolffd@0
|
336 som_grid(sM,'Coord',Pm,'MarkerColor',C,'Linecolor','k','MarkerSize',s);
|
wolffd@0
|
337 title('Colored PC-projection')
|
wolffd@0
|
338
|
wolffd@0
|
339 pause % Strike any key to visualize component planes...
|
wolffd@0
|
340
|
wolffd@0
|
341
|
wolffd@0
|
342 clf
|
wolffd@0
|
343 clc
|
wolffd@0
|
344 % 2. VISUALIZATION OF COMPONENTS: COMPONENT PLANES
|
wolffd@0
|
345 % ================================================
|
wolffd@0
|
346
|
wolffd@0
|
347 % The component plane visualization shows what kind of values the
|
wolffd@0
|
348 % prototype vectors of the map units have for different vector
|
wolffd@0
|
349 % components.
|
wolffd@0
|
350
|
wolffd@0
|
351 % Here is the U-matrix and the three component planes of the map.
|
wolffd@0
|
352
|
wolffd@0
|
353 som_show(sM)
|
wolffd@0
|
354
|
wolffd@0
|
355 pause % Strike any key to continue...
|
wolffd@0
|
356
|
wolffd@0
|
357 % Besides SOM_SHOW and SOM_CPLANE, there are three other
|
wolffd@0
|
358 % functions specifically designed for showing the values of the
|
wolffd@0
|
359 % component planes: SOM_PIEPLANE, SOM_BARPLANE, SOM_PLOTPLANE.
|
wolffd@0
|
360
|
wolffd@0
|
361 % SOM_PIEPLANE shows a single pie chart for each map unit. Each
|
wolffd@0
|
362 % pie shows the relative proportion of each component of the sum of
|
wolffd@0
|
363 % all components in that map unit. The component values must be
|
wolffd@0
|
364 % positive.
|
wolffd@0
|
365
|
wolffd@0
|
366 % SOM_BARPLANE shows a barchart in each map unit. The scaling of
|
wolffd@0
|
367 % bars can be made unit-wise or variable-wise. By default it is
|
wolffd@0
|
368 % determined variable-wise.
|
wolffd@0
|
369
|
wolffd@0
|
370 % SOM_PLOTPLANE shows a linegraph in each map unit.
|
wolffd@0
|
371
|
wolffd@0
|
372 M = som_normalize(sM.codebook,'range');
|
wolffd@0
|
373
|
wolffd@0
|
374 subplot(1,3,1)
|
wolffd@0
|
375 som_pieplane(sM, M);
|
wolffd@0
|
376 title('som\_pieplane')
|
wolffd@0
|
377
|
wolffd@0
|
378 subplot(1,3,2)
|
wolffd@0
|
379 som_barplane(sM, M, '', 'unitwise');
|
wolffd@0
|
380 title('som\_barplane')
|
wolffd@0
|
381
|
wolffd@0
|
382 subplot(1,3,3)
|
wolffd@0
|
383 som_plotplane(sM, M, 'b');
|
wolffd@0
|
384 title('som\_plotplane')
|
wolffd@0
|
385
|
wolffd@0
|
386 pause % Strike any key to visualize cluster properties...
|
wolffd@0
|
387
|
wolffd@0
|
388
|
wolffd@0
|
389
|
wolffd@0
|
390 clf
|
wolffd@0
|
391 clc
|
wolffd@0
|
392 % 2. VISUALIZATION OF COMPONENTS: CLUSTERS
|
wolffd@0
|
393 % ========================================
|
wolffd@0
|
394
|
wolffd@0
|
395 % An interesting question is of course how do the values of the
|
wolffd@0
|
396 % variables relate to the clusters: what are the values of the
|
wolffd@0
|
397 % components in the clusters, and which components are the ones
|
wolffd@0
|
398 % which *make* the clusters.
|
wolffd@0
|
399
|
wolffd@0
|
400 som_show(sM)
|
wolffd@0
|
401
|
wolffd@0
|
402 % From the U-matrix and component planes, one can easily see
|
wolffd@0
|
403 % what the typical values are in each cluster.
|
wolffd@0
|
404
|
wolffd@0
|
405 pause % Strike any key to continue...
|
wolffd@0
|
406
|
wolffd@0
|
407 % The significance of the components with respect to the clustering
|
wolffd@0
|
408 % is harder to visualize. One indication of importance is that on
|
wolffd@0
|
409 % the borders of the clusters, values of important variables change
|
wolffd@0
|
410 % very rapidly.
|
wolffd@0
|
411
|
wolffd@0
|
412 % Here, the distance matrix is calculated with respect to each
|
wolffd@0
|
413 % variable.
|
wolffd@0
|
414
|
wolffd@0
|
415 u1 = som_umat(sM,'mask',[1 0 0]'); u1=u1(1:2:size(u1,1),1:2:size(u1,2));
|
wolffd@0
|
416 u2 = som_umat(sM,'mask',[0 1 0]'); u2=u2(1:2:size(u2,1),1:2:size(u2,2));
|
wolffd@0
|
417 u3 = som_umat(sM,'mask',[0 0 1]'); u3=u3(1:2:size(u3,1),1:2:size(u3,2));
|
wolffd@0
|
418
|
wolffd@0
|
419 % Here, the distance matrices are shown, as well as a piechart
|
wolffd@0
|
420 % indicating the relative importance of each variable in each
|
wolffd@0
|
421 % map unit. The size of piecharts has been scaled by the
|
wolffd@0
|
422 % distance matrix calculated from all components.
|
wolffd@0
|
423
|
wolffd@0
|
424 subplot(2,2,1)
|
wolffd@0
|
425 som_cplane(sM,u1(:));
|
wolffd@0
|
426 title(sM.comp_names{1})
|
wolffd@0
|
427
|
wolffd@0
|
428 subplot(2,2,2)
|
wolffd@0
|
429 som_cplane(sM,u2(:));
|
wolffd@0
|
430 title(sM.comp_names{2})
|
wolffd@0
|
431
|
wolffd@0
|
432 subplot(2,2,3)
|
wolffd@0
|
433 som_cplane(sM,u3(:));
|
wolffd@0
|
434 title(sM.comp_names{3})
|
wolffd@0
|
435
|
wolffd@0
|
436 subplot(2,2,4)
|
wolffd@0
|
437 som_pieplane(sM, [u1(:), u2(:), u3(:)], hsv(3), Um(:)/max(Um(:)));
|
wolffd@0
|
438 title('Relative importance')
|
wolffd@0
|
439
|
wolffd@0
|
440 % From the last subplot, one can see that in the area where the
|
wolffd@0
|
441 % bigger cluster border is, the 'X-coord' component (red color)
|
wolffd@0
|
442 % has biggest effect, and thus is the main factor in separating
|
wolffd@0
|
443 % that cluster from the rest.
|
wolffd@0
|
444
|
wolffd@0
|
445 pause % Strike any key to learn about correlation hunting...
|
wolffd@0
|
446
|
wolffd@0
|
447
|
wolffd@0
|
448 clf
|
wolffd@0
|
449 clc
|
wolffd@0
|
450 % 2. VISUALIZATION OF COMPONENTS: CORRELATION HUNTING
|
wolffd@0
|
451 % ===================================================
|
wolffd@0
|
452
|
wolffd@0
|
453 % Finally, the component planes are often used for correlation
|
wolffd@0
|
454 % hunting. When the number of variables is high, the component
|
wolffd@0
|
455 % plane visualization offers a convenient way to visualize all
|
wolffd@0
|
456 % components at once and hunt for correlations (as opposed to
|
wolffd@0
|
457 % N*(N-1)/2 scatterplots).
|
wolffd@0
|
458
|
wolffd@0
|
459 % Hunting correlations this way is not very accurate. However, it
|
wolffd@0
|
460 % is easy to select interesting combinations for further
|
wolffd@0
|
461 % investigation.
|
wolffd@0
|
462
|
wolffd@0
|
463 % Here, the first and third components are shown with scatter
|
wolffd@0
|
464 % plot. As with projections, a color coding is used to link the
|
wolffd@0
|
465 % visualization to the map plane. In the color coding, size shows
|
wolffd@0
|
466 % the distance matrix information.
|
wolffd@0
|
467
|
wolffd@0
|
468 C = som_colorcode(sM);
|
wolffd@0
|
469 subplot(1,2,1)
|
wolffd@0
|
470 som_cplane(sM,C,1-Um(:)/max(Um(:)));
|
wolffd@0
|
471 title('Color coding + distance matrix')
|
wolffd@0
|
472
|
wolffd@0
|
473 subplot(1,2,2)
|
wolffd@0
|
474 som_grid(sM,'Coord',sM.codebook(:,[1 3]),'MarkerColor',C);
|
wolffd@0
|
475 title('Scatter plot'); xlabel(sM.comp_names{1}); ylabel(sM.comp_names{3})
|
wolffd@0
|
476 axis equal
|
wolffd@0
|
477
|
wolffd@0
|
478 pause % Strike any key to visualize data responses...
|
wolffd@0
|
479
|
wolffd@0
|
480
|
wolffd@0
|
481 clf
|
wolffd@0
|
482 clc
|
wolffd@0
|
483 % 3. DATA ON MAP
|
wolffd@0
|
484 % ==============
|
wolffd@0
|
485
|
wolffd@0
|
486 % The SOM is a map of the data manifold. An interesting question
|
wolffd@0
|
487 % then is where on the map a specific data sample is located, and
|
wolffd@0
|
488 % how accurate is that localization? One is interested in the
|
wolffd@0
|
489 % response of the map to the data sample.
|
wolffd@0
|
490
|
wolffd@0
|
491 % The simplest answer is to find the BMU of the data sample.
|
wolffd@0
|
492 % However, this gives no indication of the accuracy of the
|
wolffd@0
|
493 % match. Is the data sample close to the BMU, or is it actually
|
wolffd@0
|
494 % equally close to the neighboring map units (or even approximately
|
wolffd@0
|
495 % as close to all map units)? Sometimes accuracy doesn't really
|
wolffd@0
|
496 % matter, but if it does, it should be visualized somehow.
|
wolffd@0
|
497
|
wolffd@0
|
498 % Here are different kinds of response visualizations for two
|
wolffd@0
|
499 % vectors: [0 0 0] and [99 99 99].
|
wolffd@0
|
500 % - BMUs indicated with labels
|
wolffd@0
|
501 % - BMUs indicated with markers, relative quantization errors
|
wolffd@0
|
502 % (in this case, proportion between distances to BMU and
|
wolffd@0
|
503 % Worst-MU) with vertical lines
|
wolffd@0
|
504 % - quantization error between the samples and all map units
|
wolffd@0
|
505 % - fuzzy response (a non-linear function of quantization
|
wolffd@0
|
506 % error) of all map units
|
wolffd@0
|
507
|
wolffd@0
|
508 echo off
|
wolffd@0
|
509 [bm,qe] = som_bmus(sM,[0 0 0; 99 99 99],'all'); % distance to all map units; row 1 is sample [0 0 0], row 2 is [99 99 99]
|
wolffd@0
|
510 [dummy,ind] = sort(bm(1,:)); d0 = qe(1,ind)'; % reorder the errors of sample 1 by the sort permutation of its unit list (presumably into map-unit order -- confirm against som_bmus)
|
wolffd@0
|
511 [dummy,ind] = sort(bm(2,:)); d9 = qe(2,ind)'; % same reordering for sample 2
|
wolffd@0
|
512 bmu0 = bm(1,1); bmu9 = bm(2,1); % bmus: first entry of each row of bm
|
wolffd@0
|
513
|
wolffd@0
|
514 h0 = zeros(prod(sM.topol.msize),1); h0(bmu0) = 1; % crisp hits: one entry per map unit, 1 only at the BMU
|
wolffd@0
|
515 h9 = zeros(prod(sM.topol.msize),1); h9(bmu9) = 1;
|
wolffd@0
|
516
|
wolffd@0
|
517 lab = cell(prod(sM.topol.msize),1); % one (initially empty) label per map unit
|
wolffd@0
|
518 lab{bmu0} = '[0,0,0]'; lab{bmu9} = '[99,99,99]'; % only the two BMUs get a label
|
wolffd@0
|
519
|
wolffd@0
|
520 hf0 = som_hits(sM,[0 0 0],'fuzzy'); % fuzzy response
|
wolffd@0
|
521 hf9 = som_hits(sM,[99 99 99],'fuzzy');
|
wolffd@0
|
522
|
wolffd@0
|
523 som_show(sM,'umat',{'all','BMU'},... % plane 1: U-matrix titled 'BMU'
|
wolffd@0
|
524 'color',{d0,'Qerror 0'},'color',{hf0,'Fuzzy response 0'},... % planes 2-3: error and fuzzy response for [0 0 0]
|
wolffd@0
|
525 'empty','BMU+qerror',... % plane 4: empty plane, filled in by the calls below
|
wolffd@0
|
526 'color',{d9,'Qerror 99'},'color',{hf9,'Fuzzy response 99'}); % planes 5-6: error and fuzzy response for [99 99 99]
|
wolffd@0
|
527 som_show_add('label',lab,'Subplot',1,'Textcolor','r'); % BMU labels on plane 1
|
wolffd@0
|
528 som_show_add('hit',[h0, h9],'Subplot',4,'MarkerColor','r'); % BMU hit markers on plane 4
|
wolffd@0
|
529 hold on
|
wolffd@0
|
530 Co = som_vis_coords(sM.topol.lattice,sM.topol.msize); % per-unit coordinates used to place the lines below
|
wolffd@0
|
531 plot3(Co(bmu0,[1 1]),Co(bmu0,[2 2]),[0 10*qe(1,1)/qe(1,end)],'r-') % line along z at the BMU; height is 10 * (first error / last error) of sample 1
|
wolffd@0
|
532 plot3(Co(bmu9,[1 1]),Co(bmu9,[2 2]),[0 10*qe(2,1)/qe(2,end)],'r-') % same for sample 2
|
wolffd@0
|
533 view(3), axis equal
|
wolffd@0
|
534 echo on
|
wolffd@0
|
535
|
wolffd@0
|
536 % Here are the distances to BMU, 2-BMU and WMU:
|
wolffd@0
|
537
|
wolffd@0
|
538 qe(1,[1,2,end]) % [0 0 0]
|
wolffd@0
|
539 qe(2,[1,2,end]) % [99 99 99]
|
wolffd@0
|
540
|
wolffd@0
|
541 % One can see that for [0 0 0] the accuracy is pretty good as the
|
wolffd@0
|
542 % quantization error of the BMU is much lower than that of the
|
wolffd@0
|
543 % WMU. On the other hand [99 99 99] is very far from the map:
|
wolffd@0
|
544 % distance to BMU is almost equal to distance to WMU.
|
wolffd@0
|
545
|
wolffd@0
|
546 pause % Strike any key to visualize responses of multiple samples...
|
wolffd@0
|
547
|
wolffd@0
|
548
|
wolffd@0
|
549
|
wolffd@0
|
550 clc
|
wolffd@0
|
551 clf
|
wolffd@0
|
552 % 3. DATA ON MAP: HIT HISTOGRAMS
|
wolffd@0
|
553 % ==============================
|
wolffd@0
|
554
|
wolffd@0
|
555 % One can also investigate whole data sets using the map. When the
|
wolffd@0
|
556 % BMUs of multiple data samples are aggregated, a hit histogram
|
wolffd@0
|
557 % results. Instead of BMUs, one can also aggregate for example
|
wolffd@0
|
558 % fuzzy responses.
|
wolffd@0
|
559
|
wolffd@0
|
560 % The hit histograms (or aggregated responses) can then be compared
|
wolffd@0
|
561 % with each other.
|
wolffd@0
|
562
|
wolffd@0
|
563 % Here are hit histograms of three data sets: one with the first 50
|
wolffd@0
|
564 % vectors of the data set, one with the first 150 vectors of the data
|
wolffd@0
|
565 % set, and one with 50 randomly selected samples. The last
|
wolffd@0
|
566 % subplot shows the fuzzy response of the first data set.
|
wolffd@0
|
567
|
wolffd@0
|
568 dlen = size(sD.data,1);
|
wolffd@0
|
569 Dsample1 = sD.data(1:50,:); h1 = som_hits(sM,Dsample1);
|
wolffd@0
|
570 Dsample2 = sD.data(1:150,:); h2 = som_hits(sM,Dsample2);
|
wolffd@0
|
571 Dsample3 = sD.data(ceil(rand(50,1)*dlen),:); h3 = som_hits(sM,Dsample3);
|
wolffd@0
|
572 hf = som_hits(sM,Dsample1,'fuzzy');
|
wolffd@0
|
573
|
wolffd@0
|
574 som_show(sM,'umat','all','umat','all','umat','all','color',{hf,'Fuzzy'})
|
wolffd@0
|
575 som_show_add('hit',h1,'Subplot',1,'Markercolor','r')
|
wolffd@0
|
576 som_show_add('hit',h2,'Subplot',2,'Markercolor','r')
|
wolffd@0
|
577 som_show_add('hit',h3,'Subplot',3,'Markercolor','r')
|
wolffd@0
|
578
|
wolffd@0
|
579 pause % Strike any key to visualize trajectories...
|
wolffd@0
|
580
|
wolffd@0
|
581
|
wolffd@0
|
582
|
wolffd@0
|
583 clc
|
wolffd@0
|
584 clf
|
wolffd@0
|
585 % 3. DATA ON MAP: TRAJECTORIES
|
wolffd@0
|
586 % ============================
|
wolffd@0
|
587
|
wolffd@0
|
588 % A special data mapping technique is trajectory. If the samples
|
wolffd@0
|
589 % are ordered, forming a time-series for example, their response on
|
wolffd@0
|
590 % the map can be tracked. The function SOM_SHOW_ADD can be used to
|
wolffd@0
|
591 % show the trajectories in two different modes: 'traj' and 'comet'.
|
wolffd@0
|
592
|
wolffd@0
|
593 % Here, a series of data points is formed which goes from [9,0,0]
|
wolffd@0
|
594 % to [2,2,2]. The trajectory is plotted using the two modes.
|
wolffd@0
|
595
|
wolffd@0
|
596 Dtraj = [linspace(9,2,20); linspace(0,2,20); linspace(0,2,20)]';
|
wolffd@0
|
597 T = som_bmus(sM,Dtraj);
|
wolffd@0
|
598
|
wolffd@0
|
599 som_show(sM,'comp',[1 1]);
|
wolffd@0
|
600 som_show_add('traj',T,'Markercolor','r','TrajColor','r','subplot',1);
|
wolffd@0
|
601 som_show_add('comet',T,'MarkerColor','r','subplot',2);
|
wolffd@0
|
602
|
wolffd@0
|
603 % There's also a function SOM_TRAJECTORY which launches a GUI
|
wolffd@0
|
604 % specifically designed for displaying trajectories (in 'comet'
|
wolffd@0
|
605 % mode).
|
wolffd@0
|
606
|
wolffd@0
|
607 pause % Strike any key to learn about color handling...
|
wolffd@0
|
608
|
wolffd@0
|
609
|
wolffd@0
|
610
|
wolffd@0
|
611
|
wolffd@0
|
612 clc
|
wolffd@0
|
613 clf
|
wolffd@0
|
614 % COLOR HANDLING
|
wolffd@0
|
615 % ==============
|
wolffd@0
|
616
|
wolffd@0
|
617 % Matlab offers flexibility in the colormaps. Using the COLORMAP
|
wolffd@0
|
618 % function, the colormap may be changed. There are several useful
|
wolffd@0
|
619 % colormaps readily available, for example 'hot' and 'jet'. The
|
wolffd@0
|
620 % default number of colors in the colormaps is 64. However, it is
|
wolffd@0
|
621 % often advantageous to use fewer colors in the colormap. This way
|
wolffd@0
|
622 % the component plane visualizations become easier to interpret.
|
wolffd@0
|
623
|
wolffd@0
|
624 % Here the three component planes are visualized using the 'hot'
|
wolffd@0
|
625 % colormap and only three colors.
|
wolffd@0
|
626
|
wolffd@0
|
627 som_show(sM,'comp',[1 2 3])
|
wolffd@0
|
628 colormap(hot(3));
|
wolffd@0
|
629 som_recolorbar
|
wolffd@0
|
630
|
wolffd@0
|
631 pause % Press any key to change the colorbar labels...
|
wolffd@0
|
632
|
wolffd@0
|
633 % The function SOM_RECOLORBAR can be used to reconfigure
|
wolffd@0
|
634 % the labels beside the colorbar.
|
wolffd@0
|
635
|
wolffd@0
|
636 % Here the colorbar of the first subplot is labeled using labels
|
wolffd@0
|
637 % 'small', 'medium' and 'big' at values 0, 4 and 9. For the
|
wolffd@0
|
638 % colorbar of the second subplot, values are calculated for the
|
wolffd@0
|
639 % borders between colors.
|
wolffd@0
|
640
|
wolffd@0
|
641 som_recolorbar(1,{[0 4 9]},'',{{'small','medium','big'}});
|
wolffd@0
|
642 som_recolorbar(2,'border','');
|
wolffd@0
|
643
|
wolffd@0
|
644 pause % Press any key to learn about SOM_NORMCOLOR...
|
wolffd@0
|
645
|
wolffd@0
|
646 % Some SOM Toolbox functions do not use indexed colors if the
|
wolffd@0
|
647 % underlying Matlab function (e.g. PLOT) does not use indexed
|
wolffd@0
|
648 % colors. SOM_NORMCOLOR is a convenient function to simulate
|
wolffd@0
|
649 % indexed colors: it calculates fixed RGB colors that
|
wolffd@0
|
650 % are similar to indexed colors with the specified colormap.
|
wolffd@0
|
651
|
wolffd@0
|
652 % Here, two SOM_GRID visualizations are created. One uses the
|
wolffd@0
|
653 % 'surf' mode to show the component colors in indexed color
|
wolffd@0
|
654 % mode, and the other uses SOM_NORMCOLOR to do the same.
|
wolffd@0
|
655
|
wolffd@0
|
656 clf
|
wolffd@0
|
657 colormap(jet(64))
|
wolffd@0
|
658 subplot(1,2,1)
|
wolffd@0
|
659 som_grid(sM,'Surf',sM.codebook(:,3));
|
wolffd@0
|
660 title('Surf mode')
|
wolffd@0
|
661
|
wolffd@0
|
662 subplot(1,2,2)
|
wolffd@0
|
663 som_grid(sM,'Markercolor',som_normcolor(sM.codebook(:,3)));
|
wolffd@0
|
664 title('som\_normcolor')
|
wolffd@0
|
665
|
wolffd@0
|
666 pause % Press any key to visualize different map shapes...
|
wolffd@0
|
667
|
wolffd@0
|
668
|
wolffd@0
|
669
|
wolffd@0
|
670 clc
|
wolffd@0
|
671 clf
|
wolffd@0
|
672 % DIFFERENT MAP SHAPES
|
wolffd@0
|
673 % ====================
|
wolffd@0
|
674
|
wolffd@0
|
675 % There's no direct way to visualize cylinder or toroid maps. When
|
wolffd@0
|
676 % visualized, they are treated exactly as if they were sheet
|
wolffd@0
|
677 % shaped. However, if function SOM_UNIT_COORDS is used to provide
|
wolffd@0
|
678 % unit coordinates, then SOM_GRID can be used to visualize these
|
wolffd@0
|
679 % alternative map shapes.
|
wolffd@0
|
680
|
wolffd@0
|
681 % Here the grids of the three possible map shapes (sheet, cylinder
|
wolffd@0
|
682 % and toroid) are visualized. The last subplot shows a component
|
wolffd@0
|
683 % plane visualization of the toroid map.
|
wolffd@0
|
684
|
wolffd@0
|
685 Cor = som_unit_coords(sM.topol.msize,'hexa','sheet');
|
wolffd@0
|
686 Coc = som_unit_coords(sM.topol.msize,'hexa','cyl');
|
wolffd@0
|
687 Cot = som_unit_coords(sM.topol.msize,'hexa','toroid');
|
wolffd@0
|
688
|
wolffd@0
|
689 subplot(2,2,1)
|
wolffd@0
|
690 som_grid(sM,'Coord',Cor,'Markersize',3,'Linecolor','k');
|
wolffd@0
|
691 title('sheet'), view(0,-90), axis tight, axis equal
|
wolffd@0
|
692
|
wolffd@0
|
693 subplot(2,2,2)
|
wolffd@0
|
694 som_grid(sM,'Coord',Coc,'Markersize',3,'Linecolor','k');
|
wolffd@0
|
695 title('cylinder'), view(5,1), axis tight, axis equal
|
wolffd@0
|
696
|
wolffd@0
|
697 subplot(2,2,3)
|
wolffd@0
|
698 som_grid(sM,'Coord',Cot,'Markersize',3,'Linecolor','k');
|
wolffd@0
|
699 title('toroid'), view(-100,0), axis tight, axis equal
|
wolffd@0
|
700
|
wolffd@0
|
701 subplot(2,2,4)
|
wolffd@0
|
702 som_grid(sM,'Coord',Cot,'Surf',sM.codebook(:,3));
|
wolffd@0
|
703 colormap(jet), colorbar
|
wolffd@0
|
704 title('toroid'), view(-100,0), axis tight, axis equal
|
wolffd@0
|
705
|
wolffd@0
|
706 echo off
|