wolffd@0
|
1 function [Bmus,Qerrors] = som_bmus(sMap, sData, which_bmus, mask)
|
wolffd@0
|
2
|
wolffd@0
|
3 %SOM_BMUS Find the best-matching units from the map for the given vectors.
|
wolffd@0
|
4 %
|
wolffd@0
|
5 % [Bmus, Qerrors] = som_bmus(sMap, sData, [which], [mask])
|
wolffd@0
|
6 %
|
wolffd@0
|
7 % bmus = som_bmus(sM,sD);
|
wolffd@0
|
8 % [bmus,qerrs] = som_bmus(sM,D,[1 2 3]);
|
wolffd@0
|
9 % bmus = som_bmus(sM,D,1,[1 1 0 0 1]);
|
wolffd@0
|
10 %
|
wolffd@0
|
11 % Input and output arguments ([]'s are optional):
|
wolffd@0
|
12 % sMap (struct) map struct
|
wolffd@0
|
13 % (matrix) codebook matrix, size munits x dim
|
wolffd@0
|
14 % sData (struct) data struct
|
wolffd@0
|
15 % (matrix) data matrix, size dlen x dim
|
wolffd@0
|
16 % [which] (vector) which BMUs are returned, [1] by default
|
wolffd@0
|
17 % (string) 'all', 'best' or 'worst' meaning [1:munits],
|
wolffd@0
|
18 % [1] and [munits] respectively
|
wolffd@0
|
19 % [mask] (vector) mask vector, length=dim, sMap.mask by default
|
wolffd@0
|
20 %
|
wolffd@0
|
21 % Bmus (matrix) the requested BMUs for each data vector,
|
wolffd@0
|
22 % size dlen x length(which)
|
wolffd@0
|
23 % Qerrors (matrix) the corresponding quantization errors, size as Bmus
|
wolffd@0
|
24 %
|
wolffd@0
|
25 % NOTE: for a vector with all components NaN's, bmu=NaN and qerror=NaN
|
wolffd@0
|
26 % NOTE: the mask also effects the quantization errors
|
wolffd@0
|
27 %
|
wolffd@0
|
28 % For more help, try 'type som_bmus' or check out online documentation.
|
wolffd@0
|
29 % See also SOM_QUALITY.
|
wolffd@0
|
30
|
wolffd@0
|
31 %%%%%%%%%%%%% DETAILED DESCRIPTION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
wolffd@0
|
32 %
|
wolffd@0
|
33 % som_bmus
|
wolffd@0
|
34 %
|
wolffd@0
|
35 % PURPOSE
|
wolffd@0
|
36 %
|
wolffd@0
|
37 % Finds Best-Matching Units (BMUs) for given data vector from a given map.
|
wolffd@0
|
38 %
|
wolffd@0
|
39 % SYNTAX
|
wolffd@0
|
40 %
|
wolffd@0
|
41 % Bmus = som_bmus(sMap, sData)
|
wolffd@0
|
42 % Bmus = som_bmus(..., which)
|
wolffd@0
|
43 % Bmus = som_bmus(..., which, mask)
|
wolffd@0
|
44 % [Bmus, Qerrs] = som_bmus(...)
|
wolffd@0
|
45 %
|
wolffd@0
|
46 % DESCRIPTION
|
wolffd@0
|
47 %
|
wolffd@0
|
48 % Returns the indexes and corresponding quantization errors of the
|
wolffd@0
|
49 % vectors in sMap that best matched the vectors in sData.
|
wolffd@0
|
50 %
|
wolffd@0
|
51 % By default only the index of the best matching unit (/vector) is
|
wolffd@0
|
52 % returned, but the 'which' argument can be used to get others as
|
wolffd@0
|
53 % well. For example it might be desirable to get also second- and
|
wolffd@0
|
54 % third-best matching units as well (which = [1:3]).
|
wolffd@0
|
55 %
|
wolffd@0
|
56 % A mask can be used to weight the search process. The mask is used to
|
wolffd@0
|
57 % weight the influence of components in the distance calculation, as
|
wolffd@0
|
58 % follows:
|
wolffd@0
|
59 %
|
wolffd@0
|
60 % distance(x,y) = (x-y)' diag(mask) (x-y)
|
wolffd@0
|
61 %
|
wolffd@0
|
62 % where x and y are two vectors, and diag(mask) is a diagonal matrix with
|
wolffd@0
|
63 % the elements of mask vector on the diagonal.
|
wolffd@0
|
64 %
|
wolffd@0
|
65 % The vectors in the data set (sData) can contain unknown components
|
wolffd@0
|
66 % (NaNs), but the map (sMap) cannot. If there are completely empty
|
wolffd@0
|
67 % vectors (all NaNs), the returned BMUs and quantization errors for those
|
wolffd@0
|
68 % vectors are NaNs.
|
wolffd@0
|
69 %
|
wolffd@0
|
70 % REQUIRED INPUT ARGUMENTS
|
wolffd@0
|
71 %
|
wolffd@0
|
72 % sMap The vectors from among which the BMUs are searched
|
wolffd@0
|
73 % for. These must not have any unknown components (NaNs).
|
wolffd@0
|
74 % (struct) map struct
|
wolffd@0
|
75 % (matrix) codebook matrix, size munits x dim
|
wolffd@0
|
76 %
|
wolffd@0
|
77 % sData The data vector(s) for which the BMUs are searched.
|
wolffd@0
|
78 % (struct) data struct
|
wolffd@0
|
79 % (matrix) data matrix, size dlen x dim
|
wolffd@0
|
80 %
|
wolffd@0
|
81 % OPTIONAL INPUT ARGUMENTS
|
wolffd@0
|
82 %
|
wolffd@0
|
83 % which (vector) which BMUs are returned,
|
wolffd@0
|
84 % by default only the best (ie. which = [1])
|
wolffd@0
|
85 % (string) 'all', 'best' or 'worst' meaning [1:munits],
|
wolffd@0
|
86 % [1] and [munits] respectively
|
wolffd@0
|
87 % mask (vector) mask vector to be used in BMU search,
|
wolffd@0
|
88 % by default sMap.mask, or ones(dim,1) in case
|
wolffd@0
|
89 % a matrix was given
|
wolffd@0
|
90 %
|
wolffd@0
|
91 % OUTPUT ARGUMENTS
|
wolffd@0
|
92 %
|
wolffd@0
|
93 % Bmus (matrix) the requested BMUs for each data vector,
|
wolffd@0
|
94 % size dlen x length(which)
|
wolffd@0
|
95 % Qerrors (matrix) the corresponding quantization errors,
|
wolffd@0
|
96 % size equal to that of Bmus
|
wolffd@0
|
97 %
|
wolffd@0
|
98 % EXAMPLES
|
wolffd@0
|
99 %
|
wolffd@0
|
100 % Simplest case:
|
wolffd@0
|
101 % bmu = som_bmus(sM, [0.3 -0.4 1.0]);
|
wolffd@0
|
102 % % 3-dimensional data, returns BMU for vector [0.3 -0.4 1]
|
wolffd@0
|
103 % bmu = som_bmus(sM, [0.3 -0.4 1.0], [3 5]);
|
wolffd@0
|
104 % % as above, except returns the 3rd and 5th BMUs
|
wolffd@0
|
105 % bmu = som_bmus(sM, [0.3 -0.4 1.0], [], [1 0 1]);
|
wolffd@0
|
106 % % as above, except ignores second component in searching
|
wolffd@0
|
107 % [bmus qerrs] = som_bmus(sM, D);
|
wolffd@0
|
108 % % returns BMUs and corresponding quantization errors
|
wolffd@0
|
109 % % for each vector in D
|
wolffd@0
|
110 % bmus = som_bmus(sM, sD);
|
wolffd@0
|
111 % % returns BMUs for each vector in sD using the mask in sM
|
wolffd@0
|
112 %
|
wolffd@0
|
113 % SEE ALSO
|
wolffd@0
|
114 %
|
wolffd@0
|
115 % som_quality Measure the quantization and topographic error of a SOM.
|
wolffd@0
|
116
|
wolffd@0
|
117 % Copyright (c) 1997-2000 by the SOM toolbox programming team.
|
wolffd@0
|
118 % http://www.cis.hut.fi/projects/somtoolbox/
|
wolffd@0
|
119
|
wolffd@0
|
120 % Version 1.0beta juuso 071197, 101297
|
wolffd@0
|
121 % Version 2.0alpha juuso 201198 080200
|
wolffd@0
|
122
|
wolffd@0
|
123 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
wolffd@0
|
124 %% check arguments and initialize
|
wolffd@0
|
125
|
wolffd@0
|
126 error(nargchk(1, 4, nargin)); % check no. of input args is correct
|
wolffd@0
|
127
|
wolffd@0
|
128 % sMap
|
wolffd@0
|
129 if isstruct(sMap),
|
wolffd@0
|
130 switch sMap.type,
|
wolffd@0
|
131 case 'som_map', M = sMap.codebook;
|
wolffd@0
|
132 case 'som_data', M = sMap.data;
|
wolffd@0
|
133 otherwise, error('Invalid 1st argument.');
|
wolffd@0
|
134 end
|
wolffd@0
|
135 else
|
wolffd@0
|
136 M = sMap;
|
wolffd@0
|
137 end
|
wolffd@0
|
138 [munits dim] = size(M);
|
wolffd@0
|
139 if any(any(isnan(M))),
|
wolffd@0
|
140 error ('Map codebook must not have missing components.');
|
wolffd@0
|
141 end
|
wolffd@0
|
142
|
wolffd@0
|
143 % data
|
wolffd@0
|
144 if isstruct(sData),
|
wolffd@0
|
145 switch sData.type,
|
wolffd@0
|
146 case 'som_map', D = sData.codebook;
|
wolffd@0
|
147 case 'som_data', D = sData.data;
|
wolffd@0
|
148 otherwise, error('Invalid 2nd argument.');
|
wolffd@0
|
149 end
|
wolffd@0
|
150 else
|
wolffd@0
|
151 D = sData;
|
wolffd@0
|
152 end
|
wolffd@0
|
153 [dlen ddim] = size(D);
|
wolffd@0
|
154 if dim ~= ddim,
|
wolffd@0
|
155 error('Data and map dimensions do not match.')
|
wolffd@0
|
156 end
|
wolffd@0
|
157
|
wolffd@0
|
158 % which_bmus
|
wolffd@0
|
159 if nargin < 3 | isempty(which_bmus) | any(isnan(which_bmus)),
|
wolffd@0
|
160 which_bmus = 1;
|
wolffd@0
|
161 else
|
wolffd@0
|
162 if ischar(which_bmus),
|
wolffd@0
|
163 switch which_bmus,
|
wolffd@0
|
164 case 'best', which_bmus = 1;
|
wolffd@0
|
165 case 'worst', which_bmus = munits;
|
wolffd@0
|
166 case 'all', which_bmus = [1:munits];
|
wolffd@0
|
167 end
|
wolffd@0
|
168 end
|
wolffd@0
|
169 end
|
wolffd@0
|
170
|
wolffd@0
|
171 % mask
|
wolffd@0
|
172 if nargin < 4 | isempty(mask) | any(isnan(mask)),
|
wolffd@0
|
173 if isstruct(sMap) & strcmp(sMap.type,'som_map'),
|
wolffd@0
|
174 mask = sMap.mask;
|
wolffd@0
|
175 elseif isstruct(sData) & strcmp(sData.type,'som_map'),
|
wolffd@0
|
176 mask = sData.mask;
|
wolffd@0
|
177 else
|
wolffd@0
|
178 mask = ones(dim,1);
|
wolffd@0
|
179 end
|
wolffd@0
|
180 end
|
wolffd@0
|
181 if size(mask,1)==1, mask = mask'; end
|
wolffd@0
|
182 if all(mask == 0),
|
wolffd@0
|
183 error('All components masked off. BMU search cannot be done.');
|
wolffd@0
|
184 end
|
wolffd@0
|
185
|
wolffd@0
|
186 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
wolffd@0
|
187 %% action
|
wolffd@0
|
188
|
wolffd@0
|
189 Bmus = zeros(dlen,length(which_bmus));
|
wolffd@0
|
190 Qerrors = Bmus;
|
wolffd@0
|
191
|
wolffd@0
|
192 % The BMU search involves calculating weighted Euclidian distances
|
wolffd@0
|
193 % to all map units for each data vector. Basically this is done as
|
wolffd@0
|
194 % for i=1:dlen,
|
wolffd@0
|
195 % for j=1:munits,
|
wolffd@0
|
196 % for k=1:dim,
|
wolffd@0
|
197 % Dist(j,i) = Dist(j,i) + mask(k) * (D(i,k) - M(j,k))^2;
|
wolffd@0
|
198 % end
|
wolffd@0
|
199 % end
|
wolffd@0
|
200 % end
|
wolffd@0
|
201 % where mask is the weighting vector for distance calculation. However, taking
|
wolffd@0
|
202 % into account that distance between vectors m and v can be expressed as
|
wolffd@0
|
203 % |m - v|^2 = sum_i ((m_i - v_i)^2) = sum_i (m_i^2 + v_i^2 - 2*m_i*v_i)
|
wolffd@0
|
204 % this can be made much faster by transforming it to a matrix operation:
|
wolffd@0
|
205 % Dist = (M.^2)*mask*ones(1,d) + ones(m,1)*mask'*(D'.^2) - 2*M*diag(mask)*D'
|
wolffd@0
|
206 %
|
wolffd@0
|
207 % In the case where there are unknown components in the data, each data
|
wolffd@0
|
208 % vector will have an individual mask vector so that for that unit, the
|
wolffd@0
|
209 % unknown components are not taken into account in distance calculation.
|
wolffd@0
|
210 % In addition all NaN's are changed to zeros so that they don't screw up
|
wolffd@0
|
211 % the matrix multiplications.
|
wolffd@0
|
212
|
wolffd@0
|
213 % calculate distances & bmus
|
wolffd@0
|
214
|
wolffd@0
|
215 % This is done a block of data at a time rather than in a
|
wolffd@0
|
216 % single sweep to save memory consumption. The 'Dist' matrix has
|
wolffd@0
|
217 % size munits*blen which would be HUGE if you did it in a single-sweep
|
wolffd@0
|
218 % operation. If you _want_ to use the single-sweep version, just
|
wolffd@0
|
219 % set blen = dlen. If you're having problems with memory, try to
|
wolffd@0
|
220 % set the value of blen lower.
|
wolffd@0
|
221 blen = min(munits,dlen);
|
wolffd@0
|
222
|
wolffd@0
|
223 % handle unknown components
|
wolffd@0
|
224 Known = ~isnan(D);
|
wolffd@0
|
225 W1 = (mask*ones(1,dlen)) .* Known';
|
wolffd@0
|
226 D(find(~Known)) = 0;
|
wolffd@0
|
227 unknown = find(sum(Known')==0); % completely unknown vectors
|
wolffd@0
|
228
|
wolffd@0
|
229 % constant matrices
|
wolffd@0
|
230 WD = 2*diag(mask)*D'; % constant matrix
|
wolffd@0
|
231 dconst = ((D.^2)*mask); % constant term in the distances
|
wolffd@0
|
232
|
wolffd@0
|
233 i0 = 0;
|
wolffd@0
|
234 while i0+1<=dlen,
|
wolffd@0
|
235 % calculate distances
|
wolffd@0
|
236 inds = [(i0+1):min(dlen,i0+blen)]; i0 = i0+blen;
|
wolffd@0
|
237 Dist = (M.^2)*W1(:,inds) - M*WD(:,inds); % plus dconst for each sample
|
wolffd@0
|
238
|
wolffd@0
|
239 % find the bmus and the corresponding quantization errors
|
wolffd@0
|
240 if all(which_bmus==1), [Q B] = min(Dist); else [Q B] = sort(Dist); end
|
wolffd@0
|
241 if munits==1, Bmus(inds,:) = 1; else Bmus(inds,:) = B(which_bmus,:)'; end
|
wolffd@0
|
242 Qerrors(inds,:) = Q(which_bmus,:)' + dconst(inds,ones(length(which_bmus),1));
|
wolffd@0
|
243 end
|
wolffd@0
|
244
|
wolffd@0
|
245 % completely unknown vectors
|
wolffd@0
|
246 if ~isempty(unknown),
|
wolffd@0
|
247 Bmus(unknown,:) = NaN;
|
wolffd@0
|
248 Qerrors(unknown,:) = NaN;
|
wolffd@0
|
249 end
|
wolffd@0
|
250
|
wolffd@0
|
251 Qerrors = sqrt(Qerrors);
|
wolffd@0
|
252
|
wolffd@0
|
253 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|