Mercurial > hg > camir-aes2014
comparison toolboxes/MIRtoolbox1.3.2/somtoolbox/som_bmus.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e9a9cd732c1e |
---|---|
function [Bmus,Qerrors] = som_bmus(sMap, sData, which_bmus, mask)

%SOM_BMUS Find the best-matching units from the map for the given vectors.
%
% [Bmus, Qerrors] = som_bmus(sMap, sData, [which], [mask])
% 
%   bmus = som_bmus(sM,sD);
%   [bmus,qerrs] = som_bmus(sM,D,[1 2 3]);
%   bmus = som_bmus(sM,D,1,[1 1 0 0 1]);
%
%  Input and output arguments ([]'s are optional): 
%   sMap     (struct) map struct
%            (matrix) codebook matrix, size munits x dim
%   sData    (struct) data struct
%            (matrix) data matrix, size dlen x dim
%   [which]  (vector) which BMUs are returned, [1] by default 
%            (string) 'all', 'best' or 'worst' meaning [1:munits],
%                     [1] and [munits] respectively  
%   [mask]   (vector) mask vector, length=dim, sMap.mask by default
%
%   Bmus     (matrix) the requested BMUs for each data vector, 
%                     size dlen x length(which)
%   Qerrors  (matrix) the corresponding quantization errors, size as Bmus
%
% NOTE: for a vector with all components NaN's, bmu=NaN and qerror=NaN
% NOTE: the mask also affects the quantization errors
%
% For more help, try 'type som_bmus' or check out online documentation.
% See also  SOM_QUALITY.

%%%%%%%%%%%%% DETAILED DESCRIPTION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% som_bmus
%
% PURPOSE
%
% Finds Best-Matching Units (BMUs) for given data vector from a given map.
%
% SYNTAX
%
%  Bmus = som_bmus(sMap, sData)
%  Bmus = som_bmus(..., which)
%  Bmus = som_bmus(..., which, mask)
%  [Bmus, Qerrs] = som_bmus(...)
%
% DESCRIPTION
%
% Returns the indexes and corresponding quantization errors of the
% vectors in sMap that best matched the vectors in sData.
%
% By default only the index of the best matching unit (/vector) is
% returned, but the 'which' argument can be used to get others as
% well. For example it might be desirable to also get the second- and
% third-best matching units (which = [1:3]). 
%
% A mask can be used to weight the search process. The mask is used to
% weight the influence of components in the distance calculation, as
% follows: 
%
%   distance(x,y) = (x-y)' diag(mask) (x-y)
%
% where x and y are two vectors, and diag(mask) is a diagonal matrix with 
% the elements of mask vector on the diagonal. The returned quantization
% errors are the square roots of these distances.
%
% The vectors in the data set (sData) can contain unknown components
% (NaNs), but the map (sMap) cannot. If there are completely empty
% vectors (all NaNs), the returned BMUs and quantization errors for those 
% vectors are NaNs.
%
% REQUIRED INPUT ARGUMENTS
%
%   sMap              The vectors from among which the BMUs are searched
%                     for. These must not have any unknown components (NaNs).
%            (struct) map struct (a data struct is accepted as well, in 
%                     which case its .data field is used)
%            (matrix) codebook matrix, size munits x dim
%                     
%   sData             The data vector(s) for which the BMUs are searched.
%            (struct) data struct (a map struct is accepted as well, in 
%                     which case its .codebook field is used)
%            (matrix) data matrix, size dlen x dim
%
% OPTIONAL INPUT ARGUMENTS 
%
%   which    (vector) which BMUs are returned, 
%                     by default only the best (ie. which = [1])
%            (string) 'all', 'best' or 'worst' meaning [1:munits],
%                     [1] and [munits] respectively; any other string
%                     is an error
%   mask     (vector) mask vector to be used in BMU search, length dim.
%                     By default sMap.mask if sMap is a map struct,
%                     otherwise sData.mask if sData is a map struct,
%                     otherwise ones(dim,1)
%
% OUTPUT ARGUMENTS
% 
%   Bmus     (matrix) the requested BMUs for each data vector, 
%                     size dlen x length(which)
%   Qerrors  (matrix) the corresponding quantization errors, 
%                     size equal to that of Bmus (always real-valued)
%
% EXAMPLES
%
% Simplest case:
%  bmu = som_bmus(sM, [0.3 -0.4 1.0]);
%           % 3-dimensional data, returns BMU for vector [0.3 -0.4 1]
%  bmu = som_bmus(sM, [0.3 -0.4 1.0], [3 5]);
%           % as above, except returns the 3rd and 5th BMUs
%  bmu = som_bmus(sM, [0.3 -0.4 1.0], [], [1 0 1]);
%           % as above, except ignores second component in searching
%  [bmus qerrs] = som_bmus(sM, D);
%           % returns BMUs and corresponding quantization errors 
%           % for each vector in D
%  bmus = som_bmus(sM, sD);
%           % returns BMUs for each vector in sD using the mask in sM
%
% SEE ALSO
% 
%  som_quality      Measure the quantization and topographic error of a SOM.

% Copyright (c) 1997-2000 by the SOM toolbox programming team.
% http://www.cis.hut.fi/projects/somtoolbox/

% Version 1.0beta juuso 071197, 101297 
% Version 2.0alpha juuso 201198 080200

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% check arguments and initialize

% both sMap and sData are required, so at least 2 arguments are needed
error(nargchk(2, 4, nargin));

% normalize missing optional arguments to [] so they can be tested uniformly
if nargin < 3, which_bmus = []; end
if nargin < 4, mask = []; end

% sMap
if isstruct(sMap), 
  switch sMap.type, 
   case 'som_map', M = sMap.codebook; 
   case 'som_data', M = sMap.data;
   otherwise, error('Invalid 1st argument.');
  end
else 
  M = sMap; 
end
[munits dim] = size(M);
if any(any(isnan(M))), 
  error ('Map codebook must not have missing components.');
end

% data
if isstruct(sData), 
  switch sData.type, 
   case 'som_map', D = sData.codebook;
   case 'som_data', D = sData.data;
   otherwise, error('Invalid 2nd argument.');
  end
else 
  D = sData;
end
[dlen ddim] = size(D);
if dim ~= ddim, 
  error('Data and map dimensions do not match.')
end

% an empty codebook has no units to match against; without this guard the 
% block loop below would never advance (block length 0)
if munits == 0 & dlen > 0, 
  error('Map codebook is empty. BMU search cannot be done.');
end

% which_bmus
if isempty(which_bmus) | any(isnan(which_bmus(:))), 
  which_bmus = 1; 
elseif ischar(which_bmus), 
  switch which_bmus,
   case 'best', which_bmus = 1; 
   case 'worst', which_bmus = munits; 
   case 'all', which_bmus = [1:munits];
   otherwise, 
    % an unrecognized string would otherwise be used as character-code 
    % indices, producing meaningless results
    error('Invalid 3rd argument: expected ''all'', ''best'' or ''worst''.');
  end
end

% mask
if isempty(mask) | any(isnan(mask(:))), 
  if isstruct(sMap) & strcmp(sMap.type,'som_map'), 
    mask = sMap.mask; 
  elseif isstruct(sData) & strcmp(sData.type,'som_map'), 
    mask = sData.mask; 
  else
    mask = ones(dim,1); 
  end
end
mask = mask(:);  % always a column vector
if length(mask) ~= dim, 
  % a wrong-sized mask (e.g. a scalar) could otherwise be silently expanded 
  % and yield wrong distances
  error('Mask length does not match the map dimension.');
end
if all(mask == 0), 
  error('All components masked off. BMU search cannot be done.');
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% action

Bmus = zeros(dlen,length(which_bmus));
Qerrors = Bmus;

% The BMU search involves calculating weighted Euclidean distances 
% to all map units for each data vector. Basically this is done as
%   for i=1:dlen, 
%     for j=1:munits, 
%       for k=1:dim, 
%         Dist(j,i) = Dist(j,i) + mask(k) * (D(i,k) - M(j,k))^2;
%       end
%     end
%   end
% where mask is the weighting vector for distance calculation. However, taking 
% into account that distance between vectors m and v can be expressed as
%   |m - v|^2 = sum_i ((m_i - v_i)^2) = sum_i (m_i^2 + v_i^2 - 2*m_i*v_i)
% this can be made much faster by transforming it to a matrix operation:
%   Dist = (M.^2)*mask*ones(1,d) + ones(m,1)*mask'*(D'.^2) - 2*M*diag(mask)*D'
%
% In the case where there are unknown components in the data, each data
% vector will have an individual mask vector so that for that unit, the 
% unknown components are not taken into account in distance calculation.
% In addition all NaN's are changed to zeros so that they don't screw up 
% the matrix multiplications.

% calculate distances & bmus

% This is done a block of data at a time rather than in a
% single sweep to save memory consumption. The 'Dist' matrix has 
% size munits*blen which would be HUGE if you did it in a single-sweep
% operation. If you _want_ to use the single-sweep version, just 
% set blen = dlen. If you're having problems with memory, try to 
% set the value of blen lower. 
blen = min(munits,dlen);

% handle unknown components
Known = ~isnan(D);
W1 = (mask*ones(1,dlen)) .* Known'; 
D(~Known) = 0;  
% completely unknown vectors; reduce explicitly along the component 
% dimension so that this also works when dim == 1
unknown = find(~any(Known,2));

% constant matrices
WD = 2*diag(mask)*D';   % constant matrix
dconst = ((D.^2)*mask); % constant term in the distances

nwhich = length(which_bmus);
i0 = 0; 
while i0+1<=dlen, 
  % calculate distances (without the per-sample constant term dconst)
  inds = [(i0+1):min(dlen,i0+blen)]; i0 = i0+blen;      
  Dist = (M.^2)*W1(:,inds) - M*WD(:,inds); 

  % find the bmus and the corresponding quantization errors; always work 
  % along dimension 1 (the map units) so that a single-unit map or a 
  % different block length cannot flip the direction of min/sort
  if all(which_bmus==1), 
    [Q B] = min(Dist,[],1); 
  else 
    [Q B] = sort(Dist,1); 
  end
  Bmus(inds,:) = B(which_bmus,:)';
  Qerrors(inds,:) = Q(which_bmus,:)' + dconst(inds,ones(nwhich,1));
end  

% rounding in the expanded distance formula can leave tiny negative values
% for (near-)exact matches; clamp them so that sqrt stays real-valued
Qerrors(Qerrors < 0) = 0;

% completely unknown vectors
if ~isempty(unknown), 
  Bmus(unknown,:) = NaN;
  Qerrors(unknown,:) = NaN;
end

Qerrors = sqrt(Qerrors);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%