annotate toolboxes/MIRtoolbox1.3.2/somtoolbox/som_bmus.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
wolffd@0 1 function [Bmus,Qerrors] = som_bmus(sMap, sData, which_bmus, mask)
wolffd@0 2
wolffd@0 3 %SOM_BMUS Find the best-matching units from the map for the given vectors.
wolffd@0 4 %
wolffd@0 5 % [Bmus, Qerrors] = som_bmus(sMap, sData, [which], [mask])
wolffd@0 6 %
wolffd@0 7 % bmus = som_bmus(sM,sD);
wolffd@0 8 % [bmus,qerrs] = som_bmus(sM,D,[1 2 3]);
wolffd@0 9 % bmus = som_bmus(sM,D,1,[1 1 0 0 1]);
wolffd@0 10 %
wolffd@0 11 % Input and output arguments ([]'s are optional):
wolffd@0 12 % sMap (struct) map struct
wolffd@0 13 % (matrix) codebook matrix, size munits x dim
wolffd@0 14 % sData (struct) data struct
wolffd@0 15 % (matrix) data matrix, size dlen x dim
wolffd@0 16 % [which] (vector) which BMUs are returned, [1] by default
wolffd@0 17 % (string) 'all', 'best' or 'worst' meaning [1:munits],
wolffd@0 18 % [1] and [munits] respectively
wolffd@0 19 % [mask] (vector) mask vector, length=dim, sMap.mask by default
wolffd@0 20 %
wolffd@0 21 % Bmus (matrix) the requested BMUs for each data vector,
wolffd@0 22 % size dlen x length(which)
wolffd@0 23 % Qerrors (matrix) the corresponding quantization errors, size as Bmus
wolffd@0 24 %
wolffd@0 25 % NOTE: for a vector with all components NaN's, bmu=NaN and qerror=NaN
wolffd@0 26 % NOTE: the mask also effects the quantization errors
wolffd@0 27 %
wolffd@0 28 % For more help, try 'type som_bmus' or check out online documentation.
wolffd@0 29 % See also SOM_QUALITY.
wolffd@0 30
wolffd@0 31 %%%%%%%%%%%%% DETAILED DESCRIPTION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wolffd@0 32 %
wolffd@0 33 % som_bmus
wolffd@0 34 %
wolffd@0 35 % PURPOSE
wolffd@0 36 %
wolffd@0 37 % Finds Best-Matching Units (BMUs) for given data vector from a given map.
wolffd@0 38 %
wolffd@0 39 % SYNTAX
wolffd@0 40 %
wolffd@0 41 % Bmus = som_bmus(sMap, sData)
wolffd@0 42 % Bmus = som_bmus(..., which)
wolffd@0 43 % Bmus = som_bmus(..., which, mask)
wolffd@0 44 % [Bmus, Qerrs] = som_bmus(...)
wolffd@0 45 %
wolffd@0 46 % DESCRIPTION
wolffd@0 47 %
wolffd@0 48 % Returns the indexes and corresponding quantization errors of the
wolffd@0 49 % vectors in sMap that best matched the vectors in sData.
wolffd@0 50 %
wolffd@0 51 % By default only the index of the best matching unit (/vector) is
wolffd@0 52 % returned, but the 'which' argument can be used to get others as
wolffd@0 53 % well. For example it might be desirable to get also second- and
wolffd@0 54 % third-best matching units as well (which = [1:3]).
wolffd@0 55 %
wolffd@0 56 % A mask can be used to weight the search process. The mask is used to
wolffd@0 57 % weight the influence of components in the distance calculation, as
wolffd@0 58 % follows:
wolffd@0 59 %
wolffd@0 60 % distance(x,y) = (x-y)' diag(mask) (x-y)
wolffd@0 61 %
wolffd@0 62 % where x and y are two vectors, and diag(mask) is a diagonal matrix with
wolffd@0 63 % the elements of mask vector on the diagonal.
wolffd@0 64 %
wolffd@0 65 % The vectors in the data set (sData) can contain unknown components
wolffd@0 66 % (NaNs), but the map (sMap) cannot. If there are completely empty
wolffd@0 67 % vectors (all NaNs), the returned BMUs and quantization errors for those
wolffd@0 68 % vectors are NaNs.
wolffd@0 69 %
wolffd@0 70 % REQUIRED INPUT ARGUMENTS
wolffd@0 71 %
wolffd@0 72 % sMap The vectors from among which the BMUs are searched
wolffd@0 73 % for. These must not have any unknown components (NaNs).
wolffd@0 74 % (struct) map struct
wolffd@0 75 % (matrix) codebook matrix, size munits x dim
wolffd@0 76 %
wolffd@0 77 % sData The data vector(s) for which the BMUs are searched.
wolffd@0 78 % (struct) data struct
wolffd@0 79 % (matrix) data matrix, size dlen x dim
wolffd@0 80 %
wolffd@0 81 % OPTIONAL INPUT ARGUMENTS
wolffd@0 82 %
wolffd@0 83 % which (vector) which BMUs are returned,
wolffd@0 84 % by default only the best (ie. which = [1])
wolffd@0 85 % (string) 'all', 'best' or 'worst' meaning [1:munits],
wolffd@0 86 % [1] and [munits] respectively
wolffd@0 87 % mask (vector) mask vector to be used in BMU search,
wolffd@0 88 % by default sMap.mask, or ones(dim,1) in case
wolffd@0 89 % a matrix was given
wolffd@0 90 %
wolffd@0 91 % OUTPUT ARGUMENTS
wolffd@0 92 %
wolffd@0 93 % Bmus (matrix) the requested BMUs for each data vector,
wolffd@0 94 % size dlen x length(which)
wolffd@0 95 % Qerrors (matrix) the corresponding quantization errors,
wolffd@0 96 % size equal to that of Bmus
wolffd@0 97 %
wolffd@0 98 % EXAMPLES
wolffd@0 99 %
wolffd@0 100 % Simplest case:
wolffd@0 101 % bmu = som_bmus(sM, [0.3 -0.4 1.0]);
wolffd@0 102 % % 3-dimensional data, returns BMU for vector [0.3 -0.4 1]
wolffd@0 103 % bmu = som_bmus(sM, [0.3 -0.4 1.0], [3 5]);
wolffd@0 104 % % as above, except returns the 3rd and 5th BMUs
wolffd@0 105 % bmu = som_bmus(sM, [0.3 -0.4 1.0], [], [1 0 1]);
wolffd@0 106 % % as above, except ignores second component in searching
wolffd@0 107 % [bmus qerrs] = som_bmus(sM, D);
wolffd@0 108 % % returns BMUs and corresponding quantization errors
wolffd@0 109 % % for each vector in D
wolffd@0 110 % bmus = som_bmus(sM, sD);
wolffd@0 111 % % returns BMUs for each vector in sD using the mask in sM
wolffd@0 112 %
wolffd@0 113 % SEE ALSO
wolffd@0 114 %
wolffd@0 115 % som_quality Measure the quantization and topographic error of a SOM.
wolffd@0 116
wolffd@0 117 % Copyright (c) 1997-2000 by the SOM toolbox programming team.
wolffd@0 118 % http://www.cis.hut.fi/projects/somtoolbox/
wolffd@0 119
wolffd@0 120 % Version 1.0beta juuso 071197, 101297
wolffd@0 121 % Version 2.0alpha juuso 201198 080200
wolffd@0 122
wolffd@0 123 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wolffd@0 124 %% check arguments and initialize
wolffd@0 125
wolffd@0 126 error(nargchk(1, 4, nargin)); % check no. of input args is correct
wolffd@0 127
wolffd@0 128 % sMap
wolffd@0 129 if isstruct(sMap),
wolffd@0 130 switch sMap.type,
wolffd@0 131 case 'som_map', M = sMap.codebook;
wolffd@0 132 case 'som_data', M = sMap.data;
wolffd@0 133 otherwise, error('Invalid 1st argument.');
wolffd@0 134 end
wolffd@0 135 else
wolffd@0 136 M = sMap;
wolffd@0 137 end
wolffd@0 138 [munits dim] = size(M);
wolffd@0 139 if any(any(isnan(M))),
wolffd@0 140 error ('Map codebook must not have missing components.');
wolffd@0 141 end
wolffd@0 142
wolffd@0 143 % data
wolffd@0 144 if isstruct(sData),
wolffd@0 145 switch sData.type,
wolffd@0 146 case 'som_map', D = sData.codebook;
wolffd@0 147 case 'som_data', D = sData.data;
wolffd@0 148 otherwise, error('Invalid 2nd argument.');
wolffd@0 149 end
wolffd@0 150 else
wolffd@0 151 D = sData;
wolffd@0 152 end
wolffd@0 153 [dlen ddim] = size(D);
wolffd@0 154 if dim ~= ddim,
wolffd@0 155 error('Data and map dimensions do not match.')
wolffd@0 156 end
wolffd@0 157
wolffd@0 158 % which_bmus
wolffd@0 159 if nargin < 3 | isempty(which_bmus) | any(isnan(which_bmus)),
wolffd@0 160 which_bmus = 1;
wolffd@0 161 else
wolffd@0 162 if ischar(which_bmus),
wolffd@0 163 switch which_bmus,
wolffd@0 164 case 'best', which_bmus = 1;
wolffd@0 165 case 'worst', which_bmus = munits;
wolffd@0 166 case 'all', which_bmus = [1:munits];
wolffd@0 167 end
wolffd@0 168 end
wolffd@0 169 end
wolffd@0 170
wolffd@0 171 % mask
wolffd@0 172 if nargin < 4 | isempty(mask) | any(isnan(mask)),
wolffd@0 173 if isstruct(sMap) & strcmp(sMap.type,'som_map'),
wolffd@0 174 mask = sMap.mask;
wolffd@0 175 elseif isstruct(sData) & strcmp(sData.type,'som_map'),
wolffd@0 176 mask = sData.mask;
wolffd@0 177 else
wolffd@0 178 mask = ones(dim,1);
wolffd@0 179 end
wolffd@0 180 end
wolffd@0 181 if size(mask,1)==1, mask = mask'; end
wolffd@0 182 if all(mask == 0),
wolffd@0 183 error('All components masked off. BMU search cannot be done.');
wolffd@0 184 end
wolffd@0 185
wolffd@0 186 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wolffd@0 187 %% action
wolffd@0 188
wolffd@0 189 Bmus = zeros(dlen,length(which_bmus));
wolffd@0 190 Qerrors = Bmus;
wolffd@0 191
wolffd@0 192 % The BMU search involves calculating weighted Euclidian distances
wolffd@0 193 % to all map units for each data vector. Basically this is done as
wolffd@0 194 % for i=1:dlen,
wolffd@0 195 % for j=1:munits,
wolffd@0 196 % for k=1:dim,
wolffd@0 197 % Dist(j,i) = Dist(j,i) + mask(k) * (D(i,k) - M(j,k))^2;
wolffd@0 198 % end
wolffd@0 199 % end
wolffd@0 200 % end
wolffd@0 201 % where mask is the weighting vector for distance calculation. However, taking
wolffd@0 202 % into account that distance between vectors m and v can be expressed as
wolffd@0 203 % |m - v|^2 = sum_i ((m_i - v_i)^2) = sum_i (m_i^2 + v_i^2 - 2*m_i*v_i)
wolffd@0 204 % this can be made much faster by transforming it to a matrix operation:
wolffd@0 205 % Dist = (M.^2)*mask*ones(1,d) + ones(m,1)*mask'*(D'.^2) - 2*M*diag(mask)*D'
wolffd@0 206 %
wolffd@0 207 % In the case where there are unknown components in the data, each data
wolffd@0 208 % vector will have an individual mask vector so that for that unit, the
wolffd@0 209 % unknown components are not taken into account in distance calculation.
wolffd@0 210 % In addition all NaN's are changed to zeros so that they don't screw up
wolffd@0 211 % the matrix multiplications.
wolffd@0 212
wolffd@0 213 % calculate distances & bmus
wolffd@0 214
wolffd@0 215 % This is done a block of data at a time rather than in a
wolffd@0 216 % single sweep to save memory consumption. The 'Dist' matrix has
wolffd@0 217 % size munits*blen which would be HUGE if you did it in a single-sweep
wolffd@0 218 % operation. If you _want_ to use the single-sweep version, just
wolffd@0 219 % set blen = dlen. If you're having problems with memory, try to
wolffd@0 220 % set the value of blen lower.
wolffd@0 221 blen = min(munits,dlen);
wolffd@0 222
wolffd@0 223 % handle unknown components
wolffd@0 224 Known = ~isnan(D);
wolffd@0 225 W1 = (mask*ones(1,dlen)) .* Known';
wolffd@0 226 D(find(~Known)) = 0;
wolffd@0 227 unknown = find(sum(Known')==0); % completely unknown vectors
wolffd@0 228
wolffd@0 229 % constant matrices
wolffd@0 230 WD = 2*diag(mask)*D'; % constant matrix
wolffd@0 231 dconst = ((D.^2)*mask); % constant term in the distances
wolffd@0 232
wolffd@0 233 i0 = 0;
wolffd@0 234 while i0+1<=dlen,
wolffd@0 235 % calculate distances
wolffd@0 236 inds = [(i0+1):min(dlen,i0+blen)]; i0 = i0+blen;
wolffd@0 237 Dist = (M.^2)*W1(:,inds) - M*WD(:,inds); % plus dconst for each sample
wolffd@0 238
wolffd@0 239 % find the bmus and the corresponding quantization errors
wolffd@0 240 if all(which_bmus==1), [Q B] = min(Dist); else [Q B] = sort(Dist); end
wolffd@0 241 if munits==1, Bmus(inds,:) = 1; else Bmus(inds,:) = B(which_bmus,:)'; end
wolffd@0 242 Qerrors(inds,:) = Q(which_bmus,:)' + dconst(inds,ones(length(which_bmus),1));
wolffd@0 243 end
wolffd@0 244
wolffd@0 245 % completely unknown vectors
wolffd@0 246 if ~isempty(unknown),
wolffd@0 247 Bmus(unknown,:) = NaN;
wolffd@0 248 Qerrors(unknown,:) = NaN;
wolffd@0 249 end
wolffd@0 250
wolffd@0 251 Qerrors = sqrt(Qerrors);
wolffd@0 252
wolffd@0 253 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%