diff toolboxes/MIRtoolbox1.3.2/somtoolbox/som_bmus.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/toolboxes/MIRtoolbox1.3.2/somtoolbox/som_bmus.m	Tue Feb 10 15:05:51 2015 +0000
@@ -0,0 +1,253 @@
+function [Bmus,Qerrors] = som_bmus(sMap, sData, which_bmus, mask)
+
+%SOM_BMUS Find the best-matching units from the map for the given vectors.
+%
+% [Bmus, Qerrors] = som_bmus(sMap, sData, [which], [mask])
+% 
+%   bmus = som_bmus(sM,sD);
+%   [bmus,qerrs] = som_bmus(sM,D,[1 2 3]);
+%   bmus = som_bmus(sM,D,1,[1 1 0 0 1]);
+%
+%  Input and output arguments ([]'s are optional): 
+%   sMap     (struct) map struct
+%            (matrix) codebook matrix, size munits x dim
+%   sData    (struct) data struct
+%            (matrix) data matrix, size dlen x dim
+%   [which]  (vector) which BMUs are returned, [1] by default 
+%            (string) 'all', 'best' or 'worst' meaning [1:munits],
+%                     [1] and [munits] respectively  
+%   [mask]   (vector) mask vector, length=dim, sMap.mask by default
+%
+%   Bmus     (matrix) the requested BMUs for each data vector, 
+%                     size dlen x length(which)
+%   Qerrors  (matrix) the corresponding quantization errors, size as Bmus
+%
+% NOTE: for a vector with all components NaN's, bmu=NaN and qerror=NaN
+% NOTE: the mask also effects the quantization errors
+%
+% For more help, try 'type som_bmus' or check out online documentation.
+% See also  SOM_QUALITY.
+
+%%%%%%%%%%%%% DETAILED DESCRIPTION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+% som_bmus
+%
+% PURPOSE
+%
+% Finds Best-Matching Units (BMUs) for given data vector from a given map.
+%
+% SYNTAX
+%
+%  Bmus = som_bmus(sMap, sData)
+%  Bmus = som_bmus(..., which)
+%  Bmus = som_bmus(..., which, mask)
+%  [Bmus, Qerrs] = som_bmus(...)
+%
+% DESCRIPTION
+%
+% Returns the indexes and corresponding quantization errors of the
+% vectors in sMap that best matched the vectors in sData.
+%
+% By default only the index of the best matching unit (/vector) is
+% returned, but the 'which' argument can be used to get others as
+% well. For example it might be desirable to get also second- and
+% third-best matching units as well (which = [1:3]). 
+%
+% A mask can be used to weight the search process. The mask is used to
+% weight the influence of components in the distance calculation, as
+% follows: 
+%
+%   distance(x,y) = (x-y)' diag(mask) (x-y)
+%
+% where x and y are two vectors, and diag(mask) is a diagonal matrix with 
+% the elements of mask vector on the diagonal. 
+%
+% The vectors in the data set (sData) can contain unknown components
+% (NaNs), but the map (sMap) cannot. If there are completely empty
+% vectors (all NaNs), the returned BMUs and quantization errors for those 
+% vectors are NaNs.
+%
+% REQUIRED INPUT ARGUMENTS
+%
+%   sMap              The vectors from among which the BMUs are searched
+%                     for. These must not have any unknown components (NaNs).
+%            (struct) map struct
+%            (matrix) codebook matrix, size munits x dim
+%                     
+%   sData             The data vector(s) for which the BMUs are searched.
+%            (struct) data struct
+%            (matrix) data matrix, size dlen x dim
+%
+% OPTIONAL INPUT ARGUMENTS 
+%
+%   which    (vector) which BMUs are returned, 
+%                     by default only the best (ie. which = [1])
+%            (string) 'all', 'best' or 'worst' meaning [1:munits],
+%                     [1] and [munits] respectively  
+%   mask     (vector) mask vector to be used in BMU search, 
+%                     by default sMap.mask, or ones(dim,1) in case
+%                     a matrix was given
+%
+% OUTPUT ARGUMENTS
+% 
+%   Bmus     (matrix) the requested BMUs for each data vector, 
+%                     size dlen x length(which)
+%   Qerrors  (matrix) the corresponding quantization errors, 
+%                     size equal to that of Bmus
+%
+% EXAMPLES
+%
+% Simplest case:
+%  bmu = som_bmus(sM, [0.3 -0.4 1.0]);
+%           % 3-dimensional data, returns BMU for vector [0.3 -0.4 1]
+%  bmu = som_bmus(sM, [0.3 -0.4 1.0], [3 5]);
+%           % as above, except returns the 3rd and 5th BMUs
+%  bmu = som_bmus(sM, [0.3 -0.4 1.0], [], [1 0 1]);
+%           % as above, except ignores second component in searching
+%  [bmus qerrs] = som_bmus(sM, D);
+%           % returns BMUs and corresponding quantization errors 
+%           % for each vector in D
+%  bmus = som_bmus(sM, sD);
+%           % returns BMUs for each vector in sD using the mask in sM
+%
+% SEE ALSO
+% 
+%  som_quality      Measure the quantization and topographic error of a SOM.
+
+% Copyright (c) 1997-2000 by the SOM toolbox programming team.
+% http://www.cis.hut.fi/projects/somtoolbox/
+
+% Version 1.0beta juuso 071197, 101297 
+% Version 2.0alpha juuso 201198 080200
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% check arguments and initialize
+
+error(nargchk(1, 4, nargin));  % check no. of input args is correct
+
+% sMap
+if isstruct(sMap), 
+  switch sMap.type, 
+   case 'som_map', M = sMap.codebook; 
+   case 'som_data', M = sMap.data;
+   otherwise, error('Invalid 1st argument.');
+  end
+else 
+  M = sMap; 
+end
+[munits dim] = size(M);
+if any(any(isnan(M))), 
+  error ('Map codebook must not have missing components.');
+end
+
+% data
+if isstruct(sData), 
+  switch sData.type, 
+   case 'som_map', D = sData.codebook;
+   case 'som_data', D = sData.data;
+   otherwise, error('Invalid 2nd argument.');
+  end
+else 
+  D = sData;
+end
+[dlen ddim] = size(D);
+if dim ~= ddim, 
+  error('Data and map dimensions do not match.')
+end
+
+% which_bmus
+if nargin < 3 | isempty(which_bmus) | any(isnan(which_bmus)), 
+  which_bmus = 1; 
+else
+  if ischar(which_bmus), 
+    switch which_bmus,
+     case 'best', which_bmus = 1; 
+     case 'worst', which_bmus = munits; 
+     case 'all', which_bmus = [1:munits];
+    end
+  end
+end
+
+% mask
+if nargin < 4 | isempty(mask) | any(isnan(mask)), 
+  if isstruct(sMap) & strcmp(sMap.type,'som_map'), 
+    mask = sMap.mask; 
+  elseif isstruct(sData) & strcmp(sData.type,'som_map'), 
+    mask = sData.mask; 
+  else
+    mask = ones(dim,1); 
+  end
+end
+if size(mask,1)==1, mask = mask'; end
+if all(mask == 0), 
+  error('All components masked off. BMU search cannot be done.');
+end
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% action
+
+Bmus = zeros(dlen,length(which_bmus));
+Qerrors = Bmus;
+
+% The BMU search involves calculating weighted Euclidian distances 
+% to all map units for each data vector. Basically this is done as
+%   for i=1:dlen, 
+%     for j=1:munits, 
+%       for k=1:dim,
+%         Dist(j,i) = Dist(j,i) + mask(k) * (D(i,k) - M(j,k))^2;
+%       end
+%     end
+%   end
+% where mask is the weighting vector for distance calculation. However, taking 
+% into account that distance between vectors m and v can be expressed as
+%   |m - v|^2 = sum_i ((m_i - v_i)^2) = sum_i (m_i^2 + v_i^2 - 2*m_i*v_i)
+% this can be made much faster by transforming it to a matrix operation:
+%   Dist = (M.^2)*mask*ones(1,d) + ones(m,1)*mask'*(D'.^2) - 2*M*diag(mask)*D'
+%
+% In the case where there are unknown components in the data, each data
+% vector will have an individual mask vector so that for that unit, the 
+% unknown components are not taken into account in distance calculation.
+% In addition all NaN's are changed to zeros so that they don't screw up 
+% the matrix multiplications.
+
+% calculate distances & bmus
+
+% This is done a block of data at a time rather than in a
+% single sweep to save memory consumption. The 'Dist' matrix has 
+% size munits*blen which would be HUGE if you did it in a single-sweep
+% operation. If you _want_ to use the single-sweep version, just 
+% set blen = dlen. If you're having problems with memory, try to 
+% set the value of blen lower. 
+blen = min(munits,dlen);
+
+% handle unknown components
+Known = ~isnan(D);
+W1 = (mask*ones(1,dlen)) .* Known'; 
+D(find(~Known)) = 0;  
+unknown = find(sum(Known')==0); % completely unknown vectors 
+
+% constant matrices
+WD = 2*diag(mask)*D';   % constant matrix
+dconst = ((D.^2)*mask); % constant term in the distances
+
+i0 = 0; 
+while i0+1<=dlen, 
+  % calculate distances 
+  inds = [(i0+1):min(dlen,i0+blen)]; i0 = i0+blen;      
+  Dist = (M.^2)*W1(:,inds) - M*WD(:,inds); % plus dconst for each sample
+  
+  % find the bmus and the corresponding quantization errors
+  if all(which_bmus==1), [Q B] = min(Dist); else [Q B] = sort(Dist); end
+  if munits==1, Bmus(inds,:) = 1; else Bmus(inds,:) = B(which_bmus,:)'; end
+  Qerrors(inds,:) = Q(which_bmus,:)' + dconst(inds,ones(length(which_bmus),1));
+end  
+
+% completely unknown vectors
+if ~isempty(unknown), 
+  Bmus(unknown,:) = NaN;
+  Qerrors(unknown,:) = NaN;
+end
+
+Qerrors = sqrt(Qerrors);
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%