diff toolboxes/MIRtoolbox1.3.2/somtoolbox/knn_old.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/toolboxes/MIRtoolbox1.3.2/somtoolbox/knn_old.m	Tue Feb 10 15:05:51 2015 +0000
@@ -0,0 +1,251 @@
+function [Class,P]=knn_old(Data, Proto, proto_class, K)
+
+%KNN_OLD A K-nearest neighbor classifier using Euclidean distance 
+%
+% [Class,P]=knn_old(Data, Proto, proto_class, K)
+%
+%  [sM_class,P]=knn_old(sM, sData, [], 3);
+%  [sD_class,P]=knn_old(sD, sM, class);
+%  [class,P]=knn_old(data, proto, class);
+%  [class,P]=knn_old(sData, sM, class,5);
+%
+%  Input and output arguments ([]'s are optional): 
+%   Data   (matrix) size Nxd, vectors to be classified (=classifiees)
+%          (struct) map or data struct: map codebook vectors or
+%                   data vectors are considered as classifiees.
+%   Proto  (matrix) size Mxd, prototype vector matrix (=prototypes)
+%          (struct) map or data struct: map codebook vectors or
+%                   data vectors are considered as prototypes.
+%   [proto_class] (vector) size Nx1, integers 1,2,...,k indicating the
+%                   classes of corresponding protoptypes, default: see the 
+%                   explanation below. 
+%   [K]    (scalar) the K in KNN classifier, default is 1
+% 
+%   Class  (matrix) size Nx1, vector of 1,2, ..., k indicating the class 
+%                   desicion according to the KNN rule
+%   P      (matrix) size Nxk, the relative amount of prototypes of 
+%                   each class among the K closest prototypes for
+%                   each classifiee.
+%
+% If 'proto_class' is _not_ given, 'Proto' _must_ be a labeled SOM
+% Toolbox struct. The label of the data vector or the first label of
+% the map model vector is considered as class label for th prototype
+% vector. In this case the output 'Class' is a copy of 'Data' (map or
+% data struct) relabeled according to the classification.  If input
+% argument 'proto_class' _is_ given, the output argument 'Class' is
+% _always_ a vector of integers 1,2,...,k indiacating the class.
+%
+% If there is a tie between representatives of two or more classes
+% among the K closest neighbors to the classifiee, the class is
+% selected randomly among these candidates.
+%
+% IMPORTANT
+% 
+% ** Even if prototype vectors are given in a map struct the mask _is not 
+%    taken into account_ when calculating Euclidean distance
+% ** The function calculates the total distance matrix between all
+%    classifiees and prototype vectors. This results to an MxN matrix; 
+%    if N is high it is recommended to divide the matrix 'Data'
+%    (the classifiees) into smaller sets in order to avoid memory
+%    overflow or swapping. Also, if K>1 this function uses 'sort' which is
+%    considerably slower than 'max' which is used for K==1.
+%
+% See also KNN, SOM_LABEL, SOM_AUTOLABEL
+
+% Contributed to SOM Toolbox 2.0, February 11th, 2000 by Johan Himberg
+% Copyright (c) by Johan Himberg
+% http://www.cis.hut.fi/projects/somtoolbox/
+
+% Version 2.0beta Johan 040200
+
+%% Init %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% This must exist later
+classnames='';
+
+% Check K 
+if nargin<4 | isempty(K),
+  K=1;
+end
+
+if ~vis_valuetype(K,{'1x1'})
+  error('Value for K must be a scalar.');
+end
+
+% Take data from data or map struct
+
+if isstruct(Data);
+  if isfield(Data,'type') & ischar(Data.type),
+    ;
+  else
+    error('Invalid map/data struct?');
+  end
+  switch Data.type
+   case 'som_map'
+    data=Data.codebook;
+   case 'som_data'
+    data=Data.data;
+  end
+else
+  % is already a matrix
+  data=Data;
+end
+
+% Take prototype vectors from prototype struct
+
+if isstruct(Proto),
+  
+  if isfield(Proto,'type') & ischar(Proto.type),
+    ;
+  else
+    error('Invalid map/data struct?');
+  end
+  switch Proto.type
+   case 'som_map'
+    proto=Proto.codebook;
+   case 'som_data'
+    proto=Proto.data;
+  end
+else
+  % is already a matrix
+  proto=Proto; 
+end
+
+% Check that inputs are matrices
+if ~vis_valuetype(proto,{'nxm'}) | ~vis_valuetype(data,{'nxm'}),
+  error('Prototype or data input not valid.')
+end
+
+% Record data&proto sizes and check their dims 
+[N_data dim_data]=size(data); 
+[N_proto dim_proto]=size(proto);
+if dim_proto ~= dim_data,
+  error('Data and prototype vector dimension does not match.');
+end
+
+% Check if the classes are given as labels (no class input arg.)
+% if they are take them from prototype struct
+
+if nargin<3 | isempty(proto_class)
+  if ~isstruct(Proto)
+    error(['If prototypes are not in labeled map or data struct' ...
+	   'class must be given.']);  
+    % transform to interger (numerical) class labels
+  else
+    [proto_class,classnames]=class2num(Proto.labels); 
+  end
+end
+
+% Check class label vector: must be numerical and of integers
+if ~vis_valuetype(proto_class,{[N_proto 1]});
+  error(['Class vector is invalid: has to be a N-of-data_rows x 1' ...
+	 ' vector of integers']);
+elseif sum(fix(proto_class)-proto_class)~=0
+  error('Class labels in vector ''Class'' must be integers.');
+end
+
+% Find all class labels
+ClassIndex=unique(proto_class);
+N_class=length(ClassIndex); % number of different classes  
+
+% Calculate euclidean distances between classifiees and prototypes
+d=distance(proto,data);
+
+%%%% Classification %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+if K==1,   % sort distances only if K>1
+  
+  % 1NN
+  % Select the closest prototype
+  [tmp,proto_index]=min(d);
+  class=proto_class(proto_index);
+
+else 
+  
+  % Sort the prototypes for each classifiee according to distance
+  [tmp,proto_index]=sort(d);
+  
+  %% Select K closest prototypes
+  proto_index=proto_index(1:K,:);
+  knn_class=proto_class(proto_index);
+  for i=1:N_class,
+    classcounter(i,:)=sum(knn_class==ClassIndex(i));
+  end
+  
+  %% Vote between classes of K neighbors 
+  [winner,vote_index]=max(classcounter);
+  
+  %% Handle ties
+  
+  % set index to clases that got as amuch votes as winner
+  
+  equal_to_winner=(repmat(winner,N_class,1)==classcounter);
+  
+  % set index to ties
+  tie_index=find(sum(equal_to_winner)>1); % drop the winner from counter 
+  
+  % Go through equal classes and reset vote_index randomly to one
+  % of them 
+  
+  for i=1:length(tie_index),
+    tie_class_index=find(equal_to_winner(:,tie_index(i)));
+    fortuna=randperm(length(tie_class_index));
+    vote_index(tie_index(i))=tie_class_index(fortuna(1));
+  end
+  
+  class=ClassIndex(vote_index);
+end
+
+%% Build output %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+% Relative amount of classes in K neighbors for each classifiee
+
+if K==1,
+  P=zeros(N_data,N_class);
+  if nargout>1,
+    for i=1:N_data,
+      P(i,ClassIndex==class(i))=1;
+    end
+  end
+else
+  P=classcounter'./K;
+end
+
+% xMake class names to struct if they exist
+if ~isempty(classnames),
+  Class=Data;
+  for i=1:N_data,
+    Class.labels{i,1}=classnames{class(i)};
+  end
+else
+  Class=class;
+end
+
+
+%%% Subfunctions %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
+
+function [nos,names] = class2num(class)
+
+% Change string labels in map/data struct to integer numbers
+
+names = {};
+nos = zeros(length(class),1);
+for i=1:length(class)
+  if ~isempty(class{i}) & ~any(strcmp(class{i},names))
+    names=cat(1,names,class(i));
+  end
+end
+
+tmp_nos = (1:length(names))';
+for i=1:length(class)
+  if ~isempty(class{i})
+    nos(i,1) = find(strcmp(class{i},names));    
+  end
+end
+
+function d=distance(X,Y);
+
+% Euclidean distance matrix between row vectors in X and Y
+
+U=~isnan(Y); Y(~U)=0;
+V=~isnan(X); X(~V)=0;
+d=X.^2*U'+V*Y'.^2-2*X*Y';