annotate toolboxes/MIRtoolbox1.3.2/somtoolbox/knn_old.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
wolffd@0 1 function [Class,P]=knn_old(Data, Proto, proto_class, K)
wolffd@0 2
wolffd@0 3 %KNN_OLD A K-nearest neighbor classifier using Euclidean distance
wolffd@0 4 %
wolffd@0 5 % [Class,P]=knn_old(Data, Proto, proto_class, K)
wolffd@0 6 %
wolffd@0 7 % [sM_class,P]=knn_old(sM, sData, [], 3);
wolffd@0 8 % [sD_class,P]=knn_old(sD, sM, class);
wolffd@0 9 % [class,P]=knn_old(data, proto, class);
wolffd@0 10 % [class,P]=knn_old(sData, sM, class,5);
wolffd@0 11 %
wolffd@0 12 % Input and output arguments ([]'s are optional):
wolffd@0 13 % Data (matrix) size Nxd, vectors to be classified (=classifiees)
wolffd@0 14 % (struct) map or data struct: map codebook vectors or
wolffd@0 15 % data vectors are considered as classifiees.
wolffd@0 16 % Proto (matrix) size Mxd, prototype vector matrix (=prototypes)
wolffd@0 17 % (struct) map or data struct: map codebook vectors or
wolffd@0 18 % data vectors are considered as prototypes.
wolffd@0 19 % [proto_class] (vector) size Nx1, integers 1,2,...,k indicating the
wolffd@0 20 % classes of corresponding protoptypes, default: see the
wolffd@0 21 % explanation below.
wolffd@0 22 % [K] (scalar) the K in KNN classifier, default is 1
wolffd@0 23 %
wolffd@0 24 % Class (matrix) size Nx1, vector of 1,2, ..., k indicating the class
wolffd@0 25 % desicion according to the KNN rule
wolffd@0 26 % P (matrix) size Nxk, the relative amount of prototypes of
wolffd@0 27 % each class among the K closest prototypes for
wolffd@0 28 % each classifiee.
wolffd@0 29 %
wolffd@0 30 % If 'proto_class' is _not_ given, 'Proto' _must_ be a labeled SOM
wolffd@0 31 % Toolbox struct. The label of the data vector or the first label of
wolffd@0 32 % the map model vector is considered as class label for th prototype
wolffd@0 33 % vector. In this case the output 'Class' is a copy of 'Data' (map or
wolffd@0 34 % data struct) relabeled according to the classification. If input
wolffd@0 35 % argument 'proto_class' _is_ given, the output argument 'Class' is
wolffd@0 36 % _always_ a vector of integers 1,2,...,k indiacating the class.
wolffd@0 37 %
wolffd@0 38 % If there is a tie between representatives of two or more classes
wolffd@0 39 % among the K closest neighbors to the classifiee, the class is
wolffd@0 40 % selected randomly among these candidates.
wolffd@0 41 %
wolffd@0 42 % IMPORTANT
wolffd@0 43 %
wolffd@0 44 % ** Even if prototype vectors are given in a map struct the mask _is not
wolffd@0 45 % taken into account_ when calculating Euclidean distance
wolffd@0 46 % ** The function calculates the total distance matrix between all
wolffd@0 47 % classifiees and prototype vectors. This results to an MxN matrix;
wolffd@0 48 % if N is high it is recommended to divide the matrix 'Data'
wolffd@0 49 % (the classifiees) into smaller sets in order to avoid memory
wolffd@0 50 % overflow or swapping. Also, if K>1 this function uses 'sort' which is
wolffd@0 51 % considerably slower than 'max' which is used for K==1.
wolffd@0 52 %
wolffd@0 53 % See also KNN, SOM_LABEL, SOM_AUTOLABEL
wolffd@0 54
wolffd@0 55 % Contributed to SOM Toolbox 2.0, February 11th, 2000 by Johan Himberg
wolffd@0 56 % Copyright (c) by Johan Himberg
wolffd@0 57 % http://www.cis.hut.fi/projects/somtoolbox/
wolffd@0 58
wolffd@0 59 % Version 2.0beta Johan 040200
wolffd@0 60
wolffd@0 61 %% Init %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wolffd@0 62 % This must exist later
wolffd@0 63 classnames='';
wolffd@0 64
wolffd@0 65 % Check K
wolffd@0 66 if nargin<4 | isempty(K),
wolffd@0 67 K=1;
wolffd@0 68 end
wolffd@0 69
wolffd@0 70 if ~vis_valuetype(K,{'1x1'})
wolffd@0 71 error('Value for K must be a scalar.');
wolffd@0 72 end
wolffd@0 73
wolffd@0 74 % Take data from data or map struct
wolffd@0 75
wolffd@0 76 if isstruct(Data);
wolffd@0 77 if isfield(Data,'type') & ischar(Data.type),
wolffd@0 78 ;
wolffd@0 79 else
wolffd@0 80 error('Invalid map/data struct?');
wolffd@0 81 end
wolffd@0 82 switch Data.type
wolffd@0 83 case 'som_map'
wolffd@0 84 data=Data.codebook;
wolffd@0 85 case 'som_data'
wolffd@0 86 data=Data.data;
wolffd@0 87 end
wolffd@0 88 else
wolffd@0 89 % is already a matrix
wolffd@0 90 data=Data;
wolffd@0 91 end
wolffd@0 92
wolffd@0 93 % Take prototype vectors from prototype struct
wolffd@0 94
wolffd@0 95 if isstruct(Proto),
wolffd@0 96
wolffd@0 97 if isfield(Proto,'type') & ischar(Proto.type),
wolffd@0 98 ;
wolffd@0 99 else
wolffd@0 100 error('Invalid map/data struct?');
wolffd@0 101 end
wolffd@0 102 switch Proto.type
wolffd@0 103 case 'som_map'
wolffd@0 104 proto=Proto.codebook;
wolffd@0 105 case 'som_data'
wolffd@0 106 proto=Proto.data;
wolffd@0 107 end
wolffd@0 108 else
wolffd@0 109 % is already a matrix
wolffd@0 110 proto=Proto;
wolffd@0 111 end
wolffd@0 112
wolffd@0 113 % Check that inputs are matrices
wolffd@0 114 if ~vis_valuetype(proto,{'nxm'}) | ~vis_valuetype(data,{'nxm'}),
wolffd@0 115 error('Prototype or data input not valid.')
wolffd@0 116 end
wolffd@0 117
wolffd@0 118 % Record data&proto sizes and check their dims
wolffd@0 119 [N_data dim_data]=size(data);
wolffd@0 120 [N_proto dim_proto]=size(proto);
wolffd@0 121 if dim_proto ~= dim_data,
wolffd@0 122 error('Data and prototype vector dimension does not match.');
wolffd@0 123 end
wolffd@0 124
wolffd@0 125 % Check if the classes are given as labels (no class input arg.)
wolffd@0 126 % if they are take them from prototype struct
wolffd@0 127
wolffd@0 128 if nargin<3 | isempty(proto_class)
wolffd@0 129 if ~isstruct(Proto)
wolffd@0 130 error(['If prototypes are not in labeled map or data struct' ...
wolffd@0 131 'class must be given.']);
wolffd@0 132 % transform to interger (numerical) class labels
wolffd@0 133 else
wolffd@0 134 [proto_class,classnames]=class2num(Proto.labels);
wolffd@0 135 end
wolffd@0 136 end
wolffd@0 137
wolffd@0 138 % Check class label vector: must be numerical and of integers
wolffd@0 139 if ~vis_valuetype(proto_class,{[N_proto 1]});
wolffd@0 140 error(['Class vector is invalid: has to be a N-of-data_rows x 1' ...
wolffd@0 141 ' vector of integers']);
wolffd@0 142 elseif sum(fix(proto_class)-proto_class)~=0
wolffd@0 143 error('Class labels in vector ''Class'' must be integers.');
wolffd@0 144 end
wolffd@0 145
wolffd@0 146 % Find all class labels
wolffd@0 147 ClassIndex=unique(proto_class);
wolffd@0 148 N_class=length(ClassIndex); % number of different classes
wolffd@0 149
wolffd@0 150 % Calculate euclidean distances between classifiees and prototypes
wolffd@0 151 d=distance(proto,data);
wolffd@0 152
wolffd@0 153 %%%% Classification %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wolffd@0 154
wolffd@0 155 if K==1, % sort distances only if K>1
wolffd@0 156
wolffd@0 157 % 1NN
wolffd@0 158 % Select the closest prototype
wolffd@0 159 [tmp,proto_index]=min(d);
wolffd@0 160 class=proto_class(proto_index);
wolffd@0 161
wolffd@0 162 else
wolffd@0 163
wolffd@0 164 % Sort the prototypes for each classifiee according to distance
wolffd@0 165 [tmp,proto_index]=sort(d);
wolffd@0 166
wolffd@0 167 %% Select K closest prototypes
wolffd@0 168 proto_index=proto_index(1:K,:);
wolffd@0 169 knn_class=proto_class(proto_index);
wolffd@0 170 for i=1:N_class,
wolffd@0 171 classcounter(i,:)=sum(knn_class==ClassIndex(i));
wolffd@0 172 end
wolffd@0 173
wolffd@0 174 %% Vote between classes of K neighbors
wolffd@0 175 [winner,vote_index]=max(classcounter);
wolffd@0 176
wolffd@0 177 %% Handle ties
wolffd@0 178
wolffd@0 179 % set index to clases that got as amuch votes as winner
wolffd@0 180
wolffd@0 181 equal_to_winner=(repmat(winner,N_class,1)==classcounter);
wolffd@0 182
wolffd@0 183 % set index to ties
wolffd@0 184 tie_index=find(sum(equal_to_winner)>1); % drop the winner from counter
wolffd@0 185
wolffd@0 186 % Go through equal classes and reset vote_index randomly to one
wolffd@0 187 % of them
wolffd@0 188
wolffd@0 189 for i=1:length(tie_index),
wolffd@0 190 tie_class_index=find(equal_to_winner(:,tie_index(i)));
wolffd@0 191 fortuna=randperm(length(tie_class_index));
wolffd@0 192 vote_index(tie_index(i))=tie_class_index(fortuna(1));
wolffd@0 193 end
wolffd@0 194
wolffd@0 195 class=ClassIndex(vote_index);
wolffd@0 196 end
wolffd@0 197
wolffd@0 198 %% Build output %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wolffd@0 199
wolffd@0 200 % Relative amount of classes in K neighbors for each classifiee
wolffd@0 201
wolffd@0 202 if K==1,
wolffd@0 203 P=zeros(N_data,N_class);
wolffd@0 204 if nargout>1,
wolffd@0 205 for i=1:N_data,
wolffd@0 206 P(i,ClassIndex==class(i))=1;
wolffd@0 207 end
wolffd@0 208 end
wolffd@0 209 else
wolffd@0 210 P=classcounter'./K;
wolffd@0 211 end
wolffd@0 212
wolffd@0 213 % xMake class names to struct if they exist
wolffd@0 214 if ~isempty(classnames),
wolffd@0 215 Class=Data;
wolffd@0 216 for i=1:N_data,
wolffd@0 217 Class.labels{i,1}=classnames{class(i)};
wolffd@0 218 end
wolffd@0 219 else
wolffd@0 220 Class=class;
wolffd@0 221 end
wolffd@0 222
wolffd@0 223
wolffd@0 224 %%% Subfunctions %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wolffd@0 225
wolffd@0 226 function [nos,names] = class2num(class)
wolffd@0 227
wolffd@0 228 % Change string labels in map/data struct to integer numbers
wolffd@0 229
wolffd@0 230 names = {};
wolffd@0 231 nos = zeros(length(class),1);
wolffd@0 232 for i=1:length(class)
wolffd@0 233 if ~isempty(class{i}) & ~any(strcmp(class{i},names))
wolffd@0 234 names=cat(1,names,class(i));
wolffd@0 235 end
wolffd@0 236 end
wolffd@0 237
wolffd@0 238 tmp_nos = (1:length(names))';
wolffd@0 239 for i=1:length(class)
wolffd@0 240 if ~isempty(class{i})
wolffd@0 241 nos(i,1) = find(strcmp(class{i},names));
wolffd@0 242 end
wolffd@0 243 end
wolffd@0 244
wolffd@0 245 function d=distance(X,Y);
wolffd@0 246
wolffd@0 247 % Euclidean distance matrix between row vectors in X and Y
wolffd@0 248
wolffd@0 249 U=~isnan(Y); Y(~U)=0;
wolffd@0 250 V=~isnan(X); X(~V)=0;
wolffd@0 251 d=X.^2*U'+V*Y'.^2-2*X*Y';