annotate toolboxes/MIRtoolbox1.3.2/somtoolbox/knn_old.m @ 0:cc4b1211e677 tip

initial commit to HG from Changeset: 646 (e263d8a21543) added further path and more save "camirversion.m"
author Daniel Wolff
date Fri, 19 Aug 2016 13:07:06 +0200
parents
children
rev   line source
Daniel@0 1 function [Class,P]=knn_old(Data, Proto, proto_class, K)
Daniel@0 2
Daniel@0 3 %KNN_OLD A K-nearest neighbor classifier using Euclidean distance
Daniel@0 4 %
Daniel@0 5 % [Class,P]=knn_old(Data, Proto, proto_class, K)
Daniel@0 6 %
Daniel@0 7 % [sM_class,P]=knn_old(sM, sData, [], 3);
Daniel@0 8 % [sD_class,P]=knn_old(sD, sM, class);
Daniel@0 9 % [class,P]=knn_old(data, proto, class);
Daniel@0 10 % [class,P]=knn_old(sData, sM, class,5);
Daniel@0 11 %
Daniel@0 12 % Input and output arguments ([]'s are optional):
Daniel@0 13 % Data (matrix) size Nxd, vectors to be classified (=classifiees)
Daniel@0 14 % (struct) map or data struct: map codebook vectors or
Daniel@0 15 % data vectors are considered as classifiees.
Daniel@0 16 % Proto (matrix) size Mxd, prototype vector matrix (=prototypes)
Daniel@0 17 % (struct) map or data struct: map codebook vectors or
Daniel@0 18 % data vectors are considered as prototypes.
Daniel@0 19 % [proto_class] (vector) size Nx1, integers 1,2,...,k indicating the
Daniel@0 20 % classes of corresponding protoptypes, default: see the
Daniel@0 21 % explanation below.
Daniel@0 22 % [K] (scalar) the K in KNN classifier, default is 1
Daniel@0 23 %
Daniel@0 24 % Class (matrix) size Nx1, vector of 1,2, ..., k indicating the class
Daniel@0 25 % desicion according to the KNN rule
Daniel@0 26 % P (matrix) size Nxk, the relative amount of prototypes of
Daniel@0 27 % each class among the K closest prototypes for
Daniel@0 28 % each classifiee.
Daniel@0 29 %
Daniel@0 30 % If 'proto_class' is _not_ given, 'Proto' _must_ be a labeled SOM
Daniel@0 31 % Toolbox struct. The label of the data vector or the first label of
Daniel@0 32 % the map model vector is considered as class label for th prototype
Daniel@0 33 % vector. In this case the output 'Class' is a copy of 'Data' (map or
Daniel@0 34 % data struct) relabeled according to the classification. If input
Daniel@0 35 % argument 'proto_class' _is_ given, the output argument 'Class' is
Daniel@0 36 % _always_ a vector of integers 1,2,...,k indiacating the class.
Daniel@0 37 %
Daniel@0 38 % If there is a tie between representatives of two or more classes
Daniel@0 39 % among the K closest neighbors to the classifiee, the class is
Daniel@0 40 % selected randomly among these candidates.
Daniel@0 41 %
Daniel@0 42 % IMPORTANT
Daniel@0 43 %
Daniel@0 44 % ** Even if prototype vectors are given in a map struct the mask _is not
Daniel@0 45 % taken into account_ when calculating Euclidean distance
Daniel@0 46 % ** The function calculates the total distance matrix between all
Daniel@0 47 % classifiees and prototype vectors. This results to an MxN matrix;
Daniel@0 48 % if N is high it is recommended to divide the matrix 'Data'
Daniel@0 49 % (the classifiees) into smaller sets in order to avoid memory
Daniel@0 50 % overflow or swapping. Also, if K>1 this function uses 'sort' which is
Daniel@0 51 % considerably slower than 'max' which is used for K==1.
Daniel@0 52 %
Daniel@0 53 % See also KNN, SOM_LABEL, SOM_AUTOLABEL
Daniel@0 54
Daniel@0 55 % Contributed to SOM Toolbox 2.0, February 11th, 2000 by Johan Himberg
Daniel@0 56 % Copyright (c) by Johan Himberg
Daniel@0 57 % http://www.cis.hut.fi/projects/somtoolbox/
Daniel@0 58
Daniel@0 59 % Version 2.0beta Johan 040200
Daniel@0 60
Daniel@0 61 %% Init %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Daniel@0 62 % This must exist later
Daniel@0 63 classnames='';
Daniel@0 64
Daniel@0 65 % Check K
Daniel@0 66 if nargin<4 | isempty(K),
Daniel@0 67 K=1;
Daniel@0 68 end
Daniel@0 69
Daniel@0 70 if ~vis_valuetype(K,{'1x1'})
Daniel@0 71 error('Value for K must be a scalar.');
Daniel@0 72 end
Daniel@0 73
Daniel@0 74 % Take data from data or map struct
Daniel@0 75
Daniel@0 76 if isstruct(Data);
Daniel@0 77 if isfield(Data,'type') & ischar(Data.type),
Daniel@0 78 ;
Daniel@0 79 else
Daniel@0 80 error('Invalid map/data struct?');
Daniel@0 81 end
Daniel@0 82 switch Data.type
Daniel@0 83 case 'som_map'
Daniel@0 84 data=Data.codebook;
Daniel@0 85 case 'som_data'
Daniel@0 86 data=Data.data;
Daniel@0 87 end
Daniel@0 88 else
Daniel@0 89 % is already a matrix
Daniel@0 90 data=Data;
Daniel@0 91 end
Daniel@0 92
Daniel@0 93 % Take prototype vectors from prototype struct
Daniel@0 94
Daniel@0 95 if isstruct(Proto),
Daniel@0 96
Daniel@0 97 if isfield(Proto,'type') & ischar(Proto.type),
Daniel@0 98 ;
Daniel@0 99 else
Daniel@0 100 error('Invalid map/data struct?');
Daniel@0 101 end
Daniel@0 102 switch Proto.type
Daniel@0 103 case 'som_map'
Daniel@0 104 proto=Proto.codebook;
Daniel@0 105 case 'som_data'
Daniel@0 106 proto=Proto.data;
Daniel@0 107 end
Daniel@0 108 else
Daniel@0 109 % is already a matrix
Daniel@0 110 proto=Proto;
Daniel@0 111 end
Daniel@0 112
Daniel@0 113 % Check that inputs are matrices
Daniel@0 114 if ~vis_valuetype(proto,{'nxm'}) | ~vis_valuetype(data,{'nxm'}),
Daniel@0 115 error('Prototype or data input not valid.')
Daniel@0 116 end
Daniel@0 117
Daniel@0 118 % Record data&proto sizes and check their dims
Daniel@0 119 [N_data dim_data]=size(data);
Daniel@0 120 [N_proto dim_proto]=size(proto);
Daniel@0 121 if dim_proto ~= dim_data,
Daniel@0 122 error('Data and prototype vector dimension does not match.');
Daniel@0 123 end
Daniel@0 124
Daniel@0 125 % Check if the classes are given as labels (no class input arg.)
Daniel@0 126 % if they are take them from prototype struct
Daniel@0 127
Daniel@0 128 if nargin<3 | isempty(proto_class)
Daniel@0 129 if ~isstruct(Proto)
Daniel@0 130 error(['If prototypes are not in labeled map or data struct' ...
Daniel@0 131 'class must be given.']);
Daniel@0 132 % transform to interger (numerical) class labels
Daniel@0 133 else
Daniel@0 134 [proto_class,classnames]=class2num(Proto.labels);
Daniel@0 135 end
Daniel@0 136 end
Daniel@0 137
Daniel@0 138 % Check class label vector: must be numerical and of integers
Daniel@0 139 if ~vis_valuetype(proto_class,{[N_proto 1]});
Daniel@0 140 error(['Class vector is invalid: has to be a N-of-data_rows x 1' ...
Daniel@0 141 ' vector of integers']);
Daniel@0 142 elseif sum(fix(proto_class)-proto_class)~=0
Daniel@0 143 error('Class labels in vector ''Class'' must be integers.');
Daniel@0 144 end
Daniel@0 145
Daniel@0 146 % Find all class labels
Daniel@0 147 ClassIndex=unique(proto_class);
Daniel@0 148 N_class=length(ClassIndex); % number of different classes
Daniel@0 149
Daniel@0 150 % Calculate euclidean distances between classifiees and prototypes
Daniel@0 151 d=distance(proto,data);
Daniel@0 152
Daniel@0 153 %%%% Classification %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Daniel@0 154
Daniel@0 155 if K==1, % sort distances only if K>1
Daniel@0 156
Daniel@0 157 % 1NN
Daniel@0 158 % Select the closest prototype
Daniel@0 159 [tmp,proto_index]=min(d);
Daniel@0 160 class=proto_class(proto_index);
Daniel@0 161
Daniel@0 162 else
Daniel@0 163
Daniel@0 164 % Sort the prototypes for each classifiee according to distance
Daniel@0 165 [tmp,proto_index]=sort(d);
Daniel@0 166
Daniel@0 167 %% Select K closest prototypes
Daniel@0 168 proto_index=proto_index(1:K,:);
Daniel@0 169 knn_class=proto_class(proto_index);
Daniel@0 170 for i=1:N_class,
Daniel@0 171 classcounter(i,:)=sum(knn_class==ClassIndex(i));
Daniel@0 172 end
Daniel@0 173
Daniel@0 174 %% Vote between classes of K neighbors
Daniel@0 175 [winner,vote_index]=max(classcounter);
Daniel@0 176
Daniel@0 177 %% Handle ties
Daniel@0 178
Daniel@0 179 % set index to clases that got as amuch votes as winner
Daniel@0 180
Daniel@0 181 equal_to_winner=(repmat(winner,N_class,1)==classcounter);
Daniel@0 182
Daniel@0 183 % set index to ties
Daniel@0 184 tie_index=find(sum(equal_to_winner)>1); % drop the winner from counter
Daniel@0 185
Daniel@0 186 % Go through equal classes and reset vote_index randomly to one
Daniel@0 187 % of them
Daniel@0 188
Daniel@0 189 for i=1:length(tie_index),
Daniel@0 190 tie_class_index=find(equal_to_winner(:,tie_index(i)));
Daniel@0 191 fortuna=randperm(length(tie_class_index));
Daniel@0 192 vote_index(tie_index(i))=tie_class_index(fortuna(1));
Daniel@0 193 end
Daniel@0 194
Daniel@0 195 class=ClassIndex(vote_index);
Daniel@0 196 end
Daniel@0 197
Daniel@0 198 %% Build output %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Daniel@0 199
Daniel@0 200 % Relative amount of classes in K neighbors for each classifiee
Daniel@0 201
Daniel@0 202 if K==1,
Daniel@0 203 P=zeros(N_data,N_class);
Daniel@0 204 if nargout>1,
Daniel@0 205 for i=1:N_data,
Daniel@0 206 P(i,ClassIndex==class(i))=1;
Daniel@0 207 end
Daniel@0 208 end
Daniel@0 209 else
Daniel@0 210 P=classcounter'./K;
Daniel@0 211 end
Daniel@0 212
Daniel@0 213 % xMake class names to struct if they exist
Daniel@0 214 if ~isempty(classnames),
Daniel@0 215 Class=Data;
Daniel@0 216 for i=1:N_data,
Daniel@0 217 Class.labels{i,1}=classnames{class(i)};
Daniel@0 218 end
Daniel@0 219 else
Daniel@0 220 Class=class;
Daniel@0 221 end
Daniel@0 222
Daniel@0 223
Daniel@0 224 %%% Subfunctions %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Daniel@0 225
Daniel@0 226 function [nos,names] = class2num(class)
Daniel@0 227
Daniel@0 228 % Change string labels in map/data struct to integer numbers
Daniel@0 229
Daniel@0 230 names = {};
Daniel@0 231 nos = zeros(length(class),1);
Daniel@0 232 for i=1:length(class)
Daniel@0 233 if ~isempty(class{i}) & ~any(strcmp(class{i},names))
Daniel@0 234 names=cat(1,names,class(i));
Daniel@0 235 end
Daniel@0 236 end
Daniel@0 237
Daniel@0 238 tmp_nos = (1:length(names))';
Daniel@0 239 for i=1:length(class)
Daniel@0 240 if ~isempty(class{i})
Daniel@0 241 nos(i,1) = find(strcmp(class{i},names));
Daniel@0 242 end
Daniel@0 243 end
Daniel@0 244
Daniel@0 245 function d=distance(X,Y);
Daniel@0 246
Daniel@0 247 % Euclidean distance matrix between row vectors in X and Y
Daniel@0 248
Daniel@0 249 U=~isnan(Y); Y(~U)=0;
Daniel@0 250 V=~isnan(X); X(~V)=0;
Daniel@0 251 d=X.^2*U'+V*Y'.^2-2*X*Y';