wolffd@0
|
1 function [Class,P]=knn_old(Data, Proto, proto_class, K)
|
wolffd@0
|
2
|
wolffd@0
|
3 %KNN_OLD A K-nearest neighbor classifier using Euclidean distance
|
wolffd@0
|
4 %
|
wolffd@0
|
5 % [Class,P]=knn_old(Data, Proto, proto_class, K)
|
wolffd@0
|
6 %
|
wolffd@0
|
7 % [sM_class,P]=knn_old(sM, sData, [], 3);
|
wolffd@0
|
8 % [sD_class,P]=knn_old(sD, sM, class);
|
wolffd@0
|
9 % [class,P]=knn_old(data, proto, class);
|
wolffd@0
|
10 % [class,P]=knn_old(sData, sM, class,5);
|
wolffd@0
|
11 %
|
wolffd@0
|
12 % Input and output arguments ([]'s are optional):
|
wolffd@0
|
13 % Data (matrix) size Nxd, vectors to be classified (=classifiees)
|
wolffd@0
|
14 % (struct) map or data struct: map codebook vectors or
|
wolffd@0
|
15 % data vectors are considered as classifiees.
|
wolffd@0
|
16 % Proto (matrix) size Mxd, prototype vector matrix (=prototypes)
|
wolffd@0
|
17 % (struct) map or data struct: map codebook vectors or
|
wolffd@0
|
18 % data vectors are considered as prototypes.
|
wolffd@0
|
19 % [proto_class] (vector) size Nx1, integers 1,2,...,k indicating the
|
wolffd@0
|
20 % classes of corresponding protoptypes, default: see the
|
wolffd@0
|
21 % explanation below.
|
wolffd@0
|
22 % [K] (scalar) the K in KNN classifier, default is 1
|
wolffd@0
|
23 %
|
wolffd@0
|
24 % Class (matrix) size Nx1, vector of 1,2, ..., k indicating the class
|
wolffd@0
|
25 % desicion according to the KNN rule
|
wolffd@0
|
26 % P (matrix) size Nxk, the relative amount of prototypes of
|
wolffd@0
|
27 % each class among the K closest prototypes for
|
wolffd@0
|
28 % each classifiee.
|
wolffd@0
|
29 %
|
wolffd@0
|
30 % If 'proto_class' is _not_ given, 'Proto' _must_ be a labeled SOM
|
wolffd@0
|
31 % Toolbox struct. The label of the data vector or the first label of
|
wolffd@0
|
32 % the map model vector is considered as class label for th prototype
|
wolffd@0
|
33 % vector. In this case the output 'Class' is a copy of 'Data' (map or
|
wolffd@0
|
34 % data struct) relabeled according to the classification. If input
|
wolffd@0
|
35 % argument 'proto_class' _is_ given, the output argument 'Class' is
|
wolffd@0
|
36 % _always_ a vector of integers 1,2,...,k indiacating the class.
|
wolffd@0
|
37 %
|
wolffd@0
|
38 % If there is a tie between representatives of two or more classes
|
wolffd@0
|
39 % among the K closest neighbors to the classifiee, the class is
|
wolffd@0
|
40 % selected randomly among these candidates.
|
wolffd@0
|
41 %
|
wolffd@0
|
42 % IMPORTANT
|
wolffd@0
|
43 %
|
wolffd@0
|
44 % ** Even if prototype vectors are given in a map struct the mask _is not
|
wolffd@0
|
45 % taken into account_ when calculating Euclidean distance
|
wolffd@0
|
46 % ** The function calculates the total distance matrix between all
|
wolffd@0
|
47 % classifiees and prototype vectors. This results to an MxN matrix;
|
wolffd@0
|
48 % if N is high it is recommended to divide the matrix 'Data'
|
wolffd@0
|
49 % (the classifiees) into smaller sets in order to avoid memory
|
wolffd@0
|
50 % overflow or swapping. Also, if K>1 this function uses 'sort' which is
|
wolffd@0
|
51 % considerably slower than 'max' which is used for K==1.
|
wolffd@0
|
52 %
|
wolffd@0
|
53 % See also KNN, SOM_LABEL, SOM_AUTOLABEL
|
wolffd@0
|
54
|
wolffd@0
|
55 % Contributed to SOM Toolbox 2.0, February 11th, 2000 by Johan Himberg
|
wolffd@0
|
56 % Copyright (c) by Johan Himberg
|
wolffd@0
|
57 % http://www.cis.hut.fi/projects/somtoolbox/
|
wolffd@0
|
58
|
wolffd@0
|
59 % Version 2.0beta Johan 040200
|
wolffd@0
|
60
|
wolffd@0
|
61 %% Init %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
wolffd@0
|
62 % This must exist later
|
wolffd@0
|
63 classnames='';
|
wolffd@0
|
64
|
wolffd@0
|
65 % Check K
|
wolffd@0
|
66 if nargin<4 | isempty(K),
|
wolffd@0
|
67 K=1;
|
wolffd@0
|
68 end
|
wolffd@0
|
69
|
wolffd@0
|
70 if ~vis_valuetype(K,{'1x1'})
|
wolffd@0
|
71 error('Value for K must be a scalar.');
|
wolffd@0
|
72 end
|
wolffd@0
|
73
|
wolffd@0
|
74 % Take data from data or map struct
|
wolffd@0
|
75
|
wolffd@0
|
76 if isstruct(Data);
|
wolffd@0
|
77 if isfield(Data,'type') & ischar(Data.type),
|
wolffd@0
|
78 ;
|
wolffd@0
|
79 else
|
wolffd@0
|
80 error('Invalid map/data struct?');
|
wolffd@0
|
81 end
|
wolffd@0
|
82 switch Data.type
|
wolffd@0
|
83 case 'som_map'
|
wolffd@0
|
84 data=Data.codebook;
|
wolffd@0
|
85 case 'som_data'
|
wolffd@0
|
86 data=Data.data;
|
wolffd@0
|
87 end
|
wolffd@0
|
88 else
|
wolffd@0
|
89 % is already a matrix
|
wolffd@0
|
90 data=Data;
|
wolffd@0
|
91 end
|
wolffd@0
|
92
|
wolffd@0
|
93 % Take prototype vectors from prototype struct
|
wolffd@0
|
94
|
wolffd@0
|
95 if isstruct(Proto),
|
wolffd@0
|
96
|
wolffd@0
|
97 if isfield(Proto,'type') & ischar(Proto.type),
|
wolffd@0
|
98 ;
|
wolffd@0
|
99 else
|
wolffd@0
|
100 error('Invalid map/data struct?');
|
wolffd@0
|
101 end
|
wolffd@0
|
102 switch Proto.type
|
wolffd@0
|
103 case 'som_map'
|
wolffd@0
|
104 proto=Proto.codebook;
|
wolffd@0
|
105 case 'som_data'
|
wolffd@0
|
106 proto=Proto.data;
|
wolffd@0
|
107 end
|
wolffd@0
|
108 else
|
wolffd@0
|
109 % is already a matrix
|
wolffd@0
|
110 proto=Proto;
|
wolffd@0
|
111 end
|
wolffd@0
|
112
|
wolffd@0
|
113 % Check that inputs are matrices
|
wolffd@0
|
114 if ~vis_valuetype(proto,{'nxm'}) | ~vis_valuetype(data,{'nxm'}),
|
wolffd@0
|
115 error('Prototype or data input not valid.')
|
wolffd@0
|
116 end
|
wolffd@0
|
117
|
wolffd@0
|
118 % Record data&proto sizes and check their dims
|
wolffd@0
|
119 [N_data dim_data]=size(data);
|
wolffd@0
|
120 [N_proto dim_proto]=size(proto);
|
wolffd@0
|
121 if dim_proto ~= dim_data,
|
wolffd@0
|
122 error('Data and prototype vector dimension does not match.');
|
wolffd@0
|
123 end
|
wolffd@0
|
124
|
wolffd@0
|
125 % Check if the classes are given as labels (no class input arg.)
|
wolffd@0
|
126 % if they are take them from prototype struct
|
wolffd@0
|
127
|
wolffd@0
|
128 if nargin<3 | isempty(proto_class)
|
wolffd@0
|
129 if ~isstruct(Proto)
|
wolffd@0
|
130 error(['If prototypes are not in labeled map or data struct' ...
|
wolffd@0
|
131 'class must be given.']);
|
wolffd@0
|
132 % transform to interger (numerical) class labels
|
wolffd@0
|
133 else
|
wolffd@0
|
134 [proto_class,classnames]=class2num(Proto.labels);
|
wolffd@0
|
135 end
|
wolffd@0
|
136 end
|
wolffd@0
|
137
|
wolffd@0
|
138 % Check class label vector: must be numerical and of integers
|
wolffd@0
|
139 if ~vis_valuetype(proto_class,{[N_proto 1]});
|
wolffd@0
|
140 error(['Class vector is invalid: has to be a N-of-data_rows x 1' ...
|
wolffd@0
|
141 ' vector of integers']);
|
wolffd@0
|
142 elseif sum(fix(proto_class)-proto_class)~=0
|
wolffd@0
|
143 error('Class labels in vector ''Class'' must be integers.');
|
wolffd@0
|
144 end
|
wolffd@0
|
145
|
wolffd@0
|
146 % Find all class labels
|
wolffd@0
|
147 ClassIndex=unique(proto_class);
|
wolffd@0
|
148 N_class=length(ClassIndex); % number of different classes
|
wolffd@0
|
149
|
wolffd@0
|
150 % Calculate euclidean distances between classifiees and prototypes
|
wolffd@0
|
151 d=distance(proto,data);
|
wolffd@0
|
152
|
wolffd@0
|
153 %%%% Classification %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
wolffd@0
|
154
|
wolffd@0
|
155 if K==1, % sort distances only if K>1
|
wolffd@0
|
156
|
wolffd@0
|
157 % 1NN
|
wolffd@0
|
158 % Select the closest prototype
|
wolffd@0
|
159 [tmp,proto_index]=min(d);
|
wolffd@0
|
160 class=proto_class(proto_index);
|
wolffd@0
|
161
|
wolffd@0
|
162 else
|
wolffd@0
|
163
|
wolffd@0
|
164 % Sort the prototypes for each classifiee according to distance
|
wolffd@0
|
165 [tmp,proto_index]=sort(d);
|
wolffd@0
|
166
|
wolffd@0
|
167 %% Select K closest prototypes
|
wolffd@0
|
168 proto_index=proto_index(1:K,:);
|
wolffd@0
|
169 knn_class=proto_class(proto_index);
|
wolffd@0
|
170 for i=1:N_class,
|
wolffd@0
|
171 classcounter(i,:)=sum(knn_class==ClassIndex(i));
|
wolffd@0
|
172 end
|
wolffd@0
|
173
|
wolffd@0
|
174 %% Vote between classes of K neighbors
|
wolffd@0
|
175 [winner,vote_index]=max(classcounter);
|
wolffd@0
|
176
|
wolffd@0
|
177 %% Handle ties
|
wolffd@0
|
178
|
wolffd@0
|
179 % set index to clases that got as amuch votes as winner
|
wolffd@0
|
180
|
wolffd@0
|
181 equal_to_winner=(repmat(winner,N_class,1)==classcounter);
|
wolffd@0
|
182
|
wolffd@0
|
183 % set index to ties
|
wolffd@0
|
184 tie_index=find(sum(equal_to_winner)>1); % drop the winner from counter
|
wolffd@0
|
185
|
wolffd@0
|
186 % Go through equal classes and reset vote_index randomly to one
|
wolffd@0
|
187 % of them
|
wolffd@0
|
188
|
wolffd@0
|
189 for i=1:length(tie_index),
|
wolffd@0
|
190 tie_class_index=find(equal_to_winner(:,tie_index(i)));
|
wolffd@0
|
191 fortuna=randperm(length(tie_class_index));
|
wolffd@0
|
192 vote_index(tie_index(i))=tie_class_index(fortuna(1));
|
wolffd@0
|
193 end
|
wolffd@0
|
194
|
wolffd@0
|
195 class=ClassIndex(vote_index);
|
wolffd@0
|
196 end
|
wolffd@0
|
197
|
wolffd@0
|
198 %% Build output %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
wolffd@0
|
199
|
wolffd@0
|
200 % Relative amount of classes in K neighbors for each classifiee
|
wolffd@0
|
201
|
wolffd@0
|
202 if K==1,
|
wolffd@0
|
203 P=zeros(N_data,N_class);
|
wolffd@0
|
204 if nargout>1,
|
wolffd@0
|
205 for i=1:N_data,
|
wolffd@0
|
206 P(i,ClassIndex==class(i))=1;
|
wolffd@0
|
207 end
|
wolffd@0
|
208 end
|
wolffd@0
|
209 else
|
wolffd@0
|
210 P=classcounter'./K;
|
wolffd@0
|
211 end
|
wolffd@0
|
212
|
wolffd@0
|
213 % xMake class names to struct if they exist
|
wolffd@0
|
214 if ~isempty(classnames),
|
wolffd@0
|
215 Class=Data;
|
wolffd@0
|
216 for i=1:N_data,
|
wolffd@0
|
217 Class.labels{i,1}=classnames{class(i)};
|
wolffd@0
|
218 end
|
wolffd@0
|
219 else
|
wolffd@0
|
220 Class=class;
|
wolffd@0
|
221 end
|
wolffd@0
|
222
|
wolffd@0
|
223
|
wolffd@0
|
224 %%% Subfunctions %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
wolffd@0
|
225
|
wolffd@0
|
226 function [nos,names] = class2num(class)
|
wolffd@0
|
227
|
wolffd@0
|
228 % Change string labels in map/data struct to integer numbers
|
wolffd@0
|
229
|
wolffd@0
|
230 names = {};
|
wolffd@0
|
231 nos = zeros(length(class),1);
|
wolffd@0
|
232 for i=1:length(class)
|
wolffd@0
|
233 if ~isempty(class{i}) & ~any(strcmp(class{i},names))
|
wolffd@0
|
234 names=cat(1,names,class(i));
|
wolffd@0
|
235 end
|
wolffd@0
|
236 end
|
wolffd@0
|
237
|
wolffd@0
|
238 tmp_nos = (1:length(names))';
|
wolffd@0
|
239 for i=1:length(class)
|
wolffd@0
|
240 if ~isempty(class{i})
|
wolffd@0
|
241 nos(i,1) = find(strcmp(class{i},names));
|
wolffd@0
|
242 end
|
wolffd@0
|
243 end
|
wolffd@0
|
244
|
wolffd@0
|
245 function d=distance(X,Y);
|
wolffd@0
|
246
|
wolffd@0
|
247 % Euclidean distance matrix between row vectors in X and Y
|
wolffd@0
|
248
|
wolffd@0
|
249 U=~isnan(Y); Y(~U)=0;
|
wolffd@0
|
250 V=~isnan(X); X(~V)=0;
|
wolffd@0
|
251 d=X.^2*U'+V*Y'.^2-2*X*Y';
|