Mercurial > hg > camir-aes2014
comparison toolboxes/MIRtoolbox1.3.2/somtoolbox/knn_old.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e9a9cd732c1e |
---|---|
1 function [Class,P]=knn_old(Data, Proto, proto_class, K) | |
2 | |
3 %KNN_OLD A K-nearest neighbor classifier using Euclidean distance | |
4 % | |
5 % [Class,P]=knn_old(Data, Proto, proto_class, K) | |
6 % | |
7 % [sM_class,P]=knn_old(sM, sData, [], 3); | |
8 % [sD_class,P]=knn_old(sD, sM, class); | |
9 % [class,P]=knn_old(data, proto, class); | |
10 % [class,P]=knn_old(sData, sM, class,5); | |
11 % | |
12 % Input and output arguments ([]'s are optional): | |
13 % Data (matrix) size Nxd, vectors to be classified (=classifiees) | |
14 % (struct) map or data struct: map codebook vectors or | |
15 % data vectors are considered as classifiees. | |
16 % Proto (matrix) size Mxd, prototype vector matrix (=prototypes) | |
17 % (struct) map or data struct: map codebook vectors or | |
18 % data vectors are considered as prototypes. | |
19 % [proto_class] (vector) size Nx1, integers 1,2,...,k indicating the | |
20 % classes of corresponding protoptypes, default: see the | |
21 % explanation below. | |
22 % [K] (scalar) the K in KNN classifier, default is 1 | |
23 % | |
24 % Class (matrix) size Nx1, vector of 1,2, ..., k indicating the class | |
25 % desicion according to the KNN rule | |
26 % P (matrix) size Nxk, the relative amount of prototypes of | |
27 % each class among the K closest prototypes for | |
28 % each classifiee. | |
29 % | |
30 % If 'proto_class' is _not_ given, 'Proto' _must_ be a labeled SOM | |
31 % Toolbox struct. The label of the data vector or the first label of | |
32 % the map model vector is considered as class label for th prototype | |
33 % vector. In this case the output 'Class' is a copy of 'Data' (map or | |
34 % data struct) relabeled according to the classification. If input | |
35 % argument 'proto_class' _is_ given, the output argument 'Class' is | |
36 % _always_ a vector of integers 1,2,...,k indiacating the class. | |
37 % | |
38 % If there is a tie between representatives of two or more classes | |
39 % among the K closest neighbors to the classifiee, the class is | |
40 % selected randomly among these candidates. | |
41 % | |
42 % IMPORTANT | |
43 % | |
44 % ** Even if prototype vectors are given in a map struct the mask _is not | |
45 % taken into account_ when calculating Euclidean distance | |
46 % ** The function calculates the total distance matrix between all | |
47 % classifiees and prototype vectors. This results to an MxN matrix; | |
48 % if N is high it is recommended to divide the matrix 'Data' | |
49 % (the classifiees) into smaller sets in order to avoid memory | |
50 % overflow or swapping. Also, if K>1 this function uses 'sort' which is | |
51 % considerably slower than 'max' which is used for K==1. | |
52 % | |
53 % See also KNN, SOM_LABEL, SOM_AUTOLABEL | |
54 | |
55 % Contributed to SOM Toolbox 2.0, February 11th, 2000 by Johan Himberg | |
56 % Copyright (c) by Johan Himberg | |
57 % http://www.cis.hut.fi/projects/somtoolbox/ | |
58 | |
59 % Version 2.0beta Johan 040200 | |
60 | |
61 %% Init %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |
62 % This must exist later | |
63 classnames=''; | |
64 | |
65 % Check K | |
66 if nargin<4 | isempty(K), | |
67 K=1; | |
68 end | |
69 | |
70 if ~vis_valuetype(K,{'1x1'}) | |
71 error('Value for K must be a scalar.'); | |
72 end | |
73 | |
74 % Take data from data or map struct | |
75 | |
76 if isstruct(Data); | |
77 if isfield(Data,'type') & ischar(Data.type), | |
78 ; | |
79 else | |
80 error('Invalid map/data struct?'); | |
81 end | |
82 switch Data.type | |
83 case 'som_map' | |
84 data=Data.codebook; | |
85 case 'som_data' | |
86 data=Data.data; | |
87 end | |
88 else | |
89 % is already a matrix | |
90 data=Data; | |
91 end | |
92 | |
93 % Take prototype vectors from prototype struct | |
94 | |
95 if isstruct(Proto), | |
96 | |
97 if isfield(Proto,'type') & ischar(Proto.type), | |
98 ; | |
99 else | |
100 error('Invalid map/data struct?'); | |
101 end | |
102 switch Proto.type | |
103 case 'som_map' | |
104 proto=Proto.codebook; | |
105 case 'som_data' | |
106 proto=Proto.data; | |
107 end | |
108 else | |
109 % is already a matrix | |
110 proto=Proto; | |
111 end | |
112 | |
113 % Check that inputs are matrices | |
114 if ~vis_valuetype(proto,{'nxm'}) | ~vis_valuetype(data,{'nxm'}), | |
115 error('Prototype or data input not valid.') | |
116 end | |
117 | |
118 % Record data&proto sizes and check their dims | |
119 [N_data dim_data]=size(data); | |
120 [N_proto dim_proto]=size(proto); | |
121 if dim_proto ~= dim_data, | |
122 error('Data and prototype vector dimension does not match.'); | |
123 end | |
124 | |
125 % Check if the classes are given as labels (no class input arg.) | |
126 % if they are take them from prototype struct | |
127 | |
128 if nargin<3 | isempty(proto_class) | |
129 if ~isstruct(Proto) | |
130 error(['If prototypes are not in labeled map or data struct' ... | |
131 'class must be given.']); | |
132 % transform to interger (numerical) class labels | |
133 else | |
134 [proto_class,classnames]=class2num(Proto.labels); | |
135 end | |
136 end | |
137 | |
138 % Check class label vector: must be numerical and of integers | |
139 if ~vis_valuetype(proto_class,{[N_proto 1]}); | |
140 error(['Class vector is invalid: has to be a N-of-data_rows x 1' ... | |
141 ' vector of integers']); | |
142 elseif sum(fix(proto_class)-proto_class)~=0 | |
143 error('Class labels in vector ''Class'' must be integers.'); | |
144 end | |
145 | |
146 % Find all class labels | |
147 ClassIndex=unique(proto_class); | |
148 N_class=length(ClassIndex); % number of different classes | |
149 | |
150 % Calculate euclidean distances between classifiees and prototypes | |
151 d=distance(proto,data); | |
152 | |
153 %%%% Classification %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |
154 | |
155 if K==1, % sort distances only if K>1 | |
156 | |
157 % 1NN | |
158 % Select the closest prototype | |
159 [tmp,proto_index]=min(d); | |
160 class=proto_class(proto_index); | |
161 | |
162 else | |
163 | |
164 % Sort the prototypes for each classifiee according to distance | |
165 [tmp,proto_index]=sort(d); | |
166 | |
167 %% Select K closest prototypes | |
168 proto_index=proto_index(1:K,:); | |
169 knn_class=proto_class(proto_index); | |
170 for i=1:N_class, | |
171 classcounter(i,:)=sum(knn_class==ClassIndex(i)); | |
172 end | |
173 | |
174 %% Vote between classes of K neighbors | |
175 [winner,vote_index]=max(classcounter); | |
176 | |
177 %% Handle ties | |
178 | |
179 % set index to clases that got as amuch votes as winner | |
180 | |
181 equal_to_winner=(repmat(winner,N_class,1)==classcounter); | |
182 | |
183 % set index to ties | |
184 tie_index=find(sum(equal_to_winner)>1); % drop the winner from counter | |
185 | |
186 % Go through equal classes and reset vote_index randomly to one | |
187 % of them | |
188 | |
189 for i=1:length(tie_index), | |
190 tie_class_index=find(equal_to_winner(:,tie_index(i))); | |
191 fortuna=randperm(length(tie_class_index)); | |
192 vote_index(tie_index(i))=tie_class_index(fortuna(1)); | |
193 end | |
194 | |
195 class=ClassIndex(vote_index); | |
196 end | |
197 | |
198 %% Build output %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |
199 | |
200 % Relative amount of classes in K neighbors for each classifiee | |
201 | |
202 if K==1, | |
203 P=zeros(N_data,N_class); | |
204 if nargout>1, | |
205 for i=1:N_data, | |
206 P(i,ClassIndex==class(i))=1; | |
207 end | |
208 end | |
209 else | |
210 P=classcounter'./K; | |
211 end | |
212 | |
213 % xMake class names to struct if they exist | |
214 if ~isempty(classnames), | |
215 Class=Data; | |
216 for i=1:N_data, | |
217 Class.labels{i,1}=classnames{class(i)}; | |
218 end | |
219 else | |
220 Class=class; | |
221 end | |
222 | |
223 | |
224 %%% Subfunctions %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |
225 | |
226 function [nos,names] = class2num(class) | |
227 | |
228 % Change string labels in map/data struct to integer numbers | |
229 | |
230 names = {}; | |
231 nos = zeros(length(class),1); | |
232 for i=1:length(class) | |
233 if ~isempty(class{i}) & ~any(strcmp(class{i},names)) | |
234 names=cat(1,names,class(i)); | |
235 end | |
236 end | |
237 | |
238 tmp_nos = (1:length(names))'; | |
239 for i=1:length(class) | |
240 if ~isempty(class{i}) | |
241 nos(i,1) = find(strcmp(class{i},names)); | |
242 end | |
243 end | |
244 | |
245 function d=distance(X,Y); | |
246 | |
247 % Euclidean distance matrix between row vectors in X and Y | |
248 | |
249 U=~isnan(Y); Y(~U)=0; | |
250 V=~isnan(X); X(~V)=0; | |
251 d=X.^2*U'+V*Y'.^2-2*X*Y'; |