Daniel@0: function [Class,P]=knn_old(Data, Proto, proto_class, K) Daniel@0: Daniel@0: %KNN_OLD A K-nearest neighbor classifier using Euclidean distance Daniel@0: % Daniel@0: % [Class,P]=knn_old(Data, Proto, proto_class, K) Daniel@0: % Daniel@0: % [sM_class,P]=knn_old(sM, sData, [], 3); Daniel@0: % [sD_class,P]=knn_old(sD, sM, class); Daniel@0: % [class,P]=knn_old(data, proto, class); Daniel@0: % [class,P]=knn_old(sData, sM, class,5); Daniel@0: % Daniel@0: % Input and output arguments ([]'s are optional): Daniel@0: % Data (matrix) size Nxd, vectors to be classified (=classifiees) Daniel@0: % (struct) map or data struct: map codebook vectors or Daniel@0: % data vectors are considered as classifiees. Daniel@0: % Proto (matrix) size Mxd, prototype vector matrix (=prototypes) Daniel@0: % (struct) map or data struct: map codebook vectors or Daniel@0: % data vectors are considered as prototypes. Daniel@0: % [proto_class] (vector) size Nx1, integers 1,2,...,k indicating the Daniel@0: % classes of corresponding protoptypes, default: see the Daniel@0: % explanation below. Daniel@0: % [K] (scalar) the K in KNN classifier, default is 1 Daniel@0: % Daniel@0: % Class (matrix) size Nx1, vector of 1,2, ..., k indicating the class Daniel@0: % desicion according to the KNN rule Daniel@0: % P (matrix) size Nxk, the relative amount of prototypes of Daniel@0: % each class among the K closest prototypes for Daniel@0: % each classifiee. Daniel@0: % Daniel@0: % If 'proto_class' is _not_ given, 'Proto' _must_ be a labeled SOM Daniel@0: % Toolbox struct. The label of the data vector or the first label of Daniel@0: % the map model vector is considered as class label for th prototype Daniel@0: % vector. In this case the output 'Class' is a copy of 'Data' (map or Daniel@0: % data struct) relabeled according to the classification. If input Daniel@0: % argument 'proto_class' _is_ given, the output argument 'Class' is Daniel@0: % _always_ a vector of integers 1,2,...,k indiacating the class. Daniel@0: % Daniel@0: % If there is a tie between representatives of two or more classes Daniel@0: % among the K closest neighbors to the classifiee, the class is Daniel@0: % selected randomly among these candidates. Daniel@0: % Daniel@0: % IMPORTANT Daniel@0: % Daniel@0: % ** Even if prototype vectors are given in a map struct the mask _is not Daniel@0: % taken into account_ when calculating Euclidean distance Daniel@0: % ** The function calculates the total distance matrix between all Daniel@0: % classifiees and prototype vectors. This results to an MxN matrix; Daniel@0: % if N is high it is recommended to divide the matrix 'Data' Daniel@0: % (the classifiees) into smaller sets in order to avoid memory Daniel@0: % overflow or swapping. Also, if K>1 this function uses 'sort' which is Daniel@0: % considerably slower than 'max' which is used for K==1. Daniel@0: % Daniel@0: % See also KNN, SOM_LABEL, SOM_AUTOLABEL Daniel@0: Daniel@0: % Contributed to SOM Toolbox 2.0, February 11th, 2000 by Johan Himberg Daniel@0: % Copyright (c) by Johan Himberg Daniel@0: % http://www.cis.hut.fi/projects/somtoolbox/ Daniel@0: Daniel@0: % Version 2.0beta Johan 040200 Daniel@0: Daniel@0: %% Init %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Daniel@0: % This must exist later Daniel@0: classnames=''; Daniel@0: Daniel@0: % Check K Daniel@0: if nargin<4 | isempty(K), Daniel@0: K=1; Daniel@0: end Daniel@0: Daniel@0: if ~vis_valuetype(K,{'1x1'}) Daniel@0: error('Value for K must be a scalar.'); Daniel@0: end Daniel@0: Daniel@0: % Take data from data or map struct Daniel@0: Daniel@0: if isstruct(Data); Daniel@0: if isfield(Data,'type') & ischar(Data.type), Daniel@0: ; Daniel@0: else Daniel@0: error('Invalid map/data struct?'); Daniel@0: end Daniel@0: switch Data.type Daniel@0: case 'som_map' Daniel@0: data=Data.codebook; Daniel@0: case 'som_data' Daniel@0: data=Data.data; Daniel@0: end Daniel@0: else Daniel@0: % is already a matrix Daniel@0: data=Data; Daniel@0: end Daniel@0: Daniel@0: % Take prototype vectors from prototype struct Daniel@0: Daniel@0: if isstruct(Proto), Daniel@0: Daniel@0: if isfield(Proto,'type') & ischar(Proto.type), Daniel@0: ; Daniel@0: else Daniel@0: error('Invalid map/data struct?'); Daniel@0: end Daniel@0: switch Proto.type Daniel@0: case 'som_map' Daniel@0: proto=Proto.codebook; Daniel@0: case 'som_data' Daniel@0: proto=Proto.data; Daniel@0: end Daniel@0: else Daniel@0: % is already a matrix Daniel@0: proto=Proto; Daniel@0: end Daniel@0: Daniel@0: % Check that inputs are matrices Daniel@0: if ~vis_valuetype(proto,{'nxm'}) | ~vis_valuetype(data,{'nxm'}), Daniel@0: error('Prototype or data input not valid.') Daniel@0: end Daniel@0: Daniel@0: % Record data&proto sizes and check their dims Daniel@0: [N_data dim_data]=size(data); Daniel@0: [N_proto dim_proto]=size(proto); Daniel@0: if dim_proto ~= dim_data, Daniel@0: error('Data and prototype vector dimension does not match.'); Daniel@0: end Daniel@0: Daniel@0: % Check if the classes are given as labels (no class input arg.) Daniel@0: % if they are take them from prototype struct Daniel@0: Daniel@0: if nargin<3 | isempty(proto_class) Daniel@0: if ~isstruct(Proto) Daniel@0: error(['If prototypes are not in labeled map or data struct' ... Daniel@0: 'class must be given.']); Daniel@0: % transform to interger (numerical) class labels Daniel@0: else Daniel@0: [proto_class,classnames]=class2num(Proto.labels); Daniel@0: end Daniel@0: end Daniel@0: Daniel@0: % Check class label vector: must be numerical and of integers Daniel@0: if ~vis_valuetype(proto_class,{[N_proto 1]}); Daniel@0: error(['Class vector is invalid: has to be a N-of-data_rows x 1' ... Daniel@0: ' vector of integers']); Daniel@0: elseif sum(fix(proto_class)-proto_class)~=0 Daniel@0: error('Class labels in vector ''Class'' must be integers.'); Daniel@0: end Daniel@0: Daniel@0: % Find all class labels Daniel@0: ClassIndex=unique(proto_class); Daniel@0: N_class=length(ClassIndex); % number of different classes Daniel@0: Daniel@0: % Calculate euclidean distances between classifiees and prototypes Daniel@0: d=distance(proto,data); Daniel@0: Daniel@0: %%%% Classification %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Daniel@0: Daniel@0: if K==1, % sort distances only if K>1 Daniel@0: Daniel@0: % 1NN Daniel@0: % Select the closest prototype Daniel@0: [tmp,proto_index]=min(d); Daniel@0: class=proto_class(proto_index); Daniel@0: Daniel@0: else Daniel@0: Daniel@0: % Sort the prototypes for each classifiee according to distance Daniel@0: [tmp,proto_index]=sort(d); Daniel@0: Daniel@0: %% Select K closest prototypes Daniel@0: proto_index=proto_index(1:K,:); Daniel@0: knn_class=proto_class(proto_index); Daniel@0: for i=1:N_class, Daniel@0: classcounter(i,:)=sum(knn_class==ClassIndex(i)); Daniel@0: end Daniel@0: Daniel@0: %% Vote between classes of K neighbors Daniel@0: [winner,vote_index]=max(classcounter); Daniel@0: Daniel@0: %% Handle ties Daniel@0: Daniel@0: % set index to clases that got as amuch votes as winner Daniel@0: Daniel@0: equal_to_winner=(repmat(winner,N_class,1)==classcounter); Daniel@0: Daniel@0: % set index to ties Daniel@0: tie_index=find(sum(equal_to_winner)>1); % drop the winner from counter Daniel@0: Daniel@0: % Go through equal classes and reset vote_index randomly to one Daniel@0: % of them Daniel@0: Daniel@0: for i=1:length(tie_index), Daniel@0: tie_class_index=find(equal_to_winner(:,tie_index(i))); Daniel@0: fortuna=randperm(length(tie_class_index)); Daniel@0: vote_index(tie_index(i))=tie_class_index(fortuna(1)); Daniel@0: end Daniel@0: Daniel@0: class=ClassIndex(vote_index); Daniel@0: end Daniel@0: Daniel@0: %% Build output %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Daniel@0: Daniel@0: % Relative amount of classes in K neighbors for each classifiee Daniel@0: Daniel@0: if K==1, Daniel@0: P=zeros(N_data,N_class); Daniel@0: if nargout>1, Daniel@0: for i=1:N_data, Daniel@0: P(i,ClassIndex==class(i))=1; Daniel@0: end Daniel@0: end Daniel@0: else Daniel@0: P=classcounter'./K; Daniel@0: end Daniel@0: Daniel@0: % xMake class names to struct if they exist Daniel@0: if ~isempty(classnames), Daniel@0: Class=Data; Daniel@0: for i=1:N_data, Daniel@0: Class.labels{i,1}=classnames{class(i)}; Daniel@0: end Daniel@0: else Daniel@0: Class=class; Daniel@0: end Daniel@0: Daniel@0: Daniel@0: %%% Subfunctions %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Daniel@0: Daniel@0: function [nos,names] = class2num(class) Daniel@0: Daniel@0: % Change string labels in map/data struct to integer numbers Daniel@0: Daniel@0: names = {}; Daniel@0: nos = zeros(length(class),1); Daniel@0: for i=1:length(class) Daniel@0: if ~isempty(class{i}) & ~any(strcmp(class{i},names)) Daniel@0: names=cat(1,names,class(i)); Daniel@0: end Daniel@0: end Daniel@0: Daniel@0: tmp_nos = (1:length(names))'; Daniel@0: for i=1:length(class) Daniel@0: if ~isempty(class{i}) Daniel@0: nos(i,1) = find(strcmp(class{i},names)); Daniel@0: end Daniel@0: end Daniel@0: Daniel@0: function d=distance(X,Y); Daniel@0: Daniel@0: % Euclidean distance matrix between row vectors in X and Y Daniel@0: Daniel@0: U=~isnan(Y); Y(~U)=0; Daniel@0: V=~isnan(X); X(~V)=0; Daniel@0: d=X.^2*U'+V*Y'.^2-2*X*Y';