wolffd@0: function [Class,P]=knn_old(Data, Proto, proto_class, K) wolffd@0: wolffd@0: %KNN_OLD A K-nearest neighbor classifier using Euclidean distance wolffd@0: % wolffd@0: % [Class,P]=knn_old(Data, Proto, proto_class, K) wolffd@0: % wolffd@0: % [sM_class,P]=knn_old(sM, sData, [], 3); wolffd@0: % [sD_class,P]=knn_old(sD, sM, class); wolffd@0: % [class,P]=knn_old(data, proto, class); wolffd@0: % [class,P]=knn_old(sData, sM, class,5); wolffd@0: % wolffd@0: % Input and output arguments ([]'s are optional): wolffd@0: % Data (matrix) size Nxd, vectors to be classified (=classifiees) wolffd@0: % (struct) map or data struct: map codebook vectors or wolffd@0: % data vectors are considered as classifiees. wolffd@0: % Proto (matrix) size Mxd, prototype vector matrix (=prototypes) wolffd@0: % (struct) map or data struct: map codebook vectors or wolffd@0: % data vectors are considered as prototypes. wolffd@0: % [proto_class] (vector) size Nx1, integers 1,2,...,k indicating the wolffd@0: % classes of corresponding protoptypes, default: see the wolffd@0: % explanation below. wolffd@0: % [K] (scalar) the K in KNN classifier, default is 1 wolffd@0: % wolffd@0: % Class (matrix) size Nx1, vector of 1,2, ..., k indicating the class wolffd@0: % desicion according to the KNN rule wolffd@0: % P (matrix) size Nxk, the relative amount of prototypes of wolffd@0: % each class among the K closest prototypes for wolffd@0: % each classifiee. wolffd@0: % wolffd@0: % If 'proto_class' is _not_ given, 'Proto' _must_ be a labeled SOM wolffd@0: % Toolbox struct. The label of the data vector or the first label of wolffd@0: % the map model vector is considered as class label for th prototype wolffd@0: % vector. In this case the output 'Class' is a copy of 'Data' (map or wolffd@0: % data struct) relabeled according to the classification. If input wolffd@0: % argument 'proto_class' _is_ given, the output argument 'Class' is wolffd@0: % _always_ a vector of integers 1,2,...,k indiacating the class. wolffd@0: % wolffd@0: % If there is a tie between representatives of two or more classes wolffd@0: % among the K closest neighbors to the classifiee, the class is wolffd@0: % selected randomly among these candidates. wolffd@0: % wolffd@0: % IMPORTANT wolffd@0: % wolffd@0: % ** Even if prototype vectors are given in a map struct the mask _is not wolffd@0: % taken into account_ when calculating Euclidean distance wolffd@0: % ** The function calculates the total distance matrix between all wolffd@0: % classifiees and prototype vectors. This results to an MxN matrix; wolffd@0: % if N is high it is recommended to divide the matrix 'Data' wolffd@0: % (the classifiees) into smaller sets in order to avoid memory wolffd@0: % overflow or swapping. Also, if K>1 this function uses 'sort' which is wolffd@0: % considerably slower than 'max' which is used for K==1. wolffd@0: % wolffd@0: % See also KNN, SOM_LABEL, SOM_AUTOLABEL wolffd@0: wolffd@0: % Contributed to SOM Toolbox 2.0, February 11th, 2000 by Johan Himberg wolffd@0: % Copyright (c) by Johan Himberg wolffd@0: % http://www.cis.hut.fi/projects/somtoolbox/ wolffd@0: wolffd@0: % Version 2.0beta Johan 040200 wolffd@0: wolffd@0: %% Init %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% wolffd@0: % This must exist later wolffd@0: classnames=''; wolffd@0: wolffd@0: % Check K wolffd@0: if nargin<4 | isempty(K), wolffd@0: K=1; wolffd@0: end wolffd@0: wolffd@0: if ~vis_valuetype(K,{'1x1'}) wolffd@0: error('Value for K must be a scalar.'); wolffd@0: end wolffd@0: wolffd@0: % Take data from data or map struct wolffd@0: wolffd@0: if isstruct(Data); wolffd@0: if isfield(Data,'type') & ischar(Data.type), wolffd@0: ; wolffd@0: else wolffd@0: error('Invalid map/data struct?'); wolffd@0: end wolffd@0: switch Data.type wolffd@0: case 'som_map' wolffd@0: data=Data.codebook; wolffd@0: case 'som_data' wolffd@0: data=Data.data; wolffd@0: end wolffd@0: else wolffd@0: % is already a matrix wolffd@0: data=Data; wolffd@0: end wolffd@0: wolffd@0: % Take prototype vectors from prototype struct wolffd@0: wolffd@0: if isstruct(Proto), wolffd@0: wolffd@0: if isfield(Proto,'type') & ischar(Proto.type), wolffd@0: ; wolffd@0: else wolffd@0: error('Invalid map/data struct?'); wolffd@0: end wolffd@0: switch Proto.type wolffd@0: case 'som_map' wolffd@0: proto=Proto.codebook; wolffd@0: case 'som_data' wolffd@0: proto=Proto.data; wolffd@0: end wolffd@0: else wolffd@0: % is already a matrix wolffd@0: proto=Proto; wolffd@0: end wolffd@0: wolffd@0: % Check that inputs are matrices wolffd@0: if ~vis_valuetype(proto,{'nxm'}) | ~vis_valuetype(data,{'nxm'}), wolffd@0: error('Prototype or data input not valid.') wolffd@0: end wolffd@0: wolffd@0: % Record data&proto sizes and check their dims wolffd@0: [N_data dim_data]=size(data); wolffd@0: [N_proto dim_proto]=size(proto); wolffd@0: if dim_proto ~= dim_data, wolffd@0: error('Data and prototype vector dimension does not match.'); wolffd@0: end wolffd@0: wolffd@0: % Check if the classes are given as labels (no class input arg.) wolffd@0: % if they are take them from prototype struct wolffd@0: wolffd@0: if nargin<3 | isempty(proto_class) wolffd@0: if ~isstruct(Proto) wolffd@0: error(['If prototypes are not in labeled map or data struct' ... wolffd@0: 'class must be given.']); wolffd@0: % transform to interger (numerical) class labels wolffd@0: else wolffd@0: [proto_class,classnames]=class2num(Proto.labels); wolffd@0: end wolffd@0: end wolffd@0: wolffd@0: % Check class label vector: must be numerical and of integers wolffd@0: if ~vis_valuetype(proto_class,{[N_proto 1]}); wolffd@0: error(['Class vector is invalid: has to be a N-of-data_rows x 1' ... wolffd@0: ' vector of integers']); wolffd@0: elseif sum(fix(proto_class)-proto_class)~=0 wolffd@0: error('Class labels in vector ''Class'' must be integers.'); wolffd@0: end wolffd@0: wolffd@0: % Find all class labels wolffd@0: ClassIndex=unique(proto_class); wolffd@0: N_class=length(ClassIndex); % number of different classes wolffd@0: wolffd@0: % Calculate euclidean distances between classifiees and prototypes wolffd@0: d=distance(proto,data); wolffd@0: wolffd@0: %%%% Classification %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% wolffd@0: wolffd@0: if K==1, % sort distances only if K>1 wolffd@0: wolffd@0: % 1NN wolffd@0: % Select the closest prototype wolffd@0: [tmp,proto_index]=min(d); wolffd@0: class=proto_class(proto_index); wolffd@0: wolffd@0: else wolffd@0: wolffd@0: % Sort the prototypes for each classifiee according to distance wolffd@0: [tmp,proto_index]=sort(d); wolffd@0: wolffd@0: %% Select K closest prototypes wolffd@0: proto_index=proto_index(1:K,:); wolffd@0: knn_class=proto_class(proto_index); wolffd@0: for i=1:N_class, wolffd@0: classcounter(i,:)=sum(knn_class==ClassIndex(i)); wolffd@0: end wolffd@0: wolffd@0: %% Vote between classes of K neighbors wolffd@0: [winner,vote_index]=max(classcounter); wolffd@0: wolffd@0: %% Handle ties wolffd@0: wolffd@0: % set index to clases that got as amuch votes as winner wolffd@0: wolffd@0: equal_to_winner=(repmat(winner,N_class,1)==classcounter); wolffd@0: wolffd@0: % set index to ties wolffd@0: tie_index=find(sum(equal_to_winner)>1); % drop the winner from counter wolffd@0: wolffd@0: % Go through equal classes and reset vote_index randomly to one wolffd@0: % of them wolffd@0: wolffd@0: for i=1:length(tie_index), wolffd@0: tie_class_index=find(equal_to_winner(:,tie_index(i))); wolffd@0: fortuna=randperm(length(tie_class_index)); wolffd@0: vote_index(tie_index(i))=tie_class_index(fortuna(1)); wolffd@0: end wolffd@0: wolffd@0: class=ClassIndex(vote_index); wolffd@0: end wolffd@0: wolffd@0: %% Build output %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% wolffd@0: wolffd@0: % Relative amount of classes in K neighbors for each classifiee wolffd@0: wolffd@0: if K==1, wolffd@0: P=zeros(N_data,N_class); wolffd@0: if nargout>1, wolffd@0: for i=1:N_data, wolffd@0: P(i,ClassIndex==class(i))=1; wolffd@0: end wolffd@0: end wolffd@0: else wolffd@0: P=classcounter'./K; wolffd@0: end wolffd@0: wolffd@0: % xMake class names to struct if they exist wolffd@0: if ~isempty(classnames), wolffd@0: Class=Data; wolffd@0: for i=1:N_data, wolffd@0: Class.labels{i,1}=classnames{class(i)}; wolffd@0: end wolffd@0: else wolffd@0: Class=class; wolffd@0: end wolffd@0: wolffd@0: wolffd@0: %%% Subfunctions %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% wolffd@0: wolffd@0: function [nos,names] = class2num(class) wolffd@0: wolffd@0: % Change string labels in map/data struct to integer numbers wolffd@0: wolffd@0: names = {}; wolffd@0: nos = zeros(length(class),1); wolffd@0: for i=1:length(class) wolffd@0: if ~isempty(class{i}) & ~any(strcmp(class{i},names)) wolffd@0: names=cat(1,names,class(i)); wolffd@0: end wolffd@0: end wolffd@0: wolffd@0: tmp_nos = (1:length(names))'; wolffd@0: for i=1:length(class) wolffd@0: if ~isempty(class{i}) wolffd@0: nos(i,1) = find(strcmp(class{i},names)); wolffd@0: end wolffd@0: end wolffd@0: wolffd@0: function d=distance(X,Y); wolffd@0: wolffd@0: % Euclidean distance matrix between row vectors in X and Y wolffd@0: wolffd@0: U=~isnan(Y); Y(~U)=0; wolffd@0: V=~isnan(X); X(~V)=0; wolffd@0: d=X.^2*U'+V*Y'.^2-2*X*Y';