Mercurial > hg > camir-aes2014
diff toolboxes/MIRtoolbox1.3.2/somtoolbox/knn_old.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/toolboxes/MIRtoolbox1.3.2/somtoolbox/knn_old.m Tue Feb 10 15:05:51 2015 +0000 @@ -0,0 +1,251 @@ +function [Class,P]=knn_old(Data, Proto, proto_class, K) + +%KNN_OLD A K-nearest neighbor classifier using Euclidean distance +% +% [Class,P]=knn_old(Data, Proto, proto_class, K) +% +% [sM_class,P]=knn_old(sM, sData, [], 3); +% [sD_class,P]=knn_old(sD, sM, class); +% [class,P]=knn_old(data, proto, class); +% [class,P]=knn_old(sData, sM, class,5); +% +% Input and output arguments ([]'s are optional): +% Data (matrix) size Nxd, vectors to be classified (=classifiees) +% (struct) map or data struct: map codebook vectors or +% data vectors are considered as classifiees. +% Proto (matrix) size Mxd, prototype vector matrix (=prototypes) +% (struct) map or data struct: map codebook vectors or +% data vectors are considered as prototypes. +% [proto_class] (vector) size Nx1, integers 1,2,...,k indicating the +% classes of corresponding protoptypes, default: see the +% explanation below. +% [K] (scalar) the K in KNN classifier, default is 1 +% +% Class (matrix) size Nx1, vector of 1,2, ..., k indicating the class +% desicion according to the KNN rule +% P (matrix) size Nxk, the relative amount of prototypes of +% each class among the K closest prototypes for +% each classifiee. +% +% If 'proto_class' is _not_ given, 'Proto' _must_ be a labeled SOM +% Toolbox struct. The label of the data vector or the first label of +% the map model vector is considered as class label for th prototype +% vector. In this case the output 'Class' is a copy of 'Data' (map or +% data struct) relabeled according to the classification. If input +% argument 'proto_class' _is_ given, the output argument 'Class' is +% _always_ a vector of integers 1,2,...,k indiacating the class. +% +% If there is a tie between representatives of two or more classes +% among the K closest neighbors to the classifiee, the class is +% selected randomly among these candidates. +% +% IMPORTANT +% +% ** Even if prototype vectors are given in a map struct the mask _is not +% taken into account_ when calculating Euclidean distance +% ** The function calculates the total distance matrix between all +% classifiees and prototype vectors. This results to an MxN matrix; +% if N is high it is recommended to divide the matrix 'Data' +% (the classifiees) into smaller sets in order to avoid memory +% overflow or swapping. Also, if K>1 this function uses 'sort' which is +% considerably slower than 'max' which is used for K==1. +% +% See also KNN, SOM_LABEL, SOM_AUTOLABEL + +% Contributed to SOM Toolbox 2.0, February 11th, 2000 by Johan Himberg +% Copyright (c) by Johan Himberg +% http://www.cis.hut.fi/projects/somtoolbox/ + +% Version 2.0beta Johan 040200 + +%% Init %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% This must exist later +classnames=''; + +% Check K +if nargin<4 | isempty(K), + K=1; +end + +if ~vis_valuetype(K,{'1x1'}) + error('Value for K must be a scalar.'); +end + +% Take data from data or map struct + +if isstruct(Data); + if isfield(Data,'type') & ischar(Data.type), + ; + else + error('Invalid map/data struct?'); + end + switch Data.type + case 'som_map' + data=Data.codebook; + case 'som_data' + data=Data.data; + end +else + % is already a matrix + data=Data; +end + +% Take prototype vectors from prototype struct + +if isstruct(Proto), + + if isfield(Proto,'type') & ischar(Proto.type), + ; + else + error('Invalid map/data struct?'); + end + switch Proto.type + case 'som_map' + proto=Proto.codebook; + case 'som_data' + proto=Proto.data; + end +else + % is already a matrix + proto=Proto; +end + +% Check that inputs are matrices +if ~vis_valuetype(proto,{'nxm'}) | ~vis_valuetype(data,{'nxm'}), + error('Prototype or data input not valid.') +end + +% Record data&proto sizes and check their dims +[N_data dim_data]=size(data); +[N_proto dim_proto]=size(proto); +if dim_proto ~= dim_data, + error('Data and prototype vector dimension does not match.'); +end + +% Check if the classes are given as labels (no class input arg.) +% if they are take them from prototype struct + +if nargin<3 | isempty(proto_class) + if ~isstruct(Proto) + error(['If prototypes are not in labeled map or data struct' ... + 'class must be given.']); + % transform to interger (numerical) class labels + else + [proto_class,classnames]=class2num(Proto.labels); + end +end + +% Check class label vector: must be numerical and of integers +if ~vis_valuetype(proto_class,{[N_proto 1]}); + error(['Class vector is invalid: has to be a N-of-data_rows x 1' ... + ' vector of integers']); +elseif sum(fix(proto_class)-proto_class)~=0 + error('Class labels in vector ''Class'' must be integers.'); +end + +% Find all class labels +ClassIndex=unique(proto_class); +N_class=length(ClassIndex); % number of different classes + +% Calculate euclidean distances between classifiees and prototypes +d=distance(proto,data); + +%%%% Classification %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +if K==1, % sort distances only if K>1 + + % 1NN + % Select the closest prototype + [tmp,proto_index]=min(d); + class=proto_class(proto_index); + +else + + % Sort the prototypes for each classifiee according to distance + [tmp,proto_index]=sort(d); + + %% Select K closest prototypes + proto_index=proto_index(1:K,:); + knn_class=proto_class(proto_index); + for i=1:N_class, + classcounter(i,:)=sum(knn_class==ClassIndex(i)); + end + + %% Vote between classes of K neighbors + [winner,vote_index]=max(classcounter); + + %% Handle ties + + % set index to clases that got as amuch votes as winner + + equal_to_winner=(repmat(winner,N_class,1)==classcounter); + + % set index to ties + tie_index=find(sum(equal_to_winner)>1); % drop the winner from counter + + % Go through equal classes and reset vote_index randomly to one + % of them + + for i=1:length(tie_index), + tie_class_index=find(equal_to_winner(:,tie_index(i))); + fortuna=randperm(length(tie_class_index)); + vote_index(tie_index(i))=tie_class_index(fortuna(1)); + end + + class=ClassIndex(vote_index); +end + +%% Build output %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% Relative amount of classes in K neighbors for each classifiee + +if K==1, + P=zeros(N_data,N_class); + if nargout>1, + for i=1:N_data, + P(i,ClassIndex==class(i))=1; + end + end +else + P=classcounter'./K; +end + +% xMake class names to struct if they exist +if ~isempty(classnames), + Class=Data; + for i=1:N_data, + Class.labels{i,1}=classnames{class(i)}; + end +else + Class=class; +end + + +%%% Subfunctions %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +function [nos,names] = class2num(class) + +% Change string labels in map/data struct to integer numbers + +names = {}; +nos = zeros(length(class),1); +for i=1:length(class) + if ~isempty(class{i}) & ~any(strcmp(class{i},names)) + names=cat(1,names,class(i)); + end +end + +tmp_nos = (1:length(names))'; +for i=1:length(class) + if ~isempty(class{i}) + nos(i,1) = find(strcmp(class{i},names)); + end +end + +function d=distance(X,Y); + +% Euclidean distance matrix between row vectors in X and Y + +U=~isnan(Y); Y(~U)=0; +V=~isnan(X); X(~V)=0; +d=X.^2*U'+V*Y'.^2-2*X*Y';