annotate toolboxes/MIRtoolbox1.3.2/somtoolbox/som_kmeans.m @ 0:cc4b1211e677 tip

initial commit to HG from Changeset: 646 (e263d8a21543) added further path and more save "camirversion.m"
author Daniel Wolff
date Fri, 19 Aug 2016 13:07:06 +0200
parents
children
rev   line source
Daniel@0 1 function [codes,clusters,err] = som_kmeans(method, D, k, epochs, verbose)
Daniel@0 2
Daniel@0 3 % SOM_KMEANS K-means algorithm.
Daniel@0 4 %
Daniel@0 5 % [codes,clusters,err] = som_kmeans(method, D, k, [epochs], [verbose])
Daniel@0 6 %
Daniel@0 7 % Input and output arguments ([]'s are optional):
Daniel@0 8 % method (string) k-means algorithm type: 'batch' or 'seq'
Daniel@0 9 % D (matrix) data matrix
Daniel@0 10 % (struct) data or map struct
Daniel@0 11 % k (scalar) number of centroids
Daniel@0 12 % [epochs] (scalar) number of training epochs
Daniel@0 13 % [verbose] (scalar) if <> 0 display additonal information
Daniel@0 14 %
Daniel@0 15 % codes (matrix) codebook vectors
Daniel@0 16 % clusters (vector) cluster number for each sample
Daniel@0 17 % err (scalar) total quantization error for the data set
Daniel@0 18 %
Daniel@0 19 % See also KMEANS_CLUSTERS, SOM_MAKE, SOM_BATCHTRAIN, SOM_SEQTRAIN.
Daniel@0 20
Daniel@0 21 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Daniel@0 22 % Function has been renamed by Kimmo Raivio, because matlab65 also have
Daniel@0 23 % kmeans function 1.10.02
Daniel@0 24 %% input arguments
Daniel@0 25
Daniel@0 26 if isstruct(D),
Daniel@0 27 switch D.type,
Daniel@0 28 case 'som_map', data = D.codebook;
Daniel@0 29 case 'som_data', data = D.data;
Daniel@0 30 end
Daniel@0 31 else
Daniel@0 32 data = D;
Daniel@0 33 end
Daniel@0 34 [l dim] = size(data);
Daniel@0 35
Daniel@0 36 if nargin < 4 | isempty(epochs) | isnan(epochs), epochs = 100; end
Daniel@0 37 if nargin < 5, verbose = 0; end
Daniel@0 38
Daniel@0 39 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Daniel@0 40 %% action
Daniel@0 41
Daniel@0 42 rand('state', sum(100*clock)); % init rand generator
Daniel@0 43
Daniel@0 44 lr = 0.5; % learning rate for sequential k-means
Daniel@0 45 temp = randperm(l);
Daniel@0 46 centroids = data(temp(1:k),:);
Daniel@0 47 res = zeros(k,l);
Daniel@0 48 clusters = zeros(1, l);
Daniel@0 49
Daniel@0 50 if dim==1,
Daniel@0 51 [codes,clusters,err] = scalar_kmeans(data,k,epochs);
Daniel@0 52 return;
Daniel@0 53 end
Daniel@0 54
Daniel@0 55 switch method
Daniel@0 56 case 'seq',
Daniel@0 57 len = epochs * l;
Daniel@0 58 l_rate = linspace(lr,0,len);
Daniel@0 59 order = randperm(l);
Daniel@0 60 for iter = 1:len
Daniel@0 61 x = D(order(rem(iter,l)+1),:);
Daniel@0 62 dx = x(ones(k,1),:) - centroids;
Daniel@0 63 [dist nearest] = min(sum(dx.^2,2));
Daniel@0 64 centroids(nearest,:) = centroids(nearest,:) + l_rate(iter)*dx(nearest,:);
Daniel@0 65 end
Daniel@0 66 [dummy clusters] = min(((ones(k, 1) * sum((data.^2)', 1))' + ...
Daniel@0 67 ones(l, 1) * sum((centroids.^2)',1) - ...
Daniel@0 68 2.*(data*(centroids')))');
Daniel@0 69
Daniel@0 70 case 'batch',
Daniel@0 71 iter = 0;
Daniel@0 72 old_clusters = zeros(k, 1);
Daniel@0 73 while iter<epochs
Daniel@0 74
Daniel@0 75 [dummy clusters] = min(((ones(k, 1) * sum((data.^2)', 1))' + ...
Daniel@0 76 ones(l, 1) * sum((centroids.^2)',1) - ...
Daniel@0 77 2.*(data*(centroids')))');
Daniel@0 78
Daniel@0 79 for i = 1:k
Daniel@0 80 f = find(clusters==i);
Daniel@0 81 s = length(f);
Daniel@0 82 if s, centroids(i,:) = sum(data(f,:)) / s; end
Daniel@0 83 end
Daniel@0 84
Daniel@0 85 if iter
Daniel@0 86 if sum(old_clusters==clusters)==0
Daniel@0 87 if verbose, fprintf(1, 'Convergence in %d iterations\n', iter); end
Daniel@0 88 break;
Daniel@0 89 end
Daniel@0 90 end
Daniel@0 91
Daniel@0 92 old_clusters = clusters;
Daniel@0 93 iter = iter + 1;
Daniel@0 94 end
Daniel@0 95
Daniel@0 96 [dummy clusters] = min(((ones(k, 1) * sum((data.^2)', 1))' + ...
Daniel@0 97 ones(l, 1) * sum((centroids.^2)',1) - ...
Daniel@0 98 2.*(data*(centroids')))');
Daniel@0 99 otherwise,
Daniel@0 100 fprintf(2, 'Unknown method\n');
Daniel@0 101 end
Daniel@0 102
Daniel@0 103 err = 0;
Daniel@0 104 for i = 1:k
Daniel@0 105 f = find(clusters==i);
Daniel@0 106 s = length(f);
Daniel@0 107 if s, err = err + sum(sum((data(f,:)-ones(s,1)*centroids(i,:)).^2,2)); end
Daniel@0 108 end
Daniel@0 109
Daniel@0 110 codes = centroids;
Daniel@0 111 return;
Daniel@0 112
Daniel@0 113 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Daniel@0 114
Daniel@0 115 function [y,bm,qe] = scalar_kmeans(x,k,maxepochs)
Daniel@0 116
Daniel@0 117 nans = ~isfinite(x);
Daniel@0 118 x(nans) = [];
Daniel@0 119 n = length(x);
Daniel@0 120 mi = min(x); ma = max(x)
Daniel@0 121 y = linspace(mi,ma,k)';
Daniel@0 122 bm = ones(n,1);
Daniel@0 123 bmold = zeros(n,1);
Daniel@0 124 i = 0;
Daniel@0 125 while ~all(bm==bmold) & i<maxepochs,
Daniel@0 126 bmold = bm;
Daniel@0 127 [c bm] = histc(x,[-Inf; (y(2:end)+y(1:end-1))/2; Inf]);
Daniel@0 128 y = full(sum(sparse(bm,1:n,x,k,n),2));
Daniel@0 129 zh = (c(1:end-1)==0);
Daniel@0 130 y(~zh) = y(~zh)./c(~zh);
Daniel@0 131 inds = find(zh)';
Daniel@0 132 for j=inds, if j==1, y(j) = mi; else y(j) = y(j-1) + eps; end, end
Daniel@0 133 i=i+1;
Daniel@0 134 end
Daniel@0 135 if i==maxepochs, [c bm] = histc(x,[-Inf; (y(2:end)+y(1:end-1))/2; Inf]); end
Daniel@0 136 if nargout>2, qe = sum(abs(x-y(bm)))/n; end
Daniel@0 137 if any(nans),
Daniel@0 138 notnan = find(~nans); n = length(nans);
Daniel@0 139 y = full(sparse(notnan,1,y ,n,1)); y(nans) = NaN;
Daniel@0 140 bm = full(sparse(notnan,1,bm,n,1)); bm(nans) = NaN;
Daniel@0 141 if nargout>2, qe = full(sparse(notnan,1,qe,n,1)); qe(nans) = NaN; end
Daniel@0 142 end
Daniel@0 143
Daniel@0 144 return;
Daniel@0 145