annotate toolboxes/MIRtoolbox1.3.2/somtoolbox/som_kmeans.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
wolffd@0 1 function [codes,clusters,err] = som_kmeans(method, D, k, epochs, verbose)
wolffd@0 2
wolffd@0 3 % SOM_KMEANS K-means algorithm.
wolffd@0 4 %
wolffd@0 5 % [codes,clusters,err] = som_kmeans(method, D, k, [epochs], [verbose])
wolffd@0 6 %
wolffd@0 7 % Input and output arguments ([]'s are optional):
wolffd@0 8 % method (string) k-means algorithm type: 'batch' or 'seq'
wolffd@0 9 % D (matrix) data matrix
wolffd@0 10 % (struct) data or map struct
wolffd@0 11 % k (scalar) number of centroids
wolffd@0 12 % [epochs] (scalar) number of training epochs
wolffd@0 13 % [verbose] (scalar) if <> 0 display additonal information
wolffd@0 14 %
wolffd@0 15 % codes (matrix) codebook vectors
wolffd@0 16 % clusters (vector) cluster number for each sample
wolffd@0 17 % err (scalar) total quantization error for the data set
wolffd@0 18 %
wolffd@0 19 % See also KMEANS_CLUSTERS, SOM_MAKE, SOM_BATCHTRAIN, SOM_SEQTRAIN.
wolffd@0 20
wolffd@0 21 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wolffd@0 22 % Function has been renamed by Kimmo Raivio, because matlab65 also have
wolffd@0 23 % kmeans function 1.10.02
wolffd@0 24 %% input arguments
wolffd@0 25
wolffd@0 26 if isstruct(D),
wolffd@0 27 switch D.type,
wolffd@0 28 case 'som_map', data = D.codebook;
wolffd@0 29 case 'som_data', data = D.data;
wolffd@0 30 end
wolffd@0 31 else
wolffd@0 32 data = D;
wolffd@0 33 end
wolffd@0 34 [l dim] = size(data);
wolffd@0 35
wolffd@0 36 if nargin < 4 | isempty(epochs) | isnan(epochs), epochs = 100; end
wolffd@0 37 if nargin < 5, verbose = 0; end
wolffd@0 38
wolffd@0 39 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wolffd@0 40 %% action
wolffd@0 41
wolffd@0 42 rand('state', sum(100*clock)); % init rand generator
wolffd@0 43
wolffd@0 44 lr = 0.5; % learning rate for sequential k-means
wolffd@0 45 temp = randperm(l);
wolffd@0 46 centroids = data(temp(1:k),:);
wolffd@0 47 res = zeros(k,l);
wolffd@0 48 clusters = zeros(1, l);
wolffd@0 49
wolffd@0 50 if dim==1,
wolffd@0 51 [codes,clusters,err] = scalar_kmeans(data,k,epochs);
wolffd@0 52 return;
wolffd@0 53 end
wolffd@0 54
wolffd@0 55 switch method
wolffd@0 56 case 'seq',
wolffd@0 57 len = epochs * l;
wolffd@0 58 l_rate = linspace(lr,0,len);
wolffd@0 59 order = randperm(l);
wolffd@0 60 for iter = 1:len
wolffd@0 61 x = D(order(rem(iter,l)+1),:);
wolffd@0 62 dx = x(ones(k,1),:) - centroids;
wolffd@0 63 [dist nearest] = min(sum(dx.^2,2));
wolffd@0 64 centroids(nearest,:) = centroids(nearest,:) + l_rate(iter)*dx(nearest,:);
wolffd@0 65 end
wolffd@0 66 [dummy clusters] = min(((ones(k, 1) * sum((data.^2)', 1))' + ...
wolffd@0 67 ones(l, 1) * sum((centroids.^2)',1) - ...
wolffd@0 68 2.*(data*(centroids')))');
wolffd@0 69
wolffd@0 70 case 'batch',
wolffd@0 71 iter = 0;
wolffd@0 72 old_clusters = zeros(k, 1);
wolffd@0 73 while iter<epochs
wolffd@0 74
wolffd@0 75 [dummy clusters] = min(((ones(k, 1) * sum((data.^2)', 1))' + ...
wolffd@0 76 ones(l, 1) * sum((centroids.^2)',1) - ...
wolffd@0 77 2.*(data*(centroids')))');
wolffd@0 78
wolffd@0 79 for i = 1:k
wolffd@0 80 f = find(clusters==i);
wolffd@0 81 s = length(f);
wolffd@0 82 if s, centroids(i,:) = sum(data(f,:)) / s; end
wolffd@0 83 end
wolffd@0 84
wolffd@0 85 if iter
wolffd@0 86 if sum(old_clusters==clusters)==0
wolffd@0 87 if verbose, fprintf(1, 'Convergence in %d iterations\n', iter); end
wolffd@0 88 break;
wolffd@0 89 end
wolffd@0 90 end
wolffd@0 91
wolffd@0 92 old_clusters = clusters;
wolffd@0 93 iter = iter + 1;
wolffd@0 94 end
wolffd@0 95
wolffd@0 96 [dummy clusters] = min(((ones(k, 1) * sum((data.^2)', 1))' + ...
wolffd@0 97 ones(l, 1) * sum((centroids.^2)',1) - ...
wolffd@0 98 2.*(data*(centroids')))');
wolffd@0 99 otherwise,
wolffd@0 100 fprintf(2, 'Unknown method\n');
wolffd@0 101 end
wolffd@0 102
wolffd@0 103 err = 0;
wolffd@0 104 for i = 1:k
wolffd@0 105 f = find(clusters==i);
wolffd@0 106 s = length(f);
wolffd@0 107 if s, err = err + sum(sum((data(f,:)-ones(s,1)*centroids(i,:)).^2,2)); end
wolffd@0 108 end
wolffd@0 109
wolffd@0 110 codes = centroids;
wolffd@0 111 return;
wolffd@0 112
wolffd@0 113 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wolffd@0 114
wolffd@0 115 function [y,bm,qe] = scalar_kmeans(x,k,maxepochs)
wolffd@0 116
wolffd@0 117 nans = ~isfinite(x);
wolffd@0 118 x(nans) = [];
wolffd@0 119 n = length(x);
wolffd@0 120 mi = min(x); ma = max(x)
wolffd@0 121 y = linspace(mi,ma,k)';
wolffd@0 122 bm = ones(n,1);
wolffd@0 123 bmold = zeros(n,1);
wolffd@0 124 i = 0;
wolffd@0 125 while ~all(bm==bmold) & i<maxepochs,
wolffd@0 126 bmold = bm;
wolffd@0 127 [c bm] = histc(x,[-Inf; (y(2:end)+y(1:end-1))/2; Inf]);
wolffd@0 128 y = full(sum(sparse(bm,1:n,x,k,n),2));
wolffd@0 129 zh = (c(1:end-1)==0);
wolffd@0 130 y(~zh) = y(~zh)./c(~zh);
wolffd@0 131 inds = find(zh)';
wolffd@0 132 for j=inds, if j==1, y(j) = mi; else y(j) = y(j-1) + eps; end, end
wolffd@0 133 i=i+1;
wolffd@0 134 end
wolffd@0 135 if i==maxepochs, [c bm] = histc(x,[-Inf; (y(2:end)+y(1:end-1))/2; Inf]); end
wolffd@0 136 if nargout>2, qe = sum(abs(x-y(bm)))/n; end
wolffd@0 137 if any(nans),
wolffd@0 138 notnan = find(~nans); n = length(nans);
wolffd@0 139 y = full(sparse(notnan,1,y ,n,1)); y(nans) = NaN;
wolffd@0 140 bm = full(sparse(notnan,1,bm,n,1)); bm(nans) = NaN;
wolffd@0 141 if nargout>2, qe = full(sparse(notnan,1,qe,n,1)); qe(nans) = NaN; end
wolffd@0 142 end
wolffd@0 143
wolffd@0 144 return;
wolffd@0 145