wolffd@0
|
1 function [codes,clusters,err] = som_kmeans(method, D, k, epochs, verbose)
|
wolffd@0
|
2
|
wolffd@0
|
3 % SOM_KMEANS K-means algorithm.
|
wolffd@0
|
4 %
|
wolffd@0
|
5 % [codes,clusters,err] = som_kmeans(method, D, k, [epochs], [verbose])
|
wolffd@0
|
6 %
|
wolffd@0
|
7 % Input and output arguments ([]'s are optional):
|
wolffd@0
|
8 % method (string) k-means algorithm type: 'batch' or 'seq'
|
wolffd@0
|
9 % D (matrix) data matrix
|
wolffd@0
|
10 % (struct) data or map struct
|
wolffd@0
|
11 % k (scalar) number of centroids
|
wolffd@0
|
12 % [epochs] (scalar) number of training epochs
|
wolffd@0
|
13 % [verbose] (scalar) if <> 0 display additonal information
|
wolffd@0
|
14 %
|
wolffd@0
|
15 % codes (matrix) codebook vectors
|
wolffd@0
|
16 % clusters (vector) cluster number for each sample
|
wolffd@0
|
17 % err (scalar) total quantization error for the data set
|
wolffd@0
|
18 %
|
wolffd@0
|
19 % See also KMEANS_CLUSTERS, SOM_MAKE, SOM_BATCHTRAIN, SOM_SEQTRAIN.
|
wolffd@0
|
20
|
wolffd@0
|
21 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
wolffd@0
|
22 % Function has been renamed by Kimmo Raivio, because matlab65 also have
|
wolffd@0
|
23 % kmeans function 1.10.02
|
wolffd@0
|
24 %% input arguments
|
wolffd@0
|
25
|
wolffd@0
|
26 if isstruct(D),
|
wolffd@0
|
27 switch D.type,
|
wolffd@0
|
28 case 'som_map', data = D.codebook;
|
wolffd@0
|
29 case 'som_data', data = D.data;
|
wolffd@0
|
30 end
|
wolffd@0
|
31 else
|
wolffd@0
|
32 data = D;
|
wolffd@0
|
33 end
|
wolffd@0
|
34 [l dim] = size(data);
|
wolffd@0
|
35
|
wolffd@0
|
36 if nargin < 4 | isempty(epochs) | isnan(epochs), epochs = 100; end
|
wolffd@0
|
37 if nargin < 5, verbose = 0; end
|
wolffd@0
|
38
|
wolffd@0
|
39 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
wolffd@0
|
40 %% action
|
wolffd@0
|
41
|
wolffd@0
|
42 rand('state', sum(100*clock)); % init rand generator
|
wolffd@0
|
43
|
wolffd@0
|
44 lr = 0.5; % learning rate for sequential k-means
|
wolffd@0
|
45 temp = randperm(l);
|
wolffd@0
|
46 centroids = data(temp(1:k),:);
|
wolffd@0
|
47 res = zeros(k,l);
|
wolffd@0
|
48 clusters = zeros(1, l);
|
wolffd@0
|
49
|
wolffd@0
|
50 if dim==1,
|
wolffd@0
|
51 [codes,clusters,err] = scalar_kmeans(data,k,epochs);
|
wolffd@0
|
52 return;
|
wolffd@0
|
53 end
|
wolffd@0
|
54
|
wolffd@0
|
55 switch method
|
wolffd@0
|
56 case 'seq',
|
wolffd@0
|
57 len = epochs * l;
|
wolffd@0
|
58 l_rate = linspace(lr,0,len);
|
wolffd@0
|
59 order = randperm(l);
|
wolffd@0
|
60 for iter = 1:len
|
wolffd@0
|
61 x = D(order(rem(iter,l)+1),:);
|
wolffd@0
|
62 dx = x(ones(k,1),:) - centroids;
|
wolffd@0
|
63 [dist nearest] = min(sum(dx.^2,2));
|
wolffd@0
|
64 centroids(nearest,:) = centroids(nearest,:) + l_rate(iter)*dx(nearest,:);
|
wolffd@0
|
65 end
|
wolffd@0
|
66 [dummy clusters] = min(((ones(k, 1) * sum((data.^2)', 1))' + ...
|
wolffd@0
|
67 ones(l, 1) * sum((centroids.^2)',1) - ...
|
wolffd@0
|
68 2.*(data*(centroids')))');
|
wolffd@0
|
69
|
wolffd@0
|
70 case 'batch',
|
wolffd@0
|
71 iter = 0;
|
wolffd@0
|
72 old_clusters = zeros(k, 1);
|
wolffd@0
|
73 while iter<epochs
|
wolffd@0
|
74
|
wolffd@0
|
75 [dummy clusters] = min(((ones(k, 1) * sum((data.^2)', 1))' + ...
|
wolffd@0
|
76 ones(l, 1) * sum((centroids.^2)',1) - ...
|
wolffd@0
|
77 2.*(data*(centroids')))');
|
wolffd@0
|
78
|
wolffd@0
|
79 for i = 1:k
|
wolffd@0
|
80 f = find(clusters==i);
|
wolffd@0
|
81 s = length(f);
|
wolffd@0
|
82 if s, centroids(i,:) = sum(data(f,:)) / s; end
|
wolffd@0
|
83 end
|
wolffd@0
|
84
|
wolffd@0
|
85 if iter
|
wolffd@0
|
86 if sum(old_clusters==clusters)==0
|
wolffd@0
|
87 if verbose, fprintf(1, 'Convergence in %d iterations\n', iter); end
|
wolffd@0
|
88 break;
|
wolffd@0
|
89 end
|
wolffd@0
|
90 end
|
wolffd@0
|
91
|
wolffd@0
|
92 old_clusters = clusters;
|
wolffd@0
|
93 iter = iter + 1;
|
wolffd@0
|
94 end
|
wolffd@0
|
95
|
wolffd@0
|
96 [dummy clusters] = min(((ones(k, 1) * sum((data.^2)', 1))' + ...
|
wolffd@0
|
97 ones(l, 1) * sum((centroids.^2)',1) - ...
|
wolffd@0
|
98 2.*(data*(centroids')))');
|
wolffd@0
|
99 otherwise,
|
wolffd@0
|
100 fprintf(2, 'Unknown method\n');
|
wolffd@0
|
101 end
|
wolffd@0
|
102
|
wolffd@0
|
103 err = 0;
|
wolffd@0
|
104 for i = 1:k
|
wolffd@0
|
105 f = find(clusters==i);
|
wolffd@0
|
106 s = length(f);
|
wolffd@0
|
107 if s, err = err + sum(sum((data(f,:)-ones(s,1)*centroids(i,:)).^2,2)); end
|
wolffd@0
|
108 end
|
wolffd@0
|
109
|
wolffd@0
|
110 codes = centroids;
|
wolffd@0
|
111 return;
|
wolffd@0
|
112
|
wolffd@0
|
113 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
wolffd@0
|
114
|
wolffd@0
|
115 function [y,bm,qe] = scalar_kmeans(x,k,maxepochs)
|
wolffd@0
|
116
|
wolffd@0
|
117 nans = ~isfinite(x);
|
wolffd@0
|
118 x(nans) = [];
|
wolffd@0
|
119 n = length(x);
|
wolffd@0
|
120 mi = min(x); ma = max(x)
|
wolffd@0
|
121 y = linspace(mi,ma,k)';
|
wolffd@0
|
122 bm = ones(n,1);
|
wolffd@0
|
123 bmold = zeros(n,1);
|
wolffd@0
|
124 i = 0;
|
wolffd@0
|
125 while ~all(bm==bmold) & i<maxepochs,
|
wolffd@0
|
126 bmold = bm;
|
wolffd@0
|
127 [c bm] = histc(x,[-Inf; (y(2:end)+y(1:end-1))/2; Inf]);
|
wolffd@0
|
128 y = full(sum(sparse(bm,1:n,x,k,n),2));
|
wolffd@0
|
129 zh = (c(1:end-1)==0);
|
wolffd@0
|
130 y(~zh) = y(~zh)./c(~zh);
|
wolffd@0
|
131 inds = find(zh)';
|
wolffd@0
|
132 for j=inds, if j==1, y(j) = mi; else y(j) = y(j-1) + eps; end, end
|
wolffd@0
|
133 i=i+1;
|
wolffd@0
|
134 end
|
wolffd@0
|
135 if i==maxepochs, [c bm] = histc(x,[-Inf; (y(2:end)+y(1:end-1))/2; Inf]); end
|
wolffd@0
|
136 if nargout>2, qe = sum(abs(x-y(bm)))/n; end
|
wolffd@0
|
137 if any(nans),
|
wolffd@0
|
138 notnan = find(~nans); n = length(nans);
|
wolffd@0
|
139 y = full(sparse(notnan,1,y ,n,1)); y(nans) = NaN;
|
wolffd@0
|
140 bm = full(sparse(notnan,1,bm,n,1)); bm(nans) = NaN;
|
wolffd@0
|
141 if nargout>2, qe = full(sparse(notnan,1,qe,n,1)); qe(nans) = NaN; end
|
wolffd@0
|
142 end
|
wolffd@0
|
143
|
wolffd@0
|
144 return;
|
wolffd@0
|
145
|