wolffd@0
|
1 function [centers,clusters,errors,ind] = kmeans_clusters(sD, n_max, c_max, verbose)
|
wolffd@0
|
2
|
wolffd@0
|
3 % KMEANS_CLUSTERS Clustering with k-means with different values for k.
|
wolffd@0
|
4 %
|
wolffd@0
|
5 % [c, p, err, ind] = kmeans_clusters(sD, [n_max], [c_max], [verbose])
|
wolffd@0
|
6 %
|
wolffd@0
|
7 % [c, p, err, ind] = kmeans_clusters(sD);
|
wolffd@0
|
8 %
|
wolffd@0
|
9 % Input and output arguments ([]'s are optional):
|
wolffd@0
|
10 % D (struct) map or data struct
|
wolffd@0
|
11 % (matrix) size dlen x dim, the data
|
wolffd@0
|
12 % [n_max] (scalar) maximum number of clusters, default is sqrt(dlen)
|
wolffd@0
|
13 % [c_max] (scalar) maximum number of k-means runs, default is 5
|
wolffd@0
|
14 % [verbose] (scalar) verbose level, 0 by default
|
wolffd@0
|
15 %
|
wolffd@0
|
16 % c (cell array) c{i} contains cluster centroids for k=i
|
wolffd@0
|
17 % p (cell array) p{i} contains cluster indeces for k=i
|
wolffd@0
|
18 % err (vector) squared sum of errors for each value of k
|
wolffd@0
|
19 % ind (vector) Davies-Bouldin index value for each clustering
|
wolffd@0
|
20 %
|
wolffd@0
|
21 % Makes a k-means to the given data set with different values of
|
wolffd@0
|
22 % k. The k-means is run multiple times for each k, and the best of
|
wolffd@0
|
23 % these is selected based on sum of squared errors. Finally, the
|
wolffd@0
|
24 % Davies-Bouldin index is calculated for each clustering.
|
wolffd@0
|
25 %
|
wolffd@0
|
26 % For example to cluster a SOM:
|
wolffd@0
|
27 % [c, p, err, ind] = kmeans_clusters(sM); % find clusterings
|
wolffd@0
|
28 % [dummy,i] = min(ind); % select the one with smallest index
|
wolffd@0
|
29 % som_show(sM,'color',{p{i},sprintf('%d clusters',i)}); % visualize
|
wolffd@0
|
30 % colormap(jet(i)), som_recolorbar % change colormap
|
wolffd@0
|
31 %
|
wolffd@0
|
32 % See also SOM_KMEANS.
|
wolffd@0
|
33
|
wolffd@0
|
34 % References:
|
wolffd@0
|
35 % Jain, A.K., Dubes, R.C., "Algorithms for Clustering Data",
|
wolffd@0
|
36 % Prentice Hall, 1988, pp. 96-101.
|
wolffd@0
|
37 %
|
wolffd@0
|
38 % Davies, D.L., Bouldin, D.W., "A Cluster Separation Measure",
|
wolffd@0
|
39 % IEEE Transactions on Pattern Analysis and Machine Intelligence,
|
wolffd@0
|
40 % vol. PAMI-1, no. 2, 1979, pp. 224-227.
|
wolffd@0
|
41 %
|
wolffd@0
|
42 % Vesanto, J., Alhoniemi, E., "Clustering of the Self-Organizing
|
wolffd@0
|
43 % Map", IEEE Transactions on Neural Networks, 2000.
|
wolffd@0
|
44
|
wolffd@0
|
45 % Contributed to SOM Toolbox vs2, February 2nd, 2000 by Esa Alhoniemi
|
wolffd@0
|
46 % Copyright (c) by Esa Alhoniemi
|
wolffd@0
|
47 % http://www.cis.hut.fi/projects/somtoolbox/
|
wolffd@0
|
48
|
wolffd@0
|
49 % ecco 301299 juuso 020200 211201
|
wolffd@0
|
50
|
wolffd@0
|
51 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
wolffd@0
|
52 %% input arguments and initialization
|
wolffd@0
|
53
|
wolffd@0
|
54 if isstruct(sD),
|
wolffd@0
|
55 if isfield(sD,'data'), D = sD.data;
|
wolffd@0
|
56 else D = sD.codebook;
|
wolffd@0
|
57 end
|
wolffd@0
|
58 else D = sD;
|
wolffd@0
|
59 end
|
wolffd@0
|
60 [dlen dim] = size(D);
|
wolffd@0
|
61
|
wolffd@0
|
62 if nargin < 2 | isempty(n_max) | isnan(n_max), n_max = ceil(sqrt(dlen)); end
|
wolffd@0
|
63 if nargin < 3 | isempty(c_max) | isnan(c_max), c_max = 5; end
|
wolffd@0
|
64 if nargin < 4 | isempty(verbose) | isnan(verbose), verbose = 0; end
|
wolffd@0
|
65
|
wolffd@0
|
66 centers = cell(n_max,1);
|
wolffd@0
|
67 clusters = cell(n_max,1);
|
wolffd@0
|
68 ind = zeros(1,n_max)+NaN;
|
wolffd@0
|
69 errors = zeros(1,n_max)+NaN;
|
wolffd@0
|
70
|
wolffd@0
|
71 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
wolffd@0
|
72 %% action
|
wolffd@0
|
73
|
wolffd@0
|
74 % the case k=1 is trivial, but Davies-Boulding index cannot be evaluated
|
wolffd@0
|
75 m = zeros(1,dim);
|
wolffd@0
|
76 for i=1:dim, m(i)=mean(D(isfinite(D(:,i)),i)); end
|
wolffd@0
|
77 centers{1} = m;
|
wolffd@0
|
78 clusters{1} = ones(dlen,1);
|
wolffd@0
|
79 [dummy qerr] = som_bmus(m,D);
|
wolffd@0
|
80 errors(1) = sum(qerr.^2);
|
wolffd@0
|
81 ind(1) = NaN;
|
wolffd@0
|
82
|
wolffd@0
|
83 if verbose, fprintf(2,'Doing k-means for 2-%d clusters\n',n_max); end
|
wolffd@0
|
84
|
wolffd@0
|
85 for i = 2:n_max, % number of clusters
|
wolffd@0
|
86
|
wolffd@0
|
87 % make k-means with k=i for c_max times and select the best based
|
wolffd@0
|
88 % on sum-of-squared errors (SSE)
|
wolffd@0
|
89 best = realmax;
|
wolffd@0
|
90 for j = 1:c_max % run number j for cluster i
|
wolffd@0
|
91 if verbose,
|
wolffd@0
|
92 fprintf('%d/%d clusters, k-means run %d/%d\r', i, n_max,j, c_max);
|
wolffd@0
|
93 end
|
wolffd@0
|
94 [c, k, err] = som_kmeans('batch', D, i, 100, 0);
|
wolffd@0
|
95 if err < best, k_best = k'; c_best = c; best = err; end
|
wolffd@0
|
96 % ' added in k_best = k'; by kr 1.10.02
|
wolffd@0
|
97 end
|
wolffd@0
|
98 if verbose, fprintf(1, '\n'); end
|
wolffd@0
|
99
|
wolffd@0
|
100 % store the results
|
wolffd@0
|
101 centers{i} = c_best;
|
wolffd@0
|
102 clusters{i} = k_best;
|
wolffd@0
|
103 errors(i) = best;
|
wolffd@0
|
104 % ind(i) = db_index(D, c_best, k_best, 2); wrong version in somtbx ??
|
wolffd@0
|
105 ind(i) = db_index(D, k_best, c_best, 2); % modified by kr 1.10.02
|
wolffd@0
|
106
|
wolffd@0
|
107 % if verbose mode, plot the index & SSE
|
wolffd@0
|
108 if verbose
|
wolffd@0
|
109 subplot(2,1,1), plot(ind), grid
|
wolffd@0
|
110 title('Davies-Bouldin''s index')
|
wolffd@0
|
111 subplot(2,1,2), plot(errors), grid
|
wolffd@0
|
112 title('SSE')
|
wolffd@0
|
113 drawnow
|
wolffd@0
|
114 end
|
wolffd@0
|
115 end
|
wolffd@0
|
116
|
wolffd@0
|
117 return;
|
wolffd@0
|
118
|
wolffd@0
|
119
|
wolffd@0
|
120
|