Mercurial > hg > camir-aes2014
comparison toolboxes/MIRtoolbox1.3.2/somtoolbox/kmeans_clusters.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e9a9cd732c1e |
---|---|
function [centers,clusters,errors,ind] = kmeans_clusters(sD, n_max, c_max, verbose)

% KMEANS_CLUSTERS Clustering with k-means with different values for k.
%
% [c, p, err, ind] = kmeans_clusters(sD, [n_max], [c_max], [verbose])
%
%   [c, p, err, ind] = kmeans_clusters(sD);
%
%  Input and output arguments ([]'s are optional):
%   sD          (struct) map or data struct; the field .data is used if
%                        present, otherwise the field .codebook
%               (matrix) size dlen x dim, the data
%   [n_max]     (scalar) maximum number of clusters, default is
%                        ceil(sqrt(dlen))
%   [c_max]     (scalar) number of k-means runs made for each value of k,
%                        default is 5; must be at least 1 when n_max >= 2
%   [verbose]   (scalar) verbose level, 0 by default; when nonzero,
%                        progress is printed and the index and SSE curves
%                        are plotted after each k
%
%   c           (cell array) c{i} contains cluster centroids for k=i
%   p           (cell array) p{i} contains cluster indices for k=i
%   err         (vector) sum of squared errors for each value of k
%   ind         (vector) Davies-Bouldin index value for each clustering;
%                        ind(1) is always NaN
%
% Runs k-means on the given data set with different values of k. The
% k-means is run c_max times for each k, and the best of these runs is
% selected based on the sum of squared errors. Finally, the
% Davies-Bouldin index is calculated for each clustering.
%
% For example to cluster a SOM:
%    [c, p, err, ind] = kmeans_clusters(sM); % find clusterings
%    [dummy,i] = min(ind); % select the one with smallest index
%    som_show(sM,'color',{p{i},sprintf('%d clusters',i)}); % visualize
%    colormap(jet(i)), som_recolorbar % change colormap
%
% See also SOM_KMEANS.

% References:
%   Jain, A.K., Dubes, R.C., "Algorithms for Clustering Data",
%   Prentice Hall, 1988, pp. 96-101.
%
%   Davies, D.L., Bouldin, D.W., "A Cluster Separation Measure",
%   IEEE Transactions on Pattern Analysis and Machine Intelligence,
%   vol. PAMI-1, no. 2, 1979, pp. 224-227.
%
%   Vesanto, J., Alhoniemi, E., "Clustering of the Self-Organizing
%   Map", IEEE Transactions on Neural Networks, 2000.

% Contributed to SOM Toolbox vs2, February 2nd, 2000 by Esa Alhoniemi
% Copyright (c) by Esa Alhoniemi
% http://www.cis.hut.fi/projects/somtoolbox/

% ecco 301299 juuso 020200 211201

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% input arguments and initialization

% extract the data matrix from a data struct, a map struct or a plain matrix
if isstruct(sD),
  if isfield(sD,'data'), D = sD.data;
  else D = sD.codebook;
  end
else D = sD;
end
[dlen dim] = size(D);

% Short-circuit || guarantees that an argument which was not passed is
% never inspected by isempty/isnan.
if nargin < 2 || isempty(n_max) || isnan(n_max), n_max = ceil(sqrt(dlen)); end
if nargin < 3 || isempty(c_max) || isnan(c_max), c_max = 5; end
if nargin < 4 || isempty(verbose) || isnan(verbose), verbose = 0; end

% Without at least one run per k there is nothing to select from; fail
% early with a clear message instead of an undefined-variable error later.
if n_max >= 2 && c_max < 1,
  error('kmeans_clusters: c_max must be at least 1 when n_max >= 2.');
end

centers  = cell(n_max,1);
clusters = cell(n_max,1);
ind      = zeros(1,n_max)+NaN;
errors   = zeros(1,n_max)+NaN;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% action

% the case k=1 is trivial, but Davies-Bouldin index cannot be evaluated

% single centroid = component-wise mean over the finite values only
m = zeros(1,dim);
for i=1:dim, m(i)=mean(D(isfinite(D(:,i)),i)); end
centers{1} = m;
clusters{1} = ones(dlen,1);
[dummy qerr] = som_bmus(m,D);
errors(1) = sum(qerr.^2);
ind(1) = NaN;

if verbose, fprintf(2,'Doing k-means for 2-%d clusters\n',n_max); end

for i = 2:n_max, % number of clusters

  % make k-means with k=i for c_max times and select the best based
  % on sum-of-squared errors (SSE)

  % Reset the per-k selection state so that a result from a previous k
  % can never leak into this one.
  best = NaN; c_best = []; k_best = [];
  for j = 1:c_max % run number j for cluster i
    if verbose,
      fprintf('%d/%d clusters, k-means run %d/%d\r', i, n_max,j, c_max);
    end
    [c, k, err] = som_kmeans('batch', D, i, 100, 0);
    % Accept the run if it is the first one, if it improves the SSE, or
    % if the stored SSE is NaN and this one is not.
    if isempty(c_best) || err < best || (isnan(best) && ~isnan(err)),
      k_best = k'; % ' added in k_best = k'; by kr 1.10.02
      c_best = c;
      best = err;
    end
  end
  if verbose, fprintf(1, '\n'); end

  % store the results
  centers{i}  = c_best;
  clusters{i} = k_best;
  errors(i)   = best;
  % argument order is (data, cluster indices, centroids);
  % modified by kr 1.10.02 from db_index(D, c_best, k_best, 2)
  ind(i) = db_index(D, k_best, c_best, 2);

  % if verbose mode, plot the index & SSE
  if verbose
    subplot(2,1,1), plot(ind), grid
    title('Davies-Bouldin''s index')
    subplot(2,1,2), plot(errors), grid
    title('SSE')
    drawnow
  end
end

return;
118 | |
119 | |
120 |