toolboxes/MIRtoolbox1.3.2/somtoolbox/kmeans_clusters.m
function [centers,clusters,errors,ind] = kmeans_clusters(sD, n_max, c_max, verbose)

% KMEANS_CLUSTERS Clustering with k-means using different values for k.
%
%  [c, p, err, ind] = kmeans_clusters(sD, [n_max], [c_max], [verbose])
%
%   [c, p, err, ind] = kmeans_clusters(sD);
%
%  Input and output arguments ([]'s are optional):
%   sD        (struct) map or data struct
%             (matrix) size dlen x dim, the data
%   [n_max]   (scalar) maximum number of clusters, default is ceil(sqrt(dlen))
%   [c_max]   (scalar) maximum number of k-means runs per k, default is 5
%   [verbose] (scalar) verbose level, 0 by default
%
%   c         (cell array) c{i} contains the cluster centroids for k=i
%   p         (cell array) p{i} contains the cluster indices for k=i
%   err       (vector) sum of squared errors for each value of k
%   ind       (vector) Davies-Bouldin index value for each clustering
%
%  Runs k-means on the given data set with different values of k. For
%  each k, the k-means is run multiple times and the best run is
%  selected based on the sum of squared errors. Finally, the
%  Davies-Bouldin index is calculated for each selected clustering.
%
%  For example, to cluster a SOM:
%    [c, p, err, ind] = kmeans_clusters(sM);        % find clusterings
%    [dummy,i] = min(ind);                          % select the one with the smallest index
%    som_show(sM,'color',{p{i},sprintf('%d clusters',i)}); % visualize
%    colormap(jet(i)), som_recolorbar                      % change colormap
%
%  See also SOM_KMEANS.
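%
%  A minimal usage sketch for a plain data matrix (illustrative only;
%  it assumes D is a dlen x dim matrix of samples and the variable
%  names below are arbitrary):
%    [c, p, err, ind] = kmeans_clusters(D, 10); % try k = 1...10
%    [dummy,k] = min(ind);                      % k with the smallest Davies-Bouldin index
%    labels = p{k};                             % cluster index of each sample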

% References:
%   Jain, A.K., Dubes, R.C., "Algorithms for Clustering Data",
%   Prentice Hall, 1988, pp. 96-101.
%
%   Davies, D.L., Bouldin, D.W., "A Cluster Separation Measure",
%   IEEE Transactions on Pattern Analysis and Machine Intelligence,
%   vol. PAMI-1, no. 2, 1979, pp. 224-227.
%
%   Vesanto, J., Alhoniemi, E., "Clustering of the Self-Organizing
%   Map", IEEE Transactions on Neural Networks, vol. 11, no. 3,
%   2000, pp. 586-600.
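%
% As a rough summary (not necessarily the exact formulation used by
% db_index below): for k clusters the Davies-Bouldin index is
%   DB = (1/k) * sum_i max_{j~=i} (S_i + S_j) / M_ij
% where S_i is the average distance of the samples in cluster i to
% their centroid and M_ij is the distance between centroids i and j;
% smaller values indicate more compact and better separated clusters.
% A naive illustrative sketch is included as a local function at the
% end of this file.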

% Contributed to SOM Toolbox vs2, February 2nd, 2000 by Esa Alhoniemi
% Copyright (c) by Esa Alhoniemi
% http://www.cis.hut.fi/projects/somtoolbox/

% ecco 301299 juuso 020200 211201

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% input arguments and initialization

if isstruct(sD),
  if isfield(sD,'data'), D = sD.data;
  else D = sD.codebook;
  end
else D = sD;
end
[dlen dim] = size(D);

if nargin < 2 || isempty(n_max) || isnan(n_max), n_max = ceil(sqrt(dlen)); end
if nargin < 3 || isempty(c_max) || isnan(c_max), c_max = 5; end
if nargin < 4 || isempty(verbose) || isnan(verbose), verbose = 0; end

centers  = cell(n_max,1);
clusters = cell(n_max,1);
ind      = zeros(1,n_max)+NaN;
errors   = zeros(1,n_max)+NaN;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% action

% the case k=1 is trivial, but the Davies-Bouldin index cannot be evaluated for it
m = zeros(1,dim);
for i=1:dim, m(i)=mean(D(isfinite(D(:,i)),i)); end  % columnwise mean over finite values
centers{1}  = m;
clusters{1} = ones(dlen,1);
[dummy qerr] = som_bmus(m,D);                       % quantization error of each sample
errors(1) = sum(qerr.^2);
ind(1)    = NaN;

if verbose, fprintf(2,'Doing k-means for 2-%d clusters\n',n_max); end

for i = 2:n_max, % number of clusters

  % run k-means with k=i for c_max times and select the best run based
  % on the sum of squared errors (SSE)
  best = realmax;
  for j = 1:c_max % run number j for i clusters
    if verbose,
      fprintf('%d/%d clusters, k-means run %d/%d\r', i, n_max, j, c_max);
    end
    [c, k, err] = som_kmeans('batch', D, i, 100, 0);  % batch k-means with k=i clusters
    if err < best, k_best = k'; c_best = c; best = err; end
    % ' added in k_best = k'; by kr 1.10.02
  end
  if verbose, fprintf(1, '\n'); end

  % store the results
  centers{i}  = c_best;
  clusters{i} = k_best;
  errors(i)   = best;
  % ind(i) = db_index(D, c_best, k_best, 2); wrong version in somtbx ??
  ind(i) = db_index(D, k_best, c_best, 2); % modified by kr 1.10.02

  % if verbose mode, plot the index & SSE
  if verbose
    subplot(2,1,1), plot(ind), grid
    title('Davies-Bouldin''s index')
    subplot(2,1,2), plot(errors), grid
    title('SSE')
    drawnow
  end
end

return;
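
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% illustrative sketch (not called above)

% The local function below is a minimal, naive sketch of the
% Davies-Bouldin index summarized after the references. It assumes
% complete (NaN-free) data and Euclidean distances for both the
% within-cluster scatter and the centroid separation; the SOM Toolbox
% db_index used above may differ in details (e.g. distance norms and
% the handling of empty clusters).

function t = db_index_sketch(D, cl, C)
% D  (matrix) dlen x dim data
% cl (vector) dlen x 1 cluster index of each sample, values 1...k
% C  (matrix) k x dim cluster centroids

k = size(C,1);
S = zeros(k,1);                              % within-cluster scatter S_i
for i = 1:k,
  Di = D(cl==i,:);                           % samples assigned to cluster i
  S(i) = mean(sqrt(sum((Di - ones(size(Di,1),1)*C(i,:)).^2, 2)));
end

R = zeros(k,k);                              % pairwise ratios (S_i+S_j)/M_ij
for i = 1:k,
  for j = (i+1):k,
    Mij    = sqrt(sum((C(i,:)-C(j,:)).^2));  % distance between centroids i and j
    R(i,j) = (S(i)+S(j)) / Mij;
    R(j,i) = R(i,j);
  end
end

t = mean(max(R,[],2));                       % average of the worst-case ratio per cluster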