camir-aes2014: toolboxes/MIRtoolbox1.3.2/somtoolbox/kmeans

annotate toolboxes/MIRtoolbox1.3.2/somtoolbox/kmeans_clusters.m @ 0:e9a9cd732c1e tip

first hg version after svn

author	wolffd
date	Tue, 10 Feb 2015 15:05:51 +0000
parents
children

rev	line source
wolffd@0	1 function [centers,clusters,errors,ind] = kmeans_clusters(sD, n_max, c_max, verbose)
wolffd@0	2
wolffd@0	3 % KMEANS_CLUSTERS Clustering with k-means with different values for k.
wolffd@0	4 %
wolffd@0	5 % [c, p, err, ind] = kmeans_clusters(sD, [n_max], [c_max], [verbose])
wolffd@0	6 %
wolffd@0	7 % [c, p, err, ind] = kmeans_clusters(sD);
wolffd@0	8 %
wolffd@0	9 % Input and output arguments ([]'s are optional):
wolffd@0	10 % D (struct) map or data struct
wolffd@0	11 % (matrix) size dlen x dim, the data
wolffd@0	12 % [n_max] (scalar) maximum number of clusters, default is sqrt(dlen)
wolffd@0	13 % [c_max] (scalar) maximum number of k-means runs, default is 5
wolffd@0	14 % [verbose] (scalar) verbose level, 0 by default
wolffd@0	15 %
wolffd@0	16 % c (cell array) c{i} contains cluster centroids for k=i
wolffd@0	17 % p (cell array) p{i} contains cluster indeces for k=i
wolffd@0	18 % err (vector) squared sum of errors for each value of k
wolffd@0	19 % ind (vector) Davies-Bouldin index value for each clustering
wolffd@0	20 %
wolffd@0	21 % Makes a k-means to the given data set with different values of
wolffd@0	22 % k. The k-means is run multiple times for each k, and the best of
wolffd@0	23 % these is selected based on sum of squared errors. Finally, the
wolffd@0	24 % Davies-Bouldin index is calculated for each clustering.
wolffd@0	25 %
wolffd@0	26 % For example to cluster a SOM:
wolffd@0	27 % [c, p, err, ind] = kmeans_clusters(sM); % find clusterings
wolffd@0	28 % [dummy,i] = min(ind); % select the one with smallest index
wolffd@0	29 % som_show(sM,'color',{p{i},sprintf('%d clusters',i)}); % visualize
wolffd@0	30 % colormap(jet(i)), som_recolorbar % change colormap
wolffd@0	31 %
wolffd@0	32 % See also SOM_KMEANS.
wolffd@0	33
wolffd@0	34 % References:
wolffd@0	35 % Jain, A.K., Dubes, R.C., "Algorithms for Clustering Data",
wolffd@0	36 % Prentice Hall, 1988, pp. 96-101.
wolffd@0	37 %
wolffd@0	38 % Davies, D.L., Bouldin, D.W., "A Cluster Separation Measure",
wolffd@0	39 % IEEE Transactions on Pattern Analysis and Machine Intelligence,
wolffd@0	40 % vol. PAMI-1, no. 2, 1979, pp. 224-227.
wolffd@0	41 %
wolffd@0	42 % Vesanto, J., Alhoniemi, E., "Clustering of the Self-Organizing
wolffd@0	43 % Map", IEEE Transactions on Neural Networks, 2000.
wolffd@0	44
wolffd@0	45 % Contributed to SOM Toolbox vs2, February 2nd, 2000 by Esa Alhoniemi
wolffd@0	46 % Copyright (c) by Esa Alhoniemi
wolffd@0	47 % http://www.cis.hut.fi/projects/somtoolbox/
wolffd@0	48
wolffd@0	49 % ecco 301299 juuso 020200 211201
wolffd@0	50
wolffd@0	51 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wolffd@0	52 %% input arguments and initialization
wolffd@0	53
wolffd@0	54 if isstruct(sD),
wolffd@0	55 if isfield(sD,'data'), D = sD.data;
wolffd@0	56 else D = sD.codebook;
wolffd@0	57 end
wolffd@0	58 else D = sD;
wolffd@0	59 end
wolffd@0	60 [dlen dim] = size(D);
wolffd@0	61
wolffd@0	62 if nargin < 2 \| isempty(n_max) \| isnan(n_max), n_max = ceil(sqrt(dlen)); end
wolffd@0	63 if nargin < 3 \| isempty(c_max) \| isnan(c_max), c_max = 5; end
wolffd@0	64 if nargin < 4 \| isempty(verbose) \| isnan(verbose), verbose = 0; end
wolffd@0	65
wolffd@0	66 centers = cell(n_max,1);
wolffd@0	67 clusters = cell(n_max,1);
wolffd@0	68 ind = zeros(1,n_max)+NaN;
wolffd@0	69 errors = zeros(1,n_max)+NaN;
wolffd@0	70
wolffd@0	71 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wolffd@0	72 %% action
wolffd@0	73
wolffd@0	74 % the case k=1 is trivial, but Davies-Boulding index cannot be evaluated
wolffd@0	75 m = zeros(1,dim);
wolffd@0	76 for i=1:dim, m(i)=mean(D(isfinite(D(:,i)),i)); end
wolffd@0	77 centers{1} = m;
wolffd@0	78 clusters{1} = ones(dlen,1);
wolffd@0	79 [dummy qerr] = som_bmus(m,D);
wolffd@0	80 errors(1) = sum(qerr.^2);
wolffd@0	81 ind(1) = NaN;
wolffd@0	82
wolffd@0	83 if verbose, fprintf(2,'Doing k-means for 2-%d clusters\n',n_max); end
wolffd@0	84
wolffd@0	85 for i = 2:n_max, % number of clusters
wolffd@0	86
wolffd@0	87 % make k-means with k=i for c_max times and select the best based
wolffd@0	88 % on sum-of-squared errors (SSE)
wolffd@0	89 best = realmax;
wolffd@0	90 for j = 1:c_max % run number j for cluster i
wolffd@0	91 if verbose,
wolffd@0	92 fprintf('%d/%d clusters, k-means run %d/%d\r', i, n_max,j, c_max);
wolffd@0	93 end
wolffd@0	94 [c, k, err] = som_kmeans('batch', D, i, 100, 0);
wolffd@0	95 if err < best, k_best = k'; c_best = c; best = err; end
wolffd@0	96 % ' added in k_best = k'; by kr 1.10.02
wolffd@0	97 end
wolffd@0	98 if verbose, fprintf(1, '\n'); end
wolffd@0	99
wolffd@0	100 % store the results
wolffd@0	101 centers{i} = c_best;
wolffd@0	102 clusters{i} = k_best;
wolffd@0	103 errors(i) = best;
wolffd@0	104 % ind(i) = db_index(D, c_best, k_best, 2); wrong version in somtbx ??
wolffd@0	105 ind(i) = db_index(D, k_best, c_best, 2); % modified by kr 1.10.02
wolffd@0	106
wolffd@0	107 % if verbose mode, plot the index & SSE
wolffd@0	108 if verbose
wolffd@0	109 subplot(2,1,1), plot(ind), grid
wolffd@0	110 title('Davies-Bouldin''s index')
wolffd@0	111 subplot(2,1,2), plot(errors), grid
wolffd@0	112 title('SSE')
wolffd@0	113 drawnow
wolffd@0	114 end
wolffd@0	115 end
wolffd@0	116
wolffd@0	117 return;
wolffd@0	118
wolffd@0	119
wolffd@0	120

Mercurial > hg > camir-aes2014

annotate toolboxes/MIRtoolbox1.3.2/somtoolbox/kmeans_clusters.m @ 0:e9a9cd732c1e tip