view toolboxes/MIRtoolbox1.3.2/somtoolbox/kmeans_clusters.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line source
function [centers,clusters,errors,ind] = kmeans_clusters(sD, n_max, c_max, verbose)

% KMEANS_CLUSTERS Clustering with k-means with different values for k.
%
% [c, p, err, ind] = kmeans_clusters(sD, [n_max], [c_max], [verbose])
%
%   [c, p, err, ind] = kmeans_clusters(sD);
%  
%  Input and output arguments ([]'s are optional):
%   sD        (struct) map or data struct; the field 'data' is used if
%                      present, otherwise the field 'codebook'
%             (matrix) size dlen x dim, the data 
%   [n_max]   (scalar) maximum number of clusters, default is
%                      ceil(sqrt(dlen))
%   [c_max]   (scalar) number of k-means runs for each k, default is 5
%   [verbose] (scalar) verbose level, 0 by default
%
%   c         (cell array) c{i} contains cluster centroids for k=i
%   p         (cell array) p{i} contains cluster indices for k=i
%   err       (vector) sum of squared errors for each value of k
%   ind       (vector) Davies-Bouldin index value for each clustering
%                      (NaN for k=1, where it cannot be evaluated)
%
% Makes a k-means to the given data set with different values of
% k. The k-means is run multiple times for each k, and the best of
% these is selected based on sum of squared errors. Finally, the
% Davies-Bouldin index is calculated for each clustering. 
%
% If none of the runs for some k yields a usable error value, c{k} and
% p{k} are left empty, err(k) and ind(k) are left NaN, and a warning is
% issued.
%
% For example to cluster a SOM: 
%    [c, p, err, ind] = kmeans_clusters(sM); % find clusterings
%    [dummy,i] = min(ind); % select the one with smallest index
%    som_show(sM,'color',{p{i},sprintf('%d clusters',i)}); % visualize
%    colormap(jet(i)), som_recolorbar % change colormap
%  
% See also SOM_KMEANS.

% References: 
%   Jain, A.K., Dubes, R.C., "Algorithms for Clustering Data", 
%   Prentice Hall, 1988, pp. 96-101.
%
%   Davies, D.L., Bouldin, D.W., "A Cluster Separation Measure", 
%   IEEE Transactions on Pattern Analysis and Machine Intelligence, 
%   vol. PAMI-1, no. 2, 1979, pp. 224-227.
%
%   Vesanto, J., Alhoniemi, E., "Clustering of the Self-Organizing
%   Map", IEEE Transactions on Neural Networks, 2000.

% Contributed to SOM Toolbox vs2, February 2nd, 2000 by Esa Alhoniemi
% Copyright (c) by Esa Alhoniemi
% http://www.cis.hut.fi/projects/somtoolbox/

% ecco 301299 juuso 020200 211201

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% input arguments and initialization

% extract the data matrix: structs contribute .data if they have it,
% otherwise .codebook; anything else is used as the data matrix itself
if isstruct(sD), 
  if isfield(sD,'data'), D = sD.data; 
  else D = sD.codebook; 
  end
else D = sD; 
end
[dlen, dim] = size(D);

% missing, empty or NaN optional arguments fall back to their defaults
% (explicit short-circuit so the NaN test is never applied to a missing
% or empty argument)
if nargin < 2 || isempty(n_max) || all(isnan(n_max(:))), n_max = ceil(sqrt(dlen)); end
if nargin < 3 || isempty(c_max) || all(isnan(c_max(:))), c_max = 5; end
if nargin < 4 || isempty(verbose) || all(isnan(verbose(:))), verbose = 0; end

% outputs are pre-filled with [] / NaN so that any k without a result
% is recognizable afterwards
centers   = cell(n_max,1); 
clusters  = cell(n_max,1);
ind       = zeros(1,n_max)+NaN;
errors    = zeros(1,n_max)+NaN;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% action

% the case k=1 is trivial, but Davies-Bouldin index cannot be evaluated
m = zeros(1,dim);
for i=1:dim, m(i)=mean(D(isfinite(D(:,i)),i)); end % column means over finite values only
centers{1} = m;
clusters{1} = ones(dlen,1);
[dummy, qerr] = som_bmus(m,D);
errors(1) = sum(qerr.^2);
ind(1) = NaN; 

if verbose, fprintf(2,'Doing k-means for 2-%d clusters\n',n_max); end

for i = 2:n_max, % number of clusters

  % make k-means with k=i for c_max times and select the best based
  % on sum-of-squared errors (SSE)
  % the best-run state is reset for every k, so that a k without any
  % usable run can never inherit the centroids/labels of the previous k
  best   = realmax;  
  c_best = [];
  k_best = [];
  found  = 0;
  for j = 1:c_max     % run number j for cluster i      
    if verbose,
      fprintf('%d/%d clusters, k-means run %d/%d\r', i, n_max,j, c_max);
    end      
    [c, k, err] = som_kmeans('batch', D, i, 100, 0);
    if err < best, k_best = k'; c_best = c; best = err; found = 1; end
    % ' added in k_best = k'; by kr 1.10.02
  end
  if verbose, fprintf(1, '\n');  end

  % store the results  
  if found
    centers{i}  = c_best;
    clusters{i} = k_best;
    errors(i)   = best;
%    ind(i)      = db_index(D, c_best, k_best, 2); wrong version in somtbx ??
    ind(i)      = db_index(D, k_best, c_best, 2); % modified by kr 1.10.02
  else
    % no run gave err < realmax (NaN/Inf error, or c_max < 1): keep the
    % pre-filled [] / NaN entries instead of storing stale results
    warning('No k-means run produced a usable error for k=%d; results for this k are left empty.', i);
  end

  % if verbose mode, plot the index & SSE
  if verbose
    subplot(2,1,1), plot(ind), grid
    title('Davies-Bouldin''s index')
    subplot(2,1,2), plot(errors), grid
    title('SSE')
    drawnow
  end
end

return;