view toolboxes/MIRtoolbox1.3.2/somtoolbox/db_index.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line source
function [t,r] = db_index(D, cl, C, p, q)
 
% DB_INDEX Davies-Bouldin clustering evaluation index.
%
% [t,r] = db_index(D, cl, C, p, q)
%
%  Input and output arguments ([]'s are optional):  
%    D     (matrix) data (n x dim)
%          (struct) map or data struct
%    cl    (vector) cluster numbers corresponding to data samples (n x 1)
%    [C]   (matrix) prototype vectors (c x dim) (default = cluster means)
%    [p]   (scalar) norm used in the computation (default == 2)
%    [q]   (scalar) moment used to calculate cluster dispersions (default = 2)
% 
%    t     (scalar) Davies-Bouldin index for the clustering (=mean(r))
%    r     (vector) maximum DB index for each cluster (size c x 1)    
% 
% See also  KMEANS, KMEANS_CLUSTERS, SOM_GAPINDEX.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% input arguments

if isstruct(D), 
    switch D.type,
    case 'som_map', D = D.codebook; 
    case 'som_data', D = D.data; 
    end
end

% cluster centroids
[l dim] = size(D);
u = unique(cl); 
c = length(u); 
if nargin <3, 
  C = zeros(c,dim); 
  for i=1:c, 
      me = nanstats(D(find(cl==u(i)),:));
      C(i,:) = me';
  end 
end

u2i = zeros(max(u),1); u2i(u) = 1:c; 
D = som_fillnans(D,C,u2i(cl)); % replace NaN's with cluster centroid values

if nargin <4, p = 2; end % euclidian distance between cluster centers
if nargin <5, q = 2; end % dispersion = standard deviation
 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% action

% dispersion in each cluster 
for i = 1:c
  ind = find(cl==u(i)); % points in this cluster
  l   = length(ind);
  if l > 0
    S(i) = (mean(sqrt(sum((D(ind,:) - ones(l,1) * C(i,:)).^2,2)).^q))^(1/q);
  else
    S(i) = NaN;
  end
end
 
% distances between clusters
%for i = 1:c
%  for j = i+1:c
%    M(i,j) = sum(abs(C(i,:) - C(j,:)).^p)^(1/p);
%  end
%end
M = som_mdist(C,p); 

% Davies-Bouldin index
R = NaN * zeros(c);
r = NaN * zeros(c,1);
for i = 1:c
  for j = i+1:c
    R(i,j) = (S(i) + S(j))/M(i,j);
  end
  r(i) = max(R(i,:));
end
 
t = mean(r(isfinite(r)));
 
return;