diff toolboxes/MIRtoolbox1.3.2/somtoolbox/db_index.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/toolboxes/MIRtoolbox1.3.2/somtoolbox/db_index.m	Tue Feb 10 15:05:51 2015 +0000
@@ -0,0 +1,83 @@
+function [t,r] = db_index(D, cl, C, p, q)
+ 
+% DB_INDEX Davies-Bouldin clustering evaluation index.
+%
+% [t,r] = db_index(D, cl, C, p, q)
+%
+%  Input and output arguments ([]'s are optional):  
+%    D     (matrix) data (n x dim)
+%          (struct) map or data struct
+%    cl    (vector) cluster numbers corresponding to data samples (n x 1)
+%    [C]   (matrix) prototype vectors (c x dim) (default = cluster means)
+%    [p]   (scalar) norm used in the computation (default == 2)
+%    [q]   (scalar) moment used to calculate cluster dispersions (default = 2)
+% 
+%    t     (scalar) Davies-Bouldin index for the clustering (=mean(r))
+%    r     (vector) maximum DB index for each cluster (size c x 1)    
+% 
+% See also  KMEANS, KMEANS_CLUSTERS, SOM_GAPINDEX.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% input arguments
+
+if isstruct(D), 
+    switch D.type,
+    case 'som_map', D = D.codebook; 
+    case 'som_data', D = D.data; 
+    end
+end
+
+% cluster centroids
+[l dim] = size(D);
+u = unique(cl); 
+c = length(u); 
+if nargin <3, 
+  C = zeros(c,dim); 
+  for i=1:c, 
+      me = nanstats(D(find(cl==u(i)),:));
+      C(i,:) = me';
+  end 
+end
+
+u2i = zeros(max(u),1); u2i(u) = 1:c; 
+D = som_fillnans(D,C,u2i(cl)); % replace NaN's with cluster centroid values
+
+if nargin <4, p = 2; end % euclidian distance between cluster centers
+if nargin <5, q = 2; end % dispersion = standard deviation
+ 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% action
+
+% dispersion in each cluster 
+for i = 1:c
+  ind = find(cl==u(i)); % points in this cluster
+  l   = length(ind);
+  if l > 0
+    S(i) = (mean(sqrt(sum((D(ind,:) - ones(l,1) * C(i,:)).^2,2)).^q))^(1/q);
+  else
+    S(i) = NaN;
+  end
+end
+ 
+% distances between clusters
+%for i = 1:c
+%  for j = i+1:c
+%    M(i,j) = sum(abs(C(i,:) - C(j,:)).^p)^(1/p);
+%  end
+%end
+M = som_mdist(C,p); 
+
+% Davies-Bouldin index
+R = NaN * zeros(c);
+r = NaN * zeros(c,1);
+for i = 1:c
+  for j = i+1:c
+    R(i,j) = (S(i) + S(j))/M(i,j);
+  end
+  r(i) = max(R(i,:));
+end
+ 
+t = mean(r(isfinite(r)));
+ 
+return;                                                                                                     
+