wolffd@0
|
function [t,r] = db_index(D, cl, C, p, q)

% DB_INDEX Davies-Bouldin clustering evaluation index.
%
% [t,r] = db_index(D, cl, C, p, q)
%
%  Input and output arguments ([]'s are optional):
%    D     (matrix) data (n x dim)
%          (struct) map or data struct
%    cl    (vector) cluster numbers corresponding to data samples (n x 1)
%    [C]   (matrix) prototype vectors (c x dim) (default = cluster means)
%    [p]   (scalar) norm used in the computation (default = 2)
%    [q]   (scalar) moment used to calculate cluster dispersions (default = 2)
%
%    t     (scalar) Davies-Bouldin index for the clustering (=mean(r))
%    r     (vector) maximum DB index for each cluster (size c x 1),
%                   r(i) = max over all j ~= i of (S(i)+S(j))/M(i,j)
%
% An empty matrix ([]) given for C, p or q selects the default value.
%
% See also  KMEANS, KMEANS_CLUSTERS, SOM_GAPINDEX.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% input arguments

% unwrap map / data structs into a plain data matrix
if isstruct(D),
  switch D.type,
   case 'som_map',  D = D.codebook;
   case 'som_data', D = D.data;
   otherwise,
    error('db_index: D must be a matrix, a som_map struct or a som_data struct.');
  end
end

% optional arguments: both "not given" and [] mean "use the default"
if nargin < 3, C = []; end
if nargin < 4, p = []; end
if nargin < 5, q = []; end
if isempty(p), p = 2; end % euclidian distance between cluster centers
if isempty(q), q = 2; end % dispersion = standard deviation

% cluster centroids
dim = size(D,2);
u = unique(cl);   % distinct cluster labels
c = length(u);    % number of clusters actually present in cl
if isempty(C),
  C = zeros(c,dim);
  for i = 1:c,
    % first output of nanstats is used as the cluster mean
    me = nanstats(D(cl==u(i),:));
    C(i,:) = me';
  end
end

% map cluster labels to row indices of C (labels must be positive integers)
u2i = zeros(max(u),1); u2i(u) = 1:c;
D = som_fillnans(D,C,u2i(cl)); % replace NaN's with cluster centroid values

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% action

% dispersion in each cluster: q-th root of the mean q-th power of the
% euclidian distances between the cluster's points and its centroid
S = NaN * zeros(c,1);
for i = 1:c
  ind = find(cl==u(i)); % points in this cluster
  ni = length(ind);
  if ni > 0
    S(i) = (mean(sqrt(sum((D(ind,:) - ones(ni,1) * C(i,:)).^2,2)).^q))^(1/q);
  end
end

% distances between cluster centroids; equivalent loop form:
%   M(i,j) = sum(abs(C(i,:) - C(j,:)).^p)^(1/p);
M = som_mdist(C,p);

% Davies-Bouldin index
% R is filled symmetrically so that every cluster is compared against ALL
% other clusters; the diagonal stays NaN and is ignored by max.
R = NaN * zeros(c);
for i = 1:c
  for j = i+1:c
    R(i,j) = (S(i) + S(j))/M(i,j);
    R(j,i) = R(i,j);
  end
end
r = max(R,[],2);

% clusters without a finite index (e.g. a single cluster, or coinciding
% centroids giving Inf) are left out of the mean
t = mean(r(isfinite(r)));

return;
|
wolffd@0
|
83
|