function [t,r] = db_index(D, cl, C, p, q)

% DB_INDEX Davies-Bouldin clustering evaluation index.
%
% [t,r] = db_index(D, cl, C, p, q)
%
%  Input and output arguments ([]'s are optional):
%    D     (matrix) data (n x dim)
%          (struct) map or data struct
%    cl    (vector) cluster numbers corresponding to data samples (n x 1)
%    [C]   (matrix) prototype vectors (c x dim), one row per unique
%                   value of cl in sorted order (default = cluster means)
%    [p]   (scalar) norm used for the distances between prototypes
%                   (default = 2)
%    [q]   (scalar) moment used to calculate cluster dispersions
%                   (default = 2)
%
%    t     (scalar) Davies-Bouldin index for the clustering
%                   (= mean of the finite values of r)
%    r     (vector) maximum DB index for each cluster (size c x 1):
%                   r(i) = max over j ~= i of (S(i)+S(j))/M(i,j)
%
% An empty matrix given for C, p or q selects the default value.
%
% Note: the within-cluster dispersion S is always based on the Euclidean
% distance of the samples to their prototype; p only affects the
% between-prototype distances M.
%
% See also  KMEANS, KMEANS_CLUSTERS, SOM_GAPINDEX.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% input arguments

if isstruct(D),
  switch D.type,
   case 'som_map',  D = D.codebook;
   case 'som_data', D = D.data;
  end
end

if nargin < 3, C = []; end
if nargin < 4 || isempty(p), p = 2; end % euclidian distance between cluster centers
if nargin < 5 || isempty(q), q = 2; end % dispersion = standard deviation

% Map the cluster labels to consecutive indices 1..c. Using the third
% output of UNIQUE (instead of a lookup table indexed by the label
% itself) also works for labels that are not positive integers.
dim = size(D,2);
[u,dummy,idx] = unique(cl);
idx = idx(:);
c = length(u);

% cluster centroids
if isempty(C),
  C = zeros(c,dim);
  for i = 1:c,
    me = nanstats(D(idx==i,:));
    C(i,:) = me';
  end
end

D = som_fillnans(D,C,idx); % replace NaN's with cluster centroid values

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% action

% dispersion in each cluster: q-th moment of the Euclidean distances
% between the cluster's samples and its prototype
S = NaN * zeros(c,1);
for i = 1:c
  members = find(idx==i);   % points in this cluster
  ni = length(members);
  if ni > 0
    dev  = D(members,:) - ones(ni,1) * C(i,:);
    S(i) = (mean(sqrt(sum(dev.^2,2)).^q))^(1/q);
  end
end

% distances between cluster prototypes
M = som_mdist(C,p);

% Davies-Bouldin index.
% R(i,j) is symmetric in i and j, so it is computed once for i < j and
% mirrored. The diagonal stays NaN so that a cluster is never compared
% with itself (M(i,i) = 0 would give Inf).
R = NaN * zeros(c);
for i = 1:c
  for j = i+1:c
    R(i,j) = (S(i) + S(j))/M(i,j);
    R(j,i) = R(i,j);
  end
end

% The row maximum must be taken only after R is complete: taking it
% inside the loop would see just the pairs j > i and leave the last
% cluster without any value. MAX ignores NaN's, and returns NaN for a
% row that contains nothing else (c == 1).
r = max(R,[],2);

t = mean(r(isfinite(r)));

return;