function [t,r,Cd,S] = som_gapindex(sM, base, between)

% SOM_GAPINDEX Gap clustering evaluation index.
%
% [t,r,Cd,S] = som_gapindex(sM, base, [between])
%
%  Input and output arguments ([]'s are optional):
%    sM        (struct) map struct
%    base      (vector) cluster index for each map unit; map units
%                       with index<=0 or NaN are not taken into account
%    [between] (vector) indices of prototypes which are "between" clusters:
%                       the associated distances are doubled
%                       (default: the units whose base value is NaN)
%
%    t         (scalar) Gap index for the clustering: mean of the finite
%                       values of r (NaN if there are none)
%    r         (vector) maximum Gap index for each cluster
%                       (size max(base) x 1)
%    Cd        (matrix) size max(base) x max(base): mean of the finite
%                       distances between the units of each pair of
%                       clusters; NaN if there is no nonzero finite
%                       distance between the two clusters
%    S         (vector) size max(base) x 1: dispersion of each cluster,
%                       the mean of the finite distances between its
%                       units ("between" units excluded); 0 if there
%                       are no such distances
%
% See also  KMEANS, KMEANS_CLUSTERS.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

if nargin<3, between = find(isnan(base)); end

% Number of clusters = largest finite cluster index. Non-finite labels are
% dropped first: max() of an all-NaN vector is NaN and max() of an empty
% vector is [], neither of which can be used as an array size below.
nc = max(base(isfinite(base)));
if isempty(nc), nc = 0; end

% map unit indices belonging to each cluster
cinds = cell(nc,1);
for i=1:nc, cinds{i} = find(base==i); end

% distances between neighboring prototypes; entries for unit pairs that
% are not marked in Ne are set to NaN so that they can be filtered out
Ne = som_neighbors(sM,'N1');
Md = som_mdist(sM.codebook,2,[],Ne);
Md(Ne==0) = NaN;

% Double every distance that involves a "between" unit. The row and column
% scalings together multiply the between-between entries by 4, so those
% are halved afterwards to end up doubled as well.
Md(between,:) = Md(between,:)*2;
Md(:,between) = Md(:,between)*2;
Md(between,between) = Md(between,between)/2;

% dispersion in each cluster
S = zeros(nc,1);
for i=1:nc,
  inds = setdiff(cinds{i},between);
  if ~isempty(inds),
    indist = Md(inds,inds);
    % mask the diagonal (distance of a unit to itself)
    n = size(indist,1);
    indist(1:n+1:n*n) = NaN;
    indist = indist(isfinite(indist(:)));
    % S(i) is preallocated to 0, which is also the mean of all-zero
    % distances, so only the empty case needs to be skipped
    if ~isempty(indist), S(i) = mean(indist); end
  end
end

% distances between clusters
Cd = zeros(nc,nc) + NaN;
for i=1:nc,
  inds1 = cinds{i};
  for j=1:nc,
    inds2 = cinds{j};
    od = Md(inds1,inds2);
    od = od(isfinite(od(:)));
    % NOTE: any() is false both when od is empty and when every distance
    % is exactly zero; in both cases Cd(i,j) is left as NaN
    if any(od), Cd(i,j) = mean(od(:)); end
  end
end

% Gap index: for each pair of clusters, the sum of their dispersions
% divided by the distance between them (symmetric; diagonal stays NaN)
R = NaN * zeros(nc);
for i = 1:nc
  for j = i+1:nc
    R(i,j) = (S(i) + S(j))/Cd(i,j);
    R(j,i) = R(i,j);
  end
end

% worst (largest) pairwise value for each cluster; max() skips NaNs
r = max(R,[],2);

% overall index: only finite per-cluster values are averaged
t = mean(r(isfinite(r)));

return;