% Load the data set and the stored feature-selection results.
% NOTE(review): the contents of the two .mat files are not visible from this
% script; the code below presumably relies on them to supply Data, Labels,
% FeatureNames and featuredata -- confirm against the files.
load('Adobe.mat');
load('Results1Percent.mat');

%%
% Keep only the variables indexed by IdxVar of the last featuredata entry.
datamap      = featuredata(numel(featuredata)).IdxVar;
reduceData   = Data(:, datamap);
% NOTE(review): Labels is indexed with the same index vector that selects
% columns of Data -- confirm that Labels is per-variable and not per-row.
reduceLabels = Labels(datamap);

%%
reduceFeatures = FeatureNames(datamap);

%%
% The results file is loaded a second time; any workspace variable that is
% also stored in the file is reset to its saved value here.
load('Results1Percent.mat');

%%
% Replaces the reduceFeatures value assigned above with the ranked names
% stored in the first featuredata entry.
reduceFeatures = featuredata(1).FeatureNamesRanked;

% Hierarchical clustering of a random subset of the rows of reduceData,
% used to get a quick dendrogram.
dataToUseSize = 500;

% Draw DISTINCT row indices. Sampling with replacement (ceil(rand*n)) can
% pick the same row several times, which adds zero-distance pairs and
% zero-height merges to the tree. The sample is capped at the number of
% rows available so small data sets still work.
numRows      = size(reduceData, 1);
shuffledRows = randperm(numRows);
dataToUse    = shuffledRows(1:min(dataToUseSize, numRows));   % row vector

% Pairwise distances between the sampled rows (pdist default metric).
dMap = pdist(reduceData(dataToUse, :));

clusterMethod = 'ward';
% Available linkage methods:
%   'average'   Unweighted average distance (UPGMA)
%   'centroid'  Centroid distance (UPGMC), appropriate for Euclidean
%               distances only
%   'complete'  Furthest distance
%   'median'    Weighted center of mass distance (WPGMC), appropriate for
%               Euclidean distances only
%   'single'    Shortest distance
%   'ward'      Inner squared distance (minimum variance algorithm),
%               appropriate for Euclidean distances only
%   'weighted'  Weighted average distance (WPGMA)

dl = linkage(dMap, clusterMethod);
dendrogram(dl)
% figure; imagesc(squareform(dMap_sp))
% title('euclidian self similarity');

%%
% Inconsistency coefficients of the subset tree. The statement is left
% unsuppressed, so the result is printed to the command window.
incon_sp = inconsistent(dl)

%%
% Build the linkage tree for every row of reduceData (no subsampling).

clusterMethod = 'ward';
% Linkage method choices:
%   'average'   Unweighted average distance (UPGMA)
%   'centroid'  Centroid distance (UPGMC),
%               appropriate for Euclidean distances only
%   'complete'  Furthest distance
%   'median'    Weighted center of mass distance (WPGMC),
%               appropriate for Euclidean distances only
%   'single'    Shortest distance
%   'ward'      Inner squared distance (minimum variance algorithm),
%               appropriate for Euclidean distances only
%   'weighted'  Weighted average distance (WPGMA)

% Pairwise distances over the full data set, then the tree itself.
dMapAll = pdist(reduceData);
dl_all  = linkage(dMapAll, clusterMethod);
% [~,T] = dendrogram(dl_all,0)

%%
% Collect the file list for each cluster.
%
% dendrogram(dl_all, numClusters) draws the tree collapsed to at most
% numClusters leaf nodes; its second output T holds, for every row of the
% clustered data, the leaf node that row was assigned to.

numClusters = 25;
fnames = cell(1, numClusters);
[~, T] = dendrogram(dl_all, numClusters);
for clusterIdx = 1:numClusters
    % Logical indexing selects the members of this cluster directly.
    % NOTE(review): assumes Filenames has one entry per row of reduceData,
    % in the same order -- Filenames is not defined in this script.
    fnames{clusterIdx} = Filenames(T == clusterIdx);
end

%%
% Build a cell table for export to Weka: one header row of feature names
% plus 'Class', followed by one row per observation holding its feature
% values and its cluster number T.

feats = reduceData;

% csvOut = mat2cell(feats,ones(size(feats,1),1), ones(size(feats,2),1))
% T(:) forces a column so the class values line up with the rows of feats.
csvOut = [num2cell(feats), num2cell(T(:))];
% size(csvOut)
% size([FeatureNames(datamap)', {'Class'}])

% Force the header into a row regardless of whether FeatureNames is stored
% as a row or a column cell array; a plain transpose only works for the
% column case.
headerRow = [reshape(FeatureNames(datamap), 1, []), {'Class'}];
csvOut = [headerRow; csvOut];

%%
% fnames to CSV
%
% Writes junk.csv with one column per cluster list in fnames and one line
% per position within the lists: line k holds the k-th file name of every
% cluster, left blank where a cluster has fewer than k files.

maxLen = size(fnames, 2);                      % number of cluster lists
listLengths = zeros(1, maxLen);
for i = 1:maxLen
    % numel works for both row and column cell lists.
    listLengths(i) = numel(fnames{i});
end
maxDepth = max([0, listLengths]);

% Start from a fresh table pre-filled with empty strings so that nothing
% left in csvOut by an earlier section leaks into the output and so that
% short lists still produce a (blank) field in every column.
csvOut = repmat({''}, maxLen, maxDepth);
for i = 1:maxLen
    for ii = 1:listLengths(i)
        csvOut(i, ii) = fnames{i}(ii);
    end
end

fid = fopen('junk.csv', 'w');
if fid == -1
    error('Could not open junk.csv for writing.');
end
for ii = 1:maxDepth
    for i = 1:maxLen
        % Same field layout as the ' %s, ' format: space, value, comma, space.
        % NOTE(review): assumes the entries of Filenames are text.
        fprintf(fid, '%s', [' ', char(csvOut{i, ii}), ', ']);
    end
    fprintf(fid, '\n');
end
% fprintf(fid,'%f, %f, %f\n',c{2:end,:})
fclose(fid);
% dlmwrite('test.csv', csvOut, '-append') ;

%%
% Cut the linkage tree at different levels and plot the resulting cluster
% assignment T of every observation.
%
% dl_sp is not assigned anywhere in the visible part of this script (only
% dl and dl_all are). If it is not already in the workspace, fall back to
% the full-data tree so the section runs instead of failing on an undefined
% variable. The guard lives in this first cell, so run it before the cells
% below.
% NOTE(review): confirm that dl_all is the intended tree here.
if ~exist('dl_sp', 'var')
    warning('dl_sp is not defined; using dl_all instead.');
    dl_sp = dl_all;
end

% Cut where the inconsistency coefficient exceeds 1.3.
T = cluster(dl_sp, 'cutoff', 1.3);
figure; plot(T);

%%
T = cluster(dl_sp, 'maxclust', 2);
plot(T)

%%
T = cluster(dl_sp, 'maxclust', 3);
plot(T)

%%
% 4 to 9 clusters; each plot replaces the previous one in the current axes,
% so T and the figure end up showing the 9-cluster result.
for numGroups = 4:9
    T = cluster(dl_sp, 'maxclust', numGroups);
    plot(T)
end

%%
T = cluster(dl_sp, 'maxclust', 10);
plot(T)

%%
T = cluster(dl_sp, 'maxclust', 100);
plot(T)

%%
% Median cluster number for the current T, then again for up to 1000
% clusters (both printed to the command window).
median(T)

T = cluster(dl_sp, 'maxclust', 1000);
median(T)

plot(T)

% Dump the reduced feature matrix as comma-separated values.
csvwrite('dataOutput', reduceData);
DaveM@1
|
148
|
DaveM@1
|
149
|
DaveM@1
|
150
|
DaveM@1
|
151
|
DaveM@1
|
152
|
DaveM@1
|
153
|
DaveM@1
|
154
|
DaveM@1
|
155
|
DaveM@1
|
156
|
DaveM@1
|
157
|
DaveM@1
|
158
|
DaveM@1
|
159
|
DaveM@1
|
160 % dMap_euc = pdist(reduceData);
|
DaveM@1
|
161 % dMap_cos = pdist(reduceData,'cos');
|
DaveM@1
|
162 % dMap_cos = pdist(reduceData,'cosine');
|
DaveM@1
|
163 % dl_euc = linkage(dMap_euc);
|
DaveM@1
|
164 % dl_cos = linkage(dMap_cos);
|
DaveM@1
|
165 % % dl_sp
|
DaveM@1
|
166 % dl_sp(10,:)
|
DaveM@1
|
167 % dl_sp(1:10,:)
|
DaveM@1
|
168 % sprintf('%f', dl_sp(1:10,:))
|
DaveM@1
|
169 % dl_sp(1:10,:)
|
DaveM@1
|
170 % format short g
|
DaveM@1
|
171 % dl_sp(1:10,:)
|
DaveM@1
|
172 % plot(dl_sp(:))
|
DaveM@1
|
173 % plot(dl_sp(:,3))
|
DaveM@1
|
174 % incon_sp = inconsistent(dl_sp)
|