DaveM@2
|
1
|
DaveM@2
|
2 load('Adobe.mat')  % loads every variable stored in the file into the workspace (presumably Data, Labels, Filenames - TODO confirm)
|
DaveM@2
|
3 load('Results1Percent.mat')  % presumably provides featuredata - TODO confirm
|
DaveM@2
|
4
|
DaveM@2
|
5 datamap = featuredata(end).IdxVar;  % index list taken from the last featuredata entry
|
DaveM@2
|
6 reduceData = Data(:,datamap);  % keep all rows, only the columns listed in datamap
|
DaveM@2
|
7 reduceLabels = Labels(datamap);  % NOTE(review): datamap selects columns of Data above but elements of Labels here - confirm Labels is per-column; reduceLabels is not used again in the visible script
|
DaveM@2
|
8 %% UNUSED
|
DaveM@2
|
9 % reduceFeatures = FeatureNames(datamap);
|
DaveM@2
|
10
|
DaveM@2
|
11 %% UNUSED
|
DaveM@2
|
12 load('Results1Percent.mat')  % same file as the load on line 3; only needed when this cell is run on its own
|
DaveM@2
|
13
|
DaveM@2
|
14 %%
|
DaveM@2
|
15 % Use Subset of data
|
DaveM@2
|
16 reduceFeatures = featuredata(1).FeatureNamesRanked;  % ranked feature names from the first featuredata entry
|
DaveM@2
|
17
|
DaveM@2
|
18 dataToUseSize = 500;  % requested number of rows in the random subset
|
DaveM@2
|
19 dataToUse = randperm(size(reduceData,1), min(dataToUseSize, size(reduceData,1)));  % distinct row indices (1-by-k); the old ceil(rand*N) draw could repeat rows
|
DaveM@2
|
20
|
DaveM@2
|
21 dMap = pdist(reduceData(dataToUse,:));  % pairwise distances between the sampled rows (pdist default metric)
|
DaveM@2
|
22 clusterMethod = 'ward';  % linkage method; the alternatives accepted by linkage are listed below
|
DaveM@2
|
23 % 'average' Unweighted average distance (UPGMA)
|
DaveM@2
|
24 % 'centroid' Centroid distance (UPGMC), appropriate for Euclidean distances only
|
DaveM@2
|
25 % 'complete' Furthest distance
|
DaveM@2
|
26 % 'median' Weighted center of mass distance (WPGMC), appropriate for Euclidean distances only
|
DaveM@2
|
27 % 'single' Shortest distance
|
DaveM@2
|
28 % 'ward' Inner squared distance (minimum variance algorithm), appropriate for Euclidean distances only
|
DaveM@2
|
29 % 'weighted' Weighted average distance (WPGMA)
|
DaveM@2
|
30
|
DaveM@2
|
31 dl = linkage(dMap, clusterMethod);  % hierarchical cluster tree for the subset
|
DaveM@2
|
32 dendrogram(dl)  % plot the tree in the current figure
|
DaveM@2
|
33 incon_sp = inconsistent(dl);  % inconsistency coefficients per link; semicolon added so the full matrix is not echoed to the console
|
DaveM@2
|
34 % figure; imagesc(squareform(dMap_sp))
|
DaveM@2
|
35 % title('euclidian self similarity');
|
DaveM@2
|
36
|
DaveM@2
|
37 %%
|
DaveM@2
|
38 % Use all data
|
DaveM@2
|
39
|
DaveM@2
|
40 clusterMethod = 'ward';  % linkage method used for the full data set
|
DaveM@2
|
41 dMapAll = pdist(reduceData, 'euclidean');  % pairwise distances over every row; 'euclidean' is the pdist default, spelled out here
|
DaveM@2
|
42 % Linkage methods accepted by linkage():
|
DaveM@2
|
43 % 'average'  - unweighted average distance (UPGMA)
|
DaveM@2
|
44 % 'centroid' - centroid distance (UPGMC), Euclidean distances only
|
DaveM@2
|
45 % 'complete' - furthest distance
|
DaveM@2
|
46 % 'median'   - weighted center of mass distance (WPGMC),
|
DaveM@2
|
47 %              Euclidean distances only
|
DaveM@2
|
48 % 'single'   - shortest distance
|
DaveM@2
|
49 % 'ward'     - inner squared distance (minimum variance algorithm),
|
DaveM@2
|
50 %              Euclidean distances only
|
DaveM@2
|
51 % 'weighted' - weighted average distance (WPGMA)
|
DaveM@2
|
52
|
DaveM@2
|
53 dl_all = linkage(dMapAll, clusterMethod);  % hierarchical cluster tree over all rows of reduceData
|
DaveM@2
|
54 % [~,T] = dendrogram(dl_all,0)
|
DaveM@2
|
55
|
DaveM@2
|
56 %%
|
DaveM@2
|
57 % print filelist for each cluster
|
DaveM@2
|
58 if ~exist('data','dir'), mkdir('data'); end  % the saveas calls below need the output folder to exist
|
DaveM@2
|
59 numClusters = 100;  % number of leaf groups the dendrogram is collapsed to
|
DaveM@2
|
60 fnames = cell(1,numClusters);  % fnames{i} will hold the file names assigned to group i
|
DaveM@2
|
61 [~,T] = dendrogram(dl_all,numClusters);  % draws the collapsed tree; T is the group number of each observation
|
DaveM@2
|
62 plotName = ['data/ClusterWith' num2str(numClusters) 'Elements'];
|
DaveM@2
|
63 saveas(gcf, plotName, 'fig');  % save the dendrogram figure as a MATLAB .fig
|
DaveM@2
|
64 saveas(gcf, plotName, 'pdf');  % and as a PDF with the same base name
|
DaveM@2
|
65 for i = 1:numClusters
|
DaveM@2
|
66   inCluster = (T == i);  % logical mask of observations in group i (replaces the unused numFiles count)
|
DaveM@2
|
67   fnames{i} = Filenames(inCluster);  % logical indexing, no find() needed
|
DaveM@2
|
68 end
|
DaveM@2
|
69
|
DaveM@2
|
70 %
|
DaveM@2
|
71 % makeCSV for Weka
|
DaveM@2
|
72 % format
|
DaveM@2
|
73
|
DaveM@2
|
74 feats = reduceData;  % feature matrix to export, one row per observation
|
DaveM@2
|
75 header = [reduceFeatures(datamap)', {'Class'}];  % column titles plus the class column; NOTE(review): reduceFeatures is the ranked name list indexed by datamap - confirm the names line up with the columns of reduceData
|
DaveM@2
|
76 csvOut = [num2cell(feats), num2cell(T)];  % one cell per value, with the group number T appended as the last column
|
DaveM@2
|
77 csvOut = [header; csvOut];  % header row on top of the data rows
|
DaveM@2
|
78 outFile = ['data/wekaReducedFeaturesWithNew' num2str(numClusters) '.csv'];
|
DaveM@2
|
79 cell2csv(outFile, csvOut)  % cell2csv is not defined in this script; it must be on the MATLAB path
|
DaveM@2
|
80
|
DaveM@2
|
81
|
DaveM@2
|
82 %%
|
DaveM@2
|
83 % fnames to CSV
|
DaveM@2
|
84
|
DaveM@2
|
85 maxLen = size(fnames,2);  % number of clusters (one CSV column per cluster)
|
DaveM@2
|
86 csvOut = repmat({''}, maxLen, max(cellfun(@numel, fnames)));  % start from a fresh table; csvOut otherwise still holds the Weka export built earlier
|
DaveM@2
|
87 for i = 1:maxLen
|
DaveM@2
|
88   depth = numel(fnames{i});  % number of files in cluster i, whether the list is a row or a column
|
DaveM@2
|
89   for ii = 1:depth
|
DaveM@2
|
90     csvOut(i,ii) = fnames{i}(ii);  % row = cluster, column = position within the cluster
|
DaveM@2
|
91   end
|
DaveM@2
|
92 end
|
DaveM@2
|
93
|
DaveM@2
|
94 printString = repmat(' %s, ', 1, maxLen);  % one '%s' slot per cluster, same string the old loop built
|
DaveM@2
|
95 fid = fopen('junk.csv','w');
|
DaveM@2
|
96 if fid < 0
|
DaveM@2
|
97   error('Could not open junk.csv for writing.');
|
DaveM@2
|
98 end
|
DaveM@2
|
99 % csvOut{1:end,:} expands column by column, so each output line holds the ii-th file of every cluster
|
DaveM@2
|
100 fprintf(fid,[printString '\n'],csvOut{1:end,:});
|
DaveM@2
|
101 % fprintf(fid,'%f, %f, %f\n',c{2:end,:})
|
DaveM@2
|
102 fclose(fid) ;
|
DaveM@2
|
103 % dlmwrite('test.csv', csvOut, '-append') ;
|
DaveM@2
|
104
|
DaveM@2
|
105 %%
|
DaveM@2
|
106 dl_sp = dl;  % dl_sp was never defined in this script. NOTE(review): aliased to the subset tree dl because incon_sp is computed from dl; switch to dl_all if the full tree was intended
|
DaveM@2
|
107 T = cluster(dl_sp,'cutoff',1.3);  % cut the tree at threshold 1.3 (cluster's default criterion)
|
DaveM@2
|
108 figure; plot(T);  % cluster id per observation, in a new figure
|
DaveM@2
|
109
|
DaveM@2
|
110
|
DaveM@2
|
111 %%
|
DaveM@2
|
112
|
DaveM@2
|
113
|
DaveM@2
|
114 T = cluster(dl_sp,'maxclust',2);
|
DaveM@2
|
115 plot(T)
|
DaveM@2
|
116 %%
|
DaveM@2
|
117 T = cluster(dl_sp,'maxclust',3);
|
DaveM@2
|
118 plot(T)
|
DaveM@2
|
119 %%
|
DaveM@2
|
120 T = cluster(dl_sp,'maxclust',4);
|
DaveM@2
|
121 plot(T)
|
DaveM@2
|
122 T = cluster(dl_sp,'maxclust',5);
|
DaveM@2
|
123 plot(T)
|
DaveM@2
|
124 T = cluster(dl_sp,'maxclust',6);
|
DaveM@2
|
125 plot(T)
|
DaveM@2
|
126 T = cluster(dl_sp,'maxclust',7);
|
DaveM@2
|
127 plot(T)
|
DaveM@2
|
128 T = cluster(dl_sp,'maxclust',8);
|
DaveM@2
|
129 plot(T)
|
DaveM@2
|
130 T = cluster(dl_sp,'maxclust',9);
|
DaveM@2
|
131 plot(T)  % each plot(T) redraws the current axes, so only the last cut run stays visible
|
DaveM@2
|
132 %%
|
DaveM@2
|
133 T = cluster(dl_sp,'maxclust',10);
|
DaveM@2
|
134 plot(T)
|
DaveM@2
|
135 %%
|
DaveM@2
|
136 T = cluster(dl_sp,'maxclust',100);
|
DaveM@2
|
137 plot(T)
|
DaveM@2
|
138 %%
|
DaveM@2
|
139 median(T)  % no semicolon: the median cluster id is echoed to the console
|
DaveM@2
|
140
|
DaveM@2
|
141
|
DaveM@2
|
142 T = cluster(dl_sp,'maxclust',1000);
|
DaveM@2
|
143 median(T)
|
DaveM@2
|
144
|
DaveM@2
|
145
|
DaveM@2
|
146 plot(T)
|
DaveM@2
|
147 csvwrite('dataOutput',reduceData);  % dump the reduced feature matrix as comma-separated text (file name has no extension)
|
DaveM@2
|
148
|
DaveM@2
|
149
|
DaveM@2
|
150
|
DaveM@2
|
151
|
DaveM@2
|
152
|
DaveM@2
|
153
|
DaveM@2
|
154
|
DaveM@2
|
155
|
DaveM@2
|
156
|
DaveM@2
|
157
|
DaveM@2
|
158
|
DaveM@2
|
159
|
DaveM@2
|
160 % dMap_euc = pdist(reduceData);
|
DaveM@2
|
161 % dMap_cos = pdist(reduceData,'cos');
|
DaveM@2
|
162 % dMap_cos = pdist(reduceData,'cosine');
|
DaveM@2
|
163 % dl_euc = linkage(dMap_euc);
|
DaveM@2
|
164 % dl_cos = linkage(dMap_cos);
|
DaveM@2
|
165 % % dl_sp
|
DaveM@2
|
166 % dl_sp(10,:)
|
DaveM@2
|
167 % dl_sp(1:10,:)
|
DaveM@2
|
168 % sprintf('%f', dl_sp(1:10,:))
|
DaveM@2
|
169 % dl_sp(1:10,:)
|
DaveM@2
|
170 % format short g
|
DaveM@2
|
171 % dl_sp(1:10,:)
|
DaveM@2
|
172 % plot(dl_sp(:))
|
DaveM@2
|
173 % plot(dl_sp(:,3))
|
DaveM@2
|
174 % incon_sp = inconsistent(dl_sp)
|