annotate misc/evalResults.m @ 37:d9a9a6b93026 tip

Add README
author DaveM
date Sat, 01 Apr 2017 17:03:14 +0100
parents 985cd163ba54
children
rev   line source
DaveM@2 1 % Load inputs and restrict Data/Labels to the indices in featuredata(end).IdxVar.
DaveM@2 2 load('Adobe.mat')
DaveM@2 3 load('Results1Percent.mat')
DaveM@2 4 % NOTE(review): Data, Labels, Filenames and featuredata are presumably supplied by the two .mat files above - confirm.
DaveM@2 5 datamap = featuredata(end).IdxVar;
DaveM@2 6 reduceData = Data(:,datamap);
DaveM@2 7 reduceLabels = Labels(datamap);
DaveM@2 8 %% UNUSED
DaveM@2 9 % reduceFeatures = FeatureNames(datamap);
DaveM@2 10
DaveM@2 11 %% UNUSED (repeats the load of Results1Percent.mat already done above)
DaveM@2 12 load('Results1Percent.mat')
DaveM@2 13
DaveM@2 14 %%
DaveM@2 15 % Use a random subset of the data: build a linkage tree on it and plot the dendrogram
DaveM@2 16 reduceFeatures = featuredata(1).FeatureNamesRanked;
DaveM@2 17 % Row indices are drawn independently, so the same row can be picked more than once.
DaveM@2 18 dataToUseSize = 500;
DaveM@2 19 dataToUse = ceil(rand(dataToUseSize,1)*size(reduceData,1))';
DaveM@2 20 % Pairwise distances between the sampled rows (pdist is called without a metric argument).
DaveM@2 21 dMap = pdist(reduceData(dataToUse,:));
DaveM@2 22 clusterMethod = 'ward';
DaveM@2 23 % 'average' Unweighted average distance (UPGMA)
DaveM@2 24 % 'centroid' Centroid distance (UPGMC), appropriate for Euclidean distances only
DaveM@2 25 % 'complete' Furthest distance
DaveM@2 26 % 'median' Weighted center of mass distance (WPGMC), appropriate for Euclidean distances only
DaveM@2 27 % 'single' Shortest distance
DaveM@2 28 % 'ward' Inner squared distance (minimum variance algorithm), appropriate for Euclidean distances only
DaveM@2 29 % 'weighted' Weighted average distance (WPGMA)
DaveM@2 30
DaveM@2 31 dl = linkage(dMap, clusterMethod);
DaveM@2 32 dendrogram(dl)
DaveM@2 33 incon_sp = inconsistent(dl) % no trailing semicolon: the result is echoed to the command window
DaveM@2 34 % figure; imagesc(squareform(dMap_sp))
DaveM@2 35 % title('Euclidean self similarity');
DaveM@2 36
DaveM@2 37 %%
DaveM@2 38 % Use all data: build the linkage tree over every row of reduceData
DaveM@2 39 % (pdist is called without a metric argument here as well).
DaveM@2 40 dMapAll = pdist(reduceData);
DaveM@2 41 clusterMethod = 'ward';
DaveM@2 42 % 'average' Unweighted average distance (UPGMA)
DaveM@2 43 % 'centroid' Centroid distance (UPGMC),
DaveM@2 44 % appropriate for Euclidean distances only
DaveM@2 45 % 'complete' Furthest distance
DaveM@2 46 % 'median' Weighted center of mass distance (WPGMC),
DaveM@2 47 % appropriate for Euclidean distances only
DaveM@2 48 % 'single' Shortest distance
DaveM@2 49 % 'ward' Inner squared distance (minimum variance algorithm),
DaveM@2 50 % appropriate for Euclidean distances only
DaveM@2 51 % 'weighted' Weighted average distance (WPGMA)
DaveM@2 52 % dl_all is the tree that the following section cuts into clusters.
DaveM@2 53 dl_all = linkage(dMapAll, clusterMethod);
DaveM@2 54 % [~,T] = dendrogram(dl_all,0)
DaveM@2 55
DaveM@2 56 %%
DaveM@2 57 % Collapse the full-data tree to numClusters groups, save the plot, and collect the file names of each group
DaveM@2 58
DaveM@2 59 numClusters = 100;
DaveM@2 60 fnames = cell(1,numClusters);
DaveM@2 61 [~,T] = dendrogram(dl_all,numClusters);
DaveM@2 62 plotName = ['data/ClusterWith' num2str(numClusters) 'Elements'];
DaveM@2 63 saveas(gcf, plotName, 'fig');
DaveM@2 64 saveas(gcf, plotName, 'pdf');
DaveM@2 65 for i = 1:numClusters
DaveM@2 66 % Logical indexing picks the entries of Filenames whose group number in T equals i.
DaveM@2 67 fnames{i} = Filenames(T==i);
DaveM@2 68 end
DaveM@2 69
DaveM@2 70 %
DaveM@2 71 % makeCSV for Weka
DaveM@2 72 % format: header row, then one row per row of reduceData; the last column ('Class') is the cluster index T
DaveM@2 73 % NOTE(review): cell2csv is not defined in this file - presumably an external helper; confirm it is on the path.
DaveM@2 74 feats = reduceData;
DaveM@2 75 % NOTE(review): the header indexes reduceFeatures (featuredata(1).FeatureNamesRanked) with datamap (featuredata(end).IdxVar) - confirm both use the same ordering.
DaveM@2 76 csvOut = num2cell(feats);
DaveM@2 77 csvOut = [csvOut, num2cell(T)];
DaveM@2 78 csvOut = [[reduceFeatures(datamap)', {'Class'}]; csvOut];
DaveM@2 79 cell2csv(['data/wekaReducedFeaturesWithNew' num2str(numClusters) '.csv'],csvOut)
DaveM@2 80
DaveM@2 81
DaveM@2 82 %%
DaveM@2 83 % fnames to CSV: junk.csv gets one column per cluster and one row per file position within a cluster
DaveM@2 84
DaveM@2 85 maxLen = size(fnames,2);
DaveM@2 86 maxDepth = max([0, cellfun(@numel, fnames)]);
DaveM@2 87 % Start from a fresh table: csvOut still holds the Weka table built earlier in the
DaveM@2 88 % script, and filling it in place would leak those cells into junk.csv.
DaveM@2 89 csvOut = repmat({''}, maxLen, maxDepth);
DaveM@2 90 for i = 1:maxLen
DaveM@2 91 depth = numel(fnames{i}); % numel works for both row- and column-shaped name lists
DaveM@2 92 csvOut(i,1:depth) = reshape(fnames{i}, 1, []);
DaveM@2 93 end
DaveM@2 94
DaveM@2 95 % One ' %s, ' conversion per cluster; csvOut{1:end,:} expands column by column.
DaveM@2 96 printString = repmat(' %s, ', 1, maxLen);
DaveM@2 97
DaveM@2 98 fid = fopen('junk.csv','w');
DaveM@2 99 if fid == -1, error('Could not open junk.csv for writing'); end
DaveM@2 100 fprintf(fid,[printString '\n'],csvOut{1:end,:});
DaveM@2 101 % fprintf(fid,'%f, %f, %f\n',c{2:end,:})
DaveM@2 102 fclose(fid) ;
DaveM@2 103 % dlmwrite('test.csv', csvOut, '-append') ;
DaveM@2 104
DaveM@2 105 %% Exploratory cuts of the linkage tree dl_sp at different cluster counts
DaveM@2 106 T = cluster(dl_sp,'cutoff',1.3);
DaveM@2 107 figure; plot(T);
DaveM@2 108 % NOTE(review): dl_sp is not assigned anywhere in the visible part of this script
DaveM@2 109 % (the trees built above are dl and dl_all) - confirm which tree is intended.
DaveM@2 110
DaveM@2 111 %%
DaveM@2 112 % Each call below overwrites T, including the cluster indices returned by dendrogram earlier.
DaveM@2 113
DaveM@2 114 T = cluster(dl_sp,'maxclust',2);
DaveM@2 115 plot(T)
DaveM@2 116 %%
DaveM@2 117 T = cluster(dl_sp,'maxclust',3);
DaveM@2 118 plot(T)
DaveM@2 119 %%
DaveM@2 120 T = cluster(dl_sp,'maxclust',4);
DaveM@2 121 plot(T)
DaveM@2 122 T = cluster(dl_sp,'maxclust',5);
DaveM@2 123 plot(T)
DaveM@2 124 T = cluster(dl_sp,'maxclust',6);
DaveM@2 125 plot(T)
DaveM@2 126 T = cluster(dl_sp,'maxclust',7);
DaveM@2 127 plot(T)
DaveM@2 128 T = cluster(dl_sp,'maxclust',8);
DaveM@2 129 plot(T)
DaveM@2 130 T = cluster(dl_sp,'maxclust',9);
DaveM@2 131 plot(T)
DaveM@2 132 %%
DaveM@2 133 T = cluster(dl_sp,'maxclust',10);
DaveM@2 134 plot(T)
DaveM@2 135 %%
DaveM@2 136 T = cluster(dl_sp,'maxclust',100);
DaveM@2 137 plot(T)
DaveM@2 138 %%
DaveM@2 139 median(T)
DaveM@2 140
DaveM@2 141
DaveM@2 142 T = cluster(dl_sp,'maxclust',1000);
DaveM@2 143 median(T)
DaveM@2 144
DaveM@2 145 % Write the reduced feature matrix to the file 'dataOutput' (no extension is given).
DaveM@2 146 plot(T)
DaveM@2 147 csvwrite('dataOutput',reduceData);
DaveM@2 148
DaveM@2 149
DaveM@2 150
DaveM@2 151
DaveM@2 152
DaveM@2 153
DaveM@2 154
DaveM@2 155
DaveM@2 156
DaveM@2 157
DaveM@2 158
DaveM@2 159
DaveM@2 160 % dMap_euc = pdist(reduceData);
DaveM@2 161 % dMap_cos = pdist(reduceData,'cos');
DaveM@2 162 % dMap_cos = pdist(reduceData,'cosine');
DaveM@2 163 % dl_euc = linkage(dMap_euc);
DaveM@2 164 % dl_cos = linkage(dMap_cos);
DaveM@2 165 % % dl_sp
DaveM@2 166 % dl_sp(10,:)
DaveM@2 167 % dl_sp(1:10,:)
DaveM@2 168 % sprintf('%f', dl_sp(1:10,:))
DaveM@2 169 % dl_sp(1:10,:)
DaveM@2 170 % format short g
DaveM@2 171 % dl_sp(1:10,:)
DaveM@2 172 % plot(dl_sp(:))
DaveM@2 173 % plot(dl_sp(:,3))
DaveM@2 174 % incon_sp = inconsistent(dl_sp)