annotate other/evalResults.m @ 37:d9a9a6b93026 tip

Add README
author DaveM
date Sat, 01 Apr 2017 17:03:14 +0100
parents 995546d09284
children
rev   line source
DaveM@1 1
% Load the data set and the feature-selection results, then keep only the
% features picked out by the selection run.
% NOTE(review): Data, Labels and FeatureNames are presumably supplied by
% Adobe.mat and featuredata by Results1Percent.mat -- confirm against the files.
load('Adobe.mat');
load('Results1Percent.mat');
%%
% The last featuredata entry holds the column indices of the retained features.
datamap = featuredata(end).IdxVar;
reduceData = Data(:, datamap);
reduceLabels = Labels(datamap);
%%
% Names of the retained features, in the same order as the columns of reduceData.
reduceFeatures = FeatureNames(datamap);

%%
% Reload the selection results (same file as above).
load('Results1Percent.mat');

%%
% NOTE(review): this replaces the reduceFeatures value assigned above with the
% ranked name list of the first featuredata entry -- confirm which is wanted.
reduceFeatures = featuredata(1).FeatureNamesRanked;
DaveM@1 16
% Hierarchical clustering of a random subsample of the reduced data.
dataToUseSize = 500;
% Row indices drawn uniformly at random from 1..size(reduceData,1); the same
% row may be drawn more than once. Stored as a row vector.
dataToUse = ceil(size(reduceData,1) * rand(dataToUseSize,1)).';

% Pairwise distances between the sampled rows (pdist default metric).
dMap = pdist(reduceData(dataToUse,:));

% Linkage method. Options accepted by linkage():
%   'average'  Unweighted average distance (UPGMA)
%   'centroid' Centroid distance (UPGMC), appropriate for Euclidean distances only
%   'complete' Furthest distance
%   'median'   Weighted center of mass distance (WPGMC), appropriate for Euclidean distances only
%   'single'   Shortest distance
%   'ward'     Inner squared distance (minimum variance algorithm), appropriate for Euclidean distances only
%   'weighted' Weighted average distance (WPGMA)
clusterMethod = 'ward';

% Build the cluster tree and draw it.
dl = linkage(dMap, clusterMethod);
dendrogram(dl)
% figure; imagesc(squareform(dMap_sp))
% title('euclidian self similarity');

%%
% Inconsistency coefficients of the tree; left unsuppressed so the matrix is
% echoed to the command window.
incon_sp = inconsistent(dl)
DaveM@1 37
DaveM@1 38
%%
% Repeat the clustering on every row of reduceData instead of a subsample.

% Pairwise distances between all observations (pdist default metric).
dMapAll = pdist(reduceData);

% Linkage method. Options accepted by linkage():
%   'average'  Unweighted average distance (UPGMA)
%   'centroid' Centroid distance (UPGMC),
%              appropriate for Euclidean distances only
%   'complete' Furthest distance
%   'median'   Weighted center of mass distance (WPGMC),
%              appropriate for Euclidean distances only
%   'single'   Shortest distance
%   'ward'     Inner squared distance (minimum variance algorithm),
%              appropriate for Euclidean distances only
%   'weighted' Weighted average distance (WPGMA)
clusterMethod = 'ward';

% Cluster tree over the full data set.
dl_all = linkage(dMapAll, clusterMethod);
% [~,T] = dendrogram(dl_all,0)
DaveM@1 57
%%
% Collect the file list of each cluster.
%
% The full-data tree is collapsed to numClusters leaf nodes by dendrogram
% (which also draws the plot); T holds the leaf-node number assigned to each
% observation. fnames{k} receives the Filenames entries whose observation
% falls in leaf node k.

numClusters = 25;
fnames = cell(1,numClusters);
[~,T] = dendrogram(dl_all,numClusters);
for i = 1:numClusters
    % Logical indexing selects the members directly; no find() or separate
    % member count is needed.
    fnames{i} = Filenames(T==i);
end
DaveM@1 68
%%
% Assemble a cell table for export to Weka:
%   row 1      -> names of the selected features followed by 'Class'
%   other rows -> one observation each: feature values, then its cluster id T

feats = reduceData;

% csvOut = mat2cell(feats,ones(size(feats,1),1), ones(size(feats,2),1))
% One cell per value, with the cluster assignment appended as the last column.
csvOut = [num2cell(feats), num2cell(T)];
% size(csvOut)
% size([FeatureNames(datamap)', {'Class'}])
% Put the header row on top of the data rows.
csvOut = vertcat([FeatureNames(datamap)', {'Class'}], csvOut);
DaveM@1 81
%%
% fnames to CSV
%
% Writes the per-cluster file lists to junk.csv: one column per cluster, one
% row per position in the lists; clusters shorter than the longest one are
% padded with empty fields.
%
% The table is built in its own cell array so that csvOut (the Weka table
% assembled earlier) is left untouched, and each output line is joined from
% text fields only, so no numeric cell is ever pushed through a %s conversion.

maxLen = numel(fnames);                               % number of clusters (columns)
maxDepth = max([0; cellfun(@numel, fnames(:))]);      % longest file list (rows)

fileTable = repmat({''}, maxDepth, maxLen);
for i = 1:maxLen
    names = fnames{i};
    if ~isempty(names)
        fileTable(1:numel(names), i) = names(:);
    end
end

fid = fopen('junk.csv','w');
if fid == -1
    error('evalResults:fileOpen', 'Could not open junk.csv for writing.');
end
closeFile = onCleanup(@() fclose(fid));   % file is closed even if a write fails

for ii = 1:maxDepth
    fprintf(fid, '%s\n', strjoin(fileTable(ii,:), ', '));
end
% fprintf(fid,'%f, %f, %f\n',c{2:end,:})
clear closeFile                           % triggers fclose(fid)
% dlmwrite('test.csv', csvOut, '-append') ;
DaveM@1 104
%%
% Cut the cluster tree into flat clusters at various settings and plot the
% cluster index assigned to each observation.
%
% dl_sp is not created anywhere in this script, so on a clean workspace every
% cluster() call below fails with an undefined-variable error. If dl_sp is not
% already present, fall back to the subsample tree dl.
% NOTE(review): dl is chosen because inconsistent(dl) is stored as incon_sp
% above; switch to dl_all if the full-data tree was intended.
if ~exist('dl_sp','var')
    dl_sp = dl;
end
T = cluster(dl_sp,'cutoff',1.3);
figure; plot(T);



%%
% Same check on a clean workspace when this cell is run on its own.
if ~exist('dl_sp','var')
    dl_sp = dl;
end

T = cluster(dl_sp,'maxclust',2);
plot(T)
%%
T = cluster(dl_sp,'maxclust',3);
plot(T)
%%
% Sweep 4..9 clusters; each plot replaces the previous one in the same axes.
for k = 4:9
    T = cluster(dl_sp,'maxclust',k);
    plot(T)
end
%%
T = cluster(dl_sp,'maxclust',10);
plot(T)
%%
T = cluster(dl_sp,'maxclust',100);
plot(T)
%%
% Median cluster index, echoed to the command window.
median(T)


T = cluster(dl_sp,'maxclust',1000);
median(T)


plot(T)
% Dump the reduced feature matrix as comma-separated values.
csvwrite('dataOutput',reduceData);
DaveM@1 148
DaveM@1 149
DaveM@1 150
DaveM@1 151
DaveM@1 152
DaveM@1 153
DaveM@1 154
DaveM@1 155
DaveM@1 156
DaveM@1 157
DaveM@1 158
DaveM@1 159
DaveM@1 160 % dMap_euc = pdist(reduceData);
DaveM@1 161 % dMap_cos = pdist(reduceData,'cos');
DaveM@1 162 % dMap_cos = pdist(reduceData,'cosine');
DaveM@1 163 % dl_euc = linkage(dMap_euc);
DaveM@1 164 % dl_cos = linkage(dMap_cos);
DaveM@1 165 % % dl_sp
DaveM@1 166 % dl_sp(10,:)
DaveM@1 167 % dl_sp(1:10,:)
DaveM@1 168 % sprintf('%f', dl_sp(1:10,:))
DaveM@1 169 % dl_sp(1:10,:)
DaveM@1 170 % format short g
DaveM@1 171 % dl_sp(1:10,:)
DaveM@1 172 % plot(dl_sp(:))
DaveM@1 173 % plot(dl_sp(:,3))
DaveM@1 174 % incon_sp = inconsistent(dl_sp)