% Hierarchical clustering of the reduced feature set, followed by export of
% the cluster assignments (Weka CSV) and of the per-cluster file lists.
%
% Inputs (loaded from disk):
%   Adobe.mat            - expected to provide Data, Labels and Filenames
%                          (NOTE(review): inferred from use below - confirm)
%   Results1Percent.mat  - expected to provide the struct array featuredata
%                          (NOTE(review): inferred from use below - confirm)
%
% Outputs (written to disk):
%   data/ClusterWith<N>Elements.fig / .pdf   dendrogram of the full data set
%   data/wekaReducedFeaturesWithNew<N>.csv   features + cluster id per row
%   junk.csv                                 file names grouped by cluster
%   dataOutput                               the reduced feature matrix

load('Adobe.mat')
load('Results1Percent.mat')   % was loaded a second time further down; once is enough

% Keep only the feature columns selected by the last feature-selection entry.
datamap      = featuredata(end).IdxVar;
reduceData   = Data(:,datamap);
reduceLabels = Labels(datamap);
%% UNUSED
% reduceFeatures = FeatureNames(datamap);

% Linkage method shared by both clusterings below.
clusterMethod = 'ward';
%       'average'    Unweighted average distance (UPGMA)
%       'centroid'   Centroid distance (UPGMC),
%                    appropriate for Euclidean distances only
%       'complete'   Furthest distance
%       'median'     Weighted center of mass distance (WPGMC),
%                    appropriate for Euclidean distances only
%       'single'     Shortest distance
%       'ward'       Inner squared distance (minimum variance algorithm),
%                    appropriate for Euclidean distances only
%       'weighted'   Weighted average distance (WPGMA)

%%
% Use Subset of data
reduceFeatures = featuredata(1).FeatureNamesRanked;

dataToUseSize = 500;
% Row indices are drawn independently, so the same row can be picked twice.
dataToUse = ceil(rand(dataToUseSize,1)*size(reduceData,1))';

dMap = pdist(reduceData(dataToUse,:));

dl = linkage(dMap, clusterMethod);
% The cluster() calls at the end of this script refer to the subset linkage
% as dl_sp, which was never assigned here. Alias it unless one of the loaded
% .mat files already supplied a variable of that name.
if ~exist('dl_sp','var')
    dl_sp = dl;
end
dendrogram(dl)
incon_sp = inconsistent(dl)
% figure; imagesc(squareform(dMap_sp))
% title('euclidean self similarity');

%%
% Use all data

dMapAll = pdist(reduceData);
dl_all  = linkage(dMapAll, clusterMethod);
% [~,T] = dendrogram(dl_all,0)

%%
% print filelist for each cluster

numClusters = 100;
fnames = cell(1,numClusters);
[~,T] = dendrogram(dl_all,numClusters);
plotName = ['data/ClusterWith' num2str(numClusters) 'Elements'];
saveas(gcf, plotName, 'fig');
saveas(gcf, plotName, 'pdf');

maxDepth = 0;   % size of the largest cluster, needed to pad the table below
for i = 1:numClusters
    fnames{i} = Filenames(T==i);
    maxDepth  = max(maxDepth, numel(fnames{i}));
end

%
% makeCSV for Weka
% format

feats = reduceData;

% One row per observation: feature values followed by the cluster id.
csvOut = [num2cell(feats), num2cell(T)];
% NOTE(review): reduceFeatures is the *ranked* name list, indexed here with
% datamap - confirm that this yields the names matching Data(:,datamap).
csvOut = [[reduceFeatures(datamap)', {'Class'}]; csvOut];
cell2csv(['data/wekaReducedFeaturesWithNew' num2str(numClusters) '.csv'],csvOut)

%%
% fnames to CSV

maxLen = size(fnames,2);

% Build a separate table (cluster x file position) padded with empty strings.
% Previously the names were written into csvOut, so the leftover Weka cells
% (header text and numeric values) were emitted through '%s' as well.
fnameTable = cell(maxLen, maxDepth);
fnameTable(:) = {''};
for i = 1:maxLen
    for ii = 1:numel(fnames{i})
        fnameTable(i,ii) = fnames{i}(ii);
    end
end

% One '%s' field per cluster: fprintf consumes the table column by column,
% so every output line holds the ii-th file name of each cluster.
printString = '';
for i = 1:maxLen
    printString = [printString ' %s, ']; %#ok<AGROW>
end

fid = fopen('junk.csv','w');
if fid == -1
    error('Could not open junk.csv for writing.');
end
fprintf(fid,[printString '\n'],fnameTable{:});
% fprintf(fid,'%f, %f, %f\n',c{2:end,:})
fclose(fid);
% dlmwrite('test.csv', csvOut, '-append') ;

%%
% Cut the subset tree at a fixed inconsistency threshold.
T = cluster(dl_sp,'cutoff',1.3);
figure; plot(T);

%%
% Sweep the number of clusters; each plot replaces the previous one.
for k = [2:10, 100]
    T = cluster(dl_sp,'maxclust',k);
    plot(T)
end
median(T)

%%
T = cluster(dl_sp,'maxclust',1000);
median(T)

plot(T)
csvwrite('dataOutput',reduceData);

% --- scratch / command history kept for reference ---
% dMap_euc = pdist(reduceData);
% dMap_cos = pdist(reduceData,'cos');
% dMap_cos = pdist(reduceData,'cosine');
% dl_euc = linkage(dMap_euc);
% dl_cos = linkage(dMap_cos);
% % dl_sp
% dl_sp(10,:)
% dl_sp(1:10,:)
% sprintf('%f', dl_sp(1:10,:))
% dl_sp(1:10,:)
% format short g
% dl_sp(1:10,:)
% plot(dl_sp(:))
% plot(dl_sp(:,3))
% incon_sp = inconsistent(dl_sp)