% Hierarchical clustering of a reduced feature set, with CSV exports.
%
% Workspace inputs (loaded from the two MAT-files below; their contents are
% not visible here, so the names are inferred from how they are used):
%   Data          observations-by-features numeric matrix
%   Labels        indexed with the feature map (see NOTE below)
%   FeatureNames  cell array of feature names
%   Filenames     cell array with one file name per observation
%   featuredata   struct array with fields IdxVar and FeatureNamesRanked
%
% Outputs:
%   junk.csv      one column per cluster, listing the files in that cluster
%   dataOutput    the reduced feature matrix written by csvwrite
%   figures       dendrograms and cluster-assignment plots

load('Adobe.mat')
load('Results1Percent.mat')

%% Select the feature columns chosen by the last feature-selection result
datamap = featuredata(end).IdxVar;
reduceData = Data(:,datamap);
% NOTE(review): Labels is indexed with the same map used for the columns of
% Data; confirm Labels is per-feature and not per-observation.
reduceLabels = Labels(datamap);

%%
reduceFeatures = FeatureNames(datamap);

%%
% Reloaded so that this cell can be run on its own in cell mode.
load('Results1Percent.mat')

%%
% Overrides the assignment above with the ranked names of the first result.
reduceFeatures = featuredata(1).FeatureNamesRanked;

%% Cluster a random subsample
dataToUseSize = 500;
% Sample WITHOUT replacement (duplicated rows would show up as spurious
% zero-distance pairs) and never ask for more rows than exist.
numObs = size(reduceData,1);
shuffled = randperm(numObs);
dataToUse = shuffled(1:min(dataToUseSize, numObs));

dMap = pdist(reduceData(dataToUse,:));
clusterMethod = 'ward';
% 'average'   Unweighted average distance (UPGMA)
% 'centroid'  Centroid distance (UPGMC), appropriate for Euclidean distances only
% 'complete'  Furthest distance
% 'median'    Weighted center of mass distance (WPGMC), appropriate for Euclidean distances only
% 'single'    Shortest distance
% 'ward'      Inner squared distance (minimum variance algorithm), appropriate for Euclidean distances only
% 'weighted'  Weighted average distance (WPGMA)

dl = linkage(dMap, clusterMethod);
figure; dendrogram(dl)
% figure; imagesc(squareform(dMap))
% title('euclidian self similarity');

%%
incon_sp = inconsistent(dl)

%% Cluster all data
% Linkage methods are the same as those listed for the subsample above.
dMapAll = pdist(reduceData);
clusterMethod = 'ward';

dl_all = linkage(dMapAll, clusterMethod);
% [~,T] = dendrogram(dl_all,0)

%% Collect the file list of each cluster
numClusters = 25;
fnames = cell(1,numClusters);
% T(k) is the dendrogram leaf node (1..numClusters) that observation k falls in.
figure; [~,T] = dendrogram(dl_all,numClusters);
for i = 1:numClusters
    fnames{i} = Filenames(T==i);
end

%% Build the feature table for Weka
% Layout: header row of feature names plus 'Class', then one row per
% observation with its features followed by its cluster id.
% NOTE(review): this table is only built here, never written to disk.
feats = reduceData;

csvOut = num2cell(feats);
csvOut = [csvOut, num2cell(T(:))];
% reshape accepts FeatureNames in either row or column orientation.
csvOut = [[reshape(FeatureNames(datamap),1,[]), {'Class'}]; csvOut];

%% Write the per-cluster file lists to CSV
% A separate table is used so the Weka table in csvOut is not overwritten.
% Short clusters are padded with '' so every output line has numClusters fields.
% Assumes each Filenames entry is a char row vector -- TODO confirm.
maxLen = size(fnames,2);
clusterSizes = cellfun(@numel, fnames);
maxDepth = max([clusterSizes, 0]);
fileTable = repmat({''}, maxLen, maxDepth);

for i = 1:maxLen
    members = fnames{i};
    for ii = 1:numel(members)
        fileTable(i,ii) = members(ii);
    end
end

fid = fopen('junk.csv','w');
if fid < 0
    error('Could not open junk.csv for writing.');
end
% Each line is built explicitly: fprintf silently skips empty arguments,
% which would shift the columns whenever a cluster runs out of files.
for r = 1:maxDepth
    rowText = '';
    for c = 1:maxLen
        rowText = [rowText ' ' fileTable{c,r} ', ']; %#ok<AGROW>
    end
    fprintf(fid, '%s\n', rowText);
end
fclose(fid);
% dlmwrite('test.csv', csvOut, '-append') ;

%% Explore cuts of the subsample tree
% The original cells referenced dl_sp, which is never defined in this
% script; dl is the subsample linkage that replaced it.
T = cluster(dl,'cutoff',1.3);
figure; plot(T);

%%
% One figure per cluster count so earlier plots are not overwritten.
for k = [2:10, 100, 1000]
    T = cluster(dl,'maxclust',k);
    figure; plot(T)
    title(sprintf('maxclust = %d', k))
    if k >= 100
        median(T)
    end
end

csvwrite('dataOutput',reduceData);

%% Scratch history (kept for reference)
% dMap_euc = pdist(reduceData);
% dMap_cos = pdist(reduceData,'cos');
% dMap_cos = pdist(reduceData,'cosine');
% dl_euc = linkage(dMap_euc);
% dl_cos = linkage(dMap_cos);
% % dl_sp
% dl_sp(10,:)
% dl_sp(1:10,:)
% sprintf('%f', dl_sp(1:10,:))
% dl_sp(1:10,:)
% format short g
% dl_sp(1:10,:)
% plot(dl_sp(:))
% plot(dl_sp(:,3))
% incon_sp = inconsistent(dl_sp)