# -*- coding: utf-8 -*-
"""Run classification and retrieval experiments.

Created on Fri Feb 12 18:56:28 2016

@author: mariapanteli
"""

import os
import pickle

import numpy
import pandas
import sklearn.metrics.pairwise as PW
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

import classifiers as cc


def post_process_frames(frames, pca_frames=True, n_pcas=20):
    """Standardize a feature matrix and optionally reduce it with PCA.

    The scaler is fitted on the transpose of `frames`, so every row of
    `frames` is scaled to zero mean and unit variance.

    Parameters:
        frames: 2D array of features (presumably one sample per row --
            the original comment reads "standardise n_samples").
        pca_frames: when True, project the standardized rows with PCA.
        n_pcas: number of principal components kept when `pca_frames` is True.

    Returns:
        The standardized array, with `n_pcas` columns if PCA was applied.
    """
    frames = StandardScaler().fit_transform(frames.T).T  # standardise n_samples
    if pca_frames:
        frames = PCA(n_components=n_pcas).fit_transform(frames)
    return frames


def _group_indices(group_labels):
    """Return unique group labels in first-appearance order and each group's row indices."""
    tlabels, first_inds = numpy.unique(group_labels, return_index=True)
    # numpy.unique sorts its output; restore the order of first occurrence.
    tlabels = tlabels[numpy.argsort(first_inds)]
    tlabelinds = [numpy.where(group_labels == tt)[0] for tt in tlabels]
    return tlabels, tlabelinds


def classification_experiments(features, labels, feat_labels, group_labels, nfolds=5):
    """Classify rhythms/melodies and average accuracy by label grouping.

    Every feature set is run through the KNN, LDA, NB and SVM classifiers of
    the `classifiers` module, and the returned accuracies are averaged per
    group, eg, per transformation or transformation value.

    Parameters:
        features: sequence of feature matrices, one per entry of `feat_labels`.
        labels: class labels passed to the classifiers.
        feat_labels: names of the feature sets, paired with `features`.
        group_labels: array with the group of each item, used for averaging.
        nfolds: value forwarded to the classifiers as `kfold`.

    Returns:
        (results_classification, tlabels): one row per (feature, classifier)
        pair holding the mean accuracy of each group, the overall mean
        accuracy, the classifier name and the feature label; `tlabels` are
        the unique group labels in order of first appearance.
    """
    tlabels, tlabelinds = _group_indices(group_labels)

    # Ordered (name, function) pairs: an unknown name can no longer leave
    # `accuracies` unbound the way an if/elif chain without `else` could.
    classifiers = [
        ("KNN", cc.classifyKNN),
        ("LDA", cc.classifyLDA),
        ("NB", cc.classifyNB),
        ("SVM", cc.classifySVM),
    ]

    results_classification = []
    for feat, feat_label in zip(features, feat_labels):
        for cl, classify in classifiers:
            accuracies = classify(feat, labels, kfold=nfolds)
            group_accuracy = [numpy.nanmean(accuracies[labelinds]) for labelinds in tlabelinds]
            # NOTE(review): the overall mean is not NaN-aware, unlike the
            # per-group means above -- confirm the classifiers never return NaN.
            group_accuracy.append(numpy.mean(accuracies))
            group_accuracy.append(cl)
            group_accuracy.append(feat_label)
            results_classification.append(group_accuracy)
    return results_classification, tlabels


def topK_experiments(features, labels, feat_labels, group_labels, K=99):
    """Query rhythms/melodies and assess the recall rate at top K.

    For every distinct label, the first item carrying that label is used as
    the query and the remaining items with the same label are its true
    matches. A true match scores 1 if it is among the K nearest items to the
    query and 0 otherwise; the query items themselves stay NaN and are
    ignored by the averages. Scores are averaged by label grouping, eg, by
    transformation or transformation value.

    Parameters:
        features: sequence of feature matrices, one per entry of `feat_labels`.
        labels: array with the label (family) of each item.
        feat_labels: names of the feature sets, paired with `features`.
        group_labels: array with the group of each item, used for averaging.
        K: number of nearest neighbours inspected for each query.

    Returns:
        (results_topK, tlabels): one row per (feature, metric) pair holding
        the mean score of each group, the overall mean score, the metric name
        and the feature label; `tlabels` are the unique group labels in order
        of first appearance.
    """
    tlabels, tlabelinds = _group_indices(group_labels)

    dist_metrics = ["euclidean", "cosine", "correlation", "mahalanobis"]
    unique_labels = numpy.unique(labels)  # invariant across features and metrics

    results_topK = []
    for feat, feat_label in zip(features, feat_labels):
        for metric in dist_metrics:
            D = PW.pairwise_distances(feat, metric=metric)
            accuracies = numpy.full((len(labels), 1), numpy.nan, dtype=float)
            for label in unique_labels:
                truematchinds = numpy.where(labels == label)[0]
                # default timbre is the first filename of the family (eg. 1_2_1.wav for family 2)
                queryind = truematchinds[0]
                truematches = set(truematchinds) - {queryind}  # remove queryind
                sortindex = numpy.argsort(D[queryind, :])
                # Drop the query by index rather than by rank: with tied zero
                # distances (eg. duplicate rows) it is not guaranteed to be
                # ranked first, and slicing off position 0 could discard a
                # true match while keeping the query as a candidate.
                sortindex = sortindex[sortindex != queryind]
                topKinds = set(sortindex[:K])
                correctinds = truematches & topKinds
                wronginds = truematches - correctinds
                accuracies[list(correctinds)] = 1
                accuracies[list(wronginds)] = 0
            group_accuracy = [numpy.nanmean(accuracies[labelinds]) for labelinds in tlabelinds]
            group_accuracy.append(numpy.nanmean(accuracies))  # overall mean, ignoring query items
            group_accuracy.append(metric)
            group_accuracy.append(feat_label)
            results_topK.append(group_accuracy)
    return results_topK, tlabels


if __name__ == '__main__':
    # Load metadata
    meta = pandas.read_csv(os.path.join('data', 'Metadata.csv'), sep=',')
    labels = numpy.array(meta["family"].values, dtype=str)

    feat_labels = ["ST", "OP", "FP", "PB", "IG", "FMT"]
    test_classes = ["transformation", "value", "style", "monopoly"]

    # Load features and post process; each feature set lives in data/<label>.csv
    try:
        features = [
            post_process_frames(
                pandas.read_csv(os.path.join('data', feat_label + '.csv'), header=None).values
            )
            for feat_label in feat_labels
        ]
    except Exception as err:
        # Best-effort fallback to the precomputed features, but say why.
        pickle_path = os.path.join('data', 'features.pickle')
        print("Could not build features from the CSV files (%s); loading %s instead"
              % (err, pickle_path))
        # NOTE: pickle.load executes arbitrary code from the file -- only use
        # a features.pickle that comes from a trusted source.
        with open(pickle_path, 'rb') as f:
            features = pickle.load(f)

    write_file = False  # set it to True if you want to write output file
    for test_class in test_classes:
        group_labels = meta[test_class].values
        results_class, tlabels = classification_experiments(features, labels, feat_labels, group_labels)
        results_topK, tlabels = topK_experiments(features, labels, feat_labels, group_labels)
        header = numpy.append(tlabels, ['mean accuracy', 'metric', 'feature'])
        results = numpy.concatenate((header[None, :], numpy.array(results_class), numpy.array(results_topK)))
        print(results)

        if write_file:
            filename = os.path.join('data', 'results_' + test_class + '.csv')
            numpy.savetxt(filename, results, fmt='%s', delimiter=',')