annotate evaluate.py @ 7:b7169083b9ea tip

fix typo in variable name
author Maria Panteli
date Tue, 01 Jan 2019 15:51:38 +0200
parents 2732137aa9b5
children
rev   line source
Maria@1 1 # -*- coding: utf-8 -*-
Maria@1 2 """
Maria@1 3 Created on Fri Feb 12 18:56:28 2016
Maria@1 4
Maria@1 5 @author: mariapanteli
Maria@1 6 """
Maria@1 7 """Run classification and retrieval experiments"""
Maria@1 8
Maria@1 9 import os
Maria@1 10 import numpy
Maria@1 11 import pandas
Maria@6 12 import pickle
Maria@1 13 import sklearn.metrics.pairwise as PW
Maria@1 14 from sklearn.decomposition import PCA
Maria@1 15 from sklearn.preprocessing import StandardScaler
Maria@1 16
Maria@1 17 import classifiers as cc
Maria@1 18
Maria@1 19
def post_process_frames(frames, pca_frames=True, n_pcas=20):
    """Standardise every row of `frames` and optionally reduce it with PCA.

    Parameters
    ----------
    frames : 2-D array
        Data matrix; rows are treated as samples (per the original
        "standardise n_samples" note).
    pca_frames : bool
        When true, the standardised matrix is projected with PCA.
    n_pcas : int
        Number of principal components kept when `pca_frames` is true.

    Returns
    -------
    The standardised matrix, or its PCA projection when `pca_frames` is true.
    """
    # StandardScaler works column-wise, so it is fitted on the transpose
    # to standardise each row of `frames`, then transposed back.
    scaler = StandardScaler()
    standardised = scaler.fit_transform(frames.T).T
    if not pca_frames:
        return standardised
    reducer = PCA(n_components=n_pcas)
    return reducer.fit_transform(standardised)
Maria@1 26
Maria@1 27
def classification_experiments(features, labels, feat_labels, group_labels, nfolds=5):
    """Classify rhythms/melodies and average accuracy by label grouping.

    Every feature set is evaluated with each of the KNN, LDA, NB and SVM
    classifiers from the `classifiers` module, and the resulting accuracies
    are averaged per group (eg, per transformation or transformation value).

    Parameters
    ----------
    features : sequence
        Feature sets; each one is passed unchanged to `cc.classify*`.
    labels : array-like
        Class labels, passed unchanged to `cc.classify*`.
    feat_labels : sequence of str
        One name per entry of `features` (the two are zipped together).
    group_labels : array-like
        One grouping value per item, used to average the accuracies.
    nfolds : int
        Forwarded to the classifiers as `kfold`.

    Returns
    -------
    results_classification : list of lists
        One row per (feature set, classifier): the mean accuracy of each
        group (in the order of `tlabels`), the overall mean accuracy, the
        classifier name and the feature label.
    tlabels : array
        Unique group labels in order of first appearance.
    """
    # A plain list would make `group_labels == tt` a scalar False and
    # silently produce empty groups, so coerce to an array first.
    group_labels = numpy.asarray(group_labels)

    # Unique group labels, ordered by first occurrence instead of sorted.
    tlabels, inds = numpy.unique(group_labels, return_index=True)
    tlabels = tlabels[numpy.argsort(inds)]
    tlabelinds = [numpy.where(group_labels == tt)[0] for tt in tlabels]

    results_classification = []
    classifiers = ["KNN", "LDA", "NB", "SVM"]
    for feat, feat_label in zip(features, feat_labels):
        for cl in classifiers:
            # Resolves to cc.classifyKNN, cc.classifyLDA, cc.classifyNB
            # or cc.classifySVM.
            classify = getattr(cc, "classify" + cl)
            # NOTE(review): assumed to return a NumPy array with one accuracy
            # per item (it is indexed by item position below) - confirm in
            # the classifiers module.
            accuracies = classify(feat, labels, kfold=nfolds)
            group_accuracy = [numpy.nanmean(accuracies[labelinds])
                              for labelinds in tlabelinds]
            # NaN-aware like the per-group means above, so a single NaN
            # entry no longer turns the overall score into NaN.
            group_accuracy.append(numpy.nanmean(accuracies))
            group_accuracy.append(cl)
            group_accuracy.append(feat_label)
            results_classification.append(group_accuracy)
    return results_classification, tlabels
Maria@1 54
Maria@1 55
def topK_experiments(features, labels, feat_labels, group_labels, K=99):
    """Query rhythms/melodies and assess the recall rate at top K.

    For every feature set and distance metric, the first item of each label
    is used as the query. Every other item with the same label is a true
    match and scores 1 if it is among the K nearest neighbours of the query,
    0 otherwise. Query items themselves stay NaN and are ignored by the
    averages. Scores are averaged by label grouping, eg, by transformation
    or transformation value.

    Parameters
    ----------
    features : sequence
        Feature matrices; each is passed to
        `sklearn.metrics.pairwise.pairwise_distances`.
    labels : array-like
        One label per item; items sharing a label are true matches.
    feat_labels : sequence of str
        One name per entry of `features` (the two are zipped together).
    group_labels : array-like
        One grouping value per item, used to average the scores.
    K : int
        Number of nearest neighbours retrieved per query.

    Returns
    -------
    results_topK : list of lists
        One row per (feature set, metric): the mean score of each group (in
        the order of `tlabels`), the overall mean score, the metric name and
        the feature label.
    tlabels : array
        Unique group labels in order of first appearance.
    """
    # Plain lists would make the `==` comparisons below scalar, so coerce.
    labels = numpy.asarray(labels)
    group_labels = numpy.asarray(group_labels)

    # Unique group labels, ordered by first occurrence instead of sorted.
    tlabels, inds = numpy.unique(group_labels, return_index=True)
    tlabels = tlabels[numpy.argsort(inds)]
    tlabelinds = [numpy.where(group_labels == tt)[0] for tt in tlabels]

    # The query and its true matches depend only on `labels`, so they are
    # computed once instead of once per feature set and metric.
    queries = []
    for label in numpy.unique(labels):
        members = numpy.where(labels == label)[0]
        # default timbre is the first filename of the family (eg. 1_2_1.wav for family 2)
        queryind = members[0]
        # `members` holds unique ascending indices, so the rest of the
        # array is exactly the true matches without the query.
        queries.append((queryind, set(members[1:])))

    results_topK = []
    dist_metrics = ["euclidean", "cosine", "correlation", "mahalanobis"]
    for feat, feat_label in zip(features, feat_labels):
        for metric in dist_metrics:
            D = PW.pairwise_distances(feat, metric=metric)
            accuracies = numpy.full((len(labels), 1), numpy.nan, dtype=float)
            for queryind, truematchinds in queries:
                sortindex = numpy.argsort(D[queryind, :])
                # Remove the query by index rather than assuming it is ranked
                # first: with tied distances (eg, duplicate feature rows) the
                # sort may put another item on top, which would then be
                # dropped while the query stayed among the candidates.
                sortindex = sortindex[sortindex != queryind]
                topKinds = set(sortindex[:K])
                correctinds = truematchinds & topKinds
                wronginds = truematchinds - correctinds
                accuracies[list(correctinds)] = 1
                accuracies[list(wronginds)] = 0
            group_accuracy = [numpy.nanmean(accuracies[labelinds])
                              for labelinds in tlabelinds]
            # Overall mean over scored items only (queries remain NaN).
            group_accuracy.append(numpy.mean(accuracies[~numpy.isnan(accuracies)]))
            group_accuracy.append(metric)
            group_accuracy.append(feat_label)
            results_topK.append(group_accuracy)
    return results_topK, tlabels
Maria@1 88
Maria@1 89
if __name__ == '__main__':
    # Load metadata
    meta = pandas.read_csv(os.path.join('data', 'Metadata.csv'), sep=',')
    # `.values` is used instead of `get_values()`, which was deprecated in
    # pandas 0.25 and removed in pandas 1.0.
    labels = numpy.array(meta["family"].values, dtype=str)

    feat_labels = ["ST", "OP", "FP", "PB", "IG", "FMT"]
    test_classes = ["transformation", "value", "style", "monopoly"]

    # Load features and post process: one CSV per feature label, kept in the
    # same order as `feat_labels`.
    try:
        features = [
            post_process_frames(
                pandas.read_csv(os.path.join('data', name + '.csv'),
                                header=None).values)
            for name in feat_labels
        ]
    except Exception as e:
        # Deliberate best-effort: any failure while reading or processing the
        # CSV files falls back to the pickled features, but the reason is
        # reported instead of being swallowed silently.
        print('Could not build features from the CSV files (%s); '
              'loading features.pickle instead' % e)
        # NOTE: unpickling can execute arbitrary code - only load a
        # features.pickle that comes from a trusted source.
        with open(os.path.join('data', 'features.pickle'), 'rb') as f:
            features = pickle.load(f)

    write_file = False  # set it to True if you want to write output file
    for test_class in test_classes:
        group_labels = meta[test_class].values
        results_class, tlabels = classification_experiments(
            features, labels, feat_labels, group_labels)
        results_topK, tlabels = topK_experiments(
            features, labels, feat_labels, group_labels)
        # One header row followed by the classification and retrieval rows.
        header = numpy.append(tlabels, ['mean accuracy', 'metric', 'feature'])
        results = numpy.concatenate((header[None, :],
                                     numpy.array(results_class),
                                     numpy.array(results_topK)))
        # Single-argument call form prints identically on Python 2 and 3.
        print(results)

        if write_file:
            filename = os.path.join('data', 'results_' + test_class + '.csv')
            numpy.savetxt(filename, results, fmt='%s', delimiter=',')