# -*- coding: utf-8 -*-
"""Run classification and retrieval experiments.

Created on Fri Feb 12 18:56:28 2016

@author: mariapanteli
"""

import os
import numpy
import pandas
import pickle
import sklearn.metrics.pairwise as PW
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

import classifiers as cc

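# NOTE: `classifiers` is the project's local helper module. It is assumed here to
# expose classifyKNN, classifyLDA, classifyNB and classifySVM, each returning an
# array with one cross-validated accuracy value per sample, so that the
# experiment functions below can average accuracies per label group.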

def post_process_frames(frames, pca_frames=True, n_pcas=20):
    """Standardize each sample and optionally reduce dimensionality with PCA."""
    frames = StandardScaler().fit_transform(frames.T).T  # standardise each sample (row) across its features
    if pca_frames:
        frames = PCA(n_components=n_pcas).fit_transform(frames)
    return frames

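# Illustrative usage (a sketch; the input dimensions below are made up):
#   feats = post_process_frames(numpy.random.rand(100, 840))  # -> array of shape (100, 20)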

def classification_experiments(features, labels, feat_labels, group_labels, nfolds=5):
    """Classify rhythms/melodies and average the accuracy by label grouping,
    e.g., average accuracy per transformation or per transformation value.
    """
    tlabels, inds = numpy.unique(group_labels, return_index=True)
    tlabels = tlabels[numpy.argsort(inds)]  # keep groups in order of first appearance
    tlabelinds = [numpy.where(group_labels == tt)[0] for tt in tlabels]

    results_classification = []
    classifiers = ["KNN", "LDA", "NB", "SVM"]
    for feat, feat_label in zip(features, feat_labels):
        for cl in classifiers:
            if cl == "KNN":
                accuracies = cc.classifyKNN(feat, labels, kfold=nfolds)
            elif cl == "LDA":
                accuracies = cc.classifyLDA(feat, labels, kfold=nfolds)
            elif cl == "NB":
                accuracies = cc.classifyNB(feat, labels, kfold=nfolds)
            elif cl == "SVM":
                accuracies = cc.classifySVM(feat, labels, kfold=nfolds)
            group_accuracy = [numpy.nanmean(accuracies[labelinds]) for labelinds in tlabelinds]
            group_accuracy.append(numpy.mean(accuracies))
            group_accuracy.append(cl)
            group_accuracy.append(feat_label)
            results_classification.append(group_accuracy)
    return results_classification, tlabels

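# Each row of results_classification holds the per-group accuracies followed by
# the overall mean accuracy, the classifier name and the feature label, matching
# the header assembled in the __main__ block below.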

def topK_experiments(features, labels, feat_labels, group_labels, K=99):
    """Query rhythms/melodies and assess the recall rate at top K;
    average the accuracy by label grouping, e.g., by transformation or transformation value.
    """
    tlabels, inds = numpy.unique(group_labels, return_index=True)
    tlabels = tlabels[numpy.argsort(inds)]
    tlabelinds = [numpy.where(group_labels == tt)[0] for tt in tlabels]

    results_topK = []
    dist_metrics = ["euclidean", "cosine", "correlation", "mahalanobis"]
    for feat, feat_label in zip(features, feat_labels):
        for metric in dist_metrics:
            D = PW.pairwise_distances(feat, metric=metric)
            accuracies = numpy.ones((len(labels), 1), dtype=float) * numpy.nan
            for label in numpy.unique(labels):
                truematchinds = numpy.where(labels == label)[0]
                # default timbre is the first filename of the family (e.g. 1_2_1.wav for family 2)
                queryind = numpy.array([truematchinds[0]])
                truematchinds = set(truematchinds) - set(queryind)  # remove the query itself
                sortindex = numpy.argsort(D[queryind, :]).flatten()
                sortindex = sortindex[1:]  # remove the query (top of the ranked list)
                topKinds = set(sortindex[:K])
                correctinds = truematchinds & topKinds
                wronginds = truematchinds - correctinds
                accuracies[list(correctinds)] = 1
                accuracies[list(wronginds)] = 0
            group_accuracy = [numpy.nanmean(accuracies[labelinds]) for labelinds in tlabelinds]
            group_accuracy.append(numpy.nanmean(accuracies))  # overall mean, ignoring NaN (query) entries
            group_accuracy.append(metric)
            group_accuracy.append(feat_label)
            results_topK.append(group_accuracy)
    return results_topK, tlabels

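# For every family, the first recording serves as the query and the remaining
# family members are the true matches; a match counts as retrieved if it falls
# within the K nearest neighbours of the query under the chosen distance metric.
# Query items keep their NaN entries and are therefore excluded from the averages.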

if __name__ == '__main__':
    # Load metadata
    meta = pandas.read_csv(os.path.join('data', 'Metadata.csv'), sep=',')
    labels = numpy.array(meta["family"].values, dtype=str)

    # Load features and post-process; fall back to the pickled features if the CSV files are missing
    try:
        st = post_process_frames(pandas.read_csv(os.path.join('data', 'ST.csv'), header=None).values)
        op = post_process_frames(pandas.read_csv(os.path.join('data', 'OP.csv'), header=None).values)
        fp = post_process_frames(pandas.read_csv(os.path.join('data', 'FP.csv'), header=None).values)
        pb = post_process_frames(pandas.read_csv(os.path.join('data', 'PB.csv'), header=None).values)
        ig = post_process_frames(pandas.read_csv(os.path.join('data', 'IG.csv'), header=None).values)
        fmt = post_process_frames(pandas.read_csv(os.path.join('data', 'FMT.csv'), header=None).values)
        features = [st, op, fp, pb, ig, fmt]
    except Exception:
        with open(os.path.join('data', 'features.pickle'), 'rb') as f:
            features = pickle.load(f)

    feat_labels = ["ST", "OP", "FP", "PB", "IG", "FMT"]
    test_classes = ["transformation", "value", "style", "monopoly"]

    write_file = False  # set to True to write the results to a CSV file
    for test_class in test_classes:
        group_labels = meta[test_class].values
        results_class, tlabels = classification_experiments(features, labels, feat_labels, group_labels)
        results_topK, tlabels = topK_experiments(features, labels, feat_labels, group_labels)
        header = numpy.append(tlabels, ['mean accuracy', 'metric', 'feature'])
        results = numpy.concatenate((header[None, :], numpy.array(results_class), numpy.array(results_topK)))
        print(results)

        if write_file:
            filename = os.path.join('data', 'results_' + test_class + '.csv')
            numpy.savetxt(filename, results, fmt='%s', delimiter=',')