Mercurial > hg > rhythm-melody-feature-evaluation
view evaluate.py @ 7:b7169083b9ea tip
fix typo in variable name
author | Maria Panteli |
---|---|
date | Tue, 01 Jan 2019 15:51:38 +0200 |
parents | 2732137aa9b5 |
children |
line wrap: on
line source
# -*- coding: utf-8 -*-
"""Run classification and retrieval experiments.

Created on Fri Feb 12 18:56:28 2016

@author: mariapanteli
"""

import os
import pickle
import warnings

import numpy
import pandas
import sklearn.metrics.pairwise as PW
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

import classifiers as cc


def post_process_frames(frames, pca_frames=True, n_pcas=20):
    """Standardize and (optionally) PCA-reduce a feature matrix.

    Parameters
    ----------
    frames : 2D array
        Feature matrix. Presumably one row per recording -- confirm
        against the CSV layout.
    pca_frames : bool
        If True, project the standardized data with PCA.
    n_pcas : int
        Number of principal components kept when ``pca_frames`` is True.

    Returns
    -------
    2D array
        The standardized matrix, reduced to ``n_pcas`` columns when PCA
        is applied.
    """
    # StandardScaler normalises column-wise; transposing before and after
    # makes it standardise each row of `frames` instead.
    frames = StandardScaler().fit_transform(frames.T).T
    if pca_frames:
        frames = PCA(n_components=n_pcas).fit_transform(frames)
    return frames


def _group_indices(group_labels):
    """Return the unique group labels (in order of first appearance) and,
    for each of them, the array of positions where it occurs."""
    group_labels = numpy.asarray(group_labels)
    tlabels, inds = numpy.unique(group_labels, return_index=True)
    # numpy.unique sorts its output; re-order by first occurrence.
    tlabels = tlabels[numpy.argsort(inds)]
    tlabelinds = [numpy.where(group_labels == tt)[0] for tt in tlabels]
    return tlabels, tlabelinds


def classification_experiments(features, labels, feat_labels, group_labels, nfolds=5):
    """Classify rhythms/melodies and average accuracy by label grouping,
    eg, average accuracy per transformation or transformation value.

    Parameters
    ----------
    features : sequence of arrays
        One feature matrix per entry of ``feat_labels``.
    labels : array
        Class label of every item; passed to the classifiers as target.
    feat_labels : sequence of str
        Name of each feature set, paired with ``features`` by position.
    group_labels : array
        Grouping label of every item, used to average the accuracies.
    nfolds : int
        Forwarded to the classifiers as ``kfold``.

    Returns
    -------
    results_classification : list of lists
        One row per (feature, classifier) pair: the mean accuracy of each
        group, the overall mean accuracy, the classifier name and the
        feature label.
    tlabels : array
        Group labels in the column order used by the rows above.
    """
    tlabels, tlabelinds = _group_indices(group_labels)
    # (name, function) pairs keep the reporting order explicit.
    classifiers = [
        ("KNN", cc.classifyKNN),
        ("LDA", cc.classifyLDA),
        ("NB", cc.classifyNB),
        ("SVM", cc.classifySVM),
    ]
    results_classification = []
    for feat, feat_label in zip(features, feat_labels):
        for cl, classify in classifiers:
            # presumably one accuracy value per item, indexable by the
            # group index arrays -- confirm against the classifiers module
            accuracies = classify(feat, labels, kfold=nfolds)
            group_accuracy = [numpy.nanmean(accuracies[labelinds])
                              for labelinds in tlabelinds]
            group_accuracy.append(numpy.mean(accuracies))
            group_accuracy.append(cl)
            group_accuracy.append(feat_label)
            results_classification.append(group_accuracy)
    return results_classification, tlabels


def topK_experiments(features, labels, feat_labels, group_labels, K=99):
    """Query rhythms/melodies and assess recall rate at top K, averaging
    accuracy by label grouping, eg, by transformation or transformation
    value.

    For every distinct label the first item carrying that label is used as
    the query; every other item with the same label scores 1 if it is
    among the K nearest items to the query and 0 otherwise. Query items
    themselves stay NaN and are ignored by the averages.

    Parameters
    ----------
    features : sequence of arrays
        One feature matrix per entry of ``feat_labels``; rows are assumed
        to be aligned with ``labels``.
    labels : array
        Family label of every item.
    feat_labels : sequence of str
        Name of each feature set, paired with ``features`` by position.
    group_labels : array
        Grouping label of every item, used to average the accuracies.
    K : int
        Number of top-ranked items (query excluded) that are inspected.

    Returns
    -------
    results_topK : list of lists
        One row per (feature, metric) pair: the mean accuracy of each
        group, the overall mean accuracy, the metric name and the feature
        label.
    tlabels : array
        Group labels in the column order used by the rows above.
    """
    tlabels, tlabelinds = _group_indices(group_labels)
    labels = numpy.asarray(labels)
    unique_labels = numpy.unique(labels)  # loop-invariant
    dist_metrics = ["euclidean", "cosine", "correlation", "mahalanobis"]
    results_topK = []
    for feat, feat_label in zip(features, feat_labels):
        for metric in dist_metrics:
            D = PW.pairwise_distances(feat, metric=metric)
            accuracies = numpy.full((len(labels), 1), numpy.nan, dtype=float)
            for label in unique_labels:
                matchinds = numpy.where(labels == label)[0]
                # default timbre is the first filename of the family
                # (eg. 1_2_1.wav for family 2)
                queryind = matchinds[0]
                truematchinds = set(matchinds[1:])  # all matches but the query
                sortindex = numpy.argsort(D[queryind, :])
                # Drop the query by index rather than by position: when
                # another item is at the same distance as the query itself,
                # the query is not guaranteed to be ranked first.
                sortindex = sortindex[sortindex != queryind]
                topKinds = set(sortindex[:K])
                correctinds = truematchinds & topKinds
                wronginds = truematchinds - correctinds
                accuracies[list(correctinds)] = 1
                accuracies[list(wronginds)] = 0
            group_accuracy = [numpy.nanmean(accuracies[labelinds])
                              for labelinds in tlabelinds]
            # overall mean over the scored (non-NaN) items only
            group_accuracy.append(numpy.nanmean(accuracies))
            group_accuracy.append(metric)
            group_accuracy.append(feat_label)
            results_topK.append(group_accuracy)
    return results_topK, tlabels


if __name__ == '__main__':
    # Load metadata
    meta = pandas.read_csv(os.path.join('data', 'Metadata.csv'), sep=',')
    labels = numpy.array(meta["family"].values, dtype=str)

    feat_labels = ["ST", "OP", "FP", "PB", "IG", "FMT"]

    # Load features and post process; each feature set lives in
    # data/<LABEL>.csv. If that fails, fall back to the pickled features.
    try:
        features = [
            post_process_frames(
                pandas.read_csv(os.path.join('data', feat_name + '.csv'),
                                header=None).values)
            for feat_name in feat_labels
        ]
    except Exception as e:
        pickle_path = os.path.join('data', 'features.pickle')
        warnings.warn("Could not build features from the CSV files (%s); "
                      "loading %s instead." % (e, pickle_path))
        # NOTE(security): pickle.load executes arbitrary code from the file;
        # only use a features.pickle that comes from a trusted source.
        with open(pickle_path, 'rb') as f:
            features = pickle.load(f)

    test_classes = ["transformation", "value", "style", "monopoly"]
    write_file = False  # set it to True if you want to write output file
    for test_class in test_classes:
        group_labels = meta[test_class].values
        results_class, tlabels = classification_experiments(
            features, labels, feat_labels, group_labels)
        results_topK, tlabels = topK_experiments(
            features, labels, feat_labels, group_labels)
        header = numpy.append(tlabels, ['mean accuracy', 'metric', 'feature'])
        results = numpy.concatenate((header[None, :],
                                     numpy.array(results_class),
                                     numpy.array(results_topK)))
        print(results)
        if write_file:
            filename = os.path.join('data', 'results_' + test_class + '.csv')
            numpy.savetxt(filename, results, fmt='%s', delimiter=',')