Mercurial > hg > rhythm-melody-feature-evaluation
view evaluate.py @ 1:c4ef4a02fc19
core functions
author | Maria Panteli |
---|---|
date | Mon, 01 Aug 2016 21:10:31 -0400 |
parents | |
children | 2732137aa9b5 |
line wrap: on
line source
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 12 18:56:28 2016

@author: mariapanteli
"""
"""Run classification and retrieval experiments"""
import os

import numpy
import pandas
import sklearn.metrics.pairwise as PW
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

import classifiers as cc


def post_process_frames(frames, pca_frames=True, n_pcas=20):
    """Standardize and (optionally) PCA-reduce the data.

    Parameters
    ----------
    frames : numpy.ndarray, shape (n_samples, n_features)
        Feature matrix, one row per rhythm/melody sample.
    pca_frames : bool
        If True, project the standardized data onto ``n_pcas`` principal
        components.
    n_pcas : int
        Number of principal components to keep.

    Returns
    -------
    numpy.ndarray
        Standardized (and optionally PCA-reduced) feature matrix.
    """
    # Transpose in and out so StandardScaler normalizes each *sample* (row
    # of the original matrix) to zero mean / unit variance, rather than
    # each feature column.
    frames = StandardScaler().fit_transform(frames.T).T
    if pca_frames:
        frames = PCA(n_components=n_pcas).fit_transform(frames)
    return frames


def _ordered_group_indices(group_labels):
    """Return unique group labels in order of first appearance, and the
    index array of each group's members.

    Shared by the classification and top-K experiments so both report
    per-group results in the same (first-appearance) order.
    """
    tlabels, first_occurrence = numpy.unique(group_labels, return_index=True)
    # numpy.unique sorts alphabetically; re-sort by first occurrence to keep
    # the original ordering of the metadata file.
    tlabels = tlabels[numpy.argsort(first_occurrence)]
    tlabelinds = [numpy.where(group_labels == tt)[0] for tt in tlabels]
    return tlabels, tlabelinds


def classification_experiments(features, labels, feat_labels, group_labels, nfolds=5):
    """Classify rhythms/melodies and average accuracy by label grouping,
    e.g., average accuracy per transformation or transformation value.

    Parameters
    ----------
    features : list of numpy.ndarray
        One post-processed feature matrix per feature set.
    labels : numpy.ndarray of str
        Class label (family) per sample.
    feat_labels : list of str
        Display name of each feature set, aligned with ``features``.
    group_labels : numpy.ndarray
        Grouping label per sample (e.g., transformation type/value) used to
        average the per-sample accuracies.
    nfolds : int
        Number of cross-validation folds passed to the classifiers.

    Returns
    -------
    (list of list, numpy.ndarray)
        One result row per (feature set, classifier): per-group mean
        accuracies, overall mean, classifier name, feature name — and the
        ordered group labels (column headers for the per-group means).
    """
    tlabels, tlabelinds = _ordered_group_indices(group_labels)
    results_classification = []
    # Name/routine pairs replace the original if/elif dispatch chain.
    classifiers = [("KNN", cc.classifyKNN),
                   ("LDA", cc.classifyLDA),
                   ("NB", cc.classifyNB),
                   ("SVM", cc.classifySVM)]
    for feat, feat_label in zip(features, feat_labels):
        for cl, classify in classifiers:
            accuracies = classify(feat, labels, kfold=nfolds)
            # Average the per-sample accuracies within each group.
            group_accuracy = [numpy.nanmean(accuracies[labelinds])
                              for labelinds in tlabelinds]
            group_accuracy.append(numpy.mean(accuracies))
            group_accuracy.append(cl)
            group_accuracy.append(feat_label)
            results_classification.append(group_accuracy)
    return results_classification, tlabels


def topK_experiments(features, labels, feat_labels, group_labels, K=99):
    """Query rhythms/melodies and assess recall rate at top K, averaging
    accuracy by label grouping, e.g., by transformation or transformation
    value.

    Parameters
    ----------
    features : list of numpy.ndarray
        One post-processed feature matrix per feature set.
    labels : numpy.ndarray of str
        Class label (family) per sample; samples sharing a label are
        treated as true matches of each other.
    feat_labels : list of str
        Display name of each feature set, aligned with ``features``.
    group_labels : numpy.ndarray
        Grouping label per sample used to average the per-sample scores.
    K : int
        Size of the retrieved list to score.

    Returns
    -------
    (list of list, numpy.ndarray)
        One result row per (feature set, distance metric): per-group mean
        scores, overall mean, metric name, feature name — and the ordered
        group labels.
    """
    tlabels, tlabelinds = _ordered_group_indices(group_labels)
    results_topK = []
    dist_metrics = ["euclidean", "cosine", "correlation", "mahalanobis"]
    for feat, feat_label in zip(features, feat_labels):
        for metric in dist_metrics:
            D = PW.pairwise_distances(feat, metric=metric)
            # Start all scores as NaN; only samples that are judged get 0/1.
            accuracies = numpy.full((len(labels), 1), numpy.nan)
            for label in numpy.unique(labels):
                queryind = numpy.where(labels == label)[0]
                # NOTE(review): truematchinds is built from the same mask as
                # queryind, so the set difference below is always empty and
                # every score stays NaN — presumably the query and its true
                # matches were meant to be distinct index sets. TODO confirm
                # against the intended retrieval protocol (see later
                # revisions of this file); logic kept unchanged here.
                truematchinds = numpy.where(labels == label)[0]
                truematchinds = set(truematchinds) - set(queryind)  # remove queryind
                sortindex = numpy.argsort(D[queryind, :]).flatten()
                # Drop the top of the list (the query itself, distance 0) —
                # assumes a single query index; verify when queryind has
                # more than one element.
                sortindex = sortindex[1:]
                topKinds = set(sortindex[:K])
                correctinds = truematchinds & topKinds
                wronginds = truematchinds - correctinds
                accuracies[list(correctinds)] = 1
                accuracies[list(wronginds)] = 0
            group_accuracy = [numpy.nanmean(accuracies[labelinds])
                              for labelinds in tlabelinds]
            # Overall mean over the entries that were actually scored
            # (nanmean replaces the original isnan == False row masking).
            group_accuracy.append(numpy.nanmean(accuracies))
            group_accuracy.append(metric)
            group_accuracy.append(feat_label)
            results_topK.append(group_accuracy)
    return results_topK, tlabels


if __name__ == '__main__':
    # Load metadata. `.values` replaces the deprecated `.get_values()`
    # (removed in pandas 1.0) throughout this script.
    meta = pandas.read_csv(os.path.join('data', 'Metadata.csv'), sep=',')
    labels = numpy.array(meta["family"].values, dtype=str)

    # Load each feature set (same file stem as its display label) and
    # post process: standardize + PCA.
    feat_labels = ["ST", "OP", "FP", "PB", "IG", "FMT"]
    features = [post_process_frames(
                    pandas.read_csv(os.path.join('data', name + '.csv'),
                                    header=None).values)
                for name in feat_labels]

    test_classes = ["transformation", "value", "style", "monopoly"]
    write_file = False  # set it to True if you want to write output file
    for test_class in test_classes:
        group_labels = meta[test_class].values
        results_class, tlabels = classification_experiments(
            features, labels, feat_labels, group_labels)
        results_topK, tlabels = topK_experiments(
            features, labels, feat_labels, group_labels)
        # One header row (group names + trailing columns), then one row per
        # (feature, classifier/metric) combination.
        header = numpy.append(tlabels, ['mean accuracy', 'metric', 'feature'])
        results = numpy.concatenate((header[None, :],
                                     numpy.array(results_class),
                                     numpy.array(results_topK)))
        if write_file:
            filename = os.path.join('data', 'results_' + test_class + '.csv')
            numpy.savetxt(filename, results, fmt='%s', delimiter=',')