view results.py @ 7:b7169083b9ea tip

fix typo in variable name
author Maria Panteli
date Tue, 01 Jan 2019 15:51:38 +0200
parents c4ef4a02fc19
children
line wrap: on
line source
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 20 11:30:20 2016

@author: mariapanteli
"""

import numpy
import pandas
import os
import matplotlib.pyplot as plt
#%matplotlib inline
from scipy import stats
from statsmodels.stats.multicomp import MultiComparison


# utility functions
def load_csv_results(testclass="transformation"):
    """Read the results table of one test class from the ``data`` directory.

    Parameters
    ----------
    testclass : str
        Suffix selecting the results file; the file that is read is
        ``data/results_<testclass>.csv`` (relative to the working directory).

    Returns
    -------
    pandas.DataFrame
        The parsed comma-separated file.
    """
    filename = 'results_' + testclass + '.csv'
    csv_path = os.path.join('data', filename)
    return pandas.read_csv(csv_path, sep=',')


def data_frame_to_latex_table(results_df, rhyinds, melinds):
    results_df = pandas.concat([results_df.iloc[:, -1], results_df.iloc[:, -2], results_df.iloc[:, 0:-2]], axis=1)
    print results_df.iloc[rhyinds, :].to_latex(index=False)
    print results_df.iloc[melinds, :].to_latex(index=False)


def boxplotfigure(data, labels=None, rotate=False, xlabel='', ylabel='', figurename=None):
    """Draw a box plot of ``data`` in a new matplotlib figure.

    Parameters
    ----------
    data : array-like
        Values handed to ``plt.boxplot`` unchanged.
    labels : sequence of str, optional
        Labels for the boxes; also used as tick labels when ``rotate`` is set.
    rotate : bool
        If true, the x tick labels are re-applied with a 45 degree rotation.
    xlabel, ylabel : str
        Axis captions.
    figurename : str, optional
        When given, the figure is saved to this path with a tight bounding box.
    """
    plt.figure()
    # notch is the second positional parameter of plt.boxplot
    plt.boxplot(data, notch=1, labels=labels)
    plt.ylim(0, 1)  # y axis fixed to the range 0-1
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    if rotate:
        tick_positions = plt.xticks()[0]
        plt.xticks(tick_positions, labels, rotation=45)
    plt.tight_layout()
    if figurename is None:
        return
    plt.savefig(figurename, bbox_inches='tight')
        
# feature codes of the two groups analysed below (rhythm and melody)
rhyfeat = ["ST", "OP", "FP"]
melfeat = ["PB", "IG", "FMT"]

# input tables: the metadata file and the results per transformation
meta = pandas.read_csv(os.path.join('data', 'Metadata.csv'), sep=',')
transformations = load_csv_results("transformation")

# row positions in the transformation results whose "feature" column matches
# each code, concatenated in the order the codes are listed above
rhyinds = numpy.concatenate(
    [numpy.where(transformations["feature"] == feat)[0] for feat in rhyfeat])
melinds = numpy.concatenate(
    [numpy.where(transformations["feature"] == feat)[0] for feat in melfeat])

# print mean accuracy as Latex table
# (only the last three columns of the transformation results are used here)
mean_accuracy_df = transformations.iloc[:, -3:]
data_frame_to_latex_table(mean_accuracy_df, rhyinds, melinds)

# print accuracy per transformation as Latex table
# (reuses the already loaded `transformations` table instead of reading the
# same CSV a second time; data_frame_to_latex_table does not modify its input)
data_frame_to_latex_table(transformations, rhyinds, melinds)

# print accuracy per transformation value as Latex table
# warning: this gives a big table 48 x 100
# data_frame_to_latex_table(load_csv_results("value"), rhyinds, melinds)

# indices of classification accuracies (for boxplot results and statistical tests)
classmodels = ["KNN", "LDA", "NB", "SVM"]
classmodelsinds = numpy.concatenate([numpy.where(transformations["metric"]==model)[0] for model in classmodels])

# restrict the feature rows to those whose "metric" is one of the classifiers.
# numpy.intersect1d returns the common indices sorted and keeps an integer
# dtype even when nothing matches; a set intersection converted through a list
# has no guaranteed order and turns into a float array when it is empty, which
# cannot be used for indexing.
rhyinds = numpy.intersect1d(rhyinds, classmodelsinds)
melinds = numpy.intersect1d(melinds, classmodelsinds)

# load style data
# (all columns except the last three; `.values` is used because
# DataFrame.get_values() was deprecated in pandas 0.25 and removed in 1.0)
styledata = load_csv_results("style").iloc[:, 0:-3]
styledatarhy = styledata.values[rhyinds, :]
styledatamel = styledata.values[melinds, :]

# style box plot
# one label per column of the style tables -- assumes the column order of
# results_style.csv matches these lists; verify against the data file
rhystylelabels = [
    'Afro-American', 'North-Indian', 'African',
    'Classical', 'EDM', 'Latin-Brazilian',
]
melstylelabels = [
    'Dutch Folk', 'Classical (M)', 'Byzantine',
    'Pop (M)', 'Classical (P)', 'Pop (P)',
]
boxplotfigure(styledatarhy, labels=rhystylelabels, rotate=True,
              xlabel="Music Style", ylabel="Classification Accuracy")
boxplotfigure(styledatamel, labels=melstylelabels, rotate=True,
              xlabel="Music Style", ylabel="Classification Accuracy")

# style paired t-test
# Each accuracy table is flattened row by row and every value is labelled with
# the style of its column: numpy.tile repeats the label list once per table
# row, which matches the row-major order of the flattened data.
DataRhy = numpy.asarray(numpy.reshape(styledatarhy, -1), dtype='float')
Groups = numpy.tile(rhystylelabels, styledatarhy.shape[0])
modrhy = MultiComparison(DataRhy, Groups)
DataMel = numpy.asarray(numpy.reshape(styledatamel, -1), dtype='float')
Groups = numpy.tile(melstylelabels, styledatamel.shape[0])
modmel = MultiComparison(DataMel, Groups)
# pairwise related-samples t-tests between the groups; method='Bonf' selects
# the p-value correction (presumably Bonferroni -- see statsmodels multipletests).
# print(...) with a single argument works under both Python 2 and Python 3.
print("multiple comparison test for each rhythm style")
print(modrhy.allpairtest(stats.ttest_rel, method='Bonf')[0])
print("multiple comparison test for each melody style")
print(modmel.allpairtest(stats.ttest_rel, method='Bonf')[0])

# load monophonic/polyphonic data
# (all columns except the last three, converted to float; `.values` is used
# because DataFrame.get_values() was removed in pandas 1.0)
monopolydata = numpy.asarray(load_csv_results("monopoly").values[:, 0:-3], dtype=float)

# mono/poly paired t-test
# (column 0 is tested against column 1 on the same rows; print(...) with a
# single argument works under both Python 2 and Python 3)
print("Melody, ttest, mono/poly: "+str(stats.ttest_rel(monopolydata[melinds, 0], monopolydata[melinds, 1])))
print("Melody, Mean, mono/poly: "+str(numpy.mean(monopolydata[melinds, :], axis=0)))
print("Melody, Std, mono/poly: "+str(numpy.std(monopolydata[melinds, :], axis=0)))
print("Rhythm, ttest, mono/poly: "+str(stats.ttest_rel(monopolydata[rhyinds, 0], monopolydata[rhyinds, 1])))
print("Rhythm, Mean, mono/poly: "+str(numpy.mean(monopolydata[rhyinds, :], axis=0)))
print("Rhythm, Std, mono/poly: "+str(numpy.std(monopolydata[rhyinds, :], axis=0)))