# -*- coding: utf-8 -*-
"""
Created on Wed Jul 20 11:30:20 2016

@author: mariapanteli
"""

import numpy
import pandas
import os
import matplotlib.pyplot as plt
#%matplotlib inline
from scipy import stats
from statsmodels.stats.multicomp import MultiComparison


# utility functions
def load_csv_results(testclass="transformation"):
    """Load ``data/results_<testclass>.csv`` into a pandas DataFrame.

    Parameters
    ----------
    testclass : str
        Suffix of the results file name (e.g. "transformation").

    Returns
    -------
    pandas.DataFrame
        The parsed comma-separated file.
    """
    datadf = pandas.read_csv(os.path.join('data', 'results_' + testclass + '.csv'), sep=',')
    return datadf


def data_frame_to_latex_table(results_df, rhyinds, melinds):
    """Print two LaTeX tables built from the given rows of ``results_df``.

    The columns are first reordered so that the last column comes first,
    the second-to-last column comes second, and the remaining columns
    follow in their original order. The rows selected by ``rhyinds`` are
    printed first, then the rows selected by ``melinds``.

    Parameters
    ----------
    results_df : pandas.DataFrame
        Table to print; it is not modified.
    rhyinds, melinds : sequence of int
        Positional row indices (used with ``iloc``) for each table.
    """
    results_df = pandas.concat(
        [results_df.iloc[:, -1], results_df.iloc[:, -2], results_df.iloc[:, 0:-2]],
        axis=1)
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(results_df.iloc[rhyinds, :].to_latex(index=False))
    print(results_df.iloc[melinds, :].to_latex(index=False))


def boxplotfigure(data, labels=None, rotate=False, xlabel='', ylabel='', figurename=None):
    """Draw a notched box plot of ``data`` in a new figure.

    Parameters
    ----------
    data : array-like
        Passed straight to ``plt.boxplot``.
    labels : sequence of str, optional
        Tick labels for the boxes.
    rotate : bool
        If True, re-apply ``labels`` on the x ticks rotated by 45 degrees.
    xlabel, ylabel : str
        Axis labels.
    figurename : str, optional
        If given, the figure is saved to this path.
    """
    plt.figure()
    plt.boxplot(data, notch=True, labels=labels)
    plt.ylim(0, 1)  # y axis is fixed to the [0, 1] range
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    if rotate:
        plt.xticks(plt.xticks()[0], labels, rotation=45)
    plt.tight_layout()
    if figurename is not None:
        plt.savefig(figurename, bbox_inches='tight')


meta = pandas.read_csv(os.path.join('data', 'Metadata.csv'), sep=',')
rhyfeat = ["ST", "OP", "FP"]
melfeat = ["PB", "IG", "FMT"]
transformations = load_csv_results("transformation")
# positional row indices whose "feature" column matches each feature group
rhyinds = numpy.concatenate(
    [numpy.where(transformations["feature"] == feat)[0] for feat in rhyfeat])
melinds = numpy.concatenate(
    [numpy.where(transformations["feature"] == feat)[0] for feat in melfeat])

# print mean accuracy as Latex table
mean_accuracy_df = transformations.iloc[:, -3:]
data_frame_to_latex_table(mean_accuracy_df, rhyinds, melinds)

# print accuracy per transformation as Latex table
# (reuses the already loaded transformation results instead of re-reading the CSV)
data_frame_to_latex_table(transformations, rhyinds, melinds)

# print accuracy per transformation value as Latex table
# warning: this gives a big table 48 x 100
# data_frame_to_latex_table(load_csv_results("value"), rhyinds, melinds)

# indices of classification accuracies (for boxplot results and statistical tests)
classmodels = ["KNN", "LDA", "NB", "SVM"]
classmodelsinds = numpy.concatenate(
    [numpy.where(transformations["metric"] == model)[0] for model in classmodels])

# keep only the feature rows that also belong to one of the classifiers above
rhyinds = numpy.asarray(list(set(rhyinds) & set(classmodelsinds)))
melinds = numpy.asarray(list(set(melinds) & set(classmodelsinds)))

# load style data (all columns except the last three)
styledata = load_csv_results("style").iloc[:, 0:-3]
# .values replaces DataFrame.get_values(), which was removed in pandas 1.0
styledatarhy = styledata.values[rhyinds, :]
styledatamel = styledata.values[melinds, :]

# style box plot
# NOTE(review): the six labels are presumably in the column order of the
# style results file -- confirm against the CSV header.
rhystylelabels = ['Afro-American', 'North-Indian', 'African', 'Classical', 'EDM', 'Latin-Brazilian']
boxplotfigure(styledatarhy, labels=rhystylelabels, rotate=True,
              xlabel="Music Style", ylabel="Classification Accuracy")
melstylelabels = ['Dutch Folk', 'Classical (M)', 'Byzantine', 'Pop (M)', 'Classical (P)', 'Pop (P)']
boxplotfigure(styledatamel, labels=melstylelabels, rotate=True,
              xlabel="Music Style", ylabel="Classification Accuracy")

# style paired t-test
# The data are flattened row by row, so the group labels are the label list
# repeated once per row.
DataRhy = numpy.asarray(numpy.reshape(styledatarhy, -1), dtype='float')
Groups = numpy.tile(numpy.asarray(rhystylelabels), styledatarhy.shape[0])
modrhy = MultiComparison(DataRhy, Groups)
DataMel = numpy.asarray(numpy.reshape(styledatamel, -1), dtype='float')
Groups = numpy.tile(numpy.asarray(melstylelabels), styledatamel.shape[0])
modmel = MultiComparison(DataMel, Groups)
print("multiple comparison test for each rhythm style")
print(modrhy.allpairtest(stats.ttest_rel, method='Bonf')[0])
print("multiple comparison test for each melody style")
print(modmel.allpairtest(stats.ttest_rel, method='Bonf')[0])

# load monophonic/polyphonic data (all columns except the last three)
monopolydata = numpy.asarray(load_csv_results("monopoly").values[:, 0:-3], dtype=float)

# mono/poly paired t-test (column 0 against column 1)
print("Melody, ttest, mono/poly: "+str(stats.ttest_rel(monopolydata[melinds, 0], monopolydata[melinds, 1])))
print("Melody, Mean, mono/poly: "+str(numpy.mean(monopolydata[melinds, :], axis=0)))
print("Melody, Std, mono/poly: "+str(numpy.std(monopolydata[melinds, :], axis=0)))
print("Rhythm, ttest, mono/poly: "+str(stats.ttest_rel(monopolydata[rhyinds, 0], monopolydata[rhyinds, 1])))
print("Rhythm, Mean, mono/poly: "+str(numpy.mean(monopolydata[rhyinds, :], axis=0)))
print("Rhythm, Std, mono/poly: "+str(numpy.std(monopolydata[rhyinds, :], axis=0)))