chime-home-dataset-annotation-and-baseline-evaluation-code
diff gmm_baseline_experiments/compute_performance_statistics.py @ 2:cb535b80218a
Remaining scripts and brief documentation
| author | peterf |
| --- | --- |
| date | Fri, 10 Jul 2015 23:24:23 +0100 |
| parents | |
| children | |
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gmm_baseline_experiments/compute_performance_statistics.py	Fri Jul 10 23:24:23 2015 +0100
@@ -0,0 +1,59 @@
+#
+# compute_performance_statistics.py:
+# Compute performance statistics using experiment data structures
+#
+# Author: Peter Foster
+# (c) 2015 Peter Foster
+#
+
+from pylab import *
+from pandas import Series, DataFrame
+import pandas as pd
+from sklearn.metrics import roc_auc_score
+import itertools
+import pdb
+
+def compute_performance_statistics(EXPER, Datasets, Settings, iterableParameters=None, discardMissingPredictions=False):
+    for dataset in EXPER['datasets'].keys():
+        if iterableParameters:
+            for parameterTuple in itertools.product(*[EXPER['settings'][p] for p in iterableParameters]):
+                parameterStr = "_"
+                for i,p in zip(iterableParameters,parameterTuple): parameterStr += i + '=' + str(p)
+                EXPER['datasets'][dataset][parameterTuple] = __compute_performance(EXPER['datasets'][dataset][parameterTuple], Datasets, Settings, dataset, EXPER['name'] + parameterStr, discardMissingPredictions)
+        else:
+            EXPER['datasets'][dataset] = __compute_performance(EXPER['datasets'][dataset], Datasets, Settings, dataset, EXPER['name'], discardMissingPredictions)
+    return EXPER
+
+def __compute_performance(Results, Datasets, Settings, dataset, experName, discardMissingPredictions):
+    #pdb.set_trace()
+    Results['allresults'] = DataFrame(Results['scores']).T
+
+    Data = Datasets[dataset]['dataset'].copy(deep=True)
+
+    #Remove entries in Dataset for which we have no result (cf. EXPER004)
+    if discardMissingPredictions:
+        I = find(Results['allresults'].isnull().apply(lambda b: ~b))
+        Data = Data.iloc[I]
+        Results['allresults'] = DataFrame(Results['allresults'].iloc[I])
+
+    #Test integrity of results
+    assert(not any(pd.isnull(Results['allresults'])))
+    #Performance statistics
+    Results['performance'] = {}
+    #Classification accuracy
+
+    if 'predictions' in Results.keys() and not(isinstance(Results['predictions'], DataFrame)):
+        Results['predictions'] = DataFrame(Results['predictions']).T
+    Results['performance']['classwise'] = {}
+    #Evaluation code for multilabel classification; for each label there are two models, each which yield scores for tag presence versus absence
+    for col in Datasets[dataset]['consideredlabels']:
+        Results['performance']['classwise'][col] = {}
+        #Classification accuracy not easily interpretable here due to skew
+        scores = Results['allresults'][col].apply(lambda o: o[True] - o[False])
+        area = roc_auc_score(Data[col]==1, scores)
+        Results['performance']['classwise'][col]['auc_precisionrecall'] = area
+
+    #Store summary statistics in data frame
+    Results['performance']['classwise'] = DataFrame(Results['performance']['classwise'])
+    return Results
+
\ No newline at end of file
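For each label in `Datasets[dataset]['consideredlabels']`, the script expects two model scores per chunk, indexed by `True` (tag-presence model) and `False` (tag-absence model); chunks are ranked by the difference of the two scores and the label's performance is summarised with `roc_auc_score`, so the value stored under the key `'auc_precisionrecall'` is in fact an ROC AUC. The self-contained sketch below (not part of the repository; the array names and synthetic scores are illustrative assumptions) shows that reduction on toy data:

```python
# Standalone sketch of the per-label evaluation performed in __compute_performance.
# All names below (y_true, score_present, score_absent) are hypothetical; the
# original code reads these values from Results['allresults'] and the dataset
# DataFrame instead of generating them.
import numpy as np
from sklearn.metrics import roc_auc_score

rng = np.random.RandomState(0)
n_chunks = 200

# Hypothetical ground truth for one label: 1 = tag present, 0 = tag absent.
y_true = rng.randint(0, 2, size=n_chunks)

# Hypothetical scores from the two per-label models (e.g. GMM log-likelihoods):
# one model trained on chunks carrying the tag, one on chunks without it.
score_present = rng.randn(n_chunks) + y_true   # tends to be higher when the tag is present
score_absent = rng.randn(n_chunks) - y_true    # tends to be higher when the tag is absent

# As in the script, each chunk is scored by the difference between the
# "presence" and "absence" model outputs ...
decision_scores = score_present - score_absent

# ... and the label's performance is summarised as the area under the ROC curve.
auc = roc_auc_score(y_true == 1, decision_scores)
print("ROC AUC for this label: %.3f" % auc)
```

One practical note: `find`, pulled in by `from pylab import *`, has since been removed from matplotlib; `numpy.flatnonzero` computes the same indices over a flattened boolean mask and should be able to stand in for it in `__compute_performance`.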