chime-home-dataset-annotation-and-baseline-evaluation-code
comparison gmm_baseline_experiments/compute_performance_statistics.py @ 2:cb535b80218a
Remaining scripts and brief documentation
author: peterf
date: Fri, 10 Jul 2015 23:24:23 +0100
comparison of 1:f079d2de4aa2 and 2:cb535b80218a
#
# compute_performance_statistics.py:
# Compute performance statistics using experiment data structures
#
# Author: Peter Foster
# (c) 2015 Peter Foster
#

from pylab import *
from pandas import Series, DataFrame
import pandas as pd
from sklearn.metrics import roc_auc_score
import itertools
import pdb

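# NB: the star import from pylab supplies find() (matplotlib.mlab) and numpy's any(),
# both of which __compute_performance below relies on.
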
def compute_performance_statistics(EXPER, Datasets, Settings, iterableParameters=None, discardMissingPredictions=False):
    for dataset in EXPER['datasets'].keys():
        if iterableParameters:
            for parameterTuple in itertools.product(*[EXPER['settings'][p] for p in iterableParameters]):
                parameterStr = "_"
                for i,p in zip(iterableParameters,parameterTuple): parameterStr += i + '=' + str(p)
                EXPER['datasets'][dataset][parameterTuple] = __compute_performance(EXPER['datasets'][dataset][parameterTuple], Datasets, Settings, dataset, EXPER['name'] + parameterStr, discardMissingPredictions)
        else:
            EXPER['datasets'][dataset] = __compute_performance(EXPER['datasets'][dataset], Datasets, Settings, dataset, EXPER['name'], discardMissingPredictions)
    return EXPER

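# Illustrative sketch (not from the repository) of the experiment structure the
# function above assumes; only the keys referenced in this file ('name',
# 'settings', 'datasets', and the per-dataset result dicts keyed by parameter
# tuple) are taken from the code, all concrete names and values are hypothetical:
#
#   EXPER = {'name': 'EXPER_gmm',
#            'settings': {'nComponents': [1, 2, 4, 8]},
#            'datasets': {'development': {(1,): {'scores': ...},
#                                         (2,): {'scores': ...}}}}
#
# With iterableParameters=['nComponents'], itertools.product enumerates every
# parameter tuple, a suffix such as '_nComponents=4' is appended to EXPER['name']
# when passing the experiment name on, and the matching result dict is scored in place.
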
def __compute_performance(Results, Datasets, Settings, dataset, experName, discardMissingPredictions):
    #pdb.set_trace()
    Results['allresults'] = DataFrame(Results['scores']).T

    Data = Datasets[dataset]['dataset'].copy(deep=True)

    #Remove entries in Dataset for which we have no result (cf. EXPER004)
    if discardMissingPredictions:
        I = find(Results['allresults'].isnull().apply(lambda b: ~b))
        Data = Data.iloc[I]
        Results['allresults'] = DataFrame(Results['allresults'].iloc[I])

    #Test integrity of results
    assert(not any(pd.isnull(Results['allresults'])))
    #Performance statistics
    Results['performance'] = {}
    #Classification accuracy

    if 'predictions' in Results.keys() and not(isinstance(Results['predictions'], DataFrame)):
        Results['predictions'] = DataFrame(Results['predictions']).T
    Results['performance']['classwise'] = {}
    #Evaluation code for multilabel classification; for each label there are two models, each of which yields scores for tag presence versus absence
    for col in Datasets[dataset]['consideredlabels']:
        Results['performance']['classwise'][col] = {}
        #Classification accuracy is not easily interpretable here due to label skew
        scores = Results['allresults'][col].apply(lambda o: o[True] - o[False])
        #NB: roc_auc_score gives the area under the ROC curve, although the result is stored under the key 'auc_precisionrecall'
        area = roc_auc_score(Data[col]==1, scores)
        Results['performance']['classwise'][col]['auc_precisionrecall'] = area

    #Store summary statistics in a data frame
    Results['performance']['classwise'] = DataFrame(Results['performance']['classwise'])
    return Results
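
For orientation, a minimal usage sketch follows. It is not part of the repository file above: it assumes the module can be imported from its own directory, and the dataset key 'development', the tags 'c' and 'm', the chunk count and all score values are invented purely to exercise the two functions; only the dictionary keys taken from the code ('scores', 'dataset', 'consideredlabels', 'name', 'settings', 'datasets') are not assumptions.

# usage sketch -- all names and values below are hypothetical
from pandas import DataFrame
from compute_performance_statistics import compute_performance_statistics

# Assumed ground truth: one row per audio chunk, one binary column per considered tag.
groundtruth = DataFrame({'c': [1, 0, 1, 0, 1], 'm': [0, 1, 1, 0, 0]})
Datasets = {'development': {'dataset': groundtruth, 'consideredlabels': ['c', 'm']}}

# Assumed scores: per chunk and tag, log-likelihoods of the tag-present (True)
# and tag-absent (False) models, matching what __compute_performance expects.
scores = {chunk: {tag: {True: -10.0 + groundtruth[tag][chunk], False: -10.5}
                  for tag in ['c', 'm']}
          for chunk in groundtruth.index}

EXPER = {'name': 'EXPER_demo', 'settings': {},
         'datasets': {'development': {'scores': scores}}}

EXPER = compute_performance_statistics(EXPER, Datasets, Settings={})
print(EXPER['datasets']['development']['performance']['classwise'])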