#
# compute_performance_statistics.py:
# Compute performance statistics using experiment data structures
#
# Author: Peter Foster
# (c) 2015 Peter Foster
#

from pylab import *  # NB: the wildcard import also pulls numpy's namespace into scope
from pandas import Series, DataFrame
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score
import itertools
import pdb

def compute_performance_statistics(EXPER, Datasets, Settings, iterableParameters=None, discardMissingPredictions=False):
    """Compute performance statistics for each dataset in an experiment.
    If iterableParameters is given, each combination of the named values in
    EXPER['settings'] is evaluated separately, keyed by parameter tuple.
    Returns EXPER with performance statistics attached to each result set.
    """
    for dataset in EXPER['datasets'].keys():
        if iterableParameters:
            for parameterTuple in itertools.product(*[EXPER['settings'][p] for p in iterableParameters]):
                parameterStr = "_"
                for name, value in zip(iterableParameters, parameterTuple):
                    parameterStr += name + '=' + str(value)
                EXPER['datasets'][dataset][parameterTuple] = __compute_performance(
                    EXPER['datasets'][dataset][parameterTuple], Datasets, Settings,
                    dataset, EXPER['name'] + parameterStr, discardMissingPredictions)
        else:
            EXPER['datasets'][dataset] = __compute_performance(
                EXPER['datasets'][dataset], Datasets, Settings,
                dataset, EXPER['name'], discardMissingPredictions)
    return EXPER
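
# Hypothetical sketch of the parameterised case (parameter name and values assumed
# for illustration, not taken from the original experiments): with
# iterableParameters=['C'], EXPER['settings'] must list the swept values and each
# dataset entry must map parameter tuples to their own result sets, e.g.
#
#   EXPER = {'name': 'EXPER_sweep',
#            'settings': {'C': [0.1, 1.0]},
#            'datasets': {'someset': {(0.1,): {'scores': ...},
#                                     (1.0,): {'scores': ...}}}}
#
#   compute_performance_statistics(EXPER, Datasets, Settings, iterableParameters=['C'])
#
# evaluates each parameter setting separately, appending '_C=<value>' to the
# experiment name used internally.
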
def __compute_performance(Results, Datasets, Settings, dataset, experName, discardMissingPredictions):
    """Attach per-label performance statistics to a single result set."""
    #pdb.set_trace()
    Results['allresults'] = DataFrame(Results['scores']).T

    Data = Datasets[dataset]['dataset'].copy(deep=True)

    #Remove entries in Dataset for which we have no result (cf. EXPER004)
    if discardMissingPredictions:
        #np.flatnonzero replaces pylab's removed find() helper (equivalent behaviour)
        I = np.flatnonzero(Results['allresults'].notnull().values)
        Data = Data.iloc[I]
        Results['allresults'] = DataFrame(Results['allresults'].iloc[I])

    #Test integrity of results: no missing scores may remain
    assert not pd.isnull(Results['allresults']).values.any()

    #Performance statistics
    Results['performance'] = {}

    if 'predictions' in Results and not isinstance(Results['predictions'], DataFrame):
        Results['predictions'] = DataFrame(Results['predictions']).T
    Results['performance']['classwise'] = {}
    #Evaluation code for multilabel classification; for each label there are two models,
    #each of which yields scores for tag presence versus absence
    for col in Datasets[dataset]['consideredlabels']:
        Results['performance']['classwise'][col] = {}
        #Classification accuracy not easily interpretable here due to label skew;
        #rank items by the difference between presence and absence scores instead
        scores = Results['allresults'][col].apply(lambda o: o[True] - o[False])
        #NB: roc_auc_score gives the area under the ROC curve, although the value
        #is stored under the 'auc_precisionrecall' key
        area = roc_auc_score(Data[col] == 1, scores)
        Results['performance']['classwise'][col]['auc_precisionrecall'] = area

    #Store summary statistics in a data frame
    Results['performance']['classwise'] = DataFrame(Results['performance']['classwise'])
    return Results
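
#
# Minimal usage sketch with made-up data (names, labels and scores are hypothetical,
# not part of the original experiments). It illustrates the structures the functions
# above expect: a ground-truth DataFrame with one binary column per considered label,
# and per-item scores giving a {True: ..., False: ...} pair per label for tag
# presence versus absence.
#
if __name__ == '__main__':
    labels = ['vocal', 'percussive']
    #Ground truth: one row per item, binary column per considered label
    groundTruth = DataFrame({'vocal':      [1, 0, 1, 0, 1, 0],
                             'percussive': [0, 0, 1, 1, 0, 1]})
    Datasets = {'toyset': {'dataset': groundTruth, 'consideredlabels': labels}}
    Settings = {}
    #Per-item scores; positives get a larger presence-minus-absence margin
    toyScores = {i: {lab: {True: 0.9 if groundTruth.loc[i, lab] == 1 else 0.2, False: 0.5}
                     for lab in labels}
                 for i in groundTruth.index}
    EXPER = {'name': 'EXPER_toy', 'settings': {},
             'datasets': {'toyset': {'scores': toyScores}}}
    EXPER = compute_performance_statistics(EXPER, Datasets, Settings)
    #With these perfectly separating toy scores each label attains an AUC of 1.0
    print(EXPER['datasets']['toyset']['performance']['classwise'])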