chime-home-dataset-annotation-and-baseline-evaluation-code
diff gmm_baseline_experiments/compute_performance_statistics.py @ 2:cb535b80218a
Remaining scripts and brief documentation
| author | peterf |
| --- | --- |
| date | Fri, 10 Jul 2015 23:24:23 +0100 |
| parents | |
| children | |
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gmm_baseline_experiments/compute_performance_statistics.py	Fri Jul 10 23:24:23 2015 +0100
@@ -0,0 +1,59 @@
+#
+# compute_performance_statistics.py:
+# Compute performance statistics using experiment data structures
+#
+# Author: Peter Foster
+# (c) 2015 Peter Foster
+#
+
+from pylab import *
+from pandas import Series, DataFrame
+import pandas as pd
+from sklearn.metrics import roc_auc_score
+import itertools
+import pdb
+
+def compute_performance_statistics(EXPER, Datasets, Settings, iterableParameters=None, discardMissingPredictions=False):
+    for dataset in EXPER['datasets'].keys():
+        if iterableParameters:
+            for parameterTuple in itertools.product(*[EXPER['settings'][p] for p in iterableParameters]):
+                parameterStr = "_"
+                for i,p in zip(iterableParameters,parameterTuple): parameterStr += i + '=' + str(p)
+                EXPER['datasets'][dataset][parameterTuple] = __compute_performance(EXPER['datasets'][dataset][parameterTuple], Datasets, Settings, dataset, EXPER['name'] + parameterStr, discardMissingPredictions)
+        else:
+            EXPER['datasets'][dataset] = __compute_performance(EXPER['datasets'][dataset], Datasets, Settings, dataset, EXPER['name'], discardMissingPredictions)
+    return EXPER
+
+def __compute_performance(Results, Datasets, Settings, dataset, experName, discardMissingPredictions):
+    #pdb.set_trace()
+    Results['allresults'] = DataFrame(Results['scores']).T
+
+    Data = Datasets[dataset]['dataset'].copy(deep=True)
+
+    #Remove entries in Dataset for which we have no result (cf. EXPER004)
+    if discardMissingPredictions:
+        I = find(Results['allresults'].isnull().apply(lambda b: ~b))
+        Data = Data.iloc[I]
+        Results['allresults'] = DataFrame(Results['allresults'].iloc[I])
+
+    #Test integrity of results
+    assert(not any(pd.isnull(Results['allresults'])))
+    #Performance statistics
+    Results['performance'] = {}
+    #Classification accuracy
+
+    if 'predictions' in Results.keys() and not(isinstance(Results['predictions'], DataFrame)):
+        Results['predictions'] = DataFrame(Results['predictions']).T
+    Results['performance']['classwise'] = {}
+    #Evaluation code for multilabel classification; for each label there are two models, each which yield scores for tag presence versus absence
+    for col in Datasets[dataset]['consideredlabels']:
+        Results['performance']['classwise'][col] = {}
+        #Classification accuracy not easily interpretable here due to skew
+        scores = Results['allresults'][col].apply(lambda o: o[True] - o[False])
+        area = roc_auc_score(Data[col]==1, scores)
+        Results['performance']['classwise'][col]['auc_precisionrecall'] = area
+
+    #Store summary statistics in data frame
+    Results['performance']['classwise'] = DataFrame(Results['performance']['classwise'])
+    return Results
+
\ No newline at end of file
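For each label in `Datasets[dataset]['consideredlabels']`, the script expects two model scores per chunk, indexed by `True` (tag-presence model) and `False` (tag-absence model); chunks are ranked by the difference of the two scores and the label's performance is summarised with `roc_auc_score`, so the value stored under the key `'auc_precisionrecall'` is in fact an ROC AUC. The self-contained sketch below (not part of the repository; the array names and synthetic scores are illustrative assumptions) shows that reduction on toy data:

```python
# Standalone sketch of the per-label evaluation performed in __compute_performance.
# All names below (y_true, score_present, score_absent) are hypothetical; the
# original code reads these values from Results['allresults'] and the dataset
# DataFrame instead of generating them.
import numpy as np
from sklearn.metrics import roc_auc_score

rng = np.random.RandomState(0)
n_chunks = 200

# Hypothetical ground truth for one label: 1 = tag present, 0 = tag absent.
y_true = rng.randint(0, 2, size=n_chunks)

# Hypothetical scores from the two per-label models (e.g. GMM log-likelihoods):
# one model trained on chunks carrying the tag, one on chunks without it.
score_present = rng.randn(n_chunks) + y_true   # tends to be higher when the tag is present
score_absent = rng.randn(n_chunks) - y_true    # tends to be higher when the tag is absent

# As in the script, each chunk is scored by the difference between the
# "presence" and "absence" model outputs ...
decision_scores = score_present - score_absent

# ... and the label's performance is summarised as the area under the ROC curve.
auc = roc_auc_score(y_true == 1, decision_scores)
print("ROC AUC for this label: %.3f" % auc)
```

One practical note: `find`, pulled in by `from pylab import *`, has since been removed from matplotlib; `numpy.flatnonzero` computes the same indices over a flattened boolean mask and should be able to stand in for it in `__compute_performance`.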