#
# compute_performance_statistics.py:
# Compute performance statistics using experiment data structures
#
# Author: Peter Foster
# (c) 2015 Peter Foster
#

import itertools

import numpy as np
import pandas as pd
from pandas import DataFrame
from sklearn.metrics import roc_auc_score


def compute_performance_statistics(EXPER, Datasets, Settings, iterableParameters=None, discardMissingPredictions=False):
    """Compute performance statistics for each dataset in the experiment.

    If iterableParameters is given, statistics are computed separately for
    every combination of the named parameters in EXPER['settings'].
    """
    for dataset in EXPER['datasets'].keys():
        if iterableParameters:
            for parameterTuple in itertools.product(*[EXPER['settings'][p] for p in iterableParameters]):
                parameterStr = "_"
                for i, p in zip(iterableParameters, parameterTuple):
                    parameterStr += i + '=' + str(p)
                EXPER['datasets'][dataset][parameterTuple] = __compute_performance(EXPER['datasets'][dataset][parameterTuple], Datasets, Settings, dataset, EXPER['name'] + parameterStr, discardMissingPredictions)
        else:
            EXPER['datasets'][dataset] = __compute_performance(EXPER['datasets'][dataset], Datasets, Settings, dataset, EXPER['name'], discardMissingPredictions)
    return EXPER


def __compute_performance(Results, Datasets, Settings, dataset, experName, discardMissingPredictions):
    """Compute per-label performance statistics for a single set of results."""
    # Rows correspond to items, columns to labels
    Results['allresults'] = DataFrame(Results['scores']).T

    Data = Datasets[dataset]['dataset'].copy(deep=True)

    # Remove entries in the dataset for which we have no result (cf. EXPER004)
    if discardMissingPredictions:
        I = np.flatnonzero(Results['allresults'].notnull().all(axis=1).values)
        Data = Data.iloc[I]
        Results['allresults'] = Results['allresults'].iloc[I]

    # Test integrity of results
    assert not Results['allresults'].isnull().values.any()

    # Performance statistics
    Results['performance'] = {}

    if 'predictions' in Results.keys() and not isinstance(Results['predictions'], DataFrame):
        Results['predictions'] = DataFrame(Results['predictions']).T

    Results['performance']['classwise'] = {}
    # Evaluation code for multilabel classification; for each label there are
    # two models, each of which yields scores for tag presence versus absence
    for col in Datasets[dataset]['consideredlabels']:
        Results['performance']['classwise'][col] = {}
        # Classification accuracy is not easily interpretable here due to class
        # skew, so we evaluate the area under the ROC curve computed from the
        # score difference between the tag-present and tag-absent models
        # (stored under the existing 'auc_precisionrecall' key for compatibility)
        scores = Results['allresults'][col].apply(lambda o: o[True] - o[False])
        area = roc_auc_score(Data[col] == 1, scores)
        Results['performance']['classwise'][col]['auc_precisionrecall'] = area

    # Store summary statistics in a data frame
    Results['performance']['classwise'] = DataFrame(Results['performance']['classwise'])
    return Results
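

# --------------------------------------------------------------------------
# Minimal usage sketch (illustration only; not part of the original
# experiment pipeline). The dataset name 'demo', the labels 'vocal' and
# 'percussion', and the shapes of EXPER and Datasets below are assumptions
# about how the experiment data structures are laid out: per-item scores are
# dicts mapping True/False (tag-present / tag-absent model) to a score, and
# the ground truth is a binary DataFrame with one column per considered label.
if __name__ == '__main__':
    items = ['clip%03d' % i for i in range(20)]
    labels = ['vocal', 'percussion']

    # Ground truth: alternating 0/1 per label so that both classes occur
    truth = DataFrame({lab: [(i + k) % 2 for i in range(len(items))]
                       for k, lab in enumerate(labels)}, index=items)
    Datasets = {'demo': {'dataset': truth, 'consideredlabels': labels}}

    # Scores: for each item and label, scores under the tag-present (True)
    # and tag-absent (False) models
    rng = np.random.RandomState(0)
    scores = {item: {lab: {True: rng.rand(), False: rng.rand()}
                     for lab in labels}
              for item in items}

    EXPER = {'name': 'EXPER_demo',
             'settings': {},
             'datasets': {'demo': {'scores': scores}}}

    EXPER = compute_performance_statistics(EXPER, Datasets, {}, iterableParameters=None)
    # One row ('auc_precisionrecall') per statistic, one column per label
    print(EXPER['datasets']['demo']['performance']['classwise'])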