view gmm_baseline_experiments/compute_performance_statistics.py @ 5:b523456082ca tip
Update path to dataset and reflect modified chunk naming convention.
author | peterf
date | Mon, 01 Feb 2016 21:35:27 +0000
parents | cb535b80218a
children |
line source
#
# compute_performance_statistics.py:
# Compute performance statistics using experiment data structures
#
# Author: Peter Foster
# (c) 2015 Peter Foster
#
from pylab import *  # NB: the star import also provides numpy's any() and matplotlib.mlab's find(), both used below
from pandas import Series, DataFrame
import pandas as pd
from sklearn.metrics import roc_auc_score
import itertools
import pdb

def compute_performance_statistics(EXPER, Datasets, Settings, iterableParameters=None, discardMissingPredictions=False):
    for dataset in EXPER['datasets'].keys():
        if iterableParameters:
            # Evaluate each combination of the iterable parameter settings
            for parameterTuple in itertools.product(*[EXPER['settings'][p] for p in iterableParameters]):
                parameterStr = "_"
                for i, p in zip(iterableParameters, parameterTuple):
                    parameterStr += i + '=' + str(p)
                EXPER['datasets'][dataset][parameterTuple] = __compute_performance(
                    EXPER['datasets'][dataset][parameterTuple], Datasets, Settings,
                    dataset, EXPER['name'] + parameterStr, discardMissingPredictions)
        else:
            EXPER['datasets'][dataset] = __compute_performance(
                EXPER['datasets'][dataset], Datasets, Settings,
                dataset, EXPER['name'], discardMissingPredictions)
    return EXPER

def __compute_performance(Results, Datasets, Settings, dataset, experName, discardMissingPredictions):
    #pdb.set_trace()
    Results['allresults'] = DataFrame(Results['scores']).T
    Data = Datasets[dataset]['dataset'].copy(deep=True)
    #Remove entries in Dataset for which we have no result (cf. EXPER004)
    if discardMissingPredictions:
        I = find(Results['allresults'].isnull().apply(lambda b: ~b))
        Data = Data.iloc[I]
        Results['allresults'] = DataFrame(Results['allresults'].iloc[I])
    #Test integrity of results
    assert(not any(pd.isnull(Results['allresults'])))
    #Performance statistics
    Results['performance'] = {}
    #Classification accuracy
    if 'predictions' in Results.keys() and not(isinstance(Results['predictions'], DataFrame)):
        Results['predictions'] = DataFrame(Results['predictions']).T
    Results['performance']['classwise'] = {}
    #Evaluation code for multilabel classification; for each label there are two models, each of which yields scores for tag presence versus absence
    for col in Datasets[dataset]['consideredlabels']:
        Results['performance']['classwise'][col] = {}
        #Classification accuracy not easily interpretable here due to skew
        scores = Results['allresults'][col].apply(lambda o: o[True] - o[False])
        area = roc_auc_score(Data[col]==1, scores)
        #Note: roc_auc_score yields area under the ROC curve, despite the key name below
        Results['performance']['classwise'][col]['auc_precisionrecall'] = area
    #Store summary statistics in data frame
    Results['performance']['classwise'] = DataFrame(Results['performance']['classwise'])
    return Results
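
For reference, a minimal usage sketch follows; it is not part of the repository. The layout of `EXPER` and `Datasets` is inferred from the keys accessed in the code above, while the dataset name, chunk identifiers, labels and scores are invented solely for illustration.

```python
# Toy usage sketch (illustrative only): builds the minimal EXPER/Datasets
# structures that compute_performance_statistics expects and evaluates them.
import numpy as np
from pandas import DataFrame
from compute_performance_statistics import compute_performance_statistics  # assumes the module is importable

chunks = ['chunk%02d' % k for k in range(8)]   # hypothetical chunk identifiers
labels = ['c', 'm']                            # hypothetical tag labels

# Ground-truth annotations: one row per chunk, one binary column per considered label.
annotations = DataFrame({lab: [k % 2 for k in range(len(chunks))] for lab in labels},
                        index=chunks)

# Per-chunk scores: for each label, the scores of the "tag present" (True)
# and "tag absent" (False) models.
rng = np.random.RandomState(0)
scores = {chunk: {lab: {True: rng.rand(), False: rng.rand()} for lab in labels}
          for chunk in chunks}

Datasets = {'development': {'dataset': annotations, 'consideredlabels': labels}}
EXPER = {'name': 'EXPER_toy',
         'datasets': {'development': {'scores': scores}}}

EXPER = compute_performance_statistics(EXPER, Datasets, Settings=None)
print(EXPER['datasets']['development']['performance']['classwise'])
```

After the call, `EXPER['datasets'][dataset]['performance']['classwise']` holds a DataFrame with one column per considered label and a row ('auc_precisionrecall') containing the ROC AUC computed from the score differences.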