view gmm_baseline_experiments/exper002.py @ 5:b523456082ca tip

Update path to dataset and reflect modified chunk naming convention.
author peterf
date Mon, 01 Feb 2016 21:35:27 +0000
parents cb535b80218a
children
line wrap: on
line source
#
# exper002.py:
#    Wrapper function for GMM model estimation
#
# Author: Peter Foster
# (c) 2015 Peter Foster
#

from pylab import *
from gmm import GMM_methods
import pdb

    
def _datasetRow(Dataset, i):
    """Return the row that the legacy expression ``Dataset.ix[i]`` selects.

    The ``.ix`` indexer is used when the object still provides it. On pandas
    releases where ``.ix`` has been removed, its behaviour for an integer key
    is emulated: label lookup when the index is integer-typed, positional
    lookup otherwise.
    """
    legacyIndexer = getattr(Dataset, 'ix', None)
    if legacyIndexer is not None:
        return legacyIndexer[i]
    if getattr(Dataset.index.dtype, 'kind', 'O') in ('i', 'u'):
        return Dataset.loc[i]
    return Dataset.iloc[i]


def exper002_multilabelclassification(Dataset, ConsideredLabels, CrossVal, Settings, numComponentValues=(1,), featureTypeValues=('librosa_mfccs',)):
    """Train and test GMMs for every label, fold and parameter combination.

    For each (numComponents, featureType) pair and each cross-validation fold,
    the feature matrices of the training files are stacked, and for every label
    in ConsideredLabels GMM_methods.GMMTrainAndTest is called on the fold's
    data. The per-file scores it returns are stored per test file and label.

    Parameters:
        Dataset: indexable collection of files supporting len(); each row
            provides row['features'][featureType] (an array with one feature
            vector per row) and row[label] for every considered label.
            NOTE(review): presumably a pandas DataFrame -- confirm with caller.
        ConsideredLabels: iterable of label names to build models for.
        CrossVal: iterable of (trainI, testI) index collections. It is
            materialised once, so a one-shot iterator/generator is accepted.
        Settings: unused by this function; kept for interface compatibility.
        numComponentValues: values passed on as numComponents to the GMM code.
        featureTypeValues: feature types to evaluate.

    Returns:
        dict keyed by (numComponents, featureType); each value is
        {'scores': {fileIndex: {label: {True: score, False: score}}}} where
        fileIndex ranges over range(len(Dataset)) and only files that appeared
        in a test fold receive label entries.

    Raises:
        AssertionError: if any score returned by the GMM code is not finite.
    """
    np.random.seed(10553)

    #The folds are reused for every parameter combination, so materialise them
    #once; otherwise a generator would be exhausted after the first combination
    folds = list(CrossVal)

    EXPER = {}
    for numComponents in numComponentValues:
        for featureType in featureTypeValues:
            parameterTuple = (numComponents, featureType)
            print('Estimating models for numComponents=' + str(numComponents) + ', featureType=' + str(featureType))
            #For each track and for each label store presence and absence scores
            scores = {i: {} for i in range(len(Dataset))}
            EXPER[parameterTuple] = {'scores': scores}

            #Iterate through folds
            for trainI, testI in folds:
                trainingFeatures = [np.copy(_datasetRow(Dataset, i)['features'][featureType]) for i in trainI]
                NFeaturesPerFile = [features.shape[0] for features in trainingFeatures]

                Data = {}
                #Convert data to one large matrix (for GMM code)
                Data['trainingData'] = np.vstack(trainingFeatures)
                Data['testDataPerFile'] = [np.copy(_datasetRow(Dataset, i)['features'][featureType]) for i in testI]

                for label in ConsideredLabels:
                    fileTargets = [_datasetRow(Dataset, i)[label] for i in trainI]
                    #Duplicate labels according to number of feature vectors in each file.
                    #np.repeat stays aligned with trainingData even when files differ in
                    #length; np.append([], ...) keeps the dtype promotion of the flat vector
                    Data['trainingTargets'] = np.append([], np.repeat(fileTargets, NFeaturesPerFile))

                    Result = GMM_methods.GMMTrainAndTest(Data, numComponents=numComponents)

                    #Sanity check: every returned score must be finite
                    for fileScores in Result['fileScores'].values():
                        assert(np.all(np.isfinite(fileScores)))

                    #Assign scores to output data structure; position j in the
                    #score sequences corresponds to the j-th entry of testI
                    for j, i in enumerate(testI):
                        scores[i][label] = {True: Result['fileScores'][True][j], False: Result['fileScores'][False][j]}
    return EXPER