annotate gmm_baseline_experiments/gmm/GMM_methods.py @ 5:b523456082ca tip

Update path to dataset and reflect modified chunk naming convention.
author peterf
date Mon, 01 Feb 2016 21:35:27 +0000
parents 39258b875228
children
rev   line source
peterf@2 1 #
peterf@2 2 # exper002.py:
peterf@2 3 # Estimate GMMs
peterf@2 4 #
peterf@2 5 # Author: Adam Stark, Peter Foster
peterf@2 6 # (c) 2014 Adam Stark
peterf@2 7 # (c) 2015 Peter Foster
peterf@2 8 #
peterf@2 9
peterf@2 10 import numpy as np
peterf@2 11 import time
peterf@2 12 from sklearn import mixture
peterf@2 13 import pdb
peterf@2 14
peterf@2 15 #=================================================================
def scale(X):
    """Standardize the columns of X to zero mean and unit variance.

    Returns [standardized X, column means, column stds]; the statistics
    are returned so the identical transform can be re-applied to
    held-out data later.
    """
    colMean = np.mean(X, 0)
    colStd = np.std(X, 0)
    standardized = (X - colMean) / colStd
    return [standardized, colMean, colStd]
peterf@2 25
peterf@2 26
peterf@2 27 #=================================================================
def GMMTrainAndTest(data, numComponents, covarianceType='full'):
    """Train one GMM per class label and score every test file under each model.

    Parameters
    ----------
    data : dict
        Expects keys 'trainingData' (2-D frame-by-feature array),
        'trainingTargets' (one label per training frame) and
        'testDataPerFile' (iterable of per-file 2-D feature arrays).
        # assumes frames are rows and features are columns -- TODO confirm
    numComponents : int
        Number of mixture components for every class model.
    covarianceType : str
        Covariance type passed through to sklearn's mixture.GMM.

    Returns
    -------
    dict
        'fileScores' (per-label list of per-file mean log-likelihoods),
        'algorithm', 'numComponents', 'runningTime' (scoring wall-clock
        seconds) and 'suffix' (filename suffix encoding numComponents).
    """
    # Floor score used when a file has no usable frames, or when the model
    # score comes back NaN.
    MIN_SCORE = -100000000

    X = data['trainingData']
    Y = data['trainingTargets']

    # Standardize features; keep the training statistics so the exact same
    # transform can be applied to the test data below.
    [X, trainMean, trainStd] = scale(X)

    # No sampling; use the entire set of frames.
    X = X.tolist()
    Y = Y.tolist()

    # Partition the training frames by class label.
    Labels = set(Y)
    DataByLabel = {label: [] for label in Labels}
    for x, y in zip(X, Y):
        DataByLabel[y].append(x)
    for label in Labels:
        DataByLabel[label] = np.array(DataByLabel[label])

    # Fit one GMM per label.
    # NOTE(review): mixture.GMM is deprecated and was removed in
    # scikit-learn 0.20 in favour of mixture.GaussianMixture; kept here
    # to preserve behavior on the pinned environment.
    print("Training...")
    GMMS = {}
    for label in Labels:
        GMMS[label] = mixture.GMM(n_components=numComponents,
                                  covariance_type=covarianceType)
        GMMS[label].fit(DataByLabel[label])
    print("Done!")

    startTime = time.time()

    # Score each test file under every class model: apply the training
    # standardization, drop any frame containing a NaN, then average the
    # per-frame log-likelihoods.
    allFileScores = {label: [] for label in Labels}
    for fileFeatures in data['testDataPerFile']:
        tmp = fileFeatures - trainMean
        tmp = tmp / trainStd

        for label in Labels:
            if tmp.size > 0:
                # Average score across all non-NaN frames; nanmax guards
                # against an all-NaN mean collapsing the score below the floor.
                nonNan = np.all(~np.isnan(tmp), axis=1)
                score = np.nanmax((np.mean(GMMS[label].score(tmp[nonNan, :])),
                                   MIN_SCORE))
            else:
                # Empty file: fall back to the floor score for this label.
                score = MIN_SCORE
            allFileScores[label].append(score)

    # Store the scoring wall-clock time.
    runningTime = time.time() - startTime

    # Create a suffix (for saving files).
    suffix = "_M=" + str(numComponents)

    result = {}
    result['fileScores'] = allFileScores
    result['algorithm'] = "GMM"
    result['numComponents'] = numComponents
    result['runningTime'] = runningTime
    result['suffix'] = suffix

    return result