#
# exper002.py:
# Estimate GMMs
#
# Author: Adam Stark, Peter Foster
# (c) 2014 Adam Stark
# (c) 2015 Peter Foster
#

import numpy as np
import time
from sklearn import mixture
import pdb

#=================================================================
def scale(X):
    """Standardise X to zero mean and unit variance per column; return [X, mean, std]."""
    m = np.mean(X, 0)
    std = np.std(X, 0)

    X = X - m
    X = X / std

    return [X, m, std]


#=================================================================
def GMMTrainAndTest(data, numComponents, covarianceType='full'):
    """Train one GMM per class on data['trainingData'] and score each file in data['testDataPerFile']."""
    X = data['trainingData']
    Y = data['trainingTargets']

    # scale data
    [X, trainMean, trainStd] = scale(X)

    # No sampling; use entire set of frames
    #numTrainingExamplesOfEachType = 20000

    X = X.tolist()
    Y = Y.tolist()

    # Get label set
    Labels = set(Y)
    # Partition data according to labels
    DataByLabel = {label: [] for label in Labels}
    for x, y in zip(X, Y):
        DataByLabel[y].append(x)

    # Sample data
    for label in Labels:
        DataByLabel[label] = np.array(DataByLabel[label])
        #I = np.random.choice(DataByLabel[label].shape[0], numTrainingExamplesOfEachType)
        #DataByLabel[label] = DataByLabel[label][I]

    print("Training...")
    GMMS = {}
    for label in Labels:
        # mixture.GMM is the pre-0.18 scikit-learn API (later superseded by GaussianMixture)
        GMMS[label] = mixture.GMM(n_components=numComponents, covariance_type=covarianceType)
        GMMS[label].fit(DataByLabel[label])
    print("Done!")

    startTime = time.time()

    allFileScores = {label: [] for label in Labels}
    i = 0
    for fileFeatures in data['testDataPerFile']:
        #print("Testing file ", (i+1))
        tmp = fileFeatures
        tmp = tmp - trainMean
        tmp = tmp / trainStd

        for label in Labels:
            if tmp.size > 0:
                # Average score across all frames containing no NaNs,
                # floored at -100000000 to guard against NaN scores
                nonNan = np.all(~np.isnan(tmp), axis=1)
                score = np.nanmax((np.mean(GMMS[label].score(tmp[nonNan, :])), -100000000))
                allFileScores[label].append(score)
            else:
                score = -100000000
                allFileScores[label].append(score)
        i += 1

    # store the running time
    runningTime = time.time() - startTime

    # create a suffix (for saving files)
    suffix = "_M=" + str(numComponents)

    result = {}
    result['fileScores'] = allFileScores
    result['algorithm'] = "GMM"
    result['numComponents'] = numComponents

    result['runningTime'] = runningTime
    result['suffix'] = suffix

    return result
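

#=================================================================
# Example usage (illustrative sketch only, not part of the original experiment):
# the layout of the 'data' dict -- 2-D arrays of per-frame features with one
# label per frame, plus a list of per-file test feature arrays -- is inferred
# from how GMMTrainAndTest indexes it above. Synthetic data stands in for real
# features here.
if __name__ == "__main__":
    rng = np.random.RandomState(0)

    # two synthetic classes of 5-dimensional "frames"
    framesA = rng.randn(200, 5)
    framesB = rng.randn(200, 5) + 3.0

    data = {
        'trainingData': np.vstack((framesA, framesB)),
        'trainingTargets': np.array(['A'] * 200 + ['B'] * 200),
        'testDataPerFile': [rng.randn(50, 5), rng.randn(50, 5) + 3.0],
    }

    result = GMMTrainAndTest(data, numComponents=2)
    print(result['fileScores'])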