#
# exper002.py:
# Estimate GMMs
#
# Author: Adam Stark, Peter Foster
# (c) 2014 Adam Stark
# (c) 2015 Peter Foster
#

import pdb
import time

import numpy as np

try:
    from sklearn import mixture
except ImportError:
    # scale() only needs NumPy.  GMMTrainAndTest raises a clear ImportError
    # at the point where it actually has to build a model.
    mixture = None

# Score assigned to a file that has no usable frames; also the lower bound
# applied to every per-file score.
_SCORE_FLOOR = -100000000


#=================================================================
def scale(X):
    """Standardise X along axis 0.

    Returns the list ``[X_scaled, mean, std]`` where ``mean`` and ``std`` are
    taken along axis 0 and ``X_scaled = (X - mean) / std``.

    Entries of ``std`` that are exactly zero (constant features) are replaced
    by 1 before dividing, so those features come out as 0 rather than NaN.
    The returned ``std`` is the divisor that was actually used, which lets
    callers apply the identical transform to other data.
    """
    m = np.mean(X, 0)
    std = np.std(X, 0)

    # A constant column has std == 0; dividing by it would give 0/0 = NaN.
    std = np.where(std == 0, 1.0, std)

    X = (X - m) / std

    return [X, m, std]


#=================================================================
def _new_mixture_model(numComponents, covarianceType):
    """Create an unfitted Gaussian mixture with whichever sklearn API exists."""
    if mixture is None:
        raise ImportError("scikit-learn is required to train GMMs")

    # Prefer the legacy estimator when it is present so that results on old
    # scikit-learn installations are unchanged; newer releases only ship
    # GaussianMixture.
    legacy = getattr(mixture, 'GMM', None)
    if legacy is not None:
        return legacy(n_components=numComponents,
                      covariance_type=covarianceType)
    return mixture.GaussianMixture(n_components=numComponents,
                                   covariance_type=covarianceType)


def _file_score(model, frames):
    """Return the floored mean log-likelihood of *frames* under *model*."""
    if frames.size == 0:
        return _SCORE_FLOOR
    # Legacy GMM.score() returns one value per frame, GaussianMixture.score()
    # returns their mean; np.mean() yields the same number in both cases.
    # nanmax also maps a NaN score onto the floor.
    return np.nanmax((np.mean(model.score(frames)), _SCORE_FLOOR))


#=================================================================
def GMMTrainAndTest(data, numComponents, covarianceType='full'):
    """Fit one GMM per class label and score every test file against each.

    Parameters
    ----------
    data : dict
        Must provide
        ``'trainingData'``    -- array of training frames (standardised along
                                 axis 0; presumably frames x features),
        ``'trainingTargets'`` -- array with one label per training frame,
        ``'testDataPerFile'`` -- iterable with one 2-D frame array per file.
    numComponents : int
        Number of mixture components of each GMM.
    covarianceType : str
        Passed through as the estimator's ``covariance_type``.

    Returns
    -------
    dict
        ``'fileScores'``    -- {label: [score per test file, in input order]},
        ``'algorithm'``     -- the string "GMM",
        ``'numComponents'`` -- echo of the argument,
        ``'runningTime'``   -- seconds spent scoring (training is excluded),
        ``'suffix'``        -- "_M=<numComponents>", for naming output files.

    All training frames are used; no subsampling is performed.  Test files
    are normalised with the training mean/std, frames containing NaN are
    dropped, and a file left with no frames scores ``-100000000``.
    """
    # scale data; the training statistics are reused for the test files
    X, trainMean, trainStd = scale(data['trainingData'])
    # float64 matches the precision the data had after the former
    # list round-trip
    X = np.asarray(X, dtype=np.float64)
    Y = np.asarray(data['trainingTargets'])

    # Get label set (native Python values, used as result keys)
    Labels = set(Y.tolist())

    print("Training...")
    GMMS = {}
    for label in Labels:
        model = _new_mixture_model(numComponents, covarianceType)
        # Boolean mask keeps this label's frames in their original order
        model.fit(X[Y == label])
        GMMS[label] = model
    print("Done!")

    startTime = time.time()

    allFileScores = {label: [] for label in Labels}
    for fileFeatures in data['testDataPerFile']:
        frames = np.asarray(fileFeatures)
        # Check for emptiness first: an empty array of shape (0,) cannot be
        # broadcast against the per-feature training statistics.
        if frames.size > 0:
            frames = (frames - trainMean) / trainStd
            # Drop every frame that contains a NaN
            frames = frames[np.all(~np.isnan(frames), axis=1)]

        for label in Labels:
            allFileScores[label].append(_file_score(GMMS[label], frames))

    # store the running time
    runningTime = time.time() - startTime

    # create a suffix (for saving files)
    suffix = "_M=" + str(numComponents)

    result = {}
    result['fileScores'] = allFileScores
    result['algorithm'] = "GMM"
    result['numComponents'] = numComponents

    result['runningTime'] = runningTime
    result['suffix'] = suffix

    return result