Mercurial > hg > chime-home-dataset-annotation-and-baseline-evaluation-code
view gmm_baseline_experiments/gmm/GMM_methods.py @ 2:cb535b80218a
Remaining scripts and brief documentation
author | peterf |
---|---|
date | Fri, 10 Jul 2015 23:24:23 +0100 |
parents | |
children | 0147bf388eb8 |
line wrap: on
line source
#
# exper002.py:
#    Estimate GMMs
#
# Author: Adam Stark, Peter Foster
# (c) 2014 Adam Stark
# (c) 2015 Peter Foster
#

import numpy as np
import time
from sklearn import mixture
import pdb

# Lower bound applied to every per-file score. It is also the score given to
# a file for which no usable log-likelihood exists (empty file, every frame
# contains a NaN, or the likelihood itself evaluates to NaN).
_SCORE_FLOOR = -100000000

#=================================================================
def scale(X):
    """Standardise the columns of X to zero mean and unit variance.

    Parameters:
        X: array-like; statistics are computed along axis 0.

    Returns:
        A list [X_scaled, m, std] where m is the mean along axis 0 and std
        is the divisor that was applied. Entries of the standard deviation
        that are exactly zero (constant features) are replaced by 1 so that
        the division cannot produce NaN/inf; such features end up as all
        zeros after centring. Callers can reuse m and std to normalise other
        data in the same way.
    """
    m = np.mean(X, 0)
    std = np.std(X, 0)
    # A constant feature has zero spread; dividing by 1 leaves it at 0
    # instead of turning the whole column into NaN.
    std = np.where(std == 0, 1.0, std)
    X = (X - m) / std
    return [X, m, std]

#=================================================================
def _fitGMM(X, numComponents, covarianceType):
    """Fit and return a Gaussian mixture model on the rows of X.

    Uses the legacy mixture.GMM class when the installed scikit-learn still
    provides it, so results are unchanged on the versions this code was
    written for. On releases where GMM has been removed it falls back to
    mixture.GaussianMixture, whose defaults (initialisation, covariance
    regularisation) differ, so scores are not numerically comparable across
    the two.
    """
    gmmClass = getattr(mixture, 'GMM', None)
    if gmmClass is None:
        gmmClass = mixture.GaussianMixture
    model = gmmClass(n_components=numComponents, covariance_type=covarianceType)
    model.fit(X)
    return model

#=================================================================
def GMMTrainAndTest(data, numComponents, covarianceType='full',
                    numTrainingExamplesOfEachType=2000):
    """Train one GMM per class label and score every test file against each.

    Parameters:
        data: dict with the keys
            'trainingData'    - training feature vectors, one per row,
            'trainingTargets' - 1-D array of labels, one per training row,
            'testDataPerFile' - iterable of 2-D arrays, one per test file,
                                with one frame per row.
        numComponents: number of mixture components in each GMM.
        covarianceType: covariance type passed through to scikit-learn.
        numTrainingExamplesOfEachType: number of training rows drawn for
            each label. Rows are drawn with replacement (np.random.choice
            default), so a label with fewer rows than this is oversampled.

    Returns:
        dict with the keys
            'fileScores'    - {label: [score per test file]}, where a score
                              is the mean per-frame log-likelihood under
                              that label's GMM, bounded below by -100000000,
            'algorithm'     - the string "GMM",
            'numComponents' - numComponents as passed in,
            'runningTime'   - seconds spent in the scoring phase only
                              (training time is not included),
            'suffix'        - "_M=<numComponents>", for naming output files.
    """
    X = data['trainingData']
    Y = np.asarray(data['trainingTargets'])

    # scale data; the training statistics are reused on the test files
    [X, trainMean, trainStd] = scale(X)

    # Get label set (as Python scalars, which become the result dict keys)
    Labels = set(Y.tolist())

    # Partition data according to labels and sample each partition.
    # All sampling is done before any fitting so that the order in which
    # the global NumPy random state is consumed stays the same.
    DataByLabel = {}
    for label in Labels:
        labelData = X[Y == label]
        I = np.random.choice(labelData.shape[0], numTrainingExamplesOfEachType)
        DataByLabel[label] = labelData[I]

    #print "Training..."
    GMMS = {}
    for label in Labels:
        GMMS[label] = _fitGMM(DataByLabel[label], numComponents, covarianceType)
    #print "Done!"

    startTime = time.time()

    allFileScores = {label: [] for label in Labels}

    for fileFeatures in data['testDataPerFile']:
        fileFeatures = np.asarray(fileFeatures)

        # Check for an empty file before doing any arithmetic on it, so an
        # empty array of any shape gets the floor score instead of failing
        # on broadcasting.
        if fileFeatures.size == 0:
            for label in Labels:
                allFileScores[label].append(_SCORE_FLOOR)
            continue

        frames = (fileFeatures - trainMean) / trainStd
        # Drop frames containing a NaN; this is the same for every label,
        # so it is done once per file.
        frames = frames[np.all(~np.isnan(frames), axis=1)]

        for label in Labels:
            if frames.shape[0] == 0:
                # No usable frame: skip the model (newer scikit-learn
                # rejects empty input) and assign the floor directly.
                score = float(_SCORE_FLOOR)
            else:
                # Average score across all frames. np.mean makes this work
                # whether score() returns per-frame values or their mean;
                # nanmax clamps to the floor and absorbs a NaN likelihood.
                score = np.nanmax((np.mean(GMMS[label].score(frames)), _SCORE_FLOOR))
            allFileScores[label].append(score)

    # store the running time
    runningTime = time.time() - startTime

    # create a suffix (for saving files)
    suffix = "_M=" + str(numComponents)

    result = {}
    result['fileScores'] = allFileScores
    result['algorithm'] = "GMM"
    result['numComponents'] = numComponents
    result['runningTime'] = runningTime
    result['suffix'] = suffix

    return result