#!/usr/bin/python2
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 23 11:53:17 2015

@author: mmxgn
"""

# This file does the cluster estimation and the removal of outliers

from sys import argv, exit
from essentia.standard import YamlInput, YamlOutput
from essentia import Pool
from pca import *

from numpy import *
from sklearn import cluster
from sklearn.metrics import pairwise_distances
from sklearn.cluster import KMeans, MiniBatchKMeans
from matplotlib.pyplot import *
#from sklearn.mixture import GMM
from sklearn.naive_bayes import GaussianNB, MultinomialNB
import scipy.signal
from scipy.signal import decimate
from sklearn import cross_validation

#from hmmlearn import hmm
from hmmlearn.hmm import GMM
from hmmlearn import hmm

from sklearn import svm
#from adpcm import adm, adm_reconstruct


mse = lambda A, B: ((array(A) - array(B)) ** 2).mean()
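# e.g. mse([0, 0], [1, 1]) == 1.0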


def smooth_matrix_1D(X):
    # Smooth each row of X with a normalized 51-tap Gaussian window
    # (sigma = 4), zero-padding 50 samples at each end to reduce edge effects.
    window = scipy.signal.gaussian(51, 4)
    window = window / sum(window)
    intermx = zeros((X.shape[0], X.shape[1] + 100))
    intermx[:, 50:-50] = X

    for m in range(0, X.shape[0]):
        # print intermx.shape
        intermx[m, :] = convolve(ravel(intermx[m, :]), window, 'same')

    return intermx[:, 50:-50]

def adm_reconstruct(codeword, h, dmin=.01, dmax=.28):
    x = zeros((1, codeword.shape[1]))

    delta1 = dmin
    delta2 = dmin
    Sum = h

    x[0] = h
    for i in range(0, codeword.shape[1]):
        if codeword[0, i] == 0:
            delta1 = dmin
            delta2 = dmin

        elif codeword[0, i] == 1:
            delta2 = dmin
            Sum += delta1
            delta1 *= 2
            if delta1 > dmax:
                delta1 = dmax

        elif codeword[0, i] == -1:
            delta1 = dmin
            Sum -= delta2
            delta2 *= 2
            if delta2 > dmax:
                delta2 = dmax
        x[0, i] = Sum
    return x

def adm(x, dmin=.01, dmax=.28, tol=0.0001):

    # Adaptive delta modulation, adapted from the code:
    # (adeltamod.m)
    #
    # Adaptive Delta Modulator
    # by Gandhar Desai (gdesai)
    # BITS Pilani Goa Campus
    # Date: 28 Sept, 2013

    xsig = x

    Lx = len(x)

    ADMout = zeros((1, Lx))
    codevec = zeros((1, Lx))


    Sum = x[0]
    delta1 = dmin
    delta2 = dmin
    mult1 = 2
    mult2 = 2
    for i in range(0, Lx):
        # print abs(xsig[i] - Sum)
        if (abs(xsig[i] - Sum) < tol):
            bit = 0
            delta2 = dmin
            delta1 = dmin


        elif (xsig[i] >= Sum):
            bit = 1
            delta2 = dmin
            Sum += delta1
            delta1 *= mult1
            if delta1 > dmax:
                delta1 = dmax


        else:
            bit = -1
            delta1 = dmin
            Sum -= delta2
            delta2 *= mult2
            if delta2 > dmax:
                delta2 = dmax



        ADMout[0, i] = Sum
        codevec[0, i] = bit

    return ADMout, codevec, x[0]

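# Illustrative round-trip check (not part of the original pipeline; the toy
# signal below is made up): adm() codes a slowly varying signal into the
# symbols {-1, 0, 1}, and adm_reconstruct() rebuilds it from the code plus
# the first sample. For a slow signal the two should agree closely.
def _adm_roundtrip_example():
    t = linspace(0, 1, 200)
    x = 0.5 + 0.2 * sin(2 * pi * t)  # toy parameter trajectory
    admout, code, firstval = adm(x, dmin=0.001, dmax=0.28, tol=0.001)
    x_hat = adm_reconstruct(code, firstval, dmin=0.001, dmax=0.28)
    print "[II] ADM round-trip MSE: %.6f" % mse(x, x_hat)
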
if __name__ == "__main__":
    if len(argv) != 2:
        print "[EE] Wrong number of arguments"
        print "[II] Correct syntax is:"
        print "[II] \t%s <training_file>" % argv[0]
        print "[II] where <training_file> is a .yaml file containing the"
        print "[II] features of the dataset (try output2_stage/fulltraining-last.yaml)"
        exit(-1)


    n_clusters = 4
    UpsamplingFactor = 1
    dmin = 0.001
    dmax = 0.28
    tol = 0.001

    infile = argv[1]

    features_pool = YamlInput(filename=infile)()



    feature_captions = features_pool.descriptorNames()
    parameter_captions = []


    for c in features_pool.descriptorNames():
        if c.split('.')[0] == 'parameter':
            parameter_captions.append(c)
        if c.split('.')[0] == 'metadata' or c.split('.')[0] == 'parameter':
            feature_captions.remove(c)



    # close('all')

    print "[II] Loaded training data from %s (%s) " % (infile, features_pool['metadata.date'][0])
    print "[II] %d Features Available: " % len(feature_captions)



    print str(feature_captions).replace("', ", "\n").replace('[', '').replace("'", "[II]\t ")[:-7]

    nfeatures_in = len(feature_captions)
    nparameters_in = len(parameter_captions)
    features_vector = zeros((nfeatures_in, len(features_pool[feature_captions[0]])))

    parameters_vector = zeros((nparameters_in, len(features_pool[parameter_captions[0]])))


    for i in range(0, nfeatures_in):
        features_vector[i, :] = features_pool[feature_captions[i]].T
    for i in range(0, nparameters_in):
        parameters_vector[i, :] = features_pool[parameter_captions[i]].T

    print "[II] %d parameters used:" % len(parameter_captions)
    print str(parameter_captions).replace("', ", "\n").replace('[', '').replace("'", "[II]\t ")[:-7].replace('parameter.', '')

    print "[II] Marking silent parts"

    silent_parts = zeros((1, len(features_pool[feature_captions[0]].T)))

    rms = features_vector[feature_captions.index('rms'), :]

    # Implementing a hysteresis gate -- the high threshold is 10% of the
    # mean RMS and the low threshold is 1% of the mean RMS.

    rms_threshold_mean = mean(rms)

    rms_threshold_max = max(rms)
    rms_threshold_min = min(rms)

    rms_threshold_high = 0.1 * rms_threshold_mean
    rms_threshold_low = 0.01 * rms_threshold_mean

    for n in range(1, len(rms)):
        prev = rms[n - 1]
        curr = rms[n]

        if prev >= rms_threshold_high:
            if curr < rms_threshold_low:
                silent_parts[0, n] = 1
            else:
                silent_parts[0, n] = 0
        elif prev <= rms_threshold_low:
            if curr > rms_threshold_high:
                silent_parts[0, n] = 0
            else:
                silent_parts[0, n] = 1
        else:
            silent_parts[0, n] = silent_parts[0, n - 1]


    if silent_parts[0, 1] == 1:
        silent_parts[0, 0] = 1



    active_index = invert(silent_parts.flatten().astype(bool))

    # Keep only active parts

    features_vector = features_vector[:, active_index]

    moments_vector = zeros((features_vector.shape[0], 2))

    print "[II] Storing moments vector"
    for i in range(0, features_vector.shape[0]):
        mean_ = mean(features_vector[i, :])
        std_ = std(features_vector[i, :], ddof=1)
        moments_vector[i, 0] = mean_
        moments_vector[i, 1] = std_

        features_vector[i, :] = (features_vector[i, :] - mean_) / std_

    features_vector_original = features_vector


    print "[II] Extracting PCA configuration "

    kernel, q, featurelist = extract_pca_configuration_from_data(features_vector)

    print "[II] Optimal number of PCs to keep: %d" % q

    feature_captions_array = array(feature_captions)

    features_to_keep = list(feature_captions_array[featurelist])
    print "[II] Decided to keep %d features:" % len(features_to_keep)
    print str(features_to_keep).replace("', ", "\n").replace('[', '').replace("'", "[II]\t ")[:-7]


    features_kept_data = features_vector[featurelist, :]

    features_vector = (kernel.T * features_kept_data)[0:q, :]

    parameters_k_means = KMeans(init='k-means++', n_init=10, max_iter=300, tol=0.0000001, verbose=0)

    print "[II] Trying ADM-coded parameters"
    print "[II] Upsampling features and parameters by a factor of %d" % UpsamplingFactor


    # Upsampled features and parameters
    features_vector_upsampled = smooth_matrix_1D(repeat(features_vector, UpsamplingFactor, axis=1))

    # feature_labels_upsampled = repeat(features_clustering_labels, UpsamplingFactor, axis=0)
    parameters_vector_upsampled = repeat(parameters_vector, UpsamplingFactor, axis=1)

    # parameters_vector_upsampled = smooth_matrix_1D(parameters_vector_upsampled)

    parameters_vector_upsampled_adm = matrix(zeros(shape(parameters_vector_upsampled)))
    parameters_vector_upsampled_code = matrix(zeros(shape(parameters_vector_upsampled)))
    parameters_vector_upsampled_firstval = matrix(zeros((parameters_vector_upsampled.shape[0], 1)))

    # Reconstructed parameters

    parameters_vector_upsampled_reconstructed = matrix(zeros(shape(parameters_vector_upsampled)))




    def adm_matrix(X, dmin=0.001, dmax=0.28, tol=0.001):

        out = matrix(zeros(shape(X)))
        code = matrix(zeros(shape(X)))
        firstval = matrix(zeros((X.shape[0], 1)))

        for i in range(0, X.shape[0]):
            out[i, :], code[i, :], firstval[i, 0] = adm(X[i, :], dmin=dmin, dmax=dmax, tol=tol)

        return out, code, firstval

    # parameters_vector_upsampled_reconstructed[i,:] = adm_reconstruct(parameters_vector_upsampled_code[i,:], parameters_vector_upsampled_firstval[i,0], dmin=dmin, dmax=dmax)

    def adm_matrix_reconstruct(code, firstval, dmin=0.001, dmax=0.28):
        X = matrix(zeros(shape(code)))
        for i in range(0, code.shape[0]):
            X[i, :] = adm_reconstruct(code[i, :], firstval[i, 0], dmin=dmin, dmax=dmax)

        return X
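    # Illustrative round trip (hypothetical toy input): adm_matrix() codes each
    # row independently, and adm_matrix_reconstruct() inverts it, e.g.
    #   _out, _code, _fv = adm_matrix(random.rand(2, 100))
    #   _rec = adm_matrix_reconstruct(_code, _fv)   # row-wise ADM decoding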

    parameters_vector_upsampled_adm, parameters_vector_upsampled_code, parameters_vector_upsampled_firstval = adm_matrix(parameters_vector_upsampled, dmin, dmax, tol)


    def diff_and_pad(X):
        # First-order difference along the time axis, zero-padded on the left
        # so the output has the same shape as the input.
        return concatenate((zeros((shape(X)[0], 1)), diff(X, axis=1)), axis=1)
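    # e.g. diff_and_pad(matrix('1. 2. 4.')) -> matrix('0. 1. 2.')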


    print "[II] Clustering features."

    features_clustering = GMM(n_components=n_clusters, covariance_type='diag')

    # GMM clustering is unsupervised, so fit() only needs the feature matrix.
    features_clustering.fit(features_vector_upsampled.T)

    features_clustering_means = features_clustering.means_
    features_clustering_labels = features_clustering.predict(features_vector_upsampled.T)
    features_clustering_sigmas = features_clustering.covars_

    features_vector_upsampled_estimated = zeros(shape(features_vector_upsampled))

    for n in range(0, len(features_vector_upsampled_estimated[0])):
        features_vector_upsampled_estimated[:, n] = features_clustering_means[features_clustering_labels[n]]


    print "[II] Features MSE for %d clusters: %.3f" % (n_clusters, mse(features_vector_upsampled, features_vector_upsampled_estimated))



    def happy_curve_classification(data, classes, estimator, Nd=1):
        print "[II] Generating Happy Curve "
        from copy import deepcopy
        estimator_fulldata = deepcopy(estimator)
        estimator_fulldata.fit(data, classes)
        labels = estimator_fulldata.predict(data)

        # 1. Split data in two, training and testing

        Ntr = int(round(data.shape[0] / 2))  # Training dataset size
        Nts = data.shape[0] - Ntr            # Testing dataset size

        ratios = []

        for n in range(Nd, Ntr):
            train, test, trainl, testl = cross_validation.train_test_split(data, classes, test_size=0.5, random_state=0)
            train = train[0:n, :]
            trainl = trainl[0:n]
            # print "trainl"
            # print trainl
            estimator_ = deepcopy(estimator)
            estimator_.fit(train, trainl)
            labels = estimator_.predict(test)

            ratio = sum(array(testl == labels).astype(float)) / len(labels)

            ratios.append(ratio)


        return ratios


    def cross_validate_classification(data, classes, estimator):
        print "[II] Crossvalidating... "
        from copy import deepcopy
        estimator_fulldata = deepcopy(estimator)
        estimator_fulldata.fit(data, classes)

        percents = arange(0.1, 0.9, 0.1)
        MSEs = []
        labels = estimator_fulldata.predict(data)

        print "[II] for full training-testing: %.2f" % (sum(array(classes == labels).astype(float)) / len(labels))

        for p in percents:
            train, test, trainlabels, testlabels = cross_validation.train_test_split(data, classes, test_size=p, random_state=0)
            estimator_ = deepcopy(estimator)
            estimator_.fit(train, trainlabels)
            labels = estimator_.predict(test)
            print "[II] for training(%.2f)-testing(%.2f): %.2f" % ((1 - p), p, sum(array(testlabels == labels).astype(float)) / len(labels))

        return MSEs

    def cross_validate_clustering(data, estimator):
        print "[II] Crossvalidating... "
        estimator_fulldata = estimator
        estimator_fulldata.fit(data)

        # labels = estimator_fulldata.predict(data)
        means = estimator_fulldata.means_
        # print means

        percents = arange(0.1, 0.6, 0.1)
        MSEs = []
        reconstructed = zeros(shape(data))
        labels = estimator.predict(data)
        for n in range(0, len(reconstructed)):
            reconstructed[n, :] = means[labels[n]]

        MSEs.append(mse(data, reconstructed))
        for p in percents:
            train, test = cross_validation.train_test_split(data, test_size=p, random_state=0)
            train = matrix(train)
            test = matrix(test)

            estimator.fit(train)
            means = estimator.means_
            labels = estimator.predict(test)
            reconstructed = zeros(shape(test))
            for n in range(0, len(reconstructed)):
                reconstructed[n, :] = means[labels[n]]

            m = mse(test, reconstructed)

            print "[II] MSE for clustering crossvalidated data %.2f-%.2f: %.5f" % ((1 - p), p, m)
            MSEs.append(m)

        print "[II] Crossvalidation complete"

        return MSEs




    # Construct the parameter alphabet; each symbol is a distinct column
    # vector in the parameter code matrix.


    def vector_to_states(X):
        """
        Input: an M x N matrix with N samples of M variables.
        Output: the state sequence `state_seq', a codeword dictionary
        `parameters_alphabet', and its inverse `parameters_alphabet_inv'.
        """

        parameters_alphabet = {}
        n = 0

        for i in range(0, X.shape[1]):
            vec = tuple(ravel(X[:, i]))
            if vec not in parameters_alphabet:
                parameters_alphabet[vec] = n
                n += 1

        parameters_alphabet_inv = dict([(parameters_alphabet[m], m) for m in parameters_alphabet])

        state_seq = array([parameters_alphabet[tuple(ravel(X[:, m]))] for m in range(0, X.shape[1])])

        return state_seq, parameters_alphabet, parameters_alphabet_inv


    def states_to_vector(predicted, parameters_alphabet_inv):
        estimated = matrix(zeros((len(parameters_alphabet_inv[0]), len(predicted))))
        for i in range(0, len(predicted)):
            estimated[:, i] = matrix(parameters_alphabet_inv[predicted[i]]).T

        return estimated
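    # Illustrative round trip (hypothetical toy input): three distinct columns
    # yield a three-symbol alphabet, and states_to_vector() inverts the coding:
    #   _seq, _alpha, _alpha_inv = vector_to_states(matrix('0 1 1 0; 0 0 1 0'))
    #   # _seq == [0, 1, 2, 0]; states_to_vector(_seq, _alpha_inv) recovers the input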

    state_seq, parameters_alphabet, parameters_alphabet_inv = vector_to_states(parameters_vector_upsampled_code)


    parameters_change_variable = matrix(diff_and_pad(parameters_vector_upsampled) != 0).astype(int)

    changes_state_seq, changes_parameters_alphabet, changes_parameters_alphabet_inv = vector_to_states(parameters_change_variable)


    # This is an HMM that just codes the changes.
    # We have only two states: change, and stay the same.

    # Uncomment this here

    # parameters_vector_upsampled = parameters_vector_upsampled_code
    # parameters_state_orig, parameter_state_alphabet_orig, parameter_state_alphabet_inv_orig = vector_to_states(parameters_vector)

    parameters_state, parameter_state_alphabet, parameter_state_alphabet_inv = vector_to_states(parameters_vector_upsampled)


    print "[II] Testing Gaussian Naive Bayes Classifier"
    gnb = GaussianNB()
    gnb.fit(features_vector_upsampled.T, parameters_state)

    parameters_state_estimated = gnb.predict(features_vector_upsampled.T)

    output = states_to_vector(parameters_state_estimated, parameter_state_alphabet_inv)

    figure()
    subplot(211)
    plot(parameters_vector_upsampled.T)
    title('Parameter value upsampled by a factor of %d' % UpsamplingFactor)
    ylabel('value')
    xlabel('frame #')
    subplot(212)
    # plot(smooth_matrix_1D(output).T)
    plot(output.T)
    ylabel('value')
    xlabel('frame #')
    # cross_validate_classification(features_vector_upsampled.T, parameters_state, gnb)
    # hc = happy_curve_classification(features_vector_upsampled.T, parameters_state, gnb)

    #
    # figure()
    # plot(hc)
    # figure()

    print "[II] Trying HMM-based classification"

    # In order to do classification with HMMs, we need to:
    # 1. Split the parameters into classes
    # 2. Train one model per class
    # 3. Feed our data to all the models
    # 4. Check which model scores best and assign its class


    class HmmClassifier:
        def __init__(self, N=2, n_components=1):
            self.n_components = n_components
            self.chain_size = N
            self.hmms_ = []
            self.N = N

        def fit(self, X, states):
            self.n_states = len(unique(states))

            for n in range(0, self.n_states):
                hmm_ = hmm.GaussianHMM(n_components=self.n_components, covariance_type='full')

                # Get training data for each class
                vector = X[states == n, :]

                # Fit the HMM
                # print vector
                hmm_.fit([vector])

                # And append to the list
                self.hmms_.append(hmm_)

        def predict(self, X):
            labels = zeros((X.shape[0],))
            N = self.N

            m = 0

            scores = zeros((1, self.n_states))


            while m * N < X.shape[0]:
                if (m + 1) * N > X.shape[0]:
                    testdata = X[m * N:, :]
                else:
                    testdata = X[m * N:(m + 1) * N, :]

                # print testdata

                for i in range(0, self.n_states):
                    scores[0, i] = self.hmms_[i].score(testdata)

                if (m + 1) * N > X.shape[0]:
                    labels[m * N:] = argmax(scores)
                else:
                    labels[m * N:(m + 1) * N] = argmax(scores)

                m += 1

            return labels

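    # Note: predict() scores each run of N consecutive frames under every
    # per-class HMM and assigns the whole run to the best-scoring class, so
    # with N = 2 and ~23 ms frames each decision spans roughly 46 ms of audio
    # (cf. the legend further down, which uses the same 23.0 ms frame figure).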
    N = 2
    n_components = 1

    hmmc = HmmClassifier(N=N, n_components=n_components)
    hmmc.fit(features_vector_upsampled.T, parameters_state)


    # hchmmc = happy_curve_classification(features_vector_upsampled.T, parameters_state, hmmc, Nd=100)
    # cross_validate_classification(features_vector_upsampled.T, parameters_state, hmmc)






    # hmms_ = []
    #
    # for n in range(0, len(parameter_state_alphabet)):
    #     #hmm_ = hmm.GMMHMM(n_components = 1, n_mix = 2)
    #     hmm_ = hmm.GaussianHMM(n_components = 1, covariance_type = 'full')
    #
    #     # Get training data for each class
    #     vector = features_vector_upsampled[:, parameters_state == n]
    #
    #     #if vector.shape[1] < n_clusters:
    #     #    hmms_.append(None)
    #     #else:
    #
    #     hmm_.fit([vector.T])
    #
    #     # Append to the list
    #
    #     hmms_.append(hmm_)
    #
    # labels = zeros((features_vector_upsampled.shape[1],))
    #
    # N = 20
    # m = 0
    #
    # while m*N < features_vector_upsampled.shape[1]:
    #
    #     scores = zeros((1, len(parameter_state_alphabet)))
    #
    #     if (m+1)*N > features_vector_upsampled.shape[1]:
    #         testdata = features_vector_upsampled[:, m*N:]
    #     else:
    #         testdata = features_vector_upsampled[:, m*N:(m+1)*N]
    #
    #     for i in range(0, len(parameter_state_alphabet)):
    #         if hmms_[i] is not None:
    #             scores[0, i] = hmms_[i].score(testdata.T)
    #         else:
    #             scores[0, i] = -100000  # Very large negative score
    #     if (m+1)*N >= features_vector_upsampled.shape[1]:
    #         labels[m*N:] = argmax(scores)
    #     else:
    #         labels[m*N:(m+1)*N] = argmax(scores)
    #
    #     m += 1


    # figure()
    # plot(labels.T)


    labels = hmmc.predict(features_vector_upsampled.T)
    estimated = states_to_vector(labels, parameter_state_alphabet_inv)
    plot(estimated.T, 'r--')

    title('Estimated parameter values')
    legend(['Naive Bayes Classifier', 'HMM chain size %d (%.1fms)' % (N, float(N) / UpsamplingFactor * 23.0)])

    ylabel('value')
    xlabel('frame #')


    close('all')

    plot(features_clustering_labels / float(max(features_clustering_labels)))
    plot(parameters_vector_upsampled.T / max(ravel(parameters_vector_upsampled)))


    def plot_clusters(x, labels):
        figure()
        symbols = ['>', 'x', '.', '<', 'v']
        colors = ['b', 'r', 'g', 'm', 'c']

        for r in range(0, x.shape[0]):
            scatter(x[r, 0], x[r, 1], c=colors[int(labels[r]) % len(colors)], marker=symbols[int(labels[r]) % len(symbols)])


    plot_clusters(features_vector_upsampled.T, parameters_state)


    # SVM

    X = features_vector_upsampled.T
    y = parameters_state

    clf = svm.SVC()
    clf.fit(X, y)

    parameters_state_y = clf.predict(X)
    # plot(parameters)
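    # Training-set accuracy as a quick sanity check (not a validation; this
    # mirrors the accuracy computation used in cross_validate_classification):
    # print "[II] SVM training accuracy: %.2f" % (sum(array(parameters_state_y == y).astype(float)) / len(y))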