Mercurial > hg > chourdakisreiss2016

experiment-reverb/code/train.py @ 0:246d5546657c
initial commit, needs cleanup

author | Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date | Wed, 14 Dec 2016 13:15:48 +0000
#!/usr/bin/python2
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 23 11:53:17 2015

@author: mmxgn
"""

# This file does the cluster estimation and the removal of outliers

from sys import argv, exit
from essentia.standard import YamlInput, YamlOutput
from essentia import Pool
from pca import *

from numpy import *
import scipy.signal  # for scipy.signal.gaussian used in smooth_matrix_1D
from sklearn import cluster
from sklearn.metrics import pairwise_distances
from sklearn.cluster import KMeans, MiniBatchKMeans
from matplotlib.pyplot import *
#from sklearn.mixture import GMM
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from scipy.signal import decimate
from sklearn import cross_validation

#from hmmlearn import hmm
from hmmlearn.hmm import GMM
from hmmlearn import hmm

from sklearn import svm
#from adpcm import adm, adm_reconstruct


mse = lambda A, B: ((array(A) - array(B)) ** 2).mean()


def smooth_matrix_1D(X):
    # Smooth each row of X with a normalized 51-tap Gaussian window
    # (sigma = 4); the input is zero-padded by 50 samples per side so
    # the output keeps the shape of X.
    window = scipy.signal.gaussian(51, 4)
    window = window/sum(window)
    intermx = zeros((X.shape[0], X.shape[1]+100))
    intermx[:, 50:-50] = X

    for m in range(0, X.shape[0]):
        intermx[m,:] = convolve(ravel(intermx[m,:]), window, 'same')

    return intermx[:, 50:-50]

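# Example (sketch, not part of the pipeline): smoothing preserves shape,
# so it can be applied to any (rows x frames) matrix:
#   noisy = cumsum(randn(2, 200), axis=1)
#   smoothed = smooth_matrix_1D(noisy)   # still (2, 200)
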
def adm_reconstruct(codeword, h, dmin=.01, dmax=.28):
    # Decode an ADM codeword (one row of -1/0/+1 symbols) back into a
    # staircase signal, starting from the initial value h and mirroring
    # the step-size adaptation used in adm() below.
    x = zeros((1, codeword.shape[1]))

    delta1 = dmin
    delta2 = dmin
    Sum = h

    x[0] = h
    for i in range(0, codeword.shape[1]):
        if codeword[0,i] == 0:
            delta1 = dmin
            delta2 = dmin

        elif codeword[0,i] == 1:
            delta2 = dmin
            Sum += delta1
            delta1 *= 2
            if delta1 > dmax:
                delta1 = dmax

        elif codeword[0,i] == -1:
            delta1 = dmin
            Sum -= delta2
            delta2 *= 2
            if delta2 > dmax:
                delta2 = dmax
        x[0,i] = Sum
    return x

def adm(x, dmin=.01, dmax=.28, tol=0.0001):

    # Adaptive delta modulation, adapted from the MATLAB code
    # (adeltamod.m):
    #
    # Adaptive Delta Modulator
    # by Gandhar Desai (gdesai)
    # BITS Pilani Goa Campus
    # Date: 28 Sept, 2013

    xsig = x

    Lx = len(x)

    ADMout = zeros((1, Lx))
    codevec = zeros((1, Lx))

    Sum = x[0]
    delta1 = dmin
    delta2 = dmin
    mult1 = 2
    mult2 = 2
    for i in range(0, Lx):
        if (abs(xsig[i] - Sum) < tol):
            # Tracking error within tolerance: emit 0, reset both steps
            bit = 0
            delta2 = dmin
            delta1 = dmin

        elif (xsig[i] >= Sum):
            # Signal above the staircase: emit +1, step up, double the up-step
            bit = 1
            delta2 = dmin
            Sum += delta1
            delta1 *= mult1
            if delta1 > dmax:
                delta1 = dmax

        else:
            # Signal below the staircase: emit -1, step down, double the down-step
            bit = -1
            delta1 = dmin
            Sum -= delta2
            delta2 *= mult2
            if delta2 > dmax:
                delta2 = dmax

        ADMout[0, i] = Sum
        codevec[0, i] = bit

    return ADMout, codevec, x[0]

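# Example (sketch, not part of the pipeline): round-trip a 1-D signal
# through the coder. adm() returns the staircase approximation, the
# -1/0/+1 codeword and the initial value; adm_reconstruct() rebuilds
# the staircase from the codeword and initial value alone:
#   sig = sin(linspace(0, 2 * pi, 500))
#   approx, code, first = adm(sig)
#   rebuilt = adm_reconstruct(code, first)
#   print mse(approx, rebuilt)  # ~0: the codeword encodes the staircase exactly
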
if __name__ == "__main__":
    if len(argv) != 2:
        print "[EE] Wrong number of arguments"
        print "[II] Correct syntax is:"
        print "[II] \t%s <training_file>" % argv[0]
        print "[II] where <training_file> is a .yaml file containing the"
        print "[II] features of the dataset (try output2_stage/fulltraining-last.yaml)"
        exit(-1)

    n_clusters = 4
    UpsamplingFactor = 1
    dmin = 0.001
    dmax = 0.28
    tol = 0.001

    infile = argv[1]

    features_pool = YamlInput(filename = infile)()

    feature_captions = features_pool.descriptorNames()
    parameter_captions = []

    for c in features_pool.descriptorNames():
        if c.split('.')[0] == 'parameter':
            parameter_captions.append(c)
        if c.split('.')[0] == 'metadata' or c.split('.')[0] == 'parameter':
            feature_captions.remove(c)

    # close('all')

    print "[II] Loaded training data from %s (%s) " % (infile, features_pool['metadata.date'][0])
    print "[II] %d Features Available: " % len(feature_captions)

    print str(feature_captions).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7]

    nfeatures_in = len(feature_captions)
    nparameters_in = len(parameter_captions)
    features_vector = zeros((nfeatures_in, len(features_pool[feature_captions[0]])))

    parameters_vector = zeros((nparameters_in, len(features_pool[parameter_captions[0]])))

    for i in range(0, nfeatures_in):
        features_vector[i, :] = features_pool[feature_captions[i]].T
    for i in range(0, nparameters_in):
        parameters_vector[i, :] = features_pool[parameter_captions[i]].T

    print "[II] %d parameters used:" % len(parameter_captions)
    print str(parameter_captions).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7].replace('parameter.','')

    print "[II] Marking silent parts"

    silent_parts = zeros((1, len(features_pool[feature_captions[0]].T)))

    rms = features_vector[feature_captions.index('rms'), :]

    # Implementing a hysteresis gate -- the high threshold is a tenth of
    # the mean RMS and the low threshold is a hundredth of the mean RMS.

    rms_threshold_mean = mean(rms)

    rms_threshold_max = max(rms)
    rms_threshold_min = min(rms)

    rms_threshold_high = 0.1 * rms_threshold_mean
    rms_threshold_low = 0.01 * rms_threshold_mean

    for n in range(1, len(rms)):
        prev = rms[n-1]
        curr = rms[n]

        if prev >= rms_threshold_high:
            if curr < rms_threshold_low:
                silent_parts[0,n] = 1
            else:
                silent_parts[0,n] = 0
        elif prev <= rms_threshold_low:
            if curr > rms_threshold_high:
                silent_parts[0,n] = 0
            else:
                silent_parts[0,n] = 1
        else:
            silent_parts[0,n] = silent_parts[0,n-1]

    if silent_parts[0,1] == 1:
        silent_parts[0, 0] = 1

    active_index = invert(silent_parts.flatten().astype(bool))

    # Keep only the active (non-silent) parts

    features_vector = features_vector[:, active_index]

    moments_vector = zeros((features_vector.shape[0], 2))

    print "[II] Storing moments vector"
    for i in range(0, features_vector.shape[0]):
        mean_ = mean(features_vector[i,:])
        std_ = std(features_vector[i,:], ddof=1)
        moments_vector[i,0] = mean_
        moments_vector[i,1] = std_

        # Standardize each feature to zero mean and unit variance
        features_vector[i,:] = (features_vector[i,:] - mean_)/std_

    features_vector_original = features_vector


    print "[II] Extracting PCA configuration "

    kernel, q, featurelist = extract_pca_configuration_from_data(features_vector)

    print "[II] Optimal number of PCs to keep: %d" % q

    feature_captions_array = array(feature_captions)

    features_to_keep = list(feature_captions_array[featurelist])
    print "[II] Decided to keep %d features:" % len(features_to_keep)
    print str(features_to_keep).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7]

    features_kept_data = features_vector[featurelist,:]

    features_vector = (kernel.T*features_kept_data)[0:q,:]

    parameters_k_means = KMeans(init='k-means++', n_init=10, max_iter=300, tol=0.0000001, verbose = 0)

    print "[II] Trying ADM-coded parameters"
    print "[II] Upsampling features and parameters by a factor of %d" % UpsamplingFactor

    # Upsampled features and parameters
    features_vector_upsampled = smooth_matrix_1D(repeat(features_vector, UpsamplingFactor, axis=1))

    # feature_labels_upsampled = repeat(features_clustering_labels,UpsamplingFactor, axis=0)
    parameters_vector_upsampled = repeat(parameters_vector, UpsamplingFactor, axis=1)

    # parameters_vector_upsampled = smooth_matrix_1D(parameters_vector_upsampled)

    parameters_vector_upsampled_adm = matrix(zeros(shape(parameters_vector_upsampled)))
    parameters_vector_upsampled_code = matrix(zeros(shape(parameters_vector_upsampled)))
    parameters_vector_upsampled_firstval = matrix(zeros((parameters_vector_upsampled.shape[0],1)))

    # Reconstructed parameters
    parameters_vector_upsampled_reconstructed = matrix(zeros(shape(parameters_vector_upsampled)))

    def adm_matrix(X, dmin=0.001, dmax=0.28, tol=0.001):
        # Apply adm() row by row: returns the staircase outputs, the
        # codewords and the first value of each row.
        out = matrix(zeros(shape(X)))
        code = matrix(zeros(shape(X)))
        firstval = matrix(zeros((X.shape[0], 1)))

        for i in range(0, X.shape[0]):
            out[i,:], code[i,:], firstval[i,0] = adm(X[i,:], dmin=dmin, dmax=dmax, tol=tol)

        return out, code, firstval

    def adm_matrix_reconstruct(code, firstval, dmin=0.001, dmax=0.28):
        # Inverse of adm_matrix(): decode each row from its codeword
        # and initial value.
        X = matrix(zeros(shape(code)))
        for i in range(0, code.shape[0]):
            X[i,:] = adm_reconstruct(code[i,:], firstval[i,0], dmin=dmin, dmax=dmax)

        return X

    parameters_vector_upsampled_adm, parameters_vector_upsampled_code, parameters_vector_upsampled_firstval = adm_matrix(parameters_vector_upsampled, dmin, dmax, tol)

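    # Example (sketch, mirroring the call above): the coding is invertible,
    # so the upsampled parameters' staircase can be rebuilt from the code
    # and first values alone:
    #   rebuilt = adm_matrix_reconstruct(parameters_vector_upsampled_code,
    #                                    parameters_vector_upsampled_firstval,
    #                                    dmin=dmin, dmax=dmax)
    #   print mse(parameters_vector_upsampled_adm, rebuilt)  # ~0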

    def diff_and_pad(X):
        # First-order difference along the time axis, zero-padded on the
        # left so the output keeps the shape of X.
        return concatenate((zeros((shape(X)[0], 1)), diff(X, axis=1)), axis=1)

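    # Example (sketch): diff_and_pad(array([[1., 2., 4.]])) -> [[0., 1., 2.]]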

    print "[II] Clustering features."

    features_clustering = GMM(n_components = n_clusters, covariance_type='diag')

    # (the y keyword is accepted but unused by GMM.fit; the mixture is
    # fit on the features alone)
    features_clustering.fit(features_vector_upsampled.T, y=parameters_vector_upsampled_code)

    features_clustering_means = features_clustering.means_
    features_clustering_labels = features_clustering.predict(features_vector_upsampled.T)
    features_clustering_sigmas = features_clustering.covars_

    # Quantize each frame to the mean of its assigned cluster
    features_vector_upsampled_estimated = zeros(shape(features_vector_upsampled))

    for n in range(0, len(features_vector_upsampled_estimated[0])):
        features_vector_upsampled_estimated[:,n] = features_clustering_means[features_clustering_labels[n]]

    print "[II] Features MSE for %d clusters: %.3f" % (n_clusters, mse(features_vector_upsampled, features_vector_upsampled_estimated))


    def happy_curve_classification(data, classes, estimator, Nd=1):
        # Learning-curve style evaluation: train on increasingly many
        # samples and report test-set accuracy for each training size.
        print "[II] Generating Happy Curve "
        from copy import deepcopy
        estimator_fulldata = deepcopy(estimator)
        estimator_fulldata.fit(data, classes)
        labels = estimator_fulldata.predict(data)

        # 1. Split data in two, training and testing

        Ntr = int(round(data.shape[0]/2)) # Training dataset size
        Nts = data.shape[0] - Ntr         # Testing dataset size

        ratios = []

        for n in range(Nd, Ntr):
            train, test, trainl, testl = cross_validation.train_test_split(data, classes, test_size = 0.5, random_state = 0)
            train = train[0:n,:]
            trainl = trainl[0:n]
            estimator_ = deepcopy(estimator)
            estimator_.fit(train, trainl)
            labels = estimator_.predict(test)

            ratio = sum(array(testl==labels).astype(float))/len(labels)

            ratios.append(ratio)

        return ratios


    def cross_validate_classification(data, classes, estimator):
        print "[II] Crossvalidating... "
        from copy import deepcopy
        estimator_fulldata = deepcopy(estimator)
        estimator_fulldata.fit(data, classes)

        percents = arange(0.1, 0.9, 0.1)
        MSEs = []
        labels = estimator_fulldata.predict(data)

        print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(labels))

        for p in percents:
            train, test, trainlabels, testlabels = cross_validation.train_test_split(data, classes, test_size=p, random_state=0)
            estimator_ = deepcopy(estimator)
            estimator_.fit(train, trainlabels)
            labels = estimator_.predict(test)
            print "[II] for training(%.2f)-testing(%.2f): %.2f" % ((1-p), p, sum(array(testlabels==labels).astype(float))/len(labels))

        return MSEs

    def cross_validate_clustering(data, estimator):
        print "[II] Crossvalidating... "
        estimator_fulldata = estimator
        estimator_fulldata.fit(data)

        means = estimator_fulldata.means_

        percents = arange(0.1, 0.6, 0.1)
        MSEs = []
        reconstructed = zeros(shape(data))
        labels = estimator.predict(data)
        for n in range(0, len(reconstructed)):
            reconstructed[n,:] = means[labels[n]]

        MSEs.append(mse(data, reconstructed))
        for p in percents:
            train, test = cross_validation.train_test_split(data, test_size=p, random_state=0)
            train = matrix(train)
            test = matrix(test)

            estimator.fit(train)
            means = estimator.means_
            labels = estimator.predict(test)
            reconstructed = zeros(shape(test))
            for n in range(0, len(reconstructed)):
                reconstructed[n,:] = means[labels[n]]

            m = mse(test, reconstructed)

            print "[II] MSE for clustering crossvalidated data %.2f-%.2f: %.5f" % ((1-p), p, m)
            MSEs.append(m)

        print "[II] Crossvalidation complete"

        return MSEs


    # Construct the parameters alphabet; each symbol is a different column
    # vector in the parameter code matrix.

    def vector_to_states(X):
        """
        Input: a matrix MxN with N samples and M variables.
        Output: a state sequence `state_seq', a codeword dictionary
        `parameters_alphabet' and its inverse `parameters_alphabet_inv'.
        """

        parameters_alphabet = {}
        n = 0

        for i in range(0, X.shape[1]):
            vec = tuple(ravel(X[:,i]))
            if vec not in parameters_alphabet:
                parameters_alphabet[vec] = n
                n += 1

        parameters_alphabet_inv = dict([(parameters_alphabet[m], m) for m in parameters_alphabet])

        state_seq = array([parameters_alphabet[tuple(ravel(X[:,m]))] for m in range(0, X.shape[1])])

        return state_seq, parameters_alphabet, parameters_alphabet_inv

    def states_to_vector(predicted, parameters_alphabet_inv):
        # Inverse mapping: turn a state sequence back into column vectors.
        estimated = matrix(zeros((len(parameters_alphabet_inv[0]), len(predicted))))
        for i in range(0, len(predicted)):
            estimated[:, i] = matrix(parameters_alphabet_inv[predicted[i]]).T

        return estimated

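    # Example (sketch): a two-symbol round trip.
    #   M = matrix([[0., 1., 0.], [1., 0., 1.]])     # columns are symbols
    #   seq, alpha, alpha_inv = vector_to_states(M)  # seq = [0, 1, 0]
    #   states_to_vector(seq, alpha_inv)             # == M
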
    state_seq, parameters_alphabet, parameters_alphabet_inv = vector_to_states(parameters_vector_upsampled_code)

    parameters_change_variable = matrix(diff_and_pad(parameters_vector_upsampled)!=0).astype(int)

    changes_state_seq, changes_parameters_alphabet, changes_parameters_alphabet_inv = vector_to_states(parameters_change_variable)

    # This is an HMM that just codes the changes:
    # we have only two states, "change" and "stay the same".

    # Uncomment this to use the ADM-coded parameters instead:
    # parameters_vector_upsampled = parameters_vector_upsampled_code
    # parameters_state_orig, parameter_state_alphabet_orig, parameter_state_alphabet_inv_orig = vector_to_states(parameters_vector)

    parameters_state, parameter_state_alphabet, parameter_state_alphabet_inv = vector_to_states(parameters_vector_upsampled)

    print "[II] Testing Gaussian Naive Bayes Classifier"
    gnb = GaussianNB()
    gnb.fit(features_vector_upsampled.T, parameters_state)

    parameters_state_estimated = gnb.predict(features_vector_upsampled.T)

    output = states_to_vector(parameters_state_estimated, parameter_state_alphabet_inv)

    figure()
    subplot(211)
    plot(parameters_vector_upsampled.T)
    title('Parameter value upsampled by a factor of %d' % UpsamplingFactor)
    ylabel('value')
    xlabel('frame #')
    subplot(212)
    #plot(smooth_matrix_1D(output).T)
    plot(output.T)
    ylabel('value')
    xlabel('frame #')

    #cross_validate_classification(features_vector_upsampled.T, parameters_state, gnb)
    # hc = happy_curve_classification(features_vector_upsampled.T, parameters_state, gnb)
    # figure()
    # plot(hc)
    # figure()

    print "[II] Trying HMM classification (one Gaussian HMM per class)"

    # In order to do classification with HMMs, we need to:
    # 1. Split the parameters into classes
    # 2. Train one model per class
    # 3. Feed our data to all the models
    # 4. Check which has the best score and assign its class

    class HmmClassifier:
        # One GaussianHMM per class; prediction scores non-overlapping
        # chains of N consecutive frames against every class model and
        # assigns each chain the best-scoring class.
        def __init__(self, N=2, n_components = 1):
            self.n_components = n_components
            self.chain_size = N
            self.hmms_ = []
            self.N = N

        def fit(self, X, states):
            self.n_states = len(unique(states))

            for n in range(0, self.n_states):
                hmm_ = hmm.GaussianHMM(n_components = self.n_components, covariance_type = 'full')

                # Get training data for each class
                vector = X[states == n,:]

                # Fit the HMM on this class's frames (hmmlearn's old
                # list-of-sequences API)
                hmm_.fit([vector])

                # And append to the list
                self.hmms_.append(hmm_)

        def predict(self, X):
            labels = zeros((X.shape[0],))
            N = self.N

            m = 0

            scores = zeros((1, self.n_states))

            while m*N < X.shape[0]:
                if (m+1)*N > X.shape[0]:
                    testdata = X[m*N:,:]
                else:
                    testdata = X[m*N:(m+1)*N,:]

                # Score the chain under each class model
                for i in range(0, self.n_states):
                    scores[0,i] = self.hmms_[i].score(testdata)

                if (m+1)*N > X.shape[0]:
                    labels[m*N:] = argmax(scores)
                else:
                    labels[m*N:(m+1)*N] = argmax(scores)

                m += 1

            return labels

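    # Example (sketch): because labels are assigned per chain of N frames,
    # the predicted parameter sequence can only change every N frames,
    # i.e. every N/UpsamplingFactor * 23 ms at this hop size:
    #   clf = HmmClassifier(N = 2, n_components = 1)
    #   clf.fit(features_vector_upsampled.T, parameters_state)
    #   y_hat = clf.predict(features_vector_upsampled.T)  # one label per frame
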
    N = 2
    n_components = 1

    hmmc = HmmClassifier(N = N, n_components = n_components)
    hmmc.fit(features_vector_upsampled.T, parameters_state)

    # hchmmc = happy_curve_classification(features_vector_upsampled.T, parameters_state, hmmc, Nd=100)
    # cross_validate_classification(features_vector_upsampled.T, parameters_state, hmmc)

    # Earlier, non-class version of the per-class HMM training and
    # chain-scoring loop (kept for reference):
    #
    # hmms_ = []
    #
    # for n in range(0, len(parameter_state_alphabet)):
    #     #hmm_ = hmm.GMMHMM(n_components = 1, n_mix = 2)
    #     hmm_ = hmm.GaussianHMM(n_components = 1, covariance_type = 'full')
    #
    #     # Get training data for each class
    #     vector = features_vector_upsampled[:, parameters_state == n]
    #
    #     #if vector.shape[1] < n_clusters:
    #     #    hmms_.append(None)
    #     #else:
    #
    #     hmm_.fit([vector.T])
    #
    #     # Append to the list
    #     hmms_.append(hmm_)
    #
    # labels = zeros((features_vector_upsampled.shape[1],))
    #
    # N = 20
    # m = 0
    #
    # while m*N < features_vector_upsampled.shape[1]:
    #
    #     scores = zeros((1, len(parameter_state_alphabet)))
    #
    #     if (m+1)*N > features_vector_upsampled.shape[1]:
    #         testdata = features_vector_upsampled[:, m*N:]
    #     else:
    #         testdata = features_vector_upsampled[:, m*N:(m+1)*N]
    #
    #     for i in range(0, len(parameter_state_alphabet)):
    #         if hmms_[i] is not None:
    #             scores[0,i] = hmms_[i].score(testdata.T)
    #         else:
    #             scores[0,i] = -100000  # Very large negative score
    #     if (m+1)*N >= features_vector_upsampled.shape[1]:
    #         labels[m*N:] = argmax(scores)
    #     else:
    #         labels[m*N:(m+1)*N] = argmax(scores)
    #
    #     m += 1

    # figure()
    # plot(labels.T)


    labels = hmmc.predict(features_vector_upsampled.T)
    estimated = states_to_vector(labels, parameter_state_alphabet_inv)
    plot(estimated.T, 'r--')

    title('Estimated parameter values')
    legend(['Naive Bayes Classifier', 'HMM chain size %d (%.1fms)' % (N, float(N)/UpsamplingFactor*23.0)])

    ylabel('value')
    xlabel('frame #')

    close('all')

    plot(features_clustering_labels/float(max(features_clustering_labels)))
    plot(parameters_vector_upsampled.T/max(ravel(parameters_vector_upsampled)))


    def plot_clusters(x, labels):
        # Scatter the first two dimensions of x, one color/marker per label
        figure()
        symbols = ['>', 'x', '.', '<', 'v']
        colors = ['b', 'r', 'g', 'm', 'c']

        for r in range(0, x.shape[0]):
            scatter(x[r,0], x[r,1], c=colors[int(labels[r]) % len(colors)], marker=symbols[int(labels[r]) % len(symbols)])

    plot_clusters(features_vector_upsampled.T, parameters_state)


    # SVM

    X = features_vector_upsampled.T
    y = parameters_state

    clf = svm.SVC()
    clf.fit(X, y)

    parameters_state_y = clf.predict(X)
    #plot(parameters)