experiment-reverb/code/training_bak.py @ 0:246d5546657c
initial commit, needs cleanup

author: Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date:   Wed, 14 Dec 2016 13:15:48 +0000
#!/usr/bin/python2
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 23 11:53:17 2015

@author: mmxgn
"""

# This file does the cluster estimation and the removal of outliers

from sys import argv, exit
from essentia.standard import YamlInput, YamlOutput
from essentia import Pool
from pca import *

from numpy import *
import scipy.signal  # needed by smooth_matrix_1D below
from sklearn import cluster
from sklearn.metrics import pairwise_distances
from sklearn.cluster import KMeans, MiniBatchKMeans
from matplotlib.pyplot import *
#from sklearn.mixture import GMM
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from scipy.signal import decimate
from sklearn import cross_validation
from sklearn import multiclass

#from hmmlearn import hmm
from hmmlearn.hmm import GMM
from hmmlearn import hmm

from sklearn import svm
import copy as cp

from scipy.misc import logsumexp
import pickle
#from adpcm import adm, adm_reconstruct


mse = lambda A, B: ((array(A)-array(B)) ** 2).mean()

lognorm = lambda A: A - logsumexp(A)

#logsum = lambda X: logsumexp(log(X))


def logsum(X):
    if len(X) == 1:
        return log(X[0])
    else:
        return logaddexp(log(X[0]), logsum(X[1:]))
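
# Quick sanity check for logsum (illustrative only): it should agree with
# scipy's logsumexp up to floating-point error, e.g.
#
#   >>> X = [0.5, 0.25, 0.25]
#   >>> abs(logsum(X) - logsumexp(log(X))) < 1e-12
#   True
#
# Note that logsum recurses once per element, so very long inputs can hit the
# interpreter's recursion limit; logsumexp(log(X)) is the safer choice there.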


def smooth_matrix_1D(X):
    window = scipy.signal.gaussian(51, 4)
    window = window/sum(window)
    intermx = zeros((X.shape[0], X.shape[1]+100))
    intermx[:, 50:-50] = X

    for m in range(0, X.shape[0]):
        # print intermx.shape
        intermx[m,:] = convolve(ravel(intermx[m,:]), window, 'same')

    return intermx[:, 50:-50]

def adm_reconstruct(codeword, h, dmin=.01, dmax=.28):
    x = zeros((1, codeword.shape[1]))

    delta1 = dmin
    delta2 = dmin
    Sum = h

    x[0] = h
    for i in range(0, codeword.shape[1]):
        if codeword[0,i] == 0:
            delta1 = dmin
            delta2 = dmin

        elif codeword[0,i] == 1:
            delta2 = dmin
            Sum += delta1
            delta1 *= 2
            if delta1 > dmax:
                delta1 = dmax

        elif codeword[0,i] == -1:
            delta1 = dmin
            Sum -= delta2
            delta2 *= 2
            if delta2 > dmax:
                delta2 = dmax
        x[0,i] = Sum
    return x

def adm(x, dmin=.01, dmax=.28, tol=0.0001):

    # Adaptive delta modulation, adapted from:
    # (adeltamod.m)
    #
    # Adaptive Delta Modulator
    # by Gandhar Desai (gdesai)
    # BITS Pilani Goa Campus
    # Date: 28 Sept, 2013

    xsig = x

    Lx = len(x)

    ADMout = zeros((1, Lx))
    codevec = zeros((1, Lx))

    Sum = x[0]
    delta1 = dmin
    delta2 = dmin
    mult1 = 2
    mult2 = 2
    for i in range(0, Lx):
        # print abs(xsig[i] - Sum)
        if (abs(xsig[i] - Sum) < tol):
            bit = 0
            delta2 = dmin
            delta1 = dmin

        elif (xsig[i] >= Sum):
            bit = 1
            delta2 = dmin
            Sum += delta1
            delta1 *= mult1
            if delta1 > dmax:
                delta1 = dmax

        else:
            bit = -1
            delta1 = dmin
            Sum -= delta2
            delta2 *= mult2
            if delta2 > dmax:
                delta2 = dmax

        ADMout[0, i] = Sum
        codevec[0, i] = bit

    return ADMout, codevec, x[0]
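
# Minimal round-trip sketch for the ADM coder (illustrative; the helper name
# below is ours, not part of the pipeline): coding a smooth signal with adm()
# and decoding with adm_reconstruct() should give back the signal up to
# roughly the adaptive step size.
def _adm_roundtrip_demo():
    t = linspace(0, 1, 200)
    signal_ = 0.5 + 0.2*sin(2*pi*t)
    admout, code, h0 = adm(signal_, dmin=0.001, dmax=0.28, tol=0.001)
    rec = adm_reconstruct(code, h0, dmin=0.001, dmax=0.28)
    print "[II] ADM round-trip MSE: %.6f" % mse(signal_, ravel(rec))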

def median_filter(v, K):
    v2 = zeros((len(v),))
    for i in range(K, len(v)):
        seq = v[i-K:i]
        m = median(seq)
        v2[i-K:i] = m

    return v2


from models import ReverbModel


if __name__ == "__main__":
    if len(argv) != 2:
        print "[EE] Wrong number of arguments"
        print "[II] Correct syntax is:"
        print "[II] \t%s <trainingdir>" % argv[0]
        print "[II] where <trainingdir> is the directory containing the features files in the format *_features.yaml"
        print "[II] and the parameters in the format *_parameters.yaml"
        exit(-1)


n_clusters = 3
UpsamplingFactor = 1
dmin = 0.001
dmax = 0.28
tol = 0.001

d1min = 0.01
d1max = 0.1

g1min = 0.01
g1max = 0.99

damin = 0.006
damax = 0.012

Gmin = 0.01
Gmax = 0.99

gcmin = 0.01
gcmax = 0.99

# For searching the directory
from glob import glob

traindir = argv[1]

songs_in_dir = glob("%s/*_features.yaml" % traindir)

print "[II] Using files: %s" % songs_in_dir


chain_length = 1


# fullfeatures_pool = Pool()

features_vector = None
parameters_vector = None
index_vector = None

idx = 0

for f_ in songs_in_dir:
    infile = f_
    paramfile = "%s_parameters.yaml" % f_.split('_features.yaml')[0]

    print "[II] Using features file: %s" % infile
    print "[II] and parameters file: %s" % paramfile

    features_pool = YamlInput(filename=infile)()
    parameters_pool = YamlInput(filename=paramfile)()

    d1 = parameters_pool['parameters.d1']
    da = parameters_pool['parameters.da']
    g1 = parameters_pool['parameters.g1']
    gc = parameters_pool['parameters.gc']
    G = parameters_pool['parameters.G']

    feature_captions = features_pool.descriptorNames()
    parameter_captions = ['d1', 'da', 'g1', 'gc', 'G']

    svmhmmstr = ""

    for c in features_pool.descriptorNames():
        if c.split('.')[0] == 'metadata' or c.split('.')[0] == 'parameter':
            feature_captions.remove(c)

    # close('all')

    # print "[II] Loaded training data from %s (%s) " % (infile, features_pool['metadata.date'][0])
    print "[II] %d Features Available: " % len(feature_captions)

    print str(feature_captions).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7]

    nfeatures_in = len(feature_captions)
    nparameters_in = 5
    features_vector_one = zeros((nfeatures_in, len(features_pool[feature_captions[0]])))
    index_vector_one = ones((len(features_pool[feature_captions[0]]),))
    parameters_vector_one = ones((nparameters_in, len(features_pool[feature_captions[0]])))

    for i in range(0, nfeatures_in):
        features_vector_one[i, :] = features_pool[feature_captions[i]].T
    # for i in range(0, nparameters_in):
    #     parameters_vector[i, :] = features_pool[parameter_captions[0]].T

    parameters_vector_one[0, :] = d1*parameters_vector_one[0,:]
    parameters_vector_one[1, :] = g1*parameters_vector_one[1,:]
    parameters_vector_one[2, :] = da*parameters_vector_one[2,:]
    parameters_vector_one[3, :] = gc*parameters_vector_one[3,:]
    parameters_vector_one[4, :] = G*parameters_vector_one[4,:]

    # Stores example index number

    index_vector_one *= idx

    print "[II] %d parameters used:" % len(parameter_captions)
    print str(parameter_captions).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7].replace('parameter.','')

    if features_vector is None:
        features_vector = features_vector_one
    else:
        features_vector = append(features_vector, features_vector_one, 1)

    if parameters_vector is None:
        parameters_vector = parameters_vector_one
    else:
        parameters_vector = append(parameters_vector, parameters_vector_one, 1)

    if index_vector is None:
        index_vector = index_vector_one
    else:
        index_vector = append(index_vector, index_vector_one)

    idx += 1


print "[II] Marking silent parts"
features_full_nontransformed_train = features_vector.copy()

silent_parts = zeros((1, features_vector.shape[1]))

rms = features_vector[feature_captions.index('rms'), :]

# Implementing a hysteresis gate -- the high threshold is a tenth of the
# mean RMS and the low threshold a hundredth of it. (The max/min values
# below are computed but currently unused.)

rms_threshold_mean = mean(rms)

rms_threshold_max = max(rms)
rms_threshold_min = min(rms)

rms_threshold_high = 0.1 * rms_threshold_mean
rms_threshold_low = 0.01 * rms_threshold_mean

for n in range(1, len(rms)):
    prev = rms[n-1]
    curr = rms[n]

    if prev >= rms_threshold_high:
        if curr < rms_threshold_low:
            silent_parts[0,n] = 1
        else:
            silent_parts[0,n] = 0
    elif prev <= rms_threshold_low:
        if curr > rms_threshold_high:
            silent_parts[0,n] = 0
        else:
            silent_parts[0,n] = 1
    else:
        silent_parts[0,n] = silent_parts[0,n-1]


if silent_parts[0,1] == 1:
    silent_parts[0, 0] = 1

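# Note on the gate above (illustrative): the decision at frame n is keyed on
# the previous frame's rms. A loud previous frame (>= high) only goes silent
# when rms drops below the low threshold; a quiet previous frame (<= low)
# only becomes active when rms climbs above the high threshold; in between,
# the previous label is carried forward. This keeps frames hovering between
# the two thresholds from toggling on and off.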

active_index = invert(silent_parts.flatten().astype(bool))

# Keep only active parts

# Uncomment this
# features_vector = features_vector[:, active_index]

# Keep this for debugging purposes
# parameters_vector = parameters_vector[:, active_index]
# index_vector = index_vector[active_index]

# Label examples for a chain of chain_length
# idx = 0
# for i in range(0, len(index_vector)):
#     index_vector[i] = idx
#     if i % chain_length == 0:
#         idx += 1
# number_of_examples = idx


# Scale parameters to [0,1] with min-max normalization. (Rows follow the
# construction order above: d1, g1, da, gc, G.)

parameters_vector[0,:] = (parameters_vector[0,:] - d1min)/(d1max - d1min)  # d1
parameters_vector[1,:] = (parameters_vector[1,:] - g1min)/(g1max - g1min)  # g1
parameters_vector[2,:] = (parameters_vector[2,:] - damin)/(damax - damin)  # da
parameters_vector[3,:] = (parameters_vector[3,:] - gcmin)/(gcmax - gcmin)  # gc
parameters_vector[4,:] = (parameters_vector[4,:] - Gmin)/(Gmax - Gmin)     # G

moments_vector = zeros((features_vector.shape[0], 2))

features_vector_original = features_vector.copy()

print "[II] Storing moments vector"
for i in range(0, features_vector.shape[0]):
    mean_ = mean(features_vector[i,:])
    std_ = std(features_vector[i,:], ddof=1)
    moments_vector[i,0] = mean_
    moments_vector[i,1] = std_

    features_vector[i,:] = (features_vector[i,:] - mean_)/std_

features_vector_old_train = features_vector.copy()
# moments_vector_train = moments_vector


print "[II] Extracting PCA configuration "

kernel, q, featurelist = extract_pca_configuration_from_data(features_vector)

q = q + 1

print "[II] Optimal number of PCs to keep: %d" % q

feature_captions_array = array(feature_captions)

features_to_keep = list(feature_captions_array[featurelist])
print "[II] Decided to keep %d features:" % len(features_to_keep)
print str(features_to_keep).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7]

features_kept_data = features_vector[featurelist,:]

features_vector_old_train = features_kept_data
features_vector = (kernel.T*features_kept_data)[0:q,:]
features_vector_new_train = features_vector

# example_features = None
# example_parameters = None
# example_idx = None
#
# for idx in range(0, shape(parameters_vector)[1]-chain_length):
#     example_features_ = features_vector[:, idx:idx+chain_length]
#     # print example_features_
#     example_parameters_ = parameters_vector[:, idx:idx+chain_length]
#     example_idx_ = ones((shape(example_parameters_)[1],))
#
#     if example_features is None:
#         example_features = example_features_
#     else:
#         # print "appending"
#         example_features = append(example_features, example_features_, 1)
#
#     if example_parameters is None:
#         example_parameters = example_parameters_
#     else:
#         example_parameters = append(example_parameters, example_parameters_, 1)
#
#     if example_idx is None:
#         example_idx = example_idx_*idx
#     else:
#         example_idx = append(example_idx, example_idx_*idx, 1)
#
# features_vector = example_features
# parameters_vector = example_parameters
# index_vector = example_idx

print "[II] Trying ADM-coded parameters"
print "[II] Upsampling features and parameters by a factor of %d" % UpsamplingFactor


# Upsampled features and parameters
features_vector_upsampled = smooth_matrix_1D(repeat(features_vector, UpsamplingFactor, axis=1))

# feature_labels_upsampled = repeat(features_clustering_labels, UpsamplingFactor, axis=0)
parameters_vector_upsampled = repeat(parameters_vector, UpsamplingFactor, axis=1)


# km = KMeans(init='k-means++', n_init=10, max_iter=300, tol=0.0000001, verbose=0).fit(parameters_vector_upsampled.T)
# centers_ = km.cluster_centers_
# labels__ = km.labels_
# # Remove the following two lines in order to restore non-kmeans version
# parameters_vector_kmeans_upsampled = array(centers_[labels__])
#
# parameters_vector_upsampled = parameters_vector_kmeans_upsampled.T
#
# parameters_vector_upsampled = smooth_matrix_1D(parameters_vector_upsampled)

parameters_vector_upsampled_adm = matrix(zeros(shape(parameters_vector_upsampled)))
parameters_vector_upsampled_code = matrix(zeros(shape(parameters_vector_upsampled)))
parameters_vector_upsampled_firstval = matrix(zeros((parameters_vector_upsampled.shape[0], 1)))

# Reconstructed parameters

parameters_vector_upsampled_reconstructed = matrix(zeros(shape(parameters_vector_upsampled)))


def adm_matrix(X, dmin=0.001, dmax=0.28, tol=0.001):

    out = matrix(zeros(shape(X)))
    code = matrix(zeros(shape(X)))
    firstval = matrix(zeros((X.shape[0], 1)))

    for i in range(0, X.shape[0]):
        out[i,:], code[i,:], firstval[i,0] = adm(X[i,:], dmin=dmin, dmax=dmax, tol=tol)

    return out, code, firstval

# parameters_vector_upsampled_reconstructed[i,:] = adm_reconstruct(parameters_vector_upsampled_code[i,:], parameters_vector_upsampled_firstval[i,0], dmin=dmin, dmax=dmax)

def adm_matrix_reconstruct(code, firstval, dmin=0.001, dmax=0.28):
    X = matrix(zeros(shape(code)))
    for i in range(0, code.shape[0]):
        X[i,:] = adm_reconstruct(code[i,:], firstval[i,0], dmin=dmin, dmax=dmax)

    return X


parameters_vector_upsampled_adm, parameters_vector_upsampled_code, parameters_vector_upsampled_firstval = adm_matrix(parameters_vector_upsampled, dmin, dmax, tol)


def diff_and_pad(X):
    return concatenate((
        zeros((
            shape(X)[0],
            1
        )),
        diff(X, axis=1)),
        axis=1)


# Change for adm,
# parameters_vector_upsampled = parameters_vector_upsampled_code
print "[II] Clustering features."

features_clustering = GMM(n_components=n_clusters, covariance_type='diag')

# (The GMM fit is unsupervised; the ADM code matrix is not used here.)
features_clustering.fit(features_vector_upsampled.T)

features_clustering_means = features_clustering.means_
features_clustering_labels = features_clustering.predict(features_vector_upsampled.T)
features_clustering_sigmas = features_clustering.covars_

features_vector_upsampled_estimated = zeros(shape(features_vector_upsampled))

for n in range(0, len(features_vector_upsampled_estimated[0])):
    features_vector_upsampled_estimated[:,n] = features_clustering_means[features_clustering_labels[n]]

print "[II] Features MSE for %d clusters: %.3f" % (n_clusters, mse(features_vector_upsampled, features_vector_upsampled_estimated))


def happy_curve_classification(data, classes, estimator, Nd=1):
    print "[II] Generating Happy Curve "
    from copy import deepcopy
    estimator_fulldata = deepcopy(estimator)
    estimator_fulldata.fit(data, classes)
    labels = estimator_fulldata.predict(data)  # predict with the fitted copy

    # 1. Split data in two, training and testing

    Ntr = int(round(data.shape[0]/2))  # Training dataset size
    Nts = data.shape[0] - Ntr          # Testing dataset size

    ratios = []

    for n in range(Nd, Ntr):
        train, test, trainl, testl = cross_validation.train_test_split(data, classes, test_size=0.5, random_state=0)
        train = train[0:n,:]
        trainl = trainl[0:n]
        # print "trainl"
        # print trainl
        estimator_ = deepcopy(estimator)
        estimator_.fit(train, trainl)
        labels = estimator_.predict(test)

        ratio = sum(array(testl==labels).astype(float))/len(labels)

        ratios.append(ratio)

    return ratios


def cross_validate_classification(data, classes, estimator):
    print "[II] Crossvalidating... "
    from copy import deepcopy
    estimator_fulldata = deepcopy(estimator)
    estimator_fulldata.fit(data, classes)

    percents = arange(0.1, 0.9, 0.1)
    MSEs = []
    labels = estimator_fulldata.predict(data)  # predict with the fitted copy

    print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(labels))

    for p in percents:
        train, test, trainlabels, testlabels = cross_validation.train_test_split(data, classes, test_size=p, random_state=0)
        estimator_ = deepcopy(estimator)
        estimator_.fit(train, trainlabels)
        labels = estimator_.predict(test)
        print "[II] for training(%.2f)-testing(%.2f): %.2f" % ((1-p), p, sum(array(testlabels==labels).astype(float))/len(labels))

    return MSEs

def cross_validate_clustering(data, estimator):
    print "[II] Crossvalidating... "
    estimator_fulldata = estimator
    estimator_fulldata.fit(data)

    # labels = estimator_fulldata.predict(data)
    means = estimator_fulldata.means_
    # print means

    percents = arange(0.1, 0.6, 0.1)
    MSEs = []
    reconstructed = zeros(shape(data))
    labels = estimator.predict(data)
    for n in range(0, len(reconstructed)):
        reconstructed[n,:] = means[labels[n]]

    MSEs.append(mse(data, reconstructed))
    for p in percents:
        train, test = cross_validation.train_test_split(data, test_size=p, random_state=0)
        train = matrix(train)
        test = matrix(test)

        estimator.fit(train)
        means = estimator.means_
        labels = estimator.predict(test)
        reconstructed = zeros(shape(test))
        for n in range(0, len(reconstructed)):
            reconstructed[n,:] = means[labels[n]]

        m = mse(test, reconstructed)

        print "[II] MSE for clustering crossvalidated data %.2f-%.2f: %.5f" % ((1-p), p, m)
        MSEs.append(m)

    print "[II] Crossvalidation complete"

    return MSEs


# Construct parameters alphabet; each symbol is going to be a different
# column vector in the parameter code matrix.


def vector_to_states(X):
    """
    Input: a matrix MxN with N samples of M variables.
    Output: a state sequence `state_seq', a codeword dictionary
    `parameters_alphabet', and its inverse `parameters_alphabet_inv'. """

    parameters_alphabet = {}
    n = 0

    for i in range(0, X.shape[1]):
        vec = tuple(ravel(X[:,i]))
        if vec not in parameters_alphabet:
            parameters_alphabet[vec] = n
            n += 1

    parameters_alphabet_inv = dict([(parameters_alphabet[m], m) for m in parameters_alphabet])

    # Index over X's own columns (the original indexed a global matrix here).
    state_seq = array([parameters_alphabet[tuple(ravel(X[:,m]))] for m in range(0, X.shape[1])])

    return state_seq, parameters_alphabet, parameters_alphabet_inv


def states_to_vector(predicted, parameters_alphabet_inv):
    estimated = matrix(zeros((len(parameters_alphabet_inv[0]), len(predicted))))
    for i in range(0, len(predicted)):
        # print "[II] Predicted: ", predicted[i]
        estimated[:, i] = matrix(parameters_alphabet_inv[predicted[i]]).T

    return estimated
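
# Round-trip sketch (illustrative): states_to_vector inverts vector_to_states,
# so encoding a small parameter matrix and decoding the resulting state
# sequence reproduces the original columns. The helper name is ours.
def _states_roundtrip_demo():
    P = matrix([[0.1, 0.1, 0.3],
                [0.2, 0.2, 0.4]])
    seq, alphabet, alphabet_inv = vector_to_states(P)
    P_rec = states_to_vector(seq, alphabet_inv)
    print "[II] states round-trip MSE: %.6f" % mse(P, P_rec)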

# state_seq, parameters_alphabet, parameters_alphabet_inv = vector_to_states(parameters_vector_upsampled_code)

# parameters_change_variable = matrix(diff_and_pad(parameters_vector_upsampled)!=0).astype(int)

# changes_state_seq, changes_parameters_alphabet, changes_parameters_alphabet_inv = vector_to_states(parameters_change_variable)


# This is an hmm that just codes the changes.
# We have only two states, change and stay the same.

# Uncomment that here

# parameters_vector_upsampled = parameters_vector_upsampled_code
# parameters_state_orig, parameter_state_alphabet_orig, parameter_state_alphabet_inv_orig = vector_to_states(parameters_vector)
# TODO: HA
# parameters_vector_upsampled = parameters_vector_upsampled_code
parameters_state, parameter_state_alphabet, parameter_state_alphabet_inv = vector_to_states(parameters_vector_upsampled)

print "[II] Testing Gaussian Naive Bayes Classifier"
gnb = GaussianNB()
gnb.fit(features_vector_upsampled.T, parameters_state)

parameters_state_estimated = gnb.predict(features_vector_upsampled.T)

output = states_to_vector(parameters_state_estimated, parameter_state_alphabet_inv)

figure()
subplot(211)
plot(parameters_vector_upsampled.T)
title('Parameter value upsampled by a factor of %d' % UpsamplingFactor)
ylabel('value')
xlabel('frame #')
subplot(212)
# plot(smooth_matrix_1D(output).T)
plot(output.T)
ylabel('value')
xlabel('frame #')

errorvector_nbc = [mse(output[:,i], parameters_vector_upsampled[:,i]) for i in range(0, len(parameters_state_estimated))]


# cross_validate_classification(features_vector_upsampled.T, parameters_state, gnb)
# hc = happy_curve_classification(features_vector_upsampled.T, parameters_state, gnb)
#
# figure()
# plot(hc)
# figure()

print "[II] Trying Multinomial HMM"

# In order to do classification with HMMs, we need to:
# 1. Split the parameters into classes
# 2. Train one model per class
# 3. Feed our data to all the models
# 4. Check which one has the best score and assign its class
#    (see the sketch below)
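#
# Sketch of steps 3-4 (illustrative): with one fitted model per class,
# classifying a window is just an argmax over per-model log-likelihoods,
#
#     scores = [m.score(window) if m is not None else -inf for m in models]
#     predicted_class = argmax(scores)
#
# which is the loop that HmmClassifier3.predict implements below.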
class SVMClassifier:
    def __init__(self, **kwargs):
        print "[II] SVM Classifier "
        # self.clf = svm.SVC(**kwargs)
        self.name = "SVM"
        self.clf = multiclass.OneVsOneClassifier(svm.SVC(**kwargs))

    def fit(self, X, classes):
        n_classes = max(unique(classes)) + 1

        self.clf.fit(X, classes)

    def predict(self, X):
        return self.clf.predict(X)

    def getName(self):
        return self.name

    def cross_validate(self, data, classes):
        print "[II] SVM Classifier Crossvalidating... "
        from copy import deepcopy
        estimator = deepcopy(self)
        estimator_fulldata = deepcopy(self)
        estimator_fulldata.fit(data, classes)

        percents = arange(0.1, 0.9, 0.1)
        self.percents = percents * 100

        RATIOS = []
        labels = estimator_fulldata.predict(data)  # predict with the fitted copy

        print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(labels))

        for p in percents:
            traindata, testdata, trainlabels, testlabels = cross_validation.train_test_split(data, classes, test_size=p, random_state=1)

            # print "[II] [%.2f-%.2f] Examples used for training: %s" % (p, (1-p), str(traindata))
            # print "[II] [%.2f-%.2f] Examples used for testing: %s" % (p, (1-p), str(testdata))

            # Training phase

            estimator_ = deepcopy(estimator)
            estimator_.fit(traindata, trainlabels)

            predicted_labels = estimator_.predict(testdata)

            m = sum(array(predicted_labels==testlabels).astype(float))/len(testlabels)

            print "[II] [%.2f-%.2f] hold-out validation for SVM Classifier: %.1f" % (p, (1-p), m)

            RATIOS.append(m)

        return RATIOS, percents


class NBClassifier:
    def __init__(self):
        print "[II] Gaussian Naive Bayes Classifier"
        self.name = "Naive Bayes"
        self.gnb = GaussianNB()

    def getName(self):
        return self.name

    def fit(self, X, states):
        self.gnb.fit(X, states)

    def predict(self, X):
        return self.gnb.predict(X)

    def cross_validate(self, data, classes):
        print "[II] NB Classifier Crossvalidating... "
        from copy import deepcopy
        estimator = deepcopy(self)
        estimator_fulldata = deepcopy(self)
        estimator_fulldata.fit(data, classes)

        percents = arange(0.1, 0.9, 0.1)
        self.percents = percents * 100

        RATIOS = []
        labels = estimator_fulldata.predict(data)  # predict with the fitted copy

        print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(labels))

        for p in percents:
            traindata, testdata, trainlabels, testlabels = cross_validation.train_test_split(data, classes, test_size=p, random_state=1)

            # Training phase

            estimator_ = deepcopy(estimator)
            estimator_.fit(traindata, trainlabels)

            predicted_labels = estimator_.predict(testdata)

            m = sum(array(predicted_labels==testlabels).astype(float))/len(testlabels)

            print "[II] [%.2f-%.2f] hold-out validation for NB Classifier: %.1f" % (p, (1-p), m)

            RATIOS.append(m)

        return RATIOS, percents



class HmmClassifier3:
    def __init__(self, N=2, n_components=1):
        print "[II] HMM Classifier (%d time steps, %d components)" % (N, n_components)
        self.name = "HMM (%d time steps, %d components)" % (N, n_components)
        self.n_components = n_components
        self.chain_size = N

    def getName(self):
        return self.name

    def fit(self, features, parameters):

        # print parameters
        n_classes = max(unique(parameters)) + 1

        print "[II] Number of classes: ", n_classes
        hmms = [None]*n_classes
        svms = [None]*n_classes
        chain_size = self.chain_size
        obs = [None]*n_classes

        for i in range(chain_size, len(parameters)):
            class_ = parameters[i-1]
            seq = features[i-chain_size:i,:]

            if obs[class_] is None:
                obs[class_] = [seq]
            else:
                obs[class_].append(seq)

        for i in range(0, len(obs)):
            if obs[i] is not None and len(obs[i]) != 0:
                hmm_ = hmm.GaussianHMM(n_components=self.n_components, covariance_type='full')
                hmm_.fit(obs[i])
                hmms[i] = hmm_

        self.hmms = hmms

        return obs

    def predict(self, features, mfilt=20):
        chain_size = self.chain_size
        hmms = self.hmms
        predicted_classes = zeros((features.shape[0],))
        for i in range(chain_size, features.shape[0]):
            scores = zeros((len(hmms),))

            seq = features[i-chain_size:i, :]

            for j in range(0, len(hmms)):
                if hmms[j] is not None:
                    scores[j] = hmms[j].score(seq)
                else:
                    scores[j] = -infty

            predicted_classes[i] = argmax(scores)

        # Do a median filter at the end

        # predicted_classes = median_filter(predicted_classes, mfilt)

        return predicted_classes

    def k_fold_cross_validate(self, data, classes, K=5):
        print "[II] HMM Classifier Crossvalidating... "
        print "[II] Validating data: ", data
        print "[II] With classes: ", classes
        from copy import deepcopy
        estimator = deepcopy(self)
        estimator_fulldata = deepcopy(self)
        estimator_fulldata.fit(data, classes)

        labels = estimator_fulldata.predict(data)
        print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(classes))

        # Crossvalidate this.

        example_sequences = []
        example_labels = []

        chain_size = self.chain_size

        percents = arange(0.1, 0.9, 0.1)
        self.percents = percents * 100

        RATIOS = []

        # Constructing examples and labels

        L = data.shape[0]/K
        M = K
        # print "[II] Example size:", L

        n = 1
        while L*(n-1) < M*L:
            if L*n > shape(data)[0]:
                example = data[L*(n-1):,:]
                classes_ = classes[L*(n-1):]
            else:
                example = data[L*(n-1):L*n, :]
                classes_ = classes[L*(n-1):L*n]

            # print example
            # print classes_

            example_sequences.append(example)
            example_labels.append(classes_)
            n += 1

        # print len(example_sequences)
        # print len(example_labels)
        from sklearn.cross_validation import KFold
        kf = KFold(K, n_folds=K)

        ratio = 0

        for train, test in kf:
            print "[II] Validating examples %s against %s." % (train, test)

            topredict_data = example_sequences[test[0]]
            topredict_labels = example_labels[test[0]]

            tofit_data = None
            tofit_labels = None

            for t in train:
                # print t
                if tofit_data is None:
                    tofit_data = example_sequences[t]
                    tofit_labels = example_labels[t]
                else:
                    tofit_data = append(tofit_data, example_sequences[t], 0)
                    tofit_labels = append(tofit_labels, example_labels[t], 0)

            estimator_ = deepcopy(estimator)
            estimator_.fit(tofit_data, tofit_labels)

            labels = estimator_.predict(topredict_data)

            m = sum(array(topredict_labels==labels).astype(float))/len(labels)

            print "[II] %d-fold cross-validation for HMM Classifier: %.1f" % (K, m)

            ratio += m/float(len(kf))

        return ratio

    # Splitting to train/test

    def cross_validate(self, data, classes):
        print "[II] HMM Classifier Crossvalidating... "
        from copy import deepcopy
        estimator = deepcopy(self)
        estimator_fulldata = deepcopy(self)
        estimator_fulldata.fit(data, classes)

        labels = estimator_fulldata.predict(data)
        print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(classes))

        # Crossvalidate this.

        example_sequences = []
        example_labels = []

        chain_size = self.chain_size

        percents = arange(0.5, 0.9, 0.1)
        self.percents = percents * 100

        RATIOS = []

        # Constructing examples and labels

        M = self.chain_size
        L = data.shape[0]/20

        print "[II] Example size:", L

        n = 1
        while L*n < M*L-L:
            example = data[L*(n-1):L*n, :]
            # print example
            classes_ = classes[L*(n-1):L*n]
            # print classes_

            example_sequences.append(example)
            example_labels.append(classes_)
            n += 1

        # Splitting to train/test

        for p in percents:
            traindata, testdata, trainlabels, testlabels = cross_validation.train_test_split(example_sequences, example_labels, test_size=p, random_state=1)

            print traindata
            print testdata
            # Concatenate traindata

            tofit_data = None
            tofit_labels = None

            topredict_data = None
            topredict_labels = None

            for t in traindata:
                if tofit_data is None:
                    tofit_data = t
                else:
                    tofit_data = append(tofit_data, t, 0)

            for l in trainlabels:
                if tofit_labels is None:
                    tofit_labels = l
                else:
                    tofit_labels = append(tofit_labels, l)

            for t in testdata:
                if topredict_data is None:
                    topredict_data = t
                else:
                    topredict_data = append(topredict_data, t, 0)

            for l in testlabels:
                if topredict_labels is None:
                    topredict_labels = l
                else:
                    topredict_labels = append(topredict_labels, l)

            # print "[II] Fit data: ", tofit_data
            # print "[II] Fit labels: ", tofit_labels
            # print "[II] Predict data: ", topredict_data
            # print "[II] Predict labels: ", topredict_labels

            estimator_ = deepcopy(estimator)

            print tofit_labels
            estimator_.fit(tofit_data, tofit_labels)

            labels = estimator_.predict(topredict_data)

            m = sum(array(topredict_labels==labels).astype(float))/len(labels)

            print "[II] [%.2f-%.2f] cross-validation for HMM Classifier: %.1f" % (p, (1-p), m)

            RATIOS.append(m)

        return RATIOS, percents

class HmmClassifier2:
    def __init__(self, N=2, chain_size=4, n_components=1):
        print "[II] HMM Classifier (%d states, %d components)" % (N, n_components)
        self.name = "HMM (%d states, %d components)" % (N, n_components)
        self.n_components = n_components
        self.chain_size = chain_size
        self.hmms_ = []
        self.N = N
        self.n_states = N

    def getName(self):
        return self.name

    def fit(self, features, parameters):
        self.n_params = len(unique(parameters))

        for n in range(0, self.n_params):
            hmm_ = hmm.GaussianHMM(n_components=self.n_components, covariance_type='full')

            # Get training data for each parameter

            for i in range(0, len(parameters)-self.chain_size):
                seq = features[i:i+self.chain_size, :]
                if parameters[i+self.chain_size-1] == n and seq.shape[0] == self.chain_size:
                    # print "[II] Fitting: %s" % str(seq)

                    hmm_.fit([seq])

            self.hmms_.append(hmm_)

    def predict(self, X):
        labels = zeros((X.shape[0],))
        N = self.N

        scores = zeros((self.n_states, ))

        for i in range(0, len(labels)):
            testdata = X[i:i+self.chain_size,:]

            for j in range(0, self.n_states):
                scores[j] = self.hmms_[j].score(testdata)
            labels[i] = argmax(scores)

        return labels

    def cross_validate(self, data, classes):
        print "[II] HMM2 Classifier Crossvalidating... "
        from copy import deepcopy
        estimator = deepcopy(self)
        estimator_fulldata = deepcopy(self)
        estimator_fulldata.fit(data, classes)

        percents = arange(0.1, 0.9, 0.1)
        self.percents = percents * 100

        RATIOS = []
        labels = estimator_fulldata.predict(data)  # predict with the fitted copy

        print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(labels))

        for p in percents:
            traindata, testdata, trainlabels, testlabels = cross_validation.train_test_split(data, classes, test_size=p, random_state=1)

            # Training phase

            estimator_ = deepcopy(estimator)
            estimator_.fit(traindata, trainlabels)

            predicted_labels = estimator_.predict(testdata)

            m = sum(array(predicted_labels==testlabels).astype(float))/len(testlabels)

            print "[II] [%.2f-%.2f] hold-out validation for HMM2 Classifier: %.1f" % (p, (1-p), m)

            RATIOS.append(m)

        return RATIOS, percents


class HmmClassifier:
    def __init__(self, N=2, n_components=1):
        print "[II] HMM Classifier (%d states, %d components)" % (N, n_components)
        self.name = "HMM (%d states, %d components)" % (N, n_components)
        self.n_components = n_components
        self.chain_size = N
        self.hmms_ = []
        self.N = N

    def getName(self):
        return self.name

    def fit(self, X, states):
        self.n_states = len(unique(states))

        for n in range(0, self.n_states):
            hmm_ = hmm.GaussianHMM(n_components=self.n_components, covariance_type='full')

            # Get training data for each class
            vector = X[states == n,:]

            # Fit the HMM
            # print vector
            hmm_.fit([vector])

            # And append to the list
            self.hmms_.append(hmm_)

    def predict(self, X):
        labels = zeros((X.shape[0],))
        N = self.N

        m = 0

        scores = zeros((1, self.n_states))

        while m*N < X.shape[0]:
            if (m+1)*N > X.shape[0]:
                testdata = X[m*N:,:]
            else:
                testdata = X[m*N:(m+1)*N,:]

            # print testdata

            for i in range(0, self.n_states):
                scores[0,i] = self.hmms_[i].score(testdata)

            if (m+1)*N > X.shape[0]:
                labels[m*N:] = argmax(scores)
            else:
                labels[m*N:(m+1)*N] = argmax(scores)

            m += 1

        return labels

    # def cross_validate(self, data, classes, index_vector):
    #     print "[II] Crossvalidating... "
    #     from copy import deepcopy
    #     estimator = deepcopy(self)
    #     estimator_fulldata = deepcopy(self)
    #     estimator_fulldata.fit(data, classes)
    #
    #     percents = arange(0.1, 0.9, 0.1)
    #     self.percents = percents * 100
    #     RATIOS = []
    #     labels = estimator.predict(data)
    #
    #     print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(labels))
    #
    #     for p in percents:
    #         train, test, trainlabels, testlabels = cross_validation.train_test_split(data, classes, test_size=p, random_state=1)
    #         estimator_ = deepcopy(estimator)
    #         estimator_.fit(train, trainlabels)
    #         labels = estimator_.predict(test)
    #         m = sum(array(testlabels==labels).astype(float))/len(labels)
    #         RATIOS.append(m)
    #         print "[II] for training(%.2f)-testing(%.2f): %.2f" % ((1-p), p, sum(array(testlabels==labels).astype(float))/len(labels))
    #
    #     return RATIOS, percents

    def cross_validate(self, data, classes):
        print "[II] HMM Classifier Crossvalidating... "
        from copy import deepcopy
        estimator = deepcopy(self)
        estimator_fulldata = deepcopy(self)
        estimator_fulldata.fit(data, classes)

        percents = arange(0.1, 0.9, 0.1)
        self.percents = percents * 100

        RATIOS = []
        labels = estimator_fulldata.predict(data)  # predict with the fitted copy

        print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(labels))

        for p in percents:
            traindata, testdata, trainlabels, testlabels = cross_validation.train_test_split(data, classes, test_size=p, random_state=1)

            # Training phase

            estimator_ = deepcopy(estimator)
            estimator_.fit(traindata, trainlabels)

            predicted_labels = estimator_.predict(testdata)

            # (alternative per-example split kept for reference)
            # testdata = None
            # traindata = None
            # trainlabels = None
            # testlabels = None
            #
            # for t in train:
            #     if traindata is None:
            #         traindata = data[index_vector == t, :]
            #         trainlabels = classes[index_vector == t]
            #     else:
            #         traindata = append(traindata, data[index_vector == t, :], 0)
            #         trainlabels = append(trainlabels, classes[index_vector == t])
            #
            # estimator_ = deepcopy(estimator)
            # estimator_.fit(traindata, trainlabels)
            #
            # for t in test:
            #     if testdata is None:
            #         testdata = data[index_vector == t, :]
            #         testlabels = classes[index_vector == t]
            #     else:
            #         testdata = append(testdata, data[index_vector == t,:], 0)
            #         testlabels = append(testlabels, classes[index_vector == t])
            #
            # predicted_labels = estimator_.predict(testdata)

            m = sum(array(predicted_labels==testlabels).astype(float))/len(testlabels)

            print "[II] [%.2f-%.2f] hold-out validation for HMM Classifier: %.1f" % (p, (1-p), m)

            RATIOS.append(m)

        return RATIOS, percents

def cross_validate_full_k_fold(data, classes, list_of_classifiers, K=10):
    """ K-Folds Cross-validates all available classifiers, plots and saves barplots. """

    ratios = []
    for l in list_of_classifiers:
        ratio = l.k_fold_cross_validate(data, classes, K)
        ratios.append(ratio)

    L = len(ratios)

    ind = arange(L)

    fig, ax = subplots()
    rects = []

    width = 10/float(len(ratios))

    colors = ['r', 'y', 'g', 'c']

    labels_ = []

    for i in range(0, len(ratios)):
        rects.append(ax.bar(i*width, ratios[i], width, color=colors[i % len(colors)]))
        labels_.append(list_of_classifiers[i].getName())

    return ratios


def cross_validate_full(seq, classes, list_of_classifiers):
    """ Cross-validates all available classifiers, plots and saves barplots. """

    ratios = []
    percents = []
    for l in list_of_classifiers:
        ratios_, percents_ = l.cross_validate(seq, classes)
        ratios.append(ratios_)
        percents.append(percents_)

    L = len(ratios[0])

    ind = arange(L)

    fig, ax = subplots()

    rects = []

    W = 0.8
    width = W/float(len(ratios))

    colors = ['r', 'y', 'g', 'c']

    labels_ = []
    for i in range(0, len(ratios)):
        rects.append(ax.bar(ind+i*width, ratios[i], width, color=colors[i % len(colors)]))
        labels_.append(list_of_classifiers[i].getName())

    ax.legend(tuple(labels_))

    ax.set_xticks(ind+W/2)
    ax.set_xticklabels(tuple(percents[0]*100))
    ax.set_xlabel("Percentage % of test data")
    ax.set_ylabel("Success ratio")

    # rects1 = ax.bar(ind, ratios[0], 0.35, color='r')
    # rects2 = ax.bar(ind+0.351, ratios[1], 0.35, color='y')

    return ratios


# hmmc = HmmClassifier(N=len(unique(parameters_state)), n_components=1)
# hmmc.fit(features_vector_upsampled.T, parameters_state)
#
# hmmc2 = HmmClassifier(N=12, n_components=4)
# hmmc2.fit(features_vector_upsampled.T, parameters_state)
#
svmc = SVMClassifier(probability=True)
svmc.fit(features_vector_upsampled.T, parameters_state)
# svmc.cross_validate(features_vector_upsampled.T, parameters_state, index_vector)

nbc = NBClassifier()
nbc.fit(features_vector_upsampled.T, parameters_state)

# hchmmc = happy_curve_classification(features_vector_upsampled.T, parameters_state, hmmc, Nd=100)
# cross_validate_classification(features_vector_upsampled.T, parameters_state, hmmc)
#
# hmmc.cross_validate(features_vector_upsampled.T, parameters_state)

hmmc3 = HmmClassifier3(N=6, n_components=max(unique(parameters_state)))
obs = hmmc3.fit(features_vector_upsampled.T, parameters_state)


# svmhmm = HmmSVMClassifier()
# svmhmm.fit(features_vector_upsampled.T, parameters_state)
# ratios = cross_validate_full(features_vector_upsampled.T, parameters_state, [nbc, svmc])
# ratios = cross_validate_full_k_fold(features_vector_upsampled.T, parameters_state,
#                                     [hmmc3])

def approximate_chain_size(data, parameters):
    ratios = []

    # chainsizes = arange(1, 40)
    # for cs in chainsizes:
    #     print "[II] Trying chain size: %d" % cs
    #     hmmc = HmmClassifier3(N=cs, n_components=2)
    #     ratios.append(hmmc.k_fold_cross_validate(data, parameters, K=10))

    componentssize = arange(1, 12)

    for cs in componentssize:
        print "[II] Trying component size: %d" % cs
        hmmc = HmmClassifier3(N=6, n_components=cs)
        ratios.append(hmmc.k_fold_cross_validate(data, parameters, K=2))

    figure()
    plot(componentssize, ratios)
    xlabel('Number of components')
    ylabel('Success Ratio')
    title('2-fold cross validation success ratio vs number of components')

    return ratios

# ratios = approximate_chain_size(features_vector_upsampled.T, parameters_state)

# predictedhmmc2 = hmmc2.predict(features_vector_upsampled.T)

# figure()


# hmms_ = []
#
# for n in range(0, len(parameter_state_alphabet)):
#     # hmm_ = hmm.GMMHMM(n_components=1, n_mix=2)
#     hmm_ = hmm.GaussianHMM(n_components=1, covariance_type='full')
#
#     # Get training data for each class
#     vector = features_vector_upsampled[:, parameters_state == n]
#
#     # if vector.shape[1] < n_clusters:
#     #     hmms_.append(None)
#     # else:
#
#     hmm_.fit([vector.T])
#
#     # Append to the list
#
#     hmms_.append(hmm_)
#
# labels = zeros((features_vector_upsampled.shape[1],))
#
# N = 20
# m = 0
#
# while m*N < features_vector_upsampled.shape[1]:
#
#     scores = zeros((1, len(parameter_state_alphabet)))
#
#     if (m+1)*N > features_vector_upsampled.shape[1]:
#         testdata = features_vector_upsampled[:, m*N:]
#     else:
#         testdata = features_vector_upsampled[:, m*N:(m+1)*N]
#
#     for i in range(0, len(parameter_state_alphabet)):
#         if hmms_[i] is not None:
#             scores[0,i] = hmms_[i].score(testdata.T)
#         else:
#             scores[0,i] = -100000  # Very large negative score
#     if (m+1)*N >= features_vector_upsampled.shape[1]:
#         labels[m*N:] = argmax(scores)
#     else:
#         labels[m*N:(m+1)*N] = argmax(scores)
#
#     m += 1


# figure()
# plot(labels.T)


# labels = hmmc.predict(features_vector_upsampled.T)
# estimated = states_to_vector(labels, parameter_state_alphabet_inv)
# plot(estimated.T, 'r--')
#
# title('Estimated parameter values')
# legend(['Naive Bayes Classifier', 'HMM chain size %d (%.1fms)' % (N, float(N)/UpsamplingFactor*23.0)])
#
# ylabel('value')
# xlabel('frame #')
#

# close('all')

# plot(features_clustering_labels/float(max(features_clustering_labels)))
# plot(parameters_vector_upsampled.T/max(ravel(parameters_vector_upsampled)))


def plot_clusters(x, labels):
    figure()
    symbols = ['>', 'x', '.', '<', 'v']
    colors = ['b', 'r', 'g', 'm', 'c']

    for r in range(0, x.shape[0]):
        scatter(x[r,0], x[r,1], c=colors[int(labels[r]) % len(colors)], marker=symbols[int(labels[r]) % len(symbols)])


# plot_clusters(features_vector_upsampled.T, parameters_state)
#

# SVM

class HmmClassifierSVN_Naive:
    def __init__(self, N=2, n_components=1):
        self.n_components = n_components
        self.chain_size = N
        self.hmms_ = []
        self.N = N

    def fit(self, X, states):
        self.n_states = len(unique(states))

        for n in range(0, self.n_states):
            hmm_ = hmm.GaussianHMM(n_components=self.n_components, covariance_type='full')

            # Get training data for each class
            vector = X[states == n,:]

            # Fit the HMM
            # print vector
            hmm_.fit([vector])

            # And append to the list
            self.hmms_.append(hmm_)

    def predict(self, X):
        labels = zeros((X.shape[0],))
        N = self.N

        m = 0

        scores = zeros((1, self.n_states))

        while m*N < X.shape[0]:
            if (m+1)*N > X.shape[0]:
                testdata = X[m*N:,:]
            else:
                testdata = X[m*N:(m+1)*N,:]

            # print testdata

            for i in range(0, self.n_states):
                scores[0,i] = self.hmms_[i].score(testdata)

            if (m+1)*N > X.shape[0]:
                labels[m*N:] = argmax(scores)
            else:
                labels[m*N:(m+1)*N] = argmax(scores)

            m += 1

        return labels


def plot_parameters_estimation(list_of_estimators):
    for e in list_of_estimators:
        name_ = e.getName()

        fig = figure()
        fig.suptitle('Parameters estimation using %s' % name_, fontweight='bold')
        subplot(311)
        title('original parameters')
        param_real = states_to_vector(parameters_state, parameter_state_alphabet_inv).T
        plot(param_real)
        legend(parameter_captions)
        subplot(312)
        title('estimated parameters')
        pred = e.predict(features_vector_upsampled.T)
        param_est = states_to_vector(pred, parameter_state_alphabet_inv).T
        plot(param_est)
        legend(parameter_captions)
        subplot(313)
        error_real_est = zeros((len(param_est),))
        for i in range(0, len(error_real_est)):
            error_real_est[i] = mse(param_real[i], param_est[i])

        totalmse = sum(error_real_est)
        title('mean squared error (total: %.3f)' % totalmse)

        plot(error_real_est)

plot_parameters_estimation([nbc, svmc, hmmc3])

class HmSVMClassifier:
    def __init__(self, N=2, n_components=1):
        print "[II] SVM-HMM Classifier (%d time steps, %d components)" % (N, n_components)
        self.name = "SVM-HMM (%d time steps, %d components)" % (N, n_components)
        self.n_components = n_components
        self.chain_size = N

    def getName(self):
        return self.name

    def fit(self, features, parameters, fname_training="trainingdat.dat",
            fname_model="training.model", C=689):
        svmhmm = ""
        idx = 0
        chain_size = self.chain_size
        for i in range(chain_size, len(parameters)):
            idx += 1
            class_ = parameters[i-1]
            seq = features[i-chain_size:i,:]

            for m in range(0, len(seq)):
                s = seq[m]

                svmhmm += "%d qid:1.%d " % (class_, idx)
                for j in range(0, len(s)):
                    svmhmm += "%d:%.4f " % (j+1, s[j])
                svmhmm += "\n"

        fileout = open(fname_training, 'w')
        fileout.write(svmhmm)
        fileout.close()

        import subprocess

        # C = idx

        subprocess.call(["svmhmm/svm_hmm_learn", '-c', '%d' % C, '-e', '0.5', fname_training, fname_model])
        return svmhmm


n_classes = max(unique(parameters_state)) + 1

hist_p_q = histogram(parameters_state, n_classes)[0].astype(float)
prior_p_q = hist_p_q/sum(hist_p_q)

transmat = zeros((n_classes, n_classes))
for i in range(1, len(parameters_state)):
    prev = parameters_state[i-1]
    cur = parameters_state[i]
    transmat[prev, cur] += 1

# Normalize each row so it is a distribution over successor states (the HMM
# expects a row-stochastic transition matrix); eps keeps empty rows finite.
transmat = transmat + finfo(float).eps
transmat = transmat/transmat.sum(axis=1)[:, newaxis]

hmm2 = hmm.MultinomialHMM(n_components=n_classes, startprob=prior_p_q, transmat=transmat)

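# Quick sanity check for the transition matrix (illustrative):
#
#   >>> allclose(transmat.sum(axis=1), 1.0)
#   True
#
# i.e. every row is now a valid distribution over successor states.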

# TODO: Embed SVM emissions into an HMM. Try to pick the maximum-likelihood
# sequence from classifiers like in HmmClassifier3. For every class, train an
# SVM, use the classes as states, and compute the log-likelihood for
# prediction. Should work. (A rough sketch follows.)
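
# Rough sketch of the TODO above (illustrative, untested; the helper name is
# ours): use the AVA-SVM posteriors as HMM emission scores and decode with
# Viterbi. `ava` is assumed to be a fitted MyAVAClassifier (defined below);
# everything is kept in log space to avoid underflow.
def _viterbi_svm_sketch(ava, X):
    T = X.shape[0]
    K = ava.n_classes
    eps = finfo(float).eps
    logA = log(ava.transmat + eps)
    logprior = log(ava.prior + eps)
    # Emission score: posterior divided by class prior (Bayes rule, up to p(x)).
    logB = zeros((T, K))
    for t in range(0, T):
        post = ava.estimate_posterior_probability_vector(X[t, :])
        logB[t, :] = log(post + eps) - logprior
    # Standard Viterbi recursion with backpointers.
    delta = logprior + logB[0, :]
    psi = zeros((T, K), dtype=int)
    for t in range(1, T):
        prev = delta.copy()
        for k in range(0, K):
            cand = prev + logA[:, k]
            psi[t, k] = argmax(cand)
            delta[k] = cand[psi[t, k]] + logB[t, k]
    path = zeros((T,), dtype=int)
    path[T-1] = argmax(delta)
    for t in range(T-2, -1, -1):
        path[t] = psi[t+1, path[t+1]]
    return path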
1776 class MyAVAClassifier: | |
1777 | |
1778 def __init__(self): | |
1779 self.classifiers = {} | |
        self.name = "All-vs-All SVM Classifier"  # pairwise svm.SVC (RBF kernel by default)
1781 # self.probabilities = {} | |
1782 | |
1783 | |
1784 def fit(self, X, y, flr = 0): | |
1785 | |
1786 n_classes = max(unique(y)) + 1 | |
1787 classes = arange(0, n_classes) | |
1788 self.n_classes = n_classes | |
1789 # M = n_classes*(n_classes-1) # Number of classifiers | |
1790 | |
1791 h = histogram(y, n_classes)[0].astype(float) | |
1792 self.prior = h/sum(h) | |
1793 | |
        transmat = zeros((n_classes, n_classes))

        for i in range(1, len(y)):
            prev = y[i-1]
            cur = y[i]
            transmat[prev, cur] += 1

        # Add a very small probability for a random jump to avoid zero
        # entries, then normalise each row into a proper conditional
        # distribution P(y_t = j | y_{t-1} = i).

        transmat += flr
        row_sums = transmat.sum(1)
        transmat[row_sums == 0, :] = 1.0  # unseen classes: uniform fallback
        self.transmat = transmat/transmat.sum(1)[:, newaxis]
1809 | |
1810 for i in range(0, n_classes): | |
1811 for j in range(0, n_classes): | |
1812 if i != j and (i,j) not in self.classifiers and (j,i) not in self.classifiers: | |
                    print "[II] Training pairwise SVM for class pair", (i, j)
1814 idx_ = bitwise_or(y == classes[i], y == classes[j]) | |
1815 X_ = X[idx_, :] | |
1816 y_ = y[idx_] | |
1817 | |
1818 svm_ = svm.SVC(probability = True) | |
1819 | |
1820 # print y_ | |
1821 svm_.fit(X_, y_) | |
1822 self.classifiers[(i,j)] = svm_ | |
1823 # self.probabilities[(i,j)] = svm_.predict_proba(X) | |
1824 | |
1825 | |
1826 def estimate_pairwise_class_probability(self, i, j, x): | |
1827 if (i,j) not in self.classifiers and (j,i) in self.classifiers: | |
1828 return self.classifiers[(j,i)].predict_proba(x)[0,1] | |
1829 else: | |
1830 return self.classifiers[(i,j)].predict_proba(x)[0,0] | |
1831 | |
1832 def estimate_posterior_probability(self, i, x): | |
1833 mus = zeros((self.n_classes,)) | |
1834 for j in range(0, self.n_classes): | |
1835 if i != j: | |
1836 mus[j] = 1/self.estimate_pairwise_class_probability(i,j,x) | |
1837 # print mus | |
1838 S = sum(mus) - (self.n_classes - 2) | |
1839 # print S | |
1840 return 1/S | |
1841 | |
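    # The posterior above is the pairwise-coupling rule commonly attributed to
    # Price et al. (1995) for all-vs-all classifiers:
    #
    #   P(q = i | x) = 1 / ( sum_{j != i} 1/r_ij  -  (K - 2) )
    #
    # where r_ij is the pairwise probability of class i against class j and K
    # is the number of classes. Quick sanity check with K = 2: the formula
    # collapses to P(q = i | x) = r_ij, as expected.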
1842 def estimate_posterior_probability_vector(self, x): | |
1843 posterior = zeros((self.n_classes,)) | |
1844 for i in range(0, len(posterior)): | |
1845 posterior[i] = self.estimate_posterior_probability(i, x) | |
1846 | |
1847 return posterior | |
1848 | |
1849 # def estimate_emission_probability(self, i, x): | |
1850 # post = self.estimate_posterior_probability_vector(x) | |
1851 # # print "post: ", post | |
1852 # prior = self.prior | |
1853 # # print "prior: ", prior | |
1854 # p = post/prior | |
1855 # p = p/sum(p) | |
1856 # | |
1857 # return p[i] | |
1858 | |
1859 # def estimate_emissmat(self, X): | |
1860 # emissmat = zeros((X.shape[0], self.n_classes)) | |
1861 # for i in range(0, X.shape[0]): | |
1862 # for j in range(0, self.n_classes): | |
1863 # emissmat[i,j] = self.estimate_emission_probability(j, X[i,:]) | |
1864 # | |
1865 # return emissmat | |
1866 | |
1867 def predict(self, X): | |
1868 predicted = zeros((X.shape[0],)) | |
1869 for i in range(0, X.shape[0]): | |
1870 x = X[i,:] | |
1871 P = zeros((self.n_classes,)) | |
1872 | |
1873 | |
1874 for c in range(0, len(P)): | |
1875 P[c] = self.estimate_posterior_probability(c, x) | |
1876 | |
1877 pred = argmax(P) | |
1878 predicted[i] = pred | |
1879 | |
1880 return predicted | |
1881 | |
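# Tiny smoke test of MyAVAClassifier on synthetic data (our addition; the
# shapes mirror how the classifier is used on the real features below: rows
# are observations, labels are integers 0..K-1).
from numpy.random import randn

_X_toy = vstack([randn(20, 2), randn(20, 2) + 3.0])
_y_toy = append(zeros(20, dtype=int), ones(20, dtype=int))
_ava_toy = MyAVAClassifier()
_ava_toy.fit(_X_toy, _y_toy)
print "[II] Toy AVA accuracy: %.2f" % mean(_ava_toy.predict(_X_toy) == _y_toy)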
1882 class HMMsvmClassifier: | |
1883 def __init__(self, N=2): | |
        print "[II] HMM-SVM Classifier (%d time steps)" % N
1885 self.name = "HMM-SVM (%d time steps)" % N | |
1886 # self.n_components = n_components | |
1887 self.chain_size = N | |
1888 self.svm = MyAVAClassifier() | |
1889 | |
1890 | |
1891 def getName(self): | |
1892 return self.name | |
1893 | |
1894 def fit(self, features, parameters): | |
1895 | |
1896 # First train emission svm | |
1897 | |
1898 self.svm.fit(features, parameters) | |
1899 | |
1900 # print parameters | |
1901 n_classes = max(unique(parameters)) + 1 | |
1902 | |
1903 print "[II] Number of classes: ", n_classes | |
1904 hmms = [None]*n_classes | |
1905 chain_size = self.chain_size | |
1906 params = [None]*n_classes | |
1907 obs = [None]*n_classes | |
1908 | |
1909 for i in range(chain_size, len(parameters)): | |
1910 class_ = parameters[i-1] | |
1911 params_ = parameters[i-chain_size:i] | |
            feats_ = features[i-chain_size:i, :]
1913 | |
1914 if params[class_] is None: | |
1915 params[class_] = [params_] | |
1916 obs[class_] = [feats_] | |
1917 else: | |
1918 params[class_].append(params_) | |
1919 obs[class_].append(feats_) | |
1920 | |
1921 | |
1922 | |
1923 for i in range(0, len(params)): | |
1924 if params[i] is not None and len(params[i]) != 0: | |
1925 hmm_ = HMMsvm(self.svm) | |
                print "[II] Fitting class %d on %d sequences" % (i, len(params[i]))
1927 hmm_.fit(obs[i], params[i]) | |
1928 | |
1929 # TODO: Left here on 06/07 19:56 | |
1930 | |
1931 # hmm_.fit(features,) | |
1932 # print "obs: ", obs[i] | |
1933 # print "params: ", params[i] | |
1934 # hmm_.fit(obs[i], params[i],flr=1e-9) | |
1935 hmms[i] = hmm_ | |
1936 | |
1937 self.hmms = hmms | |
1938 | |
        return obs

    def predict(self, features, mfilt=20):
1941 chain_size = self.chain_size | |
1942 hmms = self.hmms | |
1943 predicted_classes = zeros((features.shape[0],)) | |
1944 for i in range(chain_size, features.shape[0]): | |
1945 scores = zeros((len(hmms),)) | |
1946 | |
1947 seq = features[i-chain_size:i, :] | |
1948 | |
1949 for j in range(0, len(hmms)): | |
1950 if hmms[j] is not None: | |
1951 scores[j] = hmms[j]._log_likelihood(seq) | |
1952 else: | |
1953 scores[j] = -infty | |
1954 | |
1955 predicted_classes[i] = argmax(scores) | |
1956 | |
1957 # Do a median filter at the end | |
1958 | |
1959 # predicted_classes = median_filter(predicted_classes,mfilt) | |
1960 | |
1961 return predicted_classes | |
1962 | |
1963 | |
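# HMMsvmClassifier.predict scores every length-N window of the feature matrix
# under each class-conditional chain model and takes the argmax: a
# sliding-window MAP decision with no smoothing across windows (the
# commented-out median filter in predict would add such smoothing).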
1964 class HMMsvm: | |
1965 def __init__(self, svm_): | |
1966 self.svm = svm_ | |
1967 | |
1968 | |
1969 # self.svm = MyAVAClassifier() | |
1970 | |
1971 | |
1972 def fit(self, features_list, states, flr=1e-12): | |
1973 | |
1974 # TODO: Concatenate features, train | |
1975 #self.svm.fit(X, states, flr) | |
1976 #self.prior = self.svm.prior | |
1977 #self.transmat = self.svm.transmat | |
1978 | |
1979 features = None | |
1980 | |
1981 for f in features_list: | |
1982 if features is None: | |
1983 features = f | |
1984 else: | |
1985 features = append(features, f, 0) | |
1986 | |
1987 fullstates = None | |
1988 T = features.shape[0] | |
1989 for s in states: | |
1990 if fullstates is None: | |
1991 fullstates = s | |
1992 else: | |
1993 fullstates = append(fullstates, s) | |
1994 | |
1995 | |
1996 | |
1997 self.n_classes = self.svm.n_classes | |
1998 n_classes = self.n_classes | |
1999 | |
        print "[II] Concatenated state sequence shape:", shape(fullstates)
2001 | |
2002 h = histogram(fullstates, n_classes)[0].astype(float) | |
2003 self.prior = h/sum(h) | |
2004 | |
2005 # Add a very small probability for random jump | |
2006 | |
2007 self.prior += flr | |
2008 self.prior = self.prior/sum(self.prior) | |
2009 | |
2010 transmat = zeros((n_classes, n_classes)) | |
2011 transitions = zeros((n_classes, )) | |
2012 for s in states: | |
2013 for i in range(1, len(s)): | |
2014 prev = s[i-1] | |
2015 cur = s[i] | |
2016 transmat[prev,cur] += 1 | |
2017 transitions[prev] += 1 | |
2018 # print "Adding one to", (prev,cur) | |
2019 | |
2020 transitions[transitions == 0] = 1 | |
2021 | |
2022 for i in range(0, transmat.shape[0]): | |
2023 transmat[i,:] = transmat[i,:]/transitions[i] | |
2024 | |
        self.transmat = transmat

        # Add a very small probability for a random jump to avoid zero
        # values, then re-normalise each row.

        self.transmat += flr
        self.transmat = self.transmat/self.transmat.sum(1)[:, newaxis]
2031 | |
2032 | |
2033 # Alphas and Betas | |
2034 X = features | |
        forward_messages = zeros((T, n_classes))
        backward_messages = zeros((T, n_classes))
2040 | |
2041 print "[II] Computing alpha, beta..." | |
2042 for t in range(1, T+1): | |
2043 forward_messages[t-1,:] = self._forward_message(forward_messages[t-2,:],X[0:t,]) | |
2044 backward_messages[-t,:] = self._backward_message(backward_messages[-t+1], X[-t:,:]) | |
2045 | |
2046 | |
2047 print "[II] Computing ksu..." | |
2048 | |
        a_norm = logsumexp(forward_messages[-1, :])  # log P(X | model), i.e. alpha_T summed over states
2050 | |
2051 ksu = zeros((T, n_classes, n_classes)) | |
2052 | |
2053 for i in range(0, n_classes): | |
2054 for j in range(0, n_classes): | |
2055 for t in range(0, T-1): | |
2056 ksu[t,i,j] = exp(forward_messages[t, i] + log(transmat[i,j]) + log(self.estimate_emission_probability(X[t,:],j)) + backward_messages[t+1,j] - a_norm) | |
2057 | |
2058 | |
2059 | |
2060 self.alphas = forward_messages | |
2061 self.betas = backward_messages | |
2062 self.ksu = ksu | |
2063 | |
2064 transmat_new = zeros((n_classes,n_classes)) | |
2065 | |
2066 for i in range(0, n_classes): | |
2067 | |
2068 for j in range(0, n_classes): | |
2069 transmat_new[i,j] = sum(ksu[:,i,j])/sum(ksu[:,i,:]) | |
2070 | |
2071 self.transmat_new = transmat_new | |
2072 | |
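        # In standard Baum-Welch notation, the quantity computed above is
        #
        #   ksu[t,i,j] = xi_t(i,j)
        #              = alpha_t(i) * a_ij * b_j(x_{t+1}) * beta_{t+1}(j) / P(X)
        #
        # and transmat_new[i,j] = sum_t xi_t(i,j) / sum_t sum_k xi_t(i,k):
        # expected transition counts normalised per source state. This is a
        # single re-estimation step; full Baum-Welch would iterate it.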
    def estimate_emission_probability(self, x, q):
        post = self.svm.estimate_posterior_probability_vector(x)
        prior = self.prior
        p = post/prior
        p = p/sum(p)

        return p[q]
2092 | |
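    # Why post/prior works: by Bayes' rule p(x|q) = p(q|x) p(x) / p(q), and
    # the p(x) factor is identical for every state q, so dividing the SVM
    # posterior by the class prior yields the emission likelihood up to a
    # per-observation constant. The final p/sum(p) renormalisation rescales
    # all states by the same factor, so decoding argmaxes are unaffected.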
2093 # def estimate_emission_probability(self, x, q): | |
2094 # return self.svm.estimate_emission_probability(q, x) | |
2095 | |
2096 | |
    def logviterbi(self, X):
        # Log-domain Viterbi decoding.

        # Number of states
        N = self.n_classes

        # Number of observations
        T = X.shape[0]

        transmat = self.transmat

        # 1. Initialisation
        a1 = self.prior

        delta = zeros((N,))
        psi = zeros((T, N), dtype=int)

        for i in range(0, N):
            delta[i] = log(a1[i]) + log(self.estimate_emission_probability(X[0,:], i))

        # 2. Recursion (the backpointers psi must be kept per time step)
        for t in range(1, T):
            delta_old = delta.copy()
            x = X[t, :]
            for j in range(0, N):
                delta[j] = max(delta_old + log(transmat[:,j])) \
                    + log(self.estimate_emission_probability(x, j))
                psi[t, j] = argmax(delta_old + log(transmat[:,j]))

        # 3. Termination
        p_star = max(delta)
        q_star = argmax(delta)

        # 4. Backtracking
        q = zeros((T,), dtype=int)
        q[-1] = q_star

        for t in range(T-1, 0, -1):
            q[t-1] = psi[t, q[t]]

        return q
2144 | |
    def viterbi(self, X):
        # Linear-domain Viterbi decoding (kept for reference; logviterbi is
        # the numerically safer choice for long sequences).

        # Number of states
        N = self.n_classes

        # Number of observations
        T = X.shape[0]

        transmat = self.transmat

        # 1. Initialisation
        a1 = self.prior

        delta = zeros((N, ))
        psi = zeros((T, N), dtype=int)

        for i in range(0, N):
            delta[i] = a1[i]*self.estimate_emission_probability(X[0,:], i)

        # 2. Recursion
        for t in range(1, T):
            delta_old = delta.copy()
            x = X[t, :]
            for j in range(0, N):
                delta[j] = max(delta_old*transmat[:,j])*self.estimate_emission_probability(x, j)
                psi[t, j] = argmax(delta_old*transmat[:,j])

        # 3. Termination
        p_star = max(delta)
        q_star = argmax(delta)

        # 4. Backtracking
        q = zeros((T,), dtype=int)
        q[-1] = q_star

        for t in range(T-1, 0, -1):
            q[t-1] = psi[t, q[t]]

        return q
2195 | |
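    # Consistency check (our note): on the same observation matrix the two
    # decoders should return identical paths, since log is monotonic, e.g.
    #
    #   assert all(model.logviterbi(X) == model.viterbi(X))
    #
    # up to floating-point ties and underflow in the linear-domain version.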
2196 | |
2197 def _log_likelihood_matrix(self, X): | |
2198 N = self.n_classes | |
2199 ll = zeros((X.shape[0], N)) | |
2200 | |
2201 for i in range(0, X.shape[0]): | |
2202 ll[i,:] = self._forward(i, X) | |
2203 | |
2204 return ll | |
2205 | |
2206 def compute_ksus(self, X): | |
2207 N = self.n_classes | |
2208 T = X.shape[0] | |
2209 print "[II] Computing gammas... " | |
2210 | |
2211 gammas = self._forward_backward(X) | |
2212 | |
        # Initialize

        aij = self.transmat

        # NOTE: unfinished -- the xi quantities are actually computed in
        # fit() above (and by _ksu() below) rather than here.
2221 | |
    def _not_quite_ksu(self, t, i, j, X):
        # Unnormalised xi_t(i,j) = alpha_t(i) * a_ij * b_j(x_{t+1}) * beta_{t+1}(j)
        alpha = exp(self.forward(X[0:t+1,:]))[i]
        beta = exp(self.backward(X[t+1:,:]))[j]

        aij = self.transmat[i,j]
        bj = self.estimate_emission_probability(X[t+1,:], j)

        return alpha*beta*aij*bj

    def _ksu(self, t, i, j, X):
        # Normalise by the sequence likelihood P(X) = sum_i alpha_T(i).
        alphaT = exp(logsumexp(self.forward(X)))

        return self._not_quite_ksu(t, i, j, X)/alphaT
2235 | |
2236 | |
    def _forward_backward(self, X):
        # Log-domain forward-backward; returns the smoothed state posteriors
        # gamma_t(i) = P(q_t = i | X) for every position t.
        T = X.shape[0]
        N = self.n_classes
        fv = zeros((T, N))
        sv = zeros((T, N))

        b = None
        for t in range(1, T+1):
            # fv[-1,:] is still zero when t == 1; _forward_message ignores
            # the incoming message for a length-1 prefix.
            fv[t-1,:] = self._forward_message(fv[t-2,:], X[0:t, :])

        for t in range(1, T+1):
            b = self._backward_message(b, X[-t:, :])
            sv[-t,:] = lognorm(fv[-t,:] + b)

        return sv
2253 | |
2254 | |
    def _backward(self, t, X):
        # Recursive log-domain backward variable for position t (see
        # _backward_message for the iterative counterpart):
        #   beta_t(i) = logsumexp_j [ log a_ij + log b_j(x_{t+1}) + beta_{t+1}(j) ]
        N = self.n_classes

        transmat = self.transmat
        T = X.shape[0]
        if t == T-1:
            return zeros((N, ))  # log 1: beta_T(i) = 1 by convention
        else:
            x_next = X[t+1, :]  # the emission belongs to the *next* position
            bt = self._backward(t+1, X)
            tosum = zeros((N, ))
            btnew = zeros((N, ))
            for i in range(0, N):
                for j in range(0, N):
                    tosum[j] = log(transmat[i,j]) + log(self.estimate_emission_probability(x_next, j)) + bt[j]
                btnew[i] = logsumexp(tosum)
            return btnew
2281 | |
2282 | |
2283 | |
2284 | |
    def forward(self, X):
        # Full forward pass; returns the (unnormalised) log alpha_T vector.
        T = X.shape[0]
        f = None
        for t in range(1, T+1):
            f = self._forward_message(f, X[0:t, :])

        return f

    def backward(self, X):
        # Full backward pass; returns the log beta_1 vector.
        T = X.shape[0]
        b = None
        for t in range(1, T+1):
            b = self._backward_message(b, X[-t:, :])

        return b
2304 | |
    def _backward_message(self, b, X):
        # One step of the log-domain backward recursion. X is the suffix of
        # observations starting at the current position, b the message for
        # the position to its right:
        #   beta_t(i) = logsumexp_j [ log a_ij + log b_j(x_{t+1}) + beta_{t+1}(j) ]
        N = self.n_classes

        transmat = self.transmat

        if X.shape[0] == 1:
            return zeros((N, ))  # log 1: beta_T(i) = 1 by convention
        else:
            x_next = X[1, :]  # the emission belongs to the *next* position
            tosum = zeros((N, ))
            btnew = zeros((N, ))
            for i in range(0, N):
                for j in range(0, N):
                    tosum[j] = log(transmat[i,j]) + log(self.estimate_emission_probability(x_next, j)) + b[j]
                btnew[i] = logsumexp(tosum)
            return btnew
2327 | |
    def _forward_message(self, f, X):
        # One step of the log-domain forward recursion:
        #   alpha_t(j) = log b_j(x_t) + logsumexp_i [ alpha_{t-1}(i) + log a_ij ]
        # No per-step normalisation is needed in the log domain, and leaving
        # the messages unnormalised keeps logsumexp(alpha_T) = log P(X).
        N = self.n_classes
        a = self.prior

        alpha = zeros((N, ))
        transmat = self.transmat

        x = X[-1, :]

        if X.shape[0] == 1:
            for i in range(0, N):
                alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i))
            return alpha
        else:
            tosum = zeros((N,))
            ftnew = zeros((N,))
            for j in range(0, N):
                for i in range(0, N):
                    tosum[i] = f[i] + log(transmat[i,j])
                Sum = logsumexp(tosum)
                bj = self.estimate_emission_probability(x, j)
                ftnew[j] = Sum + log(bj)

            return ftnew
2355 | |
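    # Worked micro-example for _forward_message (hypothetical numbers, N = 2):
    # with f = [log 0.3, log 0.7], transitions a = [[0.9, 0.1], [0.2, 0.8]]
    # and a flat emission b_j(x) = 0.5, the new message is
    #
    #   ftnew[j] = log(0.5) + log(0.3*a[0,j] + 0.7*a[1,j])
    #
    # e.g. ftnew[0] = log(0.5) + log(0.41).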
    def _forward(self, t, X):
        # Recursive log-domain forward variable for position t (see
        # _forward_message for the iterative counterpart).
        N = self.n_classes
        a = self.prior
        alpha = zeros((N, ))
        transmat = self.transmat
        x = X[t, :]

        if t == 0:
            for i in range(0, N):
                alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i))
            return alpha
        else:
            tosum = zeros((N, ))
            ft = self._forward(t-1, X)
            ftnew = zeros((N, ))
            for j in range(0, N):
                for i in range(0, N):
                    tosum[i] = ft[i] + log(transmat[i,j])
                Sum = logsumexp(tosum)
                bj = self.estimate_emission_probability(x, j)
                ftnew[j] = Sum + log(bj)

            return ftnew
2388 | |
2397 # | |
2398 # def forward(self, X): | |
2399 # # Compute log likelihood using the forward algorithm | |
2400 # | |
2401 # # Number of states | |
2402 # N = self.n_classes | |
2403 # | |
2404 # # Number of Observations | |
2405 # T = X.shape[0] | |
2406 # | |
2407 # | |
2408 # transmat = self.transmat | |
2409 # | |
2410 # # Initialization | |
2411 # # a1 = ones((N,))/N | |
2412 # a1 = self.prior | |
2413 # | |
2414 # alpha = zeros((N,)) | |
2415 # for i in range(0, N): | |
2416 # alpha[i] = log(a1[i])+log(self.estimate_emission_probability(X[0,:], i)) | |
2417 # # print alpha | |
2418 # # Recursion | |
2419 # for t in range(0, T): | |
2420 # alpha_old = alpha.copy() | |
2421 # x = X[t, :] | |
2422 # for j in range(0, N): | |
2423 # tosum = zeros((N,)) | |
2424 # for i in range(0, N): | |
2425 # tosum[i] = alpha_old[i]+log(transmat[i,j]) | |
2426 # | |
2427 # # Sum = logsum(tosum) | |
2428 # Sum = logsumexp(tosum) | |
2429 # bj = self.estimate_emission_probability(x, j) | |
2430 # | |
2431 # alpha[j] = Sum+log(bj) | |
2432 # # print alpha | |
2433 # | |
2434 # tosum = zeros((N,)) | |
2435 # for i in range(0, N): | |
2436 # tosum[i] = alpha[i] + log(transmat[i,N-1]) | |
2437 # | |
2438 # return tosum | |
2439 # | |
2440 # def backward(self, X): | |
2441 # # Number of states | |
2442 # N = self.n_classes | |
2443 # | |
2444 # # Number of Observations | |
2445 # T = X.shape[0] | |
2446 # | |
2447 # transmat = self.transmat | |
2448 # | |
2449 # # Initialization | |
2450 # | |
2451 # b1 = ones((N,))/N | |
2452 # | |
2453 # beta = zeros((N,)) | |
2454 # for i in range(0, N): | |
2455 # beta[i] = log(b1[i])+log(self.estimate_emission_probability(X[-1,:], i)) | |
2456 # | |
2457 # for t in range(0, T): | |
2458 # beta_old = beta.copy() | |
2459 # x = X[-t, :] | |
2460 # for j in range(0, N): | |
2461 # tosum = zeros((N, )) | |
2462 # for i in range(0, N): | |
2463 # tosum[i] = beta_old[i]+log(transmat[i,j]) | |
2464 # | |
2465 # Sum = logsumexp(tosum) | |
2466 # bj = self.estimate_emission_probability(x, j) | |
2467 # beta[j] = Sum+log(bj) | |
2468 # | |
2469 # tosum = zeros((N,)) | |
2470 # | |
2471 # for i in range(0, N): | |
2472 # tosum[i] = beta[i] + log(transmat[i,0]) | |
2473 # | |
2474 # #3 p = logsumexp(tosum) | |
2475 # | |
2476 # return tosum | |
2477 | |
2478 | |
    def _log_likelihood(self, X):
        # log P(X | model) via the forward algorithm: the forward messages
        # are left unnormalised, so logsumexp over the final message is the
        # sequence log-likelihood.
        return logsumexp(self.forward(X))
2482 | |
    def _likelihood(self, X):
        # Linear-domain forward algorithm (kept for reference; prefer
        # _log_likelihood, which cannot underflow on long sequences).

        # Number of states
        N = self.n_classes

        # Number of observations
        T = X.shape[0]

        transmat = self.transmat

        # Initialisation
        a1 = self.prior

        alpha = zeros((N,))
        for i in range(0, N):
            alpha[i] = a1[i]*self.estimate_emission_probability(X[0,:], i)

        # Recursion
        for t in range(1, T):
            alpha_old = alpha.copy()
            x = X[t, :]
            for j in range(0, N):
                Sum = 0
                for i in range(0, N):
                    Sum += alpha_old[i]*transmat[i,j]

                alpha[j] = Sum*self.estimate_emission_probability(x, j)

        # Termination: P(X) is the sum of the final forward variables.
        p = sum(alpha)

        return p
2528 | |
2535 # def fit(self, X, states): | |
2536 # # print parameters | |
2537 # n_classes = max(unique(states)) + 1 | |
2538 # | |
2539 # # svms = [None]*n_classes | |
2540 # obs = [None]*n_classes | |
2541 # sta = [None]*n_classes | |
2542 # chain_size = self.chain_size | |
2543 # | |
2544 # #22 obs = None | |
2545 # # sta = None | |
2546 # | |
2547 # print "[II] Number of classes: ", n_classes | |
2548 # | |
2549 # # For each class: | |
2550 # # Concatenate examples | |
2551 # # Fit SVM | |
2552 # | |
2553 # for i in range(chain_size, len(states)): | |
2554 # class_ = states[i-1] | |
2555 # seq = X[i-chain_size:i, :] | |
2556 # states_ = states[i-chain_size:i] | |
2557 # | |
2558 # | |
2559 # if obs[class_] is None: | |
2560 # obs[class_] = [seq] | |
2561 # sta[class_] = [states_] | |
2562 # self.svms.append(MyAVAClassifier()) | |
2563 # else: | |
2564 # obs[class_].append(seq) | |
2565 # sta[class_].append(states_) | |
2566 # | |
2567 # | |
2568 # for i in range(0, len(obs)): | |
2569 # | |
2570 # o = None | |
2571 # s = None | |
2572 # | |
2573 # for j in range(0, len(obs[i])): | |
2574 # if o is None: | |
2575 # o = obs[i][j] | |
2576 # s = sta[i][j] | |
2577 # | |
2578 # else: | |
2579 # o = append(o, obs[i][j],0) | |
2580 # s = append(s, sta[i][j]) | |
2581 # | |
2582 # | |
2583 # self.svms[i].fit(o, s) | |
2584 | |
2585 # def predict(self, features): | |
2586 # chain_size = self.chain_size | |
2587 # svms = self.svms | |
2588 # | |
2589 # predicted_classes = zeros((features.shape[0],)) | |
2590 # for i in range(chain_size, features.shape[0]): | |
2591 # scores = zeros((len(svms))) | |
2592 # | |
2593 # seq = features[i-chain_size:i, :] | |
2594 # | |
2595 # for j in range(0, len(svms)): | |
2596 # if svms[j] is not None: | |
2597 # scores[j] = svms[j].compute_log_likelihood(seq) | |
2598 # else: | |
2599 # scores[k] = -infty | |
2600 # predicted_classes[i] = argmax(scores) | |
2601 # | |
2602 # return predicted_classes | |
2603 | |
2604 | |
2605 | |
2606 | |
2607 | |
2608 # Marginalize over the latent variable | |
2609 # for i in range(0, X.shape[0]): | |
2610 # P = zeros((self.n_classes,)) | |
2611 # x = X[i,:] | |
2612 # for j in range(0, len(P)): | |
2613 # P[j] = self.estimate_emission_probability(j, x) | |
2614 # | |
2615 # S[i] = log(sum(P[j])) | |
2616 # | |
2617 # return sum(S) | |
2618 | |
2619 | |
2620 | |
2621 | |
2622 # Continue from here | |
2623 X = features_vector_upsampled.T | |
2624 y = parameters_state | |
2625 | |
2626 clf = svm.SVC() | |
2627 clf.fit(X,y) | |
2628 | |
2629 | |
2630 parameters_state_y = clf.predict(X) | |
2631 | |
2632 predhmmc3 = hmmc3.predict(features_vector_upsampled.T) | |
2633 | |
2634 myava = MyAVAClassifier() | |
2635 myava.fit(features_vector_upsampled.T, parameters_state) | |
2636 | |
2637 predmyava = myava.predict(features_vector_upsampled.T) | |
2638 | |
2639 # hmmsvmc = HMMsvmClassifier(N=2) | |
2640 # hmmsvmc.fit(features_vector_upsampled.T, parameters_state) | |
2641 #predhmmsvmc = hmmsvmc.predict(features_vector_upsampled.T) | |
2642 | |
2643 #plot(parameters) | |
2644 figure() | |
2645 plot(parameters_state,'b') | |
2646 | |
2647 plot(parameters_state_y,'r--') | |
2648 plot(predhmmc3,'g+-') | |
2649 | |
2650 # plot(predhmmsvmc, 'bo-') | |
2651 | |
legend(['Real', 'SVM', 'HMM3'])
2653 | |
# TODO: HMM with SVM, cross-validation
2655 | |
2656 # Using hidden markov svm | |
2657 | |
2658 svmhmm = "" | |
2659 | |
print "[II] Success ratio for SVM: %.2f" % (sum(parameters_state==parameters_state_y).astype(float)/float(len(parameters_state)))
2661 print "[II] Success ratio for HMM: %.2f" % (sum(parameters_state==predhmmc3).astype(float)/float(len(predhmmc3)) ) | |
2662 | |
2663 | |
2664 | |
2665 print "[II] All-vs-All custom Support Vector Classifier Success Ratio: %.2f " %(sum(predmyava==parameters_state).astype(float)/len(parameters_state)) | |
2666 | |
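# (Each "success ratio" above is plain per-frame classification accuracy,
# i.e. mean(y_pred == y_true) over the upsampled feature frames.)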
2667 | |
2668 model_ghmm = ReverbModel("GaussianHMM Classifier Model", kernel, q, features_to_keep, hmmc3, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector) | |
2669 model_gnb = ReverbModel("Naive Bayes Classifier Model", kernel, q, features_to_keep, nbc, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector ) | |
2670 model_svm = ReverbModel("AVA LinearSVM Classifier Model", kernel, q, features_to_keep, myava, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector ) | |
2671 | |
2672 model_ghmm.save("model_ghmm.dat") | |
2673 model_gnb.save("model_gnb.dat") | |
2674 model_svm.save("model_svm.dat") | |
2675 | |
2676 | |
2677 | |
# HMM classifier with SVM emissions
2679 | |
2680 # print "[II] All-vs-All custom HMM-SVM classifier Success Ratio: %.2f " % (sum(predhmmsvmc==parameters_state).astype(float)/len(parameters_state)) | |
2681 # | |
2682 | |
2683 # obsc = MyAVAClassifier() | |
2684 # obsc.fit(features_vector_upsampled.T, parameters_state) | |
2685 | |
2686 # hmm2 = HMMsvm(obsc) | |
2687 # hmm2.fit([features_vector_upsampled.T], [parameters_state]) | |
2688 | |
2689 # hmmsvmc = HMMsvmClassifier() | |
2690 # hmmsvmc.fit(features_vector_upsampled.T, parameters_state) | |
2691 # svmhmm = HMMSVMClassifier(N=6, n_components=max(unique(parameters_state))) | |
2692 # svmhmm.fit(features_vector_upsampled.T, parameters_state) | |
2693 # pred_svmhmm = svmhmm.predict(features_vector_upsampled.T | |
2694 | |
2695 # ) | |
# def predict(self, features, fname_testing = "testingdat.dat", fname_model = "training.model", fname_tags = "classes.tags"):
2697 # import subprocess | |
2698 | |
2699 | |
2700 | |
2701 # | |
2702 # subprocess.call(["svmhmm/svm_hmm_classify", 'trainingdat.dat', 'training.model', 'classes.tags']) | |
2703 | |
2704 | |
2705 | |
2706 # hmsvmc = HmSVMClassifier() | |
2707 | |
2708 # print hmsvmc.fit(features_vector_upsampled.T, parameters_state) | |
2709 | |
2710 | |
2711 | |
2712 #plot(mse(states_to_vector(predhmmc3,parameter_state_alphabet_inv).T, | |
2713 #states_to_vector(parameters_state,parameter_state_alphabet_inv).T)) | |
2714 | |
2715 | |
2716 | |
2717 # | |
2718 # print "[II] Classifying using svmhmm..." | |
2719 # | |
2720 # | |
2721 # for l in range(1, len(parameters_state)+1): | |
2722 # svmhmm += "%d qid:1.%d " % (parameters_state[l-1], l) | |
2723 # for f in range(1, features_vector_upsampled.shape[0]+1): | |
2724 # svmhmm += "%d:%.4f " % (f, features_vector_upsampled[f-1,l-1]) | |
2725 # | |
2726 # svmhmm += "#training \n" | |
2727 # | |
2728 # | |
2729 # | |
2730 # | |
2731 # fileout = open("trainingdat.dat", "w") | |
2732 # | |
2733 # fileout.write(svmhmm) | |
2734 # fileout.close() | |
2735 # | |
2736 # import subprocess | |
2737 # | |
2738 # C = len(parameters_state)/3 | |
2739 # # C = 100 | |
2740 # #subprocess.call("svmhmm/svm_hmm_learn -c %d -e 0.5 trainingdat.dat training.model" % len(parameters_state)) | |
2741 # subprocess.call(["svmhmm/svm_hmm_learn",'-c', '%d' % C, '-e', '0.7', 'trainingdat.dat', 'training.model']) | |
2742 # | |
2743 # | |
2744 # | |
2745 # subprocess.call(["svmhmm/svm_hmm_classify", 'trainingdat.dat', 'training.model', 'classes.tags']) | |
2746 # | |
2747 # f = open('classes.tags') | |
2748 # s = f.read() | |
2749 # s = s[2:-2] | |
2750 # parameters_state_y2 = [int(d) for d in s if d!=' '] | |
2751 # f.close() | |
2752 # | |
2753 # plot(parameters_state_y2) | |
2754 # | |
2755 # classif_ratio_svm = 1 - float(sum(parameters_state != parameters_state_y))/len(parameters_state) | |
2756 # classif_ratio_svmhmm = 1- float(sum(parameters_state != parameters_state_y2))/len(parameters_state) | |
2757 # | |
2758 # print "[II] SVM classification ratio: %.2f" % classif_ratio_svm | |
2759 # print "[II] SVM HMM classification ratio: %.2f" % classif_ratio_svmhmm |