Mercurial > hg > chourdakisreiss2016
experiment-reverb/code/#training.py# @ 0:246d5546657c
initial commit, needs cleanup
author: Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date:   Wed, 14 Dec 2016 13:15:48 +0000
parents: (none)
children: (none)
#!/usr/bin/python2
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 23 11:53:17 2015

@author: mmxgn
"""

# This file does the cluster estimation and the removal of outliers

from warnings import filterwarnings

from sys import argv, exit
from essentia.standard import YamlInput, YamlOutput
from essentia import Pool
from pca import *

from numpy import *
from sklearn import cluster
from sklearn.metrics import pairwise_distances
from sklearn.cluster import KMeans, MiniBatchKMeans
from matplotlib.pyplot import *
#from sklearn.mixture import GMM
from sklearn.naive_bayes import GaussianNB, MultinomialNB
import scipy.signal  # needed by smooth_matrix_1D below (scipy.signal.gaussian)
from scipy.signal import decimate
from sklearn import cross_validation
from sklearn import multiclass
from sklearn.covariance import EllipticEnvelope

#from hmmlearn import hmm
from hmmlearn.hmm import GMM
from hmmlearn import hmm

from sklearn import svm
import copy as cp

from scipy.misc import logsumexp
import pickle

from collections import Counter
#from adpcm import adm, adm_reconstruct

from reshape_observations import reshape_observations

mse = lambda A,B: ((array(A)-array(B)) ** 2).mean()

lognorm = lambda A: A - logsumexp(A)
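
# A quick sanity check of the two helpers above (hypothetical values, not in
# the original file): mse() is a mean squared error, and lognorm() shifts a
# log-probability vector so that it sums to one in linear space.
#
#   mse([0., 1.], [1., 1.])                  # -> 0.5
#   exp(lognorm(log(array([1., 3.]))))       # -> array([ 0.25,  0.75])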

rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
## for Palatino and other serif fonts use:
#rc('font',**{'family':'serif','serif':['Palatino']})
rc('text', usetex=True)
#logsum = lambda X: logsumexp(log(X))

filterwarnings("ignore")
C = 0.7
NCOMPONENTS = 2
CHAINSIZE = 5

close('all')

from numpy.random import randint, randn  # randn is used by HMMsvm2.fit below

class HMMsvm:
    def __init__(self, svm_):
        self.svm = svm_

#        self.svm = MyAVAClassifier()

    def fit(self, features_list, states, flr=1e-13):
#    def fit(self, features_list, flr=1e-13):

        # TODO: Concatenate features, train
        #self.svm.fit(X, states, flr)
        #self.prior = self.svm.prior
        #self.transmat = self.svm.transmat

        features = None

        for f in features_list:
            if features is None:
                features = f
            else:
                features = append(features, f, 0)

        fullstates = None
#        T = features.shape[0]
        for s in states:
            if fullstates is None:
                fullstates = s
            else:
                fullstates = append(fullstates, s)

        self.n_classes = self.svm.n_classes
        n_classes = self.n_classes

#        print fullstates, shape(fullstates)

        h = histogram(fullstates, n_classes)[0].astype(float)
        self.prior = h/sum(h)

        # Add a very small probability for random jump

        self.prior += flr
        self.prior = self.prior/sum(self.prior)

        transmat = zeros((n_classes, n_classes))
        transitions = zeros((n_classes, ))
        for s in states:
            for i in range(1, len(s)):
                prev = s[i-1]
                cur = s[i]
                transmat[prev,cur] += 1
                transitions[prev] += 1

        transitions[transitions == 0] = 1

        for i in range(0, transmat.shape[0]):
            transmat[i,:] = transmat[i,:]/transitions[i]

        self.transmat = transmat

        # Add a very small probability for random jump to avoid zero values

        self.transmat += flr

        for i in range(0, self.transmat.shape[0]):
            self.transmat[i,:] = self.transmat[i,:]/sum(self.transmat[i,:])

        # Re-estimate the transition matrix by Viterbi-decoding the training
        # sequences until successive estimates stop changing.

        A = zeros((n_classes, n_classes))

        Aold = self.transmat

        while mse(A, Aold) > 10*size(A)*flr:
            Aold = A.copy()
            A = zeros((n_classes, n_classes))
            transitions = zeros((n_classes, ))

            for i in range(0, len(features_list)):
                f = features_list[i]
#                print "FEATURES:", f
#                print f
                s, p = self.logviterbi(f)
#                print s
                for j in range(1, len(s)):
                    prev = s[j-1]
                    cur = s[j]
                    A[prev,cur] += 1
                    transitions[prev] += 1

            transitions[transitions == 0] = 1

            for i in range(0, A.shape[0]):
                A[i,:] = A[i,:]/transitions[i]

            A += flr

            self.transmat = A

            for i in range(0, A.shape[0]):
                if sum(A[i,:]) > 0:
                    A[i,:] = A[i,:]/sum(A[i,:])

#        print observations

    def estimate_emission_probability(self, x, q):
        post = self.svm.estimate_posterior_probability_vector(x)
#        print "post: ", post
        prior = self.prior
#        print "prior: ", prior
        p = post/prior
        p = p/sum(p)

        return p[q]

#    def estimate_emission_probability(self, x, q):
#        return self.svm.estimate_emission_probability(q, x)
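
    # (Explanatory note, not in the original.) The ratio above is Bayes'
    # rule up to a constant: with an SVM posterior P(q|x) and a state prior
    # P(q), the emission term satisfies P(x|q) proportional to P(q|x)/P(q);
    # the factor P(x) is common to all states and cancels inside Viterbi,
    # so the re-normalised ratio can stand in for the true likelihood.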

    def predict(self, X):
        return self.logviterbi(X)[0]

    def logviterbi(self, X):
        # Number of states

        N = self.n_classes

        # Number of observations

        T = X.shape[0]

        transmat = self.transmat

        # 1. Initialization

        a1 = self.prior

        delta = zeros((T,N))
        psi = zeros((T,N))

        for i in range(0, N):
            delta[0, i] = log(self.transmat[0,i]) + log(self.estimate_emission_probability(X[0,:],i))

        # 2. Recursion

        for t in range(1, T):
#            delta_old = delta.copy()
            x = X[t, :]
            for j in range(0, N):
#                print "j: %d, S" % j, delta_old+log(transmat[:,j])
                delta[t,j] = max(delta[t-1,:] + log(transmat[:,j])) + log(self.estimate_emission_probability(x,j))
                psi[t,j] = argmax(delta[t-1,:] + log(transmat[:,j]))

#            print "t: %d, delta: " % t, delta
#            print "t: %d, psi:" % t, psi

        # 3. Termination

        p_star = max(delta[T-1,:] + log(transmat[:,N-1]))
        q_star = argmax(delta[T-1,:] + log(transmat[:, N-1]))

        # 4. Backtracking

        q = zeros((T,), dtype=int)  # integer state path (float indices break numpy indexing)
        p = zeros((T,))

        q[-1] = q_star
        p[-1] = p_star
        for t in range(1, T):
            q[-t-1] = psi[-t, q[-t]]
            p[-t-1] = delta[-t, q[-t]]

        return q, p
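
    # Minimal usage sketch (hypothetical data, not in the original file;
    # MyAVAClassifier is defined further down):
    #
    #   obs = MyAVAClassifier()
    #   obs.fit(X_train, y_train)           # X_train: (T, d), y_train: (T,) ints
    #   hsvm = HMMsvm(obs)
    #   hsvm.fit([X_train], [y_train])
    #   path, scores = hsvm.logviterbi(X_test)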

    def viterbi(self, X):
        # Number of states

        N = self.n_classes

        # Number of observations

        T = X.shape[0]

        transmat = self.transmat

        # 1. Initialization

        a1 = self.prior

        delta = zeros((N, ))
        psi = zeros((N, ))

        for i in range(0, N):
            delta[i] = a1[i]*self.estimate_emission_probability(X[0,:],i)

        # 2. Recursion

        for t in range(1, T):
            delta_old = delta.copy()
            x = X[t,:]
            for j in range(0, N):
                delta[j] = max(delta_old*transmat[:,j])*self.estimate_emission_probability(x, j)
                psi[j] = argmax(delta_old*transmat[:,j])

        # 3. Termination

        p_star = max(delta*transmat[:,N-1])
        q_star = argmax(delta*transmat[:,N-1])

        # 4. Backtracking

        q = zeros((T,), dtype=int)
        q[-1] = q_star

        for t in range(1, T):
            q[-t-1] = psi[q[-t]]

        return q

    def _log_likelihood_matrix(self, X):
        N = self.n_classes
        ll = zeros((X.shape[0], N))

        for i in range(0, X.shape[0]):
            ll[i,:] = self._forward(i, X)

        return ll

    def compute_ksus(self, X):
        # NOTE: left unfinished in the original -- the gammas are computed,
        # but the xi (ksu) accumulation never happens and nothing is returned.
        N = self.n_classes
        T = X.shape[0]
        print "[II] Computing gammas... "

        gammas = self._forward_backward(X)

        # Initialize

        aij = self.transmat

    def _not_quite_ksu(self, t, i, j, X):
        alpha = exp(self.forward(X[0:t+1,:]))[i]
        beta = exp(self.backward(X[t+1:,:]))[j]

        aij = self.transmat[i,j]
        bj = self.estimate_emission_probability(X[t+1,:], j)

        return alpha*beta*aij*bj

    def _ksu(self, t, i, j, X):
        # NOTE: alphaT is computed but unused in the original; the xi value is
        # returned without the usual normalisation by P(O|model).
        alphaT = exp(self.forward(X[0:t+1,:]))[0]

        return self._not_quite_ksu(t,i,j,X)

    def _forward_backward(self, X):
        T = X.shape[0]
        N = self.n_classes
        fv = zeros((T, N))
        sv = zeros((T, N))

        b = None
        for t in range(1, T+1):
            fv[t-1,:] = self._forward_message(fv[t-2,:], X[0:t, ])

        for t in range(1, T+1):
            b = self._backward_message(b, X[-t: , :])
            sv[-t,:] = lognorm(fv[t-1,:]*b)

        return sv

    def _backward(self, t, X):
        N = self.n_classes
        a = ones((N,))/N

        beta = zeros((N, ))
        transmat = self.transmat
        T = X.shape[0]
        x = X[t, :]
        if t == T-1:
            beta = log(a)

            return beta
        else:
            tosum = zeros((N, ))
            bt = self._backward(t+1, X)
            btnew = zeros((N, ))
#            print bt
            for j in range(0, N):
                for i in range(0, N):
#                    print log(self.estimate_emission_probability(x, j)), bt[i], log(transmat[i,j])
                    tosum[i] = log(self.estimate_emission_probability(x, j)) + bt[i] + log(transmat[i,j])
#                print tosum

                btnew[j] = logsumexp(tosum)
            btnew = lognorm(btnew)

            return btnew

    def score(self, X):
        return self.forward(X)

    def forward(self, X):
        T = X.shape[0]
        f = None
        for t in range(1, T+1):
            f = self._forward_message(f, X[0:t, :])

        return f

    def backward(self, X):
        T = X.shape[0]
        b = None
        for t in range(1, T+1):
#            print t
#            print t,b
            idx = arange(-t,T)
#            print idx
            b = self._backward_message(b, X[-t:, :])

        return b

    def _backward_message(self, b, X):
        N = self.n_classes

        beta = zeros((N, ))
        transmat = self.transmat

        x = X[0, :]

        if X.shape[0] == 1:
            beta = log(ones((N,))/N)
            return beta
        else:
            tosum = zeros((N, ))
            btnew = zeros((N, ))
            for j in range(0, N):
                for i in range(0, N):
                    tosum[i] = log(self.estimate_emission_probability(x,j)) + b[i] + log(transmat[i,j])

                btnew[j] = logsumexp(tosum)
#            btnew = lognorm(btnew)
            return btnew

    def _forward_message(self, f, X):
        N = self.n_classes
        a = self.prior

        alpha = zeros((N, ))
        transmat = self.transmat

        x = X[-1, :]

        if X.shape[0] == 1:
            for i in range(0, N):
                alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i))
#            alpha = lognorm(alpha)
            return alpha

        else:
            tosum = zeros((N,))
            ftnew = zeros((N,))
            for j in range(0, N):
                for i in range(0, N):
                    tosum[i] = f[i] + log(transmat[i,j])
                Sum = logsumexp(tosum)
                bj = self.estimate_emission_probability(x, j)
                ftnew[j] = Sum+log(bj)

#            ftnew = lognorm(ftnew)
            return ftnew

    def _forward(self, t, X):
        N = self.n_classes
        a = self.prior
#        print a
        alpha = zeros((N, ))
        transmat = self.transmat
        x = X[t, :]

        if t == 0:
            for i in range(0, N):
                alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i))
#            print "--"
#            print alpha
            alpha = lognorm(alpha)
#            print alpha
#            print "xx"
            return alpha
        else:
            tosum = zeros((N, ))
            ft = self._forward(t-1, X)
            ftnew = zeros((N, ))
            for j in range(0, N):
                for i in range(0, N):
#                    print ft
                    tosum[i] = ft[i] + log(transmat[i,j])

                Sum = logsumexp(tosum)
                bj = self.estimate_emission_probability(x, j)
                ftnew[j] = Sum+log(bj)
            ftnew = lognorm(ftnew)

            return ftnew

class HMMsvm2:
    def __init__(self, svm_, n_components=4):
        self.svm = svm_
        self.n_components = n_components

#        self.svm = MyAVAClassifier()

    def fit(self, features_list, flr=1e-13):
        # Unsupervised variant of HMMsvm.fit: no state sequences are given;
        # they are re-estimated by Viterbi decoding from a random start.

        # TODO: Concatenate features, train
        #self.svm.fit(X, states, flr)
        #self.prior = self.svm.prior
        #self.transmat = self.svm.transmat

        n_classes = self.n_components
        self.n_classes = n_classes

        A = zeros((n_classes, n_classes))

        # (Note: randn can yield negative entries here; the original code
        # does not guard against this before normalising.)
        Aold = randn(n_classes, n_classes)
        self.transmat = Aold
        self.prior = randn(n_classes)
        self.prior = self.prior/sum(self.prior)
        for i in range(0, Aold.shape[0]):
            Aold[i,:] = Aold[i,:]/sum(Aold[i,:])

        while mse(A, Aold) > 10*size(A)*flr:
            Aold = A.copy()
            A = zeros((n_classes, n_classes))
            transitions = zeros((n_classes, ))

            for i in range(0, len(features_list)):
                f = features_list[i]
#                print "FEATURES:", f
#                print f
                s, p = self.logviterbi(f)
#                print s

                h = histogram(s, n_classes)[0].astype(float)
                self.prior = h/sum(h)

                self.prior += flr
                self.prior = self.prior/sum(self.prior)

                for j in range(1, len(s)):
                    prev = s[j-1]
                    cur = s[j]
                    A[prev,cur] += 1
                    transitions[prev] += 1

            transitions[transitions == 0] = 1

            for i in range(0, A.shape[0]):
                A[i,:] = A[i,:]/transitions[i]

            A += flr

            self.transmat = A

            for i in range(0, A.shape[0]):
                if sum(A[i,:]) > 0:
                    A[i,:] = A[i,:]/sum(A[i,:])

#        print observations

    def estimate_emission_probability(self, x, q):
        post = self.svm.estimate_posterior_probability_vector(x)
#        print "post: ", post
        prior = self.prior
#        print "prior: ", prior
        p = post/prior
        p = p/sum(p)

        return p[q]

#    def estimate_emission_probability(self, x, q):
#        return self.svm.estimate_emission_probability(q, x)


    def predict(self, X):
        return self.logviterbi(X)[0]

    def logviterbi(self, X):
        # Number of states

        N = self.n_classes

        # Number of observations

        T = X.shape[0]

        transmat = self.transmat

        # 1. Initialization

#        a1 = self.prior

        delta = zeros((T,N))
        psi = zeros((T,N))

        for i in range(0, N):
            delta[0, i] = log(self.transmat[0,i]) + log(self.estimate_emission_probability(X[0,:],i))

        # 2. Recursion

        for t in range(1, T):
#            delta_old = delta.copy()
            x = X[t, :]
            for j in range(0, N):
#                print "j: %d, S" % j, delta_old+log(transmat[:,j])
                delta[t,j] = max(delta[t-1,:] + log(transmat[:,j])) + log(self.estimate_emission_probability(x,j))
                psi[t,j] = argmax(delta[t-1,:] + log(transmat[:,j]))

#            print "t: %d, delta: " % t, delta
#            print "t: %d, psi:" % t, psi

        # 3. Termination

        p_star = max(delta[T-1,:] + log(transmat[:,N-1]))
        q_star = argmax(delta[T-1,:] + log(transmat[:, N-1]))

        # 4. Backtracking

        q = zeros((T,), dtype=int)  # integer state path (float indices break numpy indexing)
        p = zeros((T,))

        q[-1] = q_star
        p[-1] = p_star
        for t in range(1, T):
            q[-t-1] = psi[-t, q[-t]]
            p[-t-1] = delta[-t, q[-t]]

        return q, p

    def viterbi(self, X):
        # Number of states

        N = self.n_classes

        # Number of observations

        T = X.shape[0]

        transmat = self.transmat

        # 1. Initialization

        a1 = self.prior

        delta = zeros((N, ))
        psi = zeros((N, ))

        for i in range(0, N):
            delta[i] = a1[i]*self.estimate_emission_probability(X[0,:],i)

        # 2. Recursion

        for t in range(1, T):
            delta_old = delta.copy()
            x = X[t,:]
            for j in range(0, N):
                delta[j] = max(delta_old*transmat[:,j])*self.estimate_emission_probability(x, j)
                psi[j] = argmax(delta_old*transmat[:,j])

        # 3. Termination

        p_star = max(delta*transmat[:,N-1])
        q_star = argmax(delta*transmat[:,N-1])

        # 4. Backtracking

        q = zeros((T,), dtype=int)
        q[-1] = q_star

        for t in range(1, T):
            q[-t-1] = psi[q[-t]]

        return q


    def _log_likelihood_matrix(self, X):
        N = self.n_classes
        ll = zeros((X.shape[0], N))

        for i in range(0, X.shape[0]):
            ll[i,:] = self._forward(i, X)

        return ll

    def compute_ksus(self, X):
        # NOTE: unfinished in the original, like HMMsvm.compute_ksus above.
        N = self.n_classes
        T = X.shape[0]
        print "[II] Computing gammas... "

        gammas = self._forward_backward(X)

        # Initialize

        aij = self.transmat

    def _not_quite_ksu(self, t, i, j, X):
        alpha = exp(self.forward(X[0:t+1,:]))[i]
        beta = exp(self.backward(X[t+1:,:]))[j]

        aij = self.transmat[i,j]
        bj = self.estimate_emission_probability(X[t+1,:], j)

        return alpha*beta*aij*bj

    def _ksu(self, t, i, j, X):
        alphaT = exp(self.forward(X[0:t+1,:]))[0]

        return self._not_quite_ksu(t,i,j,X)

    def _forward_backward(self, X):
        T = X.shape[0]
        N = self.n_classes
        fv = zeros((T, N))
        sv = zeros((T, N))

        b = None
        for t in range(1, T+1):
            fv[t-1,:] = self._forward_message(fv[t-2,:], X[0:t, ])

        for t in range(1, T+1):
            b = self._backward_message(b, X[-t: , :])
            sv[-t,:] = lognorm(fv[t-1,:]*b)

        return sv

    def _backward(self, t, X):
        N = self.n_classes
        a = ones((N,))/N

        beta = zeros((N, ))
        transmat = self.transmat
        T = X.shape[0]
        x = X[t, :]
        if t == T-1:
            beta = log(a)

            return beta
        else:
            tosum = zeros((N, ))
            bt = self._backward(t+1, X)
            btnew = zeros((N, ))
#            print bt
            for j in range(0, N):
                for i in range(0, N):
#                    print log(self.estimate_emission_probability(x, j)), bt[i], log(transmat[i,j])
                    tosum[i] = log(self.estimate_emission_probability(x, j)) + bt[i] + log(transmat[i,j])
#                print tosum

                btnew[j] = logsumexp(tosum)
            btnew = lognorm(btnew)
            return btnew

    def score(self, X):
        return self.forward(X)

    def forward(self, X):
        T = X.shape[0]
        f = None
        for t in range(1, T+1):
            f = self._forward_message(f, X[0:t, :])

        return f

    def backward(self, X):
        T = X.shape[0]
        b = None
        for t in range(1, T+1):
#            print t
#            print t,b
            idx = arange(-t,T)
#            print idx
            b = self._backward_message(b, X[-t:, :])

        return b

    def _backward_message(self, b, X):
        N = self.n_classes

        beta = zeros((N, ))
        transmat = self.transmat

        x = X[0, :]

        if X.shape[0] == 1:
            beta = log(ones((N,))/N)
            return beta
        else:
            tosum = zeros((N, ))
            btnew = zeros((N, ))
            for j in range(0, N):
                for i in range(0, N):
                    tosum[i] = log(self.estimate_emission_probability(x,j)) + b[i] + log(transmat[i,j])

                btnew[j] = logsumexp(tosum)
#            btnew = lognorm(btnew)
            return btnew

    def _forward_message(self, f, X):
        N = self.n_classes
        a = self.prior

        alpha = zeros((N, ))
        transmat = self.transmat

        x = X[-1, :]

        if X.shape[0] == 1:
            for i in range(0, N):
                alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i))
#            alpha = lognorm(alpha)
            return alpha

        else:
            tosum = zeros((N,))
            ftnew = zeros((N,))
            for j in range(0, N):
                for i in range(0, N):
                    tosum[i] = f[i] + log(transmat[i,j])
                Sum = logsumexp(tosum)
                bj = self.estimate_emission_probability(x, j)
                ftnew[j] = Sum+log(bj)

#            ftnew = lognorm(ftnew)
            return ftnew

    def _forward(self, t, X):
        N = self.n_classes
        a = self.prior
#        print a
        alpha = zeros((N, ))
        transmat = self.transmat
        x = X[t, :]

        if t == 0:
            for i in range(0, N):
                alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i))
#            print "--"
#            print alpha
            alpha = lognorm(alpha)
#            print alpha
#            print "xx"
            return alpha
        else:
            tosum = zeros((N, ))
            ft = self._forward(t-1, X)
            ftnew = zeros((N, ))
            for j in range(0, N):
                for i in range(0, N):
#                    print ft
                    tosum[i] = ft[i] + log(transmat[i,j])

                Sum = logsumexp(tosum)
                bj = self.estimate_emission_probability(x, j)
                ftnew[j] = Sum+log(bj)
            ftnew = lognorm(ftnew)

            return ftnew

class NBClassifier:
    def __init__(self):
        print "[II] Gaussian Naive Bayes Classifier"
        self.name = "Naive Bayes"
        self.smallname = "gnbc"
        self.gnb = GaussianNB()

    def getName(self):
        return self.name

    def fit(self, X, states):
        self.gnb.fit(X, states)

    def predict(self, X):
        return self.gnb.predict(X)

    def cross_validate(self, data, classes):
        print "[II] NB Classifier cross-validating... "
        from copy import deepcopy
        estimator = deepcopy(self)
        estimator_fulldata = deepcopy(self)
        estimator_fulldata.fit(data, classes)

        percents = arange(0.1,0.9,0.1)
        self.percents = percents * 100

        RATIOS = []
        labels = estimator_fulldata.predict(data)

        print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(labels))

        for p in percents:
            traindata,testdata,trainlabels,testlabels = cross_validation.train_test_split(data,classes,test_size=p,random_state=1)

#            print "[II] [%.2f-%.2f] Examples used for training: %s" % (p, (1-p), str(traindata))
#            print "[II] [%.2f-%.2f] Examples used for testing: %s" % (p, (1-p), str(testdata))

            # Training phase

            estimator_ = deepcopy(estimator)
            estimator_.fit(traindata, trainlabels)

            predicted_labels = estimator_.predict(testdata)

            m = sum(array(predicted_labels==testlabels).astype(float))/len(testlabels)

            print "[II] [%.2f-%.2f] train/test split validation for NB Classifier: %.1f" % (p, (1-p), m)

            RATIOS.append(m)

        return RATIOS,percents


class HmmClassifier3:
    def __init__(self, N=2, n_components=1):
        print "[II] HMM Classifier (%d states, %d components)" % (N, n_components)
        self.name = "HMM (%d time steps, %d components)" % (N, n_components)
        self.n_components = n_components
        self.smallname = "hmmc"
        self.chain_size = N

    def getName(self):
        return self.name

    def fit(self, features, parameters):

#        print "Parameters:"
#        print parameters
        n_classes = max(unique(parameters)) + 1

        if n_classes == 1:
            self.only_one_class = True
            return
        else:
            self.only_one_class = False

        print "[II] Number of classes: ", n_classes
        hmms = [None]*n_classes

        chain_size = self.chain_size
        obs = [None]*n_classes

        for i in range(chain_size, len(parameters)):
            class_ = parameters[i]
            seq = features[i-chain_size:i,:]

            if obs[class_] is None:
                obs[class_] = [seq]
            else:
                obs[class_].append(seq)

        for i in range(0, len(obs)):

            if obs[i] is not None and len(obs[i]) != 0:
                hmm_ = hmm.GaussianHMM(n_components=self.n_components, covariance_type='diag')
#                print(obs[i])
                #obs_ = reshape_observations(obs[i])
                obs_ = concatenate(obs[i])
                #lengths_ = [o.shape[0] for o in obs[i]]
                #print (lengths_)
#                print len(obs[i])
#                print obs[i][0][0].shape
#                print obs[i]
#                for s in obs[i]:
#                    if s.ndim > 2:
#                        print s
                hmm_.fit(obs_, [self.chain_size]*len(obs[i]))
                hmms[i] = hmm_

        self.hmms = hmms

        return obs

    def predict(self, features, mfilt=20):

        if self.only_one_class == True:
            return zeros((features.shape[0], ))

        chain_size = self.chain_size
        hmms = self.hmms
        predicted_classes = zeros((features.shape[0],))

        for i in range(chain_size, features.shape[0]):
            scores = zeros((len(hmms),))

            seq = features[i-chain_size:i, :]

            for j in range(0, len(hmms)):
                if hmms[j] is not None:
                    scores[j] = hmms[j].score(seq)
                else:
                    scores[j] = -infty

            predicted_classes[i] = argmax(scores)

        # Do a median filter at the end

#        predicted_classes = median_filter(predicted_classes, mfilt)

        return predicted_classes

    def k_fold_cross_validate(self, data, classes, K=5):
        print "[II] HMM Classifier Crossvalidating... "
        print "[II] Validating data: ", data
        print "[II] With classes: ", classes
        from copy import deepcopy
        estimator = deepcopy(self)
        estimator_fulldata = deepcopy(self)
        estimator_fulldata.fit(data, classes)

        labels = estimator_fulldata.predict(data)
        print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(classes))

        # Cross-validate this.

        example_sequences = []
        example_labels = []

        chain_size = self.chain_size

        percents = arange(0.1,0.9,0.1)
        self.percents = percents * 100

        RATIOS = []

        # Constructing examples and labels

        L = data.shape[0]/K
        M = K
#        print "[II] Example size:", L

        n = 1
        while L*(n-1) < M*L:
            if L*n > shape(data)[0]:
                example = data[L*(n-1):,:]
                classes_ = classes[L*(n-1):]
            else:
                example = data[L*(n-1):L*n, :]
                classes_ = classes[L*(n-1):L*n]

#            print example
#            print classes_

            example_sequences.append(example)
            example_labels.append(classes_)
            n += 1

#        print len(example_sequences)
#        print len(example_labels)
        from sklearn.cross_validation import KFold
        kf = KFold(K, n_folds=K)

        ratio = 0

        for train, test in kf:
            print "[II] Validating examples %s against %s." % (train, test)

            topredict_data = example_sequences[test[0]]
            topredict_labels = example_labels[test[0]]

            tofit_data = None
            tofit_labels = None

            for t in train:
#                print t
                if tofit_data is None:
                    tofit_data = example_sequences[t]
                    tofit_labels = example_labels[t]
                else:
                    tofit_data = append(tofit_data, example_sequences[t], 0)
                    tofit_labels = append(tofit_labels, example_labels[t], 0)

            estimator_ = deepcopy(estimator)
            estimator_.fit(tofit_data, tofit_labels)

            labels = estimator_.predict(topredict_data)

            m = sum(array(topredict_labels==labels).astype(float))/len(labels)

            print "[II] %d-fold cross-validation for HMM Classifier: %.1f" % (K, m)

            ratio += m/float(len(kf))

        return ratio

    # Splitting to train/test

    def cross_validate(self, data, classes):
        print "[II] HMM Classifier Crossvalidating... "
        from copy import deepcopy
        estimator = deepcopy(self)
        estimator_fulldata = deepcopy(self)
        estimator_fulldata.fit(data, classes)

        labels = estimator_fulldata.predict(data)
        print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(classes))

        # Cross-validate this.

        example_sequences = []
        example_labels = []

        chain_size = self.chain_size

        percents = arange(0.5,0.9,0.1)
        self.percents = percents * 100

        RATIOS = []

        # Constructing examples and labels

        M = self.chain_size
        L = data.shape[0]/20

        print "[II] Example size:", L

        n = 1
        while L*n < M*L-L:
            example = data[L*(n-1):L*n, :]
#            print example
            classes_ = classes[L*(n-1):L*n]
#            print classes_

            example_sequences.append(example)
            example_labels.append(classes_)
            n += 1

        # Splitting to train/test

        for p in percents:
            traindata,testdata,trainlabels,testlabels = cross_validation.train_test_split(example_sequences, example_labels, test_size=p, random_state=1)

            print traindata
            print testdata
            # Concatenate traindata

            tofit_data = None
            tofit_labels = None

            topredict_data = None
            topredict_labels = None

            for t in traindata:
                if tofit_data is None:
                    tofit_data = t
                else:
                    tofit_data = append(tofit_data, t, 0)

            for l in trainlabels:
                if tofit_labels is None:
                    tofit_labels = l
                else:
                    tofit_labels = append(tofit_labels, l)

            for t in testdata:
                if topredict_data is None:
                    topredict_data = t
                else:
                    topredict_data = append(topredict_data, t, 0)

            for l in testlabels:
                if topredict_labels is None:
                    topredict_labels = l
                else:
                    topredict_labels = append(topredict_labels, l)

#            print "[II] Fit data: ", tofit_data
##            print "[II] Fit labels: ", tofit_labels
#            print "[II] Predict data: ", topredict_data
#            print "[II] Predict labels: ", topredict_labels

            estimator_ = deepcopy(estimator)

            print tofit_labels
            estimator_.fit(tofit_data, tofit_labels)

            labels = estimator_.predict(topredict_data)

            m = sum(array(topredict_labels==labels).astype(float))/len(labels)

            print "[II] [%.2f-%.2f] cross-validation for HMM Classifier: %.1f" % (p, (1-p), m)

            RATIOS.append(m)

        return RATIOS,percents
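
# A minimal usage sketch for HmmClassifier3 (hypothetical shapes, not in the
# original file): one GaussianHMM per parameter class is trained on sliding
# windows of chain_size frames, and prediction picks, frame by frame, the
# class whose HMM scores the current window highest.
#
#   clf = HmmClassifier3(N=5, n_components=2)
#   clf.fit(features, labels)       # features: (T, d) array, labels: (T,) ints
#   pred = clf.predict(features)    # (T,) predicted class per frame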

class HMMsvmClassifier2:
    def __init__(self, N=2):
        self.classifiers = {}
        self.name = "HMM-SVM Classifier 2"
        self.smallname = "hmmsvm2"
        self.obs = MyAVAClassifier()
        self.chain_size = N

    def getName(self):
        return self.name

    def fit(self, X, y):
        # NOTE: this method is unfinished in the original: the loop below
        # references an undefined name `parameters` and has an empty body.
        self.n_classes = max(unique(y))+1
        n_classes = self.n_classes
        chain_size = self.chain_size

        if n_classes == 1:
            self.only_one_class = True
            return
        else:
            self.only_one_class = False

        print "[II] Number of classes: ", n_classes

        hmms = [None]*n_classes
        obs = [None]*n_classes

        for i in range(chain_size, len(parameters)):
            class_ = parameters[i-1]

class HMMsvmClassifier:
    def __init__(self, N=2):
        self.classifiers = {}
        self.name = "HMM-SVM Classifier"
        self.smallname = "hmmsvm"
        self.obs = MyAVAClassifier()
        self.chain_size = N

    def getName(self):
        return self.name

    def fit(self, X, y):
        self.n_classes = max(unique(y))+1

        self.obs.fit(X,y)
        self.hmm = HMMsvm(self.obs)
        self.hmm.fit([X],[y])

    def predict(self, X):
#        chain_size = self.chain_size
#        predicted = zeros((X.shape[0]),)
#        for i in range(chain_size, len(predicted)):
#            predicted[i] = self.hmm.predict(X[i-chain_size:i,:])[-1]

#        return predicted

        return self.hmm.predict(X)

    def confidence(self, x, q):
        return self.hmm.estimate_emission_probability(x, q)

class SinkHoleClassifier:
    def __init__(self):
        # getName() below relies on self.name, which the original never set;
        # a descriptive default is assumed here.
        self.name = "Sinkhole Classifier"
        self.classifierNB = NBClassifier()
        self.classifierSVM = MyAVAClassifier()
        self.classifierHMM = HmmClassifier3()
        self.classifierHMMSVM = HMMsvmClassifier()

    def getName(self):
        return self.name

    def fit(self, X, y):
        self.n_classes = max(unique(y))+1

        self.classifierNB.fit(X,y)
        self.classifierSVM.fit(X,y)
        self.classifierHMM.fit(X,y)
        self.classifierHMMSVM.fit(X,y)

    def predict(self, X):
        predictedNB = self.classifierNB.predict(X)
        predictedSVM = self.classifierSVM.predict(X)
        predictedHMM = self.classifierHMM.predict(X)
        predictedHMMSVM = self.classifierHMMSVM.predict(X)

        predicted = zeros((X.shape[0], ))

        for i in range(0, len(predicted)):
            candidates = [predictedNB[i], predictedSVM[i], predictedHMM[i], predictedHMMSVM[i]]
            c = Counter(candidates)

            most_common = c.most_common()

            # If there is an equal voting, select something at random
            if len(unique([k[1] for k in most_common])) == 1:
                predicted[i] = most_common[randint(len(most_common))][0]
            else:
                predicted[i] = most_common[0][0]

        return predicted

class MyAVAClassifier:

    def __init__(self):
        self.classifiers = {}
        self.name = "Linear SVM Classifier"
        self.smallname = "svc-ava"
#        self.probabilities = {}

    def getName(self):
        return self.name

    def fit(self, X, y, flr=0):

        n_classes = max(unique(y)) + 1

        # Fall back to a single binary classifier if there are only two classes
        if len(unique(y)) == 1:
            self.only_one_class = True
            self.n_classes = 1
            self.one_class_label = y[0]
            return
        elif len(unique(y)) == 2:

            self.n_classes = n_classes
            self.svm = svm.SVC(probability=True, kernel='poly', gamma=2, C=C)
            self.svm.fit(X,y)
            classes_ = unique(y)
            self.classifiers[(classes_[0], classes_[1])] = self.svm
            self.only_two_classes = True
            self.only_one_class = False

            return
        else:
            self.only_two_classes = False
            self.only_one_class = False

        classes = arange(0, n_classes)
        self.n_classes = n_classes
#        M = n_classes*(n_classes-1) # Number of classifiers

        h = histogram(y, n_classes)[0].astype(float)
        self.prior = h/sum(h)

        transmat = zeros((n_classes, n_classes))

        for i in range(1, len(y)):
            prev = y[i-1]
            cur = y[i]
            transmat[prev,cur] += 1

        transmat = transmat/sum(transmat)

        self.transmat = transmat

        # Add a very small probability for random jump to avoid zero values

        self.transmat += flr
        self.transmat = self.transmat/sum(self.transmat)

        for i in range(0, n_classes):
            for j in range(0, n_classes):
                if i != j and (i,j) not in self.classifiers and (j,i) not in self.classifiers:
#                    print (i,j)
#                    print classes
                    idx_ = bitwise_or(y == classes[i], y == classes[j])
#                    idx_ = bitwise_or(y == i, y == j)

                    X_ = X[idx_, :]

                    y_ = y[idx_]
                    #print y_

                    if len(unique(y_)) > 1:
                        svm_ = svm.SVC(probability=True, kernel='poly', gamma=2, C=C)

#                        print y_
                        svm_.fit(X_, y_)
                        self.classifiers[(i,j)] = svm_
#                        self.probabilities[(i,j)] = svm_.predict_proba(X)

    def estimate_pairwise_class_probability(self, i, j, x):

#        print self.classifiers.keys()

        if (i,j) not in self.classifiers and (j,i) in self.classifiers:
            return self.classifiers[(j,i)].predict_proba(x)[0,1]
        elif (i,j) not in self.classifiers and (j,i) not in self.classifiers:
            return 0.0
        else:
            return self.classifiers[(i,j)].predict_proba(x)[0,0]

    def estimate_posterior_probability(self, i, x):
        mus = zeros((self.n_classes,))
        for j in range(0, self.n_classes):
            if i != j:
                pcp = self.estimate_pairwise_class_probability(i,j,x)
                pcp += 1e-18
                mus[j] = 1/pcp
#        print mus
        S = sum(mus) - (self.n_classes - 2)
#        print S
        return 1/S
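
    # (Explanatory note, not in the original.) The expression above matches
    # the standard pairwise-coupling posterior estimate for one-vs-one SVMs:
    # P_i ~ 1 / ( sum_{j != i} 1/r_ij - (K - 2) ), where r_ij is the pairwise
    # probability of class i against class j and K is the number of classes;
    # the 1e-18 floor merely avoids division by zero.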

    def estimate_posterior_probability_vector(self, x):
        posterior = zeros((self.n_classes,))
        for i in range(0, len(posterior)):
            posterior[i] = self.estimate_posterior_probability(i, x)

        return posterior

#    def estimate_emission_probability(self, i, x):
#        post = self.estimate_posterior_probability_vector(x)
#        # print "post: ", post
#        prior = self.prior
#        # print "prior: ", prior
#        p = post/prior
#        p = p/sum(p)
#
#        return p[i]

#    def estimate_emissmat(self, X):
#        emissmat = zeros((X.shape[0], self.n_classes))
#        for i in range(0, X.shape[0]):
#            for j in range(0, self.n_classes):
#                emissmat[i,j] = self.estimate_emission_probability(j, X[i,:])
#
#        return emissmat

    def predict(self, X):
        predicted = zeros((X.shape[0],))

        if self.only_one_class == True:
            return ones((X.shape[0],))*self.one_class_label
        elif self.only_two_classes == True:
            return self.svm.predict(X)

        for i in range(0, X.shape[0]):
            x = X[i,:]
            P = zeros((self.n_classes,))

            for c in range(0, len(P)):
                P[c] = self.estimate_posterior_probability(c, x)

            pred = argmax(P)
            predicted[i] = pred

        return predicted

def logsum(X):
    if len(X) == 1:
        return log(X[0])
    else:
        return logaddexp(log(X[0]), logsum(X[1:]))
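
# For positive inputs this recursion is equivalent to logsumexp(log(X)) with
# the scipy.misc logsumexp imported above; hypothetical example:
#
#   logsum([1., 2., 3.])    # -> log(6.0)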

def smooth_matrix_1D(X):
    window = scipy.signal.gaussian(51, 4)
    window = window/sum(window)
    intermx = zeros((X.shape[0], X.shape[1]+100))
    intermx[:, 50:-50] = X

    for m in range(0, X.shape[0]):
#        print intermx.shape
        intermx[m,:] = convolve(ravel(intermx[m,:]), window, 'same')

    return intermx[:, 50:-50]

def adm_reconstruct(codeword, h, dmin=.01, dmax=.28):
    x = zeros((1, codeword.shape[1]))

    delta1 = dmin
    delta2 = dmin
    Sum = h

    x[0] = h
    for i in range(0, codeword.shape[1]):
        if codeword[0,i] == 0:
            delta1 = dmin
            delta2 = dmin

        elif codeword[0,i] == 1:
            delta2 = dmin
            Sum += delta1
            delta1 *= 2
            if delta1 > dmax:
                delta1 = dmax

        elif codeword[0,i] == -1:
            delta1 = dmin
            Sum -= delta2
            delta2 *= 2
            if delta2 > dmax:
                delta2 = dmax
        x[0,i] = Sum
    return x

def adm(x, dmin=.01, dmax=.28, tol=0.0001):

    # Adaptive delta modulation, adapted from the code (adeltamod.m):
    #
    # Adaptive Delta Modulator
    # by Gandhar Desai (gdesai)
    # BITS Pilani Goa Campus
    # Date: 28 Sept, 2013

    xsig = x

    Lx = len(x)

    ADMout = zeros((1, Lx))
    codevec = zeros((1, Lx))

    Sum = x[0]
    delta1 = dmin
    delta2 = dmin
    mult1 = 2
    mult2 = 2
    for i in range(0, Lx):
        #print abs(xsig[i] - Sum)
        if (abs(xsig[i] - Sum) < tol):
            bit = 0
            delta2 = dmin
            delta1 = dmin

        elif (xsig[i] >= Sum):
            bit = 1
            delta2 = dmin
            Sum += delta1
            delta1 *= mult1
            if delta1 > dmax:
                delta1 = dmax

        else:
            bit = -1
            delta1 = dmin
            Sum -= delta2
            delta2 *= mult2
            if delta2 > dmax:
                delta2 = dmax

        ADMout[0, i] = Sum
        codevec[0, i] = bit

    return ADMout, codevec, x[0]
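
# Round-trip sketch for the ADM pair above (hypothetical signal, not in the
# original file): encode a slowly varying track into a {-1, 0, +1} codeword
# plus its first value, then rebuild an approximation of it.
#
#   sig = linspace(0., 1., 200)
#   admout, code, first = adm(sig, dmin=0.001, dmax=0.28, tol=0.001)
#   rec = adm_reconstruct(code, first, dmin=0.001, dmax=0.28)   # shape (1, 200)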

def median_filter(v, K):
    v2 = zeros((len(v),))
    for i in range(K, len(v)):
        seq = v[i-K:i]
        m = median(seq)
        v2[i-K:i] = m

    return v2


from models import ReverbModel


if __name__=="__main__":
    if len(argv) != 2:
        print "[EE] Wrong number of arguments"
        print "[II] Correct syntax is:"
        print "[II] \t%s <trainingdir>" % argv[0]
        print "[II] where <trainingdir> is the directory containing the features files in the format *_features.yaml"
        print "[II] and the parameters in the format *_parameters.yaml"
        exit(-1)

    n_clusters = 3
    UpsamplingFactor = 1
    dmin = 0.001
    dmax = 0.28
    tol = 0.001

    d1min = 0.01
    d1max = 0.1

    g1min = 0.01
    g1max = 0.99

    damin = 0.006
    damax = 0.012

    Gmin = 0.01
    Gmax = 0.99

    gcmin = 0.01
    gcmax = 0.99

    # For searching the directory
    from glob import glob

    traindir = argv[1]

    songs_in_dir = glob("%s/*_features.yaml" % traindir)

    print "[II] Using files: %s" % songs_in_dir

    chain_length = 1

#    fullfeatures_pool = Pool()

    features_vector = None
    parameters_vector = None
    index_vector = None

    idx = 0
    # Shuffle segments
    random.shuffle(songs_in_dir)
    for f_ in songs_in_dir:
        infile = f_
        paramfile = "%s_parameters.yaml" % f_.split('_features.yaml')[0]

        print "[II] Using features file: %s" % infile
        print "[II] and parameters file: %s" % paramfile

#        paramfile = infile.split(')

        features_pool = YamlInput(filename=infile)()
        parameters_pool = YamlInput(filename=paramfile)()

        d1 = parameters_pool['parameters.d1']
        da = parameters_pool['parameters.da']
        g1 = parameters_pool['parameters.g1']
        gc = parameters_pool['parameters.gc']
        G = parameters_pool['parameters.G']

        feature_captions = features_pool.descriptorNames()
        parameter_captions = ['d1','da','g1','gc','G']

        svmhmmstr = ""

        for c in features_pool.descriptorNames():
            if c.split('.')[0] == 'metadata' or c.split('.')[0] == 'parameter':
                feature_captions.remove(c)

#        close('all')

#        print "[II] Loaded training data from %s (%s) " % (infile, features_pool['metadata.date'][0])
        print "[II] %d Features Available: " % len(feature_captions)

        print str(feature_captions).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7]

        nfeatures_in = len(feature_captions)
        nparameters_in = 5
        features_vector_one = zeros((nfeatures_in, len(features_pool[feature_captions[0]])))
        index_vector_one = ones((len(features_pool[feature_captions[0]]),))
        parameters_vector_one = ones((nparameters_in, len(features_pool[feature_captions[0]])))

        for i in range(0, nfeatures_in):
            features_vector_one[i, :] = features_pool[feature_captions[i]].T
#        for i in range(0, nparameters_in):
#            parameters_vector[i, :] = features_pool[parameter_captions[0]].T

        parameters_vector_one[0, :] = d1*parameters_vector_one[0,:]
        parameters_vector_one[1, :] = g1*parameters_vector_one[1,:]
        parameters_vector_one[2, :] = da*parameters_vector_one[2,:]
        parameters_vector_one[3, :] = gc*parameters_vector_one[3,:]
        parameters_vector_one[4, :] = G*parameters_vector_one[4,:]

        # Stores example index number

        index_vector_one *= idx

        print "[II] %d parameters used:" % len(parameter_captions)
        print str(parameter_captions).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7].replace('parameter.','')

        if features_vector is None:
            features_vector = features_vector_one
        else:
            features_vector = append(features_vector, features_vector_one, 1)

        if parameters_vector is None:
            parameters_vector = parameters_vector_one
        else:
            parameters_vector = append(parameters_vector, parameters_vector_one, 1)

        if index_vector is None:
            index_vector = index_vector_one
        else:
            index_vector = append(index_vector, index_vector_one)

        idx += 1

    print "[II] Marking silent parts"
    features_full_nontransformed_train = features_vector.copy()
#    silent_parts = zeros((1, len(features_pool[feature_captions[i]].T)))

    silent_parts = zeros((1, features_vector.shape[1]))

    rms = features_vector[feature_captions.index('rms'), :]

    # Implementing a hysteresis gate -- the original comment put the high
    # threshold halfway between the mean and the max and the low one halfway
    # between the mean and the min, but the code below actually uses fixed
    # fractions of the mean RMS.

    rms_threshold_mean = mean(rms)

    rms_threshold_max = max(rms)
    rms_threshold_min = min(rms)

    rms_threshold_high = 0.1 * rms_threshold_mean
    rms_threshold_low = 0.01 * rms_threshold_mean

    for n in range(1, len(rms)):
        prev = rms[n-1]
        curr = rms[n]

        if prev >= rms_threshold_high:
            if curr < rms_threshold_low:
                silent_parts[0,n] = 1
            else:
                silent_parts[0,n] = 0
        elif prev <= rms_threshold_low:
            if curr > rms_threshold_high:
                silent_parts[0,n] = 0
            else:
                silent_parts[0,n] = 1
        else:
            silent_parts[0,n] = silent_parts[0,n-1]

    if silent_parts[0,1] == 1:
        silent_parts[0, 0] = 1

    active_index = invert(silent_parts.flatten().astype(bool))

    # Keep only active parts

    features_vector = features_vector[:, active_index]

    # Keep this for debugging purposes

    parameters_vector = parameters_vector[:, active_index]
#    index_vector = index_vector[active_index]

    # Label examples for a chain of chain_length

#    idx = 0
#    for i in range(0, len(index_vector)):
#        index_vector[i] = idx
#        if i % chain_length == 0:
#            idx += 1
#    number_of_examples = idx

    # Scale parameters to roughly [0,1] (note: the code divides by the
    # maximum rather than the max-min range, and rows 3/4 hold gc and G
    # respectively; Gmin/Gmax and gcmin/gcmax happen to coincide here)

    parameters_vector[0,:] = (parameters_vector[0,:] - d1min)/d1max  # d1
    parameters_vector[1,:] = (parameters_vector[1,:] - g1min)/g1max  # g1
    parameters_vector[2,:] = (parameters_vector[2,:] - damin)/damax  # da
    parameters_vector[3,:] = (parameters_vector[3,:] - Gmin)/Gmax    # gc row, G constants
    parameters_vector[4,:] = (parameters_vector[4,:] - gcmin)/gcmax  # G row, gc constants

    moments_vector = zeros((features_vector.shape[0], 2))

    features_vector_original = features_vector.copy()

    print "[II] Storing moments vector"
    for i in range(0, features_vector.shape[0]):
        mean_ = mean(features_vector[i,:])
        std_ = std(features_vector[i,:], ddof=1)
        moments_vector[i,0] = mean_
        moments_vector[i,1] = std_

        features_vector[i,:] = (features_vector[i,:] - mean_)/std_

    features_vector_old_train = features_vector.copy()
#    moments_vector_train = moments_vector

    print "[II] Extracting PCA configuration "

    kernel, q, featurelist = extract_pca_configuration_from_data(features_vector)

    #q = 1
#    q = q + 1

    print "[II] Optimal number of PCs to keep: %d" % q

    feature_captions_array = array(feature_captions)

    features_to_keep = list(feature_captions_array[featurelist])

    print "[II] Decided to keep %d features:" % len(features_to_keep)
    print str(features_to_keep).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7]

    featurelist_bak = featurelist
    features_kept_data = features_vector[featurelist,:]
    features_kept_data_bak = features_kept_data.copy()
#    features_vector_old_train = features_kept_data
    features_vector = (kernel.T*features_kept_data)[0:q,:]
    features_vector_new_train = features_vector.copy()

#    example_features = None
#    example_parameters = None
#    example_idx = None
#
#    for idx in range(0, shape(parameters_vector)[1]-chain_length):
#        example_features_ = features_vector[:, idx:idx+chain_length]
#        # print example_features_
#        example_parameters_ = parameters_vector[:, idx:idx+chain_length]
#        example_idx_ = ones((shape(example_parameters_)[1],))
#
#        if example_features is None:
#            example_features = example_features_
#        else:
#            #print "appending"
#            example_features = append(example_features, example_features_, 1)
#
#        if example_parameters is None:
#            example_parameters = example_parameters_
#        else:
#            example_parameters = append(example_parameters, example_parameters_, 1)
#
#        if example_idx is None:
#            example_idx = example_idx_*idx
#        else:
#            example_idx = append(example_idx, example_idx_*idx, 1)
#
#    features_vector = example_features
#    parameters_vector = example_parameters
#    index_vector = example_idx

    print "[II] Trying ADM-coded parameters"
    print "[II] Upsampling features and parameters by a factor of %d" % UpsamplingFactor

    # Upsampled features and parameters
    features_vector_upsampled = smooth_matrix_1D(repeat(features_vector, UpsamplingFactor, axis=1))
#    features_vector_upsampled = repeat(features_vector, UpsamplingFactor, axis=1)

#    features_vector_upsampled = smooth_matrix_1D(repeat(features_vector_old_train, UpsamplingFactor, axis=1))

#    feature_labels_upsampled = repeat(features_clustering_labels, UpsamplingFactor, axis=0)
    parameters_vector_upsampled = repeat(parameters_vector, UpsamplingFactor, axis=1)

#    km = KMeans(init='k-means++', n_init=10, max_iter=300, tol=0.0000001, verbose=0, n_clusters=12).fit(parameters_vector_upsampled.T)
#    centers_ = km.cluster_centers_
#    labels__ = km.labels_
    # Remove the following two lines in order to restore non-kmeans version
#    parameters_vector_kmeans_upsampled = array(centers_[labels__])
#
#    parameters_vector_upsampled = parameters_vector_kmeans_upsampled.T
#
#    parameters_vector_upsampled = smooth_matrix_1D(parameters_vector_upsampled)

    parameters_vector_upsampled_adm = matrix(zeros(shape(parameters_vector_upsampled)))
    parameters_vector_upsampled_code = matrix(zeros(shape(parameters_vector_upsampled)))
    parameters_vector_upsampled_firstval = matrix(zeros((parameters_vector_upsampled.shape[0], 1)))

    # Reconstructed parameters

    parameters_vector_upsampled_reconstructed = matrix(zeros(shape(parameters_vector_upsampled)))

    # ADM HERE

    def adm_matrix(X, dmin=0.001, dmax=0.28, tol=0.001):

        out = matrix(zeros(shape(X)))
        code = matrix(zeros(shape(X)))
        firstval = matrix(zeros((X.shape[0], 1)))

        for i in range(0, X.shape[0]):
            out[i,:], code[i,:], firstval[i,0] = adm(X[i,:], dmin=dmin, dmax=dmax, tol=tol)

        return out, code, firstval

#        parameters_vector_upsampled_reconstructed[i,:] = adm_reconstruct(parameters_vector_upsampled_code[i,:], parameters_vector_upsampled_firstval[i,0], dmin=dmin, dmax=dmax)

    def adm_matrix_reconstruct(code, firstval, dmin=0.001, dmax=0.28):
        X = matrix(zeros(shape(code)))
        for i in range(0, code.shape[0]):
            X[i,:] = adm_reconstruct(code[i,:], firstval[i,0], dmin=dmin, dmax=dmax)

        return X
2039 | |
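# A minimal round-trip sanity check for the ADM coder above (a sketch, not
# part of the original pipeline; it only assumes adm/adm_reconstruct behave
# as they are used in adm_matrix): decoding the (code, firstval) pair should
# reproduce the reconstruction that adm_matrix itself returns.
def check_adm_roundtrip(P, dmin=0.001, dmax=0.28, tol=0.001):
    out, code, firstval = adm_matrix(P, dmin=dmin, dmax=dmax, tol=tol)
    rec = adm_matrix_reconstruct(code, firstval, dmin=dmin, dmax=dmax)
    print "[II] ADM round-trip MSE: %.6g" % mse(out, rec)

# check_adm_roundtrip(parameters_vector_upsampled)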

parameters_vector_upsampled_adm, parameters_vector_upsampled_code, parameters_vector_upsampled_firstval = adm_matrix(parameters_vector_upsampled, dmin, dmax, tol)
# parameters_vector_upsampled = parameters_vector_upsampled_code


def diff_and_pad(X):
    """First difference along the time axis, zero-padded to keep the shape."""
    return concatenate((
        zeros((
            shape(X)[0],
            1
        )),
        diff(X, axis=1)),
        axis=1)

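# Quick illustration of diff_and_pad (a sketch; matrix() comes from the
# numpy star-import at the top of the file): the output keeps the input's
# shape, with column n holding X[:, n] - X[:, n-1] and a zero first column.
#
#   >>> diff_and_pad(matrix([[1., 2., 4.]]))
#   matrix([[ 0.,  1.,  2.]])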
# To switch to the ADM codes, uncomment:
# parameters_vector_upsampled = parameters_vector_upsampled_code

print "[II] Clustering features."

features_clustering = GMM(n_components=n_clusters, covariance_type='diag')
features_clustering.fit(features_vector_upsampled.T, y=parameters_vector_upsampled_code)

features_clustering_means = features_clustering.means_
features_clustering_labels = features_clustering.predict(features_vector_upsampled.T)
features_clustering_sigmas = features_clustering.covars_

features_vector_upsampled_estimated = zeros(shape(features_vector_upsampled))

# Reconstruct each frame from its assigned cluster mean
for n in range(0, len(features_vector_upsampled_estimated[0])):
    features_vector_upsampled_estimated[:,n] = features_clustering_means[features_clustering_labels[n]]

print "[II] Features MSE for %d clusters: %.3f" % (n_clusters, mse(features_vector_upsampled, features_vector_upsampled_estimated))

def happy_curve_classification(data, classes, estimator, Nd=1):
    """Learning ('happy') curve: success ratio on a held-out half while the
    number of training examples grows."""
    print "[II] Generating Happy Curve"
    from copy import deepcopy
    estimator_fulldata = deepcopy(estimator)
    estimator_fulldata.fit(data, classes)
    labels = estimator_fulldata.predict(data)

    # 1. Split data in two: training and testing

    Ntr = int(round(data.shape[0]/2))  # Training dataset size
    Nts = data.shape[0] - Ntr          # Testing dataset size

    ratios = []

    for n in range(Nd, Ntr):
        train, test, trainl, testl = cross_validation.train_test_split(data, classes, test_size=0.5, random_state=0)
        train = train[0:n,:]
        trainl = trainl[0:n]
        estimator_ = deepcopy(estimator)
        estimator_.fit(train, trainl)
        labels = estimator_.predict(test)

        ratio = sum(array(testl == labels).astype(float))/len(labels)

        ratios.append(ratio)

    return ratios

def cross_validate_classification(data, classes, estimator):
    """Hold-out validation over a range of train/test splits (prints ratios)."""
    print "[II] Cross-validating..."
    from copy import deepcopy
    estimator_fulldata = deepcopy(estimator)
    estimator_fulldata.fit(data, classes)

    percents = arange(0.1, 0.9, 0.1)
    MSEs = []
    labels = estimator_fulldata.predict(data)

    print "[II] for full training-testing: %.2f" % (sum(array(classes == labels).astype(float))/len(labels))

    for p in percents:
        train, test, trainlabels, testlabels = cross_validation.train_test_split(data, classes, test_size=p, random_state=0)
        estimator_ = deepcopy(estimator)
        estimator_.fit(train, trainlabels)
        labels = estimator_.predict(test)
        print "[II] for training(%.2f)-testing(%.2f): %.2f" % ((1-p), p, sum(array(testlabels == labels).astype(float))/len(labels))

    return MSEs

def cross_validate_clustering(data, estimator):
    """Hold-out validation for a clustering model: MSE between samples and
    their assigned cluster means."""
    print "[II] Cross-validating..."
    estimator_fulldata = estimator
    estimator_fulldata.fit(data)

    # labels = estimator_fulldata.predict(data)
    means = estimator_fulldata.means_

    percents = arange(0.1, 0.6, 0.1)
    MSEs = []
    reconstructed = zeros(shape(data))
    labels = estimator.predict(data)
    for n in range(0, len(reconstructed)):
        reconstructed[n,:] = means[labels[n]]

    MSEs.append(mse(data, reconstructed))
    for p in percents:
        train, test = cross_validation.train_test_split(data, test_size=p, random_state=0)
        train = matrix(train)
        test = matrix(test)

        estimator.fit(train)
        means = estimator.means_
        labels = estimator.predict(test)
        reconstructed = zeros(shape(test))
        for n in range(0, len(reconstructed)):
            reconstructed[n,:] = means[labels[n]]

        m = mse(test, reconstructed)

        print "[II] MSE for clustering cross-validated data %.2f-%.2f: %.5f" % ((1-p), p, m)
        MSEs.append(m)

    print "[II] Cross-validation complete"

    return MSEs
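# Hedged usage sketch for the helper above, with names from this file:
# cross-validate the diagonal-covariance GMM used for feature clustering.
#
# cross_validate_clustering(features_vector_upsampled.T,
#                           GMM(n_components=n_clusters, covariance_type='diag'))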


# Construct the parameters alphabet: each symbol is a distinct column
# vector of the parameter code matrix.

def vector_to_states(X):
    """
    Input: a matrix MxN with N samples and M variables.
    Output: the state sequence `state_seq`, a codeword dictionary
    `parameters_alphabet` and its inverse `parameters_alphabet_inv`.
    """

    parameters_alphabet = {}
    n = 0

    for i in range(0, X.shape[1]):
        vec = tuple(ravel(X[:,i]))
        if vec not in parameters_alphabet:
            parameters_alphabet[vec] = n
            n += 1

    parameters_alphabet_inv = dict([(parameters_alphabet[m], m) for m in parameters_alphabet])

    state_seq = array([parameters_alphabet[tuple(ravel(X[:,m]))] for m in range(0, X.shape[1])])

    return state_seq, parameters_alphabet, parameters_alphabet_inv


def states_to_vector(predicted, parameters_alphabet_inv):
    """Inverse mapping: turn a state sequence back into column vectors."""
    estimated = matrix(zeros((len(parameters_alphabet_inv[0]), len(predicted))))
    for i in range(0, len(predicted)):
        estimated[:, i] = matrix(parameters_alphabet_inv[predicted[i]]).T

    return estimated

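# A small consistency check (a sketch, not used below): vector_to_states and
# states_to_vector are inverses on the columns they were built from, so
# encoding a matrix and decoding the state sequence should give it back.
def check_state_codec(P):
    seq, alphabet, alphabet_inv = vector_to_states(P)
    rec = states_to_vector(seq, alphabet_inv)
    print "[II] state codec round-trip MSE: %.6g" % mse(P, rec)

# check_state_codec(parameters_vector_upsampled)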
# state_seq, parameters_alphabet, parameters_alphabet_inv = vector_to_states(parameters_vector_upsampled_code)

# parameters_change_variable = matrix(diff_and_pad(parameters_vector_upsampled) != 0).astype(int)
# changes_state_seq, changes_parameters_alphabet, changes_parameters_alphabet_inv = vector_to_states(parameters_change_variable)

# This would be an HMM that just codes the changes:
# we would have only two states, "change" and "stay the same".

# Uncomment here to work on the ADM code instead of the raw parameters:
# parameters_vector_upsampled = parameters_vector_upsampled_code
# parameters_state_orig, parameter_state_alphabet_orig, parameter_state_alphabet_inv_orig = vector_to_states(parameters_vector)

parameters_state, parameter_state_alphabet, parameter_state_alphabet_inv = vector_to_states(parameters_vector_upsampled)


# Remove outliers: fit one robust covariance model per parameter state and
# mark each point as inlier (+1) or outlier (-1).

ee = EllipticEnvelope()

# inliers = ee.fit(features_vector_upsampled.T).predict(features_vector_upsampled.T) == 1

inoutliers = zeros((len(parameters_state),))

for p in unique(parameters_state):
    ee = EllipticEnvelope(contamination=0.1)
    inoutliers[parameters_state == p] = ee.fit(features_vector_upsampled[:, parameters_state == p].T).predict(features_vector_upsampled[:, parameters_state == p].T)

# Label inliers and outliers

inliers = inoutliers > 0
outliers = inoutliers < 0


# Do pre-processing and re-label the outliers

# completeavac = SinkHoleClassifier()
# completeavac.fit(features_vector_upsampled[:, inliers].T, parameters_state[inliers])

# parameters_state_original = parameters_state.copy()
# parameters_state[outliers] = completeavac.predict(features_vector_upsampled[:, outliers].T)


# features_vector_upsampled = features_vector_upsampled[:, inliers]
# parameters_state = parameters_state[inliers]

# print "UNIQUE PARAMETERS STATE"
# print unique(parameters_state)
print "[II] Testing Gaussian Naive Bayes Classifier"
gnb = GaussianNB()
gnb.fit(features_vector_upsampled.T, parameters_state)

parameters_state_estimated = gnb.predict(features_vector_upsampled.T)

output = states_to_vector(parameters_state_estimated, parameter_state_alphabet_inv)

figure()
subplot(211)
plot(parameters_vector_upsampled.T)
title('Parameter value')
ylabel('value')
xlabel('frame \#')
subplot(212)
# plot(smooth_matrix_1D(output).T)
plot(output.T)
ylabel('value')
xlabel('frame \#')

errorvector_nbc = [mse(output[:,i], parameters_vector_upsampled[:,i]) for i in range(0, len(parameters_state_estimated))]

# cross_validate_classification(features_vector_upsampled.T, parameters_state, gnb)
# hc = happy_curve_classification(features_vector_upsampled.T, parameters_state, gnb)
#
# figure()
# plot(hc)
# figure()

print "[II] Trying Multinomial HMM"

# In order to do classification with HMMs, we need to:
#  1. Split the parameters into classes
#  2. Train one model per class
#  3. Feed our data to all the models
#  4. Check which one has the best score and assign its class to M
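# A minimal sketch of steps 1-4 above (a hypothetical helper, using the old
# hmmlearn API already used elsewhere in this file, where fit() takes a list
# of observation sequences): one GaussianHMM per class, and the class whose
# model scores the test sequence highest wins.
def hmm_vote(train_X, train_y, test_seq, n_components=1):
    models = {}
    for c in unique(train_y):
        m = hmm.GaussianHMM(n_components=n_components, covariance_type='full')
        m.fit([train_X[train_y == c, :]])
        models[c] = m
    scores = dict((c, models[c].score(test_seq)) for c in models)
    return max(scores, key=scores.get)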
class SVMClassifier:
    def __init__(self, **kwargs):
        print "[II] SVM Classifier"
        # self.clf = svm.SVC(**kwargs)
        self.name = "SVM"
        self.smallname = "svm"
        self.clf = multiclass.OneVsOneClassifier(svm.SVC(**kwargs))

    def fit(self, X, classes):
        n_classes = max(unique(classes)) + 1

        self.clf.fit(X, classes)

    def predict(self, X):
        return self.clf.predict(X)

    def getName(self):
        return self.name

    def cross_validate(self, data, classes):
        print "[II] SVM Classifier cross-validating..."
        from copy import deepcopy
        estimator = deepcopy(self)
        estimator_fulldata = deepcopy(self)
        estimator_fulldata.fit(data, classes)

        percents = arange(0.1, 0.9, 0.1)
        self.percents = percents * 100

        RATIOS = []
        labels = estimator.predict(data)

        print "[II] for full training-testing: %.2f" % (sum(array(classes == labels).astype(float))/len(labels))

        for p in percents:
            traindata, testdata, trainlabels, testlabels = cross_validation.train_test_split(data, classes, test_size=p, random_state=1)

            # Training phase
            estimator_ = deepcopy(estimator)
            estimator_.fit(traindata, trainlabels)

            predicted_labels = estimator_.predict(testdata)

            m = sum(array(predicted_labels == testlabels).astype(float))/len(testlabels)

            print "[II] [%.2f-%.2f] hold-out validation for SVM Classifier: %.1f" % (p, (1-p), m)

            RATIOS.append(m)

        return RATIOS, percents

class HmmClassifier2:
    def __init__(self, N=2, chain_size=4, n_components=1):
        print "[II] HMM Classifier (%d states, %d components)" % (N, n_components)
        self.name = "HMM (%d states, %d components)" % (N, n_components)
        self.n_components = n_components
        self.chain_size = chain_size
        self.hmms_ = []
        self.N = N
        self.n_states = N

    def getName(self):
        return self.name

    def fit(self, features, parameters):
        self.n_params = len(unique(parameters))

        for n in range(0, self.n_params):
            hmm_ = hmm.GaussianHMM(n_components=self.n_components, covariance_type='full')

            # Get training data for each parameter: every chain of
            # chain_size frames that ends in parameter n
            for i in range(0, len(parameters)-self.chain_size):
                seq = features[i:i+self.chain_size, :]
                if parameters[i+self.chain_size-1] == n and seq.shape[0] == self.chain_size:
                    hmm_.fit([seq])

            self.hmms_.append(hmm_)

    def predict(self, X):
        labels = zeros((X.shape[0],))
        N = self.N

        scores = zeros((self.n_states,))

        for i in range(0, len(labels)):
            testdata = X[i:i+self.chain_size,:]

            for j in range(0, self.n_states):
                scores[j] = self.hmms_[j].score(testdata)
            labels[i] = argmax(scores)

        return labels

    def cross_validate(self, data, classes):
        print "[II] HMM2 Classifier cross-validating..."
        from copy import deepcopy
        estimator = deepcopy(self)
        estimator_fulldata = deepcopy(self)
        estimator_fulldata.fit(data, classes)

        percents = arange(0.1, 0.9, 0.1)
        self.percents = percents * 100

        RATIOS = []
        labels = estimator.predict(data)

        print "[II] for full training-testing: %.2f" % (sum(array(classes == labels).astype(float))/len(labels))

        for p in percents:
            traindata, testdata, trainlabels, testlabels = cross_validation.train_test_split(data, classes, test_size=p, random_state=1)

            # Training phase
            estimator_ = deepcopy(estimator)
            estimator_.fit(traindata, trainlabels)

            predicted_labels = estimator_.predict(testdata)

            m = sum(array(predicted_labels == testlabels).astype(float))/len(testlabels)

            print "[II] [%.2f-%.2f] hold-out validation for HMM2 Classifier: %.1f" % (p, (1-p), m)

            RATIOS.append(m)

        return RATIOS, percents

class HmmClassifier:
    def __init__(self, N=2, n_components=1):
        print "[II] HMM Classifier (%d states, %d components)" % (N, n_components)
        self.name = "HMM (%d states, %d components)" % (N, n_components)
        self.n_components = n_components
        self.chain_size = N
        self.hmms_ = []
        self.N = N

    def getName(self):
        return self.name

    def fit(self, X, states):
        self.n_states = len(unique(states))

        for n in range(0, self.n_states):
            hmm_ = hmm.GaussianHMM(n_components=self.n_components, covariance_type='full')

            # Get training data for each class
            vector = X[states == n,:]

            # Fit the HMM (old hmmlearn API: fit takes a list of sequences)
            hmm_.fit([vector])

            # And append to the list
            self.hmms_.append(hmm_)

    def predict(self, X):
        # Score consecutive blocks of N frames with every class HMM and
        # label each block with the best-scoring class.
        labels = zeros((X.shape[0],))
        N = self.N

        m = 0

        scores = zeros((1, self.n_states))

        while m*N < X.shape[0]:
            if (m+1)*N > X.shape[0]:
                testdata = X[m*N:,:]
            else:
                testdata = X[m*N:(m+1)*N,:]

            for i in range(0, self.n_states):
                scores[0,i] = self.hmms_[i].score(testdata)

            if (m+1)*N > X.shape[0]:
                labels[m*N:] = argmax(scores)
            else:
                labels[m*N:(m+1)*N] = argmax(scores)

            m += 1

        return labels

    # def cross_validate(self, data, classes, index_vector):
    #     print "[II] Cross-validating..."
    #     from copy import deepcopy
    #     estimator = deepcopy(self)
    #     estimator_fulldata = deepcopy(self)
    #     estimator_fulldata.fit(data, classes)
    #
    #     percents = arange(0.1, 0.9, 0.1)
    #     self.percents = percents * 100
    #     RATIOS = []
    #     labels = estimator.predict(data)
    #
    #     print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(labels))
    #
    #     for p in percents:
    #         train, test, trainlabels, testlabels = cross_validation.train_test_split(data, classes, test_size=p, random_state=1)
    #         estimator_ = deepcopy(estimator)
    #         estimator_.fit(train, trainlabels)
    #         labels = estimator_.predict(test)
    #         m = sum(array(testlabels==labels).astype(float))/len(labels)
    #         RATIOS.append(m)
    #         print "[II] for training(%.2f)-testing(%.2f): %.2f" % ((1-p), p, sum(array(testlabels==labels).astype(float))/len(labels))
    #
    #     return RATIOS, percents
    def cross_validate(self, data, classes):
        print "[II] HMM Classifier cross-validating..."
        from copy import deepcopy
        estimator = deepcopy(self)
        estimator_fulldata = deepcopy(self)
        estimator_fulldata.fit(data, classes)

        percents = arange(0.1, 0.9, 0.1)
        self.percents = percents * 100

        RATIOS = []
        labels = estimator.predict(data)

        print "[II] for full training-testing: %.2f" % (sum(array(classes == labels).astype(float))/len(labels))

        for p in percents:
            traindata, testdata, trainlabels, testlabels = cross_validation.train_test_split(data, classes, test_size=p, random_state=1)

            # Training phase
            estimator_ = deepcopy(estimator)
            estimator_.fit(traindata, trainlabels)

            predicted_labels = estimator_.predict(testdata)

            # (an index-vector based variant of this split, kept for reference)
            # for t in train:
            #     if traindata is None:
            #         traindata = data[index_vector == t, :]
            #         trainlabels = classes[index_vector == t]
            #     else:
            #         traindata = append(traindata, data[index_vector == t, :], 0)
            #         trainlabels = append(trainlabels, classes[index_vector == t])
            #
            # estimator_ = deepcopy(estimator)
            # estimator_.fit(traindata, trainlabels)
            #
            # for t in test:
            #     if testdata is None:
            #         testdata = data[index_vector == t, :]
            #         testlabels = classes[index_vector == t]
            #     else:
            #         testdata = append(testdata, data[index_vector == t, :], 0)
            #         testlabels = append(testlabels, classes[index_vector == t])
            #
            # predicted_labels = estimator_.predict(testdata)

            m = sum(array(predicted_labels == testlabels).astype(float))/len(testlabels)

            print "[II] [%.2f-%.2f] hold-out validation for HMM Classifier: %.1f" % (p, (1-p), m)

            RATIOS.append(m)

        return RATIOS, percents

def cross_validate_full_k_fold(data, classes, list_of_classifiers, K=10):
    """K-fold cross-validates all available classifiers, plots and saves barplots."""

    ratios = []
    for l in list_of_classifiers:
        ratio = l.k_fold_cross_validate(data, classes, K)
        ratios.append(ratio)

    L = len(ratios)

    ind = arange(L)

    fig, ax = subplots()
    rects = []

    width = 10/float(len(ratios))

    colors = ['r', 'y', 'g', 'c']

    labels_ = []

    for i in range(0, len(ratios)):
        rects.append(ax.bar(i*width, ratios[i], width, color=colors[i % len(colors)]))
        labels_.append(list_of_classifiers[i].getName())

    return ratios

def cross_validate_full(seq, classes, list_of_classifiers):
    """Cross-validates all available classifiers, plots and saves barplots."""

    ratios = []
    percents = []
    for l in list_of_classifiers:
        ratios_, percents_ = l.cross_validate(seq, classes)
        ratios.append(ratios_)
        percents.append(percents_)

    L = len(ratios[0])

    ind = arange(L)

    fig, ax = subplots()

    rects = []

    W = 0.8
    width = W/float(len(ratios))

    colors = ['r', 'y', 'g', 'c']

    labels_ = []
    for i in range(0, len(ratios)):
        rects.append(ax.bar(ind+i*width, ratios[i], width, color=colors[i % len(colors)]))
        labels_.append(list_of_classifiers[i].getName())

    ax.legend(tuple(labels_))

    ax.set_xticks(ind+W/2)
    ax.set_xticklabels(tuple(percents[0]*100))
    ax.set_xlabel("Percentage % of test data")
    ax.set_ylabel("Success ratio")

    # rects1 = ax.bar(ind, ratios[0], 0.35, color='r')
    # rects2 = ax.bar(ind+0.351, ratios[1], 0.35, color='y')

    return ratios

# hmmc = HmmClassifier(N=len(unique(parameters_state)), n_components=1)
# hmmc.fit(features_vector_upsampled.T, parameters_state)
#
# hmmc2 = HmmClassifier(N=12, n_components=4)
# hmmc2.fit(features_vector_upsampled.T, parameters_state)

svmc = SVMClassifier(probability=True)
svmc.fit(features_vector_upsampled.T, parameters_state)
# svmc.cross_validate(features_vector_upsampled.T, parameters_state, index_vector)

nbc = NBClassifier()
nbc.fit(features_vector_upsampled.T, parameters_state)

# hchmmc = happy_curve_classification(features_vector_upsampled.T, parameters_state, hmmc, Nd=100)
# cross_validate_classification(features_vector_upsampled.T, parameters_state, hmmc)
# hmmc.cross_validate(features_vector_upsampled.T, parameters_state)

hmmc3 = HmmClassifier3(N=2, n_components=max(unique(parameters_state)))
# print(parameters_state)
plot(parameters_state)

print(shape(features_vector_upsampled))
obs = hmmc3.fit(features_vector_upsampled.T, parameters_state)

# svmhmm = HmmSVMClassifier()
# svmhmm.fit(features_vector_upsampled.T, parameters_state)
# ratios = cross_validate_full(features_vector_upsampled.T, parameters_state, [nbc, svmc])
# ratios = cross_validate_full_k_fold(features_vector_upsampled.T, parameters_state,
#                                     [hmmc3])

def approximate_chain_size(data, parameters):
    """Sweep the number of mixture components (or, commented out, the chain
    size) and collect K-fold cross-validation success ratios."""
    ratios = []

    # chainsizes = arange(1, 40)
    # for cs in chainsizes:
    #     print "[II] Trying chain size: %d" % cs
    #     hmmc = HmmClassifier3(N=cs, n_components=2)
    #     ratios.append(hmmc.k_fold_cross_validate(data, parameters, K=10))

    componentssize = arange(1, 12)

    for cs in componentssize:
        print "[II] Trying component size: %d" % cs
        hmmc = HmmClassifier3(N=2, n_components=cs)
        ratios.append(hmmc.k_fold_cross_validate(data, parameters, K=2))

    figure()
    plot(componentssize, ratios)
    xlabel('Number of components')
    ylabel('Success Ratio')
    title('2-fold cross-validation success ratio vs number of components')

    return ratios

# ratios = approximate_chain_size(features_vector_upsampled.T, parameters_state)

# predictedhmmc2 = hmmc2.predict(features_vector_upsampled.T)

# figure()

# hmms_ = []
#
# for n in range(0, len(parameter_state_alphabet)):
#     # hmm_ = hmm.GMMHMM(n_components=1, n_mix=2)
#     hmm_ = hmm.GaussianHMM(n_components=1, covariance_type='full')
#
#     # Get training data for each class
#     vector = features_vector_upsampled[:, parameters_state == n]
#
#     # if vector.shape[1] < n_clusters:
#     #     hmms_.append(None)
#     # else:
#     hmm_.fit([vector.T])
#
#     # Append to the list
#     hmms_.append(hmm_)
#
# labels = zeros((features_vector_upsampled.shape[1],))
#
# N = 20
# m = 0
#
# while m*N < features_vector_upsampled.shape[1]:
#
#     scores = zeros((1, len(parameter_state_alphabet)))
#
#     if (m+1)*N > features_vector_upsampled.shape[1]:
#         testdata = features_vector_upsampled[:, m*N:]
#     else:
#         testdata = features_vector_upsampled[:, m*N:(m+1)*N]
#
#     for i in range(0, len(parameter_state_alphabet)):
#         if hmms_[i] is not None:
#             scores[0,i] = hmms_[i].score(testdata.T)
#         else:
#             scores[0,i] = -100000  # Very large negative score
#     if (m+1)*N >= features_vector_upsampled.shape[1]:
#         labels[m*N:] = argmax(scores)
#     else:
#         labels[m*N:(m+1)*N] = argmax(scores)
#
#     m += 1


# figure()
# plot(labels.T)


# labels = hmmc.predict(features_vector_upsampled.T)
# estimated = states_to_vector(labels, parameter_state_alphabet_inv)
# plot(estimated.T, 'r--')
#
# title('Estimated parameter values')
# legend(['Naive Bayes Classifier', 'HMM chain size %d (%.1fms)' % (N, float(N)/UpsamplingFactor*23.0)])
#
# ylabel('value')
# xlabel('frame #')

# close('all')

# plot(features_clustering_labels/float(max(features_clustering_labels)))
# plot(parameters_vector_upsampled.T/max(ravel(parameters_vector_upsampled)))

def plot_clusters(x, labels):
    figure()
    symbols = ['>', 'x', '.', '<', 'v']
    colors = ['b', 'r', 'g', 'm', 'c']

    for r in range(0, x.shape[0]):
        scatter(x[r,0], x[r,1], c=colors[int(labels[r]) % len(colors)], marker=symbols[int(labels[r]) % len(symbols)])

# plot_clusters(features_vector_upsampled.T, parameters_state)

# SVM

class HmmClassifierSVN_Naive:
    # A naive variant of HmmClassifier above: same block-scoring logic,
    # without the name metadata.
    def __init__(self, N=2, n_components=1):
        self.n_components = n_components
        self.chain_size = N
        self.hmms_ = []
        self.N = N

    def fit(self, X, states):
        self.n_states = len(unique(states))

        for n in range(0, self.n_states):
            hmm_ = hmm.GaussianHMM(n_components=self.n_components, covariance_type='full')

            # Get training data for each class
            vector = X[states == n,:]

            # Fit the HMM
            hmm_.fit([vector])

            # And append to the list
            self.hmms_.append(hmm_)

    def predict(self, X):
        labels = zeros((X.shape[0],))
        N = self.N

        m = 0

        scores = zeros((1, self.n_states))

        while m*N < X.shape[0]:
            if (m+1)*N > X.shape[0]:
                testdata = X[m*N:,:]
            else:
                testdata = X[m*N:(m+1)*N,:]

            for i in range(0, self.n_states):
                scores[0,i] = self.hmms_[i].score(testdata)

            if (m+1)*N > X.shape[0]:
                labels[m*N:] = argmax(scores)
            else:
                labels[m*N:(m+1)*N] = argmax(scores)

            m += 1

        return labels

def plot_parameters_estimation(list_of_estimators):
    for e in list_of_estimators:
        name_ = e.getName()

        fig = figure()
        fig.suptitle('Parameters estimation using %s' % name_, fontweight='bold')
        subplot(311)
        title('original parameters')
        param_real = states_to_vector(parameters_state, parameter_state_alphabet_inv).T
        plot(param_real)
        xlabel('sample')
        ylabel('normalized parameter value')
        legend(parameter_captions)
        subplot(312)
        title('estimated parameters')
        pred = e.predict(features_vector_upsampled.T)
        param_est = states_to_vector(pred, parameter_state_alphabet_inv).T
        plot(param_est)
        xlabel('sample')
        ylabel('normalized parameter value')
        legend(parameter_captions)
        subplot(313)
        error_real_est = zeros((len(param_est),))
        for i in range(0, len(error_real_est)):
            error_real_est[i] = mse(param_real[i], param_est[i])

        totalmse = sum(error_real_est)
        title('mean squared error (total: %.3f)' % totalmse)

        plot(error_real_est)
        xlabel('sample')
        ylabel('mean squared error')

# hmm2 = hmm.MultinomialHMM(n_components=n_classes, startprob=prior_p_q, transmat=transmat)


# TODO: Embed SVM emissions into an HMM. Try to pick the maximum-likelihood
# sequence from classifiers as in HmmClassifier3: for every class, train an
# SVM with n_components as states, and compute the log-likelihood for
# prediction. Should work.
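# A hedged sketch of the TODO above, not the author's final method: with a
# probability-calibrated SVC (svm.SVC(probability=True), as SVMClassifier is
# constructed further up), per-frame class posteriors can serve as emission
# log-probabilities for a Viterbi-style decoder. `svc` is a hypothetical
# fitted svm.SVC instance.
def svm_emission_logprob(svc, X, eps=1e-12):
    # rows: frames, columns: classes; returns log P(class | frame)
    return log(svc.predict_proba(X) + eps)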


# class HMMsvmClassifier:
#     def __init__(self, N=6):
#         print "[II] HMM-SVM Classifier (%d time steps)" % N
#         self.name = "HMM-SVM (%d time steps)" % N
#         self.chain_size = N
#         self.svm = MyAVAClassifier()
#
#     def getName(self):
#         return self.name
#
#     def fit(self, features, parameters):
#
#         # First train the emission SVM
#         self.svm.fit(features, parameters)
#
#         # print parameters
#         n_classes = max(unique(parameters)) + 1
#
#         print "[II] Number of classes: ", n_classes
#         hmms = [None]*n_classes
#         chain_size = self.chain_size
#         params = [None]*n_classes
#         obs = [None]*n_classes
#
#         for i in range(chain_size, len(parameters)):
#             class_ = parameters[i-1]
#             params_ = parameters[i-chain_size:i]
#             feats_ = features[i-chain_size:i,:]
#
#             if params[class_] is None:
#                 params[class_] = [params_]
#                 obs[class_] = [feats_]
#             else:
#                 params[class_].append(params_)
#                 obs[class_].append(feats_)
#
#         for i in range(0, len(params)):
#             if params[i] is not None and len(params[i]) != 0:
#                 hmm_ = HMMsvm(self.svm)
#                 # print "[II] %d Fitting params: " % i, params[i]
#                 print "[!!] OBSERVATIONS: "
#                 print obs[i]
#                 hmm_.fit(obs[i], params[i])
#
#                 print "[!!] PARAMETERS: "
#                 print params[i]
#
#                 # TODO: left here on 06/07 19:56
#
#                 # hmm_.fit(obs[i], params[i], flr=1e-9)
#                 hmms[i] = hmm_
#
#         self.hmms = hmms
#
#         return obs
#
#     def predict(self, features, mfilt=20):
#         chain_size = self.chain_size
#         hmms = self.hmms
#         predicted_classes = zeros((features.shape[0],))
#         for i in range(chain_size, features.shape[0]):
#             scores = zeros((len(hmms),))
#
#             seq = features[i-chain_size:i, :]
#
#             for j in range(0, len(hmms)):
#                 if hmms[j] is not None:
#                     q, p = hmms[j].logviterbi(seq)
#
#                     scores[j] = logsumexp(p)
#                 else:
#                     scores[j] = -infty
#
#             predicted_classes[i] = argmax(scores)
#
#         # A median filter could be applied at the end:
#         # predicted_classes = median_filter(predicted_classes, mfilt)
#
#         return predicted_classes

class HmmSvmClassifier3:
    def __init__(self, N=2, n_components=1):
        print "[II] HMM/SVM Classifier (%d states, %d components)" % (N, n_components)
        self.name = "HMM/SVM (%d time steps, %d components)" % (N, n_components)
        self.n_components = n_components
        self.smallname = "hmmsvmc"
        self.chain_size = N

    def getName(self):
        return self.name

    def fit(self, features, parameters):

        # print parameters
        n_classes = max(unique(parameters)) + 1

        if n_classes == 1:
            self.only_one_class = True
            return
        else:
            self.only_one_class = False

        print "[II] Number of classes: ", n_classes
        hmms = [None]*n_classes
        # svms = [None]*n_classes
        chain_size = self.chain_size
        obs = [None]*n_classes
        cls = [None]*n_classes

        # Collect, for every class, the chains of frames that end in it
        for i in range(chain_size, len(parameters)):
            class_ = parameters[i-1]
            seq = features[i-chain_size:i,:]
            params = parameters[i-chain_size:i]

            if obs[class_] is None:
                obs[class_] = [seq]
                cls[class_] = [params]
            else:
                obs[class_].append(seq)
                cls[class_].append(params)

        # One emission SVM plus one HMM per class
        for i in range(0, len(obs)):
            if obs[i] is not None and len(obs[i]) != 0:
                # hmm_ = hmm.GaussianHMM(n_components=self.n_components, covariance_type='full')
                svm_ = MyAVAClassifier()
                print obs[i]
                print cls[i]

                O = obs[i][0]
                C = cls[i][0]

                for j in range(0, len(obs[i])):
                    O = append(O, obs[i][j], 1)
                    C = append(C, cls[i][j], 1)

                print O
                print C

                svm_.fit(O.T, C)
                hmm_ = HMMsvm(svm_)
                hmm_.fit(obs[i], cls[i])
                hmms[i] = hmm_

        self.hmms = hmms

        return obs

    def predict(self, features, mfilt=20):

        if self.only_one_class == True:
            return zeros((features.shape[0],))

        chain_size = self.chain_size
        hmms = self.hmms
        predicted_classes = zeros((features.shape[0],))

        for i in range(chain_size, features.shape[0]):
            scores = zeros((len(hmms),))

            seq = features[i-chain_size:i, :]

            for j in range(0, len(hmms)):
                if hmms[j] is not None:
                    print hmms[j].score(seq)
                    scores[j] = hmms[j].score(seq)[j]
                else:
                    scores[j] = -infty

            predicted_classes[i] = argmax(scores)

        # A median filter could be applied at the end:
        # predicted_classes = median_filter(predicted_classes, mfilt)

        return predicted_classes


# newstates = self.logviterbi()

# (leftover Baum-Welch re-estimation sketch, kept for reference)
#
# # Alphas and Betas
# X = features
# alphas = zeros((T, n_classes))
# betas = zeros((T, n_classes))
#
# forward_messages = zeros((T, n_classes))
# backward_messages = zeros((T, n_classes))
#
# print "[II] Computing alpha, beta..."
# for t in range(1, T+1):
#     forward_messages[t-1,:] = self._forward_message(forward_messages[t-2,:], X[0:t,])
#     backward_messages[-t,:] = self._backward_message(backward_messages[-t+1], X[-t:,:])
#
# print "[II] Computing ksu..."
#
# a_norm = forward_messages[-1, n_classes-1]  # alpha_T
#
# ksu = zeros((T, n_classes, n_classes))
#
# for i in range(0, n_classes):
#     for j in range(0, n_classes):
#         for t in range(0, T-1):
#             ksu[t,i,j] = exp(forward_messages[t, i] + log(transmat[i,j]) + log(self.estimate_emission_probability(X[t,:], j)) + backward_messages[t+1,j] - a_norm)
#
# self.alphas = forward_messages
# self.betas = backward_messages
# self.ksu = ksu
#
# transmat_new = zeros((n_classes, n_classes))
#
# for i in range(0, n_classes):
#     for j in range(0, n_classes):
#         transmat_new[i,j] = sum(ksu[:,i,j])/sum(ksu[:,i,:])
#
# self.transmat_new = transmat_new

# def forward(self, X):
#     # Compute the log likelihood using the forward algorithm
#
#     # Number of states
#     N = self.n_classes
#
#     # Number of observations
#     T = X.shape[0]
#
#     transmat = self.transmat
#
#     # Initialization
#     # a1 = ones((N,))/N
#     a1 = self.prior
#
#     alpha = zeros((N,))
#     for i in range(0, N):
#         alpha[i] = log(a1[i]) + log(self.estimate_emission_probability(X[0,:], i))
#
#     # Recursion
#     for t in range(0, T):
#         alpha_old = alpha.copy()
#         x = X[t, :]
#         for j in range(0, N):
#             tosum = zeros((N,))
#             for i in range(0, N):
#                 tosum[i] = alpha_old[i] + log(transmat[i,j])
#
#             Sum = logsumexp(tosum)
#             bj = self.estimate_emission_probability(x, j)
#
#             alpha[j] = Sum + log(bj)
#
#     tosum = zeros((N,))
#     for i in range(0, N):
#         tosum[i] = alpha[i] + log(transmat[i,N-1])
#
#     return tosum
#
# def backward(self, X):
#     # Number of states
#     N = self.n_classes
#
#     # Number of observations
#     T = X.shape[0]
#
#     transmat = self.transmat
#
#     # Initialization
#     b1 = ones((N,))/N
#
#     beta = zeros((N,))
#     for i in range(0, N):
#         beta[i] = log(b1[i]) + log(self.estimate_emission_probability(X[-1,:], i))
#
#     for t in range(0, T):
#         beta_old = beta.copy()
#         x = X[-t, :]
#         for j in range(0, N):
#             tosum = zeros((N,))
#             for i in range(0, N):
#                 tosum[i] = beta_old[i] + log(transmat[i,j])
#
#             Sum = logsumexp(tosum)
#             bj = self.estimate_emission_probability(x, j)
#             beta[j] = Sum + log(bj)
#
#     tosum = zeros((N,))
#     for i in range(0, N):
#         tosum[i] = beta[i] + log(transmat[i,0])
#
#     # p = logsumexp(tosum)
#
#     return tosum

# NOTE: the two functions below read like leftover methods of an HMM class
# (they expect a `self` with prior/transmat/estimate_emission_probability);
# they are kept as written.

def _log_likelihood(self, X):

    return logsumexp(self.forward(X))


def _likelihood(self, X):
    # Number of states
    N = self.n_classes

    # Number of observations
    T = X.shape[0]

    transmat = self.transmat

    # Initialization
    # a1 = ones((N,))/N
    a1 = self.prior

    alpha = zeros((N,))
    for i in range(0, N):
        alpha[i] = a1[i]*self.estimate_emission_probability(X[0,:], i)

    # Recursion
    print alpha
    for t in range(1, T):
        alpha_old = alpha.copy()
        x = X[t, :]
        for j in range(0, N):
            Sum = 0
            for i in range(0, N):
                Sum += alpha_old[i]*transmat[i,j]

            alpha[j] = Sum*self.estimate_emission_probability(x, j)
        print alpha

    # Termination
    Sum = 0
    for i in range(0, N):
        Sum += alpha[i]*transmat[i, N-1]

    p = Sum

    return p

# def fit(self, X, states):
#     # print parameters
#     n_classes = max(unique(states)) + 1
#
#     # svms = [None]*n_classes
#     obs = [None]*n_classes
#     sta = [None]*n_classes
#     chain_size = self.chain_size
#
#     print "[II] Number of classes: ", n_classes
#
#     # For each class:
#     #   concatenate the examples,
#     #   fit an SVM.
#
#     for i in range(chain_size, len(states)):
#         class_ = states[i-1]
#         seq = X[i-chain_size:i, :]
#         states_ = states[i-chain_size:i]
#
#         if obs[class_] is None:
#             obs[class_] = [seq]
#             sta[class_] = [states_]
#             self.svms.append(MyAVAClassifier())
#         else:
#             obs[class_].append(seq)
#             sta[class_].append(states_)
#
#     for i in range(0, len(obs)):
#         o = None
#         s = None
#
#         for j in range(0, len(obs[i])):
#             if o is None:
#                 o = obs[i][j]
#                 s = sta[i][j]
#             else:
#                 o = append(o, obs[i][j], 0)
#                 s = append(s, sta[i][j])
#
#         self.svms[i].fit(o, s)

# def predict(self, features):
#     chain_size = self.chain_size
#     svms = self.svms
#
#     predicted_classes = zeros((features.shape[0],))
#     for i in range(chain_size, features.shape[0]):
#         scores = zeros((len(svms)))
#
#         seq = features[i-chain_size:i, :]
#
#         for j in range(0, len(svms)):
#             if svms[j] is not None:
#                 scores[j] = svms[j].compute_log_likelihood(seq)
#             else:
#                 scores[j] = -infty
#         predicted_classes[i] = argmax(scores)
#
#     return predicted_classes


# Marginalize over the latent variable:
# for i in range(0, X.shape[0]):
#     P = zeros((self.n_classes,))
#     x = X[i,:]
#     for j in range(0, len(P)):
#         P[j] = self.estimate_emission_probability(j, x)
#
#     S[i] = log(sum(P[j]))
#
# return sum(S)

# Continue from here
X = features_vector_upsampled.T
y = parameters_state

# clf = svm.SVC()
# clf.fit(X, y)

# parameters_state_y = clf.predict(X)

predhmmc3 = hmmc3.predict(features_vector_upsampled.T)

myava = MyAVAClassifier()
myava.fit(features_vector_upsampled.T, parameters_state)

predmyava = myava.predict(features_vector_upsampled.T)

# hmmsvmc = HMMsvmClassifier(N=2)
# hmmsvmc.fit(features_vector_upsampled.T, parameters_state)
# predhmmsvmc = hmmsvmc.predict(features_vector_upsampled.T)

obsc = MyAVAClassifier()
obsc.fit(features_vector_upsampled.T, parameters_state)
# hmm2 = hmm.GaussianHMM(n_components=6)
# hmm2.fit([features_vector_upsampled.T])
# hmm3 = HMMsvm(obsc)
# hmm3.fit([features_vector_upsampled.T], [parameters_state])
# predhmmsvm = hmm3.predict(features_vector_upsampled.T)

hmmsvmc = HMMsvmClassifier()
# hmmsvmc = HmmSvmClassifier3()
hmmsvmc.fit(features_vector_upsampled.T, parameters_state)
predhmmsvm = hmmsvmc.predict(features_vector_upsampled.T)
plot_parameters_estimation([nbc, myava, hmmc3, hmmsvmc])


# plot(parameters)
figure()
plot(parameters_state, 'b')

# plot(parameters_state_y, 'r--')
plot(predhmmc3, 'g+-')

plot(predhmmsvm, 'co-')

# plot(predhmmsvmc, 'bo-')

legend(['Real', 'HMM', 'HMM-SVM'])

xlabel('sample')
ylabel('state')
# TODO: HMM with SVM, cross-validation

# Using a hidden Markov SVM

svmhmm = ""

# print "[II] Success ratio for SVM-RBF Classifier: %.2f" % (sum(parameters_state == parameters_state_y).astype(float)/float(len(parameters_state)))

print "[II] All-vs-All custom Support Vector Classifier Success Ratio: %.2f" % (sum(predmyava == parameters_state).astype(float)/len(parameters_state))
print "[II] Success ratio for HMM: %.2f" % (sum(parameters_state == predhmmc3).astype(float)/float(len(predhmmc3)))
print "[II] Success ratio for HMM-SVM: %.2f" % (sum(parameters_state == predhmmsvm).astype(float)/float(len(predhmmsvm)))


model_ghmm = ReverbModel("GaussianHMM Classifier Model", kernel, q, features_to_keep, hmmc3, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector)
model_gnb = ReverbModel("Naive Bayes Classifier Model", kernel, q, features_to_keep, nbc, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector)
model_svm = ReverbModel("AVA LinearSVM Classifier Model", kernel, q, features_to_keep, myava, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector)
model_hmmsvm = ReverbModel("HMM-SVM Classifier Model", kernel, q, features_to_keep, hmmsvmc, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector)

model_ghmm.save("model_ghmm.dat")
model_gnb.save("model_gnb.dat")
model_svm.save("model_svm.dat")
model_hmmsvm.save("model_hmmsvm.dat")
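# Hedged counterpart to save(): how these .dat files could be read back.
# This assumes ReverbModel.save() pickles the object (pickle is imported at
# the top of this file); if ReverbModel provides its own loader, use that
# instead.
#
# with open("model_ghmm.dat", "rb") as f:
#     model_ghmm = pickle.load(f)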


def split_data_to_k_samples(data, params, K):
    """Split data/params row-wise into K roughly equal consecutive blocks."""
    L = shape(data)[0]
    M = int(round(L/float(K)))

    samples_data = []
    samples_parameters = []
    for k in range(K):
        if (k+1)*M > L:
            samples_data.append(data[k*M:,:])
            samples_parameters.append(params[k*M:])
        else:
            samples_data.append(data[k*M:(k+1)*M,:])
            samples_parameters.append(params[k*M:(k+1)*M])

    return samples_data, samples_parameters

def join_data(data, params):
    """Concatenate a list of blocks back into one data matrix and one
    parameter vector."""
    L = 0
    for p in params:
        L += len(p)

    new_data = zeros((L, data[0].shape[1]))
    new_params = zeros((L,))

    idx = 0
    for n in range(0, len(data)):
        new_data[idx:idx+data[n].shape[0],:] = data[n]
        new_params[idx:idx+len(params[n])] = params[n]
        idx += len(params[n])

    return new_data, new_params
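# Sanity-check sketch for the two helpers above: splitting into K blocks and
# joining them back should be lossless (block sizes only differ by rounding).
def check_split_join(data, params, K=5):
    sd, sp = split_data_to_k_samples(data, params, K)
    rd, rp = join_data(sd, sp)
    print "[II] split/join MSE: %.6g" % mse(data, rd)

# check_split_join(features_vector_upsampled.T, parameters_state)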

cross_validation_ratios = {}
cross_validation_mses = {}


features_samples, parameters_samples = split_data_to_k_samples(features_vector_upsampled.T, parameters_state, 5)

from sklearn.cross_validation import KFold
from sklearn import metrics


KF = 10

s1, s2 = split_data_to_k_samples(features_vector_upsampled.T, parameters_state, KF)
r1, r2 = join_data(s1, s2)

kf = KFold(KF, n_folds=KF)  # one block per fold, i.e. leave-one-block-out

trial = 1.0

rhmmc = 0
rgnbc = 0
ravac = 0
rhmmsvmc = 0

msegbnc = 0
msehmmc = 0
mseavac = 0
msehmmsvmc = 0

fullratios = []
fullmses = []


def calculate_mse(pred, real):
    """MSE between the parameter vectors decoded from two state sequences."""
    realparams = states_to_vector(real, parameter_state_alphabet_inv)
    estparams = states_to_vector(pred, parameter_state_alphabet_inv)
    return mse(realparams, estparams)

ratios_gnb = []
ratios_hmm = []
ratios_svm = []
ratios_hmmsvm = []
ratios_sinkhole = []

mse_gnb = []
mse_hmm = []
mse_svm = []
mse_hmmsvm = []
mse_sinkhole = []

3602 for train, test in kf: | |
3603 | |
3604 train_idx = 100 | |
3605 | |
3606 print train,test | |
3607 f1 = [] | |
3608 p1 = [] | |
3609 | |
3610 tf1 = [] | |
3611 tp1 = [] | |
3612 | |
3613 for i in train: | |
3614 f1.append(s1[i-1]) | |
3615 p1.append(s2[i-1]) | |
3616 | |
3617 for i in test: | |
3618 tf1.append(s1[i-1]) | |
3619 tp1.append(s2[i-1]) | |
3620 | |
3621 new_trainset_data, new_trainset_params = join_data(f1, p1) | |
3622 new_trainset_params = new_trainset_params.astype(int) | |
3623 | |
3624 | |
3625 # Detect and remove outliers | |
3626 | |
3627 # ee = EllipticEnvelope() | |
3628 | |
3629 # ee.fit(new_trainset_data, new_trainset_params) | |
3630 | |
3631 | |
3632 # inliers = ee.predict(new_trainset_data) == 1 | |
3633 | |
3634 # new_trainset_data = new_trainset_data[inliers, :] | |
3635 # new_trainset_params = new_trainset_params[inliers] | |
3636 | |
3637 | |
3638 new_testset_data, new_testset_params = join_data(tf1, tp1) | |
3639 new_testset_params = new_testset_params.astype(int) | |
3640 | |
3641 | |
3642 | |
3643 param_est = states_to_vector(new_testset_params,parameter_state_alphabet_inv).T | |
3644 | |
3645 | |
3646 | |
3647 | |
3648 | |
3649 | |
3650 | |
3651 X = arange(50, len(new_trainset_params), 50) | |
3652 print X | |
3653 # X = append(X, [len(new_trainset_params)], 1) | |
3654 | |
3655 | |
3656 # | |
3657 for M in X: | |
3658 # print CHAINSIZE | |
3659 # print NCOMPONENTS | |
3660 hmmc = HmmClassifier3(N=CHAINSIZE, n_components = NCOMPONENTS) | |
3661 gnbc = GaussianNB() | |
3662 avac = MyAVAClassifier() | |
3663 hmmsvmc = HMMsvmClassifier() | |
3664 | |
3665 gnbc.fit(new_trainset_data[0:M,:], new_trainset_params[0:M]) | |
3666 pgnbc = gnbc.predict(new_testset_data) | |
3667 rgnbc = metrics.accuracy_score(pgnbc, new_testset_params) | |
3668 mgnbc = calculate_mse(pgnbc,new_testset_params) | |
3669 | |
3670 # | |
3671 hmmc.fit(new_trainset_data[0:M,:], new_trainset_params[0:M]) | |
3672 phmmc = hmmc.predict(new_testset_data) | |
3673 rhmmc = metrics.accuracy_score(phmmc, new_testset_params) | |
3674 mhmmc = calculate_mse(phmmc,new_testset_params) | |
3675 | |
3676 # | |
3677 try: | |
3678 avac.fit(new_trainset_data[0:M,:], new_trainset_params[0:M]) | |
3679 pavac = avac.predict(new_testset_data) | |
3680 ravac = metrics.accuracy_score(pavac, new_testset_params) | |
3681 mavac = calculate_mse(pavac,new_testset_params) | |
3682 except: | |
3683 ravac = 0 | |
3684 mavac = inf | |
3685 # | |
3686 # | |
3687 # | |
3688 # | |
3689 # | |
3690 try: | |
3691 hmmsvmc.fit(new_trainset_data[0:M,:], new_trainset_params[0:M]) | |
3692 phmmsvm = hmmsvmc.predict(new_testset_data) | |
3693 rhmmsvm = metrics.accuracy_score(phmmsvm, new_testset_params) | |
3694 mhmmsvm = calculate_mse(phmmsvm,new_testset_params) | |
3695 # | |
3696 except: | |
3697 rhmmsvm = 0 | |
3698 mavac = inf | |
3699 # | |
3700 # ratios_gnb.append(rgnbc) | |
3701 # ratios_hmm.append(rhmmc) | |
3702 # ratios_svm.append(ravac) | |
3703 # ratios_hmmsvm.append(rhmmsvm) | |
3704 # | |
3705 # mse_gnb.append(mgnbc) | |
3706 # mse_hmm.append(mhmmc) | |
3707 # mse_svm.append(mavac) | |
3708 # mse_hmmsvm.append(mhmmsvm) | |
3709 # | |
3710 # fullratios.append((trial,X,ratios_gnb,ratios_hmm,ratios_svm,ratios_hmmsvm)) | |
3711 # fullmses.append((trial,X,mse_gnb,mse_hmm,mse_svm,mse_hmmsvm)) | |
3712 | |
3713 # | |
3714 # | |
3715 # | |
3716 # | |
3717 | |
3718 | |
3719 | |
3720 | |
3721 | |
3722 | |
3723 # hmmc = HmmClassifier3(N=3, n_components = max(unique(parameters_state))+1) | |
3724 hmmc = HmmClassifier3(N=CHAINSIZE, n_components = NCOMPONENTS) | |
3725 gnbc = GaussianNB() | |
3726 avac = MyAVAClassifier() | |
3727 hmmsvmc = HMMsvmClassifier() | |
3728 sinkholec = SinkHoleClassifier() | |
3729 | |
3730 hmmc.fit(new_trainset_data, new_trainset_params) | |
3731 gnbc.fit(new_trainset_data, new_trainset_params) | |
3732 # try: | |
3733 avac.fit(new_trainset_data, new_trainset_params) | |
3734 sinkholec.fit(new_trainset_data, new_trainset_params) | |
3735 pavac = avac.predict(new_testset_data) | |
3736 | |
3737 | |
3738 # except: | |
3739 # pavac = ones((new_testset_data.shape[0],))*new_trainset_params[0] | |
3740 | |
3741 | |
3742 # try: | |
3743 hmmsvmc.fit(new_trainset_data, new_trainset_params) | |
3744 phmmsvm = hmmsvmc.predict(new_testset_data) | |
3745 # except: | |
3746 # | |
3747 # # phmmsvm = hmmsvmc.predict(new_testset_data) | |
3748 # phmmsvm = ones((new_testset_data.shape[0],))*new_trainset_params[0] | |
3749 | |
3750 | |
3751 phmmc = hmmc.predict(new_testset_data) | |
3752 pgnbc = gnbc.predict(new_testset_data) | |
3753 psinkhole = sinkholec.predict(new_testset_data) | |
3754 | |
3755 print "[II] (Trial %d) Classification ratio for GNB: %.2f" % (trial, metrics.accuracy_score(pgnbc, new_testset_params)) | |
3756 print "[II] (Trial %d) Classification ratio for SVM: %.2f" % (trial, metrics.accuracy_score(pavac, new_testset_params)) | |
3757 print "[II] (Trial %d) Classification ratio for HMM: %.2f" % (trial, metrics.accuracy_score(phmmc, new_testset_params)) | |
3758 print "[II] (Trial %d) Classification ratio for HMM/SVM: %.2f" % (trial, metrics.accuracy_score(phmmsvm, new_testset_params)) | |
3759 print "[II] (Trial %d) Classification ratio for Sinkhole Approach: %.2f" % (trial, metrics.accuracy_score(psinkhole, new_testset_params)) | |
3760 | |
3761 | |
3762 mse | |
3763 | |
3764 # rgnbc = rhmmc*(trial-1)/trial + metrics.accuracy_score(pgnbc, new_testset_params)/trial | |
3765 # ravac = rhmmc*(trial-1)/trial + metrics.accuracy_score(pavac, new_testset_params)/trial | |
3766 # rhmmc = rhmmc*(trial-1)/trial + metrics.accuracy_score(phmmc, new_testset_params)/trial | |
3767 # rhmmsvmc = rhmmc*(trial-1)/trial + metrics.accuracy_score(phmmsvm, new_testset_params)/trial | |
3768 | |
3769 rgnbc = metrics.accuracy_score(pgnbc, new_testset_params) | |
3770 ravac = metrics.accuracy_score(pavac, new_testset_params) | |
3771 rhmmc = metrics.accuracy_score(phmmc, new_testset_params) | |
3772 rhmmsvmc = metrics.accuracy_score(phmmsvm, new_testset_params) | |
3773 rsinkhole = metrics.accuracy_score(psinkhole, new_testset_params) | |
3774 | |
ratios_gnb.append(rgnbc)
ratios_svm.append(ravac)
ratios_hmm.append(rhmmc)
ratios_hmmsvm.append(rhmmsvmc)
ratios_sinkhole.append(rsinkhole)

# msegbnc = msegbnc*(trial-1)/trial + calculate_mse(pgnbc,new_testset_params)/trial
# msehmmc = msehmmc*(trial-1)/trial + calculate_mse(phmmc,new_testset_params)/trial
# mseavac = mseavac*(trial-1)/trial + calculate_mse(pavac,new_testset_params)/trial
# msehmmsvmc = msehmmsvmc*(trial-1)/trial + calculate_mse(phmmsvm,new_testset_params)/trial

msegnb = calculate_mse(pgnbc, new_testset_params)
msesvm = calculate_mse(pavac, new_testset_params)
msehmm = calculate_mse(phmmc, new_testset_params)
msehmmsvm = calculate_mse(phmmsvm, new_testset_params)
msesinkhole = calculate_mse(psinkhole, new_testset_params)

mse_gnb.append(msegnb)
mse_svm.append(msesvm)
mse_hmm.append(msehmm)
mse_hmmsvm.append(msehmmsvm)
mse_sinkhole.append(msesinkhole)

trial += 1

print "[II] Per-trial GNB accuracies:", ratios_gnb

sucrats = [mean(ratios_gnb), mean(ratios_svm), mean(ratios_hmm),
           mean(ratios_hmmsvm), mean(ratios_sinkhole)]
sucratsstd = [std(ratios_gnb, ddof=1), std(ratios_svm, ddof=1),
              std(ratios_hmm, ddof=1), std(ratios_hmmsvm, ddof=1),
              std(ratios_sinkhole, ddof=1)]
mserats = [mean(mse_gnb), mean(mse_svm), mean(mse_hmm),
           mean(mse_hmmsvm), mean(mse_sinkhole)]
# mserats = [msegbnc, mseavac, msehmmc, msehmmsvmc]

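# The tuples printed below are the across-trial mean accuracies and MSEs per
# classifier; ddof=1 above gives the sample standard deviation (Bessel's
# correction) over trials.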
print tuple(sucrats)
# print tuple(sucratsstd)

# scores = zeros((len(sucrats), len(sucrats)))
# for i in range(0, len(sucrats)):
#     for j in range(0, len(sucrats)):
#         scores[i,j] = (sucrats[i] - sucrats[j])/sqrt(sucratsstd[i]**2 + sucratsstd[j]**2)
print tuple(mserats)
# print scores
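
# A minimal sketch of the pairwise comparison hinted at by the commented-out
# `scores` block above: a z-like score for each pair of mean accuracies, using
# the across-trial sample standard deviations. `method_names` is introduced
# here for labelling only; its order matches sucrats/sucratsstd. Uncomment to
# print the scores.
# method_names = ['GNB', 'SVM', 'HMM', 'HMM/SVM', 'Sinkhole']
# for i in range(len(sucrats)):
#     for j in range(i + 1, len(sucrats)):
#         z = (sucrats[i] - sucrats[j]) / sqrt(sucratsstd[i] ** 2 + sucratsstd[j] ** 2)
#         print "[II] z(%s vs %s) = %.2f" % (method_names[i], method_names[j], z)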

# print (msegbnc, mseavac, msehmmc, msehmmsvmc)

sample_sizes = X / float(max(X))
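# X holds the training-subset sizes used in the learning-curve experiment, so
# sample_sizes is the fraction of the full training set at each point. Each
# *_curves array below is (number of subset sizes) x (number of trials).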
gnbc_ratios_curves = zeros((len(X), len(fullratios)))
gnbc_mses_curves = zeros((len(X), len(fullratios)))

avac_ratios_curves = zeros((len(X), len(fullratios)))
avac_mses_curves = zeros((len(X), len(fullratios)))

hmmc_ratios_curves = zeros((len(X), len(fullratios)))
hmmc_mses_curves = zeros((len(X), len(fullratios)))

hmmsvmc_ratios_curves = zeros((len(X), len(fullratios)))
hmmsvmc_mses_curves = zeros((len(X), len(fullratios)))
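
# NOTE: the column indices 2..5 below assume each fullratios/fullmses entry is
# laid out as (trial, X, gnb, avac, hmm, hmmsvm); the commented-out append
# earlier in the file lists (trial, X, gnb, hmm, svm, hmmsvm), in which case
# indices 3 and 4 here would be swapped.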
for i in range(0, len(fullratios)):
    for j in range(0, len(X)):
        gnbc_ratios_curves[j, i] = fullratios[i][2][j]
        gnbc_mses_curves[j, i] = fullmses[i][2][j]

        avac_ratios_curves[j, i] = fullratios[i][3][j]
        avac_mses_curves[j, i] = fullmses[i][3][j]

        hmmc_ratios_curves[j, i] = fullratios[i][4][j]
        hmmc_mses_curves[j, i] = fullmses[i][4][j]

        hmmsvmc_ratios_curves[j, i] = fullratios[i][5][j]
        hmmsvmc_mses_curves[j, i] = fullmses[i][5][j]
gnbc_mses_curve = mean(gnbc_mses_curves, 1)
avac_mses_curve = mean(avac_mses_curves, 1)
hmmc_mses_curve = mean(hmmc_mses_curves, 1)
hmmsvmc_mses_curve = mean(hmmsvmc_mses_curves, 1)

gnbc_ratios_curve = mean(gnbc_ratios_curves, 1)
avac_ratios_curve = mean(avac_ratios_curves, 1)
hmmc_ratios_curve = mean(hmmc_ratios_curves, 1)
hmmsvmc_ratios_curve = mean(hmmsvmc_ratios_curves, 1)
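
# Averaging over axis 1 (trials) gives one mean learning curve per classifier;
# accuracy goes on the top panel, MSE on the bottom one.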
figure()
subplot(211)
plot(sample_sizes, gnbc_ratios_curve)
plot(sample_sizes, avac_ratios_curve)
plot(sample_sizes, hmmc_ratios_curve)
plot(sample_sizes, hmmsvmc_ratios_curve)
xlabel('Part of training set used')
ylabel('Accuracy')
legend(['GNB', 'SVM', 'HMM', 'HMM/SVM'])

subplot(212)
plot(sample_sizes, gnbc_mses_curve)
plot(sample_sizes, avac_mses_curve)
plot(sample_sizes, hmmc_mses_curve)
plot(sample_sizes, hmmsvmc_mses_curve)
xlabel('Part of training set used')
ylabel('Mean Squared Error')
legend(['GNB', 'SVM', 'HMM', 'HMM/SVM'])

# tight_layout()
# Save before show(): with non-interactive backends, saving after the figure
# window has been closed can yield an empty image.
savefig('ratios-2.png', dpi=500)
show()