experiment-reverb/code/#training.py# @ 0:246d5546657c

initial commit, needs cleanup
author Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date Wed, 14 Dec 2016 13:15:48 +0000
1 #!/usr/bin/python2
2 # -*- coding: utf-8 -*-
3 """
4 Created on Thu Apr 23 11:53:17 2015
5
6 @author: mmxgn
7 """
8
9 # This file does the cluster estimation and the removal of outliers
10
11 from warnings import filterwarnings
12
13 from sys import argv, exit
14 from essentia.standard import YamlInput, YamlOutput
15 from essentia import Pool
16 from pca import *
17
18 from numpy import *
19 from sklearn import cluster
20 from sklearn.metrics import pairwise_distances
21 from sklearn.cluster import KMeans, MiniBatchKMeans
22 from matplotlib.pyplot import *
23 #from sklearn.mixture import GMM
24 from sklearn.naive_bayes import GaussianNB, MultinomialNB
25 from scipy.signal import decimate
import scipy.signal # smooth_matrix_1D below calls scipy.signal.gaussian
26 from sklearn import cross_validation
27 from sklearn import multiclass
28 from sklearn.covariance import EllipticEnvelope
29
30 #from hmmlearn import hmm
31 from hmmlearn.hmm import GMM
32 from hmmlearn import hmm
33
34 from sklearn import svm
35 import copy as cp
36
37 from scipy.misc import logsumexp
38 import pickle
39
40 from collections import Counter
41 #from adpcm import adm, adm_reconstruct
42
43
44 from reshape_observations import reshape_observations
45
46 mse = lambda A,B: ((array(A)-array(B)) ** 2).mean()
47
48 lognorm = lambda A: A - logsumexp(A)
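
# A hedged sketch (defined here but never called): `lognorm` shifts a vector
# of log-probabilities so that the corresponding probabilities sum to one.
def _demo_lognorm():
    p = lognorm(log(array([1., 3.])))
    assert allclose(exp(p), [0.25, 0.75])   # exp(p) is a proper distribution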
49
50
51 rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
52 ## for Palatino and other serif fonts use:
53 #rc('font',**{'family':'serif','serif':['Palatino']})
54 rc('text', usetex=True)
55 #logsum = lambda X: logsumexp(log(X))
56
57
58 filterwarnings("ignore")
59 C = 0.7
60 NCOMPONENTS=2
61 CHAINSIZE = 5
62
63 close('all')
64
65 from numpy.random import randint, rand
66
67 class HMMsvm:
68 def __init__(self, svm_):
69 self.svm = svm_
70
71
72 # self.svm = MyAVAClassifier()
73
74
75 def fit(self, features_list, states, flr=1e-13):
76 # def fit(self, features_list, flr=1e-13):
77
78 # TODO: Concatenate features, train
79 #self.svm.fit(X, states, flr)
80 #self.prior = self.svm.prior
81 #self.transmat = self.svm.transmat
82
83
84 features = None
85
86 for f in features_list:
87 if features is None:
88 features = f
89 else:
90 features = append(features, f, 0)
91
92 fullstates = None
93 # T = features.shape[0]
94 for s in states:
95 if fullstates is None:
96 fullstates = s
97 else:
98 fullstates = append(fullstates, s)
99
100
101
102
103
104
105 self.n_classes = self.svm.n_classes
106 n_classes = self.n_classes
107
108 # print fullstates, shape(fullstates)
109
110 h = histogram(fullstates, n_classes)[0].astype(float)
111 self.prior = h/sum(h)
112
113 # Add a very small probability for random jump
114
115 self.prior += flr
116 self.prior = self.prior/sum(self.prior)
117
118 transmat = zeros((n_classes, n_classes))
119 transitions = zeros((n_classes, ))
120 for s in states:
121 for i in range(1, len(s)):
122 prev = s[i-1]
123 cur = s[i]
124 transmat[prev,cur] += 1
125 transitions[prev] += 1
126
127 transitions[transitions == 0] = 1
128
129 for i in range(0, transmat.shape[0]):
130 transmat[i,:] = transmat[i,:]/transitions[i]
131
132 self.transmat = transmat
133
134 # Add a very small probability for random jump to avoid zero values
135
136 self.transmat += flr
137
138 for i in range(0, self.transmat.shape[0]):
139 self.transmat[i,:] = self.transmat[i,:]/sum(self.transmat[i,:])
140
141
142 A = zeros((n_classes, n_classes))
143
144 Aold = self.transmat
145
146 while mse(A,Aold)>10*size(A)*flr:
147 Aold = A.copy()
148 A = zeros((n_classes, n_classes))
149 transitions = zeros((n_classes, ))
150
151 for i in range(0,len(features_list)):
152 f = features_list[i]
153 # print "FEATURES:", f
154 # print f
155 s,p = self.logviterbi(f)
156 # print s
157 for j in range(1, len(s)):
158 prev = s[j-1]
159 cur = s[j]
160 A[prev,cur] += 1
161 transitions[prev] += 1
162
163 transitions[transitions == 0] = 1
164
165
166 for i in range(0, A.shape[0]):
167 A[i,:] = A[i,:]/transitions[i]
168
169 A += flr
170
171
172
173 self.transmat = A
174
175 for i in range(0, A.shape[0]):
176 if sum(A[i,:]) > 0:
177 A[i,:] = A[i,:]/sum(A[i,:])
178
179
180
181 # print observations
182
183
184 def estimate_emission_probability(self, x, q):
185 post = self.svm.estimate_posterior_probability_vector(x)
186 # print "post: ", post
187 prior = self.prior
188 # print "prior: ", prior
189 p = post/prior
190 p = p/sum(p)
191
192 return p[q]
193
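# Note on the division above: by Bayes' rule, p(x|q) is proportional to
# p(q|x)/p(q), so dividing the SVM posterior by the class prior yields a
# "scaled likelihood" that stands in for the HMM emission density
# (a common trick in hybrid classifier/HMM systems).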
194 # def estimate_emission_probability(self, x, q):
195 # return self.svm.estimate_emission_probability(q, x)
196
197
198 def predict(self, X):
199 return self.logviterbi(X)[0]
200
201
202 def logviterbi(self, X):
203 # Number of states
204
205 N = self.n_classes
206
207 # Number of observations
208
209
210
211 T = X.shape[0]
212
213
214
215 transmat = self.transmat
216
217 #1. Initialization
218
219 a1 = self.prior
220
221 delta = zeros((T,N))
222 psi = zeros((T,N))
223
224 for i in range(0, N):
225 delta[0, i] = log(a1[i]) + log(self.estimate_emission_probability(X[0,:],i)) # initialise from the prior (a1 was assigned but unused before)
226
227
228 #2. Recursion
229
230 for t in range(1, T):
231 # delta_old = delta.copy()
232 x = X[t, :]
233 for j in range(0, N):
234 # print "j: %d, S" % j, delta_old+log(transmat[:,j])
235 delta[t,j] = max(delta[t-1,:] + log(transmat[:,j])) + log(self.estimate_emission_probability(x,j))
236 psi[t,j] = argmax(delta[t-1,:] + log(transmat[:,j]))
237
238 # print "t: %d, delta: " % t, delta
239 # print "t: %d, psi:" % t, psi
240
241
242 # 3. Termination
243
244 p_star = max(delta[T-1,:] + log(transmat[:,N-1]))
245 q_star = argmax(delta[T-1,:] + log(transmat[:, N-1]))
246
247
248 # 4. Backtracking
249
250 q = zeros((T,))
251 p = zeros((T,))
252
253 q[-1] = q_star
254 p[-1] = p_star
255 for t in range(1, T):
256 q[-t-1] = psi[-t, int(q[-t])] # cast: q stores state indices as floats
257 p[-t-1] = delta[-t, int(q[-t])]
258
259
260 return q,p
261
262 def viterbi(self, X):
263 # Number of states
264
265 N = self.n_classes
266
267 # Number of observations
268
269 T = X.shape[0]
270
271 transmat = self.transmat
272
273 #1. Initialization
274
275 a1 = self.prior
276
277 delta = zeros((N, ))
278 psi = zeros((N, ))
279
280 for i in range(0, N):
281 delta[i] = a1[i]*self.estimate_emission_probability(X[0,:],i)
282
283
284
285
286
287 #2. Recursion
288
289 for t in range(1, T):
290 delta_old = delta.copy()
291 x = X[t,:]
292 for j in range(0, N):
293 delta[j] = max(delta_old*transmat[:,j])*self.estimate_emission_probability(x, j)
294 psi[j] = argmax(delta_old*transmat[:,j])
295
296 #3. Termination
297
298 p_star = max(delta*transmat[:,N-1])
299 q_star = argmax(delta*transmat[:,N-1])
300
301
302
303 #4. Backtracking
304
305 q = zeros((T,))
306 q[-1] = q_star
307
308 for t in range(1, T):
309 q[-t-1] = psi[int(q[-t])] # cast: q stores state indices as floats
310
311 return q
312
313
314 def _log_likelihood_matrix(self, X):
315 N = self.n_classes
316 ll = zeros((X.shape[0], N))
317
318 for i in range(0, X.shape[0]):
319 ll[i,:] = self._forward(i, X)
320
321 return ll
322
323 def compute_ksus(self, X):
324 N = self.n_classes
325 T = X.shape[0]
326 print "[II] Computing gammas... "
327
328 gammas = self._forward_backward(X)
329
330 # Initialize
331
332 aij = self.transmat # NOTE: the ksi re-estimation was left unfinished in this commit
333
334
335
336
337
338
339 def _not_quite_ksu(self, t, i, j, X):
340 alpha = exp(self.forward(X[0:t+1,:]))[i]
341 beta = exp(self.backward(X[t+1:,:]))[j]
342
343 aij = self.transmat[i,j]
344 bj = self.estimate_emission_probability(X[t+1,:], j)
345
346 return alpha*beta*aij*bj
347
348 def _ksu(self, t, i, j, X):
349 alphaT = exp(self.forward(X[0:t+1,:]))[0]
350
351 return self._not_quite_ksu(t,i,j,X)
352
353
354 def _forward_backward(self, X):
355 T = X.shape[0]
356 N = self.n_classes
357 fv = zeros((T, N))
358 sv = zeros((T, N))
359
360 b = None
361 for t in range(1, T+1):
362
363 fv[t-1,:] = self._forward_message(fv[t-2,:], X[0:t, ])
364
365 for t in range(1, T+1):
366 b = self._backward_message(b, X[-t: , :])
367 sv[-t,:] = lognorm(fv[-t,:] + b) # log domain: add rather than multiply, and index the forward messages from the end
368
369 return sv
370
371
372 def _backward(self, t, X):
373 N = self.n_classes
374 a = ones((N,))/N
375
376 beta = zeros((N, ))
377 transmat = self.transmat
378 T = X.shape[0]
379 x = X[t, :]
380 if t == T-1:
381 beta = log(a)
382
383 return beta
384 else:
385 tosum = zeros((N, ))
386 bt = self._backward(t+1, X)
387 btnew = zeros((N, ))
388 # print bt
389 for j in range(0, N):
390 for i in range(0, N):
391 # print log(self.estimate_emission_probability(x, j)), bt[i], log(transmat[i,j])
392 tosum[i] = log(self.estimate_emission_probability(x, j)) + bt[i] + log(transmat[i,j])
393 # print tosum
394
395 btnew[j] = logsumexp(tosum)
396 btnew = lognorm(btnew)
397
398 return btnew
399
400
401 def score(self, X):
402 return self.forward(X)
403
404 def forward(self, X):
405 T = X.shape[0]
406 f = None
407 for t in range(1, T+1):
408 f = self._forward_message(f, X[0:t, :])
409
410 return f
411
412 def backward(self, X):
413 T = X.shape[0]
414 b = None
415 for t in range(1,T+1):
416 # print t
417 # print t,b
418 idx = arange(-t,T)
419 # print idx
420 b = self._backward_message(b, X[-t:, :])
421
422 return b
423
424 def _backward_message(self, b, X):
425 N = self.n_classes
426
427
428 beta = zeros((N, ))
429 transmat = self.transmat
430
431 x = X[0, :]
432
433 if X.shape[0] == 1:
434 beta = log(ones((N,))/N)
435 return beta
436 else:
437 tosum = zeros((N, ))
438 btnew = zeros((N, ))
439 for j in range(0, N):
440 for i in range(0, N):
441 tosum[i] = log(self.estimate_emission_probability(x,j)) + b[i] + log(transmat[i,j])
442
443 btnew[j] = logsumexp(tosum)
444 # btnew = lognorm(btnew)
445 return btnew
446
447 def _forward_message(self, f, X):
448 N = self.n_classes
449 a = self.prior
450
451 alpha = zeros((N, ))
452 transmat = self.transmat
453
454 x = X[-1, :]
455
456 if X.shape[0] == 1:
457 for i in range(0, N):
458 alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i))
459 # alpha = lognorm(alpha)
460 return alpha
461
462 else:
463 tosum = zeros((N,))
464 ftnew = zeros((N,))
465 for j in range(0, N):
466 for i in range(0, N):
467 tosum[i] = f[i] + log(transmat[i,j])
468 Sum = logsumexp(tosum)
469 bj = self.estimate_emission_probability(x, j)
470 ftnew[j] = Sum+log(bj)
471
472 # ftnew = lognorm(ftnew)
473 return ftnew
474
475 def _forward(self, t, X):
476 N = self.n_classes
477 a = self.prior
478 # print a
479 alpha = zeros((N, ))
480 transmat = self.transmat
481 x = X[t, :]
482
483 if t == 0:
484 for i in range(0, N):
485 alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i))
486 # print "--"
487 # print alpha
488 alpha = lognorm(alpha)
489 # print alpha
490 # print "xx"
491 return alpha
492 else:
493 tosum = zeros((N, ))
494 ft = self._forward(t-1, X)
495 ftnew = zeros((N, ))
496 for j in range(0, N):
497 for i in range(0, N):
498 # print ft
499 tosum[i] = ft[i] + log(transmat[i,j])
500
501 Sum = logsumexp(tosum)
502 bj = self.estimate_emission_probability(x, j)
503 ftnew[j] = Sum+log(bj)
504 ftnew = lognorm(ftnew)
505
506 return ftnew
507
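# Hedged usage sketch for HMMsvm (assumed shapes and labels; the function is
# defined here for illustration and never called by the script):
def _demo_hmmsvm():
    X = random.rand(40, 3)          # 40 frames of 3 features
    y = array([0]*20 + [1]*20)      # a two-state label sequence
    obs = MyAVAClassifier()         # one-vs-one SVM observation model (defined below)
    obs.fit(X, y)
    model = HMMsvm(obs)
    model.fit([X], [y])
    return model.predict(X)         # Viterbi state path of length 40
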
508 class HMMsvm2:
509 def __init__(self, svm_, n_components=4):
510 self.svm = svm_
511 self.n_components = n_components
512
513
514 # self.svm = MyAVAClassifier()
515
516
517 def fit(self, features_list, flr=1e-13):
518 # def fit(self, features_list, flr=1e-13):
519
520 # TODO: Concatenate features, train
521 #self.svm.fit(X, states, flr)
522 #self.prior = self.svm.prior
523 #self.transmat = self.svm.transmat
524
525
526 n_classes = self.n_components
527 self.n_classes = n_classes
528
529 A = zeros((n_classes, n_classes))
530
531 Aold = rand(n_classes, n_classes) # uniform positive draws; randn produced negative "probabilities"
532 self.transmat = Aold
533 self.prior = rand(n_classes)
534 self.prior = self.prior/sum(self.prior)
535 for i in range(0, Aold.shape[0]):
536 Aold[i,:] = Aold[i,:]/sum(Aold[i,:])
537
538 while mse(A,Aold)>10*size(A)*flr:
539 Aold = A.copy()
540 A = zeros((n_classes, n_classes))
541 transitions = zeros((n_classes, ))
542
543 for i in range(0,len(features_list)):
544 f = features_list[i]
545 # print "FEATURES:", f
546 # print f
547 s,p = self.logviterbi(f)
548 # print s
549
550 h = histogram(s, n_classes)[0].astype(float)
551 self.prior = h/sum(h)
552
553
554 self.prior += flr
555 self.prior = self.prior/sum(self.prior)
556
557
558 for j in range(1, len(s)):
559 prev = s[j-1]
560 cur = s[j]
561 A[prev,cur] += 1
562 transitions[prev] += 1
563
564 transitions[transitions == 0] = 1
565
566
567 for i in range(0, A.shape[0]):
568 A[i,:] = A[i,:]/transitions[i]
569
570 A += flr
571
572
573
574 self.transmat = A
575
576 for i in range(0, A.shape[0]):
577 if sum(A[i,:]) > 0:
578 A[i,:] = A[i,:]/sum(A[i,:])
579
580
581
582 # print observations
583
584
585 def estimate_emission_probability(self, x, q):
586 post = self.svm.estimate_posterior_probability_vector(x)
587 # print "post: ", post
588 prior = self.prior
589 # print "prior: ", prior
590 p = post/prior
591 p = p/sum(p)
592
593 return p[q]
594
595 # def estimate_emission_probability(self, x, q):
596 # return self.svm.estimate_emission_probability(q, x)
597
598
599 def predict(self, X):
600 return self.logviterbi(X)[0]
601
602
603 def logviterbi(self, X):
604 # Number of states
605
606 N = self.n_classes
607
608 # Number of observations
609
610
611
612 T = X.shape[0]
613
614
615
616 transmat = self.transmat
617
618 #1. Initialization
619
620 a1 = self.prior
621
622 delta = zeros((T,N))
623 psi = zeros((T,N))
624
625 for i in range(0, N):
626 delta[0, i] = log(a1[i]) + log(self.estimate_emission_probability(X[0,:],i)) # initialise from the prior (it was commented out before)
627
628
629 #2. Recursion
630
631 for t in range(1, T):
632 # delta_old = delta.copy()
633 x = X[t, :]
634 for j in range(0, N):
635 # print "j: %d, S" % j, delta_old+log(transmat[:,j])
636 delta[t,j] = max(delta[t-1,:] + log(transmat[:,j])) + log(self.estimate_emission_probability(x,j))
637 psi[t,j] = argmax(delta[t-1,:] + log(transmat[:,j]))
638
639 # print "t: %d, delta: " % t, delta
640 # print "t: %d, psi:" % t, psi
641
642
643 # 3. Termination
644
645 p_star = max(delta[T-1,:] + log(transmat[:,N-1]))
646 q_star = argmax(delta[T-1,:] + log(transmat[:, N-1]))
647
648
649 # 4. Backtracking
650
651 q = zeros((T,))
652 p = zeros((T,))
653
654 q[-1] = q_star
655 p[-1] = p_star
656 for t in range(1, T):
657 q[-t-1] = psi[-t, int(q[-t])] # cast: q stores state indices as floats
658 p[-t-1] = delta[-t, int(q[-t])]
659
660
661 return q,p
662
663 def viterbi(self, X):
664 # Number of states
665
666 N = self.n_classes
667
668 # Number of observations
669
670 T = X.shape[0]
671
672 transmat = self.transmat
673
674 #1. Initialization
675
676 a1 = self.prior
677
678 delta = zeros((N, ))
679 psi = zeros((N, ))
680
681 for i in range(0, N):
682 delta[i] = a1[i]*self.estimate_emission_probability(X[0,:],i)
683
684
685
686
687
688 #2. Recursion
689
690 for t in range(1, T):
691 delta_old = delta.copy()
692 x = X[t,:]
693 for j in range(0, N):
694 delta[j] = max(delta_old*transmat[:,j])*self.estimate_emission_probability(x, j)
695 psi[j] = argmax(delta_old*transmat[:,j])
696
697 #3. Termination
698
699 p_star = max(delta*transmat[:,N-1])
700 q_star = argmax(delta*transmat[:,N-1])
701
702
703
704 #4. Backtracking
705
706 q = zeros((T,))
707 q[-1] = q_star
708
709 for t in range(1, T):
710 q[-t-1] = psi[int(q[-t])] # cast: q stores state indices as floats
711
712 return q
713
714
715 def _log_likelihood_matrix(self, X):
716 N = self.n_classes
717 ll = zeros((X.shape[0], N))
718
719 for i in range(0, X.shape[0]):
720 ll[i,:] = self._forward(i, X)
721
722 return ll
723
724 def compute_ksus(self, X):
725 N = self.n_classes
726 T = X.shape[0]
727 print "[II] Computing gammas... "
728
729 gammas = self._forward_backward(X)
730
731 # Initialize
732
733 aij = self.transmat
734
735
736
737
738
739
740 def _not_quite_ksu(self, t, i, j, X):
741 alpha = exp(self.forward(X[0:t+1,:]))[i]
742 beta = exp(self.backward(X[t+1:,:]))[j]
743
744 aij = self.transmat[i,j]
745 bj = self.estimate_emission_probability(X[t+1,:], j)
746
747 return alpha*beta*aij*bj
748
749 def _ksu(self, t, i, j, X):
750 alphaT = exp(self.forward(X[0:t+1,:]))[0]
751
752 return self._not_quite_ksu(t,i,j,X)
753
754
755 def _forward_backward(self, X):
756 T = X.shape[0]
757 N = self.n_classes
758 fv = zeros((T, N))
759 sv = zeros((T, N))
760
761 b = None
762 for t in range(1, T+1):
763
764 fv[t-1,:] = self._forward_message(fv[t-2,:], X[0:t, ])
765
766 for t in range(1, T+1):
767 b = self._backward_message(b, X[-t: , :])
768 sv[-t,:] = lognorm(fv[-t,:] + b) # log domain: add rather than multiply, and index the forward messages from the end
769
770 return sv
771
772
773 def _backward(self, t, X):
774 N = self.n_classes
775 a = ones((N,))/N
776
777 beta = zeros((N, ))
778 transmat = self.transmat
779 T = X.shape[0]
780 x = X[t, :]
781 if t == T-1:
782 beta = log(a)
783
784 return beta
785 else:
786 tosum = zeros((N, ))
787 bt = self._backward(t+1, X)
788 btnew = zeros((N, ))
789 # print bt
790 for j in range(0, N):
791 for i in range(0, N):
792 # print log(self.estimate_emission_probability(x, j)), bt[i], log(transmat[i,j])
793 tosum[i] = log(self.estimate_emission_probability(x, j)) + bt[i] + log(transmat[i,j])
794 # print tosum
795
796 btnew[j] = logsumexp(tosum)
797 btnew = lognorm(btnew)
798 return btnew
799
800
801 def score(self, X):
802 return self.forward(X)
803
804 def forward(self, X):
805 T = X.shape[0]
806 f = None
807 for t in range(1, T+1):
808 f = self._forward_message(f, X[0:t, :])
809
810 return f
811
812 def backward(self, X):
813 T = X.shape[0]
814 b = None
815 for t in range(1,T+1):
816 # print t
817 # print t,b
818 idx = arange(-t,T)
819 # print idx
820 b = self._backward_message(b, X[-t:, :])
821
822 return b
823
824 def _backward_message(self, b, X):
825 N = self.n_classes
826
827
828 beta = zeros((N, ))
829 transmat = self.transmat
830
831 x = X[0, :]
832
833 if X.shape[0] == 1:
834 beta = log(ones((N,))/N)
835 return beta
836 else:
837 tosum = zeros((N, ))
838 btnew = zeros((N, ))
839 for j in range(0, N):
840 for i in range(0, N):
841 tosum[i] = log(self.estimate_emission_probability(x,j)) + b[i] + log(transmat[i,j])
842
843 btnew[j] = logsumexp(tosum)
844 # btnew = lognorm(btnew)
845 return btnew
846
847 def _forward_message(self, f, X):
848 N = self.n_classes
849 a = self.prior
850
851 alpha = zeros((N, ))
852 transmat = self.transmat
853
854 x = X[-1, :]
855
856 if X.shape[0] == 1:
857 for i in range(0, N):
858 alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i))
859 # alpha = lognorm(alpha)
860 return alpha
861
862 else:
863 tosum = zeros((N,))
864 ftnew = zeros((N,))
865 for j in range(0, N):
866 for i in range(0, N):
867 tosum[i] = f[i] + log(transmat[i,j])
868 Sum = logsumexp(tosum)
869 bj = self.estimate_emission_probability(x, j)
870 ftnew[j] = Sum+log(bj)
871
872 # ftnew = lognorm(ftnew)
873 return ftnew
874
875 def _forward(self, t, X):
876 N = self.n_classes
877 a = self.prior
878 # print a
879 alpha = zeros((N, ))
880 transmat = self.transmat
881 x = X[t, :]
882
883 if t == 0:
884 for i in range(0, N):
885 alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i))
886 # print "--"
887 # print alpha
888 alpha = lognorm(alpha)
889 # print alpha
890 # print "xx"
891 return alpha
892 else:
893 tosum = zeros((N, ))
894 ft = self._forward(t-1, X)
895 ftnew = zeros((N, ))
896 for j in range(0, N):
897 for i in range(0, N):
898 # print ft
899 tosum[i] = ft[i] + log(transmat[i,j])
900
901 Sum = logsumexp(tosum)
902 bj = self.estimate_emission_probability(x, j)
903 ftnew[j] = Sum+log(bj)
904 ftnew = lognorm(ftnew)
905
906 return ftnew
907
908
909
910
911
912
913
914 class NBClassifier:
915 def __init__(self):
916 print "[II] Gaussian Naive Bayes Classifier"
917 self.name = "Naive Bayes"
918 self.smallname = "gnbc"
919 self.gnb = GaussianNB()
920
921 def getName(self):
922 return self.name
923
924 def fit(self, X, states):
925 self.gnb.fit(X, states)
926
927 def predict(self, X):
928 return self.gnb.predict(X)
929
930 def cross_validate(self, data, classes):
931 print "[II] SVN Classifier Crossvalidating... "
932 from copy import deepcopy
933 estimator = deepcopy(self)
934 estimator_fulldata = deepcopy(self)
935 estimator_fulldata.fit(data, classes)
936
937 percents = arange(0.1,0.9,0.1)
938 self.percents = percents * 100
939
940 RATIOS = []
941 labels = estimator.predict(data)
942
943
944 print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(labels))
945
946 for p in percents:
947 traindata,testdata,trainlabels,testlabels = cross_validation.train_test_split(data,classes,test_size=p,random_state=1)
948
949 # print "[II] [%.2f-%.2f] Examples used for training: %s" % (p, (1-p), str(traindata))
950 # print "[II] [%.2f-%.2f] Examples used for testing: %s" % (p, (1-p), str(testdata))
951
952 # Training phase
953
954
955
956 estimator_ = deepcopy(estimator)
957 estimator_.fit(traindata, trainlabels)
958
959 predicted_labels = estimator_.predict(testdata)
960
961 m = sum(array(predicted_labels==testlabels).astype(float))/len(testlabels)
962
963 print "[II] [%.2f-%.2f] 5-fold cross-validation for NB Classifier: %.1f" % (p, (1-p), m)
964
965 RATIOS.append(m)
966
967 return RATIOS,percents
968
969
970
971
972 class HmmClassifier3:
973 def __init__(self, N=2,n_components = 1):
974 print "[II] HMM Classifier (%d states, %d components)" % (N, n_components)
975 self.name = "HMM (%d time steps, %d components)" % (N, n_components)
976 self.n_components = n_components
977
978 self.n_components = n_components
979 self.smallname = "hmmc"
980 self.chain_size = N
981
982 def getName(self):
983 return self.name
984
985 def fit(self, features, parameters):
986
987
988 # print "Parameters:"
989 # print parameters
990 n_classes = max(unique(parameters)) + 1
991
992 if n_classes == 1:
993 self.only_one_class = True
994 return
995 else:
996 self.only_one_class = False
997
998 print "[II] Number of classes: ", n_classes
999 hmms = [None]*n_classes
1000
1001 chain_size = self.chain_size
1002 obs = [None]*n_classes
1003
1004 for i in range(chain_size, len(parameters)):
1005 class_ = parameters[i]
1006 seq = features[i-chain_size:i,:]
1007
1008
1009 if obs[class_] is None:
1010 obs[class_] = [seq]
1011 else:
1012 obs[class_].append(seq)
1013
1014
1015
1016 for i in range(0, len(obs)):
1017
1018 if obs[i] is not None and len(obs[i]) != 0:
1019 hmm_ = hmm.GaussianHMM(n_components=self.n_components, covariance_type='diag')
1020 # print(obs[i])
1021 #obs_ = reshape_observations(obs[i])
1022 obs_ = concatenate(obs[i])
1023 #lengths_ = [o.shape[0] for o in obs[i]]
1024 #print (lengths_)
1025 # print len(obs[i])
1026 # print obs[i][0][0].shape
1027 # print obs[i]
1028 # for s in obs[i]:
1029 # if s.ndim > 2:
1030 # print s
1031 hmm_.fit(obs_, [self.chain_size]*len(obs[i]))
1032 hmms[i] = hmm_
1033
1034 self.hmms = hmms
1035
1036 return obs
1037
1038 def predict(self, features, mfilt=20):
1039
1040 if self.only_one_class == True:
1041 return zeros((features.shape[0], ))
1042
1043 chain_size = self.chain_size
1044 hmms = self.hmms
1045 predicted_classes = zeros((features.shape[0],))
1046
1047
1048 for i in range(chain_size, features.shape[0]):
1049 scores = zeros((len(hmms),))
1050
1051 seq = features[i-chain_size:i, :]
1052
1053 for j in range(0, len(hmms)):
1054 if hmms[j] is not None:
1055 scores[j] = hmms[j].score(seq)
1056 else:
1057 scores[j] = -infty
1058
1059 predicted_classes[i] = argmax(scores)
1060
1061 # Do a median filter at the end
1062
1063 # predicted_classes = median_filter(predicted_classes,mfilt)
1064
1065 return predicted_classes
1066
1067
1068
1069 def k_fold_cross_validate(self, data, classes, K=5):
1070 print "[II] HMM Classifier Crossvalidating... "
1071 print "[II] Validating data: ", data
1072 print "[II] With classes: ", classes
1073 from copy import deepcopy
1074 estimator = deepcopy(self)
1075 estimator_fulldata = deepcopy(self)
1076 estimator_fulldata.fit(data, classes)
1077
1078
1079 labels = estimator_fulldata.predict(data)
1080 print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(classes))
1081
1082 # Crossvalidate this.
1083
1084 example_sequences = []
1085 example_labels = []
1086
1087 chain_size = self.chain_size
1088
1089 percents = arange(0.1,0.9,0.1)
1090 self.percents = percents * 100
1091
1092 RATIOS = []
1093
1094
1095
1096 # Constructing examples and labels
1097
1098
1099 L = data.shape[0]/K
1100 M = K
1101 # print "[II] Example size:", L
1102
1103 n = 1
1104 while L*(n-1) < M*L:
1105 if L*n > shape(data)[0]:
1106 example = data[L*(n-1):,:]
1107 classes_ = classes[L*(n-1):]
1108 else:
1109 example = data[L*(n-1):L*n, :]
1110 classes_ = classes[L*(n-1):L*n]
1111
1112 # print example
1113 # print classes_
1114
1115 example_sequences.append(example)
1116 example_labels.append(classes_)
1117 n+=1
1118
1119 # print len(example_sequences)
1120 # print len(example_labels)
1121 from sklearn.cross_validation import KFold
1122 kf = KFold(K, n_folds=K)
1123
1124 ratio = 0
1125
1126 for train, test in kf:
1127 print "[II] Validating examples %s against %s." % (train, test)
1128
1129 topredict_data = example_sequences[test[0]]
1130 topredict_labels = example_labels[test[0]]
1131
1132 tofit_data = None
1133 tofit_labels = None
1134
1135 for t in train:
1136 # print t
1137 if tofit_data is None:
1138 tofit_data = example_sequences[t]
1139 tofit_labels = example_labels[t]
1140 else:
1141 tofit_data = append(tofit_data, example_sequences[t], 0)
1142 tofit_labels = append(tofit_labels, example_labels[t], 0)
1143
1144 estimator_ = deepcopy(estimator)
1145 estimator_.fit(tofit_data, tofit_labels)
1146
1147 labels = estimator_.predict(topredict_data)
1148
1149 m = sum(array(topredict_labels==labels).astype(float))/len(labels)
1150
1151
1152 print "[II] %d-fold cross-validation for HMM Classifier: %.1f" % (K, m)
1153
1154 ratio += m/float(len(kf))
1155
1156 return ratio
1157
1158
1159
1160
1161
1162
1163 # Splitting to train/test
1164
1165
1166 def cross_validate(self, data, classes):
1167 print "[II] HMM Classifier Crossvalidating... "
1168 from copy import deepcopy
1169 estimator = deepcopy(self)
1170 estimator_fulldata = deepcopy(self)
1171 estimator_fulldata.fit(data, classes)
1172
1173
1174 labels = estimator_fulldata.predict(data)
1175 print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(classes))
1176
1177 # Crossvalidate this.
1178
1179 example_sequences = []
1180 example_labels = []
1181
1182 chain_size = self.chain_size
1183
1184 percents = arange(0.5,0.9,0.1)
1185 self.percents = percents * 100
1186
1187 RATIOS = []
1188
1189
1190
1191 # Constructing examples and labels
1192
1193 M = self.chain_size
1194 L = data.shape[0]/20
1195
1196 print "[II] Example size:", L
1197
1198 n = 1
1199 while L*n < M*L-L:
1200 example = data[L*(n-1):L*n, :]
1201 # print example
1202 classes_ = classes[L*(n-1):L*n]
1203 # print classes_
1204
1205 example_sequences.append(example)
1206 example_labels.append(classes_)
1207 n+=1
1208
1209
1210
1211 # Splitting to train/test
1212
1213 for p in percents:
1214 traindata,testdata,trainlabels,testlabels = cross_validation.train_test_split(example_sequences, example_labels, test_size=p, random_state=1)
1215
1216 print traindata
1217 print testdata
1218 # Concatenate traindata
1219
1220 tofit_data = None
1221 tofit_labels = None
1222
1223 topredict_data = None
1224 topredict_labels = None
1225
1226
1227 for t in traindata:
1228 if tofit_data is None:
1229 tofit_data = t
1230 else:
1231 tofit_data = append(tofit_data, t, 0)
1232
1233 for l in trainlabels:
1234 if tofit_labels is None:
1235 tofit_labels = l
1236 else:
1237 tofit_labels = append(tofit_labels, l)
1238
1239 for t in testdata:
1240 if topredict_data is None:
1241 topredict_data = t
1242 else:
1243 topredict_data = append(topredict_data, t, 0)
1244
1245 for l in testlabels:
1246 if topredict_labels is None:
1247 topredict_labels = l
1248 else:
1249 topredict_labels = append(topredict_labels, l)
1250
1251
1252 # print "[II] Fit data: ", tofit_data
1253 ## print "[II] Fit labels: ", tofit_labels
1254 # print "[II] Predict data: ", topredict_data
1255 # print "[II] Predict labels: ", topredict_labels
1256 #
1257 estimator_ = deepcopy(estimator)
1258
1259 print tofit_labels
1260 estimator_.fit(tofit_data, tofit_labels)
1261
1262 labels = estimator_.predict(topredict_data)
1263
1264 m = sum(array(topredict_labels==labels).astype(float))/len(labels)
1265
1266
1267 print "[II] [%.2f-%.2f] cross-validation for HMM Classifier: %.1f" % (p, (1-p), m)
1268
1269 RATIOS.append(m)
1270
1271 return RATIOS,percents
1272
1273
1274 class HMMsvmClassifier2:
1275 def __init__(self, N=2):
1276 self.classifiers = {}
1277 self.name = "HMM-SVM Classifier 2"
1278 self.smallname = "hmmsvm2"
1279 self.obs = MyAVAClassifier()
1280 self.chain_size = N
1281
1282 def getName(self):
1283 return self.name
1284
1285 def fit(self, X, y):
1286 self.n_classes = max(unique(y))+1
1287 n_classes = self.n_classes
1288 chain_size = self.chain_size
1289
1290 if n_classes == 1:
1291 self.only_one_class = True
1292 return
1293 else:
1294 self.only_one_class = False
1295
1296 print "[II] Number of classes: ", n_classes
1297
1298 hmms = [None]*n_classes
1299 obs = [None]*n_classes
1300
1301
1302 for i in range(chain_size, len(y)): # `parameters` was undefined here
1303 class_ = y[i-1]
# NOTE: this classifier was left unfinished in this commit;
# HMMsvmClassifier below is the working variant.
1304
1305
1306
1307 class HMMsvmClassifier:
1308 def __init__(self, N=2):
1309 self.classifiers = {}
1310 self.name = "HMM-SVM Classifier"
1311 self.smallname = "hmmsvm"
1312 self.obs = MyAVAClassifier()
1313 self.chain_size = N
1314
1315 def getName(self):
1316 return self.name
1317
1318 def fit(self, X, y):
1319 self.n_classes = max(unique(y))+1
1320
1321 self.obs.fit(X,y)
1322 self.hmm = HMMsvm(self.obs)
1323 self.hmm.fit([X],[y])
1324
1325 def predict(self, X):
1326 # chain_size = self.chain_size
1327 # predicted = zeros((X.shape[0]),)
1328 # for i in range(chain_size, len(predicted)):
1329 # predicted[i] = self.hmm.predict(X[i-chain_size:i,:])[-1]
1330
1331
1332
1333 # return predicted
1334
1335 return self.hmm.predict(X)
1336
1337 def confidence(self, x, q):
1338 return self.hmm.estimate_emission_probability(x, q)
1339
1340
1341
1342 class SinkHoleClassifier:
1343 def __init__(self):
self.name = "SinkHole Classifier" # getName() relies on self.name
self.smallname = "sinkhole"
1344 self.classifierNB = NBClassifier()
1345 self.classifierSVM = MyAVAClassifier()
1346 self.classifierHMM = HmmClassifier3()
1347 self.classifierHMMSVM = HMMsvmClassifier()
1348
1349
1350
1351 def getName(self):
1352 return self.name
1353
1354
1355 def fit(self, X, y):
1356 self.n_classes = max(unique(y))+1
1357
1358 self.classifierNB.fit(X,y)
1359 self.classifierSVM.fit(X,y)
1360 self.classifierHMM.fit(X,y)
1361 self.classifierHMMSVM.fit(X,y)
1362
1363
1364 def predict(self, X):
1365 predictedNB = self.classifierNB.predict(X)
1366 predictedSVM = self.classifierSVM.predict(X)
1367 predictedHMM = self.classifierHMM.predict(X)
1368 predictedHMMSVM = self.classifierHMMSVM.predict(X)
1369
1370
1371
1372 predicted = zeros((X.shape[0], ))
1373
1374 for i in range(0, len(predicted)):
1375 candidates = [predictedNB[i], predictedSVM[i], predictedHMM[i], predictedHMMSVM[i]]
1376 c = Counter(candidates)
1377
1378 most_common = c.most_common()
1379
1380 # If there is an equal voting, select something at random
1381 if len(unique([k[1] for k in most_common])) == 1:
1382 predicted[i] = most_common[randint(len(most_common))][0]
1383 else:
1384 predicted[i] = most_common[0][0]
1385
1386
1387
1388 return predicted
1389
1390 class MyAVAClassifier:
1391
1392 def __init__(self):
1393 self.classifiers = {}
1394 self.name = "Linear SVM Classifier"
1395 self.smallname = "svc-ava"
1396 # self.probabilities = {}
1397
1398
1399 def getName(self):
1400 return self.name
1401 def fit(self, X, y, flr = 0):
1402
1403 n_classes = max(unique(y)) + 1
1404
1405 # Transform to a binary if there are only two classes
1406 if len(unique(y)) == 1:
1407 self.only_one_class = True
1408 self.n_classes = 1
1409 self.one_class_label = y[0]
1410 return
1411 elif len(unique(y)) == 2:
1412
1413 self.n_classes = n_classes
1414 self.svm = svm.SVC(probability = True, kernel='poly', gamma=2, C = C)
1415 self.svm.fit(X,y)
1416 classes_ = unique(y)
1417 self.classifiers[(classes_[0], classes_[1])] = self.svm
1418 self.only_two_classes = True
1419 self.only_one_class = False
1420
1421 return
1422 else:
1423 self.only_two_classes = False
1424 self.only_one_class = False
1425
1426
1427 classes = arange(0, n_classes)
1428 self.n_classes = n_classes
1429 # M = n_classes*(n_classes-1) # Number of classifiers
1430
1431 h = histogram(y, n_classes)[0].astype(float)
1432 self.prior = h/sum(h)
1433
1434 transmat = zeros((n_classes, n_classes))
1435
1436 for i in range(1, len(y)):
1437 prev = y[i-1]
1438 cur = y[i]
1439 transmat[prev,cur] += 1
1440
1441 transmat = transmat/sum(transmat)
1442
1443 self.transmat = transmat
1444
1445 # Add a very small probability for random jump to avoid zero values
1446
1447 self.transmat += flr
1448 self.transmat = self.transmat/sum(self.transmat)
1449
1450 for i in range(0, n_classes):
1451 for j in range(0, n_classes):
1452 if i != j and (i,j) not in self.classifiers and (j,i) not in self.classifiers:
1453 # print (i,j)
1454 # print classes
1455 idx_ = bitwise_or(y == classes[i], y == classes[j])
1456 # idx_ = bitwise_or(y == i, y == j)
1457
1458 X_ = X[idx_, :]
1459
1460 y_ = y[idx_]
1461 #print y_
1462
1463 if len(unique(y_)) > 1:
1464 svm_ = svm.SVC(probability = True, kernel='poly', gamma=2, C = C)
1465
1466 # print y_
1467 svm_.fit(X_, y_)
1468 self.classifiers[(i,j)] = svm_
1469 # self.probabilities[(i,j)] = svm_.predict_proba(X)
1470
1471
1472 def estimate_pairwise_class_probability(self, i, j, x):
1473
1474 # print self.classifiers.keys()
1475
1476 if (i,j) not in self.classifiers and (j,i) in self.classifiers:
1477 return self.classifiers[(j,i)].predict_proba(x)[0,1]
1478 elif (i,j) not in self.classifiers and (j,i) not in self.classifiers:
1479 return 0.0
1480 else:
1481 return self.classifiers[(i,j)].predict_proba(x)[0,0]
1482
1483 def estimate_posterior_probability(self, i, x):
1484 mus = zeros((self.n_classes,))
1485 for j in range(0, self.n_classes):
1486 if i != j:
1487 pcp = self.estimate_pairwise_class_probability(i,j,x)
1488 pcp += 1e-18
1489 mus[j] = 1/pcp
1490 # print mus
1491 S = sum(mus) - (self.n_classes - 2)
1492 # print S
1493 return 1/S
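
# The formula above appears to be the pairwise-coupling rule of Price et al.
# (1995): p_i = 1/(sum_j 1/p_ij - (n-2)), which fuses the one-vs-one
# posteriors p_ij into a single multi-class posterior estimate.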
1494
1495 def estimate_posterior_probability_vector(self, x):
1496 posterior = zeros((self.n_classes,))
1497 for i in range(0, len(posterior)):
1498 posterior[i] = self.estimate_posterior_probability(i, x)
1499
1500 return posterior
1501
1502 # def estimate_emission_probability(self, i, x):
1503 # post = self.estimate_posterior_probability_vector(x)
1504 # # print "post: ", post
1505 # prior = self.prior
1506 # # print "prior: ", prior
1507 # p = post/prior
1508 # p = p/sum(p)
1509 #
1510 # return p[i]
1511
1512 # def estimate_emissmat(self, X):
1513 # emissmat = zeros((X.shape[0], self.n_classes))
1514 # for i in range(0, X.shape[0]):
1515 # for j in range(0, self.n_classes):
1516 # emissmat[i,j] = self.estimate_emission_probability(j, X[i,:])
1517 #
1518 # return emissmat
1519
1520 def predict(self, X):
1521 predicted = zeros((X.shape[0],))
1522
1523 if self.only_one_class == True:
1524 return ones((X.shape[0],))*self.one_class_label
1525 elif self.only_two_classes == True:
1526 return self.svm.predict(X)
1527
1528
1529 for i in range(0, X.shape[0]):
1530 x = X[i,:]
1531 P = zeros((self.n_classes,))
1532
1533
1534 for c in range(0, len(P)):
1535 P[c] = self.estimate_posterior_probability(c, x)
1536
1537 pred = argmax(P)
1538 predicted[i] = pred
1539
1540 return predicted
1541
1542 def logsum(X):
1543 if len(X) == 1:
1544 return log(X[0])
1545 else:
1546 return logaddexp(log(X[0]),logsum(X[1:]))
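
# Note: logsum(X) computes the same value as logsumexp(log(X)); the recursive
# form will hit Python's recursion limit on long inputs, which is presumably
# why scipy's logsumexp is used elsewhere in this file.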
1547
1548
1549
1550
1551 def smooth_matrix_1D(X):
1552 window = scipy.signal.gaussian(51,4)
1553 window = window/sum(window)
1554 intermx = zeros((X.shape[0],X.shape[1]+100))
1555 intermx[:, 50:-50] = X
1556
1557 for m in range(0, X.shape[0]):
1558 # print intermx.shape
1559 intermx[m,:] = convolve(ravel(intermx[m,:]), window,'same')
1560
1561 return intermx[:,50:-50]
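
# smooth_matrix_1D convolves each row with a 51-tap, unit-area Gaussian
# window (sigma = 4 samples), zero-padding 50 samples at each end so the
# output keeps the input's shape.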
1562
1563 def adm_reconstruct(codeword, h, dmin=.01, dmax=.28):
1564 x = zeros((1, codeword.shape[1]))
1565
1566 delta1 = dmin
1567 delta2 = dmin
1568 Sum = h
1569
1570 x[0] = h
1571 for i in range(0, codeword.shape[1]):
1572 if codeword[0,i] == 0:
1573 delta1 = dmin
1574 delta2 = dmin
1575
1576 elif codeword[0,i] == 1:
1577 delta2 = dmin
1578 Sum += delta1
1579 delta1 *= 2
1580 if delta1 > dmax:
1581 delta1 = dmax
1582
1583 elif codeword[0,i] == -1:
1584 delta1 = dmin
1585 Sum -= delta2
1586 delta2 *= 2
1587 if delta2 > dmax:
1588 delta2 = dmax
1589 x[0,i] = Sum
1590 return x
1591
1592 def adm(x, dmin=.01, dmax=.28, tol=0.0001):
1593
1594 # Adaptive delta modulation, adapted from the code:
1595 # (adeltamod.m)
1596 #
1597 # Adaptive Delta Modulator
1598 # by Gandhar Desai (gdesai)
1599 # BITS Pilani Goa Campus
1600 # Date: 28 Sept, 2013
1601
1602 xsig = x
1603
1604 Lx = len(x)
1605
1606 ADMout = zeros((1, Lx))
1607 codevec = zeros((1, Lx))
1608
1609
1610 Sum = x[0]
1611 delta1 = dmin
1612 delta2 = dmin
1613 mult1 = 2
1614 mult2 = 2
1615 for i in range(0, Lx):
1616 #print abs(xsig[i] - Sum)
1617 if (abs(xsig[i] - Sum) < tol):
1618 bit = 0
1619 delta2 = dmin
1620 delta1 = dmin
1621
1622
1623 elif (xsig[i] >= Sum):
1624 bit = 1
1625 delta2 = dmin
1626 Sum += delta1
1627 delta1 *= mult1
1628 if delta1 > dmax:
1629 delta1 = dmax
1630
1631
1632 else:
1633 bit = -1
1634 delta1 = dmin
1635 Sum -= delta2
1636 delta2 *= mult2
1637 if delta2 > dmax:
1638 delta2 = dmax
1639
1640
1641
1642 ADMout[0, i] = Sum
1643 codevec[0, i]= bit
1644
1645 return ADMout,codevec, x[0]
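
# Hedged round-trip sketch for the ADM coder (defined but never called):
def _demo_adm_roundtrip():
    x = linspace(0., 0.5, 200)                      # a slowly rising ramp
    admout, code, firstval = adm(x, dmin=0.01, dmax=0.28, tol=0.0001)
    x_hat = adm_reconstruct(code, firstval, dmin=0.01, dmax=0.28)
    return mse(x, x_hat)                            # small for slow signals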
1646
1647 def median_filter(v, K):
1648 v2 = zeros((len(v),))
1649 for i in range(K, len(v)):
1650 seq = v[i-K:i]
1651 m = median(seq)
1652 v2[i-K:i] = m
1653
1654 return v2
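
# e.g. median_filter(array([0., 0., 9., 0., 0., 0.]), 3) returns all zeros:
# the isolated spike is suppressed by the sliding median.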
1655
1656
1657
1658
1659 from models import ReverbModel
1660
1661
1662
1663 if __name__=="__main__":
1664 if len(argv) != 2:
1665 print "[EE] Wrong number of arguments"
1666 print "[II] Correct syntax is:"
1667 print "[II] \t%s <trainingdir>" % argv[0]
1668 print "[II] where <trainingdir> is the directory containing the feature files in the format *_features.yaml"
1669 print "[II] and the parameters in the format *_parameters.yaml"
1670 exit(-1)
1671
1672
1673 n_clusters = 3
1674 UpsamplingFactor = 1
1675 dmin = 0.001
1676 dmax = 0.28
1677 tol = 0.001
1678
1679 d1min = 0.01
1680 d1max = 0.1
1681
1682 g1min = 0.01
1683 g1max = 0.99
1684
1685 damin = 0.006
1686 damax = 0.012
1687
1688 Gmin = 0.01
1689 Gmax = 0.99
1690
1691 gcmin = 0.01
1692 gcmax = 0.99
1693
1694 # For searching the directory
1695 from glob import glob
1696
1697 traindir = argv[1]
1698
1699 songs_in_dir = glob("%s/*_features.yaml" % traindir)
1700
1701 print "[II] Using files: %s" % songs_in_dir
1702
1703
1704 chain_length=1
1705
1706
1708
1709 # fullfeatures_pool = Pool()
1710
1711 features_vector = None
1712 parameters_vector = None
1713 index_vector = None
1714
1715 idx = 0
1716 # Shuffle segments
1717 random.shuffle(songs_in_dir)
1718 for f_ in songs_in_dir:
1719 infile = f_
1720 paramfile = "%s_parameters.yaml" % f_.split('_features.yaml')[0]
1721
1722 print "[II] Using features file: %s" % infile
1723 print "[II] and parameters file: %s" % paramfile
1724
1725
1727
1728 features_pool = YamlInput(filename = infile)()
1729 parameters_pool = YamlInput(filename = paramfile)()
1730
1731 d1 = parameters_pool['parameters.d1']
1732 da = parameters_pool['parameters.da']
1733 g1 = parameters_pool['parameters.g1']
1734 gc = parameters_pool['parameters.gc']
1735 G = parameters_pool['parameters.G']
1736
1737
1738
1739
1740
1741 feature_captions = features_pool.descriptorNames()
1742 parameter_captions = ['d1','da','g1','gc','G']
1743
1744
1745 svmhmmstr = ""
1746
1747
1748 for c in features_pool.descriptorNames():
1749 if c.split('.')[0] == 'metadata' or c.split('.')[0] == 'parameter':
1750 feature_captions.remove(c)
1751
1752
1753 # close('all')
1754
1755 # print "[II] Loaded training data from %s (%s) " % (infile, features_pool['metadata.date'][0])
1756 print "[II] %d Features Available: " % len(feature_captions)
1757
1758
1759
1760 print str(feature_captions).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7]
1761
1762 nfeatures_in = len(feature_captions)
1763 nparameters_in = 5
1764 features_vector_one = zeros((nfeatures_in, len(features_pool[feature_captions[0]])))
1765 index_vector_one = ones((len(features_pool[feature_captions[0]]),))
1766 parameters_vector_one = ones((nparameters_in, len(features_pool[feature_captions[0]])))
1767 # paramet
1768
1769
1770 for i in range(0, nfeatures_in):
1771 features_vector_one[i, :] = features_pool[feature_captions[i]].T
1772 # for i in range(0, nparameters_in):
1773 # parameters_vector[i, :] = features_pool[parameter_captions[0]].T
1774
1775 parameters_vector_one[0, :] = d1*parameters_vector_one[0,:]
1776 parameters_vector_one[1, :] = g1*parameters_vector_one[1,:]
1777 parameters_vector_one[2, :] = da*parameters_vector_one[2,:]
1778 parameters_vector_one[3, :] = gc*parameters_vector_one[3,:]
1779 parameters_vector_one[4, :] = G*parameters_vector_one[4,:]
1780
1781 # Stores example index number
1782
1783 index_vector_one *= idx
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793 print "[II] %d parameters used:" % len(parameter_captions)
1794 print str(parameter_captions).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7].replace('parameter.','')
1795
1796 if features_vector is None:
1797 features_vector = features_vector_one
1798 else:
1799 features_vector = append(features_vector, features_vector_one, 1)
1800
1801 if parameters_vector is None:
1802 parameters_vector = parameters_vector_one
1803 else:
1804 parameters_vector = append(parameters_vector, parameters_vector_one, 1)
1805
1806 if index_vector is None:
1807 index_vector = index_vector_one
1808 else:
1809 index_vector = append(index_vector, index_vector_one)
1810
1811
1812 idx += 1
1813
1814
1815 print "[II] Marking silent parts"
1816 features_full_nontransformed_train = features_vector.copy()
1817 # silent_parts = zeros((1, len(features_pool[feature_captions[i]].T)))
1818
1819 silent_parts = zeros((1, features_vector.shape[1]))
1820
1821 rms = features_vector[feature_captions.index('rms'), :]
1822
1823 # Implementing a hysteresis gate -- the high threshold is a tenth of
1824 # the mean RMS and the low threshold a hundredth of it
1825
1826 rms_threshold_mean = mean(rms)
1827
1828 rms_threshold_max = max(rms)
1829 rms_threshold_min = min(rms)
1830
1831 rms_threshold_high = 0.1 * rms_threshold_mean
1832 rms_threshold_low = 0.01 * rms_threshold_mean
1833
1834 for n in range(1, len(rms)):
1835 prev = rms[n-1]
1836 curr = rms[n]
1837
1838 if prev >= rms_threshold_high:
1839 if curr < rms_threshold_low:
1840 silent_parts[0,n] = 1
1841 else:
1842 silent_parts[0,n] = 0
1843 elif prev <= rms_threshold_low:
1844 if curr > rms_threshold_high:
1845 silent_parts[0,n] = 0
1846 else:
1847 silent_parts[0,n] = 1
1848 else:
1849 silent_parts[0,n] = silent_parts[0,n-1]
1850
1851
1852 if silent_parts[0,1] == 1:
1853 silent_parts[0, 0] = 1
1854
1855
1856
1857 active_index = invert(silent_parts.flatten().astype(bool))
1858
1859 # Keep only active parts
1860
1861
1862 # Uncomment this
1863 features_vector = features_vector[:, active_index]
1864
1865 # Keep this for debugging purposes
1866
1867
1868 parameters_vector = parameters_vector[:, active_index]
1869 # index_vector = index_vector[active_index]
1870
1871 # Label examples for a chain of chain_length
1872
1873 # idx = 0
1874 # for i in range(0, len(index_vector)):
1875 # index_vector[i] = idx
1876 # if i % chain_length == 0:
1877 # idx += 1
1878 # number_of_examples = idx
1879
1880
1881
1882 # Scale parameters towards [0,1] (note: each row is divided by the
# parameter's max rather than by (max - min), so the range is only
# approximately [0,1])
1883
1884
1885 parameters_vector[0,:] = (parameters_vector[0,:] - d1min)/d1max #d1
1886 parameters_vector[1,:] = (parameters_vector[1,:] - g1min)/g1max #g1
1887 parameters_vector[2,:] = (parameters_vector[2,:] - damin)/damax #da
1888 parameters_vector[3,:] = (parameters_vector[3,:] - gcmin)/gcmax #gc
1889 parameters_vector[4,:] = (parameters_vector[4,:] - Gmin)/Gmax #G
1890
1891
1892
1893
1894
1895
1896
1897 moments_vector = zeros((features_vector.shape[0], 2))
1898
1899 features_vector_original = features_vector.copy()
1900
1901
1902
1903 print "[II] Storing moments vector"
1904 for i in range(0, features_vector.shape[0]):
1905 mean_ = mean(features_vector[i,:])
1906 std_ = std(features_vector[i,:], ddof=1)
1907 moments_vector[i,0] = mean_
1908 moments_vector[i,1] = std_
1909
1910 features_vector[i,:] = (features_vector[i,:] - mean_)/std_
1911
1912 features_vector_old_train = features_vector.copy()
1913 # moments_vector_train = moments_vector
1914
1915
1916 print "[II] Extracting PCA configuration "
1917
1918 kernel, q, featurelist = extract_pca_configuration_from_data(features_vector)
1919
1920
1921 #q = 1
1922
1923
1924 # q = q + 1
1925
1926 print "[II] Optimal number of PCs to keep: %d" % q
1927
1928
1929
1930
1931 feature_captions_array = array(feature_captions)
1932
1933 features_to_keep = list(feature_captions_array[featurelist])
1934
1935 print "[II] Decided to keep %d features:" % len(features_to_keep)
1936 print str(features_to_keep).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7]
1937
1938 featurelist_bak = featurelist
1939 features_kept_data = features_vector[featurelist,:]
1940 features_kept_data_bak = features_kept_data.copy()
1941 # features_vector_old_train = features_kept_data
1942 features_vector = (kernel.T*features_kept_data)[0:q,:]
1943 features_vector_new_train = features_vector.copy()
1944
1945
1946 # example_features = None
1947 # example_parameters = None
1948 # example_idx = None
1949 #
1950 # for idx in range(0, shape(parameters_vector)[1]-chain_length):
1951 # example_features_ = features_vector[:, idx:idx+chain_length]
1952 # # print example_features_
1953 # example_parameters_ = parameters_vector[:, idx:idx+chain_length]
1954 # example_idx_ = ones((shape(example_parameters_)[1],))
1955 #
1956 # if example_features is None:
1957 # example_features = example_features_
1958 # else:
1959 # #print "appending"
1960 # example_features = append(example_features, example_features_, 1)
1961 #
1962 # if example_parameters is None:
1963 # example_parameters = example_parameters_
1964 # else:
1965 # example_parameters = append(example_parameters, example_parameters_, 1)
1966 #
1967 # if example_idx is None:
1968 # example_idx = example_idx_*idx
1969 # else:
1970 # example_idx = append(example_idx, example_idx_*idx, 1)
1971 #
1972 #
1973 #
1974 #
1975 # features_vector = example_features
1976 # parameters_vector = example_parameters
1977 # index_vector = example_idx
1978
1979 print "[II] Trying ADM-coded parameters"
1980 print "[II] Upsampling features and parameters by a factor of %d" % UpsamplingFactor
1981
1982
1983 # Upsampled features and parameters
1984 features_vector_upsampled = smooth_matrix_1D(repeat(features_vector,UpsamplingFactor, axis=1))
1985 # features_vector_upsampled = repeat(features_vector, UpsamplingFactor, axis=1)
1986
1987
1988 # features_vector_upsampled = smooth_matrix_1D(repeat(features_vector_old_train,UpsamplingFactor, axis=1))
1989
1990 # feature_labels_upsampled = repeat(features_clustering_labels,UpsamplingFactor, axis=0)
1991 parameters_vector_upsampled = repeat(parameters_vector,UpsamplingFactor, axis=1)
1992
1993
1994 # km = KMeans(init='k-means++', n_init=10, max_iter=300, tol=0.0000001, verbose = 0, n_clusters=12).fit(parameters_vector_upsampled.T)
1995 # centers_ = km.cluster_centers_
1996 # labels__ = km.labels_
1997 # Remove the following two lines in order to restore non-kmeans version
1998 # parameters_vector_kmeans_upsampled = array(centers_[labels__])
1999 #
2000 # parameters_vector_upsampled = parameters_vector_kmeans_upsampled.T
2001 #
2002 #
2003 # parameters_vector_upsampled = smooth_matrix_1D(parameters_vector_upsampled)
2004
2005 parameters_vector_upsampled_adm = matrix(zeros(shape(parameters_vector_upsampled)))
2006 parameters_vector_upsampled_code = matrix(zeros(shape(parameters_vector_upsampled)))
2007 parameters_vector_upsampled_firstval = matrix(zeros((parameters_vector_upsampled.shape[0],1)))
2008
2009 # Reconstructed parameters
2010
2011
2012
2013 parameters_vector_upsampled_reconstructed = matrix(zeros(shape(parameters_vector_upsampled)))
2014
2015
2016
2017 # ADM HERE
2018 #
2019
2020 def adm_matrix(X, dmin=0.001,dmax=0.28,tol=0.001):
2021
2022 out = matrix(zeros(shape(X)))
2023 code = matrix(zeros(shape(X)))
2024 firstval = matrix(zeros((X.shape[0], 1)))
2025
2026 for i in range(0, X.shape[0]):
2027 out[i,:], code[i,:], firstval[i,0] = adm(X[i,:],dmin=dmin,dmax=dmax,tol=tol)
2028
2029 return out,code,firstval
2030
2031 # parameters_vector_upsampled_reconstructed[i,:] = adm_reconstruct(parameters_vector_upsampled_code[i,:],parameters_vector_upsampled_firstval[i,0], dmin=dmin,dmax=dmax)
2032
2033 def adm_matrix_reconstruct(code, firstval, dmin=0.001, dmax=0.28):
2034 X = matrix(zeros(shape(code)))
2035 for i in range(0, code.shape[0]):
2036 X[i,:] = adm_reconstruct(code[i,:], firstval[i,0], dmin=dmin, dmax=dmax)
2037
2038 return X
2039
2040
2041 parameters_vector_upsampled_adm, parameters_vector_upsampled_code, parameters_vector_upsampled_firstval = adm_matrix(parameters_vector_upsampled, dmin, dmax, tol)
2042 # parameters_vector_upsampled = parameters_vector_upsampled_code
2043
2044
2045 def diff_and_pad(X):
2046 return concatenate((
2047 zeros((
2048 shape(X)[0],
2049 1
2050 )),
2051 diff(X, axis=1)),
2052 axis=1)
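
# e.g. diff_and_pad(matrix([[1., 2., 4.]])) -> matrix([[0., 1., 2.]]):
# a first difference along time, padded with a leading zero so that the
# shape of the input is preserved.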
2053
2054
2055 # Change for adm,
2056
2057 # parameters_vector_upsampled = parameters_vector_upsampled_code
2058 print "[II] Clustering features."
2059 #
2060 features_clustering = GMM(n_components = n_clusters, covariance_type='diag')
2061 #
2062 features_clustering.fit( features_vector_upsampled.T, y=parameters_vector_upsampled_code)
2063 #
2064 features_clustering_means = features_clustering.means_
2065 features_clustering_labels = features_clustering.predict(features_vector_upsampled.T)
2066 features_clustering_sigmas = features_clustering.covars_
2067 #
2068 features_vector_upsampled_estimated = zeros(shape(features_vector_upsampled))
2069 #
2070 #
2071 for n in range(0, len(features_vector_upsampled_estimated[0])):
2072 features_vector_upsampled_estimated[:,n] = features_clustering_means[features_clustering_labels[n]]
2073 #
2074 #
2075 print "[II] Features MSE for %d clusters: %.3f" % (n_clusters, mse(features_vector_upsampled, features_vector_upsampled_estimated))
2076
2077
2078
2079 def happy_curve_classification(data, classes, estimator, Nd=1):
2080 print "[II] Generating Happy Curve "
2081 from copy import deepcopy
2082 estimator_fulldata = deepcopy(estimator)
2083 estimator_fulldata.fit(data, classes)
2084 labels = estimator.predict(data)
2085
2086 # 1. Split data in two, training and testing
2087
2088 Ntr = int(round(data.shape[0]/2)) # Training dataset size
2089 Nts = data.shape[0] - Ntr # Testing dataset size
2090
2091 ratios = []
2092
2093 for n in range(Nd, Ntr):
2094 train, test, trainl, testl = cross_validation.train_test_split(data, classes, test_size = 0.5, random_state = 0)
2095 train = train[0:n,:]
2096 trainl = trainl[0:n]
2097 # print "trainl"
2098 # print trainl
2099 estimator_ = deepcopy(estimator)
2100 estimator_.fit(train,trainl)
2101 labels = estimator_.predict(test)
2102
2103 ratio = sum(array(testl==labels).astype(float))/len(labels)
2104
2105 ratios.append(ratio)
2106
2107
2108 return ratios
2109
2110
2111 def cross_validate_classification(data, classes, estimator):
2112 print "[II] Crossvalidating... "
2113 from copy import deepcopy
2114 estimator_fulldata = deepcopy(estimator)
2115 estimator_fulldata.fit(data, classes)
2116
2117 percents = arange(0.1,0.9,0.1)
2118 MSEs = []
2119 labels = estimator.predict(data)
2120
2121 print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(labels))
2122
2123 for p in percents:
2124 train,test,trainlabels,testlabels = cross_validation.train_test_split(data,classes,test_size=p,random_state=0)
2125 estimator_ = deepcopy(estimator)
2126 estimator_.fit(train, trainlabels)
2127 labels = estimator_.predict(test)
2128 print "[II] for training(%.2f)-testing(%.2f): %.2f" % ((1-p),p,sum(array(testlabels==labels).astype(float))/len(labels))
2129
2130 return MSEs
2131
2132 def cross_validate_clustering(data, estimator):
2133 print "[II] Crossvalidating... "
2134 estimator_fulldata = estimator
2135 estimator_fulldata.fit(data)
2136
2137 # labels = estimator_fulldata.predict(data)
2138 means = estimator_fulldata.means_
2139 # print means
2140
2141 percents = arange(0.1,0.6,0.1)
2142 MSEs = []
2143 reconstructed = zeros(shape(data))
2144 labels = estimator.predict(data)
2145 for n in range(0, len(reconstructed)):
2146 reconstructed[n,:] = means[labels[n]]
2147
2148 MSEs.append(mse(data,reconstructed))
2149 for p in percents:
2150 train,test = cross_validation.train_test_split(data,test_size=p,random_state=0)
2151 train = matrix(train)
2152 test = matrix(test)
2153
2154 estimator.fit(train)
2155 means = estimator.means_
2156 labels = estimator.predict(test)
2157 reconstructed = zeros(shape(test))
2158 for n in range(0, len(reconstructed)):
2159 reconstructed[n,:] = means[labels[n]]
2160
2161 m = mse(test,reconstructed)
2162
2163 print "[II] MSE for clustering crossvalidated data %.2f-%.2f: %.5f" % ((1-p), p, m)
2164 MSEs.append(m)
2165
2166 print "[II] Crossvalidation complete"
2167
2168 return MSEs
2169
2170
2171
2172
2173 # Construct parameters alphabet, each symbol is going to be a different column vector
2174 # in parameter code matrix
2175
2176
2177 def vector_to_states(X):
2178 """
2179 Input: a vector MxN with N samples and M variables
2180 Output: a codeword dictionary `parameters_alphabet',
2181 state_seq, inverse `parameters_alphabet_inv' """
2182
2183
2184 parameters_alphabet = {}
2185 n = 0
2186
2187 for i in range(0, X.shape[1]):
2188 vec = tuple(ravel(X[:,i]))
2189 if vec not in parameters_alphabet:
2190 parameters_alphabet[vec] = n
2191 n += 1
2192
2193 parameters_alphabet_inv = dict([(parameters_alphabet[m],m) for m in parameters_alphabet])
2194
2195 state_seq = array([parameters_alphabet[tuple(ravel(X[:,m]))] for m in range(0, X.shape[1])] ) # iterate over X itself, not over a global
2196
2197 return state_seq, parameters_alphabet, parameters_alphabet_inv
2198
2199
2200 def states_to_vector(predicted, parameters_alphabet_inv):
2201 estimated = matrix(zeros((len(parameters_alphabet_inv[0]), len(predicted))))
2202 for i in range(0, len(predicted)):
2203 # print "[II] Predicted: ", predicted[i]
2204 estimated[:, i] = matrix(parameters_alphabet_inv[predicted[i]]).T
2205
2206 return estimated
2207
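# Hedged round-trip sketch (defined but never called): vector_to_states and
# states_to_vector invert each other on a toy parameter matrix.
def _demo_state_roundtrip():
    P = matrix([[0., 1., 0.], [1., 1., 0.]])    # 2 variables, 3 samples
    seq, alphabet, alphabet_inv = vector_to_states(P)
    P_hat = states_to_vector(seq, alphabet_inv)
    assert (P_hat == P).all()
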
2208 # state_seq, parameters_alphabet, parameters_alphabet_inv = vector_to_states(parameters_vector_upsampled_code)
2209
2210
2211 # parameters_change_variable = matrix(diff_and_pad(parameters_vector_upsampled)!=0).astype(int)
2212
2213 # changes_state_seq, changes_parameters_alphabet, changes_parameters_alphabet_inv = vector_to_states(parameters_change_variable)
2214
2215
2216 # This is an hmm that just codes the changes"
2217 # We have only two states, change and stay the same.
2218
2219 # Uncomment that here
2220
2221 # parameters_vector_upsampled = parameters_vector_upsampled_code
2222 # parameters_state_orig, parameter_state_alphabet_orig, parameter_state_alphabet_inv_orig = vector_to_states(parameters_vector)
2223 # TODO: HA
2224 # parmaeters_vector_upsampled = parameters_vector_upsampled_code
2225 parameters_state, parameter_state_alphabet, parameter_state_alphabet_inv = vector_to_states(parameters_vector_upsampled)
2226
2227
2228 # Remove outliers
2229
2230 ee = EllipticEnvelope()
2231
2232 # inliers = ee.fit(features_vector_upsampled.T).predict(features_vector_upsampled.T) == 1
2233
2234 inoutliers = zeros((len(parameters_state),))
2235 #
2236 for p in unique(parameters_state):
2237 ee = EllipticEnvelope(contamination=0.1)
2238 inoutliers[parameters_state == p] = ee.fit(features_vector_upsampled[:, parameters_state == p].T).predict(features_vector_upsampled[:, parameters_state == p].T)
2239 #
2240 #
2241 # Label inliers and outliers
2242
2243 inliers = inoutliers > 0
2244 outliers = inoutliers < 0
2245 #
2246 #
2247 #
2248
2249 # Do pre-processing and re-label the outliers
2250
2251 #completeavac = SinkHoleClassifier()
2252 #completeavac.fit(features_vector_upsampled[:, inliers].T, parameters_state[inliers])
2253
2254 #parameters_state_original = parameters_state.copy()
2255 #parameters_state[outliers] = completeavac.predict(features_vector_upsampled[:,outliers].T)
2256
2257
2258
2259
2260
2261 # features_vector_upsampled = features_vector_upsampled[:, inliers]
2262 # parameters_state = parameters_state[inliers]
2263
2264 # print "UNIQUE PARAMETERS STATE"
2265 # print unique(parameters_state)
2267 print "[II] Testing Gaussian Naive Bayes Classifier"
2268 gnb = GaussianNB()
2269 gnb.fit(features_vector_upsampled.T, parameters_state)
2270
2271
2272
2273 parameters_state_estimated = gnb.predict(features_vector_upsampled.T)
2274
2275 output = states_to_vector(parameters_state_estimated, parameter_state_alphabet_inv)
2276
2277 figure()
2278 subplot(211)
2279 plot(parameters_vector_upsampled.T)
2280 title('Parameter value')
2281 ylabel('value')
2282 xlabel('frame \#')
2283 subplot(212)
2284 #plot(smooth_matrix_1D(output).T)
2285 plot(output.T)
2286 ylabel('value')
2287 xlabel('frame \#')
2288
2289 errorvector_nbc = [mse(output[:,i], parameters_vector_upsampled[:,i]) for i in range(0, len(parameters_state_estimated))]
2290
2291
2292
2293
2294 #cross_validate_classification(features_vector_upsampled.T, parameters_state, gnb)
2295 # hc = happy_curve_classification(features_vector_upsampled.T, parameters_state, gnb)
2296
2297 #
2298 # figure()
2299 # plot(hc)
2300 # figure()
2301
2302 print "[II] Trying HMM-based classifiers"
2303
2304 # In order to do classification with HMMs, we need to:
2305 # 1. Split the parameters into classes
2306 # 2. Train one model per class
2307 # 3. Feed our data to all the models
2308 # 4. Check which has the best score and assign its class (see the sketch below)
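# A minimal sketch of steps 1-4 with hmmlearn's GaussianHMM (editor's
# example; `test_seq' is a hypothetical (frames x features) array, and the
# classifiers defined below add windowing and edge-case handling):
#
#     models = []
#     for c in unique(parameters_state):                    # 1. split by class
#         m_ = hmm.GaussianHMM(n_components=1, covariance_type='full')
#         m_.fit([features_vector_upsampled.T[parameters_state == c, :]])  # 2.
#         models.append(m_)
#     scores = [m_.score(test_seq) for m_ in models]        # 3. score all models
#     label = argmax(scores)                                # 4. best model wins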
2309 class SVMClassifier:
2310 def __init__(self,**kwargs):
2311 print "[II] SVM Classifier "
2312 # self.clf = svm.SVC(**kwargs)
2313 self.name = "SVM"
2314 self.smallname = "svm"
2315 self.clf = multiclass.OneVsOneClassifier(svm.SVC(**kwargs) )
2316
2317 def fit(self, X, classes):
2318 n_classes = max(unique(classes))+1
2319
2320 self.clf.fit(X,classes)
2321
2322 def predict(self, X):
2323 return self.clf.predict(X)
2324
2325 def getName(self):
2326 return self.name
2327
2328 def cross_validate(self, data, classes):
2329         print "[II] SVM Classifier Crossvalidating... "
2330 from copy import deepcopy
2331 estimator = deepcopy(self)
2332 estimator_fulldata = deepcopy(self)
2333 estimator_fulldata.fit(data, classes)
2334
2335 percents = arange(0.1,0.9,0.1)
2336 self.percents = percents * 100
2337
2338 RATIOS = []
2339         labels = estimator_fulldata.predict(data)  # use the copy that was actually fitted
2340
2341
2342         print "[II] Resubstitution (train = test) accuracy: %.2f" % (sum(array(classes==labels).astype(float))/len(labels))
2343
2344 for p in percents:
2345 traindata,testdata,trainlabels,testlabels = cross_validation.train_test_split(data,classes,test_size=p,random_state=1)
2346
2347
2348 # print "[II] [%.2f-%.2f] Examples used for training: %s" % (p, (1-p), str(traindata))
2349 # print "[II] [%.2f-%.2f] Examples used for testing: %s" % (p, (1-p), str(testdata))
2350 # Training phase
2351
2352
2353
2354 estimator_ = deepcopy(estimator)
2355 estimator_.fit(traindata, trainlabels)
2356
2357 predicted_labels = estimator_.predict(testdata)
2358
2359 m = sum(array(predicted_labels==testlabels).astype(float))/len(testlabels)
2360
2361             print "[II] [%.2f-%.2f] hold-out cross-validation for SVM Classifier: %.2f" % (p, (1-p), m)
2362
2363 RATIOS.append(m)
2364
2365 return RATIOS,percents
2366
2367
2368
2369
2370
2371
2372
2373 class HmmClassifier2:
2374 def __init__(self, N=2, chain_size=4, n_components = 1):
2375 print "[II] HMM Classifier (%d states, %d components)" % (N, n_components)
2376 self.name = "HMM (%d states, %d components)" % (N, n_components)
2377 self.n_components = n_components
2378 self.chain_size = chain_size
2379 self.hmms_ = []
2380 self.N = N
2381 self.n_states = N
2382
2383 def getName(self):
2384 return self.name
2385
2386 def fit(self, features, parameters):
2387 self.n_params = len(unique(parameters))
2388
2389
2390
2391 for n in range(0, self.n_params):
2392 hmm_ = hmm.GaussianHMM(n_components = self.n_components, covariance_type='full')
2393
2394 # Get training data for each parameter
2395
2396 for i in range(0, len(parameters)-self.chain_size):
2397 seq = features[i:i+self.chain_size, :]
2398 if parameters[i+self.chain_size-1] == n and seq.shape[0] == self.chain_size:
2399 # print "[II] Fitting: %s" % str(seq)
2400
2401 hmm_.fit([seq])
2402
2403 self.hmms_.append(hmm_)
2404
2405 def predict(self, X):
2406 labels = zeros((X.shape[0],))
2407 N = self.N
2408
2409         scores = zeros((self.n_params, ))  # one score per parameter class
2410
2411 for i in range(0, len(labels)):
2412 testdata = X[i:i+self.chain_size,:]
2413
2414             for j in range(0, self.n_params):
2415 scores[j] = self.hmms_[j].score(testdata)
2416 labels[i] = argmax(scores)
2417
2418 return labels
2419
2420 def cross_validate(self, data, classes):
2421         print "[II] HMM2 Classifier Crossvalidating... "
2422 from copy import deepcopy
2423 estimator = deepcopy(self)
2424 estimator_fulldata = deepcopy(self)
2425 estimator_fulldata.fit(data, classes)
2426
2427 percents = arange(0.1,0.9,0.1)
2428 self.percents = percents * 100
2429
2430 RATIOS = []
2431         labels = estimator_fulldata.predict(data)  # use the copy that was actually fitted
2432
2433
2434         print "[II] Resubstitution (train = test) accuracy: %.2f" % (sum(array(classes==labels).astype(float))/len(labels))
2435
2436 for p in percents:
2437 traindata,testdata,trainlabels,testlabels = cross_validation.train_test_split(data,classes,test_size=p,random_state=1)
2438
2439
2440 # print "[II] [%.2f-%.2f] Examples used for training: %s" % (p, (1-p), str(traindata))
2441 # print "[II] [%.2f-%.2f] Examples used for testing: %s" % (p, (1-p), str(testdata))
2442
2443 # Training phase
2444
2445
2446
2447 estimator_ = deepcopy(estimator)
2448 estimator_.fit(traindata, trainlabels)
2449
2450 predicted_labels = estimator_.predict(testdata)
2451
2452 m = sum(array(predicted_labels==testlabels).astype(float))/len(testlabels)
2453
2454             print "[II] [%.2f-%.2f] hold-out cross-validation for HMM2 Classifier: %.2f" % (p, (1-p), m)
2455
2456 RATIOS.append(m)
2457
2458 return RATIOS,percents
2459
2460
2461
2462
2463 class HmmClassifier:
2464 def __init__(self, N=2, n_components = 1):
2465 print "[II] HMM Classifier (%d states, %d components)" % (N, n_components)
2466 self.name = "HMM (%d states, %d components)" % (N, n_components)
2467 self.n_components = n_components
2468 self.chain_size = N
2469 self.hmms_ = []
2470 self.N = N
2471
2472 def getName(self):
2473 return self.name
2474
2475 def fit(self, X, states):
2476 self.n_states = len(unique(states))
2477
2478 for n in range(0, self.n_states):
2479 hmm_ = hmm.GaussianHMM(n_components = self.n_components, covariance_type = 'full')
2480
2481 # Get training data for each class
2482 vector = X[states == n,:]
2483
2484 # Fit the HMM
2485 # print vector
2486 hmm_.fit([vector])
2487
2488 # And append to the list
2489 self.hmms_.append(hmm_)
2490
2491 def predict(self,X):
2492 labels = zeros((X.shape[0],))
2493 N = self.N
2494
2495 m = 0
2496
2497 scores = zeros((1, self.n_states))
2498
2499
2500 while m*N < X.shape[0]:
2501 if (m+1)*N > X.shape[0]:
2502 testdata = X[m*N:,:]
2503 else:
2504 testdata = X[m*N:(m+1)*N,:]
2505
2506 # print testdata
2507
2508 for i in range(0, self.n_states):
2509 scores[0,i] = self.hmms_[i].score(testdata)
2510
2511 if (m+1)*N > X.shape[0]:
2512 labels[m*N:] = argmax(scores)
2513 else:
2514 labels[m*N:(m+1)*N] = argmax(scores)
2515
2516 m+= 1
2517
2518 return labels
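# Block-decoding example for predict() above: with N = 2 and a 5-frame X, the
# scored windows are X[0:2], X[2:4] and the ragged tail X[4:], and every frame
# in a window receives that window's argmax label.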
2519
2520 # def cross_validate(self, data, classes, index_vector):
2521 # print "[II] Crossvalidating... "
2522 # from copy import deepcopy
2523 # estimator = deepcopy(self)
2524 # estimator_fulldata = deepcopy(self)
2525 # estimator_fulldata.fit(data, classes)
2526 #
2527 # percents = arange(0.1,0.9,0.1)
2528 # self.percents = percents * 100
2529 # RATIOS = []
2530 # labels = estimator.predict(data)
2531 #
2532 # print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(labels))
2533 #
2534 # for p in percents:
2535 # train,test,trainlabels,testlabels = cross_validation.train_test_split(data,classes,test_size=p,random_state=1)
2536 # estimator_ = deepcopy(estimator)
2537 # estimator_.fit(train, trainlabels)
2538 # labels = estimator_.predict(test)
2539 # m = sum(array(testlabels==labels).astype(float))/len(labels)
2540 # RATIOS.append(m)
2541 # print "[II] for training(%.2f)-testing(%.2f): %.2f" % ((1-p),p,sum(array(testlabels==labels).astype(float))/len(labels))
2542 #
2543 # return RATIOS, percents
2544
2545 def cross_validate(self, data, classes):
2546         print "[II] HMM Classifier Crossvalidating... "
2547 from copy import deepcopy
2548 estimator = deepcopy(self)
2549 estimator_fulldata = deepcopy(self)
2550 estimator_fulldata.fit(data, classes)
2551
2552 percents = arange(0.1,0.9,0.1)
2553 self.percents = percents * 100
2554
2555 RATIOS = []
2556         labels = estimator_fulldata.predict(data)  # use the copy that was actually fitted
2557
2558
2559         print "[II] Resubstitution (train = test) accuracy: %.2f" % (sum(array(classes==labels).astype(float))/len(labels))
2560
2561 for p in percents:
2562 traindata,testdata,trainlabels,testlabels = cross_validation.train_test_split(data,classes,test_size=p,random_state=1)
2563
2564
2565 # print "[II] [%.2f-%.2f] Examples used for training: %s" % (p, (1-p), str(traindata))
2566 # print "[II] [%.2f-%.2f] Examples used for testing: %s" % (p, (1-p), str(testdata))
2567 # Training phase
2568
2569
2570
2571 estimator_ = deepcopy(estimator)
2572 estimator_.fit(traindata, trainlabels)
2573
2574 predicted_labels = estimator_.predict(testdata)
2575 m = sum(array(predicted_labels==testlabels).astype(float))/len(testlabels)
2576
2577
2578 # testdata = None
2579 #
2580 # traindata = None
2581 # trainlabels = None
2582 # testlabels = None
2583 #
2584 # for t in train:
2585 # if traindata is None:
2586 # traindata = data[index_vector == t, :]
2587 # trainlabels = classes[index_vector == t]
2588 # else:
2589 # traindata = append(traindata, data[index_vector == t, :], 0)
2590 # trainlabels = append(trainlabels, classes[index_vector ==t])
2591 #
2592 # estimator_ = deepcopy(estimator)
2593 # estimator_.fit(traindata, trainlabels)
2594 #
2595 #
2596 # for t in test:
2597 # if testdata is None:
2598 # testdata = data[index_vector == t, :]
2599 # testlabels = classes[index_vector == t]
2600 # else:
2601 # testdata = append(testdata, data[index_vector == t,:], 0)
2602 # testlabels = append(testlabels, classes[index_vector == t])
2603
2604
2605
2606 # predicted_labels = estimator_.predict(testdata)
2607
2608             # m was already computed above from predicted_labels
2609
2610             print "[II] [%.2f-%.2f] hold-out cross-validation for HMM Classifier: %.2f" % (p, (1-p), m)
2611
2612 RATIOS.append(m)
2613
2614 return RATIOS,percents
2615
2616 def cross_validate_full_k_fold(data, classes, list_of_classifiers, K=10):
2617 """ K-Folds Cross-validates all available classifiers, plots and saves barplots. """
2618
2619 ratios = []
2620 for l in list_of_classifiers:
2621 ratio = l.k_fold_cross_validate(data, classes, K)
2622 ratios.append(ratio)
2623
2624 L = len(ratios)
2625
2626 ind = arange(L)
2627
2628 fig, ax = subplots()
2629 rects = []
2630
2631
2632
2633 width = 10/float(len(ratios))
2634
2635 colors = ['r', 'y', 'g', 'c']
2636
2637 labels_ = []
2638
2639 for i in range(0, len(ratios)):
2640 rects.append(ax.bar(i*width,ratios[i],width,color=colors[i % len(colors)]))
2641         labels_.append(list_of_classifiers[i].getName())
2642     ax.legend(tuple(labels_))
2643     return ratios
2644
2645
2646
2647 def cross_validate_full(seq, classes, list_of_classifiers):
2648 """ Cross-validates all available classifiers, plots and saves barplots. """
2649
2650 ratios = []
2651 percents = []
2652 for l in list_of_classifiers:
2653 ratios_, percents_ = l.cross_validate(seq, classes)
2654 ratios.append(ratios_)
2655 percents.append(percents_)
2656
2657
2658 L = len(ratios[0])
2659
2660 ind = arange(L)
2661
2662 fig,ax = subplots()
2663
2664 rects = []
2665
2666 W = 0.8
2667 width = W/float(len(ratios))
2668
2669 colors = ['r', 'y', 'g', 'c']
2670
2671 labels_ = []
2672 for i in range(0, len(ratios)):
2673 rects.append(ax.bar(ind+i*width,ratios[i],width,color=colors[i % len(colors)]))
2674 labels_.append(list_of_classifiers[i].getName())
2675
2676 ax.legend(tuple(labels_))
2677
2678 ax.set_xticks(ind+W/2)
2679 ax.set_xticklabels(tuple(percents[0]*100))
2680     ax.set_xlabel("Percentage of test data (%)")
2681 ax.set_ylabel("Success ratio")
2682
2683
2684
2685
2686
2687
2688 # rects1 = ax.bar(ind,ratios[0],0.35,color='r')
2689 # rects2 = ax.bar(ind+0.351,ratios[1],0.35,color='y')
2690
2691
2692
2693
2694 return ratios
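# Typical use of cross_validate_full, mirroring the commented-out call
# further down; each classifier only needs fit/predict/getName/cross_validate:
#
#     ratios = cross_validate_full(features_vector_upsampled.T,
#                                  parameters_state, [nbc, svmc])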
2695
2696
2697
2698
2699
2700
2701
2702 # hmmc = HmmClassifier(N = len(unique(parameters_state)), n_components = 1)
2703 # hmmc.fit(features_vector_upsampled.T, parameters_state)
2704 #
2705 # hmmc2 = HmmClassifier(N = 12, n_components = 4)
2706 # hmmc2.fit(features_vector_upsampled.T, parameters_state)
2707 #
2708 svmc = SVMClassifier(probability=True)
2709 svmc.fit(features_vector_upsampled.T, parameters_state)
2710 # # svmc.cross_validate(features_vector_upsampled.T, parameters_state, index_vector)
2711 #
2712 nbc = NBClassifier()
2713 nbc.fit(features_vector_upsampled.T, parameters_state)
2714 #
2715 #
2716 #hchmmc = happy_curve_classification(features_vector_upsampled.T, parameters_state, hmmc, Nd=100)
2717 # # cross_validate_classification(features_vector_upsampled.T, parameters_state, hmmc)
2718 #
2719 # #hmmc.cross_validate(features_vector_upsampled.T, parameters_state)
2720 #
2721
2722 hmmc3 = HmmClassifier3(N=2, n_components = max(unique(parameters_state)))
2723 #print(parameters_state)
2724 plot(parameters_state)
2725
2726
2727 print(shape(features_vector_upsampled))
2728 obs = hmmc3.fit(features_vector_upsampled.T, parameters_state)
2729
2730
2731
2732 # svmhmm = HmmSVMClassifier()
2733 # svmhmm.fit(features_vector_upsampled.T, parameters_state)
2734 # ratios = cross_validate_full(features_vector_upsampled.T, parameters_state, [nbc, svmc])
2735 # ratios = cross_validate_full_k_fold(features_vector_upsampled.T, parameters_state,
2736 # [hmmc3])
2737
2738 def approximate_chain_size(data,parameters):
2739 ratios = []
2740
2741 # chainsizes = arange(1, 40)
2742 # for cs in chainsizes:
2743 # print "[II] Trying chain size: %d" % cs
2744 # hmmc = HmmClassifier3(N=cs, n_components = 2)
2745 # ratios.append(hmmc.k_fold_cross_validate(data,parameters,K=10))
2746
2747
2748 componentssize = arange(1, 12)
2749
2750 for cs in componentssize:
2751 print "[II] Trying component size: %d" % cs
2752 hmmc = HmmClassifier3(N=2, n_components = cs)
2753 ratios.append(hmmc.k_fold_cross_validate(data, parameters, K=2))
2754
2755
2756
2757
2758 figure()
2759 plot(componentssize, ratios)
2760     xlabel('Number of components')
2761     ylabel('Success Ratio')
2762     title('2-fold cross-validation success ratio vs number of components')
2763
2764
2765 return ratios
2766
2767 # ratios = approximate_chain_size(features_vector_upsampled.T, parameters_state)
2768
2769 #predictedhmmc2 = hmmc2.predict(features_vector_upsampled.T)
2770
2771 # figure()
2772
2773
2774
2775
2776
2777 # hmms_ = []
2778 #
2779 # for n in range(0, len(parameter_state_alphabet)):
2780 # #hmm_ = hmm.GMMHMM(n_components = 1, n_mix = 2)
2781 # hmm_ = hmm.GaussianHMM(n_components = 1,covariance_type = 'full')
2782 #
2783 # # Get training data for each class
2784 # vector = features_vector_upsampled[:,parameters_state == n]
2785 #
2786 # #if vector.shape[1] < n_clusters:
2787 # # hmms_.append(None)
2788 # #else:
2789 #
2790 # hmm_.fit([vector.T])
2791 #
2792 # # Append to the list
2793 #
2794 # hmms_.append(hmm_)
2795 #
2796 # labels = zeros((features_vector_upsampled.shape[1],))
2797 #
2798 # N = 20
2799 # m = 0
2800 #
2801 # while m*N < features_vector_upsampled.shape[1]:
2802 #
2803 # scores = zeros((1, len(parameter_state_alphabet)))
2804 #
2805 # if (m+1)*N > features_vector_upsampled.shape[1]:
2806 # testdata = features_vector_upsampled[:,m*N:]
2807 # else:
2808 # testdata = features_vector_upsampled[:,m*N:(m+1)*N]
2809 #
2810 # for i in range(0, len(parameter_state_alphabet)):
2811 # if hmms_[i] is not None:
2812 # scores[0,i] = hmms_[i].score(testdata.T)
2813 # else:
2814 # scores[0,i] = -100000 # Very large negative score
2815 # if (m+1)*N >= features_vector_upsampled.shape[1]:
2816 # labels[m*N:] = argmax(scores)
2817 # else:
2818 # labels[m*N:(m+1)*N] = argmax(scores)
2819 #
2820 # m += 1
2821
2822
2823 # figure()
2824 #plot(labels.T)
2825
2826
2827 # labels = hmmc.predict(features_vector_upsampled.T)
2828 # estimated = states_to_vector(labels,parameter_state_alphabet_inv)
2829 # plot(estimated.T,'r--')
2830 #
2831 # title('Estimated parameter values')
2832 # legend(['Naive Bayes Classifier', 'HMM chain size %d (%.1fms)' % (N, float(N)/UpsamplingFactor*23.0)])
2833 #
2834 # ylabel('value')
2835 # xlabel('frame #')
2836 #
2837
2838 # close('all')
2839
2840 # plot(features_clustering_labels/float(max(features_clustering_labels)))
2841 # plot(parameters_vector_upsampled.T/max(ravel(parameters_vector_upsampled)))
2842
2843
2844 def plot_clusters(x, labels):
2845 figure()
2846 symbols = ['>', 'x', '.', '<','v']
2847 colors = ['b', 'r', 'g', 'm','c']
2848
2849 for r in range(0, x.shape[0]):
2850 scatter(x[r,0], x[r,1], c=colors[int(labels[r]) % len(colors)], marker=symbols[int(labels[r]) % len(symbols)])
2851
2852
2853 # plot_clusters(features_vector_upsampled.T, parameters_state)
2854 #
2855
2856 # SVM
2857
2858 class HmmClassifierSVN_Naive:
2859 def __init__(self, N=2, n_components = 1):
2860 self.n_components = n_components
2861 self.chain_size = N
2862 self.hmms_ = []
2863 self.N = N
2864
2865 def fit(self, X, states):
2866 self.n_states = len(unique(states))
2867
2868 for n in range(0, self.n_states):
2869 hmm_ = hmm.GaussianHMM(n_components = self.n_components, covariance_type = 'full')
2870
2871 # Get training data for each class
2872 vector = X[states == n,:]
2873
2874 # Fit the HMM
2875 # print vector
2876 hmm_.fit([vector])
2877
2878 # And append to the list
2879 self.hmms_.append(hmm_)
2880
2881 def predict(self,X):
2882 labels = zeros((X.shape[0],))
2883 N = self.N
2884
2885 m = 0
2886
2887 scores = zeros((1, self.n_states))
2888
2889
2890 while m*N < X.shape[0]:
2891 if (m+1)*N > X.shape[0]:
2892 testdata = X[m*N:,:]
2893 else:
2894 testdata = X[m*N:(m+1)*N,:]
2895
2896 # print testdata
2897
2898 for i in range(0, self.n_states):
2899 scores[0,i] = self.hmms_[i].score(testdata)
2900
2901 if (m+1)*N > X.shape[0]:
2902 labels[m*N:] = argmax(scores)
2903 else:
2904 labels[m*N:(m+1)*N] = argmax(scores)
2905
2906 m+= 1
2907
2908 return labels
2909
2910
2911
2912
2913 def plot_parameters_estimation(list_of_estimators):
2914 for e in list_of_estimators:
2915 name_ = e.getName()
2916
2917
2918
2919 fig = figure()
2920 fig.suptitle('Parameters estimation using %s' % name_, fontweight='bold')
2921 subplot(311)
2922 title ('original parameters')
2923 param_real = states_to_vector(parameters_state,parameter_state_alphabet_inv).T
2924 plot(param_real)
2925 xlabel('sample')
2926 ylabel('normalized parameter value')
2927 legend(parameter_captions)
2928 subplot(312)
2929 title('estimated parameters')
2930 pred = e.predict(features_vector_upsampled.T)
2931 param_est = states_to_vector(pred,parameter_state_alphabet_inv).T
2932 plot(param_est)
2933 xlabel('sample')
2934 ylabel('normalized parameter value')
2935 legend(parameter_captions)
2936 subplot(313)
2937 error_real_est = zeros((len(param_est),))
2938 for i in range(0, len(error_real_est)):
2939 error_real_est[i] = mse(param_real[i],param_est[i])
2940
2941 totalmse = sum(error_real_est)
2942 title('mean squared error (total: %.3f)' % totalmse)
2943
2944 plot(error_real_est)
2945 xlabel('sample')
2946 ylabel('normalized parameter value')
2947
2948
2949
2950 # hmm2 = hmm.MultinomialHMM(n_components = n_classes, startprob = prior_p_q, transmat = transmat )
2951
2952
2953
2954 # TODO: Embed SVM emissions to HMM. Try to do pick maximum likelihood sequence from
2955 # classifiers like in HmmClassifier3. For every class, train an SVM, with n_components as
2956 # states, and compute the log-likelihood for prediction. Should work.
2957
2958
2959
2960 # class HMMsvmClassifier:
2961 # def __init__(self, N=6):
2962 # print "[II] HMM-SVM Classifier (%d time steps)" % N
2963 # self.name = "HMM-SVM (%d time steps)" % N
2964 # # self.n_components = n_components
2965 # self.chain_size = N
2966 # self.svm = MyAVAClassifier()
2967 #
2968 #
2969 # def getName(self):
2970 # return self.name
2971 #
2972 # def fit(self, features, parameters):
2973 #
2974 # # First train emission svm
2975 #
2976 # self.svm.fit(features, parameters)
2977 #
2978 # # print parameters
2979 # n_classes = max(unique(parameters)) + 1
2980 #
2981 # print "[II] Number of classes: ", n_classes
2982 # hmms = [None]*n_classes
2983 # chain_size = self.chain_size
2984 # params = [None]*n_classes
2985 # obs = [None]*n_classes
2986 #
2987 # for i in range(chain_size, len(parameters)):
2988 # class_ = parameters[i-1]
2989 # params_ = parameters[i-chain_size:i]
2990 # feats_ =features[i-chain_size:i,:]
2991 #
2992 # if params[class_] is None:
2993 # params[class_] = [params_]
2994 # obs[class_] = [feats_]
2995 # else:
2996 # params[class_].append(params_)
2997 # obs[class_].append(feats_)
2998 #
2999 #
3000 #
3001 # for i in range(0, len(params)):
3002 # if params[i] is not None and len(params[i]) != 0:
3003 # hmm_ = HMMsvm(self.svm)
3004 # # print "[II] %d Fitting params: " % i, params[i]
3005 # print "[!!] OBSERVATIONS: "
3006 # print obs[i]
3007 # hmm_.fit(obs[i], params[i])
3008 #
3009 # print "[!!] PARAMETERS: "
3010 # print params[i]
3011 #
3012 # # TODO: Left here on 06/07 19:56
3013 #
3014 # # hmm_.fit(features,)
3015 # # print "obs: ", obs[i]
3016 # # print "params: ", params[i]
3017 # # hmm_.fit(obs[i], params[i],flr=1e-9)
3018 # hmms[i] = hmm_
3019 #
3020 # self.hmms = hmms
3021 #
3022 # return obs
3023 # def predict(self, features, mfilt=20):
3024 # chain_size = self.chain_size
3025 # hmms = self.hmms
3026 # predicted_classes = zeros((features.shape[0],))
3027 # for i in range(chain_size, features.shape[0]):
3028 # scores = zeros((len(hmms),))
3029 #
3030 # seq = features[i-chain_size:i, :]
3031 #
3032 # for j in range(0, len(hmms)):
3033 # if hmms[j] is not None:
3034 # q,p = hmms[j].logviterbi(seq)
3035 #
3036 # scores[j] = logsumexp(p)
3037 # else:
3038 # scores[j] = -infty
3039 #
3040 # predicted_classes[i] = argmax(scores)
3041 #
3042 # # Do a median filter at the end
3043 #
3044 # # predicted_classes = median_filter(predicted_classes,mfilt)
3045 #
3046 # return predicted_classes
3047
3048 class HmmSvmClassifier3:
3049 def __init__(self, N=2,n_components = 1):
3050         print "[II] HMM/SVM Classifier (%d time steps, %d components)" % (N, n_components)
3051         self.name = "HMM/SVM (%d time steps, %d components)" % (N, n_components)
3052 self.n_components = n_components
3053 self.smallname = "hmmsvmc"
3054 self.chain_size = N
3055
3056 def getName(self):
3057 return self.name
3058
3059 def fit(self, features, parameters):
3060
3061
3062
3063 # print parameters
3064 n_classes = max(unique(parameters)) + 1
3065
3066 if n_classes == 1:
3067 self.only_one_class = True
3068 return
3069 else:
3070 self.only_one_class = False
3071
3072 print "[II] Number of classes: ", n_classes
3073 hmms = [None]*n_classes
3074 # svms = [None]*n_classes
3075 chain_size = self.chain_size
3076 obs = [None]*n_classes
3077 cls = [None]*n_classes
3078
3079 for i in range(chain_size, len(parameters)):
3080 class_ = parameters[i-1]
3081 seq = features[i-chain_size:i,:]
3082 params = parameters[i-chain_size:i]
3083
3084 if obs[class_] is None:
3085 obs[class_] = [seq]
3086 cls[class_] = [params]
3087 else:
3088 obs[class_].append(seq)
3089 cls[class_].append(params)
3090
3091
3092
3093 for i in range(0, len(obs)):
3094 if obs[i] is not None and len(obs[i]) != 0:
3095 # hmm_ = hmm.GaussianHMM(n_components=self.n_components, covariance_type='full')
3096 svm_ = MyAVAClassifier()
3097                 # Concatenate this class's training sequences along the
3098                 # time axis so the SVM sees one (n_frames, n_features)
3099                 # matrix with one label per frame.
3100                 O = obs[i][0]
3101                 C = cls[i][0]
3102
3103                 for j in range(1, len(obs[i])):
3104                     O = append(O, obs[i][j], 0)
3105                     C = append(C, cls[i][j])
3106
3110                 svm_.fit(O, C)
3111 hmm_ = HMMsvm(svm_)
3112 hmm_.fit(obs[i],cls[i])
3113 hmms[i] = hmm_
3114
3115 self.hmms = hmms
3116
3117 return obs
3118
3119 def predict(self, features, mfilt=20):
3120
3121 if self.only_one_class == True:
3122 return zeros((features.shape[0], ))
3123
3124 chain_size = self.chain_size
3125 hmms = self.hmms
3126 predicted_classes = zeros((features.shape[0],))
3127
3128
3129 for i in range(chain_size, features.shape[0]):
3130 scores = zeros((len(hmms),))
3131
3132 seq = features[i-chain_size:i, :]
3133
3134 for j in range(0, len(hmms)):
3135 if hmms[j] is not None:
3136                     # print hmms[j].score(seq)  # debug
3137 scores[j] = hmms[j].score(seq)[j]
3138 else:
3139 scores[j] = -infty
3140
3141 predicted_classes[i] = argmax(scores)
3142
3143 # Do a median filter at the end
3144
3145 # predicted_classes = median_filter(predicted_classes,mfilt)
3146
3147 return predicted_classes
3148
3149
3150
3151
3152
3153
3154 # newstates = self.logviterbi()
3155
3156 #
3157 #
3158 # # Alphas and Betas
3159 # X = features
3160 # alphas = zeros((T,n_classes))
3161 # betas = zeros((T,n_classes))
3162 #
3163 # forward_messages = zeros((T,n_classes))
3164 # backward_messages = zeros((T, n_classes))
3165 #
3166 # print "[II] Computing alpha, beta..."
3167 # for t in range(1, T+1):
3168 # forward_messages[t-1,:] = self._forward_message(forward_messages[t-2,:],X[0:t,])
3169 # backward_messages[-t,:] = self._backward_message(backward_messages[-t+1], X[-t:,:])
3170 #
3171 #
3172 # print "[II] Computing ksu..."
3173 #
3174 # a_norm = forward_messages[-1,n_classes-1] # alpha_T
3175 #
3176 # ksu = zeros((T, n_classes, n_classes))
3177 #
3178 # for i in range(0, n_classes):
3179 # for j in range(0, n_classes):
3180 # for t in range(0, T-1):
3181 # ksu[t,i,j] = exp(forward_messages[t, i] + log(transmat[i,j]) + log(self.estimate_emission_probability(X[t,:],j)) + backward_messages[t+1,j] - a_norm)
3182 #
3183 #
3184 #
3185 # self.alphas = forward_messages
3186 # self.betas = backward_messages
3187 # self.ksu = ksu
3188 #
3189 # transmat_new = zeros((n_classes,n_classes))
3190 #
3191 # for i in range(0, n_classes):
3192 #
3193 # for j in range(0, n_classes):
3194 # transmat_new[i,j] = sum(ksu[:,i,j])/sum(ksu[:,i,:])
3195 #
3196 # self.transmat_new = transmat_new
3197 #
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209 #
3210 # def forward(self, X):
3211 # # Compute log likelihood using the forward algorithm
3212 #
3213 # # Number of states
3214 # N = self.n_classes
3215 #
3216 # # Number of Observations
3217 # T = X.shape[0]
3218 #
3219 #
3220 # transmat = self.transmat
3221 #
3222 # # Initialization
3223 # # a1 = ones((N,))/N
3224 # a1 = self.prior
3225 #
3226 # alpha = zeros((N,))
3227 # for i in range(0, N):
3228 # alpha[i] = log(a1[i])+log(self.estimate_emission_probability(X[0,:], i))
3229 # # print alpha
3230 # # Recursion
3231 # for t in range(0, T):
3232 # alpha_old = alpha.copy()
3233 # x = X[t, :]
3234 # for j in range(0, N):
3235 # tosum = zeros((N,))
3236 # for i in range(0, N):
3237 # tosum[i] = alpha_old[i]+log(transmat[i,j])
3238 #
3239 # # Sum = logsum(tosum)
3240 # Sum = logsumexp(tosum)
3241 # bj = self.estimate_emission_probability(x, j)
3242 #
3243 # alpha[j] = Sum+log(bj)
3244 # # print alpha
3245 #
3246 # tosum = zeros((N,))
3247 # for i in range(0, N):
3248 # tosum[i] = alpha[i] + log(transmat[i,N-1])
3249 #
3250 # return tosum
3251 #
3252 # def backward(self, X):
3253 # # Number of states
3254 # N = self.n_classes
3255 #
3256 # # Number of Observations
3257 # T = X.shape[0]
3258 #
3259 # transmat = self.transmat
3260 #
3261 # # Initialization
3262 #
3263 # b1 = ones((N,))/N
3264 #
3265 # beta = zeros((N,))
3266 # for i in range(0, N):
3267 # beta[i] = log(b1[i])+log(self.estimate_emission_probability(X[-1,:], i))
3268 #
3269 # for t in range(0, T):
3270 # beta_old = beta.copy()
3271 # x = X[-t, :]
3272 # for j in range(0, N):
3273 # tosum = zeros((N, ))
3274 # for i in range(0, N):
3275 # tosum[i] = beta_old[i]+log(transmat[i,j])
3276 #
3277 # Sum = logsumexp(tosum)
3278 # bj = self.estimate_emission_probability(x, j)
3279 # beta[j] = Sum+log(bj)
3280 #
3281 # tosum = zeros((N,))
3282 #
3283 # for i in range(0, N):
3284 # tosum[i] = beta[i] + log(transmat[i,0])
3285 #
3286 # #3 p = logsumexp(tosum)
3287 #
3288 # return tosum
3289
3290
3291 def _log_likelihood(self, X):
3292
3293 return logsumexp(self.forward(X))
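    # With the termination used in forward() (transition into the last state),
    # this computes log p(X) = logsumexp_i [alpha_T(i) + log a(i, N-1)].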
3294
3295
3296
3297
3298
3299 def _likelihood(self, X):
3300 # Number of states
3301 N = self.n_classes
3302
3303 # Number of Observations
3304 T = X.shape[0]
3305
3306
3307 transmat = self.transmat
3308
3309 # Initialization
3310 # a1 = ones((N,))/N
3311 a1 = self.prior
3312
3313 alpha = zeros((N,))
3314 for i in range(0, N):
3315 alpha[i] = a1[i]*self.estimate_emission_probability(X[0,:], i)
3316
3317 # Recursion
3318         # print alpha  # debug
3319 for t in range(1, T):
3320 alpha_old = alpha.copy()
3321 x = X[t, :]
3322 for j in range(0, N):
3323 Sum = 0
3324 for i in range(0, N):
3325 Sum += alpha_old[i]*transmat[i,j]
3326
3327 alpha[j] = Sum*self.estimate_emission_probability(x, j)
3328             # print alpha  # debug
3329
3330
3331 # Termination
3332
3333 Sum = 0
3334 for i in range(0, N):
3335 Sum += alpha[i]*transmat[i, N-1]
3336
3337 p = Sum
3338
3339 return p
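    # Note: this linear-domain recursion underflows for long sequences;
    # _log_likelihood above is the numerically safe variant.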
3340
3341
3342
3343
3344
3345
3346
3347 # def fit(self, X, states):
3348 # # print parameters
3349 # n_classes = max(unique(states)) + 1
3350 #
3351 # # svms = [None]*n_classes
3352 # obs = [None]*n_classes
3353 # sta = [None]*n_classes
3354 # chain_size = self.chain_size
3355 #
3356 # #22 obs = None
3357 # # sta = None
3358 #
3359 # print "[II] Number of classes: ", n_classes
3360 #
3361 # # For each class:
3362 # # Concatenate examples
3363 # # Fit SVM
3364 #
3365 # for i in range(chain_size, len(states)):
3366 # class_ = states[i-1]
3367 # seq = X[i-chain_size:i, :]
3368 # states_ = states[i-chain_size:i]
3369 #
3370 #
3371 # if obs[class_] is None:
3372 # obs[class_] = [seq]
3373 # sta[class_] = [states_]
3374 # self.svms.append(MyAVAClassifier())
3375 # else:
3376 # obs[class_].append(seq)
3377 # sta[class_].append(states_)
3378 #
3379 #
3380 # for i in range(0, len(obs)):
3381 #
3382 # o = None
3383 # s = None
3384 #
3385 # for j in range(0, len(obs[i])):
3386 # if o is None:
3387 # o = obs[i][j]
3388 # s = sta[i][j]
3389 #
3390 # else:
3391 # o = append(o, obs[i][j],0)
3392 # s = append(s, sta[i][j])
3393 #
3394 #
3395 # self.svms[i].fit(o, s)
3396
3397 # def predict(self, features):
3398 # chain_size = self.chain_size
3399 # svms = self.svms
3400 #
3401 # predicted_classes = zeros((features.shape[0],))
3402 # for i in range(chain_size, features.shape[0]):
3403 # scores = zeros((len(svms)))
3404 #
3405 # seq = features[i-chain_size:i, :]
3406 #
3407 # for j in range(0, len(svms)):
3408 # if svms[j] is not None:
3409 # scores[j] = svms[j].compute_log_likelihood(seq)
3410 # else:
3411 # scores[k] = -infty
3412 # predicted_classes[i] = argmax(scores)
3413 #
3414 # return predicted_classes
3415
3416
3417
3418
3419
3420 # Marginalize over the latent variable
3421 # for i in range(0, X.shape[0]):
3422 # P = zeros((self.n_classes,))
3423 # x = X[i,:]
3424 # for j in range(0, len(P)):
3425 # P[j] = self.estimate_emission_probability(j, x)
3426 #
3427 # S[i] = log(sum(P[j]))
3428 #
3429 # return sum(S)
3430
3431
3432
3433
3434 # Continue from here
3435 X = features_vector_upsampled.T
3436 y = parameters_state
3437
3438 # clf = svm.SVC()
3439 # clf.fit(X,y)
3440
3441
3442 # parameters_state_y = clf.predict(X)
3443
3444
3445 predhmmc3 = hmmc3.predict(features_vector_upsampled.T)
3446
3447 myava = MyAVAClassifier()
3448 myava.fit(features_vector_upsampled.T, parameters_state)
3449
3450 predmyava = myava.predict(features_vector_upsampled.T)
3451
3452 # hmmsvmc = HMMsvmClassifier(N=2)
3453 # hmmsvmc.fit(features_vector_upsampled.T, parameters_state)
3454 #predhmmsvmc = hmmsvmc.predict(features_vector_upsampled.T)
3455
3456 obsc = MyAVAClassifier()
3457 obsc.fit(features_vector_upsampled.T, parameters_state)
3458 # hmm2 = hmm.GaussianHMM(n_components=6)
3459
3460 # hmm2.fit([features_vector_upsampled.T])
3461 # hmm3 = HMMsvm(obsc)
3462 # hmm3.fit([features_vector_upsampled.T],[parameters_state])
3463 # predhmmsvm = hmm3.predict(features_vector_upsampled.T)
3464
3465 # hmmsvmc = HMMsvmClassifier()
3466
3467 hmmsvmc = HMMsvmClassifier()
3468 # hmmsvmc = HmmSvmClassifier3()
3469 hmmsvmc.fit(features_vector_upsampled.T, parameters_state)
3470 predhmmsvm = hmmsvmc.predict(features_vector_upsampled.T)
3471 plot_parameters_estimation([nbc, myava, hmmc3, hmmsvmc])
3472
3473
3474 #plot(parameters)
3475 figure()
3476 plot(parameters_state,'b')
3477
3478 # plot(parameters_state_y,'r--')
3479 plot(predhmmc3,'g+-')
3480
3481 plot(predhmmsvm, 'co-')
3482
3483 # plot(predhmmsvmc, 'bo-')
3484
3485 legend(['Real', 'HMM', 'HMM-SVM'])
3486
3487 xlabel('sample')
3488 ylabel('state')
3489 # TODO: HMM with SVM, cross-validation
3490
3491 # Using hidden markov svm
3492
3493 svmhmm = ""
3494
3495 # print "[II] Success ratio for SVM-RBF Classifier: %.2f" % (sum(parameters_state==parameters_state_y).astype(float)/float(len(parameters_state)))
3496
3497 print "[II] All-vs-All custom Support Vector Classifier Success Ratio: %.2f " %(sum(predmyava==parameters_state).astype(float)/len(parameters_state))
3498 print "[II] Success ratio for HMM: %.2f" % (sum(parameters_state==predhmmc3).astype(float)/float(len(predhmmc3)))
3499 print "[II] Success ratio for HMM-SVM: %.2f" % (sum(parameters_state==predhmmsvm).astype(float)/float(len(predhmmsvm)))
3500
3501
3502
3503
3504
3505 model_ghmm = ReverbModel("GaussianHMM Classifier Model", kernel, q, features_to_keep, hmmc3, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector)
3506 model_gnb = ReverbModel("Naive Bayes Classifier Model", kernel, q, features_to_keep, nbc, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector )
3507 model_svm = ReverbModel("AVA LinearSVM Classifier Model", kernel, q, features_to_keep, myava, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector )
3508 model_hmmsvm = ReverbModel("HMM-SVM Classifier Model", kernel, q, features_to_keep, hmmsvmc, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector )
3509
3510 model_ghmm.save("model_ghmm.dat")
3511 model_gnb.save("model_gnb.dat")
3512 model_svm.save("model_svm.dat")
3513 model_hmmsvm.save("model_hmmsvm.dat")
3514
3515
3516
3517 def split_data_to_k_samples(data, params, K):
3518 L = shape(data)[0]
3519 M = int(round(L/float(K)))
3520
3521 samples_data = []
3522 samples_parameters = []
3523 for k in range(K):
3524 if (k+1)*M > L:
3525 samples_data.append(data[k*M:,:])
3526 samples_parameters.append(params[k*M:])
3527 else:
3528 samples_data.append(data[k*M:(k+1)*M,:])
3529 samples_parameters.append(params[k*M:(k+1)*M])
3530
3531 return samples_data, samples_parameters
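# Worked example: with L = 103 frames and K = 5, M = round(103/5) = 21,
# giving four chunks of 21 frames plus a final ragged chunk of 19.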
3532
3533 def join_data(data, params):
3534 L = 0
3535 for p in params:
3536 L += len(p)
3537
3538 new_data = zeros((L, data[0].shape[1]))
3539 new_params = zeros((L,))
3540
3541 idx = 0
3542 for n in range(0,len(data)):
3543 new_data[idx:idx+data[n].shape[0],:] = data[n]
3544 new_params[idx:idx+len(params[n])] = params[n]
3545 idx += len(params[n])
3546
3547 return new_data, new_params
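# join_data inverts split_data_to_k_samples, so
#
#     r1, r2 = join_data(*split_data_to_k_samples(data, params, K))
#
# reconstructs the original arrays; the same round trip appears below as a
# sanity check (s1, s2 -> r1, r2).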
3548
3549 cross_validation_ratios = {}
3550 cross_validation_mses = {}
3551
3552
3553 features_samples, parameters_samples = split_data_to_k_samples(features_vector_upsampled.T, parameters_state, 5)
3554
3555 from sklearn.cross_validation import KFold
3556
3557 from sklearn import metrics
3558
3559
3560
3561 KF = 10
3562
3563 s1,s2 = split_data_to_k_samples(features_vector_upsampled.T, parameters_state, KF)
3564 r1,r2 = join_data(s1,s2)
3565
3566 kf = KFold(KF, n_folds=KF) # Leave one out
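# With KFold(KF, n_folds=KF) each iteration holds out exactly one of the KF
# chunks, e.g. the first split is train = [1, ..., 9], test = [0].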
3567
3568 trial = 1.0
3569
3570 rhmmc = 0
3571 rgnbc = 0
3572 ravac = 0
3573 rhmmsvmc = 0
3574
3575
3576 msegbnc = 0
3577 msehmmc = 0
3578 mseavac = 0
3579 msehmmsvmc = 0
3580
3581 fullratios = []
3582 fullmses = []
3583
3584
3585 def calculate_mse(pred, real):
3586 realparams = states_to_vector(real, parameter_state_alphabet_inv)
3587 estparams = states_to_vector(pred, parameter_state_alphabet_inv)
3588 return mse(realparams, estparams)
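# Note: the MSE is computed between decoded parameter vectors (via
# states_to_vector), not between raw state indices, so the error reflects
# actual parameter distance rather than arbitrary label distance.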
3589 ratios_gnb = []
3590 ratios_hmm = []
3591 ratios_svm = []
3592 ratios_hmmsvm = []
3593 ratios_sinkhole = []
3594
3595 mse_gnb = []
3596 mse_hmm = []
3597 mse_svm = []
3598 mse_hmmsvm = []
3599 mse_sinkhole = []
3600
3601
3602 for train, test in kf:
3603
3604
3605
3606 print train,test
3607 f1 = []
3608 p1 = []
3609
3610 tf1 = []
3611 tp1 = []
3612
3613 for i in train:
3614         f1.append(s1[i])   # KFold yields 0-based chunk indices
3615         p1.append(s2[i])
3616
3617 for i in test:
3618         tf1.append(s1[i])
3619         tp1.append(s2[i])
3620
3621 new_trainset_data, new_trainset_params = join_data(f1, p1)
3622 new_trainset_params = new_trainset_params.astype(int)
3623
3624
3625 # Detect and remove outliers
3626
3627 # ee = EllipticEnvelope()
3628
3629 # ee.fit(new_trainset_data, new_trainset_params)
3630
3631
3632 # inliers = ee.predict(new_trainset_data) == 1
3633
3634 # new_trainset_data = new_trainset_data[inliers, :]
3635 # new_trainset_params = new_trainset_params[inliers]
3636
3637
3638 new_testset_data, new_testset_params = join_data(tf1, tp1)
3639 new_testset_params = new_testset_params.astype(int)
3640
3641
3642
3643 param_est = states_to_vector(new_testset_params,parameter_state_alphabet_inv).T
3644
3645
3646
3647
3648
3649
3650
3651 X = arange(50, len(new_trainset_params), 50)
3652 print X
3653 # X = append(X, [len(new_trainset_params)], 1)
3654
3655
3656 #
3657 for M in X:
3658 # print CHAINSIZE
3659 # print NCOMPONENTS
3660 hmmc = HmmClassifier3(N=CHAINSIZE, n_components = NCOMPONENTS)
3661 gnbc = GaussianNB()
3662 avac = MyAVAClassifier()
3663 hmmsvmc = HMMsvmClassifier()
3664
3665 gnbc.fit(new_trainset_data[0:M,:], new_trainset_params[0:M])
3666 pgnbc = gnbc.predict(new_testset_data)
3667 rgnbc = metrics.accuracy_score(pgnbc, new_testset_params)
3668 mgnbc = calculate_mse(pgnbc,new_testset_params)
3669
3670 #
3671 hmmc.fit(new_trainset_data[0:M,:], new_trainset_params[0:M])
3672 phmmc = hmmc.predict(new_testset_data)
3673 rhmmc = metrics.accuracy_score(phmmc, new_testset_params)
3674 mhmmc = calculate_mse(phmmc,new_testset_params)
3675
3676 #
3677 try:
3678 avac.fit(new_trainset_data[0:M,:], new_trainset_params[0:M])
3679 pavac = avac.predict(new_testset_data)
3680 ravac = metrics.accuracy_score(pavac, new_testset_params)
3681 mavac = calculate_mse(pavac,new_testset_params)
3682 except:
3683 ravac = 0
3684 mavac = inf
3685 #
3686 #
3687 #
3688 #
3689 #
3690 try:
3691 hmmsvmc.fit(new_trainset_data[0:M,:], new_trainset_params[0:M])
3692 phmmsvm = hmmsvmc.predict(new_testset_data)
3693 rhmmsvm = metrics.accuracy_score(phmmsvm, new_testset_params)
3694 mhmmsvm = calculate_mse(phmmsvm,new_testset_params)
3695 #
3696 except:
3697 rhmmsvm = 0
3698                 mhmmsvm = inf
3699 #
3700 # ratios_gnb.append(rgnbc)
3701 # ratios_hmm.append(rhmmc)
3702 # ratios_svm.append(ravac)
3703 # ratios_hmmsvm.append(rhmmsvm)
3704 #
3705 # mse_gnb.append(mgnbc)
3706 # mse_hmm.append(mhmmc)
3707 # mse_svm.append(mavac)
3708 # mse_hmmsvm.append(mhmmsvm)
3709 #
3710 # fullratios.append((trial,X,ratios_gnb,ratios_hmm,ratios_svm,ratios_hmmsvm))
3711 # fullmses.append((trial,X,mse_gnb,mse_hmm,mse_svm,mse_hmmsvm))
3712
3713 #
3714 #
3715 #
3716 #
3717
3718
3719
3720
3721
3722
3723 # hmmc = HmmClassifier3(N=3, n_components = max(unique(parameters_state))+1)
3724 hmmc = HmmClassifier3(N=CHAINSIZE, n_components = NCOMPONENTS)
3725 gnbc = GaussianNB()
3726 avac = MyAVAClassifier()
3727 hmmsvmc = HMMsvmClassifier()
3728 sinkholec = SinkHoleClassifier()
3729
3730 hmmc.fit(new_trainset_data, new_trainset_params)
3731 gnbc.fit(new_trainset_data, new_trainset_params)
3732 # try:
3733 avac.fit(new_trainset_data, new_trainset_params)
3734 sinkholec.fit(new_trainset_data, new_trainset_params)
3735 pavac = avac.predict(new_testset_data)
3736
3737
3738 # except:
3739 # pavac = ones((new_testset_data.shape[0],))*new_trainset_params[0]
3740
3741
3742 # try:
3743 hmmsvmc.fit(new_trainset_data, new_trainset_params)
3744 phmmsvm = hmmsvmc.predict(new_testset_data)
3745 # except:
3746 #
3747 # # phmmsvm = hmmsvmc.predict(new_testset_data)
3748 # phmmsvm = ones((new_testset_data.shape[0],))*new_trainset_params[0]
3749
3750
3751 phmmc = hmmc.predict(new_testset_data)
3752 pgnbc = gnbc.predict(new_testset_data)
3753 psinkhole = sinkholec.predict(new_testset_data)
3754
3755 print "[II] (Trial %d) Classification ratio for GNB: %.2f" % (trial, metrics.accuracy_score(pgnbc, new_testset_params))
3756 print "[II] (Trial %d) Classification ratio for SVM: %.2f" % (trial, metrics.accuracy_score(pavac, new_testset_params))
3757 print "[II] (Trial %d) Classification ratio for HMM: %.2f" % (trial, metrics.accuracy_score(phmmc, new_testset_params))
3758 print "[II] (Trial %d) Classification ratio for HMM/SVM: %.2f" % (trial, metrics.accuracy_score(phmmsvm, new_testset_params))
3759 print "[II] (Trial %d) Classification ratio for Sinkhole Approach: %.2f" % (trial, metrics.accuracy_score(psinkhole, new_testset_params))
3760
3761
3762
3763
3764 # rgnbc = rhmmc*(trial-1)/trial + metrics.accuracy_score(pgnbc, new_testset_params)/trial
3765 # ravac = rhmmc*(trial-1)/trial + metrics.accuracy_score(pavac, new_testset_params)/trial
3766 # rhmmc = rhmmc*(trial-1)/trial + metrics.accuracy_score(phmmc, new_testset_params)/trial
3767 # rhmmsvmc = rhmmc*(trial-1)/trial + metrics.accuracy_score(phmmsvm, new_testset_params)/trial
3768
3769 rgnbc = metrics.accuracy_score(pgnbc, new_testset_params)
3770 ravac = metrics.accuracy_score(pavac, new_testset_params)
3771 rhmmc = metrics.accuracy_score(phmmc, new_testset_params)
3772 rhmmsvmc = metrics.accuracy_score(phmmsvm, new_testset_params)
3773 rsinkhole = metrics.accuracy_score(psinkhole, new_testset_params)
3774
3775 ratios_gnb.append(rgnbc)
3776 ratios_svm.append(ravac)
3777 ratios_hmm.append(rhmmc)
3778 ratios_hmmsvm.append(rhmmsvmc)
3779 ratios_sinkhole.append(rsinkhole)
3780
3781
3782
3783
3784
3785 # msegbnc = msegbnc*(trial-1)/trial + calculate_mse(pgnbc,new_testset_params)/trial
3786 # msehmmc = msehmmc*(trial-1)/trial + calculate_mse(phmmc,new_testset_params)/trial
3787 # mseavac = mseavac*(trial-1)/trial + calculate_mse(pavac,new_testset_params)/trial
3788 # msehmmsvmc = msehmmsvmc*(trial-1)/trial + calculate_mse(phmmsvm,new_testset_params)/trial
3789
3790 msegnb = calculate_mse(pgnbc,new_testset_params)
3791 msesvm = calculate_mse(pavac,new_testset_params)
3792 msehmm = calculate_mse(phmmc,new_testset_params)
3793 msehmmsvm = calculate_mse(phmmsvm,new_testset_params)
3794 msesinkhole = calculate_mse(psinkhole,new_testset_params)
3795
3796 mse_gnb.append(msegnb)
3797 mse_svm.append(msesvm)
3798 mse_hmm.append(msehmm)
3799 mse_hmmsvm.append(msehmmsvm)
3800 mse_sinkhole.append(msesinkhole)
3801
3802
3803
3804
3805
3806
3807 trial += 1
3808
3809 print ratios_gnb
3810
3811
3812 sucrats = [mean(ratios_gnb),mean(ratios_svm), mean(ratios_hmm),mean(ratios_hmmsvm), mean(ratios_sinkhole)]
3813 sucratsstd = [std(ratios_gnb,ddof=1),std(ratios_svm,ddof=1), std(ratios_hmm,ddof=1),std(ratios_hmmsvm,ddof=1), std(ratios_sinkhole,ddof=1)]
3814 mserats = [mean(mse_gnb),mean(mse_svm), mean(mse_hmm),mean(mse_hmmsvm), mean(mse_sinkhole)]
3815 #mserats = [msegbnc, mseavac, msehmmc,msehmmsvmc]
3816
3817 print tuple(sucrats)
3818 # print tuple(sucratsstd)
3819
3820
3821 # scores = zeros((len(sucrats), len(sucrats)))
3822 # for i in range(0, len(sucrats)):
3823 # for j in range(0, len(sucrats)):
3824 # scores[i,j] = (sucrats[i] - sucrats[j])/sqrt(sucratsstd[i]**2 + sucratsstd[j]**2)
3825 print tuple(mserats)
3826 # print scores
3827
3828
3829
3830 # print (msegbnc, mseavac, msehmmc,msehmmsvmc )
3831
3832 sample_sizes = X/float(max(X))
3833 gnbc_ratios_curves = zeros((len(X),len(fullratios)))
3834 gnbc_mses_curves = zeros((len(X),len(fullratios)))
3835 #
3836 avac_ratios_curves = zeros((len(X),len(fullratios)))
3837 avac_mses_curves = zeros((len(X),len(fullratios)))
3838 #
3839 hmmc_ratios_curves = zeros((len(X),len(fullratios)))
3840 hmmc_mses_curves = zeros((len(X),len(fullratios)))
3841 #
3842 hmmsvmc_ratios_curves = zeros((len(X),len(fullratios)))
3843 hmmsvmc_mses_curves = zeros((len(X),len(fullratios)))
3844 #
3845 #
3846 for i in range(0, len(fullratios)):
3847 for j in range(0, len(X)):
3848 gnbc_ratios_curves[j,i] = fullratios[i][2][j]
3849 gnbc_mses_curves[j,i] = fullmses[i][2][j]
3850
3851         avac_ratios_curves[j,i] = fullratios[i][4][j]   # SVM is index 4 of (trial, X, gnb, hmm, svm, hmmsvm)
3852         avac_mses_curves[j,i] = fullmses[i][4][j]
3853
3854         hmmc_ratios_curves[j,i] = fullratios[i][3][j]   # HMM is index 3
3855         hmmc_mses_curves[j,i] = fullmses[i][3][j]
3856
3857 hmmsvmc_ratios_curves[j,i] = fullratios[i][5][j]
3858 hmmsvmc_mses_curves[j,i] = fullmses[i][5][j]
3859 #
3860 #
3861 #
3862 #
3863 #
3864 gnbc_mses_curve = mean(gnbc_mses_curves,1)
3865 avac_mses_curve = mean(avac_mses_curves,1)
3866 hmmc_mses_curve = mean(hmmc_mses_curves,1)
3867 hmmsvmc_mses_curve = mean(hmmsvmc_mses_curves,1)
3868 #
3869 gnbc_ratios_curve = mean(gnbc_ratios_curves,1)
3870 avac_ratios_curve = mean(avac_ratios_curves,1)
3871 hmmc_ratios_curve = mean(hmmc_ratios_curves,1)
3872 hmmsvmc_ratios_curve = mean(hmmsvmc_ratios_curves,1)
3873 #
3874 figure()
3875 subplot(211)
3876 plot(sample_sizes,gnbc_ratios_curve)
3877 plot(sample_sizes,avac_ratios_curve)
3878 plot(sample_sizes,hmmc_ratios_curve)
3879 plot(sample_sizes,hmmsvmc_ratios_curve)
3880 xlabel('Part of training set used')
3881 ylabel('Accuracy')
3882 legend(['GNB','SVM','HMM','HMM/SVM'])
3883 #
3884 subplot(212)
3885 plot(sample_sizes,gnbc_mses_curve)
3886 plot(sample_sizes,avac_mses_curve)
3887 plot(sample_sizes,hmmc_mses_curve)
3888 plot(sample_sizes,hmmsvmc_mses_curve)
3889 xlabel('Part of training set used')
3890 ylabel('Mean Squared Error')
3891 #
3892 legend(['GNB','SVM','HMM','HMM/SVM'])
3893 #
3894 # # tight_layout()
3895 savefig('ratios-2.png',dpi=500)  # save before show() so the figure is not blank
3896 show()
3897 #
3898 #
3899 #