#!/usr/bin/python2
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 23 11:53:17 2015

@author: mmxgn
"""

# This file does the cluster estimation and the removal of outliers

from warnings import filterwarnings

from sys import argv, exit
from essentia.standard import YamlInput, YamlOutput
from essentia import Pool
from pca import *

from numpy import *
from sklearn import cluster
from sklearn.metrics import pairwise_distances
from sklearn.cluster import KMeans, MiniBatchKMeans
from matplotlib.pyplot import *
#from sklearn.mixture import GMM
from sklearn.naive_bayes import GaussianNB, MultinomialNB
import scipy.signal  # needed by smooth_matrix_1D below
from scipy.signal import decimate
from sklearn import cross_validation
from sklearn import multiclass
from sklearn.covariance import EllipticEnvelope

#from hmmlearn import hmm
from hmmlearn.hmm import GMM
from hmmlearn import hmm

from sklearn import svm
import copy as cp

from scipy.misc import logsumexp
import pickle

from collections import Counter
#from adpcm import adm, adm_reconstruct

from reshape_observations import reshape_observations

mse = lambda A, B: ((array(A) - array(B)) ** 2).mean()

lognorm = lambda A: A - logsumexp(A)

rc('font', **{'family': 'sans-serif', 'sans-serif': ['Helvetica']})
## for Palatino and other serif fonts use:
#rc('font',**{'family':'serif','serif':['Palatino']})
rc('text', usetex=True)
#logsum = lambda X: logsumexp(log(X))

filterwarnings("ignore")
C = 0.7
NCOMPONENTS = 2
CHAINSIZE = 5

close('all')

# rand/randn are used below but are not pulled in by `from numpy import *`
from numpy.random import randint, rand, randn


class HMMsvm:
    def __init__(self, svm_):
        self.svm = svm_
        # self.svm = MyAVAClassifier()

    def fit(self, features_list, states, flr=1e-13):
        # def fit(self, features_list, flr=1e-13):

        # TODO: Concatenate features, train
        #self.svm.fit(X, states, flr)
        #self.prior = self.svm.prior
        #self.transmat = self.svm.transmat

        features = None
        for f in features_list:
            if features is None:
                features = f
            else:
                features = append(features, f, 0)

        fullstates = None
        # T = features.shape[0]
        for s in states:
            if fullstates is None:
                fullstates = s
            else:
                fullstates = append(fullstates, s)

        self.n_classes = self.svm.n_classes
        n_classes = self.n_classes

        # print fullstates, shape(fullstates)

        h = histogram(fullstates, n_classes)[0].astype(float)
        self.prior = h/sum(h)

        # Add a very small probability for random jump
        self.prior += flr
        self.prior = self.prior/sum(self.prior)

        transmat = zeros((n_classes, n_classes))
        transitions = zeros((n_classes, ))
        for s in states:
            for i in range(1, len(s)):
                prev = s[i-1]
                cur = s[i]
                transmat[prev, cur] += 1
                transitions[prev] += 1

        transitions[transitions == 0] = 1

        for i in range(0, transmat.shape[0]):
            transmat[i, :] = transmat[i, :]/transitions[i]

        self.transmat = transmat

        # Add a very small probability for random jump to avoid zero values
        self.transmat += flr

        for i in range(0, self.transmat.shape[0]):
            self.transmat[i, :] = self.transmat[i, :]/sum(self.transmat[i, :])

        # Iteratively re-estimate the transition matrix from the decoded
        # Viterbi paths until it converges (a hard-EM / Viterbi-training loop)
        A = zeros((n_classes, n_classes))
        Aold = self.transmat

        while mse(A, Aold) > 10*size(A)*flr:
            Aold = A.copy()
            A = zeros((n_classes, n_classes))
            transitions = zeros((n_classes, ))

            for i in range(0, len(features_list)):
                f = features_list[i]
                # print "FEATURES:", f
                s, p = self.logviterbi(f)
                # print s
                for j in range(1, len(s)):
                    prev = s[j-1]
                    cur = s[j]
                    A[prev, cur] += 1
                    transitions[prev] += 1

            transitions[transitions == 0] = 1

            for i in range(0, A.shape[0]):
                A[i, :] = A[i, :]/transitions[i]

            A += flr

            self.transmat = A

            for i in range(0, A.shape[0]):
                if sum(A[i, :]) > 0:
                    A[i, :] = A[i, :]/sum(A[i, :])

        # print observations

    def estimate_emission_probability(self, x, q):
        post = self.svm.estimate_posterior_probability_vector(x)
        # print "post: ", post
        prior = self.prior
        # print "prior: ", prior
        # Rescale the SVM posterior by the class prior to get a quantity
        # proportional to the emission likelihood p(x|q)
        p = post/prior
        p = p/sum(p)

        return p[q]

    # def estimate_emission_probability(self, x, q):
    #     return self.svm.estimate_emission_probability(q, x)

    def predict(self, X):
        return self.logviterbi(X)[0]

    def logviterbi(self, X):
        # Number of states
        N = self.n_classes

        # Number of observations
        T = X.shape[0]

        transmat = self.transmat

        # 1. Initialization
        a1 = self.prior

        delta = zeros((T, N))
        psi = zeros((T, N))

        for i in range(0, N):
            # NOTE: initialized from the first row of the transition matrix
            # rather than the prior a1, following the original code
            delta[0, i] = log(self.transmat[0, i]) + log(self.estimate_emission_probability(X[0, :], i))

        # 2. Recursion
        for t in range(1, T):
            x = X[t, :]
            for j in range(0, N):
                delta[t, j] = max(delta[t-1, :] + log(transmat[:, j])) + log(self.estimate_emission_probability(x, j))
                psi[t, j] = argmax(delta[t-1, :] + log(transmat[:, j]))

        # 3. Termination
        p_star = max(delta[T-1, :] + log(transmat[:, N-1]))
        q_star = argmax(delta[T-1, :] + log(transmat[:, N-1]))

        # 4. Backtracking
        q = zeros((T, ), dtype=int)
        p = zeros((T, ))

        q[-1] = q_star
        p[-1] = p_star
        for t in range(1, T):
            q[-t-1] = int(psi[-t, q[-t]])
            p[-t-1] = delta[-t, q[-t]]

        return q, p

    def viterbi(self, X):
        # Number of states
        N = self.n_classes

        # Number of observations
        T = X.shape[0]

        transmat = self.transmat

        # 1. Initialization
        a1 = self.prior

        delta = zeros((N, ))
        # NOTE: psi holds only the last recursion step here, so the
        # backtracked path is approximate; logviterbi() keeps the full table
        psi = zeros((N, ), dtype=int)

        for i in range(0, N):
            delta[i] = a1[i]*self.estimate_emission_probability(X[0, :], i)

        # 2. Recursion
        for t in range(1, T):
            delta_old = delta.copy()
            x = X[t, :]
            for j in range(0, N):
                delta[j] = max(delta_old*transmat[:, j])*self.estimate_emission_probability(x, j)
                psi[j] = argmax(delta_old*transmat[:, j])

        # 3. Termination
        p_star = max(delta*transmat[:, N-1])
        q_star = argmax(delta*transmat[:, N-1])

        # 4. Backtracking
        q = zeros((T, ), dtype=int)
        q[-1] = q_star

        for t in range(1, T):
            q[-t-1] = psi[q[-t]]

        return q

    def _log_likelihood_matrix(self, X):
        N = self.n_classes
        ll = zeros((X.shape[0], N))

        for i in range(0, X.shape[0]):
            ll[i, :] = self._forward(i, X)

        return ll

    def compute_ksus(self, X):
        N = self.n_classes
        T = X.shape[0]
        print "[II] Computing gammas... "

        gammas = self._forward_backward(X)

        # Initialize
        aij = self.transmat
        # NOTE: incomplete; the xi values are never assembled or returned

    def _not_quite_ksu(self, t, i, j, X):
        alpha = exp(self.forward(X[0:t+1, :]))[i]
        beta = exp(self.backward(X[t+1:, :]))[j]

        aij = self.transmat[i, j]
        bj = self.estimate_emission_probability(X[t+1, :], j)

        return alpha*beta*aij*bj

    def _ksu(self, t, i, j, X):
        # NOTE: alphaT is computed but the normalization is never applied
        alphaT = exp(self.forward(X[0:t+1, :]))[0]

        return self._not_quite_ksu(t, i, j, X)

    def _forward_backward(self, X):
        T = X.shape[0]
        N = self.n_classes
        fv = zeros((T, N))
        sv = zeros((T, N))

        b = None
        for t in range(1, T+1):
            fv[t-1, :] = self._forward_message(fv[t-2, :], X[0:t, :])

        for t in range(1, T+1):
            b = self._backward_message(b, X[-t:, :])
            # Combine the forward and backward messages in the log domain
            sv[-t, :] = lognorm(fv[-t, :] + b)

        return sv

    def _backward(self, t, X):
        N = self.n_classes
        a = ones((N, ))/N

        transmat = self.transmat
        T = X.shape[0]
        if t == T-1:
            # Base case: uniform (log) message at the last frame
            beta = log(a)
            return beta
        else:
            # beta_t(i) = logsumexp_j [ log a_ij + log b_j(x_{t+1}) + beta_{t+1}(j) ]
            tosum = zeros((N, ))
            bt = self._backward(t+1, X)
            btnew = zeros((N, ))
            x = X[t+1, :]
            for i in range(0, N):
                for j in range(0, N):
                    tosum[j] = log(transmat[i, j]) + log(self.estimate_emission_probability(x, j)) + bt[j]
                btnew[i] = logsumexp(tosum)
            btnew = lognorm(btnew)

            return btnew

    def score(self, X):
        return self.forward(X)

    def forward(self, X):
        T = X.shape[0]
        f = None
        for t in range(1, T+1):
            f = self._forward_message(f, X[0:t, :])

        return f

    def backward(self, X):
        T = X.shape[0]
        b = None
        for t in range(1, T+1):
            # print t, b
            idx = arange(-t, T)
            # print idx
            b = self._backward_message(b, X[-t:, :])

        return b

    def _backward_message(self, b, X):
        N = self.n_classes

        transmat = self.transmat

        if X.shape[0] == 1:
            # Base case: uniform (log) message
            beta = log(ones((N, ))/N)
            return beta
        else:
            # beta(i) = logsumexp_j [ log a_ij + log b_j(x_next) + beta_next(j) ]
            tosum = zeros((N, ))
            btnew = zeros((N, ))
            x = X[1, :]
            for i in range(0, N):
                for j in range(0, N):
                    tosum[j] = log(transmat[i, j]) + log(self.estimate_emission_probability(x, j)) + b[j]
                btnew[i] = logsumexp(tosum)
            # btnew = lognorm(btnew)
            return btnew

    def _forward_message(self, f, X):
        N = self.n_classes
        a = self.prior

        alpha = zeros((N, ))
        transmat = self.transmat

        x = X[-1, :]

        if X.shape[0] == 1:
            for i in range(0, N):
                alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i))
            # alpha = lognorm(alpha)
            return alpha
        else:
            # alpha_t(j) = [ logsumexp_i alpha_{t-1}(i) + log a_ij ] + log b_j(x_t)
            tosum = zeros((N, ))
            ftnew = zeros((N, ))
            for j in range(0, N):
                for i in range(0, N):
                    tosum[i] = f[i] + log(transmat[i, j])
                Sum = logsumexp(tosum)
                bj = self.estimate_emission_probability(x, j)
                ftnew[j] = Sum + log(bj)

            # ftnew = lognorm(ftnew)
            return ftnew

    def _forward(self, t, X):
        N = self.n_classes
        a = self.prior
        alpha = zeros((N, ))
        transmat = self.transmat
        x = X[t, :]

        if t == 0:
            for i in range(0, N):
                alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i))
            alpha = lognorm(alpha)
            return alpha
        else:
            tosum = zeros((N, ))
            ft = self._forward(t-1, X)
            ftnew = zeros((N, ))
            for j in range(0, N):
                for i in range(0, N):
                    tosum[i] = ft[i] + log(transmat[i, j])
                Sum = logsumexp(tosum)
                bj = self.estimate_emission_probability(x, j)
                ftnew[j] = Sum + log(bj)
            ftnew = lognorm(ftnew)

            return ftnew
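

# --- Usage sketch (illustrative, not part of the original pipeline) ---
# Assuming X1, X2 are (T x n_features) arrays with per-frame integer state
# labels y1, y2, and MyAVAClassifier (defined below) as the emission model:
#
#   ava = MyAVAClassifier()
#   ava.fit(concatenate((X1, X2)), append(y1, y2))
#   hsvm = HMMsvm(ava)
#   hsvm.fit([X1, X2], [y1, y2])          # Viterbi-style transition training
#   states, scores = hsvm.logviterbi(X1)  # decoded state path and log-scores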


class HMMsvm2(HMMsvm):
    # Unsupervised variant of HMMsvm: the state inventory is fixed by
    # n_components instead of coming from labelled data. All the decoding and
    # scoring machinery (logviterbi, viterbi, forward/backward messages,
    # ksu/gamma helpers) is identical to HMMsvm and is inherited unchanged.
    def __init__(self, svm_, n_components=4):
        self.svm = svm_
        self.n_components = n_components
        # self.svm = MyAVAClassifier()

    def fit(self, features_list, flr=1e-13):
        # TODO: Concatenate features, train
        #self.svm.fit(X, states, flr)
        #self.prior = self.svm.prior
        #self.transmat = self.svm.transmat

        n_classes = self.n_components
        self.n_classes = n_classes

        A = zeros((n_classes, n_classes))

        # Random row-stochastic initialization; rand() keeps all entries
        # non-negative so the rows normalize to valid probabilities
        Aold = rand(n_classes, n_classes)
        self.transmat = Aold
        self.prior = rand(n_classes)
        self.prior = self.prior/sum(self.prior)
        for i in range(0, Aold.shape[0]):
            Aold[i, :] = Aold[i, :]/sum(Aold[i, :])

        while mse(A, Aold) > 10*size(A)*flr:
            Aold = A.copy()
            A = zeros((n_classes, n_classes))
            transitions = zeros((n_classes, ))

            for i in range(0, len(features_list)):
                f = features_list[i]
                # print "FEATURES:", f
                s, p = self.logviterbi(f)
                # print s

                # Re-estimate the prior from the decoded states
                h = histogram(s, n_classes)[0].astype(float)
                self.prior = h/sum(h)
                self.prior += flr
                self.prior = self.prior/sum(self.prior)

                for j in range(1, len(s)):
                    prev = s[j-1]
                    cur = s[j]
                    A[prev, cur] += 1
                    transitions[prev] += 1

            transitions[transitions == 0] = 1

            for i in range(0, A.shape[0]):
                A[i, :] = A[i, :]/transitions[i]

            A += flr

            self.transmat = A

            for i in range(0, A.shape[0]):
                if sum(A[i, :]) > 0:
                    A[i, :] = A[i, :]/sum(A[i, :])

        # print observations


class NBClassifier:
    def __init__(self):
        print "[II] Gaussian Naive Bayes Classifier"
        self.name = "Naive Bayes"
        self.smallname = "gnbc"
        self.gnb = GaussianNB()

    def getName(self):
        return self.name

    def fit(self, X, states):
        self.gnb.fit(X, states)

    def predict(self, X):
        return self.gnb.predict(X)

    def cross_validate(self, data, classes):
        print "[II] NB Classifier Crossvalidating... "
        from copy import deepcopy
        estimator = deepcopy(self)
        estimator_fulldata = deepcopy(self)
        estimator_fulldata.fit(data, classes)

        percents = arange(0.1, 0.9, 0.1)
        self.percents = percents * 100

        RATIOS = []
        labels = estimator_fulldata.predict(data)

        print "[II] for full training-testing: %.2f" % (sum(array(classes == labels).astype(float))/len(labels))

        for p in percents:
            traindata, testdata, trainlabels, testlabels = cross_validation.train_test_split(data, classes, test_size=p, random_state=1)

            # print "[II] [%.2f-%.2f] Examples used for training: %s" % (p, (1-p), str(traindata))
            # print "[II] [%.2f-%.2f] Examples used for testing: %s" % (p, (1-p), str(testdata))

            # Training phase
            estimator_ = deepcopy(estimator)
            estimator_.fit(traindata, trainlabels)

            predicted_labels = estimator_.predict(testdata)

            m = sum(array(predicted_labels == testlabels).astype(float))/len(testlabels)

            print "[II] [%.2f-%.2f] hold-out validation for NB Classifier: %.1f" % (p, (1-p), m)

            RATIOS.append(m)

        return RATIOS, percents


class HmmClassifier3:
    def __init__(self, N=2, n_components=1):
        print "[II] HMM Classifier (%d time steps, %d components)" % (N, n_components)
        self.name = "HMM (%d time steps, %d components)" % (N, n_components)
        self.n_components = n_components
        self.smallname = "hmmc"
        self.chain_size = N

    def getName(self):
        return self.name

    def fit(self, features, parameters):
        # print "Parameters:"
        # print parameters
        n_classes = max(unique(parameters)) + 1

        if n_classes == 1:
            self.only_one_class = True
            return
        else:
            self.only_one_class = False

        print "[II] Number of classes: ", n_classes
        hmms = [None]*n_classes

        chain_size = self.chain_size
        obs = [None]*n_classes

        # Collect, for every class, all chain_size-long windows that end in a
        # frame labelled with that class
        for i in range(chain_size, len(parameters)):
            class_ = parameters[i]
            seq = features[i-chain_size:i, :]

            if obs[class_] is None:
                obs[class_] = [seq]
            else:
                obs[class_].append(seq)

        for i in range(0, len(obs)):
            if obs[i] is not None and len(obs[i]) != 0:
                hmm_ = hmm.GaussianHMM(n_components=self.n_components, covariance_type='diag')
                #obs_ = reshape_observations(obs[i])
                obs_ = concatenate(obs[i])
                #lengths_ = [o.shape[0] for o in obs[i]]
                hmm_.fit(obs_, [self.chain_size]*len(obs[i]))
                hmms[i] = hmm_

        self.hmms = hmms

        return obs

    def predict(self, features, mfilt=20):
        if self.only_one_class == True:
            return zeros((features.shape[0], ))

        chain_size = self.chain_size
        hmms = self.hmms
        predicted_classes = zeros((features.shape[0], ))

        # Score each window under every class HMM and pick the best
        for i in range(chain_size, features.shape[0]):
            scores = zeros((len(hmms), ))

            seq = features[i-chain_size:i, :]

            for j in range(0, len(hmms)):
                if hmms[j] is not None:
                    scores[j] = hmms[j].score(seq)
                else:
                    scores[j] = -infty

            predicted_classes[i] = argmax(scores)

        # Do a median filter at the end
        # predicted_classes = median_filter(predicted_classes, mfilt)

        return predicted_classes

    def k_fold_cross_validate(self, data, classes, K=5):
        print "[II] HMM Classifier Crossvalidating... "
        print "[II] Validating data: ", data
        print "[II] With classes: ", classes
        from copy import deepcopy
        estimator = deepcopy(self)
        estimator_fulldata = deepcopy(self)
        estimator_fulldata.fit(data, classes)

        labels = estimator_fulldata.predict(data)
        print "[II] for full training-testing: %.2f" % (sum(array(classes == labels).astype(float))/len(classes))

        # Crossvalidate this.
        example_sequences = []
        example_labels = []

        chain_size = self.chain_size

        percents = arange(0.1, 0.9, 0.1)
        self.percents = percents * 100

        RATIOS = []

        # Constructing examples and labels
        L = data.shape[0]/K
        M = K
        # print "[II] Example size:", L

        n = 1
        while L*(n-1) < M*L:
            if L*n > shape(data)[0]:
                example = data[L*(n-1):, :]
                classes_ = classes[L*(n-1):]
            else:
                example = data[L*(n-1):L*n, :]
                classes_ = classes[L*(n-1):L*n]

            example_sequences.append(example)
            example_labels.append(classes_)
            n += 1

        from sklearn.cross_validation import KFold
        kf = KFold(K, n_folds=K)

        ratio = 0

        for train, test in kf:
            print "[II] Validating examples %s against %s." % (train, test)

            topredict_data = example_sequences[test[0]]
            topredict_labels = example_labels[test[0]]

            tofit_data = None
            tofit_labels = None

            for t in train:
                if tofit_data is None:
                    tofit_data = example_sequences[t]
                    tofit_labels = example_labels[t]
                else:
                    tofit_data = append(tofit_data, example_sequences[t], 0)
                    tofit_labels = append(tofit_labels, example_labels[t], 0)

            estimator_ = deepcopy(estimator)
            estimator_.fit(tofit_data, tofit_labels)

            labels = estimator_.predict(topredict_data)

            m = sum(array(topredict_labels == labels).astype(float))/len(labels)

            print "[II] %d-fold cross-validation for HMM Classifier: %.1f" % (K, m)

            ratio += m/float(len(kf))

        return ratio

    def cross_validate(self, data, classes):
        print "[II] HMM Classifier Crossvalidating... "
        from copy import deepcopy
        estimator = deepcopy(self)
        estimator_fulldata = deepcopy(self)
        estimator_fulldata.fit(data, classes)

        labels = estimator_fulldata.predict(data)
        print "[II] for full training-testing: %.2f" % (sum(array(classes == labels).astype(float))/len(classes))

        # Crossvalidate this.
        example_sequences = []
        example_labels = []

        chain_size = self.chain_size

        percents = arange(0.5, 0.9, 0.1)
        self.percents = percents * 100

        RATIOS = []

        # Constructing examples and labels
        M = self.chain_size
        L = data.shape[0]/20

        print "[II] Example size:", L

        n = 1
        while L*n < M*L - L:
            example = data[L*(n-1):L*n, :]
            classes_ = classes[L*(n-1):L*n]

            example_sequences.append(example)
            example_labels.append(classes_)
            n += 1

        # Splitting to train/test
        for p in percents:
            traindata, testdata, trainlabels, testlabels = cross_validation.train_test_split(example_sequences, example_labels, test_size=p, random_state=1)

            # print traindata
            # print testdata

            # Concatenate traindata
            tofit_data = None
            tofit_labels = None

            topredict_data = None
            topredict_labels = None

            for t in traindata:
                if tofit_data is None:
                    tofit_data = t
                else:
                    tofit_data = append(tofit_data, t, 0)

            for l in trainlabels:
                if tofit_labels is None:
                    tofit_labels = l
                else:
                    tofit_labels = append(tofit_labels, l)

            for t in testdata:
                if topredict_data is None:
                    topredict_data = t
                else:
                    topredict_data = append(topredict_data, t, 0)

            for l in testlabels:
                if topredict_labels is None:
                    topredict_labels = l
                else:
                    topredict_labels = append(topredict_labels, l)

            # print "[II] Fit data: ", tofit_data
            # print "[II] Fit labels: ", tofit_labels
            # print "[II] Predict data: ", topredict_data
            # print "[II] Predict labels: ", topredict_labels

            estimator_ = deepcopy(estimator)
            estimator_.fit(tofit_data, tofit_labels)

            labels = estimator_.predict(topredict_data)

            m = sum(array(topredict_labels == labels).astype(float))/len(labels)

            print "[II] [%.2f-%.2f] cross-validation for HMM Classifier: %.1f" % (p, (1-p), m)

            RATIOS.append(m)

        return RATIOS, percents
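

# A small usage sketch (illustrative): `features` is (T x n_features) and
# `params` holds one integer class label per frame:
#
#   clf = HmmClassifier3(N=5, n_components=2)  # 5-frame windows, 2 mixtures
#   clf.fit(features, params)
#   predicted = clf.predict(features)          # one class label per frame
#   score = clf.k_fold_cross_validate(features, params, K=5)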


class HMMsvmClassifier2:
    def __init__(self, N=2):
        self.classifiers = {}
        self.name = "HMM-SVM Classifier 2"
        self.smallname = "hmmsvm2"
        self.obs = MyAVAClassifier()
        self.chain_size = N

    def getName(self):
        return self.name

    def fit(self, X, y):
        self.n_classes = max(unique(y)) + 1
        n_classes = self.n_classes
        chain_size = self.chain_size

        if n_classes == 1:
            self.only_one_class = True
            return
        else:
            self.only_one_class = False

        print "[II] Number of classes: ", n_classes

        hmms = [None]*n_classes
        obs = [None]*n_classes

        # NOTE: incomplete; the loop below only begins collecting per-class
        # windows (as in HmmClassifier3) and no models are trained
        for i in range(chain_size, len(y)):
            class_ = y[i-1]


class HMMsvmClassifier:
    def __init__(self, N=2):
        self.classifiers = {}
        self.name = "HMM-SVM Classifier"
        self.smallname = "hmmsvm"
        self.obs = MyAVAClassifier()
        self.chain_size = N

    def getName(self):
        return self.name

    def fit(self, X, y):
        self.n_classes = max(unique(y)) + 1

        self.obs.fit(X, y)
        self.hmm = HMMsvm(self.obs)
        self.hmm.fit([X], [y])

    def predict(self, X):
        # chain_size = self.chain_size
        # predicted = zeros((X.shape[0]),)
        # for i in range(chain_size, len(predicted)):
        #     predicted[i] = self.hmm.predict(X[i-chain_size:i,:])[-1]
        # return predicted

        return self.hmm.predict(X)

    def confidence(self, x, q):
        return self.hmm.estimate_emission_probability(x, q)


class SinkHoleClassifier:
    def __init__(self):
        # name/smallname are assumed here (they were never set, but
        # getName() requires them)
        self.name = "SinkHole Classifier"
        self.smallname = "sinkhole"
        self.classifierNB = NBClassifier()
        self.classifierSVM = MyAVAClassifier()
        self.classifierHMM = HmmClassifier3()
        self.classifierHMMSVM = HMMsvmClassifier()

    def getName(self):
        return self.name

    def fit(self, X, y):
        self.n_classes = max(unique(y)) + 1

        self.classifierNB.fit(X, y)
        self.classifierSVM.fit(X, y)
        self.classifierHMM.fit(X, y)
        self.classifierHMMSVM.fit(X, y)

    def predict(self, X):
        predictedNB = self.classifierNB.predict(X)
        predictedSVM = self.classifierSVM.predict(X)
        predictedHMM = self.classifierHMM.predict(X)
        predictedHMMSVM = self.classifierHMMSVM.predict(X)

        predicted = zeros((X.shape[0], ))

        # Majority vote among the four classifiers
        for i in range(0, len(predicted)):
            candidates = [predictedNB[i], predictedSVM[i], predictedHMM[i], predictedHMMSVM[i]]
            c = Counter(candidates)

            most_common = c.most_common()

            # If there is an equal voting, select something at random
            if len(unique([k[1] for k in most_common])) == 1:
                predicted[i] = most_common[randint(len(most_common))][0]
            else:
                predicted[i] = most_common[0][0]

        return predicted


class MyAVAClassifier:

    def __init__(self):
        self.classifiers = {}
        self.name = "Linear SVM Classifier"
        self.smallname = "svc-ava"
        # self.probabilities = {}

    def getName(self):
        return self.name

    def fit(self, X, y, flr=0):
        n_classes = max(unique(y)) + 1

        # Transform to a binary problem if there are only two classes
        if len(unique(y)) == 1:
            self.only_one_class = True
            self.n_classes = 1
            self.one_class_label = y[0]
            return
        elif len(unique(y)) == 2:
            self.n_classes = n_classes
            self.svm = svm.SVC(probability=True, kernel='poly', gamma=2, C=C)
            self.svm.fit(X, y)
            classes_ = unique(y)
            self.classifiers[(classes_[0], classes_[1])] = self.svm
            self.only_two_classes = True
            self.only_one_class = False

            return
        else:
            self.only_two_classes = False
            self.only_one_class = False

        classes = arange(0, n_classes)
        self.n_classes = n_classes
        # M = n_classes*(n_classes-1) # Number of classifiers

        h = histogram(y, n_classes)[0].astype(float)
        self.prior = h/sum(h)

        transmat = zeros((n_classes, n_classes))

        for i in range(1, len(y)):
            prev = y[i-1]
            cur = y[i]
            transmat[prev, cur] += 1

        transmat = transmat/sum(transmat)

        self.transmat = transmat

        # Add a very small probability for random jump to avoid zero values
        self.transmat += flr
        self.transmat = self.transmat/sum(self.transmat)

        # Train one binary SVM for every (unordered) pair of classes
        for i in range(0, n_classes):
            for j in range(0, n_classes):
                if i != j and (i, j) not in self.classifiers and (j, i) not in self.classifiers:
                    idx_ = bitwise_or(y == classes[i], y == classes[j])

                    X_ = X[idx_, :]
                    y_ = y[idx_]

                    if len(unique(y_)) > 1:
                        svm_ = svm.SVC(probability=True, kernel='poly', gamma=2, C=C)
                        svm_.fit(X_, y_)
                        self.classifiers[(i, j)] = svm_
                        # self.probabilities[(i,j)] = svm_.predict_proba(X)

    def estimate_pairwise_class_probability(self, i, j, x):
        # print self.classifiers.keys()
        if (i, j) not in self.classifiers and (j, i) in self.classifiers:
            return self.classifiers[(j, i)].predict_proba(x)[0, 1]
        elif (i, j) not in self.classifiers and (j, i) not in self.classifiers:
            return 0.0
        else:
            return self.classifiers[(i, j)].predict_proba(x)[0, 0]

    def estimate_posterior_probability(self, i, x):
        mus = zeros((self.n_classes, ))
        for j in range(0, self.n_classes):
            if i != j:
                pcp = self.estimate_pairwise_class_probability(i, j, x)
                pcp += 1e-18  # guard against division by zero
                mus[j] = 1/pcp
        S = sum(mus) - (self.n_classes - 2)
        return 1/S
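
    # The expression above is the pairwise-coupling approximation (cf. Price
    # et al.) for combining one-vs-one classifiers:
    #
    #   p(class = i | x)  ~  1 / ( sum_{j != i} 1/p_ij(x) - (K - 2) )
    #
    # where p_ij(x) is the probability the (i, j) binary SVM assigns to
    # class i and K is the number of classes.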

    def estimate_posterior_probability_vector(self, x):
        posterior = zeros((self.n_classes, ))
        for i in range(0, len(posterior)):
            posterior[i] = self.estimate_posterior_probability(i, x)

        return posterior

    # def estimate_emission_probability(self, i, x):
    #     post = self.estimate_posterior_probability_vector(x)
    #     prior = self.prior
    #     p = post/prior
    #     p = p/sum(p)
    #     return p[i]

    # def estimate_emissmat(self, X):
    #     emissmat = zeros((X.shape[0], self.n_classes))
    #     for i in range(0, X.shape[0]):
    #         for j in range(0, self.n_classes):
    #             emissmat[i,j] = self.estimate_emission_probability(j, X[i,:])
    #     return emissmat

    def predict(self, X):
        predicted = zeros((X.shape[0], ))

        if self.only_one_class == True:
            return ones((X.shape[0], ))*self.one_class_label
        elif self.only_two_classes == True:
            return self.svm.predict(X)

        for i in range(0, X.shape[0]):
            x = X[i, :]
            P = zeros((self.n_classes, ))

            for c in range(0, len(P)):
                P[c] = self.estimate_posterior_probability(c, x)

            pred = argmax(P)
            predicted[i] = pred

        return predicted
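

# Example use of the all-vs-all classifier on its own (illustrative names):
#
#   ava = MyAVAClassifier()
#   ava.fit(X_train, y_train)   # trains one SVM per class pair
#   y_hat = ava.predict(X_test)
#   p = ava.estimate_posterior_probability_vector(X_test[0, :])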


def logsum(X):
    # log(sum(X)) computed recursively via logaddexp
    if len(X) == 1:
        return log(X[0])
    else:
        return logaddexp(log(X[0]), logsum(X[1:]))
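
# Sanity check: logsum(array([0.5, 0.25, 0.25])) == log(1.0) == 0.0, and in
# general logsum(X) agrees with logsumexp(log(X)).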


def smooth_matrix_1D(X):
    # Row-wise Gaussian smoothing, with 50 samples of zero padding on each
    # side so the convolution does not wrap at the edges
    window = scipy.signal.gaussian(51, 4)
    window = window/sum(window)
    intermx = zeros((X.shape[0], X.shape[1]+100))
    intermx[:, 50:-50] = X

    for m in range(0, X.shape[0]):
        intermx[m, :] = convolve(ravel(intermx[m, :]), window, 'same')

    return intermx[:, 50:-50]
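
# Shape-preserving by construction, e.g. (with assumed data):
#
#   M = rand(5, 200)
#   Ms = smooth_matrix_1D(M)   # Ms.shape == (5, 200), rows low-pass filtered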


def adm_reconstruct(codeword, h, dmin=.01, dmax=.28):
    # Decode an ADM codeword (one of +1/0/-1 per sample) back into a signal,
    # starting from the initial value h; the step size doubles on repeated
    # moves in the same direction, clamped to dmax
    x = zeros((1, codeword.shape[1]))

    delta1 = dmin
    delta2 = dmin
    Sum = h

    x[0] = h
    for i in range(0, codeword.shape[1]):
        if codeword[0, i] == 0:
            delta1 = dmin
            delta2 = dmin
        elif codeword[0, i] == 1:
            delta2 = dmin
            Sum += delta1
            delta1 *= 2
            if delta1 > dmax:
                delta1 = dmax
        elif codeword[0, i] == -1:
            delta1 = dmin
            Sum -= delta2
            delta2 *= 2
            if delta2 > dmax:
                delta2 = dmax
        x[0, i] = Sum
    return x


def adm(x, dmin=.01, dmax=.28, tol=0.0001):
    # Adaptive delta modulation adapted from code:
    # (adeltamod.m)
    #
    # Adaptive Delta Modulator
    # by Gandhar Desai (gdesai)
    # BITS Pilani Goa Campus
    # Date: 28 Sept, 2013

    xsig = x

    Lx = len(x)

    ADMout = zeros((1, Lx))
    codevec = zeros((1, Lx))

    Sum = x[0]
    delta1 = dmin
    delta2 = dmin
    mult1 = 2
    mult2 = 2
    for i in range(0, Lx):
        if (abs(xsig[i] - Sum) < tol):
            bit = 0
            delta2 = dmin
            delta1 = dmin
        elif (xsig[i] >= Sum):
            bit = 1
            delta2 = dmin
            Sum += delta1
            delta1 *= mult1
            if delta1 > dmax:
                delta1 = dmax
        else:
            bit = -1
            delta1 = dmin
            Sum -= delta2
            delta2 *= mult2
            if delta2 > dmax:
                delta2 = dmax

        ADMout[0, i] = Sum
        codevec[0, i] = bit

    return ADMout, codevec, x[0]
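
# Round-trip sketch (assumed signal): the reconstruction tracks the input to
# within the adaptive step bounds.
#
#   sig = linspace(0.2, 0.8, 100)
#   admout, code, h0 = adm(sig, dmin=0.001, dmax=0.28, tol=0.001)
#   rec = adm_reconstruct(code, h0, dmin=0.001, dmax=0.28)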


def median_filter(v, K):
    # Crude median filter: every element ends up with the median of the last
    # K-sample window that covers it (coarser than a sliding median)
    v2 = zeros((len(v), ))
    for i in range(K, len(v)):
        seq = v[i-K:i]
        m = median(seq)
        v2[i-K:i] = m

    return v2
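
# E.g. median_filter(array([0., 0., 9., 0., 0., 0.]), 3) returns all zeros:
# the single-sample spike is suppressed, which is how it is meant to
# de-glitch predicted class sequences.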


from models import ReverbModel


if __name__ == "__main__":
    if len(argv) != 2:
        print "[EE] Wrong number of arguments"
        print "[II] Correct syntax is:"
        print "[II] \t%s <trainingdir>" % argv[0]
        print "[II] where <trainingdir> is the directory containing the features files in the format *_features.yaml"
        print "[II] and the parameters in the format *_parameters.yaml"
        exit(-1)

    n_clusters = 3
    UpsamplingFactor = 1
    dmin = 0.001
    dmax = 0.28
    tol = 0.001

    # Parameter ranges, used later for scaling to [0, 1]
    d1min = 0.01
    d1max = 0.1

    g1min = 0.01
    g1max = 0.99

    damin = 0.006
    damax = 0.012

    Gmin = 0.01
    Gmax = 0.99

    gcmin = 0.01
    gcmax = 0.99

    # For searching the directory
    from glob import glob

    traindir = argv[1]

    songs_in_dir = glob("%s/*_features.yaml" % traindir)

    print "[II] Using files: %s" % songs_in_dir

    chain_length = 1

    # fullfeatures_pool = Pool()

    features_vector = None
    parameters_vector = None
    index_vector = None

    idx = 0
    # Shuffle segments
    random.shuffle(songs_in_dir)
    for f_ in songs_in_dir:
        infile = f_
        paramfile = "%s_parameters.yaml" % f_.split('_features.yaml')[0]

        print "[II] Using features file: %s" % infile
        print "[II] and parameters file: %s" % paramfile

        features_pool = YamlInput(filename=infile)()
        parameters_pool = YamlInput(filename=paramfile)()

        d1 = parameters_pool['parameters.d1']
        da = parameters_pool['parameters.da']
        g1 = parameters_pool['parameters.g1']
        gc = parameters_pool['parameters.gc']
        G = parameters_pool['parameters.G']

        feature_captions = features_pool.descriptorNames()
        parameter_captions = ['d1', 'da', 'g1', 'gc', 'G']

        svmhmmstr = ""

        for c in features_pool.descriptorNames():
            if c.split('.')[0] == 'metadata' or c.split('.')[0] == 'parameter':
                feature_captions.remove(c)

        # close('all')

        # print "[II] Loaded training data from %s (%s) " % (infile, features_pool['metadata.date'][0])
        print "[II] %d Features Available: " % len(feature_captions)
        print str(feature_captions).replace("', ", "\n").replace('[', '').replace("'", "[II]\t ")[:-7]

        nfeatures_in = len(feature_captions)
        nparameters_in = 5
        features_vector_one = zeros((nfeatures_in, len(features_pool[feature_captions[0]])))
        index_vector_one = ones((len(features_pool[feature_captions[0]]), ))
        parameters_vector_one = ones((nparameters_in, len(features_pool[feature_captions[0]])))

        for i in range(0, nfeatures_in):
            features_vector_one[i, :] = features_pool[feature_captions[i]].T
        # for i in range(0, nparameters_in):
        #     parameters_vector[i, :] = features_pool[parameter_captions[0]].T

        # Each parameter is constant over a segment, so broadcast it along
        # the frame axis
        parameters_vector_one[0, :] = d1*parameters_vector_one[0, :]
        parameters_vector_one[1, :] = g1*parameters_vector_one[1, :]
        parameters_vector_one[2, :] = da*parameters_vector_one[2, :]
        parameters_vector_one[3, :] = gc*parameters_vector_one[3, :]
        parameters_vector_one[4, :] = G*parameters_vector_one[4, :]

        # Stores example index number
        index_vector_one *= idx

        print "[II] %d parameters used:" % len(parameter_captions)
        print str(parameter_captions).replace("', ", "\n").replace('[', '').replace("'", "[II]\t ")[:-7].replace('parameter.', '')

        if features_vector is None:
            features_vector = features_vector_one
        else:
            features_vector = append(features_vector, features_vector_one, 1)

        if parameters_vector is None:
            parameters_vector = parameters_vector_one
        else:
            parameters_vector = append(parameters_vector, parameters_vector_one, 1)

        if index_vector is None:
            index_vector = index_vector_one
        else:
            index_vector = append(index_vector, index_vector_one)

        idx += 1
    print "[II] Marking silent parts"
    features_full_nontransformed_train = features_vector.copy()

    silent_parts = zeros((1, features_vector.shape[1]))

    rms = features_vector[feature_captions.index('rms'), :]

    # Implementing a hysteresis gate: the high and low thresholds are set
    # relative to the mean RMS (0.1x and 0.01x respectively)
    rms_threshold_mean = mean(rms)

    rms_threshold_max = max(rms)
    rms_threshold_min = min(rms)

    rms_threshold_high = 0.1 * rms_threshold_mean
    rms_threshold_low = 0.01 * rms_threshold_mean

    for n in range(1, len(rms)):
        prev = rms[n-1]
        curr = rms[n]

        if prev >= rms_threshold_high:
            if curr < rms_threshold_low:
                silent_parts[0, n] = 1
            else:
                silent_parts[0, n] = 0
        elif prev <= rms_threshold_low:
            if curr > rms_threshold_high:
                silent_parts[0, n] = 0
            else:
                silent_parts[0, n] = 1
        else:
            silent_parts[0, n] = silent_parts[0, n-1]

    if silent_parts[0, 1] == 1:
        silent_parts[0, 0] = 1
    active_index = invert(silent_parts.flatten().astype(bool))

    # Keep only active parts
    features_vector = features_vector[:, active_index]

    # Keep this for debugging purposes
    parameters_vector = parameters_vector[:, active_index]
    # index_vector = index_vector[active_index]

    # Label examples for a chain of chain_length
    # idx = 0
    # for i in range(0, len(index_vector)):
    #     index_vector[i] = idx
    #     if i % chain_length == 0:
    #         idx += 1
    # number_of_examples = idx

    # Scale parameters to [0, 1] using the ranges declared above
    parameters_vector[0, :] = (parameters_vector[0, :] - d1min)/d1max  # d1
    parameters_vector[1, :] = (parameters_vector[1, :] - g1min)/g1max  # g1
    parameters_vector[2, :] = (parameters_vector[2, :] - damin)/damax  # da
    parameters_vector[3, :] = (parameters_vector[3, :] - gcmin)/gcmax  # gc
    parameters_vector[4, :] = (parameters_vector[4, :] - Gmin)/Gmax  # G
    moments_vector = zeros((features_vector.shape[0], 2))

    features_vector_original = features_vector.copy()

    print "[II] Storing moments vector"
    # Standardize each feature to zero mean/unit variance, remembering the
    # moments so the transform can be undone later
    for i in range(0, features_vector.shape[0]):
        mean_ = mean(features_vector[i, :])
        std_ = std(features_vector[i, :], ddof=1)
        moments_vector[i, 0] = mean_
        moments_vector[i, 1] = std_

        features_vector[i, :] = (features_vector[i, :] - mean_)/std_

    features_vector_old_train = features_vector.copy()
    # moments_vector_train = moments_vector

    print "[II] Extracting PCA configuration "

    kernel, q, featurelist = extract_pca_configuration_from_data(features_vector)

    print "[II] Optimal number of PCs to keep: %d" % q

    feature_captions_array = array(feature_captions)

    features_to_keep = list(feature_captions_array[featurelist])

    print "[II] Decided to keep %d features:" % len(features_to_keep)
    print str(features_to_keep).replace("', ", "\n").replace('[', '').replace("'", "[II]\t ")[:-7]

    featurelist_bak = featurelist
    features_kept_data = features_vector[featurelist, :]
    features_kept_data_bak = features_kept_data.copy()
    # features_vector_old_train = features_kept_data
    # Project the kept features onto the first q principal components
    features_vector = (kernel.T*features_kept_data)[0:q, :]
    features_vector_new_train = features_vector.copy()
    # example_features = None
    # example_parameters = None
    # example_idx = None
    #
    # for idx in range(0, shape(parameters_vector)[1]-chain_length):
    #     example_features_ = features_vector[:, idx:idx+chain_length]
    #     example_parameters_ = parameters_vector[:, idx:idx+chain_length]
    #     example_idx_ = ones((shape(example_parameters_)[1],))
    #
    #     if example_features is None:
    #         example_features = example_features_
    #     else:
    #         example_features = append(example_features, example_features_, 1)
    #
    #     if example_parameters is None:
    #         example_parameters = example_parameters_
    #     else:
    #         example_parameters = append(example_parameters, example_parameters_, 1)
    #
    #     if example_idx is None:
    #         example_idx = example_idx_*idx
    #     else:
    #         example_idx = append(example_idx, example_idx_*idx, 1)
    #
    # features_vector = example_features
    # parameters_vector = example_parameters
    # index_vector = example_idx

    print "[II] Trying ADM-coded parameters"
    print "[II] Upsampling features and parameters by a factor of %d" % UpsamplingFactor

    # Upsampled features and parameters
    features_vector_upsampled = smooth_matrix_1D(repeat(features_vector, UpsamplingFactor, axis=1))
    # features_vector_upsampled = repeat(features_vector, UpsamplingFactor, axis=1)
    # features_vector_upsampled = smooth_matrix_1D(repeat(features_vector_old_train, UpsamplingFactor, axis=1))

    # feature_labels_upsampled = repeat(features_clustering_labels, UpsamplingFactor, axis=0)
    parameters_vector_upsampled = repeat(parameters_vector, UpsamplingFactor, axis=1)

    # km = KMeans(init='k-means++', n_init=10, max_iter=300, tol=0.0000001, verbose=0, n_clusters=12).fit(parameters_vector_upsampled.T)
    # centers_ = km.cluster_centers_
    # labels__ = km.labels_
    # Remove the following two lines in order to restore non-kmeans version
    # parameters_vector_kmeans_upsampled = array(centers_[labels__])
    # parameters_vector_upsampled = parameters_vector_kmeans_upsampled.T
    # parameters_vector_upsampled = smooth_matrix_1D(parameters_vector_upsampled)

    parameters_vector_upsampled_adm = matrix(zeros(shape(parameters_vector_upsampled)))
    parameters_vector_upsampled_code = matrix(zeros(shape(parameters_vector_upsampled)))
    parameters_vector_upsampled_firstval = matrix(zeros((parameters_vector_upsampled.shape[0], 1)))
    # Reconstructed parameters
    parameters_vector_upsampled_reconstructed = matrix(zeros(shape(parameters_vector_upsampled)))

    # ADM HERE
    def adm_matrix(X, dmin=0.001, dmax=0.28, tol=0.001):
        # Run the adaptive delta modulator over every row of X
        out = matrix(zeros(shape(X)))
        code = matrix(zeros(shape(X)))
        firstval = matrix(zeros((X.shape[0], 1)))

        for i in range(0, X.shape[0]):
            out[i, :], code[i, :], firstval[i, 0] = adm(X[i, :], dmin=dmin, dmax=dmax, tol=tol)

        return out, code, firstval
e@0 2030
e@0 2031 # parameters_vector_upsampled_reconstructed[i,:] = adm_reconstruct(parameters_vector_upsampled_code[i,:],parameters_vector_upsampled_firstval[i,0], dmin=dmin,dmax=dmax)
e@0 2032
e@0 2033 def adm_matrix_reconstruct(code, firstval, dmin=0.001, dmax=0.28):
e@0 2034 X = matrix(zeros(shape(code)))
e@0 2035 for i in range(0, code.shape[0]):
e@0 2036 X[i,:] = adm_reconstruct(code[i,:], firstval[i,0], dmin=dmin, dmax=dmax)
e@0 2037
e@0 2038 return X
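# A minimal round-trip sketch for the two helpers above (a hypothetical
# check, assuming the project's adpcm module provides adm/adm_reconstruct
# with the signatures used here; that import is commented out in the file
# header, so it must be restored before running):
#
#   P = matrix(random.rand(2, 100))            # toy 2-parameter track
#   P_adm, P_code, P_first = adm_matrix(P, dmin=0.001, dmax=0.28, tol=0.001)
#   P_rec = adm_matrix_reconstruct(P_code, P_first, dmin=0.001, dmax=0.28)
#   print "[II] ADM round-trip MSE: %.5f" % mse(P, P_rec)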
e@0 2039
e@0 2040
e@0 2041 parameters_vector_upsampled_adm, parameters_vector_upsampled_code, parameters_vector_upsampled_firstval = adm_matrix(parameters_vector_upsampled, dmin, dmax, tol)
e@0 2042 # parameters_vector_upsampled = parameters_vector_upsampled_code
e@0 2043
e@0 2044
e@0 2045 def diff_and_pad(X):
e@0 2046 return concatenate((
e@0 2047 zeros((
e@0 2048 shape(X)[0],
e@0 2049 1
e@0 2050 )),
e@0 2051 diff(X, axis=1)),
e@0 2052 axis=1)
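# Example: diff_and_pad keeps the input shape and zero-pads the first
# column, e.g. diff_and_pad(matrix([[1., 3., 6.]])) -> matrix([[0., 2., 3.]]).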
e@0 2053
e@0 2054
e@0 2055 # Change for adm,
e@0 2056
e@0 2057 # parameters_vector_upsampled = parameters_vector_upsampled_code
e@0 2058 print "[II] Clustering features."
e@0 2059 #
e@0 2060 features_clustering = GMM(n_components = n_clusters, covariance_type='diag')
e@0 2061 #
e@0  2062 features_clustering.fit(features_vector_upsampled.T)  # the GMM fit is unsupervised; the old y= argument was ignored
e@0 2063 #
e@0 2064 features_clustering_means = features_clustering.means_
e@0 2065 features_clustering_labels = features_clustering.predict(features_vector_upsampled.T)
e@0 2066 features_clustering_sigmas = features_clustering.covars_
e@0 2067 #
e@0 2068 features_vector_upsampled_estimated = zeros(shape(features_vector_upsampled))
e@0 2069 #
e@0 2070 #
e@0 2071 for n in range(0, len(features_vector_upsampled_estimated[0])):
e@0 2072 features_vector_upsampled_estimated[:,n] = features_clustering_means[features_clustering_labels[n]]
e@0 2073 #
e@0 2074 #
e@0 2075 print "[II] Features MSE for %d clusters: %.3f" % (n_clusters, mse(features_vector_upsampled, features_vector_upsampled_estimated))
e@0 2076
e@0 2077
e@0 2078
e@0 2079 def happy_curve_classification(data, classes, estimator, Nd=1):
e@0 2080 print "[II] Generating Happy Curve "
e@0 2081 from copy import deepcopy
e@0 2082 estimator_fulldata = deepcopy(estimator)
e@0 2083 estimator_fulldata.fit(data, classes)
e@0  2084     labels = estimator_fulldata.predict(data)  # use the full-data fit, not the caller's (possibly unfitted) estimator
e@0 2085
e@0 2086 # 1. Split data in two, training and testing
e@0 2087
e@0 2088 Ntr = int(round(data.shape[0]/2)) # Training dataset size
e@0 2089 Nts = data.shape[0] - Ntr # Testing dataset size
e@0 2090
e@0 2091 ratios = []
e@0 2092
e@0 2093 for n in range(Nd, Ntr):
e@0 2094 train, test, trainl, testl = cross_validation.train_test_split(data, classes, test_size = 0.5, random_state = 0)
e@0 2095 train = train[0:n,:]
e@0 2096 trainl = trainl[0:n]
e@0 2097 # print "trainl"
e@0 2098 # print trainl
e@0 2099 estimator_ = deepcopy(estimator)
e@0 2100 estimator_.fit(train,trainl)
e@0 2101 labels = estimator_.predict(test)
e@0 2102
e@0 2103 ratio = sum(array(testl==labels).astype(float))/len(labels)
e@0 2104
e@0 2105 ratios.append(ratio)
e@0 2106
e@0 2107
e@0 2108 return ratios
e@0 2109
e@0 2110
e@0 2111 def cross_validate_classification(data, classes, estimator):
e@0 2112 print "[II] Crossvalidating... "
e@0 2113 from copy import deepcopy
e@0 2114 estimator_fulldata = deepcopy(estimator)
e@0 2115 estimator_fulldata.fit(data, classes)
e@0 2116
e@0 2117 percents = arange(0.1,0.9,0.1)
e@0 2118 MSEs = []
e@0  2119     labels = estimator_fulldata.predict(data)  # use the full-data fit, not the caller's (possibly unfitted) estimator
e@0 2120
e@0 2121 print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(labels))
e@0 2122
e@0 2123 for p in percents:
e@0 2124 train,test,trainlabels,testlabels = cross_validation.train_test_split(data,classes,test_size=p,random_state=0)
e@0 2125 estimator_ = deepcopy(estimator)
e@0 2126 estimator_.fit(train, trainlabels)
e@0 2127 labels = estimator_.predict(test)
e@0 2128 print "[II] for training(%.2f)-testing(%.2f): %.2f" % ((1-p),p,sum(array(testlabels==labels).astype(float))/len(labels))
e@0 2129
e@0 2130 return MSEs
e@0 2131
e@0 2132 def cross_validate_clustering(data, estimator):
e@0 2133 print "[II] Crossvalidating... "
e@0 2134 estimator_fulldata = estimator
e@0 2135 estimator_fulldata.fit(data)
e@0 2136
e@0 2137 # labels = estimator_fulldata.predict(data)
e@0 2138 means = estimator_fulldata.means_
e@0 2139 # print means
e@0 2140
e@0 2141 percents = arange(0.1,0.6,0.1)
e@0 2142 MSEs = []
e@0 2143 reconstructed = zeros(shape(data))
e@0 2144 labels = estimator.predict(data)
e@0 2145 for n in range(0, len(reconstructed)):
e@0 2146 reconstructed[n,:] = means[labels[n]]
e@0 2147
e@0 2148 MSEs.append(mse(data,reconstructed))
e@0 2149 for p in percents:
e@0 2150 train,test = cross_validation.train_test_split(data,test_size=p,random_state=0)
e@0 2151 train = matrix(train)
e@0 2152 test = matrix(test)
e@0 2153
e@0 2154 estimator.fit(train)
e@0 2155 means = estimator.means_
e@0 2156 labels = estimator.predict(test)
e@0 2157 reconstructed = zeros(shape(test))
e@0 2158 for n in range(0, len(reconstructed)):
e@0 2159 reconstructed[n,:] = means[labels[n]]
e@0 2160
e@0 2161 m = mse(test,reconstructed)
e@0 2162
e@0 2163 print "[II] MSE for clustering crossvalidated data %.2f-%.2f: %.5f" % ((1-p), p, m)
e@0 2164 MSEs.append(m)
e@0 2165
e@0 2166 print "[II] Crossvalidation complete"
e@0 2167
e@0 2168 return MSEs
e@0 2169
e@0 2170
e@0 2171
e@0 2172
e@0 2173 # Construct parameters alphabet, each symbol is going to be a different column vector
e@0 2174 # in parameter code matrix
e@0 2175
e@0 2176
e@0 2177 def vector_to_states(X):
e@0 2178 """
e@0 2179 Input: a vector MxN with N samples and M variables
e@0 2180 Output: a codeword dictionary `parameters_alphabet',
e@0 2181 state_seq, inverse `parameters_alphabet_inv' """
e@0 2182
e@0 2183
e@0 2184 parameters_alphabet = {}
e@0 2185 n = 0
e@0 2186
e@0 2187 for i in range(0, X.shape[1]):
e@0 2188 vec = tuple(ravel(X[:,i]))
e@0 2189 if vec not in parameters_alphabet:
e@0 2190 parameters_alphabet[vec] = n
e@0 2191 n += 1
e@0 2192
e@0 2193 parameters_alphabet_inv = dict([(parameters_alphabet[m],m) for m in parameters_alphabet])
e@0 2194
e@0  2195     state_seq = array([parameters_alphabet[tuple(ravel(X[:,m]))] for m in range(0, X.shape[1])])  # index X itself, not a global
e@0 2196
e@0 2197 return state_seq, parameters_alphabet, parameters_alphabet_inv
e@0 2198
e@0 2199
e@0 2200 def states_to_vector(predicted, parameters_alphabet_inv):
e@0 2201 estimated = matrix(zeros((len(parameters_alphabet_inv[0]), len(predicted))))
e@0 2202 for i in range(0, len(predicted)):
e@0 2203 # print "[II] Predicted: ", predicted[i]
e@0 2204 estimated[:, i] = matrix(parameters_alphabet_inv[predicted[i]]).T
e@0 2205
e@0 2206 return estimated
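# Round-trip sketch for the two helpers above (toy values, not project
# data): encoding columns to state indices and decoding them again
# recovers the original matrix.
#
#   V = matrix([[0., 0., 1., 1.],
#               [0., 1., 1., 0.]])
#   seq, alpha, alpha_inv = vector_to_states(V)   # seq is e.g. [0 1 2 3]
#   V_rec = states_to_vector(seq, alpha_inv)
#   print "[II] codebook size %d, lossless: %s" % (len(alpha), (V_rec == V).all())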
e@0 2207
e@0 2208 # state_seq, parameters_alphabet, parameters_alphabet_inv = vector_to_states(parameters_vector_upsampled_code)
e@0 2209
e@0 2210
e@0 2211 # parameters_change_variable = matrix(diff_and_pad(parameters_vector_upsampled)!=0).astype(int)
e@0 2212
e@0 2213 # changes_state_seq, changes_parameters_alphabet, changes_parameters_alphabet_inv = vector_to_states(parameters_change_variable)
e@0 2214
e@0 2215
e@0 2216 # This is an hmm that just codes the changes"
e@0 2217 # We have only two states, change and stay the same.
e@0 2218
e@0 2219 # Uncomment that here
e@0 2220
e@0 2221 # parameters_vector_upsampled = parameters_vector_upsampled_code
e@0 2222 # parameters_state_orig, parameter_state_alphabet_orig, parameter_state_alphabet_inv_orig = vector_to_states(parameters_vector)
e@0 2223 # TODO: HA
e@0  2224 # parameters_vector_upsampled = parameters_vector_upsampled_code
e@0 2225 parameters_state, parameter_state_alphabet, parameter_state_alphabet_inv = vector_to_states(parameters_vector_upsampled)
e@0 2226
e@0 2227
e@0 2228 # Remove outliers
e@0 2229
e@0 2230 ee = EllipticEnvelope()
e@0 2231
e@0 2232 # inliers = ee.fit(features_vector_upsampled.T).predict(features_vector_upsampled.T) == 1
e@0 2233
e@0 2234 inoutliers = zeros((len(parameters_state),))
e@0 2235 #
e@0 2236 for p in unique(parameters_state):
e@0 2237 ee = EllipticEnvelope(contamination=0.1)
e@0 2238 inoutliers[parameters_state == p] = ee.fit(features_vector_upsampled[:, parameters_state == p].T).predict(features_vector_upsampled[:, parameters_state == p].T)
e@0 2239 #
e@0 2240 #
e@0 2241 # Label inliers and outliers
e@0 2242
e@0 2243 inliers = inoutliers > 0
e@0 2244 outliers = inoutliers < 0
e@0 2245 #
e@0 2246 #
e@0 2247 #
e@0 2248
e@0 2249 # Do pre-processing and re-label the outliers
e@0 2250
e@0 2251 #completeavac = SinkHoleClassifier()
e@0 2252 #completeavac.fit(features_vector_upsampled[:, inliers].T, parameters_state[inliers])
e@0 2253
e@0 2254 #parameters_state_original = parameters_state.copy()
e@0 2255 #parameters_state[outliers] = completeavac.predict(features_vector_upsampled[:,outliers].T)
e@0 2256
e@0 2257
e@0 2258
e@0 2259
e@0 2260
e@0 2261 # features_vector_upsampled = features_vector_upsampled[:, inliers]
e@0 2262 # parameters_state = parameters_state[inliers]
e@0 2263
e@0 2264 # print "UNIQUE PARAMETERS STATE"
e@0 2265 # print unique(parameters_state)
e@0 2267 print "[II] Testing Gaussian Naive Bayes Classifier"
e@0 2268 gnb = GaussianNB()
e@0 2269 gnb.fit(features_vector_upsampled.T, parameters_state)
e@0 2270
e@0 2271
e@0 2272
e@0 2273 parameters_state_estimated = gnb.predict(features_vector_upsampled.T)
e@0 2274
e@0 2275 output = states_to_vector(parameters_state_estimated, parameter_state_alphabet_inv)
e@0 2276
e@0 2277 figure()
e@0 2278 subplot(211)
e@0 2279 plot(parameters_vector_upsampled.T)
e@0  2280 title('Parameter value')
e@0 2281 ylabel('value')
e@0 2282 xlabel('frame \#')
e@0 2283 subplot(212)
e@0 2284 #plot(smooth_matrix_1D(output).T)
e@0 2285 plot(output.T)
e@0 2286 ylabel('value')
e@0 2287 xlabel('frame \#')
e@0 2288
e@0 2289 errorvector_nbc = [mse(output[:,i], parameters_vector_upsampled[:,i]) for i in range(0, len(parameters_state_estimated))]
e@0 2290
e@0 2291
e@0 2292
e@0 2293
e@0 2294 #cross_validate_classification(features_vector_upsampled.T, parameters_state, gnb)
e@0 2295 # hc = happy_curve_classification(features_vector_upsampled.T, parameters_state, gnb)
e@0 2296
e@0 2297 #
e@0 2298 # figure()
e@0 2299 # plot(hc)
e@0 2300 # figure()
e@0 2301
e@0  2302 print "[II] Trying HMM-based classifiers"
e@0 2303
e@0 2304 # In order to do classification with HMMs, we need to:
e@0 2305 # 1. Split the parameters into classes
e@0 2306 # 2. Train one model per class
e@0 2307 # 3. Feed our data to all the models
e@0  2308 # 4. Check which has a better score and assign to M
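# A minimal toy sketch of steps 1-4 (same old-hmmlearn GaussianHMM API as
# the classifiers below; toy data, not part of the experiment):
#
#   X0 = random.randn(50, 2)                 # observations seen in class 0
#   X1 = random.randn(50, 2) + 5.0           # observations seen in class 1
#   m0 = hmm.GaussianHMM(n_components=1, covariance_type='full')
#   m1 = hmm.GaussianHMM(n_components=1, covariance_type='full')
#   m0.fit([X0])
#   m1.fit([X1])
#   x = random.randn(5, 2) + 5.0             # unseen chain
#   print "[II] predicted class: %d" % argmax([m0.score(x), m1.score(x)])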
e@0 2309 class SVMClassifier:
e@0 2310 def __init__(self,**kwargs):
e@0 2311 print "[II] SVM Classifier "
e@0 2312 # self.clf = svm.SVC(**kwargs)
e@0 2313 self.name = "SVM"
e@0 2314 self.smallname = "svm"
e@0 2315 self.clf = multiclass.OneVsOneClassifier(svm.SVC(**kwargs) )
e@0 2316
e@0 2317 def fit(self, X, classes):
e@0  2318         self.n_classes = max(unique(classes))+1  # stored; the local was previously unused
e@0 2319
e@0 2320 self.clf.fit(X,classes)
e@0 2321
e@0 2322 def predict(self, X):
e@0 2323 return self.clf.predict(X)
e@0 2324
e@0 2325 def getName(self):
e@0 2326 return self.name
e@0 2327
e@0 2328 def cross_validate(self, data, classes):
e@0  2329         print "[II] SVM Classifier Crossvalidating... "
e@0 2330 from copy import deepcopy
e@0 2331 estimator = deepcopy(self)
e@0 2332 estimator_fulldata = deepcopy(self)
e@0 2333 estimator_fulldata.fit(data, classes)
e@0 2334
e@0 2335 percents = arange(0.1,0.9,0.1)
e@0 2336 self.percents = percents * 100
e@0 2337
e@0 2338 RATIOS = []
e@0  2339         labels = estimator_fulldata.predict(data)
e@0 2340
e@0 2341
e@0 2342 print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(labels))
e@0 2343
e@0 2344 for p in percents:
e@0 2345 traindata,testdata,trainlabels,testlabels = cross_validation.train_test_split(data,classes,test_size=p,random_state=1)
e@0 2346
e@0 2347
e@0 2348 # print "[II] [%.2f-%.2f] Examples used for training: %s" % (p, (1-p), str(traindata))
e@0 2349 # print "[II] [%.2f-%.2f] Examples used for testing: %s" % (p, (1-p), str(testdata))
e@0 2350 # Training phase
e@0 2351
e@0 2352
e@0 2353
e@0 2354 estimator_ = deepcopy(estimator)
e@0 2355 estimator_.fit(traindata, trainlabels)
e@0 2356
e@0 2357 predicted_labels = estimator_.predict(testdata)
e@0 2358
e@0 2359 m = sum(array(predicted_labels==testlabels).astype(float))/len(testlabels)
e@0 2360
e@0  2361             print "[II] [train %.2f - test %.2f] hold-out validation for SVM Classifier: %.2f" % ((1-p), p, m)
e@0 2362
e@0 2363 RATIOS.append(m)
e@0 2364
e@0 2365 return RATIOS,percents
e@0 2366
e@0 2367
e@0 2368
e@0 2369
e@0 2370
e@0 2371
e@0 2372
e@0 2373 class HmmClassifier2:
e@0 2374 def __init__(self, N=2, chain_size=4, n_components = 1):
e@0 2375 print "[II] HMM Classifier (%d states, %d components)" % (N, n_components)
e@0 2376 self.name = "HMM (%d states, %d components)" % (N, n_components)
e@0 2377 self.n_components = n_components
e@0 2378 self.chain_size = chain_size
e@0 2379 self.hmms_ = []
e@0 2380 self.N = N
e@0 2381 self.n_states = N
e@0 2382
e@0 2383 def getName(self):
e@0 2384 return self.name
e@0 2385
e@0  2386     def fit(self, features, parameters):
e@0  2387         self.n_params = len(unique(parameters))
e@0  2388 
e@0  2389         for n in range(0, self.n_params):
e@0  2390             hmm_ = hmm.GaussianHMM(n_components = self.n_components, covariance_type='full')
e@0  2391 
e@0  2392             # Collect every chain of length chain_size that ends in parameter class n
e@0  2393             seqs = []
e@0  2394             for i in range(0, len(parameters)-self.chain_size):
e@0  2395                 seq = features[i:i+self.chain_size, :]
e@0  2396                 if parameters[i+self.chain_size-1] == n and seq.shape[0] == self.chain_size:
e@0  2397                     seqs.append(seq)
e@0  2398 
e@0  2399             # Fit once on all collected chains; the old per-sequence fit([seq])
e@0  2400             # re-estimated the model from scratch on every single chain
e@0  2401             if len(seqs) > 0:
e@0  2402                 hmm_.fit(seqs)
e@0  2403 
e@0  2404             self.hmms_.append(hmm_)
e@0 2404
e@0 2405 def predict(self, X):
e@0 2406 labels = zeros((X.shape[0],))
e@0 2407 N = self.N
e@0 2408
e@0  2409         scores = zeros((self.n_params, ))  # one model per parameter class, not per HMM state
e@0 2410
e@0 2411 for i in range(0, len(labels)):
e@0 2412 testdata = X[i:i+self.chain_size,:]
e@0 2413
e@0  2414             for j in range(0, self.n_params):
e@0 2415 scores[j] = self.hmms_[j].score(testdata)
e@0 2416 labels[i] = argmax(scores)
e@0 2417
e@0 2418 return labels
e@0 2419
e@0 2420 def cross_validate(self, data, classes):
e@0  2421         print "[II] HMM2 Classifier Crossvalidating... "
e@0 2422 from copy import deepcopy
e@0 2423 estimator = deepcopy(self)
e@0 2424 estimator_fulldata = deepcopy(self)
e@0 2425 estimator_fulldata.fit(data, classes)
e@0 2426
e@0 2427 percents = arange(0.1,0.9,0.1)
e@0 2428 self.percents = percents * 100
e@0 2429
e@0 2430 RATIOS = []
e@0  2431         labels = estimator_fulldata.predict(data)
e@0 2432
e@0 2433
e@0 2434 print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(labels))
e@0 2435
e@0 2436 for p in percents:
e@0 2437 traindata,testdata,trainlabels,testlabels = cross_validation.train_test_split(data,classes,test_size=p,random_state=1)
e@0 2438
e@0 2439
e@0 2440 # print "[II] [%.2f-%.2f] Examples used for training: %s" % (p, (1-p), str(traindata))
e@0 2441 # print "[II] [%.2f-%.2f] Examples used for testing: %s" % (p, (1-p), str(testdata))
e@0 2442
e@0 2443 # Training phase
e@0 2444
e@0 2445
e@0 2446
e@0 2447 estimator_ = deepcopy(estimator)
e@0 2448 estimator_.fit(traindata, trainlabels)
e@0 2449
e@0 2450 predicted_labels = estimator_.predict(testdata)
e@0 2451
e@0 2452 m = sum(array(predicted_labels==testlabels).astype(float))/len(testlabels)
e@0 2453
e@0  2454             print "[II] [train %.2f - test %.2f] hold-out validation for HMM2 Classifier: %.2f" % ((1-p), p, m)
e@0 2455
e@0 2456 RATIOS.append(m)
e@0 2457
e@0 2458 return RATIOS,percents
e@0 2459
e@0 2460
e@0 2461
e@0 2462
e@0 2463 class HmmClassifier:
e@0 2464 def __init__(self, N=2, n_components = 1):
e@0 2465 print "[II] HMM Classifier (%d states, %d components)" % (N, n_components)
e@0 2466 self.name = "HMM (%d states, %d components)" % (N, n_components)
e@0 2467 self.n_components = n_components
e@0 2468 self.chain_size = N
e@0 2469 self.hmms_ = []
e@0 2470 self.N = N
e@0 2471
e@0 2472 def getName(self):
e@0 2473 return self.name
e@0 2474
e@0 2475 def fit(self, X, states):
e@0 2476 self.n_states = len(unique(states))
e@0 2477
e@0 2478 for n in range(0, self.n_states):
e@0 2479 hmm_ = hmm.GaussianHMM(n_components = self.n_components, covariance_type = 'full')
e@0 2480
e@0 2481 # Get training data for each class
e@0 2482 vector = X[states == n,:]
e@0 2483
e@0 2484 # Fit the HMM
e@0 2485 # print vector
e@0 2486 hmm_.fit([vector])
e@0 2487
e@0 2488 # And append to the list
e@0 2489 self.hmms_.append(hmm_)
e@0 2490
e@0 2491 def predict(self,X):
e@0 2492 labels = zeros((X.shape[0],))
e@0 2493 N = self.N
e@0 2494
e@0 2495 m = 0
e@0 2496
e@0 2497 scores = zeros((1, self.n_states))
e@0 2498
e@0 2499
e@0 2500 while m*N < X.shape[0]:
e@0 2501 if (m+1)*N > X.shape[0]:
e@0 2502 testdata = X[m*N:,:]
e@0 2503 else:
e@0 2504 testdata = X[m*N:(m+1)*N,:]
e@0 2505
e@0 2506 # print testdata
e@0 2507
e@0 2508 for i in range(0, self.n_states):
e@0 2509 scores[0,i] = self.hmms_[i].score(testdata)
e@0 2510
e@0 2511 if (m+1)*N > X.shape[0]:
e@0 2512 labels[m*N:] = argmax(scores)
e@0 2513 else:
e@0 2514 labels[m*N:(m+1)*N] = argmax(scores)
e@0 2515
e@0 2516 m+= 1
e@0 2517
e@0 2518 return labels
e@0 2519
e@0 2520 # def cross_validate(self, data, classes, index_vector):
e@0 2521 # print "[II] Crossvalidating... "
e@0 2522 # from copy import deepcopy
e@0 2523 # estimator = deepcopy(self)
e@0 2524 # estimator_fulldata = deepcopy(self)
e@0 2525 # estimator_fulldata.fit(data, classes)
e@0 2526 #
e@0 2527 # percents = arange(0.1,0.9,0.1)
e@0 2528 # self.percents = percents * 100
e@0 2529 # RATIOS = []
e@0 2530 # labels = estimator.predict(data)
e@0 2531 #
e@0 2532 # print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(labels))
e@0 2533 #
e@0 2534 # for p in percents:
e@0 2535 # train,test,trainlabels,testlabels = cross_validation.train_test_split(data,classes,test_size=p,random_state=1)
e@0 2536 # estimator_ = deepcopy(estimator)
e@0 2537 # estimator_.fit(train, trainlabels)
e@0 2538 # labels = estimator_.predict(test)
e@0 2539 # m = sum(array(testlabels==labels).astype(float))/len(labels)
e@0 2540 # RATIOS.append(m)
e@0 2541 # print "[II] for training(%.2f)-testing(%.2f): %.2f" % ((1-p),p,sum(array(testlabels==labels).astype(float))/len(labels))
e@0 2542 #
e@0 2543 # return RATIOS, percents
e@0 2544
e@0 2545 def cross_validate(self, data, classes):
e@0  2546         print "[II] HMM Classifier Crossvalidating... "
e@0 2547 from copy import deepcopy
e@0 2548 estimator = deepcopy(self)
e@0 2549 estimator_fulldata = deepcopy(self)
e@0 2550 estimator_fulldata.fit(data, classes)
e@0 2551
e@0 2552 percents = arange(0.1,0.9,0.1)
e@0 2553 self.percents = percents * 100
e@0 2554
e@0 2555 RATIOS = []
e@0  2556         labels = estimator_fulldata.predict(data)
e@0 2557
e@0 2558
e@0 2559 print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(labels))
e@0 2560
e@0 2561 for p in percents:
e@0 2562 traindata,testdata,trainlabels,testlabels = cross_validation.train_test_split(data,classes,test_size=p,random_state=1)
e@0 2563
e@0 2564
e@0 2565 # print "[II] [%.2f-%.2f] Examples used for training: %s" % (p, (1-p), str(traindata))
e@0 2566 # print "[II] [%.2f-%.2f] Examples used for testing: %s" % (p, (1-p), str(testdata))
e@0 2567 # Training phase
e@0 2568
e@0 2569
e@0 2570
e@0 2571 estimator_ = deepcopy(estimator)
e@0 2572 estimator_.fit(traindata, trainlabels)
e@0 2573
e@0 2574 predicted_labels = estimator_.predict(testdata)
e@0 2575 m = sum(array(predicted_labels==testlabels).astype(float))/len(testlabels)
e@0 2576
e@0 2577
e@0 2578 # testdata = None
e@0 2579 #
e@0 2580 # traindata = None
e@0 2581 # trainlabels = None
e@0 2582 # testlabels = None
e@0 2583 #
e@0 2584 # for t in train:
e@0 2585 # if traindata is None:
e@0 2586 # traindata = data[index_vector == t, :]
e@0 2587 # trainlabels = classes[index_vector == t]
e@0 2588 # else:
e@0 2589 # traindata = append(traindata, data[index_vector == t, :], 0)
e@0 2590 # trainlabels = append(trainlabels, classes[index_vector ==t])
e@0 2591 #
e@0 2592 # estimator_ = deepcopy(estimator)
e@0 2593 # estimator_.fit(traindata, trainlabels)
e@0 2594 #
e@0 2595 #
e@0 2596 # for t in test:
e@0 2597 # if testdata is None:
e@0 2598 # testdata = data[index_vector == t, :]
e@0 2599 # testlabels = classes[index_vector == t]
e@0 2600 # else:
e@0 2601 # testdata = append(testdata, data[index_vector == t,:], 0)
e@0 2602 # testlabels = append(testlabels, classes[index_vector == t])
e@0 2603
e@0 2604
e@0 2605
e@0 2606 # predicted_labels = estimator_.predict(testdata)
e@0 2607
e@0 2609
e@0  2610             print "[II] [train %.2f - test %.2f] hold-out validation for HMM Classifier: %.2f" % ((1-p), p, m)
e@0 2611
e@0 2612 RATIOS.append(m)
e@0 2613
e@0 2614 return RATIOS,percents
e@0 2615
e@0 2616 def cross_validate_full_k_fold(data, classes, list_of_classifiers, K=10):
e@0 2617 """ K-Folds Cross-validates all available classifiers, plots and saves barplots. """
e@0 2618
e@0 2619 ratios = []
e@0 2620 for l in list_of_classifiers:
e@0 2621 ratio = l.k_fold_cross_validate(data, classes, K)
e@0 2622 ratios.append(ratio)
e@0 2623
e@0 2624 L = len(ratios)
e@0 2625
e@0 2626 ind = arange(L)
e@0 2627
e@0 2628 fig, ax = subplots()
e@0 2629 rects = []
e@0 2630
e@0 2631
e@0 2632
e@0 2633 width = 10/float(len(ratios))
e@0 2634
e@0 2635 colors = ['r', 'y', 'g', 'c']
e@0 2636
e@0 2637 labels_ = []
e@0 2638
e@0 2639 for i in range(0, len(ratios)):
e@0 2640 rects.append(ax.bar(i*width,ratios[i],width,color=colors[i % len(colors)]))
e@0 2641 labels_.append(list_of_classifiers[i].getName())
e@0 2642
e@0 2643 return ratios
e@0 2644
e@0 2645
e@0 2646
e@0 2647 def cross_validate_full(seq, classes, list_of_classifiers):
e@0 2648 """ Cross-validates all available classifiers, plots and saves barplots. """
e@0 2649
e@0 2650 ratios = []
e@0 2651 percents = []
e@0 2652 for l in list_of_classifiers:
e@0 2653 ratios_, percents_ = l.cross_validate(seq, classes)
e@0 2654 ratios.append(ratios_)
e@0 2655 percents.append(percents_)
e@0 2656
e@0 2657
e@0 2658 L = len(ratios[0])
e@0 2659
e@0 2660 ind = arange(L)
e@0 2661
e@0 2662 fig,ax = subplots()
e@0 2663
e@0 2664 rects = []
e@0 2665
e@0 2666 W = 0.8
e@0 2667 width = W/float(len(ratios))
e@0 2668
e@0 2669 colors = ['r', 'y', 'g', 'c']
e@0 2670
e@0 2671 labels_ = []
e@0 2672 for i in range(0, len(ratios)):
e@0 2673 rects.append(ax.bar(ind+i*width,ratios[i],width,color=colors[i % len(colors)]))
e@0 2674 labels_.append(list_of_classifiers[i].getName())
e@0 2675
e@0 2676 ax.legend(tuple(labels_))
e@0 2677
e@0 2678 ax.set_xticks(ind+W/2)
e@0 2679 ax.set_xticklabels(tuple(percents[0]*100))
e@0 2680 ax.set_xlabel("Percentage % of test data")
e@0 2681 ax.set_ylabel("Success ratio")
e@0 2682
e@0 2683
e@0 2684
e@0 2685
e@0 2686
e@0 2687
e@0 2688 # rects1 = ax.bar(ind,ratios[0],0.35,color='r')
e@0 2689 # rects2 = ax.bar(ind+0.351,ratios[1],0.35,color='y')
e@0 2690
e@0 2691
e@0 2692
e@0 2693
e@0 2694 return ratios
e@0 2695
e@0 2696
e@0 2697
e@0 2698
e@0 2699
e@0 2700
e@0 2701
e@0 2702 # hmmc = HmmClassifier(N = len(unique(parameters_state)), n_components = 1)
e@0 2703 # hmmc.fit(features_vector_upsampled.T, parameters_state)
e@0 2704 #
e@0 2705 # hmmc2 = HmmClassifier(N = 12, n_components = 4)
e@0 2706 # hmmc2.fit(features_vector_upsampled.T, parameters_state)
e@0 2707 #
e@0 2708 svmc = SVMClassifier(probability=True)
e@0 2709 svmc.fit(features_vector_upsampled.T, parameters_state)
e@0 2710 # # svmc.cross_validate(features_vector_upsampled.T, parameters_state, index_vector)
e@0 2711 #
e@0 2712 nbc = NBClassifier()
e@0 2713 nbc.fit(features_vector_upsampled.T, parameters_state)
e@0 2714 #
e@0 2715 #
e@0 2716 #hchmmc = happy_curve_classification(features_vector_upsampled.T, parameters_state, hmmc, Nd=100)
e@0 2717 # # cross_validate_classification(features_vector_upsampled.T, parameters_state, hmmc)
e@0 2718 #
e@0 2719 # #hmmc.cross_validate(features_vector_upsampled.T, parameters_state)
e@0 2720 #
e@0 2721
e@0 2722 hmmc3 = HmmClassifier3(N=2, n_components = max(unique(parameters_state)))
e@0 2723 #print(parameters_state)
e@0 2724 plot(parameters_state)
e@0 2725
e@0 2726
e@0 2727 print(shape(features_vector_upsampled))
e@0 2728 obs = hmmc3.fit(features_vector_upsampled.T, parameters_state)
e@0 2729
e@0 2730
e@0 2731
e@0 2732 # svmhmm = HmmSVMClassifier()
e@0 2733 # svmhmm.fit(features_vector_upsampled.T, parameters_state)
e@0 2734 # ratios = cross_validate_full(features_vector_upsampled.T, parameters_state, [nbc, svmc])
e@0 2735 # ratios = cross_validate_full_k_fold(features_vector_upsampled.T, parameters_state,
e@0 2736 # [hmmc3])
e@0 2737
e@0 2738 def approximate_chain_size(data,parameters):
e@0 2739 ratios = []
e@0 2740
e@0 2741 # chainsizes = arange(1, 40)
e@0 2742 # for cs in chainsizes:
e@0 2743 # print "[II] Trying chain size: %d" % cs
e@0 2744 # hmmc = HmmClassifier3(N=cs, n_components = 2)
e@0 2745 # ratios.append(hmmc.k_fold_cross_validate(data,parameters,K=10))
e@0 2746
e@0 2747
e@0 2748 componentssize = arange(1, 12)
e@0 2749
e@0 2750 for cs in componentssize:
e@0 2751 print "[II] Trying component size: %d" % cs
e@0 2752 hmmc = HmmClassifier3(N=2, n_components = cs)
e@0 2753 ratios.append(hmmc.k_fold_cross_validate(data, parameters, K=2))
e@0 2754
e@0 2755
e@0 2756
e@0 2757
e@0 2758 figure()
e@0 2759 plot(componentssize, ratios)
e@0  2760     xlabel('Number of components')
e@0 2761 ylabel('Success Ratio')
e@0  2762     title('2-fold cross-validation success ratio vs number of components')
e@0 2763
e@0 2764
e@0 2765 return ratios
e@0 2766
e@0 2767 # ratios = approximate_chain_size(features_vector_upsampled.T, parameters_state)
e@0 2768
e@0 2769 #predictedhmmc2 = hmmc2.predict(features_vector_upsampled.T)
e@0 2770
e@0 2771 # figure()
e@0 2772
e@0 2773
e@0 2774
e@0 2775
e@0 2776
e@0 2777 # hmms_ = []
e@0 2778 #
e@0 2779 # for n in range(0, len(parameter_state_alphabet)):
e@0 2780 # #hmm_ = hmm.GMMHMM(n_components = 1, n_mix = 2)
e@0 2781 # hmm_ = hmm.GaussianHMM(n_components = 1,covariance_type = 'full')
e@0 2782 #
e@0 2783 # # Get training data for each class
e@0 2784 # vector = features_vector_upsampled[:,parameters_state == n]
e@0 2785 #
e@0 2786 # #if vector.shape[1] < n_clusters:
e@0 2787 # # hmms_.append(None)
e@0 2788 # #else:
e@0 2789 #
e@0 2790 # hmm_.fit([vector.T])
e@0 2791 #
e@0 2792 # # Append to the list
e@0 2793 #
e@0 2794 # hmms_.append(hmm_)
e@0 2795 #
e@0 2796 # labels = zeros((features_vector_upsampled.shape[1],))
e@0 2797 #
e@0 2798 # N = 20
e@0 2799 # m = 0
e@0 2800 #
e@0 2801 # while m*N < features_vector_upsampled.shape[1]:
e@0 2802 #
e@0 2803 # scores = zeros((1, len(parameter_state_alphabet)))
e@0 2804 #
e@0 2805 # if (m+1)*N > features_vector_upsampled.shape[1]:
e@0 2806 # testdata = features_vector_upsampled[:,m*N:]
e@0 2807 # else:
e@0 2808 # testdata = features_vector_upsampled[:,m*N:(m+1)*N]
e@0 2809 #
e@0 2810 # for i in range(0, len(parameter_state_alphabet)):
e@0 2811 # if hmms_[i] is not None:
e@0 2812 # scores[0,i] = hmms_[i].score(testdata.T)
e@0 2813 # else:
e@0 2814 # scores[0,i] = -100000 # Very large negative score
e@0 2815 # if (m+1)*N >= features_vector_upsampled.shape[1]:
e@0 2816 # labels[m*N:] = argmax(scores)
e@0 2817 # else:
e@0 2818 # labels[m*N:(m+1)*N] = argmax(scores)
e@0 2819 #
e@0 2820 # m += 1
e@0 2821
e@0 2822
e@0 2823 # figure()
e@0 2824 #plot(labels.T)
e@0 2825
e@0 2826
e@0 2827 # labels = hmmc.predict(features_vector_upsampled.T)
e@0 2828 # estimated = states_to_vector(labels,parameter_state_alphabet_inv)
e@0 2829 # plot(estimated.T,'r--')
e@0 2830 #
e@0 2831 # title('Estimated parameter values')
e@0 2832 # legend(['Naive Bayes Classifier', 'HMM chain size %d (%.1fms)' % (N, float(N)/UpsamplingFactor*23.0)])
e@0 2833 #
e@0 2834 # ylabel('value')
e@0 2835 # xlabel('frame #')
e@0 2836 #
e@0 2837
e@0 2838 # close('all')
e@0 2839
e@0 2840 # plot(features_clustering_labels/float(max(features_clustering_labels)))
e@0 2841 # plot(parameters_vector_upsampled.T/max(ravel(parameters_vector_upsampled)))
e@0 2842
e@0 2843
e@0 2844 def plot_clusters(x, labels):
e@0 2845 figure()
e@0 2846 symbols = ['>', 'x', '.', '<','v']
e@0 2847 colors = ['b', 'r', 'g', 'm','c']
e@0 2848
e@0 2849 for r in range(0, x.shape[0]):
e@0 2850 scatter(x[r,0], x[r,1], c=colors[int(labels[r]) % len(colors)], marker=symbols[int(labels[r]) % len(symbols)])
e@0 2851
e@0 2852
e@0 2853 # plot_clusters(features_vector_upsampled.T, parameters_state)
e@0 2854 #
e@0 2855
e@0 2856 # SVM
e@0 2857
e@0 2858 class HmmClassifierSVN_Naive:
e@0 2859 def __init__(self, N=2, n_components = 1):
e@0 2860 self.n_components = n_components
e@0 2861 self.chain_size = N
e@0 2862 self.hmms_ = []
e@0 2863 self.N = N
e@0 2864
e@0 2865 def fit(self, X, states):
e@0 2866 self.n_states = len(unique(states))
e@0 2867
e@0 2868 for n in range(0, self.n_states):
e@0 2869 hmm_ = hmm.GaussianHMM(n_components = self.n_components, covariance_type = 'full')
e@0 2870
e@0 2871 # Get training data for each class
e@0 2872 vector = X[states == n,:]
e@0 2873
e@0 2874 # Fit the HMM
e@0 2875 # print vector
e@0 2876 hmm_.fit([vector])
e@0 2877
e@0 2878 # And append to the list
e@0 2879 self.hmms_.append(hmm_)
e@0 2880
e@0 2881 def predict(self,X):
e@0 2882 labels = zeros((X.shape[0],))
e@0 2883 N = self.N
e@0 2884
e@0 2885 m = 0
e@0 2886
e@0 2887 scores = zeros((1, self.n_states))
e@0 2888
e@0 2889
e@0 2890 while m*N < X.shape[0]:
e@0 2891 if (m+1)*N > X.shape[0]:
e@0 2892 testdata = X[m*N:,:]
e@0 2893 else:
e@0 2894 testdata = X[m*N:(m+1)*N,:]
e@0 2895
e@0 2896 # print testdata
e@0 2897
e@0 2898 for i in range(0, self.n_states):
e@0 2899 scores[0,i] = self.hmms_[i].score(testdata)
e@0 2900
e@0 2901 if (m+1)*N > X.shape[0]:
e@0 2902 labels[m*N:] = argmax(scores)
e@0 2903 else:
e@0 2904 labels[m*N:(m+1)*N] = argmax(scores)
e@0 2905
e@0 2906 m+= 1
e@0 2907
e@0 2908 return labels
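# NB: HmmClassifierSVN_Naive above duplicates HmmClassifier's fit/predict
# verbatim (minus getName and the cross-validation helpers); it is kept
# from an earlier stage of the SVM/HMM experiments.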
e@0 2909
e@0 2910
e@0 2911
e@0 2912
e@0 2913 def plot_parameters_estimation(list_of_estimators):
e@0 2914 for e in list_of_estimators:
e@0 2915 name_ = e.getName()
e@0 2916
e@0 2917
e@0 2918
e@0 2919 fig = figure()
e@0 2920 fig.suptitle('Parameters estimation using %s' % name_, fontweight='bold')
e@0 2921 subplot(311)
e@0 2922 title ('original parameters')
e@0 2923 param_real = states_to_vector(parameters_state,parameter_state_alphabet_inv).T
e@0 2924 plot(param_real)
e@0 2925 xlabel('sample')
e@0 2926 ylabel('normalized parameter value')
e@0 2927 legend(parameter_captions)
e@0 2928 subplot(312)
e@0 2929 title('estimated parameters')
e@0 2930 pred = e.predict(features_vector_upsampled.T)
e@0 2931 param_est = states_to_vector(pred,parameter_state_alphabet_inv).T
e@0 2932 plot(param_est)
e@0 2933 xlabel('sample')
e@0 2934 ylabel('normalized parameter value')
e@0 2935 legend(parameter_captions)
e@0 2936 subplot(313)
e@0 2937 error_real_est = zeros((len(param_est),))
e@0 2938 for i in range(0, len(error_real_est)):
e@0 2939 error_real_est[i] = mse(param_real[i],param_est[i])
e@0 2940
e@0 2941 totalmse = sum(error_real_est)
e@0 2942 title('mean squared error (total: %.3f)' % totalmse)
e@0 2943
e@0 2944 plot(error_real_est)
e@0 2945 xlabel('sample')
e@0 2946 ylabel('normalized parameter value')
e@0 2947
e@0 2948
e@0 2949
e@0 2950 # hmm2 = hmm.MultinomialHMM(n_components = n_classes, startprob = prior_p_q, transmat = transmat )
e@0 2951
e@0 2952
e@0 2953
e@0 2954 # TODO: Embed SVM emissions to HMM. Try to do pick maximum likelihood sequence from
e@0 2955 # classifiers like in HmmClassifier3. For every class, train an SVM, with n_components as
e@0 2956 # states, and compute the log-likelihood for prediction. Should work.
e@0 2957
e@0 2958
e@0 2959
e@0 2960 # class HMMsvmClassifier:
e@0 2961 # def __init__(self, N=6):
e@0 2962 # print "[II] HMM-SVM Classifier (%d time steps)" % N
e@0 2963 # self.name = "HMM-SVM (%d time steps)" % N
e@0 2964 # # self.n_components = n_components
e@0 2965 # self.chain_size = N
e@0 2966 # self.svm = MyAVAClassifier()
e@0 2967 #
e@0 2968 #
e@0 2969 # def getName(self):
e@0 2970 # return self.name
e@0 2971 #
e@0 2972 # def fit(self, features, parameters):
e@0 2973 #
e@0 2974 # # First train emission svm
e@0 2975 #
e@0 2976 # self.svm.fit(features, parameters)
e@0 2977 #
e@0 2978 # # print parameters
e@0 2979 # n_classes = max(unique(parameters)) + 1
e@0 2980 #
e@0 2981 # print "[II] Number of classes: ", n_classes
e@0 2982 # hmms = [None]*n_classes
e@0 2983 # chain_size = self.chain_size
e@0 2984 # params = [None]*n_classes
e@0 2985 # obs = [None]*n_classes
e@0 2986 #
e@0 2987 # for i in range(chain_size, len(parameters)):
e@0 2988 # class_ = parameters[i-1]
e@0 2989 # params_ = parameters[i-chain_size:i]
e@0 2990 # feats_ =features[i-chain_size:i,:]
e@0 2991 #
e@0 2992 # if params[class_] is None:
e@0 2993 # params[class_] = [params_]
e@0 2994 # obs[class_] = [feats_]
e@0 2995 # else:
e@0 2996 # params[class_].append(params_)
e@0 2997 # obs[class_].append(feats_)
e@0 2998 #
e@0 2999 #
e@0 3000 #
e@0 3001 # for i in range(0, len(params)):
e@0 3002 # if params[i] is not None and len(params[i]) != 0:
e@0 3003 # hmm_ = HMMsvm(self.svm)
e@0 3004 # # print "[II] %d Fitting params: " % i, params[i]
e@0 3005 # print "[!!] OBSERVATIONS: "
e@0 3006 # print obs[i]
e@0 3007 # hmm_.fit(obs[i], params[i])
e@0 3008 #
e@0 3009 # print "[!!] PARAMETERS: "
e@0 3010 # print params[i]
e@0 3011 #
e@0 3012 # # TODO: Left here on 06/07 19:56
e@0 3013 #
e@0 3014 # # hmm_.fit(features,)
e@0 3015 # # print "obs: ", obs[i]
e@0 3016 # # print "params: ", params[i]
e@0 3017 # # hmm_.fit(obs[i], params[i],flr=1e-9)
e@0 3018 # hmms[i] = hmm_
e@0 3019 #
e@0 3020 # self.hmms = hmms
e@0 3021 #
e@0 3022 # return obs
e@0 3023 # def predict(self, features, mfilt=20):
e@0 3024 # chain_size = self.chain_size
e@0 3025 # hmms = self.hmms
e@0 3026 # predicted_classes = zeros((features.shape[0],))
e@0 3027 # for i in range(chain_size, features.shape[0]):
e@0 3028 # scores = zeros((len(hmms),))
e@0 3029 #
e@0 3030 # seq = features[i-chain_size:i, :]
e@0 3031 #
e@0 3032 # for j in range(0, len(hmms)):
e@0 3033 # if hmms[j] is not None:
e@0 3034 # q,p = hmms[j].logviterbi(seq)
e@0 3035 #
e@0 3036 # scores[j] = logsumexp(p)
e@0 3037 # else:
e@0 3038 # scores[j] = -infty
e@0 3039 #
e@0 3040 # predicted_classes[i] = argmax(scores)
e@0 3041 #
e@0 3042 # # Do a median filter at the end
e@0 3043 #
e@0 3044 # # predicted_classes = median_filter(predicted_classes,mfilt)
e@0 3045 #
e@0 3046 # return predicted_classes
e@0 3047
e@0 3048 class HmmSvmClassifier3:
e@0 3049 def __init__(self, N=2,n_components = 1):
e@0 3050 print "[II] HMM/SVM Classifier (%d states, %d components)" % (N, n_components)
e@0  3051         self.name = "HMM/SVM (%d time steps, %d components)" % (N, n_components)
e@0 3052 self.n_components = n_components
e@0 3053 self.smallname = "hmmsvmc"
e@0 3054 self.chain_size = N
e@0 3055
e@0 3056 def getName(self):
e@0 3057 return self.name
e@0 3058
e@0 3059 def fit(self, features, parameters):
e@0 3060
e@0 3061
e@0 3062
e@0 3063 # print parameters
e@0 3064 n_classes = max(unique(parameters)) + 1
e@0 3065
e@0 3066 if n_classes == 1:
e@0 3067 self.only_one_class = True
e@0 3068 return
e@0 3069 else:
e@0 3070 self.only_one_class = False
e@0 3071
e@0 3072 print "[II] Number of classes: ", n_classes
e@0 3073 hmms = [None]*n_classes
e@0 3074 # svms = [None]*n_classes
e@0 3075 chain_size = self.chain_size
e@0 3076 obs = [None]*n_classes
e@0 3077 cls = [None]*n_classes
e@0 3078
e@0 3079 for i in range(chain_size, len(parameters)):
e@0 3080 class_ = parameters[i-1]
e@0 3081 seq = features[i-chain_size:i,:]
e@0 3082 params = parameters[i-chain_size:i]
e@0 3083
e@0 3084 if obs[class_] is None:
e@0 3085 obs[class_] = [seq]
e@0 3086 cls[class_] = [params]
e@0 3087 else:
e@0 3088 obs[class_].append(seq)
e@0 3089 cls[class_].append(params)
e@0 3090
e@0 3091
e@0 3092
e@0 3093 for i in range(0, len(obs)):
e@0 3094 if obs[i] is not None and len(obs[i]) != 0:
e@0 3095 # hmm_ = hmm.GaussianHMM(n_components=self.n_components, covariance_type='full')
e@0 3096 svm_ = MyAVAClassifier()
e@0  3097                 # print obs[i]
e@0  3098                 # print cls[i]
e@0  3099 
e@0  3100                 O = obs[i][0]
e@0  3101                 C = cls[i][0]
e@0  3102 
e@0  3103                 # Stack the remaining chains along time (axis 0); the old code
e@0  3104                 # appended along axis 1, which re-added the first chain and
e@0  3105                 # broke the sample/label alignment the SVM expects
e@0  3106                 for j in range(1, len(obs[i])):
e@0  3107                     O = append(O, obs[i][j], 0)
e@0  3108                     C = append(C, cls[i][j])
e@0  3109 
e@0  3110                 svm_.fit(O, C)
e@0 3111 hmm_ = HMMsvm(svm_)
e@0 3112 hmm_.fit(obs[i],cls[i])
e@0 3113 hmms[i] = hmm_
e@0 3114
e@0 3115 self.hmms = hmms
e@0 3116
e@0 3117 return obs
e@0 3118
e@0 3119 def predict(self, features, mfilt=20):
e@0 3120
e@0 3121 if self.only_one_class == True:
e@0 3122 return zeros((features.shape[0], ))
e@0 3123
e@0 3124 chain_size = self.chain_size
e@0 3125 hmms = self.hmms
e@0 3126 predicted_classes = zeros((features.shape[0],))
e@0 3127
e@0 3128
e@0 3129 for i in range(chain_size, features.shape[0]):
e@0 3130 scores = zeros((len(hmms),))
e@0 3131
e@0 3132 seq = features[i-chain_size:i, :]
e@0 3133
e@0 3134 for j in range(0, len(hmms)):
e@0 3135 if hmms[j] is not None:
e@0 3136 print hmms[j].score(seq)
e@0 3137 scores[j] = hmms[j].score(seq)[j]
e@0 3138 else:
e@0 3139 scores[j] = -infty
e@0 3140
e@0 3141 predicted_classes[i] = argmax(scores)
e@0 3142
e@0 3143 # Do a median filter at the end
e@0 3144
e@0 3145 # predicted_classes = median_filter(predicted_classes,mfilt)
e@0 3146
e@0 3147 return predicted_classes
e@0 3148
e@0 3149
e@0 3150
e@0 3151
e@0 3152
e@0 3153
e@0 3154 # newstates = self.logviterbi()
e@0 3155
e@0 3156 #
e@0 3157 #
e@0 3158 # # Alphas and Betas
e@0 3159 # X = features
e@0 3160 # alphas = zeros((T,n_classes))
e@0 3161 # betas = zeros((T,n_classes))
e@0 3162 #
e@0 3163 # forward_messages = zeros((T,n_classes))
e@0 3164 # backward_messages = zeros((T, n_classes))
e@0 3165 #
e@0 3166 # print "[II] Computing alpha, beta..."
e@0 3167 # for t in range(1, T+1):
e@0 3168 # forward_messages[t-1,:] = self._forward_message(forward_messages[t-2,:],X[0:t,])
e@0 3169 # backward_messages[-t,:] = self._backward_message(backward_messages[-t+1], X[-t:,:])
e@0 3170 #
e@0 3171 #
e@0 3172 # print "[II] Computing ksu..."
e@0 3173 #
e@0 3174 # a_norm = forward_messages[-1,n_classes-1] # alpha_T
e@0 3175 #
e@0 3176 # ksu = zeros((T, n_classes, n_classes))
e@0 3177 #
e@0 3178 # for i in range(0, n_classes):
e@0 3179 # for j in range(0, n_classes):
e@0 3180 # for t in range(0, T-1):
e@0 3181 # ksu[t,i,j] = exp(forward_messages[t, i] + log(transmat[i,j]) + log(self.estimate_emission_probability(X[t,:],j)) + backward_messages[t+1,j] - a_norm)
e@0 3182 #
e@0 3183 #
e@0 3184 #
e@0 3185 # self.alphas = forward_messages
e@0 3186 # self.betas = backward_messages
e@0 3187 # self.ksu = ksu
e@0 3188 #
e@0 3189 # transmat_new = zeros((n_classes,n_classes))
e@0 3190 #
e@0 3191 # for i in range(0, n_classes):
e@0 3192 #
e@0 3193 # for j in range(0, n_classes):
e@0 3194 # transmat_new[i,j] = sum(ksu[:,i,j])/sum(ksu[:,i,:])
e@0 3195 #
e@0 3196 # self.transmat_new = transmat_new
e@0 3197 #
e@0 3198
e@0 3199
e@0 3200
e@0 3201
e@0 3202
e@0 3203
e@0 3204
e@0 3205
e@0 3206
e@0 3207
e@0 3208
e@0 3209 #
e@0 3210 # def forward(self, X):
e@0 3211 # # Compute log likelihood using the forward algorithm
e@0 3212 #
e@0 3213 # # Number of states
e@0 3214 # N = self.n_classes
e@0 3215 #
e@0 3216 # # Number of Observations
e@0 3217 # T = X.shape[0]
e@0 3218 #
e@0 3219 #
e@0 3220 # transmat = self.transmat
e@0 3221 #
e@0 3222 # # Initialization
e@0 3223 # # a1 = ones((N,))/N
e@0 3224 # a1 = self.prior
e@0 3225 #
e@0 3226 # alpha = zeros((N,))
e@0 3227 # for i in range(0, N):
e@0 3228 # alpha[i] = log(a1[i])+log(self.estimate_emission_probability(X[0,:], i))
e@0 3229 # # print alpha
e@0 3230 # # Recursion
e@0 3231 # for t in range(0, T):
e@0 3232 # alpha_old = alpha.copy()
e@0 3233 # x = X[t, :]
e@0 3234 # for j in range(0, N):
e@0 3235 # tosum = zeros((N,))
e@0 3236 # for i in range(0, N):
e@0 3237 # tosum[i] = alpha_old[i]+log(transmat[i,j])
e@0 3238 #
e@0 3239 # # Sum = logsum(tosum)
e@0 3240 # Sum = logsumexp(tosum)
e@0 3241 # bj = self.estimate_emission_probability(x, j)
e@0 3242 #
e@0 3243 # alpha[j] = Sum+log(bj)
e@0 3244 # # print alpha
e@0 3245 #
e@0 3246 # tosum = zeros((N,))
e@0 3247 # for i in range(0, N):
e@0 3248 # tosum[i] = alpha[i] + log(transmat[i,N-1])
e@0 3249 #
e@0 3250 # return tosum
e@0 3251 #
e@0 3252 # def backward(self, X):
e@0 3253 # # Number of states
e@0 3254 # N = self.n_classes
e@0 3255 #
e@0 3256 # # Number of Observations
e@0 3257 # T = X.shape[0]
e@0 3258 #
e@0 3259 # transmat = self.transmat
e@0 3260 #
e@0 3261 # # Initialization
e@0 3262 #
e@0 3263 # b1 = ones((N,))/N
e@0 3264 #
e@0 3265 # beta = zeros((N,))
e@0 3266 # for i in range(0, N):
e@0 3267 # beta[i] = log(b1[i])+log(self.estimate_emission_probability(X[-1,:], i))
e@0 3268 #
e@0 3269 # for t in range(0, T):
e@0 3270 # beta_old = beta.copy()
e@0 3271 # x = X[-t, :]
e@0 3272 # for j in range(0, N):
e@0 3273 # tosum = zeros((N, ))
e@0 3274 # for i in range(0, N):
e@0 3275 # tosum[i] = beta_old[i]+log(transmat[i,j])
e@0 3276 #
e@0 3277 # Sum = logsumexp(tosum)
e@0 3278 # bj = self.estimate_emission_probability(x, j)
e@0 3279 # beta[j] = Sum+log(bj)
e@0 3280 #
e@0 3281 # tosum = zeros((N,))
e@0 3282 #
e@0 3283 # for i in range(0, N):
e@0 3284 # tosum[i] = beta[i] + log(transmat[i,0])
e@0 3285 #
e@0 3286 # #3 p = logsumexp(tosum)
e@0 3287 #
e@0 3288 # return tosum
e@0 3289
e@0 3290
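# NB: because only comments and blank lines intervene since the class body
# above, the two methods below still attach to HmmSvmClassifier3, but they
# reference attributes (n_classes, prior, transmat,
# estimate_emission_probability) that this class never sets; they are
# leftovers of the commented-out classifier around them.
# _likelihood implements the standard forward recursion in the probability
# domain,
#
#   alpha_t(j) = [ sum_i alpha_{t-1}(i) * a_ij ] * b_j(x_t),
#
# while _log_likelihood depends on the (commented-out) log-domain forward().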
e@0 3291 def _log_likelihood(self, X):
e@0 3292
e@0 3293 return logsumexp(self.forward(X))
e@0 3294
e@0 3295
e@0 3296
e@0 3297
e@0 3298
e@0 3299 def _likelihood(self, X):
e@0 3300 # Number of states
e@0 3301 N = self.n_classes
e@0 3302
e@0 3303 # Number of Observations
e@0 3304 T = X.shape[0]
e@0 3305
e@0 3306
e@0 3307 transmat = self.transmat
e@0 3308
e@0 3309 # Initialization
e@0 3310 # a1 = ones((N,))/N
e@0 3311 a1 = self.prior
e@0 3312
e@0 3313 alpha = zeros((N,))
e@0 3314 for i in range(0, N):
e@0 3315 alpha[i] = a1[i]*self.estimate_emission_probability(X[0,:], i)
e@0 3316
e@0 3317 # Recursion
e@0 3318 print alpha
e@0 3319 for t in range(1, T):
e@0 3320 alpha_old = alpha.copy()
e@0 3321 x = X[t, :]
e@0 3322 for j in range(0, N):
e@0 3323 Sum = 0
e@0 3324 for i in range(0, N):
e@0 3325 Sum += alpha_old[i]*transmat[i,j]
e@0 3326
e@0 3327 alpha[j] = Sum*self.estimate_emission_probability(x, j)
e@0 3328 print alpha
e@0 3329
e@0 3330
e@0 3331 # Termination
e@0 3332
e@0 3333 Sum = 0
e@0 3334 for i in range(0, N):
e@0 3335 Sum += alpha[i]*transmat[i, N-1]
e@0 3336
e@0 3337 p = Sum
e@0 3338
e@0 3339 return p
e@0 3340
e@0 3341
e@0 3342
e@0 3343
e@0 3344
e@0 3345
e@0 3346
e@0 3347 # def fit(self, X, states):
e@0 3348 # # print parameters
e@0 3349 # n_classes = max(unique(states)) + 1
e@0 3350 #
e@0 3351 # # svms = [None]*n_classes
e@0 3352 # obs = [None]*n_classes
e@0 3353 # sta = [None]*n_classes
e@0 3354 # chain_size = self.chain_size
e@0 3355 #
e@0 3356 # #22 obs = None
e@0 3357 # # sta = None
e@0 3358 #
e@0 3359 # print "[II] Number of classes: ", n_classes
e@0 3360 #
e@0 3361 # # For each class:
e@0 3362 # # Concatenate examples
e@0 3363 # # Fit SVM
e@0 3364 #
e@0 3365 # for i in range(chain_size, len(states)):
e@0 3366 # class_ = states[i-1]
e@0 3367 # seq = X[i-chain_size:i, :]
e@0 3368 # states_ = states[i-chain_size:i]
e@0 3369 #
e@0 3370 #
e@0 3371 # if obs[class_] is None:
e@0 3372 # obs[class_] = [seq]
e@0 3373 # sta[class_] = [states_]
e@0 3374 # self.svms.append(MyAVAClassifier())
e@0 3375 # else:
e@0 3376 # obs[class_].append(seq)
e@0 3377 # sta[class_].append(states_)
e@0 3378 #
e@0 3379 #
e@0 3380 # for i in range(0, len(obs)):
e@0 3381 #
e@0 3382 # o = None
e@0 3383 # s = None
e@0 3384 #
e@0 3385 # for j in range(0, len(obs[i])):
e@0 3386 # if o is None:
e@0 3387 # o = obs[i][j]
e@0 3388 # s = sta[i][j]
e@0 3389 #
e@0 3390 # else:
e@0 3391 # o = append(o, obs[i][j],0)
e@0 3392 # s = append(s, sta[i][j])
e@0 3393 #
e@0 3394 #
e@0 3395 # self.svms[i].fit(o, s)
e@0 3396
e@0 3397 # def predict(self, features):
e@0 3398 # chain_size = self.chain_size
e@0 3399 # svms = self.svms
e@0 3400 #
e@0 3401 # predicted_classes = zeros((features.shape[0],))
e@0 3402 # for i in range(chain_size, features.shape[0]):
e@0 3403 # scores = zeros((len(svms)))
e@0 3404 #
e@0 3405 # seq = features[i-chain_size:i, :]
e@0 3406 #
e@0 3407 # for j in range(0, len(svms)):
e@0 3408 # if svms[j] is not None:
e@0 3409 # scores[j] = svms[j].compute_log_likelihood(seq)
e@0 3410 # else:
e@0 3411 # scores[k] = -infty
e@0 3412 # predicted_classes[i] = argmax(scores)
e@0 3413 #
e@0 3414 # return predicted_classes
e@0 3415
e@0 3416
e@0 3417
e@0 3418
e@0 3419
e@0 3420 # Marginalize over the latent variable
e@0 3421 # for i in range(0, X.shape[0]):
e@0 3422 # P = zeros((self.n_classes,))
e@0 3423 # x = X[i,:]
e@0 3424 # for j in range(0, len(P)):
e@0 3425 # P[j] = self.estimate_emission_probability(j, x)
e@0 3426 #
e@0 3427 # S[i] = log(sum(P[j]))
e@0 3428 #
e@0 3429 # return sum(S)
e@0 3430
e@0 3431
e@0 3432
e@0 3433
e@0 3434 # Continue from here
e@0 3435 X = features_vector_upsampled.T
e@0 3436 y = parameters_state
e@0 3437
e@0 3438 # clf = svm.SVC()
e@0 3439 # clf.fit(X,y)
e@0 3440
e@0 3441
e@0 3442 # parameters_state_y = clf.predict(X)
e@0 3443
e@0 3444
e@0 3445 predhmmc3 = hmmc3.predict(features_vector_upsampled.T)
e@0 3446
e@0 3447 myava = MyAVAClassifier()
e@0 3448 myava.fit(features_vector_upsampled.T, parameters_state)
e@0 3449
e@0 3450 predmyava = myava.predict(features_vector_upsampled.T)
e@0 3451
e@0 3452 # hmmsvmc = HMMsvmClassifier(N=2)
e@0 3453 # hmmsvmc.fit(features_vector_upsampled.T, parameters_state)
e@0 3454 #predhmmsvmc = hmmsvmc.predict(features_vector_upsampled.T)
e@0 3455
e@0 3456 obsc = MyAVAClassifier()
e@0 3457 obsc.fit(features_vector_upsampled.T, parameters_state)
e@0 3458 # hmm2 = hmm.GaussianHMM(n_components=6)
e@0 3459
e@0 3460 # hmm2.fit([features_vector_upsampled.T])
e@0 3461 # hmm3 = HMMsvm(obsc)
e@0 3462 # hmm3.fit([features_vector_upsampled.T],[parameters_state])
e@0 3463 # predhmmsvm = hmm3.predict(features_vector_upsampled.T)
e@0 3464
e@0 3465 # hmmsvmc = HMMsvmClassifier()
e@0 3466
e@0 3467 hmmsvmc = HMMsvmClassifier()
e@0 3468 # hmmsvmc = HmmSvmClassifier3()
e@0 3469 hmmsvmc.fit(features_vector_upsampled.T, parameters_state)
e@0 3470 predhmmsvm = hmmsvmc.predict(features_vector_upsampled.T)
e@0 3471 plot_parameters_estimation([nbc, myava, hmmc3, hmmsvmc])
e@0 3472
e@0 3473
e@0 3474 #plot(parameters)
e@0 3475 figure()
e@0 3476 plot(parameters_state,'b')
e@0 3477
e@0 3478 # plot(parameters_state_y,'r--')
e@0 3479 plot(predhmmc3,'g+-')
e@0 3480
e@0 3481 plot(predhmmsvm, 'co-')
e@0 3482
e@0 3483 # plot(predhmmsvmc, 'bo-')
e@0 3484
e@0  3485 legend(['Real', 'HMM', 'HMM-SVM'])
e@0 3486
e@0 3487 xlabel('sample')
e@0 3488 ylabel('state')
e@0 3489 # TODO, HMM with SVN, Cross-validation
e@0 3490
e@0 3491 # Using hidden markov svm
e@0 3492
e@0 3493 svmhmm = ""
e@0 3494
e@0 3495 # print "[II] Success ratio for SVM-RBF Classifier: %.2f" % (sum(parameters_state==parameters_state_y).astype(float)/float(len(parameters_state)))
e@0 3496
e@0 3497 print "[II] All-vs-All custom Support Vector Classifier Success Ratio: %.2f " %(sum(predmyava==parameters_state).astype(float)/len(parameters_state))
e@0 3498 print "[II] Success ratio for HMM: %.2f" % (sum(parameters_state==predhmmc3).astype(float)/float(len(predhmmc3)))
e@0 3499 print "[II] Success ratio for HMM-SVM: %.2f" % (sum(parameters_state==predhmmsvm).astype(float)/float(len(predhmmsvm)))
e@0 3500
e@0 3501
e@0 3502
e@0 3503
e@0 3504
e@0 3505 model_ghmm = ReverbModel("GaussianHMM Classifier Model", kernel, q, features_to_keep, hmmc3, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector)
e@0 3506 model_gnb = ReverbModel("Naive Bayes Classifier Model", kernel, q, features_to_keep, nbc, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector )
e@0 3507 model_svm = ReverbModel("AVA LinearSVM Classifier Model", kernel, q, features_to_keep, myava, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector )
e@0  3508 model_hmmsvm = ReverbModel("HMM-SVM Classifier Model", kernel, q, features_to_keep, hmmsvmc, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector )
e@0 3509
e@0 3510 model_ghmm.save("model_ghmm.dat")
e@0 3511 model_gnb.save("model_gnb.dat")
e@0 3512 model_svm.save("model_svm.dat")
e@0 3513 model_hmmsvm.save("model_hmmsvm.dat")
e@0 3514
e@0 3515
e@0 3516
e@0 3517 def split_data_to_k_samples(data, params, K):
e@0 3518 L = shape(data)[0]
e@0 3519 M = int(round(L/float(K)))
e@0 3520
e@0 3521 samples_data = []
e@0 3522 samples_parameters = []
e@0 3523 for k in range(K):
e@0  3524         if k == K - 1:  # last chunk takes the remainder so no trailing rows are dropped
e@0 3525 samples_data.append(data[k*M:,:])
e@0 3526 samples_parameters.append(params[k*M:])
e@0 3527 else:
e@0 3528 samples_data.append(data[k*M:(k+1)*M,:])
e@0 3529 samples_parameters.append(params[k*M:(k+1)*M])
e@0 3530
e@0 3531 return samples_data, samples_parameters
e@0 3532
e@0 3533 def join_data(data, params):
e@0 3534 L = 0
e@0 3535 for p in params:
e@0 3536 L += len(p)
e@0 3537
e@0 3538 new_data = zeros((L, data[0].shape[1]))
e@0 3539 new_params = zeros((L,))
e@0 3540
e@0 3541 idx = 0
e@0 3542 for n in range(0,len(data)):
e@0 3543 new_data[idx:idx+data[n].shape[0],:] = data[n]
e@0 3544 new_params[idx:idx+len(params[n])] = params[n]
e@0 3545 idx += len(params[n])
e@0 3546
e@0 3547 return new_data, new_params
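# Round-trip sketch for the two helpers above (toy shapes, not project
# data): splitting into K chunks and re-joining preserves the dataset.
#
#   D = random.randn(10, 3)
#   P = arange(10).astype(float)
#   sd, sp = split_data_to_k_samples(D, P, 3)
#   D2, P2 = join_data(sd, sp)
#   print "[II] round-trip intact: %s" % ((D2 == D).all() and (P2 == P).all())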
e@0 3548
e@0 3549 cross_validation_ratios = {}
e@0 3550 cross_validation_mses = {}
e@0 3551
e@0 3552
e@0 3553 features_samples, parameters_samples = split_data_to_k_samples(features_vector_upsampled.T, parameters_state, 5)
e@0 3554
e@0 3555 from sklearn.cross_validation import KFold
e@0 3556
e@0 3557 from sklearn import metrics
e@0 3558
e@0 3559
e@0 3560
e@0 3561 KF = 10
e@0 3562
e@0 3563 s1,s2 = split_data_to_k_samples(features_vector_upsampled.T, parameters_state, KF)
e@0 3564 r1,r2 = join_data(s1,s2)
e@0 3565
e@0 3566 kf = KFold(KF, n_folds=KF) # Leave one out
e@0 3567
e@0 3568 trial = 1.0
e@0 3569
e@0 3570 rhmmc = 0
e@0 3571 rgnbc = 0
e@0 3572 ravac = 0
e@0 3573 rhmmsvmc = 0
e@0 3574
e@0 3575
e@0 3576 msegbnc = 0
e@0 3577 msehmmc = 0
e@0 3578 mseavac = 0
e@0 3579 msehmmsvmc = 0
e@0 3580
e@0 3581 fullratios = []
e@0 3582 fullmses = []
e@0 3583
e@0 3584
e@0 3585 def calculate_mse(pred, real):
e@0 3586 realparams = states_to_vector(real, parameter_state_alphabet_inv)
e@0 3587 estparams = states_to_vector(pred, parameter_state_alphabet_inv)
e@0 3588 return mse(realparams, estparams)
e@0 3589 ratios_gnb = []
e@0 3590 ratios_hmm = []
e@0 3591 ratios_svm = []
e@0 3592 ratios_hmmsvm = []
e@0 3593 ratios_sinkhole = []
e@0 3594
e@0 3595 mse_gnb = []
e@0 3596 mse_hmm = []
e@0 3597 mse_svm = []
e@0 3598 mse_hmmsvm = []
e@0 3599 mse_sinkhole = []
e@0 3600
e@0 3601
e@0 3602 for train, test in kf:
e@0 3603
e@0 3604 train_idx = 100
e@0 3605
e@0 3606 print train,test
e@0 3607 f1 = []
e@0 3608 p1 = []
e@0 3609
e@0 3610 tf1 = []
e@0 3611 tp1 = []
e@0 3612
e@0  3613     for i in train:        # KFold yields 0-based indices; the old s1[i-1] wrapped fold 0 around to the last chunk
e@0  3614         f1.append(s1[i])
e@0  3615         p1.append(s2[i])
e@0  3616 
e@0  3617     for i in test:
e@0  3618         tf1.append(s1[i])
e@0  3619         tp1.append(s2[i])
e@0 3620
e@0 3621 new_trainset_data, new_trainset_params = join_data(f1, p1)
e@0 3622 new_trainset_params = new_trainset_params.astype(int)
e@0 3623
e@0 3624
e@0 3625 # Detect and remove outliers
e@0 3626
e@0 3627 # ee = EllipticEnvelope()
e@0 3628
e@0 3629 # ee.fit(new_trainset_data, new_trainset_params)
e@0 3630
e@0 3631
e@0 3632 # inliers = ee.predict(new_trainset_data) == 1
e@0 3633
e@0 3634 # new_trainset_data = new_trainset_data[inliers, :]
e@0 3635 # new_trainset_params = new_trainset_params[inliers]
e@0 3636
e@0 3637
e@0 3638 new_testset_data, new_testset_params = join_data(tf1, tp1)
e@0 3639 new_testset_params = new_testset_params.astype(int)
e@0 3640
e@0 3641
e@0 3642
e@0 3643 param_est = states_to_vector(new_testset_params,parameter_state_alphabet_inv).T
e@0 3644
e@0 3645
e@0 3646
e@0 3647
e@0 3648
e@0 3649
e@0 3650
e@0 3651 X = arange(50, len(new_trainset_params), 50)
e@0 3652 print X
e@0 3653 # X = append(X, [len(new_trainset_params)], 1)
e@0 3654
e@0 3655
e@0 3656 #
e@0 3657 for M in X:
e@0 3658 # print CHAINSIZE
e@0 3659 # print NCOMPONENTS
e@0 3660 hmmc = HmmClassifier3(N=CHAINSIZE, n_components = NCOMPONENTS)
e@0 3661 gnbc = GaussianNB()
e@0 3662 avac = MyAVAClassifier()
e@0 3663 hmmsvmc = HMMsvmClassifier()
e@0 3664
e@0 3665 gnbc.fit(new_trainset_data[0:M,:], new_trainset_params[0:M])
e@0 3666 pgnbc = gnbc.predict(new_testset_data)
e@0 3667 rgnbc = metrics.accuracy_score(pgnbc, new_testset_params)
e@0 3668 mgnbc = calculate_mse(pgnbc,new_testset_params)
e@0 3669
e@0 3670 #
e@0 3671 hmmc.fit(new_trainset_data[0:M,:], new_trainset_params[0:M])
e@0 3672 phmmc = hmmc.predict(new_testset_data)
e@0 3673 rhmmc = metrics.accuracy_score(phmmc, new_testset_params)
e@0 3674 mhmmc = calculate_mse(phmmc,new_testset_params)
e@0 3675
e@0 3676 #
e@0 3677 try:
e@0 3678 avac.fit(new_trainset_data[0:M,:], new_trainset_params[0:M])
e@0 3679 pavac = avac.predict(new_testset_data)
e@0 3680 ravac = metrics.accuracy_score(pavac, new_testset_params)
e@0 3681 mavac = calculate_mse(pavac,new_testset_params)
e@0 3682 except:
e@0 3683 ravac = 0
e@0 3684 mavac = inf
e@0 3685 #
e@0 3686 #
e@0 3687 #
e@0 3688 #
e@0 3689 #
e@0 3690 try:
e@0 3691 hmmsvmc.fit(new_trainset_data[0:M,:], new_trainset_params[0:M])
e@0 3692 phmmsvm = hmmsvmc.predict(new_testset_data)
e@0 3693 rhmmsvm = metrics.accuracy_score(phmmsvm, new_testset_params)
e@0 3694 mhmmsvm = calculate_mse(phmmsvm,new_testset_params)
e@0 3695 #
e@0 3696 except:
e@0 3697 rhmmsvm = 0
e@0  3698             mhmmsvm = inf  # the old code reset mavac here by mistake
e@0 3699 #
e@0 3700 # ratios_gnb.append(rgnbc)
e@0 3701 # ratios_hmm.append(rhmmc)
e@0 3702 # ratios_svm.append(ravac)
e@0 3703 # ratios_hmmsvm.append(rhmmsvm)
e@0 3704 #
e@0 3705 # mse_gnb.append(mgnbc)
e@0 3706 # mse_hmm.append(mhmmc)
e@0 3707 # mse_svm.append(mavac)
e@0 3708 # mse_hmmsvm.append(mhmmsvm)
e@0 3709 #
e@0 3710 # fullratios.append((trial,X,ratios_gnb,ratios_hmm,ratios_svm,ratios_hmmsvm))
e@0 3711 # fullmses.append((trial,X,mse_gnb,mse_hmm,mse_svm,mse_hmmsvm))
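# NB: with the two appends above commented out, fullratios/fullmses stay
# empty; the learning-curve aggregation at the bottom of this file then
# loops zero times and its mean() calls run on zero-column arrays.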
e@0 3712
e@0 3713 #
e@0 3714 #
e@0 3715 #
e@0 3716 #
e@0 3717
e@0 3718
e@0 3719
e@0 3720
e@0 3721
e@0 3722
e@0 3723 # hmmc = HmmClassifier3(N=3, n_components = max(unique(parameters_state))+1)
e@0 3724 hmmc = HmmClassifier3(N=CHAINSIZE, n_components = NCOMPONENTS)
e@0 3725 gnbc = GaussianNB()
e@0 3726 avac = MyAVAClassifier()
e@0 3727 hmmsvmc = HMMsvmClassifier()
e@0 3728 sinkholec = SinkHoleClassifier()
e@0 3729
e@0 3730 hmmc.fit(new_trainset_data, new_trainset_params)
e@0 3731 gnbc.fit(new_trainset_data, new_trainset_params)
e@0 3732 # try:
e@0 3733 avac.fit(new_trainset_data, new_trainset_params)
e@0 3734 sinkholec.fit(new_trainset_data, new_trainset_params)
e@0 3735 pavac = avac.predict(new_testset_data)
e@0 3736
e@0 3737
e@0 3738 # except:
e@0 3739 # pavac = ones((new_testset_data.shape[0],))*new_trainset_params[0]
e@0 3740
e@0 3741
e@0 3742 # try:
e@0 3743 hmmsvmc.fit(new_trainset_data, new_trainset_params)
e@0 3744 phmmsvm = hmmsvmc.predict(new_testset_data)
e@0 3745 # except:
e@0 3746 #
e@0 3747 # # phmmsvm = hmmsvmc.predict(new_testset_data)
e@0 3748 # phmmsvm = ones((new_testset_data.shape[0],))*new_trainset_params[0]
e@0 3749
e@0 3750
e@0 3751 phmmc = hmmc.predict(new_testset_data)
e@0 3752 pgnbc = gnbc.predict(new_testset_data)
e@0 3753 psinkhole = sinkholec.predict(new_testset_data)
e@0 3754
e@0 3755 print "[II] (Trial %d) Classification ratio for GNB: %.2f" % (trial, metrics.accuracy_score(pgnbc, new_testset_params))
e@0 3756 print "[II] (Trial %d) Classification ratio for SVM: %.2f" % (trial, metrics.accuracy_score(pavac, new_testset_params))
e@0 3757 print "[II] (Trial %d) Classification ratio for HMM: %.2f" % (trial, metrics.accuracy_score(phmmc, new_testset_params))
e@0 3758 print "[II] (Trial %d) Classification ratio for HMM/SVM: %.2f" % (trial, metrics.accuracy_score(phmmsvm, new_testset_params))
e@0 3759 print "[II] (Trial %d) Classification ratio for Sinkhole Approach: %.2f" % (trial, metrics.accuracy_score(psinkhole, new_testset_params))
e@0 3760
e@0 3761
e@0 3763
e@0 3764 # rgnbc = rhmmc*(trial-1)/trial + metrics.accuracy_score(pgnbc, new_testset_params)/trial
e@0 3765 # ravac = rhmmc*(trial-1)/trial + metrics.accuracy_score(pavac, new_testset_params)/trial
e@0 3766 # rhmmc = rhmmc*(trial-1)/trial + metrics.accuracy_score(phmmc, new_testset_params)/trial
e@0 3767 # rhmmsvmc = rhmmc*(trial-1)/trial + metrics.accuracy_score(phmmsvm, new_testset_params)/trial
e@0 3768
e@0 3769 rgnbc = metrics.accuracy_score(pgnbc, new_testset_params)
e@0 3770 ravac = metrics.accuracy_score(pavac, new_testset_params)
e@0 3771 rhmmc = metrics.accuracy_score(phmmc, new_testset_params)
e@0 3772 rhmmsvmc = metrics.accuracy_score(phmmsvm, new_testset_params)
e@0 3773 rsinkhole = metrics.accuracy_score(psinkhole, new_testset_params)
e@0 3774
e@0 3775 ratios_gnb.append(rgnbc)
e@0 3776 ratios_svm.append(ravac)
e@0 3777 ratios_hmm.append(rhmmc)
e@0 3778 ratios_hmmsvm.append(rhmmsvmc)
e@0 3779 ratios_sinkhole.append(rsinkhole)
e@0 3780
e@0 3785 # msegbnc = msegbnc*(trial-1)/trial + calculate_mse(pgnbc,new_testset_params)/trial
e@0 3786 # msehmmc = msehmmc*(trial-1)/trial + calculate_mse(phmmc,new_testset_params)/trial
e@0 3787 # mseavac = mseavac*(trial-1)/trial + calculate_mse(pavac,new_testset_params)/trial
e@0 3788 # msehmmsvmc = msehmmsvmc*(trial-1)/trial + calculate_mse(phmmsvm,new_testset_params)/trial
e@0 3789
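e@0      # Per-trial mean squared error between predicted and reference states.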
e@0 3790 msegnb = calculate_mse(pgnbc,new_testset_params)
e@0 3791 msesvm = calculate_mse(pavac,new_testset_params)
e@0 3792 msehmm = calculate_mse(phmmc,new_testset_params)
e@0 3793 msehmmsvm = calculate_mse(phmmsvm,new_testset_params)
e@0 3794 msesinkhole = calculate_mse(psinkhole,new_testset_params)
e@0 3795
e@0 3796 mse_gnb.append(msegnb)
e@0 3797 mse_svm.append(msesvm)
e@0 3798 mse_hmm.append(msehmm)
e@0 3799 mse_hmmsvm.append(msehmmsvm)
e@0 3800 mse_sinkhole.append(msesinkhole)
e@0 3801
e@0 3807 trial += 1
e@0 3808
e@0 3809 print "[II] Per-trial GNB accuracies:", ratios_gnb
e@0 3810
e@0 3811
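e@0      # Mean and sample standard deviation (ddof=1) of each metric across trials.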
e@0 3812 sucrats = [mean(ratios_gnb), mean(ratios_svm), mean(ratios_hmm), mean(ratios_hmmsvm), mean(ratios_sinkhole)]
e@0 3813 sucratsstd = [std(ratios_gnb, ddof=1), std(ratios_svm, ddof=1), std(ratios_hmm, ddof=1), std(ratios_hmmsvm, ddof=1), std(ratios_sinkhole, ddof=1)]
e@0 3814 mserats = [mean(mse_gnb), mean(mse_svm), mean(mse_hmm), mean(mse_hmmsvm), mean(mse_sinkhole)]
e@0 3815 #mserats = [msegbnc, mseavac, msehmmc,msehmmsvmc]
e@0 3816
e@0 3817 print "[II] Mean accuracies (GNB, SVM, HMM, HMM/SVM, Sinkhole):", tuple(sucrats)
e@0 3818 # print tuple(sucratsstd)
e@0 3819
e@0 3820
e@0 3821 # scores = zeros((len(sucrats), len(sucrats)))
e@0 3822 # for i in range(0, len(sucrats)):
e@0 3823 # for j in range(0, len(sucrats)):
e@0 3824 # scores[i,j] = (sucrats[i] - sucrats[j])/sqrt(sucratsstd[i]**2 + sucratsstd[j]**2)
e@0 3825 print "[II] Mean MSEs (GNB, SVM, HMM, HMM/SVM, Sinkhole):", tuple(mserats)
e@0 3826 # print scores
e@0 3827
e@0 3828
e@0 3829
e@0 3830 # print (msegbnc, mseavac, msehmmc,msehmmsvmc )
e@0 3831
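e@0      # Learning curves over training-set size: X holds the sample sizes tried
e@0      # per trial and is normalised to a fraction of the largest size. Note
e@0      # that the fullratios/fullmses appends in the trial loop above are
e@0      # commented out, so this block assumes they were populated in an
e@0      # earlier run; a minimal guard (sketch) would be:
e@0      # assert len(fullratios) > 0, "fullratios is empty - enable the appends in the trial loop"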
e@0 3832 sample_sizes = X/float(max(X))
e@0 3833 gnbc_ratios_curves = zeros((len(X),len(fullratios)))
e@0 3834 gnbc_mses_curves = zeros((len(X),len(fullratios)))
e@0 3835 #
e@0 3836 avac_ratios_curves = zeros((len(X),len(fullratios)))
e@0 3837 avac_mses_curves = zeros((len(X),len(fullratios)))
e@0 3838 #
e@0 3839 hmmc_ratios_curves = zeros((len(X),len(fullratios)))
e@0 3840 hmmc_mses_curves = zeros((len(X),len(fullratios)))
e@0 3841 #
e@0 3842 hmmsvmc_ratios_curves = zeros((len(X),len(fullratios)))
e@0 3843 hmmsvmc_mses_curves = zeros((len(X),len(fullratios)))
e@0 3844 #
e@0 3845 #
e@0 3846 for i in range(0, len(fullratios)):
e@0 3847 for j in range(0, len(X)):
e@0      # Tuples were appended as (trial, X, gnb, hmm, svm, hmmsvm), so the
e@0      # SVM curves live at index 4 and the HMM curves at index 3; the
e@0      # original indices had these two swapped.
e@0 3848 gnbc_ratios_curves[j,i] = fullratios[i][2][j]
e@0 3849 gnbc_mses_curves[j,i] = fullmses[i][2][j]
e@0 3850 
e@0 3851 avac_ratios_curves[j,i] = fullratios[i][4][j]
e@0 3852 avac_mses_curves[j,i] = fullmses[i][4][j]
e@0 3853 
e@0 3854 hmmc_ratios_curves[j,i] = fullratios[i][3][j]
e@0 3855 hmmc_mses_curves[j,i] = fullmses[i][3][j]
e@0 3856 
e@0 3857 hmmsvmc_ratios_curves[j,i] = fullratios[i][5][j]
e@0 3858 hmmsvmc_mses_curves[j,i] = fullmses[i][5][j]
e@0 3859 #
e@0 3860 #
e@0 3861 #
e@0 3862 #
e@0 3863 #
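e@0      # Average each classifier's curve across trials (axis 1 indexes trials).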
e@0 3864 gnbc_mses_curve = mean(gnbc_mses_curves,1)
e@0 3865 avac_mses_curve = mean(avac_mses_curves,1)
e@0 3866 hmmc_mses_curve = mean(hmmc_mses_curves,1)
e@0 3867 hmmsvmc_mses_curve = mean(hmmsvmc_mses_curves,1)
e@0 3868 #
e@0 3869 gnbc_ratios_curve = mean(gnbc_ratios_curves,1)
e@0 3870 avac_ratios_curve = mean(avac_ratios_curves,1)
e@0 3871 hmmc_ratios_curve = mean(hmmc_ratios_curves,1)
e@0 3872 hmmsvmc_ratios_curve = mean(hmmsvmc_ratios_curves,1)
e@0 3873 #
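e@0      # Accuracy (top panel) and MSE (bottom panel) versus the fraction of the
e@0      # training set used; the sinkhole classifier is omitted because the
e@0      # stored tuples only cover the four base classifiers.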
e@0 3874 figure()
e@0 3875 subplot(211)
e@0 3876 plot(sample_sizes,gnbc_ratios_curve)
e@0 3877 plot(sample_sizes,avac_ratios_curve)
e@0 3878 plot(sample_sizes,hmmc_ratios_curve)
e@0 3879 plot(sample_sizes,hmmsvmc_ratios_curve)
e@0 3880 xlabel('Part of training set used')
e@0 3881 ylabel('Accuracy')
e@0 3882 legend(['GNB','SVM','HMM','HMM/SVM'])
e@0 3883 #
e@0 3884 subplot(212)
e@0 3885 plot(sample_sizes,gnbc_mses_curve)
e@0 3886 plot(sample_sizes,avac_mses_curve)
e@0 3887 plot(sample_sizes,hmmc_mses_curve)
e@0 3888 plot(sample_sizes,hmmsvmc_mses_curve)
e@0 3889 xlabel('Part of training set used')
e@0 3890 ylabel('Mean Squared Error')
e@0 3891 #
e@0 3892 legend(['GNB','SVM','HMM','HMM/SVM'])
e@0 3893 #
e@0 3894 # # tight_layout()
e@0 3895 savefig('ratios-2.png',dpi=500)  # save before show(), or the file can end up blank
e@0 3896 show()