#!/usr/bin/python2
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 23 11:53:17 2015

@author: mmxgn
"""

# This file does the cluster estimation and the removal of outliers

from warnings import filterwarnings

from sys import argv, exit
from essentia.standard import YamlInput, YamlOutput
from essentia import Pool
from pca import *

from numpy import *
from sklearn import cluster
from sklearn.metrics import pairwise_distances
from sklearn.cluster import KMeans, MiniBatchKMeans
from matplotlib.pyplot import *
#from sklearn.mixture import GMM
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from scipy.signal import decimate, gaussian
from sklearn import cross_validation
from sklearn import multiclass
from sklearn.covariance import EllipticEnvelope

#from hmmlearn import hmm
from hmmlearn.hmm import GMM
from hmmlearn import hmm

from sklearn import svm
import copy as cp

from scipy.misc import logsumexp
import pickle

from collections import Counter
#from adpcm import adm, adm_reconstruct


from reshape_observations import reshape_observations

mse = lambda A, B: ((array(A) - array(B)) ** 2).mean()

lognorm = lambda A: A - logsumexp(A)

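# A quick sanity check of the two helpers above (a minimal sketch; the demo
# function and its values are illustrative only): lognorm() shifts a vector
# of log-probabilities so it sums to one in the linear domain, and mse() is
# a plain mean squared error.
def _demo_helpers():
    logp = log(array([0.2, 0.3, 0.5]))
    print exp(lognorm(logp))   # -> [ 0.2  0.3  0.5]
    print mse([0, 1], [1, 1])  # -> 0.5
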

rc('font', **{'family': 'sans-serif', 'sans-serif': ['Helvetica']})
## for Palatino and other serif fonts use:
#rc('font',**{'family':'serif','serif':['Palatino']})
rc('text', usetex=True)
#logsum = lambda X: logsumexp(log(X))


filterwarnings("ignore")
C = 0.7
NCOMPONENTS = 2
CHAINSIZE = 5

close('all')

from numpy.random import randint, randn


class HMMsvm:
    def __init__(self, svm_):
        self.svm = svm_

#        self.svm = MyAVAClassifier()

    def fit(self, features_list, states, flr=1e-13):
#    def fit(self, features_list, flr=1e-13):

        # Concatenate the feature sequences and their state sequences
#        self.svm.fit(X, states, flr)
#        self.prior = self.svm.prior
#        self.transmat = self.svm.transmat

        features = None

        for f in features_list:
            if features is None:
                features = f
            else:
                features = append(features, f, 0)

        fullstates = None
#        T = features.shape[0]
        for s in states:
            if fullstates is None:
                fullstates = s
            else:
                fullstates = append(fullstates, s)

        self.n_classes = self.svm.n_classes
        n_classes = self.n_classes

#        print fullstates, shape(fullstates)

        h = histogram(fullstates, n_classes)[0].astype(float)
        self.prior = h/sum(h)

        # Add a very small probability for random jump

        self.prior += flr
        self.prior = self.prior/sum(self.prior)

        transmat = zeros((n_classes, n_classes))
        transitions = zeros((n_classes, ))
        for s in states:
            for i in range(1, len(s)):
                prev = s[i-1]
                cur = s[i]
                transmat[prev, cur] += 1
                transitions[prev] += 1

        transitions[transitions == 0] = 1

        for i in range(0, transmat.shape[0]):
            transmat[i, :] = transmat[i, :]/transitions[i]

        self.transmat = transmat

        # Add a very small probability for random jump to avoid zero values

        self.transmat += flr

        for i in range(0, self.transmat.shape[0]):
            self.transmat[i, :] = self.transmat[i, :]/sum(self.transmat[i, :])

        A = zeros((n_classes, n_classes))

        Aold = self.transmat

        # Re-estimate the transition matrix from the Viterbi decodings until
        # it stops changing (within a floor-sized tolerance)
        while mse(A, Aold) > 10*size(A)*flr:
            Aold = A.copy()
            A = zeros((n_classes, n_classes))
            transitions = zeros((n_classes, ))

            for i in range(0, len(features_list)):
                f = features_list[i]
#                print "FEATURES:", f
#                print f
                s, p = self.logviterbi(f)
#                print s
                for j in range(1, len(s)):
                    prev = s[j-1]
                    cur = s[j]
                    A[prev, cur] += 1
                    transitions[prev] += 1

            transitions[transitions == 0] = 1

            for i in range(0, A.shape[0]):
                A[i, :] = A[i, :]/transitions[i]

            A += flr

            self.transmat = A

            for i in range(0, A.shape[0]):
                if sum(A[i, :]) > 0:
                    A[i, :] = A[i, :]/sum(A[i, :])

#        print observations

    def estimate_emission_probability(self, x, q):
        post = self.svm.estimate_posterior_probability_vector(x)
#        print "post: ", post
        prior = self.prior
#        print "prior: ", prior
        p = post/prior
        p = p/sum(p)

        return p[q]

#    def estimate_emission_probability(self, x, q):
#        return self.svm.estimate_emission_probability(q, x)

    def predict(self, X):
        return self.logviterbi(X)[0]

    def logviterbi(self, X):
        # Number of states

        N = self.n_classes

        # Number of observations

        T = X.shape[0]

        transmat = self.transmat

        # 1. Initialization

        a1 = self.prior

        delta = zeros((T, N))
        psi = zeros((T, N), dtype=int)

        for i in range(0, N):
            delta[0, i] = log(self.transmat[0, i]) + log(self.estimate_emission_probability(X[0, :], i))

        # 2. Recursion

        for t in range(1, T):
#            delta_old = delta.copy()
            x = X[t, :]
            for j in range(0, N):
#                print "j: %d, S" % j, delta_old+log(transmat[:,j])
                delta[t, j] = max(delta[t-1, :] + log(transmat[:, j])) + log(self.estimate_emission_probability(x, j))
                psi[t, j] = argmax(delta[t-1, :] + log(transmat[:, j]))

#            print "t: %d, delta: " % t, delta
#            print "t: %d, psi:" % t, psi

        # 3. Termination

        p_star = max(delta[T-1, :] + log(transmat[:, N-1]))
        q_star = argmax(delta[T-1, :] + log(transmat[:, N-1]))

        # 4. Backtracking

        q = zeros((T, ), dtype=int)
        p = zeros((T, ))

        q[-1] = q_star
        p[-1] = p_star
        for t in range(1, T):
            q[-t-1] = psi[-t, q[-t]]
            p[-t-1] = delta[-t, q[-t]]

        return q, p

    def viterbi(self, X):
        # Number of states

        N = self.n_classes

        # Number of observations

        T = X.shape[0]

        transmat = self.transmat

        # 1. Initialization

        a1 = self.prior

        delta = zeros((N, ))
        psi = zeros((N, ), dtype=int)

        for i in range(0, N):
            delta[i] = a1[i]*self.estimate_emission_probability(X[0, :], i)

        # 2. Recursion

        for t in range(1, T):
            delta_old = delta.copy()
            x = X[t, :]
            for j in range(0, N):
                delta[j] = max(delta_old*transmat[:, j])*self.estimate_emission_probability(x, j)
                psi[j] = argmax(delta_old*transmat[:, j])

        # 3. Termination

        p_star = max(delta*transmat[:, N-1])
        q_star = argmax(delta*transmat[:, N-1])

        # 4. Backtracking

        q = zeros((T, ), dtype=int)
        q[-1] = q_star

        for t in range(1, T):
            q[-t-1] = psi[q[-t]]

        return q

    def _log_likelihood_matrix(self, X):
        N = self.n_classes
        ll = zeros((X.shape[0], N))

        for i in range(0, X.shape[0]):
            ll[i, :] = self._forward(i, X)

        return ll

    def compute_ksus(self, X):
        N = self.n_classes
        T = X.shape[0]
        print "[II] Computing gammas... "

        gammas = self._forward_backward(X)

        # Initialize

        aij = self.transmat

    def _not_quite_ksu(self, t, i, j, X):
        alpha = exp(self.forward(X[0:t+1, :]))[i]
        beta = exp(self.backward(X[t+1:, :]))[j]

        aij = self.transmat[i, j]
        bj = self.estimate_emission_probability(X[t+1, :], j)

        return alpha*beta*aij*bj

    def _ksu(self, t, i, j, X):
        alphaT = exp(self.forward(X[0:t+1, :]))[0]

        return self._not_quite_ksu(t, i, j, X)

    def _forward_backward(self, X):
        T = X.shape[0]
        N = self.n_classes
        fv = zeros((T, N))
        sv = zeros((T, N))

        b = None
        for t in range(1, T+1):
            fv[t-1, :] = self._forward_message(fv[t-2, :], X[0:t, ])

        for t in range(1, T+1):
            b = self._backward_message(b, X[-t:, :])
            sv[-t, :] = lognorm(fv[t-1, :]*b)

        return sv

    def _backward(self, t, X):
        N = self.n_classes
        a = ones((N, ))/N

        beta = zeros((N, ))
        transmat = self.transmat
        T = X.shape[0]
        x = X[t, :]
        if t == T-1:
            beta = log(a)

            return beta
        else:
            tosum = zeros((N, ))
            bt = self._backward(t+1, X)
            btnew = zeros((N, ))
#            print bt
            for j in range(0, N):
                for i in range(0, N):
#                    print log(self.estimate_emission_probability(x, j)), bt[i], log(transmat[i,j])
                    tosum[i] = log(self.estimate_emission_probability(x, j)) + bt[i] + log(transmat[i, j])
#                print tosum

                btnew[j] = logsumexp(tosum)
            btnew = lognorm(btnew)

            return btnew

    def score(self, X):
        return self.forward(X)

    def forward(self, X):
        T = X.shape[0]
        f = None
        for t in range(1, T+1):
            f = self._forward_message(f, X[0:t, :])

        return f

    def backward(self, X):
        T = X.shape[0]
        b = None
        for t in range(1, T+1):
#            print t
#            print t,b
            idx = arange(-t, T)
#            print idx
            b = self._backward_message(b, X[-t:, :])

        return b

    def _backward_message(self, b, X):
        N = self.n_classes

        beta = zeros((N, ))
        transmat = self.transmat

        x = X[0, :]

        if X.shape[0] == 1:
            beta = log(ones((N, ))/N)
            return beta
        else:
            tosum = zeros((N, ))
            btnew = zeros((N, ))
            for j in range(0, N):
                for i in range(0, N):
                    tosum[i] = log(self.estimate_emission_probability(x, j)) + b[i] + log(transmat[i, j])

                btnew[j] = logsumexp(tosum)
#            btnew = lognorm(btnew)
            return btnew

    def _forward_message(self, f, X):
        N = self.n_classes
        a = self.prior

        alpha = zeros((N, ))
        transmat = self.transmat

        x = X[-1, :]

        if X.shape[0] == 1:
            for i in range(0, N):
                alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i))
#            alpha = lognorm(alpha)
            return alpha

        else:
            tosum = zeros((N, ))
            ftnew = zeros((N, ))
            for j in range(0, N):
                for i in range(0, N):
                    tosum[i] = f[i] + log(transmat[i, j])
                Sum = logsumexp(tosum)
                bj = self.estimate_emission_probability(x, j)
                ftnew[j] = Sum + log(bj)

#            ftnew = lognorm(ftnew)
            return ftnew

    def _forward(self, t, X):
        N = self.n_classes
        a = self.prior
#        print a
        alpha = zeros((N, ))
        transmat = self.transmat
        x = X[t, :]

        if t == 0:
            for i in range(0, N):
                alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i))
#            print "--"
#            print alpha
            alpha = lognorm(alpha)
#            print alpha
#            print "xx"
            return alpha
        else:
            tosum = zeros((N, ))
            ft = self._forward(t-1, X)
            ftnew = zeros((N, ))
            for j in range(0, N):
                for i in range(0, N):
#                    print ft
                    tosum[i] = ft[i] + log(transmat[i, j])

                Sum = logsumexp(tosum)
                bj = self.estimate_emission_probability(x, j)
                ftnew[j] = Sum + log(bj)
            ftnew = lognorm(ftnew)

            return ftnew

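# A minimal usage sketch for the SVM/HMM hybrid above (illustrative only):
# the emission model is a MyAVAClassifier (defined further down in this
# file; the name resolves at call time), trained on hypothetical two-state
# data. HMMsvm turns its per-frame posteriors into scaled likelihoods
# (posterior divided by prior) and decodes with Viterbi.
def _demo_hmmsvm():
    X = random.rand(30, 3)            # 30 hypothetical feature frames
    y = (X[:, 0] > 0.5).astype(int)   # two hypothetical states
    emission = MyAVAClassifier()
    emission.fit(X, y)
    model = HMMsvm(emission)
    model.fit([X], [y])
    print model.predict(X)[:10]
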
class HMMsvm2:
    def __init__(self, svm_, n_components=4):
        self.svm = svm_
        self.n_components = n_components

#        self.svm = MyAVAClassifier()

    def fit(self, features_list, flr=1e-13):
#    def fit(self, features_list, flr=1e-13):

        # Unsupervised variant: start from a random transition matrix and
        # re-estimate it from the Viterbi decodings
#        self.svm.fit(X, states, flr)
#        self.prior = self.svm.prior
#        self.transmat = self.svm.transmat

        n_classes = self.n_components
        self.n_classes = n_classes

        A = zeros((n_classes, n_classes))

        Aold = randn(n_classes, n_classes)
        self.transmat = Aold
        self.prior = randn(n_classes)
        self.prior = self.prior/sum(self.prior)
        for i in range(0, Aold.shape[0]):
            Aold[i, :] = Aold[i, :]/sum(Aold[i, :])

        while mse(A, Aold) > 10*size(A)*flr:
            Aold = A.copy()
            A = zeros((n_classes, n_classes))
            transitions = zeros((n_classes, ))

            for i in range(0, len(features_list)):
                f = features_list[i]
#                print "FEATURES:", f
#                print f
                s, p = self.logviterbi(f)
#                print s

                h = histogram(s, n_classes)[0].astype(float)
                self.prior = h/sum(h)

                self.prior += flr
                self.prior = self.prior/sum(self.prior)

                for j in range(1, len(s)):
                    prev = s[j-1]
                    cur = s[j]
                    A[prev, cur] += 1
                    transitions[prev] += 1

            transitions[transitions == 0] = 1

            for i in range(0, A.shape[0]):
                A[i, :] = A[i, :]/transitions[i]

            A += flr

            self.transmat = A

            for i in range(0, A.shape[0]):
                if sum(A[i, :]) > 0:
                    A[i, :] = A[i, :]/sum(A[i, :])

#        print observations

    def estimate_emission_probability(self, x, q):
        post = self.svm.estimate_posterior_probability_vector(x)
#        print "post: ", post
        prior = self.prior
#        print "prior: ", prior
        p = post/prior
        p = p/sum(p)

        return p[q]

#    def estimate_emission_probability(self, x, q):
#        return self.svm.estimate_emission_probability(q, x)

    def predict(self, X):
        return self.logviterbi(X)[0]

    def logviterbi(self, X):
        # Number of states

        N = self.n_classes

        # Number of observations

        T = X.shape[0]

        transmat = self.transmat

        # 1. Initialization

#        a1 = self.prior

        delta = zeros((T, N))
        psi = zeros((T, N), dtype=int)

        for i in range(0, N):
            delta[0, i] = log(self.transmat[0, i]) + log(self.estimate_emission_probability(X[0, :], i))

        # 2. Recursion

        for t in range(1, T):
#            delta_old = delta.copy()
            x = X[t, :]
            for j in range(0, N):
#                print "j: %d, S" % j, delta_old+log(transmat[:,j])
                delta[t, j] = max(delta[t-1, :] + log(transmat[:, j])) + log(self.estimate_emission_probability(x, j))
                psi[t, j] = argmax(delta[t-1, :] + log(transmat[:, j]))

#            print "t: %d, delta: " % t, delta
#            print "t: %d, psi:" % t, psi

        # 3. Termination

        p_star = max(delta[T-1, :] + log(transmat[:, N-1]))
        q_star = argmax(delta[T-1, :] + log(transmat[:, N-1]))

        # 4. Backtracking

        q = zeros((T, ), dtype=int)
        p = zeros((T, ))

        q[-1] = q_star
        p[-1] = p_star
        for t in range(1, T):
            q[-t-1] = psi[-t, q[-t]]
            p[-t-1] = delta[-t, q[-t]]

        return q, p

    def viterbi(self, X):
        # Number of states

        N = self.n_classes

        # Number of observations

        T = X.shape[0]

        transmat = self.transmat

        # 1. Initialization

        a1 = self.prior

        delta = zeros((N, ))
        psi = zeros((N, ), dtype=int)

        for i in range(0, N):
            delta[i] = a1[i]*self.estimate_emission_probability(X[0, :], i)

        # 2. Recursion

        for t in range(1, T):
            delta_old = delta.copy()
            x = X[t, :]
            for j in range(0, N):
                delta[j] = max(delta_old*transmat[:, j])*self.estimate_emission_probability(x, j)
                psi[j] = argmax(delta_old*transmat[:, j])

        # 3. Termination

        p_star = max(delta*transmat[:, N-1])
        q_star = argmax(delta*transmat[:, N-1])

        # 4. Backtracking

        q = zeros((T, ), dtype=int)
        q[-1] = q_star

        for t in range(1, T):
            q[-t-1] = psi[q[-t]]

        return q

    def _log_likelihood_matrix(self, X):
        N = self.n_classes
        ll = zeros((X.shape[0], N))

        for i in range(0, X.shape[0]):
            ll[i, :] = self._forward(i, X)

        return ll

    def compute_ksus(self, X):
        N = self.n_classes
        T = X.shape[0]
        print "[II] Computing gammas... "

        gammas = self._forward_backward(X)

        # Initialize

        aij = self.transmat

    def _not_quite_ksu(self, t, i, j, X):
        alpha = exp(self.forward(X[0:t+1, :]))[i]
        beta = exp(self.backward(X[t+1:, :]))[j]

        aij = self.transmat[i, j]
        bj = self.estimate_emission_probability(X[t+1, :], j)

        return alpha*beta*aij*bj

    def _ksu(self, t, i, j, X):
        alphaT = exp(self.forward(X[0:t+1, :]))[0]

        return self._not_quite_ksu(t, i, j, X)

    def _forward_backward(self, X):
        T = X.shape[0]
        N = self.n_classes
        fv = zeros((T, N))
        sv = zeros((T, N))

        b = None
        for t in range(1, T+1):
            fv[t-1, :] = self._forward_message(fv[t-2, :], X[0:t, ])

        for t in range(1, T+1):
            b = self._backward_message(b, X[-t:, :])
            sv[-t, :] = lognorm(fv[t-1, :]*b)

        return sv

    def _backward(self, t, X):
        N = self.n_classes
        a = ones((N, ))/N

        beta = zeros((N, ))
        transmat = self.transmat
        T = X.shape[0]
        x = X[t, :]
        if t == T-1:
            beta = log(a)

            return beta
        else:
            tosum = zeros((N, ))
            bt = self._backward(t+1, X)
            btnew = zeros((N, ))
#            print bt
            for j in range(0, N):
                for i in range(0, N):
#                    print log(self.estimate_emission_probability(x, j)), bt[i], log(transmat[i,j])
                    tosum[i] = log(self.estimate_emission_probability(x, j)) + bt[i] + log(transmat[i, j])
#                print tosum

                btnew[j] = logsumexp(tosum)
            btnew = lognorm(btnew)
            return btnew

    def score(self, X):
        return self.forward(X)

    def forward(self, X):
        T = X.shape[0]
        f = None
        for t in range(1, T+1):
            f = self._forward_message(f, X[0:t, :])

        return f

    def backward(self, X):
        T = X.shape[0]
        b = None
        for t in range(1, T+1):
#            print t
#            print t,b
            idx = arange(-t, T)
#            print idx
            b = self._backward_message(b, X[-t:, :])

        return b

    def _backward_message(self, b, X):
        N = self.n_classes

        beta = zeros((N, ))
        transmat = self.transmat

        x = X[0, :]

        if X.shape[0] == 1:
            beta = log(ones((N, ))/N)
            return beta
        else:
            tosum = zeros((N, ))
            btnew = zeros((N, ))
            for j in range(0, N):
                for i in range(0, N):
                    tosum[i] = log(self.estimate_emission_probability(x, j)) + b[i] + log(transmat[i, j])

                btnew[j] = logsumexp(tosum)
#            btnew = lognorm(btnew)
            return btnew

    def _forward_message(self, f, X):
        N = self.n_classes
        a = self.prior

        alpha = zeros((N, ))
        transmat = self.transmat

        x = X[-1, :]

        if X.shape[0] == 1:
            for i in range(0, N):
                alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i))
#            alpha = lognorm(alpha)
            return alpha

        else:
            tosum = zeros((N, ))
            ftnew = zeros((N, ))
            for j in range(0, N):
                for i in range(0, N):
                    tosum[i] = f[i] + log(transmat[i, j])
                Sum = logsumexp(tosum)
                bj = self.estimate_emission_probability(x, j)
                ftnew[j] = Sum + log(bj)

#            ftnew = lognorm(ftnew)
            return ftnew

    def _forward(self, t, X):
        N = self.n_classes
        a = self.prior
#        print a
        alpha = zeros((N, ))
        transmat = self.transmat
        x = X[t, :]

        if t == 0:
            for i in range(0, N):
                alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i))
#            print "--"
#            print alpha
            alpha = lognorm(alpha)
#            print alpha
#            print "xx"
            return alpha
        else:
            tosum = zeros((N, ))
            ft = self._forward(t-1, X)
            ftnew = zeros((N, ))
            for j in range(0, N):
                for i in range(0, N):
#                    print ft
                    tosum[i] = ft[i] + log(transmat[i, j])

                Sum = logsumexp(tosum)
                bj = self.estimate_emission_probability(x, j)
                ftnew[j] = Sum + log(bj)
            ftnew = lognorm(ftnew)

            return ftnew

class NBClassifier:
    def __init__(self):
        print "[II] Gaussian Naive Bayes Classifier"
        self.name = "Naive Bayes"
        self.smallname = "gnbc"
        self.gnb = GaussianNB()

    def getName(self):
        return self.name

    def fit(self, X, states):
        self.gnb.fit(X, states)

    def predict(self, X):
        return self.gnb.predict(X)

    def cross_validate(self, data, classes):
        print "[II] NB Classifier Crossvalidating... "
        from copy import deepcopy
        estimator = deepcopy(self)
        estimator_fulldata = deepcopy(self)
        estimator_fulldata.fit(data, classes)

        percents = arange(0.1, 0.9, 0.1)
        self.percents = percents * 100

        RATIOS = []
        labels = estimator_fulldata.predict(data)

        print "[II] for full training-testing: %.2f" % (sum(array(classes == labels).astype(float))/len(labels))

        for p in percents:
            traindata, testdata, trainlabels, testlabels = cross_validation.train_test_split(data, classes, test_size=p, random_state=1)

#            print "[II] [%.2f-%.2f] Examples used for training: %s" % (p, (1-p), str(traindata))
#            print "[II] [%.2f-%.2f] Examples used for testing: %s" % (p, (1-p), str(testdata))

            # Training phase

            estimator_ = deepcopy(estimator)
            estimator_.fit(traindata, trainlabels)

            predicted_labels = estimator_.predict(testdata)

            m = sum(array(predicted_labels == testlabels).astype(float))/len(testlabels)

            print "[II] [%.2f-%.2f] train-test validation for NB Classifier: %.1f" % (p, (1-p), m)

            RATIOS.append(m)

        return RATIOS, percents

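# A minimal usage sketch for the Naive Bayes wrapper above (hypothetical
# data; the split sizes inside cross_validate are the defaults):
def _demo_nb():
    X = random.rand(40, 3)
    y = (X[:, 0] > 0.5).astype(int)
    clf = NBClassifier()
    clf.fit(X, y)
    print clf.predict(X[:5, :])
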


class HmmClassifier3:
    def __init__(self, N=2, n_components=1):
        print "[II] HMM Classifier (%d states, %d components)" % (N, n_components)
        self.name = "HMM (%d time steps, %d components)" % (N, n_components)
        self.n_components = n_components
        self.smallname = "hmmc"
        self.chain_size = N

    def getName(self):
        return self.name

    def fit(self, features, parameters):

#        print "Parameters:"
#        print parameters
        n_classes = max(unique(parameters)) + 1

        if n_classes == 1:
            self.only_one_class = True
            return
        else:
            self.only_one_class = False

        print "[II] Number of classes: ", n_classes
        hmms = [None]*n_classes

        chain_size = self.chain_size
        obs = [None]*n_classes

        for i in range(chain_size, len(parameters)):
            class_ = parameters[i]
            seq = features[i-chain_size:i, :]

            if obs[class_] is None:
                obs[class_] = [seq]
            else:
                obs[class_].append(seq)

        for i in range(0, len(obs)):

            if obs[i] is not None and len(obs[i]) != 0:
                hmm_ = hmm.GaussianHMM(n_components=self.n_components, covariance_type='diag')
#                print(obs[i])
                #obs_ = reshape_observations(obs[i])
                obs_ = concatenate(obs[i])
                #lengths_ = [o.shape[0] for o in obs[i]]
                #print (lengths_)
#                print len(obs[i])
#                print obs[i][0][0].shape
#                print obs[i]
#                for s in obs[i]:
#                    if s.ndim > 2:
#                        print s
                hmm_.fit(obs_, [self.chain_size]*len(obs[i]))
                hmms[i] = hmm_

        self.hmms = hmms

        return obs

    def predict(self, features, mfilt=20):

        if self.only_one_class == True:
            return zeros((features.shape[0], ))

        chain_size = self.chain_size
        hmms = self.hmms
        predicted_classes = zeros((features.shape[0], ))

        for i in range(chain_size, features.shape[0]):
            scores = zeros((len(hmms), ))

            seq = features[i-chain_size:i, :]

            for j in range(0, len(hmms)):
                if hmms[j] is not None:
                    scores[j] = hmms[j].score(seq)
                else:
                    scores[j] = -infty

            predicted_classes[i] = argmax(scores)

        # Do a median filter at the end

#        predicted_classes = median_filter(predicted_classes, mfilt)

        return predicted_classes

    def k_fold_cross_validate(self, data, classes, K=5):
        print "[II] HMM Classifier Crossvalidating... "
        print "[II] Validating data: ", data
        print "[II] With classes: ", classes
        from copy import deepcopy
        estimator = deepcopy(self)
        estimator_fulldata = deepcopy(self)
        estimator_fulldata.fit(data, classes)

        labels = estimator_fulldata.predict(data)
        print "[II] for full training-testing: %.2f" % (sum(array(classes == labels).astype(float))/len(classes))

        # Crossvalidate this.

        example_sequences = []
        example_labels = []

        chain_size = self.chain_size

        percents = arange(0.1, 0.9, 0.1)
        self.percents = percents * 100

        RATIOS = []

        # Constructing examples and labels

        L = data.shape[0]/K
        M = K
#        print "[II] Example size:", L

        n = 1
        while L*(n-1) < M*L:
            if L*n > shape(data)[0]:
                example = data[L*(n-1):, :]
                classes_ = classes[L*(n-1):]
            else:
                example = data[L*(n-1):L*n, :]
                classes_ = classes[L*(n-1):L*n]

#            print example
#            print classes_

            example_sequences.append(example)
            example_labels.append(classes_)
            n += 1

#        print len(example_sequences)
#        print len(example_labels)
        from sklearn.cross_validation import KFold
        kf = KFold(K, n_folds=K)

        ratio = 0

        for train, test in kf:
            print "[II] Validating examples %s against %s." % (train, test)

            topredict_data = example_sequences[test[0]]
            topredict_labels = example_labels[test[0]]

            tofit_data = None
            tofit_labels = None

            for t in train:
#                print t
                if tofit_data is None:
                    tofit_data = example_sequences[t]
                    tofit_labels = example_labels[t]
                else:
                    tofit_data = append(tofit_data, example_sequences[t], 0)
                    tofit_labels = append(tofit_labels, example_labels[t], 0)

            estimator_ = deepcopy(estimator)
            estimator_.fit(tofit_data, tofit_labels)

            labels = estimator_.predict(topredict_data)

            m = sum(array(topredict_labels == labels).astype(float))/len(labels)

            print "[II] %d-fold cross-validation for HMM Classifier: %.1f" % (K, m)

            ratio += m/float(len(kf))

        return ratio

    # Splitting to train/test

    def cross_validate(self, data, classes):
        print "[II] HMM Classifier Crossvalidating... "
        from copy import deepcopy
        estimator = deepcopy(self)
        estimator_fulldata = deepcopy(self)
        estimator_fulldata.fit(data, classes)

        labels = estimator_fulldata.predict(data)
        print "[II] for full training-testing: %.2f" % (sum(array(classes == labels).astype(float))/len(classes))

        # Crossvalidate this.

        example_sequences = []
        example_labels = []

        chain_size = self.chain_size

        percents = arange(0.5, 0.9, 0.1)
        self.percents = percents * 100

        RATIOS = []

        # Constructing examples and labels

        M = self.chain_size
        L = data.shape[0]/20

        print "[II] Example size:", L

        n = 1
        while L*n < M*L-L:
            example = data[L*(n-1):L*n, :]
#            print example
            classes_ = classes[L*(n-1):L*n]
#            print classes_

            example_sequences.append(example)
            example_labels.append(classes_)
            n += 1

        # Splitting to train/test

        for p in percents:
            traindata, testdata, trainlabels, testlabels = cross_validation.train_test_split(example_sequences, example_labels, test_size=p, random_state=1)

            print traindata
            print testdata
            # Concatenate traindata

            tofit_data = None
            tofit_labels = None

            topredict_data = None
            topredict_labels = None

            for t in traindata:
                if tofit_data is None:
                    tofit_data = t
                else:
                    tofit_data = append(tofit_data, t, 0)

            for l in trainlabels:
                if tofit_labels is None:
                    tofit_labels = l
                else:
                    tofit_labels = append(tofit_labels, l)

            for t in testdata:
                if topredict_data is None:
                    topredict_data = t
                else:
                    topredict_data = append(topredict_data, t, 0)

            for l in testlabels:
                if topredict_labels is None:
                    topredict_labels = l
                else:
                    topredict_labels = append(topredict_labels, l)

#            print "[II] Fit data: ", tofit_data
##            print "[II] Fit labels: ", tofit_labels
#            print "[II] Predict data: ", topredict_data
#            print "[II] Predict labels: ", topredict_labels
#
            estimator_ = deepcopy(estimator)

            print tofit_labels
            estimator_.fit(tofit_data, tofit_labels)

            labels = estimator_.predict(topredict_data)

            m = sum(array(topredict_labels == labels).astype(float))/len(labels)

            print "[II] [%.2f-%.2f] cross-validation for HMM Classifier: %.1f" % (p, (1-p), m)

            RATIOS.append(m)

        return RATIOS, percents

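# A minimal usage sketch for HmmClassifier3 above (hypothetical data): one
# GaussianHMM is fitted per class on length-N windows, and each frame is
# assigned to the class whose model scores its preceding window highest.
def _demo_hmmclassifier():
    X = random.rand(60, 4)
    y = (X[:, 0] > 0.5).astype(int)
    clf = HmmClassifier3(N=2, n_components=1)
    clf.fit(X, y)
    print clf.predict(X)[:10]
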

class HMMsvmClassifier2:
    def __init__(self, N=2):
        self.classifiers = {}
        self.name = "HMM-SVM Classifier 2"
        self.smallname = "hmmsvm2"
        self.obs = MyAVAClassifier()
        self.chain_size = N

    def getName(self):
        return self.name

    def fit(self, X, y):
        self.n_classes = max(unique(y))+1
        n_classes = self.n_classes
        chain_size = self.chain_size

        if n_classes == 1:
            self.only_one_class = True
            return
        else:
            self.only_one_class = False

        print "[II] Number of classes: ", n_classes

        hmms = [None]*n_classes
        obs = [None]*n_classes

        # NOTE: this method is incomplete; it only scans the window labels
        for i in range(chain_size, len(y)):
            class_ = y[i-1]

class HMMsvmClassifier:
    def __init__(self, N=2):
        self.classifiers = {}
        self.name = "HMM-SVM Classifier"
        self.smallname = "hmmsvm"
        self.obs = MyAVAClassifier()
        self.chain_size = N

    def getName(self):
        return self.name

    def fit(self, X, y):
        self.n_classes = max(unique(y))+1

        self.obs.fit(X, y)
        self.hmm = HMMsvm(self.obs)
        self.hmm.fit([X], [y])

    def predict(self, X):
#        chain_size = self.chain_size
#        predicted = zeros((X.shape[0]),)
#        for i in range(chain_size, len(predicted)):
#            predicted[i] = self.hmm.predict(X[i-chain_size:i,:])[-1]

#        return predicted

        return self.hmm.predict(X)

    def confidence(self, x, q):
        return self.hmm.estimate_emission_probability(x, q)

class SinkHoleClassifier:
    def __init__(self):
        self.name = "Sinkhole Classifier"
        self.classifierNB = NBClassifier()
        self.classifierSVM = MyAVAClassifier()
        self.classifierHMM = HmmClassifier3()
        self.classifierHMMSVM = HMMsvmClassifier()

    def getName(self):
        return self.name

    def fit(self, X, y):
        self.n_classes = max(unique(y))+1

        self.classifierNB.fit(X, y)
        self.classifierSVM.fit(X, y)
        self.classifierHMM.fit(X, y)
        self.classifierHMMSVM.fit(X, y)

    def predict(self, X):
        predictedNB = self.classifierNB.predict(X)
        predictedSVM = self.classifierSVM.predict(X)
        predictedHMM = self.classifierHMM.predict(X)
        predictedHMMSVM = self.classifierHMMSVM.predict(X)

        predicted = zeros((X.shape[0], ))

        for i in range(0, len(predicted)):
            candidates = [predictedNB[i], predictedSVM[i], predictedHMM[i], predictedHMMSVM[i]]
            c = Counter(candidates)

            most_common = c.most_common()

            # If the vote is tied, select among the candidates at random
            if len(unique([k[1] for k in most_common])) == 1:
                predicted[i] = most_common[randint(len(most_common))][0]
            else:
                predicted[i] = most_common[0][0]

        return predicted

class MyAVAClassifier:

    def __init__(self):
        self.classifiers = {}
        self.name = "Linear SVM Classifier"
        self.smallname = "svc-ava"
#        self.probabilities = {}

    def getName(self):
        return self.name

    def fit(self, X, y, flr=0):

        n_classes = max(unique(y)) + 1

        # Fall back to a single binary classifier if there are only two classes
        if len(unique(y)) == 1:
            self.only_one_class = True
            self.n_classes = 1
            self.one_class_label = y[0]
            return
        elif len(unique(y)) == 2:

            self.n_classes = n_classes
            self.svm = svm.SVC(probability=True, kernel='poly', gamma=2, C=C)
            self.svm.fit(X, y)
            classes_ = unique(y)
            self.classifiers[(classes_[0], classes_[1])] = self.svm
            self.only_two_classes = True
            self.only_one_class = False

            return
        else:
            self.only_two_classes = False
            self.only_one_class = False

        classes = arange(0, n_classes)
        self.n_classes = n_classes
#        M = n_classes*(n_classes-1) # Number of classifiers

        h = histogram(y, n_classes)[0].astype(float)
        self.prior = h/sum(h)

        transmat = zeros((n_classes, n_classes))

        for i in range(1, len(y)):
            prev = y[i-1]
            cur = y[i]
            transmat[prev, cur] += 1

        transmat = transmat/sum(transmat)

        self.transmat = transmat

        # Add a very small probability for random jump to avoid zero values

        self.transmat += flr
        self.transmat = self.transmat/sum(self.transmat)

        for i in range(0, n_classes):
            for j in range(0, n_classes):
                if i != j and (i, j) not in self.classifiers and (j, i) not in self.classifiers:
#                    print (i,j)
#                    print classes
                    idx_ = bitwise_or(y == classes[i], y == classes[j])
#                    idx_ = bitwise_or(y == i, y == j)

                    X_ = X[idx_, :]

                    y_ = y[idx_]
                    #print y_

                    if len(unique(y_)) > 1:
                        svm_ = svm.SVC(probability=True, kernel='poly', gamma=2, C=C)

#                        print y_
                        svm_.fit(X_, y_)
                        self.classifiers[(i, j)] = svm_
#                        self.probabilities[(i,j)] = svm_.predict_proba(X)

    def estimate_pairwise_class_probability(self, i, j, x):

#        print self.classifiers.keys()

        if (i, j) not in self.classifiers and (j, i) in self.classifiers:
            return self.classifiers[(j, i)].predict_proba(x)[0, 1]
        elif (i, j) not in self.classifiers and (j, i) not in self.classifiers:
            return 0.0
        else:
            return self.classifiers[(i, j)].predict_proba(x)[0, 0]

    def estimate_posterior_probability(self, i, x):
        mus = zeros((self.n_classes, ))
        for j in range(0, self.n_classes):
            if i != j:
                pcp = self.estimate_pairwise_class_probability(i, j, x)
                pcp += 1e-18
                mus[j] = 1/pcp
#        print mus
        S = sum(mus) - (self.n_classes - 2)
#        print S
        return 1/S

    def estimate_posterior_probability_vector(self, x):
        posterior = zeros((self.n_classes, ))
        for i in range(0, len(posterior)):
            posterior[i] = self.estimate_posterior_probability(i, x)

        return posterior

#    def estimate_emission_probability(self, i, x):
#        post = self.estimate_posterior_probability_vector(x)
#        # print "post: ", post
#        prior = self.prior
#        # print "prior: ", prior
#        p = post/prior
#        p = p/sum(p)
#
#        return p[i]

#    def estimate_emissmat(self, X):
#        emissmat = zeros((X.shape[0], self.n_classes))
#        for i in range(0, X.shape[0]):
#            for j in range(0, self.n_classes):
#                emissmat[i,j] = self.estimate_emission_probability(j, X[i,:])
#
#        return emissmat

    def predict(self, X):
        predicted = zeros((X.shape[0], ))

        if self.only_one_class == True:
            return ones((X.shape[0], ))*self.one_class_label
        elif self.only_two_classes == True:
            return self.svm.predict(X)

        for i in range(0, X.shape[0]):
            x = X[i, :]
            P = zeros((self.n_classes, ))

            for c in range(0, len(P)):
                P[c] = self.estimate_posterior_probability(c, x)

            pred = argmax(P)
            predicted[i] = pred

        return predicted

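# A small worked example of the pairwise-coupling rule used above
# (illustrative only): with K classes, the posterior for class i is
# recovered from the pairwise probabilities p_ij as
#     p_i = 1 / (sum_{j != i} 1/p_ij - (K - 2))
def _demo_ava_posterior():
    X = random.rand(60, 3)
    y = (X[:, 0] * 3).astype(int)   # three hypothetical classes
    clf = MyAVAClassifier()
    clf.fit(X, y)
    post = clf.estimate_posterior_probability_vector(X[0, :])
    print post, sum(post)           # should sum to roughly 1
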
def logsum(X):
    if len(X) == 1:
        return log(X[0])
    else:
        return logaddexp(log(X[0]), logsum(X[1:]))

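# A quick check of logsum above: for a positive sequence it should agree
# with log(sum(X)) up to floating point.
def _demo_logsum():
    X = [0.1, 0.2, 0.7]
    print logsum(X), log(sum(X))   # both ~0.0
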

def smooth_matrix_1D(X):
    # Smooth each row with a normalized 51-tap Gaussian window, padding
    # 50 samples on each side to limit edge effects
    window = gaussian(51, 4)
    window = window/sum(window)
    intermx = zeros((X.shape[0], X.shape[1]+100))
    intermx[:, 50:-50] = X

    for m in range(0, X.shape[0]):
#        print intermx.shape
        intermx[m, :] = convolve(ravel(intermx[m, :]), window, 'same')

    return intermx[:, 50:-50]

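# A tiny check of the smoother above: a unit impulse row comes back as a
# Gaussian bump whose samples still sum to roughly one.
def _demo_smooth():
    X = zeros((1, 101))
    X[0, 50] = 1.0
    print smooth_matrix_1D(X).sum()
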
def adm_reconstruct(codeword, h, dmin=.01, dmax=.28):
    x = zeros((1, codeword.shape[1]))

    delta1 = dmin
    delta2 = dmin
    Sum = h

    x[0] = h
    for i in range(0, codeword.shape[1]):
        if codeword[0, i] == 0:
            delta1 = dmin
            delta2 = dmin

        elif codeword[0, i] == 1:
            delta2 = dmin
            Sum += delta1
            delta1 *= 2
            if delta1 > dmax:
                delta1 = dmax

        elif codeword[0, i] == -1:
            delta1 = dmin
            Sum -= delta2
            delta2 *= 2
            if delta2 > dmax:
                delta2 = dmax
        x[0, i] = Sum
    return x


def adm(x, dmin=.01, dmax=.28, tol=0.0001):

    # Adaptive delta modulation adapted from the code:
    # (adeltamod.m)
    #
    # Adaptive Delta Modulator
    # by Gandhar Desai (gdesai)
    # BITS Pilani Goa Campus
    # Date: 28 Sept, 2013

    xsig = x

    Lx = len(x)

    ADMout = zeros((1, Lx))
    codevec = zeros((1, Lx))

    Sum = x[0]
    delta1 = dmin
    delta2 = dmin
    mult1 = 2
    mult2 = 2
    for i in range(0, Lx):
        #print abs(xsig[i] - Sum)
        if (abs(xsig[i] - Sum) < tol):
            bit = 0
            delta2 = dmin
            delta1 = dmin

        elif (xsig[i] >= Sum):
            bit = 1
            delta2 = dmin
            Sum += delta1
            delta1 *= mult1
            if delta1 > dmax:
                delta1 = dmax

        else:
            bit = -1
            delta1 = dmin
            Sum -= delta2
            delta2 *= mult2
            if delta2 > dmax:
                delta2 = dmax

        ADMout[0, i] = Sum
        codevec[0, i] = bit

    return ADMout, codevec, x[0]

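# A minimal round-trip sketch of the adaptive delta modulator above
# (hypothetical signal; default step sizes): encode to a {-1, 0, 1}
# codeword, then rebuild the staircase approximation from it.
def _demo_adm():
    t = linspace(0, 1, 200)
    x = 0.5 + 0.2*sin(2*pi*3*t)
    admout, code, h = adm(x)
    xhat = adm_reconstruct(code, h)
    print "reconstruction MSE:", mse(xhat[0], x)
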
def median_filter(v, K):
    v2 = zeros((len(v), ))
    for i in range(K, len(v)):
        seq = v[i-K:i]
        m = median(seq)
        v2[i-K:i] = m

    return v2


from models import ReverbModel

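# A minimal sketch of the hysteresis silence gate applied in __main__ below,
# factored out for clarity (thresholds are hypothetical): the mask switches
# to "silent" when the level falls below `low` and back to "active" only
# once it rises above `high` again.
def _hysteresis_gate(rms, high, low):
    silent = zeros((len(rms), ))
    for n in range(1, len(rms)):
        prev = rms[n-1]
        if prev >= high:
            silent[n] = 1 if rms[n] < low else 0
        elif prev <= low:
            silent[n] = 0 if rms[n] > high else 1
        else:
            silent[n] = silent[n-1]
    return silent
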
e@0
|
1661
|
e@0
|
1662
|
e@0
|
1663 if __name__=="__main__":
|
e@0
|
1664 if len(argv) != 2:
|
e@0
|
1665 print "[EE] Wrong number of arguments"
|
e@0
|
1666 print "[II] Correct syntax is:"
|
e@0
|
1667 print "[II] \t%s <trainingdir>"
|
e@0
|
1668 print "[II] where <trainingdir> is the directory containing the features files in the format *_features.yaml"
|
e@0
|
1669 print "[II] and the parameters in the format *_parameters.yaml"
|
e@0
|
1670 exit(-1)
|
e@0
|
1671
|
e@0
|
1672
|
e@0
|
1673 n_clusters = 3
|
e@0
|
1674 UpsamplingFactor = 1
|
e@0
|
1675 dmin = 0.001
|
e@0
|
1676 dmax = 0.28
|
e@0
|
1677 tol = 0.001
|
e@0
|
1678
|
e@0
|
1679 d1min = 0.01
|
e@0
|
1680 d1max = 0.1
|
e@0
|
1681
|
e@0
|
1682 g1min = 0.01
|
e@0
|
1683 g1max = 0.99
|
e@0
|
1684
|
e@0
|
1685 damin = 0.006
|
e@0
|
1686 damax = 0.012
|
e@0
|
1687
|
e@0
|
1688 Gmin = 0.01
|
e@0
|
1689 Gmax = 0.99
|
e@0
|
1690
|
e@0
|
1691 gcmin = 0.01
|
e@0
|
1692 gcmax = 0.99
|
e@0
|
1693
|
e@0
|
1694 # For searching the directory
|
e@0
|
1695 from glob import glob
|
e@0
|
1696
|
e@0
|
1697 traindir = argv[1]
|
e@0
|
1698
|
e@0
|
1699 songs_in_dir = glob("%s/*_features.yaml" % traindir)
|
e@0
|
1700
|
e@0
|
1701 print "[II] Using files: %s" % songs_in_dir
|
e@0
|
1702
|
e@0
|
1703
|
e@0
|
1704 chain_length=1
|
e@0
|
1705
|
e@0
|
1706
|
e@0
|
1707 # asdsd
|
e@0
|
1708
|
e@0
|
1709 # fullfeatures_pool = Pool()
|
e@0
|
1710
|
e@0
|
1711 features_vector = None
|
e@0
|
1712 parameters_vector = None
|
e@0
|
1713 index_vector = None
|
e@0
|
1714
|
e@0
|
1715 idx = 0
|
e@0
|
1716 # Shuffle segments
|
e@0
|
1717 random.shuffle(songs_in_dir)
|
e@0
|
1718 for f_ in songs_in_dir:
|
e@0
|
1719 infile = f_
|
e@0
|
1720 paramfile = "%s_parameters.yaml" % f_.split('_features.yaml')[0]
|
e@0
|
1721
|
e@0
|
1722 print "[II] Using features file: %s" % infile
|
e@0
|
1723 print "[II] and parameters file: %s" % paramfile
|
e@0
|
1724
|
e@0
|
1725
|
e@0
|
1726 # paramfile = infile.split(')
|
e@0
|
1727
|
e@0
|
1728 features_pool = YamlInput(filename = infile)()
|
e@0
|
1729 parameters_pool = YamlInput(filename = paramfile)()
|
e@0
|
1730
|
e@0
|
1731 d1 = parameters_pool['parameters.d1']
|
e@0
|
1732 da = parameters_pool['parameters.da']
|
e@0
|
1733 g1 = parameters_pool['parameters.g1']
|
e@0
|
1734 gc = parameters_pool['parameters.gc']
|
e@0
|
1735 G = parameters_pool['parameters.G']
|
e@0
|
1736
|
e@0
|
1737
|
e@0
|
1738
|
e@0
|
1739
|
e@0
|
1740
|
e@0
|
1741 feature_captions = features_pool.descriptorNames()
|
e@0
|
1742 parameter_captions = ['d1','da','g1','gc','G']
|
e@0
|
1743
|
e@0
|
1744
|
e@0
|
1745 svmhmmstr = ""
|
e@0
|
1746
|
e@0
|
1747
|
e@0
|
1748 for c in features_pool.descriptorNames():
|
e@0
|
1749 if c.split('.')[0] == 'metadata' or c.split('.')[0] == 'parameter':
|
e@0
|
1750 feature_captions.remove(c)
|
e@0
|
1751
|
e@0
|
1752
|
e@0
|
1753 # close('all')
|
e@0
|
1754
|
e@0
|
1755 # print "[II] Loaded training data from %s (%s) " % (infile, features_pool['metadata.date'][0])
|
e@0
|
1756 print "[II] %d Features Available: " % len(feature_captions)
|
e@0
|
1757
|
e@0
|
1758
|
e@0
|
1759
|
e@0
|
1760 print str(feature_captions).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7]
|
e@0
|
1761
|
e@0
|
1762 nfeatures_in = len(feature_captions)
|
e@0
|
1763 nparameters_in = 5
|
e@0
|
1764 features_vector_one = zeros((nfeatures_in, len(features_pool[feature_captions[0]])))
|
e@0
|
1765 index_vector_one = ones((len(features_pool[feature_captions[0]]),))
|
e@0
|
1766 parameters_vector_one = ones((nparameters_in, len(features_pool[feature_captions[0]])))
|
e@0
|
1767 # paramet
|
e@0
|
1768
|
e@0
|
1769
|
e@0
|
1770 for i in range(0, nfeatures_in):
|
e@0
|
1771 features_vector_one[i, :] = features_pool[feature_captions[i]].T
|
e@0
|
1772 # for i in range(0, nparameters_in):
|
e@0
|
1773 # parameters_vector[i, :] = features_pool[parameter_captions[0]].T
|
e@0
|
1774
|
e@0
|
1775 parameters_vector_one[0, :] = d1*parameters_vector_one[0,:]
|
e@0
|
1776 parameters_vector_one[1, :] = g1*parameters_vector_one[1,:]
|
e@0
|
1777 parameters_vector_one[2, :] = da*parameters_vector_one[2,:]
|
e@0
|
1778 parameters_vector_one[3, :] = gc*parameters_vector_one[3,:]
|
e@0
|
1779 parameters_vector_one[4, :] = G*parameters_vector_one[4,:]
|
e@0
|
1780
|
e@0
|
1781 # Stores example index number
|
e@0
|
1782
|
e@0
|
1783 index_vector_one *= idx
|
e@0
|
1784
|
e@0
|
1785
|
e@0
|
1786
|
e@0
|
1787
|
e@0
|
1788
|
e@0
|
1789
|
e@0
|
1790
|
e@0
|
1791
|
e@0
|
1792
|
e@0
|
1793 print "[II] %d parameters used:" % len(parameter_captions)
|
e@0
|
1794 print str(parameter_captions).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7].replace('parameter.','')
|
e@0
|
1795
|
e@0
|
1796 if features_vector is None:
|
e@0
|
1797 features_vector = features_vector_one
|
e@0
|
1798 else:
|
e@0
|
1799 features_vector = append(features_vector, features_vector_one, 1)
|
e@0
|
1800
|
e@0
|
1801 if parameters_vector is None:
|
e@0
|
1802 parameters_vector = parameters_vector_one
|
e@0
|
1803 else:
|
e@0
|
1804 parameters_vector = append(parameters_vector, parameters_vector_one, 1)
|
e@0
|
1805
|
e@0
|
1806 if index_vector is None:
|
e@0
|
1807 index_vector = index_vector_one
|
e@0
|
1808 else:
|
e@0
|
1809 index_vector = append(index_vector, index_vector_one)
|
e@0
|
1810
|
e@0
|
1811
|
e@0
|
1812 idx += 1
|
e@0
|
1813
|
e@0
|
1814
|
e@0
|
print "[II] Marking silent parts"
features_full_nontransformed_train = features_vector.copy()

silent_parts = zeros((1, features_vector.shape[1]))

rms = features_vector[feature_captions.index('rms'), :]

# Implementing a hysteresis gate -- the high threshold is a tenth of the
# mean RMS and the low threshold is a hundredth of the mean RMS.

rms_threshold_mean = mean(rms)

rms_threshold_max = max(rms)
rms_threshold_min = min(rms)

rms_threshold_high = 0.1 * rms_threshold_mean
rms_threshold_low = 0.01 * rms_threshold_mean
for n in range(1, len(rms)):
    prev = rms[n-1]
    curr = rms[n]

    if prev >= rms_threshold_high:
        if curr < rms_threshold_low:
            silent_parts[0, n] = 1
        else:
            silent_parts[0, n] = 0
    elif prev <= rms_threshold_low:
        if curr > rms_threshold_high:
            silent_parts[0, n] = 0
        else:
            silent_parts[0, n] = 1
    else:
        silent_parts[0, n] = silent_parts[0, n-1]

if silent_parts[0, 1] == 1:
    silent_parts[0, 0] = 1
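
# The gate above can be packaged as a small reusable helper. This is a
# minimal sketch (function name and arguments are illustrative, not part of
# the original pipeline): frames below `low` are marked silent, frames above
# `high` are marked active, and frames in between keep the previous state.
def hysteresis_gate(rms, high, low):
    """Return a boolean mask that is True on frames considered silent."""
    silent = zeros(len(rms), dtype=bool)
    for n in range(1, len(rms)):
        if rms[n] < low:            # definitely silent
            silent[n] = True
        elif rms[n] > high:         # definitely active
            silent[n] = False
        else:                       # inside the hysteresis band
            silent[n] = silent[n-1]
    if len(rms) > 1:
        silent[0] = silent[1]
    return silent
# e.g. mask = hysteresis_gate(rms, rms_threshold_high, rms_threshold_low)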

active_index = invert(silent_parts.flatten().astype(bool))

# Keep only the active parts

features_vector = features_vector[:, active_index]

# Keep this for debugging purposes

parameters_vector = parameters_vector[:, active_index]
# index_vector = index_vector[active_index]

# Label examples for a chain of chain_length

# idx = 0
# for i in range(0, len(index_vector)):
#     index_vector[i] = idx
#     if i % chain_length == 0:
#         idx += 1
# number_of_examples = idx

# Scale parameters to [0,1]

parameters_vector[0, :] = (parameters_vector[0, :] - d1min)/(d1max - d1min)  # d1
parameters_vector[1, :] = (parameters_vector[1, :] - g1min)/(g1max - g1min)  # g1
parameters_vector[2, :] = (parameters_vector[2, :] - damin)/(damax - damin)  # da
parameters_vector[3, :] = (parameters_vector[3, :] - gcmin)/(gcmax - gcmin)  # gc
parameters_vector[4, :] = (parameters_vector[4, :] - Gmin)/(Gmax - Gmin)     # G
moments_vector = zeros((features_vector.shape[0], 2))

features_vector_original = features_vector.copy()

print "[II] Storing moments vector"
for i in range(0, features_vector.shape[0]):
    mean_ = mean(features_vector[i, :])
    std_ = std(features_vector[i, :], ddof=1)
    moments_vector[i, 0] = mean_
    moments_vector[i, 1] = std_

    features_vector[i, :] = (features_vector[i, :] - mean_)/std_

features_vector_old_train = features_vector.copy()
# moments_vector_train = moments_vector
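
# The stored moments make the standardization invertible, which is handy
# when mapping transformed features back for inspection. A minimal sketch
# (the helper name is illustrative, not part of the original pipeline):
def destandardize(F, moments):
    """Inverse of the per-row standardization above: row*std + mean."""
    out = F.copy()
    for i in range(0, out.shape[0]):
        out[i, :] = out[i, :]*moments[i, 1] + moments[i, 0]
    return out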

print "[II] Extracting PCA configuration"

kernel, q, featurelist = extract_pca_configuration_from_data(features_vector)

# q = 1
# q = q + 1

print "[II] Optimal number of PCs to keep: %d" % q

feature_captions_array = array(feature_captions)

features_to_keep = list(feature_captions_array[featurelist])

print "[II] Decided to keep %d features:" % len(features_to_keep)
print str(features_to_keep).replace("', ", "\n").replace('[', '').replace("'", "[II]\t ")[:-7]

featurelist_bak = featurelist
features_kept_data = features_vector[featurelist, :]
features_kept_data_bak = features_kept_data.copy()
# features_vector_old_train = features_kept_data
features_vector = (kernel.T*features_kept_data)[0:q, :]
features_vector_new_train = features_vector.copy()

# example_features = None
# example_parameters = None
# example_idx = None
#
# for idx in range(0, shape(parameters_vector)[1]-chain_length):
#     example_features_ = features_vector[:, idx:idx+chain_length]
#     example_parameters_ = parameters_vector[:, idx:idx+chain_length]
#     example_idx_ = ones((shape(example_parameters_)[1],))
#
#     if example_features is None:
#         example_features = example_features_
#     else:
#         example_features = append(example_features, example_features_, 1)
#
#     if example_parameters is None:
#         example_parameters = example_parameters_
#     else:
#         example_parameters = append(example_parameters, example_parameters_, 1)
#
#     if example_idx is None:
#         example_idx = example_idx_*idx
#     else:
#         example_idx = append(example_idx, example_idx_*idx)
#
# features_vector = example_features
# parameters_vector = example_parameters
# index_vector = example_idx
print "[II] Trying ADM-coded parameters"
print "[II] Upsampling features and parameters by a factor of %d" % UpsamplingFactor

# Upsampled features and parameters
features_vector_upsampled = smooth_matrix_1D(repeat(features_vector, UpsamplingFactor, axis=1))
# features_vector_upsampled = repeat(features_vector, UpsamplingFactor, axis=1)
# features_vector_upsampled = smooth_matrix_1D(repeat(features_vector_old_train, UpsamplingFactor, axis=1))
# feature_labels_upsampled = repeat(features_clustering_labels, UpsamplingFactor, axis=0)
parameters_vector_upsampled = repeat(parameters_vector, UpsamplingFactor, axis=1)

# km = KMeans(init='k-means++', n_init=10, max_iter=300, tol=0.0000001, verbose=0, n_clusters=12).fit(parameters_vector_upsampled.T)
# centers_ = km.cluster_centers_
# labels__ = km.labels_
# Remove the following two lines in order to restore the non-kmeans version:
# parameters_vector_kmeans_upsampled = array(centers_[labels__])
# parameters_vector_upsampled = parameters_vector_kmeans_upsampled.T
#
# parameters_vector_upsampled = smooth_matrix_1D(parameters_vector_upsampled)
parameters_vector_upsampled_adm = matrix(zeros(shape(parameters_vector_upsampled)))
parameters_vector_upsampled_code = matrix(zeros(shape(parameters_vector_upsampled)))
parameters_vector_upsampled_firstval = matrix(zeros((parameters_vector_upsampled.shape[0], 1)))

# Reconstructed parameters

parameters_vector_upsampled_reconstructed = matrix(zeros(shape(parameters_vector_upsampled)))

# ADM (adaptive delta modulation) coding of each parameter track

def adm_matrix(X, dmin=0.001, dmax=0.28, tol=0.001):
    """Apply adm() row-wise; returns the reconstructed rows, the codes and the initial values."""
    out = matrix(zeros(shape(X)))
    code = matrix(zeros(shape(X)))
    firstval = matrix(zeros((X.shape[0], 1)))

    for i in range(0, X.shape[0]):
        out[i, :], code[i, :], firstval[i, 0] = adm(X[i, :], dmin=dmin, dmax=dmax, tol=tol)

    return out, code, firstval

def adm_matrix_reconstruct(code, firstval, dmin=0.001, dmax=0.28):
    """Row-wise inverse of adm_matrix()."""
    X = matrix(zeros(shape(code)))
    for i in range(0, code.shape[0]):
        X[i, :] = adm_reconstruct(code[i, :], firstval[i, 0], dmin=dmin, dmax=dmax)

    return X
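
# Note: adm() and adm_reconstruct() are expected from the adpcm module,
# whose import is commented out at the top of this file. The pair below is
# a stand-in sketch under assumed semantics (a one-bit adaptive delta coder
# whose step doubles on repeated codes and halves otherwise, clamped to
# [dmin, dmax], with a dead zone of width tol); illustrative only, not the
# original implementation.
# (To run without adpcm one could set: adm = _adm_sketch,
#  adm_reconstruct = _adm_reconstruct_sketch)
def _adm_sketch(x, dmin=0.001, dmax=0.28, tol=0.001):
    """Return (reconstruction, code, first value) for one parameter track."""
    x = ravel(x)
    out = zeros(len(x))
    code = zeros(len(x))
    est = x[0]
    step = dmin
    out[0] = est
    for n in range(1, len(x)):
        err = x[n] - est
        if abs(err) >= tol:
            code[n] = sign(err)
            step = min(max(step*2.0 if code[n] == code[n-1] else step/2.0, dmin), dmax)
            est = est + code[n]*step
        out[n] = est
    return out, code, x[0]

def _adm_reconstruct_sketch(code, firstval, dmin=0.001, dmax=0.28):
    """Decode a code sequence produced by _adm_sketch()."""
    code = ravel(code)
    out = zeros(len(code))
    est = firstval
    step = dmin
    out[0] = est
    for n in range(1, len(code)):
        if code[n] != 0:
            step = min(max(step*2.0 if code[n] == code[n-1] else step/2.0, dmin), dmax)
            est = est + code[n]*step
        out[n] = est
    return out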

parameters_vector_upsampled_adm, parameters_vector_upsampled_code, parameters_vector_upsampled_firstval = adm_matrix(parameters_vector_upsampled, dmin, dmax, tol)
# parameters_vector_upsampled = parameters_vector_upsampled_code

def diff_and_pad(X):
    """First difference along axis 1, zero-padded so the shape is kept."""
    return concatenate((zeros((shape(X)[0], 1)), diff(X, axis=1)), axis=1)

# To work on the ADM codes instead, change to:
# parameters_vector_upsampled = parameters_vector_upsampled_code
print "[II] Clustering features."

features_clustering = GMM(n_components=n_clusters, covariance_type='diag')

features_clustering.fit(features_vector_upsampled.T)

features_clustering_means = features_clustering.means_
features_clustering_labels = features_clustering.predict(features_vector_upsampled.T)
features_clustering_sigmas = features_clustering.covars_

features_vector_upsampled_estimated = zeros(shape(features_vector_upsampled))

for n in range(0, len(features_vector_upsampled_estimated[0])):
    features_vector_upsampled_estimated[:, n] = features_clustering_means[features_clustering_labels[n]]

print "[II] Features MSE for %d clusters: %.3f" % (n_clusters, mse(features_vector_upsampled, features_vector_upsampled_estimated))


def happy_curve_classification(data, classes, estimator, Nd=1):
    print "[II] Generating Happy Curve"
    from copy import deepcopy
    estimator_fulldata = deepcopy(estimator)
    estimator_fulldata.fit(data, classes)
    labels = estimator_fulldata.predict(data)

    # 1. Split the data in two, training and testing

    Ntr = int(round(data.shape[0]/2))  # Training dataset size
    Nts = data.shape[0] - Ntr          # Testing dataset size

    ratios = []

    for n in range(Nd, Ntr):
        train, test, trainl, testl = cross_validation.train_test_split(data, classes, test_size=0.5, random_state=0)
        train = train[0:n, :]
        trainl = trainl[0:n]
        estimator_ = deepcopy(estimator)
        estimator_.fit(train, trainl)
        labels = estimator_.predict(test)

        ratio = sum(array(testl == labels).astype(float))/len(labels)

        ratios.append(ratio)

    return ratios

def cross_validate_classification(data, classes, estimator):
    print "[II] Crossvalidating..."
    from copy import deepcopy
    estimator_fulldata = deepcopy(estimator)
    estimator_fulldata.fit(data, classes)

    percents = arange(0.1, 0.9, 0.1)
    MSEs = []
    labels = estimator_fulldata.predict(data)

    print "[II] for full training-testing: %.2f" % (sum(array(classes == labels).astype(float))/len(labels))

    for p in percents:
        train, test, trainlabels, testlabels = cross_validation.train_test_split(data, classes, test_size=p, random_state=0)
        estimator_ = deepcopy(estimator)
        estimator_.fit(train, trainlabels)
        labels = estimator_.predict(test)
        print "[II] for training(%.2f)-testing(%.2f): %.2f" % ((1-p), p, sum(array(testlabels == labels).astype(float))/len(labels))

    return MSEs
def cross_validate_clustering(data, estimator):
    print "[II] Crossvalidating..."
    estimator_fulldata = estimator
    estimator_fulldata.fit(data)

    means = estimator_fulldata.means_

    percents = arange(0.1, 0.6, 0.1)
    MSEs = []
    reconstructed = zeros(shape(data))
    labels = estimator.predict(data)
    for n in range(0, len(reconstructed)):
        reconstructed[n, :] = means[labels[n]]

    MSEs.append(mse(data, reconstructed))
    for p in percents:
        train, test = cross_validation.train_test_split(data, test_size=p, random_state=0)
        train = matrix(train)
        test = matrix(test)

        estimator.fit(train)
        means = estimator.means_
        labels = estimator.predict(test)
        reconstructed = zeros(shape(test))
        for n in range(0, len(reconstructed)):
            reconstructed[n, :] = means[labels[n]]

        m = mse(test, reconstructed)

        print "[II] MSE for clustering crossvalidated data %.2f-%.2f: %.5f" % ((1-p), p, m)
        MSEs.append(m)

    print "[II] Crossvalidation complete"

    return MSEs


# Construct the parameter alphabet: each symbol is a distinct column vector
# of the parameter code matrix.

def vector_to_states(X):
    """
    Input: an MxN matrix with N samples of M variables.
    Output: the state sequence `state_seq', a codeword dictionary
    `parameters_alphabet' and its inverse `parameters_alphabet_inv'.
    """

    parameters_alphabet = {}
    n = 0

    for i in range(0, X.shape[1]):
        vec = tuple(ravel(X[:, i]))
        if vec not in parameters_alphabet:
            parameters_alphabet[vec] = n
            n += 1

    parameters_alphabet_inv = dict([(parameters_alphabet[m], m) for m in parameters_alphabet])

    state_seq = array([parameters_alphabet[tuple(ravel(X[:, m]))] for m in range(0, X.shape[1])])

    return state_seq, parameters_alphabet, parameters_alphabet_inv

def states_to_vector(predicted, parameters_alphabet_inv):
    """Inverse mapping: turn a state sequence back into column vectors."""
    estimated = matrix(zeros((len(parameters_alphabet_inv[0]), len(predicted))))
    for i in range(0, len(predicted)):
        estimated[:, i] = matrix(parameters_alphabet_inv[predicted[i]]).T

    return estimated
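
# Quick round-trip sanity check (illustrative helper, not part of the
# pipeline): mapping a matrix to states and back must reproduce its columns.
def _check_state_mapping_roundtrip(M):
    s, alpha, alpha_inv = vector_to_states(M)
    return mse(states_to_vector(s, alpha_inv), M) == 0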
# state_seq, parameters_alphabet, parameters_alphabet_inv = vector_to_states(parameters_vector_upsampled_code)

# parameters_change_variable = matrix(diff_and_pad(parameters_vector_upsampled) != 0).astype(int)
# changes_state_seq, changes_parameters_alphabet, changes_parameters_alphabet_inv = vector_to_states(parameters_change_variable)

# This would be an HMM that codes only the changes: two states,
# "change" and "stay the same".

# parameters_vector_upsampled = parameters_vector_upsampled_code
# parameters_state_orig, parameter_state_alphabet_orig, parameter_state_alphabet_inv_orig = vector_to_states(parameters_vector)
parameters_state, parameter_state_alphabet, parameter_state_alphabet_inv = vector_to_states(parameters_vector_upsampled)


# Remove outliers: fit one elliptic envelope per parameter state and mark
# the samples it rejects.
# (A single envelope over all samples would be:
#  inliers = EllipticEnvelope().fit(features_vector_upsampled.T).predict(features_vector_upsampled.T) == 1)

inoutliers = zeros((len(parameters_state),))

for p in unique(parameters_state):
    ee = EllipticEnvelope(contamination=0.1)
    mask = parameters_state == p
    inoutliers[mask] = ee.fit(features_vector_upsampled[:, mask].T).predict(features_vector_upsampled[:, mask].T)

# Label inliers and outliers

inliers = inoutliers > 0
outliers = inoutliers < 0

# Do pre-processing and re-label the outliers

# completeavac = SinkHoleClassifier()
# completeavac.fit(features_vector_upsampled[:, inliers].T, parameters_state[inliers])
# parameters_state_original = parameters_state.copy()
# parameters_state[outliers] = completeavac.predict(features_vector_upsampled[:, outliers].T)

# features_vector_upsampled = features_vector_upsampled[:, inliers]
# parameters_state = parameters_state[inliers]
print "[II] Testing Gaussian Naive Bayes Classifier"
gnb = GaussianNB()
gnb.fit(features_vector_upsampled.T, parameters_state)

parameters_state_estimated = gnb.predict(features_vector_upsampled.T)

output = states_to_vector(parameters_state_estimated, parameter_state_alphabet_inv)

figure()
subplot(211)
plot(parameters_vector_upsampled.T)
title('Parameter value')
ylabel('value')
xlabel('frame \#')
subplot(212)
# plot(smooth_matrix_1D(output).T)
plot(output.T)
ylabel('value')
xlabel('frame \#')

errorvector_nbc = [mse(output[:, i], parameters_vector_upsampled[:, i]) for i in range(0, len(parameters_state_estimated))]

# cross_validate_classification(features_vector_upsampled.T, parameters_state, gnb)
# hc = happy_curve_classification(features_vector_upsampled.T, parameters_state, gnb)
#
# figure()
# plot(hc)
# figure()
print "[II] Trying Multinomial HMM"

# In order to do classification with HMMs, we need to:
# 1. Split the parameters into classes
# 2. Train one model per class
# 3. Feed our data to all the models
# 4. Assign each sequence to the class whose model scores it highest
#    (a sketch of this maximum-likelihood scheme follows below).
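
# A minimal sketch of that scheme, assuming a list `models` whose entries
# expose score(seq) returning a log-likelihood (as hmmlearn models do); the
# names here are illustrative:
def classify_by_likelihood(seq, models):
    """Return the index of the model that scores seq highest."""
    scores = array([m.score(seq) for m in models])
    return argmax(scores)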
class SVMClassifier:
    def __init__(self, **kwargs):
        print "[II] SVM Classifier"
        # self.clf = svm.SVC(**kwargs)
        self.name = "SVM"
        self.smallname = "svm"
        self.clf = multiclass.OneVsOneClassifier(svm.SVC(**kwargs))

    def fit(self, X, classes):
        self.clf.fit(X, classes)

    def predict(self, X):
        return self.clf.predict(X)

    def getName(self):
        return self.name

    def cross_validate(self, data, classes):
        print "[II] SVM Classifier Crossvalidating..."
        from copy import deepcopy
        estimator = deepcopy(self)
        estimator_fulldata = deepcopy(self)
        estimator_fulldata.fit(data, classes)

        percents = arange(0.1, 0.9, 0.1)
        self.percents = percents * 100

        RATIOS = []
        labels = estimator_fulldata.predict(data)

        print "[II] for full training-testing: %.2f" % (sum(array(classes == labels).astype(float))/len(labels))

        for p in percents:
            traindata, testdata, trainlabels, testlabels = cross_validation.train_test_split(data, classes, test_size=p, random_state=1)

            # Training phase
            estimator_ = deepcopy(estimator)
            estimator_.fit(traindata, trainlabels)

            predicted_labels = estimator_.predict(testdata)

            m = sum(array(predicted_labels == testlabels).astype(float))/len(testlabels)

            print "[II] [%.2f-%.2f] hold-out validation for SVM Classifier: %.1f" % (p, (1-p), m)

            RATIOS.append(m)

        return RATIOS, percents


class HmmClassifier2:
    def __init__(self, N=2, chain_size=4, n_components=1):
        print "[II] HMM Classifier (%d states, %d components)" % (N, n_components)
        self.name = "HMM (%d states, %d components)" % (N, n_components)
        self.n_components = n_components
        self.chain_size = chain_size
        self.hmms_ = []
        self.N = N
        self.n_states = N

    def getName(self):
        return self.name

    def fit(self, features, parameters):
        self.n_params = len(unique(parameters))

        for n in range(0, self.n_params):
            hmm_ = hmm.GaussianHMM(n_components=self.n_components, covariance_type='full')

            # Get the training data for each parameter
            for i in range(0, len(parameters)-self.chain_size):
                seq = features[i:i+self.chain_size, :]
                if parameters[i+self.chain_size-1] == n and seq.shape[0] == self.chain_size:
                    hmm_.fit([seq])

            self.hmms_.append(hmm_)

    def predict(self, X):
        labels = zeros((X.shape[0],))

        scores = zeros((self.n_states,))

        for i in range(0, len(labels)):
            testdata = X[i:i+self.chain_size, :]

            for j in range(0, self.n_states):
                scores[j] = self.hmms_[j].score(testdata)
            labels[i] = argmax(scores)

        return labels

    def cross_validate(self, data, classes):
        print "[II] HMM2 Classifier Crossvalidating..."
        from copy import deepcopy
        estimator = deepcopy(self)
        estimator_fulldata = deepcopy(self)
        estimator_fulldata.fit(data, classes)

        percents = arange(0.1, 0.9, 0.1)
        self.percents = percents * 100

        RATIOS = []
        labels = estimator_fulldata.predict(data)

        print "[II] for full training-testing: %.2f" % (sum(array(classes == labels).astype(float))/len(labels))

        for p in percents:
            traindata, testdata, trainlabels, testlabels = cross_validation.train_test_split(data, classes, test_size=p, random_state=1)

            # Training phase
            estimator_ = deepcopy(estimator)
            estimator_.fit(traindata, trainlabels)

            predicted_labels = estimator_.predict(testdata)

            m = sum(array(predicted_labels == testlabels).astype(float))/len(testlabels)

            print "[II] [%.2f-%.2f] hold-out validation for HMM2 Classifier: %.1f" % (p, (1-p), m)

            RATIOS.append(m)

        return RATIOS, percents


class HmmClassifier:
    def __init__(self, N=2, n_components=1):
        print "[II] HMM Classifier (%d states, %d components)" % (N, n_components)
        self.name = "HMM (%d states, %d components)" % (N, n_components)
        self.n_components = n_components
        self.chain_size = N
        self.hmms_ = []
        self.N = N

    def getName(self):
        return self.name

    def fit(self, X, states):
        self.n_states = len(unique(states))

        for n in range(0, self.n_states):
            hmm_ = hmm.GaussianHMM(n_components=self.n_components, covariance_type='full')

            # Get the training data for each class
            vector = X[states == n, :]

            # Fit the HMM
            hmm_.fit([vector])

            # And append it to the list
            self.hmms_.append(hmm_)

    def predict(self, X):
        labels = zeros((X.shape[0],))
        N = self.N

        m = 0

        scores = zeros((1, self.n_states))

        while m*N < X.shape[0]:
            if (m+1)*N > X.shape[0]:
                testdata = X[m*N:, :]
            else:
                testdata = X[m*N:(m+1)*N, :]

            for i in range(0, self.n_states):
                scores[0, i] = self.hmms_[i].score(testdata)

            if (m+1)*N > X.shape[0]:
                labels[m*N:] = argmax(scores)
            else:
                labels[m*N:(m+1)*N] = argmax(scores)

            m += 1

        return labels

    # def cross_validate(self, data, classes, index_vector):
    #     print "[II] Crossvalidating..."
    #     from copy import deepcopy
    #     estimator = deepcopy(self)
    #     estimator_fulldata = deepcopy(self)
    #     estimator_fulldata.fit(data, classes)
    #
    #     percents = arange(0.1, 0.9, 0.1)
    #     self.percents = percents * 100
    #     RATIOS = []
    #     labels = estimator.predict(data)
    #
    #     print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(labels))
    #
    #     for p in percents:
    #         train, test, trainlabels, testlabels = cross_validation.train_test_split(data, classes, test_size=p, random_state=1)
    #         estimator_ = deepcopy(estimator)
    #         estimator_.fit(train, trainlabels)
    #         labels = estimator_.predict(test)
    #         m = sum(array(testlabels==labels).astype(float))/len(labels)
    #         RATIOS.append(m)
    #         print "[II] for training(%.2f)-testing(%.2f): %.2f" % ((1-p), p, m)
    #
    #     return RATIOS, percents
    def cross_validate(self, data, classes):
        print "[II] HMM Classifier Crossvalidating..."
        from copy import deepcopy
        estimator = deepcopy(self)
        estimator_fulldata = deepcopy(self)
        estimator_fulldata.fit(data, classes)

        percents = arange(0.1, 0.9, 0.1)
        self.percents = percents * 100

        RATIOS = []
        labels = estimator_fulldata.predict(data)

        print "[II] for full training-testing: %.2f" % (sum(array(classes == labels).astype(float))/len(labels))

        for p in percents:
            traindata, testdata, trainlabels, testlabels = cross_validation.train_test_split(data, classes, test_size=p, random_state=1)

            # Training phase
            estimator_ = deepcopy(estimator)
            estimator_.fit(traindata, trainlabels)

            predicted_labels = estimator_.predict(testdata)

            # Alternative: split by example index so chains are never cut:
            # traindata = None; trainlabels = None
            # for t in train:
            #     if traindata is None:
            #         traindata = data[index_vector == t, :]
            #         trainlabels = classes[index_vector == t]
            #     else:
            #         traindata = append(traindata, data[index_vector == t, :], 0)
            #         trainlabels = append(trainlabels, classes[index_vector == t])
            # (and likewise for testdata/testlabels)

            m = sum(array(predicted_labels == testlabels).astype(float))/len(testlabels)

            print "[II] [%.2f-%.2f] hold-out validation for HMM Classifier: %.1f" % (p, (1-p), m)

            RATIOS.append(m)

        return RATIOS, percents

def cross_validate_full_k_fold(data, classes, list_of_classifiers, K=10):
    """K-fold cross-validates all available classifiers and plots barplots."""

    ratios = []
    for l in list_of_classifiers:
        ratio = l.k_fold_cross_validate(data, classes, K)
        ratios.append(ratio)

    L = len(ratios)

    ind = arange(L)

    fig, ax = subplots()
    rects = []

    width = 10/float(len(ratios))

    colors = ['r', 'y', 'g', 'c']

    labels_ = []

    for i in range(0, len(ratios)):
        rects.append(ax.bar(i*width, ratios[i], width, color=colors[i % len(colors)]))
        labels_.append(list_of_classifiers[i].getName())

    return ratios


def cross_validate_full(seq, classes, list_of_classifiers):
    """Cross-validates all available classifiers and plots grouped barplots."""

    ratios = []
    percents = []
    for l in list_of_classifiers:
        ratios_, percents_ = l.cross_validate(seq, classes)
        ratios.append(ratios_)
        percents.append(percents_)

    L = len(ratios[0])

    ind = arange(L)

    fig, ax = subplots()

    rects = []

    W = 0.8
    width = W/float(len(ratios))

    colors = ['r', 'y', 'g', 'c']

    labels_ = []
    for i in range(0, len(ratios)):
        rects.append(ax.bar(ind+i*width, ratios[i], width, color=colors[i % len(colors)]))
        labels_.append(list_of_classifiers[i].getName())

    ax.legend(tuple(labels_))

    ax.set_xticks(ind+W/2)
    ax.set_xticklabels(tuple(percents[0]*100))
    ax.set_xlabel("Percentage % of test data")
    ax.set_ylabel("Success ratio")

    # rects1 = ax.bar(ind, ratios[0], 0.35, color='r')
    # rects2 = ax.bar(ind+0.351, ratios[1], 0.35, color='y')

    return ratios

# hmmc = HmmClassifier(N=len(unique(parameters_state)), n_components=1)
# hmmc.fit(features_vector_upsampled.T, parameters_state)
#
# hmmc2 = HmmClassifier(N=12, n_components=4)
# hmmc2.fit(features_vector_upsampled.T, parameters_state)

svmc = SVMClassifier(probability=True)
svmc.fit(features_vector_upsampled.T, parameters_state)
# svmc.cross_validate(features_vector_upsampled.T, parameters_state, index_vector)

nbc = NBClassifier()
nbc.fit(features_vector_upsampled.T, parameters_state)

# hchmmc = happy_curve_classification(features_vector_upsampled.T, parameters_state, hmmc, Nd=100)
# cross_validate_classification(features_vector_upsampled.T, parameters_state, hmmc)
# hmmc.cross_validate(features_vector_upsampled.T, parameters_state)

hmmc3 = HmmClassifier3(N=2, n_components=max(unique(parameters_state)))
# print(parameters_state)
plot(parameters_state)

print(shape(features_vector_upsampled))
obs = hmmc3.fit(features_vector_upsampled.T, parameters_state)

# svmhmm = HmmSVMClassifier()
# svmhmm.fit(features_vector_upsampled.T, parameters_state)
# ratios = cross_validate_full(features_vector_upsampled.T, parameters_state, [nbc, svmc])
# ratios = cross_validate_full_k_fold(features_vector_upsampled.T, parameters_state, [hmmc3])
def approximate_chain_size(data, parameters):
    ratios = []

    # chainsizes = arange(1, 40)
    # for cs in chainsizes:
    #     print "[II] Trying chain size: %d" % cs
    #     hmmc = HmmClassifier3(N=cs, n_components=2)
    #     ratios.append(hmmc.k_fold_cross_validate(data, parameters, K=10))

    componentssize = arange(1, 12)

    for cs in componentssize:
        print "[II] Trying component size: %d" % cs
        hmmc = HmmClassifier3(N=2, n_components=cs)
        ratios.append(hmmc.k_fold_cross_validate(data, parameters, K=2))

    figure()
    plot(componentssize, ratios)
    xlabel('Number of Components')
    ylabel('Success Ratio')
    title('2-fold cross-validation success ratio vs number of components')

    return ratios

# ratios = approximate_chain_size(features_vector_upsampled.T, parameters_state)
# predictedhmmc2 = hmmc2.predict(features_vector_upsampled.T)

# figure()

# hmms_ = []
#
# for n in range(0, len(parameter_state_alphabet)):
#     # hmm_ = hmm.GMMHMM(n_components=1, n_mix=2)
#     hmm_ = hmm.GaussianHMM(n_components=1, covariance_type='full')
#
#     # Get the training data for each class
#     vector = features_vector_upsampled[:, parameters_state == n]
#
#     # if vector.shape[1] < n_clusters:
#     #     hmms_.append(None)
#     # else:
#     hmm_.fit([vector.T])
#
#     # Append to the list
#     hmms_.append(hmm_)
#
# labels = zeros((features_vector_upsampled.shape[1],))
#
# N = 20
# m = 0
#
# while m*N < features_vector_upsampled.shape[1]:
#
#     scores = zeros((1, len(parameter_state_alphabet)))
#
#     if (m+1)*N > features_vector_upsampled.shape[1]:
#         testdata = features_vector_upsampled[:, m*N:]
#     else:
#         testdata = features_vector_upsampled[:, m*N:(m+1)*N]
#
#     for i in range(0, len(parameter_state_alphabet)):
#         if hmms_[i] is not None:
#             scores[0, i] = hmms_[i].score(testdata.T)
#         else:
#             scores[0, i] = -100000  # Very large negative score
#     if (m+1)*N >= features_vector_upsampled.shape[1]:
#         labels[m*N:] = argmax(scores)
#     else:
#         labels[m*N:(m+1)*N] = argmax(scores)
#
#     m += 1

# figure()
# plot(labels.T)

# labels = hmmc.predict(features_vector_upsampled.T)
# estimated = states_to_vector(labels, parameter_state_alphabet_inv)
# plot(estimated.T, 'r--')
#
# title('Estimated parameter values')
# legend(['Naive Bayes Classifier', 'HMM chain size %d (%.1fms)' % (N, float(N)/UpsamplingFactor*23.0)])
#
# ylabel('value')
# xlabel('frame #')

# close('all')

# plot(features_clustering_labels/float(max(features_clustering_labels)))
# plot(parameters_vector_upsampled.T/max(ravel(parameters_vector_upsampled)))

def plot_clusters(x, labels):
    figure()
    symbols = ['>', 'x', '.', '<', 'v']
    colors = ['b', 'r', 'g', 'm', 'c']

    for r in range(0, x.shape[0]):
        scatter(x[r, 0], x[r, 1], c=colors[int(labels[r]) % len(colors)], marker=symbols[int(labels[r]) % len(symbols)])

# plot_clusters(features_vector_upsampled.T, parameters_state)

# SVM
class HmmClassifierSVN_Naive:
    def __init__(self, N=2, n_components=1):
        self.n_components = n_components
        self.chain_size = N
        self.hmms_ = []
        self.N = N

    def fit(self, X, states):
        self.n_states = len(unique(states))

        for n in range(0, self.n_states):
            hmm_ = hmm.GaussianHMM(n_components=self.n_components, covariance_type='full')

            # Get the training data for each class
            vector = X[states == n, :]

            # Fit the HMM
            hmm_.fit([vector])

            # And append it to the list
            self.hmms_.append(hmm_)

    def predict(self, X):
        labels = zeros((X.shape[0],))
        N = self.N

        m = 0

        scores = zeros((1, self.n_states))

        while m*N < X.shape[0]:
            if (m+1)*N > X.shape[0]:
                testdata = X[m*N:, :]
            else:
                testdata = X[m*N:(m+1)*N, :]

            for i in range(0, self.n_states):
                scores[0, i] = self.hmms_[i].score(testdata)

            if (m+1)*N > X.shape[0]:
                labels[m*N:] = argmax(scores)
            else:
                labels[m*N:(m+1)*N] = argmax(scores)

            m += 1

        return labels


def plot_parameters_estimation(list_of_estimators):
    for e in list_of_estimators:
        name_ = e.getName()

        fig = figure()
        fig.suptitle('Parameters estimation using %s' % name_, fontweight='bold')
        subplot(311)
        title('original parameters')
        param_real = states_to_vector(parameters_state, parameter_state_alphabet_inv).T
        plot(param_real)
        xlabel('sample')
        ylabel('normalized parameter value')
        legend(parameter_captions)
        subplot(312)
        title('estimated parameters')
        pred = e.predict(features_vector_upsampled.T)
        param_est = states_to_vector(pred, parameter_state_alphabet_inv).T
        plot(param_est)
        xlabel('sample')
        ylabel('normalized parameter value')
        legend(parameter_captions)
        subplot(313)
        error_real_est = zeros((len(param_est),))
        for i in range(0, len(error_real_est)):
            error_real_est[i] = mse(param_real[i], param_est[i])

        totalmse = sum(error_real_est)
        title('mean squared error (total: %.3f)' % totalmse)

        plot(error_real_est)
        xlabel('sample')
        ylabel('mean squared error')

# hmm2 = hmm.MultinomialHMM(n_components=n_classes, startprob=prior_p_q, transmat=transmat)

# TODO: Embed SVM emissions in an HMM. Pick the maximum-likelihood sequence
# from the per-class models as in HmmClassifier3: for every class, train an
# SVM over n_components states and use its class probabilities as emission
# likelihoods when scoring. Should work.
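
# A compact sketch of the TODO above, assuming a classifier with
# predict_proba() (e.g. an sklearn SVC built with probability=True), a
# row-stochastic `transmat` and a `prior`; the SVM posteriors stand in for
# (scaled) emission likelihoods, so this is illustrative rather than exact:
def svm_emission_loglikelihood(seq, clf, transmat, prior):
    """Forward-algorithm score of seq with SVM posteriors as emissions."""
    logB = log(clf.predict_proba(seq))   # (T, n_states) emission log-probs
    log_alpha = log(prior) + logB[0, :]
    for t in range(1, logB.shape[0]):
        log_alpha = array([logsumexp(log_alpha + log(transmat[:, j]))
                           for j in range(logB.shape[1])]) + logB[t, :]
    return logsumexp(log_alpha)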
# class HMMsvmClassifier:
#     def __init__(self, N=6):
#         print "[II] HMM-SVM Classifier (%d time steps)" % N
#         self.name = "HMM-SVM (%d time steps)" % N
#         self.chain_size = N
#         self.svm = MyAVAClassifier()
#
#     def getName(self):
#         return self.name
#
#     def fit(self, features, parameters):
#
#         # First train the emission SVM
#         self.svm.fit(features, parameters)
#
#         n_classes = max(unique(parameters)) + 1
#
#         print "[II] Number of classes: ", n_classes
#         hmms = [None]*n_classes
#         chain_size = self.chain_size
#         params = [None]*n_classes
#         obs = [None]*n_classes
#
#         for i in range(chain_size, len(parameters)):
#             class_ = parameters[i-1]
#             params_ = parameters[i-chain_size:i]
#             feats_ = features[i-chain_size:i, :]
#
#             if params[class_] is None:
#                 params[class_] = [params_]
#                 obs[class_] = [feats_]
#             else:
#                 params[class_].append(params_)
#                 obs[class_].append(feats_)
#
#         for i in range(0, len(params)):
#             if params[i] is not None and len(params[i]) != 0:
#                 hmm_ = HMMsvm(self.svm)
#                 hmm_.fit(obs[i], params[i])
#                 hmms[i] = hmm_
#
#         self.hmms = hmms
#
#         return obs
#
#     def predict(self, features, mfilt=20):
#         chain_size = self.chain_size
#         hmms = self.hmms
#         predicted_classes = zeros((features.shape[0],))
#         for i in range(chain_size, features.shape[0]):
#             scores = zeros((len(hmms),))
#
#             seq = features[i-chain_size:i, :]
#
#             for j in range(0, len(hmms)):
#                 if hmms[j] is not None:
#                     q, p = hmms[j].logviterbi(seq)
#                     scores[j] = logsumexp(p)
#                 else:
#                     scores[j] = -infty
#
#             predicted_classes[i] = argmax(scores)
#
#         # Optionally median-filter at the end:
#         # predicted_classes = median_filter(predicted_classes, mfilt)
#
#         return predicted_classes
class HmmSvmClassifier3:
    def __init__(self, N=2, n_components=1):
        print "[II] HMM/SVM Classifier (%d states, %d components)" % (N, n_components)
        self.name = "HMM (%d time steps, %d components)" % (N, n_components)
        self.n_components = n_components
        self.smallname = "hmmsvmc"
        self.chain_size = N

    def getName(self):
        return self.name

    def fit(self, features, parameters):
        n_classes = max(unique(parameters)) + 1

        if n_classes == 1:
            self.only_one_class = True
            return
        else:
            self.only_one_class = False

        print "[II] Number of classes: ", n_classes
        hmms = [None]*n_classes
        chain_size = self.chain_size
        obs = [None]*n_classes
        cls = [None]*n_classes

        for i in range(chain_size, len(parameters)):
            class_ = parameters[i-1]
            seq = features[i-chain_size:i, :]
            params = parameters[i-chain_size:i]

            if obs[class_] is None:
                obs[class_] = [seq]
                cls[class_] = [params]
            else:
                obs[class_].append(seq)
                cls[class_].append(params)

        for i in range(0, len(obs)):
            if obs[i] is not None and len(obs[i]) != 0:
                svm_ = MyAVAClassifier()

                # Concatenate this class's chains along the sample axis
                # (each chain holds its samples in rows).
                O = obs[i][0]
                C = cls[i][0]

                for j in range(1, len(obs[i])):
                    O = append(O, obs[i][j], 0)
                    C = append(C, cls[i][j])

                svm_.fit(O, C)
                hmm_ = HMMsvm(svm_)
                hmm_.fit(obs[i], cls[i])
                hmms[i] = hmm_

        self.hmms = hmms

        return obs

    def predict(self, features, mfilt=20):

        if self.only_one_class:
            return zeros((features.shape[0],))

        chain_size = self.chain_size
        hmms = self.hmms
        predicted_classes = zeros((features.shape[0],))

        for i in range(chain_size, features.shape[0]):
            scores = zeros((len(hmms),))

            seq = features[i-chain_size:i, :]

            for j in range(0, len(hmms)):
                if hmms[j] is not None:
                    scores[j] = hmms[j].score(seq)[j]
                else:
                    scores[j] = -infty

            predicted_classes[i] = argmax(scores)

        # Optionally median-filter at the end:
        # predicted_classes = median_filter(predicted_classes, mfilt)

        return predicted_classes
    # Baum-Welch-style re-estimation sketch (not active):
    #
    # # Alphas and Betas
    # X = features
    # alphas = zeros((T, n_classes))
    # betas = zeros((T, n_classes))
    #
    # forward_messages = zeros((T, n_classes))
    # backward_messages = zeros((T, n_classes))
    #
    # print "[II] Computing alpha, beta..."
    # for t in range(1, T+1):
    #     forward_messages[t-1, :] = self._forward_message(forward_messages[t-2, :], X[0:t, ])
    #     backward_messages[-t, :] = self._backward_message(backward_messages[-t+1], X[-t:, :])
    #
    # print "[II] Computing ksu..."
    #
    # a_norm = forward_messages[-1, n_classes-1]  # alpha_T
    #
    # ksu = zeros((T, n_classes, n_classes))
    #
    # for i in range(0, n_classes):
    #     for j in range(0, n_classes):
    #         for t in range(0, T-1):
    #             ksu[t, i, j] = exp(forward_messages[t, i] + log(transmat[i, j]) + log(self.estimate_emission_probability(X[t, :], j)) + backward_messages[t+1, j] - a_norm)
    #
    # self.alphas = forward_messages
    # self.betas = backward_messages
    # self.ksu = ksu
    #
    # transmat_new = zeros((n_classes, n_classes))
    #
    # for i in range(0, n_classes):
    #     for j in range(0, n_classes):
    #         transmat_new[i, j] = sum(ksu[:, i, j])/sum(ksu[:, i, :])
    #
    # self.transmat_new = transmat_new

    # def forward(self, X):
    #     # Compute the log-likelihood using the forward algorithm
    #
    #     # Number of states
    #     N = self.n_classes
    #
    #     # Number of observations
    #     T = X.shape[0]
    #
    #     transmat = self.transmat
    #
    #     # Initialization
    #     a1 = self.prior
    #
    #     alpha = zeros((N,))
    #     for i in range(0, N):
    #         alpha[i] = log(a1[i]) + log(self.estimate_emission_probability(X[0, :], i))
    #
    #     # Recursion
    #     for t in range(1, T):
    #         alpha_old = alpha.copy()
    #         x = X[t, :]
    #         for j in range(0, N):
    #             tosum = zeros((N,))
    #             for i in range(0, N):
    #                 tosum[i] = alpha_old[i] + log(transmat[i, j])
    #
    #             Sum = logsumexp(tosum)
    #             bj = self.estimate_emission_probability(x, j)
    #
    #             alpha[j] = Sum + log(bj)
    #
    #     tosum = zeros((N,))
    #     for i in range(0, N):
    #         tosum[i] = alpha[i] + log(transmat[i, N-1])
    #
    #     return tosum
    #
    # def backward(self, X):
    #     # Number of states
    #     N = self.n_classes
    #
    #     # Number of observations
    #     T = X.shape[0]
    #
    #     transmat = self.transmat
    #
    #     # Initialization
    #     b1 = ones((N,))/N
    #
    #     beta = zeros((N,))
    #     for i in range(0, N):
    #         beta[i] = log(b1[i]) + log(self.estimate_emission_probability(X[-1, :], i))
    #
    #     for t in range(1, T):
    #         beta_old = beta.copy()
    #         x = X[-t, :]
    #         for j in range(0, N):
    #             tosum = zeros((N,))
    #             for i in range(0, N):
    #                 tosum[i] = beta_old[i] + log(transmat[i, j])
    #
    #             Sum = logsumexp(tosum)
    #             bj = self.estimate_emission_probability(x, j)
    #             beta[j] = Sum + log(bj)
    #
    #     tosum = zeros((N,))
    #
    #     for i in range(0, N):
    #         tosum[i] = beta[i] + log(transmat[i, 0])
    #
    #     return tosum

    def _log_likelihood(self, X):
        return logsumexp(self.forward(X))

    def _likelihood(self, X):
        # Number of states
        N = self.n_classes

        # Number of observations
        T = X.shape[0]

        transmat = self.transmat

        # Initialization
        a1 = self.prior

        alpha = zeros((N,))
        for i in range(0, N):
            alpha[i] = a1[i]*self.estimate_emission_probability(X[0, :], i)

        # Recursion
        for t in range(1, T):
            alpha_old = alpha.copy()
            x = X[t, :]
            for j in range(0, N):
                Sum = 0
                for i in range(0, N):
                    Sum += alpha_old[i]*transmat[i, j]

                alpha[j] = Sum*self.estimate_emission_probability(x, j)

        # Termination

        Sum = 0
        for i in range(0, N):
            Sum += alpha[i]*transmat[i, N-1]

        p = Sum

        # (a log-domain variant is sketched after this class)
        return p
3346
|
e@0
|
3347 # def fit(self, X, states):
|
e@0
|
3348 # # print parameters
|
e@0
|
3349 # n_classes = max(unique(states)) + 1
|
e@0
|
3350 #
|
e@0
|
3351 # # svms = [None]*n_classes
|
e@0
|
3352 # obs = [None]*n_classes
|
e@0
|
3353 # sta = [None]*n_classes
|
e@0
|
3354 # chain_size = self.chain_size
|
e@0
|
3355 #
|
e@0
|
3356 # #22 obs = None
|
e@0
|
3357 # # sta = None
|
e@0
|
3358 #
|
e@0
|
3359 # print "[II] Number of classes: ", n_classes
|
e@0
|
3360 #
|
e@0
|
3361 # # For each class:
|
e@0
|
3362 # # Concatenate examples
|
e@0
|
3363 # # Fit SVM
|
e@0
|
3364 #
|
e@0
|
3365 # for i in range(chain_size, len(states)):
|
e@0
|
3366 # class_ = states[i-1]
|
e@0
|
3367 # seq = X[i-chain_size:i, :]
|
e@0
|
3368 # states_ = states[i-chain_size:i]
|
e@0
|
3369 #
|
e@0
|
3370 #
|
e@0
|
3371 # if obs[class_] is None:
|
e@0
|
3372 # obs[class_] = [seq]
|
e@0
|
3373 # sta[class_] = [states_]
|
e@0
|
3374 # self.svms.append(MyAVAClassifier())
|
e@0
|
3375 # else:
|
e@0
|
3376 # obs[class_].append(seq)
|
e@0
|
3377 # sta[class_].append(states_)
|
e@0
|
3378 #
|
e@0
|
3379 #
|
e@0
|
3380 # for i in range(0, len(obs)):
|
e@0
|
3381 #
|
e@0
|
3382 # o = None
|
e@0
|
3383 # s = None
|
e@0
|
3384 #
|
e@0
|
3385 # for j in range(0, len(obs[i])):
|
e@0
|
3386 # if o is None:
|
e@0
|
3387 # o = obs[i][j]
|
e@0
|
3388 # s = sta[i][j]
|
e@0
|
3389 #
|
e@0
|
3390 # else:
|
e@0
|
3391 # o = append(o, obs[i][j],0)
|
e@0
|
3392 # s = append(s, sta[i][j])
|
e@0
|
3393 #
|
e@0
|
3394 #
|
e@0
|
3395 # self.svms[i].fit(o, s)
|
e@0
|
3396
|
e@0
|
3397 # def predict(self, features):
|
e@0
|
3398 # chain_size = self.chain_size
|
e@0
|
3399 # svms = self.svms
|
e@0
|
3400 #
|
e@0
|
3401 # predicted_classes = zeros((features.shape[0],))
|
e@0
|
3402 # for i in range(chain_size, features.shape[0]):
|
e@0
|
3403 # scores = zeros((len(svms)))
|
e@0
|
3404 #
|
e@0
|
3405 # seq = features[i-chain_size:i, :]
|
e@0
|
3406 #
|
e@0
|
3407 # for j in range(0, len(svms)):
|
e@0
|
3408 # if svms[j] is not None:
|
e@0
|
3409 # scores[j] = svms[j].compute_log_likelihood(seq)
|
e@0
|
3410 # else:
|
e@0
|
3411 # scores[k] = -infty
|
e@0
|
3412 # predicted_classes[i] = argmax(scores)
|
e@0
|
3413 #
|
e@0
|
3414 # return predicted_classes
|
e@0
|
3415
|
e@0
|
3416
|
e@0
|
3417
|
e@0
|
3418
|
e@0
|
3419
|
e@0
|
3420 # Marginalize over the latent variable
|
e@0
|
3421 # for i in range(0, X.shape[0]):
|
e@0
|
3422 # P = zeros((self.n_classes,))
|
e@0
|
3423 # x = X[i,:]
|
e@0
|
3424 # for j in range(0, len(P)):
|
e@0
|
3425 # P[j] = self.estimate_emission_probability(j, x)
|
e@0
|
3426 #
|
e@0
|
3427 # S[i] = log(sum(P[j]))
|
e@0
|
3428 #
|
e@0
|
3429 # return sum(S)
|
e@0
|
3430
|
e@0
|
3431
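
# Why the class above keeps a log-domain variant: a minimal sketch with a
# toy two-state HMM (made-up numbers, unrelated to the fitted models)
# showing the linear forward variable underflowing on long sequences while
# the log-domain recursion stays finite.
def _toy_forward_underflow_demo(T=1000):
    prior_ = array([0.6, 0.4])
    A = array([[0.7, 0.3], [0.4, 0.6]])
    b = array([0.1, 0.2])  # constant per-state emission likelihoods

    alpha_lin = prior_ * b
    alpha_log = log(prior_) + log(b)
    for _ in range(T - 1):
        # alpha_j <- (sum_i alpha_i A_ij) * b_j, in both domains
        alpha_lin = dot(alpha_lin, A) * b
        alpha_log = array([logsumexp(alpha_log + log(A[:, j])) + log(b[j])
                           for j in range(2)])

    print "[II] linear forward:", alpha_lin  # underflows to [ 0.  0.]
    print "[II] log-domain log-likelihood: %.2f" % logsumexp(alpha_log)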


# Continue from here
X = features_vector_upsampled.T
y = parameters_state

# clf = svm.SVC()
# clf.fit(X, y)
# parameters_state_y = clf.predict(X)

predhmmc3 = hmmc3.predict(features_vector_upsampled.T)

myava = MyAVAClassifier()
myava.fit(features_vector_upsampled.T, parameters_state)
predmyava = myava.predict(features_vector_upsampled.T)

# hmmsvmc = HMMsvmClassifier(N=2)
# hmmsvmc.fit(features_vector_upsampled.T, parameters_state)
# predhmmsvmc = hmmsvmc.predict(features_vector_upsampled.T)

obsc = MyAVAClassifier()
obsc.fit(features_vector_upsampled.T, parameters_state)
# hmm2 = hmm.GaussianHMM(n_components=6)
# hmm2.fit([features_vector_upsampled.T])
# hmm3 = HMMsvm(obsc)
# hmm3.fit([features_vector_upsampled.T], [parameters_state])
# predhmmsvm = hmm3.predict(features_vector_upsampled.T)

hmmsvmc = HMMsvmClassifier()
# hmmsvmc = HmmSvmClassifier3()
hmmsvmc.fit(features_vector_upsampled.T, parameters_state)
predhmmsvm = hmmsvmc.predict(features_vector_upsampled.T)
plot_parameters_estimation([nbc, myava, hmmc3, hmmsvmc])

# plot(parameters)
figure()
plot(parameters_state, 'b')
# plot(parameters_state_y, 'r--')
plot(predhmmc3, 'g+-')
plot(predhmmsvm, 'co-')
# plot(predhmmsvmc, 'bo-')
legend(['Real', 'HMM', 'HMM-SVM'])
xlabel('sample')
ylabel('state')
# TODO: HMM with SVM, cross-validation

# Using hidden markov svm
svmhmm = ""

# print "[II] Success ratio for SVM-RBF Classifier: %.2f" % (sum(parameters_state==parameters_state_y).astype(float)/float(len(parameters_state)))

print "[II] All-vs-All custom Support Vector Classifier success ratio: %.2f" % (sum(predmyava == parameters_state).astype(float) / len(parameters_state))
print "[II] Success ratio for HMM: %.2f" % (sum(parameters_state == predhmmc3).astype(float) / float(len(predhmmc3)))
print "[II] Success ratio for HMM-SVM: %.2f" % (sum(parameters_state == predhmmsvm).astype(float) / float(len(predhmmsvm)))
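
# Note: each ratio above is plain classification accuracy; with
# scikit-learn this is equivalent to, e.g.,
#   from sklearn import metrics
#   metrics.accuracy_score(parameters_state, predhmmc3)
# which is also what the cross-validation below uses.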


model_ghmm = ReverbModel("GaussianHMM Classifier Model", kernel, q, features_to_keep, hmmc3, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector)
model_gnb = ReverbModel("Naive Bayes Classifier Model", kernel, q, features_to_keep, nbc, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector)
model_svm = ReverbModel("AVA LinearSVM Classifier Model", kernel, q, features_to_keep, myava, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector)
model_hmmsvm = ReverbModel("HMM-SVM Classifier Model", kernel, q, features_to_keep, hmmsvmc, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector)

model_ghmm.save("model_ghmm.dat")
model_gnb.save("model_gnb.dat")
model_svm.save("model_svm.dat")
model_hmmsvm.save("model_hmmsvm.dat")
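
# Hypothetical convenience for the four files above: assuming
# ReverbModel.save simply pickles the instance to disk (pickle is imported
# at the top of this file), a matching loader could look like this; the
# name load_reverb_model is ours, not part of the original API.
def load_reverb_model(path):
    with open(path, 'rb') as f:
        return pickle.load(f)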


def split_data_to_k_samples(data, params, K):
    # Split the observation matrix and the parameter sequence into K
    # (approximately equal) consecutive chunks.
    L = shape(data)[0]
    M = int(round(L / float(K)))

    samples_data = []
    samples_parameters = []
    for k in range(K):
        if (k + 1) * M > L:
            samples_data.append(data[k * M:, :])
            samples_parameters.append(params[k * M:])
        else:
            samples_data.append(data[k * M:(k + 1) * M, :])
            samples_parameters.append(params[k * M:(k + 1) * M])

    return samples_data, samples_parameters


def join_data(data, params):
    # Concatenate a list of chunks back into one observation matrix and one
    # parameter sequence (inverse of split_data_to_k_samples).
    L = 0
    for p in params:
        L += len(p)

    new_data = zeros((L, data[0].shape[1]))
    new_params = zeros((L,))

    idx = 0
    for n in range(0, len(data)):
        new_data[idx:idx + data[n].shape[0], :] = data[n]
        new_params[idx:idx + len(params[n])] = params[n]
        idx += len(params[n])

    return new_data, new_params
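
# Illustrative round-trip check for the two helpers above (random data,
# not the feature vectors of this experiment): splitting and re-joining
# should reproduce the original arrays.
def _check_split_join(K=4):
    d = random.rand(20, 3)
    p = randint(0, 4, (20,)).astype(float)
    sd, sp = split_data_to_k_samples(d, p, K)
    d2, p2 = join_data(sd, sp)
    print "[II] split/join round trip ok:", allclose(d, d2) and allclose(p, p2)
# _check_split_join()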

cross_validation_ratios = {}
cross_validation_mses = {}

features_samples, parameters_samples = split_data_to_k_samples(features_vector_upsampled.T, parameters_state, 5)

from sklearn.cross_validation import KFold
from sklearn import metrics

KF = 10

s1, s2 = split_data_to_k_samples(features_vector_upsampled.T, parameters_state, KF)
r1, r2 = join_data(s1, s2)

kf = KFold(KF, n_folds=KF)  # leave one chunk out per fold
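
# With n == n_folds == KF, KFold is leave-one-chunk-out over the KF chunks
# produced above; e.g. for KF = 3 it would yield
#   train = [1 2], test = [0]
#   train = [0 2], test = [1]
#   train = [0 1], test = [2]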

trial = 1.0

rhmmc = 0
rgnbc = 0
ravac = 0
rhmmsvmc = 0

msegbnc = 0
msehmmc = 0
mseavac = 0
msehmmsvmc = 0

fullratios = []
fullmses = []


def calculate_mse(pred, real):
    # Map predicted/real states back to parameter vectors and compare them.
    realparams = states_to_vector(real, parameter_state_alphabet_inv)
    estparams = states_to_vector(pred, parameter_state_alphabet_inv)
    return mse(realparams, estparams)


ratios_gnb = []
ratios_hmm = []
ratios_svm = []
ratios_hmmsvm = []
ratios_sinkhole = []

mse_gnb = []
mse_hmm = []
mse_svm = []
mse_hmmsvm = []
mse_sinkhole = []

for train, test in kf:

    print train, test
    f1 = []
    p1 = []

    tf1 = []
    tp1 = []

    # KFold yields 0-based chunk indices, so index the chunk lists directly.
    for i in train:
        f1.append(s1[i])
        p1.append(s2[i])

    for i in test:
        tf1.append(s1[i])
        tp1.append(s2[i])

    new_trainset_data, new_trainset_params = join_data(f1, p1)
    new_trainset_params = new_trainset_params.astype(int)

    # Detect and remove outliers
    # ee = EllipticEnvelope()
    # ee.fit(new_trainset_data, new_trainset_params)
    # inliers = ee.predict(new_trainset_data) == 1
    # new_trainset_data = new_trainset_data[inliers, :]
    # new_trainset_params = new_trainset_params[inliers]

    new_testset_data, new_testset_params = join_data(tf1, tp1)
    new_testset_params = new_testset_params.astype(int)

    param_est = states_to_vector(new_testset_params, parameter_state_alphabet_inv).T

    # Learning curves: retrain on growing prefixes of the training fold.
    X = arange(50, len(new_trainset_params), 50)
    print X
    # X = append(X, [len(new_trainset_params)], 1)

    lc_ratios_gnb, lc_ratios_svm, lc_ratios_hmm, lc_ratios_hmmsvm = [], [], [], []
    lc_mse_gnb, lc_mse_svm, lc_mse_hmm, lc_mse_hmmsvm = [], [], [], []

    for M in X:
        hmmc = HmmClassifier3(N=CHAINSIZE, n_components=NCOMPONENTS)
        gnbc = GaussianNB()
        avac = MyAVAClassifier()
        hmmsvmc = HMMsvmClassifier()

        gnbc.fit(new_trainset_data[0:M, :], new_trainset_params[0:M])
        pgnbc = gnbc.predict(new_testset_data)
        rgnbc = metrics.accuracy_score(pgnbc, new_testset_params)
        mgnbc = calculate_mse(pgnbc, new_testset_params)

        hmmc.fit(new_trainset_data[0:M, :], new_trainset_params[0:M])
        phmmc = hmmc.predict(new_testset_data)
        rhmmc = metrics.accuracy_score(phmmc, new_testset_params)
        mhmmc = calculate_mse(phmmc, new_testset_params)

        try:
            avac.fit(new_trainset_data[0:M, :], new_trainset_params[0:M])
            pavac = avac.predict(new_testset_data)
            ravac = metrics.accuracy_score(pavac, new_testset_params)
            mavac = calculate_mse(pavac, new_testset_params)
        except Exception:
            ravac = 0
            mavac = inf

        try:
            hmmsvmc.fit(new_trainset_data[0:M, :], new_trainset_params[0:M])
            phmmsvm = hmmsvmc.predict(new_testset_data)
            rhmmsvm = metrics.accuracy_score(phmmsvm, new_testset_params)
            mhmmsvm = calculate_mse(phmmsvm, new_testset_params)
        except Exception:
            rhmmsvm = 0
            mhmmsvm = inf

        lc_ratios_gnb.append(rgnbc)
        lc_ratios_svm.append(ravac)
        lc_ratios_hmm.append(rhmmc)
        lc_ratios_hmmsvm.append(rhmmsvm)

        lc_mse_gnb.append(mgnbc)
        lc_mse_svm.append(mavac)
        lc_mse_hmm.append(mhmmc)
        lc_mse_hmmsvm.append(mhmmsvm)

    # Tuple order matches the curve extraction further down:
    # (trial, X, gnb, svm, hmm, hmmsvm)
    fullratios.append((trial, X, lc_ratios_gnb, lc_ratios_svm, lc_ratios_hmm, lc_ratios_hmmsvm))
    fullmses.append((trial, X, lc_mse_gnb, lc_mse_svm, lc_mse_hmm, lc_mse_hmmsvm))

    # Final models for this fold, trained on the whole training fold.
    # hmmc = HmmClassifier3(N=3, n_components=max(unique(parameters_state))+1)
    hmmc = HmmClassifier3(N=CHAINSIZE, n_components=NCOMPONENTS)
    gnbc = GaussianNB()
    avac = MyAVAClassifier()
    hmmsvmc = HMMsvmClassifier()
    sinkholec = SinkHoleClassifier()

    hmmc.fit(new_trainset_data, new_trainset_params)
    gnbc.fit(new_trainset_data, new_trainset_params)
    # try:
    avac.fit(new_trainset_data, new_trainset_params)
    sinkholec.fit(new_trainset_data, new_trainset_params)
    pavac = avac.predict(new_testset_data)
    # except:
    #     pavac = ones((new_testset_data.shape[0],))*new_trainset_params[0]

    # try:
    hmmsvmc.fit(new_trainset_data, new_trainset_params)
    phmmsvm = hmmsvmc.predict(new_testset_data)
    # except:
    #     phmmsvm = ones((new_testset_data.shape[0],))*new_trainset_params[0]

    phmmc = hmmc.predict(new_testset_data)
    pgnbc = gnbc.predict(new_testset_data)
    psinkhole = sinkholec.predict(new_testset_data)

    print "[II] (Trial %d) Classification ratio for GNB: %.2f" % (trial, metrics.accuracy_score(pgnbc, new_testset_params))
    print "[II] (Trial %d) Classification ratio for SVM: %.2f" % (trial, metrics.accuracy_score(pavac, new_testset_params))
    print "[II] (Trial %d) Classification ratio for HMM: %.2f" % (trial, metrics.accuracy_score(phmmc, new_testset_params))
    print "[II] (Trial %d) Classification ratio for HMM/SVM: %.2f" % (trial, metrics.accuracy_score(phmmsvm, new_testset_params))
    print "[II] (Trial %d) Classification ratio for Sinkhole Approach: %.2f" % (trial, metrics.accuracy_score(psinkhole, new_testset_params))

    # rgnbc = rhmmc*(trial-1)/trial + metrics.accuracy_score(pgnbc, new_testset_params)/trial
    # ravac = rhmmc*(trial-1)/trial + metrics.accuracy_score(pavac, new_testset_params)/trial
    # rhmmc = rhmmc*(trial-1)/trial + metrics.accuracy_score(phmmc, new_testset_params)/trial
    # rhmmsvmc = rhmmc*(trial-1)/trial + metrics.accuracy_score(phmmsvm, new_testset_params)/trial

    rgnbc = metrics.accuracy_score(pgnbc, new_testset_params)
    ravac = metrics.accuracy_score(pavac, new_testset_params)
    rhmmc = metrics.accuracy_score(phmmc, new_testset_params)
    rhmmsvmc = metrics.accuracy_score(phmmsvm, new_testset_params)
    rsinkhole = metrics.accuracy_score(psinkhole, new_testset_params)

    ratios_gnb.append(rgnbc)
    ratios_svm.append(ravac)
    ratios_hmm.append(rhmmc)
    ratios_hmmsvm.append(rhmmsvmc)
    ratios_sinkhole.append(rsinkhole)

    # msegbnc = msegbnc*(trial-1)/trial + calculate_mse(pgnbc,new_testset_params)/trial
    # msehmmc = msehmmc*(trial-1)/trial + calculate_mse(phmmc,new_testset_params)/trial
    # mseavac = mseavac*(trial-1)/trial + calculate_mse(pavac,new_testset_params)/trial
    # msehmmsvmc = msehmmsvmc*(trial-1)/trial + calculate_mse(phmmsvm,new_testset_params)/trial

    msegnb = calculate_mse(pgnbc, new_testset_params)
    msesvm = calculate_mse(pavac, new_testset_params)
    msehmm = calculate_mse(phmmc, new_testset_params)
    msehmmsvm = calculate_mse(phmmsvm, new_testset_params)
    msesinkhole = calculate_mse(psinkhole, new_testset_params)

    mse_gnb.append(msegnb)
    mse_svm.append(msesvm)
    mse_hmm.append(msehmm)
    mse_hmmsvm.append(msehmmsvm)
    mse_sinkhole.append(msesinkhole)

    trial += 1

print ratios_gnb

sucrats = [mean(ratios_gnb), mean(ratios_svm), mean(ratios_hmm), mean(ratios_hmmsvm), mean(ratios_sinkhole)]
sucratsstd = [std(ratios_gnb, ddof=1), std(ratios_svm, ddof=1), std(ratios_hmm, ddof=1), std(ratios_hmmsvm, ddof=1), std(ratios_sinkhole, ddof=1)]
mserats = [mean(mse_gnb), mean(mse_svm), mean(mse_hmm), mean(mse_hmmsvm), mean(mse_sinkhole)]
# mserats = [msegbnc, mseavac, msehmmc, msehmmsvmc]

print tuple(sucrats)
# print tuple(sucratsstd)

# scores = zeros((len(sucrats), len(sucrats)))
# for i in range(0, len(sucrats)):
#     for j in range(0, len(sucrats)):
#         scores[i,j] = (sucrats[i] - sucrats[j])/sqrt(sucratsstd[i]**2 + sucratsstd[j]**2)
print tuple(mserats)
# print scores
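
# Runnable version of the commented separation-score idea above (our
# reconstruction, assuming the per-fold accuracies are summarised by the
# means/stds computed earlier): larger |score| means the two classifiers'
# mean accuracies are further apart relative to their joint spread.
def pairwise_separation(means, stds):
    n = len(means)
    scores = zeros((n, n))
    for i in range(0, n):
        for j in range(0, n):
            scores[i, j] = (means[i] - means[j]) / sqrt(stds[i] ** 2 + stds[j] ** 2)
    return scores
# Example: print pairwise_separation(sucrats, sucratsstd)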


# print (msegbnc, mseavac, msehmmc, msehmmsvmc)

# Average the per-fold learning curves collected in fullratios/fullmses.
sample_sizes = X / float(max(X))
gnbc_ratios_curves = zeros((len(X), len(fullratios)))
gnbc_mses_curves = zeros((len(X), len(fullratios)))

avac_ratios_curves = zeros((len(X), len(fullratios)))
avac_mses_curves = zeros((len(X), len(fullratios)))

hmmc_ratios_curves = zeros((len(X), len(fullratios)))
hmmc_mses_curves = zeros((len(X), len(fullratios)))

hmmsvmc_ratios_curves = zeros((len(X), len(fullratios)))
hmmsvmc_mses_curves = zeros((len(X), len(fullratios)))

for i in range(0, len(fullratios)):
    for j in range(0, len(X)):
        gnbc_ratios_curves[j, i] = fullratios[i][2][j]
        gnbc_mses_curves[j, i] = fullmses[i][2][j]

        avac_ratios_curves[j, i] = fullratios[i][3][j]
        avac_mses_curves[j, i] = fullmses[i][3][j]

        hmmc_ratios_curves[j, i] = fullratios[i][4][j]
        hmmc_mses_curves[j, i] = fullmses[i][4][j]

        hmmsvmc_ratios_curves[j, i] = fullratios[i][5][j]
        hmmsvmc_mses_curves[j, i] = fullmses[i][5][j]

gnbc_mses_curve = mean(gnbc_mses_curves, 1)
avac_mses_curve = mean(avac_mses_curves, 1)
hmmc_mses_curve = mean(hmmc_mses_curves, 1)
hmmsvmc_mses_curve = mean(hmmsvmc_mses_curves, 1)

gnbc_ratios_curve = mean(gnbc_ratios_curves, 1)
avac_ratios_curve = mean(avac_ratios_curves, 1)
hmmc_ratios_curve = mean(hmmc_ratios_curves, 1)
hmmsvmc_ratios_curve = mean(hmmsvmc_ratios_curves, 1)

figure()
subplot(211)
plot(sample_sizes, gnbc_ratios_curve)
plot(sample_sizes, avac_ratios_curve)
plot(sample_sizes, hmmc_ratios_curve)
plot(sample_sizes, hmmsvmc_ratios_curve)
xlabel('Part of training set used')
ylabel('Accuracy')
legend(['GNB', 'SVM', 'HMM', 'HMM/SVM'])

subplot(212)
plot(sample_sizes, gnbc_mses_curve)
plot(sample_sizes, avac_mses_curve)
plot(sample_sizes, hmmc_mses_curve)
plot(sample_sizes, hmmsvmc_mses_curve)
xlabel('Part of training set used')
ylabel('Mean Squared Error')
legend(['GNB', 'SVM', 'HMM', 'HMM/SVM'])

# tight_layout()
savefig('ratios-2.png', dpi=500)  # save before show() so the figure is not blank
show()