# -*- coding: utf-8 -*-
"""
Created on Mon Mar 21 15:29:06 2016

@author: Emmanouil Theofanis Chourdakis
"""

from glob import glob
from sys import argv
from essentia.standard import YamlInput
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from joblib import Parallel, delayed
from collections import Counter
from models import ReverbModel
from sklearn.cluster import KMeans
from time import time
import tempfile
import os
import shutil
from scipy.signal import gaussian

from sklearn.metrics import accuracy_score, f1_score
from sklearn.cross_validation import cross_val_score, cross_val_predict
from sklearn.covariance import EllipticEnvelope
from sklearn.covariance import EmpiricalCovariance, MinCovDet

from sklearn.svm import LinearSVC
from hmmlearn import hmm

from mapping import *
from numpy import *
from pca import *
from numpy.random import randint

from warnings import filterwarnings
filterwarnings("ignore")
from sklearn.base import BaseEstimator

# Mean squared error between two arrays/matrices.
mse = lambda A, B: ((array(A) - array(B)) ** 2).mean()
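# Quick sanity check of the helper above (plain arithmetic):
#   mse([1, 2], [1, 3]) = ((1-1)**2 + (2-3)**2) / 2.0 = 0.5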

def compress_parameters_vector(parameters_vector, tol=0.000001):
    """ Quantizes each row of parameters_vector to a small set of levels
    found with KMeans, adding clusters until the clustering score stops
    improving by more than `tol'. """
    new_parameters_vector = zeros(parameters_vector.shape)

    for i in range(0, parameters_vector.shape[0]):

        p = parameters_vector[i, :]
        p_set = matrix(unique(p)).T

        scores = []
        score = -1000

        for n in range(1, len(p_set)):
            old_score = score

            k_means = KMeans(n_clusters=n)
            k_means.fit(p_set)
            score = k_means.score(p_set)

            scores.append(score)

            if abs(score - old_score) < tol:
                n_opt = n
                break

        # Replace each value with the centre of the cluster it belongs to
        predicted_p = k_means.predict(matrix(p).T)
        new_parameters_vector[i, :] = array(ravel(k_means.cluster_centers_[predicted_p]))

    return new_parameters_vector
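
# A minimal usage sketch for the helper above, on made-up values (not from
# a real parameter track):
#
#   pv = array([[0.10, 0.11, 0.50, 0.52],
#               [0.90, 0.88, 0.10, 0.12]])
#   compress_parameters_vector(pv)
#   # each row collapses onto a couple of KMeans cluster centres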

def smooth_matrix_1D(X, W=100):
    """ Smooths every row of X by convolving it with a normalized
    (W+1)-point gaussian window. """
    window = gaussian(W + 1, 4)
    window = window/sum(window)

    # Zero-pad by W samples on each side to avoid edge artifacts
    intermx = zeros((X.shape[0], X.shape[1] + 2*W))
    intermx[:, W:-W] = X

    for m in range(0, X.shape[0]):
        intermx[m, :] = convolve(ravel(intermx[m, :]), window, 'same')

    return intermx[:, W:-W]
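
# For instance (hypothetical shapes), smoothing a 5 x 1000 feature matrix
# with the default 101-point window preserves the shape:
#
#   F = random.randn(5, 1000)
#   smooth_matrix_1D(F).shape   # -> (5, 1000)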

class HMMsvm:
    def __init__(self, svm_):
        self.svm = svm_

    def fit(self, features_list, states, flr=1e-13):
        """ Fits the prior and transition probabilities from the given
        state sequences; emissions are delegated to the wrapped SVM. """

        # Concatenate the features of all sequences
        features = None
        for f in features_list:
            if features is None:
                features = f
            else:
                features = append(features, f, 0)

        # ... and their corresponding state sequences
        fullstates = None
        for s in states:
            if fullstates is None:
                fullstates = s
            else:
                fullstates = append(fullstates, s)

        self.n_classes = self.svm.n_classes
        n_classes = self.n_classes

        # Prior state probabilities from the state histogram
        h = histogram(fullstates, n_classes)[0].astype(float)
        self.prior = h/sum(h)

        # Add a very small probability for random jump
        self.prior += flr
        self.prior = self.prior/sum(self.prior)

        # Count state transitions
        transmat = zeros((n_classes, n_classes))
        transitions = zeros((n_classes, ))
        for s in states:
            for i in range(1, len(s)):
                prev = s[i-1]
                cur = s[i]
                transmat[prev, cur] += 1
                transitions[prev] += 1

        # Avoid division by zero for states that are never left
        transitions[transitions == 0] = 1

        for i in range(0, transmat.shape[0]):
            transmat[i, :] = transmat[i, :]/transitions[i]

        self.transmat = transmat

        # Add a very small probability for random jump to avoid zero values
        self.transmat += flr

        for i in range(0, self.transmat.shape[0]):
            self.transmat[i, :] = self.transmat[i, :]/sum(self.transmat[i, :])

        # Iteratively re-estimate the transition matrix from Viterbi
        # decodings of the training sequences until it converges
        A = zeros((n_classes, n_classes))
        Aold = self.transmat

        while mse(A, Aold) > 10*size(A)*flr:
            Aold = A.copy()
            A = zeros((n_classes, n_classes))
            transitions = zeros((n_classes, ))

            for i in range(0, len(features_list)):
                f = features_list[i]
                s, p = self.logviterbi(f)
                for j in range(1, len(s)):
                    prev = s[j-1]
                    cur = s[j]
                    A[prev, cur] += 1
                    transitions[prev] += 1

            transitions[transitions == 0] = 1

            for i in range(0, A.shape[0]):
                A[i, :] = A[i, :]/transitions[i]

            A += flr

            self.transmat = A

            for i in range(0, A.shape[0]):
                if sum(A[i, :]) > 0:
                    A[i, :] = A[i, :]/sum(A[i, :])

    def estimate_emission_probability(self, x, q):
        # Scaled emission likelihood: SVM posterior divided by the prior
        post = self.svm.estimate_posterior_probability_vector(x)
        prior = self.prior
        p = post/prior
        p = p/sum(p)

        return p[q]
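
    # In Bayes' rule terms, p(x|q) = p(q|x) p(x) / p(q): the SVM supplies
    # the posterior p(q|x), the class histogram supplies the prior p(q),
    # and the evidence p(x) cancels in the normalization above.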

    def predict(self, X):
        return self.logviterbi(X)[0]

    def logviterbi(self, X):
        # Number of states
        N = self.n_classes

        # Number of observations
        T = X.shape[0]

        transmat = self.transmat

        # 1. Initialization
        a1 = self.prior

        delta = zeros((T, N))
        psi = zeros((T, N), dtype=int)

        for i in range(0, N):
            delta[0, i] = log(a1[i]) + log(self.estimate_emission_probability(X[0, :], i))

        # 2. Recursion
        for t in range(1, T):
            x = X[t, :]
            for j in range(0, N):
                delta[t, j] = max(delta[t-1, :] + log(transmat[:, j])) + log(self.estimate_emission_probability(x, j))
                psi[t, j] = argmax(delta[t-1, :] + log(transmat[:, j]))

        # 3. Termination
        p_star = max(delta[T-1, :])
        q_star = argmax(delta[T-1, :])

        # 4. Backtracking
        q = zeros((T, ), dtype=int)
        p = zeros((T, ))

        q[-1] = q_star
        p[-1] = p_star
        for t in range(1, T):
            q[-t-1] = psi[-t, q[-t]]
            p[-t-1] = delta[-t-1, q[-t-1]]

        return q, p
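
    # The recursion above is the standard log-domain Viterbi step,
    #   delta_t(j) = max_i [ delta_{t-1}(i) + log a_ij ] + log b_j(x_t),
    # with psi_t(j) recording the argmax for the backtracking pass.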

    def viterbi(self, X):
        # Number of states
        N = self.n_classes

        # Number of observations
        T = X.shape[0]

        transmat = self.transmat

        # 1. Initialization
        a1 = self.prior

        delta = zeros((N, ))
        psi = zeros((T, N), dtype=int)

        for i in range(0, N):
            delta[i] = a1[i]*self.estimate_emission_probability(X[0, :], i)

        # 2. Recursion
        for t in range(1, T):
            delta_old = delta.copy()
            x = X[t, :]
            for j in range(0, N):
                delta[j] = max(delta_old*transmat[:, j])*self.estimate_emission_probability(x, j)
                psi[t, j] = argmax(delta_old*transmat[:, j])

        # 3. Termination
        p_star = max(delta)
        q_star = argmax(delta)

        # 4. Backtracking
        q = zeros((T, ), dtype=int)
        q[-1] = q_star

        for t in range(1, T):
            q[-t-1] = psi[-t, q[-t]]

        return q

    def _log_likelihood_matrix(self, X):
        N = self.n_classes
        ll = zeros((X.shape[0], N))

        for i in range(0, X.shape[0]):
            ll[i, :] = self._forward(i, X)

        return ll

    def compute_ksus(self, X):
        N = self.n_classes
        T = X.shape[0]
        print "[II] Computing gammas... "

        gammas = self._forward_backward(X)

        # Initialize
        aij = self.transmat

        # TODO: the re-estimation of aij from the ksus is not implemented

    def _not_quite_ksu(self, t, i, j, X):
        alpha = exp(self.forward(X[0:t+1, :]))[i]
        beta = exp(self.backward(X[t+1:, :]))[j]

        aij = self.transmat[i, j]
        bj = self.estimate_emission_probability(X[t+1, :], j)

        return alpha*beta*aij*bj

    def _ksu(self, t, i, j, X):
        alphaT = exp(self.forward(X[0:t+1, :]))[0]

        # Normalize the ksu numerator by the forward likelihood term
        return self._not_quite_ksu(t, i, j, X)/alphaT

    def _forward_backward(self, X):
        T = X.shape[0]
        N = self.n_classes
        fv = zeros((T, N))
        sv = zeros((T, N))

        b = None
        for t in range(1, T+1):
            fv[t-1, :] = self._forward_message(fv[t-2, :], X[0:t, :])

        for t in range(1, T+1):
            b = self._backward_message(b, X[-t:, :])
            # Combine the log-domain forward and backward messages
            sv[-t, :] = lognorm(fv[-t, :] + b)

        return sv

    def _backward(self, t, X):
        N = self.n_classes
        a = ones((N, ))/N

        beta = zeros((N, ))
        transmat = self.transmat
        T = X.shape[0]
        x = X[t, :]
        if t == T-1:
            beta = log(a)

            return beta
        else:
            tosum = zeros((N, ))
            bt = self._backward(t+1, X)
            btnew = zeros((N, ))
            for j in range(0, N):
                for i in range(0, N):
                    tosum[i] = log(self.estimate_emission_probability(x, j)) + bt[i] + log(transmat[i, j])

                btnew[j] = logsumexp(tosum)
            btnew = lognorm(btnew)
            return btnew

    def score(self, X):
        return self.forward(X)

    def forward(self, X):
        T = X.shape[0]
        f = None
        for t in range(1, T+1):
            f = self._forward_message(f, X[0:t, :])

        return f

    def backward(self, X):
        T = X.shape[0]
        b = None
        for t in range(1, T+1):
            b = self._backward_message(b, X[-t:, :])

        return b

    def _backward_message(self, b, X):
        N = self.n_classes

        beta = zeros((N, ))
        transmat = self.transmat

        x = X[0, :]

        if X.shape[0] == 1:
            beta = log(ones((N, ))/N)
            return beta
        else:
            tosum = zeros((N, ))
            btnew = zeros((N, ))
            for j in range(0, N):
                for i in range(0, N):
                    tosum[i] = log(self.estimate_emission_probability(x, j)) + b[i] + log(transmat[i, j])

                btnew[j] = logsumexp(tosum)
            return btnew

    def _forward_message(self, f, X):
        N = self.n_classes
        a = self.prior

        alpha = zeros((N, ))
        transmat = self.transmat

        x = X[-1, :]

        if X.shape[0] == 1:
            for i in range(0, N):
                alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i))
            return alpha
        else:
            tosum = zeros((N, ))
            ftnew = zeros((N, ))
            for j in range(0, N):
                for i in range(0, N):
                    tosum[i] = f[i] + log(transmat[i, j])
                Sum = logsumexp(tosum)
                bj = self.estimate_emission_probability(x, j)
                ftnew[j] = Sum + log(bj)

            return ftnew

    def _forward(self, t, X):
        N = self.n_classes
        a = self.prior
        alpha = zeros((N, ))
        transmat = self.transmat
        x = X[t, :]

        if t == 0:
            for i in range(0, N):
                alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i))
            alpha = lognorm(alpha)

            return alpha
        else:
            tosum = zeros((N, ))
            ft = self._forward(t-1, X)
            ftnew = zeros((N, ))
            for j in range(0, N):
                for i in range(0, N):
                    tosum[i] = ft[i] + log(transmat[i, j])

                Sum = logsumexp(tosum)
                bj = self.estimate_emission_probability(x, j)
                ftnew[j] = Sum + log(bj)
            ftnew = lognorm(ftnew)

            return ftnew
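
# A rough usage sketch for the hybrid model above (X a frames-by-features
# array, y the integer state labels, shapes as used elsewhere in this
# script):
#
#   obs = MyAVAClassifier()
#   obs.fit(X, y)
#   model = HMMsvm(obs)
#   model.fit([X], [y])
#   decoded = model.predict(X)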

class NBClassifier:
    def __init__(self):
        print "[II] Gaussian Naive Bayes Classifier"
        self.name = "Naive Bayes"
        self.smallname = "gnbc"
        self.gnb = GaussianNB()

    def getName(self):
        return self.name

    def score(self, features, parameters):
        predicted_states = self.predict(features)
        return accuracy_score(parameters, predicted_states)

    def fit(self, X, states):
        self.gnb.fit(X, states)

    def predict(self, X):
        return self.gnb.predict(X)

class SVMClassifier(LinearSVC):
    def __init__(self, **arguments):
        LinearSVC.__init__(self, dual=False)
        self.smallname = "svmc"

class SinkHoleClassifier(BaseEstimator):
    """ Majority-voting ensemble of the NB, SVM, HMM and HMM-SVM
    classifiers defined in this module. """
    def __init__(self, name="SinkholeClassifier", N=2, n_components=1):
        self.chain_size = N
        self.n_components = n_components
        self.classifierNB = NBClassifier()
        # self.classifierSVM = MyAVAClassifier()
        self.classifierSVM = LinearSVC(dual=False)
        self.classifierHMM = HmmClassifier(N=N, n_components=n_components)
        self.classifierHMMSVM = HMMsvmClassifier(N=N)
        self.name = name
        self.smallname = "sinkholec"

    def score(self, features, parameters):
        predicted_states = self.predict(features)
        return accuracy_score(parameters, predicted_states)

    def getName(self):
        return self.name

    def get_params(self, deep=True):
        return {"N": self.chain_size, "n_components": self.n_components}

    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def fit(self, X, y):
        self.n_classes = max(unique(y)) + 1

        self.classifierNB.fit(X, y)
        self.classifierSVM.fit(X, y)
        self.classifierHMM.fit(X, y)
        self.classifierHMMSVM.fit(X, y)

    def predict(self, X):
        predictedNB = self.classifierNB.predict(X)
        predictedSVM = self.classifierSVM.predict(X)
        predictedHMM = self.classifierHMM.predict(X)
        predictedHMMSVM = self.classifierHMMSVM.predict(X)

        predicted = zeros((X.shape[0], ))

        for i in range(0, len(predicted)):
            candidates = [predictedNB[i], predictedSVM[i], predictedHMM[i], predictedHMMSVM[i]]

            c = Counter(candidates)

            most_common = c.most_common()

            # If the voting is tied, select one of the candidates at random
            if len(unique([k[1] for k in most_common])) == 1:
                predicted[i] = most_common[randint(len(most_common))][0]
            else:
                predicted[i] = most_common[0][0]

        return predicted

class MyAVAClassifier:
    """ All-vs-all SVM classifier with pairwise probability coupling. """

    def __init__(self):
        self.classifiers = {}
        self.name = "Linear SVM Classifier"
        self.smallname = "svc-ava"

    def getName(self):
        return self.name

    def fit(self, X, y, flr=0, C=0.7):

        n_classes = max(unique(y)) + 1

        if len(unique(y)) == 1:
            # Degenerate case: only one class present in the training data
            self.only_one_class = True
            self.n_classes = 1
            self.one_class_label = y[0]
            return
        elif len(unique(y)) == 2:
            # With only two classes a single binary SVM suffices
            self.n_classes = n_classes
            self.svm = svm.SVC(degree=2, probability=True, kernel='rbf', gamma=2, C=C)
            self.svm.fit(X, y)
            classes_ = unique(y)
            self.classifiers[(classes_[0], classes_[1])] = self.svm
            self.only_two_classes = True
            self.only_one_class = False

            return
        else:
            self.only_two_classes = False
            self.only_one_class = False

        classes = arange(0, n_classes)
        self.n_classes = n_classes

        h = histogram(y, n_classes)[0].astype(float)
        self.prior = h/sum(h)

        transmat = zeros((n_classes, n_classes))

        for i in range(1, len(y)):
            prev = y[i-1]
            cur = y[i]
            transmat[prev, cur] += 1

        transmat = transmat/sum(transmat)

        self.transmat = transmat

        # Add a very small probability for random jump to avoid zero values
        self.transmat += flr
        self.transmat = self.transmat/sum(self.transmat)

        # Train one binary SVM for each (unordered) pair of classes
        for i in range(0, n_classes):
            for j in range(0, n_classes):
                if i != j and (i, j) not in self.classifiers and (j, i) not in self.classifiers:

                    idx_ = bitwise_or(y == classes[i], y == classes[j])

                    X_ = X[idx_, :]

                    y_ = y[idx_]

                    if len(unique(y_)) > 1:
                        svm_ = svm.SVC(probability=True, kernel='poly', gamma=2, C=C)

                        svm_.fit(X_, y_)
                        self.classifiers[(i, j)] = svm_

    def estimate_pairwise_class_probability(self, i, j, x):

        if (i, j) not in self.classifiers and (j, i) in self.classifiers:
            return self.classifiers[(j, i)].predict_proba(x)[0, 1]
        elif (i, j) not in self.classifiers and (j, i) not in self.classifiers:
            return 0.0
        else:
            return self.classifiers[(i, j)].predict_proba(x)[0, 0]

    def estimate_posterior_probability(self, i, x):
        mus = zeros((self.n_classes, ))
        for j in range(0, self.n_classes):
            if i != j:
                pcp = self.estimate_pairwise_class_probability(i, j, x)
                pcp += 1e-18  # floor to avoid division by zero
                mus[j] = 1/pcp
        S = sum(mus) - (self.n_classes - 2)
        return 1/S
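
    # This implements the simple pairwise coupling estimate
    #   P(c_i | x) = 1 / ( sum_{j != i} 1/p_ij(x) - (K - 2) ),
    # combining the pairwise probabilities p_ij into a posterior over all
    # K classes.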

    def estimate_posterior_probability_vector(self, x):
        posterior = zeros((self.n_classes, ))
        for i in range(0, len(posterior)):
            posterior[i] = self.estimate_posterior_probability(i, x)

        return posterior

    def predict(self, X):
        predicted = zeros((X.shape[0], ))

        if self.only_one_class == True:
            return ones((X.shape[0], ))*self.one_class_label
        elif self.only_two_classes == True:
            return self.svm.predict(X)

        for i in range(0, X.shape[0]):
            x = X[i, :]
            P = zeros((self.n_classes, ))

            for c in range(0, len(P)):
                P[c] = self.estimate_posterior_probability(c, x)

            pred = argmax(P)
            predicted[i] = pred

        return predicted
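
# Design note: unlike the one-vs-rest scheme LinearSVC uses elsewhere in
# this script, MyAVAClassifier trains one SVM per class pair and couples
# their probabilities, which is what HMMsvm needs for its emission
# estimates.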

class HMMsvmClassifier(BaseEstimator):
    def __init__(self, N=2):
        self.classifiers = {}
        self.name = "HMM-SVM Classifier"
        self.smallname = "svmhmm"
        self.obs = MyAVAClassifier()
        self.chain_size = N

    def get_params(self, deep=True):
        return {"N": self.chain_size}

    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def getName(self):
        return self.name

    def score(self, features, parameters):
        predicted_states = self.predict(features)
        return accuracy_score(parameters, predicted_states)

    def fit(self, X, y):
        self.n_classes = max(unique(y)) + 1

        self.obs.fit(X, y)
        self.hmm = HMMsvm(self.obs)
        self.hmm.fit([X], [y])

    def predict(self, X):
        return self.hmm.predict(X)

    def confidence(self, x, q):
        return self.hmm.estimate_emission_probability(x, q)

class HmmClassifier(BaseEstimator):
    def __init__(self, N=2, n_components=1):
        self.name = "HMM (%d time steps, %d components)" % (N, n_components)
        self.smallname = "ghmm"
        self.n_components = n_components
        self.chain_size = N

    def get_params(self, deep=True):
        return {"N": self.chain_size, "n_components": self.n_components}

    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def getName(self):
        return self.name

    def score(self, features, parameters):
        predicted_states = self.predict(features)
        return accuracy_score(parameters, predicted_states)

    def fit(self, features, parameters):

        n_classes = max(unique(parameters)) + 1

        if n_classes == 1:
            self.only_one_class = True
            return
        else:
            self.only_one_class = False

        hmms = [None]*n_classes

        chain_size = self.chain_size
        obs = [None]*n_classes

        # Collect, for every class, the feature sequences of length
        # chain_size that lead up to an occurrence of that class
        for i in range(chain_size, len(parameters)):
            class_ = parameters[i]
            seq = features[i-chain_size:i, :]

            if obs[class_] is None:
                obs[class_] = [seq]
            else:
                obs[class_].append(seq)

        # Fit one GaussianHMM per class on its collected sequences
        for i in range(0, len(obs)):

            if obs[i] is not None and len(obs[i]) != 0:
                hmm_ = hmm.GaussianHMM(n_components=self.n_components, covariance_type='diag')
                obs_ = concatenate(obs[i])
                hmm_.fit(obs_, [self.chain_size]*len(obs[i]))

                hmms[i] = hmm_

        self.hmms = hmms

        return obs

    def predict(self, features, mfilt=20):

        if self.only_one_class == True:
            return zeros((features.shape[0], ))

        chain_size = self.chain_size
        hmms = self.hmms
        predicted_classes = zeros((features.shape[0], ))

        # Classify each frame by which class HMM scores its preceding
        # chain_size-frame window highest
        for i in range(chain_size, features.shape[0]):
            scores = zeros((len(hmms), ))

            seq = features[i-chain_size:i, :]

            for j in range(0, len(hmms)):
                if hmms[j] is not None:
                    scores[j] = hmms[j].score(seq)
                else:
                    scores[j] = -infty

            predicted_classes[i] = argmax(scores)

        return predicted_classes
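
# Sketch of the intended call pattern (shapes as in __main__ below, where
# frames are rows of features_vector.T and parameters_state holds the
# integer state labels):
#
#   hmmc = HmmClassifier(N=2, n_components=1)
#   hmmc.fit(features_vector.T, parameters_state)
#   states = hmmc.predict(features_vector.T)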

def vector_to_states(X):
    """
    Input: a matrix MxN with N samples of M variables.
    Output: the state sequence `state_seq', a codeword dictionary
    `parameters_alphabet' mapping each distinct column to a state number,
    and its inverse `parameters_alphabet_inv'. """

    parameters_alphabet = {}
    n = 0

    for i in range(0, X.shape[1]):
        vec = tuple(ravel(X[:, i]))
        if vec not in parameters_alphabet:
            parameters_alphabet[vec] = n
            n += 1

    parameters_alphabet_inv = dict([(parameters_alphabet[m], m) for m in parameters_alphabet])

    state_seq = array([parameters_alphabet[tuple(ravel(X[:, m]))] for m in range(0, X.shape[1])])

    return state_seq, parameters_alphabet, parameters_alphabet_inv
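
# For example (made-up values), the columns of
#   [[0.1, 0.1, 0.3],
#    [0.5, 0.5, 0.5]]
# yield state_seq = [0, 0, 1]: identical columns share one codeword.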

def states_to_vector(predicted, parameters_alphabet_inv):
    """ Inverse of vector_to_states: expands a state sequence back into a
    matrix of parameter column vectors. """
    estimated = matrix(zeros((len(parameters_alphabet_inv[0]), len(predicted))))
    for i in range(0, len(predicted)):
        estimated[:, i] = matrix(parameters_alphabet_inv[int(predicted[i])]).T

    return estimated
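
# Together the two helpers give a lossless round trip:
#   s, a, a_inv = vector_to_states(P)
#   states_to_vector(s, a_inv)   # == matrix(P)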

if __name__ == "__main__":
    if len(argv) != 2:
        print "[EE] Wrong number of arguments"
        print "[II] Correct syntax is:"
        print "[II] \t%s <trainingdir>" % argv[0]
        print "[II] where <trainingdir> is the directory containing the features files in the format *_features.yaml"
        print "[II] and the parameters in the format *_parameters.yaml"
        exit(-1)

    totalstart = time()

    traindir = argv[1]

    songs_in_dir = glob("%s/*_features.yaml" % traindir)

    # Shuffle to avoid using the same songs in the same sets in cross-validation
    #random.shuffle(songs_in_dir)

    features_vector = None
    parameters_vector = None
    index_vector = None

    idx = 0

    for f_ in songs_in_dir:
        infile = f_
        paramfile = "%s_parameters.yaml" % f_.split('_features.yaml')[0]

        print "[II] Using features file: %s" % infile
        print "[II] and parameters file: %s" % paramfile

        features_pool = YamlInput(filename=infile)()
        parameters_pool = YamlInput(filename=paramfile)()

        d1 = parameters_pool['parameters.d1']
        da = parameters_pool['parameters.da']
        g1 = parameters_pool['parameters.g1']
        gc = parameters_pool['parameters.gc']
        G = parameters_pool['parameters.G']

        feature_captions = features_pool.descriptorNames()
        parameter_captions = ['d1', 'g1', 'da', 'gc', 'G']

        # Keep only true features; metadata and parameter descriptors are
        # not model inputs
        for c in features_pool.descriptorNames():
            if c.split('.')[0] == 'metadata' or c.split('.')[0] == 'parameter':
                feature_captions.remove(c)

        print "[II] %d Features Available: " % len(feature_captions)
        print str(feature_captions).replace("', ", "\n").replace('[', '').replace("'", "[II]\t ")[:-7]

        nfeatures_in = len(feature_captions)
        nparameters_in = 5
        features_vector_one = zeros((nfeatures_in, len(features_pool[feature_captions[0]])))
        index_vector_one = ones((len(features_pool[feature_captions[0]]), ))
        parameters_vector_one = ones((nparameters_in, len(features_pool[feature_captions[0]])))

        for i in range(0, nfeatures_in):
            features_vector_one[i, :] = features_pool[feature_captions[i]].T

        # One parameter value per song, expanded to one value per frame,
        # in the row order d1, g1, da, gc, G
        parameters_vector_one[0, :] = d1*parameters_vector_one[0, :]
        parameters_vector_one[1, :] = g1*parameters_vector_one[1, :]
        parameters_vector_one[2, :] = da*parameters_vector_one[2, :]
        parameters_vector_one[3, :] = gc*parameters_vector_one[3, :]
        parameters_vector_one[4, :] = G*parameters_vector_one[4, :]

        # Stores example index number
        index_vector_one *= idx

        print "[II] %d parameters used:" % len(parameter_captions)
        print str(parameter_captions).replace("', ", "\n").replace('[', '').replace("'", "[II]\t ")[:-7].replace('parameter.', '')

        if features_vector is None:
            features_vector = features_vector_one
        else:
            features_vector = append(features_vector, features_vector_one, 1)

        if parameters_vector is None:
            parameters_vector = parameters_vector_one
        else:
            parameters_vector = append(parameters_vector, parameters_vector_one, 1)

        if index_vector is None:
            index_vector = index_vector_one
        else:
            index_vector = append(index_vector, index_vector_one)

        idx += 1

    print "[II] Marking silent parts"
    #features_full_nontransformed_train = features_vector.copy()

    silent_parts = zeros((1, features_vector.shape[1]))

    rms = features_vector[feature_captions.index('rms'), :]

    # Implementing a hysteresis gate: a frame only switches to silent when
    # the RMS falls below the low threshold (a hundredth of the mean RMS)
    # and only switches back to active when it rises above the high
    # threshold (a tenth of the mean RMS)

    rms_threshold_mean = mean(rms)

    rms_threshold_max = max(rms)
    rms_threshold_min = min(rms)

    rms_threshold_high = 0.1 * rms_threshold_mean
    rms_threshold_low = 0.01 * rms_threshold_mean

    for n in range(1, len(rms)):
        prev = rms[n-1]
        curr = rms[n]

        if prev >= rms_threshold_high:
            if curr < rms_threshold_low:
                silent_parts[0, n] = 1
            else:
                silent_parts[0, n] = 0
        elif prev <= rms_threshold_low:
            if curr > rms_threshold_high:
                silent_parts[0, n] = 0
            else:
                silent_parts[0, n] = 1
        else:
            silent_parts[0, n] = silent_parts[0, n-1]

    if silent_parts[0, 1] == 1:
        silent_parts[0, 0] = 1


    ## START COMMENTING HERE FOR KEEPING ACTIVE PARTS ONLY

    active_index = invert(silent_parts.flatten().astype(bool))

    # Keep only active parts
    features_vector = features_vector[:, active_index]

    # Keep this for debugging purposes
    parameters_vector = parameters_vector[:, active_index]

    ## UP TO HERE

    # Normalize each parameter row using the ranges imported from `mapping'
    parameters_vector[0, :] = (parameters_vector[0, :] - d1_min)/d1_max  # d1
    parameters_vector[1, :] = (parameters_vector[1, :] - g1_min)/g1_max  # g1
    parameters_vector[2, :] = (parameters_vector[2, :] - da_min)/da_max  # da
    parameters_vector[3, :] = (parameters_vector[3, :] - gc_min)/gc_max  # gc
    parameters_vector[4, :] = (parameters_vector[4, :] - G_min)/G_max    # G

    # Store moments
    moments_vector = zeros((features_vector.shape[0], 2))
    features_vector_original = features_vector.copy()

    print "[II] Storing moments vector"
    for i in range(0, features_vector.shape[0]):
        mean_ = mean(features_vector[i, :])
        std_ = std(features_vector[i, :], ddof=1)
        moments_vector[i, 0] = mean_
        moments_vector[i, 1] = std_

        # Standardize each feature to zero mean and unit variance
        features_vector[i, :] = (features_vector[i, :] - mean_)/std_

    features_vector_old_train = features_vector.copy()

    print "[II] Extracting PCA configuration "
    kernel, q, featurelist = extract_pca_configuration_from_data(features_vector)

    q = kernel.shape[0]

    print "[II] Optimal number of PCs to keep: %d" % q

    feature_captions_array = array(feature_captions)
    features_to_keep = list(feature_captions_array[featurelist])

    print "[II] Decided to keep %d features:" % len(features_to_keep)
    print str(features_to_keep).replace("', ", "\n").replace('[', '').replace("'", "[II]\t ")[:-7]

    featurelist_bak = featurelist
    features_kept_data = features_vector[featurelist, :]
    features_kept_data_bak = features_kept_data.copy()
    features_vector = (kernel.T*features_kept_data)[0:q, :]
    features_vector_new_train = features_vector.copy()

    features_vector_unsmoothed = features_vector
    features_vector = smooth_matrix_1D(features_vector)

    # Construct the parameters alphabet; each symbol is going to be a
    # different column vector in the parameter code matrix

    parameters_state, parameter_state_alphabet, parameter_state_alphabet_inv = vector_to_states(parameters_vector)

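
    # At this point the problem is fully discrete: the columns of
    # features_vector are PCA-projected, smoothed feature frames, and
    # parameters_state assigns each frame one symbol of the parameter
    # alphabet.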

    CV = 10
    print "[II] %d-fold cross validation" % CV

    print "[II] GaussianNB..."
    gnbc = GaussianNB()
    predictsGNBC = cross_val_predict(gnbc, features_vector.T, parameters_state, cv=CV)
    scoresGNBC = f1_score(parameters_state, predictsGNBC, average='weighted')
    parametersGNBC = states_to_vector(predictsGNBC, parameter_state_alphabet_inv)
    msesGNBC = mse(parametersGNBC, parameters_vector)
    print "[II] f1-weighted: %s" % scoresGNBC
    print "[II] mse: %s" % msesGNBC

    print "[II] o-v-r SVM..."
    ovrc = LinearSVC(dual=False)
    predictsOVRC = cross_val_predict(ovrc, features_vector.T, parameters_state, cv=CV)
    scoresOVRC = f1_score(parameters_state, predictsOVRC, average='weighted')
    parametersOVRC = states_to_vector(predictsOVRC, parameter_state_alphabet_inv)
    msesOVRC = mse(parametersOVRC, parameters_vector)
    print "[II] f1-weighted: %s" % scoresOVRC
    print "[II] mse: %s" % msesOVRC

    print "[II] GaussianHMM..."
    start = time()

    tempfolder = tempfile.mkdtemp()

    # Memory-mapped scratch arrays so the calibration loops below could be
    # run with joblib.Parallel
    _scores_cs = memmap(os.path.join(tempfolder, 'scores'), dtype=float, shape=98, mode='w+')
    _mses_cs = memmap(os.path.join(tempfolder, 'mses'), dtype=float, shape=98, mode='w+')

    _chain_sizes = memmap(os.path.join(tempfolder, 'cs'), dtype=int, shape=98, mode='w+')

    print "[II] Calibrating GaussianHMM please wait..."

    # Cross-validate the HMM chain size N (one call per candidate N)
    def iter_(scores, cs, n):
        _n = n

        try:
            hmmc = HmmClassifier(N=_n, n_components=1)
            predictsHMMC = cross_val_predict(hmmc, features_vector.T, parameters_state, cv=CV)
            scores[n-2] = f1_score(parameters_state, predictsHMMC, average='weighted')
            parametersHMMC = states_to_vector(predictsHMMC, parameter_state_alphabet_inv)
            _mses_cs[n-2] = mse(parametersHMMC, parameters_vector)

        except Exception:
            scores[n-2] = 0
        cs[n-2] = _n

    print "[II] Calibrating GaussianHMM please wait..."

    # Cross-validate the number of mixture components with the chain size
    # fixed (`opt_N' would come from the search above)
    def iter_(scores, nc, n):
        try:
            hmmc = HmmClassifier(N=opt_N, n_components=n)
            predictsHMMC = cross_val_predict(hmmc, features_vector.T, parameters_state, cv=CV)

            scores[n-1] = f1_score(parameters_state, predictsHMMC, average='weighted')
            parametersHMMC = states_to_vector(predictsHMMC, parameter_state_alphabet_inv)
            _mses_nc[n-1] = mse(parametersHMMC, parameters_vector)
        except Exception:
            scores[n-1] = 0
        nc[n-1] = n

    print "[II] Configuring using cross-validation disabled for performance reasons, please ignore results or uncomment the relevant code."

    finish = time()

    outp = ""

    outp += "[II] Results (F1-scores and MSEs):\n"

    outp += "[II] Estimator\tF1\tMSE\n"
    outp += "[II] %s\t%.2f\t%.5f\n" % ("GaussianNB", scoresGNBC, msesGNBC)
    outp += "[II] %s\t%.2f\t%.5f\n" % ("O-V-R SVM", scoresOVRC, msesOVRC)

    totalfinish = time()
    totalduration = totalfinish - totalstart

    outp += "[II] Total time: %dm:%ds\n" % (int(totalduration) / 60, int(totalduration) % 60)
    print outp
    shutil.rmtree(tempfolder)

    gnbc = NBClassifier()
    gnbc.fit(features_vector.T, parameters_state)

    outp += "[GNBC] Testing against full data:\n"
    predictsGNBC = gnbc.predict(features_vector.T)
    scoresGNBC = f1_score(parameters_state, predictsGNBC, average='weighted')
    parametersGNBC = states_to_vector(predictsGNBC, parameter_state_alphabet_inv)
    msesGNBC = mse(parametersGNBC, parameters_vector)
    outp += "[II] f1-weighted: %s\n" % scoresGNBC
    outp += "[II] mse: %s\n" % msesGNBC

    ovrc = SVMClassifier()
    ovrc.fit(features_vector.T, parameters_state)

    outp += "[OVRC] Testing against full data:\n"
    predictsOVRC = ovrc.predict(features_vector.T)
    scoresOVRC = f1_score(parameters_state, predictsOVRC, average='weighted')
    parametersOVRC = states_to_vector(predictsOVRC, parameter_state_alphabet_inv)
    msesOVRC = mse(parametersOVRC, parameters_vector)
    outp += "[II] f1-weighted: %s\n" % scoresOVRC
    outp += "[II] mse: %s\n" % msesOVRC

    model_gnb = ReverbModel("Naive Bayes Classifier Model", kernel, q, features_to_keep, gnbc, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector)
    model_svm = ReverbModel("OVR LinearSVM Classifier Model", kernel, q, features_to_keep, ovrc, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector)

    model_gnb.save("model_gnb.dat")
    model_svm.save("model_svm.dat")

    print outp
    f = open('results.txt', 'w')
    f.write(outp)
    f.close()
|