chourdakisreiss2016: experiment-reverb/code/cmodels.py @ 0:246d5546657c

initial commit, needs cleanup

author: Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date:   Wed, 14 Dec 2016 13:15:48 +0000
#!/usr/bin/python2
# -*- coding: utf-8 -*-

"""
Classifier models
@author: Emmanouil Theofanis Chourdakis
"""


# Imports needed

from numpy import *  # numpy names (zeros, log, argmax, append, histogram, ...) are used unqualified below

from hmmlearn.hmm import GMM
from hmmlearn import hmm

from sklearn import svm
import copy as cp

from scipy.misc import logsumexp
import pickle

from collections import Counter

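# NOTE: lognorm() and mse() are called by HMMsvm below but are not defined in
# the portion of the file shown here. The definitions that follow are a minimal
# sketch of what they appear to compute (log-normalisation of a log-probability
# vector, and mean squared error); they are assumptions, not necessarily the
# author's originals, and any later definitions in the module take precedence.

def lognorm(logv):
    """Normalise a vector of log-probabilities so that exp() of it sums to one."""
    return logv - logsumexp(logv)


def mse(A, B):
    """Mean squared error between two equally-shaped arrays."""
    return mean((asarray(A) - asarray(B)) ** 2)

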
class HMMsvm:
    def __init__(self, svm_):
        self.svm = svm_

        # self.svm = MyAVAClassifier()

    def fit(self, features_list, states, flr=1e-13):
    # def fit(self, features_list, flr=1e-13):

        # TODO: Concatenate features, train
        #self.svm.fit(X, states, flr)
        #self.prior = self.svm.prior
        #self.transmat = self.svm.transmat

        # Stack the per-sequence feature matrices into a single (sum_T, D) array
        features = None

        for f in features_list:
            if features is None:
                features = f
            else:
                features = append(features, f, 0)

        # ... and concatenate the corresponding state label sequences
        fullstates = None
        # T = features.shape[0]
        for s in states:
            if fullstates is None:
                fullstates = s
            else:
                fullstates = append(fullstates, s)

        self.n_classes = self.svm.n_classes
        n_classes = self.n_classes

        # print fullstates, shape(fullstates)

        # Initial state distribution: relative frequency of each state label
        h = histogram(fullstates, n_classes)[0].astype(float)
        self.prior = h/sum(h)

        # Add a very small probability for random jump

        self.prior += flr
        self.prior = self.prior/sum(self.prior)

        # Maximum-likelihood transition estimate: count transitions i -> j in the
        # labelled sequences and normalise each row by the number of departures from i
        transmat = zeros((n_classes, n_classes))
        transitions = zeros((n_classes, ))
        for s in states:
            for i in range(1, len(s)):
                prev = s[i-1]
                cur = s[i]
                transmat[prev, cur] += 1
                transitions[prev] += 1

        transitions[transitions == 0] = 1

        for i in range(0, transmat.shape[0]):
            transmat[i,:] = transmat[i,:]/transitions[i]

        self.transmat = transmat

        # Add a very small probability for random jump to avoid zero values

        self.transmat += flr

        for i in range(0, self.transmat.shape[0]):
            self.transmat[i,:] = self.transmat[i,:]/sum(self.transmat[i,:])

        # Refine the transition matrix by Viterbi re-estimation: decode each
        # training sequence with the current model, re-count the transitions
        # along the decoded state paths, and repeat until the estimate stops
        # changing (up to the floor value flr).
        A = zeros((n_classes, n_classes))

        Aold = self.transmat

        while mse(A, Aold) > 10*size(A)*flr:
            Aold = A.copy()
            A = zeros((n_classes, n_classes))
            transitions = zeros((n_classes, ))

            for i in range(0, len(features_list)):
                f = features_list[i]
                # print "FEATURES:", f
                # print f
                s, p = self.logviterbi(f)
                # print s
                for j in range(1, len(s)):
                    prev = s[j-1]
                    cur = s[j]
                    A[prev, cur] += 1
                    transitions[prev] += 1

            transitions[transitions == 0] = 1

            for i in range(0, A.shape[0]):
                A[i,:] = A[i,:]/transitions[i]

            A += flr

            self.transmat = A

            for i in range(0, A.shape[0]):
                if sum(A[i,:]) > 0:
                    A[i,:] = A[i,:]/sum(A[i,:])

        # print observations

    def estimate_emission_probability(self, x, q):
        post = self.svm.estimate_posterior_probability_vector(x)
        # print "post: ", post
        prior = self.prior
        # print "prior: ", prior
        p = post/prior
        p = p/sum(p)

        return p[q]

    # def estimate_emission_probability(self, x, q):
    #     return self.svm.estimate_emission_probability(q, x)

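    # The method above uses the standard hybrid "scaled likelihood" trick: by
    # Bayes' rule p(x|q) is proportional to p(q|x)/p(q), so dividing the SVM's
    # posterior estimates by the state priors gives quantities that stand in
    # for HMM emission probabilities. A small illustrative sketch (hypothetical
    # numbers, not from the original code):
    #
    #   post  = array([0.7, 0.2, 0.1])   # SVM posteriors p(q|x)
    #   prior = array([0.5, 0.3, 0.2])   # state priors p(q)
    #   p = post/prior                   # [1.4, 0.667, 0.5]
    #   p = p/sum(p)                     # renormalised scaled likelihoods
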
    def predict(self, X):
        return self.logviterbi(X)[0]

    def logviterbi(self, X):
        # Number of states
        N = self.n_classes

        # Number of observations
        T = X.shape[0]

        transmat = self.transmat

        # 1. Initialization

        a1 = self.prior

        delta = zeros((T, N))
        psi = zeros((T, N), dtype=int)

        # initialise with the state prior (as in viterbi() below)
        for i in range(0, N):
            delta[0, i] = log(a1[i]) + log(self.estimate_emission_probability(X[0,:], i))

        # 2. Recursion

        for t in range(1, T):
            # delta_old = delta.copy()
            x = X[t, :]
            for j in range(0, N):
                # print "j: %d, S" % j, delta_old+log(transmat[:,j])
                delta[t,j] = max(delta[t-1,:] + log(transmat[:,j])) + log(self.estimate_emission_probability(x, j))
                psi[t,j] = argmax(delta[t-1,:] + log(transmat[:,j]))

            # print "t: %d, delta: " % t, delta
            # print "t: %d, psi:" % t, psi

        # 3. Termination

        p_star = max(delta[T-1,:] + log(transmat[:,N-1]))
        q_star = argmax(delta[T-1,:] + log(transmat[:, N-1]))

        # 4. Backtracking

        q = zeros((T,), dtype=int)
        p = zeros((T,))

        q[-1] = q_star
        p[-1] = p_star
        for t in range(1, T):
            q[-t-1] = psi[-t, q[-t]]
            p[-t-1] = delta[-t, q[-t]]

        return q, p

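    # For reference, the log-domain recursion implemented by logviterbi() is
    # the usual one:
    #
    #   delta_1(i) = log pi_i + log b_i(x_1)
    #   delta_t(j) = max_i [ delta_{t-1}(i) + log a_ij ] + log b_j(x_t)
    #   psi_t(j)   = argmax_i [ delta_{t-1}(i) + log a_ij ]
    #
    # with b_j(x) taken from estimate_emission_probability() above.
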
    def viterbi(self, X):
        # Number of states
        N = self.n_classes

        # Number of observations
        T = X.shape[0]

        transmat = self.transmat

        # 1. Initialization

        a1 = self.prior

        delta = zeros((N, ))
        psi = zeros((N, ), dtype=int)

        for i in range(0, N):
            delta[i] = a1[i]*self.estimate_emission_probability(X[0,:], i)

        # 2. Recursion

        for t in range(1, T):
            delta_old = delta.copy()
            x = X[t,:]
            for j in range(0, N):
                delta[j] = max(delta_old*transmat[:,j])*self.estimate_emission_probability(x, j)
                psi[j] = argmax(delta_old*transmat[:,j])

        # 3. Termination

        p_star = max(delta*transmat[:,N-1])
        q_star = argmax(delta*transmat[:,N-1])

        # 4. Backtracking

        q = zeros((T,), dtype=int)
        q[-1] = q_star

        for t in range(1, T):
            q[-t-1] = psi[q[-t]]

        return q

    def _log_likelihood_matrix(self, X):
        N = self.n_classes
        ll = zeros((X.shape[0], N))

        for i in range(0, X.shape[0]):
            ll[i,:] = self._forward(i, X)

        return ll

    def compute_ksus(self, X):
        N = self.n_classes
        T = X.shape[0]
        print "[II] Computing gammas... "

        gammas = self._forward_backward(X)

        # Initialize

        aij = self.transmat

        # NOTE: the computation of the ksi quantities is left unfinished in this revision

    def _not_quite_ksu(self, t, i, j, X):
        alpha = exp(self.forward(X[0:t+1,:]))[i]
        beta = exp(self.backward(X[t+1:,:]))[j]

        aij = self.transmat[i,j]
        bj = self.estimate_emission_probability(X[t+1,:], j)

        return alpha*beta*aij*bj

    def _ksu(self, t, i, j, X):
        alphaT = exp(self.forward(X[0:t+1,:]))[0]

        return self._not_quite_ksu(t, i, j, X)

    def _forward_backward(self, X):
        T = X.shape[0]
        N = self.n_classes
        fv = zeros((T, N))
        sv = zeros((T, N))

        b = None
        for t in range(1, T+1):
            fv[t-1,:] = self._forward_message(fv[t-2,:], X[0:t, ])

        for t in range(1, T+1):
            b = self._backward_message(b, X[-t:, :])
            sv[-t,:] = lognorm(fv[t-1,:]*b)

        return sv

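    # _forward_backward() above follows the usual smoothing scheme: a forward
    # pass collects filtered messages via _forward_message(), a backward pass
    # collects backward messages via _backward_message(), and the two are
    # combined and normalised per time step to give smoothed state posteriors,
    # gamma_t(i) proportional to alpha_t(i) * beta_t(i).
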
    def _backward(self, t, X):
        N = self.n_classes
        a = ones((N,))/N

        beta = zeros((N, ))
        transmat = self.transmat
        T = X.shape[0]
        x = X[t, :]
        if t == T-1:
            beta = log(a)

            return beta
        else:
            tosum = zeros((N, ))
            bt = self._backward(t+1, X)
            btnew = zeros((N, ))
            # print bt
            for j in range(0, N):
                for i in range(0, N):
                    # print log(self.estimate_emission_probability(x, j)), bt[i], log(transmat[i,j])
                    tosum[i] = log(self.estimate_emission_probability(x, j)) + bt[i] + log(transmat[i,j])
                # print tosum

                btnew[j] = logsumexp(tosum)
            btnew = lognorm(btnew)
            return btnew

    def score(self, X):
        return self.forward(X)

    def forward(self, X):
        T = X.shape[0]
        f = None
        for t in range(1, T+1):
            f = self._forward_message(f, X[0:t, :])

        return f

    def backward(self, X):
        T = X.shape[0]
        b = None
        for t in range(1, T+1):
            # print t
            # print t,b
            idx = arange(-t, T)
            # print idx
            b = self._backward_message(b, X[-t:, :])

        return b

    def _backward_message(self, b, X):
        N = self.n_classes

        beta = zeros((N, ))
        transmat = self.transmat

        x = X[0, :]

        if X.shape[0] == 1:
            beta = log(ones((N,))/N)
            return beta
        else:
            tosum = zeros((N, ))
            btnew = zeros((N, ))
            for j in range(0, N):
                for i in range(0, N):
                    tosum[i] = log(self.estimate_emission_probability(x, j)) + b[i] + log(transmat[i,j])

                btnew[j] = logsumexp(tosum)
            # btnew = lognorm(btnew)
            return btnew

    def _forward_message(self, f, X):
        N = self.n_classes
        a = self.prior

        alpha = zeros((N, ))
        transmat = self.transmat

        x = X[-1, :]

        if X.shape[0] == 1:
            for i in range(0, N):
                alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i))
            # alpha = lognorm(alpha)
            return alpha

        else:
            tosum = zeros((N,))
            ftnew = zeros((N,))
            for j in range(0, N):
                for i in range(0, N):
                    tosum[i] = f[i] + log(transmat[i,j])
                Sum = logsumexp(tosum)
                bj = self.estimate_emission_probability(x, j)
                ftnew[j] = Sum + log(bj)

            # ftnew = lognorm(ftnew)
            return ftnew

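    # The message update above is the standard forward recursion in the log
    # domain: alpha_1(i) = log pi_i + log b_i(x_1) and
    # alpha_t(j) = logsumexp_i( alpha_{t-1}(i) + log a_ij ) + log b_j(x_t),
    # which is what forward() accumulates one observation at a time.
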
    def _forward(self, t, X):
        N = self.n_classes
        a = self.prior
        # print a
        alpha = zeros((N, ))
        transmat = self.transmat
        x = X[t, :]

        if t == 0:
            for i in range(0, N):
                alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i))
            # print "--"
            # print alpha
            alpha = lognorm(alpha)
            # print alpha
            # print "xx"
            return alpha
        else:
            tosum = zeros((N, ))
            ft = self._forward(t-1, X)
            ftnew = zeros((N, ))
            for j in range(0, N):
                for i in range(0, N):
                    # print ft
                    tosum[i] = ft[i] + log(transmat[i,j])

                Sum = logsumexp(tosum)
                bj = self.estimate_emission_probability(x, j)
                ftnew[j] = Sum + log(bj)
            ftnew = lognorm(ftnew)

            return ftnew
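
# Example of how HMMsvm appears to be driven (a sketch, not part of the original
# file): the constructor expects an already-trained multi-class SVM wrapper that
# exposes n_classes and estimate_posterior_probability_vector(x); in this
# repository that role seems to be played by an AVA-style classifier such as the
# MyAVAClassifier mentioned above, which is not shown in this excerpt.
#
#   features_list = [f1, f2]          # one (T_i, D) feature matrix per sequence
#   states        = [s1, s2]          # matching integer state-label sequences
#
#   clf = MyAVAClassifier(...)        # hypothetical construction/training call
#   model = HMMsvm(clf)
#   model.fit(features_list, states)  # priors + transitions, then Viterbi refinement
#   decoded = model.predict(f_new)    # most likely state sequence for new features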