comparison experiment-reverb/code/cmodels.py @ 0:246d5546657c

initial commit, needs cleanup
author Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date Wed, 14 Dec 2016 13:15:48 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:246d5546657c
1 #!/usr/bin/python2
2 # -*- coding: utf-8 -*-
3
4 """
5 Classifier models
6 @author: Emmanouil Theofanis Chourdakis
7 """
8
9
10 # Imports needed
11
12 from hmmlearn.hmm import GMM
13 from hmmlearn import hmm
14
15 from sklearn import svm
16 import copy as cp
17
18 from scipy.misc import logsumexp
19 import pickle
20
21 from collections import Counter
22
23 class HMMsvm:
24 def __init__(self, svm_):
25 self.svm = svm_
26
27
28 # self.svm = MyAVAClassifier()
29
30
31 def fit(self, features_list, states, flr=1e-13):
32 # def fit(self, features_list, flr=1e-13):
33
34 # TODO: Concatenate features, train
35 #self.svm.fit(X, states, flr)
36 #self.prior = self.svm.prior
37 #self.transmat = self.svm.transmat
38
39
40 features = None
41
42 for f in features_list:
43 if features is None:
44 features = f
45 else:
46 features = append(features, f, 0)
47
48 fullstates = None
49 # T = features.shape[0]
50 for s in states:
51 if fullstates is None:
52 fullstates = s
53 else:
54 fullstates = append(fullstates, s)
55
56
57
58
59
60
61 self.n_classes = self.svm.n_classes
62 n_classes = self.n_classes
63
64 # print fullstates, shape(fullstates)
65
66 h = histogram(fullstates, n_classes)[0].astype(float)
67 self.prior = h/sum(h)
68
69 # Add a very small probability for random jump
70
71 self.prior += flr
72 self.prior = self.prior/sum(self.prior)
73
74 transmat = zeros((n_classes, n_classes))
75 transitions = zeros((n_classes, ))
76 for s in states:
77 for i in range(1, len(s)):
78 prev = s[i-1]
79 cur = s[i]
80 transmat[prev,cur] += 1
81 transitions[prev] += 1
82
83 transitions[transitions == 0] = 1
84
85 for i in range(0, transmat.shape[0]):
86 transmat[i,:] = transmat[i,:]/transitions[i]
87
88 self.transmat = transmat
89
90 # Add a very small probability for random jump to avoid zero values
91
92 self.transmat += flr
93
94 for i in range(0, self.transmat.shape[0]):
95 self.transmat[i,:] = self.transmat[i,:]/sum(self.transmat[i,:])
96
97
98 A = zeros((n_classes, n_classes))
99
100 Aold = self.transmat
101
102 while mse(A,Aold)>10*size(A)*flr:
103 Aold = A.copy()
104 A = zeros((n_classes, n_classes))
105 transitions = zeros((n_classes, ))
106
107 for i in range(0,len(features_list)):
108 f = features_list[i]
109 # print "FEATURES:", f
110 # print f
111 s,p = self.logviterbi(f)
112 # print s
113 for j in range(1, len(s)):
114 prev = s[j-1]
115 cur = s[j]
116 A[prev,cur] += 1
117 transitions[prev] += 1
118
119 transitions[transitions == 0] = 1
120
121
122 for i in range(0, A.shape[0]):
123 A[i,:] = A[i,:]/transitions[i]
124
125 A += flr
126
127
128
129 self.transmat = A
130
131 for i in range(0, A.shape[0]):
132 if sum(A[i,:]) > 0:
133 A[i,:] = A[i,:]/sum(A[i,:])
134
135
136
137 # print observations
138
139
140 def estimate_emission_probability(self, x, q):
141 post = self.svm.estimate_posterior_probability_vector(x)
142 # print "post: ", post
143 prior = self.prior
144 # print "prior: ", prior
145 p = post/prior
146 p = p/sum(p)
147
148 return p[q]
149
150 # def estimate_emission_probability(self, x, q):
151 # return self.svm.estimate_emission_probability(q, x)
152
153
154 def predict(self, X):
155 return self.logviterbi(X)[0]
156
157
158 def logviterbi(self, X):
159 # Number of states
160
161 N = self.n_classes
162
163 # Number of observations
164
165
166
167 T = X.shape[0]
168
169
170
171 transmat = self.transmat
172
173 #1. Initalization
174
175 a1 = self.prior
176
177 delta = zeros((T,N))
178 psi = zeros((T,N))
179
180 for i in range(0, N):
181 delta[0, i] = log(self.transmat[0,i]) + log(self.estimate_emission_probability(X[0,:],i))
182
183
184 #2. Recursion
185
186 for t in range(1, T):
187 # delta_old = delta.copy()
188 x = X[t, :]
189 for j in range(0, N):
190 # print "j: %d, S" % j, delta_old+log(transmat[:,j])
191 delta[t,j] = max(delta[t-1,:] + log(transmat[:,j])) + log(self.estimate_emission_probability(x,j))
192 psi[t,j] = argmax(delta[t-1,:] + log(transmat[:,j]))
193
194 # print "t: %d, delta: " % t, delta
195 # print "t: %d, psi:" % t, psi
196
197
198 # 3. Termination
199
200 p_star = max(delta[T-1,:] + log(transmat[:,N-1]))
201 q_star = argmax(delta[T-1,:] + log(transmat[:, N-1]))
202
203
204 # 4. Backtracking
205
206 q = zeros((T,))
207 p = zeros((T,))
208
209 q[-1] = q_star
210 p[-1] = p_star
211 for t in range(1, T):
212 q[-t-1] = psi[-t, q[-t]]
213 p[-t-1] = delta[-t, q[-t]]
214
215
216 return q,p
217
218 def viterbi(self, X):
219 # Number of states
220
221 N = self.n_classes
222
223 # Number of observations
224
225 T = X.shape[0]
226
227 transmat = self.transmat
228
229 #1. Initialization
230
231 a1 = self.prior
232
233 delta = zeros((N, ))
234 psi = zeros((N, ))
235
236 for i in range(0, N):
237 delta[i] = a1[i]*self.estimate_emission_probability(X[0,:],i)
238
239
240
241
242
243 #2. Recursion
244
245 for t in range(1, T):
246 delta_old = delta.copy()
247 x = X[t,:]
248 for j in range(0, N):
249 delta[j] = max(delta_old*transmat[:,j])*self.estimate_emission_probability(x, j)
250 psi[j] = argmax(delta_old*transmat[:,j])
251
252 #3. Termination
253
254 p_star = max(delta*transmat[:,N-1])
255 q_star = argmax(delta*transmat[:,N-1])
256
257
258
259 #4. Backtracking
260
261 q = zeros((T,))
262 q[-1] = q_star
263
264 for t in range(1, T):
265 q[-t-1] = psi[q[-t]]
266
267 return q
268
269
270 def _log_likelihood_matrix(self, X):
271 N = self.n_classes
272 ll = zeros((X.shape[0], N))
273
274 for i in range(0, X.shape[0]):
275 ll[i,:] = self._forward(i, X)
276
277 return ll
278
279 def compute_ksus(self, X):
280 N = self.n_classes
281 T = X.shape[0]
282 print "[II] Computing gammas... "
283
284 gammas = self._forward_backward(X)
285
286 # Initialize
287
288 aij = self.transmat
289
290
291
292
293
294
295 def _not_quite_ksu(self, t, i, j, X):
296 alpha = exp(self.forward(X[0:t+1,:]))[i]
297 beta = exp(self.backward(X[t+1:,:]))[j]
298
299 aij = self.transmat[i,j]
300 bj = self.estimate_emission_probability(X[t+1,:], j)
301
302 return alpha*beta*aij*bj
303
304 def _ksu(self, t, i, j, X):
305 alphaT = exp(self.forward(X[0:t+1,:]))[0]
306
307 return self._not_quite_ksu(t,i,j,X)
308
309
310 def _forward_backward(self, X):
311 T = X.shape[0]
312 N = self.n_classes
313 fv = zeros((T, N))
314 sv = zeros((T, N))
315
316 b = None
317 for t in range(1, T+1):
318
319 fv[t-1,:] = self._forward_message(fv[t-2,:], X[0:t, ])
320
321 for t in range(1, T+1):
322 b = self._backward_message(b, X[-t: , :])
323 sv[-t,:] = lognorm(fv[t-1,:]*b)
324
325 return sv
326
327
328 def _backward(self, t, X):
329 N = self.n_classes
330 a = ones((N,))/N
331
332 beta = zeros((N, ))
333 transmat = self.transmat
334 T = X.shape[0]
335 x = X[t, :]
336 if t == T-1:
337 beta = log(a)
338
339 return beta
340 else:
341 tosum = zeros((N, ))
342 bt = self._backward(t+1, X)
343 btnew = zeros((N, ))
344 # print bt
345 for j in range(0, N):
346 for i in range(0, N):
347 # print log(self.estimate_emission_probability(x, j)), bt[i], log(transmat[i,j])
348 tosum[i] = log(self.estimate_emission_probability(x, j)) + bt[i] + log(transmat[i,j])
349 # print tosum
350
351 btnew[j] = logsumexp(tosum)
352 btnew = lognorm(btnew)
353 return btnew
354
355
356 def score(self, X):
357 return self.forward(X)
358
359 def forward(self, X):
360 T = X.shape[0]
361 f = None
362 for t in range(1, T+1):
363 f = self._forward_message(f, X[0:t, :])
364
365 return f
366
367 def backward(self, X):
368 T = X.shape[0]
369 b = None
370 for t in range(1,T+1):
371 # print t
372 # print t,b
373 idx = arange(-t,T)
374 # print idx
375 b = self._backward_message(b, X[-t:, :])
376
377 return b
378
379 def _backward_message(self, b, X):
380 N = self.n_classes
381
382
383 beta = zeros((N, ))
384 transmat = self.transmat
385
386 x = X[0, :]
387
388 if X.shape[0] == 1:
389 beta = log(ones((N,))/N)
390 return beta
391 else:
392 tosum = zeros((N, ))
393 btnew = zeros((N, ))
394 for j in range(0, N):
395 for i in range(0, N):
396 tosum[i] = log(self.estimate_emission_probability(x,j)) + b[i] + log(transmat[i,j])
397
398 btnew[j] = logsumexp(tosum)
399 # btnew = lognorm(btnew)
400 return btnew
401
402 def _forward_message(self, f, X):
403 N = self.n_classes
404 a = self.prior
405
406 alpha = zeros((N, ))
407 transmat = self.transmat
408
409 x = X[-1, :]
410
411 if X.shape[0] == 1:
412 for i in range(0, N):
413 alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i))
414 # alpha = lognorm(alpha)
415 return alpha
416
417 else:
418 tosum = zeros((N,))
419 ftnew = zeros((N,))
420 for j in range(0, N):
421 for i in range(0, N):
422 tosum[i] = f[i] + log(transmat[i,j])
423 Sum = logsumexp(tosum)
424 bj = self.estimate_emission_probability(x, j)
425 ftnew[j] = Sum+log(bj)
426
427 # ftnew = lognorm(ftnew)
428 return ftnew
429
430 def _forward(self, t, X):
431 N = self.n_classes
432 a = self.prior
433 # print a
434 alpha = zeros((N, ))
435 transmat = self.transmat
436 x = X[t, :]
437
438 if t == 0:
439 for i in range(0, N):
440 alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i))
441 # print "--"
442 # print alpha
443 alpha = lognorm(alpha)
444 # print alpha
445 # print "xx"
446 return alpha
447 else:
448 tosum = zeros((N, ))
449 ft = self._forward(t-1, X)
450 ftnew = zeros((N, ))
451 for j in range(0, N):
452 for i in range(0, N):
453 # print ft
454 tosum[i] = ft[i] + log(transmat[i,j])
455
456 Sum = logsumexp(tosum)
457 bj = self.estimate_emission_probability(x, j)
458 ftnew[j] = Sum+log(bj)
459 ftnew = lognorm(ftnew)
460
461 return ftnew