#!/usr/bin/python2
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 23 11:53:17 2015

@author: mmxgn
"""

# This file does the cluster estimation and the removal of outliers

from sys import argv, exit
from essentia.standard import YamlInput, YamlOutput
from essentia import Pool
from pca import *

from numpy import *
from sklearn import cluster
from sklearn.metrics import pairwise_distances
from sklearn.cluster import KMeans, MiniBatchKMeans
from matplotlib.pyplot import *
#from sklearn.mixture import GMM
from sklearn.naive_bayes import GaussianNB, MultinomialNB
import scipy.signal  # needed by smooth_matrix_1D() below (scipy.signal.gaussian)
from scipy.signal import decimate
from sklearn import cross_validation
from sklearn import multiclass

#from hmmlearn import hmm
from hmmlearn.hmm import GMM
from hmmlearn import hmm

from sklearn import svm
import copy as cp

from scipy.misc import logsumexp
import pickle
#from adpcm import adm, adm_reconstruct

mse = lambda A, B: ((array(A) - array(B)) ** 2).mean()

lognorm = lambda A: A - logsumexp(A)

#logsum = lambda X: logsumexp(log(X))


def logsum(X):
    if len(X) == 1:
        return log(X[0])
    else:
        return logaddexp(log(X[0]), logsum(X[1:]))

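# Sanity check (illustrative): logsum(X) computes log(sum(X)) pairwise via
# logaddexp, so e.g. logsum([1.0, 2.0, 3.0]) ~= log(6.0), while staying
# numerically stable for long inputs of small values.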
def smooth_matrix_1D(X):
    # Smooth each row of X with a normalized 51-point Gaussian window
    # (sigma=4), zero-padding 50 columns on each side so the output keeps
    # X's shape.
    window = scipy.signal.gaussian(51, 4)
    window = window / sum(window)
    intermx = zeros((X.shape[0], X.shape[1] + 100))
    intermx[:, 50:-50] = X

    for m in range(0, X.shape[0]):
        # print intermx.shape
        intermx[m, :] = convolve(ravel(intermx[m, :]), window, 'same')

    return intermx[:, 50:-50]

def adm_reconstruct(codeword, h, dmin=.01, dmax=.28):
    # Reconstruct a signal from an ADM codeword (a row of -1/0/+1 symbols)
    # and the initial value h, mirroring the step-size adaptation in adm().
    x = zeros((1, codeword.shape[1]))

    delta1 = dmin
    delta2 = dmin
    Sum = h

    x[0] = h
    for i in range(0, codeword.shape[1]):
        if codeword[0, i] == 0:
            delta1 = dmin
            delta2 = dmin
        elif codeword[0, i] == 1:
            delta2 = dmin
            Sum += delta1
            delta1 *= 2
            if delta1 > dmax:
                delta1 = dmax
        elif codeword[0, i] == -1:
            delta1 = dmin
            Sum -= delta2
            delta2 *= 2
            if delta2 > dmax:
                delta2 = dmax
        x[0, i] = Sum
    return x

def adm(x, dmin=.01, dmax=.28, tol=0.0001):

    # Adaptive delta modulation, adapted from code:
    # (adeltamod.m)
    #
    # Adaptive Delta Modulator
    # by Gandhar Desai (gdesai)
    # BITS Pilani Goa Campus
    # Date: 28 Sept, 2013

    xsig = x

    Lx = len(x)

    ADMout = zeros((1, Lx))
    codevec = zeros((1, Lx))

    Sum = x[0]
    delta1 = dmin
    delta2 = dmin
    mult1 = 2
    mult2 = 2
    for i in range(0, Lx):
        # print abs(xsig[i] - Sum)
        if (abs(xsig[i] - Sum) < tol):
            bit = 0
            delta2 = dmin
            delta1 = dmin
        elif (xsig[i] >= Sum):
            bit = 1
            delta2 = dmin
            Sum += delta1
            delta1 *= mult1
            if delta1 > dmax:
                delta1 = dmax
        else:
            bit = -1
            delta1 = dmin
            Sum -= delta2
            delta2 *= mult2
            if delta2 > dmax:
                delta2 = dmax

        ADMout[0, i] = Sum
        codevec[0, i] = bit

    return ADMout, codevec, x[0]
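# Round-trip sketch for the pair above (illustrative; variable names are
# hypothetical):
#
#   sig = cumsum(0.01 * ones(200))       # slowly rising ramp
#   admout, code, h0 = adm(sig)          # staircase, codeword, start value
#   rec = adm_reconstruct(code, h0)      # rec[0] tracks sig within the step bounds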

def median_filter(v, K):
    # Sliding-window median: each step writes the median of the previous K
    # samples over that window (later windows overwrite earlier values).
    v2 = zeros((len(v),))
    for i in range(K, len(v)):
        seq = v[i - K:i]
        m = median(seq)
        v2[i - K:i] = m

    return v2

from models import ReverbModel

if __name__ == "__main__":
    if len(argv) != 2:
        print "[EE] Wrong number of arguments"
        print "[II] Correct syntax is:"
        print "[II] \t%s <trainingdir>" % argv[0]
        print "[II] where <trainingdir> is the directory containing the features files in the format *_features.yaml"
        print "[II] and the parameters in the format *_parameters.yaml"
        exit(-1)

    n_clusters = 3
    UpsamplingFactor = 1
    dmin = 0.001
    dmax = 0.28
    tol = 0.001

    d1min = 0.01
    d1max = 0.1

    g1min = 0.01
    g1max = 0.99

    damin = 0.006
    damax = 0.012

    Gmin = 0.01
    Gmax = 0.99

    gcmin = 0.01
    gcmax = 0.99

    # For searching the directory
    from glob import glob

    traindir = argv[1]

    songs_in_dir = glob("%s/*_features.yaml" % traindir)

    print "[II] Using files: %s" % songs_in_dir

    chain_length = 1

    # fullfeatures_pool = Pool()

    features_vector = None
    parameters_vector = None
    index_vector = None

    idx = 0

    for f_ in songs_in_dir:
        infile = f_
        paramfile = "%s_parameters.yaml" % f_.split('_features.yaml')[0]

        print "[II] Using features file: %s" % infile
        print "[II] and parameters file: %s" % paramfile

        features_pool = YamlInput(filename=infile)()
        parameters_pool = YamlInput(filename=paramfile)()

        d1 = parameters_pool['parameters.d1']
        da = parameters_pool['parameters.da']
        g1 = parameters_pool['parameters.g1']
        gc = parameters_pool['parameters.gc']
        G = parameters_pool['parameters.G']

        feature_captions = features_pool.descriptorNames()
        parameter_captions = ['d1', 'da', 'g1', 'gc', 'G']

        svmhmmstr = ""

        for c in features_pool.descriptorNames():
            if c.split('.')[0] == 'metadata' or c.split('.')[0] == 'parameter':
                feature_captions.remove(c)

        # close('all')

        # print "[II] Loaded training data from %s (%s) " % (infile, features_pool['metadata.date'][0])
        print "[II] %d Features Available: " % len(feature_captions)

        print str(feature_captions).replace("', ", "\n").replace('[', '').replace("'", "[II]\t ")[:-7]

        nfeatures_in = len(feature_captions)
        nparameters_in = 5
        features_vector_one = zeros((nfeatures_in, len(features_pool[feature_captions[0]])))
        index_vector_one = ones((len(features_pool[feature_captions[0]]),))
        parameters_vector_one = ones((nparameters_in, len(features_pool[feature_captions[0]])))

        for i in range(0, nfeatures_in):
            features_vector_one[i, :] = features_pool[feature_captions[i]].T
        # for i in range(0, nparameters_in):
        #     parameters_vector[i, :] = features_pool[parameter_captions[0]].T

        # Row order: d1, g1, da, gc, G
        parameters_vector_one[0, :] = d1 * parameters_vector_one[0, :]
        parameters_vector_one[1, :] = g1 * parameters_vector_one[1, :]
        parameters_vector_one[2, :] = da * parameters_vector_one[2, :]
        parameters_vector_one[3, :] = gc * parameters_vector_one[3, :]
        parameters_vector_one[4, :] = G * parameters_vector_one[4, :]

        # Stores example index number
        index_vector_one *= idx

        print "[II] %d parameters used:" % len(parameter_captions)
        print str(parameter_captions).replace("', ", "\n").replace('[', '').replace("'", "[II]\t ")[:-7].replace('parameter.', '')

        if features_vector is None:
            features_vector = features_vector_one
        else:
            features_vector = append(features_vector, features_vector_one, 1)

        if parameters_vector is None:
            parameters_vector = parameters_vector_one
        else:
            parameters_vector = append(parameters_vector, parameters_vector_one, 1)

        if index_vector is None:
            index_vector = index_vector_one
        else:
            index_vector = append(index_vector, index_vector_one)

        idx += 1

    print "[II] Marking silent parts"
    features_full_nontransformed_train = features_vector.copy()
    # silent_parts = zeros((1, len(features_pool[feature_captions[i]].T)))

    silent_parts = zeros((1, features_vector.shape[1]))

    rms = features_vector[feature_captions.index('rms'), :]

    # Implementing a hysteresis gate: the high threshold is a tenth of the
    # mean RMS and the low threshold is a hundredth of the mean RMS.

    rms_threshold_mean = mean(rms)

    rms_threshold_max = max(rms)
    rms_threshold_min = min(rms)

    rms_threshold_high = 0.1 * rms_threshold_mean
    rms_threshold_low = 0.01 * rms_threshold_mean

    for n in range(1, len(rms)):
        prev = rms[n - 1]
        curr = rms[n]

        if prev >= rms_threshold_high:
            if curr < rms_threshold_low:
                silent_parts[0, n] = 1
            else:
                silent_parts[0, n] = 0
        elif prev <= rms_threshold_low:
            if curr > rms_threshold_high:
                silent_parts[0, n] = 0
            else:
                silent_parts[0, n] = 1
        else:
            silent_parts[0, n] = silent_parts[0, n - 1]

    if silent_parts[0, 1] == 1:
        silent_parts[0, 0] = 1

    active_index = invert(silent_parts.flatten().astype(bool))

    # Keep only active parts

    # Uncomment this
    # features_vector = features_vector[:, active_index]

    # Keep this for debugging purposes

    # parameters_vector = parameters_vector[:, active_index]
    # index_vector = index_vector[active_index]

    # Label examples for a chain of chain_length

    # idx = 0
    # for i in range(0, len(index_vector)):
    #     index_vector[i] = idx
    #     if i % chain_length == 0:
    #         idx += 1
    # number_of_examples = idx

    # Scale parameters to [0,1]

    parameters_vector[0, :] = (parameters_vector[0, :] - d1min) / d1max  # d1
    parameters_vector[1, :] = (parameters_vector[1, :] - g1min) / g1max  # g1
    parameters_vector[2, :] = (parameters_vector[2, :] - damin) / damax  # da
    parameters_vector[3, :] = (parameters_vector[3, :] - gcmin) / gcmax  # gc
    parameters_vector[4, :] = (parameters_vector[4, :] - Gmin) / Gmax    # G

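    # Note that (p - pmin) / pmax maps pmin to 0 but maps pmax to
    # (pmax - pmin) / pmax rather than exactly 1; an exact [0,1] scaling
    # would divide by (pmax - pmin) instead.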
    moments_vector = zeros((features_vector.shape[0], 2))

    features_vector_original = features_vector.copy()

    print "[II] Storing moments vector"
    for i in range(0, features_vector.shape[0]):
        mean_ = mean(features_vector[i, :])
        std_ = std(features_vector[i, :], ddof=1)
        moments_vector[i, 0] = mean_
        moments_vector[i, 1] = std_

        features_vector[i, :] = (features_vector[i, :] - mean_) / std_

    features_vector_old_train = features_vector.copy()
    # moments_vector_train = moments_vector

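    # The stored per-feature (mean, std) pairs are what a matching test-time
    # pipeline would use to apply the same z-scoring (illustrative sketch):
    #
    #   X_test[i, :] = (X_test[i, :] - moments_vector[i, 0]) / moments_vector[i, 1]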

    print "[II] Extracting PCA configuration "

    kernel, q, featurelist = extract_pca_configuration_from_data(features_vector)

    q = q + 1

    print "[II] Optimal number of PCs to keep: %d" % q

    feature_captions_array = array(feature_captions)

    features_to_keep = list(feature_captions_array[featurelist])
    print "[II] Decided to keep %d features:" % len(features_to_keep)
    print str(features_to_keep).replace("', ", "\n").replace('[', '').replace("'", "[II]\t ")[:-7]

    features_kept_data = features_vector[featurelist, :]

    features_vector_old_train = features_kept_data
    features_vector = (kernel.T * features_kept_data)[0:q, :]
    features_vector_new_train = features_vector

    # example_features = None
    # example_parameters = None
    # example_idx = None
    #
    # for idx in range(0, shape(parameters_vector)[1]-chain_length):
    #     example_features_ = features_vector[:, idx:idx+chain_length]
    #     # print example_features_
    #     example_parameters_ = parameters_vector[:, idx:idx+chain_length]
    #     example_idx_ = ones((shape(example_parameters_)[1],))
    #
    #     if example_features is None:
    #         example_features = example_features_
    #     else:
    #         #print "appending"
    #         example_features = append(example_features, example_features_, 1)
    #
    #     if example_parameters is None:
    #         example_parameters = example_parameters_
    #     else:
    #         example_parameters = append(example_parameters, example_parameters_, 1)
    #
    #     if example_idx is None:
    #         example_idx = example_idx_*idx
    #     else:
    #         example_idx = append(example_idx, example_idx_*idx, 1)
    #
    # features_vector = example_features
    # parameters_vector = example_parameters
    # index_vector = example_idx

    print "[II] Trying ADM-coded parameters"
    print "[II] Upsampling features and parameters by a factor of %d" % UpsamplingFactor

    # Upsampled features and parameters
    features_vector_upsampled = smooth_matrix_1D(repeat(features_vector, UpsamplingFactor, axis=1))

    # feature_labels_upsampled = repeat(features_clustering_labels, UpsamplingFactor, axis=0)
    parameters_vector_upsampled = repeat(parameters_vector, UpsamplingFactor, axis=1)

    # km = KMeans(init='k-means++', n_init=10, max_iter=300, tol=0.0000001, verbose=0).fit(parameters_vector_upsampled.T)
    # centers_ = km.cluster_centers_
    # labels__ = km.labels_
    # # Remove the following two lines in order to restore non-kmeans version
    # parameters_vector_kmeans_upsampled = array(centers_[labels__])
    #
    # parameters_vector_upsampled = parameters_vector_kmeans_upsampled.T
    #
    # parameters_vector_upsampled = smooth_matrix_1D(parameters_vector_upsampled)

    parameters_vector_upsampled_adm = matrix(zeros(shape(parameters_vector_upsampled)))
    parameters_vector_upsampled_code = matrix(zeros(shape(parameters_vector_upsampled)))
    parameters_vector_upsampled_firstval = matrix(zeros((parameters_vector_upsampled.shape[0], 1)))

    # Reconstructed parameters

    parameters_vector_upsampled_reconstructed = matrix(zeros(shape(parameters_vector_upsampled)))

    def adm_matrix(X, dmin=0.001, dmax=0.28, tol=0.001):
        # Run adm() row-wise over a matrix of parameter trajectories.
        out = matrix(zeros(shape(X)))
        code = matrix(zeros(shape(X)))
        firstval = matrix(zeros((X.shape[0], 1)))

        for i in range(0, X.shape[0]):
            out[i, :], code[i, :], firstval[i, 0] = adm(X[i, :], dmin=dmin, dmax=dmax, tol=tol)

        return out, code, firstval

    # parameters_vector_upsampled_reconstructed[i,:] = adm_reconstruct(parameters_vector_upsampled_code[i,:], parameters_vector_upsampled_firstval[i,0], dmin=dmin, dmax=dmax)

    def adm_matrix_reconstruct(code, firstval, dmin=0.001, dmax=0.28):
        # Inverse of adm_matrix(): decode each row back into a trajectory.
        X = matrix(zeros(shape(code)))
        for i in range(0, code.shape[0]):
            X[i, :] = adm_reconstruct(code[i, :], firstval[i, 0], dmin=dmin, dmax=dmax)

        return X

    parameters_vector_upsampled_adm, parameters_vector_upsampled_code, parameters_vector_upsampled_firstval = adm_matrix(parameters_vector_upsampled, dmin, dmax, tol)
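    # Round-trip sketch (illustrative): decoding the code/firstval pair
    # produced above recovers the same staircase approximation as
    # parameters_vector_upsampled_adm:
    #
    #   rec = adm_matrix_reconstruct(parameters_vector_upsampled_code,
    #                                parameters_vector_upsampled_firstval,
    #                                dmin=dmin, dmax=dmax)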

    def diff_and_pad(X):
        # First difference along time, zero-padded on the left so the output
        # has the same shape as X.
        return concatenate((
            zeros((
                shape(X)[0],
                1
            )),
            diff(X, axis=1)),
            axis=1)

    # Change for adm:
    # parameters_vector_upsampled = parameters_vector_upsampled_code
    print "[II] Clustering features."

    features_clustering = GMM(n_components=n_clusters, covariance_type='diag')

    features_clustering.fit(features_vector_upsampled.T, y=parameters_vector_upsampled_code)

    features_clustering_means = features_clustering.means_
    features_clustering_labels = features_clustering.predict(features_vector_upsampled.T)
    features_clustering_sigmas = features_clustering.covars_

    features_vector_upsampled_estimated = zeros(shape(features_vector_upsampled))

    for n in range(0, len(features_vector_upsampled_estimated[0])):
        features_vector_upsampled_estimated[:, n] = features_clustering_means[features_clustering_labels[n]]

    print "[II] Features MSE for %d clusters: %.3f" % (n_clusters, mse(features_vector_upsampled, features_vector_upsampled_estimated))

    def happy_curve_classification(data, classes, estimator, Nd=1):
        # Learning-curve ("happy curve") helper: train on increasingly many
        # examples and record the test-set success ratio at each size.
        print "[II] Generating Happy Curve "
        from copy import deepcopy
        estimator_fulldata = deepcopy(estimator)
        estimator_fulldata.fit(data, classes)
        labels = estimator.predict(data)

        # 1. Split data in two, training and testing

        Ntr = int(round(data.shape[0] / 2))  # Training dataset size
        Nts = data.shape[0] - Ntr            # Testing dataset size

        ratios = []

        for n in range(Nd, Ntr):
            train, test, trainl, testl = cross_validation.train_test_split(data, classes, test_size=0.5, random_state=0)
            train = train[0:n, :]
            trainl = trainl[0:n]
            # print "trainl"
            # print trainl
            estimator_ = deepcopy(estimator)
            estimator_.fit(train, trainl)
            labels = estimator_.predict(test)

            ratio = sum(array(testl == labels).astype(float)) / len(labels)

            ratios.append(ratio)

        return ratios

    def cross_validate_classification(data, classes, estimator):
        print "[II] Crossvalidating... "
        from copy import deepcopy
        estimator_fulldata = deepcopy(estimator)
        estimator_fulldata.fit(data, classes)

        percents = arange(0.1, 0.9, 0.1)
        MSEs = []
        labels = estimator.predict(data)

        print "[II] for full training-testing: %.2f" % (sum(array(classes == labels).astype(float)) / len(labels))

        for p in percents:
            train, test, trainlabels, testlabels = cross_validation.train_test_split(data, classes, test_size=p, random_state=0)
            estimator_ = deepcopy(estimator)
            estimator_.fit(train, trainlabels)
            labels = estimator_.predict(test)
            print "[II] for training(%.2f)-testing(%.2f): %.2f" % ((1 - p), p, sum(array(testlabels == labels).astype(float)) / len(labels))

        return MSEs

    def cross_validate_clustering(data, estimator):
        print "[II] Crossvalidating... "
        estimator_fulldata = estimator
        estimator_fulldata.fit(data)

        # labels = estimator_fulldata.predict(data)
        means = estimator_fulldata.means_
        # print means

        percents = arange(0.1, 0.6, 0.1)
        MSEs = []
        reconstructed = zeros(shape(data))
        labels = estimator.predict(data)
        for n in range(0, len(reconstructed)):
            reconstructed[n, :] = means[labels[n]]

        MSEs.append(mse(data, reconstructed))
        for p in percents:
            train, test = cross_validation.train_test_split(data, test_size=p, random_state=0)
            train = matrix(train)
            test = matrix(test)

            estimator.fit(train)
            means = estimator.means_
            labels = estimator.predict(test)
            reconstructed = zeros(shape(test))
            for n in range(0, len(reconstructed)):
                reconstructed[n, :] = means[labels[n]]

            m = mse(test, reconstructed)

            print "[II] MSE for clustering crossvalidated data %.2f-%.2f: %.5f" % ((1 - p), p, m)
            MSEs.append(m)

        print "[II] Crossvalidation complete"

        return MSEs

    # Construct the parameters alphabet; each symbol is a distinct column
    # vector in the parameter code matrix.

    def vector_to_states(X):
        """
        Input: a matrix MxN with N samples and M variables.
        Output: a state sequence `state_seq', a codeword dictionary
        `parameters_alphabet' and its inverse `parameters_alphabet_inv'. """

        parameters_alphabet = {}
        n = 0

        for i in range(0, X.shape[1]):
            vec = tuple(ravel(X[:, i]))
            if vec not in parameters_alphabet:
                parameters_alphabet[vec] = n
                n += 1

        parameters_alphabet_inv = dict([(parameters_alphabet[m], m) for m in parameters_alphabet])

        state_seq = array([parameters_alphabet[tuple(ravel(X[:, m]))] for m in range(0, X.shape[1])])

        return state_seq, parameters_alphabet, parameters_alphabet_inv

    def states_to_vector(predicted, parameters_alphabet_inv):
        # Inverse mapping: turn a sequence of state indices back into the
        # corresponding column vectors.
        estimated = matrix(zeros((len(parameters_alphabet_inv[0]), len(predicted))))
        for i in range(0, len(predicted)):
            # print "[II] Predicted: ", predicted[i]
            estimated[:, i] = matrix(parameters_alphabet_inv[predicted[i]]).T

        return estimated
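    # Round-trip sketch (illustrative): for any column-coded matrix X,
    #
    #   seq, alpha, alpha_inv = vector_to_states(X)
    #   X_again = states_to_vector(seq, alpha_inv)   # equals matrix(X)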
    # state_seq, parameters_alphabet, parameters_alphabet_inv = vector_to_states(parameters_vector_upsampled_code)

    # parameters_change_variable = matrix(diff_and_pad(parameters_vector_upsampled)!=0).astype(int)

    # changes_state_seq, changes_parameters_alphabet, changes_parameters_alphabet_inv = vector_to_states(parameters_change_variable)

    # This is an HMM that just codes the changes:
    # we have only two states, change and stay the same.

    # Uncomment this:
    # parameters_vector_upsampled = parameters_vector_upsampled_code
    # parameters_state_orig, parameter_state_alphabet_orig, parameter_state_alphabet_inv_orig = vector_to_states(parameters_vector)
    # TODO: HA
    # parameters_vector_upsampled = parameters_vector_upsampled_code
    parameters_state, parameter_state_alphabet, parameter_state_alphabet_inv = vector_to_states(parameters_vector_upsampled)

    print "[II] Testing Gaussian Naive Bayes Classifier"
    gnb = GaussianNB()
    gnb.fit(features_vector_upsampled.T, parameters_state)

    parameters_state_estimated = gnb.predict(features_vector_upsampled.T)

    output = states_to_vector(parameters_state_estimated, parameter_state_alphabet_inv)

    figure()
    subplot(211)
    plot(parameters_vector_upsampled.T)
    title('Parameter value upsampled by a factor of %d' % UpsamplingFactor)
    ylabel('value')
    xlabel('frame #')
    subplot(212)
    # plot(smooth_matrix_1D(output).T)
    plot(output.T)
    ylabel('value')
    xlabel('frame #')

    errorvector_nbc = [mse(output[:, i], parameters_vector_upsampled[:, i]) for i in range(0, len(parameters_state_estimated))]

    # cross_validate_classification(features_vector_upsampled.T, parameters_state, gnb)
    # hc = happy_curve_classification(features_vector_upsampled.T, parameters_state, gnb)
    #
    # figure()
    # plot(hc)
    # figure()

    print "[II] Trying Multinomial HMM"

    # In order to do classification with HMMs, we need to:
    # 1. Split the parameters into classes
    # 2. Train one model per class
    # 3. Feed our data to all the models
    # 4. Check which model scores best and assign its class (see the sketch below)
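    # A minimal sketch of that recipe (illustrative only; HmmClassifier3
    # below is the implementation actually used here):
    #
    #   models = {}
    #   for c in unique(y):                  # y: class label per frame
    #       models[c] = hmm.GaussianHMM(n_components=1, covariance_type='full')
    #       models[c].fit([X[y == c, :]])    # X: frames x features
    #   label = max(models, key=lambda c: models[c].score(X_new))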
    class SVMClassifier:
        def __init__(self, **kwargs):
            print "[II] SVM Classifier "
            # self.clf = svm.SVC(**kwargs)
            self.name = "SVM"
            self.clf = multiclass.OneVsOneClassifier(svm.SVC(**kwargs))

        def fit(self, X, classes):
            n_classes = max(unique(classes)) + 1

            self.clf.fit(X, classes)

        def predict(self, X):
            return self.clf.predict(X)

        def getName(self):
            return self.name

        def cross_validate(self, data, classes):
            print "[II] SVM Classifier Crossvalidating... "
            from copy import deepcopy
            estimator = deepcopy(self)
            estimator_fulldata = deepcopy(self)
            estimator_fulldata.fit(data, classes)

            percents = arange(0.1, 0.9, 0.1)
            self.percents = percents * 100

            RATIOS = []
            labels = estimator.predict(data)

            print "[II] for full training-testing: %.2f" % (sum(array(classes == labels).astype(float)) / len(labels))

            for p in percents:
                traindata, testdata, trainlabels, testlabels = cross_validation.train_test_split(data, classes, test_size=p, random_state=1)

                # print "[II] [%.2f-%.2f] Examples used for training: %s" % (p, (1-p), str(traindata))
                # print "[II] [%.2f-%.2f] Examples used for testing: %s" % (p, (1-p), str(testdata))

                # Training phase

                estimator_ = deepcopy(estimator)
                estimator_.fit(traindata, trainlabels)

                predicted_labels = estimator_.predict(testdata)

                m = sum(array(predicted_labels == testlabels).astype(float)) / len(testlabels)

                print "[II] [%.2f-%.2f] train/test split validation for SVM Classifier: %.1f" % (p, (1 - p), m)

                RATIOS.append(m)

            return RATIOS, percents

    class NBClassifier:
        def __init__(self):
            print "[II] Gaussian Naive Bayes Classifier"
            self.name = "Naive Bayes"
            self.gnb = GaussianNB()

        def getName(self):
            return self.name

        def fit(self, X, states):
            self.gnb.fit(X, states)

        def predict(self, X):
            return self.gnb.predict(X)

        def cross_validate(self, data, classes):
            print "[II] NB Classifier Crossvalidating... "
            from copy import deepcopy
            estimator = deepcopy(self)
            estimator_fulldata = deepcopy(self)
            estimator_fulldata.fit(data, classes)

            percents = arange(0.1, 0.9, 0.1)
            self.percents = percents * 100

            RATIOS = []
            labels = estimator.predict(data)

            print "[II] for full training-testing: %.2f" % (sum(array(classes == labels).astype(float)) / len(labels))

            for p in percents:
                traindata, testdata, trainlabels, testlabels = cross_validation.train_test_split(data, classes, test_size=p, random_state=1)

                # print "[II] [%.2f-%.2f] Examples used for training: %s" % (p, (1-p), str(traindata))
                # print "[II] [%.2f-%.2f] Examples used for testing: %s" % (p, (1-p), str(testdata))

                # Training phase

                estimator_ = deepcopy(estimator)
                estimator_.fit(traindata, trainlabels)

                predicted_labels = estimator_.predict(testdata)

                m = sum(array(predicted_labels == testlabels).astype(float)) / len(testlabels)

                print "[II] [%.2f-%.2f] train/test split validation for NB Classifier: %.1f" % (p, (1 - p), m)

                RATIOS.append(m)

            return RATIOS, percents


    class HmmClassifier3:
        def __init__(self, N=2, n_components=1):
            print "[II] HMM Classifier (%d states, %d components)" % (N, n_components)
            self.name = "HMM (%d time steps, %d components)" % (N, n_components)
            self.n_components = n_components
            self.chain_size = N

        def getName(self):
            return self.name

        def fit(self, features, parameters):
            # print parameters
            n_classes = max(unique(parameters)) + 1

            print "[II] Number of classes: ", n_classes
            hmms = [None] * n_classes
            svms = [None] * n_classes
            chain_size = self.chain_size
            obs = [None] * n_classes

            # Collect, for every class, all length-chain_size feature windows
            # that end at an occurrence of that class.
            for i in range(chain_size, len(parameters)):
                class_ = parameters[i - 1]
                seq = features[i - chain_size:i, :]

                if obs[class_] is None:
                    obs[class_] = [seq]
                else:
                    obs[class_].append(seq)

            for i in range(0, len(obs)):
                if obs[i] is not None and len(obs[i]) != 0:
                    hmm_ = hmm.GaussianHMM(n_components=self.n_components, covariance_type='full')
                    hmm_.fit(obs[i])
                    hmms[i] = hmm_

            self.hmms = hmms

            return obs

        def predict(self, features, mfilt=20):
            chain_size = self.chain_size
            hmms = self.hmms
            predicted_classes = zeros((features.shape[0],))
            for i in range(chain_size, features.shape[0]):
                scores = zeros((len(hmms),))

                seq = features[i - chain_size:i, :]

                # Score the window under every class model; classes with no
                # trained model get -inf so they can never win.
                for j in range(0, len(hmms)):
                    if hmms[j] is not None:
                        scores[j] = hmms[j].score(seq)
                    else:
                        scores[j] = -infty

                predicted_classes[i] = argmax(scores)

            # Do a median filter at the end

            # predicted_classes = median_filter(predicted_classes, mfilt)

            return predicted_classes

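        # Note: predict() leaves the first chain_size frames at class 0,
        # since no complete feature window exists for them.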
        def k_fold_cross_validate(self, data, classes, K=5):
            print "[II] HMM Classifier Crossvalidating... "
            print "[II] Validating data: ", data
            print "[II] With classes: ", classes
            from copy import deepcopy
            estimator = deepcopy(self)
            estimator_fulldata = deepcopy(self)
            estimator_fulldata.fit(data, classes)

            labels = estimator_fulldata.predict(data)
            print "[II] for full training-testing: %.2f" % (sum(array(classes == labels).astype(float)) / len(classes))

            # Crossvalidate this.

            example_sequences = []
            example_labels = []

            chain_size = self.chain_size

            percents = arange(0.1, 0.9, 0.1)
            self.percents = percents * 100

            RATIOS = []

            # Constructing examples and labels: cut the data into K contiguous
            # blocks of length L (the last block keeps any remainder).

            L = data.shape[0] / K
            M = K
            # print "[II] Example size:", L

            n = 1
            while L * (n - 1) < M * L:
                if L * n > shape(data)[0]:
                    example = data[L * (n - 1):, :]
                    classes_ = classes[L * (n - 1):]
                else:
                    example = data[L * (n - 1):L * n, :]
                    classes_ = classes[L * (n - 1):L * n]

                # print example
                # print classes_

                example_sequences.append(example)
                example_labels.append(classes_)
                n += 1

            # print len(example_sequences)
            # print len(example_labels)
            from sklearn.cross_validation import KFold
            kf = KFold(K, n_folds=K)

            ratio = 0

            for train, test in kf:
                print "[II] Validating examples %s against %s." % (train, test)

                topredict_data = example_sequences[test[0]]
                topredict_labels = example_labels[test[0]]

                tofit_data = None
                tofit_labels = None

                for t in train:
                    # print t
                    if tofit_data is None:
                        tofit_data = example_sequences[t]
                        tofit_labels = example_labels[t]
                    else:
                        tofit_data = append(tofit_data, example_sequences[t], 0)
                        tofit_labels = append(tofit_labels, example_labels[t], 0)

                estimator_ = deepcopy(estimator)
                estimator_.fit(tofit_data, tofit_labels)

                labels = estimator_.predict(topredict_data)

                m = sum(array(topredict_labels == labels).astype(float)) / len(labels)

                print "[II] %d-fold cross-validation for HMM Classifier: %.1f" % (K, m)

                ratio += m / float(len(kf))

            return ratio

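        # Note: KFold(K, n_folds=K) above amounts to leave-one-block-out over
        # the K contiguous blocks built from the frame sequence, so each fold
        # tests on one unseen block.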
        def cross_validate(self, data, classes):
            print "[II] HMM Classifier Crossvalidating... "
            from copy import deepcopy
            estimator = deepcopy(self)
            estimator_fulldata = deepcopy(self)
            estimator_fulldata.fit(data, classes)

            labels = estimator_fulldata.predict(data)
            print "[II] for full training-testing: %.2f" % (sum(array(classes == labels).astype(float)) / len(classes))

            # Crossvalidate this.

            example_sequences = []
            example_labels = []

            chain_size = self.chain_size

            percents = arange(0.5, 0.9, 0.1)
            self.percents = percents * 100

            RATIOS = []

            # Constructing examples and labels

            M = self.chain_size
            L = data.shape[0] / 20

            print "[II] Example size:", L

            n = 1
            while L * n < M * L - L:
                example = data[L * (n - 1):L * n, :]
                # print example
                classes_ = classes[L * (n - 1):L * n]
                # print classes_

                example_sequences.append(example)
                example_labels.append(classes_)
                n += 1

            # Splitting to train/test

            for p in percents:
                traindata, testdata, trainlabels, testlabels = cross_validation.train_test_split(example_sequences, example_labels, test_size=p, random_state=1)

                print traindata
                print testdata
                # Concatenate traindata

                tofit_data = None
                tofit_labels = None

                topredict_data = None
                topredict_labels = None

                for t in traindata:
                    if tofit_data is None:
                        tofit_data = t
                    else:
                        tofit_data = append(tofit_data, t, 0)

                for l in trainlabels:
                    if tofit_labels is None:
                        tofit_labels = l
                    else:
                        tofit_labels = append(tofit_labels, l)

                for t in testdata:
                    if topredict_data is None:
                        topredict_data = t
                    else:
                        topredict_data = append(topredict_data, t, 0)

                for l in testlabels:
                    if topredict_labels is None:
                        topredict_labels = l
                    else:
                        topredict_labels = append(topredict_labels, l)

                # print "[II] Fit data: ", tofit_data
                # print "[II] Fit labels: ", tofit_labels
                # print "[II] Predict data: ", topredict_data
                # print "[II] Predict labels: ", topredict_labels

                estimator_ = deepcopy(estimator)

                print tofit_labels
                estimator_.fit(tofit_data, tofit_labels)

                labels = estimator_.predict(topredict_data)

                m = sum(array(topredict_labels == labels).astype(float)) / len(labels)

                print "[II] [%.2f-%.2f] cross-validation for HMM Classifier: %.1f" % (p, (1 - p), m)

                RATIOS.append(m)

            return RATIOS, percents
    class HmmClassifier2:
        def __init__(self, N=2, chain_size=4, n_components=1):
            print "[II] HMM Classifier (%d states, %d components)" % (N, n_components)
            self.name = "HMM (%d states, %d components)" % (N, n_components)
            self.n_components = n_components
            self.chain_size = chain_size
            self.hmms_ = []
            self.N = N
            self.n_states = N

        def getName(self):
            return self.name

        def fit(self, features, parameters):
            self.n_params = len(unique(parameters))

            for n in range(0, self.n_params):
                hmm_ = hmm.GaussianHMM(n_components=self.n_components, covariance_type='full')

                # Get training data for each parameter

                for i in range(0, len(parameters) - self.chain_size):
                    seq = features[i:i + self.chain_size, :]
                    if parameters[i + self.chain_size - 1] == n and seq.shape[0] == self.chain_size:
                        # print "[II] Fitting: %s" % str(seq)
                        hmm_.fit([seq])

                self.hmms_.append(hmm_)

        def predict(self, X):
            labels = zeros((X.shape[0],))
            N = self.N

            scores = zeros((self.n_states, ))

            for i in range(0, len(labels)):
                testdata = X[i:i + self.chain_size, :]

                for j in range(0, self.n_states):
                    scores[j] = self.hmms_[j].score(testdata)
                labels[i] = argmax(scores)

            return labels

        def cross_validate(self, data, classes):
            print "[II] HMM2 Classifier Crossvalidating... "
            from copy import deepcopy
            estimator = deepcopy(self)
            estimator_fulldata = deepcopy(self)
            estimator_fulldata.fit(data, classes)

            percents = arange(0.1, 0.9, 0.1)
            self.percents = percents * 100

            RATIOS = []
            labels = estimator.predict(data)

            print "[II] for full training-testing: %.2f" % (sum(array(classes == labels).astype(float)) / len(labels))

            for p in percents:
                traindata, testdata, trainlabels, testlabels = cross_validation.train_test_split(data, classes, test_size=p, random_state=1)

                # print "[II] [%.2f-%.2f] Examples used for training: %s" % (p, (1-p), str(traindata))
                # print "[II] [%.2f-%.2f] Examples used for testing: %s" % (p, (1-p), str(testdata))

                # Training phase

                estimator_ = deepcopy(estimator)
                estimator_.fit(traindata, trainlabels)

                predicted_labels = estimator_.predict(testdata)

                m = sum(array(predicted_labels == testlabels).astype(float)) / len(testlabels)

                print "[II] [%.2f-%.2f] train/test split validation for HMM2 Classifier: %.1f" % (p, (1 - p), m)

                RATIOS.append(m)

            return RATIOS, percents

    class HmmClassifier:
        def __init__(self, N=2, n_components=1):
            print "[II] HMM Classifier (%d states, %d components)" % (N, n_components)
            self.name = "HMM (%d states, %d components)" % (N, n_components)
            self.n_components = n_components
            self.chain_size = N
            self.hmms_ = []
            self.N = N

        def getName(self):
            return self.name

        def fit(self, X, states):
            self.n_states = len(unique(states))

            for n in range(0, self.n_states):
                hmm_ = hmm.GaussianHMM(n_components=self.n_components, covariance_type='full')

                # Get training data for each class
                vector = X[states == n, :]

                # Fit the HMM
                # print vector
                hmm_.fit([vector])

                # And append to the list
                self.hmms_.append(hmm_)

        def predict(self, X):
            labels = zeros((X.shape[0],))
            N = self.N

            m = 0

            scores = zeros((1, self.n_states))

            while m * N < X.shape[0]:
                if (m + 1) * N > X.shape[0]:
                    testdata = X[m * N:, :]
                else:
                    testdata = X[m * N:(m + 1) * N, :]

                # print testdata

                for i in range(0, self.n_states):
                    scores[0, i] = self.hmms_[i].score(testdata)

                if (m + 1) * N > X.shape[0]:
                    labels[m * N:] = argmax(scores)
                else:
                    labels[m * N:(m + 1) * N] = argmax(scores)

                m += 1

            return labels

        # def cross_validate(self, data, classes, index_vector):
        #     print "[II] Crossvalidating... "
        #     from copy import deepcopy
        #     estimator = deepcopy(self)
        #     estimator_fulldata = deepcopy(self)
        #     estimator_fulldata.fit(data, classes)
        #
        #     percents = arange(0.1,0.9,0.1)
        #     self.percents = percents * 100
        #     RATIOS = []
        #     labels = estimator.predict(data)
        #
        #     print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(labels))
        #
        #     for p in percents:
        #         train,test,trainlabels,testlabels = cross_validation.train_test_split(data,classes,test_size=p,random_state=1)
        #         estimator_ = deepcopy(estimator)
        #         estimator_.fit(train, trainlabels)
        #         labels = estimator_.predict(test)
        #         m = sum(array(testlabels==labels).astype(float))/len(labels)
        #         RATIOS.append(m)
        #         print "[II] for training(%.2f)-testing(%.2f): %.2f" % ((1-p),p,sum(array(testlabels==labels).astype(float))/len(labels))
        #
        #     return RATIOS, percents

        def cross_validate(self, data, classes):
            print "[II] HMM Classifier Crossvalidating... "
            from copy import deepcopy
            estimator = deepcopy(self)
            estimator_fulldata = deepcopy(self)
            estimator_fulldata.fit(data, classes)

            percents = arange(0.1, 0.9, 0.1)
            self.percents = percents * 100

            RATIOS = []
            labels = estimator.predict(data)

            print "[II] for full training-testing: %.2f" % (sum(array(classes == labels).astype(float)) / len(labels))

            for p in percents:
                traindata, testdata, trainlabels, testlabels = cross_validation.train_test_split(data, classes, test_size=p, random_state=1)

                # print "[II] [%.2f-%.2f] Examples used for training: %s" % (p, (1-p), str(traindata))
                # print "[II] [%.2f-%.2f] Examples used for testing: %s" % (p, (1-p), str(testdata))
                # Training phase

                estimator_ = deepcopy(estimator)
                estimator_.fit(traindata, trainlabels)

                predicted_labels = estimator_.predict(testdata)

                # testdata = None
                #
                # traindata = None
                # trainlabels = None
                # testlabels = None
                #
                # for t in train:
                #     if traindata is None:
                #         traindata = data[index_vector == t, :]
                #         trainlabels = classes[index_vector == t]
                #     else:
                #         traindata = append(traindata, data[index_vector == t, :], 0)
                #         trainlabels = append(trainlabels, classes[index_vector == t])
                #
                # estimator_ = deepcopy(estimator)
                # estimator_.fit(traindata, trainlabels)
                #
                # for t in test:
                #     if testdata is None:
                #         testdata = data[index_vector == t, :]
                #         testlabels = classes[index_vector == t]
                #     else:
                #         testdata = append(testdata, data[index_vector == t, :], 0)
                #         testlabels = append(testlabels, classes[index_vector == t])

                # predicted_labels = estimator_.predict(testdata)

                m = sum(array(predicted_labels == testlabels).astype(float)) / len(testlabels)

                print "[II] [%.2f-%.2f] train/test split validation for HMM Classifier: %.1f" % (p, (1 - p), m)

                RATIOS.append(m)

            return RATIOS, percents

    def cross_validate_full_k_fold(data, classes, list_of_classifiers, K=10):
        """ K-folds cross-validates all available classifiers, plots and saves barplots. """

        ratios = []
        for l in list_of_classifiers:
            ratio = l.k_fold_cross_validate(data, classes, K)
            ratios.append(ratio)

        L = len(ratios)

        ind = arange(L)

        fig, ax = subplots()
        rects = []

        width = 10 / float(len(ratios))

        colors = ['r', 'y', 'g', 'c']

        labels_ = []

        for i in range(0, len(ratios)):
            rects.append(ax.bar(i * width, ratios[i], width, color=colors[i % len(colors)]))
            labels_.append(list_of_classifiers[i].getName())

        return ratios

    def cross_validate_full(seq, classes, list_of_classifiers):
        """ Cross-validates all available classifiers, plots and saves barplots. """

        ratios = []
        percents = []
        for l in list_of_classifiers:
            ratios_, percents_ = l.cross_validate(seq, classes)
            ratios.append(ratios_)
            percents.append(percents_)

        L = len(ratios[0])

        ind = arange(L)

        fig, ax = subplots()

        rects = []

        W = 0.8
        width = W / float(len(ratios))

        colors = ['r', 'y', 'g', 'c']

        labels_ = []
        for i in range(0, len(ratios)):
            rects.append(ax.bar(ind + i * width, ratios[i], width, color=colors[i % len(colors)]))
            labels_.append(list_of_classifiers[i].getName())

        ax.legend(tuple(labels_))

        ax.set_xticks(ind + W / 2)
        ax.set_xticklabels(tuple(percents[0] * 100))
        ax.set_xlabel("Percentage % of test data")
        ax.set_ylabel("Success ratio")

        # rects1 = ax.bar(ind, ratios[0], 0.35, color='r')
        # rects2 = ax.bar(ind+0.351, ratios[1], 0.35, color='y')

        return ratios

    # hmmc = HmmClassifier(N=len(unique(parameters_state)), n_components=1)
    # hmmc.fit(features_vector_upsampled.T, parameters_state)
    #
    # hmmc2 = HmmClassifier(N=12, n_components=4)
    # hmmc2.fit(features_vector_upsampled.T, parameters_state)
    #
    svmc = SVMClassifier(probability=True)
    svmc.fit(features_vector_upsampled.T, parameters_state)
    # svmc.cross_validate(features_vector_upsampled.T, parameters_state, index_vector)

    nbc = NBClassifier()
    nbc.fit(features_vector_upsampled.T, parameters_state)

    # hchmmc = happy_curve_classification(features_vector_upsampled.T, parameters_state, hmmc, Nd=100)
    # cross_validate_classification(features_vector_upsampled.T, parameters_state, hmmc)
    # hmmc.cross_validate(features_vector_upsampled.T, parameters_state)

    hmmc3 = HmmClassifier3(N=6, n_components=max(unique(parameters_state)))
    obs = hmmc3.fit(features_vector_upsampled.T, parameters_state)

    # svmhmm = HmmSVMClassifier()
    # svmhmm.fit(features_vector_upsampled.T, parameters_state)
    # ratios = cross_validate_full(features_vector_upsampled.T, parameters_state, [nbc, svmc])
    # ratios = cross_validate_full_k_fold(features_vector_upsampled.T, parameters_state,
    #                                     [hmmc3])

    def approximate_chain_size(data, parameters):
        ratios = []

        # chainsizes = arange(1, 40)
        # for cs in chainsizes:
        #     print "[II] Trying chain size: %d" % cs
        #     hmmc = HmmClassifier3(N=cs, n_components=2)
        #     ratios.append(hmmc.k_fold_cross_validate(data, parameters, K=10))

        componentssize = arange(1, 12)

        for cs in componentssize:
            print "[II] Trying component size: %d" % cs
            hmmc = HmmClassifier3(N=6, n_components=cs)
            ratios.append(hmmc.k_fold_cross_validate(data, parameters, K=2))

        figure()
        plot(componentssize, ratios)
        xlabel('Number of components')
        ylabel('Success Ratio')
        title('2-fold cross validation success ratio vs number of components')

        return ratios

    # ratios = approximate_chain_size(features_vector_upsampled.T, parameters_state)

    # predictedhmmc2 = hmmc2.predict(features_vector_upsampled.T)

    # figure()

    # hmms_ = []
    #
    # for n in range(0, len(parameter_state_alphabet)):
    #     #hmm_ = hmm.GMMHMM(n_components = 1, n_mix = 2)
    #     hmm_ = hmm.GaussianHMM(n_components = 1, covariance_type = 'full')
    #
    #     # Get training data for each class
    #     vector = features_vector_upsampled[:, parameters_state == n]
    #
    #     #if vector.shape[1] < n_clusters:
    #     #    hmms_.append(None)
    #     #else:
    #
    #     hmm_.fit([vector.T])
    #
    #     # Append to the list
    #
    #     hmms_.append(hmm_)
    #
    # labels = zeros((features_vector_upsampled.shape[1],))
    #
    # N = 20
    # m = 0
    #
    # while m*N < features_vector_upsampled.shape[1]:
    #
    #     scores = zeros((1, len(parameter_state_alphabet)))
    #
    #     if (m+1)*N > features_vector_upsampled.shape[1]:
    #         testdata = features_vector_upsampled[:, m*N:]
    #     else:
    #         testdata = features_vector_upsampled[:, m*N:(m+1)*N]
    #
    #     for i in range(0, len(parameter_state_alphabet)):
    #         if hmms_[i] is not None:
    #             scores[0,i] = hmms_[i].score(testdata.T)
    #         else:
    #             scores[0,i] = -100000  # Very large negative score
    #     if (m+1)*N >= features_vector_upsampled.shape[1]:
    #         labels[m*N:] = argmax(scores)
    #     else:
    #         labels[m*N:(m+1)*N] = argmax(scores)
    #
    #     m += 1

    # figure()
    # plot(labels.T)

    # labels = hmmc.predict(features_vector_upsampled.T)
    # estimated = states_to_vector(labels, parameter_state_alphabet_inv)
    # plot(estimated.T, 'r--')
    #
    # title('Estimated parameter values')
    # legend(['Naive Bayes Classifier', 'HMM chain size %d (%.1fms)' % (N, float(N)/UpsamplingFactor*23.0)])
    #
    # ylabel('value')
    # xlabel('frame #')
    #

    # close('all')

    # plot(features_clustering_labels/float(max(features_clustering_labels)))
    # plot(parameters_vector_upsampled.T/max(ravel(parameters_vector_upsampled)))

e@0
|
def plot_clusters(x, labels):
    figure()
    symbols = ['>', 'x', '.', '<', 'v']
    colors = ['b', 'r', 'g', 'm', 'c']

    for r in range(0, x.shape[0]):
        scatter(x[r,0], x[r,1], c=colors[int(labels[r]) % len(colors)], marker=symbols[int(labels[r]) % len(symbols)])


# plot_clusters(features_vector_upsampled.T, parameters_state)
#

# SVM

class HmmClassifierSVN_Naive:
    def __init__(self, N=2, n_components = 1):
        self.n_components = n_components
        self.chain_size = N
        self.hmms_ = []
        self.N = N

    def fit(self, X, states):
        self.n_states = len(unique(states))

        for n in range(0, self.n_states):
            hmm_ = hmm.GaussianHMM(n_components = self.n_components, covariance_type = 'full')

            # Get training data for each class
            vector = X[states == n, :]

            # Fit the HMM
            hmm_.fit([vector])

            # And append to the list
            self.hmms_.append(hmm_)

    def predict(self, X):
        labels = zeros((X.shape[0],))
        N = self.N

        m = 0

        scores = zeros((1, self.n_states))

        while m*N < X.shape[0]:
            if (m+1)*N > X.shape[0]:
                testdata = X[m*N:, :]
            else:
                testdata = X[m*N:(m+1)*N, :]

            for i in range(0, self.n_states):
                scores[0,i] = self.hmms_[i].score(testdata)

            if (m+1)*N > X.shape[0]:
                labels[m*N:] = argmax(scores)
            else:
                labels[m*N:(m+1)*N] = argmax(scores)

            m += 1

        return labels

def plot_parameters_estimation(list_of_estimators):
    for e in list_of_estimators:
        name_ = e.getName()

        fig = figure()
        fig.suptitle('Parameters estimation using %s' % name_, fontweight='bold')
        subplot(311)
        title('original parameters')
        param_real = states_to_vector(parameters_state, parameter_state_alphabet_inv).T
        plot(param_real)
        legend(parameter_captions)
        subplot(312)
        title('estimated parameters')
        pred = e.predict(features_vector_upsampled.T)
        param_est = states_to_vector(pred, parameter_state_alphabet_inv).T
        plot(param_est)
        legend(parameter_captions)
        subplot(313)
        error_real_est = zeros((len(param_est),))
        for i in range(0, len(error_real_est)):
            error_real_est[i] = mse(param_real[i], param_est[i])

        totalmse = sum(error_real_est)
        title('mean squared error (total: %.3f)' % totalmse)

        plot(error_real_est)

plot_parameters_estimation([nbc, svmc, hmmc3])

class HmSVMClassifier:
    def __init__(self, N=2, n_components = 1):
        print "[II] HMM Classifier (%d states, %d components)" % (N, n_components)
        self.name = "HMM (%d time steps, %d components)" % (N, n_components)
        self.n_components = n_components
        self.chain_size = N

    def getName(self):
        return self.name

    def fit(self, features, parameters, fname_training = "trainingdat.dat",
            fname_model = "training.model", C = 689):
        svmhmm = ""
        idx = 0
        chain_size = self.chain_size
        for i in range(chain_size, len(parameters)):
            idx += 1
            class_ = parameters[i-1]
            seq = features[i-chain_size:i, :]

            for m in range(0, len(seq)):
                s = seq[m]

                svmhmm += "%d qid:1.%d " % (class_, idx)
                for j in range(0, len(s)):
                    svmhmm += "%d:%.4f " % (j+1, s[j])
                svmhmm += "\n"

        fileout = open(fname_training, 'w')
        fileout.write(svmhmm)
        fileout.close()

        import subprocess

        # C = idx

        subprocess.call(["svmhmm/svm_hmm_learn", '-c', '%d' % C, '-e', '0.5', fname_training, fname_model])
        return svmhmm
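
# The training file written above follows the SVM^hmm (svm_hmm_learn) input
# convention: one line per frame, "<label> qid:<seq>.<example> <fnum>:<fval> ...",
# with feature indices starting at 1. A minimal sketch of what two frames of a
# 3-dimensional feature vector labelled with class 4 would look like (the
# values here are made up for illustration):
#
#   4 qid:1.1 1:0.1200 2:0.3400 3:0.5600
#   4 qid:1.1 1:0.1300 2:0.3300 3:0.5500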

n_classes = max(unique(parameters_state)) + 1

hist_p_q = histogram(parameters_state, n_classes)[0].astype(float)
prior_p_q = hist_p_q/sum(hist_p_q)

transmat = zeros((n_classes, n_classes))
for i in range(1, len(parameters_state)):
    prev = parameters_state[i-1]
    cur = parameters_state[i]
    transmat[prev, cur] += 1

# Normalize each row so that transmat is row-stochastic (every row of a
# transition matrix must sum to 1, not the matrix as a whole)
row_sums = transmat.sum(1)
row_sums[row_sums == 0] = 1
transmat = transmat/row_sums[:, None]

hmm2 = hmm.MultinomialHMM(n_components = n_classes, startprob = prior_p_q, transmat = transmat)
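
# A quick sanity check of the counting above (illustrative only): for the
# state sequence [0, 0, 1, 1, 0] the transition counts are
# {0->0: 1, 0->1: 1, 1->1: 1, 1->0: 1}, so after row normalization both rows
# of transmat are [0.5, 0.5].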


# TODO: Embed SVM emissions to HMM. Try to pick the maximum-likelihood sequence from
# classifiers like in HmmClassifier3. For every class, train an SVM, with n_components as
# states, and compute the log-likelihood for prediction. Should work.
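
# A minimal sketch of the idea above (illustrative, not wired into the
# pipeline): turn a probabilistic classifier's posterior P(q|x) into a
# "scaled likelihood" emission P(x|q) ~ P(q|x)/P(q), which is what the HMMsvm
# class below implements. classifier_ stands for any object exposing an
# estimate_posterior_probability_vector(x) method (hypothetical name here).
def scaled_likelihood_emission(classifier_, prior, x, q):
    post = classifier_.estimate_posterior_probability_vector(x)
    p = post/prior          # Bayes: P(x|q) is proportional to P(q|x)/P(q)
    p = p/sum(p)            # renormalize over states
    return p[q]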


class MyAVAClassifier:

    def __init__(self):
        self.classifiers = {}
        self.name = "Linear SVM Classifier"
        # self.probabilities = {}

    def fit(self, X, y, flr = 0):

        n_classes = max(unique(y)) + 1
        classes = arange(0, n_classes)
        self.n_classes = n_classes
        # M = n_classes*(n_classes-1) # Number of classifiers

        h = histogram(y, n_classes)[0].astype(float)
        self.prior = h/sum(h)

        transmat = zeros((n_classes, n_classes))

        for i in range(1, len(y)):
            prev = y[i-1]
            cur = y[i]
            transmat[prev, cur] += 1

        # Normalize each row so the transition matrix is row-stochastic
        row_sums = transmat.sum(1)
        row_sums[row_sums == 0] = 1
        transmat = transmat/row_sums[:, None]

        self.transmat = transmat

        # Add a very small probability for random jump to avoid zero values,
        # then re-normalize the rows

        self.transmat += flr
        self.transmat = self.transmat/self.transmat.sum(1)[:, None]

        for i in range(0, n_classes):
            for j in range(0, n_classes):
                if i != j and (i,j) not in self.classifiers and (j,i) not in self.classifiers:
                    print (i,j)
                    idx_ = bitwise_or(y == classes[i], y == classes[j])
                    X_ = X[idx_, :]
                    y_ = y[idx_]

                    # NB: svm.SVC defaults to an RBF kernel; pass
                    # kernel='linear' if the "Linear" in self.name is meant
                    # literally.
                    svm_ = svm.SVC(probability = True)

                    svm_.fit(X_, y_)
                    self.classifiers[(i,j)] = svm_
                    # self.probabilities[(i,j)] = svm_.predict_proba(X)

    def estimate_pairwise_class_probability(self, i, j, x):
        if (i,j) not in self.classifiers and (j,i) in self.classifiers:
            return self.classifiers[(j,i)].predict_proba(x)[0,1]
        else:
            return self.classifiers[(i,j)].predict_proba(x)[0,0]

    def estimate_posterior_probability(self, i, x):
        mus = zeros((self.n_classes,))
        for j in range(0, self.n_classes):
            if i != j:
                mus[j] = 1/self.estimate_pairwise_class_probability(i, j, x)
        S = sum(mus) - (self.n_classes - 2)
        return 1/S

    def estimate_posterior_probability_vector(self, x):
        posterior = zeros((self.n_classes,))
        for i in range(0, len(posterior)):
            posterior[i] = self.estimate_posterior_probability(i, x)

        return posterior
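
    # The two methods above implement a standard pairwise-coupling rule for
    # combining one-vs-one probabilities: P(class i | x) is approximated by
    # 1 / (sum_{j != i} 1/p_ij - (K - 2)), where p_ij is the probability the
    # (i,j) pairwise classifier assigns to class i and K is the number of
    # classes. For K = 2 this reduces to p_ij itself.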

#    def estimate_emission_probability(self, i, x):
#        post = self.estimate_posterior_probability_vector(x)
#        # print "post: ", post
#        prior = self.prior
#        # print "prior: ", prior
#        p = post/prior
#        p = p/sum(p)
#
#        return p[i]

#    def estimate_emissmat(self, X):
#        emissmat = zeros((X.shape[0], self.n_classes))
#        for i in range(0, X.shape[0]):
#            for j in range(0, self.n_classes):
#                emissmat[i,j] = self.estimate_emission_probability(j, X[i,:])
#
#        return emissmat

    def predict(self, X):
        predicted = zeros((X.shape[0],))
        for i in range(0, X.shape[0]):
            x = X[i,:]
            P = zeros((self.n_classes,))

            for c in range(0, len(P)):
                P[c] = self.estimate_posterior_probability(c, x)

            pred = argmax(P)
            predicted[i] = pred

        return predicted
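
# Illustrative usage sketch for MyAVAClassifier on toy data (hypothetical
# names, not part of the pipeline). Note that the 1-D x passed to
# predict_proba() above is accepted by old scikit-learn releases; newer
# versions would need x.reshape(1, -1).
def _demo_myava_usage():
    from numpy.random import randn
    X_toy = append(randn(40, 2), randn(40, 2) + 3.0, 0)   # two Gaussian blobs
    y_toy = append(zeros(40), ones(40)).astype(int)       # labels 0 and 1
    clf_toy = MyAVAClassifier()
    clf_toy.fit(X_toy, y_toy)
    return clf_toy.predict(X_toy)                         # mostly 0s, then 1s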

class HMMsvmClassifier:
    def __init__(self, N=2):
        print "[II] HMM-SVM Classifier (%d states)" % N
        self.name = "HMM-SVM (%d time steps)" % N
        # self.n_components = n_components
        self.chain_size = N
        self.svm = MyAVAClassifier()

    def getName(self):
        return self.name

    def fit(self, features, parameters):

        # First train emission svm

        self.svm.fit(features, parameters)

        # print parameters
        n_classes = max(unique(parameters)) + 1

        print "[II] Number of classes: ", n_classes
        hmms = [None]*n_classes
        chain_size = self.chain_size
        params = [None]*n_classes
        obs = [None]*n_classes

        for i in range(chain_size, len(parameters)):
            class_ = parameters[i-1]
            params_ = parameters[i-chain_size:i]
            feats_ = features[i-chain_size:i, :]

            if params[class_] is None:
                params[class_] = [params_]
                obs[class_] = [feats_]
            else:
                params[class_].append(params_)
                obs[class_].append(feats_)

        for i in range(0, len(params)):
            if params[i] is not None and len(params[i]) != 0:
                hmm_ = HMMsvm(self.svm)
                print "[II] %d Fitting params: " % i, params[i]
                hmm_.fit(obs[i], params[i])

                # TODO: Left here on 06/07 19:56

                # hmm_.fit(features,)
                # print "obs: ", obs[i]
                # print "params: ", params[i]
                # hmm_.fit(obs[i], params[i], flr=1e-9)
                hmms[i] = hmm_

        self.hmms = hmms

        return obs

    def predict(self, features, mfilt=20):
        chain_size = self.chain_size
        hmms = self.hmms
        predicted_classes = zeros((features.shape[0],))
        for i in range(chain_size, features.shape[0]):
            scores = zeros((len(hmms),))

            seq = features[i-chain_size:i, :]

            for j in range(0, len(hmms)):
                if hmms[j] is not None:
                    scores[j] = hmms[j]._log_likelihood(seq)
                else:
                    scores[j] = -infty

            predicted_classes[i] = argmax(scores)

        # Do a median filter at the end

        # predicted_classes = median_filter(predicted_classes, mfilt)

        return predicted_classes


class HMMsvm:
    def __init__(self, svm_):
        self.svm = svm_

        # self.svm = MyAVAClassifier()

    def fit(self, features_list, states, flr=1e-12):

        # TODO: Concatenate features, train
        #self.svm.fit(X, states, flr)
        #self.prior = self.svm.prior
        #self.transmat = self.svm.transmat

        features = None

        for f in features_list:
            if features is None:
                features = f
            else:
                features = append(features, f, 0)

        fullstates = None
        T = features.shape[0]
        for s in states:
            if fullstates is None:
                fullstates = s
            else:
                fullstates = append(fullstates, s)

        self.n_classes = self.svm.n_classes
        n_classes = self.n_classes

        print fullstates, shape(fullstates)

        h = histogram(fullstates, n_classes)[0].astype(float)
        self.prior = h/sum(h)

        # Add a very small probability for random jump

        self.prior += flr
        self.prior = self.prior/sum(self.prior)

        transmat = zeros((n_classes, n_classes))
        transitions = zeros((n_classes, ))
        for s in states:
            for i in range(1, len(s)):
                prev = s[i-1]
                cur = s[i]
                transmat[prev, cur] += 1
                transitions[prev] += 1
                # print "Adding one to", (prev, cur)

        transitions[transitions == 0] = 1

        for i in range(0, transmat.shape[0]):
            transmat[i,:] = transmat[i,:]/transitions[i]

        self.transmat = transmat

        # Add a very small probability for random jump to avoid zero values,
        # then re-normalize each row so the matrix stays row-stochastic

        self.transmat += flr
        self.transmat = self.transmat/self.transmat.sum(1)[:, None]

        # Alphas and Betas
        X = features

        forward_messages = zeros((T, n_classes))
        backward_messages = zeros((T, n_classes))

        print "[II] Computing alpha, beta..."
        for t in range(1, T+1):
            forward_messages[t-1,:] = self._forward_message(forward_messages[t-2,:], X[0:t, :])
            backward_messages[-t,:] = self._backward_message(backward_messages[-t+1], X[-t:, :])

        print "[II] Computing ksu..."

        a_norm = forward_messages[-1, n_classes-1] # alpha_T

        ksu = zeros((T, n_classes, n_classes))

        for i in range(0, n_classes):
            for j in range(0, n_classes):
                for t in range(0, T-1):
                    ksu[t,i,j] = exp(forward_messages[t,i] + log(transmat[i,j]) + log(self.estimate_emission_probability(X[t,:], j)) + backward_messages[t+1,j] - a_norm)

        self.alphas = forward_messages
        self.betas = backward_messages
        self.ksu = ksu

        transmat_new = zeros((n_classes, n_classes))

        for i in range(0, n_classes):
            for j in range(0, n_classes):
                transmat_new[i,j] = sum(ksu[:,i,j])/sum(ksu[:,i,:])

        self.transmat_new = transmat_new
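
        # The update above is the Baum-Welch M-step for the transition matrix:
        # a_ij_new = sum_t ksi_t(i,j) / sum_t sum_j ksi_t(i,j), where
        # ksi_t(i,j) is the posterior probability of moving from state i at
        # time t to state j at time t+1 given the whole observation sequence.
        # Note it is stored in transmat_new and not written back to
        # self.transmat, i.e. only a single EM iteration is performed here.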

    # print observations
    def estimate_emission_probability(self, x, q):
        post = self.svm.estimate_posterior_probability_vector(x)
        # print "post: ", post
        prior = self.prior
        # print "prior: ", prior
        p = post/prior
        p = p/sum(p)

        return p[q]
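
    # This is the usual "scaled likelihood" trick for hybrid classifier/HMM
    # systems: by Bayes' rule P(x|q) = P(q|x) P(x) / P(q), and since P(x) does
    # not depend on q it can be dropped, leaving P(q|x)/P(q) (renormalized over
    # states) as a stand-in for the emission probability.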

#    def estimate_emission_probability(self, x, q):
#        return self.svm.estimate_emission_probability(q, x)

    def logviterbi(self, X):
        # Number of states
        N = self.n_classes

        # Number of observations
        T = X.shape[0]

        transmat = self.transmat

        #1. Initialization
        a1 = self.prior

        delta = zeros((N,))
        psi = zeros((T, N), dtype=int)

        for i in range(0, N):
            delta[i] = log(a1[i]) + log(self.estimate_emission_probability(X[0,:], i))

        #2. Recursion
        for t in range(1, T):
            delta_old = delta.copy()
            x = X[t, :]
            for j in range(0, N):
                delta[j] = max(delta_old + log(transmat[:,j])) + log(self.estimate_emission_probability(x, j))
                psi[t,j] = argmax(delta_old + log(transmat[:,j]))

        #3. Termination
        p_star = max(delta)
        q_star = argmax(delta)

        #4. Backtracking
        q = zeros((T,), dtype=int)
        q[-1] = q_star

        for t in range(T-2, -1, -1):
            q[t] = psi[t+1, q[t+1]]

        return q

    def viterbi(self, X):
        # Number of states
        N = self.n_classes

        # Number of observations
        T = X.shape[0]

        transmat = self.transmat

        #1. Initialization
        a1 = self.prior

        delta = zeros((N, ))
        psi = zeros((T, N), dtype=int)

        for i in range(0, N):
            delta[i] = a1[i]*self.estimate_emission_probability(X[0,:], i)

        #2. Recursion
        for t in range(1, T):
            delta_old = delta.copy()
            x = X[t,:]
            for j in range(0, N):
                delta[j] = max(delta_old*transmat[:,j])*self.estimate_emission_probability(x, j)
                psi[t,j] = argmax(delta_old*transmat[:,j])

        #3. Termination
        p_star = max(delta)
        q_star = argmax(delta)

        #4. Backtracking
        q = zeros((T,), dtype=int)
        q[-1] = q_star

        for t in range(T-2, -1, -1):
            q[t] = psi[t+1, q[t+1]]

        return q
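
    # viterbi() and logviterbi() compute the same maximum a posteriori state
    # path; the log-domain version is preferred for anything but very short
    # sequences, since the products of probabilities in viterbi() underflow
    # quickly as T grows.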

    def _log_likelihood_matrix(self, X):
        N = self.n_classes
        ll = zeros((X.shape[0], N))

        for i in range(0, X.shape[0]):
            ll[i,:] = self._forward(i, X)

        return ll

    def compute_ksus(self, X):
        # NOTE: unfinished; only the smoothed state posteriors (gammas) are
        # computed so far.
        N = self.n_classes
        T = X.shape[0]
        print "[II] Computing gammas... "

        gammas = self._forward_backward(X)

        # Initialize

        aij = self.transmat

    def _not_quite_ksu(self, t, i, j, X):
        alpha = exp(self.forward(X[0:t+1,:]))[i]
        beta = exp(self.backward(X[t+1:,:]))[j]

        aij = self.transmat[i,j]
        bj = self.estimate_emission_probability(X[t+1,:], j)

        return alpha*beta*aij*bj

    def _ksu(self, t, i, j, X):
        # NOTE: the normalizer alphaT is computed but not applied yet, so this
        # still returns the unnormalized quantity from _not_quite_ksu().
        alphaT = exp(self.forward(X[0:t+1,:]))[0]

        return self._not_quite_ksu(t, i, j, X)

    def _forward_backward(self, X):
        T = X.shape[0]
        N = self.n_classes
        fv = zeros((T, N))
        sv = zeros((T, N))

        b = None
        for t in range(1, T+1):
            fv[t-1,:] = self._forward_message(fv[t-2,:], X[0:t, :])

        for t in range(1, T+1):
            b = self._backward_message(b, X[-t:, :])
            # Combine in the log domain: gamma is proportional to alpha*beta
            sv[-t,:] = lognorm(fv[-t,:] + b)

        return sv
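
    # _forward_backward() returns the smoothed log-posteriors
    # gamma_t(i) = P(q_t = i | x_1..T): the first loop accumulates
    # log alpha_t, the second log beta_t, and their sum (normalized with
    # lognorm) gives the per-frame state posterior.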

    def _backward(self, t, X):
        N = self.n_classes
        a = ones((N,))/N

        beta = zeros((N, ))
        transmat = self.transmat
        T = X.shape[0]
        x = X[t, :]
        if t == T-1:
            beta = log(a)

            return beta
        else:
            tosum = zeros((N, ))
            bt = self._backward(t+1, X)
            btnew = zeros((N, ))
            for j in range(0, N):
                for i in range(0, N):
                    tosum[i] = log(self.estimate_emission_probability(x, j)) + bt[i] + log(transmat[i,j])

                btnew[j] = logsumexp(tosum)
            btnew = lognorm(btnew)
            return btnew

    def forward(self, X):
        T = X.shape[0]
        f = None
        for t in range(1, T+1):
            f = self._forward_message(f, X[0:t, :])

        return f

    def backward(self, X):
        T = X.shape[0]
        b = None
        for t in range(1, T+1):
            b = self._backward_message(b, X[-t:, :])

        return b

    def _backward_message(self, b, X):
        N = self.n_classes

        beta = zeros((N, ))
        transmat = self.transmat

        x = X[0, :]

        if X.shape[0] == 1:
            beta = log(ones((N,))/N)
            return beta
        else:
            tosum = zeros((N, ))
            btnew = zeros((N, ))
            for j in range(0, N):
                for i in range(0, N):
                    tosum[i] = log(self.estimate_emission_probability(x, j)) + b[i] + log(transmat[i,j])

                btnew[j] = logsumexp(tosum)
            btnew = lognorm(btnew)
            return btnew

    def _forward_message(self, f, X):
        N = self.n_classes
        a = self.prior

        alpha = zeros((N, ))
        transmat = self.transmat

        x = X[-1, :]

        if X.shape[0] == 1:
            for i in range(0, N):
                alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i))
            alpha = lognorm(alpha)
            return alpha

        else:
            tosum = zeros((N,))
            ftnew = zeros((N,))
            for j in range(0, N):
                for i in range(0, N):
                    tosum[i] = f[i] + log(transmat[i,j])
                Sum = logsumexp(tosum)
                bj = self.estimate_emission_probability(x, j)
                ftnew[j] = Sum + log(bj)

            ftnew = lognorm(ftnew)
            return ftnew

    def _forward(self, t, X):
        N = self.n_classes
        a = self.prior
        alpha = zeros((N, ))
        transmat = self.transmat
        x = X[t, :]

        if t == 0:
            for i in range(0, N):
                alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i))
            alpha = lognorm(alpha)
            return alpha
        else:
            tosum = zeros((N, ))
            ft = self._forward(t-1, X)
            ftnew = zeros((N, ))
            for j in range(0, N):
                for i in range(0, N):
                    tosum[i] = ft[i] + log(transmat[i,j])

                Sum = logsumexp(tosum)
                bj = self.estimate_emission_probability(x, j)
                ftnew[j] = Sum + log(bj)
            ftnew = lognorm(ftnew)

            return ftnew

#    def forward(self, X):
#        # Compute log likelihood using the forward algorithm
#
#        # Number of states
#        N = self.n_classes
#
#        # Number of observations
#        T = X.shape[0]
#
#        transmat = self.transmat
#
#        # Initialization
#        # a1 = ones((N,))/N
#        a1 = self.prior
#
#        alpha = zeros((N,))
#        for i in range(0, N):
#            alpha[i] = log(a1[i]) + log(self.estimate_emission_probability(X[0,:], i))
#
#        # Recursion
#        for t in range(0, T):
#            alpha_old = alpha.copy()
#            x = X[t, :]
#            for j in range(0, N):
#                tosum = zeros((N,))
#                for i in range(0, N):
#                    tosum[i] = alpha_old[i] + log(transmat[i,j])
#
#                # Sum = logsum(tosum)
#                Sum = logsumexp(tosum)
#                bj = self.estimate_emission_probability(x, j)
#
#                alpha[j] = Sum + log(bj)
#
#        tosum = zeros((N,))
#        for i in range(0, N):
#            tosum[i] = alpha[i] + log(transmat[i,N-1])
#
#        return tosum
#
#    def backward(self, X):
#        # Number of states
#        N = self.n_classes
#
#        # Number of observations
#        T = X.shape[0]
#
#        transmat = self.transmat
#
#        # Initialization
#        b1 = ones((N,))/N
#
#        beta = zeros((N,))
#        for i in range(0, N):
#            beta[i] = log(b1[i]) + log(self.estimate_emission_probability(X[-1,:], i))
#
#        for t in range(0, T):
#            beta_old = beta.copy()
#            x = X[-t, :]
#            for j in range(0, N):
#                tosum = zeros((N, ))
#                for i in range(0, N):
#                    tosum[i] = beta_old[i] + log(transmat[i,j])
#
#                Sum = logsumexp(tosum)
#                bj = self.estimate_emission_probability(x, j)
#                beta[j] = Sum + log(bj)
#
#        tosum = zeros((N,))
#
#        for i in range(0, N):
#            tosum[i] = beta[i] + log(transmat[i,0])
#
#        # p = logsumexp(tosum)
#
#        return tosum

    def _log_likelihood(self, X):
        # NOTE: _forward_message() normalizes each message with lognorm, so
        # strictly this is the log-sum of a normalized message; the per-step
        # normalization constants are not accumulated here.
        return logsumexp(self.forward(X))

    def _likelihood(self, X):
        # Number of states
        N = self.n_classes

        # Number of observations
        T = X.shape[0]

        transmat = self.transmat

        # Initialization
        # a1 = ones((N,))/N
        a1 = self.prior

        alpha = zeros((N,))
        for i in range(0, N):
            alpha[i] = a1[i]*self.estimate_emission_probability(X[0,:], i)

        # Recursion
        print alpha
        for t in range(1, T):
            alpha_old = alpha.copy()
            x = X[t, :]
            for j in range(0, N):
                Sum = 0
                for i in range(0, N):
                    Sum += alpha_old[i]*transmat[i,j]

                alpha[j] = Sum*self.estimate_emission_probability(x, j)
            print alpha

        # Termination
        Sum = 0
        for i in range(0, N):
            Sum += alpha[i]*transmat[i, N-1]

        p = Sum

        return p


#    def fit(self, X, states):
#        # print parameters
#        n_classes = max(unique(states)) + 1
#
#        # svms = [None]*n_classes
#        obs = [None]*n_classes
#        sta = [None]*n_classes
#        chain_size = self.chain_size
#
#        print "[II] Number of classes: ", n_classes
#
#        # For each class:
#        #   Concatenate examples
#        #   Fit SVM
#
#        for i in range(chain_size, len(states)):
#            class_ = states[i-1]
#            seq = X[i-chain_size:i, :]
#            states_ = states[i-chain_size:i]
#
#            if obs[class_] is None:
#                obs[class_] = [seq]
#                sta[class_] = [states_]
#                self.svms.append(MyAVAClassifier())
#            else:
#                obs[class_].append(seq)
#                sta[class_].append(states_)
#
#        for i in range(0, len(obs)):
#            o = None
#            s = None
#
#            for j in range(0, len(obs[i])):
#                if o is None:
#                    o = obs[i][j]
#                    s = sta[i][j]
#                else:
#                    o = append(o, obs[i][j], 0)
#                    s = append(s, sta[i][j])
#
#            self.svms[i].fit(o, s)

#    def predict(self, features):
#        chain_size = self.chain_size
#        svms = self.svms
#
#        predicted_classes = zeros((features.shape[0],))
#        for i in range(chain_size, features.shape[0]):
#            scores = zeros((len(svms)))
#
#            seq = features[i-chain_size:i, :]
#
#            for j in range(0, len(svms)):
#                if svms[j] is not None:
#                    scores[j] = svms[j].compute_log_likelihood(seq)
#                else:
#                    scores[j] = -infty
#            predicted_classes[i] = argmax(scores)
#
#        return predicted_classes

# Marginalize over the latent variable
#    for i in range(0, X.shape[0]):
#        P = zeros((self.n_classes,))
#        x = X[i,:]
#        for j in range(0, len(P)):
#            P[j] = self.estimate_emission_probability(j, x)
#
#        S[i] = log(sum(P[j]))
#
#    return sum(S)

# Continue from here
X = features_vector_upsampled.T
y = parameters_state

clf = svm.SVC()
clf.fit(X, y)

parameters_state_y = clf.predict(X)

predhmmc3 = hmmc3.predict(features_vector_upsampled.T)

myava = MyAVAClassifier()
myava.fit(features_vector_upsampled.T, parameters_state)

predmyava = myava.predict(features_vector_upsampled.T)

# hmmsvmc = HMMsvmClassifier(N=2)
# hmmsvmc.fit(features_vector_upsampled.T, parameters_state)
#predhmmsvmc = hmmsvmc.predict(features_vector_upsampled.T)

#plot(parameters)
figure()
plot(parameters_state, 'b')

plot(parameters_state_y, 'r--')
plot(predhmmc3, 'g+-')

# plot(predhmmsvmc, 'bo-')

# Only three curves are drawn while the HMM-SVM prediction stays commented out
legend(['Real', 'SVM', 'HMM3'])

# TODO, HMM with SVM, Cross-validation

# Using hidden markov svm

svmhmm = ""

print "[II] Success ratio for SVM: %.2f" % (sum(parameters_state == parameters_state_y).astype(float)/float(len(parameters_state)))
print "[II] Success ratio for HMM: %.2f" % (sum(parameters_state == predhmmc3).astype(float)/float(len(predhmmc3)))

print "[II] All-vs-All custom Support Vector Classifier Success Ratio: %.2f " % (sum(predmyava == parameters_state).astype(float)/len(parameters_state))

model_ghmm = ReverbModel("GaussianHMM Classifier Model", kernel, q, features_to_keep, hmmc3, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector)
model_gnb = ReverbModel("Naive Bayes Classifier Model", kernel, q, features_to_keep, nbc, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector)
model_svm = ReverbModel("AVA LinearSVM Classifier Model", kernel, q, features_to_keep, myava, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector)

model_ghmm.save("model_ghmm.dat")
model_gnb.save("model_gnb.dat")
model_svm.save("model_svm.dat")
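
# The saved models can later be restored for prediction; assuming
# ReverbModel.save() pickles the object (the class is defined elsewhere in
# this project), loading would look roughly like:
#
#   import pickle
#   with open("model_ghmm.dat", "rb") as f:
#       model_ghmm = pickle.load(f)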


# HMM classifier with SVM emissions

# print "[II] All-vs-All custom HMM-SVM classifier Success Ratio: %.2f " % (sum(predhmmsvmc == parameters_state).astype(float)/len(parameters_state))
#

# obsc = MyAVAClassifier()
# obsc.fit(features_vector_upsampled.T, parameters_state)

# hmm2 = HMMsvm(obsc)
# hmm2.fit([features_vector_upsampled.T], [parameters_state])

# hmmsvmc = HMMsvmClassifier()
# hmmsvmc.fit(features_vector_upsampled.T, parameters_state)
# svmhmm = HMMSVMClassifier(N=6, n_components=max(unique(parameters_state)))
# svmhmm.fit(features_vector_upsampled.T, parameters_state)
# pred_svmhmm = svmhmm.predict(features_vector_upsampled.T)

# def predict(self, features, fname_testing = "testingdat.dat", fname_model = "training.model", fname_tags = "classes.tags"):
#     import subprocess
#
#     subprocess.call(["svmhmm/svm_hmm_classify", fname_testing, fname_model, fname_tags])

# hmsvmc = HmSVMClassifier()
# print hmsvmc.fit(features_vector_upsampled.T, parameters_state)

#plot(mse(states_to_vector(predhmmc3, parameter_state_alphabet_inv).T,
#         states_to_vector(parameters_state, parameter_state_alphabet_inv).T))

#
# print "[II] Classifying using svmhmm..."
#
# for l in range(1, len(parameters_state)+1):
#     svmhmm += "%d qid:1.%d " % (parameters_state[l-1], l)
#     for f in range(1, features_vector_upsampled.shape[0]+1):
#         svmhmm += "%d:%.4f " % (f, features_vector_upsampled[f-1,l-1])
#
#     svmhmm += "#training \n"
#
# fileout = open("trainingdat.dat", "w")
# fileout.write(svmhmm)
# fileout.close()
#
# import subprocess
#
# C = len(parameters_state)/3
# # C = 100
# #subprocess.call("svmhmm/svm_hmm_learn -c %d -e 0.5 trainingdat.dat training.model" % len(parameters_state))
# subprocess.call(["svmhmm/svm_hmm_learn", '-c', '%d' % C, '-e', '0.7', 'trainingdat.dat', 'training.model'])
#
# subprocess.call(["svmhmm/svm_hmm_classify", 'trainingdat.dat', 'training.model', 'classes.tags'])
#
# f = open('classes.tags')
# s = f.read()
# s = s[2:-2]
# parameters_state_y2 = [int(d) for d in s if d != ' ']
# f.close()
#
# plot(parameters_state_y2)
#
# classif_ratio_svm = 1 - float(sum(parameters_state != parameters_state_y))/len(parameters_state)
# classif_ratio_svmhmm = 1 - float(sum(parameters_state != parameters_state_y2))/len(parameters_state)
#
# print "[II] SVM classification ratio: %.2f" % classif_ratio_svm
# print "[II] SVM HMM classification ratio: %.2f" % classif_ratio_svmhmm