# -*- coding: utf-8 -*-
"""
Created on Tue Jul 7 11:21:52 2015

@author: Emmanouil Theofanis Chourdakis
"""

import pickle
from collections import Counter

from numpy import *
from numpy.random import randint

from sklearn.base import BaseEstimator
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC
from sklearn import svm

# GaussianHMM is imported from the hmmlearn package here; very old code bases
# pulled it from sklearn.hmm instead.
from hmmlearn import hmm


class SinkHoleClassifier(BaseEstimator):
    """Majority-voting ensemble over the NB, SVM, HMM and HMM-SVM models."""

    def __init__(self, name="SinkholeClassifier", N=2, n_components=1):
        self.classifierNB = NBClassifier()
        # self.classifierSVM = MyAVAClassifier()
        self.classifierSVM = LinearSVC(dual=False)
        self.classifierHMM = HmmClassifier(N=N, n_components=n_components)
        self.classifierHMMSVM = HMMsvmClassifier(N=N)
        self.name = name
        # Keep the constructor arguments so that get_params() can report them.
        self.chain_size = N
        self.n_components = n_components

    def score(self, features, parameters):
        predicted_states = self.predict(features)
        return accuracy_score(parameters, predicted_states)

    def getName(self):
        return self.name

    def get_params(self, deep=True):
        return {"N": self.chain_size, "n_components": self.n_components}

    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def fit(self, X, y):
        self.n_classes = max(unique(y)) + 1

        self.classifierNB.fit(X, y)
        self.classifierSVM.fit(X, y)
        self.classifierHMM.fit(X, y)
        self.classifierHMMSVM.fit(X, y)

        return self

    def predict(self, X):
        predictedNB = self.classifierNB.predict(X)
        predictedSVM = self.classifierSVM.predict(X)
        predictedHMM = self.classifierHMM.predict(X)
        predictedHMMSVM = self.classifierHMMSVM.predict(X)

        predicted = zeros((X.shape[0],))

        for i in range(0, len(predicted)):
            candidates = [predictedNB[i], predictedSVM[i],
                          predictedHMM[i], predictedHMMSVM[i]]

            c = Counter(candidates)
            most_common = c.most_common()

            # If the vote is tied, pick one of the candidates at random.
            if len(unique([k[1] for k in most_common])) == 1:
                predicted[i] = most_common[randint(len(most_common))][0]
            else:
                predicted[i] = most_common[0][0]

        return predicted
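
# Usage sketch (hypothetical data; assumes integer class labels 0..K-1 and a
# feature matrix of shape (n_samples, n_features)):
#
#     clf = SinkHoleClassifier(N=2, n_components=1)
#     clf.fit(X_train, y_train)
#     y_pred = clf.predict(X_test)    # majority vote of the four base models
#     print(clf.score(X_test, y_test))

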
class MyAVAClassifier:
    """One-vs-one ("all-versus-all") SVM classifier with pairwise
    probability coupling."""

    def __init__(self):
        self.classifiers = {}
        self.name = "Linear SVM Classifier"
        self.smallname = "svc-ava"

    def getName(self):
        return self.name

    def fit(self, X, y, flr=0, C=0.7):
        n_classes = max(unique(y)) + 1

        if len(unique(y)) == 1:
            # Degenerate case: only one class is present in the training data.
            self.only_one_class = True
            self.n_classes = 1
            self.one_class_label = y[0]
            return
        elif len(unique(y)) == 2:
            # With exactly two classes a single SVM suffices; no coupling needed.
            self.n_classes = n_classes
            self.svm = svm.SVC(decision_function_shape='ovr', degree=2,
                               probability=True, kernel='poly', gamma=2, C=C)
            self.svm.fit(X, y)
            classes_ = unique(y)
            self.classifiers[(classes_[0], classes_[1])] = self.svm
            self.only_two_classes = True
            self.only_one_class = False
            return
        else:
            self.only_two_classes = False
            self.only_one_class = False

        classes = arange(0, n_classes)
        self.n_classes = n_classes

        # Class prior estimated from the label frequencies.
        h = histogram(y, n_classes)[0].astype(float)
        self.prior = h / sum(h)

        # First-order transition counts between consecutive labels.
        transmat = zeros((n_classes, n_classes))

        for i in range(1, len(y)):
            prev = y[i - 1]
            cur = y[i]
            transmat[prev, cur] += 1

        transmat = transmat / sum(transmat)
        self.transmat = transmat

        # Add a very small probability for a random jump to avoid zero values.
        self.transmat += flr
        self.transmat = self.transmat / sum(self.transmat)

        # Train one binary SVM per unordered pair of classes, using only the
        # samples belonging to either class of the pair.
        for i in range(0, n_classes):
            for j in range(0, n_classes):
                if i != j and (i, j) not in self.classifiers \
                        and (j, i) not in self.classifiers:
                    idx_ = bitwise_or(y == classes[i], y == classes[j])
                    X_ = X[idx_, :]
                    y_ = y[idx_]

                    if len(unique(y_)) > 1:
                        svm_ = svm.SVC(probability=True, kernel='poly',
                                       gamma=2, C=C)
                        svm_.fit(X_, y_)
                        self.classifiers[(i, j)] = svm_
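
    # Worked example of the transition-count loop above (hypothetical labels):
    # for y = [0, 0, 1, 1, 0] the consecutive pairs are (0,0), (0,1), (1,1)
    # and (1,0), so before normalization transmat = [[1, 1], [1, 1]].
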
    def estimate_pairwise_class_probability(self, i, j, x):
        # Returns P(y = i | y in {i, j}, x). predict_proba orders its columns
        # by ascending class label, so which column holds class i depends on
        # which ordering of the pair was stored at fit time.
        x = atleast_2d(x)

        if (i, j) not in self.classifiers and (j, i) in self.classifiers:
            return self.classifiers[(j, i)].predict_proba(x)[0, 1]
        elif (i, j) not in self.classifiers and (j, i) not in self.classifiers:
            return 0.0
        else:
            return self.classifiers[(i, j)].predict_proba(x)[0, 0]

    def estimate_posterior_probability(self, i, x):
        mus = zeros((self.n_classes,))
        for j in range(0, self.n_classes):
            if i != j:
                pcp = self.estimate_pairwise_class_probability(i, j, x)
                pcp += 1e-18  # guard against division by zero
                mus[j] = 1 / pcp
        S = sum(mus) - (self.n_classes - 2)
        return 1 / S
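
    # estimate_posterior_probability implements the classic pairwise-coupling
    # approximation usually attributed to Price et al.: with K classes and
    # pairwise probabilities p_ij = P(y = i | y in {i, j}, x), the posterior is
    #
    #     P(y = i | x)  ~=  1 / ( sum_{j != i} 1/p_ij  -  (K - 2) ),
    #
    # where subtracting K - 2 compensates for P(y = i | x) being counted once
    # in each of the K - 1 pairwise terms.
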
    def estimate_posterior_probability_vector(self, x):
        posterior = zeros((self.n_classes,))
        for i in range(0, len(posterior)):
            posterior[i] = self.estimate_posterior_probability(i, x)

        return posterior

    def predict(self, X):
        predicted = zeros((X.shape[0],))

        if self.only_one_class:
            return ones((X.shape[0],)) * self.one_class_label
        elif self.only_two_classes:
            return self.svm.predict(X)

        for i in range(0, X.shape[0]):
            x = X[i, :]
            P = zeros((self.n_classes,))

            for c in range(0, len(P)):
                P[c] = self.estimate_posterior_probability(c, x)

            predicted[i] = argmax(P)

        return predicted
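
# Usage sketch for the one-vs-one classifier (hypothetical data; labels are
# assumed to be the integers 0..K-1):
#
#     ava = MyAVAClassifier()
#     ava.fit(X_train, y_train)
#     y_pred = ava.predict(X_test)
#     p = ava.estimate_posterior_probability_vector(X_test[0, :])  # length K

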
class NBClassifier:
    def __init__(self):
        print("[II] Gaussian Naive Bayes Classifier")
        self.name = "Naive Bayes"
        self.smallname = "gnbc"
        self.gnb = GaussianNB()

    def getName(self):
        return self.name

    def score(self, features, parameters):
        predicted_states = self.predict(features)
        return accuracy_score(parameters, predicted_states)

    def fit(self, X, states):
        self.gnb.fit(X, states)

    def predict(self, X):
        return self.gnb.predict(X)


class HmmClassifier(BaseEstimator):
    def __init__(self, N=2, n_components=1):
        self.name = "HMM (%d time steps, %d components)" % (N, n_components)
        self.n_components = n_components
        self.chain_size = N

    def get_params(self, deep=True):
        return {"N": self.chain_size, "n_components": self.n_components}

    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def getName(self):
        return self.name

    def score(self, features, parameters):
        predicted_states = self.predict(features)
        return accuracy_score(parameters, predicted_states)

    def fit(self, features, parameters):
        n_classes = max(unique(parameters)) + 1

        if n_classes == 1:
            self.only_one_class = True
            return self
        else:
            self.only_one_class = False

        hmms = [None] * n_classes
        chain_size = self.chain_size
        obs = [None] * n_classes

        # Collect, for every class, the fixed-length windows of features that
        # immediately precede an occurrence of that class.
        for i in range(chain_size, len(parameters)):
            class_ = parameters[i]
            seq = features[i - chain_size:i, :]

            if obs[class_] is None:
                obs[class_] = [seq]
            else:
                obs[class_].append(seq)

        # Fit one Gaussian HMM per class on its concatenated windows.
        for i in range(0, len(obs)):
            if obs[i] is not None and len(obs[i]) != 0:
                hmm_ = hmm.GaussianHMM(n_components=self.n_components,
                                       covariance_type='diag')
                obs_ = concatenate(obs[i])
                hmm_.fit(obs_, [self.chain_size] * len(obs[i]))
                hmms[i] = hmm_

        self.hmms = hmms

        return self

    def predict(self, features):
        if self.only_one_class:
            return zeros((features.shape[0],))

        chain_size = self.chain_size
        hmms = self.hmms
        predicted_classes = zeros((features.shape[0],))

        # Slide a window over the features and pick the class whose HMM
        # assigns the window the highest log-likelihood.
        for i in range(chain_size, features.shape[0]):
            scores = zeros((len(hmms),))
            seq = features[i - chain_size:i, :]

            for j in range(0, len(hmms)):
                if hmms[j] is not None:
                    scores[j] = hmms[j].score(seq)
                else:
                    scores[j] = -infty

            predicted_classes[i] = argmax(scores)

        return predicted_classes
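
# Usage sketch for the per-class HMM approach (hypothetical data): every class
# gets its own GaussianHMM trained on the chain_size-long feature windows that
# precede it, and prediction scores each window under every class model.
#
#     hc = HmmClassifier(N=4, n_components=2)
#     hc.fit(features, labels)         # features: (n_samples, n_features)
#     states = hc.predict(features)    # entries before index N default to 0

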
class HMMsvmClassifier(BaseEstimator):
    def __init__(self, N=2):
        self.classifiers = {}
        self.name = "HMM-SVM Classifier"
        self.obs = MyAVAClassifier()
        self.chain_size = N

    def get_params(self, deep=True):
        return {"N": self.chain_size}

    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def getName(self):
        return self.name

    def score(self, features, parameters):
        predicted_states = self.predict(features)
        return accuracy_score(parameters, predicted_states)

    def fit(self, X, y):
        self.n_classes = max(unique(y)) + 1

        self.obs.fit(X, y)
        # HMMsvm is not defined in this module; it is assumed to be provided
        # elsewhere in the project.
        self.hmm = HMMsvm(self.obs)
        self.hmm.fit([X], [y])

    def predict(self, X):
        return self.hmm.predict(X)

    def confidence(self, x, q):
        return self.hmm.estimate_emission_probability(x, q)


class ReverbModel:
    """Our reverberation model. It consists of the principal-components
    kernel, the number of salient principal-component dimensions, the list
    of salient features, and the classifier itself."""

    def __init__(self, name=None, kernel=None, q=None, feature_list=None,
                 classifier=None, parameter_dictionary=None,
                 moments_vector=None, filename=None):

        if filename is not None:
            self.load(filename)
            name = self.name
            kernel = self.kernel
            q = self.q
            feature_list = self.feature_list
            classifier = self.classifier
            parameter_dictionary = self.parameter_dictionary
            moments_vector = self.moments_vector
        else:
            if parameter_dictionary is None or name is None or kernel is None \
                    or q is None or feature_list is None or classifier is None:
                raise Exception("Must supply name, kernel, q, feature_list "
                                "and classifier, or a filename.")

        print("[II] Initializing model `%s' with %dx%d PC kernel and features:"
              % (name, q, q))
        for feature in feature_list:
            print("[II]\t %s" % feature)
        # print("[II] Using classifier: %s" % classifier.name)
        print("[II] And parameters dictionary: %s" % parameter_dictionary)

        self.name = name
        self.kernel = kernel
        self.q = q
        self.feature_list = feature_list
        self.classifier = classifier
        self.parameter_dictionary = parameter_dictionary
        self.moments_vector = moments_vector

    def save(self, filename):
        print("[II] Saving model to `%s'." % filename)
        with open(filename, 'wb') as f:
            pickle.dump(self, f)

    def load(self, filename):
        print("[II] Loading model from `%s'." % filename)
        with open(filename, 'rb') as f:
            new_model = pickle.load(f)

        self.name = new_model.name
        self.kernel = new_model.kernel
        self.q = new_model.q
        self.feature_list = new_model.feature_list
        self.classifier = new_model.classifier
        self.parameter_dictionary = new_model.parameter_dictionary
        self.moments_vector = new_model.moments_vector
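
# Usage sketch (hypothetical values; kernel, q, feature_list, classifier,
# parameter_dictionary and moments_vector come from the project's feature
# extraction and training stages):
#
#     model = ReverbModel(name="hall", kernel=K, q=3, feature_list=feats,
#                         classifier=clf, parameter_dictionary=params,
#                         moments_vector=mv)
#     model.save("hall.model")
#     restored = ReverbModel(filename="hall.model")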