Mercurial > hg > chourdakisreiss2016
comparison experiment-reverb/code/models.py @ 0:246d5546657c
initial commit, needs cleanup
author | Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk> |
---|---|
date | Wed, 14 Dec 2016 13:15:48 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:246d5546657c |
---|---|
1 # -*- coding: utf-8 -*- | |
2 """ | |
3 Created on Tue Jul 7 11:21:52 2015 | |
4 | |
5 @author: Emmanouil Theofanis Chourdakis | |
6 """ | |
7 | |
8 from sklearn.base import BaseEstimator | |
9 | |
10 import pickle | |
11 from numpy import * | |
12 | |
13 | |
class SinkHoleClassifier(BaseEstimator):
    """ Majority-voting ensemble over four member classifiers: Gaussian
    Naive Bayes, a linear SVM, an HMM classifier and an HMM-SVM hybrid.
    Each member votes per sample; ties are broken uniformly at random. """

    def __init__(self, name="SinkholeClassifier", N=2, n_components=1):
        # Keep the constructor arguments so get_params/set_params
        # round-trip (sklearn estimator convention).
        self.N = N
        self.n_components = n_components
        self.classifierNB = NBClassifier()
        self.classifierSVM = LinearSVC(dual=False)
        self.classifierHMM = HmmClassifier(N=N, n_components=n_components)
        self.classifierHMMSVM = HMMsvmClassifier(N=N)
        self.name = name

    def score(self, features, parameters):
        # Mean accuracy of the ensemble prediction against ground truth.
        predicted_states = self.predict(features)
        return accuracy_score(parameters, predicted_states)

    def getName(self):
        return self.name

    def get_params(self, deep=True):
        # BUGFIX: the original returned {"N": self.chain_size}, but this
        # class never defines chain_size (AttributeError); it also
        # omitted n_components.
        return {"N": self.N, "n_components": self.n_components}

    def set_params(self, **parameters):
        # BUGFIX: the original called self.setattr(...), which is not a
        # method on any object (AttributeError).
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def fit(self, X, y):
        # Labels are assumed to be contiguous 0-based integers, so the
        # class count is max(y) + 1.
        self.n_classes = max(unique(y)) + 1

        self.classifierNB.fit(X, y)
        self.classifierSVM.fit(X, y)
        self.classifierHMM.fit(X, y)
        self.classifierHMMSVM.fit(X, y)

        return self

    def predict(self, X):
        """ Predict one label per row of X by majority vote of the
        four member classifiers. """
        predictedNB = self.classifierNB.predict(X)
        predictedSVM = self.classifierSVM.predict(X)
        predictedHMM = self.classifierHMM.predict(X)
        predictedHMMSVM = self.classifierHMMSVM.predict(X)

        predicted = zeros((X.shape[0],))

        for i in range(0, len(predicted)):
            # BUGFIX: the original also listed predictedLogReg[i], but no
            # logistic-regression member is ever created or run anywhere
            # in this class -- it raised NameError at runtime.
            candidates = [predictedNB[i], predictedSVM[i],
                          predictedHMM[i], predictedHMMSVM[i]]

            c = Counter(candidates)
            most_common = c.most_common()

            # If there is an equal voting, select something at random
            if len(unique([k[1] for k in most_common])) == 1:
                predicted[i] = most_common[randint(len(most_common))][0]
            else:
                predicted[i] = most_common[0][0]

        return predicted
class MyAVAClassifier:
    """ All-vs-all (one-vs-one) SVM classifier.

    fit() trains one poly-kernel svm.SVC per unordered pair of classes;
    predict() combines the pairwise probabilities into per-class
    posteriors via the coupling rule  1/p_i = sum_{j!=i} 1/r_ij - (K-2)
    and picks the argmax.  Label sets containing only one or two
    distinct classes are special-cased in fit()/predict().
    """

    def __init__(self):
        # Maps an (i, j) class-index pair to the SVC trained on that pair.
        self.classifiers = {}
        self.name = "Linear SVM Classifier"
        self.smallname = "svc-ava"


    def getName(self):
        return self.name
    def fit(self, X, y, flr = 0, C=0.7):
        """ Fit the pairwise SVMs on (X, y).

        X   : (n_samples, n_features) feature matrix
        y   : integer class labels; assumed contiguous 0-based (n_classes
              is taken as max(y) + 1)
        flr : small floor probability added to the transition matrix
        C   : SVC regularization parameter
        """

        n_classes = max(unique(y)) + 1

        if len(unique(y)) == 1:
            # Degenerate case: only one class present; just remember it.
            self.only_one_class = True
            self.n_classes = 1
            self.one_class_label = y[0]
            return
        elif len(unique(y)) == 2:
            # Two classes: a single SVC decides directly in predict().
            self.n_classes = n_classes
            self.svm = svm.SVC(decision_function_shape='ovr',degree=2,probability = True, kernel='poly', gamma=2, C = C)
            self.svm.fit(X,y)
            classes_ = unique(y)
            self.classifiers[(classes_[0], classes_[1])] = self.svm
            self.only_two_classes = True
            self.only_one_class = False

            return
        else:
            self.only_two_classes = False
            self.only_one_class = False


        classes = arange(0, n_classes)
        self.n_classes = n_classes

        # Empirical class prior from the label histogram.
        h = histogram(y, n_classes)[0].astype(float)
        self.prior = h/sum(h)

        # First-order label transition counts (prev -> cur).  Note the
        # normalization is over the whole matrix, not per row.
        # NOTE(review): prior/transmat are not used by predict() here --
        # presumably consumed by an HMM wrapper elsewhere; verify.
        transmat = zeros((n_classes, n_classes))

        for i in range(1, len(y)):
            prev = y[i-1]
            cur = y[i]
            transmat[prev,cur] += 1

        transmat = transmat/sum(transmat)

        self.transmat = transmat

        # Add a very small probability for random jump to avoid zero values

        self.transmat += flr
        self.transmat = self.transmat/sum(self.transmat)

        # Train one SVC per unordered class pair, skipping pairs where
        # one of the two classes has no samples.
        for i in range(0, n_classes):
            for j in range(0, n_classes):
                if i != j and (i,j) not in self.classifiers and (j,i) not in self.classifiers:

                    # Rows belonging to class i or class j only.
                    idx_ = bitwise_or(y == classes[i], y == classes[j])

                    X_ = X[idx_, :]

                    y_ = y[idx_]

                    if len(unique(y_)) > 1:
                        svm_ = svm.SVC(probability = True, kernel='poly', gamma=2, C = C)

                        svm_.fit(X_, y_)
                        self.classifiers[(i,j)] = svm_


    def estimate_pairwise_class_probability(self, i, j, x):
        """ Return P(class i | x, class in {i, j}) from the pair's SVC. """


        if (i,j) not in self.classifiers and (j,i) in self.classifiers:
            # Pair stored under the reversed key: class i is the larger
            # label, i.e. the second predict_proba column.
            return self.classifiers[(j,i)].predict_proba(x)[0,1]
        elif (i,j) not in self.classifiers and (j,i) not in self.classifiers:
            # No SVC was trained for this pair (a class was absent).
            return 0.0
        else:
            return self.classifiers[(i,j)].predict_proba(x)[0,0]

    def estimate_posterior_probability(self, i, x):
        """ Couple the pairwise probabilities into a posterior for class
        i:  1/p_i = sum_{j != i} 1/r_ij - (n_classes - 2). """
        mus = zeros((self.n_classes,))
        for j in range(0, self.n_classes):
            if i != j:
                pcp = self.estimate_pairwise_class_probability(i,j,x)
                pcp += 1e-18  # guard against division by zero
                mus[j] = 1/pcp
        S = sum(mus) - (self.n_classes - 2)
        return 1/S

    def estimate_posterior_probability_vector(self, x):
        """ Posterior probabilities of every class for one sample x. """
        posterior = zeros((self.n_classes,))
        for i in range(0, len(posterior)):
            posterior[i] = self.estimate_posterior_probability(i, x)

        return posterior


    def predict(self, X):
        """ Predict one class label per row of X. """
        predicted = zeros((X.shape[0],))

        if self.only_one_class == True:
            # Only one class was ever seen at fit time.
            return ones((X.shape[0],))*self.one_class_label
        elif self.only_two_classes == True:
            # Two-class case: delegate to the single SVC.
            return self.svm.predict(X)


        for i in range(0, X.shape[0]):
            x = X[i,:]
            P = zeros((self.n_classes,))


            for c in range(0, len(P)):
                P[c] = self.estimate_posterior_probability(c, x)

            pred = argmax(P)
            predicted[i] = pred

        return predicted
195 | |
196 | |
197 class NBClassifier: | |
198 def __init__(self): | |
199 print "[II] Gaussian Naive Bayes Classifier" | |
200 self.name = "Naive Bayes" | |
201 self.smallname = "gnbc" | |
202 self.gnb = GaussianNB() | |
203 | |
204 def getName(self): | |
205 return self.name | |
206 | |
207 def score(self, features, parameters): | |
208 predicted_states = self.predict(features) | |
209 return accuracy_score(parameters, predicted_states) | |
210 | |
211 def fit(self, X, states): | |
212 self.gnb.fit(X, states) | |
213 | |
214 def predict(self, X): | |
215 return self.gnb.predict(X) | |
216 | |
217 | |
class HmmClassifier(BaseEstimator):
    """ Classifier that fits one GaussianHMM per class on fixed-length
    windows of N consecutive feature vectors and, at prediction time,
    assigns each sample the class whose HMM scores its window highest. """

    def __init__(self, N=2, n_components=1):
        self.name = "HMM (%d time steps, %d components)" % (N, n_components)
        self.n_components = n_components
        # Length of the observation window fed to each per-class HMM.
        self.chain_size = N

    def get_params(self, deep=True):
        return {"N": self.chain_size, "n_components": self.n_components}

    def set_params(self, **parameters):
        # BUGFIX: the original called self.setattr(...), which is not a
        # method (AttributeError).  The public parameter "N" is also
        # mapped back onto the internal chain_size attribute so that
        # get_params/set_params round-trip.
        for parameter, value in parameters.items():
            if parameter == "N":
                self.chain_size = value
            else:
                setattr(self, parameter, value)
        return self

    def getName(self):
        return self.name

    def score(self, features, parameters):
        # Mean accuracy against the true labels.
        predicted_states = self.predict(features)
        return accuracy_score(parameters, predicted_states)

    def fit(self, features, parameters):
        """ Fit one GaussianHMM per class.

        features   : (n_samples, n_features) matrix
        parameters : integer labels; assumed contiguous 0-based
        Returns the per-class list of observation windows (kept for
        backward compatibility with the original implementation).
        """
        n_classes = max(unique(parameters)) + 1

        if n_classes == 1:
            # Single class: nothing to model; predict() returns zeros.
            self.only_one_class = True
            return
        else:
            self.only_one_class = False

        hmms = [None]*n_classes
        chain_size = self.chain_size
        obs = [None]*n_classes

        # Group each length-chain_size window under the label of the
        # sample that immediately follows it.
        for i in range(chain_size, len(parameters)):
            class_ = parameters[i]
            seq = features[i-chain_size:i, :]

            if obs[class_] is None:
                obs[class_] = [seq]
            else:
                obs[class_].append(seq)

        for i in range(0, len(obs)):
            if obs[i] is not None and len(obs[i]) != 0:
                hmm_ = hmm.GaussianHMM(n_components=self.n_components, covariance_type='diag')
                # Windows are concatenated; the second fit argument lists
                # the individual sequence lengths.
                obs_ = concatenate(obs[i])
                hmm_.fit(obs_, [self.chain_size]*len(obs[i]))

                hmms[i] = hmm_

        self.hmms = hmms

        return obs

    def predict(self, features, mfilt=20):
        """ Predict one label per row of `features`.

        The first chain_size rows stay 0 because no full window exists
        for them yet.  `mfilt` is accepted for interface compatibility
        but is unused. """
        if self.only_one_class == True:
            return zeros((features.shape[0], ))

        chain_size = self.chain_size
        hmms = self.hmms
        predicted_classes = zeros((features.shape[0],))

        for i in range(chain_size, features.shape[0]):
            scores = zeros((len(hmms),))
            seq = features[i-chain_size:i, :]

            # Score the window under every class HMM; classes with no
            # trained model are ruled out with -inf.
            for j in range(0, len(hmms)):
                if hmms[j] is not None:
                    scores[j] = hmms[j].score(seq)
                else:
                    scores[j] = -infty

            predicted_classes[i] = argmax(scores)

        return predicted_classes
305 | |
class HMMsvmClassifier(BaseEstimator):
    """ Hybrid sequence classifier: an all-vs-all SVM provides the
    observation model for an HMMsvm decoder (defined elsewhere in the
    project). """

    def __init__(self, N=2):
        self.classifiers = {}
        self.name = "HMM-SVM Classifier"
        # SVM-based emission/observation model.
        self.obs = MyAVAClassifier()
        self.chain_size = N

    def get_params(self, deep=True):
        return {"N": self.chain_size}

    def set_params(self, **parameters):
        # BUGFIX: the original called self.setattr(...), which is not a
        # method (AttributeError).  The public "N" parameter is mapped
        # back onto the internal chain_size attribute.
        for parameter, value in parameters.items():
            if parameter == "N":
                self.chain_size = value
            else:
                setattr(self, parameter, value)
        return self

    def getName(self):
        return self.name

    def score(self, features, parameters):
        # Mean accuracy against the true labels.
        predicted_states = self.predict(features)
        return accuracy_score(parameters, predicted_states)

    def fit(self, X, y):
        # Labels are assumed to be contiguous 0-based integers.
        self.n_classes = max(unique(y)) + 1

        self.obs.fit(X, y)
        # HMMsvm expects lists of sequences; X/y form a single sequence.
        self.hmm = HMMsvm(self.obs)
        self.hmm.fit([X], [y])

    def predict(self, X):
        return self.hmm.predict(X)

    def confidence(self, x, q):
        # Emission probability of observation x from state q under the
        # fitted HMM-SVM model.
        return self.hmm.estimate_emission_probability(x, q)
340 class ReverbModel: | |
341 """ Our Reverberation model, consists on the Principal Components Kernel, | |
342 the number of salient principal component dimensions, the list of salient | |
343 features and the classifier itself. """ | |
344 | |
345 | |
346 def __init__(self, name=None, kernel=None, q=None, feature_list=None, classifier=None, parameter_dictionary=None, moments_vector=None, filename=None): | |
347 | |
348 if filename is not None: | |
349 self.load(filename) | |
350 name = self.name | |
351 kernel = self.kernel | |
352 q = self.q | |
353 feature_list = self.feature_list | |
354 classifier = self.classifier | |
355 parameter_dictionary = self.parameter_dictionary | |
356 moments_vector = self.moments_vector | |
357 else: | |
358 if parameter_dictionary is None or name is None or kernel is None or q is None or feature_list is None or classifier is None: | |
359 raise Exception("Must supply name, kernel, q, feature_list and classifier or filename.") | |
360 | |
361 | |
362 print "[II] Initializing model `%s' with %dx%d PC kernel and features:" % (name,q,q) | |
363 print str(feature_list).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7] | |
364 # print "[II] Using classifier: %s" % classifier.name | |
365 print "[II] And parameters dictionary:", parameter_dictionary | |
366 | |
367 | |
368 self.name = name | |
369 | |
370 self.kernel = kernel | |
371 self.q = q | |
372 self.feature_list = feature_list | |
373 self.classifier = classifier | |
374 self.parameter_dictionary = parameter_dictionary | |
375 self.moments_vector = moments_vector | |
376 | |
377 def save(self, filename): | |
378 print "[II] Saving model to: `%s.'" % filename | |
379 f = open(filename, 'wb') | |
380 pickle.dump(self, f) | |
381 f.close() | |
382 | |
383 def load(self, filename): | |
384 print "[II] Loading model from: `%s'." % filename | |
385 f = open(filename, 'rb') | |
386 new_model = pickle.load(f) | |
387 self.name = new_model.name | |
388 self.kernel = new_model.kernel | |
389 self.q = new_model.q | |
390 self.feature_list = new_model.feature_list | |
391 self.classifier = new_model.classifier | |
392 self.parameter_dictionary = new_model.parameter_dictionary | |
393 self.moments_vector = new_model.moments_vector | |
394 | |
395 f.close() | |
396 |