Mercurial > hg > chourdakisreiss2016
comparison experiment-reverb/code/supervised_training_pca_bak_2.py @ 0:246d5546657c
initial commit, needs cleanup
author | Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk> |
---|---|
date | Wed, 14 Dec 2016 13:15:48 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:246d5546657c |
---|---|
1 #!/usr/bin/python2 | |
2 # -*- coding: utf-8 -*- | |
3 """ | |
4 Created on Thu Apr 23 11:53:17 2015 | |
5 | |
6 @author: mmxgn | |
7 """ | |
8 | |
9 # This file does the cluster estimation and the removal of outliers | |
10 | |
11 from sys import argv, exit | |
12 from essentia.standard import YamlInput, YamlOutput | |
13 from essentia import Pool | |
14 from pca import * | |
15 | |
16 from numpy import * | |
17 from sklearn import cluster | |
18 from sklearn.metrics import pairwise_distances | |
19 from sklearn.cluster import KMeans, MiniBatchKMeans | |
20 import matplotlib.pyplot as plt | |
21 #from sklearn.mixture import GMM | |
22 from sklearn.naive_bayes import GaussianNB, MultinomialNB | |
23 from scipy.signal import decimate | |
24 from sklearn import cross_validation | |
25 | |
26 #from hmmlearn import hmm | |
27 from hmmlearn.hmm import GMM | |
28 from hmmlearn import hmm | |
29 #from adpcm import adm, adm_reconstruct | |
30 | |
31 | |
32 mse = lambda A,B: ((array(A)-array(B)) ** 2).mean() | |
33 | |
34 | |
35 | |
36 | |
37 def adm_reconstruct(codeword, h, dmin=.01, dmax=.28): | |
38 x = zeros((1, codeword.shape[1])) | |
39 | |
40 delta1 = dmin | |
41 delta2 = dmin | |
42 Sum = h | |
43 | |
44 x[0] = h | |
45 for i in range(0, codeword.shape[1]): | |
46 if codeword[0,i] == 0: | |
47 delta1 = dmin | |
48 delta2 = dmin | |
49 | |
50 elif codeword[0,i] == 1: | |
51 delta2 = dmin | |
52 Sum += delta1 | |
53 delta1 *= 2 | |
54 if delta1 > dmax: | |
55 delta1 = dmax | |
56 | |
57 elif codeword[0,i] == -1: | |
58 delta1 = dmin | |
59 Sum -= delta2 | |
60 delta2 *= 2 | |
61 if delta2 > dmax: | |
62 delta2 = dmax | |
63 x[0,i] = Sum | |
64 return x | |
65 | |
66 def adm(x, dmin=.01, dmax=.28, tol=0.0001): | |
67 | |
68 # Adaptive delta modulation adapted by code: | |
69 # (adeltamod.m) | |
70 # | |
71 # Adaptive Delta Modulator | |
72 # by Gandhar Desai (gdesai) | |
73 # BITS Pilani Goa Campus | |
74 # Date: 28 Sept, 2013 | |
75 | |
76 xsig = x | |
77 | |
78 Lx = len(x) | |
79 | |
80 ADMout = zeros((1, Lx)) | |
81 codevec = zeros((1, Lx)) | |
82 | |
83 | |
84 Sum = x[0] | |
85 delta1 = dmin | |
86 delta2 = dmin | |
87 mult1 = 2 | |
88 mult2 = 2 | |
89 for i in range(0, Lx): | |
90 #print abs(xsig[i] - Sum) | |
91 if (abs(xsig[i] - Sum) < tol): | |
92 bit = 0 | |
93 delta2 = dmin | |
94 delta1 = dmin | |
95 | |
96 | |
97 elif (xsig[i] >= Sum): | |
98 bit = 1 | |
99 delta2 = dmin | |
100 Sum += delta1 | |
101 delta1 *= mult1 | |
102 if delta1 > dmax: | |
103 delta1 = dmax | |
104 | |
105 | |
106 else: | |
107 bit = -1 | |
108 delta1 = dmin | |
109 Sum -= delta2 | |
110 delta2 *= mult2 | |
111 if delta2 > dmax: | |
112 delta2 = dmax | |
113 | |
114 | |
115 | |
116 ADMout[0, i] = Sum | |
117 codevec[0, i]= bit | |
118 | |
119 return ADMout,codevec, x[0] | |
120 | |
121 if __name__=="__main__": | |
122 if len(argv) != 2: | |
123 print "[EE] Wrong number of arguments" | |
124 print "[II] Correct syntax is:" | |
125 print "[II] \t%s <training_file>" | |
126 print "[II] where <training_file> is a .yaml file containing the" | |
127 print "[II] features of the dataset (try output2_stage/fulltraining-last.yaml)" | |
128 exit(-1) | |
129 | |
130 | |
131 n_clusters = 4 | |
132 UpsamplingFactor = 1 | |
133 dmin = 0.001 | |
134 dmax = 0.28 | |
135 tol = 0.001 | |
136 | |
137 infile = argv[1] | |
138 | |
139 features_pool = YamlInput(filename = infile)() | |
140 | |
141 | |
142 | |
143 feature_captions = features_pool.descriptorNames() | |
144 parameter_captions = [] | |
145 | |
146 | |
147 for c in features_pool.descriptorNames(): | |
148 if c.split('.')[0] == 'parameter': | |
149 parameter_captions.append(c) | |
150 if c.split('.')[0] == 'metadata' or c.split('.')[0] == 'parameter': | |
151 feature_captions.remove(c) | |
152 | |
153 | |
154 | |
155 close('all') | |
156 | |
157 print "[II] Loaded training data from %s (%s) " % (infile, features_pool['metadata.date'][0]) | |
158 print "[II] %d Features Available: " % len(feature_captions) | |
159 | |
160 | |
161 | |
162 print str(feature_captions).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7] | |
163 | |
164 nfeatures_in = len(feature_captions) | |
165 nparameters_in = len(parameter_captions) | |
166 features_vector = zeros((nfeatures_in, len(features_pool[feature_captions[0]]))) | |
167 | |
168 parameters_vector = zeros((nparameters_in, len(features_pool[parameter_captions[0]]))) | |
169 | |
170 | |
171 for i in range(0, nfeatures_in): | |
172 features_vector[i, :] = features_pool[feature_captions[i]].T | |
173 for i in range(0, nparameters_in): | |
174 parameters_vector[i, :] = features_pool[parameter_captions[0]].T | |
175 | |
176 print "[II] %d parameters used:" % len(parameter_captions) | |
177 print str(parameter_captions).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7].replace('parameter.','') | |
178 | |
179 print "[II] Marking silent parts" | |
180 | |
181 silent_parts = zeros((1, len(features_pool[feature_captions[i]].T))) | |
182 | |
183 rms = features_vector[feature_captions.index('rms'), :] | |
184 | |
185 # Implementing Hysteresis Gate -- High threshold is halfway between | |
186 # the mean and the max and Low is halfway between the mean dn the min | |
187 | |
188 rms_threshold_mean = mean(rms) | |
189 | |
190 rms_threshold_max = max(rms) | |
191 rms_threshold_min = min(rms) | |
192 | |
193 rms_threshold_high = 0.1 * rms_threshold_mean | |
194 rms_threshold_low = 0.01 * rms_threshold_mean | |
195 | |
196 for n in range(1, len(rms)): | |
197 prev = rms[n-1] | |
198 curr = rms[n] | |
199 | |
200 if prev >= rms_threshold_high: | |
201 if curr < rms_threshold_low: | |
202 silent_parts[0,n] = 1 | |
203 else: | |
204 silent_parts[0,n] = 0 | |
205 elif prev <= rms_threshold_low: | |
206 if curr > rms_threshold_high: | |
207 silent_parts[0,n] = 0 | |
208 else: | |
209 silent_parts[0,n] = 1 | |
210 else: | |
211 silent_parts[0,n] = silent_parts[0,n-1] | |
212 | |
213 | |
214 if silent_parts[0,1] == 1: | |
215 silent_parts[0, 0] = 1 | |
216 | |
217 | |
218 # plot(rms) | |
219 # plot(silent_parts.T) | |
220 # plot(ones((len(rms), 1))*rms_threshold_high) | |
221 # plot(ones((len(rms), 1))*rms_threshold_low) | |
222 | |
223 active_index = invert(silent_parts.flatten().astype(bool)) | |
224 | |
225 # Keep only active parts | |
226 | |
227 # Uncomment this | |
228 features_vector = features_vector[:, active_index] | |
229 # parameters_vector = parameters_vector[:, active_index] | |
230 | |
231 moments_vector = zeros((features_vector.shape[0], 2)) | |
232 | |
233 print "[II] Storing moments vector" | |
234 for i in range(0, features_vector.shape[0]): | |
235 mean_ = mean(features_vector[i,:]) | |
236 std_ = std(features_vector[i,:], ddof=1) | |
237 moments_vector[i,0] = mean_ | |
238 moments_vector[i,1] = std_ | |
239 | |
240 features_vector[i,:] = (features_vector[i,:] - mean_)/std_ | |
241 | |
242 features_vector_original = features_vector | |
243 | |
244 | |
245 print "[II] Extracting PCA configuration " | |
246 | |
247 kernel, q, featurelist = extract_pca_configuration_from_data(features_vector) | |
248 | |
249 print "[II] Optimal number of PCs to keep: %d" % q | |
250 | |
251 feature_captions_array = array(feature_captions) | |
252 | |
253 # features_to_keep = features_vector | |
254 | |
255 features_to_keep = list(feature_captions_array[featurelist]) | |
256 print "[II] Decided to keep %d features:" % len(features_to_keep) | |
257 print str(features_to_keep).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7] | |
258 | |
259 | |
260 | |
261 # Keep the desired features | |
262 #Uncomment this | |
263 features_kept_data = features_vector[featurelist,:] | |
264 # features_kept_data = features_vector | |
265 # features_kept_data = features_vector | |
266 | |
267 # Generate the parameter clusters using k-means | |
268 | |
269 # Uncomment this | |
270 features_vector = (kernel.T*features_kept_data)[0:q,:] | |
271 #features_vector = log(features_vector+0.001) | |
272 # features_vector = features_vector_original | |
273 | |
274 | |
275 # parameters_k_means = KMeans(n_clusters = parameters_k, init='k-means++', max_iter=300, tol=0.0000001, verbose = 1) | |
276 parameters_k_means = KMeans(init='k-means++', n_init=10, max_iter=300, tol=0.0000001, verbose = 0) | |
277 # # parameters_k_means = MiniBatchKMeans(init='k-means++', max_iter=300, tol=0.00001, verbose = 1) | |
278 # | |
279 # # Quantize the differences of the parameters instead of the parameters themselves | |
280 # parameters_vector_diff = concatenate((zeros((shape(parameters_vector)[0],1)),diff(parameters_vector, axis=1)),axis=1) | |
281 # features_vector_diff = concatenate((zeros((shape(features_vector)[0],1)),diff(features_vector,axis=1)),axis=1) | |
282 # | |
283 # # Delete this afterwards | |
284 # # features_vector = features_vector_diff | |
285 # parameters_k_means.fit(parameters_vector_diff.T) | |
286 | |
287 print "[II] Trying ADM-coded parameters" | |
288 print "[II] Upsampling features and parameters by a factor of %d" % UpsamplingFactor | |
289 | |
290 | |
291 # Upsampled features and parameters | |
292 features_vector_upsampled = repeat(features_vector,UpsamplingFactor, axis=1) | |
293 # feature_labels_upsampled = repeat(features_clustering_labels,UpsamplingFactor, axis=0) | |
294 parameters_vector_upsampled = repeat(parameters_vector,UpsamplingFactor, axis=1) | |
295 | |
296 parameters_vector_upsampled_adm = matrix(zeros(shape(parameters_vector_upsampled))) | |
297 parameters_vector_upsampled_code = matrix(zeros(shape(parameters_vector_upsampled))) | |
298 parameters_vector_upsampled_firstval = matrix(zeros((parameters_vector_upsampled.shape[0],1))) | |
299 | |
300 # Reconstructed parameters | |
301 | |
302 parameters_vector_upsampled_reconstructed = matrix(zeros(shape(parameters_vector_upsampled))) | |
303 | |
304 | |
305 | |
306 | |
307 def adm_matrix(X, dmin=0.001,dmax=0.28,tol=0.001): | |
308 | |
309 out = matrix(zeros(shape(X))) | |
310 code = matrix(zeros(shape(X))) | |
311 firstval = matrix(zeros((X.shape[0], 1))) | |
312 | |
313 for i in range(0, X.shape[0]): | |
314 out[i,:], code[i,:], firstval[i,0] = adm(X[i,:],dmin=dmin,dmax=dmax,tol=tol) | |
315 | |
316 return out,code,firstval | |
317 | |
318 # parameters_vector_upsampled_reconstructed[i,:] = adm_reconstruct(parameters_vector_upsampled_code[i,:],parameters_vector_upsampled_firstval[i,0], dmin=dmin,dmax=dmax) | |
319 | |
320 def adm_matrix_reconstruct(code, firstval, dmin=0.001, dmax=0.28): | |
321 X = matrix(zeros(shape(code))) | |
322 for i in range(0, code.shape[0]): | |
323 X[i,:] = adm_reconstruct(code[i,:], firstval[i,0], dmin=dmin, dmax=dmax) | |
324 | |
325 return X | |
326 | |
327 | |
328 parameters_vector_upsampled_adm, parameters_vector_upsampled_code, parameters_vector_upsampled_firstval = adm_matrix(parameters_vector_upsampled, dmin, dmax, tol) | |
329 | |
330 | |
331 def diff_and_pad(X): | |
332 return concatenate(( | |
333 zeros(( | |
334 shape(X)[0], | |
335 1 | |
336 )), | |
337 diff(X, axis=1)), | |
338 axis=1) | |
339 | |
340 | |
341 #features_vector_upsampled = diff_and_pad(diff_and_pad(features_vector_upsampled)) | |
342 | |
343 # features_vector_diff = concatenate((zeros((shape(features_vector)[0],1)),diff(features_vector,axis=1)),axis=1) | |
344 | |
345 | |
346 | |
347 # Segmentation stuff | |
348 | |
349 | |
350 | |
351 # for i in range(0, parameters_vector_upsampled.shape[0]): | |
352 # out, code, h = adm(parameters_vector_upsampled[i,:],dmin=dmin,dmax=dmax,tol=tol) | |
353 # parameters_vector_upsampled_adm[i,:] = out | |
354 # parameters_vector_upsampled_code[i,:] = code | |
355 # parameters_vector_upsampled_firstval[i, 0] = h | |
356 | |
357 # parameters_k_means.fit(parameters_vector.T) | |
358 ## | |
359 # parameters_k_means_centers = parameters_k_means.cluster_centers_ | |
360 # parameters_k_means_labels = parameters_k_means.labels_ | |
361 ## | |
362 # parameters_vector_estimated = zeros(shape(parameters_vector)) | |
363 ## | |
364 # for n in range(0, len(parameters_vector_estimated[0])): | |
365 # parameters_vector_estimated[:,n] = parameters_k_means_centers[parameters_k_means_labels[n]] | |
366 ## | |
367 ### plot(parameters_vector[0]) | |
368 ## # plot(parameters_vector_estimated[0]) | |
369 ## | |
370 ## # PROvLIMA EDW | |
371 # print "[II] Parameters MSE for %d clusters: %.3f" % (len(parameters_k_means.cluster_centers_), mse(parameters_vector, parameters_vector_estimated)) | |
372 # | |
373 ## | |
374 print "[II] Clustering features." | |
375 # | |
376 features_clustering = GMM(n_components = n_clusters, covariance_type='diag') | |
377 # | |
378 features_clustering.fit( features_vector_upsampled.T, y=parameters_vector_upsampled_code) | |
379 # | |
380 features_clustering_means = features_clustering.means_ | |
381 features_clustering_labels = features_clustering.predict(features_vector_upsampled.T) | |
382 features_clustering_sigmas = features_clustering.covars_ | |
383 # | |
384 features_vector_upsampled_estimated = zeros(shape(features_vector_upsampled)) | |
385 # | |
386 # | |
387 for n in range(0, len(features_vector_upsampled_estimated[0])): | |
388 features_vector_upsampled_estimated[:,n] = features_clustering_means[features_clustering_labels[n]] | |
389 # | |
390 # # for n in range(0,features_vector.shape[0]): | |
391 # # hist(features_vector[1]-features_vector_estimated[1], 256) | |
392 # std(features_vector[1]-features_vector_estimated[1], ddof=1) | |
393 # mean(features_vector[1]-features_vector_estimated[1]) | |
394 # | |
395 print "[II] Features MSE for %d clusters: %.3f" % (n_clusters, mse(features_vector_upsampled, features_vector_upsampled_estimated)) | |
396 | |
397 | |
398 | |
399 def cross_validate_clustering(data, estimator): | |
400 print "[II] Crossvalidating... " | |
401 estimator_fulldata = estimator | |
402 estimator_fulldata.fit(data) | |
403 | |
404 # labels = estimator_fulldata.predict(data) | |
405 means = estimator_fulldata.means_ | |
406 # print means | |
407 | |
408 percents = arange(0.1,0.6,0.1) | |
409 MSEs = [] | |
410 reconstructed = zeros(shape(data)) | |
411 labels = estimator.predict(data) | |
412 for n in range(0, len(reconstructed)): | |
413 reconstructed[n,:] = means[labels[n]] | |
414 | |
415 MSEs.append(mse(data,reconstructed)) | |
416 for p in percents: | |
417 train,test = cross_validation.train_test_split(data,test_size=p,random_state=0) | |
418 train = matrix(train) | |
419 test = matrix(test) | |
420 # print shape(train) | |
421 # print shape(test) | |
422 estimator.fit(train) | |
423 means = estimator.means_ | |
424 labels = estimator.predict(test) | |
425 reconstructed = zeros(shape(test)) | |
426 for n in range(0, len(reconstructed)): | |
427 reconstructed[n,:] = means[labels[n]] | |
428 | |
429 m = mse(test,reconstructed) | |
430 | |
431 print "[II] MSE for clustering crossvalidated data %.2f-%.2f: %.5f" % ((1-p), p, m) | |
432 MSEs.append(m) | |
433 | |
434 print "[II] Crossvalidation complete" | |
435 | |
436 return MSEs | |
437 | |
438 # print "[II] Trying Cross Validation" | |
439 | |
440 # cross_validate_clustering(features_vector_upsampled.T, features_clustering) | |
441 | |
442 | |
443 # Construct parameters alphabet, each symbol is going to be a different column vector | |
444 # in parameter code matrix | |
445 | |
446 | |
447 def vector_to_states(X): | |
448 """ | |
449 Input: a vector MxN with N samples and M variables | |
450 Output: a codeword dictionary `parameters_alphabet', | |
451 state_seq, inverse `parameters_alphabet_inv' """ | |
452 | |
453 | |
454 parameters_alphabet = {} | |
455 n = 0 | |
456 | |
457 for i in range(0, X.shape[1]): | |
458 vec = tuple(ravel(X[:,i])) | |
459 if vec not in parameters_alphabet: | |
460 parameters_alphabet[vec] = n | |
461 n += 1 | |
462 | |
463 parameters_alphabet_inv = dict([(parameters_alphabet[m],m) for m in parameters_alphabet]) | |
464 | |
465 state_seq = array([parameters_alphabet[tuple(ravel(X[:,m]))] for m in range(0, parameters_vector_upsampled_code.shape[1])] ) | |
466 | |
467 return state_seq, parameters_alphabet, parameters_alphabet_inv | |
468 | |
469 | |
470 def states_to_vector(predicted, parameters_alphabet_inv): | |
471 estimated = matrix(zeros((len(parameters_alphabet_inv[0]), len(predicted)))) | |
472 for i in range(0, len(state_seq)): | |
473 estimated[:, i] = matrix(parameters_alphabet_inv[predicted[i]]).T | |
474 | |
475 return estimated | |
476 | |
477 state_seq, parameters_alphabet, parameters_alphabet_inv = vector_to_states(parameters_vector_upsampled_code) | |
478 | |
479 | |
480 parameters_change_variable = matrix(diff_and_pad(parameters_vector_upsampled)!=0).astype(int) | |
481 | |
482 changes_state_seq, changes_parameters_alphabet, changes_parameters_alphabet_inv = vector_to_states(parameters_change_variable) | |
483 | |
484 | |
485 # This is an hmm that just codes the changes" | |
486 # We have only two states, change and stay the same. | |
487 | |
488 print "[II] (changes hmm-chain) Creating emission probability mixtures for every state " | |
489 | |
490 gmm_list_changes = [] | |
491 for n in range(0, 2): | |
492 vectors = features_vector_upsampled[:,changes_state_seq == n] | |
493 gmm = GMM(n_components=n_clusters, covariance_type = 'diag') | |
494 gmm.fit(vectors.T) | |
495 gmm_list_changes.append(gmm) | |
496 | |
497 | |
498 hmm_changes = hmm.GMMHMM(n_components=2, gmms=array(gmm_list_changes),n_mix=n_clusters) | |
499 hmm_changes.fit([array(features_vector_upsampled).T]) | |
500 | |
501 subplot(211) | |
502 plot(parameters_change_variable.T) | |
503 subplot(212) | |
504 | |
505 changes_predicted_states = hmm_changes.predict(array(features_vector_upsampled.T)) | |
506 predicted_changes_estimated = states_to_vector(changes_predicted_states, changes_parameters_alphabet_inv) | |
507 | |
508 plot(predicted_changes_estimated.T) | |
509 | |
510 | |
511 | |
512 | |
513 # End of changes HMM here | |
514 | |
515 | |
516 print "[II] Creating emission probability mixtures for every state" | |
517 gmm_list = [] | |
518 for n in range(0, 3): | |
519 vectors = features_vector_upsampled[:,state_seq == n] | |
520 gmm = GMM(n_components=n_clusters,covariance_type = 'diag') | |
521 gmm.fit(vectors.T) | |
522 gmm_list.append(gmm) | |
523 | |
524 hmm1 = hmm.GMMHMM(n_components=3, gmms=array(gmm_list),n_mix=n_clusters) | |
525 hmm1.fit([array(features_vector_upsampled).T]) | |
526 | |
527 figure() | |
528 subplot(221) | |
529 plot(parameters_vector_upsampled_code.T) | |
530 | |
531 predicted = hmm1.predict(array(features_vector_upsampled.T)) | |
532 | |
533 code_estimated = matrix(zeros((len(parameters_vector_upsampled), len(state_seq)))) | |
534 | |
535 # for i in range(0, len(state_seq)): | |
536 # code_estimated[:, i] = matrix(parameters_alphabet_inv[predicted[i]]).T | |
537 | |
538 code_estimated = states_to_vector(predicted,parameters_alphabet_inv) | |
539 subplot(222) | |
540 plot(code_estimated.T) | |
541 | |
542 reconstructed_original = adm_matrix_reconstruct(parameters_vector_upsampled_code, parameters_vector_upsampled_firstval) | |
543 subplot(223) | |
544 plot(reconstructed_original.T) | |
545 subplot(224) | |
546 reconstructed_estimated = adm_matrix_reconstruct(code_estimated, parameters_vector_upsampled_firstval) | |
547 plot(reconstructed_estimated.T) | |
548 | |
549 # scatter(features_vector_upsampled[0,:],features_vector_upsampled_estimated[0,:]) | |
550 # scatter(features_vector_upsampled[1,:],features_vector_upsampled_estimated[1,:]) | |
551 # | |
552 # xlabel('Original Features on Principal Components') | |
553 # ylabel('Estimated Features on Principal Components') | |
554 # title('Original vs Estimated Features') | |
555 # savefig('original_vs_estimated.png') | |
556 | |
557 | |
558 | |
559 # | |
560 # | |
561 # print "[II] Testing Gaussian Naive Bayes Classifier" | |
562 ## | |
563 # gnb = GaussianNB() | |
564 # gnb.fit(features_vector_upsampled.T, parameters_vector_upsampled_code.T) | |
565 # parameters_vector_upsampled_code_estimated = gnb.predict(features_vector_upsampled.T) | |
566 # | |
567 # | |
568 ## | |
569 # print "[II] Raw Parameters - Gaussian Naive Bayes classifier testing ratio: %.4f" % (float(sum((parameters_vector_upsampled_code_estimated == parameters_vector_upsampled_code).astype(float)))/float(len(parameters_k_means_labels))) | |
570 ## | |
571 # plot(adm_matrix_reconstruct(parameters_vector_upsampled_code_estimated,parameters_vector_upsampled_firstval,dmin,dmax).T) | |
572 # | |
573 # print "[II] Trying ADM-coded parameters" | |
574 # UpsamplingFactor = 100 | |
575 # print "[II] Upsampling features and parameters by a factor of %d" % UpsamplingFactor | |
576 # | |
577 # | |
578 # # Upsampled features and parameters | |
579 # features_vector_upsampled = repeat(features_vector,UpsamplingFactor, axis=1) | |
580 # feature_labels_upsampled = repeat(features_clustering_labels,UpsamplingFactor, axis=0) | |
581 # parameters_vector_upsampled = repeat(parameters_vector,UpsamplingFactor, axis=1) | |
582 # | |
583 # parameters_vector_upsampled_adm = matrix(zeros(shape(parameters_vector_upsampled))) | |
584 # parameters_vector_upsampled_code = matrix(zeros(shape(parameters_vector_upsampled))) | |
585 # parameters_vector_upsampled_firstval = matrix(zeros((parameters_vector_upsampled.shape[0],1))) | |
586 # | |
587 # # Reconstructed parameters | |
588 # | |
589 # parameters_vector_upsampled_reconstructed = matrix(zeros(shape(parameters_vector_upsampled))) | |
590 # | |
591 # dmin = 0.001 | |
592 # dmax = 0.28 | |
593 # tol = 0.001 | |
594 # for i in range(0, parameters_vector_upsampled.shape[0]): | |
595 # out, code, h = adm(parameters_vector_upsampled[i,:],dmin=dmin,dmax=dmax,tol=tol) | |
596 # parameters_vector_upsampled_adm[i,:] = out | |
597 # parameters_vector_upsampled_code[i,:] = code | |
598 # parameters_vector_upsampled_firstval[i, 0] = h | |
599 # | |
600 # | |
601 # # Reconstruct-ADM | |
602 # parameters_vector_upsampled_reconstructed[i,:] = adm_reconstruct(parameters_vector_upsampled_code[i,:],parameters_vector_upsampled_firstval[i,0], dmin=dmin,dmax=dmax) | |
603 # | |
604 # | |
605 # # plot(parameters_vector_upsampled_adm.T, 'r--') | |
606 # | |
607 # | |
608 # # plot(parameters_vector_upsampled.T) | |
609 # # plot(parameters_vector_upsampled_reconstructed.T, 'g.') | |
610 # | |
611 # | |
612 # | |
613 # parameters_vector_reconstructed = zeros(shape(parameters_vector)) | |
614 # for n in range(0, parameters_vector.shape[1]): | |
615 # parameters_vector_reconstructed[:,n] = parameters_vector_upsampled_reconstructed[:,n*UpsamplingFactor] | |
616 # | |
617 # | |
618 # mse_adm = mse(parameters_vector_reconstructed, parameters_vector) | |
619 # | |
620 # print "[II] Expected ADM reconstruction MSE: %.4f" % mse_adm | |
621 # | |
622 # # figure() | |
623 # #plot(parameters_vector.T) | |
624 # # plot(parameters_vector_reconstructed.T) | |
625 # | |
626 # print "[II] Building HMM transition, emission matrices and priors" | |
627 # | |
628 # transmat = zeros((3,3)) | |
629 # startprob = zeros((3,)) | |
630 # emissionmat = zeros((3, n_clusters)) | |
631 # | |
632 # | |
633 # state_labels = parameters_vector_upsampled_code + 1 | |
634 # stateseq = state_labels.T | |
635 # | |
636 # for n in range(0,shape(parameters_vector_upsampled_code)[1]): | |
637 # if n>0: | |
638 # transmat[state_labels[0,n-1],state_labels[0,n]] += 1 | |
639 # startprob[state_labels[0,n]] +=1 | |
640 # emissionmat[state_labels[0,n],feature_labels_upsampled[n]] += 1 | |
641 # | |
642 # | |
643 # for n in range(0, transmat.shape[0]): | |
644 # transmat[n,:]/=sum(transmat[n,:]) | |
645 # emissionmat[n,:]/=sum(emissionmat[n,:]) | |
646 # | |
647 # | |
648 # transmat = matrix(transmat) | |
649 # emissionmat = matrix(emissionmat) | |
650 # # Prior | |
651 # startprob = startprob/sum(startprob) | |
652 # startprob = ravel(startprob) | |
653 # | |
654 # # Vocabulary | |
655 # | |
656 # model = hmm.GMMHMM(n_mix=n_clusters, n_components=3, covariance_type="diag") | |
657 # model.means_ = features_clustering.means_ | |
658 # model.covars_ = features_clustering.covars_ | |
659 # | |
660 # features_vector_array = array(features_vector) | |
661 # features_vector_upsampled_array=array(features_vector_upsampled) | |
662 # | |
663 # model.fit([features_vector_array.T]) | |
664 # stateseq_estimated = model.predict(features_vector_upsampled_array.T) | |
665 # | |
666 # parameters_vector_upsampled_reconstructed_decoded = matrix(zeros(shape(parameters_vector_upsampled))) | |
667 # | |
668 # | |
669 # plot(stateseq_estimated) | |
670 # plot(stateseq) | |
671 # | |
672 # code_estimated = matrix(zeros(shape(parameters_vector_upsampled))) | |
673 # code_estimated[0,:] = stateseq_estimated - 1 | |
674 # | |
675 # | |
676 # | |
677 # parameters_vector_upsampled_reconstructed_estimated = matrix(zeros(shape(parameters_vector_upsampled))) | |
678 # | |
679 # for i in range(0, parameters_vector_upsampled.shape[0]): | |
680 # parameters_vector_upsampled_reconstructed_estimated[i,:] = adm_reconstruct(code_estimated,parameters_vector_upsampled_firstval[i,0], dmin=dmin,dmax=dmax) | |
681 # figure() | |
682 # plot(parameters_vector_upsampled_reconstructed_estimated.T) | |
683 # plot(parameters_vector_upsampled.T) |