annotate experiment-reverb/code/supervised_training_pca_bak_2.py @ 2:c87a9505f294 tip

Added LICENSE for code, removed .wav files
author Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date Sat, 30 Sep 2017 13:25:50 +0100
parents 246d5546657c
children
rev   line source
e@0 1 #!/usr/bin/python2
e@0 2 # -*- coding: utf-8 -*-
e@0 3 """
e@0 4 Created on Thu Apr 23 11:53:17 2015
e@0 5
e@0 6 @author: mmxgn
e@0 7 """
e@0 8
e@0 9 # This file does the cluster estimation and the removal of outliers
e@0 10
e@0 11 from sys import argv, exit
e@0 12 from essentia.standard import YamlInput, YamlOutput
e@0 13 from essentia import Pool
e@0 14 from pca import *
e@0 15
e@0 16 from numpy import *
e@0 17 from sklearn import cluster
e@0 18 from sklearn.metrics import pairwise_distances
e@0 19 from sklearn.cluster import KMeans, MiniBatchKMeans
e@0 20 import matplotlib.pyplot as plt
e@0 21 #from sklearn.mixture import GMM
e@0 22 from sklearn.naive_bayes import GaussianNB, MultinomialNB
e@0 23 from scipy.signal import decimate
e@0 24 from sklearn import cross_validation
e@0 25
e@0 26 #from hmmlearn import hmm
e@0 27 from hmmlearn.hmm import GMM
e@0 28 from hmmlearn import hmm
e@0 29 #from adpcm import adm, adm_reconstruct
e@0 30
e@0 31
e@0 32 mse = lambda A,B: ((array(A)-array(B)) ** 2).mean()
e@0 33
e@0 34
e@0 35
e@0 36
e@0 37 def adm_reconstruct(codeword, h, dmin=.01, dmax=.28):
e@0 38 x = zeros((1, codeword.shape[1]))
e@0 39
e@0 40 delta1 = dmin
e@0 41 delta2 = dmin
e@0 42 Sum = h
e@0 43
e@0 44 x[0] = h
e@0 45 for i in range(0, codeword.shape[1]):
e@0 46 if codeword[0,i] == 0:
e@0 47 delta1 = dmin
e@0 48 delta2 = dmin
e@0 49
e@0 50 elif codeword[0,i] == 1:
e@0 51 delta2 = dmin
e@0 52 Sum += delta1
e@0 53 delta1 *= 2
e@0 54 if delta1 > dmax:
e@0 55 delta1 = dmax
e@0 56
e@0 57 elif codeword[0,i] == -1:
e@0 58 delta1 = dmin
e@0 59 Sum -= delta2
e@0 60 delta2 *= 2
e@0 61 if delta2 > dmax:
e@0 62 delta2 = dmax
e@0 63 x[0,i] = Sum
e@0 64 return x
e@0 65
e@0 66 def adm(x, dmin=.01, dmax=.28, tol=0.0001):
e@0 67
e@0 68 # Adaptive delta modulation adapted by code:
e@0 69 # (adeltamod.m)
e@0 70 #
e@0 71 # Adaptive Delta Modulator
e@0 72 # by Gandhar Desai (gdesai)
e@0 73 # BITS Pilani Goa Campus
e@0 74 # Date: 28 Sept, 2013
e@0 75
e@0 76 xsig = x
e@0 77
e@0 78 Lx = len(x)
e@0 79
e@0 80 ADMout = zeros((1, Lx))
e@0 81 codevec = zeros((1, Lx))
e@0 82
e@0 83
e@0 84 Sum = x[0]
e@0 85 delta1 = dmin
e@0 86 delta2 = dmin
e@0 87 mult1 = 2
e@0 88 mult2 = 2
e@0 89 for i in range(0, Lx):
e@0 90 #print abs(xsig[i] - Sum)
e@0 91 if (abs(xsig[i] - Sum) < tol):
e@0 92 bit = 0
e@0 93 delta2 = dmin
e@0 94 delta1 = dmin
e@0 95
e@0 96
e@0 97 elif (xsig[i] >= Sum):
e@0 98 bit = 1
e@0 99 delta2 = dmin
e@0 100 Sum += delta1
e@0 101 delta1 *= mult1
e@0 102 if delta1 > dmax:
e@0 103 delta1 = dmax
e@0 104
e@0 105
e@0 106 else:
e@0 107 bit = -1
e@0 108 delta1 = dmin
e@0 109 Sum -= delta2
e@0 110 delta2 *= mult2
e@0 111 if delta2 > dmax:
e@0 112 delta2 = dmax
e@0 113
e@0 114
e@0 115
e@0 116 ADMout[0, i] = Sum
e@0 117 codevec[0, i]= bit
e@0 118
e@0 119 return ADMout,codevec, x[0]
e@0 120
e@0 121 if __name__=="__main__":
e@0 122 if len(argv) != 2:
e@0 123 print "[EE] Wrong number of arguments"
e@0 124 print "[II] Correct syntax is:"
e@0 125 print "[II] \t%s <training_file>"
e@0 126 print "[II] where <training_file> is a .yaml file containing the"
e@0 127 print "[II] features of the dataset (try output2_stage/fulltraining-last.yaml)"
e@0 128 exit(-1)
e@0 129
e@0 130
e@0 131 n_clusters = 4
e@0 132 UpsamplingFactor = 1
e@0 133 dmin = 0.001
e@0 134 dmax = 0.28
e@0 135 tol = 0.001
e@0 136
e@0 137 infile = argv[1]
e@0 138
e@0 139 features_pool = YamlInput(filename = infile)()
e@0 140
e@0 141
e@0 142
e@0 143 feature_captions = features_pool.descriptorNames()
e@0 144 parameter_captions = []
e@0 145
e@0 146
e@0 147 for c in features_pool.descriptorNames():
e@0 148 if c.split('.')[0] == 'parameter':
e@0 149 parameter_captions.append(c)
e@0 150 if c.split('.')[0] == 'metadata' or c.split('.')[0] == 'parameter':
e@0 151 feature_captions.remove(c)
e@0 152
e@0 153
e@0 154
e@0 155 close('all')
e@0 156
e@0 157 print "[II] Loaded training data from %s (%s) " % (infile, features_pool['metadata.date'][0])
e@0 158 print "[II] %d Features Available: " % len(feature_captions)
e@0 159
e@0 160
e@0 161
e@0 162 print str(feature_captions).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7]
e@0 163
e@0 164 nfeatures_in = len(feature_captions)
e@0 165 nparameters_in = len(parameter_captions)
e@0 166 features_vector = zeros((nfeatures_in, len(features_pool[feature_captions[0]])))
e@0 167
e@0 168 parameters_vector = zeros((nparameters_in, len(features_pool[parameter_captions[0]])))
e@0 169
e@0 170
e@0 171 for i in range(0, nfeatures_in):
e@0 172 features_vector[i, :] = features_pool[feature_captions[i]].T
e@0 173 for i in range(0, nparameters_in):
e@0 174 parameters_vector[i, :] = features_pool[parameter_captions[0]].T
e@0 175
e@0 176 print "[II] %d parameters used:" % len(parameter_captions)
e@0 177 print str(parameter_captions).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7].replace('parameter.','')
e@0 178
e@0 179 print "[II] Marking silent parts"
e@0 180
e@0 181 silent_parts = zeros((1, len(features_pool[feature_captions[i]].T)))
e@0 182
e@0 183 rms = features_vector[feature_captions.index('rms'), :]
e@0 184
e@0 185 # Implementing Hysteresis Gate -- High threshold is halfway between
e@0 186 # the mean and the max and Low is halfway between the mean dn the min
e@0 187
e@0 188 rms_threshold_mean = mean(rms)
e@0 189
e@0 190 rms_threshold_max = max(rms)
e@0 191 rms_threshold_min = min(rms)
e@0 192
e@0 193 rms_threshold_high = 0.1 * rms_threshold_mean
e@0 194 rms_threshold_low = 0.01 * rms_threshold_mean
e@0 195
e@0 196 for n in range(1, len(rms)):
e@0 197 prev = rms[n-1]
e@0 198 curr = rms[n]
e@0 199
e@0 200 if prev >= rms_threshold_high:
e@0 201 if curr < rms_threshold_low:
e@0 202 silent_parts[0,n] = 1
e@0 203 else:
e@0 204 silent_parts[0,n] = 0
e@0 205 elif prev <= rms_threshold_low:
e@0 206 if curr > rms_threshold_high:
e@0 207 silent_parts[0,n] = 0
e@0 208 else:
e@0 209 silent_parts[0,n] = 1
e@0 210 else:
e@0 211 silent_parts[0,n] = silent_parts[0,n-1]
e@0 212
e@0 213
e@0 214 if silent_parts[0,1] == 1:
e@0 215 silent_parts[0, 0] = 1
e@0 216
e@0 217
e@0 218 # plot(rms)
e@0 219 # plot(silent_parts.T)
e@0 220 # plot(ones((len(rms), 1))*rms_threshold_high)
e@0 221 # plot(ones((len(rms), 1))*rms_threshold_low)
e@0 222
e@0 223 active_index = invert(silent_parts.flatten().astype(bool))
e@0 224
e@0 225 # Keep only active parts
e@0 226
e@0 227 # Uncomment this
e@0 228 features_vector = features_vector[:, active_index]
e@0 229 # parameters_vector = parameters_vector[:, active_index]
e@0 230
e@0 231 moments_vector = zeros((features_vector.shape[0], 2))
e@0 232
e@0 233 print "[II] Storing moments vector"
e@0 234 for i in range(0, features_vector.shape[0]):
e@0 235 mean_ = mean(features_vector[i,:])
e@0 236 std_ = std(features_vector[i,:], ddof=1)
e@0 237 moments_vector[i,0] = mean_
e@0 238 moments_vector[i,1] = std_
e@0 239
e@0 240 features_vector[i,:] = (features_vector[i,:] - mean_)/std_
e@0 241
e@0 242 features_vector_original = features_vector
e@0 243
e@0 244
e@0 245 print "[II] Extracting PCA configuration "
e@0 246
e@0 247 kernel, q, featurelist = extract_pca_configuration_from_data(features_vector)
e@0 248
e@0 249 print "[II] Optimal number of PCs to keep: %d" % q
e@0 250
e@0 251 feature_captions_array = array(feature_captions)
e@0 252
e@0 253 # features_to_keep = features_vector
e@0 254
e@0 255 features_to_keep = list(feature_captions_array[featurelist])
e@0 256 print "[II] Decided to keep %d features:" % len(features_to_keep)
e@0 257 print str(features_to_keep).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7]
e@0 258
e@0 259
e@0 260
e@0 261 # Keep the desired features
e@0 262 #Uncomment this
e@0 263 features_kept_data = features_vector[featurelist,:]
e@0 264 # features_kept_data = features_vector
e@0 265 # features_kept_data = features_vector
e@0 266
e@0 267 # Generate the parameter clusters using k-means
e@0 268
e@0 269 # Uncomment this
e@0 270 features_vector = (kernel.T*features_kept_data)[0:q,:]
e@0 271 #features_vector = log(features_vector+0.001)
e@0 272 # features_vector = features_vector_original
e@0 273
e@0 274
e@0 275 # parameters_k_means = KMeans(n_clusters = parameters_k, init='k-means++', max_iter=300, tol=0.0000001, verbose = 1)
e@0 276 parameters_k_means = KMeans(init='k-means++', n_init=10, max_iter=300, tol=0.0000001, verbose = 0)
e@0 277 # # parameters_k_means = MiniBatchKMeans(init='k-means++', max_iter=300, tol=0.00001, verbose = 1)
e@0 278 #
e@0 279 # # Quantize the differences of the parameters instead of the parameters themselves
e@0 280 # parameters_vector_diff = concatenate((zeros((shape(parameters_vector)[0],1)),diff(parameters_vector, axis=1)),axis=1)
e@0 281 # features_vector_diff = concatenate((zeros((shape(features_vector)[0],1)),diff(features_vector,axis=1)),axis=1)
e@0 282 #
e@0 283 # # Delete this afterwards
e@0 284 # # features_vector = features_vector_diff
e@0 285 # parameters_k_means.fit(parameters_vector_diff.T)
e@0 286
e@0 287 print "[II] Trying ADM-coded parameters"
e@0 288 print "[II] Upsampling features and parameters by a factor of %d" % UpsamplingFactor
e@0 289
e@0 290
e@0 291 # Upsampled features and parameters
e@0 292 features_vector_upsampled = repeat(features_vector,UpsamplingFactor, axis=1)
e@0 293 # feature_labels_upsampled = repeat(features_clustering_labels,UpsamplingFactor, axis=0)
e@0 294 parameters_vector_upsampled = repeat(parameters_vector,UpsamplingFactor, axis=1)
e@0 295
e@0 296 parameters_vector_upsampled_adm = matrix(zeros(shape(parameters_vector_upsampled)))
e@0 297 parameters_vector_upsampled_code = matrix(zeros(shape(parameters_vector_upsampled)))
e@0 298 parameters_vector_upsampled_firstval = matrix(zeros((parameters_vector_upsampled.shape[0],1)))
e@0 299
e@0 300 # Reconstructed parameters
e@0 301
e@0 302 parameters_vector_upsampled_reconstructed = matrix(zeros(shape(parameters_vector_upsampled)))
e@0 303
e@0 304
e@0 305
e@0 306
e@0 307 def adm_matrix(X, dmin=0.001,dmax=0.28,tol=0.001):
e@0 308
e@0 309 out = matrix(zeros(shape(X)))
e@0 310 code = matrix(zeros(shape(X)))
e@0 311 firstval = matrix(zeros((X.shape[0], 1)))
e@0 312
e@0 313 for i in range(0, X.shape[0]):
e@0 314 out[i,:], code[i,:], firstval[i,0] = adm(X[i,:],dmin=dmin,dmax=dmax,tol=tol)
e@0 315
e@0 316 return out,code,firstval
e@0 317
e@0 318 # parameters_vector_upsampled_reconstructed[i,:] = adm_reconstruct(parameters_vector_upsampled_code[i,:],parameters_vector_upsampled_firstval[i,0], dmin=dmin,dmax=dmax)
e@0 319
e@0 320 def adm_matrix_reconstruct(code, firstval, dmin=0.001, dmax=0.28):
e@0 321 X = matrix(zeros(shape(code)))
e@0 322 for i in range(0, code.shape[0]):
e@0 323 X[i,:] = adm_reconstruct(code[i,:], firstval[i,0], dmin=dmin, dmax=dmax)
e@0 324
e@0 325 return X
e@0 326
e@0 327
e@0 328 parameters_vector_upsampled_adm, parameters_vector_upsampled_code, parameters_vector_upsampled_firstval = adm_matrix(parameters_vector_upsampled, dmin, dmax, tol)
e@0 329
e@0 330
e@0 331 def diff_and_pad(X):
e@0 332 return concatenate((
e@0 333 zeros((
e@0 334 shape(X)[0],
e@0 335 1
e@0 336 )),
e@0 337 diff(X, axis=1)),
e@0 338 axis=1)
e@0 339
e@0 340
e@0 341 #features_vector_upsampled = diff_and_pad(diff_and_pad(features_vector_upsampled))
e@0 342
e@0 343 # features_vector_diff = concatenate((zeros((shape(features_vector)[0],1)),diff(features_vector,axis=1)),axis=1)
e@0 344
e@0 345
e@0 346
e@0 347 # Segmentation stuff
e@0 348
e@0 349
e@0 350
e@0 351 # for i in range(0, parameters_vector_upsampled.shape[0]):
e@0 352 # out, code, h = adm(parameters_vector_upsampled[i,:],dmin=dmin,dmax=dmax,tol=tol)
e@0 353 # parameters_vector_upsampled_adm[i,:] = out
e@0 354 # parameters_vector_upsampled_code[i,:] = code
e@0 355 # parameters_vector_upsampled_firstval[i, 0] = h
e@0 356
e@0 357 # parameters_k_means.fit(parameters_vector.T)
e@0 358 ##
e@0 359 # parameters_k_means_centers = parameters_k_means.cluster_centers_
e@0 360 # parameters_k_means_labels = parameters_k_means.labels_
e@0 361 ##
e@0 362 # parameters_vector_estimated = zeros(shape(parameters_vector))
e@0 363 ##
e@0 364 # for n in range(0, len(parameters_vector_estimated[0])):
e@0 365 # parameters_vector_estimated[:,n] = parameters_k_means_centers[parameters_k_means_labels[n]]
e@0 366 ##
e@0 367 ### plot(parameters_vector[0])
e@0 368 ## # plot(parameters_vector_estimated[0])
e@0 369 ##
e@0 370 ## # PROvLIMA EDW
e@0 371 # print "[II] Parameters MSE for %d clusters: %.3f" % (len(parameters_k_means.cluster_centers_), mse(parameters_vector, parameters_vector_estimated))
e@0 372 #
e@0 373 ##
e@0 374 print "[II] Clustering features."
e@0 375 #
e@0 376 features_clustering = GMM(n_components = n_clusters, covariance_type='diag')
e@0 377 #
e@0 378 features_clustering.fit( features_vector_upsampled.T, y=parameters_vector_upsampled_code)
e@0 379 #
e@0 380 features_clustering_means = features_clustering.means_
e@0 381 features_clustering_labels = features_clustering.predict(features_vector_upsampled.T)
e@0 382 features_clustering_sigmas = features_clustering.covars_
e@0 383 #
e@0 384 features_vector_upsampled_estimated = zeros(shape(features_vector_upsampled))
e@0 385 #
e@0 386 #
e@0 387 for n in range(0, len(features_vector_upsampled_estimated[0])):
e@0 388 features_vector_upsampled_estimated[:,n] = features_clustering_means[features_clustering_labels[n]]
e@0 389 #
e@0 390 # # for n in range(0,features_vector.shape[0]):
e@0 391 # # hist(features_vector[1]-features_vector_estimated[1], 256)
e@0 392 # std(features_vector[1]-features_vector_estimated[1], ddof=1)
e@0 393 # mean(features_vector[1]-features_vector_estimated[1])
e@0 394 #
e@0 395 print "[II] Features MSE for %d clusters: %.3f" % (n_clusters, mse(features_vector_upsampled, features_vector_upsampled_estimated))
e@0 396
e@0 397
e@0 398
e@0 399 def cross_validate_clustering(data, estimator):
e@0 400 print "[II] Crossvalidating... "
e@0 401 estimator_fulldata = estimator
e@0 402 estimator_fulldata.fit(data)
e@0 403
e@0 404 # labels = estimator_fulldata.predict(data)
e@0 405 means = estimator_fulldata.means_
e@0 406 # print means
e@0 407
e@0 408 percents = arange(0.1,0.6,0.1)
e@0 409 MSEs = []
e@0 410 reconstructed = zeros(shape(data))
e@0 411 labels = estimator.predict(data)
e@0 412 for n in range(0, len(reconstructed)):
e@0 413 reconstructed[n,:] = means[labels[n]]
e@0 414
e@0 415 MSEs.append(mse(data,reconstructed))
e@0 416 for p in percents:
e@0 417 train,test = cross_validation.train_test_split(data,test_size=p,random_state=0)
e@0 418 train = matrix(train)
e@0 419 test = matrix(test)
e@0 420 # print shape(train)
e@0 421 # print shape(test)
e@0 422 estimator.fit(train)
e@0 423 means = estimator.means_
e@0 424 labels = estimator.predict(test)
e@0 425 reconstructed = zeros(shape(test))
e@0 426 for n in range(0, len(reconstructed)):
e@0 427 reconstructed[n,:] = means[labels[n]]
e@0 428
e@0 429 m = mse(test,reconstructed)
e@0 430
e@0 431 print "[II] MSE for clustering crossvalidated data %.2f-%.2f: %.5f" % ((1-p), p, m)
e@0 432 MSEs.append(m)
e@0 433
e@0 434 print "[II] Crossvalidation complete"
e@0 435
e@0 436 return MSEs
e@0 437
e@0 438 # print "[II] Trying Cross Validation"
e@0 439
e@0 440 # cross_validate_clustering(features_vector_upsampled.T, features_clustering)
e@0 441
e@0 442
e@0 443 # Construct parameters alphabet, each symbol is going to be a different column vector
e@0 444 # in parameter code matrix
e@0 445
e@0 446
e@0 447 def vector_to_states(X):
e@0 448 """
e@0 449 Input: a vector MxN with N samples and M variables
e@0 450 Output: a codeword dictionary `parameters_alphabet',
e@0 451 state_seq, inverse `parameters_alphabet_inv' """
e@0 452
e@0 453
e@0 454 parameters_alphabet = {}
e@0 455 n = 0
e@0 456
e@0 457 for i in range(0, X.shape[1]):
e@0 458 vec = tuple(ravel(X[:,i]))
e@0 459 if vec not in parameters_alphabet:
e@0 460 parameters_alphabet[vec] = n
e@0 461 n += 1
e@0 462
e@0 463 parameters_alphabet_inv = dict([(parameters_alphabet[m],m) for m in parameters_alphabet])
e@0 464
e@0 465 state_seq = array([parameters_alphabet[tuple(ravel(X[:,m]))] for m in range(0, parameters_vector_upsampled_code.shape[1])] )
e@0 466
e@0 467 return state_seq, parameters_alphabet, parameters_alphabet_inv
e@0 468
e@0 469
e@0 470 def states_to_vector(predicted, parameters_alphabet_inv):
e@0 471 estimated = matrix(zeros((len(parameters_alphabet_inv[0]), len(predicted))))
e@0 472 for i in range(0, len(state_seq)):
e@0 473 estimated[:, i] = matrix(parameters_alphabet_inv[predicted[i]]).T
e@0 474
e@0 475 return estimated
e@0 476
e@0 477 state_seq, parameters_alphabet, parameters_alphabet_inv = vector_to_states(parameters_vector_upsampled_code)
e@0 478
e@0 479
e@0 480 parameters_change_variable = matrix(diff_and_pad(parameters_vector_upsampled)!=0).astype(int)
e@0 481
e@0 482 changes_state_seq, changes_parameters_alphabet, changes_parameters_alphabet_inv = vector_to_states(parameters_change_variable)
e@0 483
e@0 484
e@0 485 # This is an hmm that just codes the changes"
e@0 486 # We have only two states, change and stay the same.
e@0 487
e@0 488 print "[II] (changes hmm-chain) Creating emission probability mixtures for every state "
e@0 489
e@0 490 gmm_list_changes = []
e@0 491 for n in range(0, 2):
e@0 492 vectors = features_vector_upsampled[:,changes_state_seq == n]
e@0 493 gmm = GMM(n_components=n_clusters, covariance_type = 'diag')
e@0 494 gmm.fit(vectors.T)
e@0 495 gmm_list_changes.append(gmm)
e@0 496
e@0 497
e@0 498 hmm_changes = hmm.GMMHMM(n_components=2, gmms=array(gmm_list_changes),n_mix=n_clusters)
e@0 499 hmm_changes.fit([array(features_vector_upsampled).T])
e@0 500
e@0 501 subplot(211)
e@0 502 plot(parameters_change_variable.T)
e@0 503 subplot(212)
e@0 504
e@0 505 changes_predicted_states = hmm_changes.predict(array(features_vector_upsampled.T))
e@0 506 predicted_changes_estimated = states_to_vector(changes_predicted_states, changes_parameters_alphabet_inv)
e@0 507
e@0 508 plot(predicted_changes_estimated.T)
e@0 509
e@0 510
e@0 511
e@0 512
e@0 513 # End of changes HMM here
e@0 514
e@0 515
e@0 516 print "[II] Creating emission probability mixtures for every state"
e@0 517 gmm_list = []
e@0 518 for n in range(0, 3):
e@0 519 vectors = features_vector_upsampled[:,state_seq == n]
e@0 520 gmm = GMM(n_components=n_clusters,covariance_type = 'diag')
e@0 521 gmm.fit(vectors.T)
e@0 522 gmm_list.append(gmm)
e@0 523
e@0 524 hmm1 = hmm.GMMHMM(n_components=3, gmms=array(gmm_list),n_mix=n_clusters)
e@0 525 hmm1.fit([array(features_vector_upsampled).T])
e@0 526
e@0 527 figure()
e@0 528 subplot(221)
e@0 529 plot(parameters_vector_upsampled_code.T)
e@0 530
e@0 531 predicted = hmm1.predict(array(features_vector_upsampled.T))
e@0 532
e@0 533 code_estimated = matrix(zeros((len(parameters_vector_upsampled), len(state_seq))))
e@0 534
e@0 535 # for i in range(0, len(state_seq)):
e@0 536 # code_estimated[:, i] = matrix(parameters_alphabet_inv[predicted[i]]).T
e@0 537
e@0 538 code_estimated = states_to_vector(predicted,parameters_alphabet_inv)
e@0 539 subplot(222)
e@0 540 plot(code_estimated.T)
e@0 541
e@0 542 reconstructed_original = adm_matrix_reconstruct(parameters_vector_upsampled_code, parameters_vector_upsampled_firstval)
e@0 543 subplot(223)
e@0 544 plot(reconstructed_original.T)
e@0 545 subplot(224)
e@0 546 reconstructed_estimated = adm_matrix_reconstruct(code_estimated, parameters_vector_upsampled_firstval)
e@0 547 plot(reconstructed_estimated.T)
e@0 548
e@0 549 # scatter(features_vector_upsampled[0,:],features_vector_upsampled_estimated[0,:])
e@0 550 # scatter(features_vector_upsampled[1,:],features_vector_upsampled_estimated[1,:])
e@0 551 #
e@0 552 # xlabel('Original Features on Principal Components')
e@0 553 # ylabel('Estimated Features on Principal Components')
e@0 554 # title('Original vs Estimated Features')
e@0 555 # savefig('original_vs_estimated.png')
e@0 556
e@0 557
e@0 558
e@0 559 #
e@0 560 #
e@0 561 # print "[II] Testing Gaussian Naive Bayes Classifier"
e@0 562 ##
e@0 563 # gnb = GaussianNB()
e@0 564 # gnb.fit(features_vector_upsampled.T, parameters_vector_upsampled_code.T)
e@0 565 # parameters_vector_upsampled_code_estimated = gnb.predict(features_vector_upsampled.T)
e@0 566 #
e@0 567 #
e@0 568 ##
e@0 569 # print "[II] Raw Parameters - Gaussian Naive Bayes classifier testing ratio: %.4f" % (float(sum((parameters_vector_upsampled_code_estimated == parameters_vector_upsampled_code).astype(float)))/float(len(parameters_k_means_labels)))
e@0 570 ##
e@0 571 # plot(adm_matrix_reconstruct(parameters_vector_upsampled_code_estimated,parameters_vector_upsampled_firstval,dmin,dmax).T)
e@0 572 #
e@0 573 # print "[II] Trying ADM-coded parameters"
e@0 574 # UpsamplingFactor = 100
e@0 575 # print "[II] Upsampling features and parameters by a factor of %d" % UpsamplingFactor
e@0 576 #
e@0 577 #
e@0 578 # # Upsampled features and parameters
e@0 579 # features_vector_upsampled = repeat(features_vector,UpsamplingFactor, axis=1)
e@0 580 # feature_labels_upsampled = repeat(features_clustering_labels,UpsamplingFactor, axis=0)
e@0 581 # parameters_vector_upsampled = repeat(parameters_vector,UpsamplingFactor, axis=1)
e@0 582 #
e@0 583 # parameters_vector_upsampled_adm = matrix(zeros(shape(parameters_vector_upsampled)))
e@0 584 # parameters_vector_upsampled_code = matrix(zeros(shape(parameters_vector_upsampled)))
e@0 585 # parameters_vector_upsampled_firstval = matrix(zeros((parameters_vector_upsampled.shape[0],1)))
e@0 586 #
e@0 587 # # Reconstructed parameters
e@0 588 #
e@0 589 # parameters_vector_upsampled_reconstructed = matrix(zeros(shape(parameters_vector_upsampled)))
e@0 590 #
e@0 591 # dmin = 0.001
e@0 592 # dmax = 0.28
e@0 593 # tol = 0.001
e@0 594 # for i in range(0, parameters_vector_upsampled.shape[0]):
e@0 595 # out, code, h = adm(parameters_vector_upsampled[i,:],dmin=dmin,dmax=dmax,tol=tol)
e@0 596 # parameters_vector_upsampled_adm[i,:] = out
e@0 597 # parameters_vector_upsampled_code[i,:] = code
e@0 598 # parameters_vector_upsampled_firstval[i, 0] = h
e@0 599 #
e@0 600 #
e@0 601 # # Reconstruct-ADM
e@0 602 # parameters_vector_upsampled_reconstructed[i,:] = adm_reconstruct(parameters_vector_upsampled_code[i,:],parameters_vector_upsampled_firstval[i,0], dmin=dmin,dmax=dmax)
e@0 603 #
e@0 604 #
e@0 605 # # plot(parameters_vector_upsampled_adm.T, 'r--')
e@0 606 #
e@0 607 #
e@0 608 # # plot(parameters_vector_upsampled.T)
e@0 609 # # plot(parameters_vector_upsampled_reconstructed.T, 'g.')
e@0 610 #
e@0 611 #
e@0 612 #
e@0 613 # parameters_vector_reconstructed = zeros(shape(parameters_vector))
e@0 614 # for n in range(0, parameters_vector.shape[1]):
e@0 615 # parameters_vector_reconstructed[:,n] = parameters_vector_upsampled_reconstructed[:,n*UpsamplingFactor]
e@0 616 #
e@0 617 #
e@0 618 # mse_adm = mse(parameters_vector_reconstructed, parameters_vector)
e@0 619 #
e@0 620 # print "[II] Expected ADM reconstruction MSE: %.4f" % mse_adm
e@0 621 #
e@0 622 # # figure()
e@0 623 # #plot(parameters_vector.T)
e@0 624 # # plot(parameters_vector_reconstructed.T)
e@0 625 #
e@0 626 # print "[II] Building HMM transition, emission matrices and priors"
e@0 627 #
e@0 628 # transmat = zeros((3,3))
e@0 629 # startprob = zeros((3,))
e@0 630 # emissionmat = zeros((3, n_clusters))
e@0 631 #
e@0 632 #
e@0 633 # state_labels = parameters_vector_upsampled_code + 1
e@0 634 # stateseq = state_labels.T
e@0 635 #
e@0 636 # for n in range(0,shape(parameters_vector_upsampled_code)[1]):
e@0 637 # if n>0:
e@0 638 # transmat[state_labels[0,n-1],state_labels[0,n]] += 1
e@0 639 # startprob[state_labels[0,n]] +=1
e@0 640 # emissionmat[state_labels[0,n],feature_labels_upsampled[n]] += 1
e@0 641 #
e@0 642 #
e@0 643 # for n in range(0, transmat.shape[0]):
e@0 644 # transmat[n,:]/=sum(transmat[n,:])
e@0 645 # emissionmat[n,:]/=sum(emissionmat[n,:])
e@0 646 #
e@0 647 #
e@0 648 # transmat = matrix(transmat)
e@0 649 # emissionmat = matrix(emissionmat)
e@0 650 # # Prior
e@0 651 # startprob = startprob/sum(startprob)
e@0 652 # startprob = ravel(startprob)
e@0 653 #
e@0 654 # # Vocabulary
e@0 655 #
e@0 656 # model = hmm.GMMHMM(n_mix=n_clusters, n_components=3, covariance_type="diag")
e@0 657 # model.means_ = features_clustering.means_
e@0 658 # model.covars_ = features_clustering.covars_
e@0 659 #
e@0 660 # features_vector_array = array(features_vector)
e@0 661 # features_vector_upsampled_array=array(features_vector_upsampled)
e@0 662 #
e@0 663 # model.fit([features_vector_array.T])
e@0 664 # stateseq_estimated = model.predict(features_vector_upsampled_array.T)
e@0 665 #
e@0 666 # parameters_vector_upsampled_reconstructed_decoded = matrix(zeros(shape(parameters_vector_upsampled)))
e@0 667 #
e@0 668 #
e@0 669 # plot(stateseq_estimated)
e@0 670 # plot(stateseq)
e@0 671 #
e@0 672 # code_estimated = matrix(zeros(shape(parameters_vector_upsampled)))
e@0 673 # code_estimated[0,:] = stateseq_estimated - 1
e@0 674 #
e@0 675 #
e@0 676 #
e@0 677 # parameters_vector_upsampled_reconstructed_estimated = matrix(zeros(shape(parameters_vector_upsampled)))
e@0 678 #
e@0 679 # for i in range(0, parameters_vector_upsampled.shape[0]):
e@0 680 # parameters_vector_upsampled_reconstructed_estimated[i,:] = adm_reconstruct(code_estimated,parameters_vector_upsampled_firstval[i,0], dmin=dmin,dmax=dmax)
e@0 681 # figure()
e@0 682 # plot(parameters_vector_upsampled_reconstructed_estimated.T)
e@0 683 # plot(parameters_vector_upsampled.T)