e@0: #!/usr/bin/python2 e@0: # -*- coding: utf-8 -*- e@0: """ e@0: Created on Thu Apr 23 11:53:17 2015 e@0: e@0: @author: mmxgn e@0: """ e@0: e@0: # This file does the cluster estimation and the removal of outliers e@0: e@0: from sys import argv, exit e@0: from essentia.standard import YamlInput, YamlOutput e@0: from essentia import Pool e@0: from pca import * e@0: e@0: from numpy import * e@0: from sklearn import cluster e@0: from sklearn.metrics import pairwise_distances e@0: from sklearn.cluster import KMeans, MiniBatchKMeans e@0: import matplotlib.pyplot as plt e@0: #from sklearn.mixture import GMM e@0: from sklearn.naive_bayes import GaussianNB, MultinomialNB e@0: from scipy.signal import decimate e@0: from sklearn import cross_validation e@0: e@0: #from hmmlearn import hmm e@0: from hmmlearn.hmm import GMM e@0: from hmmlearn import hmm e@0: #from adpcm import adm, adm_reconstruct e@0: e@0: e@0: mse = lambda A,B: ((array(A)-array(B)) ** 2).mean() e@0: e@0: e@0: def smooth_matrix_1D(X): e@0: window = scipy.signal.gaussian(51,4) e@0: window = window/sum(window) e@0: intermx = zeros((X.shape[0],X.shape[1]+100)) e@0: intermx[:, 50:-50] = X e@0: e@0: for m in range(0, X.shape[0]): e@0: # print intermx.shape e@0: intermx[m,:] = convolve(ravel(intermx[m,:]), window,'same') e@0: e@0: return intermx[:,50:-50] e@0: e@0: def adm_reconstruct(codeword, h, dmin=.01, dmax=.28): e@0: x = zeros((1, codeword.shape[1])) e@0: e@0: delta1 = dmin e@0: delta2 = dmin e@0: Sum = h e@0: e@0: x[0] = h e@0: for i in range(0, codeword.shape[1]): e@0: if codeword[0,i] == 0: e@0: delta1 = dmin e@0: delta2 = dmin e@0: e@0: elif codeword[0,i] == 1: e@0: delta2 = dmin e@0: Sum += delta1 e@0: delta1 *= 2 e@0: if delta1 > dmax: e@0: delta1 = dmax e@0: e@0: elif codeword[0,i] == -1: e@0: delta1 = dmin e@0: Sum -= delta2 e@0: delta2 *= 2 e@0: if delta2 > dmax: e@0: delta2 = dmax e@0: x[0,i] = Sum e@0: return x e@0: e@0: def adm(x, dmin=.01, dmax=.28, tol=0.0001): e@0: e@0: # Adaptive delta modulation adapted by code: e@0: # (adeltamod.m) e@0: # e@0: # Adaptive Delta Modulator e@0: # by Gandhar Desai (gdesai) e@0: # BITS Pilani Goa Campus e@0: # Date: 28 Sept, 2013 e@0: e@0: xsig = x e@0: e@0: Lx = len(x) e@0: e@0: ADMout = zeros((1, Lx)) e@0: codevec = zeros((1, Lx)) e@0: e@0: e@0: Sum = x[0] e@0: delta1 = dmin e@0: delta2 = dmin e@0: mult1 = 2 e@0: mult2 = 2 e@0: for i in range(0, Lx): e@0: #print abs(xsig[i] - Sum) e@0: if (abs(xsig[i] - Sum) < tol): e@0: bit = 0 e@0: delta2 = dmin e@0: delta1 = dmin e@0: e@0: e@0: elif (xsig[i] >= Sum): e@0: bit = 1 e@0: delta2 = dmin e@0: Sum += delta1 e@0: delta1 *= mult1 e@0: if delta1 > dmax: e@0: delta1 = dmax e@0: e@0: e@0: else: e@0: bit = -1 e@0: delta1 = dmin e@0: Sum -= delta2 e@0: delta2 *= mult2 e@0: if delta2 > dmax: e@0: delta2 = dmax e@0: e@0: e@0: e@0: ADMout[0, i] = Sum e@0: codevec[0, i]= bit e@0: e@0: return ADMout,codevec, x[0] e@0: e@0: if __name__=="__main__": e@0: if len(argv) != 2: e@0: print "[EE] Wrong number of arguments" e@0: print "[II] Correct syntax is:" e@0: print "[II] \t%s " e@0: print "[II] where is a .yaml file containing the" e@0: print "[II] features of the dataset (try output2_stage/fulltraining-last.yaml)" e@0: exit(-1) e@0: e@0: e@0: n_clusters = 2 e@0: UpsamplingFactor = 1 e@0: dmin = 0.001 e@0: dmax = 0.28 e@0: tol = 0.001 e@0: e@0: infile = argv[1] e@0: e@0: features_pool = YamlInput(filename = infile)() e@0: e@0: e@0: e@0: feature_captions = features_pool.descriptorNames() e@0: parameter_captions = [] e@0: e@0: e@0: for c in features_pool.descriptorNames(): e@0: if c.split('.')[0] == 'parameter': e@0: parameter_captions.append(c) e@0: if c.split('.')[0] == 'metadata' or c.split('.')[0] == 'parameter': e@0: feature_captions.remove(c) e@0: e@0: e@0: e@0: close('all') e@0: e@0: print "[II] Loaded training data from %s (%s) " % (infile, features_pool['metadata.date'][0]) e@0: print "[II] %d Features Available: " % len(feature_captions) e@0: e@0: e@0: e@0: print str(feature_captions).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7] e@0: e@0: nfeatures_in = len(feature_captions) e@0: nparameters_in = len(parameter_captions) e@0: 1 features_vector = zeros((nfeatures_in, len(features_pool[feature_captions[0]]))) e@0: e@0: parameters_vector = zeros((nparameters_in, len(features_pool[parameter_captions[0]]))) e@0: e@0: e@0: for i in range(0, nfeatures_in): e@0: features_vector[i, :] = features_pool[feature_captions[i]].T e@0: for i in range(0, nparameters_in): e@0: parameters_vector[i, :] = features_pool[parameter_captions[0]].T e@0: e@0: print "[II] %d parameters used:" % len(parameter_captions) e@0: print str(parameter_captions).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7].replace('parameter.','') e@0: e@0: print "[II] Marking silent parts" e@0: e@0: silent_parts = zeros((1, len(features_pool[feature_captions[i]].T))) e@0: e@0: rms = features_vector[feature_captions.index('rms'), :] e@0: e@0: # Implementing Hysteresis Gate -- High threshold is halfway between e@0: # the mean and the max and Low is halfway between the mean dn the min e@0: e@0: rms_threshold_mean = mean(rms) e@0: e@0: rms_threshold_max = max(rms) e@0: rms_threshold_min = min(rms) e@0: e@0: rms_threshold_high = 0.1 * rms_threshold_mean e@0: rms_threshold_low = 0.01 * rms_threshold_mean e@0: e@0: for n in range(1, len(rms)): e@0: prev = rms[n-1] e@0: curr = rms[n] e@0: e@0: if prev >= rms_threshold_high: e@0: if curr < rms_threshold_low: e@0: silent_parts[0,n] = 1 e@0: else: e@0: silent_parts[0,n] = 0 e@0: elif prev <= rms_threshold_low: e@0: if curr > rms_threshold_high: e@0: silent_parts[0,n] = 0 e@0: else: e@0: silent_parts[0,n] = 1 e@0: else: e@0: silent_parts[0,n] = silent_parts[0,n-1] e@0: e@0: e@0: if silent_parts[0,1] == 1: e@0: silent_parts[0, 0] = 1 e@0: e@0: e@0: # plot(rms) e@0: # plot(silent_parts.T) e@0: # plot(ones((len(rms), 1))*rms_threshold_high) e@0: # plot(ones((len(rms), 1))*rms_threshold_low) e@0: e@0: active_index = invert(silent_parts.flatten().astype(bool)) e@0: e@0: # Keep only active parts e@0: e@0: # Uncomment this e@0: features_vector = features_vector[:, active_index] e@0: # parameters_vector = parameters_vector[:, active_index] e@0: e@0: moments_vector = zeros((features_vector.shape[0], 2)) e@0: e@0: print "[II] Storing moments vector" e@0: for i in range(0, features_vector.shape[0]): e@0: mean_ = mean(features_vector[i,:]) e@0: std_ = std(features_vector[i,:], ddof=1) e@0: moments_vector[i,0] = mean_ e@0: moments_vector[i,1] = std_ e@0: e@0: features_vector[i,:] = (features_vector[i,:] - mean_)/std_ e@0: e@0: features_vector_original = features_vector e@0: e@0: e@0: print "[II] Extracting PCA configuration " e@0: e@0: kernel, q, featurelist = extract_pca_configuration_from_data(features_vector) e@0: e@0: print "[II] Optimal number of PCs to keep: %d" % q e@0: e@0: feature_captions_array = array(feature_captions) e@0: e@0: # features_to_keep = features_vector e@0: e@0: features_to_keep = list(feature_captions_array[featurelist]) e@0: print "[II] Decided to keep %d features:" % len(features_to_keep) e@0: print str(features_to_keep).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7] e@0: e@0: e@0: e@0: # Keep the desired features e@0: #Uncomment this e@0: features_kept_data = features_vector[featurelist,:] e@0: # features_kept_data = features_vector e@0: # features_kept_data = features_vector e@0: e@0: # Generate the parameter clusters using k-means e@0: e@0: # Uncomment this e@0: features_vector = (kernel.T*features_kept_data)[0:q,:] e@0: #features_vector = log(features_vector+0.001) e@0: # features_vector = features_vector_original e@0: e@0: e@0: # parameters_k_means = KMeans(n_clusters = parameters_k, init='k-means++', max_iter=300, tol=0.0000001, verbose = 1) e@0: parameters_k_means = KMeans(init='k-means++', n_init=10, max_iter=300, tol=0.0000001, verbose = 0) e@0: # # parameters_k_means = MiniBatchKMeans(init='k-means++', max_iter=300, tol=0.00001, verbose = 1) e@0: # e@0: # # Quantize the differences of the parameters instead of the parameters themselves e@0: # parameters_vector_diff = concatenate((zeros((shape(parameters_vector)[0],1)),diff(parameters_vector, axis=1)),axis=1) e@0: # features_vector_diff = concatenate((zeros((shape(features_vector)[0],1)),diff(features_vector,axis=1)),axis=1) e@0: # e@0: # # Delete this afterwards e@0: # # features_vector = features_vector_diff e@0: # parameters_k_means.fit(parameters_vector_diff.T) e@0: e@0: print "[II] Trying ADM-coded parameters" e@0: print "[II] Upsampling features and parameters by a factor of %d" % UpsamplingFactor e@0: e@0: e@0: # Upsampled features and parameters e@0: features_vector_upsampled = smooth_matrix_1D(repeat(features_vector,UpsamplingFactor, axis=1)) e@0: e@0: # feature_labels_upsampled = repeat(features_clustering_labels,UpsamplingFactor, axis=0) e@0: parameters_vector_upsampled = repeat(parameters_vector,UpsamplingFactor, axis=1) e@0: e@0: # parameters_vector_upsampled = smooth_matrix_1D(parameters_vector_upsampled) e@0: e@0: parameters_vector_upsampled_adm = matrix(zeros(shape(parameters_vector_upsampled))) e@0: parameters_vector_upsampled_code = matrix(zeros(shape(parameters_vector_upsampled))) e@0: parameters_vector_upsampled_firstval = matrix(zeros((parameters_vector_upsampled.shape[0],1))) e@0: e@0: # Reconstructed parameters e@0: e@0: parameters_vector_upsampled_reconstructed = matrix(zeros(shape(parameters_vector_upsampled))) e@0: e@0: e@0: e@0: e@0: def adm_matrix(X, dmin=0.001,dmax=0.28,tol=0.001): e@0: e@0: out = matrix(zeros(shape(X))) e@0: code = matrix(zeros(shape(X))) e@0: firstval = matrix(zeros((X.shape[0], 1))) e@0: e@0: for i in range(0, X.shape[0]): e@0: out[i,:], code[i,:], firstval[i,0] = adm(X[i,:],dmin=dmin,dmax=dmax,tol=tol) e@0: e@0: return out,code,firstval e@0: e@0: # parameters_vector_upsampled_reconstructed[i,:] = adm_reconstruct(parameters_vector_upsampled_code[i,:],parameters_vector_upsampled_firstval[i,0], dmin=dmin,dmax=dmax) e@0: e@0: def adm_matrix_reconstruct(code, firstval, dmin=0.001, dmax=0.28): e@0: X = matrix(zeros(shape(code))) e@0: for i in range(0, code.shape[0]): e@0: X[i,:] = adm_reconstruct(code[i,:], firstval[i,0], dmin=dmin, dmax=dmax) e@0: e@0: return X e@0: e@0: e@0: parameters_vector_upsampled_adm, parameters_vector_upsampled_code, parameters_vector_upsampled_firstval = adm_matrix(parameters_vector_upsampled, dmin, dmax, tol) e@0: e@0: e@0: def diff_and_pad(X): e@0: return concatenate(( e@0: zeros(( e@0: shape(X)[0], e@0: 1 e@0: )), e@0: diff(X, axis=1)), e@0: axis=1) e@0: e@0: e@0: # features_vector_upsampled = features_vector_upsampled e@0: e@0: e@0: # features_vector_upsampled = diff_and_pad(features_vector_upsampled) e@0: e@0: e@0: # features_vector_diff = concatenate((zeros((shape(features_vector)[0],1)),diff(features_vector,axis=1)),axis=1) e@0: e@0: e@0: e@0: # Segmentation stuff e@0: e@0: e@0: e@0: # for i in range(0, parameters_vector_upsampled.shape[0]): e@0: # out, code, h = adm(parameters_vector_upsampled[i,:],dmin=dmin,dmax=dmax,tol=tol) e@0: # parameters_vector_upsampled_adm[i,:] = out e@0: # parameters_vector_upsampled_code[i,:] = code e@0: # parameters_vector_upsampled_firstval[i, 0] = h e@0: e@0: # parameters_k_means.fit(parameters_vector.T) e@0: ## e@0: # parameters_k_means_centers = parameters_k_means.cluster_centers_ e@0: # parameters_k_means_labels = parameters_k_means.labels_ e@0: ## e@0: # parameters_vector_estimated = zeros(shape(parameters_vector)) e@0: ## e@0: # for n in range(0, len(parameters_vector_estimated[0])): e@0: # parameters_vector_estimated[:,n] = parameters_k_means_centers[parameters_k_means_labels[n]] e@0: ## e@0: ### plot(parameters_vector[0]) e@0: ## # plot(parameters_vector_estimated[0]) e@0: ## e@0: ## # PROvLIMA EDW e@0: # print "[II] Parameters MSE for %d clusters: %.3f" % (len(parameters_k_means.cluster_centers_), mse(parameters_vector, parameters_vector_estimated)) e@0: # e@0: ## e@0: print "[II] Clustering features." e@0: # e@0: features_clustering = GMM(n_components = n_clusters, covariance_type='full') e@0: # e@0: features_clustering.fit( features_vector_upsampled.T, y=parameters_vector_upsampled_code) e@0: # e@0: features_clustering_means = features_clustering.means_ e@0: features_clustering_labels = features_clustering.predict(features_vector_upsampled.T) e@0: features_clustering_sigmas = features_clustering.covars_ e@0: # e@0: features_vector_upsampled_estimated = zeros(shape(features_vector_upsampled)) e@0: # e@0: # e@0: for n in range(0, len(features_vector_upsampled_estimated[0])): e@0: features_vector_upsampled_estimated[:,n] = features_clustering_means[features_clustering_labels[n]] e@0: # e@0: # # for n in range(0,features_vector.shape[0]): e@0: # # hist(features_vector[1]-features_vector_estimated[1], 256) e@0: # std(features_vector[1]-features_vector_estimated[1], ddof=1) e@0: # mean(features_vector[1]-features_vector_estimated[1]) e@0: # e@0: print "[II] Features MSE for %d clusters: %.3f" % (n_clusters, mse(features_vector_upsampled, features_vector_upsampled_estimated)) e@0: e@0: e@0: e@0: def cross_validate_clustering(data, estimator): e@0: print "[II] Crossvalidating... " e@0: estimator_fulldata = estimator e@0: estimator_fulldata.fit(data) e@0: e@0: # labels = estimator_fulldata.predict(data) e@0: means = estimator_fulldata.means_ e@0: # print means e@0: e@0: percents = arange(0.1,0.6,0.1) e@0: MSEs = [] e@0: reconstructed = zeros(shape(data)) e@0: labels = estimator.predict(data) e@0: for n in range(0, len(reconstructed)): e@0: reconstructed[n,:] = means[labels[n]] e@0: e@0: MSEs.append(mse(data,reconstructed)) e@0: for p in percents: e@0: train,test = cross_validation.train_test_split(data,test_size=p,random_state=0) e@0: train = matrix(train) e@0: test = matrix(test) e@0: # print shape(train) e@0: # print shape(test) e@0: estimator.fit(train) e@0: means = estimator.means_ e@0: labels = estimator.predict(test) e@0: reconstructed = zeros(shape(test)) e@0: for n in range(0, len(reconstructed)): e@0: reconstructed[n,:] = means[labels[n]] e@0: e@0: m = mse(test,reconstructed) e@0: e@0: print "[II] MSE for clustering crossvalidated data %.2f-%.2f: %.5f" % ((1-p), p, m) e@0: MSEs.append(m) e@0: e@0: print "[II] Crossvalidation complete" e@0: e@0: return MSEs e@0: e@0: # print "[II] Trying Cross Validation" e@0: e@0: # cross_validate_clustering(features_vector_upsampled.T, features_clustering) e@0: e@0: e@0: # Construct parameters alphabet, each symbol is going to be a different column vector e@0: # in parameter code matrix e@0: e@0: e@0: def vector_to_states(X): e@0: """ e@0: Input: a vector MxN with N samples and M variables e@0: Output: a codeword dictionary `parameters_alphabet', e@0: state_seq, inverse `parameters_alphabet_inv' """ e@0: e@0: e@0: parameters_alphabet = {} e@0: n = 0 e@0: e@0: for i in range(0, X.shape[1]): e@0: vec = tuple(ravel(X[:,i])) e@0: if vec not in parameters_alphabet: e@0: parameters_alphabet[vec] = n e@0: n += 1 e@0: e@0: parameters_alphabet_inv = dict([(parameters_alphabet[m],m) for m in parameters_alphabet]) e@0: e@0: state_seq = array([parameters_alphabet[tuple(ravel(X[:,m]))] for m in range(0, parameters_vector_upsampled_code.shape[1])] ) e@0: e@0: return state_seq, parameters_alphabet, parameters_alphabet_inv e@0: e@0: e@0: def states_to_vector(predicted, parameters_alphabet_inv): e@0: estimated = matrix(zeros((len(parameters_alphabet_inv[0]), len(predicted)))) e@0: for i in range(0, len(state_seq)): e@0: estimated[:, i] = matrix(parameters_alphabet_inv[predicted[i]]).T e@0: e@0: return estimated e@0: e@0: state_seq, parameters_alphabet, parameters_alphabet_inv = vector_to_states(parameters_vector_upsampled_code) e@0: e@0: e@0: parameters_change_variable = matrix(diff_and_pad(parameters_vector_upsampled)!=0).astype(int) e@0: e@0: changes_state_seq, changes_parameters_alphabet, changes_parameters_alphabet_inv = vector_to_states(parameters_change_variable) e@0: e@0: e@0: # This is an hmm that just codes the changes" e@0: # We have only two states, change and stay the same. e@0: e@0: e@0: print "[II] Creating emission probability mixtures for every state " e@0: parameters_state, parameter_state_alphabet, parameter_state_alphabet_inv = vector_to_states(parameters_vector_upsampled) e@0: e@0: gmm_list_changes = [] e@0: for n in range(0, len(parameter_state_alphabet)): e@0: vectors = features_vector_upsampled[:,parameters_state == n] e@0: gmm = GMM(n_components=n_clusters, covariance_type = 'diag') e@0: gmm.fit(vectors.T) e@0: gmm_list_changes.append(gmm) e@0: e@0: e@0: hmm_changes = hmm.GMMHMM(n_components=len(parameter_state_alphabet), gmms=array(gmm_list_changes),n_mix=n_clusters) e@0: hmm_changes.fit([array(features_vector_upsampled).T]) e@0: e@0: # subplot(211) e@0: # plot(parameters_change_variable.T) e@0: e@0: subplot(211), plot(parameters_vector_upsampled.T) e@0: subplot(212) e@0: e@0: predicted_states = hmm_changes.predict(array(features_vector_upsampled.T)) e@0: predicted_states_estimated = states_to_vector(predicted_states, parameter_state_alphabet_inv) e@0: e@0: plot(predicted_states_estimated.T) e@0: e@0: e@0: # print "[II] (changes hmm-chain) Creating emission probability mixtures for every state " e@0: # e@0: # gmm_list_changes = [] e@0: # for n in range(0, 2): e@0: # vectors = features_vector_upsampled[:,changes_state_seq == n] e@0: # gmm = GMM(n_components=n_clusters, covariance_type = 'diag') e@0: # gmm.fit(vectors.T) e@0: # gmm_list_changes.append(gmm) e@0: # e@0: # e@0: # hmm_changes = hmm.GMMHMM(n_components=2, gmms=array(gmm_list_changes),n_mix=n_clusters) e@0: # hmm_changes.fit([array(features_vector_upsampled).T]) e@0: # e@0: # subplot(211) e@0: # plot(parameters_change_variable.T) e@0: # subplot(212) e@0: # e@0: # changes_predicted_states = hmm_changes.predict(array(features_vector_upsampled.T)) e@0: # predicted_changes_estimated = states_to_vector(changes_predicted_states, changes_parameters_alphabet_inv) e@0: # e@0: # plot(predicted_changes_estimated.T) e@0: e@0: e@0: e@0: e@0: # End of changes HMM here e@0: e@0: e@0: print "[II] Creating emission probability mixtures for every state" e@0: gmm_list = [] e@0: for n in range(0, 3): e@0: vectors = features_vector_upsampled[:,state_seq == n] e@0: gmm = GMM(n_components=n_clusters,covariance_type = 'diag') e@0: gmm.fit(vectors.T) e@0: gmm_list.append(gmm) e@0: e@0: hmm1 = hmm.GMMHMM(n_components=3, gmms=array(gmm_list),n_mix=n_clusters) e@0: hmm1.fit([array(features_vector_upsampled).T]) e@0: e@0: figure() e@0: subplot(221) e@0: plot(parameters_vector_upsampled_code.T) e@0: e@0: predicted = hmm1.predict(array(features_vector_upsampled.T)) e@0: e@0: code_estimated = matrix(zeros((len(parameters_vector_upsampled), len(state_seq)))) e@0: e@0: # for i in range(0, len(state_seq)): e@0: # code_estimated[:, i] = matrix(parameters_alphabet_inv[predicted[i]]).T e@0: e@0: code_estimated = states_to_vector(predicted,parameters_alphabet_inv) e@0: subplot(222) e@0: plot(code_estimated.T) e@0: e@0: reconstructed_original = adm_matrix_reconstruct(parameters_vector_upsampled_code, parameters_vector_upsampled_firstval) e@0: subplot(223) e@0: plot(reconstructed_original.T) e@0: subplot(224) e@0: reconstructed_estimated = adm_matrix_reconstruct(code_estimated, parameters_vector_upsampled_firstval) e@0: plot(reconstructed_estimated.T) e@0: e@0: # scatter(features_vector_upsampled[0,:],features_vector_upsampled_estimated[0,:]) e@0: # scatter(features_vector_upsampled[1,:],features_vector_upsampled_estimated[1,:]) e@0: # e@0: # xlabel('Original Features on Principal Components') e@0: # ylabel('Estimated Features on Principal Components') e@0: # title('Original vs Estimated Features') e@0: # savefig('original_vs_estimated.png') e@0: e@0: e@0: e@0: # e@0: # e@0: print "[II] Testing Gaussian Naive Bayes Classifier" e@0: ## e@0: gnb = GaussianNB() e@0: gnb.fit(features_vector_upsampled.T, parameters_state) e@0: e@0: parameters_state_estimated = gnb.predict(features_vector_upsampled.T) e@0: e@0: output = states_to_vector(parameters_state_estimated, parameter_state_alphabet_inv) e@0: e@0: figure() e@0: subplot(211) e@0: plot(parameters_vector_upsampled.T) e@0: subplot(212) e@0: plot(smooth_matrix_1D(output.T)) e@0: # parameters_vector_upsampled_code_estimated = gnb.predict(features_vector_upsampled.T) e@0: # e@0: # e@0: ## e@0: # print "[II] Raw Parameters - Gaussian Naive Bayes classifier testing ratio: %.4f" % (float(sum((parameters_vector_upsampled_code_estimated == parameters_vector_upsampled_code).astype(float)))/float(len(parameters_k_means_labels))) e@0: ## e@0: # plot(adm_matrix_reconstruct(parameters_vector_upsampled_code_estimated,parameters_vector_upsampled_firstval,dmin,dmax).T) e@0: # e@0: # print "[II] Trying ADM-coded parameters" e@0: # UpsamplingFactor = 100 e@0: # print "[II] Upsampling features and parameters by a factor of %d" % UpsamplingFactor e@0: # e@0: # e@0: # # Upsampled features and parameters e@0: # features_vector_upsampled = repeat(features_vector,UpsamplingFactor, axis=1) e@0: # feature_labels_upsampled = repeat(features_clustering_labels,UpsamplingFactor, axis=0) e@0: # parameters_vector_upsampled = repeat(parameters_vector,UpsamplingFactor, axis=1) e@0: # e@0: # parameters_vector_upsampled_adm = matrix(zeros(shape(parameters_vector_upsampled))) e@0: # parameters_vector_upsampled_code = matrix(zeros(shape(parameters_vector_upsampled))) e@0: # parameters_vector_upsampled_firstval = matrix(zeros((parameters_vector_upsampled.shape[0],1))) e@0: # e@0: # # Reconstructed parameters e@0: # e@0: # parameters_vector_upsampled_reconstructed = matrix(zeros(shape(parameters_vector_upsampled))) e@0: # e@0: # dmin = 0.001 e@0: # dmax = 0.28 e@0: # tol = 0.001 e@0: # for i in range(0, parameters_vector_upsampled.shape[0]): e@0: # out, code, h = adm(parameters_vector_upsampled[i,:],dmin=dmin,dmax=dmax,tol=tol) e@0: # parameters_vector_upsampled_adm[i,:] = out e@0: # parameters_vector_upsampled_code[i,:] = code e@0: # parameters_vector_upsampled_firstval[i, 0] = h e@0: # e@0: # e@0: # # Reconstruct-ADM e@0: # parameters_vector_upsampled_reconstructed[i,:] = adm_reconstruct(parameters_vector_upsampled_code[i,:],parameters_vector_upsampled_firstval[i,0], dmin=dmin,dmax=dmax) e@0: # e@0: # e@0: # # plot(parameters_vector_upsampled_adm.T, 'r--') e@0: # e@0: # e@0: # # plot(parameters_vector_upsampled.T) e@0: # # plot(parameters_vector_upsampled_reconstructed.T, 'g.') e@0: # e@0: # e@0: # e@0: # parameters_vector_reconstructed = zeros(shape(parameters_vector)) e@0: # for n in range(0, parameters_vector.shape[1]): e@0: # parameters_vector_reconstructed[:,n] = parameters_vector_upsampled_reconstructed[:,n*UpsamplingFactor] e@0: # e@0: # e@0: # mse_adm = mse(parameters_vector_reconstructed, parameters_vector) e@0: # e@0: # print "[II] Expected ADM reconstruction MSE: %.4f" % mse_adm e@0: # e@0: # # figure() e@0: # #plot(parameters_vector.T) e@0: # # plot(parameters_vector_reconstructed.T) e@0: # e@0: # print "[II] Building HMM transition, emission matrices and priors" e@0: # e@0: # transmat = zeros((3,3)) e@0: # startprob = zeros((3,)) e@0: # emissionmat = zeros((3, n_clusters)) e@0: # e@0: # e@0: # state_labels = parameters_vector_upsampled_code + 1 e@0: # stateseq = state_labels.T e@0: # e@0: # for n in range(0,shape(parameters_vector_upsampled_code)[1]): e@0: # if n>0: e@0: # transmat[state_labels[0,n-1],state_labels[0,n]] += 1 e@0: # startprob[state_labels[0,n]] +=1 e@0: # emissionmat[state_labels[0,n],feature_labels_upsampled[n]] += 1 e@0: # e@0: # e@0: # for n in range(0, transmat.shape[0]): e@0: # transmat[n,:]/=sum(transmat[n,:]) e@0: # emissionmat[n,:]/=sum(emissionmat[n,:]) e@0: # e@0: # e@0: # transmat = matrix(transmat) e@0: # emissionmat = matrix(emissionmat) e@0: # # Prior e@0: # startprob = startprob/sum(startprob) e@0: # startprob = ravel(startprob) e@0: # e@0: # # Vocabulary e@0: # e@0: # model = hmm.GMMHMM(n_mix=n_clusters, n_components=3, covariance_type="diag") e@0: # model.means_ = features_clustering.means_ e@0: # model.covars_ = features_clustering.covars_ e@0: # e@0: # features_vector_array = array(features_vector) e@0: # features_vector_upsampled_array=array(features_vector_upsampled) e@0: # e@0: # model.fit([features_vector_array.T]) e@0: # stateseq_estimated = model.predict(features_vector_upsampled_array.T) e@0: # e@0: # parameters_vector_upsampled_reconstructed_decoded = matrix(zeros(shape(parameters_vector_upsampled))) e@0: # e@0: # e@0: # plot(stateseq_estimated) e@0: # plot(stateseq) e@0: # e@0: # code_estimated = matrix(zeros(shape(parameters_vector_upsampled))) e@0: # code_estimated[0,:] = stateseq_estimated - 1 e@0: # e@0: # e@0: # e@0: # parameters_vector_upsampled_reconstructed_estimated = matrix(zeros(shape(parameters_vector_upsampled))) e@0: # e@0: # for i in range(0, parameters_vector_upsampled.shape[0]): e@0: # parameters_vector_upsampled_reconstructed_estimated[i,:] = adm_reconstruct(code_estimated,parameters_vector_upsampled_firstval[i,0], dmin=dmin,dmax=dmax) e@0: # figure() e@0: # plot(parameters_vector_upsampled_reconstructed_estimated.T) e@0: # plot(parameters_vector_upsampled.T)