e@0: #!/usr/bin/python2 e@0: # -*- coding: utf-8 -*- e@0: """ e@0: Created on Thu Apr 23 11:53:17 2015 e@0: e@0: @author: mmxgn e@0: """ e@0: e@0: # This file does the cluster estimation and the removal of outliers e@0: e@0: from warnings import filterwarnings e@0: e@0: from sys import argv, exit e@0: from essentia.standard import YamlInput, YamlOutput e@0: from essentia import Pool e@0: from pca import * e@0: e@0: from numpy import * e@0: from sklearn import cluster e@0: from sklearn.metrics import pairwise_distances e@0: from sklearn.cluster import KMeans, MiniBatchKMeans e@0: from matplotlib.pyplot import * e@0: #from sklearn.mixture import GMM e@0: from sklearn.naive_bayes import GaussianNB, MultinomialNB e@0: from scipy.signal import decimate e@0: from sklearn import cross_validation e@0: from sklearn import multiclass e@0: from sklearn.covariance import EllipticEnvelope e@0: e@0: #from hmmlearn import hmm e@0: from hmmlearn.hmm import GMM e@0: from hmmlearn import hmm e@0: e@0: from sklearn import svm e@0: import copy as cp e@0: e@0: from scipy.misc import logsumexp e@0: import pickle e@0: e@0: from collections import Counter e@0: #from adpcm import adm, adm_reconstruct e@0: e@0: e@0: from reshape_observations import reshape_observations e@0: e@0: mse = lambda A,B: ((array(A)-array(B)) ** 2).mean() e@0: e@0: lognorm = lambda A: A - logsumexp(A) e@0: e@0: e@0: rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']}) e@0: ## for Palatino and other serif fonts use: e@0: #rc('font',**{'family':'serif','serif':['Palatino']}) e@0: rc('text', usetex=True) e@0: #logsum = lambda X: logsumexp(log(X)) e@0: e@0: e@0: filterwarnings("ignore") e@0: C = 0.7 e@0: NCOMPONENTS=2 e@0: CHAINSIZE = 5 e@0: e@0: close('all') e@0: e@0: from numpy.random import randint e@0: e@0: class HMMsvm: e@0: def __init__(self, svm_): e@0: self.svm = svm_ e@0: e@0: e@0: # self.svm = MyAVAClassifier() e@0: e@0: e@0: def fit(self, features_list, states, flr=1e-13): e@0: # def fit(self, features_list, flr=1e-13): e@0: e@0: # TODO: Concatenate features, train e@0: #self.svm.fit(X, states, flr) e@0: #self.prior = self.svm.prior e@0: #self.transmat = self.svm.transmat e@0: e@0: e@0: features = None e@0: e@0: for f in features_list: e@0: if features is None: e@0: features = f e@0: else: e@0: features = append(features, f, 0) e@0: e@0: fullstates = None e@0: # T = features.shape[0] e@0: for s in states: e@0: if fullstates is None: e@0: fullstates = s e@0: else: e@0: fullstates = append(fullstates, s) e@0: e@0: e@0: e@0: e@0: e@0: e@0: self.n_classes = self.svm.n_classes e@0: n_classes = self.n_classes e@0: e@0: # print fullstates, shape(fullstates) e@0: e@0: h = histogram(fullstates, n_classes)[0].astype(float) e@0: self.prior = h/sum(h) e@0: e@0: # Add a very small probability for random jump e@0: e@0: self.prior += flr e@0: self.prior = self.prior/sum(self.prior) e@0: e@0: transmat = zeros((n_classes, n_classes)) e@0: transitions = zeros((n_classes, )) e@0: for s in states: e@0: for i in range(1, len(s)): e@0: prev = s[i-1] e@0: cur = s[i] e@0: transmat[prev,cur] += 1 e@0: transitions[prev] += 1 e@0: e@0: transitions[transitions == 0] = 1 e@0: e@0: for i in range(0, transmat.shape[0]): e@0: transmat[i,:] = transmat[i,:]/transitions[i] e@0: e@0: self.transmat = transmat e@0: e@0: # Add a very small probability for random jump to avoid zero values e@0: e@0: self.transmat += flr e@0: e@0: for i in range(0, self.transmat.shape[0]): e@0: self.transmat[i,:] = self.transmat[i,:]/sum(self.transmat[i,:]) e@0: e@0: e@0: A = zeros((n_classes, n_classes)) e@0: e@0: Aold = self.transmat e@0: e@0: while mse(A,Aold)>10*size(A)*flr: e@0: Aold = A.copy() e@0: A = zeros((n_classes, n_classes)) e@0: transitions = zeros((n_classes, )) e@0: e@0: for i in range(0,len(features_list)): e@0: f = features_list[i] e@0: # print "FEATURES:", f e@0: # print f e@0: s,p = self.logviterbi(f) e@0: # print s e@0: for j in range(1, len(s)): e@0: prev = s[j-1] e@0: cur = s[j] e@0: A[prev,cur] += 1 e@0: transitions[prev] += 1 e@0: e@0: transitions[transitions == 0] = 1 e@0: e@0: e@0: for i in range(0, A.shape[0]): e@0: A[i,:] = A[i,:]/transitions[i] e@0: e@0: A += flr e@0: e@0: e@0: e@0: self.transmat = A e@0: e@0: for i in range(0, A.shape[0]): e@0: if sum(A[i,:]) > 0: e@0: A[i,:] = A[i,:]/sum(A[i,:]) e@0: e@0: e@0: e@0: # print observations e@0: e@0: e@0: def estimate_emission_probability(self, x, q): e@0: post = self.svm.estimate_posterior_probability_vector(x) e@0: # print "post: ", post e@0: prior = self.prior e@0: # print "prior: ", prior e@0: p = post/prior e@0: p = p/sum(p) e@0: e@0: return p[q] e@0: e@0: # def estimate_emission_probability(self, x, q): e@0: # return self.svm.estimate_emission_probability(q, x) e@0: e@0: e@0: def predict(self, X): e@0: return self.logviterbi(X)[0] e@0: e@0: e@0: def logviterbi(self, X): e@0: # Number of states e@0: e@0: N = self.n_classes e@0: e@0: # Number of observations e@0: e@0: e@0: e@0: T = X.shape[0] e@0: e@0: e@0: e@0: transmat = self.transmat e@0: e@0: #1. Initalization e@0: e@0: a1 = self.prior e@0: e@0: delta = zeros((T,N)) e@0: psi = zeros((T,N)) e@0: e@0: for i in range(0, N): e@0: delta[0, i] = log(self.transmat[0,i]) + log(self.estimate_emission_probability(X[0,:],i)) e@0: e@0: e@0: #2. Recursion e@0: e@0: for t in range(1, T): e@0: # delta_old = delta.copy() e@0: x = X[t, :] e@0: for j in range(0, N): e@0: # print "j: %d, S" % j, delta_old+log(transmat[:,j]) e@0: delta[t,j] = max(delta[t-1,:] + log(transmat[:,j])) + log(self.estimate_emission_probability(x,j)) e@0: psi[t,j] = argmax(delta[t-1,:] + log(transmat[:,j])) e@0: e@0: # print "t: %d, delta: " % t, delta e@0: # print "t: %d, psi:" % t, psi e@0: e@0: e@0: # 3. Termination e@0: e@0: p_star = max(delta[T-1,:] + log(transmat[:,N-1])) e@0: q_star = argmax(delta[T-1,:] + log(transmat[:, N-1])) e@0: e@0: e@0: # 4. Backtracking e@0: e@0: q = zeros((T,)) e@0: p = zeros((T,)) e@0: e@0: q[-1] = q_star e@0: p[-1] = p_star e@0: for t in range(1, T): e@0: q[-t-1] = psi[-t, q[-t]] e@0: p[-t-1] = delta[-t, q[-t]] e@0: e@0: e@0: return q,p e@0: e@0: def viterbi(self, X): e@0: # Number of states e@0: e@0: N = self.n_classes e@0: e@0: # Number of observations e@0: e@0: T = X.shape[0] e@0: e@0: transmat = self.transmat e@0: e@0: #1. Initialization e@0: e@0: a1 = self.prior e@0: e@0: delta = zeros((N, )) e@0: psi = zeros((N, )) e@0: e@0: for i in range(0, N): e@0: delta[i] = a1[i]*self.estimate_emission_probability(X[0,:],i) e@0: e@0: e@0: e@0: e@0: e@0: #2. Recursion e@0: e@0: for t in range(1, T): e@0: delta_old = delta.copy() e@0: x = X[t,:] e@0: for j in range(0, N): e@0: delta[j] = max(delta_old*transmat[:,j])*self.estimate_emission_probability(x, j) e@0: psi[j] = argmax(delta_old*transmat[:,j]) e@0: e@0: #3. Termination e@0: e@0: p_star = max(delta*transmat[:,N-1]) e@0: q_star = argmax(delta*transmat[:,N-1]) e@0: e@0: e@0: e@0: #4. Backtracking e@0: e@0: q = zeros((T,)) e@0: q[-1] = q_star e@0: e@0: for t in range(1, T): e@0: q[-t-1] = psi[q[-t]] e@0: e@0: return q e@0: e@0: e@0: def _log_likelihood_matrix(self, X): e@0: N = self.n_classes e@0: ll = zeros((X.shape[0], N)) e@0: e@0: for i in range(0, X.shape[0]): e@0: ll[i,:] = self._forward(i, X) e@0: e@0: return ll e@0: e@0: def compute_ksus(self, X): e@0: N = self.n_classes e@0: T = X.shape[0] e@0: print "[II] Computing gammas... " e@0: e@0: gammas = self._forward_backward(X) e@0: e@0: # Initialize e@0: e@0: aij = self.transmat e@0: e@0: e@0: e@0: e@0: e@0: e@0: def _not_quite_ksu(self, t, i, j, X): e@0: alpha = exp(self.forward(X[0:t+1,:]))[i] e@0: beta = exp(self.backward(X[t+1:,:]))[j] e@0: e@0: aij = self.transmat[i,j] e@0: bj = self.estimate_emission_probability(X[t+1,:], j) e@0: e@0: return alpha*beta*aij*bj e@0: e@0: def _ksu(self, t, i, j, X): e@0: alphaT = exp(self.forward(X[0:t+1,:]))[0] e@0: e@0: return self._not_quite_ksu(t,i,j,X) e@0: e@0: e@0: def _forward_backward(self, X): e@0: T = X.shape[0] e@0: N = self.n_classes e@0: fv = zeros((T, N)) e@0: sv = zeros((T, N)) e@0: e@0: b = None e@0: for t in range(1, T+1): e@0: e@0: fv[t-1,:] = self._forward_message(fv[t-2,:], X[0:t, ]) e@0: e@0: for t in range(1, T+1): e@0: b = self._backward_message(b, X[-t: , :]) e@0: sv[-t,:] = lognorm(fv[t-1,:]*b) e@0: e@0: return sv e@0: e@0: e@0: def _backward(self, t, X): e@0: N = self.n_classes e@0: a = ones((N,))/N e@0: e@0: beta = zeros((N, )) e@0: transmat = self.transmat e@0: T = X.shape[0] e@0: x = X[t, :] e@0: if t == T-1: e@0: beta = log(a) e@0: e@0: return beta e@0: else: e@0: tosum = zeros((N, )) e@0: bt = self._backward(t+1, X) e@0: btnew = zeros((N, )) e@0: # print bt e@0: for j in range(0, N): e@0: for i in range(0, N): e@0: # print log(self.estimate_emission_probability(x, j)), bt[i], log(transmat[i,j]) e@0: tosum[i] = log(self.estimate_emission_probability(x, j)) + bt[i] + log(transmat[i,j]) e@0: # print tosum e@0: e@0: btnew[j] = logsumexp(tosum) e@0: btnew = lognorm(btnew) e@0: e@0: return btnew e@0: e@0: e@0: def score(self, X): e@0: return self.forward(X) e@0: e@0: def forward(self, X): e@0: T = X.shape[0] e@0: f = None e@0: for t in range(1, T+1): e@0: f = self._forward_message(f, X[0:t, :]) e@0: e@0: return f e@0: e@0: def backward(self, X): e@0: T = X.shape[0] e@0: b = None e@0: for t in range(1,T+1): e@0: # print t e@0: # print t,b e@0: idx = arange(-t,T) e@0: # print idx e@0: b = self._backward_message(b, X[-t:, :]) e@0: e@0: return b e@0: e@0: def _backward_message(self, b, X): e@0: N = self.n_classes e@0: e@0: e@0: beta = zeros((N, )) e@0: transmat = self.transmat e@0: e@0: x = X[0, :] e@0: e@0: if X.shape[0] == 1: e@0: beta = log(ones((N,))/N) e@0: return beta e@0: else: e@0: tosum = zeros((N, )) e@0: btnew = zeros((N, )) e@0: for j in range(0, N): e@0: for i in range(0, N): e@0: tosum[i] = log(self.estimate_emission_probability(x,j)) + b[i] + log(transmat[i,j]) e@0: e@0: btnew[j] = logsumexp(tosum) e@0: # btnew = lognorm(btnew) e@0: return btnew e@0: e@0: def _forward_message(self, f, X): e@0: N = self.n_classes e@0: a = self.prior e@0: e@0: alpha = zeros((N, )) e@0: transmat = self.transmat e@0: e@0: x = X[-1, :] e@0: e@0: if X.shape[0] == 1: e@0: for i in range(0, N): e@0: alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i)) e@0: # alpha = lognorm(alpha) e@0: return alpha e@0: e@0: else: e@0: tosum = zeros((N,)) e@0: ftnew = zeros((N,)) e@0: for j in range(0, N): e@0: for i in range(0, N): e@0: tosum[i] = f[i] + log(transmat[i,j]) e@0: Sum = logsumexp(tosum) e@0: bj = self.estimate_emission_probability(x, j) e@0: ftnew[j] = Sum+log(bj) e@0: e@0: # ftnew = lognorm(ftnew) e@0: return ftnew e@0: e@0: def _forward(self, t, X): e@0: N = self.n_classes e@0: a = self.prior e@0: # print a e@0: alpha = zeros((N, )) e@0: transmat = self.transmat e@0: x = X[t, :] e@0: e@0: if t == 0: e@0: for i in range(0, N): e@0: alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i)) e@0: # print "--" e@0: # print alpha e@0: alpha = lognorm(alpha) e@0: # print alpha e@0: # print "xx" e@0: return alpha e@0: else: e@0: tosum = zeros((N, )) e@0: ft = self._forward(t-1, X) e@0: ftnew = zeros((N, )) e@0: for j in range(0, N): e@0: for i in range(0, N): e@0: # print ft e@0: tosum[i] = ft[i] + log(transmat[i,j]) e@0: e@0: Sum = logsumexp(tosum) e@0: bj = self.estimate_emission_probability(x, j) e@0: ftnew[j] = Sum+log(bj) e@0: ftnew = lognorm(ftnew) e@0: e@0: return ftnew e@0: e@0: class HMMsvm2: e@0: def __init__(self, svm_, n_components=4): e@0: self.svm = svm_ e@0: self.n_components = n_components e@0: e@0: e@0: # self.svm = MyAVAClassifier() e@0: e@0: e@0: def fit(self, features_list, flr=1e-13): e@0: # def fit(self, features_list, flr=1e-13): e@0: e@0: # TODO: Concatenate features, train e@0: #self.svm.fit(X, states, flr) e@0: #self.prior = self.svm.prior e@0: #self.transmat = self.svm.transmat e@0: e@0: e@0: n_classes = self.n_components e@0: self.n_classes = n_classes e@0: e@0: A = zeros((n_classes, n_classes)) e@0: e@0: Aold = randn(n_classes, n_classes) e@0: self.transmat = Aold e@0: self.prior = randn(n_classes) e@0: self.prior = self.prior/sum(self.prior) e@0: for i in range(0, Aold.shape[0]): e@0: Aold[i,:] = Aold[i,:]/sum(Aold[i,:]) e@0: e@0: while mse(A,Aold)>10*size(A)*flr: e@0: Aold = A.copy() e@0: A = zeros((n_classes, n_classes)) e@0: transitions = zeros((n_classes, )) e@0: e@0: for i in range(0,len(features_list)): e@0: f = features_list[i] e@0: # print "FEATURES:", f e@0: # print f e@0: s,p = self.logviterbi(f) e@0: # print s e@0: e@0: h = histogram(s, n_classes)[0].astype(float) e@0: self.prior = h/sum(h) e@0: e@0: e@0: self.prior += flr e@0: self.prior = self.prior/sum(self.prior) e@0: e@0: e@0: for j in range(1, len(s)): e@0: prev = s[j-1] e@0: cur = s[j] e@0: A[prev,cur] += 1 e@0: transitions[prev] += 1 e@0: e@0: transitions[transitions == 0] = 1 e@0: e@0: e@0: for i in range(0, A.shape[0]): e@0: A[i,:] = A[i,:]/transitions[i] e@0: e@0: A += flr e@0: e@0: e@0: e@0: self.transmat = A e@0: e@0: for i in range(0, A.shape[0]): e@0: if sum(A[i,:]) > 0: e@0: A[i,:] = A[i,:]/sum(A[i,:]) e@0: e@0: e@0: e@0: # print observations e@0: e@0: e@0: def estimate_emission_probability(self, x, q): e@0: post = self.svm.estimate_posterior_probability_vector(x) e@0: # print "post: ", post e@0: prior = self.prior e@0: # print "prior: ", prior e@0: p = post/prior e@0: p = p/sum(p) e@0: e@0: return p[q] e@0: e@0: # def estimate_emission_probability(self, x, q): e@0: # return self.svm.estimate_emission_probability(q, x) e@0: e@0: e@0: def predict(self, X): e@0: return self.logviterbi(X)[0] e@0: e@0: e@0: def logviterbi(self, X): e@0: # Number of states e@0: e@0: N = self.n_classes e@0: e@0: # Number of observations e@0: e@0: e@0: e@0: T = X.shape[0] e@0: e@0: e@0: e@0: transmat = self.transmat e@0: e@0: #1. Initalization e@0: e@0: # a1 = self.prior e@0: e@0: delta = zeros((T,N)) e@0: psi = zeros((T,N)) e@0: e@0: for i in range(0, N): e@0: delta[0, i] = log(self.transmat[0,i]) + log(self.estimate_emission_probability(X[0,:],i)) e@0: e@0: e@0: #2. Recursion e@0: e@0: for t in range(1, T): e@0: # delta_old = delta.copy() e@0: x = X[t, :] e@0: for j in range(0, N): e@0: # print "j: %d, S" % j, delta_old+log(transmat[:,j]) e@0: delta[t,j] = max(delta[t-1,:] + log(transmat[:,j])) + log(self.estimate_emission_probability(x,j)) e@0: psi[t,j] = argmax(delta[t-1,:] + log(transmat[:,j])) e@0: e@0: # print "t: %d, delta: " % t, delta e@0: # print "t: %d, psi:" % t, psi e@0: e@0: e@0: # 3. Termination e@0: e@0: p_star = max(delta[T-1,:] + log(transmat[:,N-1])) e@0: q_star = argmax(delta[T-1,:] + log(transmat[:, N-1])) e@0: e@0: e@0: # 4. Backtracking e@0: e@0: q = zeros((T,)) e@0: p = zeros((T,)) e@0: e@0: q[-1] = q_star e@0: p[-1] = p_star e@0: for t in range(1, T): e@0: q[-t-1] = psi[-t, q[-t]] e@0: p[-t-1] = delta[-t, q[-t]] e@0: e@0: e@0: return q,p e@0: e@0: def viterbi(self, X): e@0: # Number of states e@0: e@0: N = self.n_classes e@0: e@0: # Number of observations e@0: e@0: T = X.shape[0] e@0: e@0: transmat = self.transmat e@0: e@0: #1. Initialization e@0: e@0: a1 = self.prior e@0: e@0: delta = zeros((N, )) e@0: psi = zeros((N, )) e@0: e@0: for i in range(0, N): e@0: delta[i] = a1[i]*self.estimate_emission_probability(X[0,:],i) e@0: e@0: e@0: e@0: e@0: e@0: #2. Recursion e@0: e@0: for t in range(1, T): e@0: delta_old = delta.copy() e@0: x = X[t,:] e@0: for j in range(0, N): e@0: delta[j] = max(delta_old*transmat[:,j])*self.estimate_emission_probability(x, j) e@0: psi[j] = argmax(delta_old*transmat[:,j]) e@0: e@0: #3. Termination e@0: e@0: p_star = max(delta*transmat[:,N-1]) e@0: q_star = argmax(delta*transmat[:,N-1]) e@0: e@0: e@0: e@0: #4. Backtracking e@0: e@0: q = zeros((T,)) e@0: q[-1] = q_star e@0: e@0: for t in range(1, T): e@0: q[-t-1] = psi[q[-t]] e@0: e@0: return q e@0: e@0: e@0: def _log_likelihood_matrix(self, X): e@0: N = self.n_classes e@0: ll = zeros((X.shape[0], N)) e@0: e@0: for i in range(0, X.shape[0]): e@0: ll[i,:] = self._forward(i, X) e@0: e@0: return ll e@0: e@0: def compute_ksus(self, X): e@0: N = self.n_classes e@0: T = X.shape[0] e@0: print "[II] Computing gammas... " e@0: e@0: gammas = self._forward_backward(X) e@0: e@0: # Initialize e@0: e@0: aij = self.transmat e@0: e@0: e@0: e@0: e@0: e@0: e@0: def _not_quite_ksu(self, t, i, j, X): e@0: alpha = exp(self.forward(X[0:t+1,:]))[i] e@0: beta = exp(self.backward(X[t+1:,:]))[j] e@0: e@0: aij = self.transmat[i,j] e@0: bj = self.estimate_emission_probability(X[t+1,:], j) e@0: e@0: return alpha*beta*aij*bj e@0: e@0: def _ksu(self, t, i, j, X): e@0: alphaT = exp(self.forward(X[0:t+1,:]))[0] e@0: e@0: return self._not_quite_ksu(t,i,j,X) e@0: e@0: e@0: def _forward_backward(self, X): e@0: T = X.shape[0] e@0: N = self.n_classes e@0: fv = zeros((T, N)) e@0: sv = zeros((T, N)) e@0: e@0: b = None e@0: for t in range(1, T+1): e@0: e@0: fv[t-1,:] = self._forward_message(fv[t-2,:], X[0:t, ]) e@0: e@0: for t in range(1, T+1): e@0: b = self._backward_message(b, X[-t: , :]) e@0: sv[-t,:] = lognorm(fv[t-1,:]*b) e@0: e@0: return sv e@0: e@0: e@0: def _backward(self, t, X): e@0: N = self.n_classes e@0: a = ones((N,))/N e@0: e@0: beta = zeros((N, )) e@0: transmat = self.transmat e@0: T = X.shape[0] e@0: x = X[t, :] e@0: if t == T-1: e@0: beta = log(a) e@0: e@0: return beta e@0: else: e@0: tosum = zeros((N, )) e@0: bt = self._backward(t+1, X) e@0: btnew = zeros((N, )) e@0: # print bt e@0: for j in range(0, N): e@0: for i in range(0, N): e@0: # print log(self.estimate_emission_probability(x, j)), bt[i], log(transmat[i,j]) e@0: tosum[i] = log(self.estimate_emission_probability(x, j)) + bt[i] + log(transmat[i,j]) e@0: # print tosum e@0: e@0: btnew[j] = logsumexp(tosum) e@0: btnew = lognorm(btnew) e@0: return btnew e@0: e@0: e@0: def score(self, X): e@0: return self.forward(X) e@0: e@0: def forward(self, X): e@0: T = X.shape[0] e@0: f = None e@0: for t in range(1, T+1): e@0: f = self._forward_message(f, X[0:t, :]) e@0: e@0: return f e@0: e@0: def backward(self, X): e@0: T = X.shape[0] e@0: b = None e@0: for t in range(1,T+1): e@0: # print t e@0: # print t,b e@0: idx = arange(-t,T) e@0: # print idx e@0: b = self._backward_message(b, X[-t:, :]) e@0: e@0: return b e@0: e@0: def _backward_message(self, b, X): e@0: N = self.n_classes e@0: e@0: e@0: beta = zeros((N, )) e@0: transmat = self.transmat e@0: e@0: x = X[0, :] e@0: e@0: if X.shape[0] == 1: e@0: beta = log(ones((N,))/N) e@0: return beta e@0: else: e@0: tosum = zeros((N, )) e@0: btnew = zeros((N, )) e@0: for j in range(0, N): e@0: for i in range(0, N): e@0: tosum[i] = log(self.estimate_emission_probability(x,j)) + b[i] + log(transmat[i,j]) e@0: e@0: btnew[j] = logsumexp(tosum) e@0: # btnew = lognorm(btnew) e@0: return btnew e@0: e@0: def _forward_message(self, f, X): e@0: N = self.n_classes e@0: a = self.prior e@0: e@0: alpha = zeros((N, )) e@0: transmat = self.transmat e@0: e@0: x = X[-1, :] e@0: e@0: if X.shape[0] == 1: e@0: for i in range(0, N): e@0: alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i)) e@0: # alpha = lognorm(alpha) e@0: return alpha e@0: e@0: else: e@0: tosum = zeros((N,)) e@0: ftnew = zeros((N,)) e@0: for j in range(0, N): e@0: for i in range(0, N): e@0: tosum[i] = f[i] + log(transmat[i,j]) e@0: Sum = logsumexp(tosum) e@0: bj = self.estimate_emission_probability(x, j) e@0: ftnew[j] = Sum+log(bj) e@0: e@0: # ftnew = lognorm(ftnew) e@0: return ftnew e@0: e@0: def _forward(self, t, X): e@0: N = self.n_classes e@0: a = self.prior e@0: # print a e@0: alpha = zeros((N, )) e@0: transmat = self.transmat e@0: x = X[t, :] e@0: e@0: if t == 0: e@0: for i in range(0, N): e@0: alpha[i] = log(a[i]) + log(self.estimate_emission_probability(x, i)) e@0: # print "--" e@0: # print alpha e@0: alpha = lognorm(alpha) e@0: # print alpha e@0: # print "xx" e@0: return alpha e@0: else: e@0: tosum = zeros((N, )) e@0: ft = self._forward(t-1, X) e@0: ftnew = zeros((N, )) e@0: for j in range(0, N): e@0: for i in range(0, N): e@0: # print ft e@0: tosum[i] = ft[i] + log(transmat[i,j]) e@0: e@0: Sum = logsumexp(tosum) e@0: bj = self.estimate_emission_probability(x, j) e@0: ftnew[j] = Sum+log(bj) e@0: ftnew = lognorm(ftnew) e@0: e@0: return ftnew e@0: e@0: e@0: e@0: e@0: e@0: e@0: e@0: class NBClassifier: e@0: def __init__(self): e@0: print "[II] Gaussian Naive Bayes Classifier" e@0: self.name = "Naive Bayes" e@0: self.smallname = "gnbc" e@0: self.gnb = GaussianNB() e@0: e@0: def getName(self): e@0: return self.name e@0: e@0: def fit(self, X, states): e@0: self.gnb.fit(X, states) e@0: e@0: def predict(self, X): e@0: return self.gnb.predict(X) e@0: e@0: def cross_validate(self, data, classes): e@0: print "[II] SVN Classifier Crossvalidating... " e@0: from copy import deepcopy e@0: estimator = deepcopy(self) e@0: estimator_fulldata = deepcopy(self) e@0: estimator_fulldata.fit(data, classes) e@0: e@0: percents = arange(0.1,0.9,0.1) e@0: self.percents = percents * 100 e@0: e@0: RATIOS = [] e@0: labels = estimator.predict(data) e@0: e@0: e@0: print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(labels)) e@0: e@0: for p in percents: e@0: traindata,testdata,trainlabels,testlabels = cross_validation.train_test_split(data,classes,test_size=p,random_state=1) e@0: e@0: # print "[II] [%.2f-%.2f] Examples used for training: %s" % (p, (1-p), str(traindata)) e@0: # print "[II] [%.2f-%.2f] Examples used for testing: %s" % (p, (1-p), str(testdata)) e@0: e@0: # Training phase e@0: e@0: e@0: e@0: estimator_ = deepcopy(estimator) e@0: estimator_.fit(traindata, trainlabels) e@0: e@0: predicted_labels = estimator_.predict(testdata) e@0: e@0: m = sum(array(predicted_labels==testlabels).astype(float))/len(testlabels) e@0: e@0: print "[II] [%.2f-%.2f] 5-fold cross-validation for NB Classifier: %.1f" % (p, (1-p), m) e@0: e@0: RATIOS.append(m) e@0: e@0: return RATIOS,percents e@0: e@0: e@0: e@0: e@0: class HmmClassifier3: e@0: def __init__(self, N=2,n_components = 1): e@0: print "[II] HMM Classifier (%d states, %d components)" % (N, n_components) e@0: self.name = "HMM (%d time steps, %d components)" % (N, n_components) e@0: self.n_components = n_components e@0: e@0: self.n_components = n_components e@0: self.smallname = "hmmc" e@0: self.chain_size = N e@0: e@0: def getName(self): e@0: return self.name e@0: e@0: def fit(self, features, parameters): e@0: e@0: e@0: # print "Parameters:" e@0: # print parameters e@0: n_classes = max(unique(parameters)) + 1 e@0: e@0: if n_classes == 1: e@0: self.only_one_class = True e@0: return e@0: else: e@0: self.only_one_class = False e@0: e@0: print "[II] Number of classes: ", n_classes e@0: hmms = [None]*n_classes e@0: e@0: chain_size = self.chain_size e@0: obs = [None]*n_classes e@0: e@0: for i in range(chain_size, len(parameters)): e@0: class_ = parameters[i] e@0: seq = features[i-chain_size:i,:] e@0: e@0: e@0: if obs[class_] is None: e@0: obs[class_] = [seq] e@0: else: e@0: obs[class_].append(seq) e@0: e@0: e@0: e@0: for i in range(0, len(obs)): e@0: e@0: if obs[i] is not None and len(obs[i]) != 0: e@0: hmm_ = hmm.GaussianHMM(n_components=self.n_components, covariance_type='diag') e@0: # print(obs[i]) e@0: #obs_ = reshape_observations(obs[i]) e@0: obs_ = concatenate(obs[i]) e@0: #lengths_ = [o.shape[0] for o in obs[i]] e@0: #print (lengths_) e@0: # print len(obs[i]) e@0: # print obs[i][0][0].shape e@0: # print obs[i] e@0: # for s in obs[i]: e@0: # if s.ndim > 2: e@0: # print s e@0: hmm_.fit(obs_, [self.chain_size]*len(obs[i])) e@0: hmms[i] = hmm_ e@0: e@0: self.hmms = hmms e@0: e@0: return obs e@0: e@0: def predict(self, features, mfilt=20): e@0: e@0: if self.only_one_class == True: e@0: return zeros((features.shape[0], )) e@0: e@0: chain_size = self.chain_size e@0: hmms = self.hmms e@0: predicted_classes = zeros((features.shape[0],)) e@0: e@0: e@0: for i in range(chain_size, features.shape[0]): e@0: scores = zeros((len(hmms),)) e@0: e@0: seq = features[i-chain_size:i, :] e@0: e@0: for j in range(0, len(hmms)): e@0: if hmms[j] is not None: e@0: scores[j] = hmms[j].score(seq) e@0: else: e@0: scores[j] = -infty e@0: e@0: predicted_classes[i] = argmax(scores) e@0: e@0: # Do a median filter at the end e@0: e@0: # predicted_classes = median_filter(predicted_classes,mfilt) e@0: e@0: return predicted_classes e@0: e@0: e@0: e@0: def k_fold_cross_validate(self, data, classes, K=5): e@0: print "[II] HMM Classifier Crossvalidating... " e@0: print "[II] Validating data: ", data e@0: print "[II] With classes: ", classes e@0: from copy import deepcopy e@0: estimator = deepcopy(self) e@0: estimator_fulldata = deepcopy(self) e@0: estimator_fulldata.fit(data, classes) e@0: e@0: e@0: labels = estimator_fulldata.predict(data) e@0: print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(classes)) e@0: e@0: # Crossvalidate this. e@0: e@0: example_sequences = [] e@0: example_labels = [] e@0: e@0: chain_size = self.chain_size e@0: e@0: percents = arange(0.1,0.9,0.1) e@0: self.percents = percents * 100 e@0: e@0: RATIOS = [] e@0: e@0: e@0: e@0: # Constructing examples and labels e@0: e@0: e@0: L = data.shape[0]/K e@0: M = K e@0: # print "[II] Example size:", L e@0: e@0: n = 1 e@0: while L*(n-1) < M*L: e@0: if L*n > shape(data)[0]: e@0: example = data[L*(n-1):,:] e@0: classes_ = classes[L*(n-1):] e@0: else: e@0: example = data[L*(n-1):L*n, :] e@0: classes_ = classes[L*(n-1):L*n] e@0: e@0: # print example e@0: # print classes_ e@0: e@0: example_sequences.append(example) e@0: example_labels.append(classes_) e@0: n+=1 e@0: e@0: # print len(example_sequences) e@0: # print len(example_labels) e@0: from sklearn.cross_validation import KFold e@0: kf = KFold(K, n_folds=K) e@0: e@0: ratio = 0 e@0: e@0: for train, test in kf: e@0: print "[II] Validating examples %s against %s." % (train, test) e@0: e@0: topredict_data = example_sequences[test[0]] e@0: topredict_labels = example_labels[test[0]] e@0: e@0: tofit_data = None e@0: tofit_labels = None e@0: e@0: for t in train: e@0: # print t e@0: if tofit_data is None: e@0: tofit_data = example_sequences[t] e@0: tofit_labels = example_labels[t] e@0: else: e@0: tofit_data = append(tofit_data, example_sequences[t], 0) e@0: tofit_labels = append(tofit_labels, example_labels[t], 0) e@0: e@0: estimator_ = deepcopy(estimator) e@0: estimator_.fit(tofit_data, tofit_labels) e@0: e@0: labels = estimator_.predict(topredict_data) e@0: e@0: m = sum(array(topredict_labels==labels).astype(float))/len(labels) e@0: e@0: e@0: print "[II] %d-fold cross-validation for HMM Classifier: %.1f" % (K, m) e@0: e@0: ratio += m/float(len(kf)) e@0: e@0: return ratio e@0: e@0: e@0: e@0: e@0: e@0: e@0: # Splitting to train/test e@0: e@0: e@0: def cross_validate(self, data, classes): e@0: print "[II] HMM Classifier Crossvalidating... " e@0: from copy import deepcopy e@0: estimator = deepcopy(self) e@0: estimator_fulldata = deepcopy(self) e@0: estimator_fulldata.fit(data, classes) e@0: e@0: e@0: labels = estimator_fulldata.predict(data) e@0: print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(classes)) e@0: e@0: # Crossvalidate this. e@0: e@0: example_sequences = [] e@0: example_labels = [] e@0: e@0: chain_size = self.chain_size e@0: e@0: percents = arange(0.5,0.9,0.1) e@0: self.percents = percents * 100 e@0: e@0: RATIOS = [] e@0: e@0: e@0: e@0: # Constructing examples and labels e@0: e@0: M = self.chain_size e@0: L = data.shape[0]/20 e@0: e@0: print "[II] Example size:", L e@0: e@0: n = 1 e@0: while L*n < M*L-L: e@0: example = data[L*(n-1):L*n, :] e@0: # print example e@0: classes_ = classes[L*(n-1):L*n] e@0: # print classes_ e@0: e@0: example_sequences.append(example) e@0: example_labels.append(classes_) e@0: n+=1 e@0: e@0: e@0: e@0: # Splitting to train/test e@0: e@0: for p in percents: e@0: traindata,testdata,trainlabels,testlabels = cross_validation.train_test_split(example_sequences, example_labels, test_size=p, random_state=1) e@0: e@0: print traindata e@0: print testdata e@0: # Concatenate traindata e@0: e@0: tofit_data = None e@0: tofit_labels = None e@0: e@0: topredict_data = None e@0: topredict_labels = None e@0: e@0: e@0: for t in traindata: e@0: if tofit_data is None: e@0: tofit_data = t e@0: else: e@0: tofit_data = append(tofit_data, t, 0) e@0: e@0: for l in trainlabels: e@0: if tofit_labels is None: e@0: tofit_labels = l e@0: else: e@0: tofit_labels = append(tofit_labels, l) e@0: e@0: for t in testdata: e@0: if topredict_data is None: e@0: topredict_data = t e@0: else: e@0: topredict_data = append(topredict_data, t, 0) e@0: e@0: for l in testlabels: e@0: if topredict_labels is None: e@0: topredict_labels = l e@0: else: e@0: topredict_labels = append(topredict_labels, l) e@0: e@0: e@0: # print "[II] Fit data: ", tofit_data e@0: ## print "[II] Fit labels: ", tofit_labels e@0: # print "[II] Predict data: ", topredict_data e@0: # print "[II] Predict labels: ", topredict_labels e@0: # e@0: estimator_ = deepcopy(estimator) e@0: e@0: print tofit_labels e@0: estimator_.fit(tofit_data, tofit_labels) e@0: e@0: labels = estimator_.predict(topredict_data) e@0: e@0: m = sum(array(topredict_labels==labels).astype(float))/len(labels) e@0: e@0: e@0: print "[II] [%.2f-%.2f] cross-validation for HMM Classifier: %.1f" % (p, (1-p), m) e@0: e@0: RATIOS.append(m) e@0: e@0: return RATIOS,percents e@0: e@0: e@0: class HMMsvmClassifier2: e@0: def __init__(self, N=2): e@0: self.classifiers = {} e@0: self.name = "HMM-SVM Classifier 2" e@0: self.smallname = "hmmsvm2" e@0: self.obs = MyAVAClassifier() e@0: self.chain_size = N e@0: e@0: def getName(self): e@0: return self.name e@0: e@0: def fit(self, X, y): e@0: self.n_classes = max(unique(y))+1 e@0: n_classes = self.n_classes e@0: chain_size = self.chain_size e@0: e@0: if n_classes == 1: e@0: self.only_one_class = True e@0: return e@0: else: e@0: self.only_one_class = False e@0: e@0: print "[II] Number of classes: ", n_classes e@0: e@0: hmms = [None]*n_classes e@0: obs = [None]*n_classes e@0: e@0: e@0: for i in range(chain_size, len(parameters)): e@0: class_ = parameters[i-1] e@0: e@0: e@0: e@0: class HMMsvmClassifier: e@0: def __init__(self, N=2): e@0: self.classifiers = {} e@0: self.name = "HMM-SVM Classifier" e@0: self.smallname = "hmmsvm" e@0: self.obs = MyAVAClassifier() e@0: self.chain_size = N e@0: e@0: def getName(self): e@0: return self.name e@0: e@0: def fit(self, X, y): e@0: self.n_classes = max(unique(y))+1 e@0: e@0: self.obs.fit(X,y) e@0: self.hmm = HMMsvm(self.obs) e@0: self.hmm.fit([X],[y]) e@0: e@0: def predict(self, X): e@0: # chain_size = self.chain_size e@0: # predicted = zeros((X.shape[0]),) e@0: # for i in range(chain_size, len(predicted)): e@0: # predicted[i] = self.hmm.predict(X[i-chain_size:i,:])[-1] e@0: e@0: e@0: e@0: # return predicted e@0: e@0: return self.hmm.predict(X) e@0: e@0: def confidence(self, x, q): e@0: return self.hmm.estimate_emission_probability(x, q) e@0: e@0: e@0: e@0: class SinkHoleClassifier: e@0: def __init__(self): e@0: self.classifierNB = NBClassifier() e@0: self.classifierSVM = MyAVAClassifier() e@0: self.classifierHMM = HmmClassifier3() e@0: self.classifierHMMSVM = HMMsvmClassifier() e@0: e@0: e@0: e@0: def getName(self): e@0: return self.name e@0: e@0: e@0: def fit(self, X, y): e@0: self.n_classes = max(unique(y))+1 e@0: e@0: self.classifierNB.fit(X,y) e@0: self.classifierSVM.fit(X,y) e@0: self.classifierHMM.fit(X,y) e@0: self.classifierHMMSVM.fit(X,y) e@0: e@0: e@0: def predict(self, X): e@0: predictedNB = self.classifierNB.predict(X) e@0: predictedSVM = self.classifierSVM.predict(X) e@0: predictedHMM = self.classifierHMM.predict(X) e@0: predictedHMMSVM = self.classifierHMMSVM.predict(X) e@0: e@0: e@0: e@0: predicted = zeros((X.shape[0], )) e@0: e@0: for i in range(0, len(predicted)): e@0: candidates = [predictedNB[i], predictedSVM[i], predictedHMM[i], predictedHMMSVM[i]] e@0: c = Counter(candidates) e@0: e@0: most_common = c.most_common() e@0: e@0: # If there is an equal voting, select something at random e@0: if len(unique([k[1] for k in most_common])) == 1: e@0: predicted[i] = most_common[randint(len(most_common))][0] e@0: else: e@0: predicted[i] = most_common[0][0] e@0: e@0: e@0: e@0: return predicted e@0: e@0: class MyAVAClassifier: e@0: e@0: def __init__(self): e@0: self.classifiers = {} e@0: self.name = "Linear SVM Classifier" e@0: self.smallname = "svc-ava" e@0: # self.probabilities = {} e@0: e@0: e@0: def getName(self): e@0: return self.name e@0: def fit(self, X, y, flr = 0): e@0: e@0: n_classes = max(unique(y)) + 1 e@0: e@0: # Transform to a binary if there are only two classes e@0: if len(unique(y)) == 1: e@0: self.only_one_class = True e@0: self.n_classes = 1 e@0: self.one_class_label = y[0] e@0: return e@0: elif len(unique(y)) == 2: e@0: e@0: self.n_classes = n_classes e@0: self.svm = svm.SVC(probability = True, kernel='poly', gamma=2, C = C) e@0: self.svm.fit(X,y) e@0: classes_ = unique(y) e@0: self.classifiers[(classes_[0], classes_[1])] = self.svm e@0: self.only_two_classes = True e@0: self.only_one_class = False e@0: e@0: return e@0: else: e@0: self.only_two_classes = False e@0: self.only_one_class = False e@0: e@0: e@0: classes = arange(0, n_classes) e@0: self.n_classes = n_classes e@0: # M = n_classes*(n_classes-1) # Number of classifiers e@0: e@0: h = histogram(y, n_classes)[0].astype(float) e@0: self.prior = h/sum(h) e@0: e@0: transmat = zeros((n_classes, n_classes)) e@0: e@0: for i in range(1, len(y)): e@0: prev = y[i-1] e@0: cur = y[i] e@0: transmat[prev,cur] += 1 e@0: e@0: transmat = transmat/sum(transmat) e@0: e@0: self.transmat = transmat e@0: e@0: # Add a very small probability for random jump to avoid zero values e@0: e@0: self.transmat += flr e@0: self.transmat = self.transmat/sum(self.transmat) e@0: e@0: for i in range(0, n_classes): e@0: for j in range(0, n_classes): e@0: if i != j and (i,j) not in self.classifiers and (j,i) not in self.classifiers: e@0: # print (i,j) e@0: # print classes e@0: idx_ = bitwise_or(y == classes[i], y == classes[j]) e@0: # idx_ = bitwise_or(y == i, y == j) e@0: e@0: X_ = X[idx_, :] e@0: e@0: y_ = y[idx_] e@0: #print y_ e@0: e@0: if len(unique(y_)) > 1: e@0: svm_ = svm.SVC(probability = True, kernel='poly', gamma=2, C = C) e@0: e@0: # print y_ e@0: svm_.fit(X_, y_) e@0: self.classifiers[(i,j)] = svm_ e@0: # self.probabilities[(i,j)] = svm_.predict_proba(X) e@0: e@0: e@0: def estimate_pairwise_class_probability(self, i, j, x): e@0: e@0: # print self.classifiers.keys() e@0: e@0: if (i,j) not in self.classifiers and (j,i) in self.classifiers: e@0: return self.classifiers[(j,i)].predict_proba(x)[0,1] e@0: elif (i,j) not in self.classifiers and (j,i) not in self.classifiers: e@0: return 0.0 e@0: else: e@0: return self.classifiers[(i,j)].predict_proba(x)[0,0] e@0: e@0: def estimate_posterior_probability(self, i, x): e@0: mus = zeros((self.n_classes,)) e@0: for j in range(0, self.n_classes): e@0: if i != j: e@0: pcp = self.estimate_pairwise_class_probability(i,j,x) e@0: pcp += 1e-18 e@0: mus[j] = 1/pcp e@0: # print mus e@0: S = sum(mus) - (self.n_classes - 2) e@0: # print S e@0: return 1/S e@0: e@0: def estimate_posterior_probability_vector(self, x): e@0: posterior = zeros((self.n_classes,)) e@0: for i in range(0, len(posterior)): e@0: posterior[i] = self.estimate_posterior_probability(i, x) e@0: e@0: return posterior e@0: e@0: # def estimate_emission_probability(self, i, x): e@0: # post = self.estimate_posterior_probability_vector(x) e@0: # # print "post: ", post e@0: # prior = self.prior e@0: # # print "prior: ", prior e@0: # p = post/prior e@0: # p = p/sum(p) e@0: # e@0: # return p[i] e@0: e@0: # def estimate_emissmat(self, X): e@0: # emissmat = zeros((X.shape[0], self.n_classes)) e@0: # for i in range(0, X.shape[0]): e@0: # for j in range(0, self.n_classes): e@0: # emissmat[i,j] = self.estimate_emission_probability(j, X[i,:]) e@0: # e@0: # return emissmat e@0: e@0: def predict(self, X): e@0: predicted = zeros((X.shape[0],)) e@0: e@0: if self.only_one_class == True: e@0: return ones((X.shape[0],))*self.one_class_label e@0: elif self.only_two_classes == True: e@0: return self.svm.predict(X) e@0: e@0: e@0: for i in range(0, X.shape[0]): e@0: x = X[i,:] e@0: P = zeros((self.n_classes,)) e@0: e@0: e@0: for c in range(0, len(P)): e@0: P[c] = self.estimate_posterior_probability(c, x) e@0: e@0: pred = argmax(P) e@0: predicted[i] = pred e@0: e@0: return predicted e@0: e@0: def logsum(X): e@0: if len(X) == 1: e@0: return log(X[0]) e@0: else: e@0: return logaddexp(log(X[0]),logsum(X[1:])) e@0: e@0: e@0: e@0: e@0: def smooth_matrix_1D(X): e@0: window = scipy.signal.gaussian(51,4) e@0: window = window/sum(window) e@0: intermx = zeros((X.shape[0],X.shape[1]+100)) e@0: intermx[:, 50:-50] = X e@0: e@0: for m in range(0, X.shape[0]): e@0: # print intermx.shape e@0: intermx[m,:] = convolve(ravel(intermx[m,:]), window,'same') e@0: e@0: return intermx[:,50:-50] e@0: e@0: def adm_reconstruct(codeword, h, dmin=.01, dmax=.28): e@0: x = zeros((1, codeword.shape[1])) e@0: e@0: delta1 = dmin e@0: delta2 = dmin e@0: Sum = h e@0: e@0: x[0] = h e@0: for i in range(0, codeword.shape[1]): e@0: if codeword[0,i] == 0: e@0: delta1 = dmin e@0: delta2 = dmin e@0: e@0: elif codeword[0,i] == 1: e@0: delta2 = dmin e@0: Sum += delta1 e@0: delta1 *= 2 e@0: if delta1 > dmax: e@0: delta1 = dmax e@0: e@0: elif codeword[0,i] == -1: e@0: delta1 = dmin e@0: Sum -= delta2 e@0: delta2 *= 2 e@0: if delta2 > dmax: e@0: delta2 = dmax e@0: x[0,i] = Sum e@0: return x e@0: e@0: def adm(x, dmin=.01, dmax=.28, tol=0.0001): e@0: e@0: # Adaptive delta modulation adapted by code: e@0: # (adeltamod.m) e@0: # e@0: # Adaptive Delta Modulator e@0: # by Gandhar Desai (gdesai) e@0: # BITS Pilani Goa Campus e@0: # Date: 28 Sept, 2013 e@0: e@0: xsig = x e@0: e@0: Lx = len(x) e@0: e@0: ADMout = zeros((1, Lx)) e@0: codevec = zeros((1, Lx)) e@0: e@0: e@0: Sum = x[0] e@0: delta1 = dmin e@0: delta2 = dmin e@0: mult1 = 2 e@0: mult2 = 2 e@0: for i in range(0, Lx): e@0: #print abs(xsig[i] - Sum) e@0: if (abs(xsig[i] - Sum) < tol): e@0: bit = 0 e@0: delta2 = dmin e@0: delta1 = dmin e@0: e@0: e@0: elif (xsig[i] >= Sum): e@0: bit = 1 e@0: delta2 = dmin e@0: Sum += delta1 e@0: delta1 *= mult1 e@0: if delta1 > dmax: e@0: delta1 = dmax e@0: e@0: e@0: else: e@0: bit = -1 e@0: delta1 = dmin e@0: Sum -= delta2 e@0: delta2 *= mult2 e@0: if delta2 > dmax: e@0: delta2 = dmax e@0: e@0: e@0: e@0: ADMout[0, i] = Sum e@0: codevec[0, i]= bit e@0: e@0: return ADMout,codevec, x[0] e@0: e@0: def median_filter(v, K): e@0: v2 = zeros((len(v),)) e@0: for i in range(K, len(v)): e@0: seq = v[i-K:i] e@0: m = median(seq) e@0: v2[i-K:i] = m e@0: e@0: return v2 e@0: e@0: e@0: e@0: e@0: from models import ReverbModel e@0: e@0: e@0: e@0: if __name__=="__main__": e@0: if len(argv) != 2: e@0: print "[EE] Wrong number of arguments" e@0: print "[II] Correct syntax is:" e@0: print "[II] \t%s " e@0: print "[II] where is the directory containing the features files in the format *_features.yaml" e@0: print "[II] and the parameters in the format *_parameters.yaml" e@0: exit(-1) e@0: e@0: e@0: n_clusters = 3 e@0: UpsamplingFactor = 1 e@0: dmin = 0.001 e@0: dmax = 0.28 e@0: tol = 0.001 e@0: e@0: d1min = 0.01 e@0: d1max = 0.1 e@0: e@0: g1min = 0.01 e@0: g1max = 0.99 e@0: e@0: damin = 0.006 e@0: damax = 0.012 e@0: e@0: Gmin = 0.01 e@0: Gmax = 0.99 e@0: e@0: gcmin = 0.01 e@0: gcmax = 0.99 e@0: e@0: # For searching the directory e@0: from glob import glob e@0: e@0: traindir = argv[1] e@0: e@0: songs_in_dir = glob("%s/*_features.yaml" % traindir) e@0: e@0: print "[II] Using files: %s" % songs_in_dir e@0: e@0: e@0: chain_length=1 e@0: e@0: e@0: # asdsd e@0: e@0: # fullfeatures_pool = Pool() e@0: e@0: features_vector = None e@0: parameters_vector = None e@0: index_vector = None e@0: e@0: idx = 0 e@0: # Shuffle segments e@0: random.shuffle(songs_in_dir) e@0: for f_ in songs_in_dir: e@0: infile = f_ e@0: paramfile = "%s_parameters.yaml" % f_.split('_features.yaml')[0] e@0: e@0: print "[II] Using features file: %s" % infile e@0: print "[II] and parameters file: %s" % paramfile e@0: e@0: e@0: # paramfile = infile.split(') e@0: e@0: features_pool = YamlInput(filename = infile)() e@0: parameters_pool = YamlInput(filename = paramfile)() e@0: e@0: d1 = parameters_pool['parameters.d1'] e@0: da = parameters_pool['parameters.da'] e@0: g1 = parameters_pool['parameters.g1'] e@0: gc = parameters_pool['parameters.gc'] e@0: G = parameters_pool['parameters.G'] e@0: e@0: e@0: e@0: e@0: e@0: feature_captions = features_pool.descriptorNames() e@0: parameter_captions = ['d1','da','g1','gc','G'] e@0: e@0: e@0: svmhmmstr = "" e@0: e@0: e@0: for c in features_pool.descriptorNames(): e@0: if c.split('.')[0] == 'metadata' or c.split('.')[0] == 'parameter': e@0: feature_captions.remove(c) e@0: e@0: e@0: # close('all') e@0: e@0: # print "[II] Loaded training data from %s (%s) " % (infile, features_pool['metadata.date'][0]) e@0: print "[II] %d Features Available: " % len(feature_captions) e@0: e@0: e@0: e@0: print str(feature_captions).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7] e@0: e@0: nfeatures_in = len(feature_captions) e@0: nparameters_in = 5 e@0: features_vector_one = zeros((nfeatures_in, len(features_pool[feature_captions[0]]))) e@0: index_vector_one = ones((len(features_pool[feature_captions[0]]),)) e@0: parameters_vector_one = ones((nparameters_in, len(features_pool[feature_captions[0]]))) e@0: # paramet e@0: e@0: e@0: for i in range(0, nfeatures_in): e@0: features_vector_one[i, :] = features_pool[feature_captions[i]].T e@0: # for i in range(0, nparameters_in): e@0: # parameters_vector[i, :] = features_pool[parameter_captions[0]].T e@0: e@0: parameters_vector_one[0, :] = d1*parameters_vector_one[0,:] e@0: parameters_vector_one[1, :] = g1*parameters_vector_one[1,:] e@0: parameters_vector_one[2, :] = da*parameters_vector_one[2,:] e@0: parameters_vector_one[3, :] = gc*parameters_vector_one[3,:] e@0: parameters_vector_one[4, :] = G*parameters_vector_one[4,:] e@0: e@0: # Stores example index number e@0: e@0: index_vector_one *= idx e@0: e@0: e@0: e@0: e@0: e@0: e@0: e@0: e@0: e@0: print "[II] %d parameters used:" % len(parameter_captions) e@0: print str(parameter_captions).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7].replace('parameter.','') e@0: e@0: if features_vector is None: e@0: features_vector = features_vector_one e@0: else: e@0: features_vector = append(features_vector, features_vector_one, 1) e@0: e@0: if parameters_vector is None: e@0: parameters_vector = parameters_vector_one e@0: else: e@0: parameters_vector = append(parameters_vector, parameters_vector_one, 1) e@0: e@0: if index_vector is None: e@0: index_vector = index_vector_one e@0: else: e@0: index_vector = append(index_vector, index_vector_one) e@0: e@0: e@0: idx += 1 e@0: e@0: e@0: print "[II] Marking silent parts" e@0: features_full_nontransformed_train = features_vector.copy() e@0: # silent_parts = zeros((1, len(features_pool[feature_captions[i]].T))) e@0: e@0: silent_parts = zeros((1, features_vector.shape[1])) e@0: e@0: rms = features_vector[feature_captions.index('rms'), :] e@0: e@0: # Implementing Hysteresis Gate -- High threshold is halfway between e@0: # the mean and the max and Low is halfway between the mean dn the min e@0: e@0: rms_threshold_mean = mean(rms) e@0: e@0: rms_threshold_max = max(rms) e@0: rms_threshold_min = min(rms) e@0: e@0: rms_threshold_high = 0.1 * rms_threshold_mean e@0: rms_threshold_low = 0.01 * rms_threshold_mean e@0: e@0: for n in range(1, len(rms)): e@0: prev = rms[n-1] e@0: curr = rms[n] e@0: e@0: if prev >= rms_threshold_high: e@0: if curr < rms_threshold_low: e@0: silent_parts[0,n] = 1 e@0: else: e@0: silent_parts[0,n] = 0 e@0: elif prev <= rms_threshold_low: e@0: if curr > rms_threshold_high: e@0: silent_parts[0,n] = 0 e@0: else: e@0: silent_parts[0,n] = 1 e@0: else: e@0: silent_parts[0,n] = silent_parts[0,n-1] e@0: e@0: e@0: if silent_parts[0,1] == 1: e@0: silent_parts[0, 0] = 1 e@0: e@0: e@0: e@0: active_index = invert(silent_parts.flatten().astype(bool)) e@0: e@0: # Keep only active parts e@0: e@0: e@0: # Uncomment this e@0: features_vector = features_vector[:, active_index] e@0: e@0: # Keep this for debugging purposes e@0: e@0: e@0: parameters_vector = parameters_vector[:, active_index] e@0: # index_vector = index_vector[active_index] e@0: e@0: # Label examples for a chain of chain_length e@0: e@0: # idx = 0 e@0: # for i in range(0, len(index_vector)): e@0: # index_vector[i] = idx e@0: # if i % chain_length == 0: e@0: # idx += 1 e@0: # number_of_examples = idx e@0: e@0: e@0: e@0: # Scale parameters to [0,1] e@0: e@0: e@0: parameters_vector[0,:] = (parameters_vector[0,:] - d1min)/d1max #d1 e@0: parameters_vector[1,:] = (parameters_vector[1,:] - g1min)/g1max #g1 e@0: parameters_vector[2,:] = (parameters_vector[2,:] - damin)/damax #g1 e@0: parameters_vector[3,:] = (parameters_vector[3,:] - Gmin)/Gmax #G e@0: parameters_vector[4,:] = (parameters_vector[4,:] - gcmin)/gcmax #gc e@0: e@0: e@0: e@0: e@0: e@0: e@0: e@0: moments_vector = zeros((features_vector.shape[0], 2)) e@0: e@0: features_vector_original = features_vector.copy() e@0: e@0: e@0: e@0: print "[II] Storing moments vector" e@0: for i in range(0, features_vector.shape[0]): e@0: mean_ = mean(features_vector[i,:]) e@0: std_ = std(features_vector[i,:], ddof=1) e@0: moments_vector[i,0] = mean_ e@0: moments_vector[i,1] = std_ e@0: e@0: features_vector[i,:] = (features_vector[i,:] - mean_)/std_ e@0: e@0: features_vector_old_train = features_vector.copy() e@0: # moments_vector_train = moments_vector e@0: e@0: e@0: print "[II] Extracting PCA configuration " e@0: e@0: kernel, q, featurelist = extract_pca_configuration_from_data(features_vector) e@0: e@0: e@0: #q = 1 e@0: e@0: e@0: # q = q + 1 e@0: e@0: print "[II] Optimal number of PCs to keep: %d" % q e@0: e@0: e@0: e@0: e@0: feature_captions_array = array(feature_captions) e@0: e@0: features_to_keep = list(feature_captions_array[featurelist]) e@0: e@0: print "[II] Decided to keep %d features:" % len(features_to_keep) e@0: print str(features_to_keep).replace("', ","\n").replace('[','').replace("'","[II]\t ")[:-7] e@0: e@0: featurelist_bak = featurelist e@0: features_kept_data = features_vector[featurelist,:] e@0: features_kept_data_bak = features_kept_data.copy() e@0: # features_vector_old_train = features_kept_data e@0: features_vector = (kernel.T*features_kept_data)[0:q,:] e@0: features_vector_new_train = features_vector.copy() e@0: e@0: e@0: # example_features = None e@0: # example_parameters = None e@0: # example_idx = None e@0: # e@0: # for idx in range(0, shape(parameters_vector)[1]-chain_length): e@0: # example_features_ = features_vector[:, idx:idx+chain_length] e@0: # # print example_features_ e@0: # example_parameters_ = parameters_vector[:, idx:idx+chain_length] e@0: # example_idx_ = ones((shape(example_parameters_)[1],)) e@0: # e@0: # if example_features is None: e@0: # example_features = example_features_ e@0: # else: e@0: # #print "appending" e@0: # example_features = append(example_features, example_features_, 1) e@0: # e@0: # if example_parameters is None: e@0: # example_parameters = example_parameters_ e@0: # else: e@0: # example_parameters = append(example_parameters, example_parameters_, 1) e@0: # e@0: # if example_idx is None: e@0: # example_idx = example_idx_*idx e@0: # else: e@0: # example_idx = append(example_idx, example_idx_*idx, 1) e@0: # e@0: # e@0: # e@0: # e@0: # features_vector = example_features e@0: # parameters_vector = example_parameters e@0: # index_vector = example_idx e@0: e@0: print "[II] Trying ADM-coded parameters" e@0: print "[II] Upsampling features and parameters by a factor of %d" % UpsamplingFactor e@0: e@0: e@0: # Upsampled features and parameters e@0: features_vector_upsampled = smooth_matrix_1D(repeat(features_vector,UpsamplingFactor, axis=1)) e@0: # features_vector_upsampled = repeat(features_vector, UpsamplingFactor, axis=1) e@0: e@0: e@0: # features_vector_upsampled = smooth_matrix_1D(repeat(features_vector_old_train,UpsamplingFactor, axis=1)) e@0: e@0: # feature_labels_upsampled = repeat(features_clustering_labels,UpsamplingFactor, axis=0) e@0: parameters_vector_upsampled = repeat(parameters_vector,UpsamplingFactor, axis=1) e@0: e@0: e@0: # km = KMeans(init='k-means++', n_init=10, max_iter=300, tol=0.0000001, verbose = 0, n_clusters=12).fit(parameters_vector_upsampled.T) e@0: # centers_ = km.cluster_centers_ e@0: # labels__ = km.labels_ e@0: # Remove the following two lines in order to restore non-kmeans version e@0: # parameters_vector_kmeans_upsampled = array(centers_[labels__]) e@0: # e@0: # parameters_vector_upsampled = parameters_vector_kmeans_upsampled.T e@0: # e@0: # e@0: # parameters_vector_upsampled = smooth_matrix_1D(parameters_vector_upsampled) e@0: e@0: parameters_vector_upsampled_adm = matrix(zeros(shape(parameters_vector_upsampled))) e@0: parameters_vector_upsampled_code = matrix(zeros(shape(parameters_vector_upsampled))) e@0: parameters_vector_upsampled_firstval = matrix(zeros((parameters_vector_upsampled.shape[0],1))) e@0: e@0: # Reconstructed parameters e@0: e@0: e@0: e@0: parameters_vector_upsampled_reconstructed = matrix(zeros(shape(parameters_vector_upsampled))) e@0: e@0: e@0: e@0: # ADM HERE e@0: # e@0: e@0: def adm_matrix(X, dmin=0.001,dmax=0.28,tol=0.001): e@0: e@0: out = matrix(zeros(shape(X))) e@0: code = matrix(zeros(shape(X))) e@0: firstval = matrix(zeros((X.shape[0], 1))) e@0: e@0: for i in range(0, X.shape[0]): e@0: out[i,:], code[i,:], firstval[i,0] = adm(X[i,:],dmin=dmin,dmax=dmax,tol=tol) e@0: e@0: return out,code,firstval e@0: e@0: # parameters_vector_upsampled_reconstructed[i,:] = adm_reconstruct(parameters_vector_upsampled_code[i,:],parameters_vector_upsampled_firstval[i,0], dmin=dmin,dmax=dmax) e@0: e@0: def adm_matrix_reconstruct(code, firstval, dmin=0.001, dmax=0.28): e@0: X = matrix(zeros(shape(code))) e@0: for i in range(0, code.shape[0]): e@0: X[i,:] = adm_reconstruct(code[i,:], firstval[i,0], dmin=dmin, dmax=dmax) e@0: e@0: return X e@0: e@0: e@0: parameters_vector_upsampled_adm, parameters_vector_upsampled_code, parameters_vector_upsampled_firstval = adm_matrix(parameters_vector_upsampled, dmin, dmax, tol) e@0: # parameters_vector_upsampled = parameters_vector_upsampled_code e@0: e@0: e@0: def diff_and_pad(X): e@0: return concatenate(( e@0: zeros(( e@0: shape(X)[0], e@0: 1 e@0: )), e@0: diff(X, axis=1)), e@0: axis=1) e@0: e@0: e@0: # Change for adm, e@0: e@0: # parameters_vector_upsampled = parameters_vector_upsampled_code e@0: print "[II] Clustering features." e@0: # e@0: features_clustering = GMM(n_components = n_clusters, covariance_type='diag') e@0: # e@0: features_clustering.fit( features_vector_upsampled.T, y=parameters_vector_upsampled_code) e@0: # e@0: features_clustering_means = features_clustering.means_ e@0: features_clustering_labels = features_clustering.predict(features_vector_upsampled.T) e@0: features_clustering_sigmas = features_clustering.covars_ e@0: # e@0: features_vector_upsampled_estimated = zeros(shape(features_vector_upsampled)) e@0: # e@0: # e@0: for n in range(0, len(features_vector_upsampled_estimated[0])): e@0: features_vector_upsampled_estimated[:,n] = features_clustering_means[features_clustering_labels[n]] e@0: # e@0: # e@0: print "[II] Features MSE for %d clusters: %.3f" % (n_clusters, mse(features_vector_upsampled, features_vector_upsampled_estimated)) e@0: e@0: e@0: e@0: def happy_curve_classification(data, classes, estimator, Nd=1): e@0: print "[II] Generating Happy Curve " e@0: from copy import deepcopy e@0: estimator_fulldata = deepcopy(estimator) e@0: estimator_fulldata.fit(data, classes) e@0: labels = estimator.predict(data) e@0: e@0: # 1. Split data in two, training and testing e@0: e@0: Ntr = int(round(data.shape[0]/2)) # Training dataset size e@0: Nts = data.shape[0] - Ntr # Testing dataset size e@0: e@0: ratios = [] e@0: e@0: for n in range(Nd, Ntr): e@0: train, test, trainl, testl = cross_validation.train_test_split(data, classes, test_size = 0.5, random_state = 0) e@0: train = train[0:n,:] e@0: trainl = trainl[0:n] e@0: # print "trainl" e@0: # print trainl e@0: estimator_ = deepcopy(estimator) e@0: estimator_.fit(train,trainl) e@0: labels = estimator_.predict(test) e@0: e@0: ratio = sum(array(testl==labels).astype(float))/len(labels) e@0: e@0: ratios.append(ratio) e@0: e@0: e@0: return ratios e@0: e@0: e@0: def cross_validate_classification(data, classes, estimator): e@0: print "[II] Crossvalidating... " e@0: from copy import deepcopy e@0: estimator_fulldata = deepcopy(estimator) e@0: estimator_fulldata.fit(data, classes) e@0: e@0: percents = arange(0.1,0.9,0.1) e@0: MSEs = [] e@0: labels = estimator.predict(data) e@0: e@0: print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(labels)) e@0: e@0: for p in percents: e@0: train,test,trainlabels,testlabels = cross_validation.train_test_split(data,classes,test_size=p,random_state=0) e@0: estimator_ = deepcopy(estimator) e@0: estimator_.fit(train, trainlabels) e@0: labels = estimator_.predict(test) e@0: print "[II] for training(%.2f)-testing(%.2f): %.2f" % ((1-p),p,sum(array(testlabels==labels).astype(float))/len(labels)) e@0: e@0: return MSEs e@0: e@0: def cross_validate_clustering(data, estimator): e@0: print "[II] Crossvalidating... " e@0: estimator_fulldata = estimator e@0: estimator_fulldata.fit(data) e@0: e@0: # labels = estimator_fulldata.predict(data) e@0: means = estimator_fulldata.means_ e@0: # print means e@0: e@0: percents = arange(0.1,0.6,0.1) e@0: MSEs = [] e@0: reconstructed = zeros(shape(data)) e@0: labels = estimator.predict(data) e@0: for n in range(0, len(reconstructed)): e@0: reconstructed[n,:] = means[labels[n]] e@0: e@0: MSEs.append(mse(data,reconstructed)) e@0: for p in percents: e@0: train,test = cross_validation.train_test_split(data,test_size=p,random_state=0) e@0: train = matrix(train) e@0: test = matrix(test) e@0: e@0: estimator.fit(train) e@0: means = estimator.means_ e@0: labels = estimator.predict(test) e@0: reconstructed = zeros(shape(test)) e@0: for n in range(0, len(reconstructed)): e@0: reconstructed[n,:] = means[labels[n]] e@0: e@0: m = mse(test,reconstructed) e@0: e@0: print "[II] MSE for clustering crossvalidated data %.2f-%.2f: %.5f" % ((1-p), p, m) e@0: MSEs.append(m) e@0: e@0: print "[II] Crossvalidation complete" e@0: e@0: return MSEs e@0: e@0: e@0: e@0: e@0: # Construct parameters alphabet, each symbol is going to be a different column vector e@0: # in parameter code matrix e@0: e@0: e@0: def vector_to_states(X): e@0: """ e@0: Input: a vector MxN with N samples and M variables e@0: Output: a codeword dictionary `parameters_alphabet', e@0: state_seq, inverse `parameters_alphabet_inv' """ e@0: e@0: e@0: parameters_alphabet = {} e@0: n = 0 e@0: e@0: for i in range(0, X.shape[1]): e@0: vec = tuple(ravel(X[:,i])) e@0: if vec not in parameters_alphabet: e@0: parameters_alphabet[vec] = n e@0: n += 1 e@0: e@0: parameters_alphabet_inv = dict([(parameters_alphabet[m],m) for m in parameters_alphabet]) e@0: e@0: state_seq = array([parameters_alphabet[tuple(ravel(X[:,m]))] for m in range(0, parameters_vector_upsampled_code.shape[1])] ) e@0: e@0: return state_seq, parameters_alphabet, parameters_alphabet_inv e@0: e@0: e@0: def states_to_vector(predicted, parameters_alphabet_inv): e@0: estimated = matrix(zeros((len(parameters_alphabet_inv[0]), len(predicted)))) e@0: for i in range(0, len(predicted)): e@0: # print "[II] Predicted: ", predicted[i] e@0: estimated[:, i] = matrix(parameters_alphabet_inv[predicted[i]]).T e@0: e@0: return estimated e@0: e@0: # state_seq, parameters_alphabet, parameters_alphabet_inv = vector_to_states(parameters_vector_upsampled_code) e@0: e@0: e@0: # parameters_change_variable = matrix(diff_and_pad(parameters_vector_upsampled)!=0).astype(int) e@0: e@0: # changes_state_seq, changes_parameters_alphabet, changes_parameters_alphabet_inv = vector_to_states(parameters_change_variable) e@0: e@0: e@0: # This is an hmm that just codes the changes" e@0: # We have only two states, change and stay the same. e@0: e@0: # Uncomment that here e@0: e@0: # parameters_vector_upsampled = parameters_vector_upsampled_code e@0: # parameters_state_orig, parameter_state_alphabet_orig, parameter_state_alphabet_inv_orig = vector_to_states(parameters_vector) e@0: # TODO: HA e@0: # parmaeters_vector_upsampled = parameters_vector_upsampled_code e@0: parameters_state, parameter_state_alphabet, parameter_state_alphabet_inv = vector_to_states(parameters_vector_upsampled) e@0: e@0: e@0: # Remove outliers e@0: e@0: ee = EllipticEnvelope() e@0: e@0: # inliers = ee.fit(features_vector_upsampled.T).predict(features_vector_upsampled.T) == 1 e@0: e@0: inoutliers = zeros((len(parameters_state),)) e@0: # e@0: for p in unique(parameters_state): e@0: ee = EllipticEnvelope(contamination=0.1) e@0: inoutliers[parameters_state == p] = ee.fit(features_vector_upsampled[:, parameters_state == p].T).predict(features_vector_upsampled[:, parameters_state == p].T) e@0: # e@0: # e@0: # Label inliers and outliers e@0: e@0: inliers = inoutliers > 0 e@0: outliers = inoutliers < 0 e@0: # e@0: # e@0: # e@0: e@0: # Do pre-processing and re-label the outliers e@0: e@0: #completeavac = SinkHoleClassifier() e@0: #completeavac.fit(features_vector_upsampled[:, inliers].T, parameters_state[inliers]) e@0: e@0: #parameters_state_original = parameters_state.copy() e@0: #parameters_state[outliers] = completeavac.predict(features_vector_upsampled[:,outliers].T) e@0: e@0: e@0: e@0: e@0: e@0: # features_vector_upsampled = features_vector_upsampled[:, inliers] e@0: # parameters_state = parameters_state[inliers] e@0: e@0: # print "UNIQUE PARAMETERS STATE" e@0: # print unique(parameters_state) e@0: # asdasdasd e@0: print "[II] Testing Gaussian Naive Bayes Classifier" e@0: gnb = GaussianNB() e@0: gnb.fit(features_vector_upsampled.T, parameters_state) e@0: e@0: e@0: e@0: parameters_state_estimated = gnb.predict(features_vector_upsampled.T) e@0: e@0: output = states_to_vector(parameters_state_estimated, parameter_state_alphabet_inv) e@0: e@0: figure() e@0: subplot(211) e@0: plot(parameters_vector_upsampled.T) e@0: title('Parameter value')# % UpsamplingFactor) e@0: ylabel('value') e@0: xlabel('frame \#') e@0: subplot(212) e@0: #plot(smooth_matrix_1D(output).T) e@0: plot(output.T) e@0: ylabel('value') e@0: xlabel('frame \#') e@0: e@0: errorvector_nbc = [mse(output[:,i], parameters_vector_upsampled[:,i]) for i in range(0, len(parameters_state_estimated))] e@0: e@0: e@0: e@0: e@0: #cross_validate_classification(features_vector_upsampled.T, parameters_state, gnb) e@0: # hc = happy_curve_classification(features_vector_upsampled.T, parameters_state, gnb) e@0: e@0: # e@0: # figure() e@0: # plot(hc) e@0: # figure() e@0: e@0: print "[II] Trying Multinomial HMM" e@0: e@0: # In order to do classification with HMMs, we need to: e@0: # 1. Split the parameters into classes e@0: # 2. Train one model per class e@0: # 3. Feed our data to all the models e@0: # 4. Check which has a better score and assig,n to M e@0: class SVMClassifier: e@0: def __init__(self,**kwargs): e@0: print "[II] SVM Classifier " e@0: # self.clf = svm.SVC(**kwargs) e@0: self.name = "SVM" e@0: self.smallname = "svm" e@0: self.clf = multiclass.OneVsOneClassifier(svm.SVC(**kwargs) ) e@0: e@0: def fit(self, X, classes): e@0: n_classes = max(unique(classes))+1 e@0: e@0: self.clf.fit(X,classes) e@0: e@0: def predict(self, X): e@0: return self.clf.predict(X) e@0: e@0: def getName(self): e@0: return self.name e@0: e@0: def cross_validate(self, data, classes): e@0: print "[II] SVN Classifier Crossvalidating... " e@0: from copy import deepcopy e@0: estimator = deepcopy(self) e@0: estimator_fulldata = deepcopy(self) e@0: estimator_fulldata.fit(data, classes) e@0: e@0: percents = arange(0.1,0.9,0.1) e@0: self.percents = percents * 100 e@0: e@0: RATIOS = [] e@0: labels = estimator.predict(data) e@0: e@0: e@0: print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(labels)) e@0: e@0: for p in percents: e@0: traindata,testdata,trainlabels,testlabels = cross_validation.train_test_split(data,classes,test_size=p,random_state=1) e@0: e@0: e@0: # print "[II] [%.2f-%.2f] Examples used for training: %s" % (p, (1-p), str(traindata)) e@0: # print "[II] [%.2f-%.2f] Examples used for testing: %s" % (p, (1-p), str(testdata)) e@0: # Training phase e@0: e@0: e@0: e@0: estimator_ = deepcopy(estimator) e@0: estimator_.fit(traindata, trainlabels) e@0: e@0: predicted_labels = estimator_.predict(testdata) e@0: e@0: m = sum(array(predicted_labels==testlabels).astype(float))/len(testlabels) e@0: e@0: print "[II] [%.2f-%.2f] 5-fold cross-validation for SVN Classifier: %.1f" % (p, (1-p), m) e@0: e@0: RATIOS.append(m) e@0: e@0: return RATIOS,percents e@0: e@0: e@0: e@0: e@0: e@0: e@0: e@0: class HmmClassifier2: e@0: def __init__(self, N=2, chain_size=4, n_components = 1): e@0: print "[II] HMM Classifier (%d states, %d components)" % (N, n_components) e@0: self.name = "HMM (%d states, %d components)" % (N, n_components) e@0: self.n_components = n_components e@0: self.chain_size = chain_size e@0: self.hmms_ = [] e@0: self.N = N e@0: self.n_states = N e@0: e@0: def getName(self): e@0: return self.name e@0: e@0: def fit(self, features, parameters): e@0: self.n_params = len(unique(parameters)) e@0: e@0: e@0: e@0: for n in range(0, self.n_params): e@0: hmm_ = hmm.GaussianHMM(n_components = self.n_components, covariance_type='full') e@0: e@0: # Get training data for each parameter e@0: e@0: for i in range(0, len(parameters)-self.chain_size): e@0: seq = features[i:i+self.chain_size, :] e@0: if parameters[i+self.chain_size-1] == n and seq.shape[0] == self.chain_size: e@0: # print "[II] Fitting: %s" % str(seq) e@0: e@0: hmm_.fit([seq]) e@0: e@0: self.hmms_.append(hmm_) e@0: e@0: def predict(self, X): e@0: labels = zeros((X.shape[0],)) e@0: N = self.N e@0: e@0: scores = zeros((self.n_states, )) e@0: e@0: for i in range(0, len(labels)): e@0: testdata = X[i:i+self.chain_size,:] e@0: e@0: for j in range(0, self.n_states): e@0: scores[j] = self.hmms_[j].score(testdata) e@0: labels[i] = argmax(scores) e@0: e@0: return labels e@0: e@0: def cross_validate(self, data, classes): e@0: print "[II] SVN Classifier Crossvalidating... " e@0: from copy import deepcopy e@0: estimator = deepcopy(self) e@0: estimator_fulldata = deepcopy(self) e@0: estimator_fulldata.fit(data, classes) e@0: e@0: percents = arange(0.1,0.9,0.1) e@0: self.percents = percents * 100 e@0: e@0: RATIOS = [] e@0: labels = estimator.predict(data) e@0: e@0: e@0: print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(labels)) e@0: e@0: for p in percents: e@0: traindata,testdata,trainlabels,testlabels = cross_validation.train_test_split(data,classes,test_size=p,random_state=1) e@0: e@0: e@0: # print "[II] [%.2f-%.2f] Examples used for training: %s" % (p, (1-p), str(traindata)) e@0: # print "[II] [%.2f-%.2f] Examples used for testing: %s" % (p, (1-p), str(testdata)) e@0: e@0: # Training phase e@0: e@0: e@0: e@0: estimator_ = deepcopy(estimator) e@0: estimator_.fit(traindata, trainlabels) e@0: e@0: predicted_labels = estimator_.predict(testdata) e@0: e@0: m = sum(array(predicted_labels==testlabels).astype(float))/len(testlabels) e@0: e@0: print "[II] [%.2f-%.2f] 5-fold cross-validation for HMM2 Classifier: %.1f" % (p, (1-p), m) e@0: e@0: RATIOS.append(m) e@0: e@0: return RATIOS,percents e@0: e@0: e@0: e@0: e@0: class HmmClassifier: e@0: def __init__(self, N=2, n_components = 1): e@0: print "[II] HMM Classifier (%d states, %d components)" % (N, n_components) e@0: self.name = "HMM (%d states, %d components)" % (N, n_components) e@0: self.n_components = n_components e@0: self.chain_size = N e@0: self.hmms_ = [] e@0: self.N = N e@0: e@0: def getName(self): e@0: return self.name e@0: e@0: def fit(self, X, states): e@0: self.n_states = len(unique(states)) e@0: e@0: for n in range(0, self.n_states): e@0: hmm_ = hmm.GaussianHMM(n_components = self.n_components, covariance_type = 'full') e@0: e@0: # Get training data for each class e@0: vector = X[states == n,:] e@0: e@0: # Fit the HMM e@0: # print vector e@0: hmm_.fit([vector]) e@0: e@0: # And append to the list e@0: self.hmms_.append(hmm_) e@0: e@0: def predict(self,X): e@0: labels = zeros((X.shape[0],)) e@0: N = self.N e@0: e@0: m = 0 e@0: e@0: scores = zeros((1, self.n_states)) e@0: e@0: e@0: while m*N < X.shape[0]: e@0: if (m+1)*N > X.shape[0]: e@0: testdata = X[m*N:,:] e@0: else: e@0: testdata = X[m*N:(m+1)*N,:] e@0: e@0: # print testdata e@0: e@0: for i in range(0, self.n_states): e@0: scores[0,i] = self.hmms_[i].score(testdata) e@0: e@0: if (m+1)*N > X.shape[0]: e@0: labels[m*N:] = argmax(scores) e@0: else: e@0: labels[m*N:(m+1)*N] = argmax(scores) e@0: e@0: m+= 1 e@0: e@0: return labels e@0: e@0: # def cross_validate(self, data, classes, index_vector): e@0: # print "[II] Crossvalidating... " e@0: # from copy import deepcopy e@0: # estimator = deepcopy(self) e@0: # estimator_fulldata = deepcopy(self) e@0: # estimator_fulldata.fit(data, classes) e@0: # e@0: # percents = arange(0.1,0.9,0.1) e@0: # self.percents = percents * 100 e@0: # RATIOS = [] e@0: # labels = estimator.predict(data) e@0: # e@0: # print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(labels)) e@0: # e@0: # for p in percents: e@0: # train,test,trainlabels,testlabels = cross_validation.train_test_split(data,classes,test_size=p,random_state=1) e@0: # estimator_ = deepcopy(estimator) e@0: # estimator_.fit(train, trainlabels) e@0: # labels = estimator_.predict(test) e@0: # m = sum(array(testlabels==labels).astype(float))/len(labels) e@0: # RATIOS.append(m) e@0: # print "[II] for training(%.2f)-testing(%.2f): %.2f" % ((1-p),p,sum(array(testlabels==labels).astype(float))/len(labels)) e@0: # e@0: # return RATIOS, percents e@0: e@0: def cross_validate(self, data, classes): e@0: print "[II] SVN Classifier Crossvalidating... " e@0: from copy import deepcopy e@0: estimator = deepcopy(self) e@0: estimator_fulldata = deepcopy(self) e@0: estimator_fulldata.fit(data, classes) e@0: e@0: percents = arange(0.1,0.9,0.1) e@0: self.percents = percents * 100 e@0: e@0: RATIOS = [] e@0: labels = estimator.predict(data) e@0: e@0: e@0: print "[II] for full training-testing: %.2f" % (sum(array(classes==labels).astype(float))/len(labels)) e@0: e@0: for p in percents: e@0: traindata,testdata,trainlabels,testlabels = cross_validation.train_test_split(data,classes,test_size=p,random_state=1) e@0: e@0: e@0: # print "[II] [%.2f-%.2f] Examples used for training: %s" % (p, (1-p), str(traindata)) e@0: # print "[II] [%.2f-%.2f] Examples used for testing: %s" % (p, (1-p), str(testdata)) e@0: # Training phase e@0: e@0: e@0: e@0: estimator_ = deepcopy(estimator) e@0: estimator_.fit(traindata, trainlabels) e@0: e@0: predicted_labels = estimator_.predict(testdata) e@0: m = sum(array(predicted_labels==testlabels).astype(float))/len(testlabels) e@0: e@0: e@0: # testdata = None e@0: # e@0: # traindata = None e@0: # trainlabels = None e@0: # testlabels = None e@0: # e@0: # for t in train: e@0: # if traindata is None: e@0: # traindata = data[index_vector == t, :] e@0: # trainlabels = classes[index_vector == t] e@0: # else: e@0: # traindata = append(traindata, data[index_vector == t, :], 0) e@0: # trainlabels = append(trainlabels, classes[index_vector ==t]) e@0: # e@0: # estimator_ = deepcopy(estimator) e@0: # estimator_.fit(traindata, trainlabels) e@0: # e@0: # e@0: # for t in test: e@0: # if testdata is None: e@0: # testdata = data[index_vector == t, :] e@0: # testlabels = classes[index_vector == t] e@0: # else: e@0: # testdata = append(testdata, data[index_vector == t,:], 0) e@0: # testlabels = append(testlabels, classes[index_vector == t]) e@0: e@0: e@0: e@0: # predicted_labels = estimator_.predict(testdata) e@0: e@0: m = sum(array(predicted_labels==testlabels).astype(float))/len(testlabels) e@0: e@0: print "[II] [%.2f-%.2f] 5-fold cross-validation for HMM Classifier: %.1f" % (p, (1-p), m) e@0: e@0: RATIOS.append(m) e@0: e@0: return RATIOS,percents e@0: e@0: def cross_validate_full_k_fold(data, classes, list_of_classifiers, K=10): e@0: """ K-Folds Cross-validates all available classifiers, plots and saves barplots. """ e@0: e@0: ratios = [] e@0: for l in list_of_classifiers: e@0: ratio = l.k_fold_cross_validate(data, classes, K) e@0: ratios.append(ratio) e@0: e@0: L = len(ratios) e@0: e@0: ind = arange(L) e@0: e@0: fig, ax = subplots() e@0: rects = [] e@0: e@0: e@0: e@0: width = 10/float(len(ratios)) e@0: e@0: colors = ['r', 'y', 'g', 'c'] e@0: e@0: labels_ = [] e@0: e@0: for i in range(0, len(ratios)): e@0: rects.append(ax.bar(i*width,ratios[i],width,color=colors[i % len(colors)])) e@0: labels_.append(list_of_classifiers[i].getName()) e@0: e@0: return ratios e@0: e@0: e@0: e@0: def cross_validate_full(seq, classes, list_of_classifiers): e@0: """ Cross-validates all available classifiers, plots and saves barplots. """ e@0: e@0: ratios = [] e@0: percents = [] e@0: for l in list_of_classifiers: e@0: ratios_, percents_ = l.cross_validate(seq, classes) e@0: ratios.append(ratios_) e@0: percents.append(percents_) e@0: e@0: e@0: L = len(ratios[0]) e@0: e@0: ind = arange(L) e@0: e@0: fig,ax = subplots() e@0: e@0: rects = [] e@0: e@0: W = 0.8 e@0: width = W/float(len(ratios)) e@0: e@0: colors = ['r', 'y', 'g', 'c'] e@0: e@0: labels_ = [] e@0: for i in range(0, len(ratios)): e@0: rects.append(ax.bar(ind+i*width,ratios[i],width,color=colors[i % len(colors)])) e@0: labels_.append(list_of_classifiers[i].getName()) e@0: e@0: ax.legend(tuple(labels_)) e@0: e@0: ax.set_xticks(ind+W/2) e@0: ax.set_xticklabels(tuple(percents[0]*100)) e@0: ax.set_xlabel("Percentage % of test data") e@0: ax.set_ylabel("Success ratio") e@0: e@0: e@0: e@0: e@0: e@0: e@0: # rects1 = ax.bar(ind,ratios[0],0.35,color='r') e@0: # rects2 = ax.bar(ind+0.351,ratios[1],0.35,color='y') e@0: e@0: e@0: e@0: e@0: return ratios e@0: e@0: e@0: e@0: e@0: e@0: e@0: e@0: # hmmc = HmmClassifier(N = len(unique(parameters_state)), n_components = 1) e@0: # hmmc.fit(features_vector_upsampled.T, parameters_state) e@0: # e@0: # hmmc2 = HmmClassifier(N = 12, n_components = 4) e@0: # hmmc2.fit(features_vector_upsampled.T, parameters_state) e@0: # e@0: svmc = SVMClassifier(probability=True) e@0: svmc.fit(features_vector_upsampled.T, parameters_state) e@0: # # svmc.cross_validate(features_vector_upsampled.T, parameters_state, index_vector) e@0: # e@0: nbc = NBClassifier() e@0: nbc.fit(features_vector_upsampled.T, parameters_state) e@0: # e@0: # e@0: #hchmmc = happy_curve_classification(features_vector_upsampled.T, parameters_state, hmmc, Nd=100) e@0: # # cross_validate_classification(features_vector_upsampled.T, parameters_state, hmmc) e@0: # e@0: # #hmmc.cross_validate(features_vector_upsampled.T, parameters_state) e@0: # e@0: e@0: hmmc3 = HmmClassifier3(N=2, n_components = max(unique(parameters_state))) e@0: #print(parameters_state) e@0: plot(parameters_state) e@0: e@0: e@0: print(shape(features_vector_upsampled)) e@0: obs = hmmc3.fit(features_vector_upsampled.T, parameters_state) e@0: e@0: e@0: e@0: # svmhmm = HmmSVMClassifier() e@0: # svmhmm.fit(features_vector_upsampled.T, parameters_state) e@0: # ratios = cross_validate_full(features_vector_upsampled.T, parameters_state, [nbc, svmc]) e@0: # ratios = cross_validate_full_k_fold(features_vector_upsampled.T, parameters_state, e@0: # [hmmc3]) e@0: e@0: def approximate_chain_size(data,parameters): e@0: ratios = [] e@0: e@0: # chainsizes = arange(1, 40) e@0: # for cs in chainsizes: e@0: # print "[II] Trying chain size: %d" % cs e@0: # hmmc = HmmClassifier3(N=cs, n_components = 2) e@0: # ratios.append(hmmc.k_fold_cross_validate(data,parameters,K=10)) e@0: e@0: e@0: componentssize = arange(1, 12) e@0: e@0: for cs in componentssize: e@0: print "[II] Trying component size: %d" % cs e@0: hmmc = HmmClassifier3(N=2, n_components = cs) e@0: ratios.append(hmmc.k_fold_cross_validate(data, parameters, K=2)) e@0: e@0: e@0: e@0: e@0: figure() e@0: plot(componentssize, ratios) e@0: xlabel('Chain Size') e@0: ylabel('Success Ratio') e@0: title('10-Fold cross validation success ratio vs chain size') e@0: e@0: e@0: return ratios e@0: e@0: # ratios = approximate_chain_size(features_vector_upsampled.T, parameters_state) e@0: e@0: #predictedhmmc2 = hmmc2.predict(features_vector_upsampled.T) e@0: e@0: # figure() e@0: e@0: e@0: e@0: e@0: e@0: # hmms_ = [] e@0: # e@0: # for n in range(0, len(parameter_state_alphabet)): e@0: # #hmm_ = hmm.GMMHMM(n_components = 1, n_mix = 2) e@0: # hmm_ = hmm.GaussianHMM(n_components = 1,covariance_type = 'full') e@0: # e@0: # # Get training data for each class e@0: # vector = features_vector_upsampled[:,parameters_state == n] e@0: # e@0: # #if vector.shape[1] < n_clusters: e@0: # # hmms_.append(None) e@0: # #else: e@0: # e@0: # hmm_.fit([vector.T]) e@0: # e@0: # # Append to the list e@0: # e@0: # hmms_.append(hmm_) e@0: # e@0: # labels = zeros((features_vector_upsampled.shape[1],)) e@0: # e@0: # N = 20 e@0: # m = 0 e@0: # e@0: # while m*N < features_vector_upsampled.shape[1]: e@0: # e@0: # scores = zeros((1, len(parameter_state_alphabet))) e@0: # e@0: # if (m+1)*N > features_vector_upsampled.shape[1]: e@0: # testdata = features_vector_upsampled[:,m*N:] e@0: # else: e@0: # testdata = features_vector_upsampled[:,m*N:(m+1)*N] e@0: # e@0: # for i in range(0, len(parameter_state_alphabet)): e@0: # if hmms_[i] is not None: e@0: # scores[0,i] = hmms_[i].score(testdata.T) e@0: # else: e@0: # scores[0,i] = -100000 # Very large negative score e@0: # if (m+1)*N >= features_vector_upsampled.shape[1]: e@0: # labels[m*N:] = argmax(scores) e@0: # else: e@0: # labels[m*N:(m+1)*N] = argmax(scores) e@0: # e@0: # m += 1 e@0: e@0: e@0: # figure() e@0: #plot(labels.T) e@0: e@0: e@0: # labels = hmmc.predict(features_vector_upsampled.T) e@0: # estimated = states_to_vector(labels,parameter_state_alphabet_inv) e@0: # plot(estimated.T,'r--') e@0: # e@0: # title('Estimated parameter values') e@0: # legend(['Naive Bayes Classifier', 'HMM chain size %d (%.1fms)' % (N, float(N)/UpsamplingFactor*23.0)]) e@0: # e@0: # ylabel('value') e@0: # xlabel('frame #') e@0: # e@0: e@0: # close('all') e@0: e@0: # plot(features_clustering_labels/float(max(features_clustering_labels))) e@0: # plot(parameters_vector_upsampled.T/max(ravel(parameters_vector_upsampled))) e@0: e@0: e@0: def plot_clusters(x, labels): e@0: figure() e@0: symbols = ['>', 'x', '.', '<','v'] e@0: colors = ['b', 'r', 'g', 'm','c'] e@0: e@0: for r in range(0, x.shape[0]): e@0: scatter(x[r,0], x[r,1], c=colors[int(labels[r]) % len(colors)], marker=symbols[int(labels[r]) % len(symbols)]) e@0: e@0: e@0: # plot_clusters(features_vector_upsampled.T, parameters_state) e@0: # e@0: e@0: # SVM e@0: e@0: class HmmClassifierSVN_Naive: e@0: def __init__(self, N=2, n_components = 1): e@0: self.n_components = n_components e@0: self.chain_size = N e@0: self.hmms_ = [] e@0: self.N = N e@0: e@0: def fit(self, X, states): e@0: self.n_states = len(unique(states)) e@0: e@0: for n in range(0, self.n_states): e@0: hmm_ = hmm.GaussianHMM(n_components = self.n_components, covariance_type = 'full') e@0: e@0: # Get training data for each class e@0: vector = X[states == n,:] e@0: e@0: # Fit the HMM e@0: # print vector e@0: hmm_.fit([vector]) e@0: e@0: # And append to the list e@0: self.hmms_.append(hmm_) e@0: e@0: def predict(self,X): e@0: labels = zeros((X.shape[0],)) e@0: N = self.N e@0: e@0: m = 0 e@0: e@0: scores = zeros((1, self.n_states)) e@0: e@0: e@0: while m*N < X.shape[0]: e@0: if (m+1)*N > X.shape[0]: e@0: testdata = X[m*N:,:] e@0: else: e@0: testdata = X[m*N:(m+1)*N,:] e@0: e@0: # print testdata e@0: e@0: for i in range(0, self.n_states): e@0: scores[0,i] = self.hmms_[i].score(testdata) e@0: e@0: if (m+1)*N > X.shape[0]: e@0: labels[m*N:] = argmax(scores) e@0: else: e@0: labels[m*N:(m+1)*N] = argmax(scores) e@0: e@0: m+= 1 e@0: e@0: return labels e@0: e@0: e@0: e@0: e@0: def plot_parameters_estimation(list_of_estimators): e@0: for e in list_of_estimators: e@0: name_ = e.getName() e@0: e@0: e@0: e@0: fig = figure() e@0: fig.suptitle('Parameters estimation using %s' % name_, fontweight='bold') e@0: subplot(311) e@0: title ('original parameters') e@0: param_real = states_to_vector(parameters_state,parameter_state_alphabet_inv).T e@0: plot(param_real) e@0: xlabel('sample') e@0: ylabel('normalized parameter value') e@0: legend(parameter_captions) e@0: subplot(312) e@0: title('estimated parameters') e@0: pred = e.predict(features_vector_upsampled.T) e@0: param_est = states_to_vector(pred,parameter_state_alphabet_inv).T e@0: plot(param_est) e@0: xlabel('sample') e@0: ylabel('normalized parameter value') e@0: legend(parameter_captions) e@0: subplot(313) e@0: error_real_est = zeros((len(param_est),)) e@0: for i in range(0, len(error_real_est)): e@0: error_real_est[i] = mse(param_real[i],param_est[i]) e@0: e@0: totalmse = sum(error_real_est) e@0: title('mean squared error (total: %.3f)' % totalmse) e@0: e@0: plot(error_real_est) e@0: xlabel('sample') e@0: ylabel('normalized parameter value') e@0: e@0: e@0: e@0: # hmm2 = hmm.MultinomialHMM(n_components = n_classes, startprob = prior_p_q, transmat = transmat ) e@0: e@0: e@0: e@0: # TODO: Embed SVM emissions to HMM. Try to do pick maximum likelihood sequence from e@0: # classifiers like in HmmClassifier3. For every class, train an SVM, with n_components as e@0: # states, and compute the log-likelihood for prediction. Should work. e@0: e@0: e@0: e@0: # class HMMsvmClassifier: e@0: # def __init__(self, N=6): e@0: # print "[II] HMM-SVM Classifier (%d time steps)" % N e@0: # self.name = "HMM-SVM (%d time steps)" % N e@0: # # self.n_components = n_components e@0: # self.chain_size = N e@0: # self.svm = MyAVAClassifier() e@0: # e@0: # e@0: # def getName(self): e@0: # return self.name e@0: # e@0: # def fit(self, features, parameters): e@0: # e@0: # # First train emission svm e@0: # e@0: # self.svm.fit(features, parameters) e@0: # e@0: # # print parameters e@0: # n_classes = max(unique(parameters)) + 1 e@0: # e@0: # print "[II] Number of classes: ", n_classes e@0: # hmms = [None]*n_classes e@0: # chain_size = self.chain_size e@0: # params = [None]*n_classes e@0: # obs = [None]*n_classes e@0: # e@0: # for i in range(chain_size, len(parameters)): e@0: # class_ = parameters[i-1] e@0: # params_ = parameters[i-chain_size:i] e@0: # feats_ =features[i-chain_size:i,:] e@0: # e@0: # if params[class_] is None: e@0: # params[class_] = [params_] e@0: # obs[class_] = [feats_] e@0: # else: e@0: # params[class_].append(params_) e@0: # obs[class_].append(feats_) e@0: # e@0: # e@0: # e@0: # for i in range(0, len(params)): e@0: # if params[i] is not None and len(params[i]) != 0: e@0: # hmm_ = HMMsvm(self.svm) e@0: # # print "[II] %d Fitting params: " % i, params[i] e@0: # print "[!!] OBSERVATIONS: " e@0: # print obs[i] e@0: # hmm_.fit(obs[i], params[i]) e@0: # e@0: # print "[!!] PARAMETERS: " e@0: # print params[i] e@0: # e@0: # # TODO: Left here on 06/07 19:56 e@0: # e@0: # # hmm_.fit(features,) e@0: # # print "obs: ", obs[i] e@0: # # print "params: ", params[i] e@0: # # hmm_.fit(obs[i], params[i],flr=1e-9) e@0: # hmms[i] = hmm_ e@0: # e@0: # self.hmms = hmms e@0: # e@0: # return obs e@0: # def predict(self, features, mfilt=20): e@0: # chain_size = self.chain_size e@0: # hmms = self.hmms e@0: # predicted_classes = zeros((features.shape[0],)) e@0: # for i in range(chain_size, features.shape[0]): e@0: # scores = zeros((len(hmms),)) e@0: # e@0: # seq = features[i-chain_size:i, :] e@0: # e@0: # for j in range(0, len(hmms)): e@0: # if hmms[j] is not None: e@0: # q,p = hmms[j].logviterbi(seq) e@0: # e@0: # scores[j] = logsumexp(p) e@0: # else: e@0: # scores[j] = -infty e@0: # e@0: # predicted_classes[i] = argmax(scores) e@0: # e@0: # # Do a median filter at the end e@0: # e@0: # # predicted_classes = median_filter(predicted_classes,mfilt) e@0: # e@0: # return predicted_classes e@0: e@0: class HmmSvmClassifier3: e@0: def __init__(self, N=2,n_components = 1): e@0: print "[II] HMM/SVM Classifier (%d states, %d components)" % (N, n_components) e@0: self.name = "HMM (%d time steps, %d components)" % (N, n_components) e@0: self.n_components = n_components e@0: self.smallname = "hmmsvmc" e@0: self.chain_size = N e@0: e@0: def getName(self): e@0: return self.name e@0: e@0: def fit(self, features, parameters): e@0: e@0: e@0: e@0: # print parameters e@0: n_classes = max(unique(parameters)) + 1 e@0: e@0: if n_classes == 1: e@0: self.only_one_class = True e@0: return e@0: else: e@0: self.only_one_class = False e@0: e@0: print "[II] Number of classes: ", n_classes e@0: hmms = [None]*n_classes e@0: # svms = [None]*n_classes e@0: chain_size = self.chain_size e@0: obs = [None]*n_classes e@0: cls = [None]*n_classes e@0: e@0: for i in range(chain_size, len(parameters)): e@0: class_ = parameters[i-1] e@0: seq = features[i-chain_size:i,:] e@0: params = parameters[i-chain_size:i] e@0: e@0: if obs[class_] is None: e@0: obs[class_] = [seq] e@0: cls[class_] = [params] e@0: else: e@0: obs[class_].append(seq) e@0: cls[class_].append(params) e@0: e@0: e@0: e@0: for i in range(0, len(obs)): e@0: if obs[i] is not None and len(obs[i]) != 0: e@0: # hmm_ = hmm.GaussianHMM(n_components=self.n_components, covariance_type='full') e@0: svm_ = MyAVAClassifier() e@0: print obs[i] e@0: print cls[i] e@0: e@0: O = obs[i][0] e@0: C = cls[i][0] e@0: e@0: for j in range(0, len(obs[i])): e@0: O = append(O, obs[i][j], 1) e@0: C = append(C, cls[i][j], 1) e@0: e@0: print O e@0: print C e@0: e@0: svm_.fit(O.T,C) e@0: hmm_ = HMMsvm(svm_) e@0: hmm_.fit(obs[i],cls[i]) e@0: hmms[i] = hmm_ e@0: e@0: self.hmms = hmms e@0: e@0: return obs e@0: e@0: def predict(self, features, mfilt=20): e@0: e@0: if self.only_one_class == True: e@0: return zeros((features.shape[0], )) e@0: e@0: chain_size = self.chain_size e@0: hmms = self.hmms e@0: predicted_classes = zeros((features.shape[0],)) e@0: e@0: e@0: for i in range(chain_size, features.shape[0]): e@0: scores = zeros((len(hmms),)) e@0: e@0: seq = features[i-chain_size:i, :] e@0: e@0: for j in range(0, len(hmms)): e@0: if hmms[j] is not None: e@0: print hmms[j].score(seq) e@0: scores[j] = hmms[j].score(seq)[j] e@0: else: e@0: scores[j] = -infty e@0: e@0: predicted_classes[i] = argmax(scores) e@0: e@0: # Do a median filter at the end e@0: e@0: # predicted_classes = median_filter(predicted_classes,mfilt) e@0: e@0: return predicted_classes e@0: e@0: e@0: e@0: e@0: e@0: e@0: # newstates = self.logviterbi() e@0: e@0: # e@0: # e@0: # # Alphas and Betas e@0: # X = features e@0: # alphas = zeros((T,n_classes)) e@0: # betas = zeros((T,n_classes)) e@0: # e@0: # forward_messages = zeros((T,n_classes)) e@0: # backward_messages = zeros((T, n_classes)) e@0: # e@0: # print "[II] Computing alpha, beta..." e@0: # for t in range(1, T+1): e@0: # forward_messages[t-1,:] = self._forward_message(forward_messages[t-2,:],X[0:t,]) e@0: # backward_messages[-t,:] = self._backward_message(backward_messages[-t+1], X[-t:,:]) e@0: # e@0: # e@0: # print "[II] Computing ksu..." e@0: # e@0: # a_norm = forward_messages[-1,n_classes-1] # alpha_T e@0: # e@0: # ksu = zeros((T, n_classes, n_classes)) e@0: # e@0: # for i in range(0, n_classes): e@0: # for j in range(0, n_classes): e@0: # for t in range(0, T-1): e@0: # ksu[t,i,j] = exp(forward_messages[t, i] + log(transmat[i,j]) + log(self.estimate_emission_probability(X[t,:],j)) + backward_messages[t+1,j] - a_norm) e@0: # e@0: # e@0: # e@0: # self.alphas = forward_messages e@0: # self.betas = backward_messages e@0: # self.ksu = ksu e@0: # e@0: # transmat_new = zeros((n_classes,n_classes)) e@0: # e@0: # for i in range(0, n_classes): e@0: # e@0: # for j in range(0, n_classes): e@0: # transmat_new[i,j] = sum(ksu[:,i,j])/sum(ksu[:,i,:]) e@0: # e@0: # self.transmat_new = transmat_new e@0: # e@0: e@0: e@0: e@0: e@0: e@0: e@0: e@0: e@0: e@0: e@0: e@0: # e@0: # def forward(self, X): e@0: # # Compute log likelihood using the forward algorithm e@0: # e@0: # # Number of states e@0: # N = self.n_classes e@0: # e@0: # # Number of Observations e@0: # T = X.shape[0] e@0: # e@0: # e@0: # transmat = self.transmat e@0: # e@0: # # Initialization e@0: # # a1 = ones((N,))/N e@0: # a1 = self.prior e@0: # e@0: # alpha = zeros((N,)) e@0: # for i in range(0, N): e@0: # alpha[i] = log(a1[i])+log(self.estimate_emission_probability(X[0,:], i)) e@0: # # print alpha e@0: # # Recursion e@0: # for t in range(0, T): e@0: # alpha_old = alpha.copy() e@0: # x = X[t, :] e@0: # for j in range(0, N): e@0: # tosum = zeros((N,)) e@0: # for i in range(0, N): e@0: # tosum[i] = alpha_old[i]+log(transmat[i,j]) e@0: # e@0: # # Sum = logsum(tosum) e@0: # Sum = logsumexp(tosum) e@0: # bj = self.estimate_emission_probability(x, j) e@0: # e@0: # alpha[j] = Sum+log(bj) e@0: # # print alpha e@0: # e@0: # tosum = zeros((N,)) e@0: # for i in range(0, N): e@0: # tosum[i] = alpha[i] + log(transmat[i,N-1]) e@0: # e@0: # return tosum e@0: # e@0: # def backward(self, X): e@0: # # Number of states e@0: # N = self.n_classes e@0: # e@0: # # Number of Observations e@0: # T = X.shape[0] e@0: # e@0: # transmat = self.transmat e@0: # e@0: # # Initialization e@0: # e@0: # b1 = ones((N,))/N e@0: # e@0: # beta = zeros((N,)) e@0: # for i in range(0, N): e@0: # beta[i] = log(b1[i])+log(self.estimate_emission_probability(X[-1,:], i)) e@0: # e@0: # for t in range(0, T): e@0: # beta_old = beta.copy() e@0: # x = X[-t, :] e@0: # for j in range(0, N): e@0: # tosum = zeros((N, )) e@0: # for i in range(0, N): e@0: # tosum[i] = beta_old[i]+log(transmat[i,j]) e@0: # e@0: # Sum = logsumexp(tosum) e@0: # bj = self.estimate_emission_probability(x, j) e@0: # beta[j] = Sum+log(bj) e@0: # e@0: # tosum = zeros((N,)) e@0: # e@0: # for i in range(0, N): e@0: # tosum[i] = beta[i] + log(transmat[i,0]) e@0: # e@0: # #3 p = logsumexp(tosum) e@0: # e@0: # return tosum e@0: e@0: e@0: def _log_likelihood(self, X): e@0: e@0: return logsumexp(self.forward(X)) e@0: e@0: e@0: e@0: e@0: e@0: def _likelihood(self, X): e@0: # Number of states e@0: N = self.n_classes e@0: e@0: # Number of Observations e@0: T = X.shape[0] e@0: e@0: e@0: transmat = self.transmat e@0: e@0: # Initialization e@0: # a1 = ones((N,))/N e@0: a1 = self.prior e@0: e@0: alpha = zeros((N,)) e@0: for i in range(0, N): e@0: alpha[i] = a1[i]*self.estimate_emission_probability(X[0,:], i) e@0: e@0: # Recursion e@0: print alpha e@0: for t in range(1, T): e@0: alpha_old = alpha.copy() e@0: x = X[t, :] e@0: for j in range(0, N): e@0: Sum = 0 e@0: for i in range(0, N): e@0: Sum += alpha_old[i]*transmat[i,j] e@0: e@0: alpha[j] = Sum*self.estimate_emission_probability(x, j) e@0: print alpha e@0: e@0: e@0: # Termination e@0: e@0: Sum = 0 e@0: for i in range(0, N): e@0: Sum += alpha[i]*transmat[i, N-1] e@0: e@0: p = Sum e@0: e@0: return p e@0: e@0: e@0: e@0: e@0: e@0: e@0: e@0: # def fit(self, X, states): e@0: # # print parameters e@0: # n_classes = max(unique(states)) + 1 e@0: # e@0: # # svms = [None]*n_classes e@0: # obs = [None]*n_classes e@0: # sta = [None]*n_classes e@0: # chain_size = self.chain_size e@0: # e@0: # #22 obs = None e@0: # # sta = None e@0: # e@0: # print "[II] Number of classes: ", n_classes e@0: # e@0: # # For each class: e@0: # # Concatenate examples e@0: # # Fit SVM e@0: # e@0: # for i in range(chain_size, len(states)): e@0: # class_ = states[i-1] e@0: # seq = X[i-chain_size:i, :] e@0: # states_ = states[i-chain_size:i] e@0: # e@0: # e@0: # if obs[class_] is None: e@0: # obs[class_] = [seq] e@0: # sta[class_] = [states_] e@0: # self.svms.append(MyAVAClassifier()) e@0: # else: e@0: # obs[class_].append(seq) e@0: # sta[class_].append(states_) e@0: # e@0: # e@0: # for i in range(0, len(obs)): e@0: # e@0: # o = None e@0: # s = None e@0: # e@0: # for j in range(0, len(obs[i])): e@0: # if o is None: e@0: # o = obs[i][j] e@0: # s = sta[i][j] e@0: # e@0: # else: e@0: # o = append(o, obs[i][j],0) e@0: # s = append(s, sta[i][j]) e@0: # e@0: # e@0: # self.svms[i].fit(o, s) e@0: e@0: # def predict(self, features): e@0: # chain_size = self.chain_size e@0: # svms = self.svms e@0: # e@0: # predicted_classes = zeros((features.shape[0],)) e@0: # for i in range(chain_size, features.shape[0]): e@0: # scores = zeros((len(svms))) e@0: # e@0: # seq = features[i-chain_size:i, :] e@0: # e@0: # for j in range(0, len(svms)): e@0: # if svms[j] is not None: e@0: # scores[j] = svms[j].compute_log_likelihood(seq) e@0: # else: e@0: # scores[k] = -infty e@0: # predicted_classes[i] = argmax(scores) e@0: # e@0: # return predicted_classes e@0: e@0: e@0: e@0: e@0: e@0: # Marginalize over the latent variable e@0: # for i in range(0, X.shape[0]): e@0: # P = zeros((self.n_classes,)) e@0: # x = X[i,:] e@0: # for j in range(0, len(P)): e@0: # P[j] = self.estimate_emission_probability(j, x) e@0: # e@0: # S[i] = log(sum(P[j])) e@0: # e@0: # return sum(S) e@0: e@0: e@0: e@0: e@0: # Continue from here e@0: X = features_vector_upsampled.T e@0: y = parameters_state e@0: e@0: # clf = svm.SVC() e@0: # clf.fit(X,y) e@0: e@0: e@0: # parameters_state_y = clf.predict(X) e@0: e@0: e@0: predhmmc3 = hmmc3.predict(features_vector_upsampled.T) e@0: e@0: myava = MyAVAClassifier() e@0: myava.fit(features_vector_upsampled.T, parameters_state) e@0: e@0: predmyava = myava.predict(features_vector_upsampled.T) e@0: e@0: # hmmsvmc = HMMsvmClassifier(N=2) e@0: # hmmsvmc.fit(features_vector_upsampled.T, parameters_state) e@0: #predhmmsvmc = hmmsvmc.predict(features_vector_upsampled.T) e@0: e@0: obsc = MyAVAClassifier() e@0: obsc.fit(features_vector_upsampled.T, parameters_state) e@0: # hmm2 = hmm.GaussianHMM(n_components=6) e@0: e@0: # hmm2.fit([features_vector_upsampled.T]) e@0: # hmm3 = HMMsvm(obsc) e@0: # hmm3.fit([features_vector_upsampled.T],[parameters_state]) e@0: # predhmmsvm = hmm3.predict(features_vector_upsampled.T) e@0: e@0: # hmmsvmc = HMMsvmClassifier() e@0: e@0: hmmsvmc = HMMsvmClassifier() e@0: # hmmsvmc = HmmSvmClassifier3() e@0: hmmsvmc.fit(features_vector_upsampled.T, parameters_state) e@0: predhmmsvm = hmmsvmc.predict(features_vector_upsampled.T) e@0: plot_parameters_estimation([nbc, myava, hmmc3, hmmsvmc]) e@0: e@0: e@0: #plot(parameters) e@0: figure() e@0: plot(parameters_state,'b') e@0: e@0: # plot(parameters_state_y,'r--') e@0: plot(predhmmc3,'g+-') e@0: e@0: plot(predhmmsvm, 'co-') e@0: e@0: # plot(predhmmsvmc, 'bo-') e@0: e@0: legend(['Real', 'SVM', 'HMM','HMM-SVM']) e@0: e@0: xlabel('sample') e@0: ylabel('state') e@0: # TODO, HMM with SVN, Cross-validation e@0: e@0: # Using hidden markov svm e@0: e@0: svmhmm = "" e@0: e@0: # print "[II] Success ratio for SVM-RBF Classifier: %.2f" % (sum(parameters_state==parameters_state_y).astype(float)/float(len(parameters_state))) e@0: e@0: print "[II] All-vs-All custom Support Vector Classifier Success Ratio: %.2f " %(sum(predmyava==parameters_state).astype(float)/len(parameters_state)) e@0: print "[II] Success ratio for HMM: %.2f" % (sum(parameters_state==predhmmc3).astype(float)/float(len(predhmmc3))) e@0: print "[II] Success ratio for HMM-SVM: %.2f" % (sum(parameters_state==predhmmsvm).astype(float)/float(len(predhmmsvm))) e@0: e@0: e@0: e@0: e@0: e@0: model_ghmm = ReverbModel("GaussianHMM Classifier Model", kernel, q, features_to_keep, hmmc3, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector) e@0: model_gnb = ReverbModel("Naive Bayes Classifier Model", kernel, q, features_to_keep, nbc, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector ) e@0: model_svm = ReverbModel("AVA LinearSVM Classifier Model", kernel, q, features_to_keep, myava, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector ) e@0: model_hmmsvm = ReverbModel("HMM0SVM Classifier Model", kernel, q, features_to_keep, hmmsvmc, parameter_dictionary=parameter_state_alphabet_inv, moments_vector=moments_vector ) e@0: e@0: model_ghmm.save("model_ghmm.dat") e@0: model_gnb.save("model_gnb.dat") e@0: model_svm.save("model_svm.dat") e@0: model_hmmsvm.save("model_hmmsvm.dat") e@0: e@0: e@0: e@0: def split_data_to_k_samples(data, params, K): e@0: L = shape(data)[0] e@0: M = int(round(L/float(K))) e@0: e@0: samples_data = [] e@0: samples_parameters = [] e@0: for k in range(K): e@0: if (k+1)*M > L: e@0: samples_data.append(data[k*M:,:]) e@0: samples_parameters.append(params[k*M:]) e@0: else: e@0: samples_data.append(data[k*M:(k+1)*M,:]) e@0: samples_parameters.append(params[k*M:(k+1)*M]) e@0: e@0: return samples_data, samples_parameters e@0: e@0: def join_data(data, params): e@0: L = 0 e@0: for p in params: e@0: L += len(p) e@0: e@0: new_data = zeros((L, data[0].shape[1])) e@0: new_params = zeros((L,)) e@0: e@0: idx = 0 e@0: for n in range(0,len(data)): e@0: new_data[idx:idx+data[n].shape[0],:] = data[n] e@0: new_params[idx:idx+len(params[n])] = params[n] e@0: idx += len(params[n]) e@0: e@0: return new_data, new_params e@0: e@0: cross_validation_ratios = {} e@0: cross_validation_mses = {} e@0: e@0: e@0: features_samples, parameters_samples = split_data_to_k_samples(features_vector_upsampled.T, parameters_state, 5) e@0: e@0: from sklearn.cross_validation import KFold e@0: e@0: from sklearn import metrics e@0: e@0: e@0: e@0: KF = 10 e@0: e@0: s1,s2 = split_data_to_k_samples(features_vector_upsampled.T, parameters_state, KF) e@0: r1,r2 = join_data(s1,s2) e@0: e@0: kf = KFold(KF, n_folds=KF) # Leave one out e@0: e@0: trial = 1.0 e@0: e@0: rhmmc = 0 e@0: rgnbc = 0 e@0: ravac = 0 e@0: rhmmsvmc = 0 e@0: e@0: e@0: msegbnc = 0 e@0: msehmmc = 0 e@0: mseavac = 0 e@0: msehmmsvmc = 0 e@0: e@0: fullratios = [] e@0: fullmses = [] e@0: e@0: e@0: def calculate_mse(pred, real): e@0: realparams = states_to_vector(real, parameter_state_alphabet_inv) e@0: estparams = states_to_vector(pred, parameter_state_alphabet_inv) e@0: return mse(realparams, estparams) e@0: ratios_gnb = [] e@0: ratios_hmm = [] e@0: ratios_svm = [] e@0: ratios_hmmsvm = [] e@0: ratios_sinkhole = [] e@0: e@0: mse_gnb = [] e@0: mse_hmm = [] e@0: mse_svm = [] e@0: mse_hmmsvm = [] e@0: mse_sinkhole = [] e@0: e@0: e@0: for train, test in kf: e@0: e@0: train_idx = 100 e@0: e@0: print train,test e@0: f1 = [] e@0: p1 = [] e@0: e@0: tf1 = [] e@0: tp1 = [] e@0: e@0: for i in train: e@0: f1.append(s1[i-1]) e@0: p1.append(s2[i-1]) e@0: e@0: for i in test: e@0: tf1.append(s1[i-1]) e@0: tp1.append(s2[i-1]) e@0: e@0: new_trainset_data, new_trainset_params = join_data(f1, p1) e@0: new_trainset_params = new_trainset_params.astype(int) e@0: e@0: e@0: # Detect and remove outliers e@0: e@0: # ee = EllipticEnvelope() e@0: e@0: # ee.fit(new_trainset_data, new_trainset_params) e@0: e@0: e@0: # inliers = ee.predict(new_trainset_data) == 1 e@0: e@0: # new_trainset_data = new_trainset_data[inliers, :] e@0: # new_trainset_params = new_trainset_params[inliers] e@0: e@0: e@0: new_testset_data, new_testset_params = join_data(tf1, tp1) e@0: new_testset_params = new_testset_params.astype(int) e@0: e@0: e@0: e@0: param_est = states_to_vector(new_testset_params,parameter_state_alphabet_inv).T e@0: e@0: e@0: e@0: e@0: e@0: e@0: e@0: X = arange(50, len(new_trainset_params), 50) e@0: print X e@0: # X = append(X, [len(new_trainset_params)], 1) e@0: e@0: e@0: # e@0: for M in X: e@0: # print CHAINSIZE e@0: # print NCOMPONENTS e@0: hmmc = HmmClassifier3(N=CHAINSIZE, n_components = NCOMPONENTS) e@0: gnbc = GaussianNB() e@0: avac = MyAVAClassifier() e@0: hmmsvmc = HMMsvmClassifier() e@0: e@0: gnbc.fit(new_trainset_data[0:M,:], new_trainset_params[0:M]) e@0: pgnbc = gnbc.predict(new_testset_data) e@0: rgnbc = metrics.accuracy_score(pgnbc, new_testset_params) e@0: mgnbc = calculate_mse(pgnbc,new_testset_params) e@0: e@0: # e@0: hmmc.fit(new_trainset_data[0:M,:], new_trainset_params[0:M]) e@0: phmmc = hmmc.predict(new_testset_data) e@0: rhmmc = metrics.accuracy_score(phmmc, new_testset_params) e@0: mhmmc = calculate_mse(phmmc,new_testset_params) e@0: e@0: # e@0: try: e@0: avac.fit(new_trainset_data[0:M,:], new_trainset_params[0:M]) e@0: pavac = avac.predict(new_testset_data) e@0: ravac = metrics.accuracy_score(pavac, new_testset_params) e@0: mavac = calculate_mse(pavac,new_testset_params) e@0: except: e@0: ravac = 0 e@0: mavac = inf e@0: # e@0: # e@0: # e@0: # e@0: # e@0: try: e@0: hmmsvmc.fit(new_trainset_data[0:M,:], new_trainset_params[0:M]) e@0: phmmsvm = hmmsvmc.predict(new_testset_data) e@0: rhmmsvm = metrics.accuracy_score(phmmsvm, new_testset_params) e@0: mhmmsvm = calculate_mse(phmmsvm,new_testset_params) e@0: # e@0: except: e@0: rhmmsvm = 0 e@0: mavac = inf e@0: # e@0: # ratios_gnb.append(rgnbc) e@0: # ratios_hmm.append(rhmmc) e@0: # ratios_svm.append(ravac) e@0: # ratios_hmmsvm.append(rhmmsvm) e@0: # e@0: # mse_gnb.append(mgnbc) e@0: # mse_hmm.append(mhmmc) e@0: # mse_svm.append(mavac) e@0: # mse_hmmsvm.append(mhmmsvm) e@0: # e@0: # fullratios.append((trial,X,ratios_gnb,ratios_hmm,ratios_svm,ratios_hmmsvm)) e@0: # fullmses.append((trial,X,mse_gnb,mse_hmm,mse_svm,mse_hmmsvm)) e@0: e@0: # e@0: # e@0: # e@0: # e@0: e@0: e@0: e@0: e@0: e@0: e@0: # hmmc = HmmClassifier3(N=3, n_components = max(unique(parameters_state))+1) e@0: hmmc = HmmClassifier3(N=CHAINSIZE, n_components = NCOMPONENTS) e@0: gnbc = GaussianNB() e@0: avac = MyAVAClassifier() e@0: hmmsvmc = HMMsvmClassifier() e@0: sinkholec = SinkHoleClassifier() e@0: e@0: hmmc.fit(new_trainset_data, new_trainset_params) e@0: gnbc.fit(new_trainset_data, new_trainset_params) e@0: # try: e@0: avac.fit(new_trainset_data, new_trainset_params) e@0: sinkholec.fit(new_trainset_data, new_trainset_params) e@0: pavac = avac.predict(new_testset_data) e@0: e@0: e@0: # except: e@0: # pavac = ones((new_testset_data.shape[0],))*new_trainset_params[0] e@0: e@0: e@0: # try: e@0: hmmsvmc.fit(new_trainset_data, new_trainset_params) e@0: phmmsvm = hmmsvmc.predict(new_testset_data) e@0: # except: e@0: # e@0: # # phmmsvm = hmmsvmc.predict(new_testset_data) e@0: # phmmsvm = ones((new_testset_data.shape[0],))*new_trainset_params[0] e@0: e@0: e@0: phmmc = hmmc.predict(new_testset_data) e@0: pgnbc = gnbc.predict(new_testset_data) e@0: psinkhole = sinkholec.predict(new_testset_data) e@0: e@0: print "[II] (Trial %d) Classification ratio for GNB: %.2f" % (trial, metrics.accuracy_score(pgnbc, new_testset_params)) e@0: print "[II] (Trial %d) Classification ratio for SVM: %.2f" % (trial, metrics.accuracy_score(pavac, new_testset_params)) e@0: print "[II] (Trial %d) Classification ratio for HMM: %.2f" % (trial, metrics.accuracy_score(phmmc, new_testset_params)) e@0: print "[II] (Trial %d) Classification ratio for HMM/SVM: %.2f" % (trial, metrics.accuracy_score(phmmsvm, new_testset_params)) e@0: print "[II] (Trial %d) Classification ratio for Sinkhole Approach: %.2f" % (trial, metrics.accuracy_score(psinkhole, new_testset_params)) e@0: e@0: e@0: mse e@0: e@0: # rgnbc = rhmmc*(trial-1)/trial + metrics.accuracy_score(pgnbc, new_testset_params)/trial e@0: # ravac = rhmmc*(trial-1)/trial + metrics.accuracy_score(pavac, new_testset_params)/trial e@0: # rhmmc = rhmmc*(trial-1)/trial + metrics.accuracy_score(phmmc, new_testset_params)/trial e@0: # rhmmsvmc = rhmmc*(trial-1)/trial + metrics.accuracy_score(phmmsvm, new_testset_params)/trial e@0: e@0: rgnbc = metrics.accuracy_score(pgnbc, new_testset_params) e@0: ravac = metrics.accuracy_score(pavac, new_testset_params) e@0: rhmmc = metrics.accuracy_score(phmmc, new_testset_params) e@0: rhmmsvmc = metrics.accuracy_score(phmmsvm, new_testset_params) e@0: rsinkhole = metrics.accuracy_score(psinkhole, new_testset_params) e@0: e@0: ratios_gnb.append(rgnbc) e@0: ratios_svm.append(ravac) e@0: ratios_hmm.append(rhmmc) e@0: ratios_hmmsvm.append(rhmmsvmc) e@0: ratios_sinkhole.append(rsinkhole) e@0: e@0: e@0: e@0: e@0: e@0: # msegbnc = msegbnc*(trial-1)/trial + calculate_mse(pgnbc,new_testset_params)/trial e@0: # msehmmc = msehmmc*(trial-1)/trial + calculate_mse(phmmc,new_testset_params)/trial e@0: # mseavac = mseavac*(trial-1)/trial + calculate_mse(pavac,new_testset_params)/trial e@0: # msehmmsvmc = msehmmsvmc*(trial-1)/trial + calculate_mse(phmmsvm,new_testset_params)/trial e@0: e@0: msegnb = calculate_mse(pgnbc,new_testset_params) e@0: msesvm = calculate_mse(pavac,new_testset_params) e@0: msehmm = calculate_mse(phmmc,new_testset_params) e@0: msehmmsvm = calculate_mse(phmmsvm,new_testset_params) e@0: msesinkhole = calculate_mse(psinkhole,new_testset_params) e@0: e@0: mse_gnb.append(msegnb) e@0: mse_svm.append(msesvm) e@0: mse_hmm.append(msehmm) e@0: mse_hmmsvm.append(msehmmsvm) e@0: mse_sinkhole.append(msesinkhole) e@0: e@0: e@0: e@0: e@0: e@0: e@0: trial += 1 e@0: e@0: print ratios_gnb e@0: e@0: e@0: sucrats = [mean(ratios_gnb),mean(ratios_svm), mean(ratios_hmm),mean(ratios_hmmsvm), mean(ratios_sinkhole)] e@0: sucratsstd = [std(ratios_gnb,ddof=1),std(ratios_svm,ddof=1), std(ratios_hmm,ddof=1),std(ratios_hmmsvm,ddof=1), std(ratios_sinkhole,ddof=1)] e@0: mserats = [mean(mse_gnb),mean(mse_svm), mean(mse_hmm),mean(mse_hmmsvm), mean(mse_sinkhole)] e@0: #mserats = [msegbnc, mseavac, msehmmc,msehmmsvmc] e@0: e@0: print tuple(sucrats) e@0: # print tuple(sucratsstd) e@0: e@0: e@0: # scores = zeros((len(sucrats), len(sucrats))) e@0: # for i in range(0, len(sucrats)): e@0: # for j in range(0, len(sucrats)): e@0: # scores[i,j] = (sucrats[i] - sucrats[j])/sqrt(sucratsstd[i]**2 + sucratsstd[j]**2) e@0: print tuple(mserats) e@0: # print scores e@0: e@0: e@0: e@0: # print (msegbnc, mseavac, msehmmc,msehmmsvmc ) e@0: e@0: sample_sizes = X/float(max(X)) e@0: gnbc_ratios_curves = zeros((len(X),len(fullratios))) e@0: gnbc_mses_curves = zeros((len(X),len(fullratios))) e@0: # e@0: avac_ratios_curves = zeros((len(X),len(fullratios))) e@0: avac_mses_curves = zeros((len(X),len(fullratios))) e@0: # e@0: hmmc_ratios_curves = zeros((len(X),len(fullratios))) e@0: hmmc_mses_curves = zeros((len(X),len(fullratios))) e@0: # e@0: hmmsvmc_ratios_curves = zeros((len(X),len(fullratios))) e@0: hmmsvmc_mses_curves = zeros((len(X),len(fullratios))) e@0: # e@0: # e@0: for i in range(0, len(fullratios)): e@0: for j in range(0, len(X)): e@0: gnbc_ratios_curves[j,i] = fullratios[i][2][j] e@0: gnbc_mses_curves[j,i] = fullmses[i][2][j] e@0: e@0: avac_ratios_curves[j,i] = fullratios[i][3][j] e@0: avac_mses_curves[j,i] = fullmses[i][3][j] e@0: e@0: hmmc_ratios_curves[j,i] = fullratios[i][4][j] e@0: hmmc_mses_curves[j,i] = fullmses[i][4][j] e@0: e@0: hmmsvmc_ratios_curves[j,i] = fullratios[i][5][j] e@0: hmmsvmc_mses_curves[j,i] = fullmses[i][5][j] e@0: # e@0: # e@0: # e@0: # e@0: # e@0: gnbc_mses_curve = mean(gnbc_mses_curves,1) e@0: avac_mses_curve = mean(avac_mses_curves,1) e@0: hmmc_mses_curve = mean(hmmc_mses_curves,1) e@0: hmmsvmc_mses_curve = mean(hmmsvmc_mses_curves,1) e@0: # e@0: gnbc_ratios_curve = mean(gnbc_ratios_curves,1) e@0: avac_ratios_curve = mean(avac_ratios_curves,1) e@0: hmmc_ratios_curve = mean(hmmc_ratios_curves,1) e@0: hmmsvmc_ratios_curve = mean(hmmsvmc_ratios_curves,1) e@0: # e@0: figure() e@0: subplot(211) e@0: plot(sample_sizes,gnbc_ratios_curve) e@0: plot(sample_sizes,avac_ratios_curve) e@0: plot(sample_sizes,hmmc_ratios_curve) e@0: plot(sample_sizes,hmmsvmc_ratios_curve) e@0: xlabel('Part of training set used') e@0: ylabel('Accuracy') e@0: legend(['GNB','SVM','HMM','HMM/SVM']) e@0: # e@0: subplot(212) e@0: plot(sample_sizes,gnbc_mses_curve) e@0: plot(sample_sizes,avac_mses_curve) e@0: plot(sample_sizes,hmmc_mses_curve) e@0: plot(sample_sizes,hmmsvmc_mses_curve) e@0: xlabel('Part of training set used') e@0: ylabel('Mean Squared Error') e@0: # e@0: legend(['GNB','SVM','HMM','HMM/SVM']) e@0: # e@0: show() e@0: # # tight_layout() e@0: savefig('ratios-2.png',dpi=500) e@0: # e@0: # e@0: #