chourdakisreiss2016: experiment-reverb/code/pca.py @ 2:c87a9505f294 (tip)
Added LICENSE for code, removed .wav files
author:   Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date:     Sat, 30 Sep 2017 13:25:50 +0100
parents:  144fbd1d29c3
children: (none)
# -*- coding: utf-8 -*-
"""
Created on Mon Apr 20 14:51:02 2015

@author: Emmanouil Chourdakis

Principal Factor Analysis
"""

from numpy import *
from sklearn import cluster
from sklearn.metrics import pairwise_distances
from numpy.linalg.linalg import eig
from matplotlib.mlab import find
from numpy.random import randn
#from empca import *

# Mean squared error between two array-likes
mse = lambda A, B: ((array(A) - array(B)) ** 2).mean()

L = matrix([[2, 4, 5, 5, 3, 2],
            [2, 3, 4, 5, 4, 3]])

K = matrix(array([[ 0.94241267, -1.63156463, -0.04654153,  0.1000316 ],
                  [-0.16114828, -0.70320328,  0.25243464, -0.10076863],
                  [ 1.10255852,  0.41740569,  1.14947174, -0.95764054],
                  [ 0.31158905, -0.37584351,  1.65328982,  0.1987012 ]]))


def testing():
    L = matrix([[2, 4, 5, 5, 3, 2],
                [2, 3, 4, 5, 4, 3]])

    K = matrix(array([[ 0.94241267, -1.63156463, -0.04654153,  0.1000316 ],
                      [-0.16114828, -0.70320328,  0.25243464, -0.10076863],
                      [ 1.10255852,  0.41740569,  1.14947174, -0.95764054],
                      [ 0.31158905, -0.37584351,  1.65328982,  0.1987012 ]]))


def kmeans(X, k):
    """ Simulate MATLAB's kmeans using sklearn.cluster. """
    k_means = cluster.KMeans(n_clusters=k)
    k_means.fit(X)
    C = k_means.cluster_centers_
    idx = k_means.labels_

    # Within-cluster sums of squared distances to each centroid
    sumd = zeros((k, 1))
    for i in range(0, k):
        S = X[idx == i, :] - C[i, :]
        sumd[i] = sum(array(S) ** 2)

    # Distances of every point to every centroid
    D = matrix(pairwise_distances(X, C))

    return idx, C, sumd, D


def princomp(A):
    """ Principal component analysis; adapted from THE GLOWING PYTHON. """
    if A.shape[1] == 1:
        return 1, 0, 0

    A = matrix(A)
    M = mean(A, axis=1)
    Rxx = matrix(corrcoef(A - M, ddof=1))
    # Rxx = matrix(cov(A - M, ddof=1))

    latent, coeff = linalg.eig(Rxx)
    p = size(coeff, axis=1)

    # Sort eigenvectors by descending eigenvalue
    idx = argsort(latent)
    idx = idx[::-1]
    coeff = coeff[:, idx]
    latent = latent[idx]

    # Projection of the data into the new space
    score = matrix(coeff).T * (A - M)

    return matrix(coeff), matrix(score).T, matrix(latent).T


def papca(X):
    """ Horn's method for parallel analysis; returns nComp, the number of
        principal components to use. """
    N = 1000
    X = matrix(X)

    coeff, score, lq = princomp(X)

    # Eigenvalues of N random matrices of the same shape as X
    eigs = matrix(zeros((len(lq), N)))
    for k in range(1, N + 1):
        coeff, score, lrq = princomp(randn(X.shape[0], X.shape[1]))
        eigs[:, k - 1] = lrq

    # Keep the components whose adjusted eigenvalue exceeds 1
    lrqbar = mean(eigs, 1)
    ladjq = lq - lrqbar + 1
    nComp = sum(ladjq > 1)

    if nComp == 0:
        return 1
    else:
        return nComp


def pfa(X, Variability=0.81):
    """ Principal feature analysis: returns the indices of the features to
        keep so that the requested proportion of variability is covered. """

    def choose_n(Lambda, variability=Variability):
        """ Choose the number of eigenvalues that cover the requested
            variability. """
        sumL = sum(Lambda)
        suml = 0
        for i in range(0, len(Lambda)):
            suml += Lambda[i]
            v = suml / sumL
            if v >= variability:
                return i + 1

    X = matrix(X)
    N = shape(X)[1]

    # Standardize the data
    meanX = mean(X)
    stdX = std(X, ddof=1)
    X = X - meanX
    X = X / stdX

    Sigma = corrcoef(X, ddof=1)
    Lambda, A = eig(Sigma)

    # Sort eigenvalues (and eigenvectors) in descending order before
    # choosing q, so the variability criterion sees the largest first
    idx = argsort(Lambda)[::-1]
    Lambda = Lambda[idx]
    A = matrix(A)
    A = A[:, idx]

    q = choose_n(Lambda, Variability)
    Aq = A[:, 0:q]

    # Cluster the rows of Aq into p clusters
    p = q + 2
    p = min(Aq.shape[0], p)
    labels, centers, sumd, D = kmeans(Aq, p)

    tokeep = []
    for i in range(0, p):
        # Find the vectors in cluster i
        vectors_idx = find(labels == i)
        vectors = Aq[vectors_idx, :]

        # Compute the cluster mean
        vectors_mean = mean(vectors, 0)

        # Compute each vector's distance from the mean
        distances_from_mean = pairwise_distances(vectors, vectors_mean)

        # Keep the vector with the minimum distance from the mean
        sorted_idx = argsort(distances_from_mean.T)
        closest_idx = vectors_idx[sorted_idx]
        tokeep.append(int(closest_idx[0, 0]))

    return sort(tokeep)


def convert_to_pc(data, kernel, q, features):
    """ Expects a PC kernel, the desired number of PCs and an array of the
        kept features; returns the data projected into the PC kernel
        space. """
    data_n = data[features, :]
    pc = kernel.T * data_n
    return matrix(pc[0:q, :])


def extract_pca_configuration_from_data(data, Variability=0.9):
    """ Expects a k x n matrix of n samples of k features and returns a
        matrix Kernel with the PCs and an array Features of the chosen
        features. """

    print "[II] Selecting salient features"
    features = pfa(data)
    data_n = data[features, :]
    print "[II] Selected %d features" % len(features)

    print "[II] Calculating principal component coefficients"
    coeff, score, latent = princomp(data_n)

    print "[II] Calculating the principal components"
    pc = coeff.T * data_n

    print "[II] Doing Horn's parallel analysis and setting number of vectors"
    q = papca(data_n)
    print "[II] Selected %d PCs to include" % q

    # Zero out all but the first q principal components
    if q < pc.shape[0]:
        pc[q:, :] = 0

    # Reconstruct the data from the truncated components
    test_data = zeros((data.shape[0], data.shape[1]))
    test_data[features, :] = coeff * pc

    print "[II] PCA reconstruction MSE is: %.4f" % \
        mse(test_data[features, :], data_n)

    # Return the kernel, the number of PCs and the array of kept features
    return coeff, q, features


if __name__ == "__main__":
    import sys

    test = array([[1., 2, 3, 4],
                  [5, 6, 7, 8],
                  [9, 10, 11, 12],
                  [8, 9, 7, 6]])
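
    # ------------------------------------------------------------------
    # Usage sketch (not part of the original file): a minimal, hedged
    # example of how the functions above plug together, assuming `test`
    # is treated as a k x n matrix of n samples of k features, as the
    # docstrings describe. The original __main__ block defines `test`
    # but does not use it.
    # ------------------------------------------------------------------

    # Select salient features with PFA, fit the PC kernel and pick the
    # number of components q via Horn's parallel analysis ...
    kernel, q, features = extract_pca_configuration_from_data(matrix(test))

    # ... then project data of the same shape into the reduced PC space.
    projected = convert_to_pc(matrix(test), kernel, q, features)

    print "[II] Kept features:", features
    print "[II] Projected data shape:", projected.shape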