chourdakisreiss2016: experiment-reverb/code/pca.py comparison

comparison experiment-reverb/code/pca.py @ 0:246d5546657c

initial commit, needs cleanup

author	Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date	Wed, 14 Dec 2016 13:15:48 +0000
parents
children	144fbd1d29c3

comparison

equal deleted inserted replaced

--1:000000000000
+:246d5546657c
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Apr 20 14:51:02 2015
+@author: Emmanouil Chourdakis
+Principal Factor Analisys
+"""
+from numpy import *
+from sklearn import cluster
+from sklearn.metrics import pairwise_distances
+from numpy.linalg.linalg import eig
+from matplotlib.mlab import find
+from numpy.random import randn
+from empca import *
+mse = lambda A,B: ((array(A)-array(B)) ** 2).mean()
+L = matrix([
+[2,4,5,5,3,2],
+[2,3,4,5,4,3]
+])
+K = matrix(array([[ 0.94241267, -1.63156463, -0.04654153,  0.1000316 ],
+[-0.16114828, -0.70320328,  0.25243464, -0.10076863],
+[ 1.10255852,  0.41740569,  1.14947174, -0.95764054],
+[ 0.31158905, -0.37584351,  1.65328982,  0.1987012 ]]))
+def testing():
+L = matrix([
+[2,4,5,5,3,2],
+[2,3,4,5,4,3]
+])
+K = matrix(array([[ 0.94241267, -1.63156463, -0.04654153,  0.1000316 ],
+[-0.16114828, -0.70320328,  0.25243464, -0.10076863],
+[ 1.10255852,  0.41740569,  1.14947174, -0.95764054],
+[ 0.31158905, -0.37584351,  1.65328982,  0.1987012 ]]))
+def kmeans(X, k):
+# simulate matlab's kmeans using python's sklearn.cluster
+k_means = cluster.KMeans(n_clusters=k)
+k_means.fit(X)
+C = k_means.cluster_centers_
+idx = k_means.labels_
+# Create inter-cluster distance sums
+sumd = zeros((k,1))
+for i in range(0, k):
+S = X[idx == i, :] - C[i, :]
+sumd[i] = sum(array(S)**2)
+# sumd = sum(array(S)**2)
+D = matrix(pairwise_distances(X, C))
+return idx,C,sumd,D
+def princomp(A):
+# Adapted from THE GLOWING PYTHON
+if A.shape[1] == 1:
+return 1, 0, 0
+A = matrix(A)
+M = mean(A, axis=1)
+Rxx = matrix(corrcoef(A - M, ddof=1))
+# Rxx = matrix(cov(A-M, ddof=1))
+#print A-M
+latent,coeff = linalg.eig(Rxx)
+p = size(coeff,axis=1)
+idx = argsort(latent)
+idx = idx[::-1]
+coeff = coeff[:,idx]
+latent = latent[idx]   # sorting eigenvalues
+score =  matrix(coeff).T * (A-M)# projection of the data in the new space
+return matrix(coeff),matrix(score).T,matrix(latent).T
+def papca(X):
+""" Horn's method for parallel PCA, returns nComp: the number of
+principal compoents to use."""
+N = 1000
+X = matrix(X)
+coeff, score, lq = princomp(X)
+eigs = matrix(zeros((len(lq), N)))
+for k in range(1, N+1):
+coeff, score, lrq = princomp(randn(X.shape[0], X.shape[1]))
+eigs[:,k-1] = lrq
+lrqbar = mean(eigs, 1)
+ladjq = lq - lrqbar + 1
+nComp =  sum(ladjq > 1)
+if nComp == 0:
+return 1
+else:
+return nComp
+def pfa(X, Variability = 0.81):
+def choose_n(Lambda, variability = Variability):
+""" Choose for variability """
+sumL = sum(Lambda)
+suml = 0
+for i in range(0, len(Lambda)):
+suml += Lambda[i]
+v = suml/sumL
+if v >= variability:
+return i+1
+#   print X
+X = matrix(X)
+#  print X
+N = shape(X)[1]
+meanX = mean(X)
+stdX = std(X, ddof=1)
+X = X - meanX
+X = X / stdX
+Sigma = corrcoef(X, ddof=1)
+Lambda, A = eig(Sigma)
+q = choose_n(Lambda, Variability)
+# Sort Eigenvalues
+idx = argsort(Lambda)[::-1]
+Lambda = Lambda[idx]
+A = matrix(A)
+A = A[:,idx]
+Aq = A[:, 0:q]
+p = q + 2
+p = min(Aq.shape[0], p)
+# Todo K _means
+labels, centers, sumd, D = kmeans(Aq, p)
+tokeep = []
+for i in range(0,p):
+# Find vectors in clusters
+vectors_idx = find(labels==i)
+vectors = Aq[vectors_idx,:]
+# Compute mean
+vectors_mean = mean(vectors, 0)
+# Compute distance from mean
+distances_from_mean = pairwise_distances(vectors, vectors_mean)
+# Get the vector with the minimum distance
+sorted_idx = argsort(distances_from_mean.T)
+closest_idx = vectors_idx[sorted_idx]
+tokeep.append(int(closest_idx[0,0]))
+return sort(tokeep)
+def convert_to_pc(data, kernel, q, features):
+""" Expects a PC kernel, the desired number
+of PCs and an array of the kept features
+and returns the transformed data projected
+into the PC kernel space"""
+data_n = data[features, :]
+pc = kernel.T*data_n
+return matrix(pc[0:q, :])
+def extract_pca_configuration_from_data(data, Variability = 0.9):
+""" Expects a k x n Matrix of n samples of k features and returns
+a matrix Kernel with the PCs and an array Features of the chosen features"""
+print "[II] Selecting salient features"
+features = pfa(data);
+data_n = data[features, :]
+#data_n = data
+print "[II] Selected %d features" % len(features)
+print "[II] Calculating principal components coefficients"
+coeff, score, latent = princomp(data_n)
+print "[II] Calculating the principal components"
+pc = coeff.T*data_n
+print "[II] Doing horn's Parallel Analysis and setting number of vectors"
+q = papca(data_n)
+print "[II] Selected %d PCs to include" % q
+if q < pc.shape[0]:
+pc[q+1:, :] = 0
+test_data = zeros((data.shape[0], data.shape[1]))
+test_data[features, :] = coeff*pc
+#test_data = coeff*pc
+# print test_data
+#  print ","
+# print data
+print "[II] PCA Reconstruction MSE is: %.4f" % mse(test_data[features,:], data_n)
+# Extract Kernel, number of PCs and array of features
+return  coeff, q, features
+if __name__=="__main__":
+import sys
+test = array([
+[1., 2, 3, 4],
+[5, 6, 7, 8],
+[9,10,11,12],
+[8, 9, 7, 6]
+])

Mercurial > hg > chourdakisreiss2016

comparison experiment-reverb/code/pca.py @ 0:246d5546657c