diff experiment-reverb/code/pfa.py @ 0:246d5546657c

initial commit, needs cleanup
author Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date Wed, 14 Dec 2016 13:15:48 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/experiment-reverb/code/pfa.py	Wed Dec 14 13:15:48 2016 +0000
@@ -0,0 +1,261 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Apr 20 14:51:02 2015
+
+@author: Emmanouil Chourdakis
+
+Principal Factor Analisys
+
+"""
+
+from numpy import *
+from sklearn import cluster
+from sklearn.metrics import pairwise_distances
+
+mse = lambda A,B: ((array(A)-array(B)) ** 2).mean()
+
+L = matrix([
+            [2,4,5,5,3,2],
+            [2,3,4,5,4,3]
+    ])
+    
+
+K = matrix(array([[ 0.94241267, -1.63156463, -0.04654153,  0.1000316 ],
+       [-0.16114828, -0.70320328,  0.25243464, -0.10076863],
+       [ 1.10255852,  0.41740569,  1.14947174, -0.95764054],
+       [ 0.31158905, -0.37584351,  1.65328982,  0.1987012 ]]))
+       
+
+def testing():
+    L = matrix([
+            [2,4,5,5,3,2],
+            [2,3,4,5,4,3]
+    ])
+    
+    K = matrix(array([[ 0.94241267, -1.63156463, -0.04654153,  0.1000316 ],
+           [-0.16114828, -0.70320328,  0.25243464, -0.10076863],
+           [ 1.10255852,  0.41740569,  1.14947174, -0.95764054],
+           [ 0.31158905, -0.37584351,  1.65328982,  0.1987012 ]]))    
+    
+
+def kmeans(X, k):
+    # simulate matlab's kmeans using python's sklearn.cluster
+    k_means = cluster.KMeans(n_clusters=k)    
+    k_means.fit(X)
+    
+    C = k_means.cluster_centers_
+    idx = k_means.labels_
+    
+    
+    # Create inter-cluster distance sums
+    sumd = zeros((k,1))
+    for i in range(0, k):
+        S = X[idx == i, :] - C[i, :]
+        sumd[i] = sum(array(S)**2)
+        
+   # sumd = sum(array(S)**2)
+ 
+    D = matrix(pairwise_distances(X, C))
+    
+    return idx,C,sumd,D
+    
+       
+def princomp(A):
+ # Adapted from THE GLOWING PYTHON
+
+ if A.shape[1] == 1:
+     return 1, 0, 0
+     
+ A = matrix(A)
+ M = mean(A, axis=1)
+
+ Rxx = matrix(corrcoef(A - M, ddof=1))
+ #print A-M
+ latent,coeff = linalg.eig(Rxx)
+ p = size(coeff,axis=1)
+ idx = argsort(latent) 
+ idx = idx[::-1]       
+ 
+
+ coeff = coeff[:,idx]
+ latent = latent[idx]   # sorting eigenvalues
+ score =  matrix(coeff).T * (A-M)# projection of the data in the new space
+ return matrix(coeff),matrix(score).T,matrix(latent).T
+ 
+    
+
+def papca(X):
+    """ Horn's method for parallel PCA, returns nComp: the number of
+    principal compoents to use."""
+    
+    N = 1000
+    X = matrix(X)
+    coeff, score, lq = princomp(X)
+    
+    eigs = matrix(zeros((len(lq), N)))
+    
+    for k in range(1, N+1):
+        coeff, score, lrq = princomp(randn(X.shape[0], X.shape[1]))
+        eigs[:,k-1] = lrq
+        
+    lrqbar = mean(eigs, 1)
+    ladjq = lq - lrqbar + 1
+    
+    nComp =  sum(ladjq > 1)
+    
+    if nComp == 0:
+        return 1
+    else:
+        return nComp
+
+        
+        
+    
+            
+def pfa(X, Variability = 0.81):
+    
+    def choose_n(Lambda, variability = Variability):
+        """ Choose for variability """ 
+        sumL = sum(Lambda)
+        suml = 0
+        
+        for i in range(0, len(Lambda)):
+            suml += Lambda[i]
+            v = suml/sumL
+            if v >= variability:
+                return i+1
+        
+ #   print X
+    X = matrix(X)
+  #  print X
+    N = shape(X)[1]
+    
+    meanX = mean(X)
+    stdX = std(X, ddof=1)
+    
+    X = X - meanX
+    X = X / stdX
+    
+    
+    Sigma = corrcoef(X, ddof=1)
+    
+    Lambda, A = eig(Sigma)
+    
+    
+    q = choose_n(Lambda, Variability)
+
+    # Sort Eigenvalues
+
+    idx = argsort(Lambda)[::-1]    
+    
+    Lambda = Lambda[idx]
+    
+    A = matrix(A)
+    A = A[:,idx]
+    
+    Aq = A[:, 0:q]
+
+    p = q + 2
+    
+    p = min(Aq.shape[0], p)
+    
+    # Todo K _means
+    
+ 
+    labels, centers, sumd, D = kmeans(Aq, p)
+    
+    tokeep = []
+    
+    for i in range(0,p):
+        # Find vectors in clusters
+        vectors_idx = find(labels==i)
+        vectors = Aq[vectors_idx,:]
+        # Compute mean
+        vectors_mean = mean(vectors, 0)
+        
+        # Compute distance from mean
+        distances_from_mean = pairwise_distances(vectors, vectors_mean)
+        
+        # Get the vector with the minimum distance
+        sorted_idx = argsort(distances_from_mean.T)
+        closest_idx = vectors_idx[sorted_idx]
+        tokeep.append(int(closest_idx[0,0]))
+        
+        
+    
+    return sort(tokeep)
+
+
+def convert_to_pc(data, kernel, q, features):
+    """ Expects a PC kernel, the desired number
+    of PCs and an array of the kept features
+    and returns the transformed data projected
+    into the PC kernel space"""
+    
+    
+    data_n = data[features, :]
+    pc = kernel.T*data_n
+    
+    return matrix(pc[0:q, :])
+
+def extract_pca_configuration_from_data(data, Variability = 0.9):
+    """ Expects a k x n Matrix of n samples of k features and returns
+    a matrix Kernel with the PCs and an array Features of the chosen features"""
+    
+    print "[II] Selecting salient features"    
+    features = pfa(data);
+    data_n = data[features, :]
+    #data_n = data
+    
+
+    
+    print "[II] Selected %d features" % len(features)
+    
+    print "[II] Calculating principal components coefficients"
+    coeff, score, latent = princomp(data_n)
+    
+    print "[II] Calculating the principal components"
+    pc = coeff.T*data_n
+    
+    print "[II] Doing horn's Parallel Analysis and setting number of vectors"
+    q = papca(data_n)
+    
+    print "[II] Selected %d PCs to include" % q
+    
+    pc[q+1:, :] = 0
+    
+    test_data = zeros((data.shape[0], data.shape[1]))
+    test_data[features, :] = coeff*pc
+    
+    #test_data = coeff*pc
+
+    
+    print test_data
+    print ","
+    print data
+    
+    print "[II] PCA Reconstruction MSE is: %.4f" % mse(test_data, data)
+    
+    # Extract Kernel, number of PCs and array of features
+    return  coeff, q, features
+    
+    
+    
+    
+    
+    
+    
+
+if __name__=="__main__":
+    import sys
+    
+    test = array([
+                    [1., 2, 3, 4],
+                    [5, 6, 7, 8],
+                    [9,10,11,12],
+                    [8, 9, 7, 6]
+                 ])
+                 
+
+                     
+