annotate experiment-reverb/code/pfa.py @ 2:c87a9505f294 tip

Added LICENSE for code, removed .wav files
author Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date Sat, 30 Sep 2017 13:25:50 +0100
parents 246d5546657c
children
rev   line source
e@0 1 # -*- coding: utf-8 -*-
e@0 2 """
e@0 3 Created on Mon Apr 20 14:51:02 2015
e@0 4
e@0 5 @author: Emmanouil Chourdakis
e@0 6
e@0 7 Principal Factor Analysis
e@0 8
e@0 9 """
e@0 10
e@0 11 from numpy import *
e@0 12 from sklearn import cluster
e@0 13 from sklearn.metrics import pairwise_distances
e@0 14
def mse(A, B):
    """Mean squared error between two equally-shaped array-likes.

    Replaces the original lambda-bound-to-a-name (PEP 8 E731) with a
    proper function; behavior is identical.
    """
    return ((array(A) - array(B)) ** 2).mean()
e@0 16
# Example data kept at module level for interactive experimentation
# (mirrors the locals defined inside testing()).
# L: a 2x6 matrix of integer scores.
L = matrix([[2, 4, 5, 5, 3, 2],
            [2, 3, 4, 5, 4, 3]])

# K: a fixed 4x4 kernel of sample coefficients.
K = matrix(array([
    [ 0.94241267, -1.63156463, -0.04654153,  0.1000316 ],
    [-0.16114828, -0.70320328,  0.25243464, -0.10076863],
    [ 1.10255852,  0.41740569,  1.14947174, -0.95764054],
    [ 0.31158905, -0.37584351,  1.65328982,  0.1987012 ],
]))
e@0 27
e@0 28
def testing():
    """Build the sample L and K matrices locally.

    Note: these locals shadow the module-level L and K and are discarded
    on return; the function has no observable effect.
    """
    sample_scores = matrix([[2, 4, 5, 5, 3, 2],
                            [2, 3, 4, 5, 4, 3]])

    sample_kernel = matrix(array([
        [ 0.94241267, -1.63156463, -0.04654153,  0.1000316 ],
        [-0.16114828, -0.70320328,  0.25243464, -0.10076863],
        [ 1.10255852,  0.41740569,  1.14947174, -0.95764054],
        [ 0.31158905, -0.37584351,  1.65328982,  0.1987012 ],
    ]))
e@0 39
e@0 40
def kmeans(X, k):
    """Emulate MATLAB's kmeans() on top of sklearn.cluster.KMeans.

    Returns a 4-tuple (idx, C, sumd, D):
      idx  -- cluster label of each row of X
      C    -- k cluster centroids
      sumd -- (k, 1) array of within-cluster sums of squared distances
      D    -- matrix of pairwise distances from every row of X to every
              centroid
    """
    model = cluster.KMeans(n_clusters=k)
    model.fit(X)

    idx = model.labels_
    C = model.cluster_centers_

    # Within-cluster sum of squared deviations from each centroid.
    sumd = zeros((k, 1))
    for j in range(k):
        deviations = array(X[idx == j, :] - C[j, :])
        sumd[j] = (deviations ** 2).sum()

    D = matrix(pairwise_distances(X, C))

    return idx, C, sumd, D
e@0 61
e@0 62
def princomp(A):
    """Correlation-based principal component analysis of the rows of A.

    Adapted from "THE GLOWING PYTHON". A is a k x n matrix holding n
    observations of k variables (one variable per row).

    Returns (coeff, score, latent):
      coeff  -- k x k matrix whose columns are eigenvectors of the
                correlation matrix, sorted by decreasing eigenvalue
      score  -- projection of the row-centred data onto the PCs,
                returned transposed (n x k matrix)
      latent -- k x 1 column matrix of eigenvalues, sorted decreasing
    For single-column input the degenerate tuple (1, 0, 0) is returned.
    """
    if A.shape[1] == 1:
        # Degenerate case: one observation, no meaningful decomposition.
        return 1, 0, 0

    A = matrix(A)
    M = mean(A, axis=1)  # per-row means, k x 1 column

    # BUG FIX: the original passed ddof=1, which never affected
    # corrcoef's result (normalisation cancels it) and is a
    # deprecated/removed parameter in modern NumPy.
    Rxx = matrix(corrcoef(A - M))

    latent, coeff = linalg.eig(Rxx)

    # Sort eigenpairs by decreasing eigenvalue.
    order = argsort(latent)[::-1]
    coeff = coeff[:, order]
    latent = latent[order]

    # Project the centred data onto the principal axes.
    score = matrix(coeff).T * (A - M)

    return matrix(coeff), matrix(score).T, matrix(latent).T
e@0 84
e@0 85
e@0 86
def papca(X):
    """Horn's method for parallel PCA.

    Compares the eigenvalues of X against the mean eigenvalues of N
    random Gaussian matrices of the same shape and returns nComp, the
    number of principal components to use (always at least 1).
    """
    N = 1000
    X = matrix(X)
    coeff, score, lq = princomp(X)

    eigs = matrix(zeros((len(lq), N)))

    for k in range(N):
        # BUG FIX: bare 'randn' is not exported by 'from numpy import *';
        # it lives in the numpy.random submodule (NameError before).
        coeff, score, lrq = princomp(random.randn(X.shape[0], X.shape[1]))
        eigs[:, k] = lrq

    # Adjust the observed eigenvalues by the random baseline.
    lrqbar = mean(eigs, 1)
    ladjq = lq - lrqbar + 1

    nComp = sum(ladjq > 1)

    # Always retain at least one component.
    if nComp == 0:
        return 1
    return nComp
e@0 110
e@0 111
e@0 112
e@0 113
e@0 114
def pfa(X, Variability=0.81):
    """Principal Feature Analysis.

    Selects a subset of salient feature indices from a k x n data matrix
    X (k features, n samples) whose leading eigenvalues account for at
    least `Variability` of the total.

    Returns a sorted array of the selected feature (row) indices.
    """

    def choose_n(Lambda, variability=Variability):
        """Smallest q such that the first q eigenvalues of Lambda reach
        the requested cumulative variability fraction."""
        sumL = sum(Lambda)
        suml = 0
        for i in range(len(Lambda)):
            suml += Lambda[i]
            if suml / sumL >= variability:
                return i + 1

    X = matrix(X)

    # Standardise using the global mean/std over all entries.
    meanX = mean(X)
    stdX = std(X, ddof=1)
    X = (X - meanX) / stdX

    # Correlation matrix of the features (rows).
    # NOTE: the original passed ddof=1, a deprecated no-op for corrcoef.
    Sigma = corrcoef(X)

    # BUG FIX: bare 'eig' is not exported by 'from numpy import *';
    # use linalg.eig (NameError before).
    Lambda, A = linalg.eig(Sigma)

    # NOTE(review): choose_n runs on the eigenvalues in eig()'s
    # (unsorted) order, as in the original -- confirm this is intended.
    q = choose_n(Lambda, Variability)

    # Sort eigenpairs by decreasing eigenvalue.
    idx = argsort(Lambda)[::-1]
    Lambda = Lambda[idx]
    A = matrix(A)[:, idx]

    # Keep the leading q eigenvector columns.
    Aq = A[:, 0:q]

    # Cluster the feature rows into p = q + 2 clusters, capped at the
    # number of features.
    p = min(Aq.shape[0], q + 2)

    labels, centers, sumd, D = kmeans(Aq, p)

    tokeep = []
    for i in range(p):
        # BUG FIX: 'find' belongs to matplotlib.pylab, not numpy
        # (NameError before); where()[0] is the equivalent index lookup.
        vectors_idx = where(labels == i)[0]
        vectors = Aq[vectors_idx, :]

        # Mean of the cluster members...
        vectors_mean = mean(vectors, 0)

        # ...and each member's distance from that mean.
        distances_from_mean = pairwise_distances(vectors, vectors_mean)

        # Keep the feature whose eigenvector row is closest to the
        # cluster mean.
        sorted_idx = argsort(distances_from_mean.T)
        closest_idx = vectors_idx[sorted_idx]
        tokeep.append(int(closest_idx[0, 0]))

    return sort(tokeep)
e@0 187
e@0 188
def convert_to_pc(data, kernel, q, features):
    """Project data into a PC kernel space.

    Keeps only the rows of `data` listed in `features`, projects them
    through `kernel`, and returns the first q principal components as a
    matrix.
    """
    selected = data[features, :]
    projected = kernel.T * selected
    return matrix(projected[0:q, :])
e@0 200
def extract_pca_configuration_from_data(data, Variability=0.9):
    """Build a PCA configuration from a k x n data matrix.

    Expects a k x n matrix of n samples of k features. Selects salient
    features with pfa(), computes the PC kernel with princomp(), and
    picks the number of components via Horn's parallel analysis (papca).

    Returns (coeff, q, features): the PC kernel, the number of PCs to
    keep, and the array of chosen feature indices.

    NOTE(review): the `Variability` parameter is currently unused --
    pfa() is called with its own default; confirm before wiring through.
    """
    # Python-2 print statements rewritten in the parenthesized form,
    # valid under both Python 2 and 3.
    print("[II] Selecting salient features")
    features = pfa(data)
    data_n = data[features, :]

    print("[II] Selected %d features" % len(features))

    print("[II] Calculating principal components coefficients")
    coeff, score, latent = princomp(data_n)

    print("[II] Calculating the principal components")
    pc = coeff.T * data_n

    print("[II] Doing horn's Parallel Analysis and setting number of vectors")
    q = papca(data_n)

    print("[II] Selected %d PCs to include" % q)

    # BUG FIX: keeping q components means zeroing rows q onward; the
    # original 'pc[q+1:, :] = 0' retained one component too many.
    pc[q:, :] = 0

    # Reconstruct from the truncated components to report the error.
    test_data = zeros((data.shape[0], data.shape[1]))
    test_data[features, :] = coeff * pc

    print(test_data)
    print(",")
    print(data)

    print("[II] PCA Reconstruction MSE is: %.4f" % mse(test_data, data))

    # Extract Kernel, number of PCs and array of features
    return coeff, q, features
e@0 241
e@0 242
e@0 243
e@0 244
e@0 245
e@0 246
e@0 247
e@0 248
if __name__ == "__main__":
    import sys

    # Small integer matrix kept for manual experimentation when the
    # module is run as a script; nothing is executed with it here.
    test = array([
        [1., 2, 3, 4],
        [5, 6, 7, 8],
        [9, 10, 11, 12],
        [8, 9, 7, 6],
    ])
e@0 258
e@0 259
e@0 260
e@0 261