mi@0: #!/usr/bin/python mi@0: # mi@0: # Copyright (C) Christian Thurau, 2010. mi@0: # Licensed under the GNU General Public License (GPL). mi@0: # http://www.gnu.org/licenses/gpl.txt mi@0: """ mi@0: PyMF Singular Value Decomposition. mi@0: mi@0: SVD : Class for Singular Value Decomposition mi@0: pinv() : Compute the pseudoinverse of a Matrix mi@0: mi@0: """ mi@0: mi@0: mi@0: mi@0: from numpy.linalg import eigh mi@0: import scipy.sparse mi@0: mi@0: try: mi@0: import scipy.sparse.linalg.eigen.arpack as linalg mi@0: except (ImportError, AttributeError): mi@0: import scipy.sparse.linalg as linalg mi@0: mi@0: mi@0: import numpy as np mi@0: mi@0: def pinv(A, k=-1, eps=10**-8): mi@0: # Compute Pseudoinverse of a matrix mi@0: # calculate SVD mi@0: svd_mdl = SVD(A, k=k) mi@0: svd_mdl.factorize() mi@0: mi@0: S = svd_mdl.S mi@0: Sdiag = S.diagonal() mi@0: Sdiag = np.where(Sdiag >eps, 1.0/Sdiag, 0.0) mi@0: mi@0: for i in range(S.shape[0]): mi@0: S[i,i] = Sdiag[i] mi@0: mi@0: if scipy.sparse.issparse(A): mi@0: A_p = svd_mdl.V.T * (S * svd_mdl.U.T) mi@0: else: mi@0: A_p = np.dot(svd_mdl.V.T, np.core.multiply(np.diag(S)[:,np.newaxis], svd_mdl.U.T)) mi@0: mi@0: return A_p mi@0: mi@0: mi@0: class SVD(): mi@0: """ mi@0: SVD(data, show_progress=False) mi@0: mi@0: mi@0: Singular Value Decomposition. Factorize a data matrix into three matrices s.t. mi@0: F = | data - USV| is minimal. U and V correspond to eigenvectors of the matrices mi@0: data*data.T and data.T*data. mi@0: mi@0: Parameters mi@0: ---------- mi@0: data : array_like [data_dimension x num_samples] mi@0: the input data mi@0: mi@0: Attributes mi@0: ---------- mi@0: U,S,V : submatrices s.t. data = USV mi@0: mi@0: Example mi@0: ------- mi@0: >>> import numpy as np mi@0: >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) mi@0: >>> svd_mdl = SVD(data, show_progress=False) mi@0: >>> svd_mdl.factorize() mi@0: """ mi@0: mi@0: _EPS=10**-8 mi@0: mi@0: def __init__(self, data, k=-1, rrank=0, crank=0): mi@0: self.data = data mi@0: (self._rows, self._cols) = self.data.shape mi@0: if rrank > 0: mi@0: self._rrank = rrank mi@0: else: mi@0: self._rrank = self._rows mi@0: mi@0: if crank > 0: mi@0: self._crank = crank mi@0: else: mi@0: self._crank = self._cols mi@0: mi@0: # set the rank to either rrank or crank mi@0: self._k = k mi@0: mi@0: def frobenius_norm(self): mi@0: """ Frobenius norm (||data - USV||) for a data matrix and a low rank mi@0: approximation given by SVH using rank k for U and V mi@0: mi@0: Returns: mi@0: frobenius norm: F = ||data - USV|| mi@0: """ mi@0: if scipy.sparse.issparse(self.data): mi@0: err = self.data - self.U*self.S*self.V mi@0: err = err.multiply(err) mi@0: err = np.sqrt(err.sum()) mi@0: else: mi@0: err = self.data[:,:] - np.dot(np.dot(self.U, self.S), self.V) mi@0: err = np.sqrt(np.sum(err**2)) mi@0: mi@0: return err mi@0: mi@0: mi@0: def factorize(self): mi@0: def _right_svd(): mi@0: AA = np.dot(self.data[:,:], self.data[:,:].T) mi@0: values, u_vectors = eigh(AA) mi@0: mi@0: # get rid of too low eigenvalues mi@0: u_vectors = u_vectors[:, values > self._EPS] mi@0: values = values[values > self._EPS] mi@0: mi@0: # sort eigenvectors according to largest value mi@0: idx = np.argsort(values) mi@0: values = values[idx[::-1]] mi@0: mi@0: # argsort sorts in ascending order -> access is backwards mi@0: self.U = u_vectors[:,idx[::-1]] mi@0: mi@0: # compute S mi@0: self.S = np.diag(np.sqrt(values)) mi@0: mi@0: # and the inverse of it mi@0: S_inv = np.diag(np.sqrt(values)**-1) mi@0: mi@0: # compute V from it mi@0: self.V = np.dot(S_inv, np.dot(self.U[:,:].T, self.data[:,:])) mi@0: mi@0: mi@0: def _left_svd(): mi@0: AA = np.dot(self.data[:,:].T, self.data[:,:]) mi@0: values, v_vectors = eigh(AA) mi@0: mi@0: # get rid of too low eigenvalues mi@0: v_vectors = v_vectors[:, values > self._EPS] mi@0: values = values[values > self._EPS] mi@0: mi@0: # sort eigenvectors according to largest value mi@0: # argsort sorts in ascending order -> access is backwards mi@0: idx = np.argsort(values)[::-1] mi@0: values = values[idx] mi@0: mi@0: # compute S mi@0: self.S= np.diag(np.sqrt(values)) mi@0: mi@0: # and the inverse of it mi@0: S_inv = np.diag(1.0/np.sqrt(values)) mi@0: mi@0: Vtmp = v_vectors[:,idx] mi@0: mi@0: self.U = np.dot(np.dot(self.data[:,:], Vtmp), S_inv) mi@0: self.V = Vtmp.T mi@0: mi@0: def _sparse_right_svd(): mi@0: ## for some reasons arpack does not allow computation of rank(A) eigenvectors (??) # mi@0: AA = self.data*self.data.transpose() mi@0: if self.data.shape[0] > 1: mi@0: # do not compute full rank if desired mi@0: if self._k > 0 and self._k < self.data.shape[0]-1: mi@0: k = self._k mi@0: else: mi@0: k = self.data.shape[0]-1 mi@0: mi@0: values, u_vectors = linalg.eigen_symmetric(AA,k=k) mi@0: else: mi@0: values, u_vectors = eigh(AA.todense()) mi@0: mi@0: # get rid of too low eigenvalues mi@0: u_vectors = u_vectors[:, values > self._EPS] mi@0: values = values[values > self._EPS] mi@0: mi@0: # sort eigenvectors according to largest value mi@0: idx = np.argsort(values) mi@0: values = values[idx[::-1]] mi@0: mi@0: # argsort sorts in ascending order -> access is backwards mi@0: self.U = scipy.sparse.csc_matrix(u_vectors[:,idx[::-1]]) mi@0: mi@0: # compute S mi@0: self.S = scipy.sparse.csc_matrix(np.diag(np.sqrt(values))) mi@0: mi@0: # and the inverse of it mi@0: S_inv = scipy.sparse.csc_matrix(np.diag(1.0/np.sqrt(values))) mi@0: mi@0: # compute V from it mi@0: self.V = self.U.transpose() * self.data mi@0: self.V = S_inv * self.V mi@0: mi@0: def _sparse_left_svd(): mi@0: # for some reasons arpack does not allow computation of rank(A) eigenvectors (??) mi@0: AA = self.data.transpose()*self.data mi@0: mi@0: if self.data.shape[1] > 1: mi@0: # do not compute full rank if desired mi@0: if self._k > 0 and self._k < self.data.shape[1]-1: mi@0: k = self._k mi@0: else: mi@0: k = self.data.shape[1]-1 mi@0: values, v_vectors = linalg.eigen_symmetric(AA,k=k) mi@0: else: mi@0: values, v_vectors = eigh(AA.todense()) mi@0: # get rid of too low eigenvalues mi@0: v_vectors = v_vectors[:, values > self._EPS] mi@0: values = values[values > self._EPS] mi@0: mi@0: # sort eigenvectors according to largest value mi@0: idx = np.argsort(values) mi@0: values = values[idx[::-1]] mi@0: mi@0: # argsort sorts in ascending order -> access is backwards mi@0: self.V = scipy.sparse.csc_matrix(v_vectors[:,idx[::-1]]) mi@0: mi@0: # compute S mi@0: self.S = scipy.sparse.csc_matrix(np.diag(np.sqrt(values))) mi@0: mi@0: # and the inverse of it mi@0: S_inv = scipy.sparse.csc_matrix(np.diag(1.0/np.sqrt(values))) mi@0: mi@0: self.U = self.data * self.V * S_inv mi@0: self.V = self.V.transpose() mi@0: mi@0: mi@0: if self._rows > self._cols: mi@0: if scipy.sparse.issparse(self.data): mi@0: _sparse_left_svd() mi@0: else: mi@0: _left_svd() mi@0: else: mi@0: if scipy.sparse.issparse(self.data): mi@0: _sparse_right_svd() mi@0: else: mi@0: _right_svd() mi@0: mi@0: if __name__ == "__main__": mi@0: import doctest mi@0: doctest.testmod()