mi@0: #!/usr/bin/python2.6 mi@0: # mi@0: # Copyright (C) Christian Thurau, 2010. mi@0: # Licensed under the GNU General Public License (GPL). mi@0: # http://www.gnu.org/licenses/gpl.txt mi@0: #$Id$ mi@0: """ mi@0: PyMF CUR-like Sparse Column Based Matrix Reconstruction via Greedy Approximation[1] mi@0: mi@0: GREEDYCUR: class for CUR-like decompositions using the GREEDY[2] algorithm. mi@0: mi@0: [1] Drineas, P., Kannan, R. and Mahoney, M. (2006), 'Fast Monte Carlo Algorithms III: mi@0: Computing a Compressed Approixmate Matrix Decomposition', SIAM J. Computing 36(1), 184-206. mi@0: [2] Ali Civril, Malik Magdon-Ismail. Deterministic Sparse Column Based Matrix mi@0: Reconstruction via Greedy Approximation of SVD. ISAAC'2008. mi@0: """ mi@0: mi@0: mi@0: import numpy as np mi@0: from greedy import GREEDY mi@0: from cur import CUR mi@0: mi@0: __all__ = ["GREEDYCUR"] mi@0: mi@0: class GREEDYCUR(CUR): mi@0: ''' mi@0: GREEDYCUR(data, data, k=-1, rrank=0, crank=0) mi@0: mi@0: GREEDY-CUR Decomposition. Factorize a data matrix into three matrices s.t. mi@0: F = | data - USV| is minimal. Unlike CUR, GREEDYCUR selects the rows mi@0: and columns using GREEDY, i.e. it tries to find rows/columns that are close mi@0: to SVD-based solutions. mi@0: mi@0: Parameters mi@0: ---------- mi@0: data : array_like [data_dimension x num_samples] mi@0: the input data mi@0: rrank: int, optional mi@0: Number of rows to sample from data. mi@0: 4 (default) mi@0: crank: int, optional mi@0: Number of columns to sample from data. mi@0: 4 (default) mi@0: show_progress: bool, optional mi@0: Print some extra information mi@0: False (default) mi@0: mi@0: Attributes mi@0: ---------- mi@0: U,S,V : submatrices s.t. data = USV mi@0: mi@0: Example mi@0: ------- mi@0: >>> import numpy as np mi@0: >>> from greedycur import GREEDYCUR mi@0: >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) mi@0: >>> cur_mdl = GREEDYCUR(data, show_progress=False, rrank=1, crank=2) mi@0: >>> cur_mdl.factorize() mi@0: """ mi@0: ''' mi@0: mi@0: def sample(self, A, c): mi@0: # set k to a value lower than the number of bases, usually mi@0: # gives better results. mi@0: k = np.round(c - c/5.0) mi@0: greedy_mdl = GREEDY(A, k=k, num_bases=c) mi@0: greedy_mdl.factorize(compute_h=False, compute_err=False, niter=1) mi@0: return greedy_mdl.select mi@0: mi@0: mi@0: def factorize(self): mi@0: # sample row and column indices that maximize the volume of the submatrix mi@0: self._rid = self.sample(self.data.transpose(), self._rrank) mi@0: self._cid = self.sample(self.data, self._crank) mi@0: self._rcnt = np.ones(len(self._rid)) mi@0: self._ccnt = np.ones(len(self._cid)) mi@0: mi@0: self.computeUCR() mi@0: mi@0: mi@0: if __name__ == "__main__": mi@0: import doctest mi@0: doctest.testmod()