mi@0: #!/usr/bin/python2.6
mi@0: #
mi@0: # Copyright (C) Christian Thurau, 2010. 
mi@0: # Licensed under the GNU General Public License (GPL). 
mi@0: # http://www.gnu.org/licenses/gpl.txt
mi@0: #$Id$
mi@0: """  
mi@0: PyMF CUR-like Sparse Column Based Matrix Reconstruction via Greedy Approximation[1]
mi@0: 
mi@0: 	GREEDYCUR: class for CUR-like decompositions using the GREEDY[2] algorithm.
mi@0: 
mi@0: [1] Drineas, P., Kannan, R. and Mahoney, M. (2006), 'Fast Monte Carlo Algorithms III: 
mi@0: Computing a Compressed Approixmate Matrix Decomposition', SIAM J. Computing 36(1), 184-206.
mi@0: [2] Ali Civril, Malik Magdon-Ismail. Deterministic Sparse Column Based Matrix
mi@0: Reconstruction via Greedy Approximation of SVD. ISAAC'2008.
mi@0: """
mi@0: 
mi@0: 
mi@0: import numpy as np
mi@0: from greedy import GREEDY
mi@0: from cur import CUR
mi@0: 
mi@0: __all__ = ["GREEDYCUR"]
mi@0: 
mi@0: class GREEDYCUR(CUR):
mi@0:     '''
mi@0:     GREEDYCUR(data,  data, k=-1, rrank=0, crank=0)
mi@0: 
mi@0:     GREEDY-CUR Decomposition. Factorize a data matrix into three matrices s.t. 
mi@0:     F = | data - USV| is minimal. Unlike CUR, GREEDYCUR selects the rows 
mi@0:     and columns using GREEDY, i.e. it tries to find rows/columns that are close
mi@0:     to SVD-based solutions.
mi@0: 
mi@0:     Parameters
mi@0:     ----------
mi@0:     data : array_like [data_dimension x num_samples]
mi@0:         the input data
mi@0:     rrank: int, optional 
mi@0:         Number of rows to sample from data.
mi@0:         4 (default)
mi@0:     crank: int, optional
mi@0:         Number of columns to sample from data.
mi@0:         4 (default)
mi@0:     show_progress: bool, optional
mi@0:         Print some extra information
mi@0:         False (default)    
mi@0:     
mi@0:     Attributes
mi@0:     ----------
mi@0:         U,S,V : submatrices s.t. data = USV        
mi@0:     
mi@0:     Example
mi@0:     -------
mi@0:     >>> import numpy as np
mi@0:     >>> from greedycur import GREEDYCUR
mi@0:     >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]])
mi@0:     >>> cur_mdl = GREEDYCUR(data, show_progress=False, rrank=1, crank=2)    
mi@0:     >>> cur_mdl.factorize()
mi@0:     """
mi@0:     '''
mi@0: 
mi@0:     def sample(self, A, c):
mi@0:         # set k to a value lower than the number of bases, usually
mi@0:         # gives better results.
mi@0:         k = np.round(c - c/5.0)
mi@0:         greedy_mdl = GREEDY(A, k=k, num_bases=c)
mi@0:         greedy_mdl.factorize(compute_h=False, compute_err=False, niter=1)        
mi@0:         return greedy_mdl.select
mi@0:             
mi@0:             
mi@0:     def factorize(self):
mi@0:         # sample row and column indices that maximize the volume of the submatrix
mi@0:         self._rid = self.sample(self.data.transpose(), self._rrank)
mi@0:         self._cid = self.sample(self.data, self._crank)
mi@0:         self._rcnt = np.ones(len(self._rid))
mi@0:         self._ccnt = np.ones(len(self._cid))
mi@0:                                     
mi@0:         self.computeUCR()
mi@0: 
mi@0: 
mi@0: if __name__ == "__main__":
mi@0:     import doctest  
mi@0:     doctest.testmod()