mi@0
|
1 #!/usr/bin/python2.6
|
mi@0
|
2 #
|
mi@0
|
3 # Copyright (C) Christian Thurau, 2010.
|
mi@0
|
4 # Licensed under the GNU General Public License (GPL).
|
mi@0
|
5 # http://www.gnu.org/licenses/gpl.txt
|
mi@0
|
6 #$Id$
|
mi@0
|
7 """
|
mi@0
|
8 PyMF CUR-like Sparse Column Based Matrix Reconstruction via Greedy Approximation[1]
|
mi@0
|
9
|
mi@0
|
10 GREEDYCUR: class for CUR-like decompositions using the GREEDY[2] algorithm.
|
mi@0
|
11
|
mi@0
|
12 [1] Drineas, P., Kannan, R. and Mahoney, M. (2006), 'Fast Monte Carlo Algorithms III:
|
mi@0
|
13 Computing a Compressed Approixmate Matrix Decomposition', SIAM J. Computing 36(1), 184-206.
|
mi@0
|
14 [2] Ali Civril, Malik Magdon-Ismail. Deterministic Sparse Column Based Matrix
|
mi@0
|
15 Reconstruction via Greedy Approximation of SVD. ISAAC'2008.
|
mi@0
|
16 """
|
mi@0
|
17
|
mi@0
|
18
|
mi@0
|
19 import numpy as np
|
mi@0
|
20 from greedy import GREEDY
|
mi@0
|
21 from cur import CUR
|
mi@0
|
22
|
mi@0
|
23 __all__ = ["GREEDYCUR"]
|
mi@0
|
24
|
mi@0
|
25 class GREEDYCUR(CUR):
|
mi@0
|
26 '''
|
mi@0
|
27 GREEDYCUR(data, data, k=-1, rrank=0, crank=0)
|
mi@0
|
28
|
mi@0
|
29 GREEDY-CUR Decomposition. Factorize a data matrix into three matrices s.t.
|
mi@0
|
30 F = | data - USV| is minimal. Unlike CUR, GREEDYCUR selects the rows
|
mi@0
|
31 and columns using GREEDY, i.e. it tries to find rows/columns that are close
|
mi@0
|
32 to SVD-based solutions.
|
mi@0
|
33
|
mi@0
|
34 Parameters
|
mi@0
|
35 ----------
|
mi@0
|
36 data : array_like [data_dimension x num_samples]
|
mi@0
|
37 the input data
|
mi@0
|
38 rrank: int, optional
|
mi@0
|
39 Number of rows to sample from data.
|
mi@0
|
40 4 (default)
|
mi@0
|
41 crank: int, optional
|
mi@0
|
42 Number of columns to sample from data.
|
mi@0
|
43 4 (default)
|
mi@0
|
44 show_progress: bool, optional
|
mi@0
|
45 Print some extra information
|
mi@0
|
46 False (default)
|
mi@0
|
47
|
mi@0
|
48 Attributes
|
mi@0
|
49 ----------
|
mi@0
|
50 U,S,V : submatrices s.t. data = USV
|
mi@0
|
51
|
mi@0
|
52 Example
|
mi@0
|
53 -------
|
mi@0
|
54 >>> import numpy as np
|
mi@0
|
55 >>> from greedycur import GREEDYCUR
|
mi@0
|
56 >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]])
|
mi@0
|
57 >>> cur_mdl = GREEDYCUR(data, show_progress=False, rrank=1, crank=2)
|
mi@0
|
58 >>> cur_mdl.factorize()
|
mi@0
|
59 """
|
mi@0
|
60 '''
|
mi@0
|
61
|
mi@0
|
62 def sample(self, A, c):
|
mi@0
|
63 # set k to a value lower than the number of bases, usually
|
mi@0
|
64 # gives better results.
|
mi@0
|
65 k = np.round(c - c/5.0)
|
mi@0
|
66 greedy_mdl = GREEDY(A, k=k, num_bases=c)
|
mi@0
|
67 greedy_mdl.factorize(compute_h=False, compute_err=False, niter=1)
|
mi@0
|
68 return greedy_mdl.select
|
mi@0
|
69
|
mi@0
|
70
|
mi@0
|
71 def factorize(self):
|
mi@0
|
72 # sample row and column indices that maximize the volume of the submatrix
|
mi@0
|
73 self._rid = self.sample(self.data.transpose(), self._rrank)
|
mi@0
|
74 self._cid = self.sample(self.data, self._crank)
|
mi@0
|
75 self._rcnt = np.ones(len(self._rid))
|
mi@0
|
76 self._ccnt = np.ones(len(self._cid))
|
mi@0
|
77
|
mi@0
|
78 self.computeUCR()
|
mi@0
|
79
|
mi@0
|
80
|
mi@0
|
81 if __name__ == "__main__":
|
mi@0
|
82 import doctest
|
mi@0
|
83 doctest.testmod()
|