segmentation: pymf/greedycur.py annotate

annotate pymf/greedycur.py @ 19:890cfe424f4a tip

added annotations

author	mitian
date	Fri, 11 Dec 2015 09:47:40 +0000
parents	26838b1f560f
children

rev	line source
mi@0	1 #!/usr/bin/python2.6
mi@0	2 #
mi@0	3 # Copyright (C) Christian Thurau, 2010.
mi@0	4 # Licensed under the GNU General Public License (GPL).
mi@0	5 # http://www.gnu.org/licenses/gpl.txt
mi@0	6 #$Id$
mi@0	7 """
mi@0	8 PyMF CUR-like Sparse Column Based Matrix Reconstruction via Greedy Approximation[1]
mi@0	9
mi@0	10 GREEDYCUR: class for CUR-like decompositions using the GREEDY[2] algorithm.
mi@0	11
mi@0	12 [1] Drineas, P., Kannan, R. and Mahoney, M. (2006), 'Fast Monte Carlo Algorithms III:
mi@0	13 Computing a Compressed Approixmate Matrix Decomposition', SIAM J. Computing 36(1), 184-206.
mi@0	14 [2] Ali Civril, Malik Magdon-Ismail. Deterministic Sparse Column Based Matrix
mi@0	15 Reconstruction via Greedy Approximation of SVD. ISAAC'2008.
mi@0	16 """
mi@0	17
mi@0	18
mi@0	19 import numpy as np
mi@0	20 from greedy import GREEDY
mi@0	21 from cur import CUR
mi@0	22
mi@0	23 __all__ = ["GREEDYCUR"]
mi@0	24
mi@0	25 class GREEDYCUR(CUR):
mi@0	26 '''
mi@0	27 GREEDYCUR(data, data, k=-1, rrank=0, crank=0)
mi@0	28
mi@0	29 GREEDY-CUR Decomposition. Factorize a data matrix into three matrices s.t.
mi@0	30 F = \| data - USV\| is minimal. Unlike CUR, GREEDYCUR selects the rows
mi@0	31 and columns using GREEDY, i.e. it tries to find rows/columns that are close
mi@0	32 to SVD-based solutions.
mi@0	33
mi@0	34 Parameters
mi@0	35 ----------
mi@0	36 data : array_like [data_dimension x num_samples]
mi@0	37 the input data
mi@0	38 rrank: int, optional
mi@0	39 Number of rows to sample from data.
mi@0	40 4 (default)
mi@0	41 crank: int, optional
mi@0	42 Number of columns to sample from data.
mi@0	43 4 (default)
mi@0	44 show_progress: bool, optional
mi@0	45 Print some extra information
mi@0	46 False (default)
mi@0	47
mi@0	48 Attributes
mi@0	49 ----------
mi@0	50 U,S,V : submatrices s.t. data = USV
mi@0	51
mi@0	52 Example
mi@0	53 -------
mi@0	54 >>> import numpy as np
mi@0	55 >>> from greedycur import GREEDYCUR
mi@0	56 >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]])
mi@0	57 >>> cur_mdl = GREEDYCUR(data, show_progress=False, rrank=1, crank=2)
mi@0	58 >>> cur_mdl.factorize()
mi@0	59 """
mi@0	60 '''
mi@0	61
mi@0	62 def sample(self, A, c):
mi@0	63 # set k to a value lower than the number of bases, usually
mi@0	64 # gives better results.
mi@0	65 k = np.round(c - c/5.0)
mi@0	66 greedy_mdl = GREEDY(A, k=k, num_bases=c)
mi@0	67 greedy_mdl.factorize(compute_h=False, compute_err=False, niter=1)
mi@0	68 return greedy_mdl.select
mi@0	69
mi@0	70
mi@0	71 def factorize(self):
mi@0	72 # sample row and column indices that maximize the volume of the submatrix
mi@0	73 self._rid = self.sample(self.data.transpose(), self._rrank)
mi@0	74 self._cid = self.sample(self.data, self._crank)
mi@0	75 self._rcnt = np.ones(len(self._rid))
mi@0	76 self._ccnt = np.ones(len(self._cid))
mi@0	77
mi@0	78 self.computeUCR()
mi@0	79
mi@0	80
mi@0	81 if __name__ == "__main__":
mi@0	82 import doctest
mi@0	83 doctest.testmod()

Mercurial > hg > segmentation

annotate pymf/greedycur.py @ 19:890cfe424f4a tip