mi@0: #!/usr/bin/python mi@0: # mi@0: # Copyright (C) Christian Thurau, 2010. mi@0: # Licensed under the GNU General Public License (GPL). mi@0: # http://www.gnu.org/licenses/gpl.txt mi@0: """ mi@0: PyMF Non-negative Matrix Factorization. mi@0: mi@0: NMFALS: Class for Non-negative Matrix Factorization using alternating least mi@0: squares optimization (requires cvxopt) mi@0: mi@0: [1] Lee, D. D. and Seung, H. S. (1999), Learning the Parts of Objects by Non-negative mi@0: Matrix Factorization, Nature 401(6755), 788-799. mi@0: """ mi@0: mi@0: mi@0: mi@0: import numpy as np mi@0: from cvxopt import solvers, base mi@0: from nmf import NMF mi@0: mi@0: __all__ = ["NMFALS"] mi@0: mi@0: class NMFALS(NMF): mi@0: """ mi@0: NMF(data, num_bases=4) mi@0: mi@0: mi@0: Non-negative Matrix Factorization. Factorize a data matrix into two matrices mi@0: s.t. F = | data - W*H | = | is minimal. H, and W are restricted to non-negative mi@0: data. Uses the an alternating least squares procedure (quite slow for larger mi@0: data sets) mi@0: mi@0: Parameters mi@0: ---------- mi@0: data : array_like, shape (_data_dimension, _num_samples) mi@0: the input data mi@0: num_bases: int, optional mi@0: Number of bases to compute (column rank of W and row rank of H). mi@0: 4 (default) mi@0: mi@0: Attributes mi@0: ---------- mi@0: W : "data_dimension x num_bases" matrix of basis vectors mi@0: H : "num bases x num_samples" matrix of coefficients mi@0: ferr : frobenius norm (after calling .factorize()) mi@0: mi@0: Example mi@0: ------- mi@0: Applying NMF to some rather stupid data set: mi@0: mi@0: >>> import numpy as np mi@0: >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) mi@0: >>> nmf_mdl = NMFALS(data, num_bases=2) mi@0: >>> nmf_mdl.factorize(niter=10) mi@0: mi@0: The basis vectors are now stored in nmf_mdl.W, the coefficients in nmf_mdl.H. mi@0: To compute coefficients for an existing set of basis vectors simply copy W mi@0: to nmf_mdl.W, and set compute_w to False: mi@0: mi@0: >>> data = np.array([[1.5], [1.2]]) mi@0: >>> W = np.array([[1.0, 0.0], [0.0, 1.0]]) mi@0: >>> nmf_mdl = NMFALS(data, num_bases=2) mi@0: >>> nmf_mdl.W = W mi@0: >>> nmf_mdl.factorize(niter=1, compute_w=False) mi@0: mi@0: The result is a set of coefficients nmf_mdl.H, s.t. data = W * nmf_mdl.H. mi@0: """ mi@0: mi@0: def update_h(self): mi@0: def updatesingleH(i): mi@0: # optimize alpha using qp solver from cvxopt mi@0: FA = base.matrix(np.float64(np.dot(-self.W.T, self.data[:,i]))) mi@0: al = solvers.qp(HA, FA, INQa, INQb) mi@0: self.H[:,i] = np.array(al['x']).reshape((1,-1)) mi@0: mi@0: # float64 required for cvxopt mi@0: HA = base.matrix(np.float64(np.dot(self.W.T, self.W))) mi@0: INQa = base.matrix(-np.eye(self._num_bases)) mi@0: INQb = base.matrix(0.0, (self._num_bases,1)) mi@0: mi@0: map(updatesingleH, xrange(self._num_samples)) mi@0: mi@0: mi@0: def update_w(self): mi@0: def updatesingleW(i): mi@0: # optimize alpha using qp solver from cvxopt mi@0: FA = base.matrix(np.float64(np.dot(-self.H, self.data[i,:].T))) mi@0: al = solvers.qp(HA, FA, INQa, INQb) mi@0: self.W[i,:] = np.array(al['x']).reshape((1,-1)) mi@0: mi@0: # float64 required for cvxopt mi@0: HA = base.matrix(np.float64(np.dot(self.H, self.H.T))) mi@0: INQa = base.matrix(-np.eye(self._num_bases)) mi@0: INQb = base.matrix(0.0, (self._num_bases,1)) mi@0: mi@0: map(updatesingleW, xrange(self._data_dimension))