#!/usr/bin/python
#
# Copyright (C) Christian Thurau, 2010.
# Licensed under the GNU General Public License (GPL).
# http://www.gnu.org/licenses/gpl.txt
"""
PyMF Robust Non-negative Matrix Factorization.

RNMF: Class for robust Non-negative Matrix Factorization (NMF with an
additional sparse residual matrix S, s.t. data ~ W*H + S).

[1] Lee, D. D. and Seung, H. S. (1999), Learning the Parts of Objects by
Non-negative Matrix Factorization, Nature 401(6755), 788-799.
"""


import numpy as np
import logging
import logging.config
import scipy.sparse

from nmf import NMF

__all__ = ["RNMF"]


class RNMF(NMF):
    """
    RNMF(data, num_bases=4, lamb=2.0)

    Robust Non-negative Matrix Factorization. Factorize a data matrix into
    matrices s.t. F = | data - W*H - S | is minimal, where H and W are
    restricted to non-negative values and S is a sparse residual matrix
    produced by soft-thresholding the reconstruction error (entries with
    magnitude <= lamb become zero). Uses the classical multiplicative
    update rule.

    Parameters
    ----------
    data : array_like, shape (_data_dimension, _num_samples)
        the input data
    num_bases : int, optional
        Number of bases to compute (column rank of W and row rank of H).
        4 (default)
    lamb : float, optional
        Soft-thresholding parameter controlling the sparsity of the
        residual S. 2.0 (default)

    Attributes
    ----------
    W : "data_dimension x num_bases" matrix of basis vectors
    H : "num bases x num_samples" matrix of coefficients
    S : "data_dimension x num_samples" sparse residual matrix
    ferr : frobenius norm (after calling .factorize())

    Example
    -------
    Applying RNMF to some rather stupid data set:

    >>> import numpy as np
    >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]])
    >>> rnmf_mdl = RNMF(data, num_bases=2)
    >>> rnmf_mdl.factorize(niter=10)

    The basis vectors are now stored in rnmf_mdl.W, the coefficients in
    rnmf_mdl.H. To compute coefficients for an existing set of basis
    vectors simply copy W to rnmf_mdl.W, and set compute_w to False:

    >>> data = np.array([[1.5], [1.2]])
    >>> W = np.array([[1.0, 0.0], [0.0, 1.0]])
    >>> rnmf_mdl = RNMF(data, num_bases=2)
    >>> rnmf_mdl.W = W
    >>> rnmf_mdl.factorize(niter=20, compute_w=False)

    The result is a set of coefficients rnmf_mdl.H, s.t. data = W * rnmf_mdl.H.
    """

    def __init__(self, data, num_bases=4, lamb=2.0):
        # delegate the common NMF setup to the parent class
        NMF.__init__(self, data, num_bases=num_bases)
        self._lamb = lamb

    def soft_thresholding(self, X, lamb):
        """Element-wise soft-thresholding (shrinkage) operator.

        Entries with |x| <= lamb are set to 0.0; all other entries are
        shrunk towards zero by lamb. Returns a new array; the input X is
        not modified in place.
        """
        X = np.where(np.abs(X) <= lamb, 0.0, X)
        X = np.where(X > lamb, X - lamb, X)
        X = np.where(X < -1.0*lamb, X + lamb, X)
        return X

    def init_w(self):
        # random non-negative initialization of the basis vectors
        self.W = np.random.random((self._data_dimension, self._num_bases))

    def init_h(self):
        # deterministic all-ones initialization (the original drew random
        # values and immediately overwrote every entry with 1.0; np.ones
        # gives the identical result without the wasted RNG draw)
        self.H = np.ones((self._num_bases, self._num_samples))

        # normalize the basis vectors to unit Euclidean length and push
        # the scale factors into H so that the product W*H is unchanged
        Wnorm = np.sqrt(np.sum(self.W**2.0, axis=0))
        self.W /= Wnorm

        for i in range(self.H.shape[0]):
            self.H[i,:] *= Wnorm[i]

        self.update_s()

    def update_s(self):
        # sparse residual: soft-threshold the current reconstruction error
        self.S = self.data - np.dot(self.W, self.H)
        self.S = self.soft_thresholding(self.S, self._lamb)

    def update_h(self):
        # multiplicative update for H; |H1| - H1 keeps only the negative
        # part of the gradient, so the multiplicative factor stays >= 0
        H1 = np.dot(self.W.T, self.S - self.data)
        H1 = np.abs(H1) - H1
        H1 /= (2.0* np.dot(self.W.T, np.dot(self.W, self.H)))
        self.H *= H1

        # re-estimate the sparse residual for the updated reconstruction
        self.update_s()

    def update_w(self):
        # multiplicative update for W, symmetric to update_h
        W1 = np.dot(self.S - self.data, self.H.T)
        W1 = np.abs(W1) - W1
        W1 /= (2.0 * (np.dot(self.W, np.dot(self.H, self.H.T))))
        self.W *= W1


if __name__ == "__main__":
    import doctest
    doctest.testmod()