mi@0: #!/usr/bin/python mi@0: # mi@0: # Copyright (C) Christian Thurau, 2010. mi@0: # Licensed under the GNU General Public License (GPL). mi@0: # http://www.gnu.org/licenses/gpl.txt mi@0: """ mi@0: PyMF Binary Matrix Factorization [1] mi@0: mi@0: BNMF(NMF) : Class for binary matrix factorization mi@0: mi@0: [1]Z. Zhang, T. Li, C. H. Q. Ding, X. Zhang: Binary Matrix Factorization with mi@0: Applications. ICDM 2007 mi@0: """ mi@0: mi@0: mi@0: import numpy as np mi@0: from nmf import NMF mi@0: mi@0: __all__ = ["BNMF"] mi@0: mi@0: class BNMF(NMF): mi@0: """ mi@0: BNMF(data, data, num_bases=4) mi@0: Binary Matrix Factorization. Factorize a data matrix into two matrices s.t. mi@0: F = | data - W*H | is minimal. H and W are restricted to binary values. mi@0: mi@0: Parameters mi@0: ---------- mi@0: data : array_like, shape (_data_dimension, _num_samples) mi@0: the input data mi@0: num_bases: int, optional mi@0: Number of bases to compute (column rank of W and row rank of H). mi@0: 4 (default) mi@0: mi@0: Attributes mi@0: ---------- mi@0: W : "data_dimension x num_bases" matrix of basis vectors mi@0: H : "num bases x num_samples" matrix of coefficients mi@0: ferr : frobenius norm (after calling .factorize()) mi@0: mi@0: Example mi@0: ------- mi@0: Applying BNMF to some rather stupid data set: mi@0: mi@0: >>> import numpy as np mi@0: >>> from bnmf import BNMF mi@0: >>> data = np.array([[1.0, 0.0, 1.0], [0.0, 1.0, 1.0]]) mi@0: mi@0: Use 2 basis vectors -> W shape(data_dimension, 2). mi@0: mi@0: >>> bnmf_mdl = BNMF(data, num_bases=2) mi@0: mi@0: Set number of iterations to 5 and start computing the factorization. mi@0: mi@0: >>> bnmf_mdl.factorize(niter=5) mi@0: mi@0: The basis vectors are now stored in bnmf_mdl.W, the coefficients in bnmf_mdl.H. mi@0: To compute coefficients for an existing set of basis vectors simply copy W mi@0: to bnmf_mdl.W, and set compute_w to False: mi@0: mi@0: >>> data = np.array([[0.0], [1.0]]) mi@0: >>> W = np.array([[1.0, 0.0], [0.0, 1.0]]) mi@0: >>> bnmf_mdl = BNMF(data, num_bases=2) mi@0: >>> bnmf_mdl.W = W mi@0: >>> bnmf_mdl.factorize(niter=10, compute_w=False) mi@0: mi@0: The result is a set of coefficients bnmf_mdl.H, s.t. data = W * bnmf_mdl.H. mi@0: """ mi@0: mi@0: # controls how fast lambda should increase: mi@0: # this influence convergence to binary values during the update. A value mi@0: # <1 will result in non-binary decompositions as the update rule effectively mi@0: # is a conventional nmf update rule. Values >1 give more weight to making the mi@0: # factorization binary with increasing iterations. mi@0: # setting either W or H to 0 results make the resulting matrix non binary. mi@0: _LAMB_INCREASE_W = 1.1 mi@0: _LAMB_INCREASE_H = 1.1 mi@0: mi@0: def update_h(self): mi@0: H1 = np.dot(self.W.T, self.data[:,:]) + 3.0*self._lamb_H*(self.H**2) mi@0: H2 = np.dot(np.dot(self.W.T,self.W), self.H) + 2*self._lamb_H*(self.H**3) + self._lamb_H*self.H + 10**-9 mi@0: self.H *= H1/H2 mi@0: mi@0: self._lamb_W = self._LAMB_INCREASE_W * self._lamb_W mi@0: self._lamb_H = self._LAMB_INCREASE_H * self._lamb_H mi@0: mi@0: def update_w(self): mi@0: W1 = np.dot(self.data[:,:], self.H.T) + 3.0*self._lamb_W*(self.W**2) mi@0: W2 = np.dot(self.W, np.dot(self.H, self.H.T)) + 2.0*self._lamb_W*(self.W**3) + self._lamb_W*self.W + 10**-9 mi@0: self.W *= W1/W2 mi@0: mi@0: def factorize(self, niter=10, compute_w=True, compute_h=True, mi@0: show_progress=False, compute_err=True): mi@0: """ Factorize s.t. WH = data mi@0: mi@0: Parameters mi@0: ---------- mi@0: niter : int mi@0: number of iterations. mi@0: show_progress : bool mi@0: print some extra information to stdout. mi@0: compute_h : bool mi@0: iteratively update values for H. mi@0: compute_w : bool mi@0: iteratively update values for W. mi@0: compute_err : bool mi@0: compute Frobenius norm |data-WH| after each update and store mi@0: it to .ferr[k]. mi@0: mi@0: Updated Values mi@0: -------------- mi@0: .W : updated values for W. mi@0: .H : updated values for H. mi@0: .ferr : Frobenius norm |data-WH| for each iteration. mi@0: """ mi@0: mi@0: # init some learning parameters mi@0: self._lamb_W = 1.0/niter mi@0: self._lamb_H = 1.0/niter mi@0: mi@0: NMF.factorize(self, niter=niter, compute_w=compute_w, mi@0: compute_h=compute_h, show_progress=show_progress, mi@0: compute_err=compute_err) mi@0: mi@0: if __name__ == "__main__": mi@0: import doctest mi@0: doctest.testmod()