mi@0: #!/usr/bin/python mi@0: # mi@0: # Copyright (C) Christian Thurau, 2010. mi@0: # Licensed under the GNU General Public License (GPL). mi@0: # http://www.gnu.org/licenses/gpl.txt mi@0: #$Id$ mi@0: """ mi@0: PyMF Non-negative Double Singular Value Decompositions. mi@0: mi@0: NNDSVD: Class for Non-negative Double Singular Value Decompositions [1] mi@0: mi@0: [1] C. Boutsidis and E. Gallopoulos (2008), SVD based initialization: A head mi@0: start for nonnegative matrix factorization, Pattern Recognition, 41, 1350-1362 mi@0: """ mi@0: mi@0: mi@0: import numpy as np mi@0: mi@0: from nmf import NMF mi@0: from svd import SVD mi@0: mi@0: __all__ = ["NNDSVD"] mi@0: mi@0: class NNDSVD(NMF): mi@0: """ mi@0: NNDSVD(data, num_bases=4) mi@0: mi@0: mi@0: Non-negative Double Singular Value Decompositions. Factorize a data mi@0: matrix into two matrices s.t. F = | data - W*H | = | is minimal. H, and mi@0: W are restricted to non-negative data. NNDSVD is primarily used for mi@0: initializing NMF. mi@0: mi@0: Parameters mi@0: ---------- mi@0: data : array_like, shape (_data_dimension, _num_samples) mi@0: the input data mi@0: num_bases: int, optional mi@0: Number of bases to compute (column rank of W and row rank of H). mi@0: 4 (default) mi@0: mi@0: Attributes mi@0: ---------- mi@0: W : "data_dimension x num_bases" matrix of basis vectors mi@0: H : "num bases x num_samples" matrix of coefficients mi@0: ferr : frobenius norm (after calling .factorize()) mi@0: mi@0: Example mi@0: ------- mi@0: Applying NNDSVD to some rather stupid data set: mi@0: mi@0: >>> import numpy as np mi@0: >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) mi@0: >>> nndsvd_mdl = NNDSVD(data, num_bases=2) mi@0: >>> nndsvd_mdl.factorize() mi@0: mi@0: The basis vectors are now stored in nndsvd_mdl.W, the coefficients in mi@0: nndsvd_mdl.H. To initialize NMF with nndsvd_mdl.W, nndsvd_mdl.H mi@0: simply copy W to nmf_mdl.W and H to nmf_mdl.H: mi@0: mi@0: >>> data = np.array([[1.5], [1.2]]) mi@0: >>> W = np.array([[1.0, 0.0], [0.0, 1.0]]) mi@0: >>> nmf_mdl = NMF(data, num_bases=2) mi@0: >>> nmf_mdl.W = nndsvd_mdl.W mi@0: >>> nmf_mdl.H = nndsvd_mdl.H mi@0: >>> nmf_mdl.factorize(niter=20) mi@0: mi@0: The result is a set of (more optimal) coefficients nmf_mdl.H, nmf_mdl.W. mi@0: """ mi@0: def init_w(self): mi@0: self.W = np.zeros((self._data_dimension, self._num_bases)) mi@0: mi@0: def init_h(self): mi@0: self.H = np.zeros((self._num_bases, self._num_samples)) mi@0: mi@0: def update_h(self): mi@0: pass mi@0: mi@0: def update_w(self): mi@0: svd_mdl = SVD(self.data) mi@0: svd_mdl.factorize() mi@0: mi@0: U, S, V = svd_mdl.U, svd_mdl.S, svd_mdl.V mi@0: mi@0: # The first left singular vector is nonnegative mi@0: # (abs is only used as values could be all negative) mi@0: self.W[:,0] = np.sqrt(S[0,0]) * np.abs(U[:,0]) mi@0: mi@0: #The first right singular vector is nonnegative mi@0: self.H[0,:] = np.sqrt(S[0,0]) * np.abs(V[0,:].T) mi@0: mi@0: for i in range(1,self._num_bases): mi@0: # Form the rank one factor mi@0: Tmp = np.dot(U[:,i:i+1]*S[i,i], V[i:i+1,:]) mi@0: mi@0: # zero out the negative elements mi@0: Tmp = np.where(Tmp < 0, 0.0, Tmp) mi@0: mi@0: # Apply 2nd SVD mi@0: svd_mdl_2 = SVD(Tmp) mi@0: svd_mdl_2.factorize() mi@0: u, s, v = svd_mdl_2.U, svd_mdl_2.S, svd_mdl_2.V mi@0: mi@0: # The first left singular vector is nonnegative mi@0: self.W[:,i] = np.sqrt(s[0,0]) * np.abs(u[:,0]) mi@0: mi@0: #The first right singular vector is nonnegative mi@0: self.H[i,:] = np.sqrt(s[0,0]) * np.abs(v[0,:].T) mi@0: mi@0: def factorize(self, niter=1, show_progress=False, mi@0: compute_w=True, compute_h=True, compute_err=True): mi@0: mi@0: # enforce certain default values, otherwise it won't work mi@0: NMF.factorize(self, niter=1, show_progress=show_progress, mi@0: compute_w=True, compute_h=True, compute_err=compute_err) mi@0: mi@0: if __name__ == "__main__": mi@0: import doctest mi@0: doctest.testmod()