mi@0: #!/usr/bin/python
mi@0: #
mi@0: # Copyright (C) Christian Thurau, 2010.
mi@0: # Licensed under the GNU General Public License (GPL).
mi@0: # http://www.gnu.org/licenses/gpl.txt
mi@0: #$Id$
mi@0: """
mi@0: PyMF Non-negative Double Singular Value Decompositions.
mi@0: 
mi@0:     NNDSVD: Class for Non-negative Double Singular Value Decompositions [1]
mi@0: 
mi@0: [1] C. Boutsidis and E. Gallopoulos (2008), SVD based initialization: A head
mi@0: start for nonnegative matrix factorization, Pattern Recognition, 41, 1350-1362
mi@0: """
mi@0: 
mi@0: 
mi@0: import numpy as np
mi@0: 
mi@0: from nmf import NMF
mi@0: from svd import SVD
mi@0: 
mi@0: __all__ = ["NNDSVD"]
mi@0: 
mi@0: class NNDSVD(NMF):
mi@0:     """
mi@0:     NNDSVD(data, num_bases=4)
mi@0: 
mi@0: 
mi@0:     Non-negative Double Singular Value Decompositions. Factorize a data 
mi@0:     matrix into two matrices s.t. F = | data - W*H | = | is minimal. H, and 
mi@0:     W are restricted to non-negative data. NNDSVD is primarily used for
mi@0:     initializing NMF. 
mi@0: 
mi@0:     Parameters
mi@0:     ----------
mi@0:     data : array_like, shape (_data_dimension, _num_samples)
mi@0:         the input data
mi@0:     num_bases: int, optional
mi@0:         Number of bases to compute (column rank of W and row rank of H).
mi@0:         4 (default)        
mi@0: 
mi@0:     Attributes
mi@0:     ----------
mi@0:     W : "data_dimension x num_bases" matrix of basis vectors
mi@0:     H : "num bases x num_samples" matrix of coefficients
mi@0:     ferr : frobenius norm (after calling .factorize()) 
mi@0: 
mi@0:     Example
mi@0:     -------
mi@0:     Applying NNDSVD to some rather stupid data set:
mi@0: 
mi@0:     >>> import numpy as np
mi@0:     >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]])
mi@0:     >>> nndsvd_mdl = NNDSVD(data, num_bases=2)
mi@0:     >>> nndsvd_mdl.factorize()
mi@0: 
mi@0:     The basis vectors are now stored in nndsvd_mdl.W, the coefficients in 
mi@0:     nndsvd_mdl.H. To initialize NMF with nndsvd_mdl.W, nndsvd_mdl.H 
mi@0:     simply copy W to nmf_mdl.W and H to nmf_mdl.H:
mi@0: 
mi@0:     >>> data = np.array([[1.5], [1.2]])
mi@0:     >>> W = np.array([[1.0, 0.0], [0.0, 1.0]])
mi@0:     >>> nmf_mdl = NMF(data, num_bases=2)
mi@0:     >>> nmf_mdl.W = nndsvd_mdl.W
mi@0:     >>> nmf_mdl.H = nndsvd_mdl.H
mi@0:     >>> nmf_mdl.factorize(niter=20)
mi@0: 
mi@0:     The result is a set of (more optimal) coefficients nmf_mdl.H, nmf_mdl.W.
mi@0:     """                
mi@0:     def init_w(self):
mi@0:         self.W = np.zeros((self._data_dimension, self._num_bases))
mi@0:                        
mi@0:     def init_h(self):
mi@0:         self.H = np.zeros((self._num_bases, self._num_samples))
mi@0:         
mi@0:     def update_h(self):
mi@0:         pass
mi@0: 
mi@0:     def update_w(self):
mi@0:         svd_mdl = SVD(self.data)
mi@0:         svd_mdl.factorize()
mi@0:         
mi@0:         U, S, V = svd_mdl.U, svd_mdl.S, svd_mdl.V    
mi@0:         
mi@0:         # The first left singular vector is nonnegative
mi@0:         # (abs is only used as values could be all negative)
mi@0:         self.W[:,0] = np.sqrt(S[0,0]) * np.abs(U[:,0])
mi@0:         
mi@0:         #The first right singular vector is nonnegative
mi@0:         self.H[0,:] = np.sqrt(S[0,0]) * np.abs(V[0,:].T)
mi@0: 
mi@0:         for i in range(1,self._num_bases):
mi@0:             # Form the rank one factor
mi@0:             Tmp = np.dot(U[:,i:i+1]*S[i,i], V[i:i+1,:])          
mi@0:             
mi@0:             # zero out the negative elements
mi@0:             Tmp = np.where(Tmp < 0, 0.0, Tmp)
mi@0:             
mi@0:             # Apply 2nd SVD
mi@0:             svd_mdl_2 = SVD(Tmp)
mi@0:             svd_mdl_2.factorize()
mi@0:             u, s, v = svd_mdl_2.U, svd_mdl_2.S, svd_mdl_2.V
mi@0:             
mi@0:             # The first left singular vector is nonnegative
mi@0:             self.W[:,i] = np.sqrt(s[0,0]) * np.abs(u[:,0]) 
mi@0:             
mi@0:             #The first right singular vector is nonnegative
mi@0:             self.H[i,:] = np.sqrt(s[0,0]) * np.abs(v[0,:].T) 
mi@0:         
mi@0:     def factorize(self, niter=1, show_progress=False, 
mi@0:                   compute_w=True, compute_h=True, compute_err=True):
mi@0:                       
mi@0:         # enforce certain default values, otherwise it won't work
mi@0:         NMF.factorize(self, niter=1, show_progress=show_progress, 
mi@0:                   compute_w=True, compute_h=True, compute_err=compute_err)
mi@0: 
mi@0: if __name__ == "__main__":
mi@0:     import doctest
mi@0:     doctest.testmod()