Mercurial > hg > segmentation
diff pymf/nndsvd.py @ 0:26838b1f560f
initial commit of a segmenter project
author | mi tian |
---|---|
date | Thu, 02 Apr 2015 18:09:27 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/nndsvd.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,119 @@ +#!/usr/bin/python +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +#$Id$ +""" +PyMF Non-negative Double Singular Value Decompositions. + + NNDSVD: Class for Non-negative Double Singular Value Decompositions [1] + +[1] C. Boutsidis and E. Gallopoulos (2008), SVD based initialization: A head +start for nonnegative matrix factorization, Pattern Recognition, 41, 1350-1362 +""" + + +import numpy as np + +from nmf import NMF +from svd import SVD + +__all__ = ["NNDSVD"] + +class NNDSVD(NMF): + """ + NNDSVD(data, num_bases=4) + + + Non-negative Double Singular Value Decompositions. Factorize a data + matrix into two matrices s.t. F = | data - W*H | = | is minimal. H, and + W are restricted to non-negative data. NNDSVD is primarily used for + initializing NMF. + + Parameters + ---------- + data : array_like, shape (_data_dimension, _num_samples) + the input data + num_bases: int, optional + Number of bases to compute (column rank of W and row rank of H). + 4 (default) + + Attributes + ---------- + W : "data_dimension x num_bases" matrix of basis vectors + H : "num bases x num_samples" matrix of coefficients + ferr : frobenius norm (after calling .factorize()) + + Example + ------- + Applying NNDSVD to some rather stupid data set: + + >>> import numpy as np + >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) + >>> nndsvd_mdl = NNDSVD(data, num_bases=2) + >>> nndsvd_mdl.factorize() + + The basis vectors are now stored in nndsvd_mdl.W, the coefficients in + nndsvd_mdl.H. To initialize NMF with nndsvd_mdl.W, nndsvd_mdl.H + simply copy W to nmf_mdl.W and H to nmf_mdl.H: + + >>> data = np.array([[1.5], [1.2]]) + >>> W = np.array([[1.0, 0.0], [0.0, 1.0]]) + >>> nmf_mdl = NMF(data, num_bases=2) + >>> nmf_mdl.W = nndsvd_mdl.W + >>> nmf_mdl.H = nndsvd_mdl.H + >>> nmf_mdl.factorize(niter=20) + + The result is a set of (more optimal) coefficients nmf_mdl.H, nmf_mdl.W. + """ + def init_w(self): + self.W = np.zeros((self._data_dimension, self._num_bases)) + + def init_h(self): + self.H = np.zeros((self._num_bases, self._num_samples)) + + def update_h(self): + pass + + def update_w(self): + svd_mdl = SVD(self.data) + svd_mdl.factorize() + + U, S, V = svd_mdl.U, svd_mdl.S, svd_mdl.V + + # The first left singular vector is nonnegative + # (abs is only used as values could be all negative) + self.W[:,0] = np.sqrt(S[0,0]) * np.abs(U[:,0]) + + #The first right singular vector is nonnegative + self.H[0,:] = np.sqrt(S[0,0]) * np.abs(V[0,:].T) + + for i in range(1,self._num_bases): + # Form the rank one factor + Tmp = np.dot(U[:,i:i+1]*S[i,i], V[i:i+1,:]) + + # zero out the negative elements + Tmp = np.where(Tmp < 0, 0.0, Tmp) + + # Apply 2nd SVD + svd_mdl_2 = SVD(Tmp) + svd_mdl_2.factorize() + u, s, v = svd_mdl_2.U, svd_mdl_2.S, svd_mdl_2.V + + # The first left singular vector is nonnegative + self.W[:,i] = np.sqrt(s[0,0]) * np.abs(u[:,0]) + + #The first right singular vector is nonnegative + self.H[i,:] = np.sqrt(s[0,0]) * np.abs(v[0,:].T) + + def factorize(self, niter=1, show_progress=False, + compute_w=True, compute_h=True, compute_err=True): + + # enforce certain default values, otherwise it won't work + NMF.factorize(self, niter=1, show_progress=show_progress, + compute_w=True, compute_h=True, compute_err=compute_err) + +if __name__ == "__main__": + import doctest + doctest.testmod()