annotate pymf/snmf.py @ 19:890cfe424f4a tip

added annotations
author mitian
date Fri, 11 Dec 2015 09:47:40 +0000
parents 26838b1f560f
children
rev   line source
mi@0 1 #!/usr/bin/python
mi@0 2 #
mi@0 3 # Copyright (C) Christian Thurau, 2010.
mi@0 4 # Licensed under the GNU General Public License (GPL).
mi@0 5 # http://www.gnu.org/licenses/gpl.txt
mi@0 6 """
mi@0 7 PyMF Semi Non-negative Matrix Factorization.
mi@0 8
mi@0 9 SNMF(NMF) : Class for semi non-negative matrix factorization
mi@0 10
mi@0 11 [1] Ding, C., Li, T. and Jordan, M.. Convex and Semi-Nonnegative Matrix Factorizations.
mi@0 12 IEEE Trans. on Pattern Analysis and Machine Intelligence 32(1), 45-55.
mi@0 13 """
mi@0 14
mi@0 15
mi@0 16
mi@0 17 import numpy as np
mi@0 18
mi@0 19 from nmf import NMF
mi@0 20
mi@0 21 __all__ = ["SNMF"]
mi@0 22
class SNMF(NMF):
    """
    SNMF(data, num_bases=4)

    Semi Non-negative Matrix Factorization. Factorize a data matrix into two
    matrices s.t. F = | data - W*H | is minimal. The coefficients H are
    updated multiplicatively (so they keep the sign of their initial values),
    while the basis W is an unconstrained least-squares solution and may
    contain negative entries.

    Parameters
    ----------
    data : array_like, shape (_data_dimension, _num_samples)
        the input data
    num_bases: int, optional
        Number of bases to compute (column rank of W and row rank of H).
        4 (default)

    Attributes
    ----------
    W : "data_dimension x num_bases" matrix of basis vectors
    H : "num bases x num_samples" matrix of coefficients
    ferr : frobenius norm (after calling .factorize())

    Example
    -------
    Applying Semi-NMF to some rather stupid data set:

    >>> import numpy as np
    >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]])
    >>> snmf_mdl = SNMF(data, num_bases=2)
    >>> snmf_mdl.factorize(niter=10)

    The basis vectors are now stored in snmf_mdl.W, the coefficients in snmf_mdl.H.
    To compute coefficients for an existing set of basis vectors simply copy W
    to snmf_mdl.W, and set compute_w to False:

    >>> data = np.array([[1.5], [1.2]])
    >>> W = np.array([[1.0, 0.0], [0.0, 1.0]])
    >>> snmf_mdl = SNMF(data, num_bases=2)
    >>> snmf_mdl.W = W
    >>> snmf_mdl.factorize(niter=1, compute_w=False)

    The result is a set of coefficients snmf_mdl.H, s.t. data = W * snmf_mdl.H.
    """

    def update_w(self):
        """Set ``self.W`` to the least-squares basis for the current ``self.H``.

        Computes ``W = data * H^T * (H * H^T)^+`` and stores it in ``self.W``.
        """
        data_ht = np.dot(self.data[:,:], self.H.T)
        gram_h = np.dot(self.H, self.H.T)
        # The multiplicative H update can zero out entire rows of H, which
        # makes H*H^T singular. The pseudo-inverse agrees with the plain
        # inverse for well-conditioned matrices and still yields the
        # minimum-norm least-squares solution in the degenerate case instead
        # of raising LinAlgError.
        self.W = np.dot(data_ht, np.linalg.pinv(gram_h))

    def update_h(self):
        """Rescale ``self.H`` in place with the multiplicative Semi-NMF rule.

        Each entry of H is multiplied by ``sqrt(numerator / denominator)``,
        where both terms are assembled from the positive and negative parts
        of ``data^T * W`` and ``W^T * W``.
        """
        def separate_positive(m):
            # Element-wise positive part: m where m > 0, else 0.
            return (np.abs(m) + m)/2.0

        def separate_negative(m):
            # Element-wise magnitude of the negative part: -m where m < 0, else 0.
            return (np.abs(m) - m)/2.0

        XW = np.dot(self.data[:,:].T, self.W)
        WW = np.dot(self.W.T, self.W)

        WW_pos = separate_positive(WW)
        WW_neg = separate_negative(WW)
        XW_pos = separate_positive(XW)
        XW_neg = separate_negative(XW)

        # Numerator and denominator of the update ratio, transposed back to
        # the layout of H.
        H1 = (XW_pos + np.dot(self.H.T, WW_neg)).T
        # The small constant keeps the denominator strictly positive so the
        # division below never hits zero.
        H2 = (XW_neg + np.dot(self.H.T, WW_pos)).T + 10**-9

        self.H *= np.sqrt(H1/H2)
mi@0 92
if __name__ == "__main__":
    # Script entry point: execute the usage examples embedded in this
    # module's docstrings as doctests.
    from doctest import testmod
    testmod()