mi@0
|
1 #!/usr/bin/python
|
mi@0
|
2 #
|
mi@0
|
3 # Copyright (C) Christian Thurau, 2010.
|
mi@0
|
4 # Licensed under the GNU General Public License (GPL).
|
mi@0
|
5 # http://www.gnu.org/licenses/gpl.txt
|
mi@0
|
6 """
|
mi@0
|
7 PyMF Semi Non-negative Matrix Factorization.
|
mi@0
|
8
|
mi@0
|
9 SNMF(NMF) : Class for semi non-negative matrix factorization
|
mi@0
|
10
|
mi@0
|
11 [1] Ding, C., Li, T. and Jordan, M.. Convex and Semi-Nonnegative Matrix Factorizations.
|
mi@0
|
12 IEEE Trans. on Pattern Analysis and Machine Intelligence 32(1), 45-55.
|
mi@0
|
13 """
|
mi@0
|
14
|
mi@0
|
15
|
mi@0
|
16
|
mi@0
|
17 import numpy as np
|
mi@0
|
18
|
mi@0
|
19 from nmf import NMF
|
mi@0
|
20
|
mi@0
|
21 __all__ = ["SNMF"]
|
mi@0
|
22
|
mi@0
|
class SNMF(NMF):
    """
    SNMF(data, num_bases=4)

    Semi Non-negative Matrix Factorization. Factorize a data matrix into two
    matrices s.t. F = | data - W*H | is minimal. W is obtained by an
    unconstrained least-squares step; H is updated multiplicatively, so it
    keeps the sign pattern it was initialised with.

    Parameters
    ----------
    data : array_like, shape (_data_dimension, _num_samples)
        the input data
    num_bases: int, optional
        Number of bases to compute (column rank of W and row rank of H).
        4 (default)

    Attributes
    ----------
    W : "data_dimension x num_bases" matrix of basis vectors
    H : "num bases x num_samples" matrix of coefficients
    ferr : frobenius norm (after calling .factorize())

    Example
    -------
    Applying Semi-NMF to some rather stupid data set:

    >>> import numpy as np
    >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]])
    >>> snmf_mdl = SNMF(data, num_bases=2)
    >>> snmf_mdl.factorize(niter=10)

    The basis vectors are now stored in snmf_mdl.W, the coefficients in snmf_mdl.H.
    To compute coefficients for an existing set of basis vectors simply copy W
    to snmf_mdl.W, and set compute_w to False:

    >>> data = np.array([[1.5], [1.2]])
    >>> W = np.array([[1.0, 0.0], [0.0, 1.0]])
    >>> snmf_mdl = SNMF(data, num_bases=2)
    >>> snmf_mdl.W = W
    >>> snmf_mdl.factorize(niter=1, compute_w=False)

    The result is a set of coefficients snmf_mdl.H, s.t. data = W * snmf_mdl.H.
    """

    def update_w(self):
        """Recompute W as the least-squares solution for the current H.

        Implements W = data * H^T * (H * H^T)^+ and stores the result in
        ``self.W``.

        The Moore-Penrose pseudo-inverse is used instead of a plain inverse:
        for a non-singular Gram matrix H * H^T both are the same, but a
        rank-deficient H (for instance a row that the multiplicative H update
        has driven to zero) would make ``np.linalg.inv`` raise ``LinAlgError``
        and abort the factorization.
        """
        # data[:, :] is kept from the original code; presumably it
        # materialises array-like storages into an ndarray -- TODO confirm.
        data_ht = np.dot(self.data[:, :], self.H.T)
        gram = np.dot(self.H, self.H.T)
        self.W = np.dot(data_ht, np.linalg.pinv(gram))

    def update_h(self):
        """Apply one multiplicative update to H, in place.

        With A = data^T * W and B = W^T * W, each split into its positive
        part (.)^+ and the magnitude of its negative part (.)^-, the update is

            H <- H * sqrt( (A^+ + H^T * B^-)^T / ((A^- + H^T * B^+)^T + 1e-9) )

        evaluated element-wise. The scaling factor is never negative, so no
        entry of H changes sign.
        """
        def separate_positive(m):
            # Element-wise positive part: m where m > 0, otherwise 0.
            return (np.abs(m) + m)/2.0

        def separate_negative(m):
            # Element-wise magnitude of the negative part: -m where m < 0,
            # otherwise 0.
            return (np.abs(m) - m)/2.0

        XW = np.dot(self.data[:, :].T, self.W)
        WW = np.dot(self.W.T, self.W)

        WW_pos = separate_positive(WW)
        WW_neg = separate_negative(WW)

        # Numerator of the update ratio.
        numerator = (separate_positive(XW) + np.dot(self.H.T, WW_neg)).T

        # Denominator; the small constant guards against division by zero.
        denominator = (separate_negative(XW) + np.dot(self.H.T, WW_pos)).T + 10**-9

        self.H *= np.sqrt(numerator/denominator)
|
mi@0
|
92
|
mi@0
|
if __name__ == "__main__":
    # When executed as a script, run the doctest examples embedded in the
    # docstrings of this module.
    import doctest
    doctest.testmod()
|