mi@0
|
1 #!/usr/bin/python
|
mi@0
|
2 #
|
mi@0
|
3 # Copyright (C) Christian Thurau, 2010.
|
mi@0
|
4 # Licensed under the GNU General Public License (GPL).
|
mi@0
|
5 # http://www.gnu.org/licenses/gpl.txt
|
mi@0
|
6 """
|
mi@0
|
7 PyMF Non-negative Matrix Factorization.
|
mi@0
|
8
|
mi@0
|
9 NMF: Class for Non-negative Matrix Factorization
|
mi@0
|
10
|
mi@0
|
11 [1] Lee, D. D. and Seung, H. S. (1999), Learning the Parts of Objects by Non-negative
|
mi@0
|
12 Matrix Factorization, Nature 401(6755), 788-799.
|
mi@0
|
13 """
|
mi@0
|
14
|
mi@0
|
15
|
mi@0
|
16 import numpy as np
|
mi@0
|
17 import logging
|
mi@0
|
18 import logging.config
|
mi@0
|
19 import scipy.sparse
|
mi@0
|
20
|
mi@0
|
21 from nmf import NMF
|
mi@0
|
22
|
mi@0
|
23 __all__ = ["RNMF"]
|
mi@0
|
24
|
mi@0
|
25 class RNMF(NMF):
|
mi@0
|
26 """
|
mi@0
|
27 RNMF(data, num_bases=4)
|
mi@0
|
28
|
mi@0
|
29
|
mi@0
|
30 Non-negative Matrix Factorization. Factorize a data matrix into two matrices
|
mi@0
|
31 s.t. F = | data - W*H | = | is minimal. H, and W are restricted to non-negative
|
mi@0
|
32 data. Uses the classicial multiplicative update rule.
|
mi@0
|
33
|
mi@0
|
34 Parameters
|
mi@0
|
35 ----------
|
mi@0
|
36 data : array_like, shape (_data_dimension, _num_samples)
|
mi@0
|
37 the input data
|
mi@0
|
38 num_bases: int, optional
|
mi@0
|
39 Number of bases to compute (column rank of W and row rank of H).
|
mi@0
|
40 4 (default)
|
mi@0
|
41
|
mi@0
|
42 Attributes
|
mi@0
|
43 ----------
|
mi@0
|
44 W : "data_dimension x num_bases" matrix of basis vectors
|
mi@0
|
45 H : "num bases x num_samples" matrix of coefficients
|
mi@0
|
46 ferr : frobenius norm (after calling .factorize())
|
mi@0
|
47
|
mi@0
|
48 Example
|
mi@0
|
49 -------
|
mi@0
|
50 Applying NMF to some rather stupid data set:
|
mi@0
|
51
|
mi@0
|
52 >>> import numpy as np
|
mi@0
|
53 >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]])
|
mi@0
|
54 >>> nmf_mdl = NMF(data, num_bases=2, niter=10)
|
mi@0
|
55 >>> nmf_mdl.factorize()
|
mi@0
|
56
|
mi@0
|
57 The basis vectors are now stored in nmf_mdl.W, the coefficients in nmf_mdl.H.
|
mi@0
|
58 To compute coefficients for an existing set of basis vectors simply copy W
|
mi@0
|
59 to nmf_mdl.W, and set compute_w to False:
|
mi@0
|
60
|
mi@0
|
61 >>> data = np.array([[1.5], [1.2]])
|
mi@0
|
62 >>> W = np.array([[1.0, 0.0], [0.0, 1.0]])
|
mi@0
|
63 >>> nmf_mdl = NMF(data, num_bases=2)
|
mi@0
|
64 >>> nmf_mdl.W = W
|
mi@0
|
65 >>> nmf_mdl.factorize(niter=20, compute_w=False)
|
mi@0
|
66
|
mi@0
|
67 The result is a set of coefficients nmf_mdl.H, s.t. data = W * nmf_mdl.H.
|
mi@0
|
68 """
|
mi@0
|
69
|
mi@0
|
70 def __init__(self, data, num_bases=4, lamb=2.0):
|
mi@0
|
71 # call inherited method
|
mi@0
|
72 NMF.__init__(self, data, num_bases=num_bases)
|
mi@0
|
73 self._lamb = lamb
|
mi@0
|
74
|
mi@0
|
75 def soft_thresholding(self, X, lamb):
|
mi@0
|
76 X = np.where(np.abs(X) <= lamb, 0.0, X)
|
mi@0
|
77 X = np.where(X > lamb, X - lamb, X)
|
mi@0
|
78 X = np.where(X < -1.0*lamb, X + lamb, X)
|
mi@0
|
79 return X
|
mi@0
|
80
|
mi@0
|
81 def init_w(self):
|
mi@0
|
82 self.W = np.random.random((self._data_dimension, self._num_bases))
|
mi@0
|
83
|
mi@0
|
84 def init_h(self):
|
mi@0
|
85 self.H = np.random.random((self._num_bases, self._num_samples))
|
mi@0
|
86 self.H[:,:] = 1.0
|
mi@0
|
87 # normalized bases
|
mi@0
|
88 Wnorm = np.sqrt(np.sum(self.W**2.0, axis=0))
|
mi@0
|
89 self.W /= Wnorm
|
mi@0
|
90
|
mi@0
|
91 for i in range(self.H.shape[0]):
|
mi@0
|
92 self.H[i,:] *= Wnorm[i]
|
mi@0
|
93
|
mi@0
|
94 self.update_s()
|
mi@0
|
95
|
mi@0
|
96 def update_s(self):
|
mi@0
|
97 self.S = self.data - np.dot(self.W, self.H)
|
mi@0
|
98 self.S = self.soft_thresholding(self.S, self._lamb)
|
mi@0
|
99
|
mi@0
|
100 def update_h(self):
|
mi@0
|
101 # pre init H1, and H2 (necessary for storing matrices on disk)
|
mi@0
|
102 H1 = np.dot(self.W.T, self.S - self.data)
|
mi@0
|
103 H1 = np.abs(H1) - H1
|
mi@0
|
104 H1 /= (2.0* np.dot(self.W.T, np.dot(self.W, self.H)))
|
mi@0
|
105 self.H *= H1
|
mi@0
|
106
|
mi@0
|
107 # adapt S
|
mi@0
|
108 self.update_s()
|
mi@0
|
109
|
mi@0
|
110 def update_w(self):
|
mi@0
|
111 # pre init W1, and W2 (necessary for storing matrices on disk)
|
mi@0
|
112 W1 = np.dot(self.S - self.data, self.H.T)
|
mi@0
|
113 #W1 = np.dot(self.data - self.S, self.H.T)
|
mi@0
|
114 W1 = np.abs(W1) - W1
|
mi@0
|
115 W1 /= (2.0 * (np.dot(self.W, np.dot(self.H, self.H.T))))
|
mi@0
|
116 self.W *= W1
|
mi@0
|
117
|
mi@0
|
118 if __name__ == "__main__":
|
mi@0
|
119 import doctest
|
mi@0
|
120 doctest.testmod()
|