mi@0
|
1 #!/usr/bin/python
|
mi@0
|
2 #
|
mi@0
|
3 # Copyright (C) Christian Thurau, 2010.
|
mi@0
|
4 # Licensed under the GNU General Public License (GPL).
|
mi@0
|
5 # http://www.gnu.org/licenses/gpl.txt
|
mi@0
|
6 """
|
mi@0
|
7 PyMF Non-negative Matrix Factorization.
|
mi@0
|
8
|
mi@0
|
9 NMFNNLS: Class for Non-negative Matrix Factorization using non-negative
|
mi@0
|
10 least squares optimization (requires scipy.optimize)
|
mi@0
|
11
|
mi@0
|
12 [1] Lee, D. D. and Seung, H. S. (1999), Learning the Parts of Objects by Non-negative
|
mi@0
|
13 Matrix Factorization, Nature 401(6755), 788-791.
|
mi@0
|
14 """
|
mi@0
|
15
|
mi@0
|
16
|
mi@0
|
17
|
mi@0
|
18 import scipy.optimize
|
mi@0
|
19 from nmf import NMF
|
mi@0
|
20
|
mi@0
|
21 __all__ = ["NMFNNLS"]
|
mi@0
|
22
|
mi@0
|
class NMFNNLS(NMF):
    """
    NMFNNLS(data, num_bases=4)


    Non-negative Matrix Factorization. Factorize a data matrix into two
    matrices s.t. F = | data - W*H | is minimal. H, and W are restricted to
    non-negative data. Uses Lawson and Hanson's algorithm for non-negative
    constrained least squares (-> also see scipy.optimize.nnls).

    Parameters
    ----------
    data : array_like, shape (_data_dimension, _num_samples)
        the input data
    num_bases: int, optional
        Number of bases to compute (column rank of W and row rank of H).
        4 (default)

    Attributes
    ----------
    W : "data_dimension x num_bases" matrix of basis vectors
    H : "num bases x num_samples" matrix of coefficients
    ferr : frobenius norm (after calling .factorize())

    Example
    -------
    Applying NMF to some rather stupid data set:

    >>> import numpy as np
    >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]])
    >>> nmf_mdl = NMFNNLS(data, num_bases=2)
    >>> nmf_mdl.factorize(niter=10)

    The basis vectors are now stored in nmf_mdl.W, the coefficients in nmf_mdl.H.
    To compute coefficients for an existing set of basis vectors simply copy W
    to nmf_mdl.W, and set compute_w to False:

    >>> data = np.array([[1.5], [1.2]])
    >>> W = np.array([[1.0, 0.0], [0.0, 1.0]])
    >>> nmf_mdl = NMFNNLS(data, num_bases=2)
    >>> nmf_mdl.W = W
    >>> nmf_mdl.factorize(niter=1, compute_w=False)

    The result is a set of coefficients nmf_mdl.H, s.t. data = W * nmf_mdl.H.
    """

    def update_h(self):
        """Recompute every column of H by non-negative least squares.

        For each sample index i, solves ``min || W x - data[:, i] ||`` subject
        to ``x >= 0`` with scipy.optimize.nnls and writes the solution into
        ``self.H[:, i]`` in place. W is left untouched.

        NOTE(review): self.W, self.H, self.data and self._num_samples are not
        defined in this class; presumably the NMF base class sets them up --
        confirm against nmf.NMF.
        """
        # An explicit loop is required here: map() is lazy on Python 3, so
        # using it only for its side effects would never run the updates.
        # range() (rather than xrange) keeps this working on Python 2 and 3.
        for i in range(self._num_samples):
            self.H[:, i] = scipy.optimize.nnls(self.W, self.data[:, i])[0]

    def update_w(self):
        """Recompute every row of W by non-negative least squares.

        For each data dimension i, solves ``min || H.T x - data[i, :].T ||``
        subject to ``x >= 0`` with scipy.optimize.nnls and writes the solution
        into ``self.W[i, :]`` in place. H is left untouched.

        NOTE(review): self.W, self.H, self.data and self._data_dimension are
        not defined in this class; presumably the NMF base class sets them
        up -- confirm against nmf.NMF.
        """
        # Same reasoning as update_h: loop eagerly instead of relying on
        # map() side effects, which are skipped on Python 3.
        for i in range(self._data_dimension):
            self.W[i, :] = scipy.optimize.nnls(self.H.T, self.data[i, :].T)[0]
|