mi@0
|
1 #!/usr/bin/python
|
mi@0
|
2 #
|
mi@0
|
3 # Copyright (C) Christian Thurau, 2010.
|
mi@0
|
4 # Licensed under the GNU General Public License (GPL).
|
mi@0
|
5 # http://www.gnu.org/licenses/gpl.txt
|
mi@0
|
6 """
|
mi@0
|
7 PyMF Non-negative Matrix Factorization.
|
mi@0
|
8
|
mi@0
|
9 NMFALS: Class for Non-negative Matrix Factorization using alternating least
|
mi@0
|
10 squares optimization (requires cvxopt)
|
mi@0
|
11
|
mi@0
|
12 [1] Lee, D. D. and Seung, H. S. (1999), Learning the Parts of Objects by Non-negative
|
mi@0
|
13 Matrix Factorization, Nature 401(6755), 788-799.
|
mi@0
|
14 """
|
mi@0
|
15
|
mi@0
|
16
|
mi@0
|
17
|
mi@0
|
18 import numpy as np
|
mi@0
|
19 from cvxopt import solvers, base
|
mi@0
|
20 from nmf import NMF
|
mi@0
|
21
|
mi@0
|
22 __all__ = ["NMFALS"]
|
mi@0
|
23
|
mi@0
|
24 class NMFALS(NMF):
|
mi@0
|
25 """
|
mi@0
|
26 NMF(data, num_bases=4)
|
mi@0
|
27
|
mi@0
|
28
|
mi@0
|
29 Non-negative Matrix Factorization. Factorize a data matrix into two matrices
|
mi@0
|
30 s.t. F = | data - W*H | = | is minimal. H, and W are restricted to non-negative
|
mi@0
|
31 data. Uses the an alternating least squares procedure (quite slow for larger
|
mi@0
|
32 data sets)
|
mi@0
|
33
|
mi@0
|
34 Parameters
|
mi@0
|
35 ----------
|
mi@0
|
36 data : array_like, shape (_data_dimension, _num_samples)
|
mi@0
|
37 the input data
|
mi@0
|
38 num_bases: int, optional
|
mi@0
|
39 Number of bases to compute (column rank of W and row rank of H).
|
mi@0
|
40 4 (default)
|
mi@0
|
41
|
mi@0
|
42 Attributes
|
mi@0
|
43 ----------
|
mi@0
|
44 W : "data_dimension x num_bases" matrix of basis vectors
|
mi@0
|
45 H : "num bases x num_samples" matrix of coefficients
|
mi@0
|
46 ferr : frobenius norm (after calling .factorize())
|
mi@0
|
47
|
mi@0
|
48 Example
|
mi@0
|
49 -------
|
mi@0
|
50 Applying NMF to some rather stupid data set:
|
mi@0
|
51
|
mi@0
|
52 >>> import numpy as np
|
mi@0
|
53 >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]])
|
mi@0
|
54 >>> nmf_mdl = NMFALS(data, num_bases=2)
|
mi@0
|
55 >>> nmf_mdl.factorize(niter=10)
|
mi@0
|
56
|
mi@0
|
57 The basis vectors are now stored in nmf_mdl.W, the coefficients in nmf_mdl.H.
|
mi@0
|
58 To compute coefficients for an existing set of basis vectors simply copy W
|
mi@0
|
59 to nmf_mdl.W, and set compute_w to False:
|
mi@0
|
60
|
mi@0
|
61 >>> data = np.array([[1.5], [1.2]])
|
mi@0
|
62 >>> W = np.array([[1.0, 0.0], [0.0, 1.0]])
|
mi@0
|
63 >>> nmf_mdl = NMFALS(data, num_bases=2)
|
mi@0
|
64 >>> nmf_mdl.W = W
|
mi@0
|
65 >>> nmf_mdl.factorize(niter=1, compute_w=False)
|
mi@0
|
66
|
mi@0
|
67 The result is a set of coefficients nmf_mdl.H, s.t. data = W * nmf_mdl.H.
|
mi@0
|
68 """
|
mi@0
|
69
|
mi@0
|
70 def update_h(self):
|
mi@0
|
71 def updatesingleH(i):
|
mi@0
|
72 # optimize alpha using qp solver from cvxopt
|
mi@0
|
73 FA = base.matrix(np.float64(np.dot(-self.W.T, self.data[:,i])))
|
mi@0
|
74 al = solvers.qp(HA, FA, INQa, INQb)
|
mi@0
|
75 self.H[:,i] = np.array(al['x']).reshape((1,-1))
|
mi@0
|
76
|
mi@0
|
77 # float64 required for cvxopt
|
mi@0
|
78 HA = base.matrix(np.float64(np.dot(self.W.T, self.W)))
|
mi@0
|
79 INQa = base.matrix(-np.eye(self._num_bases))
|
mi@0
|
80 INQb = base.matrix(0.0, (self._num_bases,1))
|
mi@0
|
81
|
mi@0
|
82 map(updatesingleH, xrange(self._num_samples))
|
mi@0
|
83
|
mi@0
|
84
|
mi@0
|
85 def update_w(self):
|
mi@0
|
86 def updatesingleW(i):
|
mi@0
|
87 # optimize alpha using qp solver from cvxopt
|
mi@0
|
88 FA = base.matrix(np.float64(np.dot(-self.H, self.data[i,:].T)))
|
mi@0
|
89 al = solvers.qp(HA, FA, INQa, INQb)
|
mi@0
|
90 self.W[i,:] = np.array(al['x']).reshape((1,-1))
|
mi@0
|
91
|
mi@0
|
92 # float64 required for cvxopt
|
mi@0
|
93 HA = base.matrix(np.float64(np.dot(self.H, self.H.T)))
|
mi@0
|
94 INQa = base.matrix(-np.eye(self._num_bases))
|
mi@0
|
95 INQb = base.matrix(0.0, (self._num_bases,1))
|
mi@0
|
96
|
mi@0
|
97 map(updatesingleW, xrange(self._data_dimension))
|