mi@0
|
1 #!/usr/bin/python
|
mi@0
|
2 #
|
mi@0
|
3 # Copyright (C) Christian Thurau, 2010.
|
mi@0
|
4 # Licensed under the GNU General Public License (GPL).
|
mi@0
|
5 # http://www.gnu.org/licenses/gpl.txt
|
mi@0
|
6 #$Id$
|
mi@0
|
7 """
|
mi@0
|
8 PyMF Non-negative Double Singular Value Decompositions.
|
mi@0
|
9
|
mi@0
|
10 NNDSVD: Class for Non-negative Double Singular Value Decompositions [1]
|
mi@0
|
11
|
mi@0
|
12 [1] C. Boutsidis and E. Gallopoulos (2008), SVD based initialization: A head
|
mi@0
|
13 start for nonnegative matrix factorization, Pattern Recognition, 41, 1350-1362
|
mi@0
|
14 """
|
mi@0
|
15
|
mi@0
|
16
|
mi@0
|
17 import numpy as np
|
mi@0
|
18
|
mi@0
|
19 from nmf import NMF
|
mi@0
|
20 from svd import SVD
|
mi@0
|
21
|
mi@0
|
22 __all__ = ["NNDSVD"]
|
mi@0
|
23
|
mi@0
|
24 class NNDSVD(NMF):
|
mi@0
|
25 """
|
mi@0
|
26 NNDSVD(data, num_bases=4)
|
mi@0
|
27
|
mi@0
|
28
|
mi@0
|
29 Non-negative Double Singular Value Decompositions. Factorize a data
|
mi@0
|
30 matrix into two matrices s.t. F = | data - W*H | = | is minimal. H, and
|
mi@0
|
31 W are restricted to non-negative data. NNDSVD is primarily used for
|
mi@0
|
32 initializing NMF.
|
mi@0
|
33
|
mi@0
|
34 Parameters
|
mi@0
|
35 ----------
|
mi@0
|
36 data : array_like, shape (_data_dimension, _num_samples)
|
mi@0
|
37 the input data
|
mi@0
|
38 num_bases: int, optional
|
mi@0
|
39 Number of bases to compute (column rank of W and row rank of H).
|
mi@0
|
40 4 (default)
|
mi@0
|
41
|
mi@0
|
42 Attributes
|
mi@0
|
43 ----------
|
mi@0
|
44 W : "data_dimension x num_bases" matrix of basis vectors
|
mi@0
|
45 H : "num bases x num_samples" matrix of coefficients
|
mi@0
|
46 ferr : frobenius norm (after calling .factorize())
|
mi@0
|
47
|
mi@0
|
48 Example
|
mi@0
|
49 -------
|
mi@0
|
50 Applying NNDSVD to some rather stupid data set:
|
mi@0
|
51
|
mi@0
|
52 >>> import numpy as np
|
mi@0
|
53 >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]])
|
mi@0
|
54 >>> nndsvd_mdl = NNDSVD(data, num_bases=2)
|
mi@0
|
55 >>> nndsvd_mdl.factorize()
|
mi@0
|
56
|
mi@0
|
57 The basis vectors are now stored in nndsvd_mdl.W, the coefficients in
|
mi@0
|
58 nndsvd_mdl.H. To initialize NMF with nndsvd_mdl.W, nndsvd_mdl.H
|
mi@0
|
59 simply copy W to nmf_mdl.W and H to nmf_mdl.H:
|
mi@0
|
60
|
mi@0
|
61 >>> data = np.array([[1.5], [1.2]])
|
mi@0
|
62 >>> W = np.array([[1.0, 0.0], [0.0, 1.0]])
|
mi@0
|
63 >>> nmf_mdl = NMF(data, num_bases=2)
|
mi@0
|
64 >>> nmf_mdl.W = nndsvd_mdl.W
|
mi@0
|
65 >>> nmf_mdl.H = nndsvd_mdl.H
|
mi@0
|
66 >>> nmf_mdl.factorize(niter=20)
|
mi@0
|
67
|
mi@0
|
68 The result is a set of (more optimal) coefficients nmf_mdl.H, nmf_mdl.W.
|
mi@0
|
69 """
|
mi@0
|
70 def init_w(self):
|
mi@0
|
71 self.W = np.zeros((self._data_dimension, self._num_bases))
|
mi@0
|
72
|
mi@0
|
73 def init_h(self):
|
mi@0
|
74 self.H = np.zeros((self._num_bases, self._num_samples))
|
mi@0
|
75
|
mi@0
|
76 def update_h(self):
|
mi@0
|
77 pass
|
mi@0
|
78
|
mi@0
|
79 def update_w(self):
|
mi@0
|
80 svd_mdl = SVD(self.data)
|
mi@0
|
81 svd_mdl.factorize()
|
mi@0
|
82
|
mi@0
|
83 U, S, V = svd_mdl.U, svd_mdl.S, svd_mdl.V
|
mi@0
|
84
|
mi@0
|
85 # The first left singular vector is nonnegative
|
mi@0
|
86 # (abs is only used as values could be all negative)
|
mi@0
|
87 self.W[:,0] = np.sqrt(S[0,0]) * np.abs(U[:,0])
|
mi@0
|
88
|
mi@0
|
89 #The first right singular vector is nonnegative
|
mi@0
|
90 self.H[0,:] = np.sqrt(S[0,0]) * np.abs(V[0,:].T)
|
mi@0
|
91
|
mi@0
|
92 for i in range(1,self._num_bases):
|
mi@0
|
93 # Form the rank one factor
|
mi@0
|
94 Tmp = np.dot(U[:,i:i+1]*S[i,i], V[i:i+1,:])
|
mi@0
|
95
|
mi@0
|
96 # zero out the negative elements
|
mi@0
|
97 Tmp = np.where(Tmp < 0, 0.0, Tmp)
|
mi@0
|
98
|
mi@0
|
99 # Apply 2nd SVD
|
mi@0
|
100 svd_mdl_2 = SVD(Tmp)
|
mi@0
|
101 svd_mdl_2.factorize()
|
mi@0
|
102 u, s, v = svd_mdl_2.U, svd_mdl_2.S, svd_mdl_2.V
|
mi@0
|
103
|
mi@0
|
104 # The first left singular vector is nonnegative
|
mi@0
|
105 self.W[:,i] = np.sqrt(s[0,0]) * np.abs(u[:,0])
|
mi@0
|
106
|
mi@0
|
107 #The first right singular vector is nonnegative
|
mi@0
|
108 self.H[i,:] = np.sqrt(s[0,0]) * np.abs(v[0,:].T)
|
mi@0
|
109
|
mi@0
|
110 def factorize(self, niter=1, show_progress=False,
|
mi@0
|
111 compute_w=True, compute_h=True, compute_err=True):
|
mi@0
|
112
|
mi@0
|
113 # enforce certain default values, otherwise it won't work
|
mi@0
|
114 NMF.factorize(self, niter=1, show_progress=show_progress,
|
mi@0
|
115 compute_w=True, compute_h=True, compute_err=compute_err)
|
mi@0
|
116
|
mi@0
|
117 if __name__ == "__main__":
|
mi@0
|
118 import doctest
|
mi@0
|
119 doctest.testmod()
|