#!/usr/bin/python
#
# Copyright (C) Christian Thurau, 2010.
# Licensed under the GNU General Public License (GPL).
# http://www.gnu.org/licenses/gpl.txt
"""
PyMF Binary Matrix Factorization [1]

BNMF(NMF) : Class for binary matrix factorization

[1]Z. Zhang, T. Li, C. H. Q. Ding, X. Zhang: Binary Matrix Factorization with
Applications. ICDM 2007
"""


import numpy as np
from nmf import NMF

__all__ = ["BNMF"]
class BNMF(NMF):
    """
    BNMF(data, num_bases=4)

    Binary Matrix Factorization. Factorize a data matrix into two matrices s.t.
    F = | data - W*H | is minimal. H and W are restricted to binary values.

    Parameters
    ----------
    data : array_like, shape (_data_dimension, _num_samples)
        the input data
    num_bases: int, optional
        Number of bases to compute (column rank of W and row rank of H).
        4 (default)

    Attributes
    ----------
    W : "data_dimension x num_bases" matrix of basis vectors
    H : "num bases x num_samples" matrix of coefficients
    ferr : frobenius norm (after calling .factorize())

    Example
    -------
    Applying BNMF to some rather stupid data set:

    >>> import numpy as np
    >>> from bnmf import BNMF
    >>> data = np.array([[1.0, 0.0, 1.0], [0.0, 1.0, 1.0]])

    Use 2 basis vectors -> W shape(data_dimension, 2).

    >>> bnmf_mdl = BNMF(data, num_bases=2)

    Set number of iterations to 5 and start computing the factorization.

    >>> bnmf_mdl.factorize(niter=5)

    The basis vectors are now stored in bnmf_mdl.W, the coefficients in bnmf_mdl.H.
    To compute coefficients for an existing set of basis vectors simply copy W
    to bnmf_mdl.W, and set compute_w to False:

    >>> data = np.array([[0.0], [1.0]])
    >>> W = np.array([[1.0, 0.0], [0.0, 1.0]])
    >>> bnmf_mdl = BNMF(data, num_bases=2)
    >>> bnmf_mdl.W = W
    >>> bnmf_mdl.factorize(niter=10, compute_w=False)

    The result is a set of coefficients bnmf_mdl.H, s.t. data = W * bnmf_mdl.H.
    """

    # Controls how fast lambda should increase. Lambda weights the penalty
    # term that drives the entries of W and H towards binary values during
    # the multiplicative updates. A value < 1 results in non-binary
    # decompositions, as the update rule then effectively reduces to a
    # conventional NMF update rule. Values > 1 give more weight to making
    # the factorization binary with increasing iterations. Setting either
    # factor to 0 makes the corresponding matrix non-binary.
    _LAMB_INCREASE_W = 1.1
    _LAMB_INCREASE_H = 1.1

    # Small positive constant keeping the denominators of the multiplicative
    # updates away from zero (avoids division by zero for zero columns/rows).
    _EPS = 10 ** -9

    def update_h(self):
        """ Apply one penalized multiplicative update step to H (see [1]).

        The lamb_H terms pull the entries of H towards {0, 1}. After the
        update, both penalty weights are annealed (see NOTE below).
        """
        # numerator and denominator of the multiplicative update for H
        H1 = np.dot(self.W.T, self.data[:, :]) + 3.0 * self._lamb_H * (self.H ** 2)
        H2 = (np.dot(np.dot(self.W.T, self.W), self.H)
              + 2.0 * self._lamb_H * (self.H ** 3)
              + self._lamb_H * self.H
              + self._EPS)
        self.H *= H1 / H2

        # NOTE: the annealing of BOTH penalty weights happens here (and not
        # in update_w()), so the weights grow exactly once per iteration of
        # the standard W-then-H update cycle. If factorize() is called with
        # compute_h=False, the penalties never increase.
        self._lamb_W = self._LAMB_INCREASE_W * self._lamb_W
        self._lamb_H = self._LAMB_INCREASE_H * self._lamb_H

    def update_w(self):
        """ Apply one penalized multiplicative update step to W (see [1]).

        Mirror image of the H update; the lamb_W terms pull the entries of
        W towards {0, 1}.
        """
        W1 = np.dot(self.data[:, :], self.H.T) + 3.0 * self._lamb_W * (self.W ** 2)
        W2 = (np.dot(self.W, np.dot(self.H, self.H.T))
              + 2.0 * self._lamb_W * (self.W ** 3)
              + self._lamb_W * self.W
              + self._EPS)
        self.W *= W1 / W2

    def factorize(self, niter=10, compute_w=True, compute_h=True,
                  show_progress=False, compute_err=True):
        """ Factorize s.t. WH = data

        Parameters
        ----------
        niter : int
            number of iterations (must be >= 1; the initial penalty weight
            is 1/niter).
        show_progress : bool
            print some extra information to stdout.
        compute_h : bool
            iteratively update values for H.
        compute_w : bool
            iteratively update values for W.
        compute_err : bool
            compute Frobenius norm |data-WH| after each update and store
            it to .ferr[k].

        Updated Values
        --------------
        .W : updated values for W.
        .H : updated values for H.
        .ferr : Frobenius norm |data-WH| for each iteration.
        """
        # Init the learning parameters: start with a small binarization
        # penalty and let update_h() anneal it upwards each iteration.
        self._lamb_W = 1.0 / niter
        self._lamb_H = 1.0 / niter

        # delegate the actual iteration loop to the base class
        NMF.factorize(self, niter=niter, compute_w=compute_w,
                      compute_h=compute_h, show_progress=show_progress,
                      compute_err=compute_err)
|
mi@0
|
if __name__ == "__main__":
    # When executed as a script, run the usage examples embedded in the
    # docstrings above as doctests.
    from doctest import testmod
    testmod()
|