comparison pymf/greedy.py @ 0:26838b1f560f

initial commit of a segmenter project

| author   | mi tian                         |
|----------|---------------------------------|
| date     | Thu, 02 Apr 2015 18:09:27 +0100 |
| parents  |                                 |
| children |                                 |
comparing -1:000000000000 with 0:26838b1f560f
#!/usr/bin/python2.6
#
# Copyright (C) Christian Thurau, 2010.
# Licensed under the GNU General Public License (GPL).
# http://www.gnu.org/licenses/gpl.txt
#$Id$
"""
PyMF GREEDY[1]

GREEDY: class for a deterministic SVD based greedy matrix reconstruction [1].

[1] Ali Civril, Malik Magdon-Ismail. Deterministic Sparse Column Based Matrix
Reconstruction via Greedy Approximation of SVD. ISAAC'2008.
"""

import time
import scipy.sparse
import numpy as np

# sibling modules in the same pymf directory (Python 2 implicit relative imports)
from svd import *
from nmf import NMF

__all__ = ["GREEDY"]

class GREEDY(NMF):
    """
    GREEDY(data, k=-1, num_bases=4)

    Deterministic Sparse Column Based Matrix Reconstruction via Greedy
    Approximation of SVD. Factorize a data matrix into two matrices s.t.
    F = ||data - W*H|| is minimal. The columns of W are selected iteratively
    from the columns of data.

    Parameters
    ----------
    data : array_like, shape (_data_dimension, _num_samples)
        the input data
    num_bases : int, optional
        Number of bases to compute (column rank of W and row rank of H).
        4 (default)
    k : int, optional
        Number of singular vectors used in the SVD step of the algorithm.
        num_bases (default)

    Attributes
    ----------
    W : "data_dimension x num_bases" matrix of basis vectors
    H : "num_bases x num_samples" matrix of coefficients
    ferr : Frobenius norm of the reconstruction error (after calling .factorize())

    Example
    -------
    Applying GREEDY to a small toy data set:

    >>> import numpy as np
    >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]])
    >>> greedy_mdl = GREEDY(data, num_bases=2)
    >>> greedy_mdl.factorize()

    The basis vectors are now stored in greedy_mdl.W, the coefficients in
    greedy_mdl.H. To compute coefficients for an existing set of basis
    vectors, simply copy W to greedy_mdl.W and call factorize() with
    compute_w=False:

    >>> data = np.array([[1.5, 1.3], [1.2, 0.3]])
    >>> W = np.array([[1.0, 0.0], [0.0, 1.0]])
    >>> greedy_mdl = GREEDY(data, num_bases=2)
    >>> greedy_mdl.W = W
    >>> greedy_mdl.factorize(compute_w=False)

    The result is a set of coefficients greedy_mdl.H, s.t. data = W * greedy_mdl.H.
    """

    def __init__(self, data, k=-1, num_bases=4):
        # call inherited method
        NMF.__init__(self, data, num_bases=num_bases)
        self._k = k
        if self._k == -1:
            self._k = num_bases

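    # update_h() computes the coefficients for a fixed basis W as the
    # least-squares solution H = pinv(W) * data (Moore-Penrose pseudoinverse).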
    def update_h(self):
        if scipy.sparse.issparse(self.data):
            self.H = pinv(self.W) * self.data
        else:
            self.H = np.dot(pinv(self.W), self.data)

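    # update_w() implements the greedy column selection of [1]: it computes a
    # truncated SVD of the data, then repeatedly picks the data column that is
    # best aligned with the remaining (singular-value-weighted) SVD directions
    # and deflates both the data and the SVD basis before the next pick.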
    def update_w(self):
        def normalize_matrix(K):
            """ Normalize a matrix K s.t. its columns have unit Euclidean norm.
            """
            if scipy.sparse.issparse(K):
                L = np.sqrt(np.array(K.multiply(K).sum(axis=0)))[0,:]
                s = np.where(L > 0.0)[0]
                L[s] = L[s]**-1
                KN = scipy.sparse.spdiags(L, 0, len(L), len(L), format='csc')
                K = K*KN
            else:
                L = np.sqrt((K**2).sum(axis=0))
                s = np.where(L > 0.0)[0]
                L[s] = L[s]**-1
                K = K*L
            return K

        self._t = np.zeros((self._num_bases))
        t0 = time.time()
        self.select = []

        # work on a copy of the data (columns are re-normalized in every pass)
        A = self.data.copy()

        svd_mdl = SVD(A, k=self._k)
        svd_mdl.factorize()

        # B holds the left singular vectors scaled by their singular values;
        # only the first num_bases columns are kept.
        if scipy.sparse.issparse(self.data):
            B = svd_mdl.U * svd_mdl.S
            B = B.tocsc()
        else:
            B = np.dot(svd_mdl.U, svd_mdl.S)
        B = B[:, :self._num_bases]

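        # Each pass re-normalizes the residual columns of A, scores every
        # not-yet-selected column a_j by ||B^T a_j||^2, picks the best-scoring
        # column, and then deflates B and A by projecting out the component
        # along the chosen (unit-norm) column.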
        for i in range(self._num_bases):
            A = normalize_matrix(A)

            if scipy.sparse.issparse(self.data):
                T = B.transpose() * A
                T = np.array(T.multiply(T).sum(axis=0))[0,:]

                # next selected column index
                T[self.select] = 0.0
                idx = np.argmax(T)
                Aidx = A[:, idx].copy()
                self.select.append(idx)

                # update B
                BC = Aidx.transpose() * B
                B = B - (Aidx*BC)

                # update A
                AC = Aidx.transpose() * A
                A = A - (Aidx*AC)

            else:
                T = np.dot(B.transpose(), A)
                T = np.sum(T**2.0, axis=0)

                # next selected column index
                T[self.select] = 0.0
                idx = np.argmax(T)
                self.select.append(idx)

                # update B
                BC = np.dot(B.transpose(), A[:,idx])
                B -= np.dot(A[:,idx].reshape(-1,1), BC.reshape(1,-1))

                # and A
                AC = np.dot(A.transpose(), A[:,idx])
                A -= np.dot(A[:,idx].reshape(-1,1), AC.reshape(1,-1))

            # log the selected columns and record the elapsed time for this pass
            self._logger.info('searching for next best column ...')
            self._logger.info('cur_columns: ' + str(self.select))
            self._t[i] = time.time() - t0

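        # np.argsort(np.argsort(self.select)) gives the rank of each selected
        # index within the sorted index list, so the sorted columns below can
        # be permuted back into the original selection order.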
        # sort indices, otherwise h5py won't work
        self.W = self.data[:, np.sort(self.select)]

        # "unsort" it again to keep the correct order
        self.W = self.W[:, np.argsort(np.argsort(self.select))]

if __name__ == "__main__":
    import doctest
    doctest.testmod()