segmentation: pymf/greedy.py annotate

annotate pymf/greedy.py @ 19:890cfe424f4a tip

added annotations

author	mitian
date	Fri, 11 Dec 2015 09:47:40 +0000
parents	26838b1f560f
children

rev	line source
mi@0	1 #!/usr/bin/python2.6
mi@0	2 #
mi@0	3 # Copyright (C) Christian Thurau, 2010.
mi@0	4 # Licensed under the GNU General Public License (GPL).
mi@0	5 # http://www.gnu.org/licenses/gpl.txt
mi@0	6 #$Id$
mi@0	7 """
mi@0	8 PyMF GREEDY[1]
mi@0	9
mi@0	10 GREEDY: class for a deterministic SVD based greedy matrix reconstruction [1].
mi@0	11
mi@0	12
mi@0	13 [1] Ali Civril, Malik Magdon-Ismail. Deterministic Sparse Column Based Matrix
mi@0	14 Reconstruction via Greedy Approximation of SVD. ISAAC'2008.
mi@0	15 """
mi@0	16
mi@0	17
mi@0	18 import time
mi@0	19 import scipy.sparse
mi@0	20 import numpy as np
mi@0	21 from svd import *
mi@0	22 from nmf import NMF
mi@0	23
mi@0	24 __all__ = ["GREEDY"]
mi@0	25
mi@0	26 class GREEDY(NMF):
mi@0	27 """
mi@0	28 GREEDYVOL(data, num_bases=4, niter=100, show_progress=True, compW=True)
mi@0	29
mi@0	30
mi@0	31 Deterministic Sparse Column Based Matrix Reconstruction via Greedy
mi@0	32 Approximation of SVD. Factorize a data matrix into two matrices s.t.
mi@0	33 F = \| data - W*H \| is minimal. W is iteratively selected as columns
mi@0	34 of data.
mi@0	35
mi@0	36 Parameters
mi@0	37 ----------
mi@0	38 data : array_like, shape (_data_dimension, _num_samples)
mi@0	39 the input data
mi@0	40 num_bases: int, optional
mi@0	41 Number of bases to compute (column rank of W and row rank of H).
mi@0	42 4 (default)
mi@0	43 k : number of singular vectors for the SVD step of the algorithm
mi@0	44 num_bases (default)
mi@0	45
mi@0	46 Attributes
mi@0	47 ----------
mi@0	48 W : "data_dimension x num_bases" matrix of basis vectors
mi@0	49 H : "num bases x num_samples" matrix of coefficients
mi@0	50 ferr : frobenius norm (after calling .factorize())
mi@0	51
mi@0	52 Example
mi@0	53 -------
mi@0	54 Applying GREEDY to some rather stupid data set:
mi@0	55
mi@0	56 >>> import numpy as np
mi@0	57 >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]])
mi@0	58 >>> greedy_mdl = GREEDY(data, num_bases=2, niter=10)
mi@0	59 >>> greedy_mdl.factorize()
mi@0	60
mi@0	61 The basis vectors are now stored in greedy_mdl.W, the coefficients in
mi@0	62 greedy_mdl.H. To compute coefficients for an existing set of basis
mi@0	63 vectors simply copy W to greedy_mdl.W, and set compW to False:
mi@0	64
mi@0	65 >>> data = np.array([[1.5, 1.3], [1.2, 0.3]])
mi@0	66 >>> W = np.array([[1.0, 0.0], [0.0, 1.0]])
mi@0	67 >>> greedy_mdl = GREEDY(data, num_bases=2)
mi@0	68 >>> greedy_mdl.W = W
mi@0	69 >>> greedy_mdl.factorize(compute_w=False)
mi@0	70
mi@0	71 The result is a set of coefficients greedy_mdl.H, s.t. data = W * greedy_mdl.H.
mi@0	72 """
mi@0	73
mi@0	74
mi@0	75 def __init__(self, data, k=-1, num_bases=4):
mi@0	76 # call inherited method
mi@0	77 NMF.__init__(self, data, num_bases=num_bases)
mi@0	78 self._k = k
mi@0	79 if self._k == -1:
mi@0	80 self._k = num_bases
mi@0	81
mi@0	82 def update_h(self):
mi@0	83 if scipy.sparse.issparse(self.data):
mi@0	84 self.H = pinv(self.W) * self.data
mi@0	85 else:
mi@0	86 self.H = np.dot(pinv(self.W), self.data)
mi@0	87
mi@0	88 def update_w(self):
mi@0	89 def normalize_matrix(K):
mi@0	90 """ Normalize a matrix K s.t. columns have Euclidean-norm \|1\|
mi@0	91 """
mi@0	92 if scipy.sparse.issparse(K):
mi@0	93 L = np.sqrt(np.array(K.multiply(K).sum(axis=0)))[0,:]
mi@0	94 s = np.where(L > 0.0)[0]
mi@0	95 L[s] = L[s]**-1
mi@0	96 KN = scipy.sparse.spdiags(L,0,len(L),len(L),format='csc')
mi@0	97 K = K*KN
mi@0	98 else:
mi@0	99 L = np.sqrt((K**2).sum(axis=0))
mi@0	100 s = np.where(L > 0.0)[0]
mi@0	101 L[s] = L[s]**-1
mi@0	102 K = K*L
mi@0	103 return K
mi@0	104
mi@0	105 self._t = np.zeros((self._num_bases))
mi@0	106 t0 = time.time()
mi@0	107 self.select = []
mi@0	108
mi@0	109 # normalize data
mi@0	110 A = self.data.copy()
mi@0	111
mi@0	112 svd_mdl = SVD(A, k=self._k)
mi@0	113 svd_mdl.factorize()
mi@0	114
mi@0	115 if scipy.sparse.issparse(self.data):
mi@0	116 B = svd_mdl.U * svd_mdl.S
mi@0	117 B = B.tocsc()
mi@0	118 else:
mi@0	119 B = np.dot(svd_mdl.U, svd_mdl.S)
mi@0	120 B = B[:, :self._num_bases]
mi@0	121
mi@0	122 for i in range(self._num_bases):
mi@0	123 A = normalize_matrix(A)
mi@0	124
mi@0	125 if scipy.sparse.issparse(self.data):
mi@0	126 T = B.transpose() * A
mi@0	127 T = np.array(T.multiply(T).sum(axis=0))[0,:]
mi@0	128
mi@0	129 # next selected column index
mi@0	130 T[self.select] = 0.0
mi@0	131 idx = np.argmax(T)
mi@0	132 Aidx = A[:, idx].copy()
mi@0	133 self.select.append(idx)
mi@0	134
mi@0	135 # update B
mi@0	136 BC = Aidx.transpose() * B
mi@0	137 B = B - (Aidx*BC)
mi@0	138
mi@0	139 # update A
mi@0	140 AC = Aidx.transpose() * A
mi@0	141 A = A - (Aidx*AC)
mi@0	142
mi@0	143 else:
mi@0	144 T = np.dot(B.transpose(), A)
mi@0	145 T = np.sum(T**2.0, axis=0)
mi@0	146
mi@0	147 # next selected column index
mi@0	148 T[self.select] = 0.0
mi@0	149 idx = np.argmax(T)
mi@0	150 self.select.append(idx)
mi@0	151
mi@0	152 # update B
mi@0	153 BC = np.dot(B.transpose(),A[:,idx])
mi@0	154 B -= np.dot(A[:,idx].reshape(-1,1), BC.reshape(1,-1))
mi@0	155
mi@0	156 # and A
mi@0	157 AC = np.dot(A.transpose(),A[:,idx])
mi@0	158 A -= np.dot(A[:,idx].reshape(-1,1), AC.reshape(1,-1))
mi@0	159
mi@0	160
mi@0	161 # detect the next best data point
mi@0	162 self._logger.info('searching for next best column ...')
mi@0	163 self._logger.info('cur_columns: ' + str(self.select))
mi@0	164 self._t[i] = time.time() - t0
mi@0	165
mi@0	166 # sort indices, otherwise h5py won't work
mi@0	167 self.W = self.data[:, np.sort(self.select)]
mi@0	168
mi@0	169 # "unsort" it again to keep the correct order
mi@0	170 self.W = self.W[:, np.argsort(np.argsort(self.select))]
mi@0	171
mi@0	172 if __name__ == "__main__":
mi@0	173 import doctest
mi@0	174 doctest.testmod()

Mercurial > hg > segmentation

annotate pymf/greedy.py @ 19:890cfe424f4a tip