mi@0
|
1 #!/usr/bin/python
|
mi@0
|
2 #
|
mi@0
|
3 # Copyright (C) Christian Thurau, 2010.
|
mi@0
|
4 # Licensed under the GNU General Public License (GPL).
|
mi@0
|
5 # http://www.gnu.org/licenses/gpl.txt
|
mi@0
|
6 """
|
mi@0
|
7 PyMF LAESA
|
mi@0
|
8 """
|
mi@0
|
9
|
mi@0
|
10
|
mi@0
|
11 import scipy.sparse
|
mi@0
|
12 import numpy as np
|
mi@0
|
13
|
mi@0
|
14 from dist import *
|
mi@0
|
15 from sivm import SIVM
|
mi@0
|
16
|
mi@0
|
17 __all__ = ["LAESA"]
|
mi@0
|
18
|
mi@0
|
19 class LAESA(SIVM):
|
mi@0
|
20 """
|
mi@0
|
21 LAESA(data, num_bases=4)
|
mi@0
|
22
|
mi@0
|
23
|
mi@0
|
24 Simplex Volume Maximization. Factorize a data matrix into two matrices s.t.
|
mi@0
|
25 F = | data - W*H | is minimal. H is restricted to convexity. W is iteratively
|
mi@0
|
26 found by maximizing the volume of the resulting simplex (see [1]).
|
mi@0
|
27
|
mi@0
|
28 Parameters
|
mi@0
|
29 ----------
|
mi@0
|
30 data : array_like, shape (_data_dimension, _num_samples)
|
mi@0
|
31 the input data
|
mi@0
|
32 num_bases: int, optional
|
mi@0
|
33 Number of bases to compute (column rank of W and row rank of H).
|
mi@0
|
34 4 (default)
|
mi@0
|
35
|
mi@0
|
36 Attributes
|
mi@0
|
37 ----------
|
mi@0
|
38 W : "data_dimension x num_bases" matrix of basis vectors
|
mi@0
|
39 H : "num bases x num_samples" matrix of coefficients
|
mi@0
|
40 ferr : frobenius norm (after calling .factorize())
|
mi@0
|
41
|
mi@0
|
42 Example
|
mi@0
|
43 -------
|
mi@0
|
44 Applying LAESA to some rather stupid data set:
|
mi@0
|
45
|
mi@0
|
46 >>> import numpy as np
|
mi@0
|
47 >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]])
|
mi@0
|
48 >>> laesa_mdl = LAESA(data, num_bases=2)
|
mi@0
|
49 >>> laesa_mdl.factorize()
|
mi@0
|
50
|
mi@0
|
51 The basis vectors are now stored in laesa_mdl.W, the coefficients in laesa_mdl.H.
|
mi@0
|
52 To compute coefficients for an existing set of basis vectors simply copy W
|
mi@0
|
53 to laesa_mdl.W, and set compute_w to False:
|
mi@0
|
54
|
mi@0
|
55 >>> data = np.array([[1.5, 1.3], [1.2, 0.3]])
|
mi@0
|
56 >>> W = np.array([[1.0, 0.0], [0.0, 1.0]])
|
mi@0
|
57 >>> laesa_mdl = LAESA(data, num_bases=2)
|
mi@0
|
58 >>> laesa_mdl.W = W
|
mi@0
|
59 >>> laesa_mdl.factorize(niter=1, compute_w=False)
|
mi@0
|
60
|
mi@0
|
61 The result is a set of coefficients laesa_mdl.H, s.t. data = W * laesa_mdl.H.
|
mi@0
|
62 """
|
mi@0
|
63 def update_w(self):
|
mi@0
|
64 # initialize some of the recursively updated distance measures
|
mi@0
|
65 self.init_sivm()
|
mi@0
|
66 distiter = self._distance(self.select[-1])
|
mi@0
|
67
|
mi@0
|
68 for l in range(self._num_bases-1):
|
mi@0
|
69 d = self._distance(self.select[-1])
|
mi@0
|
70
|
mi@0
|
71 # replace distances in distiter
|
mi@0
|
72 distiter = np.where(d<distiter, d, distiter)
|
mi@0
|
73
|
mi@0
|
74 # detect the next best data point
|
mi@0
|
75 self.select.append(np.argmax(distiter))
|
mi@0
|
76 self._logger.info('cur_nodes: ' + str(self.select))
|
mi@0
|
77
|
mi@0
|
78 # sort indices, otherwise h5py won't work
|
mi@0
|
79 self.W = self.data[:, np.sort(self.select)]
|
mi@0
|
80
|
mi@0
|
81 # but "unsort" it again to keep the correct order
|
mi@0
|
82 self.W = self.W[:, np.argsort(np.argsort(self.select))]
|
mi@0
|
83
|
mi@0
|
84
|
mi@0
|
85 if __name__ == "__main__":
|
mi@0
|
86 import doctest
|
mi@0
|
87 doctest.testmod()
|