#!/usr/bin/python
#
# Copyright (C) Christian Thurau, 2010.
# Licensed under the GNU General Public License (GPL).
# http://www.gnu.org/licenses/gpl.txt
"""
PyMF Convex Hull Non-negative Matrix Factorization [1]

    CHNMF(NMF) : Class for Convex-hull NMF
    quickhull : Function for finding the convex hull in 2D

[1] C. Thurau, K. Kersting, and C. Bauckhage. Convex Non-Negative Matrix
Factorization in the Wild. ICDM 2009.
"""


import numpy as np

from itertools import combinations
from dist import vq
from pca import PCA
from aa import AA

__all__ = ["CHNMF"]


def quickhull(sample):
    """ Find data points on the convex hull of a supplied data set.

    Args:
        sample: data points as row vectors, n x d
            n - number of samples
            d - data dimension (should be two)

    Returns:
        a k x d matrix containing the convex hull data points
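
    Example (a small illustrative check): with this implementation, interior
    points are dropped and the hull comes back as a closed polygon, i.e. the
    first vertex is repeated at the end.

    >>> pts = np.array([[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 1.0], [0.5, 0.5]])
    >>> quickhull(pts).tolist()
    [[0.0, 0.0], [0.0, 1.0], [1.0, 1.0], [1.0, 0.0], [0.0, 0.0]]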
    """

    link = lambda a, b: np.concatenate((a, b[1:]))
    edge = lambda a, b: np.concatenate(([a], [b]))

    def dome(sample, base):
        h, t = base
        dists = np.dot(sample - h, np.dot(((0, -1), (1, 0)), (t - h)))
        outer = np.repeat(sample, dists > 0, axis=0)

        if len(outer):
            pivot = sample[np.argmax(dists)]
            return link(dome(outer, edge(h, pivot)),
                        dome(outer, edge(pivot, t)))
        else:
            return base

    if len(sample) > 2:
        axis = sample[:, 0]
        base = np.take(sample, [np.argmin(axis), np.argmax(axis)], axis=0)
        return link(dome(sample, base),
                    dome(sample, base[::-1]))
    else:
        return sample


class CHNMF(AA):
    """
    CHNMF(data, num_bases=4)

    Convex Hull Non-negative Matrix Factorization. Factorize a data matrix into
    two matrices s.t. F = | data - W*H | is minimal. H is restricted to convexity
    (H >= 0, sum(H, axis=1) = [1 .. 1]) and W resides on actual data points.
    Factorization is solved via an alternating least squares optimization using
    the quadratic programming solver from cvxopt. The results are usually
    equivalent to Archetypal Analysis (pymf.AA), but CHNMF also works for very
    large datasets.

    Parameters
    ----------
    data : array_like, shape (_data_dimension, _num_samples)
        the input data
    num_bases : int, optional
        Number of bases to compute (column rank of W and row rank of H).
        4 (default)
    base_sel : int, optional
        Number of pairwise basis vector projections. Set to a value < rank(data).
        Computation time scales exponentially with this value; usually rather
        low values are sufficient (3-10).

    Attributes
    ----------
    W : "data_dimension x num_bases" matrix of basis vectors
    H : "num_bases x num_samples" matrix of coefficients
    ferr : Frobenius norm (after calling .factorize())

    Example
    -------
    Applying CHNMF to some rather stupid data set:

    >>> import numpy as np
    >>> from chnmf import CHNMF
    >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]])

    Use 2 basis vectors -> W shape(data_dimension, 2).

    >>> chnmf_mdl = CHNMF(data, num_bases=2)

    And start computing the factorization.

    >>> chnmf_mdl.factorize()

    The basis vectors are now stored in chnmf_mdl.W, the coefficients in
    chnmf_mdl.H. To compute coefficients for an existing set of basis vectors,
    simply copy W to chnmf_mdl.W and set compute_w to False:

    >>> data = np.array([[1.5, 2.0], [1.2, 1.8]])
    >>> W = np.array([[1.0, 0.0], [0.0, 1.0]])
    >>> chnmf_mdl = CHNMF(data, num_bases=2)
    >>> chnmf_mdl.W = W
    >>> chnmf_mdl.factorize(compute_w=False)

    The result is a set of coefficients chnmf_mdl.H, s.t. data = W * chnmf_mdl.H.
    """

    def __init__(self, data, num_bases=4, base_sel=3):

        # call inherited method
        AA.__init__(self, data, num_bases=num_bases)

        # base_sel should never be larger than the actual data dimension
        self._base_sel = base_sel
        if base_sel > self.data.shape[0]:
            self._base_sel = self.data.shape[0]

    def init_h(self):
        self.H = np.zeros((self._num_bases, self._num_samples))

    def init_w(self):
        self.W = np.zeros((self._data_dimension, self._num_bases))

    def _map_w_to_data(self):
        """ Return data points that are most similar to basis vectors W.
        """

        # assign each basis vector in W to the closest data sample
        self._Wmapped_index = vq(self.data, self.W)
        self.Wmapped = np.zeros(self.W.shape)

        # do not directly assign, i.e. Wdist = self.data[:, sel],
        # as sel might be unsorted (in non-ascending order)
        # -> sorting sel would screw up the matching to W if
        # self.data is stored as an hdf5 table (see h5py)
        for i, s in enumerate(self._Wmapped_index):
            self.Wmapped[:, i] = self.data[:, s]

    def update_w(self):
        """ Compute new W. """
        def select_hull_points(data, n=3):
            """ Select data points for pairwise projections of the first n
            dimensions. """

            # iterate over all projections and select data points
            idx = np.array([])

            # iterate over some pairwise combinations of dimensions
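            # e.g. for n=3 this visits the index pairs (0, 1), (0, 2) and
            # (1, 2), i.e. every 2-D view spanned by two of the first n
            # (projected) dimensions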
            for i in combinations(range(n), 2):
                # sample convex hull points in 2D projection
                convex_hull_d = quickhull(data[i, :].T)

                # get indices for convex hull data points
                idx = np.append(idx, vq(data[i, :], convex_hull_d.T))
                idx = np.unique(idx)

            return np.int32(idx)

        # determine convex hull data points using either PCA or random
        # projections
        method = 'randomprojection'
        if method == 'pca':
            pcamodel = PCA(self.data)
            pcamodel.factorize(show_progress=False)
            proj = pcamodel.H
        else:
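            # project the data onto _base_sel random directions (cf. [1]);
            # hull points of these low-dimensional projections serve as
            # candidates for hull points of the full data set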
            R = np.random.randn(self._base_sel, self._data_dimension)
            proj = np.dot(R, self.data)

        self._hull_idx = select_hull_points(proj, n=self._base_sel)
        aa_mdl = AA(self.data[:, self._hull_idx], num_bases=self._num_bases)

        # determine W
        aa_mdl.factorize(niter=50, compute_h=True, compute_w=True,
                         compute_err=True, show_progress=False)

        self.W = aa_mdl.W
        self._map_w_to_data()

    def factorize(self, show_progress=False, compute_w=True, compute_h=True,
                  compute_err=True, niter=1):
        """ Factorize s.t. WH = data.

        Parameters
        ----------
        show_progress : bool
            print some extra information to stdout.
        compute_h : bool
            iteratively update values for H.
        compute_w : bool
            iteratively update values for W.
        compute_err : bool
            compute Frobenius norm |data-WH| after each update and store
            it to .ferr[k].

        Updated Values
        --------------
        .W : updated values for W.
        .H : updated values for H.
        .ferr : Frobenius norm |data-WH|.
        """

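        # W is found in a single convex-hull pass (see update_w), so only
        # one outer AA iteration is performed regardless of niter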
        AA.factorize(self, niter=1, show_progress=show_progress,
                     compute_w=compute_w, compute_h=compute_h,
                     compute_err=compute_err)


if __name__ == "__main__":
    import doctest
    doctest.testmod()