segmentation: pymf/dist.py annotate

annotate pymf/dist.py @ 19:890cfe424f4a tip

added annotations

author	mitian
date	Fri, 11 Dec 2015 09:47:40 +0000
parents	26838b1f560f
children

rev	line source
mi@0	1 #!/usr/bin/python
mi@0	2 #
mi@0	3 # Copyright (C) Christian Thurau, 2010.
mi@0	4 # Licensed under the GNU General Public License (GPL).
mi@0	5 # http://www.gnu.org/licenses/gpl.txt
mi@0	6 """
mi@0	7 PyMF several distance functions
mi@0	8
mi@0	9 kl_divergence(): KL Divergence
mi@0	10 l1_distance(): L1 distance
mi@0	11 l2_distance(): L2 distance
mi@0	12 cosine_distance(): Cosine distance
mi@0	13 pdist(): Pairwise distance computation
mi@0	14 vq(): Vector quantization
mi@0	15
mi@0	16 """
mi@0	17
mi@0	18
mi@0	19 import numpy as np
mi@0	20 import scipy.sparse
mi@0	21
mi@0	22 __all__ = ["abs_cosine_distance", "kl_divergence", "l1_distance", "l2_distance",
mi@0	23 "weighted_abs_cosine_distance","cosine_distance","vq", "pdist"]
mi@0	24
mi@0	25 def kl_divergence(d, vec):
mi@0	26 b = vec*(1/d)
mi@0	27 b = np.where(b>0, np.log(b),0)
mi@0	28 b = vec * b
mi@0	29 b = np.sum(b - vec + d, axis=0).reshape((-1))
mi@0	30 return b
mi@0	31
mi@0	32 def l1_distance(d, vec):
mi@0	33 ret_val = np.sum(np.abs(d - vec), axis=0)
mi@0	34 ret_val = ret_val.reshape((-1))
mi@0	35 return ret_val
mi@0	36
mi@0	37 def sparse_l2_distance(d, vec):
mi@0	38 # compute the norm of d
mi@0	39 nd = (d.multiply(d)).sum(axis=0)
mi@0	40 nv = (vec.multiply(vec)).sum(axis=0)
mi@0	41 ret_val = nd + nv - 2.0(d.T vec).T
mi@0	42 return np.sqrt(ret_val)
mi@0	43
mi@0	44 def approx_l2_distance(d, vec):
mi@0	45 # Use random projections to approximate the conventional l2 distance
mi@0	46 k = np.round(np.log(d.shape[0]))
mi@0	47 #k = d.shape[0]
mi@0	48 R = np.random.randn(k, d.shape[0])
mi@0	49 R = R / np.sqrt((R**2).sum(axis=0))
mi@0	50 A = np.dot(R,d)
mi@0	51 B = np.dot(R, vec)
mi@0	52 ret_val = np.sum( (A - B)**2, axis=0)
mi@0	53 ret_val = np.sqrt(R.shape[1]/R.shape[0]) * np.sqrt(ret_val)
mi@0	54 ret_val = ret_val.reshape((-1))
mi@0	55 return ret_val
mi@0	56
mi@0	57 def l2_distance(d, vec):
mi@0	58 if scipy.sparse.issparse(d):
mi@0	59 ret_val = sparse_l2_distance(d, vec)
mi@0	60 else:
mi@0	61 ret_val = np.sqrt(((d[:,:] - vec)**2).sum(axis=0))
mi@0	62
mi@0	63 return ret_val.reshape((-1))
mi@0	64
mi@0	65 def l2_distance_new(d,vec):
mi@0	66 # compute the norm of d
mi@0	67 nd = (d**2).sum(axis=0)
mi@0	68 nv = (vec**2).sum(axis=0)
mi@0	69 ret_val = nd + nv - 2.0*np.dot(d.T,vec.reshape((-1,1))).T
mi@0	70
mi@0	71 return np.sqrt(ret_val)
mi@0	72
mi@0	73 def cosine_distance(d, vec):
mi@0	74 tmp = np.dot(np.transpose(d), vec)
mi@0	75 a = np.sqrt(np.sum(d**2, axis=0))
mi@0	76 b = np.sqrt(np.sum(vec**2))
mi@0	77 k = (ab).reshape(-1) + (10*-9)
mi@0	78
mi@0	79 # compute distance
mi@0	80 ret_val = 1.0 - tmp/k
mi@0	81
mi@0	82 return ret_val.reshape((-1))
mi@0	83
mi@0	84 def abs_cosine_distance(d, vec, weighted=False):
mi@0	85 if scipy.sparse.issparse(d):
mi@0	86 tmp = np.array((d.T * vec).todense(), dtype=np.float32).reshape(-1)
mi@0	87 a = np.sqrt(np.array(d.multiply(d).sum(axis=0), dtype=np.float32).reshape(-1))
mi@0	88 b = np.sqrt(np.array(vec.multiply(vec).sum(axis=0), dtype=np.float32).reshape(-1))
mi@0	89 else:
mi@0	90 tmp = np.dot(np.transpose(d), vec).reshape(-1)
mi@0	91 a = np.sqrt(np.sum(d**2, axis=0)).reshape(-1)
mi@0	92 b = np.sqrt(np.sum(vec**2)).reshape(-1)
mi@0	93
mi@0	94 k = (ab).reshape(-1) + 10*-9
mi@0	95
mi@0	96 # compute distance
mi@0	97 ret_val = 1.0 - np.abs(tmp/k)
mi@0	98
mi@0	99 if weighted:
mi@0	100 ret_val = ret_val * a
mi@0	101 return ret_val.reshape((-1))
mi@0	102
mi@0	103 def weighted_abs_cosine_distance(d, vec):
mi@0	104 ret_val = abs_cosine_distance(d, vec, weighted=True)
mi@0	105 return ret_val
mi@0	106
mi@0	107 def pdist(A, B, metric='l2' ):
mi@0	108 # compute pairwise distance between a data matrix A (d x n) and B (d x m).
mi@0	109 # Returns a distance matrix d (n x m).
mi@0	110 d = np.zeros((A.shape[1], B.shape[1]))
mi@0	111 if A.shape[1] <= B.shape[1]:
mi@0	112 for aidx in xrange(A.shape[1]):
mi@0	113 if metric == 'l2':
mi@0	114 d[aidx:aidx+1,:] = l2_distance(B[:,:], A[:,aidx:aidx+1]).reshape((1,-1))
mi@0	115 if metric == 'l1':
mi@0	116 d[aidx:aidx+1,:] = l1_distance(B[:,:], A[:,aidx:aidx+1]).reshape((1,-1))
mi@0	117 else:
mi@0	118 for bidx in xrange(B.shape[1]):
mi@0	119 if metric == 'l2':
mi@0	120 d[:, bidx:bidx+1] = l2_distance(A[:,:], B[:,bidx:bidx+1]).reshape((-1,1))
mi@0	121 if metric == 'l1':
mi@0	122 d[:, bidx:bidx+1] = l1_distance(A[:,:], B[:,bidx:bidx+1]).reshape((-1,1))
mi@0	123
mi@0	124 return d
mi@0	125
mi@0	126 def vq(A, B, metric='l2'):
mi@0	127 # assigns data samples in B to cluster centers A and
mi@0	128 # returns an index list [assume n column vectors, d x n]
mi@0	129 assigned = np.argmin(pdist(A,B, metric=metric), axis=0)
mi@0	130 return assigned

Mercurial > hg > segmentation

annotate pymf/dist.py @ 19:890cfe424f4a tip