#!/usr/bin/env python
# encoding: utf-8
"""
gmmdist.py

Created by George Fazekas on 2014-07-03.
Copyright (c) 2014 . All rights reserved.
"""

import sys, os

from numpy import sum, isnan, isinf, vstack
from numpy.random import rand
import numpy as np
from numpy import log, power, pi, exp, transpose, zeros, ones, dot
from sklearn.mixture import GMM
from sklearn.metrics.pairwise import pairwise_distances
from scipy.linalg import *
from pylab import plt
from scipy.stats import norm
#from gmmplot import plot_gmm
from scipy.io import savemat, loadmat
from numpy import trace
from numpy.linalg import det, inv, eigvals

FLOAT_MAX = np.finfo(np.float64).max


def is_pos_def(x):
    '''Check if a matrix is positive definite.'''
    return np.all(eigvals(x) > 0)


def skl_models(g1, g2):
    '''Wrapper function with error checking and adaptation to the GmmDistance/GaussianComponent classes.
    This function compares two Gaussian mixture models with an equal number of components and full covariance matrices.
    Covariance matrices must be positive definite.
    '''
    m1, m2 = g1.means, g2.means
    v1, v2 = g1.covars.swapaxes(0,2), g2.covars.swapaxes(0,2)
    w1, w2 = g1.weights[:,np.newaxis], g2.weights[:,np.newaxis]
    assert m1.shape[1] > 1, "The minimum number of features is 2."
    assert w1.shape == w2.shape, "Models must have the same number of components."
    # print 'v1, v2', v1.shape, v2.shape
    # assert (is_pos_def(v1) and is_pos_def(v2)) == True, "Covariance matrices must be positive definite."
    d = skl_gmm(m1, m2, v1, v2, w1, w2)
    if isnan(d): # or isinf(d):
        return FLOAT_MAX
    return d


# def kldiv_full(m0,m1,s0,s1):
#     '''Naive (unoptimised) implementation of the KL divergence between two single Gaussians with fully defined covariances (s).'''
#     return 0.5*(np.trace(s0/s1)+np.trace(s1/s0)+ np.dot( np.dot((m0-m1).T, np.linalg.inv(s0+s1)), (m0-m1)))-m0.shape[0]
#
# def skl_full(p0,p1):
#     '''Symmetrised KL divergence computed from 2 KL divergences using mean( KL(p||q), KL(q||p) )'''
#     d = (kldiv_full(p0.means,p0.covars,p1.means,p1.covars) + kldiv_full(p1.means,p1.covars,p0.means,p0.covars)) * 0.5
#     d = sum(d)
#     if isnan(d):
#         return np.finfo(np.float64).max
#     return d


def kldiv_full(m1, m2, s1, s2):
    '''KL divergence between two single Gaussians with full covariance matrices.'''
    m1, m2 = m1[:,None], m2[:,None]
    logdet1, logdet2 = log(det(s1)), log(det(s2))
    inv1, inv2 = inv(s1), inv(s2)
    m = m1 - m2
    d = m.shape[0] # number of dimensions
    return 0.5 * ((logdet1-logdet2) + trace(dot(inv1,s2)) + dot(dot(m.T, inv1), m) - d)[0][0]


def _skl_full(m1, m2, s1, s2):
    '''Symmetrised KL divergence between two single Gaussians: mean( KL(p1||p2), KL(p2||p1) ).'''
    m1, m2 = m1[:,None], m2[:,None]
    logdet1, logdet2 = log(det(s1)), log(det(s2))
    inv1, inv2 = inv(s1), inv(s2)
    m12 = m1 - m2
    m21 = m2 - m1
    d = m12.shape[0] # number of dimensions
    kl12 = 0.5 * ((logdet1-logdet2) + trace(dot(inv1,s2)) + dot(dot(m12.T, inv1), m12) - d)[0][0]
    kl21 = 0.5 * ((logdet2-logdet1) + trace(dot(inv2,s1)) + dot(dot(m21.T, inv2), m21) - d)[0][0]
    return 0.5 * (kl12 + kl21)


def skl_full(p1, p2):
    '''Symmetrised KL divergence between two single-Gaussian models exposing means/covars attributes.'''
    m1, m2 = p1.means, p2.means
    s1, s2 = p1.covars, p2.covars
    return _skl_full(m1, m2, s1, s2)

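# A minimal usage sketch (not part of the original module): it calls _skl_full on two
# hand-picked 2-D Gaussians, and on a Gaussian paired with itself where the symmetrised
# divergence should be numerically zero. All parameter values below are illustrative only.
def _demo_skl_full():
    '''Toy check of the single-Gaussian symmetrised KL divergence.'''
    m1 = np.array([0.0, 0.0])
    m2 = np.array([1.0, -1.0])
    s1 = np.array([[1.0, 0.2], [0.2, 1.0]])
    s2 = np.array([[2.0, 0.0], [0.0, 0.5]])
    print("skl(identical Gaussians) = %s" % _skl_full(m1, m1, s1, s1))  # expected ~0
    print("skl(different Gaussians) = %s" % _skl_full(m1, m2, s1, s2))  # expected > 0
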
def skl_gmm(m1, m2, v1, v2, w1, w2):
    '''Take the mean of KL(gmm1||gmm2) and KL(gmm2||gmm1) to symmetrise the divergence.'''
    return (abs(kldiv_gmm(m1,m2,v1,v2,w1,w2)) + abs(kldiv_gmm(m2,m1,v2,v1,w2,w1))) * 0.5


def kldiv_gmm(m1, m2, v1, v2, w1, w2):
    '''Low-level implementation of the variational approximation of the KL divergence between Gaussian Mixture Models.
    See first: J. R. Hershey and P. A. Olsen. "Approximating the Kullback-Leibler Divergence Between Gaussian Mixture Models."
    IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Volume 4, pp. 317–320, April 2007.
    Further theory and refinement: J. L. Durrieu, J. P. Thiran, F. Kelly. "Lower and Upper Bounds for Approximation
    of the Kullback-Leibler Divergence Between Gaussian Mixture Models", ICASSP, 2012.
    This implementation is by George Fazekas, Centre for Digital Music, QMUL, London, UK.

    Inputs:
        m(x) : mean vectors of gmm(x), one row per component
        v(x) : covariance matrices of gmm(x), stacked along the last axis
        w(x) : weight vector of gmm(x), one weight per component

    Output:
        kl_full_12 : Kullback-Leibler divergence of the PDFs approximated by the two Gaussian Mixture Models.

    This implementation uses a variational approximation rather than the conventional (and expensive)
    Monte Carlo simulation based approach. See the cited papers for details. The divergence is not symmetrised
    and may be negative (unlike the closed form) or inf. If the output is complex, the covariance matrices
    do not fulfil the required criteria, i.e. they are badly formed, singular, not positive definite, etc.
    '''
    # TODO: consider dealing better with inf/-inf outcomes in the final distance computation
    #  - note: the max of the rows of kl12 are not always zero like that of kl11
    #  - eliminate the need for swapaxes of the covariances

    n = m1.shape[0] # number of components
    d = m1.shape[1] # number of dimensions

    ixm = ones((n,1),dtype=int).T        # selector of mean matrix components
    ixd = range(0,d*d,d+1)               # indices of diagonal elements of a DxD matrix
    t1 = v1.swapaxes(1,2).reshape(d,n*d) # concatenate gmm1 covariance matrices
    loopn = range(n)

    # step 1) precompute log(determinant()) of the covariance matrices of gmm1
    logdet1 = zeros((n,1))
    for i in loopn:
        logdet1[i] = log(det(v1[:,:,i]))

    # step 2) compute reference kldiv between individual components of gmm1
    kl11 = zeros((n,n))
    for i in loopn:
        inv1 = inv(v1[:,:,i])
        mm1 = m1 - m1[i*ixm,:][0]
        b1 = dot(inv1,t1).swapaxes(0,1).reshape(n,power(d,2)).T
        kl11[:,i] = 0.5 * ( (logdet1[i]-d-logdet1)[:,0] + sum(b1[ixd,:],0).T + sum(dot(mm1,inv1) * mm1,1))
    # print kl11

    # step 3) precompute log(determinant()) of the covariance matrices of gmm2
    logdet2 = zeros((n,1))
    for i in loopn:
        logdet2[i] = log(det(v2[:,:,i]))

    # step 4) compute pair-wise kldiv between components of gmm1 and gmm2
    kl12 = zeros((n,n))
    for i in loopn:
        inv2 = inv(v2[:,:,i])
        m12 = m1 - m2[i*ixm,:][0]
        b2 = dot(inv2,t1).swapaxes(0,1).reshape(n,power(d,2)).T
        kl12[:,i] = 0.5 * ( (logdet2[i]-d-logdet1)[:,0] + sum(b2[ixd,:],0).T + sum(dot(m12,inv2) * m12,1))
    # print kl12

    # step 5) compute the final variational distance between gmm1 and gmm2
    kl_full_12 = dot(w1.T, (log(sum(exp(-kl11)*w1,1))) - log(sum(exp(-kl12)*w2,1)))[0]
    # print "KL divergence between gmm1 || gmm2:", kl_full_12
    return kl_full_12

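# A small worked example (added as a sketch, not part of the original module): two 2-component,
# 2-dimensional mixtures with hand-picked parameters in the layout kldiv_gmm expects, i.e. means
# with one row per component, covariances stacked along the last axis, and weights as a column
# vector (the same layout skl_models produces). All numbers below are illustrative only.
def _demo_kldiv_gmm():
    '''Toy comparison of two small GMMs using the variational KL approximation.'''
    d = 2
    m1 = np.array([[0.0, 0.0], [3.0, 3.0]])            # gmm1 means, one row per component
    m2 = np.array([[0.5, 0.0], [3.5, 2.5]])            # gmm2 means
    v1 = np.dstack([np.eye(d), 0.5 * np.eye(d)])       # gmm1 covariances stacked as (d, d, n)
    v2 = np.dstack([1.2 * np.eye(d), 0.7 * np.eye(d)]) # gmm2 covariances stacked as (d, d, n)
    w1 = np.array([[0.4], [0.6]])                      # gmm1 component weights as a column
    w2 = np.array([[0.5], [0.5]])                      # gmm2 component weights as a column
    print("KL(gmm1||gmm2) = %s" % kldiv_gmm(m1, m2, v1, v2, w1, w2))
    print("KL(gmm2||gmm1) = %s" % kldiv_gmm(m2, m1, v2, v1, w2, w1))
    print("symmetrised KL = %s" % skl_gmm(m1, m2, v1, v2, w1, w2))
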
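# A sketch of driving skl_models from fitted scikit-learn mixtures. The GmmDistance/GaussianComponent
# classes mentioned in the skl_models docstring are not defined in this file, so _FittedGmm below is a
# hypothetical stand-in that only carries the three attributes skl_models reads. It relies on the old
# sklearn.mixture.GMM API already imported above; newer scikit-learn versions replace it with
# GaussianMixture (means_, covariances_, weights_), so adjust the attribute names accordingly.
class _FittedGmm(object):
    '''Minimal container exposing the means/covars/weights attributes expected by skl_models.'''
    def __init__(self, means, covars, weights):
        self.means = means      # (n_components, n_features)
        self.covars = covars    # (n_components, n_features, n_features), full covariances
        self.weights = weights  # (n_components,)

def _demo_skl_models():
    '''Fit two 3-component mixtures to synthetic data and compare them with skl_models.'''
    np.random.seed(0)
    X = np.random.randn(500, 2)
    Y = np.random.randn(500, 2) + 2.0
    gmm1 = GMM(n_components=3, covariance_type='full').fit(X)
    gmm2 = GMM(n_components=3, covariance_type='full').fit(Y)
    g1 = _FittedGmm(gmm1.means_, gmm1.covars_, gmm1.weights_)
    g2 = _FittedGmm(gmm2.means_, gmm2.covars_, gmm2.weights_)
    print("symmetrised variational KL = %s" % skl_models(g1, g2))
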
# models = loadmat("gmms.mat")
# # print models.keys()
#
# X = models['X']
# Y = models['Y']
#
# print "Data shape:"
# print X.shape
# print Y.shape
#
# # # plot the fitted model
# # gmm1 = GMM(n_components = 3, covariance_type='full')
# # model1 = gmm1.fit(X)
# # plot_gmm(gmm1,X)
# #
# # # plot the fitted model
# # gmm2 = GMM(n_components = 3, covariance_type='full')
# # model2 = gmm2.fit(Y)
# # plot_gmm(gmm2,Y)
#
# # print "KL=",kldiv_full(gmm1.means_[0],gmm1.means_[1],gmm1.covars_[0],gmm1.covars_[1])
# #
# # print "gmm1_covars:\n", gmm1.covars_, gmm1.covars_.shape
#
# # m1 = gmm1.means_
# # v1 = gmm1.covars_.swapaxes(0,2)
# # w1 = gmm1.weights_
# #
# # m2 = gmm2.means_
# # v2 = gmm2.covars_.swapaxes(0,2)
# # w2 = gmm2.weights_
#
# m1 = models['gmm1_means']
# v1 = models['gmm1_covars']
# w1 = models['gmm1_weights']
#
# m2 = models['gmm2_means']
# v2 = models['gmm2_covars']
# w2 = models['gmm2_weights']
#
# print "KL divergence between gmm1 || gmm2:", kldiv_gmm(m1,m2,v1,v2,w1,w2)
# print "KL divergence between gmm2 || gmm1:", kldiv_gmm(m2,m1,v2,v1,w2,w1)
# print "Symmetrised KL distance between gmm1 || gmm2:", skl_gmm(m1,m2,v1,v2,w1,w2)


def main():
    pass


if __name__ == '__main__':
    main()