#!/usr/bin/env python
# encoding: utf-8
"""
gmmdist.py

Created by George Fazekas on 2014-07-03.
Copyright (c) 2014 . All rights reserved.
"""

import sys, os

from numpy import sum, isnan, isinf, vstack
from numpy.random import rand
import numpy as np
from numpy import log, power, pi, exp, transpose, zeros, ones, dot
from sklearn.mixture import GMM
from sklearn.metrics.pairwise import pairwise_distances
from scipy.linalg import *
from pylab import plt
from scipy.stats import norm
#from gmmplot import plot_gmm
from scipy.io import savemat, loadmat
from numpy import trace
from numpy.linalg import det, inv, eigvals

FLOAT_MAX = np.finfo(np.float64).max


def is_pos_def(x):
    '''Check if a matrix is positive definite.'''
    return np.all(eigvals(x) > 0)


def skl_models(g1, g2):
    '''Wrapper function with error checking and adaptation to the GmmDistance/GaussianComponent classes.
    This function compares two Gaussian mixture models with an equal number of components and full covariance matrices.
    Covariance matrices must be positive definite.
    '''
    m1, m2 = g1.means, g2.means
    v1, v2 = g1.covars.swapaxes(0,2), g2.covars.swapaxes(0,2)
    w1, w2 = g1.weights[:,np.newaxis], g2.weights[:,np.newaxis]
    assert m1.shape[1] > 1, "The minimum number of features is 2."
    assert w1.shape == w2.shape, "Models must have the same number of components."
    # print 'v1, v2', v1.shape, v2.shape
    # assert (is_pos_def(v1) and is_pos_def(v2)) == True, "Covariance matrices must be positive definite."
    d = skl_gmm(m1, m2, v1, v2, w1, w2)
    if isnan(d): # or isinf(d):
        return FLOAT_MAX
    return d


# def kldiv_full(m0,m1,s0,s1):
#     '''Naive (unoptimised) implementation of the KL divergence between two single Gaussians with fully defined covariances (s).'''
#     return 0.5*(np.trace(s0/s1)+np.trace(s1/s0)+ np.dot( np.dot((m0-m1).T, np.linalg.inv(s0+s1)), (m0-m1)))-m0.shape[0]
#
# def skl_full(p0,p1):
#     '''Symmetrised KL divergence computed from 2 KL divergences using mean( KL(p||q), KL(q||p) )'''
#     d = (kldiv_full(p0.means,p0.covars,p1.means,p1.covars) + kldiv_full(p1.means,p1.covars,p0.means,p0.covars)) * 0.5
#     d = sum(d)
#     if isnan(d):
#         return np.finfo(np.float64).max
#     return d


def kldiv_full(m1, m2, s1, s2):
    '''KL divergence between two single Gaussians with full covariance matrices.'''
    m1, m2 = m1[:,None], m2[:,None]
    logdet1, logdet2 = log(det(s1)), log(det(s2))
    inv1, inv2 = inv(s1), inv(s2)
    m = m1 - m2
    d = m.shape[0] # number of dimensions
    return 0.5 * ((logdet1-logdet2) + trace(dot(inv1,s2)) + dot(dot(m.T, inv1), m) - d)[0][0]


def _skl_full(m1, m2, s1, s2):
    '''Symmetrised KL divergence between two single Gaussians: mean( KL(p1||p2), KL(p2||p1) ).'''
    m1, m2 = m1[:,None], m2[:,None]
    logdet1, logdet2 = log(det(s1)), log(det(s2))
    inv1, inv2 = inv(s1), inv(s2)
    m12 = m1 - m2
    m21 = m2 - m1
    d = m12.shape[0] # number of dimensions
    kl12 = 0.5 * ((logdet1-logdet2) + trace(dot(inv1,s2)) + dot(dot(m12.T, inv1), m12) - d)[0][0]
    kl21 = 0.5 * ((logdet2-logdet1) + trace(dot(inv2,s1)) + dot(dot(m21.T, inv2), m21) - d)[0][0]
    return 0.5 * (kl12 + kl21)


def skl_full(p1, p2):
    '''Symmetrised KL divergence between two single-Gaussian models exposing means/covars attributes.'''
    m1, m2 = p1.means, p2.means
    s1, s2 = p1.covars, p2.covars
    return _skl_full(m1, m2, s1, s2)

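# A minimal usage sketch (not part of the original module): it calls _skl_full on two
# hand-picked 2-D Gaussians, and on a Gaussian paired with itself where the symmetrised
# divergence should be numerically zero. All parameter values below are illustrative only.
def _demo_skl_full():
    '''Toy check of the single-Gaussian symmetrised KL divergence.'''
    m1 = np.array([0.0, 0.0])
    m2 = np.array([1.0, -1.0])
    s1 = np.array([[1.0, 0.2], [0.2, 1.0]])
    s2 = np.array([[2.0, 0.0], [0.0, 0.5]])
    print("skl(identical Gaussians) = %s" % _skl_full(m1, m1, s1, s1))  # expected ~0
    print("skl(different Gaussians) = %s" % _skl_full(m1, m2, s1, s2))  # expected > 0
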
def skl_gmm(m1, m2, v1, v2, w1, w2):
    '''Take the mean of KL(gmm1||gmm2) and KL(gmm2||gmm1) to symmetrise the divergence.'''
    return (abs(kldiv_gmm(m1,m2,v1,v2,w1,w2)) + abs(kldiv_gmm(m2,m1,v2,v1,w2,w1))) * 0.5


def kldiv_gmm(m1, m2, v1, v2, w1, w2):
    '''Low-level implementation of the variational approximation of the KL divergence between Gaussian Mixture Models.
    See first: J. R. Hershey and P. A. Olsen. "Approximating the Kullback-Leibler Divergence Between Gaussian Mixture Models."
    IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Volume 4, pp. 317–320, April 2007.
    Further theory and refinement: J. L. Durrieu, J. P. Thiran, F. Kelly. "Lower and Upper Bounds for Approximation
    of the Kullback-Leibler Divergence Between Gaussian Mixture Models", ICASSP, 2012.
    This implementation is by George Fazekas, Centre for Digital Music, QMUL, London, UK.

    Inputs:
        m(x) : mean vectors of gmm(x), one row per component
        v(x) : covariance matrices of gmm(x), stacked along the last axis
        w(x) : weight vector of gmm(x), one weight per component

    Output:
        kl_full_12 : Kullback-Leibler divergence of the PDFs approximated by the two Gaussian Mixture Models.

    This implementation uses a variational approximation rather than the conventional (and expensive)
    Monte Carlo simulation based approach. See the cited papers for details. The divergence is not symmetrised
    and may be negative (unlike the closed form) or inf. If the output is complex, the covariance matrices
    do not fulfil the required criteria, i.e. they are badly formed, singular, not positive definite, etc.
    '''
    # TODO: consider dealing better with inf/-inf outcomes in the final distance computation
    #  - note: the max of the rows of kl12 are not always zero like that of kl11
    #  - eliminate the need for swapaxes of the covariances

    n = m1.shape[0] # number of components
    d = m1.shape[1] # number of dimensions

    ixm = ones((n,1),dtype=int).T        # selector of mean matrix components
    ixd = range(0,d*d,d+1)               # indices of diagonal elements of a DxD matrix
    t1 = v1.swapaxes(1,2).reshape(d,n*d) # concatenate gmm1 covariance matrices
    loopn = range(n)

    # step 1) precompute log(determinant()) of the covariance matrices of gmm1
    logdet1 = zeros((n,1))
    for i in loopn:
        logdet1[i] = log(det(v1[:,:,i]))

    # step 2) compute reference kldiv between individual components of gmm1
    kl11 = zeros((n,n))
    for i in loopn:
        inv1 = inv(v1[:,:,i])
        mm1 = m1 - m1[i*ixm,:][0]
        b1 = dot(inv1,t1).swapaxes(0,1).reshape(n,power(d,2)).T
        kl11[:,i] = 0.5 * ( (logdet1[i]-d-logdet1)[:,0] + sum(b1[ixd,:],0).T + sum(dot(mm1,inv1) * mm1,1))
    # print kl11

    # step 3) precompute log(determinant()) of the covariance matrices of gmm2
    logdet2 = zeros((n,1))
    for i in loopn:
        logdet2[i] = log(det(v2[:,:,i]))

    # step 4) compute pair-wise kldiv between components of gmm1 and gmm2
    kl12 = zeros((n,n))
    for i in loopn:
        inv2 = inv(v2[:,:,i])
        m12 = m1 - m2[i*ixm,:][0]
        b2 = dot(inv2,t1).swapaxes(0,1).reshape(n,power(d,2)).T
        kl12[:,i] = 0.5 * ( (logdet2[i]-d-logdet1)[:,0] + sum(b2[ixd,:],0).T + sum(dot(m12,inv2) * m12,1))
    # print kl12

    # step 5) compute the final variational distance between gmm1 and gmm2
    kl_full_12 = dot(w1.T, (log(sum(exp(-kl11)*w1,1))) - log(sum(exp(-kl12)*w2,1)))[0]
    # print "KL divergence between gmm1 || gmm2:", kl_full_12
    return kl_full_12

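# A small worked example (added as a sketch, not part of the original module): two 2-component,
# 2-dimensional mixtures with hand-picked parameters in the layout kldiv_gmm expects, i.e. means
# with one row per component, covariances stacked along the last axis, and weights as a column
# vector (the same layout skl_models produces). All numbers below are illustrative only.
def _demo_kldiv_gmm():
    '''Toy comparison of two small GMMs using the variational KL approximation.'''
    d = 2
    m1 = np.array([[0.0, 0.0], [3.0, 3.0]])            # gmm1 means, one row per component
    m2 = np.array([[0.5, 0.0], [3.5, 2.5]])            # gmm2 means
    v1 = np.dstack([np.eye(d), 0.5 * np.eye(d)])       # gmm1 covariances stacked as (d, d, n)
    v2 = np.dstack([1.2 * np.eye(d), 0.7 * np.eye(d)]) # gmm2 covariances stacked as (d, d, n)
    w1 = np.array([[0.4], [0.6]])                      # gmm1 component weights as a column
    w2 = np.array([[0.5], [0.5]])                      # gmm2 component weights as a column
    print("KL(gmm1||gmm2) = %s" % kldiv_gmm(m1, m2, v1, v2, w1, w2))
    print("KL(gmm2||gmm1) = %s" % kldiv_gmm(m2, m1, v2, v1, w2, w1))
    print("symmetrised KL = %s" % skl_gmm(m1, m2, v1, v2, w1, w2))
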
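# A sketch of driving skl_models from fitted scikit-learn mixtures. The GmmDistance/GaussianComponent
# classes mentioned in the skl_models docstring are not defined in this file, so _FittedGmm below is a
# hypothetical stand-in that only carries the three attributes skl_models reads. It relies on the old
# sklearn.mixture.GMM API already imported above; newer scikit-learn versions replace it with
# GaussianMixture (means_, covariances_, weights_), so adjust the attribute names accordingly.
class _FittedGmm(object):
    '''Minimal container exposing the means/covars/weights attributes expected by skl_models.'''
    def __init__(self, means, covars, weights):
        self.means = means      # (n_components, n_features)
        self.covars = covars    # (n_components, n_features, n_features), full covariances
        self.weights = weights  # (n_components,)

def _demo_skl_models():
    '''Fit two 3-component mixtures to synthetic data and compare them with skl_models.'''
    np.random.seed(0)
    X = np.random.randn(500, 2)
    Y = np.random.randn(500, 2) + 2.0
    gmm1 = GMM(n_components=3, covariance_type='full').fit(X)
    gmm2 = GMM(n_components=3, covariance_type='full').fit(Y)
    g1 = _FittedGmm(gmm1.means_, gmm1.covars_, gmm1.weights_)
    g2 = _FittedGmm(gmm2.means_, gmm2.covars_, gmm2.weights_)
    print("symmetrised variational KL = %s" % skl_models(g1, g2))
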
# models = loadmat("gmms.mat")
# # print models.keys()
#
# X = models['X']
# Y = models['Y']
#
# print "Data shape:"
# print X.shape
# print Y.shape
#
# # # plot the fitted model
# # gmm1 = GMM(n_components = 3, covariance_type='full')
# # model1 = gmm1.fit(X)
# # plot_gmm(gmm1,X)
# #
# # # plot the fitted model
# # gmm2 = GMM(n_components = 3, covariance_type='full')
# # model2 = gmm2.fit(Y)
# # plot_gmm(gmm2,Y)
#
# # print "KL=",kldiv_full(gmm1.means_[0],gmm1.means_[1],gmm1.covars_[0],gmm1.covars_[1])
# #
# # print "gmm1_covars:\n", gmm1.covars_, gmm1.covars_.shape
#
# # m1 = gmm1.means_
# # v1 = gmm1.covars_.swapaxes(0,2)
# # w1 = gmm1.weights_
# #
# # m2 = gmm2.means_
# # v2 = gmm2.covars_.swapaxes(0,2)
# # w2 = gmm2.weights_
#
# m1 = models['gmm1_means']
# v1 = models['gmm1_covars']
# w1 = models['gmm1_weights']
#
# m2 = models['gmm2_means']
# v2 = models['gmm2_covars']
# w2 = models['gmm2_weights']
#
# print "KL divergence between gmm1 || gmm2:", kldiv_gmm(m1,m2,v1,v2,w1,w2)
# print "KL divergence between gmm2 || gmm1:", kldiv_gmm(m2,m1,v2,v1,w2,w1)
# print "Symmetrised KL distance between gmm1 || gmm2:", skl_gmm(m1,m2,v1,v2,w1,w2)


def main():
    pass


if __name__ == '__main__':
    main()