c@243: /*
c@243:  *  cluster.c
c@243:  *  cluster_melt
c@243:  *
c@243:  *  Created by Mark Levy on 21/02/2006.
c@309:  *  Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
c@309: 
c@309:     This program is free software; you can redistribute it and/or
c@309:     modify it under the terms of the GNU General Public License as
c@309:     published by the Free Software Foundation; either version 2 of the
c@309:     License, or (at your option) any later version.  See the file
c@309:     COPYING included with this distribution for more information.
c@243:  *
c@243:  */
c@243: 
c@243: #include <stdlib.h>
c@243: 
c@243: #include "cluster_melt.h"
c@243: 
/* Default weight applied to the neighbour-mismatch count (duration prior)
 * when cluster_melt adjusts the soft assignment log-probabilities. */
#define DEFAULT_LAMBDA 0.02
/* Default neighbourhood limit: number of histograms inspected on each side of
 * the current one when the caller does not supply a positive limit. */
#define DEFAULT_LIMIT 20
c@243: 
c@243: double kldist(double* a, double* b, int n) {
c@243: 	/* NB assume that all a[i], b[i] are non-negative
c@243: 	because a, b represent probability distributions */
c@243: 	double q, d;
c@243: 	int i;
c@243: 	
c@243: 	d = 0;
c@243: 	for (i = 0; i < n; i++)
c@243: 	{
c@243: 		q = (a[i] + b[i]) / 2.0;
c@243: 		if (q > 0)
c@243: 		{
c@243: 			if (a[i] > 0)
c@243: 				d += a[i] * log(a[i] / q);
c@243: 			if (b[i] > 0)
c@243: 				d += b[i] * log(b[i] / q);
c@243: 		}
c@243: 	}
c@243: 	return d;		
c@243: }	
c@243: 
c@243: void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l, int *c) {
c@243: 	double lambda, sum, beta, logsumexp, maxlp;
c@243: 	int i, j, a, b, b0, b1, limit, B, it, maxiter, maxiter0, maxiter1;
c@243: 	double** cl;	/* reference histograms for each cluster */
c@243: 	int** nc;	/* neighbour counts for each histogram */
c@243: 	double** lp;	/* soft assignment probs for each histogram */
c@243: 	int* oldc;	/* previous hard assignments (to check convergence) */
c@243: 	
c@243: 	/* NB h is passed as a 1d row major array */
c@243: 	
c@243: 	/* parameter values */
c@243: 	lambda = DEFAULT_LAMBDA;
c@243: 	if (l > 0)
c@243: 		limit = l;
c@243: 	else
c@243: 		limit = DEFAULT_LIMIT;		/* use default if no valid neighbourhood limit supplied */
c@243: 	B = 2 * limit + 1;
c@243: 	maxiter0 = 20;	/* number of iterations at initial temperature */
c@243: 	maxiter1 = 5;	/* number of iterations at subsequent temperatures */
c@243: 	
c@243: 	/* allocate memory */	
c@243: 	cl = (double**) malloc(k*sizeof(double*));
c@243: 	for (i= 0; i < k; i++)
c@243: 		cl[i] = (double*) malloc(m*sizeof(double));
c@243: 	
c@243: 	nc = (int**) malloc(n*sizeof(int*));
c@243: 	for (i= 0; i < n; i++)
c@243: 		nc[i] = (int*) malloc(k*sizeof(int));
c@243: 	
c@243: 	lp = (double**) malloc(n*sizeof(double*));
c@243: 	for (i= 0; i < n; i++)
c@243: 		lp[i] = (double*) malloc(k*sizeof(double));
c@243: 	
c@243: 	oldc = (int*) malloc(n * sizeof(int));
c@243: 	
c@243: 	/* initialise */
c@243: 	for (i = 0; i < k; i++)
c@243: 	{
c@243: 		sum = 0;
c@243: 		for (j = 0; j < m; j++)
c@243: 		{
c@243: 			cl[i][j] = rand();	/* random initial reference histograms */
c@243: 			sum += cl[i][j] * cl[i][j];
c@243: 		}
c@243: 		sum = sqrt(sum);
c@243: 		for (j = 0; j < m; j++)
c@243: 		{
c@243: 			cl[i][j] /= sum;	/* normalise */
c@243: 		}
c@243: 	}	
c@243: 	//print_array(cl, k, m);
c@243: 	
c@243: 	for (i = 0; i < n; i++)
c@243: 		c[i] = 1;	/* initially assign all histograms to cluster 1 */
c@243: 	
c@243: 	for (a = 0; a < t; a++)
c@243: 	{
c@243: 		beta = Bsched[a];
c@243: 		
c@243: 		if (a == 0)
c@243: 			maxiter = maxiter0;
c@243: 		else
c@243: 			maxiter = maxiter1;
c@243: 		
c@243: 		for (it = 0; it < maxiter; it++)
c@243: 		{
c@243: 			//if (it == maxiter - 1)
c@243: 			//	mexPrintf("hasn't converged after %d iterations\n", maxiter);
c@243: 			
c@243: 			for (i = 0; i < n; i++)
c@243: 			{
c@243: 				/* save current hard assignments */
c@243: 				oldc[i] = c[i];
c@243: 				
c@243: 				/* calculate soft assignment logprobs for each cluster */
c@243: 				sum = 0;
c@243: 				for (j = 0; j < k; j++)
c@243: 				{
c@243: 					lp[i][ j] = -beta * kldist(cl[j], &h[i*m], m);
c@243: 					
c@243: 					/* update matching neighbour counts for this histogram, based on current hard assignments */
c@243: 					/* old version:
c@243: 					nc[i][j] = 0;	
c@243: 					if (i >= limit && i <= n - 1 - limit)
c@243: 					{
c@243: 							for (b = i - limit; b <= i + limit; b++)
c@243: 							{
c@243: 								if (c[b] == j+1)
c@243: 									nc[i][j]++;
c@243: 							}
c@243: 							nc[i][j] = B - nc[i][j];
c@243: 					}
c@243: 					*/
c@243: 					b0 = i - limit;
c@243: 					if (b0 < 0)
c@243: 						b0 = 0;
c@243: 					b1 = i + limit;
c@243: 					if (b1 >= n)
c@243: 						b1 = n - 1;
c@243: 					nc[i][j] = b1 - b0 + 1;		/* = B except at edges */
c@243: 					for (b = b0; b <= b1; b++)
c@243: 						if (c[b] == j+1)
c@243: 							nc[i][j]--;
c@243: 					
c@243: 					sum += exp(lp[i][j]);
c@243: 				}
c@243: 				
c@243: 				/* normalise responsibilities and add duration logprior */
c@243: 				logsumexp = log(sum);
c@243: 				for (j = 0; j < k; j++)
c@243: 					lp[i][j] -= logsumexp + lambda * nc[i][j];				
c@243: 			}
c@243: 			//print_array(lp, n, k);
c@243: 			/*
c@243: 			for (i = 0; i < n; i++)
c@243: 			{
c@243: 				 for (j = 0; j < k; j++)
c@243: 					 mexPrintf("%d ", nc[i][j]);
c@243: 				 mexPrintf("\n");
c@243: 			} 
c@243: 			*/
c@243: 			
c@243: 			
c@243: 			/* update the assignments now that we know the duration priors
c@243: 			based on the current assignments */
c@243: 			for (i = 0; i < n; i++)
c@243: 			{
c@243: 				maxlp = lp[i][0];
c@243: 				c[i] = 1;
c@243: 				for (j = 1; j < k; j++)
c@243: 					if (lp[i][j] > maxlp)
c@243: 					{
c@243: 						maxlp = lp[i][j];
c@243: 						c[i] = j+1;
c@243: 					}
c@243: 			}
c@243: 				
c@243: 			/* break if assignments haven't changed */
c@243: 			i = 0;
c@243: 			while (i < n && oldc[i] == c[i])
c@243: 				i++;
c@243: 			if (i == n)
c@243: 				break;
c@243: 			
c@243: 			/* update reference histograms now we know new responsibilities */
c@243: 			for (j = 0; j < k; j++)
c@243: 			{
c@243: 				for (b = 0; b < m; b++)
c@243: 				{
c@243: 					cl[j][b] = 0;
c@243: 					for (i = 0; i < n; i++)
c@243: 					{
c@243: 						cl[j][b] += exp(lp[i][j]) * h[i*m+b];
c@243: 					}	
c@243: 				}
c@243: 				
c@243: 				sum = 0;				
c@243: 				for (i = 0; i < n; i++)
c@243: 					sum += exp(lp[i][j]);
c@243: 				for (b = 0; b < m; b++)
c@243: 					cl[j][b] /= sum;	/* normalise */
c@243: 			}	
c@243: 			
c@243: 			//print_array(cl, k, m);
c@243: 			//mexPrintf("\n\n");
c@243: 		}
c@243: 	}
c@243: 		
c@243: 	/* free memory */
c@243: 	for (i = 0; i < k; i++)
c@243: 		free(cl[i]);
c@243: 	free(cl);
c@243: 	for (i = 0; i < n; i++)
c@243: 		free(nc[i]);
c@243: 	free(nc);
c@243: 	for (i = 0; i < n; i++)
c@243: 		free(lp[i]);
c@243: 	free(lp);
c@243: 	free(oldc);	
c@243: }
c@243: 
c@243: