cannam@484: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ c@243: /* c@243: * cluster.c c@243: * cluster_melt c@243: * c@243: * Created by Mark Levy on 21/02/2006. c@309: * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. c@309: c@309: This program is free software; you can redistribute it and/or c@309: modify it under the terms of the GNU General Public License as c@309: published by the Free Software Foundation; either version 2 of the c@309: License, or (at your option) any later version. See the file c@309: COPYING included with this distribution for more information. c@243: * c@243: */ c@243: c@243: #include c@243: c@243: #include "cluster_melt.h" c@243: c@243: #define DEFAULT_LAMBDA 0.02; c@243: #define DEFAULT_LIMIT 20; c@243: c@243: double kldist(double* a, double* b, int n) { cannam@480: /* NB assume that all a[i], b[i] are non-negative cannam@480: because a, b represent probability distributions */ cannam@480: double q, d; cannam@480: int i; cannam@480: cannam@480: d = 0; cannam@480: for (i = 0; i < n; i++) { cannam@480: q = (a[i] + b[i]) / 2.0; cannam@480: if (q > 0) { cannam@480: if (a[i] > 0) { cannam@480: d += a[i] * log(a[i] / q); cannam@480: } cannam@480: if (b[i] > 0) { cannam@480: d += b[i] * log(b[i] / q); cannam@480: } cannam@480: } cannam@480: } cannam@480: return d; cannam@480: } c@243: c@243: void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l, int *c) { cannam@480: double lambda, sum, beta, logsumexp, maxlp; cannam@480: int i, j, a, b, b0, b1, limit, /* B, */ it, maxiter, maxiter0, maxiter1; cannam@480: double** cl; /* reference histograms for each cluster */ cannam@480: int** nc; /* neighbour counts for each histogram */ cannam@480: double** lp; /* soft assignment probs for each histogram */ cannam@480: int* oldc; /* previous hard assignments (to check convergence) */ cannam@480: cannam@480: /* NB h is passed as a 1d row major array */ cannam@480: cannam@480: /* parameter values */ cannam@480: lambda = DEFAULT_LAMBDA; cannam@480: if (l > 0) { cannam@480: limit = l; cannam@480: } else { cannam@480: limit = DEFAULT_LIMIT; /* use default if no valid neighbourhood limit supplied */ cannam@480: } cannam@480: cannam@480: maxiter0 = 20; /* number of iterations at initial temperature */ cannam@480: maxiter1 = 5; /* number of iterations at subsequent temperatures */ cannam@480: cannam@480: /* allocate memory */ cannam@480: cl = (double**) malloc(k*sizeof(double*)); cannam@480: for (i= 0; i < k; i++) { cannam@480: cl[i] = (double*) malloc(m*sizeof(double)); cannam@480: } cannam@480: cannam@480: nc = (int**) malloc(n*sizeof(int*)); cannam@480: for (i= 0; i < n; i++) { cannam@480: nc[i] = (int*) malloc(k*sizeof(int)); cannam@480: } cannam@480: cannam@480: lp = (double**) malloc(n*sizeof(double*)); cannam@480: for (i= 0; i < n; i++) { cannam@480: lp[i] = (double*) malloc(k*sizeof(double)); cannam@480: } cannam@480: cannam@480: oldc = (int*) malloc(n * sizeof(int)); cannam@480: cannam@480: /* initialise */ cannam@480: for (i = 0; i < k; i++) { cannam@480: sum = 0; cannam@480: for (j = 0; j < m; j++) { cannam@480: cl[i][j] = rand(); /* random initial reference histograms */ cannam@480: sum += cl[i][j] * cl[i][j]; cannam@480: } cannam@480: sum = sqrt(sum); cannam@480: for (j = 0; j < m; j++) { cannam@480: cl[i][j] /= sum; /* normalise */ cannam@480: } cannam@480: } cannam@480: cannam@480: for (i = 0; i < n; i++) { cannam@480: c[i] = 1; /* initially assign all histograms to cluster 1 */ cannam@480: } cannam@480: cannam@480: for (a = 0; a < t; a++) { cannam@480: cannam@480: beta = Bsched[a]; cannam@480: cannam@480: if (a == 0) { cannam@480: maxiter = maxiter0; cannam@480: } else { cannam@480: maxiter = maxiter1; cannam@480: } cannam@480: cannam@480: for (it = 0; it < maxiter; it++) { cannam@480: cannam@480: //if (it == maxiter - 1) cannam@480: // mexPrintf("hasn't converged after %d iterations\n", maxiter); cannam@480: cannam@480: for (i = 0; i < n; i++) { cannam@480: cannam@480: /* save current hard assignments */ cannam@480: oldc[i] = c[i]; cannam@480: cannam@480: /* calculate soft assignment logprobs for each cluster */ cannam@480: sum = 0; cannam@480: cannam@480: for (j = 0; j < k; j++) { cannam@480: cannam@480: lp[i][ j] = -beta * kldist(cl[j], &h[i*m], m); cannam@480: cannam@480: /* update matching neighbour counts for this histogram, based on current hard assignments */ cannam@480: cannam@480: b0 = i - limit; cannam@480: if (b0 < 0) { cannam@480: b0 = 0; cannam@480: } cannam@480: b1 = i + limit; cannam@480: if (b1 >= n) { cannam@480: b1 = n - 1; cannam@480: } cannam@480: nc[i][j] = b1 - b0 + 1; /* = B except at edges */ cannam@480: for (b = b0; b <= b1; b++) { cannam@480: if (c[b] == j+1) { cannam@480: nc[i][j]--; cannam@480: } cannam@480: } cannam@480: cannam@480: sum += exp(lp[i][j]); cannam@480: } cannam@480: cannam@480: /* normalise responsibilities and add duration logprior */ cannam@480: logsumexp = log(sum); cannam@481: for (j = 0; j < k; j++) { cannam@480: lp[i][j] -= logsumexp + lambda * nc[i][j]; cannam@480: } cannam@480: } cannam@480: cannam@480: /* update the assignments now that we know the duration priors cannam@480: based on the current assignments */ cannam@480: for (i = 0; i < n; i++) { cannam@480: maxlp = lp[i][0]; cannam@480: c[i] = 1; cannam@480: for (j = 1; j < k; j++) { cannam@480: if (lp[i][j] > maxlp) { cannam@480: maxlp = lp[i][j]; cannam@480: c[i] = j+1; cannam@480: } cannam@480: } cannam@480: } cannam@480: cannam@480: /* break if assignments haven't changed */ cannam@480: i = 0; cannam@480: while (i < n && oldc[i] == c[i]) { cannam@480: i++; cannam@480: } cannam@480: if (i == n) { cannam@480: break; cannam@480: } cannam@480: cannam@480: /* update reference histograms now we know new responsibilities */ cannam@480: for (j = 0; j < k; j++) { cannam@480: for (b = 0; b < m; b++) { cannam@480: cl[j][b] = 0; cannam@480: for (i = 0; i < n; i++) { cannam@480: cl[j][b] += exp(lp[i][j]) * h[i*m+b]; cannam@480: } cannam@480: } cannam@480: cannam@480: sum = 0; cannam@480: for (i = 0; i < n; i++) { cannam@480: sum += exp(lp[i][j]); cannam@480: } cannam@480: for (b = 0; b < m; b++) { cannam@480: cl[j][b] /= sum; /* normalise */ cannam@480: } cannam@480: } cannam@480: } cannam@480: } cannam@480: cannam@480: /* free memory */ cannam@480: for (i = 0; i < k; i++) { cannam@480: free(cl[i]); cannam@480: } cannam@480: free(cl); cannam@480: for (i = 0; i < n; i++) { cannam@480: free(nc[i]); cannam@480: } cannam@480: free(nc); cannam@480: for (i = 0; i < n; i++) { cannam@480: free(lp[i]); cannam@480: } cannam@480: free(lp); cannam@480: free(oldc); c@243: } c@243: c@243: