annotate dsp/segmentation/cluster_melt.c @ 30:a251fb0de594

* Make MFCC able to accept already-FFT'd input, and simplify API a bit * Add log power value to MFCC, restore windowing, and avoid some heap allocs * In HMM, bail out of iteration if loglik hits NaN
author cannam
date Fri, 18 Jan 2008 13:24:12 +0000
parents 8e90a56b4b5f
children e5907ae6de17
rev   line source
cannam@18 1 /*
cannam@18 2 * cluster.c
cannam@18 3 * cluster_melt
cannam@18 4 *
cannam@18 5 * Created by Mark Levy on 21/02/2006.
cannam@18 6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved.
cannam@18 7 *
cannam@18 8 */
cannam@18 9
cannam@18 10 #include <stdlib.h>
cannam@18 11
cannam@18 12 #include "cluster_melt.h"
cannam@18 13
cannam@18 14 #define DEFAULT_LAMBDA 0.02;
cannam@18 15 #define DEFAULT_LIMIT 20;
cannam@18 16
cannam@18 17 double kldist(double* a, double* b, int n) {
cannam@18 18 /* NB assume that all a[i], b[i] are non-negative
cannam@18 19 because a, b represent probability distributions */
cannam@18 20 double q, d;
cannam@18 21 int i;
cannam@18 22
cannam@18 23 d = 0;
cannam@18 24 for (i = 0; i < n; i++)
cannam@18 25 {
cannam@18 26 q = (a[i] + b[i]) / 2.0;
cannam@18 27 if (q > 0)
cannam@18 28 {
cannam@18 29 if (a[i] > 0)
cannam@18 30 d += a[i] * log(a[i] / q);
cannam@18 31 if (b[i] > 0)
cannam@18 32 d += b[i] * log(b[i] / q);
cannam@18 33 }
cannam@18 34 }
cannam@18 35 return d;
cannam@18 36 }
cannam@18 37
cannam@18 38 void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l, int *c) {
cannam@18 39 double lambda, sum, beta, logsumexp, maxlp;
cannam@18 40 int i, j, a, b, b0, b1, limit, B, it, maxiter, maxiter0, maxiter1;
cannam@18 41 double** cl; /* reference histograms for each cluster */
cannam@18 42 int** nc; /* neighbour counts for each histogram */
cannam@18 43 double** lp; /* soft assignment probs for each histogram */
cannam@18 44 int* oldc; /* previous hard assignments (to check convergence) */
cannam@18 45
cannam@18 46 /* NB h is passed as a 1d row major array */
cannam@18 47
cannam@18 48 /* parameter values */
cannam@18 49 lambda = DEFAULT_LAMBDA;
cannam@18 50 if (l > 0)
cannam@18 51 limit = l;
cannam@18 52 else
cannam@18 53 limit = DEFAULT_LIMIT; /* use default if no valid neighbourhood limit supplied */
cannam@18 54 B = 2 * limit + 1;
cannam@18 55 maxiter0 = 20; /* number of iterations at initial temperature */
cannam@18 56 maxiter1 = 5; /* number of iterations at subsequent temperatures */
cannam@18 57
cannam@18 58 /* allocate memory */
cannam@18 59 cl = (double**) malloc(k*sizeof(double*));
cannam@18 60 for (i= 0; i < k; i++)
cannam@18 61 cl[i] = (double*) malloc(m*sizeof(double));
cannam@18 62
cannam@18 63 nc = (int**) malloc(n*sizeof(int*));
cannam@18 64 for (i= 0; i < n; i++)
cannam@18 65 nc[i] = (int*) malloc(k*sizeof(int));
cannam@18 66
cannam@18 67 lp = (double**) malloc(n*sizeof(double*));
cannam@18 68 for (i= 0; i < n; i++)
cannam@18 69 lp[i] = (double*) malloc(k*sizeof(double));
cannam@18 70
cannam@18 71 oldc = (int*) malloc(n * sizeof(int));
cannam@18 72
cannam@18 73 /* initialise */
cannam@18 74 for (i = 0; i < k; i++)
cannam@18 75 {
cannam@18 76 sum = 0;
cannam@18 77 for (j = 0; j < m; j++)
cannam@18 78 {
cannam@18 79 cl[i][j] = rand(); /* random initial reference histograms */
cannam@18 80 sum += cl[i][j] * cl[i][j];
cannam@18 81 }
cannam@18 82 sum = sqrt(sum);
cannam@18 83 for (j = 0; j < m; j++)
cannam@18 84 {
cannam@18 85 cl[i][j] /= sum; /* normalise */
cannam@18 86 }
cannam@18 87 }
cannam@18 88 //print_array(cl, k, m);
cannam@18 89
cannam@18 90 for (i = 0; i < n; i++)
cannam@18 91 c[i] = 1; /* initially assign all histograms to cluster 1 */
cannam@18 92
cannam@18 93 for (a = 0; a < t; a++)
cannam@18 94 {
cannam@18 95 beta = Bsched[a];
cannam@18 96
cannam@18 97 if (a == 0)
cannam@18 98 maxiter = maxiter0;
cannam@18 99 else
cannam@18 100 maxiter = maxiter1;
cannam@18 101
cannam@18 102 for (it = 0; it < maxiter; it++)
cannam@18 103 {
cannam@18 104 //if (it == maxiter - 1)
cannam@18 105 // mexPrintf("hasn't converged after %d iterations\n", maxiter);
cannam@18 106
cannam@18 107 for (i = 0; i < n; i++)
cannam@18 108 {
cannam@18 109 /* save current hard assignments */
cannam@18 110 oldc[i] = c[i];
cannam@18 111
cannam@18 112 /* calculate soft assignment logprobs for each cluster */
cannam@18 113 sum = 0;
cannam@18 114 for (j = 0; j < k; j++)
cannam@18 115 {
cannam@18 116 lp[i][ j] = -beta * kldist(cl[j], &h[i*m], m);
cannam@18 117
cannam@18 118 /* update matching neighbour counts for this histogram, based on current hard assignments */
cannam@18 119 /* old version:
cannam@18 120 nc[i][j] = 0;
cannam@18 121 if (i >= limit && i <= n - 1 - limit)
cannam@18 122 {
cannam@18 123 for (b = i - limit; b <= i + limit; b++)
cannam@18 124 {
cannam@18 125 if (c[b] == j+1)
cannam@18 126 nc[i][j]++;
cannam@18 127 }
cannam@18 128 nc[i][j] = B - nc[i][j];
cannam@18 129 }
cannam@18 130 */
cannam@18 131 b0 = i - limit;
cannam@18 132 if (b0 < 0)
cannam@18 133 b0 = 0;
cannam@18 134 b1 = i + limit;
cannam@18 135 if (b1 >= n)
cannam@18 136 b1 = n - 1;
cannam@18 137 nc[i][j] = b1 - b0 + 1; /* = B except at edges */
cannam@18 138 for (b = b0; b <= b1; b++)
cannam@18 139 if (c[b] == j+1)
cannam@18 140 nc[i][j]--;
cannam@18 141
cannam@18 142 sum += exp(lp[i][j]);
cannam@18 143 }
cannam@18 144
cannam@18 145 /* normalise responsibilities and add duration logprior */
cannam@18 146 logsumexp = log(sum);
cannam@18 147 for (j = 0; j < k; j++)
cannam@18 148 lp[i][j] -= logsumexp + lambda * nc[i][j];
cannam@18 149 }
cannam@18 150 //print_array(lp, n, k);
cannam@18 151 /*
cannam@18 152 for (i = 0; i < n; i++)
cannam@18 153 {
cannam@18 154 for (j = 0; j < k; j++)
cannam@18 155 mexPrintf("%d ", nc[i][j]);
cannam@18 156 mexPrintf("\n");
cannam@18 157 }
cannam@18 158 */
cannam@18 159
cannam@18 160
cannam@18 161 /* update the assignments now that we know the duration priors
cannam@18 162 based on the current assignments */
cannam@18 163 for (i = 0; i < n; i++)
cannam@18 164 {
cannam@18 165 maxlp = lp[i][0];
cannam@18 166 c[i] = 1;
cannam@18 167 for (j = 1; j < k; j++)
cannam@18 168 if (lp[i][j] > maxlp)
cannam@18 169 {
cannam@18 170 maxlp = lp[i][j];
cannam@18 171 c[i] = j+1;
cannam@18 172 }
cannam@18 173 }
cannam@18 174
cannam@18 175 /* break if assignments haven't changed */
cannam@18 176 i = 0;
cannam@18 177 while (i < n && oldc[i] == c[i])
cannam@18 178 i++;
cannam@18 179 if (i == n)
cannam@18 180 break;
cannam@18 181
cannam@18 182 /* update reference histograms now we know new responsibilities */
cannam@18 183 for (j = 0; j < k; j++)
cannam@18 184 {
cannam@18 185 for (b = 0; b < m; b++)
cannam@18 186 {
cannam@18 187 cl[j][b] = 0;
cannam@18 188 for (i = 0; i < n; i++)
cannam@18 189 {
cannam@18 190 cl[j][b] += exp(lp[i][j]) * h[i*m+b];
cannam@18 191 }
cannam@18 192 }
cannam@18 193
cannam@18 194 sum = 0;
cannam@18 195 for (i = 0; i < n; i++)
cannam@18 196 sum += exp(lp[i][j]);
cannam@18 197 for (b = 0; b < m; b++)
cannam@18 198 cl[j][b] /= sum; /* normalise */
cannam@18 199 }
cannam@18 200
cannam@18 201 //print_array(cl, k, m);
cannam@18 202 //mexPrintf("\n\n");
cannam@18 203 }
cannam@18 204 }
cannam@18 205
cannam@18 206 /* free memory */
cannam@18 207 for (i = 0; i < k; i++)
cannam@18 208 free(cl[i]);
cannam@18 209 free(cl);
cannam@18 210 for (i = 0; i < n; i++)
cannam@18 211 free(nc[i]);
cannam@18 212 free(nc);
cannam@18 213 for (i = 0; i < n; i++)
cannam@18 214 free(lp[i]);
cannam@18 215 free(lp);
cannam@18 216 free(oldc);
cannam@18 217 }
cannam@18 218
cannam@18 219