annotate dsp/segmentation/cluster_melt.c @ 243:dc30e3864ceb

* merge in segmentation code from soundbite plugin/library repository
author Chris Cannam <c.cannam@qmul.ac.uk>
date Wed, 09 Jan 2008 10:46:25 +0000
parents
children e5907ae6de17
rev   line source
c@243 1 /*
c@243 2 * cluster.c
c@243 3 * cluster_melt
c@243 4 *
c@243 5 * Created by Mark Levy on 21/02/2006.
c@243 6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved.
c@243 7 *
c@243 8 */
c@243 9
c@243 10 #include <stdlib.h>
c@243 11
c@243 12 #include "cluster_melt.h"
c@243 13
c@243 14 #define DEFAULT_LAMBDA 0.02;
c@243 15 #define DEFAULT_LIMIT 20;
c@243 16
c@243 17 double kldist(double* a, double* b, int n) {
c@243 18 /* NB assume that all a[i], b[i] are non-negative
c@243 19 because a, b represent probability distributions */
c@243 20 double q, d;
c@243 21 int i;
c@243 22
c@243 23 d = 0;
c@243 24 for (i = 0; i < n; i++)
c@243 25 {
c@243 26 q = (a[i] + b[i]) / 2.0;
c@243 27 if (q > 0)
c@243 28 {
c@243 29 if (a[i] > 0)
c@243 30 d += a[i] * log(a[i] / q);
c@243 31 if (b[i] > 0)
c@243 32 d += b[i] * log(b[i] / q);
c@243 33 }
c@243 34 }
c@243 35 return d;
c@243 36 }
c@243 37
c@243 38 void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l, int *c) {
c@243 39 double lambda, sum, beta, logsumexp, maxlp;
c@243 40 int i, j, a, b, b0, b1, limit, B, it, maxiter, maxiter0, maxiter1;
c@243 41 double** cl; /* reference histograms for each cluster */
c@243 42 int** nc; /* neighbour counts for each histogram */
c@243 43 double** lp; /* soft assignment probs for each histogram */
c@243 44 int* oldc; /* previous hard assignments (to check convergence) */
c@243 45
c@243 46 /* NB h is passed as a 1d row major array */
c@243 47
c@243 48 /* parameter values */
c@243 49 lambda = DEFAULT_LAMBDA;
c@243 50 if (l > 0)
c@243 51 limit = l;
c@243 52 else
c@243 53 limit = DEFAULT_LIMIT; /* use default if no valid neighbourhood limit supplied */
c@243 54 B = 2 * limit + 1;
c@243 55 maxiter0 = 20; /* number of iterations at initial temperature */
c@243 56 maxiter1 = 5; /* number of iterations at subsequent temperatures */
c@243 57
c@243 58 /* allocate memory */
c@243 59 cl = (double**) malloc(k*sizeof(double*));
c@243 60 for (i= 0; i < k; i++)
c@243 61 cl[i] = (double*) malloc(m*sizeof(double));
c@243 62
c@243 63 nc = (int**) malloc(n*sizeof(int*));
c@243 64 for (i= 0; i < n; i++)
c@243 65 nc[i] = (int*) malloc(k*sizeof(int));
c@243 66
c@243 67 lp = (double**) malloc(n*sizeof(double*));
c@243 68 for (i= 0; i < n; i++)
c@243 69 lp[i] = (double*) malloc(k*sizeof(double));
c@243 70
c@243 71 oldc = (int*) malloc(n * sizeof(int));
c@243 72
c@243 73 /* initialise */
c@243 74 for (i = 0; i < k; i++)
c@243 75 {
c@243 76 sum = 0;
c@243 77 for (j = 0; j < m; j++)
c@243 78 {
c@243 79 cl[i][j] = rand(); /* random initial reference histograms */
c@243 80 sum += cl[i][j] * cl[i][j];
c@243 81 }
c@243 82 sum = sqrt(sum);
c@243 83 for (j = 0; j < m; j++)
c@243 84 {
c@243 85 cl[i][j] /= sum; /* normalise */
c@243 86 }
c@243 87 }
c@243 88 //print_array(cl, k, m);
c@243 89
c@243 90 for (i = 0; i < n; i++)
c@243 91 c[i] = 1; /* initially assign all histograms to cluster 1 */
c@243 92
c@243 93 for (a = 0; a < t; a++)
c@243 94 {
c@243 95 beta = Bsched[a];
c@243 96
c@243 97 if (a == 0)
c@243 98 maxiter = maxiter0;
c@243 99 else
c@243 100 maxiter = maxiter1;
c@243 101
c@243 102 for (it = 0; it < maxiter; it++)
c@243 103 {
c@243 104 //if (it == maxiter - 1)
c@243 105 // mexPrintf("hasn't converged after %d iterations\n", maxiter);
c@243 106
c@243 107 for (i = 0; i < n; i++)
c@243 108 {
c@243 109 /* save current hard assignments */
c@243 110 oldc[i] = c[i];
c@243 111
c@243 112 /* calculate soft assignment logprobs for each cluster */
c@243 113 sum = 0;
c@243 114 for (j = 0; j < k; j++)
c@243 115 {
c@243 116 lp[i][ j] = -beta * kldist(cl[j], &h[i*m], m);
c@243 117
c@243 118 /* update matching neighbour counts for this histogram, based on current hard assignments */
c@243 119 /* old version:
c@243 120 nc[i][j] = 0;
c@243 121 if (i >= limit && i <= n - 1 - limit)
c@243 122 {
c@243 123 for (b = i - limit; b <= i + limit; b++)
c@243 124 {
c@243 125 if (c[b] == j+1)
c@243 126 nc[i][j]++;
c@243 127 }
c@243 128 nc[i][j] = B - nc[i][j];
c@243 129 }
c@243 130 */
c@243 131 b0 = i - limit;
c@243 132 if (b0 < 0)
c@243 133 b0 = 0;
c@243 134 b1 = i + limit;
c@243 135 if (b1 >= n)
c@243 136 b1 = n - 1;
c@243 137 nc[i][j] = b1 - b0 + 1; /* = B except at edges */
c@243 138 for (b = b0; b <= b1; b++)
c@243 139 if (c[b] == j+1)
c@243 140 nc[i][j]--;
c@243 141
c@243 142 sum += exp(lp[i][j]);
c@243 143 }
c@243 144
c@243 145 /* normalise responsibilities and add duration logprior */
c@243 146 logsumexp = log(sum);
c@243 147 for (j = 0; j < k; j++)
c@243 148 lp[i][j] -= logsumexp + lambda * nc[i][j];
c@243 149 }
c@243 150 //print_array(lp, n, k);
c@243 151 /*
c@243 152 for (i = 0; i < n; i++)
c@243 153 {
c@243 154 for (j = 0; j < k; j++)
c@243 155 mexPrintf("%d ", nc[i][j]);
c@243 156 mexPrintf("\n");
c@243 157 }
c@243 158 */
c@243 159
c@243 160
c@243 161 /* update the assignments now that we know the duration priors
c@243 162 based on the current assignments */
c@243 163 for (i = 0; i < n; i++)
c@243 164 {
c@243 165 maxlp = lp[i][0];
c@243 166 c[i] = 1;
c@243 167 for (j = 1; j < k; j++)
c@243 168 if (lp[i][j] > maxlp)
c@243 169 {
c@243 170 maxlp = lp[i][j];
c@243 171 c[i] = j+1;
c@243 172 }
c@243 173 }
c@243 174
c@243 175 /* break if assignments haven't changed */
c@243 176 i = 0;
c@243 177 while (i < n && oldc[i] == c[i])
c@243 178 i++;
c@243 179 if (i == n)
c@243 180 break;
c@243 181
c@243 182 /* update reference histograms now we know new responsibilities */
c@243 183 for (j = 0; j < k; j++)
c@243 184 {
c@243 185 for (b = 0; b < m; b++)
c@243 186 {
c@243 187 cl[j][b] = 0;
c@243 188 for (i = 0; i < n; i++)
c@243 189 {
c@243 190 cl[j][b] += exp(lp[i][j]) * h[i*m+b];
c@243 191 }
c@243 192 }
c@243 193
c@243 194 sum = 0;
c@243 195 for (i = 0; i < n; i++)
c@243 196 sum += exp(lp[i][j]);
c@243 197 for (b = 0; b < m; b++)
c@243 198 cl[j][b] /= sum; /* normalise */
c@243 199 }
c@243 200
c@243 201 //print_array(cl, k, m);
c@243 202 //mexPrintf("\n\n");
c@243 203 }
c@243 204 }
c@243 205
c@243 206 /* free memory */
c@243 207 for (i = 0; i < k; i++)
c@243 208 free(cl[i]);
c@243 209 free(cl);
c@243 210 for (i = 0; i < n; i++)
c@243 211 free(nc[i]);
c@243 212 free(nc);
c@243 213 for (i = 0; i < n; i++)
c@243 214 free(lp[i]);
c@243 215 free(lp);
c@243 216 free(oldc);
c@243 217 }
c@243 218
c@243 219