annotate dsp/segmentation/cluster_segmenter.c @ 298:255e431ae3d4

* Key detector: when returning key strengths, use the peak value of the three underlying chromagram correlations (from 36-bin chromagram) corresponding to each key, instead of the mean. Rationale: This is the same method as used when returning the key value, and it's nice to have the same results in both returned value and plot. The peak performed better than the sum with a simple test set of triads, so it seems reasonable to change the plot to match the key output rather than the other way around. * FFT: kiss_fftr returns only the non-conjugate bins, synthesise the rest rather than leaving them (perhaps dangerously) undefined. Fixes an uninitialised data error in chromagram that could cause garbage results from key detector. * Constant Q: remove precalculated values again, I reckon they're not proving such a good tradeoff.
author Chris Cannam <c.cannam@qmul.ac.uk>
date Fri, 05 Jun 2009 15:12:39 +0000
parents 5e125f030287
children e5907ae6de17
rev   line source
c@243 1 /*
c@243 2 * cluster_segmenter.c
c@243 3 * soundbite
c@243 4 *
c@243 5 * Created by Mark Levy on 06/04/2006.
c@243 6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved.
c@243 7 *
c@243 8 */
c@243 9
c@243 10 #include "cluster_segmenter.h"
c@243 11
c@243 12 extern int readmatarray_size(const char *filepath, int n_array, int* t, int* d);
c@243 13 extern int readmatarray(const char *filepath, int n_array, int t, int d, double** arr);
c@243 14
c@243 15 /* converts constant-Q features to normalised chroma */
c@243 16 void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma)
c@243 17 {
c@243 18 int noct = ncoeff / bins; /* number of complete octaves in constant-Q */
c@243 19 int t, b, oct, ix;
c@243 20 //double maxchroma; /* max chroma value at each time, for normalisation */
c@243 21 //double sum; /* for normalisation */
c@243 22
c@243 23 for (t = 0; t < nframes; t++)
c@243 24 {
c@243 25 for (b = 0; b < bins; b++)
c@243 26 chroma[t][b] = 0;
c@243 27 for (oct = 0; oct < noct; oct++)
c@243 28 {
c@243 29 ix = oct * bins;
c@243 30 for (b = 0; b < bins; b++)
c@243 31 chroma[t][b] += fabs(cq[t][ix+b]);
c@243 32 }
c@243 33 /* normalise to unit sum
c@243 34 sum = 0;
c@243 35 for (b = 0; b < bins; b++)
c@243 36 sum += chroma[t][b];
c@243 37 for (b = 0; b < bins; b++)
c@243 38 chroma[t][b] /= sum;
c@245 39 */
c@243 40 /* normalise to unit max - NO this made results much worse!
c@243 41 maxchroma = 0;
c@243 42 for (b = 0; b < bins; b++)
c@243 43 if (chroma[t][b] > maxchroma)
c@243 44 maxchroma = chroma[t][b];
c@243 45 if (maxchroma > 0)
c@243 46 for (b = 0; b < bins; b++)
c@243 47 chroma[t][b] /= maxchroma;
c@243 48 */
c@243 49 }
c@243 50 }
c@243 51
c@243 52 /* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */
c@243 53 void mpeg7_constq(double** features, int nframes, int ncoeff)
c@243 54 {
c@243 55 int i, j;
c@243 56 double ss;
c@243 57 double env;
c@243 58 double maxenv = 0;
c@243 59
c@243 60 /* convert const-Q features to dB scale */
c@243 61 for (i = 0; i < nframes; i++)
c@243 62 for (j = 0; j < ncoeff; j++)
c@243 63 features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON);
c@243 64
c@243 65 /* normalise each feature vector and add the norm as an extra feature dimension */
c@243 66 for (i = 0; i < nframes; i++)
c@243 67 {
c@243 68 ss = 0;
c@243 69 for (j = 0; j < ncoeff; j++)
c@243 70 ss += features[i][j] * features[i][j];
c@243 71 env = sqrt(ss);
c@243 72 for (j = 0; j < ncoeff; j++)
c@243 73 features[i][j] /= env;
c@243 74 features[i][ncoeff] = env;
c@243 75 if (env > maxenv)
c@243 76 maxenv = env;
c@243 77 }
c@243 78 /* normalise the envelopes */
c@243 79 for (i = 0; i < nframes; i++)
c@243 80 features[i][ncoeff] /= maxenv;
c@243 81 }
c@243 82
c@243 83 /* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */
c@243 84 /* NB h is a vector in row major order, as required by cluster_melt() */
c@243 85 /* for historical reasons we normalise the histograms by their norm (not to sum to one) */
c@243 86 void create_histograms(int* x, int nx, int m, int hlen, double* h)
c@243 87 {
c@243 88 int i, j, t;
c@243 89 double norm;
c@266 90
c@266 91 for (i = 0; i < nx*m; i++)
c@266 92 h[i] = 0;
c@266 93
c@243 94 for (i = hlen/2; i < nx-hlen/2; i++)
c@243 95 {
c@243 96 for (j = 0; j < m; j++)
c@243 97 h[i*m+j] = 0;
c@243 98 for (t = i-hlen/2; t <= i+hlen/2; t++)
c@243 99 ++h[i*m+x[t]];
c@243 100 norm = 0;
c@243 101 for (j = 0; j < m; j++)
c@243 102 norm += h[i*m+j] * h[i*m+j];
c@243 103 for (j = 0; j < m; j++)
c@243 104 h[i*m+j] /= norm;
c@243 105 }
c@243 106
c@243 107 /* duplicate histograms at beginning and end to create one histogram for each data value supplied */
c@243 108 for (i = 0; i < hlen/2; i++)
c@243 109 for (j = 0; j < m; j++)
c@243 110 h[i*m+j] = h[hlen/2*m+j];
c@243 111 for (i = nx-hlen/2; i < nx; i++)
c@243 112 for (j = 0; j < m; j++)
c@243 113 h[i*m+j] = h[(nx-hlen/2-1)*m+j];
c@243 114 }
c@243 115
c@243 116 /* segment using HMM and then histogram clustering */
c@243 117 void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
c@243 118 int histogram_length, int nclusters, int neighbour_limit)
c@243 119 {
c@243 120 int i, j;
c@243 121
c@243 122 /*****************************/
c@243 123 if (0) {
c@243 124 /* try just using the predominant bin number as a 'decoded state' */
c@243 125 nHMM_states = feature_length + 1; /* allow a 'zero' state */
c@243 126 double chroma_thresh = 0.05;
c@243 127 double maxval;
c@243 128 int maxbin;
c@243 129 for (i = 0; i < frames_read; i++)
c@243 130 {
c@243 131 maxval = 0;
c@243 132 for (j = 0; j < feature_length; j++)
c@243 133 {
c@243 134 if (features[i][j] > maxval)
c@243 135 {
c@243 136 maxval = features[i][j];
c@243 137 maxbin = j;
c@243 138 }
c@243 139 }
c@243 140 if (maxval > chroma_thresh)
c@243 141 q[i] = maxbin;
c@243 142 else
c@243 143 q[i] = feature_length;
c@243 144 }
c@243 145
c@243 146 }
c@243 147 if (1) {
c@243 148 /*****************************/
c@243 149
c@243 150
c@243 151 /* scale all the features to 'balance covariances' during HMM training */
c@243 152 double scale = 10;
c@243 153 for (i = 0; i < frames_read; i++)
c@243 154 for (j = 0; j < feature_length; j++)
c@243 155 features[i][j] *= scale;
c@243 156
c@243 157 /* train an HMM on the features */
c@243 158
c@243 159 /* create a model */
c@243 160 model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);
c@243 161
c@243 162 /* train the model */
c@243 163 hmm_train(features, frames_read, model);
c@283 164 /*
c@243 165 printf("\n\nafter training:\n");
c@243 166 hmm_print(model);
c@283 167 */
c@243 168 /* decode the hidden state sequence */
c@243 169 viterbi_decode(features, frames_read, model, q);
c@243 170 hmm_close(model);
c@243 171
c@243 172 /*****************************/
c@243 173 }
c@243 174 /*****************************/
c@243 175
c@243 176
c@283 177 /*
c@243 178 fprintf(stderr, "HMM state sequence:\n");
c@243 179 for (i = 0; i < frames_read; i++)
c@243 180 fprintf(stderr, "%d ", q[i]);
c@243 181 fprintf(stderr, "\n\n");
c@283 182 */
c@243 183
c@243 184 /* create histograms of states */
c@243 185 double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */
c@243 186 create_histograms(q, frames_read, nHMM_states, histogram_length, h);
c@243 187
c@243 188 /* cluster the histograms */
c@243 189 int nbsched = 20; /* length of inverse temperature schedule */
c@243 190 double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */
c@243 191 double b0 = 100;
c@243 192 double alpha = 0.7;
c@243 193 bsched[0] = b0;
c@243 194 for (i = 1; i < nbsched; i++)
c@243 195 bsched[i] = alpha * bsched[i-1];
c@243 196 cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);
c@243 197
c@243 198 /* now q holds a sequence of cluster assignments */
c@243 199
c@243 200 free(h);
c@243 201 free(bsched);
c@243 202 }
c@243 203
c@243 204 /* segment constant-Q or chroma features */
c@243 205 void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
c@243 206 int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
c@243 207 {
c@243 208 int feature_length;
c@243 209 double** chroma;
c@243 210 int i;
c@243 211
c@243 212 if (feature_type == FEATURE_TYPE_CONSTQ)
c@243 213 {
c@283 214 /* fprintf(stderr, "Converting to dB and normalising...\n");
c@283 215 */
c@243 216 mpeg7_constq(features, frames_read, ncoeff);
c@283 217 /*
c@243 218 fprintf(stderr, "Running PCA...\n");
c@283 219 */
c@243 220 /* do PCA on the features (but not the envelope) */
c@243 221 int ncomponents = 20;
c@243 222 pca_project(features, frames_read, ncoeff, ncomponents);
c@243 223
c@243 224 /* copy the envelope so that it immediatly follows the chosen components */
c@243 225 for (i = 0; i < frames_read; i++)
c@243 226 features[i][ncomponents] = features[i][ncoeff];
c@243 227
c@243 228 feature_length = ncomponents + 1;
c@243 229
c@243 230 /**************************************
c@243 231 //TEST
c@243 232 // feature file name
c@243 233 char* dir = "/Users/mark/documents/semma/audio/";
c@243 234 char* file_name = (char*) malloc((strlen(dir) + strlen(trackname) + strlen("_features_c20r8h0.2f0.6.mat") + 1)*sizeof(char));
c@243 235 strcpy(file_name, dir);
c@243 236 strcat(file_name, trackname);
c@243 237 strcat(file_name, "_features_c20r8h0.2f0.6.mat");
c@243 238
c@243 239 // get the features from Matlab from mat-file
c@243 240 int frames_in_file;
c@243 241 readmatarray_size(file_name, 2, &frames_in_file, &feature_length);
c@243 242 readmatarray(file_name, 2, frames_in_file, feature_length, features);
c@243 243 // copy final frame to ensure that we get as many as we expected
c@243 244 int missing_frames = frames_read - frames_in_file;
c@243 245 while (missing_frames > 0)
c@243 246 {
c@243 247 for (i = 0; i < feature_length; i++)
c@243 248 features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i];
c@243 249 --missing_frames;
c@243 250 }
c@243 251
c@243 252 free(file_name);
c@243 253 ******************************************/
c@243 254
c@243 255 cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
c@243 256 }
c@243 257
c@243 258 if (feature_type == FEATURE_TYPE_CHROMA)
c@243 259 {
c@283 260 /*
c@243 261 fprintf(stderr, "Converting to chroma features...\n");
c@283 262 */
c@243 263 /* convert constant-Q to normalised chroma features */
c@243 264 chroma = (double**) malloc(frames_read*sizeof(double*));
c@243 265 for (i = 0; i < frames_read; i++)
c@243 266 chroma[i] = (double*) malloc(bins*sizeof(double));
c@243 267 cq2chroma(features, frames_read, ncoeff, bins, chroma);
c@243 268 feature_length = bins;
c@243 269
c@243 270 cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
c@243 271
c@243 272 for (i = 0; i < frames_read; i++)
c@243 273 free(chroma[i]);
c@243 274 free(chroma);
c@243 275 }
c@243 276 }
c@243 277
c@243 278
c@243 279