annotate dsp/segmentation/cluster_segmenter.c @ 73:dcb555b90924

* Key detector: when returning key strengths, use the peak value of the three underlying chromagram correlations (from 36-bin chromagram) corresponding to each key, instead of the mean. Rationale: This is the same method as used when returning the key value, and it's nice to have the same results in both returned value and plot. The peak performed better than the sum with a simple test set of triads, so it seems reasonable to change the plot to match the key output rather than the other way around. * FFT: kiss_fftr returns only the non-conjugate bins, synthesise the rest rather than leaving them (perhaps dangerously) undefined. Fixes an uninitialised data error in chromagram that could cause garbage results from key detector. * Constant Q: remove precalculated values again, I reckon they're not proving such a good tradeoff.
author cannam
date Fri, 05 Jun 2009 15:12:39 +0000
parents d72fcd34d9a7
children e5907ae6de17
rev   line source
cannam@18 1 /*
cannam@18 2 * cluster_segmenter.c
cannam@18 3 * soundbite
cannam@18 4 *
cannam@18 5 * Created by Mark Levy on 06/04/2006.
cannam@18 6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved.
cannam@18 7 *
cannam@18 8 */
cannam@18 9
cannam@18 10 #include "cluster_segmenter.h"
cannam@18 11
cannam@18 12 extern int readmatarray_size(const char *filepath, int n_array, int* t, int* d);
cannam@18 13 extern int readmatarray(const char *filepath, int n_array, int t, int d, double** arr);
cannam@18 14
cannam@18 15 /* converts constant-Q features to normalised chroma */
cannam@18 16 void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma)
cannam@18 17 {
cannam@18 18 int noct = ncoeff / bins; /* number of complete octaves in constant-Q */
cannam@18 19 int t, b, oct, ix;
cannam@18 20 //double maxchroma; /* max chroma value at each time, for normalisation */
cannam@18 21 //double sum; /* for normalisation */
cannam@18 22
cannam@18 23 for (t = 0; t < nframes; t++)
cannam@18 24 {
cannam@18 25 for (b = 0; b < bins; b++)
cannam@18 26 chroma[t][b] = 0;
cannam@18 27 for (oct = 0; oct < noct; oct++)
cannam@18 28 {
cannam@18 29 ix = oct * bins;
cannam@18 30 for (b = 0; b < bins; b++)
cannam@18 31 chroma[t][b] += fabs(cq[t][ix+b]);
cannam@18 32 }
cannam@18 33 /* normalise to unit sum
cannam@18 34 sum = 0;
cannam@18 35 for (b = 0; b < bins; b++)
cannam@18 36 sum += chroma[t][b];
cannam@18 37 for (b = 0; b < bins; b++)
cannam@18 38 chroma[t][b] /= sum;
cannam@20 39 */
cannam@18 40 /* normalise to unit max - NO this made results much worse!
cannam@18 41 maxchroma = 0;
cannam@18 42 for (b = 0; b < bins; b++)
cannam@18 43 if (chroma[t][b] > maxchroma)
cannam@18 44 maxchroma = chroma[t][b];
cannam@18 45 if (maxchroma > 0)
cannam@18 46 for (b = 0; b < bins; b++)
cannam@18 47 chroma[t][b] /= maxchroma;
cannam@18 48 */
cannam@18 49 }
cannam@18 50 }
cannam@18 51
cannam@18 52 /* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */
cannam@18 53 void mpeg7_constq(double** features, int nframes, int ncoeff)
cannam@18 54 {
cannam@18 55 int i, j;
cannam@18 56 double ss;
cannam@18 57 double env;
cannam@18 58 double maxenv = 0;
cannam@18 59
cannam@18 60 /* convert const-Q features to dB scale */
cannam@18 61 for (i = 0; i < nframes; i++)
cannam@18 62 for (j = 0; j < ncoeff; j++)
cannam@18 63 features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON);
cannam@18 64
cannam@18 65 /* normalise each feature vector and add the norm as an extra feature dimension */
cannam@18 66 for (i = 0; i < nframes; i++)
cannam@18 67 {
cannam@18 68 ss = 0;
cannam@18 69 for (j = 0; j < ncoeff; j++)
cannam@18 70 ss += features[i][j] * features[i][j];
cannam@18 71 env = sqrt(ss);
cannam@18 72 for (j = 0; j < ncoeff; j++)
cannam@18 73 features[i][j] /= env;
cannam@18 74 features[i][ncoeff] = env;
cannam@18 75 if (env > maxenv)
cannam@18 76 maxenv = env;
cannam@18 77 }
cannam@18 78 /* normalise the envelopes */
cannam@18 79 for (i = 0; i < nframes; i++)
cannam@18 80 features[i][ncoeff] /= maxenv;
cannam@18 81 }
cannam@18 82
cannam@18 83 /* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */
cannam@18 84 /* NB h is a vector in row major order, as required by cluster_melt() */
cannam@18 85 /* for historical reasons we normalise the histograms by their norm (not to sum to one) */
cannam@18 86 void create_histograms(int* x, int nx, int m, int hlen, double* h)
cannam@18 87 {
cannam@18 88 int i, j, t;
cannam@18 89 double norm;
cannam@41 90
cannam@41 91 for (i = 0; i < nx*m; i++)
cannam@41 92 h[i] = 0;
cannam@41 93
cannam@18 94 for (i = hlen/2; i < nx-hlen/2; i++)
cannam@18 95 {
cannam@18 96 for (j = 0; j < m; j++)
cannam@18 97 h[i*m+j] = 0;
cannam@18 98 for (t = i-hlen/2; t <= i+hlen/2; t++)
cannam@18 99 ++h[i*m+x[t]];
cannam@18 100 norm = 0;
cannam@18 101 for (j = 0; j < m; j++)
cannam@18 102 norm += h[i*m+j] * h[i*m+j];
cannam@18 103 for (j = 0; j < m; j++)
cannam@18 104 h[i*m+j] /= norm;
cannam@18 105 }
cannam@18 106
cannam@18 107 /* duplicate histograms at beginning and end to create one histogram for each data value supplied */
cannam@18 108 for (i = 0; i < hlen/2; i++)
cannam@18 109 for (j = 0; j < m; j++)
cannam@18 110 h[i*m+j] = h[hlen/2*m+j];
cannam@18 111 for (i = nx-hlen/2; i < nx; i++)
cannam@18 112 for (j = 0; j < m; j++)
cannam@18 113 h[i*m+j] = h[(nx-hlen/2-1)*m+j];
cannam@18 114 }
cannam@18 115
cannam@18 116 /* segment using HMM and then histogram clustering */
cannam@18 117 void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
cannam@18 118 int histogram_length, int nclusters, int neighbour_limit)
cannam@18 119 {
cannam@18 120 int i, j;
cannam@18 121
cannam@18 122 /*****************************/
cannam@18 123 if (0) {
cannam@18 124 /* try just using the predominant bin number as a 'decoded state' */
cannam@18 125 nHMM_states = feature_length + 1; /* allow a 'zero' state */
cannam@18 126 double chroma_thresh = 0.05;
cannam@18 127 double maxval;
cannam@18 128 int maxbin;
cannam@18 129 for (i = 0; i < frames_read; i++)
cannam@18 130 {
cannam@18 131 maxval = 0;
cannam@18 132 for (j = 0; j < feature_length; j++)
cannam@18 133 {
cannam@18 134 if (features[i][j] > maxval)
cannam@18 135 {
cannam@18 136 maxval = features[i][j];
cannam@18 137 maxbin = j;
cannam@18 138 }
cannam@18 139 }
cannam@18 140 if (maxval > chroma_thresh)
cannam@18 141 q[i] = maxbin;
cannam@18 142 else
cannam@18 143 q[i] = feature_length;
cannam@18 144 }
cannam@18 145
cannam@18 146 }
cannam@18 147 if (1) {
cannam@18 148 /*****************************/
cannam@18 149
cannam@18 150
cannam@18 151 /* scale all the features to 'balance covariances' during HMM training */
cannam@18 152 double scale = 10;
cannam@18 153 for (i = 0; i < frames_read; i++)
cannam@18 154 for (j = 0; j < feature_length; j++)
cannam@18 155 features[i][j] *= scale;
cannam@18 156
cannam@18 157 /* train an HMM on the features */
cannam@18 158
cannam@18 159 /* create a model */
cannam@18 160 model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);
cannam@18 161
cannam@18 162 /* train the model */
cannam@18 163 hmm_train(features, frames_read, model);
cannam@58 164 /*
cannam@18 165 printf("\n\nafter training:\n");
cannam@18 166 hmm_print(model);
cannam@58 167 */
cannam@18 168 /* decode the hidden state sequence */
cannam@18 169 viterbi_decode(features, frames_read, model, q);
cannam@18 170 hmm_close(model);
cannam@18 171
cannam@18 172 /*****************************/
cannam@18 173 }
cannam@18 174 /*****************************/
cannam@18 175
cannam@18 176
cannam@58 177 /*
cannam@18 178 fprintf(stderr, "HMM state sequence:\n");
cannam@18 179 for (i = 0; i < frames_read; i++)
cannam@18 180 fprintf(stderr, "%d ", q[i]);
cannam@18 181 fprintf(stderr, "\n\n");
cannam@58 182 */
cannam@18 183
cannam@18 184 /* create histograms of states */
cannam@18 185 double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */
cannam@18 186 create_histograms(q, frames_read, nHMM_states, histogram_length, h);
cannam@18 187
cannam@18 188 /* cluster the histograms */
cannam@18 189 int nbsched = 20; /* length of inverse temperature schedule */
cannam@18 190 double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */
cannam@18 191 double b0 = 100;
cannam@18 192 double alpha = 0.7;
cannam@18 193 bsched[0] = b0;
cannam@18 194 for (i = 1; i < nbsched; i++)
cannam@18 195 bsched[i] = alpha * bsched[i-1];
cannam@18 196 cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);
cannam@18 197
cannam@18 198 /* now q holds a sequence of cluster assignments */
cannam@18 199
cannam@18 200 free(h);
cannam@18 201 free(bsched);
cannam@18 202 }
cannam@18 203
cannam@18 204 /* segment constant-Q or chroma features */
cannam@18 205 void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
cannam@18 206 int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
cannam@18 207 {
cannam@18 208 int feature_length;
cannam@18 209 double** chroma;
cannam@18 210 int i;
cannam@18 211
cannam@18 212 if (feature_type == FEATURE_TYPE_CONSTQ)
cannam@18 213 {
cannam@58 214 /* fprintf(stderr, "Converting to dB and normalising...\n");
cannam@58 215 */
cannam@18 216 mpeg7_constq(features, frames_read, ncoeff);
cannam@58 217 /*
cannam@18 218 fprintf(stderr, "Running PCA...\n");
cannam@58 219 */
cannam@18 220 /* do PCA on the features (but not the envelope) */
cannam@18 221 int ncomponents = 20;
cannam@18 222 pca_project(features, frames_read, ncoeff, ncomponents);
cannam@18 223
cannam@18 224 /* copy the envelope so that it immediatly follows the chosen components */
cannam@18 225 for (i = 0; i < frames_read; i++)
cannam@18 226 features[i][ncomponents] = features[i][ncoeff];
cannam@18 227
cannam@18 228 feature_length = ncomponents + 1;
cannam@18 229
cannam@18 230 /**************************************
cannam@18 231 //TEST
cannam@18 232 // feature file name
cannam@18 233 char* dir = "/Users/mark/documents/semma/audio/";
cannam@18 234 char* file_name = (char*) malloc((strlen(dir) + strlen(trackname) + strlen("_features_c20r8h0.2f0.6.mat") + 1)*sizeof(char));
cannam@18 235 strcpy(file_name, dir);
cannam@18 236 strcat(file_name, trackname);
cannam@18 237 strcat(file_name, "_features_c20r8h0.2f0.6.mat");
cannam@18 238
cannam@18 239 // get the features from Matlab from mat-file
cannam@18 240 int frames_in_file;
cannam@18 241 readmatarray_size(file_name, 2, &frames_in_file, &feature_length);
cannam@18 242 readmatarray(file_name, 2, frames_in_file, feature_length, features);
cannam@18 243 // copy final frame to ensure that we get as many as we expected
cannam@18 244 int missing_frames = frames_read - frames_in_file;
cannam@18 245 while (missing_frames > 0)
cannam@18 246 {
cannam@18 247 for (i = 0; i < feature_length; i++)
cannam@18 248 features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i];
cannam@18 249 --missing_frames;
cannam@18 250 }
cannam@18 251
cannam@18 252 free(file_name);
cannam@18 253 ******************************************/
cannam@18 254
cannam@18 255 cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
cannam@18 256 }
cannam@18 257
cannam@18 258 if (feature_type == FEATURE_TYPE_CHROMA)
cannam@18 259 {
cannam@58 260 /*
cannam@18 261 fprintf(stderr, "Converting to chroma features...\n");
cannam@58 262 */
cannam@18 263 /* convert constant-Q to normalised chroma features */
cannam@18 264 chroma = (double**) malloc(frames_read*sizeof(double*));
cannam@18 265 for (i = 0; i < frames_read; i++)
cannam@18 266 chroma[i] = (double*) malloc(bins*sizeof(double));
cannam@18 267 cq2chroma(features, frames_read, ncoeff, bins, chroma);
cannam@18 268 feature_length = bins;
cannam@18 269
cannam@18 270 cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
cannam@18 271
cannam@18 272 for (i = 0; i < frames_read; i++)
cannam@18 273 free(chroma[i]);
cannam@18 274 free(chroma);
cannam@18 275 }
cannam@18 276 }
cannam@18 277
cannam@18 278
cannam@18 279