annotate dsp/segmentation/cluster_segmenter.c @ 64:6cb2b3cd5356

* Refactor FFT a little bit so as to separate construction and processing rather than have a single static method -- will make it easier to use a different implementation * pull in KissFFT implementation (not hooked up yet)
author cannam
date Wed, 13 May 2009 09:19:12 +0000
parents d72fcd34d9a7
children e5907ae6de17
rev   line source
cannam@18 1 /*
cannam@18 2 * cluster_segmenter.c
cannam@18 3 * soundbite
cannam@18 4 *
cannam@18 5 * Created by Mark Levy on 06/04/2006.
cannam@18 6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved.
cannam@18 7 *
cannam@18 8 */
cannam@18 9
cannam@18 10 #include "cluster_segmenter.h"
cannam@18 11
cannam@18 12 extern int readmatarray_size(const char *filepath, int n_array, int* t, int* d);
cannam@18 13 extern int readmatarray(const char *filepath, int n_array, int t, int d, double** arr);
cannam@18 14
cannam@18 15 /* converts constant-Q features to normalised chroma */
cannam@18 16 void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma)
cannam@18 17 {
cannam@18 18 int noct = ncoeff / bins; /* number of complete octaves in constant-Q */
cannam@18 19 int t, b, oct, ix;
cannam@18 20 //double maxchroma; /* max chroma value at each time, for normalisation */
cannam@18 21 //double sum; /* for normalisation */
cannam@18 22
cannam@18 23 for (t = 0; t < nframes; t++)
cannam@18 24 {
cannam@18 25 for (b = 0; b < bins; b++)
cannam@18 26 chroma[t][b] = 0;
cannam@18 27 for (oct = 0; oct < noct; oct++)
cannam@18 28 {
cannam@18 29 ix = oct * bins;
cannam@18 30 for (b = 0; b < bins; b++)
cannam@18 31 chroma[t][b] += fabs(cq[t][ix+b]);
cannam@18 32 }
cannam@18 33 /* normalise to unit sum
cannam@18 34 sum = 0;
cannam@18 35 for (b = 0; b < bins; b++)
cannam@18 36 sum += chroma[t][b];
cannam@18 37 for (b = 0; b < bins; b++)
cannam@18 38 chroma[t][b] /= sum;
cannam@20 39 */
cannam@18 40 /* normalise to unit max - NO this made results much worse!
cannam@18 41 maxchroma = 0;
cannam@18 42 for (b = 0; b < bins; b++)
cannam@18 43 if (chroma[t][b] > maxchroma)
cannam@18 44 maxchroma = chroma[t][b];
cannam@18 45 if (maxchroma > 0)
cannam@18 46 for (b = 0; b < bins; b++)
cannam@18 47 chroma[t][b] /= maxchroma;
cannam@18 48 */
cannam@18 49 }
cannam@18 50 }
cannam@18 51
cannam@18 52 /* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */
cannam@18 53 void mpeg7_constq(double** features, int nframes, int ncoeff)
cannam@18 54 {
cannam@18 55 int i, j;
cannam@18 56 double ss;
cannam@18 57 double env;
cannam@18 58 double maxenv = 0;
cannam@18 59
cannam@18 60 /* convert const-Q features to dB scale */
cannam@18 61 for (i = 0; i < nframes; i++)
cannam@18 62 for (j = 0; j < ncoeff; j++)
cannam@18 63 features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON);
cannam@18 64
cannam@18 65 /* normalise each feature vector and add the norm as an extra feature dimension */
cannam@18 66 for (i = 0; i < nframes; i++)
cannam@18 67 {
cannam@18 68 ss = 0;
cannam@18 69 for (j = 0; j < ncoeff; j++)
cannam@18 70 ss += features[i][j] * features[i][j];
cannam@18 71 env = sqrt(ss);
cannam@18 72 for (j = 0; j < ncoeff; j++)
cannam@18 73 features[i][j] /= env;
cannam@18 74 features[i][ncoeff] = env;
cannam@18 75 if (env > maxenv)
cannam@18 76 maxenv = env;
cannam@18 77 }
cannam@18 78 /* normalise the envelopes */
cannam@18 79 for (i = 0; i < nframes; i++)
cannam@18 80 features[i][ncoeff] /= maxenv;
cannam@18 81 }
cannam@18 82
cannam@18 83 /* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */
cannam@18 84 /* NB h is a vector in row major order, as required by cluster_melt() */
cannam@18 85 /* for historical reasons we normalise the histograms by their norm (not to sum to one) */
cannam@18 86 void create_histograms(int* x, int nx, int m, int hlen, double* h)
cannam@18 87 {
cannam@18 88 int i, j, t;
cannam@18 89 double norm;
cannam@41 90
cannam@41 91 for (i = 0; i < nx*m; i++)
cannam@41 92 h[i] = 0;
cannam@41 93
cannam@18 94 for (i = hlen/2; i < nx-hlen/2; i++)
cannam@18 95 {
cannam@18 96 for (j = 0; j < m; j++)
cannam@18 97 h[i*m+j] = 0;
cannam@18 98 for (t = i-hlen/2; t <= i+hlen/2; t++)
cannam@18 99 ++h[i*m+x[t]];
cannam@18 100 norm = 0;
cannam@18 101 for (j = 0; j < m; j++)
cannam@18 102 norm += h[i*m+j] * h[i*m+j];
cannam@18 103 for (j = 0; j < m; j++)
cannam@18 104 h[i*m+j] /= norm;
cannam@18 105 }
cannam@18 106
cannam@18 107 /* duplicate histograms at beginning and end to create one histogram for each data value supplied */
cannam@18 108 for (i = 0; i < hlen/2; i++)
cannam@18 109 for (j = 0; j < m; j++)
cannam@18 110 h[i*m+j] = h[hlen/2*m+j];
cannam@18 111 for (i = nx-hlen/2; i < nx; i++)
cannam@18 112 for (j = 0; j < m; j++)
cannam@18 113 h[i*m+j] = h[(nx-hlen/2-1)*m+j];
cannam@18 114 }
cannam@18 115
cannam@18 116 /* segment using HMM and then histogram clustering */
cannam@18 117 void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
cannam@18 118 int histogram_length, int nclusters, int neighbour_limit)
cannam@18 119 {
cannam@18 120 int i, j;
cannam@18 121
cannam@18 122 /*****************************/
cannam@18 123 if (0) {
cannam@18 124 /* try just using the predominant bin number as a 'decoded state' */
cannam@18 125 nHMM_states = feature_length + 1; /* allow a 'zero' state */
cannam@18 126 double chroma_thresh = 0.05;
cannam@18 127 double maxval;
cannam@18 128 int maxbin;
cannam@18 129 for (i = 0; i < frames_read; i++)
cannam@18 130 {
cannam@18 131 maxval = 0;
cannam@18 132 for (j = 0; j < feature_length; j++)
cannam@18 133 {
cannam@18 134 if (features[i][j] > maxval)
cannam@18 135 {
cannam@18 136 maxval = features[i][j];
cannam@18 137 maxbin = j;
cannam@18 138 }
cannam@18 139 }
cannam@18 140 if (maxval > chroma_thresh)
cannam@18 141 q[i] = maxbin;
cannam@18 142 else
cannam@18 143 q[i] = feature_length;
cannam@18 144 }
cannam@18 145
cannam@18 146 }
cannam@18 147 if (1) {
cannam@18 148 /*****************************/
cannam@18 149
cannam@18 150
cannam@18 151 /* scale all the features to 'balance covariances' during HMM training */
cannam@18 152 double scale = 10;
cannam@18 153 for (i = 0; i < frames_read; i++)
cannam@18 154 for (j = 0; j < feature_length; j++)
cannam@18 155 features[i][j] *= scale;
cannam@18 156
cannam@18 157 /* train an HMM on the features */
cannam@18 158
cannam@18 159 /* create a model */
cannam@18 160 model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);
cannam@18 161
cannam@18 162 /* train the model */
cannam@18 163 hmm_train(features, frames_read, model);
cannam@58 164 /*
cannam@18 165 printf("\n\nafter training:\n");
cannam@18 166 hmm_print(model);
cannam@58 167 */
cannam@18 168 /* decode the hidden state sequence */
cannam@18 169 viterbi_decode(features, frames_read, model, q);
cannam@18 170 hmm_close(model);
cannam@18 171
cannam@18 172 /*****************************/
cannam@18 173 }
cannam@18 174 /*****************************/
cannam@18 175
cannam@18 176
cannam@58 177 /*
cannam@18 178 fprintf(stderr, "HMM state sequence:\n");
cannam@18 179 for (i = 0; i < frames_read; i++)
cannam@18 180 fprintf(stderr, "%d ", q[i]);
cannam@18 181 fprintf(stderr, "\n\n");
cannam@58 182 */
cannam@18 183
cannam@18 184 /* create histograms of states */
cannam@18 185 double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */
cannam@18 186 create_histograms(q, frames_read, nHMM_states, histogram_length, h);
cannam@18 187
cannam@18 188 /* cluster the histograms */
cannam@18 189 int nbsched = 20; /* length of inverse temperature schedule */
cannam@18 190 double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */
cannam@18 191 double b0 = 100;
cannam@18 192 double alpha = 0.7;
cannam@18 193 bsched[0] = b0;
cannam@18 194 for (i = 1; i < nbsched; i++)
cannam@18 195 bsched[i] = alpha * bsched[i-1];
cannam@18 196 cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);
cannam@18 197
cannam@18 198 /* now q holds a sequence of cluster assignments */
cannam@18 199
cannam@18 200 free(h);
cannam@18 201 free(bsched);
cannam@18 202 }
cannam@18 203
cannam@18 204 /* segment constant-Q or chroma features */
cannam@18 205 void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
cannam@18 206 int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
cannam@18 207 {
cannam@18 208 int feature_length;
cannam@18 209 double** chroma;
cannam@18 210 int i;
cannam@18 211
cannam@18 212 if (feature_type == FEATURE_TYPE_CONSTQ)
cannam@18 213 {
cannam@58 214 /* fprintf(stderr, "Converting to dB and normalising...\n");
cannam@58 215 */
cannam@18 216 mpeg7_constq(features, frames_read, ncoeff);
cannam@58 217 /*
cannam@18 218 fprintf(stderr, "Running PCA...\n");
cannam@58 219 */
cannam@18 220 /* do PCA on the features (but not the envelope) */
cannam@18 221 int ncomponents = 20;
cannam@18 222 pca_project(features, frames_read, ncoeff, ncomponents);
cannam@18 223
cannam@18 224 /* copy the envelope so that it immediatly follows the chosen components */
cannam@18 225 for (i = 0; i < frames_read; i++)
cannam@18 226 features[i][ncomponents] = features[i][ncoeff];
cannam@18 227
cannam@18 228 feature_length = ncomponents + 1;
cannam@18 229
cannam@18 230 /**************************************
cannam@18 231 //TEST
cannam@18 232 // feature file name
cannam@18 233 char* dir = "/Users/mark/documents/semma/audio/";
cannam@18 234 char* file_name = (char*) malloc((strlen(dir) + strlen(trackname) + strlen("_features_c20r8h0.2f0.6.mat") + 1)*sizeof(char));
cannam@18 235 strcpy(file_name, dir);
cannam@18 236 strcat(file_name, trackname);
cannam@18 237 strcat(file_name, "_features_c20r8h0.2f0.6.mat");
cannam@18 238
cannam@18 239 // get the features from Matlab from mat-file
cannam@18 240 int frames_in_file;
cannam@18 241 readmatarray_size(file_name, 2, &frames_in_file, &feature_length);
cannam@18 242 readmatarray(file_name, 2, frames_in_file, feature_length, features);
cannam@18 243 // copy final frame to ensure that we get as many as we expected
cannam@18 244 int missing_frames = frames_read - frames_in_file;
cannam@18 245 while (missing_frames > 0)
cannam@18 246 {
cannam@18 247 for (i = 0; i < feature_length; i++)
cannam@18 248 features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i];
cannam@18 249 --missing_frames;
cannam@18 250 }
cannam@18 251
cannam@18 252 free(file_name);
cannam@18 253 ******************************************/
cannam@18 254
cannam@18 255 cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
cannam@18 256 }
cannam@18 257
cannam@18 258 if (feature_type == FEATURE_TYPE_CHROMA)
cannam@18 259 {
cannam@58 260 /*
cannam@18 261 fprintf(stderr, "Converting to chroma features...\n");
cannam@58 262 */
cannam@18 263 /* convert constant-Q to normalised chroma features */
cannam@18 264 chroma = (double**) malloc(frames_read*sizeof(double*));
cannam@18 265 for (i = 0; i < frames_read; i++)
cannam@18 266 chroma[i] = (double*) malloc(bins*sizeof(double));
cannam@18 267 cq2chroma(features, frames_read, ncoeff, bins, chroma);
cannam@18 268 feature_length = bins;
cannam@18 269
cannam@18 270 cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
cannam@18 271
cannam@18 272 for (i = 0; i < frames_read; i++)
cannam@18 273 free(chroma[i]);
cannam@18 274 free(chroma);
cannam@18 275 }
cannam@18 276 }
cannam@18 277
cannam@18 278
cannam@18 279