annotate dsp/segmentation/cluster_segmenter.c @ 321:f1e6be2de9a5

A threshold (delta) is added in the peak picking parameters structure (PPickParams). It is used as an offset when computing the smoothed detection function. A constructor for the structure PPickParams is also added to set the parameters to 0 when a structure instance is created. Hence programmes using the peak picking parameter structure and which do not set the delta parameter (e.g. QM Vamp note onset detector) won't be affected by the modifications. Functions modified: - dsp/onsets/PeakPicking.cpp - dsp/onsets/PeakPicking.h - dsp/signalconditioning/DFProcess.cpp - dsp/signalconditioning/DFProcess.h
author mathieub <mathieu.barthet@eecs.qmul.ac.uk>
date Mon, 20 Jun 2011 19:01:48 +0100
parents d5014ab8b0e5
children 175e51ae78eb
rev   line source
c@243 1 /*
c@243 2 * cluster_segmenter.c
c@243 3 * soundbite
c@243 4 *
c@243 5 * Created by Mark Levy on 06/04/2006.
c@309 6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
c@309 7
c@309 8 This program is free software; you can redistribute it and/or
c@309 9 modify it under the terms of the GNU General Public License as
c@309 10 published by the Free Software Foundation; either version 2 of the
c@309 11 License, or (at your option) any later version. See the file
c@309 12 COPYING included with this distribution for more information.
c@243 13 *
c@243 14 */
c@243 15
c@243 16 #include "cluster_segmenter.h"
c@243 17
c@243 18 extern int readmatarray_size(const char *filepath, int n_array, int* t, int* d);
c@243 19 extern int readmatarray(const char *filepath, int n_array, int t, int d, double** arr);
c@243 20
c@243 21 /* converts constant-Q features to normalised chroma */
c@243 22 void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma)
c@243 23 {
c@243 24 int noct = ncoeff / bins; /* number of complete octaves in constant-Q */
c@243 25 int t, b, oct, ix;
c@243 26 //double maxchroma; /* max chroma value at each time, for normalisation */
c@243 27 //double sum; /* for normalisation */
c@243 28
c@243 29 for (t = 0; t < nframes; t++)
c@243 30 {
c@243 31 for (b = 0; b < bins; b++)
c@243 32 chroma[t][b] = 0;
c@243 33 for (oct = 0; oct < noct; oct++)
c@243 34 {
c@243 35 ix = oct * bins;
c@243 36 for (b = 0; b < bins; b++)
c@243 37 chroma[t][b] += fabs(cq[t][ix+b]);
c@243 38 }
c@243 39 /* normalise to unit sum
c@243 40 sum = 0;
c@243 41 for (b = 0; b < bins; b++)
c@243 42 sum += chroma[t][b];
c@243 43 for (b = 0; b < bins; b++)
c@243 44 chroma[t][b] /= sum;
c@245 45 */
c@243 46 /* normalise to unit max - NO this made results much worse!
c@243 47 maxchroma = 0;
c@243 48 for (b = 0; b < bins; b++)
c@243 49 if (chroma[t][b] > maxchroma)
c@243 50 maxchroma = chroma[t][b];
c@243 51 if (maxchroma > 0)
c@243 52 for (b = 0; b < bins; b++)
c@243 53 chroma[t][b] /= maxchroma;
c@243 54 */
c@243 55 }
c@243 56 }
c@243 57
c@243 58 /* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */
c@243 59 void mpeg7_constq(double** features, int nframes, int ncoeff)
c@243 60 {
c@243 61 int i, j;
c@243 62 double ss;
c@243 63 double env;
c@243 64 double maxenv = 0;
c@243 65
c@243 66 /* convert const-Q features to dB scale */
c@243 67 for (i = 0; i < nframes; i++)
c@243 68 for (j = 0; j < ncoeff; j++)
c@243 69 features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON);
c@243 70
c@243 71 /* normalise each feature vector and add the norm as an extra feature dimension */
c@243 72 for (i = 0; i < nframes; i++)
c@243 73 {
c@243 74 ss = 0;
c@243 75 for (j = 0; j < ncoeff; j++)
c@243 76 ss += features[i][j] * features[i][j];
c@243 77 env = sqrt(ss);
c@243 78 for (j = 0; j < ncoeff; j++)
c@243 79 features[i][j] /= env;
c@243 80 features[i][ncoeff] = env;
c@243 81 if (env > maxenv)
c@243 82 maxenv = env;
c@243 83 }
c@243 84 /* normalise the envelopes */
c@243 85 for (i = 0; i < nframes; i++)
c@243 86 features[i][ncoeff] /= maxenv;
c@243 87 }
c@243 88
c@243 89 /* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */
c@243 90 /* NB h is a vector in row major order, as required by cluster_melt() */
c@243 91 /* for historical reasons we normalise the histograms by their norm (not to sum to one) */
c@243 92 void create_histograms(int* x, int nx, int m, int hlen, double* h)
c@243 93 {
c@243 94 int i, j, t;
c@243 95 double norm;
c@266 96
c@266 97 for (i = 0; i < nx*m; i++)
c@266 98 h[i] = 0;
c@266 99
c@243 100 for (i = hlen/2; i < nx-hlen/2; i++)
c@243 101 {
c@243 102 for (j = 0; j < m; j++)
c@243 103 h[i*m+j] = 0;
c@243 104 for (t = i-hlen/2; t <= i+hlen/2; t++)
c@243 105 ++h[i*m+x[t]];
c@243 106 norm = 0;
c@243 107 for (j = 0; j < m; j++)
c@243 108 norm += h[i*m+j] * h[i*m+j];
c@243 109 for (j = 0; j < m; j++)
c@243 110 h[i*m+j] /= norm;
c@243 111 }
c@243 112
c@243 113 /* duplicate histograms at beginning and end to create one histogram for each data value supplied */
c@243 114 for (i = 0; i < hlen/2; i++)
c@243 115 for (j = 0; j < m; j++)
c@243 116 h[i*m+j] = h[hlen/2*m+j];
c@243 117 for (i = nx-hlen/2; i < nx; i++)
c@243 118 for (j = 0; j < m; j++)
c@243 119 h[i*m+j] = h[(nx-hlen/2-1)*m+j];
c@243 120 }
c@243 121
c@243 122 /* segment using HMM and then histogram clustering */
c@243 123 void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
c@243 124 int histogram_length, int nclusters, int neighbour_limit)
c@243 125 {
c@243 126 int i, j;
c@243 127
c@243 128 /*****************************/
c@243 129 if (0) {
c@243 130 /* try just using the predominant bin number as a 'decoded state' */
c@243 131 nHMM_states = feature_length + 1; /* allow a 'zero' state */
c@243 132 double chroma_thresh = 0.05;
c@243 133 double maxval;
c@243 134 int maxbin;
c@243 135 for (i = 0; i < frames_read; i++)
c@243 136 {
c@243 137 maxval = 0;
c@243 138 for (j = 0; j < feature_length; j++)
c@243 139 {
c@243 140 if (features[i][j] > maxval)
c@243 141 {
c@243 142 maxval = features[i][j];
c@243 143 maxbin = j;
c@243 144 }
c@243 145 }
c@243 146 if (maxval > chroma_thresh)
c@243 147 q[i] = maxbin;
c@243 148 else
c@243 149 q[i] = feature_length;
c@243 150 }
c@243 151
c@243 152 }
c@243 153 if (1) {
c@243 154 /*****************************/
c@243 155
c@243 156
c@243 157 /* scale all the features to 'balance covariances' during HMM training */
c@243 158 double scale = 10;
c@243 159 for (i = 0; i < frames_read; i++)
c@243 160 for (j = 0; j < feature_length; j++)
c@243 161 features[i][j] *= scale;
c@243 162
c@243 163 /* train an HMM on the features */
c@243 164
c@243 165 /* create a model */
c@243 166 model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);
c@243 167
c@243 168 /* train the model */
c@243 169 hmm_train(features, frames_read, model);
c@283 170 /*
c@243 171 printf("\n\nafter training:\n");
c@243 172 hmm_print(model);
c@283 173 */
c@243 174 /* decode the hidden state sequence */
c@243 175 viterbi_decode(features, frames_read, model, q);
c@243 176 hmm_close(model);
c@243 177
c@243 178 /*****************************/
c@243 179 }
c@243 180 /*****************************/
c@243 181
c@243 182
c@283 183 /*
c@243 184 fprintf(stderr, "HMM state sequence:\n");
c@243 185 for (i = 0; i < frames_read; i++)
c@243 186 fprintf(stderr, "%d ", q[i]);
c@243 187 fprintf(stderr, "\n\n");
c@283 188 */
c@243 189
c@243 190 /* create histograms of states */
c@243 191 double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */
c@243 192 create_histograms(q, frames_read, nHMM_states, histogram_length, h);
c@243 193
c@243 194 /* cluster the histograms */
c@243 195 int nbsched = 20; /* length of inverse temperature schedule */
c@243 196 double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */
c@243 197 double b0 = 100;
c@243 198 double alpha = 0.7;
c@243 199 bsched[0] = b0;
c@243 200 for (i = 1; i < nbsched; i++)
c@243 201 bsched[i] = alpha * bsched[i-1];
c@243 202 cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);
c@243 203
c@243 204 /* now q holds a sequence of cluster assignments */
c@243 205
c@243 206 free(h);
c@243 207 free(bsched);
c@243 208 }
c@243 209
c@243 210 /* segment constant-Q or chroma features */
c@243 211 void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
c@243 212 int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
c@243 213 {
c@243 214 int feature_length;
c@243 215 double** chroma;
c@243 216 int i;
c@243 217
c@243 218 if (feature_type == FEATURE_TYPE_CONSTQ)
c@243 219 {
c@283 220 /* fprintf(stderr, "Converting to dB and normalising...\n");
c@283 221 */
c@243 222 mpeg7_constq(features, frames_read, ncoeff);
c@283 223 /*
c@243 224 fprintf(stderr, "Running PCA...\n");
c@283 225 */
c@243 226 /* do PCA on the features (but not the envelope) */
c@243 227 int ncomponents = 20;
c@243 228 pca_project(features, frames_read, ncoeff, ncomponents);
c@243 229
c@243 230 /* copy the envelope so that it immediatly follows the chosen components */
c@243 231 for (i = 0; i < frames_read; i++)
c@243 232 features[i][ncomponents] = features[i][ncoeff];
c@243 233
c@243 234 feature_length = ncomponents + 1;
c@243 235
c@243 236 /**************************************
c@243 237 //TEST
c@243 238 // feature file name
c@243 239 char* dir = "/Users/mark/documents/semma/audio/";
c@243 240 char* file_name = (char*) malloc((strlen(dir) + strlen(trackname) + strlen("_features_c20r8h0.2f0.6.mat") + 1)*sizeof(char));
c@243 241 strcpy(file_name, dir);
c@243 242 strcat(file_name, trackname);
c@243 243 strcat(file_name, "_features_c20r8h0.2f0.6.mat");
c@243 244
c@243 245 // get the features from Matlab from mat-file
c@243 246 int frames_in_file;
c@243 247 readmatarray_size(file_name, 2, &frames_in_file, &feature_length);
c@243 248 readmatarray(file_name, 2, frames_in_file, feature_length, features);
c@243 249 // copy final frame to ensure that we get as many as we expected
c@243 250 int missing_frames = frames_read - frames_in_file;
c@243 251 while (missing_frames > 0)
c@243 252 {
c@243 253 for (i = 0; i < feature_length; i++)
c@243 254 features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i];
c@243 255 --missing_frames;
c@243 256 }
c@243 257
c@243 258 free(file_name);
c@243 259 ******************************************/
c@243 260
c@243 261 cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
c@243 262 }
c@243 263
c@243 264 if (feature_type == FEATURE_TYPE_CHROMA)
c@243 265 {
c@283 266 /*
c@243 267 fprintf(stderr, "Converting to chroma features...\n");
c@283 268 */
c@243 269 /* convert constant-Q to normalised chroma features */
c@243 270 chroma = (double**) malloc(frames_read*sizeof(double*));
c@243 271 for (i = 0; i < frames_read; i++)
c@243 272 chroma[i] = (double*) malloc(bins*sizeof(double));
c@243 273 cq2chroma(features, frames_read, ncoeff, bins, chroma);
c@243 274 feature_length = bins;
c@243 275
c@243 276 cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
c@243 277
c@243 278 for (i = 0; i < frames_read; i++)
c@243 279 free(chroma[i]);
c@243 280 free(chroma);
c@243 281 }
c@243 282 }
c@243 283
c@243 284
c@243 285