annotate dsp/segmentation/ClusterMeltSegmenter.cpp @ 73:dcb555b90924

* Key detector: when returning key strengths, use the peak value of the three underlying chromagram correlations (from 36-bin chromagram) corresponding to each key, instead of the mean. Rationale: This is the same method as used when returning the key value, and it's nice to have the same results in both returned value and plot. The peak performed better than the sum with a simple test set of triads, so it seems reasonable to change the plot to match the key output rather than the other way around. * FFT: kiss_fftr returns only the non-conjugate bins, synthesise the rest rather than leaving them (perhaps dangerously) undefined. Fixes an uninitialised data error in chromagram that could cause garbage results from key detector. * Constant Q: remove precalculated values again, I reckon they're not proving such a good tradeoff.
author cannam
date Fri, 05 Jun 2009 15:12:39 +0000
parents 6cb2b3cd5356
children e5907ae6de17
rev   line source
cannam@24 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@24 2
cannam@18 3 /*
cannam@24 4 * ClusterMeltSegmenter.cpp
cannam@18 5 *
cannam@24 6 * Created by Mark Levy on 23/03/2006.
cannam@24 7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
cannam@24 8 * All rights reserved.
cannam@18 9 */
cannam@18 10
cannam@18 11 #include <cfloat>
cannam@18 12 #include <cmath>
cannam@18 13
cannam@18 14 #include "ClusterMeltSegmenter.h"
cannam@18 15 #include "cluster_segmenter.h"
cannam@18 16 #include "segment.h"
cannam@18 17
cannam@20 18 #include "dsp/transforms/FFT.h"
cannam@24 19 #include "dsp/chromagram/ConstantQ.h"
cannam@24 20 #include "dsp/rateconversion/Decimator.h"
cannam@26 21 #include "dsp/mfcc/MFCC.h"
cannam@20 22
cannam@24 23 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) :
cannam@24 24 window(NULL),
cannam@64 25 fft(NULL),
cannam@24 26 constq(NULL),
cannam@26 27 mfcc(NULL),
cannam@24 28 featureType(params.featureType),
cannam@24 29 hopSize(params.hopSize),
cannam@24 30 windowSize(params.windowSize),
cannam@24 31 fmin(params.fmin),
cannam@24 32 fmax(params.fmax),
cannam@24 33 nbins(params.nbins),
cannam@24 34 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
cannam@24 35 nHMMStates(params.nHMMStates),
cannam@24 36 nclusters(params.nclusters),
cannam@24 37 histogramLength(params.histogramLength),
cannam@24 38 neighbourhoodLimit(params.neighbourhoodLimit),
cannam@26 39 decimator(NULL)
cannam@18 40 {
cannam@18 41 }
cannam@18 42
cannam@18 43 void ClusterMeltSegmenter::initialise(int fs)
cannam@18 44 {
cannam@24 45 samplerate = fs;
cannam@24 46
cannam@26 47 if (featureType == FEATURE_TYPE_CONSTQ ||
cannam@26 48 featureType == FEATURE_TYPE_CHROMA) {
cannam@26 49
cannam@26 50 // run internal processing at 11025 or thereabouts
cannam@24 51 int internalRate = 11025;
cannam@24 52 int decimationFactor = samplerate / internalRate;
cannam@24 53 if (decimationFactor < 1) decimationFactor = 1;
cannam@24 54
cannam@24 55 // must be a power of two
cannam@24 56 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
cannam@24 57
cannam@24 58 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
cannam@24 59 decimationFactor = Decimator::getHighestSupportedFactor();
cannam@24 60 }
cannam@24 61
cannam@24 62 if (decimationFactor > 1) {
cannam@24 63 decimator = new Decimator(getWindowsize(), decimationFactor);
cannam@24 64 }
cannam@24 65
cannam@24 66 CQConfig config;
cannam@24 67 config.FS = samplerate / decimationFactor;
cannam@24 68 config.min = fmin;
cannam@24 69 config.max = fmax;
cannam@24 70 config.BPO = nbins;
cannam@24 71 config.CQThresh = 0.0054;
cannam@24 72
cannam@24 73 constq = new ConstantQ(config);
cannam@24 74 constq->sparsekernel();
cannam@26 75
cannam@26 76 ncoeff = constq->getK();
cannam@64 77
cannam@64 78 fft = new FFTReal(constq->getfftlength());
cannam@26 79
cannam@26 80 } else if (featureType == FEATURE_TYPE_MFCC) {
cannam@24 81
cannam@27 82 // run internal processing at 22050 or thereabouts
cannam@27 83 int internalRate = 22050;
cannam@27 84 int decimationFactor = samplerate / internalRate;
cannam@27 85 if (decimationFactor < 1) decimationFactor = 1;
cannam@27 86
cannam@27 87 // must be a power of two
cannam@27 88 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
cannam@27 89
cannam@27 90 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
cannam@27 91 decimationFactor = Decimator::getHighestSupportedFactor();
cannam@27 92 }
cannam@27 93
cannam@27 94 if (decimationFactor > 1) {
cannam@27 95 decimator = new Decimator(getWindowsize(), decimationFactor);
cannam@27 96 }
cannam@27 97
cannam@30 98 MFCCConfig config(samplerate / decimationFactor);
cannam@27 99 config.fftsize = 2048;
cannam@27 100 config.nceps = 19;
cannam@27 101 config.want_c0 = true;
cannam@26 102
cannam@26 103 mfcc = new MFCC(config);
cannam@27 104 ncoeff = config.nceps + 1;
cannam@24 105 }
cannam@18 106 }
cannam@18 107
cannam@18 108 ClusterMeltSegmenter::~ClusterMeltSegmenter()
cannam@18 109 {
cannam@24 110 delete window;
cannam@24 111 delete constq;
cannam@24 112 delete decimator;
cannam@64 113 delete fft;
cannam@20 114 }
cannam@20 115
cannam@20 116 int
cannam@20 117 ClusterMeltSegmenter::getWindowsize()
cannam@20 118 {
cannam@44 119 return static_cast<int>(windowSize * samplerate + 0.001);
cannam@20 120 }
cannam@20 121
cannam@20 122 int
cannam@20 123 ClusterMeltSegmenter::getHopsize()
cannam@20 124 {
cannam@44 125 return static_cast<int>(hopSize * samplerate + 0.001);
cannam@18 126 }
cannam@18 127
cannam@24 128 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
cannam@18 129 {
cannam@26 130 if (featureType == FEATURE_TYPE_CONSTQ ||
cannam@26 131 featureType == FEATURE_TYPE_CHROMA) {
cannam@26 132 extractFeaturesConstQ(samples, nsamples);
cannam@26 133 } else if (featureType == FEATURE_TYPE_MFCC) {
cannam@26 134 extractFeaturesMFCC(samples, nsamples);
cannam@26 135 }
cannam@26 136 }
cannam@26 137
cannam@26 138 void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples)
cannam@26 139 {
cannam@24 140 if (!constq) {
cannam@26 141 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: "
cannam@26 142 << "No const-q: initialise not called?"
cannam@24 143 << std::endl;
cannam@24 144 return;
cannam@24 145 }
cannam@20 146
cannam@24 147 if (nsamples < getWindowsize()) {
cannam@24 148 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
cannam@24 149 return;
cannam@24 150 }
cannam@24 151
cannam@24 152 int fftsize = constq->getfftlength();
cannam@24 153
cannam@24 154 if (!window || window->getSize() != fftsize) {
cannam@24 155 delete window;
cannam@24 156 window = new Window<double>(HammingWindow, fftsize);
cannam@24 157 }
cannam@24 158
cannam@24 159 vector<double> cq(ncoeff);
cannam@24 160
cannam@24 161 for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
cannam@24 162
cannam@24 163 const double *psource = samples;
cannam@24 164 int pcount = nsamples;
cannam@24 165
cannam@24 166 if (decimator) {
cannam@24 167 pcount = nsamples / decimator->getFactor();
cannam@24 168 double *decout = new double[pcount];
cannam@24 169 decimator->process(samples, decout);
cannam@24 170 psource = decout;
cannam@24 171 }
cannam@24 172
cannam@24 173 int origin = 0;
cannam@24 174
cannam@24 175 // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
cannam@24 176
cannam@24 177 int frames = 0;
cannam@24 178
cannam@24 179 double *frame = new double[fftsize];
cannam@24 180 double *real = new double[fftsize];
cannam@24 181 double *imag = new double[fftsize];
cannam@24 182 double *cqre = new double[ncoeff];
cannam@24 183 double *cqim = new double[ncoeff];
cannam@24 184
cannam@24 185 while (origin <= pcount) {
cannam@24 186
cannam@24 187 // always need at least one fft window per block, but after
cannam@24 188 // that we want to avoid having any incomplete ones
cannam@24 189 if (origin > 0 && origin + fftsize >= pcount) break;
cannam@24 190
cannam@24 191 for (int i = 0; i < fftsize; ++i) {
cannam@24 192 if (origin + i < pcount) {
cannam@24 193 frame[i] = psource[origin + i];
cannam@24 194 } else {
cannam@24 195 frame[i] = 0.0;
cannam@24 196 }
cannam@24 197 }
cannam@24 198
cannam@24 199 for (int i = 0; i < fftsize/2; ++i) {
cannam@24 200 double value = frame[i];
cannam@24 201 frame[i] = frame[i + fftsize/2];
cannam@24 202 frame[i + fftsize/2] = value;
cannam@24 203 }
cannam@24 204
cannam@24 205 window->cut(frame);
cannam@24 206
cannam@64 207 fft->process(false, frame, real, imag);
cannam@24 208
cannam@24 209 constq->process(real, imag, cqre, cqim);
cannam@18 210
cannam@24 211 for (int i = 0; i < ncoeff; ++i) {
cannam@24 212 cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
cannam@24 213 }
cannam@24 214 ++frames;
cannam@20 215
cannam@24 216 origin += fftsize/2;
cannam@24 217 }
cannam@20 218
cannam@24 219 delete [] cqre;
cannam@24 220 delete [] cqim;
cannam@24 221 delete [] real;
cannam@24 222 delete [] imag;
cannam@24 223 delete [] frame;
cannam@20 224
cannam@24 225 for (int i = 0; i < ncoeff; ++i) {
cannam@24 226 cq[i] /= frames;
cannam@24 227 }
cannam@20 228
cannam@24 229 if (decimator) delete[] psource;
cannam@20 230
cannam@24 231 features.push_back(cq);
cannam@18 232 }
cannam@18 233
cannam@26 234 void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples)
cannam@26 235 {
cannam@26 236 if (!mfcc) {
cannam@26 237 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: "
cannam@26 238 << "No mfcc: initialise not called?"
cannam@26 239 << std::endl;
cannam@26 240 return;
cannam@26 241 }
cannam@26 242
cannam@26 243 if (nsamples < getWindowsize()) {
cannam@26 244 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
cannam@26 245 return;
cannam@26 246 }
cannam@26 247
cannam@26 248 int fftsize = mfcc->getfftlength();
cannam@26 249
cannam@26 250 vector<double> cc(ncoeff);
cannam@26 251
cannam@26 252 for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
cannam@26 253
cannam@26 254 const double *psource = samples;
cannam@26 255 int pcount = nsamples;
cannam@26 256
cannam@27 257 if (decimator) {
cannam@27 258 pcount = nsamples / decimator->getFactor();
cannam@27 259 double *decout = new double[pcount];
cannam@27 260 decimator->process(samples, decout);
cannam@27 261 psource = decout;
cannam@27 262 }
cannam@27 263
cannam@26 264 int origin = 0;
cannam@26 265 int frames = 0;
cannam@26 266
cannam@26 267 double *frame = new double[fftsize];
cannam@26 268 double *ccout = new double[ncoeff];
cannam@26 269
cannam@26 270 while (origin <= pcount) {
cannam@26 271
cannam@26 272 // always need at least one fft window per block, but after
cannam@26 273 // that we want to avoid having any incomplete ones
cannam@26 274 if (origin > 0 && origin + fftsize >= pcount) break;
cannam@26 275
cannam@26 276 for (int i = 0; i < fftsize; ++i) {
cannam@26 277 if (origin + i < pcount) {
cannam@26 278 frame[i] = psource[origin + i];
cannam@26 279 } else {
cannam@26 280 frame[i] = 0.0;
cannam@26 281 }
cannam@26 282 }
cannam@26 283
cannam@30 284 mfcc->process(frame, ccout);
cannam@26 285
cannam@26 286 for (int i = 0; i < ncoeff; ++i) {
cannam@26 287 cc[i] += ccout[i];
cannam@26 288 }
cannam@26 289 ++frames;
cannam@26 290
cannam@26 291 origin += fftsize/2;
cannam@26 292 }
cannam@26 293
cannam@26 294 delete [] ccout;
cannam@26 295 delete [] frame;
cannam@26 296
cannam@26 297 for (int i = 0; i < ncoeff; ++i) {
cannam@26 298 cc[i] /= frames;
cannam@26 299 }
cannam@26 300
cannam@27 301 if (decimator) delete[] psource;
cannam@27 302
cannam@26 303 features.push_back(cc);
cannam@26 304 }
cannam@26 305
cannam@18 306 void ClusterMeltSegmenter::segment(int m)
cannam@18 307 {
cannam@24 308 nclusters = m;
cannam@24 309 segment();
cannam@18 310 }
cannam@18 311
cannam@18 312 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
cannam@18 313 {
cannam@24 314 features = f;
cannam@24 315 featureType = FEATURE_TYPE_UNKNOWN;
cannam@18 316 }
cannam@18 317
cannam@18 318 void ClusterMeltSegmenter::segment()
cannam@18 319 {
cannam@26 320 delete constq;
cannam@26 321 constq = 0;
cannam@26 322 delete mfcc;
cannam@26 323 mfcc = 0;
cannam@26 324 delete decimator;
cannam@26 325 decimator = 0;
cannam@58 326
cannam@58 327 if (features.size() < histogramLength) return;
cannam@58 328 /*
cannam@24 329 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
cannam@24 330 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
cannam@58 331 */
cannam@24 332 // copy the features to a native array and use the existing C segmenter...
cannam@24 333 double** arrFeatures = new double*[features.size()];
cannam@24 334 for (int i = 0; i < features.size(); i++)
cannam@24 335 {
cannam@24 336 if (featureType == FEATURE_TYPE_UNKNOWN) {
cannam@24 337 arrFeatures[i] = new double[features[0].size()];
cannam@24 338 for (int j = 0; j < features[0].size(); j++)
cannam@24 339 arrFeatures[i][j] = features[i][j];
cannam@24 340 } else {
cannam@24 341 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
cannam@24 342 for (int j = 0; j < ncoeff; j++)
cannam@24 343 arrFeatures[i][j] = features[i][j];
cannam@24 344 }
cannam@24 345 }
cannam@18 346
cannam@24 347 q = new int[features.size()];
cannam@18 348
cannam@26 349 if (featureType == FEATURE_TYPE_UNKNOWN ||
cannam@26 350 featureType == FEATURE_TYPE_MFCC)
cannam@24 351 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
cannam@24 352 nclusters, neighbourhoodLimit);
cannam@24 353 else
cannam@24 354 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
cannam@24 355 nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
cannam@18 356
cannam@24 357 // convert the cluster assignment sequence to a segmentation
cannam@24 358 makeSegmentation(q, features.size());
cannam@18 359
cannam@24 360 // de-allocate arrays
cannam@24 361 delete [] q;
cannam@24 362 for (int i = 0; i < features.size(); i++)
cannam@24 363 delete [] arrFeatures[i];
cannam@24 364 delete [] arrFeatures;
cannam@18 365
cannam@24 366 // clear the features
cannam@24 367 clear();
cannam@18 368 }
cannam@18 369
cannam@18 370 void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
cannam@18 371 {
cannam@24 372 segmentation.segments.clear();
cannam@24 373 segmentation.nsegtypes = nclusters;
cannam@24 374 segmentation.samplerate = samplerate;
cannam@18 375
cannam@24 376 Segment segment;
cannam@24 377 segment.start = 0;
cannam@24 378 segment.type = q[0];
cannam@18 379
cannam@24 380 for (int i = 1; i < len; i++)
cannam@24 381 {
cannam@24 382 if (q[i] != q[i-1])
cannam@24 383 {
cannam@24 384 segment.end = i * getHopsize();
cannam@24 385 segmentation.segments.push_back(segment);
cannam@24 386 segment.type = q[i];
cannam@24 387 segment.start = segment.end;
cannam@24 388 }
cannam@24 389 }
cannam@24 390 segment.end = len * getHopsize();
cannam@24 391 segmentation.segments.push_back(segment);
cannam@18 392 }
cannam@18 393