annotate dsp/segmentation/ClusterMeltSegmenter.cpp @ 321:f1e6be2de9a5

A threshold (delta) is added in the peak picking parameters structure (PPickParams). It is used as an offset when computing the smoothed detection function. A constructor for the structure PPickParams is also added to set the parameters to 0 when a structure instance is created. Hence programmes using the peak picking parameter structure and which do not set the delta parameter (e.g. QM Vamp note onset detector) won't be affected by the modifications. Functions modified: - dsp/onsets/PeakPicking.cpp - dsp/onsets/PeakPicking.h - dsp/signalconditioning/DFProcess.cpp - dsp/signalconditioning/DFProcess.h
author mathieub <mathieu.barthet@eecs.qmul.ac.uk>
date Mon, 20 Jun 2011 19:01:48 +0100
parents d5014ab8b0e5
children f6ccde089491
rev   line source
c@249 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@249 2
c@243 3 /*
c@249 4 * ClusterMeltSegmenter.cpp
c@243 5 *
c@249 6 * Created by Mark Levy on 23/03/2006.
c@249 7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
c@309 8
c@309 9 This program is free software; you can redistribute it and/or
c@309 10 modify it under the terms of the GNU General Public License as
c@309 11 published by the Free Software Foundation; either version 2 of the
c@309 12 License, or (at your option) any later version. See the file
c@309 13 COPYING included with this distribution for more information.
c@243 14 */
c@243 15
c@243 16 #include <cfloat>
c@243 17 #include <cmath>
c@243 18
c@243 19 #include "ClusterMeltSegmenter.h"
c@243 20 #include "cluster_segmenter.h"
c@243 21 #include "segment.h"
c@243 22
c@245 23 #include "dsp/transforms/FFT.h"
c@249 24 #include "dsp/chromagram/ConstantQ.h"
c@249 25 #include "dsp/rateconversion/Decimator.h"
c@251 26 #include "dsp/mfcc/MFCC.h"
c@245 27
c@249 28 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) :
c@249 29 window(NULL),
c@289 30 fft(NULL),
c@249 31 constq(NULL),
c@251 32 mfcc(NULL),
c@249 33 featureType(params.featureType),
c@249 34 hopSize(params.hopSize),
c@249 35 windowSize(params.windowSize),
c@249 36 fmin(params.fmin),
c@249 37 fmax(params.fmax),
c@249 38 nbins(params.nbins),
c@249 39 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
c@249 40 nHMMStates(params.nHMMStates),
c@249 41 nclusters(params.nclusters),
c@249 42 histogramLength(params.histogramLength),
c@249 43 neighbourhoodLimit(params.neighbourhoodLimit),
c@251 44 decimator(NULL)
c@243 45 {
c@243 46 }
c@243 47
c@243 48 void ClusterMeltSegmenter::initialise(int fs)
c@243 49 {
c@249 50 samplerate = fs;
c@249 51
c@251 52 if (featureType == FEATURE_TYPE_CONSTQ ||
c@251 53 featureType == FEATURE_TYPE_CHROMA) {
c@251 54
c@251 55 // run internal processing at 11025 or thereabouts
c@249 56 int internalRate = 11025;
c@249 57 int decimationFactor = samplerate / internalRate;
c@249 58 if (decimationFactor < 1) decimationFactor = 1;
c@249 59
c@249 60 // must be a power of two
c@249 61 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
c@249 62
c@249 63 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
c@249 64 decimationFactor = Decimator::getHighestSupportedFactor();
c@249 65 }
c@249 66
c@249 67 if (decimationFactor > 1) {
c@249 68 decimator = new Decimator(getWindowsize(), decimationFactor);
c@249 69 }
c@249 70
c@249 71 CQConfig config;
c@249 72 config.FS = samplerate / decimationFactor;
c@249 73 config.min = fmin;
c@249 74 config.max = fmax;
c@249 75 config.BPO = nbins;
c@249 76 config.CQThresh = 0.0054;
c@249 77
c@249 78 constq = new ConstantQ(config);
c@249 79 constq->sparsekernel();
c@251 80
c@251 81 ncoeff = constq->getK();
c@289 82
c@289 83 fft = new FFTReal(constq->getfftlength());
c@251 84
c@251 85 } else if (featureType == FEATURE_TYPE_MFCC) {
c@249 86
c@252 87 // run internal processing at 22050 or thereabouts
c@252 88 int internalRate = 22050;
c@252 89 int decimationFactor = samplerate / internalRate;
c@252 90 if (decimationFactor < 1) decimationFactor = 1;
c@252 91
c@252 92 // must be a power of two
c@252 93 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
c@252 94
c@252 95 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
c@252 96 decimationFactor = Decimator::getHighestSupportedFactor();
c@252 97 }
c@252 98
c@252 99 if (decimationFactor > 1) {
c@252 100 decimator = new Decimator(getWindowsize(), decimationFactor);
c@252 101 }
c@252 102
c@255 103 MFCCConfig config(samplerate / decimationFactor);
c@252 104 config.fftsize = 2048;
c@252 105 config.nceps = 19;
c@252 106 config.want_c0 = true;
c@251 107
c@251 108 mfcc = new MFCC(config);
c@252 109 ncoeff = config.nceps + 1;
c@249 110 }
c@243 111 }
c@243 112
c@243 113 ClusterMeltSegmenter::~ClusterMeltSegmenter()
c@243 114 {
c@249 115 delete window;
c@249 116 delete constq;
c@249 117 delete decimator;
c@289 118 delete fft;
c@245 119 }
c@245 120
c@245 121 int
c@245 122 ClusterMeltSegmenter::getWindowsize()
c@245 123 {
c@269 124 return static_cast<int>(windowSize * samplerate + 0.001);
c@245 125 }
c@245 126
c@245 127 int
c@245 128 ClusterMeltSegmenter::getHopsize()
c@245 129 {
c@269 130 return static_cast<int>(hopSize * samplerate + 0.001);
c@243 131 }
c@243 132
c@249 133 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
c@243 134 {
c@251 135 if (featureType == FEATURE_TYPE_CONSTQ ||
c@251 136 featureType == FEATURE_TYPE_CHROMA) {
c@251 137 extractFeaturesConstQ(samples, nsamples);
c@251 138 } else if (featureType == FEATURE_TYPE_MFCC) {
c@251 139 extractFeaturesMFCC(samples, nsamples);
c@251 140 }
c@251 141 }
c@251 142
c@251 143 void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples)
c@251 144 {
c@249 145 if (!constq) {
c@251 146 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: "
c@251 147 << "No const-q: initialise not called?"
c@249 148 << std::endl;
c@249 149 return;
c@249 150 }
c@245 151
c@249 152 if (nsamples < getWindowsize()) {
c@249 153 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
c@249 154 return;
c@249 155 }
c@249 156
c@249 157 int fftsize = constq->getfftlength();
c@249 158
c@249 159 if (!window || window->getSize() != fftsize) {
c@249 160 delete window;
c@249 161 window = new Window<double>(HammingWindow, fftsize);
c@249 162 }
c@249 163
c@249 164 vector<double> cq(ncoeff);
c@249 165
c@249 166 for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
c@249 167
c@249 168 const double *psource = samples;
c@249 169 int pcount = nsamples;
c@249 170
c@249 171 if (decimator) {
c@249 172 pcount = nsamples / decimator->getFactor();
c@249 173 double *decout = new double[pcount];
c@249 174 decimator->process(samples, decout);
c@249 175 psource = decout;
c@249 176 }
c@249 177
c@249 178 int origin = 0;
c@249 179
c@249 180 // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
c@249 181
c@249 182 int frames = 0;
c@249 183
c@249 184 double *frame = new double[fftsize];
c@249 185 double *real = new double[fftsize];
c@249 186 double *imag = new double[fftsize];
c@249 187 double *cqre = new double[ncoeff];
c@249 188 double *cqim = new double[ncoeff];
c@249 189
c@249 190 while (origin <= pcount) {
c@249 191
c@249 192 // always need at least one fft window per block, but after
c@249 193 // that we want to avoid having any incomplete ones
c@249 194 if (origin > 0 && origin + fftsize >= pcount) break;
c@249 195
c@249 196 for (int i = 0; i < fftsize; ++i) {
c@249 197 if (origin + i < pcount) {
c@249 198 frame[i] = psource[origin + i];
c@249 199 } else {
c@249 200 frame[i] = 0.0;
c@249 201 }
c@249 202 }
c@249 203
c@249 204 for (int i = 0; i < fftsize/2; ++i) {
c@249 205 double value = frame[i];
c@249 206 frame[i] = frame[i + fftsize/2];
c@249 207 frame[i + fftsize/2] = value;
c@249 208 }
c@249 209
c@249 210 window->cut(frame);
c@249 211
c@289 212 fft->process(false, frame, real, imag);
c@249 213
c@249 214 constq->process(real, imag, cqre, cqim);
c@243 215
c@249 216 for (int i = 0; i < ncoeff; ++i) {
c@249 217 cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
c@249 218 }
c@249 219 ++frames;
c@245 220
c@249 221 origin += fftsize/2;
c@249 222 }
c@245 223
c@249 224 delete [] cqre;
c@249 225 delete [] cqim;
c@249 226 delete [] real;
c@249 227 delete [] imag;
c@249 228 delete [] frame;
c@245 229
c@249 230 for (int i = 0; i < ncoeff; ++i) {
c@249 231 cq[i] /= frames;
c@249 232 }
c@245 233
c@249 234 if (decimator) delete[] psource;
c@245 235
c@249 236 features.push_back(cq);
c@243 237 }
c@243 238
c@251 239 void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples)
c@251 240 {
c@251 241 if (!mfcc) {
c@251 242 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: "
c@251 243 << "No mfcc: initialise not called?"
c@251 244 << std::endl;
c@251 245 return;
c@251 246 }
c@251 247
c@251 248 if (nsamples < getWindowsize()) {
c@251 249 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
c@251 250 return;
c@251 251 }
c@251 252
c@251 253 int fftsize = mfcc->getfftlength();
c@251 254
c@251 255 vector<double> cc(ncoeff);
c@251 256
c@251 257 for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
c@251 258
c@251 259 const double *psource = samples;
c@251 260 int pcount = nsamples;
c@251 261
c@252 262 if (decimator) {
c@252 263 pcount = nsamples / decimator->getFactor();
c@252 264 double *decout = new double[pcount];
c@252 265 decimator->process(samples, decout);
c@252 266 psource = decout;
c@252 267 }
c@252 268
c@251 269 int origin = 0;
c@251 270 int frames = 0;
c@251 271
c@251 272 double *frame = new double[fftsize];
c@251 273 double *ccout = new double[ncoeff];
c@251 274
c@251 275 while (origin <= pcount) {
c@251 276
c@251 277 // always need at least one fft window per block, but after
c@251 278 // that we want to avoid having any incomplete ones
c@251 279 if (origin > 0 && origin + fftsize >= pcount) break;
c@251 280
c@251 281 for (int i = 0; i < fftsize; ++i) {
c@251 282 if (origin + i < pcount) {
c@251 283 frame[i] = psource[origin + i];
c@251 284 } else {
c@251 285 frame[i] = 0.0;
c@251 286 }
c@251 287 }
c@251 288
c@255 289 mfcc->process(frame, ccout);
c@251 290
c@251 291 for (int i = 0; i < ncoeff; ++i) {
c@251 292 cc[i] += ccout[i];
c@251 293 }
c@251 294 ++frames;
c@251 295
c@251 296 origin += fftsize/2;
c@251 297 }
c@251 298
c@251 299 delete [] ccout;
c@251 300 delete [] frame;
c@251 301
c@251 302 for (int i = 0; i < ncoeff; ++i) {
c@251 303 cc[i] /= frames;
c@251 304 }
c@251 305
c@252 306 if (decimator) delete[] psource;
c@252 307
c@251 308 features.push_back(cc);
c@251 309 }
c@251 310
c@243 311 void ClusterMeltSegmenter::segment(int m)
c@243 312 {
c@249 313 nclusters = m;
c@249 314 segment();
c@243 315 }
c@243 316
c@243 317 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
c@243 318 {
c@249 319 features = f;
c@249 320 featureType = FEATURE_TYPE_UNKNOWN;
c@243 321 }
c@243 322
c@243 323 void ClusterMeltSegmenter::segment()
c@243 324 {
c@251 325 delete constq;
c@251 326 constq = 0;
c@251 327 delete mfcc;
c@251 328 mfcc = 0;
c@251 329 delete decimator;
c@251 330 decimator = 0;
c@283 331
c@283 332 if (features.size() < histogramLength) return;
c@283 333 /*
c@249 334 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
c@249 335 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
c@283 336 */
c@249 337 // copy the features to a native array and use the existing C segmenter...
c@249 338 double** arrFeatures = new double*[features.size()];
c@249 339 for (int i = 0; i < features.size(); i++)
c@249 340 {
c@249 341 if (featureType == FEATURE_TYPE_UNKNOWN) {
c@249 342 arrFeatures[i] = new double[features[0].size()];
c@249 343 for (int j = 0; j < features[0].size(); j++)
c@249 344 arrFeatures[i][j] = features[i][j];
c@249 345 } else {
c@249 346 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
c@249 347 for (int j = 0; j < ncoeff; j++)
c@249 348 arrFeatures[i][j] = features[i][j];
c@249 349 }
c@249 350 }
c@243 351
c@249 352 q = new int[features.size()];
c@243 353
c@251 354 if (featureType == FEATURE_TYPE_UNKNOWN ||
c@251 355 featureType == FEATURE_TYPE_MFCC)
c@249 356 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
c@249 357 nclusters, neighbourhoodLimit);
c@249 358 else
c@249 359 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
c@249 360 nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
c@243 361
c@249 362 // convert the cluster assignment sequence to a segmentation
c@249 363 makeSegmentation(q, features.size());
c@243 364
c@249 365 // de-allocate arrays
c@249 366 delete [] q;
c@249 367 for (int i = 0; i < features.size(); i++)
c@249 368 delete [] arrFeatures[i];
c@249 369 delete [] arrFeatures;
c@243 370
c@249 371 // clear the features
c@249 372 clear();
c@243 373 }
c@243 374
c@243 375 void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
c@243 376 {
c@249 377 segmentation.segments.clear();
c@249 378 segmentation.nsegtypes = nclusters;
c@249 379 segmentation.samplerate = samplerate;
c@243 380
c@249 381 Segment segment;
c@249 382 segment.start = 0;
c@249 383 segment.type = q[0];
c@243 384
c@249 385 for (int i = 1; i < len; i++)
c@249 386 {
c@249 387 if (q[i] != q[i-1])
c@249 388 {
c@249 389 segment.end = i * getHopsize();
c@249 390 segmentation.segments.push_back(segment);
c@249 391 segment.type = q[i];
c@249 392 segment.start = segment.end;
c@249 393 }
c@249 394 }
c@249 395 segment.end = len * getHopsize();
c@249 396 segmentation.segments.push_back(segment);
c@243 397 }
c@243 398