annotate dsp/segmentation/ClusterMeltSegmenter.cpp @ 96:88f3cfcff55f

A threshold (delta) is added in the peak picking parameters structure (PPickParams). It is used as an offset when computing the smoothed detection function. A constructor for the structure PPickParams is also added to set the parameters to 0 when a structure instance is created. Hence programmes using the peak picking parameter structure and which do not set the delta parameter (e.g. QM Vamp note onset detector) won't be affected by the modifications. Functions modified: - dsp/onsets/PeakPicking.cpp - dsp/onsets/PeakPicking.h - dsp/signalconditioning/DFProcess.cpp - dsp/signalconditioning/DFProcess.h
author mathieub <mathieu.barthet@eecs.qmul.ac.uk>
date Mon, 20 Jun 2011 19:01:48 +0100
parents e5907ae6de17
children f6ccde089491
rev   line source
cannam@24 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@24 2
cannam@18 3 /*
cannam@24 4 * ClusterMeltSegmenter.cpp
cannam@18 5 *
cannam@24 6 * Created by Mark Levy on 23/03/2006.
cannam@24 7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
Chris@84 8
Chris@84 9 This program is free software; you can redistribute it and/or
Chris@84 10 modify it under the terms of the GNU General Public License as
Chris@84 11 published by the Free Software Foundation; either version 2 of the
Chris@84 12 License, or (at your option) any later version. See the file
Chris@84 13 COPYING included with this distribution for more information.
cannam@18 14 */
cannam@18 15
cannam@18 16 #include <cfloat>
cannam@18 17 #include <cmath>
cannam@18 18
cannam@18 19 #include "ClusterMeltSegmenter.h"
cannam@18 20 #include "cluster_segmenter.h"
cannam@18 21 #include "segment.h"
cannam@18 22
cannam@20 23 #include "dsp/transforms/FFT.h"
cannam@24 24 #include "dsp/chromagram/ConstantQ.h"
cannam@24 25 #include "dsp/rateconversion/Decimator.h"
cannam@26 26 #include "dsp/mfcc/MFCC.h"
cannam@20 27
cannam@24 28 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) :
cannam@24 29 window(NULL),
cannam@64 30 fft(NULL),
cannam@24 31 constq(NULL),
cannam@26 32 mfcc(NULL),
cannam@24 33 featureType(params.featureType),
cannam@24 34 hopSize(params.hopSize),
cannam@24 35 windowSize(params.windowSize),
cannam@24 36 fmin(params.fmin),
cannam@24 37 fmax(params.fmax),
cannam@24 38 nbins(params.nbins),
cannam@24 39 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
cannam@24 40 nHMMStates(params.nHMMStates),
cannam@24 41 nclusters(params.nclusters),
cannam@24 42 histogramLength(params.histogramLength),
cannam@24 43 neighbourhoodLimit(params.neighbourhoodLimit),
cannam@26 44 decimator(NULL)
cannam@18 45 {
cannam@18 46 }
cannam@18 47
cannam@18 48 void ClusterMeltSegmenter::initialise(int fs)
cannam@18 49 {
cannam@24 50 samplerate = fs;
cannam@24 51
cannam@26 52 if (featureType == FEATURE_TYPE_CONSTQ ||
cannam@26 53 featureType == FEATURE_TYPE_CHROMA) {
cannam@26 54
cannam@26 55 // run internal processing at 11025 or thereabouts
cannam@24 56 int internalRate = 11025;
cannam@24 57 int decimationFactor = samplerate / internalRate;
cannam@24 58 if (decimationFactor < 1) decimationFactor = 1;
cannam@24 59
cannam@24 60 // must be a power of two
cannam@24 61 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
cannam@24 62
cannam@24 63 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
cannam@24 64 decimationFactor = Decimator::getHighestSupportedFactor();
cannam@24 65 }
cannam@24 66
cannam@24 67 if (decimationFactor > 1) {
cannam@24 68 decimator = new Decimator(getWindowsize(), decimationFactor);
cannam@24 69 }
cannam@24 70
cannam@24 71 CQConfig config;
cannam@24 72 config.FS = samplerate / decimationFactor;
cannam@24 73 config.min = fmin;
cannam@24 74 config.max = fmax;
cannam@24 75 config.BPO = nbins;
cannam@24 76 config.CQThresh = 0.0054;
cannam@24 77
cannam@24 78 constq = new ConstantQ(config);
cannam@24 79 constq->sparsekernel();
cannam@26 80
cannam@26 81 ncoeff = constq->getK();
cannam@64 82
cannam@64 83 fft = new FFTReal(constq->getfftlength());
cannam@26 84
cannam@26 85 } else if (featureType == FEATURE_TYPE_MFCC) {
cannam@24 86
cannam@27 87 // run internal processing at 22050 or thereabouts
cannam@27 88 int internalRate = 22050;
cannam@27 89 int decimationFactor = samplerate / internalRate;
cannam@27 90 if (decimationFactor < 1) decimationFactor = 1;
cannam@27 91
cannam@27 92 // must be a power of two
cannam@27 93 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
cannam@27 94
cannam@27 95 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
cannam@27 96 decimationFactor = Decimator::getHighestSupportedFactor();
cannam@27 97 }
cannam@27 98
cannam@27 99 if (decimationFactor > 1) {
cannam@27 100 decimator = new Decimator(getWindowsize(), decimationFactor);
cannam@27 101 }
cannam@27 102
cannam@30 103 MFCCConfig config(samplerate / decimationFactor);
cannam@27 104 config.fftsize = 2048;
cannam@27 105 config.nceps = 19;
cannam@27 106 config.want_c0 = true;
cannam@26 107
cannam@26 108 mfcc = new MFCC(config);
cannam@27 109 ncoeff = config.nceps + 1;
cannam@24 110 }
cannam@18 111 }
cannam@18 112
cannam@18 113 ClusterMeltSegmenter::~ClusterMeltSegmenter()
cannam@18 114 {
cannam@24 115 delete window;
cannam@24 116 delete constq;
cannam@24 117 delete decimator;
cannam@64 118 delete fft;
cannam@20 119 }
cannam@20 120
cannam@20 121 int
cannam@20 122 ClusterMeltSegmenter::getWindowsize()
cannam@20 123 {
cannam@44 124 return static_cast<int>(windowSize * samplerate + 0.001);
cannam@20 125 }
cannam@20 126
cannam@20 127 int
cannam@20 128 ClusterMeltSegmenter::getHopsize()
cannam@20 129 {
cannam@44 130 return static_cast<int>(hopSize * samplerate + 0.001);
cannam@18 131 }
cannam@18 132
cannam@24 133 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
cannam@18 134 {
cannam@26 135 if (featureType == FEATURE_TYPE_CONSTQ ||
cannam@26 136 featureType == FEATURE_TYPE_CHROMA) {
cannam@26 137 extractFeaturesConstQ(samples, nsamples);
cannam@26 138 } else if (featureType == FEATURE_TYPE_MFCC) {
cannam@26 139 extractFeaturesMFCC(samples, nsamples);
cannam@26 140 }
cannam@26 141 }
cannam@26 142
cannam@26 143 void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples)
cannam@26 144 {
cannam@24 145 if (!constq) {
cannam@26 146 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: "
cannam@26 147 << "No const-q: initialise not called?"
cannam@24 148 << std::endl;
cannam@24 149 return;
cannam@24 150 }
cannam@20 151
cannam@24 152 if (nsamples < getWindowsize()) {
cannam@24 153 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
cannam@24 154 return;
cannam@24 155 }
cannam@24 156
cannam@24 157 int fftsize = constq->getfftlength();
cannam@24 158
cannam@24 159 if (!window || window->getSize() != fftsize) {
cannam@24 160 delete window;
cannam@24 161 window = new Window<double>(HammingWindow, fftsize);
cannam@24 162 }
cannam@24 163
cannam@24 164 vector<double> cq(ncoeff);
cannam@24 165
cannam@24 166 for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
cannam@24 167
cannam@24 168 const double *psource = samples;
cannam@24 169 int pcount = nsamples;
cannam@24 170
cannam@24 171 if (decimator) {
cannam@24 172 pcount = nsamples / decimator->getFactor();
cannam@24 173 double *decout = new double[pcount];
cannam@24 174 decimator->process(samples, decout);
cannam@24 175 psource = decout;
cannam@24 176 }
cannam@24 177
cannam@24 178 int origin = 0;
cannam@24 179
cannam@24 180 // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
cannam@24 181
cannam@24 182 int frames = 0;
cannam@24 183
cannam@24 184 double *frame = new double[fftsize];
cannam@24 185 double *real = new double[fftsize];
cannam@24 186 double *imag = new double[fftsize];
cannam@24 187 double *cqre = new double[ncoeff];
cannam@24 188 double *cqim = new double[ncoeff];
cannam@24 189
cannam@24 190 while (origin <= pcount) {
cannam@24 191
cannam@24 192 // always need at least one fft window per block, but after
cannam@24 193 // that we want to avoid having any incomplete ones
cannam@24 194 if (origin > 0 && origin + fftsize >= pcount) break;
cannam@24 195
cannam@24 196 for (int i = 0; i < fftsize; ++i) {
cannam@24 197 if (origin + i < pcount) {
cannam@24 198 frame[i] = psource[origin + i];
cannam@24 199 } else {
cannam@24 200 frame[i] = 0.0;
cannam@24 201 }
cannam@24 202 }
cannam@24 203
cannam@24 204 for (int i = 0; i < fftsize/2; ++i) {
cannam@24 205 double value = frame[i];
cannam@24 206 frame[i] = frame[i + fftsize/2];
cannam@24 207 frame[i + fftsize/2] = value;
cannam@24 208 }
cannam@24 209
cannam@24 210 window->cut(frame);
cannam@24 211
cannam@64 212 fft->process(false, frame, real, imag);
cannam@24 213
cannam@24 214 constq->process(real, imag, cqre, cqim);
cannam@18 215
cannam@24 216 for (int i = 0; i < ncoeff; ++i) {
cannam@24 217 cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
cannam@24 218 }
cannam@24 219 ++frames;
cannam@20 220
cannam@24 221 origin += fftsize/2;
cannam@24 222 }
cannam@20 223
cannam@24 224 delete [] cqre;
cannam@24 225 delete [] cqim;
cannam@24 226 delete [] real;
cannam@24 227 delete [] imag;
cannam@24 228 delete [] frame;
cannam@20 229
cannam@24 230 for (int i = 0; i < ncoeff; ++i) {
cannam@24 231 cq[i] /= frames;
cannam@24 232 }
cannam@20 233
cannam@24 234 if (decimator) delete[] psource;
cannam@20 235
cannam@24 236 features.push_back(cq);
cannam@18 237 }
cannam@18 238
cannam@26 239 void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples)
cannam@26 240 {
cannam@26 241 if (!mfcc) {
cannam@26 242 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: "
cannam@26 243 << "No mfcc: initialise not called?"
cannam@26 244 << std::endl;
cannam@26 245 return;
cannam@26 246 }
cannam@26 247
cannam@26 248 if (nsamples < getWindowsize()) {
cannam@26 249 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
cannam@26 250 return;
cannam@26 251 }
cannam@26 252
cannam@26 253 int fftsize = mfcc->getfftlength();
cannam@26 254
cannam@26 255 vector<double> cc(ncoeff);
cannam@26 256
cannam@26 257 for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
cannam@26 258
cannam@26 259 const double *psource = samples;
cannam@26 260 int pcount = nsamples;
cannam@26 261
cannam@27 262 if (decimator) {
cannam@27 263 pcount = nsamples / decimator->getFactor();
cannam@27 264 double *decout = new double[pcount];
cannam@27 265 decimator->process(samples, decout);
cannam@27 266 psource = decout;
cannam@27 267 }
cannam@27 268
cannam@26 269 int origin = 0;
cannam@26 270 int frames = 0;
cannam@26 271
cannam@26 272 double *frame = new double[fftsize];
cannam@26 273 double *ccout = new double[ncoeff];
cannam@26 274
cannam@26 275 while (origin <= pcount) {
cannam@26 276
cannam@26 277 // always need at least one fft window per block, but after
cannam@26 278 // that we want to avoid having any incomplete ones
cannam@26 279 if (origin > 0 && origin + fftsize >= pcount) break;
cannam@26 280
cannam@26 281 for (int i = 0; i < fftsize; ++i) {
cannam@26 282 if (origin + i < pcount) {
cannam@26 283 frame[i] = psource[origin + i];
cannam@26 284 } else {
cannam@26 285 frame[i] = 0.0;
cannam@26 286 }
cannam@26 287 }
cannam@26 288
cannam@30 289 mfcc->process(frame, ccout);
cannam@26 290
cannam@26 291 for (int i = 0; i < ncoeff; ++i) {
cannam@26 292 cc[i] += ccout[i];
cannam@26 293 }
cannam@26 294 ++frames;
cannam@26 295
cannam@26 296 origin += fftsize/2;
cannam@26 297 }
cannam@26 298
cannam@26 299 delete [] ccout;
cannam@26 300 delete [] frame;
cannam@26 301
cannam@26 302 for (int i = 0; i < ncoeff; ++i) {
cannam@26 303 cc[i] /= frames;
cannam@26 304 }
cannam@26 305
cannam@27 306 if (decimator) delete[] psource;
cannam@27 307
cannam@26 308 features.push_back(cc);
cannam@26 309 }
cannam@26 310
cannam@18 311 void ClusterMeltSegmenter::segment(int m)
cannam@18 312 {
cannam@24 313 nclusters = m;
cannam@24 314 segment();
cannam@18 315 }
cannam@18 316
cannam@18 317 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
cannam@18 318 {
cannam@24 319 features = f;
cannam@24 320 featureType = FEATURE_TYPE_UNKNOWN;
cannam@18 321 }
cannam@18 322
cannam@18 323 void ClusterMeltSegmenter::segment()
cannam@18 324 {
cannam@26 325 delete constq;
cannam@26 326 constq = 0;
cannam@26 327 delete mfcc;
cannam@26 328 mfcc = 0;
cannam@26 329 delete decimator;
cannam@26 330 decimator = 0;
cannam@58 331
cannam@58 332 if (features.size() < histogramLength) return;
cannam@58 333 /*
cannam@24 334 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
cannam@24 335 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
cannam@58 336 */
cannam@24 337 // copy the features to a native array and use the existing C segmenter...
cannam@24 338 double** arrFeatures = new double*[features.size()];
cannam@24 339 for (int i = 0; i < features.size(); i++)
cannam@24 340 {
cannam@24 341 if (featureType == FEATURE_TYPE_UNKNOWN) {
cannam@24 342 arrFeatures[i] = new double[features[0].size()];
cannam@24 343 for (int j = 0; j < features[0].size(); j++)
cannam@24 344 arrFeatures[i][j] = features[i][j];
cannam@24 345 } else {
cannam@24 346 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
cannam@24 347 for (int j = 0; j < ncoeff; j++)
cannam@24 348 arrFeatures[i][j] = features[i][j];
cannam@24 349 }
cannam@24 350 }
cannam@18 351
cannam@24 352 q = new int[features.size()];
cannam@18 353
cannam@26 354 if (featureType == FEATURE_TYPE_UNKNOWN ||
cannam@26 355 featureType == FEATURE_TYPE_MFCC)
cannam@24 356 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
cannam@24 357 nclusters, neighbourhoodLimit);
cannam@24 358 else
cannam@24 359 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
cannam@24 360 nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
cannam@18 361
cannam@24 362 // convert the cluster assignment sequence to a segmentation
cannam@24 363 makeSegmentation(q, features.size());
cannam@18 364
cannam@24 365 // de-allocate arrays
cannam@24 366 delete [] q;
cannam@24 367 for (int i = 0; i < features.size(); i++)
cannam@24 368 delete [] arrFeatures[i];
cannam@24 369 delete [] arrFeatures;
cannam@18 370
cannam@24 371 // clear the features
cannam@24 372 clear();
cannam@18 373 }
cannam@18 374
cannam@18 375 void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
cannam@18 376 {
cannam@24 377 segmentation.segments.clear();
cannam@24 378 segmentation.nsegtypes = nclusters;
cannam@24 379 segmentation.samplerate = samplerate;
cannam@18 380
cannam@24 381 Segment segment;
cannam@24 382 segment.start = 0;
cannam@24 383 segment.type = q[0];
cannam@18 384
cannam@24 385 for (int i = 1; i < len; i++)
cannam@24 386 {
cannam@24 387 if (q[i] != q[i-1])
cannam@24 388 {
cannam@24 389 segment.end = i * getHopsize();
cannam@24 390 segmentation.segments.push_back(segment);
cannam@24 391 segment.type = q[i];
cannam@24 392 segment.start = segment.end;
cannam@24 393 }
cannam@24 394 }
cannam@24 395 segment.end = len * getHopsize();
cannam@24 396 segmentation.segments.push_back(segment);
cannam@18 397 }
cannam@18 398