annotate dsp/segmentation/ClusterMeltSegmenter.cpp @ 114:f6ccde089491 pvoc

Tidy real-to-complex FFT -- forward and inverse have different arguments, so make them separate functions; document
author Chris Cannam
date Wed, 02 Oct 2013 15:04:38 +0100
parents e5907ae6de17
children e4a57215ddee
rev   line source
cannam@24 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@24 2
cannam@18 3 /*
cannam@24 4 * ClusterMeltSegmenter.cpp
cannam@18 5 *
cannam@24 6 * Created by Mark Levy on 23/03/2006.
cannam@24 7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
Chris@84 8
Chris@84 9 This program is free software; you can redistribute it and/or
Chris@84 10 modify it under the terms of the GNU General Public License as
Chris@84 11 published by the Free Software Foundation; either version 2 of the
Chris@84 12 License, or (at your option) any later version. See the file
Chris@84 13 COPYING included with this distribution for more information.
cannam@18 14 */
cannam@18 15
cannam@18 16 #include <cfloat>
cannam@18 17 #include <cmath>
cannam@18 18
cannam@18 19 #include "ClusterMeltSegmenter.h"
cannam@18 20 #include "cluster_segmenter.h"
cannam@18 21 #include "segment.h"
cannam@18 22
cannam@20 23 #include "dsp/transforms/FFT.h"
cannam@24 24 #include "dsp/chromagram/ConstantQ.h"
cannam@24 25 #include "dsp/rateconversion/Decimator.h"
cannam@26 26 #include "dsp/mfcc/MFCC.h"
cannam@20 27
cannam@24 28 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) :
cannam@24 29 window(NULL),
cannam@64 30 fft(NULL),
cannam@24 31 constq(NULL),
cannam@26 32 mfcc(NULL),
cannam@24 33 featureType(params.featureType),
cannam@24 34 hopSize(params.hopSize),
cannam@24 35 windowSize(params.windowSize),
cannam@24 36 fmin(params.fmin),
cannam@24 37 fmax(params.fmax),
cannam@24 38 nbins(params.nbins),
cannam@24 39 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
cannam@24 40 nHMMStates(params.nHMMStates),
cannam@24 41 nclusters(params.nclusters),
cannam@24 42 histogramLength(params.histogramLength),
cannam@24 43 neighbourhoodLimit(params.neighbourhoodLimit),
cannam@26 44 decimator(NULL)
cannam@18 45 {
cannam@18 46 }
cannam@18 47
cannam@18 48 void ClusterMeltSegmenter::initialise(int fs)
cannam@18 49 {
cannam@24 50 samplerate = fs;
cannam@24 51
cannam@26 52 if (featureType == FEATURE_TYPE_CONSTQ ||
cannam@26 53 featureType == FEATURE_TYPE_CHROMA) {
cannam@26 54
cannam@26 55 // run internal processing at 11025 or thereabouts
cannam@24 56 int internalRate = 11025;
cannam@24 57 int decimationFactor = samplerate / internalRate;
cannam@24 58 if (decimationFactor < 1) decimationFactor = 1;
cannam@24 59
cannam@24 60 // must be a power of two
cannam@24 61 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
cannam@24 62
cannam@24 63 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
cannam@24 64 decimationFactor = Decimator::getHighestSupportedFactor();
cannam@24 65 }
cannam@24 66
cannam@24 67 if (decimationFactor > 1) {
cannam@24 68 decimator = new Decimator(getWindowsize(), decimationFactor);
cannam@24 69 }
cannam@24 70
cannam@24 71 CQConfig config;
cannam@24 72 config.FS = samplerate / decimationFactor;
cannam@24 73 config.min = fmin;
cannam@24 74 config.max = fmax;
cannam@24 75 config.BPO = nbins;
cannam@24 76 config.CQThresh = 0.0054;
cannam@24 77
cannam@24 78 constq = new ConstantQ(config);
cannam@24 79 constq->sparsekernel();
cannam@26 80
cannam@26 81 ncoeff = constq->getK();
cannam@64 82
cannam@64 83 fft = new FFTReal(constq->getfftlength());
cannam@26 84
cannam@26 85 } else if (featureType == FEATURE_TYPE_MFCC) {
cannam@24 86
cannam@27 87 // run internal processing at 22050 or thereabouts
cannam@27 88 int internalRate = 22050;
cannam@27 89 int decimationFactor = samplerate / internalRate;
cannam@27 90 if (decimationFactor < 1) decimationFactor = 1;
cannam@27 91
cannam@27 92 // must be a power of two
cannam@27 93 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
cannam@27 94
cannam@27 95 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
cannam@27 96 decimationFactor = Decimator::getHighestSupportedFactor();
cannam@27 97 }
cannam@27 98
cannam@27 99 if (decimationFactor > 1) {
cannam@27 100 decimator = new Decimator(getWindowsize(), decimationFactor);
cannam@27 101 }
cannam@27 102
cannam@30 103 MFCCConfig config(samplerate / decimationFactor);
cannam@27 104 config.fftsize = 2048;
cannam@27 105 config.nceps = 19;
cannam@27 106 config.want_c0 = true;
cannam@26 107
cannam@26 108 mfcc = new MFCC(config);
cannam@27 109 ncoeff = config.nceps + 1;
cannam@24 110 }
cannam@18 111 }
cannam@18 112
cannam@18 113 ClusterMeltSegmenter::~ClusterMeltSegmenter()
cannam@18 114 {
cannam@24 115 delete window;
cannam@24 116 delete constq;
cannam@24 117 delete decimator;
cannam@64 118 delete fft;
cannam@20 119 }
cannam@20 120
cannam@20 121 int
cannam@20 122 ClusterMeltSegmenter::getWindowsize()
cannam@20 123 {
cannam@44 124 return static_cast<int>(windowSize * samplerate + 0.001);
cannam@20 125 }
cannam@20 126
cannam@20 127 int
cannam@20 128 ClusterMeltSegmenter::getHopsize()
cannam@20 129 {
cannam@44 130 return static_cast<int>(hopSize * samplerate + 0.001);
cannam@18 131 }
cannam@18 132
cannam@24 133 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
cannam@18 134 {
cannam@26 135 if (featureType == FEATURE_TYPE_CONSTQ ||
cannam@26 136 featureType == FEATURE_TYPE_CHROMA) {
cannam@26 137 extractFeaturesConstQ(samples, nsamples);
cannam@26 138 } else if (featureType == FEATURE_TYPE_MFCC) {
cannam@26 139 extractFeaturesMFCC(samples, nsamples);
cannam@26 140 }
cannam@26 141 }
cannam@26 142
cannam@26 143 void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples)
cannam@26 144 {
cannam@24 145 if (!constq) {
cannam@26 146 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: "
cannam@26 147 << "No const-q: initialise not called?"
cannam@24 148 << std::endl;
cannam@24 149 return;
cannam@24 150 }
cannam@20 151
cannam@24 152 if (nsamples < getWindowsize()) {
cannam@24 153 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
cannam@24 154 return;
cannam@24 155 }
cannam@24 156
cannam@24 157 int fftsize = constq->getfftlength();
cannam@24 158
cannam@24 159 if (!window || window->getSize() != fftsize) {
cannam@24 160 delete window;
cannam@24 161 window = new Window<double>(HammingWindow, fftsize);
cannam@24 162 }
cannam@24 163
cannam@24 164 vector<double> cq(ncoeff);
cannam@24 165
cannam@24 166 for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
cannam@24 167
cannam@24 168 const double *psource = samples;
cannam@24 169 int pcount = nsamples;
cannam@24 170
cannam@24 171 if (decimator) {
cannam@24 172 pcount = nsamples / decimator->getFactor();
cannam@24 173 double *decout = new double[pcount];
cannam@24 174 decimator->process(samples, decout);
cannam@24 175 psource = decout;
cannam@24 176 }
cannam@24 177
cannam@24 178 int origin = 0;
cannam@24 179
cannam@24 180 // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
cannam@24 181
cannam@24 182 int frames = 0;
cannam@24 183
cannam@24 184 double *frame = new double[fftsize];
cannam@24 185 double *real = new double[fftsize];
cannam@24 186 double *imag = new double[fftsize];
cannam@24 187 double *cqre = new double[ncoeff];
cannam@24 188 double *cqim = new double[ncoeff];
cannam@24 189
cannam@24 190 while (origin <= pcount) {
cannam@24 191
cannam@24 192 // always need at least one fft window per block, but after
cannam@24 193 // that we want to avoid having any incomplete ones
cannam@24 194 if (origin > 0 && origin + fftsize >= pcount) break;
cannam@24 195
cannam@24 196 for (int i = 0; i < fftsize; ++i) {
cannam@24 197 if (origin + i < pcount) {
cannam@24 198 frame[i] = psource[origin + i];
cannam@24 199 } else {
cannam@24 200 frame[i] = 0.0;
cannam@24 201 }
cannam@24 202 }
cannam@24 203
cannam@24 204 for (int i = 0; i < fftsize/2; ++i) {
cannam@24 205 double value = frame[i];
cannam@24 206 frame[i] = frame[i + fftsize/2];
cannam@24 207 frame[i + fftsize/2] = value;
cannam@24 208 }
cannam@24 209
cannam@24 210 window->cut(frame);
cannam@24 211
Chris@114 212 fft->forward(frame, real, imag);
cannam@24 213
cannam@24 214 constq->process(real, imag, cqre, cqim);
cannam@18 215
cannam@24 216 for (int i = 0; i < ncoeff; ++i) {
cannam@24 217 cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
cannam@24 218 }
cannam@24 219 ++frames;
cannam@20 220
cannam@24 221 origin += fftsize/2;
cannam@24 222 }
cannam@20 223
cannam@24 224 delete [] cqre;
cannam@24 225 delete [] cqim;
cannam@24 226 delete [] real;
cannam@24 227 delete [] imag;
cannam@24 228 delete [] frame;
cannam@20 229
cannam@24 230 for (int i = 0; i < ncoeff; ++i) {
cannam@24 231 cq[i] /= frames;
cannam@24 232 }
cannam@20 233
cannam@24 234 if (decimator) delete[] psource;
cannam@20 235
cannam@24 236 features.push_back(cq);
cannam@18 237 }
cannam@18 238
cannam@26 239 void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples)
cannam@26 240 {
cannam@26 241 if (!mfcc) {
cannam@26 242 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: "
cannam@26 243 << "No mfcc: initialise not called?"
cannam@26 244 << std::endl;
cannam@26 245 return;
cannam@26 246 }
cannam@26 247
cannam@26 248 if (nsamples < getWindowsize()) {
cannam@26 249 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
cannam@26 250 return;
cannam@26 251 }
cannam@26 252
cannam@26 253 int fftsize = mfcc->getfftlength();
cannam@26 254
cannam@26 255 vector<double> cc(ncoeff);
cannam@26 256
cannam@26 257 for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
cannam@26 258
cannam@26 259 const double *psource = samples;
cannam@26 260 int pcount = nsamples;
cannam@26 261
cannam@27 262 if (decimator) {
cannam@27 263 pcount = nsamples / decimator->getFactor();
cannam@27 264 double *decout = new double[pcount];
cannam@27 265 decimator->process(samples, decout);
cannam@27 266 psource = decout;
cannam@27 267 }
cannam@27 268
cannam@26 269 int origin = 0;
cannam@26 270 int frames = 0;
cannam@26 271
cannam@26 272 double *frame = new double[fftsize];
cannam@26 273 double *ccout = new double[ncoeff];
cannam@26 274
cannam@26 275 while (origin <= pcount) {
cannam@26 276
cannam@26 277 // always need at least one fft window per block, but after
cannam@26 278 // that we want to avoid having any incomplete ones
cannam@26 279 if (origin > 0 && origin + fftsize >= pcount) break;
cannam@26 280
cannam@26 281 for (int i = 0; i < fftsize; ++i) {
cannam@26 282 if (origin + i < pcount) {
cannam@26 283 frame[i] = psource[origin + i];
cannam@26 284 } else {
cannam@26 285 frame[i] = 0.0;
cannam@26 286 }
cannam@26 287 }
cannam@26 288
cannam@30 289 mfcc->process(frame, ccout);
cannam@26 290
cannam@26 291 for (int i = 0; i < ncoeff; ++i) {
cannam@26 292 cc[i] += ccout[i];
cannam@26 293 }
cannam@26 294 ++frames;
cannam@26 295
cannam@26 296 origin += fftsize/2;
cannam@26 297 }
cannam@26 298
cannam@26 299 delete [] ccout;
cannam@26 300 delete [] frame;
cannam@26 301
cannam@26 302 for (int i = 0; i < ncoeff; ++i) {
cannam@26 303 cc[i] /= frames;
cannam@26 304 }
cannam@26 305
cannam@27 306 if (decimator) delete[] psource;
cannam@27 307
cannam@26 308 features.push_back(cc);
cannam@26 309 }
cannam@26 310
cannam@18 311 void ClusterMeltSegmenter::segment(int m)
cannam@18 312 {
cannam@24 313 nclusters = m;
cannam@24 314 segment();
cannam@18 315 }
cannam@18 316
cannam@18 317 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
cannam@18 318 {
cannam@24 319 features = f;
cannam@24 320 featureType = FEATURE_TYPE_UNKNOWN;
cannam@18 321 }
cannam@18 322
cannam@18 323 void ClusterMeltSegmenter::segment()
cannam@18 324 {
cannam@26 325 delete constq;
cannam@26 326 constq = 0;
cannam@26 327 delete mfcc;
cannam@26 328 mfcc = 0;
cannam@26 329 delete decimator;
cannam@26 330 decimator = 0;
cannam@58 331
cannam@58 332 if (features.size() < histogramLength) return;
cannam@58 333 /*
cannam@24 334 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
cannam@24 335 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
cannam@58 336 */
cannam@24 337 // copy the features to a native array and use the existing C segmenter...
cannam@24 338 double** arrFeatures = new double*[features.size()];
cannam@24 339 for (int i = 0; i < features.size(); i++)
cannam@24 340 {
cannam@24 341 if (featureType == FEATURE_TYPE_UNKNOWN) {
cannam@24 342 arrFeatures[i] = new double[features[0].size()];
cannam@24 343 for (int j = 0; j < features[0].size(); j++)
cannam@24 344 arrFeatures[i][j] = features[i][j];
cannam@24 345 } else {
cannam@24 346 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
cannam@24 347 for (int j = 0; j < ncoeff; j++)
cannam@24 348 arrFeatures[i][j] = features[i][j];
cannam@24 349 }
cannam@24 350 }
cannam@18 351
cannam@24 352 q = new int[features.size()];
cannam@18 353
cannam@26 354 if (featureType == FEATURE_TYPE_UNKNOWN ||
cannam@26 355 featureType == FEATURE_TYPE_MFCC)
cannam@24 356 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
cannam@24 357 nclusters, neighbourhoodLimit);
cannam@24 358 else
cannam@24 359 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
cannam@24 360 nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
cannam@18 361
cannam@24 362 // convert the cluster assignment sequence to a segmentation
cannam@24 363 makeSegmentation(q, features.size());
cannam@18 364
cannam@24 365 // de-allocate arrays
cannam@24 366 delete [] q;
cannam@24 367 for (int i = 0; i < features.size(); i++)
cannam@24 368 delete [] arrFeatures[i];
cannam@24 369 delete [] arrFeatures;
cannam@18 370
cannam@24 371 // clear the features
cannam@24 372 clear();
cannam@18 373 }
cannam@18 374
cannam@18 375 void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
cannam@18 376 {
cannam@24 377 segmentation.segments.clear();
cannam@24 378 segmentation.nsegtypes = nclusters;
cannam@24 379 segmentation.samplerate = samplerate;
cannam@18 380
cannam@24 381 Segment segment;
cannam@24 382 segment.start = 0;
cannam@24 383 segment.type = q[0];
cannam@18 384
cannam@24 385 for (int i = 1; i < len; i++)
cannam@24 386 {
cannam@24 387 if (q[i] != q[i-1])
cannam@24 388 {
cannam@24 389 segment.end = i * getHopsize();
cannam@24 390 segmentation.segments.push_back(segment);
cannam@24 391 segment.type = q[i];
cannam@24 392 segment.start = segment.end;
cannam@24 393 }
cannam@24 394 }
cannam@24 395 segment.end = len * getHopsize();
cannam@24 396 segmentation.segments.push_back(segment);
cannam@18 397 }
cannam@18 398