annotate dsp/segmentation/ClusterMeltSegmenter.cpp @ 289:befe5aa6b450

* Refactor FFT a little bit so as to separate construction and processing rather than have a single static method -- will make it easier to use a different implementation * pull in KissFFT implementation (not hooked up yet)
author Chris Cannam <c.cannam@qmul.ac.uk>
date Wed, 13 May 2009 09:19:12 +0000
parents 5e125f030287
children e5907ae6de17
rev   line source
c@249 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@249 2
c@243 3 /*
c@249 4 * ClusterMeltSegmenter.cpp
c@243 5 *
c@249 6 * Created by Mark Levy on 23/03/2006.
c@249 7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
c@249 8 * All rights reserved.
c@243 9 */
c@243 10
c@243 11 #include <cfloat>
c@243 12 #include <cmath>
c@243 13
c@243 14 #include "ClusterMeltSegmenter.h"
c@243 15 #include "cluster_segmenter.h"
c@243 16 #include "segment.h"
c@243 17
c@245 18 #include "dsp/transforms/FFT.h"
c@249 19 #include "dsp/chromagram/ConstantQ.h"
c@249 20 #include "dsp/rateconversion/Decimator.h"
c@251 21 #include "dsp/mfcc/MFCC.h"
c@245 22
c@249 23 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) :
c@249 24 window(NULL),
c@289 25 fft(NULL),
c@249 26 constq(NULL),
c@251 27 mfcc(NULL),
c@249 28 featureType(params.featureType),
c@249 29 hopSize(params.hopSize),
c@249 30 windowSize(params.windowSize),
c@249 31 fmin(params.fmin),
c@249 32 fmax(params.fmax),
c@249 33 nbins(params.nbins),
c@249 34 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
c@249 35 nHMMStates(params.nHMMStates),
c@249 36 nclusters(params.nclusters),
c@249 37 histogramLength(params.histogramLength),
c@249 38 neighbourhoodLimit(params.neighbourhoodLimit),
c@251 39 decimator(NULL)
c@243 40 {
c@243 41 }
c@243 42
c@243 43 void ClusterMeltSegmenter::initialise(int fs)
c@243 44 {
c@249 45 samplerate = fs;
c@249 46
c@251 47 if (featureType == FEATURE_TYPE_CONSTQ ||
c@251 48 featureType == FEATURE_TYPE_CHROMA) {
c@251 49
c@251 50 // run internal processing at 11025 or thereabouts
c@249 51 int internalRate = 11025;
c@249 52 int decimationFactor = samplerate / internalRate;
c@249 53 if (decimationFactor < 1) decimationFactor = 1;
c@249 54
c@249 55 // must be a power of two
c@249 56 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
c@249 57
c@249 58 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
c@249 59 decimationFactor = Decimator::getHighestSupportedFactor();
c@249 60 }
c@249 61
c@249 62 if (decimationFactor > 1) {
c@249 63 decimator = new Decimator(getWindowsize(), decimationFactor);
c@249 64 }
c@249 65
c@249 66 CQConfig config;
c@249 67 config.FS = samplerate / decimationFactor;
c@249 68 config.min = fmin;
c@249 69 config.max = fmax;
c@249 70 config.BPO = nbins;
c@249 71 config.CQThresh = 0.0054;
c@249 72
c@249 73 constq = new ConstantQ(config);
c@249 74 constq->sparsekernel();
c@251 75
c@251 76 ncoeff = constq->getK();
c@289 77
c@289 78 fft = new FFTReal(constq->getfftlength());
c@251 79
c@251 80 } else if (featureType == FEATURE_TYPE_MFCC) {
c@249 81
c@252 82 // run internal processing at 22050 or thereabouts
c@252 83 int internalRate = 22050;
c@252 84 int decimationFactor = samplerate / internalRate;
c@252 85 if (decimationFactor < 1) decimationFactor = 1;
c@252 86
c@252 87 // must be a power of two
c@252 88 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
c@252 89
c@252 90 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
c@252 91 decimationFactor = Decimator::getHighestSupportedFactor();
c@252 92 }
c@252 93
c@252 94 if (decimationFactor > 1) {
c@252 95 decimator = new Decimator(getWindowsize(), decimationFactor);
c@252 96 }
c@252 97
c@255 98 MFCCConfig config(samplerate / decimationFactor);
c@252 99 config.fftsize = 2048;
c@252 100 config.nceps = 19;
c@252 101 config.want_c0 = true;
c@251 102
c@251 103 mfcc = new MFCC(config);
c@252 104 ncoeff = config.nceps + 1;
c@249 105 }
c@243 106 }
c@243 107
c@243 108 ClusterMeltSegmenter::~ClusterMeltSegmenter()
c@243 109 {
c@249 110 delete window;
c@249 111 delete constq;
c@249 112 delete decimator;
c@289 113 delete fft;
c@245 114 }
c@245 115
c@245 116 int
c@245 117 ClusterMeltSegmenter::getWindowsize()
c@245 118 {
c@269 119 return static_cast<int>(windowSize * samplerate + 0.001);
c@245 120 }
c@245 121
c@245 122 int
c@245 123 ClusterMeltSegmenter::getHopsize()
c@245 124 {
c@269 125 return static_cast<int>(hopSize * samplerate + 0.001);
c@243 126 }
c@243 127
c@249 128 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
c@243 129 {
c@251 130 if (featureType == FEATURE_TYPE_CONSTQ ||
c@251 131 featureType == FEATURE_TYPE_CHROMA) {
c@251 132 extractFeaturesConstQ(samples, nsamples);
c@251 133 } else if (featureType == FEATURE_TYPE_MFCC) {
c@251 134 extractFeaturesMFCC(samples, nsamples);
c@251 135 }
c@251 136 }
c@251 137
c@251 138 void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples)
c@251 139 {
c@249 140 if (!constq) {
c@251 141 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: "
c@251 142 << "No const-q: initialise not called?"
c@249 143 << std::endl;
c@249 144 return;
c@249 145 }
c@245 146
c@249 147 if (nsamples < getWindowsize()) {
c@249 148 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
c@249 149 return;
c@249 150 }
c@249 151
c@249 152 int fftsize = constq->getfftlength();
c@249 153
c@249 154 if (!window || window->getSize() != fftsize) {
c@249 155 delete window;
c@249 156 window = new Window<double>(HammingWindow, fftsize);
c@249 157 }
c@249 158
c@249 159 vector<double> cq(ncoeff);
c@249 160
c@249 161 for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
c@249 162
c@249 163 const double *psource = samples;
c@249 164 int pcount = nsamples;
c@249 165
c@249 166 if (decimator) {
c@249 167 pcount = nsamples / decimator->getFactor();
c@249 168 double *decout = new double[pcount];
c@249 169 decimator->process(samples, decout);
c@249 170 psource = decout;
c@249 171 }
c@249 172
c@249 173 int origin = 0;
c@249 174
c@249 175 // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
c@249 176
c@249 177 int frames = 0;
c@249 178
c@249 179 double *frame = new double[fftsize];
c@249 180 double *real = new double[fftsize];
c@249 181 double *imag = new double[fftsize];
c@249 182 double *cqre = new double[ncoeff];
c@249 183 double *cqim = new double[ncoeff];
c@249 184
c@249 185 while (origin <= pcount) {
c@249 186
c@249 187 // always need at least one fft window per block, but after
c@249 188 // that we want to avoid having any incomplete ones
c@249 189 if (origin > 0 && origin + fftsize >= pcount) break;
c@249 190
c@249 191 for (int i = 0; i < fftsize; ++i) {
c@249 192 if (origin + i < pcount) {
c@249 193 frame[i] = psource[origin + i];
c@249 194 } else {
c@249 195 frame[i] = 0.0;
c@249 196 }
c@249 197 }
c@249 198
c@249 199 for (int i = 0; i < fftsize/2; ++i) {
c@249 200 double value = frame[i];
c@249 201 frame[i] = frame[i + fftsize/2];
c@249 202 frame[i + fftsize/2] = value;
c@249 203 }
c@249 204
c@249 205 window->cut(frame);
c@249 206
c@289 207 fft->process(false, frame, real, imag);
c@249 208
c@249 209 constq->process(real, imag, cqre, cqim);
c@243 210
c@249 211 for (int i = 0; i < ncoeff; ++i) {
c@249 212 cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
c@249 213 }
c@249 214 ++frames;
c@245 215
c@249 216 origin += fftsize/2;
c@249 217 }
c@245 218
c@249 219 delete [] cqre;
c@249 220 delete [] cqim;
c@249 221 delete [] real;
c@249 222 delete [] imag;
c@249 223 delete [] frame;
c@245 224
c@249 225 for (int i = 0; i < ncoeff; ++i) {
c@249 226 cq[i] /= frames;
c@249 227 }
c@245 228
c@249 229 if (decimator) delete[] psource;
c@245 230
c@249 231 features.push_back(cq);
c@243 232 }
c@243 233
c@251 234 void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples)
c@251 235 {
c@251 236 if (!mfcc) {
c@251 237 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: "
c@251 238 << "No mfcc: initialise not called?"
c@251 239 << std::endl;
c@251 240 return;
c@251 241 }
c@251 242
c@251 243 if (nsamples < getWindowsize()) {
c@251 244 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
c@251 245 return;
c@251 246 }
c@251 247
c@251 248 int fftsize = mfcc->getfftlength();
c@251 249
c@251 250 vector<double> cc(ncoeff);
c@251 251
c@251 252 for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
c@251 253
c@251 254 const double *psource = samples;
c@251 255 int pcount = nsamples;
c@251 256
c@252 257 if (decimator) {
c@252 258 pcount = nsamples / decimator->getFactor();
c@252 259 double *decout = new double[pcount];
c@252 260 decimator->process(samples, decout);
c@252 261 psource = decout;
c@252 262 }
c@252 263
c@251 264 int origin = 0;
c@251 265 int frames = 0;
c@251 266
c@251 267 double *frame = new double[fftsize];
c@251 268 double *ccout = new double[ncoeff];
c@251 269
c@251 270 while (origin <= pcount) {
c@251 271
c@251 272 // always need at least one fft window per block, but after
c@251 273 // that we want to avoid having any incomplete ones
c@251 274 if (origin > 0 && origin + fftsize >= pcount) break;
c@251 275
c@251 276 for (int i = 0; i < fftsize; ++i) {
c@251 277 if (origin + i < pcount) {
c@251 278 frame[i] = psource[origin + i];
c@251 279 } else {
c@251 280 frame[i] = 0.0;
c@251 281 }
c@251 282 }
c@251 283
c@255 284 mfcc->process(frame, ccout);
c@251 285
c@251 286 for (int i = 0; i < ncoeff; ++i) {
c@251 287 cc[i] += ccout[i];
c@251 288 }
c@251 289 ++frames;
c@251 290
c@251 291 origin += fftsize/2;
c@251 292 }
c@251 293
c@251 294 delete [] ccout;
c@251 295 delete [] frame;
c@251 296
c@251 297 for (int i = 0; i < ncoeff; ++i) {
c@251 298 cc[i] /= frames;
c@251 299 }
c@251 300
c@252 301 if (decimator) delete[] psource;
c@252 302
c@251 303 features.push_back(cc);
c@251 304 }
c@251 305
c@243 306 void ClusterMeltSegmenter::segment(int m)
c@243 307 {
c@249 308 nclusters = m;
c@249 309 segment();
c@243 310 }
c@243 311
c@243 312 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
c@243 313 {
c@249 314 features = f;
c@249 315 featureType = FEATURE_TYPE_UNKNOWN;
c@243 316 }
c@243 317
c@243 318 void ClusterMeltSegmenter::segment()
c@243 319 {
c@251 320 delete constq;
c@251 321 constq = 0;
c@251 322 delete mfcc;
c@251 323 mfcc = 0;
c@251 324 delete decimator;
c@251 325 decimator = 0;
c@283 326
c@283 327 if (features.size() < histogramLength) return;
c@283 328 /*
c@249 329 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
c@249 330 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
c@283 331 */
c@249 332 // copy the features to a native array and use the existing C segmenter...
c@249 333 double** arrFeatures = new double*[features.size()];
c@249 334 for (int i = 0; i < features.size(); i++)
c@249 335 {
c@249 336 if (featureType == FEATURE_TYPE_UNKNOWN) {
c@249 337 arrFeatures[i] = new double[features[0].size()];
c@249 338 for (int j = 0; j < features[0].size(); j++)
c@249 339 arrFeatures[i][j] = features[i][j];
c@249 340 } else {
c@249 341 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
c@249 342 for (int j = 0; j < ncoeff; j++)
c@249 343 arrFeatures[i][j] = features[i][j];
c@249 344 }
c@249 345 }
c@243 346
c@249 347 q = new int[features.size()];
c@243 348
c@251 349 if (featureType == FEATURE_TYPE_UNKNOWN ||
c@251 350 featureType == FEATURE_TYPE_MFCC)
c@249 351 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
c@249 352 nclusters, neighbourhoodLimit);
c@249 353 else
c@249 354 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
c@249 355 nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
c@243 356
c@249 357 // convert the cluster assignment sequence to a segmentation
c@249 358 makeSegmentation(q, features.size());
c@243 359
c@249 360 // de-allocate arrays
c@249 361 delete [] q;
c@249 362 for (int i = 0; i < features.size(); i++)
c@249 363 delete [] arrFeatures[i];
c@249 364 delete [] arrFeatures;
c@243 365
c@249 366 // clear the features
c@249 367 clear();
c@243 368 }
c@243 369
c@243 370 void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
c@243 371 {
c@249 372 segmentation.segments.clear();
c@249 373 segmentation.nsegtypes = nclusters;
c@249 374 segmentation.samplerate = samplerate;
c@243 375
c@249 376 Segment segment;
c@249 377 segment.start = 0;
c@249 378 segment.type = q[0];
c@243 379
c@249 380 for (int i = 1; i < len; i++)
c@249 381 {
c@249 382 if (q[i] != q[i-1])
c@249 383 {
c@249 384 segment.end = i * getHopsize();
c@249 385 segmentation.segments.push_back(segment);
c@249 386 segment.type = q[i];
c@249 387 segment.start = segment.end;
c@249 388 }
c@249 389 }
c@249 390 segment.end = len * getHopsize();
c@249 391 segmentation.segments.push_back(segment);
c@243 392 }
c@243 393