annotate dsp/segmentation/ClusterMeltSegmenter.cpp @ 64:6cb2b3cd5356

* Refactor FFT a little bit so as to separate construction and processing rather than have a single static method -- will make it easier to use a different implementation * pull in KissFFT implementation (not hooked up yet)
author cannam
date Wed, 13 May 2009 09:19:12 +0000
parents d72fcd34d9a7
children e5907ae6de17
rev   line source
cannam@24 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@24 2
cannam@18 3 /*
cannam@24 4 * ClusterMeltSegmenter.cpp
cannam@18 5 *
cannam@24 6 * Created by Mark Levy on 23/03/2006.
cannam@24 7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
cannam@24 8 * All rights reserved.
cannam@18 9 */
cannam@18 10
cannam@18 11 #include <cfloat>
cannam@18 12 #include <cmath>
cannam@18 13
cannam@18 14 #include "ClusterMeltSegmenter.h"
cannam@18 15 #include "cluster_segmenter.h"
cannam@18 16 #include "segment.h"
cannam@18 17
cannam@20 18 #include "dsp/transforms/FFT.h"
cannam@24 19 #include "dsp/chromagram/ConstantQ.h"
cannam@24 20 #include "dsp/rateconversion/Decimator.h"
cannam@26 21 #include "dsp/mfcc/MFCC.h"
cannam@20 22
cannam@24 23 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) :
cannam@24 24 window(NULL),
cannam@64 25 fft(NULL),
cannam@24 26 constq(NULL),
cannam@26 27 mfcc(NULL),
cannam@24 28 featureType(params.featureType),
cannam@24 29 hopSize(params.hopSize),
cannam@24 30 windowSize(params.windowSize),
cannam@24 31 fmin(params.fmin),
cannam@24 32 fmax(params.fmax),
cannam@24 33 nbins(params.nbins),
cannam@24 34 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
cannam@24 35 nHMMStates(params.nHMMStates),
cannam@24 36 nclusters(params.nclusters),
cannam@24 37 histogramLength(params.histogramLength),
cannam@24 38 neighbourhoodLimit(params.neighbourhoodLimit),
cannam@26 39 decimator(NULL)
cannam@18 40 {
cannam@18 41 }
cannam@18 42
cannam@18 43 void ClusterMeltSegmenter::initialise(int fs)
cannam@18 44 {
cannam@24 45 samplerate = fs;
cannam@24 46
cannam@26 47 if (featureType == FEATURE_TYPE_CONSTQ ||
cannam@26 48 featureType == FEATURE_TYPE_CHROMA) {
cannam@26 49
cannam@26 50 // run internal processing at 11025 or thereabouts
cannam@24 51 int internalRate = 11025;
cannam@24 52 int decimationFactor = samplerate / internalRate;
cannam@24 53 if (decimationFactor < 1) decimationFactor = 1;
cannam@24 54
cannam@24 55 // must be a power of two
cannam@24 56 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
cannam@24 57
cannam@24 58 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
cannam@24 59 decimationFactor = Decimator::getHighestSupportedFactor();
cannam@24 60 }
cannam@24 61
cannam@24 62 if (decimationFactor > 1) {
cannam@24 63 decimator = new Decimator(getWindowsize(), decimationFactor);
cannam@24 64 }
cannam@24 65
cannam@24 66 CQConfig config;
cannam@24 67 config.FS = samplerate / decimationFactor;
cannam@24 68 config.min = fmin;
cannam@24 69 config.max = fmax;
cannam@24 70 config.BPO = nbins;
cannam@24 71 config.CQThresh = 0.0054;
cannam@24 72
cannam@24 73 constq = new ConstantQ(config);
cannam@24 74 constq->sparsekernel();
cannam@26 75
cannam@26 76 ncoeff = constq->getK();
cannam@64 77
cannam@64 78 fft = new FFTReal(constq->getfftlength());
cannam@26 79
cannam@26 80 } else if (featureType == FEATURE_TYPE_MFCC) {
cannam@24 81
cannam@27 82 // run internal processing at 22050 or thereabouts
cannam@27 83 int internalRate = 22050;
cannam@27 84 int decimationFactor = samplerate / internalRate;
cannam@27 85 if (decimationFactor < 1) decimationFactor = 1;
cannam@27 86
cannam@27 87 // must be a power of two
cannam@27 88 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
cannam@27 89
cannam@27 90 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
cannam@27 91 decimationFactor = Decimator::getHighestSupportedFactor();
cannam@27 92 }
cannam@27 93
cannam@27 94 if (decimationFactor > 1) {
cannam@27 95 decimator = new Decimator(getWindowsize(), decimationFactor);
cannam@27 96 }
cannam@27 97
cannam@30 98 MFCCConfig config(samplerate / decimationFactor);
cannam@27 99 config.fftsize = 2048;
cannam@27 100 config.nceps = 19;
cannam@27 101 config.want_c0 = true;
cannam@26 102
cannam@26 103 mfcc = new MFCC(config);
cannam@27 104 ncoeff = config.nceps + 1;
cannam@24 105 }
cannam@18 106 }
cannam@18 107
cannam@18 108 ClusterMeltSegmenter::~ClusterMeltSegmenter()
cannam@18 109 {
cannam@24 110 delete window;
cannam@24 111 delete constq;
cannam@24 112 delete decimator;
cannam@64 113 delete fft;
cannam@20 114 }
cannam@20 115
cannam@20 116 int
cannam@20 117 ClusterMeltSegmenter::getWindowsize()
cannam@20 118 {
cannam@44 119 return static_cast<int>(windowSize * samplerate + 0.001);
cannam@20 120 }
cannam@20 121
cannam@20 122 int
cannam@20 123 ClusterMeltSegmenter::getHopsize()
cannam@20 124 {
cannam@44 125 return static_cast<int>(hopSize * samplerate + 0.001);
cannam@18 126 }
cannam@18 127
cannam@24 128 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
cannam@18 129 {
cannam@26 130 if (featureType == FEATURE_TYPE_CONSTQ ||
cannam@26 131 featureType == FEATURE_TYPE_CHROMA) {
cannam@26 132 extractFeaturesConstQ(samples, nsamples);
cannam@26 133 } else if (featureType == FEATURE_TYPE_MFCC) {
cannam@26 134 extractFeaturesMFCC(samples, nsamples);
cannam@26 135 }
cannam@26 136 }
cannam@26 137
cannam@26 138 void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples)
cannam@26 139 {
cannam@24 140 if (!constq) {
cannam@26 141 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: "
cannam@26 142 << "No const-q: initialise not called?"
cannam@24 143 << std::endl;
cannam@24 144 return;
cannam@24 145 }
cannam@20 146
cannam@24 147 if (nsamples < getWindowsize()) {
cannam@24 148 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
cannam@24 149 return;
cannam@24 150 }
cannam@24 151
cannam@24 152 int fftsize = constq->getfftlength();
cannam@24 153
cannam@24 154 if (!window || window->getSize() != fftsize) {
cannam@24 155 delete window;
cannam@24 156 window = new Window<double>(HammingWindow, fftsize);
cannam@24 157 }
cannam@24 158
cannam@24 159 vector<double> cq(ncoeff);
cannam@24 160
cannam@24 161 for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
cannam@24 162
cannam@24 163 const double *psource = samples;
cannam@24 164 int pcount = nsamples;
cannam@24 165
cannam@24 166 if (decimator) {
cannam@24 167 pcount = nsamples / decimator->getFactor();
cannam@24 168 double *decout = new double[pcount];
cannam@24 169 decimator->process(samples, decout);
cannam@24 170 psource = decout;
cannam@24 171 }
cannam@24 172
cannam@24 173 int origin = 0;
cannam@24 174
cannam@24 175 // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
cannam@24 176
cannam@24 177 int frames = 0;
cannam@24 178
cannam@24 179 double *frame = new double[fftsize];
cannam@24 180 double *real = new double[fftsize];
cannam@24 181 double *imag = new double[fftsize];
cannam@24 182 double *cqre = new double[ncoeff];
cannam@24 183 double *cqim = new double[ncoeff];
cannam@24 184
cannam@24 185 while (origin <= pcount) {
cannam@24 186
cannam@24 187 // always need at least one fft window per block, but after
cannam@24 188 // that we want to avoid having any incomplete ones
cannam@24 189 if (origin > 0 && origin + fftsize >= pcount) break;
cannam@24 190
cannam@24 191 for (int i = 0; i < fftsize; ++i) {
cannam@24 192 if (origin + i < pcount) {
cannam@24 193 frame[i] = psource[origin + i];
cannam@24 194 } else {
cannam@24 195 frame[i] = 0.0;
cannam@24 196 }
cannam@24 197 }
cannam@24 198
cannam@24 199 for (int i = 0; i < fftsize/2; ++i) {
cannam@24 200 double value = frame[i];
cannam@24 201 frame[i] = frame[i + fftsize/2];
cannam@24 202 frame[i + fftsize/2] = value;
cannam@24 203 }
cannam@24 204
cannam@24 205 window->cut(frame);
cannam@24 206
cannam@64 207 fft->process(false, frame, real, imag);
cannam@24 208
cannam@24 209 constq->process(real, imag, cqre, cqim);
cannam@18 210
cannam@24 211 for (int i = 0; i < ncoeff; ++i) {
cannam@24 212 cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
cannam@24 213 }
cannam@24 214 ++frames;
cannam@20 215
cannam@24 216 origin += fftsize/2;
cannam@24 217 }
cannam@20 218
cannam@24 219 delete [] cqre;
cannam@24 220 delete [] cqim;
cannam@24 221 delete [] real;
cannam@24 222 delete [] imag;
cannam@24 223 delete [] frame;
cannam@20 224
cannam@24 225 for (int i = 0; i < ncoeff; ++i) {
cannam@24 226 cq[i] /= frames;
cannam@24 227 }
cannam@20 228
cannam@24 229 if (decimator) delete[] psource;
cannam@20 230
cannam@24 231 features.push_back(cq);
cannam@18 232 }
cannam@18 233
cannam@26 234 void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples)
cannam@26 235 {
cannam@26 236 if (!mfcc) {
cannam@26 237 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: "
cannam@26 238 << "No mfcc: initialise not called?"
cannam@26 239 << std::endl;
cannam@26 240 return;
cannam@26 241 }
cannam@26 242
cannam@26 243 if (nsamples < getWindowsize()) {
cannam@26 244 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
cannam@26 245 return;
cannam@26 246 }
cannam@26 247
cannam@26 248 int fftsize = mfcc->getfftlength();
cannam@26 249
cannam@26 250 vector<double> cc(ncoeff);
cannam@26 251
cannam@26 252 for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
cannam@26 253
cannam@26 254 const double *psource = samples;
cannam@26 255 int pcount = nsamples;
cannam@26 256
cannam@27 257 if (decimator) {
cannam@27 258 pcount = nsamples / decimator->getFactor();
cannam@27 259 double *decout = new double[pcount];
cannam@27 260 decimator->process(samples, decout);
cannam@27 261 psource = decout;
cannam@27 262 }
cannam@27 263
cannam@26 264 int origin = 0;
cannam@26 265 int frames = 0;
cannam@26 266
cannam@26 267 double *frame = new double[fftsize];
cannam@26 268 double *ccout = new double[ncoeff];
cannam@26 269
cannam@26 270 while (origin <= pcount) {
cannam@26 271
cannam@26 272 // always need at least one fft window per block, but after
cannam@26 273 // that we want to avoid having any incomplete ones
cannam@26 274 if (origin > 0 && origin + fftsize >= pcount) break;
cannam@26 275
cannam@26 276 for (int i = 0; i < fftsize; ++i) {
cannam@26 277 if (origin + i < pcount) {
cannam@26 278 frame[i] = psource[origin + i];
cannam@26 279 } else {
cannam@26 280 frame[i] = 0.0;
cannam@26 281 }
cannam@26 282 }
cannam@26 283
cannam@30 284 mfcc->process(frame, ccout);
cannam@26 285
cannam@26 286 for (int i = 0; i < ncoeff; ++i) {
cannam@26 287 cc[i] += ccout[i];
cannam@26 288 }
cannam@26 289 ++frames;
cannam@26 290
cannam@26 291 origin += fftsize/2;
cannam@26 292 }
cannam@26 293
cannam@26 294 delete [] ccout;
cannam@26 295 delete [] frame;
cannam@26 296
cannam@26 297 for (int i = 0; i < ncoeff; ++i) {
cannam@26 298 cc[i] /= frames;
cannam@26 299 }
cannam@26 300
cannam@27 301 if (decimator) delete[] psource;
cannam@27 302
cannam@26 303 features.push_back(cc);
cannam@26 304 }
cannam@26 305
cannam@18 306 void ClusterMeltSegmenter::segment(int m)
cannam@18 307 {
cannam@24 308 nclusters = m;
cannam@24 309 segment();
cannam@18 310 }
cannam@18 311
cannam@18 312 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
cannam@18 313 {
cannam@24 314 features = f;
cannam@24 315 featureType = FEATURE_TYPE_UNKNOWN;
cannam@18 316 }
cannam@18 317
cannam@18 318 void ClusterMeltSegmenter::segment()
cannam@18 319 {
cannam@26 320 delete constq;
cannam@26 321 constq = 0;
cannam@26 322 delete mfcc;
cannam@26 323 mfcc = 0;
cannam@26 324 delete decimator;
cannam@26 325 decimator = 0;
cannam@58 326
cannam@58 327 if (features.size() < histogramLength) return;
cannam@58 328 /*
cannam@24 329 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
cannam@24 330 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
cannam@58 331 */
cannam@24 332 // copy the features to a native array and use the existing C segmenter...
cannam@24 333 double** arrFeatures = new double*[features.size()];
cannam@24 334 for (int i = 0; i < features.size(); i++)
cannam@24 335 {
cannam@24 336 if (featureType == FEATURE_TYPE_UNKNOWN) {
cannam@24 337 arrFeatures[i] = new double[features[0].size()];
cannam@24 338 for (int j = 0; j < features[0].size(); j++)
cannam@24 339 arrFeatures[i][j] = features[i][j];
cannam@24 340 } else {
cannam@24 341 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
cannam@24 342 for (int j = 0; j < ncoeff; j++)
cannam@24 343 arrFeatures[i][j] = features[i][j];
cannam@24 344 }
cannam@24 345 }
cannam@18 346
cannam@24 347 q = new int[features.size()];
cannam@18 348
cannam@26 349 if (featureType == FEATURE_TYPE_UNKNOWN ||
cannam@26 350 featureType == FEATURE_TYPE_MFCC)
cannam@24 351 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
cannam@24 352 nclusters, neighbourhoodLimit);
cannam@24 353 else
cannam@24 354 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
cannam@24 355 nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
cannam@18 356
cannam@24 357 // convert the cluster assignment sequence to a segmentation
cannam@24 358 makeSegmentation(q, features.size());
cannam@18 359
cannam@24 360 // de-allocate arrays
cannam@24 361 delete [] q;
cannam@24 362 for (int i = 0; i < features.size(); i++)
cannam@24 363 delete [] arrFeatures[i];
cannam@24 364 delete [] arrFeatures;
cannam@18 365
cannam@24 366 // clear the features
cannam@24 367 clear();
cannam@18 368 }
cannam@18 369
cannam@18 370 void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
cannam@18 371 {
cannam@24 372 segmentation.segments.clear();
cannam@24 373 segmentation.nsegtypes = nclusters;
cannam@24 374 segmentation.samplerate = samplerate;
cannam@18 375
cannam@24 376 Segment segment;
cannam@24 377 segment.start = 0;
cannam@24 378 segment.type = q[0];
cannam@18 379
cannam@24 380 for (int i = 1; i < len; i++)
cannam@24 381 {
cannam@24 382 if (q[i] != q[i-1])
cannam@24 383 {
cannam@24 384 segment.end = i * getHopsize();
cannam@24 385 segmentation.segments.push_back(segment);
cannam@24 386 segment.type = q[i];
cannam@24 387 segment.start = segment.end;
cannam@24 388 }
cannam@24 389 }
cannam@24 390 segment.end = len * getHopsize();
cannam@24 391 segmentation.segments.push_back(segment);
cannam@18 392 }
cannam@18 393