annotate dsp/segmentation/ClusterMeltSegmenter.cpp @ 251:c3600d3cfe5c

* Add timbral (MFCC) feature option to segmenter
author Chris Cannam <c.cannam@qmul.ac.uk>
date Thu, 10 Jan 2008 16:41:33 +0000
parents 18a0dffa5c1a
children b678e72323df
rev   line source
c@249 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@249 2
c@243 3 /*
c@249 4 * ClusterMeltSegmenter.cpp
c@243 5 *
c@249 6 * Created by Mark Levy on 23/03/2006.
c@249 7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
c@249 8 * All rights reserved.
c@243 9 */
c@243 10
c@243 11 #include <cfloat>
c@243 12 #include <cmath>
c@243 13
c@243 14 #include "ClusterMeltSegmenter.h"
c@243 15 #include "cluster_segmenter.h"
c@243 16 #include "segment.h"
c@243 17
c@245 18 #include "dsp/transforms/FFT.h"
c@249 19 #include "dsp/chromagram/ConstantQ.h"
c@249 20 #include "dsp/rateconversion/Decimator.h"
c@251 21 #include "dsp/mfcc/MFCC.h"
c@245 22
c@249 23 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) :
c@249 24 window(NULL),
c@249 25 constq(NULL),
c@251 26 mfcc(NULL),
c@249 27 featureType(params.featureType),
c@249 28 hopSize(params.hopSize),
c@249 29 windowSize(params.windowSize),
c@249 30 fmin(params.fmin),
c@249 31 fmax(params.fmax),
c@249 32 nbins(params.nbins),
c@249 33 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
c@249 34 nHMMStates(params.nHMMStates),
c@249 35 nclusters(params.nclusters),
c@249 36 histogramLength(params.histogramLength),
c@249 37 neighbourhoodLimit(params.neighbourhoodLimit),
c@251 38 decimator(NULL)
c@243 39 {
c@243 40 }
c@243 41
c@243 42 void ClusterMeltSegmenter::initialise(int fs)
c@243 43 {
c@249 44 samplerate = fs;
c@249 45
c@251 46 if (featureType == FEATURE_TYPE_CONSTQ ||
c@251 47 featureType == FEATURE_TYPE_CHROMA) {
c@251 48
c@251 49 // run internal processing at 11025 or thereabouts
c@249 50 int internalRate = 11025;
c@249 51 int decimationFactor = samplerate / internalRate;
c@249 52 if (decimationFactor < 1) decimationFactor = 1;
c@249 53
c@249 54 // must be a power of two
c@249 55 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
c@249 56
c@249 57 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
c@249 58 decimationFactor = Decimator::getHighestSupportedFactor();
c@249 59 }
c@249 60
c@249 61 if (decimationFactor > 1) {
c@249 62 decimator = new Decimator(getWindowsize(), decimationFactor);
c@249 63 }
c@249 64
c@249 65 CQConfig config;
c@249 66 config.FS = samplerate / decimationFactor;
c@249 67 config.min = fmin;
c@249 68 config.max = fmax;
c@249 69 config.BPO = nbins;
c@249 70 config.CQThresh = 0.0054;
c@249 71
c@249 72 constq = new ConstantQ(config);
c@249 73 constq->sparsekernel();
c@251 74
c@251 75 ncoeff = constq->getK();
c@251 76
c@251 77 } else if (featureType == FEATURE_TYPE_MFCC) {
c@249 78
c@251 79 MFCCConfig config;
c@251 80 config.FS = samplerate;
c@251 81 config.fftsize = 1024;
c@251 82 config.nceps = 20;
c@251 83 config.want_c0 = false;
c@251 84
c@251 85 mfcc = new MFCC(config);
c@251 86 ncoeff = config.nceps;
c@249 87 }
c@243 88 }
c@243 89
c@243 90 ClusterMeltSegmenter::~ClusterMeltSegmenter()
c@243 91 {
c@249 92 delete window;
c@249 93 delete constq;
c@249 94 delete decimator;
c@245 95 }
c@245 96
c@245 97 int
c@245 98 ClusterMeltSegmenter::getWindowsize()
c@245 99 {
c@249 100 return static_cast<int>(windowSize * samplerate);
c@245 101 }
c@245 102
c@245 103 int
c@245 104 ClusterMeltSegmenter::getHopsize()
c@245 105 {
c@249 106 return static_cast<int>(hopSize * samplerate);
c@243 107 }
c@243 108
c@249 109 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
c@243 110 {
c@251 111 if (featureType == FEATURE_TYPE_CONSTQ ||
c@251 112 featureType == FEATURE_TYPE_CHROMA) {
c@251 113 extractFeaturesConstQ(samples, nsamples);
c@251 114 } else if (featureType == FEATURE_TYPE_MFCC) {
c@251 115 extractFeaturesMFCC(samples, nsamples);
c@251 116 }
c@251 117 }
c@251 118
c@251 119 void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples)
c@251 120 {
c@249 121 if (!constq) {
c@251 122 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: "
c@251 123 << "No const-q: initialise not called?"
c@249 124 << std::endl;
c@249 125 return;
c@249 126 }
c@245 127
c@249 128 if (nsamples < getWindowsize()) {
c@249 129 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
c@249 130 return;
c@249 131 }
c@249 132
c@249 133 int fftsize = constq->getfftlength();
c@249 134
c@249 135 if (!window || window->getSize() != fftsize) {
c@249 136 delete window;
c@249 137 window = new Window<double>(HammingWindow, fftsize);
c@249 138 }
c@249 139
c@249 140 vector<double> cq(ncoeff);
c@249 141
c@249 142 for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
c@249 143
c@249 144 const double *psource = samples;
c@249 145 int pcount = nsamples;
c@249 146
c@249 147 if (decimator) {
c@249 148 pcount = nsamples / decimator->getFactor();
c@249 149 double *decout = new double[pcount];
c@249 150 decimator->process(samples, decout);
c@249 151 psource = decout;
c@249 152 }
c@249 153
c@249 154 int origin = 0;
c@249 155
c@249 156 // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
c@249 157
c@249 158 int frames = 0;
c@249 159
c@249 160 double *frame = new double[fftsize];
c@249 161 double *real = new double[fftsize];
c@249 162 double *imag = new double[fftsize];
c@249 163 double *cqre = new double[ncoeff];
c@249 164 double *cqim = new double[ncoeff];
c@249 165
c@249 166 while (origin <= pcount) {
c@249 167
c@249 168 // always need at least one fft window per block, but after
c@249 169 // that we want to avoid having any incomplete ones
c@249 170 if (origin > 0 && origin + fftsize >= pcount) break;
c@249 171
c@249 172 for (int i = 0; i < fftsize; ++i) {
c@249 173 if (origin + i < pcount) {
c@249 174 frame[i] = psource[origin + i];
c@249 175 } else {
c@249 176 frame[i] = 0.0;
c@249 177 }
c@249 178 }
c@249 179
c@249 180 for (int i = 0; i < fftsize/2; ++i) {
c@249 181 double value = frame[i];
c@249 182 frame[i] = frame[i + fftsize/2];
c@249 183 frame[i + fftsize/2] = value;
c@249 184 }
c@249 185
c@249 186 window->cut(frame);
c@249 187
c@249 188 FFT::process(fftsize, false, frame, 0, real, imag);
c@249 189
c@249 190 constq->process(real, imag, cqre, cqim);
c@243 191
c@249 192 for (int i = 0; i < ncoeff; ++i) {
c@249 193 cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
c@249 194 }
c@249 195 ++frames;
c@245 196
c@249 197 origin += fftsize/2;
c@249 198 }
c@245 199
c@249 200 delete [] cqre;
c@249 201 delete [] cqim;
c@249 202 delete [] real;
c@249 203 delete [] imag;
c@249 204 delete [] frame;
c@245 205
c@249 206 for (int i = 0; i < ncoeff; ++i) {
c@249 207 cq[i] /= frames;
c@249 208 }
c@245 209
c@249 210 if (decimator) delete[] psource;
c@245 211
c@249 212 features.push_back(cq);
c@243 213 }
c@243 214
c@251 215 void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples)
c@251 216 {
c@251 217 if (!mfcc) {
c@251 218 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: "
c@251 219 << "No mfcc: initialise not called?"
c@251 220 << std::endl;
c@251 221 return;
c@251 222 }
c@251 223
c@251 224 if (nsamples < getWindowsize()) {
c@251 225 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
c@251 226 return;
c@251 227 }
c@251 228
c@251 229 int fftsize = mfcc->getfftlength();
c@251 230
c@251 231 vector<double> cc(ncoeff);
c@251 232
c@251 233 for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
c@251 234
c@251 235 const double *psource = samples;
c@251 236 int pcount = nsamples;
c@251 237
c@251 238 int origin = 0;
c@251 239 int frames = 0;
c@251 240
c@251 241 double *frame = new double[fftsize];
c@251 242 double *ccout = new double[ncoeff];
c@251 243
c@251 244 while (origin <= pcount) {
c@251 245
c@251 246 // always need at least one fft window per block, but after
c@251 247 // that we want to avoid having any incomplete ones
c@251 248 if (origin > 0 && origin + fftsize >= pcount) break;
c@251 249
c@251 250 for (int i = 0; i < fftsize; ++i) {
c@251 251 if (origin + i < pcount) {
c@251 252 frame[i] = psource[origin + i];
c@251 253 } else {
c@251 254 frame[i] = 0.0;
c@251 255 }
c@251 256 }
c@251 257
c@251 258 mfcc->process(fftsize, frame, ccout);
c@251 259
c@251 260 for (int i = 0; i < ncoeff; ++i) {
c@251 261 cc[i] += ccout[i];
c@251 262 }
c@251 263 ++frames;
c@251 264
c@251 265 origin += fftsize/2;
c@251 266 }
c@251 267
c@251 268 delete [] ccout;
c@251 269 delete [] frame;
c@251 270
c@251 271 for (int i = 0; i < ncoeff; ++i) {
c@251 272 cc[i] /= frames;
c@251 273 }
c@251 274
c@251 275 features.push_back(cc);
c@251 276 }
c@251 277
c@243 278 void ClusterMeltSegmenter::segment(int m)
c@243 279 {
c@249 280 nclusters = m;
c@249 281 segment();
c@243 282 }
c@243 283
c@243 284 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
c@243 285 {
c@249 286 features = f;
c@249 287 featureType = FEATURE_TYPE_UNKNOWN;
c@243 288 }
c@243 289
c@243 290 void ClusterMeltSegmenter::segment()
c@243 291 {
c@251 292 delete constq;
c@251 293 constq = 0;
c@251 294 delete mfcc;
c@251 295 mfcc = 0;
c@251 296 delete decimator;
c@251 297 decimator = 0;
c@243 298
c@249 299 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
c@249 300 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
c@249 301
c@249 302 // copy the features to a native array and use the existing C segmenter...
c@249 303 double** arrFeatures = new double*[features.size()];
c@249 304 for (int i = 0; i < features.size(); i++)
c@249 305 {
c@249 306 if (featureType == FEATURE_TYPE_UNKNOWN) {
c@249 307 arrFeatures[i] = new double[features[0].size()];
c@249 308 for (int j = 0; j < features[0].size(); j++)
c@249 309 arrFeatures[i][j] = features[i][j];
c@249 310 } else {
c@249 311 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
c@249 312 for (int j = 0; j < ncoeff; j++)
c@249 313 arrFeatures[i][j] = features[i][j];
c@249 314 }
c@249 315 }
c@243 316
c@249 317 q = new int[features.size()];
c@243 318
c@251 319 if (featureType == FEATURE_TYPE_UNKNOWN ||
c@251 320 featureType == FEATURE_TYPE_MFCC)
c@249 321 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
c@249 322 nclusters, neighbourhoodLimit);
c@249 323 else
c@249 324 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
c@249 325 nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
c@243 326
c@249 327 // convert the cluster assignment sequence to a segmentation
c@249 328 makeSegmentation(q, features.size());
c@243 329
c@249 330 // de-allocate arrays
c@249 331 delete [] q;
c@249 332 for (int i = 0; i < features.size(); i++)
c@249 333 delete [] arrFeatures[i];
c@249 334 delete [] arrFeatures;
c@243 335
c@249 336 // clear the features
c@249 337 clear();
c@243 338 }
c@243 339
c@243 340 void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
c@243 341 {
c@249 342 segmentation.segments.clear();
c@249 343 segmentation.nsegtypes = nclusters;
c@249 344 segmentation.samplerate = samplerate;
c@243 345
c@249 346 Segment segment;
c@249 347 segment.start = 0;
c@249 348 segment.type = q[0];
c@243 349
c@249 350 for (int i = 1; i < len; i++)
c@249 351 {
c@249 352 if (q[i] != q[i-1])
c@249 353 {
c@249 354 segment.end = i * getHopsize();
c@249 355 segmentation.segments.push_back(segment);
c@249 356 segment.type = q[i];
c@249 357 segment.start = segment.end;
c@249 358 }
c@249 359 }
c@249 360 segment.end = len * getHopsize();
c@249 361 segmentation.segments.push_back(segment);
c@243 362 }
c@243 363