annotate dsp/segmentation/ClusterMeltSegmenter.cpp @ 26:d096a79fa772

* Add timbral (MFCC) feature option to segmenter
author cannam
date Thu, 10 Jan 2008 16:41:33 +0000
parents 2b74bd60c61f
children b678e72323df
rev   line source
cannam@24 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@24 2
cannam@18 3 /*
cannam@24 4 * ClusterMeltSegmenter.cpp
cannam@18 5 *
cannam@24 6 * Created by Mark Levy on 23/03/2006.
cannam@24 7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
cannam@24 8 * All rights reserved.
cannam@18 9 */
cannam@18 10
cannam@18 11 #include <cfloat>
cannam@18 12 #include <cmath>
cannam@18 13
cannam@18 14 #include "ClusterMeltSegmenter.h"
cannam@18 15 #include "cluster_segmenter.h"
cannam@18 16 #include "segment.h"
cannam@18 17
cannam@20 18 #include "dsp/transforms/FFT.h"
cannam@24 19 #include "dsp/chromagram/ConstantQ.h"
cannam@24 20 #include "dsp/rateconversion/Decimator.h"
cannam@26 21 #include "dsp/mfcc/MFCC.h"
cannam@20 22
cannam@24 23 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) :
cannam@24 24 window(NULL),
cannam@24 25 constq(NULL),
cannam@26 26 mfcc(NULL),
cannam@24 27 featureType(params.featureType),
cannam@24 28 hopSize(params.hopSize),
cannam@24 29 windowSize(params.windowSize),
cannam@24 30 fmin(params.fmin),
cannam@24 31 fmax(params.fmax),
cannam@24 32 nbins(params.nbins),
cannam@24 33 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
cannam@24 34 nHMMStates(params.nHMMStates),
cannam@24 35 nclusters(params.nclusters),
cannam@24 36 histogramLength(params.histogramLength),
cannam@24 37 neighbourhoodLimit(params.neighbourhoodLimit),
cannam@26 38 decimator(NULL)
cannam@18 39 {
cannam@18 40 }
cannam@18 41
cannam@18 42 void ClusterMeltSegmenter::initialise(int fs)
cannam@18 43 {
cannam@24 44 samplerate = fs;
cannam@24 45
cannam@26 46 if (featureType == FEATURE_TYPE_CONSTQ ||
cannam@26 47 featureType == FEATURE_TYPE_CHROMA) {
cannam@26 48
cannam@26 49 // run internal processing at 11025 or thereabouts
cannam@24 50 int internalRate = 11025;
cannam@24 51 int decimationFactor = samplerate / internalRate;
cannam@24 52 if (decimationFactor < 1) decimationFactor = 1;
cannam@24 53
cannam@24 54 // must be a power of two
cannam@24 55 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
cannam@24 56
cannam@24 57 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
cannam@24 58 decimationFactor = Decimator::getHighestSupportedFactor();
cannam@24 59 }
cannam@24 60
cannam@24 61 if (decimationFactor > 1) {
cannam@24 62 decimator = new Decimator(getWindowsize(), decimationFactor);
cannam@24 63 }
cannam@24 64
cannam@24 65 CQConfig config;
cannam@24 66 config.FS = samplerate / decimationFactor;
cannam@24 67 config.min = fmin;
cannam@24 68 config.max = fmax;
cannam@24 69 config.BPO = nbins;
cannam@24 70 config.CQThresh = 0.0054;
cannam@24 71
cannam@24 72 constq = new ConstantQ(config);
cannam@24 73 constq->sparsekernel();
cannam@26 74
cannam@26 75 ncoeff = constq->getK();
cannam@26 76
cannam@26 77 } else if (featureType == FEATURE_TYPE_MFCC) {
cannam@24 78
cannam@26 79 MFCCConfig config;
cannam@26 80 config.FS = samplerate;
cannam@26 81 config.fftsize = 1024;
cannam@26 82 config.nceps = 20;
cannam@26 83 config.want_c0 = false;
cannam@26 84
cannam@26 85 mfcc = new MFCC(config);
cannam@26 86 ncoeff = config.nceps;
cannam@24 87 }
cannam@18 88 }
cannam@18 89
cannam@18 90 ClusterMeltSegmenter::~ClusterMeltSegmenter()
cannam@18 91 {
cannam@24 92 delete window;
cannam@24 93 delete constq;
cannam@24 94 delete decimator;
cannam@20 95 }
cannam@20 96
cannam@20 97 int
cannam@20 98 ClusterMeltSegmenter::getWindowsize()
cannam@20 99 {
cannam@24 100 return static_cast<int>(windowSize * samplerate);
cannam@20 101 }
cannam@20 102
cannam@20 103 int
cannam@20 104 ClusterMeltSegmenter::getHopsize()
cannam@20 105 {
cannam@24 106 return static_cast<int>(hopSize * samplerate);
cannam@18 107 }
cannam@18 108
cannam@24 109 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
cannam@18 110 {
cannam@26 111 if (featureType == FEATURE_TYPE_CONSTQ ||
cannam@26 112 featureType == FEATURE_TYPE_CHROMA) {
cannam@26 113 extractFeaturesConstQ(samples, nsamples);
cannam@26 114 } else if (featureType == FEATURE_TYPE_MFCC) {
cannam@26 115 extractFeaturesMFCC(samples, nsamples);
cannam@26 116 }
cannam@26 117 }
cannam@26 118
cannam@26 119 void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples)
cannam@26 120 {
cannam@24 121 if (!constq) {
cannam@26 122 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: "
cannam@26 123 << "No const-q: initialise not called?"
cannam@24 124 << std::endl;
cannam@24 125 return;
cannam@24 126 }
cannam@20 127
cannam@24 128 if (nsamples < getWindowsize()) {
cannam@24 129 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
cannam@24 130 return;
cannam@24 131 }
cannam@24 132
cannam@24 133 int fftsize = constq->getfftlength();
cannam@24 134
cannam@24 135 if (!window || window->getSize() != fftsize) {
cannam@24 136 delete window;
cannam@24 137 window = new Window<double>(HammingWindow, fftsize);
cannam@24 138 }
cannam@24 139
cannam@24 140 vector<double> cq(ncoeff);
cannam@24 141
cannam@24 142 for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
cannam@24 143
cannam@24 144 const double *psource = samples;
cannam@24 145 int pcount = nsamples;
cannam@24 146
cannam@24 147 if (decimator) {
cannam@24 148 pcount = nsamples / decimator->getFactor();
cannam@24 149 double *decout = new double[pcount];
cannam@24 150 decimator->process(samples, decout);
cannam@24 151 psource = decout;
cannam@24 152 }
cannam@24 153
cannam@24 154 int origin = 0;
cannam@24 155
cannam@24 156 // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
cannam@24 157
cannam@24 158 int frames = 0;
cannam@24 159
cannam@24 160 double *frame = new double[fftsize];
cannam@24 161 double *real = new double[fftsize];
cannam@24 162 double *imag = new double[fftsize];
cannam@24 163 double *cqre = new double[ncoeff];
cannam@24 164 double *cqim = new double[ncoeff];
cannam@24 165
cannam@24 166 while (origin <= pcount) {
cannam@24 167
cannam@24 168 // always need at least one fft window per block, but after
cannam@24 169 // that we want to avoid having any incomplete ones
cannam@24 170 if (origin > 0 && origin + fftsize >= pcount) break;
cannam@24 171
cannam@24 172 for (int i = 0; i < fftsize; ++i) {
cannam@24 173 if (origin + i < pcount) {
cannam@24 174 frame[i] = psource[origin + i];
cannam@24 175 } else {
cannam@24 176 frame[i] = 0.0;
cannam@24 177 }
cannam@24 178 }
cannam@24 179
cannam@24 180 for (int i = 0; i < fftsize/2; ++i) {
cannam@24 181 double value = frame[i];
cannam@24 182 frame[i] = frame[i + fftsize/2];
cannam@24 183 frame[i + fftsize/2] = value;
cannam@24 184 }
cannam@24 185
cannam@24 186 window->cut(frame);
cannam@24 187
cannam@24 188 FFT::process(fftsize, false, frame, 0, real, imag);
cannam@24 189
cannam@24 190 constq->process(real, imag, cqre, cqim);
cannam@18 191
cannam@24 192 for (int i = 0; i < ncoeff; ++i) {
cannam@24 193 cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
cannam@24 194 }
cannam@24 195 ++frames;
cannam@20 196
cannam@24 197 origin += fftsize/2;
cannam@24 198 }
cannam@20 199
cannam@24 200 delete [] cqre;
cannam@24 201 delete [] cqim;
cannam@24 202 delete [] real;
cannam@24 203 delete [] imag;
cannam@24 204 delete [] frame;
cannam@20 205
cannam@24 206 for (int i = 0; i < ncoeff; ++i) {
cannam@24 207 cq[i] /= frames;
cannam@24 208 }
cannam@20 209
cannam@24 210 if (decimator) delete[] psource;
cannam@20 211
cannam@24 212 features.push_back(cq);
cannam@18 213 }
cannam@18 214
cannam@26 215 void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples)
cannam@26 216 {
cannam@26 217 if (!mfcc) {
cannam@26 218 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: "
cannam@26 219 << "No mfcc: initialise not called?"
cannam@26 220 << std::endl;
cannam@26 221 return;
cannam@26 222 }
cannam@26 223
cannam@26 224 if (nsamples < getWindowsize()) {
cannam@26 225 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
cannam@26 226 return;
cannam@26 227 }
cannam@26 228
cannam@26 229 int fftsize = mfcc->getfftlength();
cannam@26 230
cannam@26 231 vector<double> cc(ncoeff);
cannam@26 232
cannam@26 233 for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
cannam@26 234
cannam@26 235 const double *psource = samples;
cannam@26 236 int pcount = nsamples;
cannam@26 237
cannam@26 238 int origin = 0;
cannam@26 239 int frames = 0;
cannam@26 240
cannam@26 241 double *frame = new double[fftsize];
cannam@26 242 double *ccout = new double[ncoeff];
cannam@26 243
cannam@26 244 while (origin <= pcount) {
cannam@26 245
cannam@26 246 // always need at least one fft window per block, but after
cannam@26 247 // that we want to avoid having any incomplete ones
cannam@26 248 if (origin > 0 && origin + fftsize >= pcount) break;
cannam@26 249
cannam@26 250 for (int i = 0; i < fftsize; ++i) {
cannam@26 251 if (origin + i < pcount) {
cannam@26 252 frame[i] = psource[origin + i];
cannam@26 253 } else {
cannam@26 254 frame[i] = 0.0;
cannam@26 255 }
cannam@26 256 }
cannam@26 257
cannam@26 258 mfcc->process(fftsize, frame, ccout);
cannam@26 259
cannam@26 260 for (int i = 0; i < ncoeff; ++i) {
cannam@26 261 cc[i] += ccout[i];
cannam@26 262 }
cannam@26 263 ++frames;
cannam@26 264
cannam@26 265 origin += fftsize/2;
cannam@26 266 }
cannam@26 267
cannam@26 268 delete [] ccout;
cannam@26 269 delete [] frame;
cannam@26 270
cannam@26 271 for (int i = 0; i < ncoeff; ++i) {
cannam@26 272 cc[i] /= frames;
cannam@26 273 }
cannam@26 274
cannam@26 275 features.push_back(cc);
cannam@26 276 }
cannam@26 277
cannam@18 278 void ClusterMeltSegmenter::segment(int m)
cannam@18 279 {
cannam@24 280 nclusters = m;
cannam@24 281 segment();
cannam@18 282 }
cannam@18 283
cannam@18 284 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
cannam@18 285 {
cannam@24 286 features = f;
cannam@24 287 featureType = FEATURE_TYPE_UNKNOWN;
cannam@18 288 }
cannam@18 289
cannam@18 290 void ClusterMeltSegmenter::segment()
cannam@18 291 {
cannam@26 292 delete constq;
cannam@26 293 constq = 0;
cannam@26 294 delete mfcc;
cannam@26 295 mfcc = 0;
cannam@26 296 delete decimator;
cannam@26 297 decimator = 0;
cannam@18 298
cannam@24 299 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
cannam@24 300 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
cannam@24 301
cannam@24 302 // copy the features to a native array and use the existing C segmenter...
cannam@24 303 double** arrFeatures = new double*[features.size()];
cannam@24 304 for (int i = 0; i < features.size(); i++)
cannam@24 305 {
cannam@24 306 if (featureType == FEATURE_TYPE_UNKNOWN) {
cannam@24 307 arrFeatures[i] = new double[features[0].size()];
cannam@24 308 for (int j = 0; j < features[0].size(); j++)
cannam@24 309 arrFeatures[i][j] = features[i][j];
cannam@24 310 } else {
cannam@24 311 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
cannam@24 312 for (int j = 0; j < ncoeff; j++)
cannam@24 313 arrFeatures[i][j] = features[i][j];
cannam@24 314 }
cannam@24 315 }
cannam@18 316
cannam@24 317 q = new int[features.size()];
cannam@18 318
cannam@26 319 if (featureType == FEATURE_TYPE_UNKNOWN ||
cannam@26 320 featureType == FEATURE_TYPE_MFCC)
cannam@24 321 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
cannam@24 322 nclusters, neighbourhoodLimit);
cannam@24 323 else
cannam@24 324 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
cannam@24 325 nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
cannam@18 326
cannam@24 327 // convert the cluster assignment sequence to a segmentation
cannam@24 328 makeSegmentation(q, features.size());
cannam@18 329
cannam@24 330 // de-allocate arrays
cannam@24 331 delete [] q;
cannam@24 332 for (int i = 0; i < features.size(); i++)
cannam@24 333 delete [] arrFeatures[i];
cannam@24 334 delete [] arrFeatures;
cannam@18 335
cannam@24 336 // clear the features
cannam@24 337 clear();
cannam@18 338 }
cannam@18 339
cannam@18 340 void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
cannam@18 341 {
cannam@24 342 segmentation.segments.clear();
cannam@24 343 segmentation.nsegtypes = nclusters;
cannam@24 344 segmentation.samplerate = samplerate;
cannam@18 345
cannam@24 346 Segment segment;
cannam@24 347 segment.start = 0;
cannam@24 348 segment.type = q[0];
cannam@18 349
cannam@24 350 for (int i = 1; i < len; i++)
cannam@24 351 {
cannam@24 352 if (q[i] != q[i-1])
cannam@24 353 {
cannam@24 354 segment.end = i * getHopsize();
cannam@24 355 segmentation.segments.push_back(segment);
cannam@24 356 segment.type = q[i];
cannam@24 357 segment.start = segment.end;
cannam@24 358 }
cannam@24 359 }
cannam@24 360 segment.end = len * getHopsize();
cannam@24 361 segmentation.segments.push_back(segment);
cannam@18 362 }
cannam@18 363