annotate dsp/segmentation/ClusterMeltSegmenter.cpp @ 209:ccd2019190bf msvc

Some MSVC fixes, including (temporarily, probably) renaming the FFT source file to avoid getting it mixed up with the Vamp SDK one in our object dir
author Chris Cannam
date Thu, 01 Feb 2018 16:34:08 +0000
parents e4a57215ddee
children 175e51ae78eb
rev   line source
cannam@24 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@24 2
cannam@18 3 /*
cannam@24 4 * ClusterMeltSegmenter.cpp
cannam@18 5 *
cannam@24 6 * Created by Mark Levy on 23/03/2006.
cannam@24 7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
Chris@84 8
Chris@84 9 This program is free software; you can redistribute it and/or
Chris@84 10 modify it under the terms of the GNU General Public License as
Chris@84 11 published by the Free Software Foundation; either version 2 of the
Chris@84 12 License, or (at your option) any later version. See the file
Chris@84 13 COPYING included with this distribution for more information.
cannam@18 14 */
cannam@18 15
cannam@18 16 #include <cfloat>
cannam@18 17 #include <cmath>
cannam@18 18
cannam@18 19 #include "ClusterMeltSegmenter.h"
cannam@18 20 #include "cluster_segmenter.h"
cannam@18 21 #include "segment.h"
cannam@18 22
cannam@20 23 #include "dsp/transforms/FFT.h"
cannam@24 24 #include "dsp/chromagram/ConstantQ.h"
cannam@24 25 #include "dsp/rateconversion/Decimator.h"
cannam@26 26 #include "dsp/mfcc/MFCC.h"
cannam@20 27
cannam@24 28 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) :
cannam@24 29 window(NULL),
cannam@64 30 fft(NULL),
cannam@24 31 constq(NULL),
cannam@26 32 mfcc(NULL),
cannam@24 33 featureType(params.featureType),
cannam@24 34 hopSize(params.hopSize),
cannam@24 35 windowSize(params.windowSize),
cannam@24 36 fmin(params.fmin),
cannam@24 37 fmax(params.fmax),
cannam@24 38 nbins(params.nbins),
cannam@24 39 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
cannam@24 40 nHMMStates(params.nHMMStates),
cannam@24 41 nclusters(params.nclusters),
cannam@24 42 histogramLength(params.histogramLength),
cannam@24 43 neighbourhoodLimit(params.neighbourhoodLimit),
cannam@26 44 decimator(NULL)
cannam@18 45 {
cannam@18 46 }
cannam@18 47
cannam@18 48 void ClusterMeltSegmenter::initialise(int fs)
cannam@18 49 {
cannam@24 50 samplerate = fs;
cannam@24 51
cannam@26 52 if (featureType == FEATURE_TYPE_CONSTQ ||
cannam@26 53 featureType == FEATURE_TYPE_CHROMA) {
cannam@26 54
cannam@26 55 // run internal processing at 11025 or thereabouts
cannam@24 56 int internalRate = 11025;
cannam@24 57 int decimationFactor = samplerate / internalRate;
cannam@24 58 if (decimationFactor < 1) decimationFactor = 1;
cannam@24 59
cannam@24 60 // must be a power of two
cannam@24 61 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
cannam@24 62
cannam@24 63 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
cannam@24 64 decimationFactor = Decimator::getHighestSupportedFactor();
cannam@24 65 }
cannam@24 66
cannam@24 67 if (decimationFactor > 1) {
cannam@24 68 decimator = new Decimator(getWindowsize(), decimationFactor);
cannam@24 69 }
cannam@24 70
cannam@24 71 CQConfig config;
cannam@24 72 config.FS = samplerate / decimationFactor;
cannam@24 73 config.min = fmin;
cannam@24 74 config.max = fmax;
cannam@24 75 config.BPO = nbins;
cannam@24 76 config.CQThresh = 0.0054;
cannam@24 77
cannam@24 78 constq = new ConstantQ(config);
cannam@24 79 constq->sparsekernel();
cannam@26 80
cannam@26 81 ncoeff = constq->getK();
cannam@64 82
cannam@64 83 fft = new FFTReal(constq->getfftlength());
cannam@26 84
cannam@26 85 } else if (featureType == FEATURE_TYPE_MFCC) {
cannam@24 86
cannam@27 87 // run internal processing at 22050 or thereabouts
cannam@27 88 int internalRate = 22050;
cannam@27 89 int decimationFactor = samplerate / internalRate;
cannam@27 90 if (decimationFactor < 1) decimationFactor = 1;
cannam@27 91
cannam@27 92 // must be a power of two
cannam@27 93 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
cannam@27 94
cannam@27 95 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
cannam@27 96 decimationFactor = Decimator::getHighestSupportedFactor();
cannam@27 97 }
cannam@27 98
cannam@27 99 if (decimationFactor > 1) {
cannam@27 100 decimator = new Decimator(getWindowsize(), decimationFactor);
cannam@27 101 }
cannam@27 102
cannam@30 103 MFCCConfig config(samplerate / decimationFactor);
cannam@27 104 config.fftsize = 2048;
cannam@27 105 config.nceps = 19;
cannam@27 106 config.want_c0 = true;
cannam@26 107
cannam@26 108 mfcc = new MFCC(config);
cannam@27 109 ncoeff = config.nceps + 1;
cannam@24 110 }
cannam@18 111 }
cannam@18 112
cannam@18 113 ClusterMeltSegmenter::~ClusterMeltSegmenter()
cannam@18 114 {
cannam@24 115 delete window;
cannam@24 116 delete constq;
cannam@24 117 delete decimator;
cannam@64 118 delete fft;
cannam@20 119 }
cannam@20 120
cannam@20 121 int
cannam@20 122 ClusterMeltSegmenter::getWindowsize()
cannam@20 123 {
cannam@44 124 return static_cast<int>(windowSize * samplerate + 0.001);
cannam@20 125 }
cannam@20 126
cannam@20 127 int
cannam@20 128 ClusterMeltSegmenter::getHopsize()
cannam@20 129 {
cannam@44 130 return static_cast<int>(hopSize * samplerate + 0.001);
cannam@18 131 }
cannam@18 132
cannam@24 133 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
cannam@18 134 {
cannam@26 135 if (featureType == FEATURE_TYPE_CONSTQ ||
cannam@26 136 featureType == FEATURE_TYPE_CHROMA) {
cannam@26 137 extractFeaturesConstQ(samples, nsamples);
cannam@26 138 } else if (featureType == FEATURE_TYPE_MFCC) {
cannam@26 139 extractFeaturesMFCC(samples, nsamples);
cannam@26 140 }
cannam@26 141 }
cannam@26 142
cannam@26 143 void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples)
cannam@26 144 {
cannam@24 145 if (!constq) {
cannam@26 146 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: "
cannam@26 147 << "No const-q: initialise not called?"
cannam@24 148 << std::endl;
cannam@24 149 return;
cannam@24 150 }
cannam@20 151
cannam@24 152 if (nsamples < getWindowsize()) {
cannam@24 153 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
cannam@24 154 return;
cannam@24 155 }
cannam@24 156
cannam@24 157 int fftsize = constq->getfftlength();
cannam@24 158
cannam@24 159 if (!window || window->getSize() != fftsize) {
cannam@24 160 delete window;
cannam@24 161 window = new Window<double>(HammingWindow, fftsize);
cannam@24 162 }
cannam@24 163
cannam@24 164 vector<double> cq(ncoeff);
cannam@24 165
cannam@24 166 for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
cannam@24 167
cannam@24 168 const double *psource = samples;
cannam@24 169 int pcount = nsamples;
cannam@24 170
cannam@24 171 if (decimator) {
cannam@24 172 pcount = nsamples / decimator->getFactor();
cannam@24 173 double *decout = new double[pcount];
cannam@24 174 decimator->process(samples, decout);
cannam@24 175 psource = decout;
cannam@24 176 }
cannam@24 177
cannam@24 178 int origin = 0;
cannam@24 179
cannam@24 180 // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
cannam@24 181
cannam@24 182 int frames = 0;
cannam@24 183
cannam@24 184 double *frame = new double[fftsize];
cannam@24 185 double *real = new double[fftsize];
cannam@24 186 double *imag = new double[fftsize];
cannam@24 187 double *cqre = new double[ncoeff];
cannam@24 188 double *cqim = new double[ncoeff];
cannam@24 189
cannam@24 190 while (origin <= pcount) {
cannam@24 191
cannam@24 192 // always need at least one fft window per block, but after
cannam@24 193 // that we want to avoid having any incomplete ones
cannam@24 194 if (origin > 0 && origin + fftsize >= pcount) break;
cannam@24 195
cannam@24 196 for (int i = 0; i < fftsize; ++i) {
cannam@24 197 if (origin + i < pcount) {
cannam@24 198 frame[i] = psource[origin + i];
cannam@24 199 } else {
cannam@24 200 frame[i] = 0.0;
cannam@24 201 }
cannam@24 202 }
cannam@24 203
cannam@24 204 for (int i = 0; i < fftsize/2; ++i) {
cannam@24 205 double value = frame[i];
cannam@24 206 frame[i] = frame[i + fftsize/2];
cannam@24 207 frame[i + fftsize/2] = value;
cannam@24 208 }
cannam@24 209
cannam@24 210 window->cut(frame);
cannam@24 211
Chris@114 212 fft->forward(frame, real, imag);
cannam@24 213
cannam@24 214 constq->process(real, imag, cqre, cqim);
cannam@18 215
cannam@24 216 for (int i = 0; i < ncoeff; ++i) {
cannam@24 217 cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
cannam@24 218 }
cannam@24 219 ++frames;
cannam@20 220
cannam@24 221 origin += fftsize/2;
cannam@24 222 }
cannam@20 223
cannam@24 224 delete [] cqre;
cannam@24 225 delete [] cqim;
cannam@24 226 delete [] real;
cannam@24 227 delete [] imag;
cannam@24 228 delete [] frame;
cannam@20 229
cannam@24 230 for (int i = 0; i < ncoeff; ++i) {
cannam@24 231 cq[i] /= frames;
cannam@24 232 }
cannam@20 233
cannam@24 234 if (decimator) delete[] psource;
cannam@20 235
cannam@24 236 features.push_back(cq);
cannam@18 237 }
cannam@18 238
cannam@26 239 void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples)
cannam@26 240 {
cannam@26 241 if (!mfcc) {
cannam@26 242 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: "
cannam@26 243 << "No mfcc: initialise not called?"
cannam@26 244 << std::endl;
cannam@26 245 return;
cannam@26 246 }
cannam@26 247
cannam@26 248 if (nsamples < getWindowsize()) {
cannam@26 249 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
cannam@26 250 return;
cannam@26 251 }
cannam@26 252
cannam@26 253 int fftsize = mfcc->getfftlength();
cannam@26 254
cannam@26 255 vector<double> cc(ncoeff);
cannam@26 256
cannam@26 257 for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
cannam@26 258
cannam@26 259 const double *psource = samples;
cannam@26 260 int pcount = nsamples;
cannam@26 261
cannam@27 262 if (decimator) {
cannam@27 263 pcount = nsamples / decimator->getFactor();
cannam@27 264 double *decout = new double[pcount];
cannam@27 265 decimator->process(samples, decout);
cannam@27 266 psource = decout;
cannam@27 267 }
cannam@27 268
cannam@26 269 int origin = 0;
cannam@26 270 int frames = 0;
cannam@26 271
cannam@26 272 double *frame = new double[fftsize];
cannam@26 273 double *ccout = new double[ncoeff];
cannam@26 274
cannam@26 275 while (origin <= pcount) {
cannam@26 276
cannam@26 277 // always need at least one fft window per block, but after
cannam@26 278 // that we want to avoid having any incomplete ones
cannam@26 279 if (origin > 0 && origin + fftsize >= pcount) break;
cannam@26 280
cannam@26 281 for (int i = 0; i < fftsize; ++i) {
cannam@26 282 if (origin + i < pcount) {
cannam@26 283 frame[i] = psource[origin + i];
cannam@26 284 } else {
cannam@26 285 frame[i] = 0.0;
cannam@26 286 }
cannam@26 287 }
cannam@26 288
cannam@30 289 mfcc->process(frame, ccout);
cannam@26 290
cannam@26 291 for (int i = 0; i < ncoeff; ++i) {
cannam@26 292 cc[i] += ccout[i];
cannam@26 293 }
cannam@26 294 ++frames;
cannam@26 295
cannam@26 296 origin += fftsize/2;
cannam@26 297 }
cannam@26 298
cannam@26 299 delete [] ccout;
cannam@26 300 delete [] frame;
cannam@26 301
cannam@26 302 for (int i = 0; i < ncoeff; ++i) {
cannam@26 303 cc[i] /= frames;
cannam@26 304 }
cannam@26 305
cannam@27 306 if (decimator) delete[] psource;
cannam@27 307
cannam@26 308 features.push_back(cc);
cannam@26 309 }
cannam@26 310
cannam@18 311 void ClusterMeltSegmenter::segment(int m)
cannam@18 312 {
cannam@24 313 nclusters = m;
cannam@24 314 segment();
cannam@18 315 }
cannam@18 316
cannam@18 317 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
cannam@18 318 {
cannam@24 319 features = f;
cannam@24 320 featureType = FEATURE_TYPE_UNKNOWN;
cannam@18 321 }
cannam@18 322
cannam@18 323 void ClusterMeltSegmenter::segment()
cannam@18 324 {
cannam@26 325 delete constq;
cannam@26 326 constq = 0;
cannam@26 327 delete mfcc;
cannam@26 328 mfcc = 0;
cannam@26 329 delete decimator;
cannam@26 330 decimator = 0;
cannam@58 331
Chris@189 332 int sz = features.size();
Chris@189 333
Chris@189 334 if (sz < histogramLength) return;
cannam@58 335 /*
cannam@24 336 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
cannam@24 337 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
cannam@58 338 */
cannam@24 339 // copy the features to a native array and use the existing C segmenter...
cannam@24 340 double** arrFeatures = new double*[features.size()];
Chris@189 341 for (int i = 0; i < sz; i++)
cannam@24 342 {
cannam@24 343 if (featureType == FEATURE_TYPE_UNKNOWN) {
cannam@24 344 arrFeatures[i] = new double[features[0].size()];
Chris@189 345 for (int j = 0; j < int(features[0].size()); j++) {
Chris@189 346 arrFeatures[i][j] = features[i][j];
Chris@189 347 }
cannam@24 348 } else {
cannam@24 349 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
Chris@189 350 for (int j = 0; j < ncoeff; j++) {
Chris@189 351 arrFeatures[i][j] = features[i][j];
Chris@189 352 }
cannam@24 353 }
cannam@24 354 }
cannam@18 355
cannam@24 356 q = new int[features.size()];
cannam@18 357
cannam@26 358 if (featureType == FEATURE_TYPE_UNKNOWN ||
cannam@26 359 featureType == FEATURE_TYPE_MFCC)
cannam@24 360 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
cannam@24 361 nclusters, neighbourhoodLimit);
cannam@24 362 else
cannam@24 363 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
cannam@24 364 nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
cannam@18 365
cannam@24 366 // convert the cluster assignment sequence to a segmentation
cannam@24 367 makeSegmentation(q, features.size());
cannam@18 368
cannam@24 369 // de-allocate arrays
cannam@24 370 delete [] q;
Chris@189 371 for (int i = 0; i < int(features.size()); i++) delete [] arrFeatures[i];
cannam@24 372 delete [] arrFeatures;
cannam@18 373
cannam@24 374 // clear the features
cannam@24 375 clear();
cannam@18 376 }
cannam@18 377
cannam@18 378 void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
cannam@18 379 {
cannam@24 380 segmentation.segments.clear();
cannam@24 381 segmentation.nsegtypes = nclusters;
cannam@24 382 segmentation.samplerate = samplerate;
cannam@18 383
cannam@24 384 Segment segment;
cannam@24 385 segment.start = 0;
cannam@24 386 segment.type = q[0];
cannam@18 387
cannam@24 388 for (int i = 1; i < len; i++)
cannam@24 389 {
cannam@24 390 if (q[i] != q[i-1])
cannam@24 391 {
cannam@24 392 segment.end = i * getHopsize();
cannam@24 393 segmentation.segments.push_back(segment);
cannam@24 394 segment.type = q[i];
cannam@24 395 segment.start = segment.end;
cannam@24 396 }
cannam@24 397 }
cannam@24 398 segment.end = len * getHopsize();
cannam@24 399 segmentation.segments.push_back(segment);
cannam@18 400 }
cannam@18 401