annotate dsp/segmentation/ClusterMeltSegmenter.cpp @ 515:08bcc06c38ec tip master

Remove fast-math
author Chris Cannam <cannam@all-day-breakfast.com>
date Tue, 28 Jan 2020 15:27:37 +0000
parents 1bea13b8f951
children
rev   line source
c@249 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@249 2
c@243 3 /*
c@249 4 * ClusterMeltSegmenter.cpp
c@243 5 *
c@249 6 * Created by Mark Levy on 23/03/2006.
c@249 7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
c@309 8
c@309 9 This program is free software; you can redistribute it and/or
c@309 10 modify it under the terms of the GNU General Public License as
c@309 11 published by the Free Software Foundation; either version 2 of the
c@309 12 License, or (at your option) any later version. See the file
c@309 13 COPYING included with this distribution for more information.
c@243 14 */
c@243 15
c@243 16 #include <cfloat>
c@243 17 #include <cmath>
c@243 18
c@243 19 #include "ClusterMeltSegmenter.h"
c@243 20 #include "cluster_segmenter.h"
c@243 21 #include "segment.h"
c@243 22
c@245 23 #include "dsp/transforms/FFT.h"
c@249 24 #include "dsp/chromagram/ConstantQ.h"
c@249 25 #include "dsp/rateconversion/Decimator.h"
c@251 26 #include "dsp/mfcc/MFCC.h"
c@245 27
cannam@493 28 using std::vector;
cannam@493 29
c@249 30 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) :
c@249 31 window(NULL),
c@289 32 fft(NULL),
c@249 33 constq(NULL),
c@251 34 mfcc(NULL),
c@249 35 featureType(params.featureType),
c@249 36 hopSize(params.hopSize),
c@249 37 windowSize(params.windowSize),
c@249 38 fmin(params.fmin),
c@249 39 fmax(params.fmax),
c@249 40 nbins(params.nbins),
cannam@480 41 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
c@249 42 nHMMStates(params.nHMMStates),
c@249 43 nclusters(params.nclusters),
c@249 44 histogramLength(params.histogramLength),
c@249 45 neighbourhoodLimit(params.neighbourhoodLimit),
c@251 46 decimator(NULL)
c@243 47 {
c@243 48 }
c@243 49
c@243 50 void ClusterMeltSegmenter::initialise(int fs)
c@243 51 {
c@249 52 samplerate = fs;
c@249 53
c@251 54 if (featureType == FEATURE_TYPE_CONSTQ ||
c@251 55 featureType == FEATURE_TYPE_CHROMA) {
c@251 56
c@251 57 // run internal processing at 11025 or thereabouts
c@249 58 int internalRate = 11025;
c@249 59 int decimationFactor = samplerate / internalRate;
c@249 60 if (decimationFactor < 1) decimationFactor = 1;
c@249 61
c@249 62 // must be a power of two
c@249 63 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
c@249 64
c@249 65 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
c@249 66 decimationFactor = Decimator::getHighestSupportedFactor();
c@249 67 }
c@249 68
c@249 69 if (decimationFactor > 1) {
c@249 70 decimator = new Decimator(getWindowsize(), decimationFactor);
c@249 71 }
c@249 72
c@249 73 CQConfig config;
c@249 74 config.FS = samplerate / decimationFactor;
c@249 75 config.min = fmin;
c@249 76 config.max = fmax;
c@249 77 config.BPO = nbins;
c@249 78 config.CQThresh = 0.0054;
c@249 79
c@249 80 constq = new ConstantQ(config);
c@249 81 constq->sparsekernel();
c@251 82
c@251 83 ncoeff = constq->getK();
c@289 84
cannam@495 85 fft = new FFTReal(constq->getFFTLength());
c@251 86
c@251 87 } else if (featureType == FEATURE_TYPE_MFCC) {
c@249 88
c@252 89 // run internal processing at 22050 or thereabouts
c@252 90 int internalRate = 22050;
c@252 91 int decimationFactor = samplerate / internalRate;
c@252 92 if (decimationFactor < 1) decimationFactor = 1;
c@252 93
c@252 94 // must be a power of two
c@252 95 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
c@252 96
c@252 97 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
c@252 98 decimationFactor = Decimator::getHighestSupportedFactor();
c@252 99 }
c@252 100
c@252 101 if (decimationFactor > 1) {
c@252 102 decimator = new Decimator(getWindowsize(), decimationFactor);
c@252 103 }
c@252 104
c@255 105 MFCCConfig config(samplerate / decimationFactor);
c@252 106 config.fftsize = 2048;
c@252 107 config.nceps = 19;
c@252 108 config.want_c0 = true;
c@251 109
c@251 110 mfcc = new MFCC(config);
c@252 111 ncoeff = config.nceps + 1;
c@249 112 }
c@243 113 }
c@243 114
c@243 115 ClusterMeltSegmenter::~ClusterMeltSegmenter()
c@243 116 {
c@249 117 delete window;
c@249 118 delete constq;
c@249 119 delete decimator;
c@289 120 delete fft;
c@245 121 }
c@245 122
c@245 123 int
c@245 124 ClusterMeltSegmenter::getWindowsize()
c@245 125 {
c@269 126 return static_cast<int>(windowSize * samplerate + 0.001);
c@245 127 }
c@245 128
c@245 129 int
c@245 130 ClusterMeltSegmenter::getHopsize()
c@245 131 {
c@269 132 return static_cast<int>(hopSize * samplerate + 0.001);
c@243 133 }
c@243 134
c@249 135 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
c@243 136 {
c@251 137 if (featureType == FEATURE_TYPE_CONSTQ ||
c@251 138 featureType == FEATURE_TYPE_CHROMA) {
c@251 139 extractFeaturesConstQ(samples, nsamples);
c@251 140 } else if (featureType == FEATURE_TYPE_MFCC) {
c@251 141 extractFeaturesMFCC(samples, nsamples);
c@251 142 }
c@251 143 }
c@251 144
c@251 145 void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples)
c@251 146 {
c@249 147 if (!constq) {
c@251 148 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: "
c@251 149 << "No const-q: initialise not called?"
c@249 150 << std::endl;
c@249 151 return;
c@249 152 }
c@245 153
c@249 154 if (nsamples < getWindowsize()) {
c@249 155 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
c@249 156 return;
c@249 157 }
c@249 158
cannam@495 159 int fftsize = constq->getFFTLength();
c@249 160
c@249 161 if (!window || window->getSize() != fftsize) {
c@249 162 delete window;
c@249 163 window = new Window<double>(HammingWindow, fftsize);
c@249 164 }
c@249 165
c@249 166 vector<double> cq(ncoeff);
c@249 167
c@249 168 for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
c@249 169
c@249 170 const double *psource = samples;
c@249 171 int pcount = nsamples;
c@249 172
c@249 173 if (decimator) {
c@249 174 pcount = nsamples / decimator->getFactor();
c@249 175 double *decout = new double[pcount];
c@249 176 decimator->process(samples, decout);
c@249 177 psource = decout;
c@249 178 }
c@249 179
c@249 180 int origin = 0;
c@249 181
c@249 182 // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
c@249 183
c@249 184 int frames = 0;
c@249 185
c@249 186 double *frame = new double[fftsize];
c@249 187 double *real = new double[fftsize];
c@249 188 double *imag = new double[fftsize];
c@249 189 double *cqre = new double[ncoeff];
c@249 190 double *cqim = new double[ncoeff];
c@249 191
c@249 192 while (origin <= pcount) {
c@249 193
c@249 194 // always need at least one fft window per block, but after
c@249 195 // that we want to avoid having any incomplete ones
c@249 196 if (origin > 0 && origin + fftsize >= pcount) break;
c@249 197
c@249 198 for (int i = 0; i < fftsize; ++i) {
c@249 199 if (origin + i < pcount) {
c@249 200 frame[i] = psource[origin + i];
c@249 201 } else {
c@249 202 frame[i] = 0.0;
c@249 203 }
c@249 204 }
c@249 205
c@249 206 for (int i = 0; i < fftsize/2; ++i) {
c@249 207 double value = frame[i];
c@249 208 frame[i] = frame[i + fftsize/2];
c@249 209 frame[i + fftsize/2] = value;
c@249 210 }
c@249 211
c@249 212 window->cut(frame);
c@249 213
c@339 214 fft->forward(frame, real, imag);
c@249 215
c@249 216 constq->process(real, imag, cqre, cqim);
cannam@480 217
c@249 218 for (int i = 0; i < ncoeff; ++i) {
c@249 219 cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
c@249 220 }
c@249 221 ++frames;
c@245 222
c@249 223 origin += fftsize/2;
c@249 224 }
c@245 225
c@249 226 delete [] cqre;
c@249 227 delete [] cqim;
c@249 228 delete [] real;
c@249 229 delete [] imag;
c@249 230 delete [] frame;
c@245 231
c@249 232 for (int i = 0; i < ncoeff; ++i) {
c@249 233 cq[i] /= frames;
c@249 234 }
c@245 235
c@249 236 if (decimator) delete[] psource;
c@245 237
c@249 238 features.push_back(cq);
c@243 239 }
c@243 240
c@251 241 void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples)
c@251 242 {
c@251 243 if (!mfcc) {
c@251 244 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: "
c@251 245 << "No mfcc: initialise not called?"
c@251 246 << std::endl;
c@251 247 return;
c@251 248 }
c@251 249
c@251 250 if (nsamples < getWindowsize()) {
c@251 251 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
c@251 252 return;
c@251 253 }
c@251 254
c@251 255 int fftsize = mfcc->getfftlength();
c@251 256
c@251 257 vector<double> cc(ncoeff);
c@251 258
c@251 259 for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
c@251 260
c@251 261 const double *psource = samples;
c@251 262 int pcount = nsamples;
c@251 263
c@252 264 if (decimator) {
c@252 265 pcount = nsamples / decimator->getFactor();
c@252 266 double *decout = new double[pcount];
c@252 267 decimator->process(samples, decout);
c@252 268 psource = decout;
c@252 269 }
c@252 270
c@251 271 int origin = 0;
c@251 272 int frames = 0;
c@251 273
c@251 274 double *frame = new double[fftsize];
c@251 275 double *ccout = new double[ncoeff];
c@251 276
c@251 277 while (origin <= pcount) {
c@251 278
c@251 279 // always need at least one fft window per block, but after
c@251 280 // that we want to avoid having any incomplete ones
c@251 281 if (origin > 0 && origin + fftsize >= pcount) break;
c@251 282
c@251 283 for (int i = 0; i < fftsize; ++i) {
c@251 284 if (origin + i < pcount) {
c@251 285 frame[i] = psource[origin + i];
c@251 286 } else {
c@251 287 frame[i] = 0.0;
c@251 288 }
c@251 289 }
c@251 290
c@255 291 mfcc->process(frame, ccout);
cannam@480 292
c@251 293 for (int i = 0; i < ncoeff; ++i) {
c@251 294 cc[i] += ccout[i];
c@251 295 }
c@251 296 ++frames;
c@251 297
c@251 298 origin += fftsize/2;
c@251 299 }
c@251 300
c@251 301 delete [] ccout;
c@251 302 delete [] frame;
c@251 303
c@251 304 for (int i = 0; i < ncoeff; ++i) {
c@251 305 cc[i] /= frames;
c@251 306 }
c@251 307
c@252 308 if (decimator) delete[] psource;
c@252 309
c@251 310 features.push_back(cc);
c@251 311 }
c@251 312
c@243 313 void ClusterMeltSegmenter::segment(int m)
c@243 314 {
c@249 315 nclusters = m;
c@249 316 segment();
c@243 317 }
c@243 318
c@243 319 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
c@243 320 {
c@249 321 features = f;
c@249 322 featureType = FEATURE_TYPE_UNKNOWN;
c@243 323 }
c@243 324
c@243 325 void ClusterMeltSegmenter::segment()
c@243 326 {
c@251 327 delete constq;
c@251 328 constq = 0;
c@251 329 delete mfcc;
c@251 330 mfcc = 0;
c@251 331 delete decimator;
c@251 332 decimator = 0;
c@283 333
c@414 334 int sz = features.size();
c@414 335
c@414 336 if (sz < histogramLength) return;
c@283 337 /*
c@249 338 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
c@249 339 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
c@283 340 */
c@249 341 // copy the features to a native array and use the existing C segmenter...
cannam@480 342 double** arrFeatures = new double*[features.size()];
cannam@480 343 for (int i = 0; i < sz; i++) {
c@249 344 if (featureType == FEATURE_TYPE_UNKNOWN) {
c@249 345 arrFeatures[i] = new double[features[0].size()];
c@414 346 for (int j = 0; j < int(features[0].size()); j++) {
c@414 347 arrFeatures[i][j] = features[i][j];
c@414 348 }
c@249 349 } else {
cannam@480 350 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
c@414 351 for (int j = 0; j < ncoeff; j++) {
c@414 352 arrFeatures[i][j] = features[i][j];
c@414 353 }
c@249 354 }
c@249 355 }
cannam@480 356
c@249 357 q = new int[features.size()];
cannam@480 358
c@251 359 if (featureType == FEATURE_TYPE_UNKNOWN ||
cannam@480 360 featureType == FEATURE_TYPE_MFCC) {
c@249 361 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
c@249 362 nclusters, neighbourhoodLimit);
cannam@480 363 } else {
c@249 364 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
c@249 365 nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
cannam@480 366 }
cannam@480 367
c@249 368 // convert the cluster assignment sequence to a segmentation
cannam@480 369 makeSegmentation(q, features.size());
cannam@480 370
c@249 371 // de-allocate arrays
c@249 372 delete [] q;
c@414 373 for (int i = 0; i < int(features.size()); i++) delete [] arrFeatures[i];
c@249 374 delete [] arrFeatures;
cannam@480 375
c@249 376 // clear the features
c@249 377 clear();
c@243 378 }
c@243 379
c@243 380 void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
c@243 381 {
c@249 382 segmentation.segments.clear();
c@249 383 segmentation.nsegtypes = nclusters;
c@249 384 segmentation.samplerate = samplerate;
cannam@480 385
c@249 386 Segment segment;
c@249 387 segment.start = 0;
c@249 388 segment.type = q[0];
cannam@480 389
cannam@480 390 for (int i = 1; i < len; i++) {
cannam@480 391 if (q[i] != q[i-1]) {
c@249 392 segment.end = i * getHopsize();
c@249 393 segmentation.segments.push_back(segment);
c@249 394 segment.type = q[i];
c@249 395 segment.start = segment.end;
c@249 396 }
c@249 397 }
c@249 398 segment.end = len * getHopsize();
c@249 399 segmentation.segments.push_back(segment);
c@243 400 }
c@243 401