annotate segmentino/Segmentino.cpp @ 74:a0b1f4810e4e

Update subrepos
author Chris Cannam
date Fri, 04 Sep 2015 10:44:26 +0100
parents 37af1c80fb94
children f1717a01dd9a
rev   line source
max@1 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
max@1 2
max@1 3 /*
Chris@48 4 Segmentino
max@1 5
Chris@48 6 Code by Massimiliano Zanoni and Matthias Mauch
Chris@48 7 Centre for Digital Music, Queen Mary, University of London
Chris@48 8
Chris@48 9 Copyright 2009-2013 Queen Mary, University of London.
max@1 10
Chris@65 11 This program is free software: you can redistribute it and/or
Chris@65 12 modify it under the terms of the GNU Affero General Public License
Chris@65 13 as published by the Free Software Foundation, either version 3 of
Chris@65 14 the License, or (at your option) any later version. See the file
max@1 15 COPYING included with this distribution for more information.
max@1 16 */
max@1 17
Chris@48 18 #include "Segmentino.h"
max@1 19
Chris@49 20 #include <qm-dsp/base/Window.h>
Chris@49 21 #include <qm-dsp/dsp/onsets/DetectionFunction.h>
Chris@49 22 #include <qm-dsp/dsp/onsets/PeakPicking.h>
Chris@49 23 #include <qm-dsp/dsp/transforms/FFT.h>
Chris@49 24 #include <qm-dsp/dsp/tempotracking/TempoTrackV2.h>
Chris@49 25 #include <qm-dsp/dsp/tempotracking/DownBeat.h>
Chris@49 26 #include <qm-dsp/maths/MathUtilities.h>
Chris@49 27
Chris@49 28 #include <nnls-chroma/chromamethods.h>
Chris@49 29
Chris@49 30 #include <armadillo>
Chris@49 31
max@1 32 #include <fstream>
max@1 33 #include <sstream>
max@1 34 #include <cmath>
max@1 35 #include <vector>
max@1 36
max@1 37 #include <vamp-sdk/Plugin.h>
max@1 38
Chris@56 39 using arma::colvec;
Chris@56 40 using arma::conv;
Chris@56 41 using arma::cor;
Chris@56 42 using arma::cube;
Chris@56 43 using arma::eye;
Chris@56 44 using arma::imat;
Chris@56 45 using arma::irowvec;
Chris@56 46 using arma::linspace;
Chris@56 47 using arma::mat;
Chris@56 48 using arma::max;
Chris@56 49 using arma::ones;
Chris@56 50 using arma::rowvec;
Chris@56 51 using arma::sort;
Chris@56 52 using arma::span;
Chris@56 53 using arma::sum;
Chris@56 54 using arma::trans;
Chris@56 55 using arma::uvec;
Chris@56 56 using arma::uword;
Chris@56 57 using arma::vec;
Chris@56 58 using arma::zeros;
Chris@56 59
max@1 60 using std::string;
max@1 61 using std::vector;
max@1 62 using std::cerr;
max@1 63 using std::cout;
max@1 64 using std::endl;
max@1 65
// Result record for one part found by the segmenter.
// NOTE(review): the precise meaning of each field is established by the
// segmentation code that fills it in, which is not part of this excerpt.
struct Part {
    int n;
    std::vector<int> indices;
    std::string letter;
    int value;
    int level;
    int nInd;
};
max@1 75
max@1 76
max@8 77
max@1 78 /* ------------------------------------ */
max@1 79 /* ----- BEAT DETECTOR CLASS ---------- */
max@1 80 /* ------------------------------------ */
max@1 81
max@1 82 class BeatTrackerData
max@1 83 {
max@1 84 /* --- ATTRIBUTES --- */
max@1 85 public:
max@1 86 DFConfig dfConfig;
max@1 87 DetectionFunction *df;
max@1 88 DownBeat *downBeat;
max@1 89 vector<double> dfOutput;
max@1 90 Vamp::RealTime origin;
max@1 91
max@1 92
max@1 93 /* --- METHODS --- */
max@1 94
max@1 95 /* --- Constructor --- */
max@1 96 public:
max@1 97 BeatTrackerData(float rate, const DFConfig &config) : dfConfig(config) {
Chris@22 98
max@1 99 df = new DetectionFunction(config);
max@1 100 // decimation factor aims at resampling to c. 3KHz; must be power of 2
max@1 101 int factor = MathUtilities::nextPowerOfTwo(rate / 3000);
max@1 102 // std::cerr << "BeatTrackerData: factor = " << factor << std::endl;
max@1 103 downBeat = new DownBeat(rate, factor, config.stepSize);
max@1 104 }
max@1 105
max@1 106 /* --- Desctructor --- */
max@1 107 ~BeatTrackerData() {
Chris@22 108 delete df;
max@1 109 delete downBeat;
max@1 110 }
max@1 111
max@1 112 void reset() {
max@1 113 delete df;
max@1 114 df = new DetectionFunction(dfConfig);
max@1 115 dfOutput.clear();
max@1 116 downBeat->resetAudioBuffer();
max@1 117 origin = Vamp::RealTime::zeroTime;
max@1 118 }
max@1 119 };
max@1 120
max@1 121
max@1 122 /* --------------------------------------- */
max@1 123 /* ----- CHROMA EXTRACTOR CLASS ---------- */
max@1 124 /* --------------------------------------- */
max@1 125
max@1 126 class ChromaData
max@1 127 {
max@1 128
max@1 129 /* --- ATTRIBUTES --- */
max@1 130
max@1 131 public:
max@1 132 int frameCount;
max@1 133 int nBPS;
max@1 134 Vamp::Plugin::FeatureList logSpectrum;
Chris@37 135 int blockSize;
max@1 136 int lengthOfNoteIndex;
max@1 137 vector<float> meanTunings;
max@1 138 vector<float> localTunings;
max@1 139 float whitening;
max@1 140 float preset;
max@1 141 float useNNLS;
max@1 142 vector<float> localTuning;
max@1 143 vector<float> kernelValue;
max@1 144 vector<int> kernelFftIndex;
max@1 145 vector<int> kernelNoteIndex;
max@1 146 float *dict;
max@1 147 bool tuneLocal;
max@1 148 float doNormalizeChroma;
max@1 149 float rollon;
max@1 150 float s;
max@1 151 vector<float> hw;
max@1 152 vector<float> sinvalues;
max@1 153 vector<float> cosvalues;
max@1 154 Window<float> window;
max@1 155 FFTReal fft;
Chris@37 156 int inputSampleRate;
max@1 157
max@1 158 /* --- METHODS --- */
max@1 159
max@1 160 /* --- Constructor --- */
max@1 161
max@1 162 public:
max@1 163 ChromaData(float inputSampleRate, size_t block_size) :
max@1 164 frameCount(0),
max@1 165 nBPS(3),
max@1 166 logSpectrum(0),
max@1 167 blockSize(0),
max@1 168 lengthOfNoteIndex(0),
max@1 169 meanTunings(0),
max@1 170 localTunings(0),
max@1 171 whitening(1.0),
max@1 172 preset(0.0),
max@1 173 useNNLS(1.0),
max@1 174 localTuning(0.0),
max@1 175 kernelValue(0),
max@1 176 kernelFftIndex(0),
max@1 177 kernelNoteIndex(0),
max@1 178 dict(0),
max@1 179 tuneLocal(0.0),
max@1 180 doNormalizeChroma(0),
max@1 181 rollon(0.0),
Chris@35 182 s(0.7),
Chris@35 183 sinvalues(0),
Chris@35 184 cosvalues(0),
Chris@35 185 window(HanningWindow, block_size),
Chris@35 186 fft(block_size),
Chris@35 187 inputSampleRate(inputSampleRate)
max@1 188 {
max@1 189 // make the *note* dictionary matrix
max@1 190 dict = new float[nNote * 84];
max@1 191 for (int i = 0; i < nNote * 84; ++i) dict[i] = 0.0;
max@1 192 blockSize = block_size;
max@1 193 }
max@1 194
max@1 195
max@1 196 /* --- Desctructor --- */
max@1 197
max@1 198 ~ChromaData() {
max@1 199 delete [] dict;
max@1 200 }
max@1 201
max@1 202 /* --- Public Methods --- */
max@1 203
max@1 204 void reset() {
max@1 205 frameCount = 0;
max@1 206 logSpectrum.clear();
max@1 207 for (int iBPS = 0; iBPS < 3; ++iBPS) {
max@1 208 meanTunings[iBPS] = 0;
max@1 209 localTunings[iBPS] = 0;
max@1 210 }
max@1 211 localTuning.clear();
max@1 212 }
max@1 213
max@1 214 void baseProcess(float *inputBuffers, Vamp::RealTime timestamp)
max@1 215 {
Chris@22 216
max@1 217 frameCount++;
max@1 218 float *magnitude = new float[blockSize/2];
max@1 219 double *fftReal = new double[blockSize];
max@1 220 double *fftImag = new double[blockSize];
max@1 221
max@1 222 // FFTReal wants doubles, so we need to make a local copy of inputBuffers
max@1 223 double *inputBuffersDouble = new double[blockSize];
Chris@37 224 for (int i = 0; i < blockSize; i++) inputBuffersDouble[i] = inputBuffers[i];
max@1 225
Chris@65 226 fft.forward(inputBuffersDouble, fftReal, fftImag);
max@1 227
max@1 228 float energysum = 0;
max@1 229 // make magnitude
max@1 230 float maxmag = -10000;
max@1 231 for (int iBin = 0; iBin < static_cast<int>(blockSize/2); iBin++) {
max@1 232 magnitude[iBin] = sqrt(fftReal[iBin] * fftReal[iBin] +
max@1 233 fftImag[iBin] * fftImag[iBin]);
max@1 234 if (magnitude[iBin]>blockSize*1.0) magnitude[iBin] = blockSize;
max@1 235 // a valid audio signal (between -1 and 1) should not be limited here.
max@1 236 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
max@1 237 if (rollon > 0) {
max@1 238 energysum += pow(magnitude[iBin],2);
max@1 239 }
max@1 240 }
max@1 241
max@1 242 float cumenergy = 0;
max@1 243 if (rollon > 0) {
max@1 244 for (int iBin = 2; iBin < static_cast<int>(blockSize/2); iBin++) {
max@1 245 cumenergy += pow(magnitude[iBin],2);
max@1 246 if (cumenergy < energysum * rollon / 100) magnitude[iBin-2] = 0;
max@1 247 else break;
max@1 248 }
max@1 249 }
max@1 250
max@1 251 if (maxmag < 2) {
max@1 252 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
max@1 253 for (int iBin = 0; iBin < static_cast<int>(blockSize/2); iBin++) {
max@1 254 magnitude[iBin] = 0;
max@1 255 }
max@1 256 }
max@1 257
max@1 258 // cerr << magnitude[200] << endl;
max@1 259
max@1 260 // note magnitude mapping using pre-calculated matrix
max@1 261 float *nm = new float[nNote]; // note magnitude
max@1 262 for (int iNote = 0; iNote < nNote; iNote++) {
max@1 263 nm[iNote] = 0; // initialise as 0
max@1 264 }
max@1 265 int binCount = 0;
max@1 266 for (vector<float>::iterator it = kernelValue.begin(); it != kernelValue.end(); ++it) {
max@1 267 nm[kernelNoteIndex[binCount]] += magnitude[kernelFftIndex[binCount]] * kernelValue[binCount];
max@1 268 binCount++;
max@1 269 }
max@1 270
max@1 271 float one_over_N = 1.0/frameCount;
max@1 272 // update means of complex tuning variables
max@1 273 for (int iBPS = 0; iBPS < nBPS; ++iBPS) meanTunings[iBPS] *= float(frameCount-1)*one_over_N;
max@1 274
max@1 275 for (int iTone = 0; iTone < round(nNote*0.62/nBPS)*nBPS+1; iTone = iTone + nBPS) {
max@1 276 for (int iBPS = 0; iBPS < nBPS; ++iBPS) meanTunings[iBPS] += nm[iTone + iBPS]*one_over_N;
max@1 277 float ratioOld = 0.997;
max@1 278 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1 279 localTunings[iBPS] *= ratioOld;
max@1 280 localTunings[iBPS] += nm[iTone + iBPS] * (1 - ratioOld);
max@1 281 }
max@1 282 }
max@1 283
max@1 284 float localTuningImag = 0;
max@1 285 float localTuningReal = 0;
max@1 286 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1 287 localTuningReal += localTunings[iBPS] * cosvalues[iBPS];
max@1 288 localTuningImag += localTunings[iBPS] * sinvalues[iBPS];
max@1 289 }
max@1 290
max@1 291 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
max@1 292 localTuning.push_back(normalisedtuning);
max@1 293
max@1 294 Vamp::Plugin::Feature f1; // logfreqspec
max@1 295 f1.hasTimestamp = true;
max@1 296 f1.timestamp = timestamp;
max@1 297 for (int iNote = 0; iNote < nNote; iNote++) {
max@1 298 f1.values.push_back(nm[iNote]);
max@1 299 }
max@1 300
max@1 301 // deletes
max@1 302 delete[] inputBuffersDouble;
max@1 303 delete[] magnitude;
max@1 304 delete[] fftReal;
max@1 305 delete[] fftImag;
max@1 306 delete[] nm;
max@1 307
max@1 308 logSpectrum.push_back(f1); // remember note magnitude
max@1 309 }
max@1 310
max@1 311 bool initialise()
max@1 312 {
max@1 313 dictionaryMatrix(dict, s);
Chris@22 314
Chris@37 315 // make things for tuning estimation
Chris@37 316 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1 317 sinvalues.push_back(sin(2*M_PI*(iBPS*1.0/nBPS)));
max@1 318 cosvalues.push_back(cos(2*M_PI*(iBPS*1.0/nBPS)));
max@1 319 }
max@1 320
Chris@22 321
Chris@37 322 // make hamming window of length 1/2 octave
Chris@37 323 int hamwinlength = nBPS * 6 + 1;
max@1 324 float hamwinsum = 0;
max@1 325 for (int i = 0; i < hamwinlength; ++i) {
max@1 326 hw.push_back(0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1)));
max@1 327 hamwinsum += 0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1));
max@1 328 }
max@1 329 for (int i = 0; i < hamwinlength; ++i) hw[i] = hw[i] / hamwinsum;
max@1 330
max@1 331
max@1 332 // initialise the tuning
max@1 333 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1 334 meanTunings.push_back(0);
max@1 335 localTunings.push_back(0);
max@1 336 }
Chris@22 337
max@1 338 blockSize = blockSize;
max@1 339 frameCount = 0;
max@1 340 int tempn = nNote * blockSize/2;
max@1 341 // cerr << "length of tempkernel : " << tempn << endl;
max@1 342 float *tempkernel;
max@1 343
max@1 344 tempkernel = new float[tempn];
max@1 345
max@1 346 logFreqMatrix(inputSampleRate, blockSize, tempkernel);
max@1 347 kernelValue.clear();
max@1 348 kernelFftIndex.clear();
max@1 349 kernelNoteIndex.clear();
max@1 350 int countNonzero = 0;
max@1 351 for (int iNote = 0; iNote < nNote; ++iNote) {
max@1 352 // I don't know if this is wise: manually making a sparse matrix
max@1 353 for (int iFFT = 0; iFFT < static_cast<int>(blockSize/2); ++iFFT) {
max@1 354 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
max@1 355 kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
max@1 356 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
max@1 357 countNonzero++;
max@1 358 }
max@1 359 kernelFftIndex.push_back(iFFT);
Chris@23 360 kernelNoteIndex.push_back(iNote);
max@1 361 }
max@1 362 }
max@1 363 }
max@1 364 delete [] tempkernel;
Chris@37 365
Chris@37 366 return true;
max@1 367 }
max@1 368 };
max@1 369
max@1 370
max@1 371 /* --------------------------------- */
max@1 372 /* ----- SONG PARTITIONER ---------- */
max@1 373 /* --------------------------------- */
max@1 374
max@1 375
max@1 376 /* --- ATTRIBUTES --- */
max@1 377
Chris@48 378 float Segmentino::m_stepSecs = 0.01161; // 512 samples at 44100
Chris@48 379 int Segmentino::m_chromaFramesizeFactor = 16; // 16 times as long as beat tracker's
Chris@48 380 int Segmentino::m_chromaStepsizeFactor = 4; // 4 times as long as beat tracker's
max@1 381
max@1 382
max@1 383 /* --- METHODS --- */
max@1 384
max@1 385 /* --- Constructor --- */
Chris@48 386 Segmentino::Segmentino(float inputSampleRate) :
max@1 387 Vamp::Plugin(inputSampleRate),
max@1 388 m_d(0),
Chris@35 389 m_chromadata(0),
max@1 390 m_bpb(4),
max@1 391 m_pluginFrameCount(0)
max@1 392 {
max@1 393 }
max@1 394
max@1 395
max@1 396 /* --- Destructor --- */
Chris@48 397 Segmentino::~Segmentino()
max@1 398 {
max@1 399 delete m_d;
Chris@35 400 delete m_chromadata;
max@1 401 }
max@1 402
max@1 403
max@1 404 /* --- Methods --- */
Chris@48 405 string Segmentino::getIdentifier() const
max@1 406 {
Chris@54 407 return "segmentino";
max@1 408 }
max@1 409
Chris@48 410 string Segmentino::getName() const
max@1 411 {
Chris@54 412 return "Segmentino";
max@1 413 }
max@1 414
Chris@48 415 string Segmentino::getDescription() const
max@1 416 {
max@1 417 return "Estimate contiguous segments pertaining to song parts such as verse and chorus.";
max@1 418 }
max@1 419
Chris@48 420 string Segmentino::getMaker() const
max@1 421 {
max@1 422 return "Queen Mary, University of London";
max@1 423 }
max@1 424
Chris@48 425 int Segmentino::getPluginVersion() const
max@1 426 {
max@1 427 return 2;
max@1 428 }
max@1 429
Chris@48 430 string Segmentino::getCopyright() const
max@1 431 {
Chris@65 432 return "Plugin by Matthew Davies, Christian Landone, Chris Cannam, Matthias Mauch and Massimiliano Zanoni Copyright (c) 2006-2013 QMUL - Affero GPL";
max@1 433 }
max@1 434
Chris@48 435 Segmentino::ParameterList Segmentino::getParameterDescriptors() const
max@1 436 {
max@1 437 ParameterList list;
max@1 438
max@1 439 ParameterDescriptor desc;
max@1 440
matthiasm@46 441 // desc.identifier = "bpb";
matthiasm@46 442 // desc.name = "Beats per Bar";
matthiasm@46 443 // desc.description = "The number of beats in each bar";
matthiasm@46 444 // desc.minValue = 2;
matthiasm@46 445 // desc.maxValue = 16;
matthiasm@46 446 // desc.defaultValue = 4;
matthiasm@46 447 // desc.isQuantized = true;
matthiasm@46 448 // desc.quantizeStep = 1;
matthiasm@46 449 // list.push_back(desc);
max@1 450
max@1 451 return list;
max@1 452 }
max@1 453
Chris@48 454 float Segmentino::getParameter(std::string name) const
max@1 455 {
max@1 456 if (name == "bpb") return m_bpb;
max@1 457 return 0.0;
max@1 458 }
max@1 459
Chris@48 460 void Segmentino::setParameter(std::string name, float value)
max@1 461 {
max@1 462 if (name == "bpb") m_bpb = lrintf(value);
max@1 463 }
max@1 464
max@1 465
max@1 466 // Return the StepSize for Chroma Extractor
Chris@48 467 size_t Segmentino::getPreferredStepSize() const
max@1 468 {
max@1 469 size_t step = size_t(m_inputSampleRate * m_stepSecs + 0.0001);
max@1 470 if (step < 1) step = 1;
max@1 471
max@1 472 return step;
max@1 473 }
max@1 474
max@1 475 // Return the BlockSize for Chroma Extractor
Chris@48 476 size_t Segmentino::getPreferredBlockSize() const
max@1 477 {
Chris@50 478 int theoretical = getPreferredStepSize() * 2;
max@1 479 theoretical *= m_chromaFramesizeFactor;
Chris@50 480 return MathUtilities::nextPowerOfTwo(theoretical);
max@1 481 }
max@1 482
max@1 483
max@1 484 // Initialize the plugin and define Beat Tracker and Chroma Extractor Objects
Chris@48 485 bool Segmentino::initialise(size_t channels, size_t stepSize, size_t blockSize)
max@1 486 {
max@1 487 if (m_d) {
Chris@22 488 delete m_d;
Chris@22 489 m_d = 0;
max@1 490 }
Chris@35 491 if (m_chromadata) {
Chris@35 492 delete m_chromadata;
Chris@35 493 m_chromadata = 0;
Chris@35 494 }
max@1 495
max@1 496 if (channels < getMinChannelCount() ||
Chris@22 497 channels > getMaxChannelCount()) {
Chris@48 498 std::cerr << "Segmentino::initialise: Unsupported channel count: "
max@1 499 << channels << std::endl;
max@1 500 return false;
max@1 501 }
max@1 502
max@1 503 if (stepSize != getPreferredStepSize()) {
Chris@48 504 std::cerr << "ERROR: Segmentino::initialise: Unsupported step size for this sample rate: "
max@1 505 << stepSize << " (wanted " << (getPreferredStepSize()) << ")" << std::endl;
max@1 506 return false;
max@1 507 }
max@1 508
max@1 509 if (blockSize != getPreferredBlockSize()) {
Chris@48 510 std::cerr << "WARNING: Segmentino::initialise: Sub-optimal block size for this sample rate: "
max@1 511 << blockSize << " (wanted " << getPreferredBlockSize() << ")" << std::endl;
max@1 512 }
max@1 513
max@1 514 // Beat tracker and Chroma extractor has two different configuration parameters
max@1 515
max@1 516 // Configuration Parameters for Beat Tracker
max@1 517 DFConfig dfConfig;
max@1 518 dfConfig.DFType = DF_COMPLEXSD;
max@1 519 dfConfig.stepSize = stepSize;
max@1 520 dfConfig.frameLength = blockSize / m_chromaFramesizeFactor;
max@1 521 dfConfig.dbRise = 3;
max@1 522 dfConfig.adaptiveWhitening = false;
max@1 523 dfConfig.whiteningRelaxCoeff = -1;
max@1 524 dfConfig.whiteningFloor = -1;
max@1 525
max@1 526 // Initialise Beat Tracker
max@1 527 m_d = new BeatTrackerData(m_inputSampleRate, dfConfig);
max@1 528 m_d->downBeat->setBeatsPerBar(m_bpb);
max@1 529
max@1 530 // Initialise Chroma Extractor
max@1 531 m_chromadata = new ChromaData(m_inputSampleRate, blockSize);
max@1 532 m_chromadata->initialise();
max@1 533
matthiasm@59 534 // definition of outputs numbers used internally
matthiasm@59 535 int outputCounter = 1;
matthiasm@59 536 m_beatOutputNumber = outputCounter++;
matthiasm@59 537 m_barsOutputNumber = outputCounter++;
matthiasm@59 538 m_beatcountsOutputNumber = outputCounter++;
matthiasm@59 539 m_beatsdOutputNumber = outputCounter++;
matthiasm@59 540 m_logscalespecOutputNumber = outputCounter++;
matthiasm@59 541 m_bothchromaOutputNumber = outputCounter++;
matthiasm@59 542 m_qchromafwOutputNumber = outputCounter++;
matthiasm@59 543 m_qchromaOutputNumber = outputCounter++;
matthiasm@59 544
max@1 545 return true;
max@1 546 }
max@1 547
Chris@48 548 void Segmentino::reset()
max@1 549 {
max@1 550 if (m_d) m_d->reset();
Chris@38 551 if (m_chromadata) m_chromadata->reset();
max@1 552 m_pluginFrameCount = 0;
max@1 553 }
max@1 554
Chris@48 555 Segmentino::OutputList Segmentino::getOutputDescriptors() const
max@1 556 {
matthiasm@59 557
max@1 558 OutputList list;
max@1 559
max@1 560 OutputDescriptor segm;
Chris@15 561 segm.identifier = "segmentation";
max@1 562 segm.name = "Segmentation";
max@1 563 segm.description = "Segmentation";
max@1 564 segm.unit = "segment-type";
max@1 565 segm.hasFixedBinCount = true;
max@1 566 //segm.binCount = 24;
max@1 567 segm.binCount = 1;
max@1 568 segm.hasKnownExtents = true;
max@1 569 segm.minValue = 1;
max@1 570 segm.maxValue = 5;
max@1 571 segm.isQuantized = true;
max@1 572 segm.quantizeStep = 1;
max@1 573 segm.sampleType = OutputDescriptor::VariableSampleRate;
Chris@17 574 segm.sampleRate = 1.0 / m_stepSecs;
max@1 575 segm.hasDuration = true;
matthiasm@59 576 m_segmOutputNumber = 0;
matthiasm@59 577
max@1 578 list.push_back(segm);
max@1 579
max@1 580 return list;
max@1 581 }
max@1 582
max@1 583 // Executed for each frame - called from the host
max@1 584
max@1 585 // We use time domain input, because DownBeat requires it -- so we
max@1 586 // use the time-domain version of DetectionFunction::process which
max@1 587 // does its own FFT. It requires doubles as input, so we need to
max@1 588 // make a temporary copy
max@1 589
max@1 590 // We only support a single input channel
Chris@65 591 Segmentino::FeatureSet Segmentino::process(const float *const *inputBuffers,
Chris@65 592 Vamp::RealTime timestamp)
max@1 593 {
max@1 594 if (!m_d) {
Chris@48 595 cerr << "ERROR: Segmentino::process: "
Chris@48 596 << "Segmentino has not been initialised"
Chris@22 597 << endl;
Chris@22 598 return FeatureSet();
max@1 599 }
max@1 600
max@1 601 const int fl = m_d->dfConfig.frameLength;
Chris@67 602
max@1 603 int sampleOffset = ((m_chromaFramesizeFactor-1) * fl) / 2;
max@1 604
Chris@67 605 double *dfinput = new double[fl];
Chris@67 606
max@1 607 // Since chroma needs a much longer frame size, we only ever use the very
max@1 608 // beginning of the frame for beat tracking.
max@1 609 for (int i = 0; i < fl; ++i) dfinput[i] = inputBuffers[0][i];
Chris@65 610 double output = m_d->df->processTimeDomain(dfinput);
max@1 611
Chris@67 612 delete[] dfinput;
Chris@67 613
max@1 614 if (m_d->dfOutput.empty()) m_d->origin = timestamp;
max@1 615
max@1 616 // std::cerr << "df[" << m_d->dfOutput.size() << "] is " << output << std::endl;
max@1 617 m_d->dfOutput.push_back(output);
max@1 618
max@1 619 // Downsample and store the incoming audio block.
max@1 620 // We have an overlap on the incoming audio stream (step size is
max@1 621 // half block size) -- this function is configured to take only a
max@1 622 // step size's worth, so effectively ignoring the overlap. Note
max@1 623 // however that this means we omit the last blocksize - stepsize
max@1 624 // samples completely for the purposes of barline detection
max@1 625 // (hopefully not a problem)
max@1 626 m_d->downBeat->pushAudioBlock(inputBuffers[0]);
max@1 627
max@1 628 // The following is not done every time, but only every m_chromaFramesizeFactor times,
max@1 629 // because the chroma does not need dense time frames.
max@1 630
max@1 631 if (m_pluginFrameCount % m_chromaStepsizeFactor == 0)
max@1 632 {
max@1 633
max@1 634 // Window the full time domain, data, FFT it and process chroma stuff.
max@1 635
Chris@67 636 float *windowedBuffers = new float[m_chromadata->blockSize];
Chris@67 637
max@1 638 m_chromadata->window.cut(&inputBuffers[0][0], &windowedBuffers[0]);
max@1 639
max@1 640 // adjust timestamp (we want the middle of the frame)
Chris@67 641 timestamp = timestamp +
Chris@67 642 Vamp::RealTime::frame2RealTime(sampleOffset, lrintf(m_inputSampleRate));
max@1 643
max@1 644 m_chromadata->baseProcess(&windowedBuffers[0], timestamp);
Chris@67 645
Chris@67 646 delete[] windowedBuffers;
max@1 647 }
Chris@67 648
max@1 649 m_pluginFrameCount++;
max@1 650
max@1 651 FeatureSet fs;
max@1 652 return fs;
max@1 653 }
max@1 654
Chris@48 655 Segmentino::FeatureSet Segmentino::getRemainingFeatures()
max@1 656 {
max@1 657 if (!m_d) {
Chris@48 658 cerr << "ERROR: Segmentino::getRemainingFeatures: "
Chris@48 659 << "Segmentino has not been initialised"
Chris@22 660 << endl;
Chris@22 661 return FeatureSet();
max@1 662 }
max@1 663
matthiasm@59 664 FeatureSet masterFeatureset;
matthiasm@59 665 FeatureSet internalFeatureset = beatTrack();
matthiasm@59 666
matthiasm@59 667 int beatcount = internalFeatureset[m_beatOutputNumber].size();
Chris@49 668 if (beatcount == 0) return Segmentino::FeatureSet();
matthiasm@59 669 Vamp::RealTime last_beattime = internalFeatureset[m_beatOutputNumber][beatcount-1].timestamp;
matthiasm@59 670
matthiasm@60 671 // // THIS FOLLOWING BIT IS WEIRD! REPLACES BEAT-TRACKED BEATS WITH
matthiasm@60 672 // // UNIFORM 0.5 SEC BEATS
matthiasm@59 673 // internalFeatureset[m_beatOutputNumber].clear();
matthiasm@59 674 // Vamp::RealTime beattime = Vamp::RealTime::fromSeconds(1.0);
matthiasm@59 675 // while (beattime < last_beattime)
matthiasm@59 676 // {
matthiasm@59 677 // Feature beatfeature;
matthiasm@59 678 // beatfeature.hasTimestamp = true;
matthiasm@59 679 // beatfeature.timestamp = beattime;
matthiasm@59 680 // masterFeatureset[m_beatOutputNumber].push_back(beatfeature);
matthiasm@59 681 // beattime = beattime + Vamp::RealTime::fromSeconds(0.5);
matthiasm@59 682 // }
matthiasm@46 683
Chris@16 684 FeatureList chromaList = chromaFeatures();
max@1 685
Chris@37 686 for (int i = 0; i < (int)chromaList.size(); ++i)
max@1 687 {
matthiasm@59 688 internalFeatureset[m_bothchromaOutputNumber].push_back(chromaList[i]);
max@1 689 }
max@1 690
max@1 691 // quantised and pseudo-quantised (beat-wise) chroma
matthiasm@59 692 std::vector<FeatureList> quantisedChroma = beatQuantiser(chromaList, internalFeatureset[m_beatOutputNumber]);
Chris@32 693
Chris@32 694 if (quantisedChroma.empty()) return masterFeatureset;
max@1 695
matthiasm@59 696 internalFeatureset[m_qchromafwOutputNumber] = quantisedChroma[0];
matthiasm@59 697 internalFeatureset[m_qchromaOutputNumber] = quantisedChroma[1];
max@1 698
max@1 699 // Segmentation
Chris@39 700 try {
Chris@39 701 masterFeatureset[m_segmOutputNumber] = runSegmenter(quantisedChroma[1]);
Chris@39 702 } catch (std::bad_alloc &a) {
Chris@48 703 cerr << "ERROR: Segmentino::getRemainingFeatures: Failed to run segmenter, not enough memory (song too long?)" << endl;
Chris@39 704 }
max@1 705
max@1 706 return(masterFeatureset);
max@1 707 }
max@1 708
max@1 709 /* ------ Beat Tracker ------ */
max@1 710
Chris@48 711 Segmentino::FeatureSet Segmentino::beatTrack()
max@1 712 {
max@1 713 vector<double> df;
max@1 714 vector<double> beatPeriod;
max@1 715 vector<double> tempi;
max@1 716
Chris@37 717 for (int i = 2; i < (int)m_d->dfOutput.size(); ++i) { // discard first two elts
max@1 718 df.push_back(m_d->dfOutput[i]);
max@1 719 beatPeriod.push_back(0.0);
max@1 720 }
max@1 721 if (df.empty()) return FeatureSet();
max@1 722
max@1 723 TempoTrackV2 tt(m_inputSampleRate, m_d->dfConfig.stepSize);
max@1 724 tt.calculateBeatPeriod(df, beatPeriod, tempi);
max@1 725
max@1 726 vector<double> beats;
max@1 727 tt.calculateBeats(df, beatPeriod, beats);
max@1 728
max@1 729 vector<int> downbeats;
max@1 730 size_t downLength = 0;
max@1 731 const float *downsampled = m_d->downBeat->getBufferedAudio(downLength);
max@1 732 m_d->downBeat->findDownBeats(downsampled, downLength, beats, downbeats);
max@1 733
max@1 734 vector<double> beatsd;
max@1 735 m_d->downBeat->getBeatSD(beatsd);
max@1 736
max@1 737 /*std::cout << "BeatTracker: found downbeats at: ";
max@1 738 for (int i = 0; i < downbeats.size(); ++i) std::cout << downbeats[i] << " " << std::endl;*/
max@1 739
max@1 740 FeatureSet returnFeatures;
max@1 741
max@1 742 char label[20];
max@1 743
max@1 744 int dbi = 0;
max@1 745 int beat = 0;
max@1 746 int bar = 0;
max@1 747
max@1 748 if (!downbeats.empty()) {
max@1 749 // get the right number for the first beat; this will be
max@1 750 // incremented before use (at top of the following loop)
max@1 751 int firstDown = downbeats[0];
max@1 752 beat = m_bpb - firstDown - 1;
max@1 753 if (beat == m_bpb) beat = 0;
max@1 754 }
max@1 755
Chris@37 756 for (int i = 0; i < (int)beats.size(); ++i) {
max@1 757
Chris@37 758 int frame = beats[i] * m_d->dfConfig.stepSize;
max@1 759
Chris@37 760 if (dbi < (int)downbeats.size() && i == downbeats[dbi]) {
max@1 761 beat = 0;
max@1 762 ++bar;
max@1 763 ++dbi;
max@1 764 } else {
max@1 765 ++beat;
max@1 766 }
max@1 767
max@1 768 /* Ooutput Section */
max@1 769
max@1 770 // outputs are:
max@1 771 //
max@1 772 // 0 -> beats
max@1 773 // 1 -> bars
max@1 774 // 2 -> beat counter function
max@1 775
max@1 776 Feature feature;
max@1 777 feature.hasTimestamp = true;
max@1 778 feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime (frame, lrintf(m_inputSampleRate));
max@1 779
max@1 780 sprintf(label, "%d", beat + 1);
max@1 781 feature.label = label;
max@1 782 returnFeatures[m_beatOutputNumber].push_back(feature); // labelled beats
max@1 783
max@1 784 feature.values.push_back(beat + 1);
max@1 785 returnFeatures[m_beatcountsOutputNumber].push_back(feature); // beat function
max@1 786
Chris@37 787 if (i > 0 && i <= (int)beatsd.size()) {
max@1 788 feature.values.clear();
max@1 789 feature.values.push_back(beatsd[i-1]);
max@1 790 feature.label = "";
max@1 791 returnFeatures[m_beatsdOutputNumber].push_back(feature); // beat spectral difference
max@1 792 }
max@1 793
max@1 794 if (beat == 0) {
max@1 795 feature.values.clear();
max@1 796 sprintf(label, "%d", bar);
max@1 797 feature.label = label;
max@1 798 returnFeatures[m_barsOutputNumber].push_back(feature); // bars
max@1 799 }
max@1 800 }
max@1 801
max@1 802 return returnFeatures;
max@1 803 }
max@1 804
max@1 805
max@1 806 /* ------ Chroma Extractor ------ */
max@1 807
Chris@48 808 Segmentino::FeatureList Segmentino::chromaFeatures()
max@1 809 {
max@1 810
max@1 811 FeatureList returnFeatureList;
max@1 812 FeatureList tunedlogfreqspec;
max@1 813
max@1 814 if (m_chromadata->logSpectrum.size() == 0) return returnFeatureList;
max@1 815
max@1 816 /** Calculate Tuning
max@1 817 calculate tuning from (using the angle of the complex number defined by the
max@1 818 cumulative mean real and imag values)
max@1 819 **/
max@1 820 float meanTuningImag = 0;
max@1 821 float meanTuningReal = 0;
max@1 822 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1 823 meanTuningReal += m_chromadata->meanTunings[iBPS] * m_chromadata->cosvalues[iBPS];
max@1 824 meanTuningImag += m_chromadata->meanTunings[iBPS] * m_chromadata->sinvalues[iBPS];
max@1 825 }
max@1 826 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
max@1 827 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
max@1 828 int intShift = floor(normalisedtuning * 3);
max@1 829 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
max@1 830
max@1 831 char buffer0 [50];
max@1 832
max@1 833 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
max@1 834
max@1 835 /** Tune Log-Frequency Spectrogram
max@1 836 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
max@1 837 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
max@1 838 **/
Chris@50 839 // cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
max@1 840
max@1 841 float tempValue = 0;
max@1 842
max@1 843 int count = 0;
max@1 844
max@1 845 for (FeatureList::iterator i = m_chromadata->logSpectrum.begin(); i != m_chromadata->logSpectrum.end(); ++i)
max@1 846 {
max@1 847
max@1 848 Feature f1 = *i;
max@1 849 Feature f2; // tuned log-frequency spectrum
max@1 850
max@1 851 f2.hasTimestamp = true;
max@1 852 f2.timestamp = f1.timestamp;
max@1 853
max@1 854 f2.values.push_back(0.0);
max@1 855 f2.values.push_back(0.0); // set lower edge to zero
max@1 856
max@1 857 if (m_chromadata->tuneLocal) {
max@1 858 intShift = floor(m_chromadata->localTuning[count] * 3);
max@1 859 floatShift = m_chromadata->localTuning[count] * 3 - intShift;
max@1 860 // floatShift is a really bad name for this
max@1 861 }
max@1 862
max@1 863 for (int k = 2; k < (int)f1.values.size() - 3; ++k)
max@1 864 { // interpolate all inner bins
max@1 865 tempValue = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift;
max@1 866 f2.values.push_back(tempValue);
max@1 867 }
max@1 868
max@1 869 f2.values.push_back(0.0);
max@1 870 f2.values.push_back(0.0);
max@1 871 f2.values.push_back(0.0); // upper edge
max@1 872
max@1 873 vector<float> runningmean = SpecialConvolution(f2.values,m_chromadata->hw);
max@1 874 vector<float> runningstd;
max@1 875 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
max@1 876 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
max@1 877 }
max@1 878 runningstd = SpecialConvolution(runningstd,m_chromadata->hw); // second step convolve
max@1 879 for (int i = 0; i < nNote; i++)
max@1 880 {
max@1 881
max@1 882 runningstd[i] = sqrt(runningstd[i]);
max@1 883 // square root to finally have running std
max@1 884
max@1 885 if (runningstd[i] > 0)
max@1 886 {
max@1 887 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
max@1 888 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_chromadata->whitening) : 0;
max@1 889 }
max@1 890
max@1 891 if (f2.values[i] < 0) {
max@1 892
max@1 893 cerr << "ERROR: negative value in logfreq spectrum" << endl;
max@1 894
max@1 895 }
max@1 896 }
max@1 897 tunedlogfreqspec.push_back(f2);
max@1 898 count++;
max@1 899 }
Chris@50 900 // cerr << "done." << endl;
max@1 901 /** Semitone spectrum and chromagrams
max@1 902 Semitone-spaced log-frequency spectrum derived
max@1 903 from the tuned log-freq spectrum above. the spectrum
max@1 904 is inferred using a non-negative least squares algorithm.
max@1 905 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
max@1 906 bass and treble stacked onto each other).
max@1 907 **/
Chris@50 908 /*
max@1 909 if (m_chromadata->useNNLS == 0) {
max@1 910 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
max@1 911 } else {
max@1 912 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
max@1 913 }
Chris@50 914 */
max@1 915 vector<float> oldchroma = vector<float>(12,0);
max@1 916 vector<float> oldbasschroma = vector<float>(12,0);
max@1 917 count = 0;
max@1 918
max@1 919 for (FeatureList::iterator it = tunedlogfreqspec.begin(); it != tunedlogfreqspec.end(); ++it) {
max@1 920 Feature logfreqsp = *it; // logfreq spectrum
max@1 921 Feature bothchroma; // treble and bass chromagram
max@1 922
max@1 923 bothchroma.hasTimestamp = true;
max@1 924 bothchroma.timestamp = logfreqsp.timestamp;
max@1 925
max@1 926 float b[nNote];
max@1 927
max@1 928 bool some_b_greater_zero = false;
max@1 929 float sumb = 0;
max@1 930 for (int i = 0; i < nNote; i++) {
max@1 931 b[i] = logfreqsp.values[i];
max@1 932 sumb += b[i];
max@1 933 if (b[i] > 0) {
max@1 934 some_b_greater_zero = true;
max@1 935 }
max@1 936 }
max@1 937
max@1 938 // here's where the non-negative least squares algorithm calculates the note activation x
max@1 939
max@1 940 vector<float> chroma = vector<float>(12, 0);
max@1 941 vector<float> basschroma = vector<float>(12, 0);
max@1 942 float currval;
max@1 943 int iSemitone = 0;
max@1 944
max@1 945 if (some_b_greater_zero) {
max@1 946 if (m_chromadata->useNNLS == 0) {
max@1 947 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
max@1 948 currval = 0;
max@1 949 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
max@1 950 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
max@1 951 }
max@1 952 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
max@1 953 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
max@1 954 iSemitone++;
max@1 955 }
max@1 956
max@1 957 } else {
max@1 958 float x[84+1000];
max@1 959 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
max@1 960 vector<int> signifIndex;
max@1 961 int index=0;
max@1 962 sumb /= 84.0;
max@1 963 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
max@1 964 float currval = 0;
max@1 965 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
max@1 966 currval += b[iNote + iBPS];
max@1 967 }
max@1 968 if (currval > 0) signifIndex.push_back(index);
max@1 969 index++;
max@1 970 }
max@1 971 float rnorm;
max@1 972 float w[84+1000];
max@1 973 float zz[84+1000];
max@1 974 int indx[84+1000];
max@1 975 int mode;
max@1 976 int dictsize = nNote*signifIndex.size();
max@1 977
max@1 978 float *curr_dict = new float[dictsize];
max@1 979 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
max@1 980 for (int iBin = 0; iBin < nNote; iBin++) {
max@1 981 curr_dict[iNote * nNote + iBin] =
max@1 982 1.0 * m_chromadata->dict[signifIndex[iNote] * nNote + iBin];
max@1 983 }
max@1 984 }
max@1 985 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
max@1 986 delete [] curr_dict;
max@1 987 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
max@1 988 // cerr << mode << endl;
max@1 989 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
max@1 990 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
max@1 991 }
max@1 992 }
max@1 993 }
max@1 994
max@1 995 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end());
max@1 996 // just stack the both chromas
max@1 997
max@1 998 bothchroma.values = chroma;
max@1 999 returnFeatureList.push_back(bothchroma);
max@1 1000 count++;
max@1 1001 }
Chris@50 1002 // cerr << "done." << endl;
max@1 1003
max@1 1004 return returnFeatureList;
max@1 1005 }
max@1 1006
max@1 1007 /* ------ Beat Quantizer ------ */
max@1 1008
max@4 1009 std::vector<Vamp::Plugin::FeatureList>
Chris@48 1010 Segmentino::beatQuantiser(Vamp::Plugin::FeatureList chromagram, Vamp::Plugin::FeatureList beats)
max@1 1011 {
max@1 1012 std::vector<FeatureList> returnVector;
max@1 1013
max@1 1014 FeatureList fwQchromagram; // frame-wise beat-quantised chroma
max@1 1015 FeatureList bwQchromagram; // beat-wise beat-quantised chroma
matthiasm@43 1016
matthiasm@43 1017
matthiasm@43 1018 size_t nChromaFrame = chromagram.size();
matthiasm@43 1019 size_t nBeat = beats.size();
max@1 1020
max@1 1021 if (nBeat == 0 && nChromaFrame == 0) return returnVector;
max@1 1022
Chris@37 1023 int nBin = chromagram[0].values.size();
max@1 1024
max@1 1025 vector<float> tempChroma = vector<float>(nBin);
max@1 1026
max@1 1027 Vamp::RealTime beatTimestamp = Vamp::RealTime::zeroTime;
max@1 1028 int currBeatCount = -1; // start before first beat
max@1 1029 int framesInBeat = 0;
max@1 1030
matthiasm@43 1031 for (size_t iChroma = 0; iChroma < nChromaFrame; ++iChroma)
max@1 1032 {
max@4 1033 Vamp::RealTime frameTimestamp = chromagram[iChroma].timestamp;
Chris@24 1034 Vamp::RealTime newBeatTimestamp;
Chris@22 1035
Chris@37 1036 if (currBeatCount != (int)beats.size() - 1) {
Chris@37 1037 newBeatTimestamp = beats[currBeatCount+1].timestamp;
Chris@37 1038 } else {
Chris@37 1039 newBeatTimestamp = chromagram[nChromaFrame-1].timestamp;
Chris@37 1040 }
Chris@22 1041
Chris@24 1042 if (frameTimestamp > newBeatTimestamp ||
max@1 1043 iChroma == nChromaFrame-1)
max@1 1044 {
max@1 1045 // new beat (or last chroma frame)
max@1 1046 // 1. finish all the old beat processing
Chris@23 1047 if (framesInBeat > 0)
Chris@23 1048 {
Chris@23 1049 for (int i = 0; i < nBin; ++i) tempChroma[i] /= framesInBeat; // average
Chris@23 1050 }
max@1 1051
max@1 1052 Feature bwQchromaFrame;
max@1 1053 bwQchromaFrame.hasTimestamp = true;
max@1 1054 bwQchromaFrame.timestamp = beatTimestamp;
max@1 1055 bwQchromaFrame.values = tempChroma;
Chris@24 1056 bwQchromaFrame.duration = newBeatTimestamp - beatTimestamp;
max@1 1057 bwQchromagram.push_back(bwQchromaFrame);
max@1 1058
max@1 1059 for (int iFrame = -framesInBeat; iFrame < 0; ++iFrame)
max@1 1060 {
max@1 1061 Feature fwQchromaFrame;
max@1 1062 fwQchromaFrame.hasTimestamp = true;
max@1 1063 fwQchromaFrame.timestamp = chromagram[iChroma+iFrame].timestamp;
max@1 1064 fwQchromaFrame.values = tempChroma; // all between two beats get the same
max@1 1065 fwQchromagram.push_back(fwQchromaFrame);
max@1 1066 }
max@1 1067
max@1 1068 // 2. increments / resets for current (new) beat
max@1 1069 currBeatCount++;
Chris@24 1070 beatTimestamp = newBeatTimestamp;
Chris@37 1071 for (int i = 0; i < nBin; ++i) tempChroma[i] = 0; // average
max@1 1072 framesInBeat = 0;
max@1 1073 }
max@1 1074 framesInBeat++;
Chris@37 1075 for (int i = 0; i < nBin; ++i) tempChroma[i] += chromagram[iChroma].values[i];
max@1 1076 }
max@1 1077 returnVector.push_back(fwQchromagram);
max@1 1078 returnVector.push_back(bwQchromagram);
Chris@30 1079 return returnVector;
max@1 1080 }
max@1 1081
matthiasm@43 1082
matthiasm@43 1083
max@1 1084 /* -------------------------------- */
max@1 1085 /* ------ Support Functions ------ */
max@1 1086 /* -------------------------------- */
max@1 1087
max@1 1088 // one-dimesion median filter
Chris@56 1089 vec medfilt1(vec v, int medfilt_length)
max@1 1090 {
matthiasm@46 1091 // TODO: check if this works with odd and even medfilt_length !!!
max@1 1092 int halfWin = medfilt_length/2;
max@1 1093
max@1 1094 // result vector
Chris@56 1095 vec res = zeros<vec>(v.size());
max@1 1096
max@1 1097 // padding
Chris@56 1098 vec padV = zeros<vec>(v.size()+medfilt_length-1);
max@1 1099
Chris@37 1100 for (int i=medfilt_length/2; i < medfilt_length/2+(int)v.size(); ++ i)
max@1 1101 {
max@1 1102 padV(i) = v(i-medfilt_length/2);
matthiasm@46 1103 }
matthiasm@46 1104
matthiasm@46 1105 // the above loop leaves the boundaries at 0,
matthiasm@46 1106 // the two loops below fill them with the start or end values of v at start and end
matthiasm@46 1107 for (int i = 0; i < halfWin; ++i) padV(i) = v(0);
matthiasm@46 1108 for (int i = halfWin+(int)v.size(); i < (int)v.size()+2*halfWin; ++i) padV(i) = v(v.size()-1);
matthiasm@46 1109
matthiasm@46 1110
max@1 1111
max@1 1112 // Median filter
Chris@56 1113 vec win = zeros<vec>(medfilt_length);
max@1 1114
Chris@37 1115 for (int i=0; i < (int)v.size(); ++i)
max@1 1116 {
max@1 1117 win = padV.subvec(i,i+halfWin*2);
max@1 1118 win = sort(win);
max@1 1119 res(i) = win(halfWin);
max@1 1120 }
max@1 1121
max@1 1122 return res;
max@1 1123 }
max@1 1124
max@1 1125
max@1 1126 // Quantile
Chris@56 1127 double quantile(vec v, double p)
max@1 1128 {
Chris@56 1129 vec sortV = sort(v);
max@1 1130 int n = sortV.size();
Chris@56 1131 vec x = zeros<vec>(n+2);
Chris@56 1132 vec y = zeros<vec>(n+2);
max@1 1133
max@1 1134 x(0) = 0;
max@1 1135 x(n+1) = 100;
max@1 1136
Chris@21 1137 for (int i=1; i<n+1; ++i)
max@1 1138 x(i) = 100*(0.5+(i-1))/n;
max@1 1139
max@1 1140 y(0) = sortV(0);
max@1 1141 y.subvec(1,n) = sortV;
max@1 1142 y(n+1) = sortV(n-1);
max@1 1143
Chris@56 1144 uvec x2index = find(x>=p*100);
max@1 1145
max@1 1146 // Interpolation
max@1 1147 double x1 = x(x2index(0)-1);
max@1 1148 double x2 = x(x2index(0));
max@1 1149 double y1 = y(x2index(0)-1);
max@1 1150 double y2 = y(x2index(0));
max@1 1151
max@1 1152 double res = (y2-y1)/(x2-x1)*(p*100-x1)+y1;
max@1 1153
max@1 1154 return res;
max@1 1155 }
max@1 1156
max@1 1157 // Max Filtering
Chris@56 1158 mat maxfilt1(mat inmat, int len)
max@1 1159 {
Chris@56 1160 mat outmat = inmat;
max@1 1161
Chris@37 1162 for (int i=0; i < (int)inmat.n_rows; ++i)
max@1 1163 {
Chris@56 1164 if (sum(inmat.row(i)) > 0)
max@1 1165 {
max@1 1166 // Take a window of rows
max@1 1167 int startWin;
max@1 1168 int endWin;
max@1 1169
max@1 1170 if (0 > i-len)
max@1 1171 startWin = 0;
max@1 1172 else
max@1 1173 startWin = i-len;
max@1 1174
Chris@37 1175 if ((int)inmat.n_rows-1 < i+len-1)
max@1 1176 endWin = inmat.n_rows-1;
max@1 1177 else
max@1 1178 endWin = i+len-1;
max@1 1179
Chris@56 1180 outmat(i,span::all) =
Chris@56 1181 max(inmat(span(startWin,endWin),span::all));
max@1 1182 }
max@1 1183 }
max@1 1184
max@1 1185 return outmat;
Chris@56 1186
max@1 1187 }
max@1 1188
max@1 1189 // Null Parts
Chris@56 1190 Part nullpart(vector<Part> parts, vec barline)
max@1 1191 {
Chris@56 1192 uvec nullindices = ones<uvec>(barline.size());
Chris@37 1193 for (int iPart=0; iPart<(int)parts.size(); ++iPart)
max@1 1194 {
Chris@21 1195 //for (int iIndex=0; iIndex < parts[0].indices.size(); ++iIndex)
Chris@37 1196 for (int iIndex=0; iIndex < (int)parts[iPart].indices.size(); ++iIndex)
Chris@21 1197 for (int i=0; i<parts[iPart].n; ++i)
max@1 1198 {
Chris@21 1199 int ind = parts[iPart].indices[iIndex]+i;
max@1 1200 nullindices(ind) = 0;
max@1 1201 }
max@1 1202 }
max@7 1203
max@1 1204 Part newPart;
max@1 1205 newPart.n = 1;
Chris@56 1206 uvec q = find(nullindices > 0);
max@1 1207
Chris@37 1208 for (int i=0; i<(int)q.size();++i)
max@1 1209 newPart.indices.push_back(q(i));
max@7 1210
max@1 1211 newPart.letter = '-';
max@1 1212 newPart.value = 0;
max@1 1213 newPart.level = 0;
max@1 1214
max@1 1215 return newPart;
max@1 1216 }
max@1 1217
max@1 1218
max@1 1219 // Merge Nulls
max@1 1220 void mergenulls(vector<Part> &parts)
max@1 1221 {
Chris@37 1222 for (int iPart=0; iPart<(int)parts.size(); ++iPart)
max@1 1223 {
max@1 1224
max@1 1225 vector<Part> newVectorPart;
max@1 1226
max@1 1227 if (parts[iPart].letter.compare("-")==0)
max@1 1228 {
max@1 1229 sort (parts[iPart].indices.begin(), parts[iPart].indices.end());
Chris@21 1230 int newpartind = -1;
max@1 1231
max@1 1232 vector<int> indices;
max@1 1233 indices.push_back(-2);
max@1 1234
Chris@37 1235 for (int iIndex=0; iIndex<(int)parts[iPart].indices.size(); ++iIndex)
max@1 1236 indices.push_back(parts[iPart].indices[iIndex]);
max@1 1237
Chris@37 1238 for (int iInd=1; iInd < (int)indices.size(); ++iInd)
max@1 1239 {
max@1 1240 if (indices[iInd] - indices[iInd-1] > 1)
max@1 1241 {
max@1 1242 newpartind++;
max@1 1243
max@1 1244 Part newPart;
matthiasm@46 1245 newPart.letter = 'N';
max@1 1246 std::stringstream out;
max@1 1247 out << newpartind+1;
max@1 1248 newPart.letter.append(out.str());
matthiasm@44 1249 // newPart.value = 20+newpartind+1;
matthiasm@44 1250 newPart.value = 0;
max@1 1251 newPart.n = 1;
max@1 1252 newPart.indices.push_back(indices[iInd]);
max@1 1253 newPart.level = 0;
max@1 1254
max@1 1255 newVectorPart.push_back(newPart);
max@1 1256 }
max@1 1257 else
max@1 1258 {
max@1 1259 newVectorPart[newpartind].n = newVectorPart[newpartind].n+1;
max@1 1260 }
max@1 1261 }
max@1 1262 parts.erase (parts.end());
max@1 1263
Chris@37 1264 for (int i=0; i<(int)newVectorPart.size(); ++i)
max@1 1265 parts.push_back(newVectorPart[i]);
max@1 1266 }
max@1 1267 }
max@1 1268 }
max@1 1269
max@1 1270 /* ------ Segmentation ------ */
max@1 1271
Chris@19 1272 vector<Part> songSegment(Vamp::Plugin::FeatureList quantisedChromagram)
max@1 1273 {
max@1 1274
max@1 1275
max@1 1276 /* ------ Parameters ------ */
max@1 1277 double thresh_beat = 0.85;
max@1 1278 double thresh_seg = 0.80;
matthiasm@46 1279 int medfilt_length = 5;
max@1 1280 int minlength = 28;
matthiasm@46 1281 int maxlength = 2*128;
max@1 1282 double quantilePerc = 0.1;
max@1 1283 /* ------------------------ */
max@1 1284
max@1 1285
max@1 1286 // Collect Info
Chris@19 1287 int nBeat = quantisedChromagram.size(); // Number of feature vector
Chris@19 1288 int nFeatValues = quantisedChromagram[0].values.size(); // Number of values for each feature vector
max@1 1289
Chris@27 1290 if (nBeat < minlength) {
Chris@27 1291 // return a single part
Chris@27 1292 vector<Part> parts;
Chris@27 1293 Part newPart;
Chris@27 1294 newPart.n = 1;
Chris@27 1295 newPart.indices.push_back(0);
Chris@27 1296 newPart.letter = "n1";
Chris@27 1297 newPart.value = 20;
Chris@27 1298 newPart.level = 0;
Chris@27 1299 parts.push_back(newPart);
Chris@27 1300 return parts;
Chris@27 1301 }
Chris@27 1302
Chris@56 1303 irowvec timeStamp = zeros<imat>(1,nBeat); // Vector of Time Stamps
max@1 1304
Chris@22 1305 // Save time stamp as a Vector
Chris@19 1306 if (quantisedChromagram[0].hasTimestamp)
max@1 1307 {
Chris@21 1308 for (int i = 0; i < nBeat; ++ i)
Chris@19 1309 timeStamp[i] = quantisedChromagram[i].timestamp.nsec;
max@1 1310 }
max@1 1311
max@1 1312
max@1 1313 // Build a ObservationTOFeatures Matrix
Chris@56 1314 mat featVal = zeros<mat>(nBeat,nFeatValues/2);
max@1 1315
Chris@21 1316 for (int i = 0; i < nBeat; ++ i)
Chris@21 1317 for (int j = 0; j < nFeatValues/2; ++ j)
max@1 1318 {
matthiasm@44 1319 featVal(i,j) = 0.8 * quantisedChromagram[i].values[j] + quantisedChromagram[i].values[j+12]; // bass attenuated
max@1 1320 }
max@1 1321
max@1 1322 // Set to arbitrary value to feature vectors with low std
Chris@56 1323 mat a = stddev(featVal,1,1);
max@1 1324
matthiasm@44 1325 // Feature Correlation Matrix
Chris@56 1326 mat simmat0 = 1-cor(trans(featVal));
max@1 1327
max@1 1328
Chris@21 1329 for (int i = 0; i < nBeat; ++ i)
max@1 1330 {
max@1 1331 if (a(i)<0.000001)
max@1 1332 {
max@1 1333 featVal(i,1) = 1000; // arbitrary
max@1 1334
Chris@21 1335 for (int j = 0; j < nFeatValues/2; ++j)
max@1 1336 {
max@1 1337 simmat0(i,j) = 1;
max@1 1338 simmat0(j,i) = 1;
max@1 1339 }
max@1 1340 }
max@1 1341 }
max@1 1342
Chris@56 1343 mat simmat = 1-simmat0/2;
max@1 1344
max@1 1345 // -------- To delate when the proble with the add of beat will be solved -------
matthiasm@45 1346 for (int i = 0; i < nBeat; ++ i)
matthiasm@45 1347 for (int j = 0; j < nBeat; ++ j)
matthiasm@45 1348 if (!std::isfinite(simmat(i,j)))
matthiasm@45 1349 simmat(i,j)=0;
max@1 1350 // ------------------------------------------------------------------------------
max@1 1351
max@1 1352 // Median Filtering applied to the Correlation Matrix
max@1 1353 // The median filter is for each diagonal of the Matrix
Chris@56 1354 mat median_simmat = zeros<mat>(nBeat,nBeat);
max@1 1355
Chris@21 1356 for (int i = 0; i < nBeat; ++ i)
max@1 1357 {
Chris@56 1358 vec temp = medfilt1(simmat.diag(i),medfilt_length);
max@1 1359 median_simmat.diag(i) = temp;
max@1 1360 median_simmat.diag(-i) = temp;
max@1 1361 }
max@1 1362
Chris@21 1363 for (int i = 0; i < nBeat; ++ i)
Chris@21 1364 for (int j = 0; j < nBeat; ++ j)
max@1 1365 if (!std::isfinite(median_simmat(i,j)))
max@1 1366 median_simmat(i,j) = 0;
max@1 1367
max@1 1368 // -------------- NOT CONVERTED -------------------------------------
max@1 1369 // if param.seg.standardise
max@1 1370 // med_median_simmat = repmat(median(median_simmat),nBeat,1);
max@1 1371 // std_median_simmat = repmat(std(median_simmat),nBeat,1);
max@1 1372 // median_simmat = (median_simmat - med_median_simmat) ./ std_median_simmat;
max@1 1373 // end
max@1 1374 // --------------------------------------------------------
max@1 1375
max@1 1376 // Retrieve Bar Bounderies
Chris@56 1377 uvec dup = find(median_simmat > thresh_beat);
Chris@56 1378 mat potential_duplicates = zeros<mat>(nBeat,nBeat);
Chris@56 1379 potential_duplicates.elem(dup) = ones<vec>(dup.size());
max@1 1380 potential_duplicates = trimatu(potential_duplicates);
max@1 1381
Chris@21 1382 int nPartlengths = round((maxlength-minlength)/4)+1;
Chris@56 1383 vec partlengths = zeros<vec>(nPartlengths);
max@1 1384
Chris@21 1385 for (int i = 0; i < nPartlengths; ++ i)
matthiasm@46 1386 partlengths(i) = (i*4) + minlength;
max@1 1387
max@1 1388 // initialise arrays
Chris@56 1389 cube simArray = zeros<cube>(nBeat,nBeat,nPartlengths);
Chris@56 1390 cube decisionArray2 = zeros<cube>(nBeat,nBeat,nPartlengths);
max@1 1391
matthiasm@46 1392 for (int iLength = 0; iLength < nPartlengths; ++ iLength)
matthiasm@46 1393 // for (int iLength = 0; iLength < 20; ++ iLength)
max@1 1394 {
Chris@21 1395 int len = partlengths(iLength);
Chris@21 1396 int nUsedBeat = nBeat - len + 1; // number of potential rep beginnings: they can't overlap at the end of the song
Chris@33 1397
Chris@33 1398 if (nUsedBeat < 1) continue;
max@1 1399
Chris@21 1400 for (int iBeat = 0; iBeat < nUsedBeat; ++ iBeat) // looping over all columns (arbitrarily chosen columns)
max@1 1401 {
Chris@56 1402 uvec help2 = find(potential_duplicates(span(0,nUsedBeat-1),iBeat)==1);
max@1 1403
Chris@37 1404 for (int i=0; i < (int)help2.size(); ++i)
max@1 1405 {
max@1 1406
max@1 1407 // measure how well two length len segments go together
max@1 1408 int kBeat = help2(i);
Chris@56 1409 vec distrib = median_simmat(span(iBeat,iBeat+len-1), span(kBeat,kBeat+len-1)).diag(0);
max@1 1410 simArray(iBeat,kBeat,iLength) = quantile(distrib,quantilePerc);
max@1 1411 }
max@1 1412 }
max@1 1413
Chris@56 1414 mat tempM = simArray(span(0,nUsedBeat-1), span(0,nUsedBeat-1), span(iLength,iLength));
Chris@56 1415 simArray.slice(iLength)(span(0,nUsedBeat-1), span(0,nUsedBeat-1)) = tempM + trans(tempM) - (eye<mat>(nUsedBeat,nUsedBeat)%tempM);
max@1 1416
max@1 1417 // convolution
Chris@56 1418 vec K = zeros<vec>(3);
max@1 1419 K << 0.01 << 0.98 << 0.01;
max@1 1420
max@1 1421
Chris@37 1422 for (int i=0; i < (int)simArray.n_rows; ++i)
max@1 1423 {
Chris@56 1424 rowvec t = conv((rowvec)simArray.slice(iLength).row(i),K);
Chris@56 1425 simArray.slice(iLength)(i, span::all) = t.subvec(1,t.size()-2);
max@1 1426 }
max@1 1427
max@1 1428 // take only over-average bars that do not overlap
max@1 1429
Chris@56 1430 mat temp = zeros<mat>(simArray.n_rows, simArray.n_cols);
Chris@56 1431 temp(span::all, span(0,nUsedBeat-1)) = simArray.slice(iLength)(span::all, span(0,nUsedBeat-1));
max@1 1432
Chris@37 1433 for (int i=0; i < (int)temp.n_rows; ++i)
Chris@37 1434 for (int j=0; j < nUsedBeat; ++j)
max@1 1435 if (temp(i,j) < thresh_seg)
max@1 1436 temp(i,j) = 0;
max@1 1437
max@1 1438 decisionArray2.slice(iLength) = temp;
max@1 1439
Chris@56 1440 mat maxMat = maxfilt1(decisionArray2.slice(iLength),len-1);
max@1 1441
Chris@37 1442 for (int i=0; i < (int)decisionArray2.n_rows; ++i)
Chris@37 1443 for (int j=0; j < (int)decisionArray2.n_cols; ++j)
max@1 1444 if (decisionArray2.slice(iLength)(i,j) < maxMat(i,j))
max@1 1445 decisionArray2.slice(iLength)(i,j) = 0;
max@1 1446
Chris@56 1447 decisionArray2.slice(iLength) = decisionArray2.slice(iLength) % trans(decisionArray2.slice(iLength));
max@1 1448
Chris@37 1449 for (int i=0; i < (int)simArray.n_rows; ++i)
Chris@37 1450 for (int j=0; j < (int)simArray.n_cols; ++j)
max@1 1451 if (simArray.slice(iLength)(i,j) < thresh_seg)
max@1 1452 potential_duplicates(i,j) = 0;
max@1 1453 }
max@1 1454
max@1 1455 // Milk the data
max@1 1456
Chris@56 1457 mat bestval;
max@1 1458
Chris@21 1459 for (int iLength=0; iLength<nPartlengths; ++iLength)
max@1 1460 {
Chris@56 1461 mat temp = zeros<mat>(decisionArray2.n_rows,decisionArray2.n_cols);
max@1 1462
Chris@37 1463 for (int rows=0; rows < (int)decisionArray2.n_rows; ++rows)
Chris@37 1464 for (int cols=0; cols < (int)decisionArray2.n_cols; ++cols)
max@1 1465 if (decisionArray2.slice(iLength)(rows,cols) > 0)
max@1 1466 temp(rows,cols) = 1;
max@1 1467
Chris@56 1468 vec currLogicSum = sum(temp,1);
max@1 1469
Chris@37 1470 for (int iBeat=0; iBeat < nBeat; ++iBeat)
max@1 1471 if (currLogicSum(iBeat) > 1)
max@1 1472 {
Chris@56 1473 vec t = decisionArray2.slice(iLength)(span::all,iBeat);
max@1 1474 double currSum = sum(t);
max@1 1475
Chris@21 1476 int count = 0;
Chris@37 1477 for (int i=0; i < (int)t.size(); ++i)
max@1 1478 if (t(i)>0)
max@1 1479 count++;
max@1 1480
max@1 1481 currSum = (currSum/count)/2;
max@1 1482
Chris@56 1483 rowvec t1;
max@1 1484 t1 << (currLogicSum(iBeat)-1) * partlengths(iLength) << currSum << iLength << iBeat << currLogicSum(iBeat);
max@1 1485
max@1 1486 bestval = join_cols(bestval,t1);
max@1 1487 }
max@1 1488 }
max@1 1489
max@1 1490 // Definition of the resulting vector
max@1 1491 vector<Part> parts;
max@1 1492
max@1 1493 // make a table of all valid sets of parts
max@1 1494
max@1 1495 char partletters[] = {'A','B','C','D','E','F','G', 'H','I','J','K','L','M','N','O','P','Q','R','S'};
Chris@21 1496 int partvalues[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19};
Chris@56 1497 vec valid_sets = ones<vec>(bestval.n_rows);
max@1 1498
max@1 1499 if (!bestval.is_empty())
max@1 1500 {
max@1 1501
max@1 1502 // In questo punto viene introdotto un errore alla 3 cifra decimale
max@1 1503
Chris@56 1504 colvec t = zeros<colvec>(bestval.n_rows);
Chris@37 1505 for (int i=0; i < (int)bestval.n_rows; ++i)
max@1 1506 {
max@1 1507 t(i) = bestval(i,1)*2;
max@1 1508 }
max@1 1509
max@1 1510 double m = t.max();
max@1 1511
Chris@56 1512 bestval(span::all,1) = bestval(span::all,1) / m;
Chris@56 1513 bestval(span::all,0) = bestval(span::all,0) + bestval(span::all,1);
max@1 1514
Chris@56 1515 mat bestval2;
Chris@37 1516 for (int i=0; i < (int)bestval.n_cols; ++i)
max@1 1517 if (i!=1)
max@1 1518 bestval2 = join_rows(bestval2,bestval.col(i));
max@1 1519
Chris@21 1520 for (int kSeg=0; kSeg<6; ++kSeg)
max@1 1521 {
Chris@56 1522 mat currbestvals = zeros<mat>(bestval2.n_rows, bestval2.n_cols);
Chris@37 1523 for (int i=0; i < (int)bestval2.n_rows; ++i)
Chris@37 1524 for (int j=0; j < (int)bestval2.n_cols; ++j)
max@1 1525 if (valid_sets(i))
max@1 1526 currbestvals(i,j) = bestval2(i,j);
max@1 1527
Chris@56 1528 vec t1 = currbestvals.col(0);
max@1 1529 double ma;
Chris@56 1530 uword maIdx;
max@1 1531 ma = t1.max(maIdx);
max@6 1532
max@6 1533 if ((maIdx == 0)&&(ma == 0))
max@6 1534 break;
max@1 1535
Chris@28 1536 int bestLength = lrint(partlengths(currbestvals(maIdx,1)));
Chris@56 1537 rowvec bestIndices = decisionArray2.slice(currbestvals(maIdx,1))(currbestvals(maIdx,2), span::all);
max@1 1538
Chris@56 1539 rowvec bestIndicesMap = zeros<rowvec>(bestIndices.size());
Chris@37 1540 for (int i=0; i < (int)bestIndices.size(); ++i)
max@1 1541 if (bestIndices(i)>0)
max@1 1542 bestIndicesMap(i) = 1;
max@1 1543
Chris@56 1544 rowvec mask = zeros<rowvec>(bestLength*2-1);
Chris@21 1545 for (int i=0; i<bestLength; ++i)
max@1 1546 mask(i+bestLength-1) = 1;
max@1 1547
Chris@56 1548 rowvec t2 = conv(bestIndicesMap,mask);
Chris@56 1549 rowvec island = t2.subvec(mask.size()/2,t2.size()-1-mask.size()/2);
max@1 1550
max@1 1551 // Save results in the structure
max@1 1552 Part newPart;
max@1 1553 newPart.n = bestLength;
Chris@56 1554 uvec q1 = find(bestIndices > 0);
max@1 1555
Chris@37 1556 for (int i=0; i < (int)q1.size();++i)
max@1 1557 newPart.indices.push_back(q1(i));
max@1 1558
max@1 1559 newPart.letter = partletters[kSeg];
max@1 1560 newPart.value = partvalues[kSeg];
max@1 1561 newPart.level = kSeg+1;
max@1 1562 parts.push_back(newPart);
max@1 1563
Chris@56 1564 uvec q2 = find(valid_sets==1);
max@1 1565
Chris@37 1566 for (int i=0; i < (int)q2.size(); ++i)
max@1 1567 {
Chris@21 1568 int iSet = q2(i);
Chris@21 1569 int s = partlengths(bestval2(iSet,1));
max@1 1570
Chris@56 1571 rowvec mask1 = zeros<rowvec>(s*2-1);
Chris@21 1572 for (int i=0; i<s; ++i)
max@1 1573 mask1(i+s-1) = 1;
max@1 1574
Chris@56 1575 rowvec Ind = decisionArray2.slice(bestval2(iSet,1))(bestval2(iSet,2), span::all);
Chris@56 1576 rowvec IndMap = zeros<rowvec>(Ind.size());
Chris@37 1577 for (int i=0; i < (int)Ind.size(); ++i)
max@1 1578 if (Ind(i)>0)
max@1 1579 IndMap(i) = 2;
max@1 1580
Chris@56 1581 rowvec t3 = conv(IndMap,mask1);
Chris@56 1582 rowvec currislands = t3.subvec(mask1.size()/2,t3.size()-1-mask1.size()/2);
Chris@56 1583 rowvec islandsdMult = currislands%island;
max@6 1584
Chris@56 1585 uvec islandsIndex = find(islandsdMult > 0);
max@1 1586
max@6 1587 if (islandsIndex.size() > 0)
max@1 1588 valid_sets(iSet) = 0;
max@1 1589 }
max@1 1590 }
max@1 1591 }
max@1 1592 else
max@1 1593 {
max@1 1594 Part newPart;
max@1 1595 newPart.n = nBeat;
Chris@33 1596 newPart.indices.push_back(0);
max@1 1597 newPart.letter = 'A';
max@1 1598 newPart.value = 1;
max@1 1599 newPart.level = 1;
max@1 1600 parts.push_back(newPart);
max@1 1601 }
max@6 1602
Chris@56 1603 vec bar = linspace(1,nBeat,nBeat);
max@1 1604 Part np = nullpart(parts,bar);
max@7 1605
max@1 1606 parts.push_back(np);
max@1 1607
max@1 1608 // -------------- NOT CONVERTED -------------------------------------
max@1 1609 // if param.seg.editor
max@1 1610 // [pa, ta] = partarray(parts);
max@1 1611 // parts = editorssearch(pa, ta, parts);
max@1 1612 // parts = [parts, nullpart(parts,1:nBeat)];
max@1 1613 // end
max@1 1614 // ------------------------------------------------------------------
max@1 1615
max@1 1616
max@1 1617 mergenulls(parts);
max@1 1618
max@1 1619
max@1 1620 // -------------- NOT CONVERTED -------------------------------------
max@1 1621 // if param.seg.editor
max@1 1622 // [pa, ta] = partarray(parts);
max@1 1623 // parts = editorssearch(pa, ta, parts);
max@1 1624 // parts = [parts, nullpart(parts,1:nBeat)];
max@1 1625 // end
max@1 1626 // ------------------------------------------------------------------
max@1 1627
max@1 1628 return parts;
max@1 1629 }
max@1 1630
max@1 1631
max@1 1632
Chris@19 1633 void songSegmentChroma(Vamp::Plugin::FeatureList quantisedChromagram, vector<Part> &parts)
max@1 1634 {
max@1 1635 // Collect Info
Chris@19 1636 int nBeat = quantisedChromagram.size(); // Number of feature vector
Chris@19 1637 int nFeatValues = quantisedChromagram[0].values.size(); // Number of values for each feature vector
max@1 1638
Chris@56 1639 mat synchTreble = zeros<mat>(nBeat,nFeatValues/2);
max@1 1640
Chris@21 1641 for (int i = 0; i < nBeat; ++ i)
Chris@21 1642 for (int j = 0; j < nFeatValues/2; ++ j)
max@1 1643 {
Chris@19 1644 synchTreble(i,j) = quantisedChromagram[i].values[j];
max@1 1645 }
max@1 1646
Chris@56 1647 mat synchBass = zeros<mat>(nBeat,nFeatValues/2);
max@1 1648
Chris@21 1649 for (int i = 0; i < nBeat; ++ i)
Chris@21 1650 for (int j = 0; j < nFeatValues/2; ++ j)
max@1 1651 {
Chris@19 1652 synchBass(i,j) = quantisedChromagram[i].values[j+12];
max@1 1653 }
max@1 1654
max@1 1655 // Process
max@1 1656
Chris@56 1657 mat segTreble = zeros<mat>(quantisedChromagram.size(),quantisedChromagram[0].values.size()/2);
Chris@56 1658 mat segBass = zeros<mat>(quantisedChromagram.size(),quantisedChromagram[0].values.size()/2);
max@1 1659
Chris@37 1660 for (int iPart=0; iPart < (int)parts.size(); ++iPart)
max@1 1661 {
max@1 1662 parts[iPart].nInd = parts[iPart].indices.size();
max@1 1663
Chris@21 1664 for (int kOccur=0; kOccur<parts[iPart].nInd; ++kOccur)
max@1 1665 {
max@1 1666 int kStartIndex = parts[iPart].indices[kOccur];
max@1 1667 int kEndIndex = kStartIndex + parts[iPart].n-1;
max@1 1668
max@1 1669 segTreble.rows(kStartIndex,kEndIndex) = segTreble.rows(kStartIndex,kEndIndex) + synchTreble.rows(kStartIndex,kEndIndex);
max@1 1670 segBass.rows(kStartIndex,kEndIndex) = segBass.rows(kStartIndex,kEndIndex) + synchBass.rows(kStartIndex,kEndIndex);
max@1 1671 }
max@1 1672 }
max@1 1673 }
max@1 1674
max@1 1675
max@1 1676 // Segment Integration
max@1 1677 vector<Part> songSegmentIntegration(vector<Part> &parts)
max@1 1678 {
max@1 1679 // Break up parts (every part will have one instance)
max@1 1680 vector<Part> newPartVector;
max@1 1681 vector<int> partindices;
max@1 1682
Chris@37 1683 for (int iPart=0; iPart < (int)parts.size(); ++iPart)
max@1 1684 {
max@1 1685 parts[iPart].nInd = parts[iPart].indices.size();
Chris@21 1686 for (int iInstance=0; iInstance<parts[iPart].nInd; ++iInstance)
max@1 1687 {
max@1 1688 Part newPart;
max@1 1689 newPart.n = parts[iPart].n;
max@1 1690 newPart.letter = parts[iPart].letter;
max@1 1691 newPart.value = parts[iPart].value;
max@1 1692 newPart.level = parts[iPart].level;
max@1 1693 newPart.indices.push_back(parts[iPart].indices[iInstance]);
max@1 1694 newPart.nInd = 1;
max@1 1695 partindices.push_back(parts[iPart].indices[iInstance]);
max@1 1696
max@1 1697 newPartVector.push_back(newPart);
max@1 1698 }
max@1 1699 }
max@1 1700
max@1 1701
max@1 1702 // Sort the parts in order of occurrence
max@1 1703 sort (partindices.begin(), partindices.end());
max@1 1704
Chris@37 1705 for (int i=0; i < (int)partindices.size(); ++i)
max@1 1706 {
max@1 1707 bool found = false;
max@1 1708 int in=0;
max@1 1709 while (!found)
max@1 1710 {
max@1 1711 if (newPartVector[in].indices[0] == partindices[i])
max@1 1712 {
max@1 1713 newPartVector.push_back(newPartVector[in]);
max@1 1714 newPartVector.erase(newPartVector.begin()+in);
max@1 1715 found = true;
max@1 1716 }
max@1 1717 else
max@1 1718 in++;
max@1 1719 }
max@1 1720 }
max@1 1721
max@1 1722 // Clear the vector
Chris@37 1723 for (int iNewpart=1; iNewpart < (int)newPartVector.size(); ++iNewpart)
max@1 1724 {
max@1 1725 if (newPartVector[iNewpart].n < 12)
max@1 1726 {
max@1 1727 newPartVector[iNewpart-1].n = newPartVector[iNewpart-1].n + newPartVector[iNewpart].n;
max@1 1728 newPartVector.erase(newPartVector.begin()+iNewpart);
max@1 1729 }
max@1 1730 }
max@1 1731
max@1 1732 return newPartVector;
max@1 1733 }
max@1 1734
max@1 1735 // Segmenter
Chris@48 1736 Vamp::Plugin::FeatureList Segmentino::runSegmenter(Vamp::Plugin::FeatureList quantisedChromagram)
max@1 1737 {
max@1 1738 /* --- Display Information --- */
Chris@37 1739 // int numBeat = quantisedChromagram.size();
Chris@37 1740 // int numFeats = quantisedChromagram[0].values.size();
max@1 1741
max@1 1742 vector<Part> parts;
max@1 1743 vector<Part> finalParts;
max@1 1744
Chris@19 1745 parts = songSegment(quantisedChromagram);
Chris@19 1746 songSegmentChroma(quantisedChromagram,parts);
max@7 1747
max@1 1748 finalParts = songSegmentIntegration(parts);
max@1 1749
max@1 1750
max@1 1751 // TEMP ----
Chris@21 1752 /*for (int i=0;i<finalParts.size(); ++i)
max@1 1753 {
max@6 1754 std::cout << "Parts n° " << i << std::endl;
max@6 1755 std::cout << "n°: " << finalParts[i].n << std::endl;
max@6 1756 std::cout << "letter: " << finalParts[i].letter << std::endl;
max@1 1757
max@6 1758 std::cout << "indices: ";
Chris@21 1759 for (int j=0;j<finalParts[i].indices.size(); ++j)
max@6 1760 std::cout << finalParts[i].indices[j] << " ";
max@6 1761
max@6 1762 std::cout << std::endl;
max@6 1763 std::cout << "level: " << finalParts[i].level << std::endl;
max@1 1764 }*/
max@1 1765
max@1 1766 // ---------
max@1 1767
max@1 1768
max@1 1769 // Output
max@1 1770
max@1 1771 Vamp::Plugin::FeatureList results;
max@1 1772
max@1 1773
max@1 1774 Feature seg;
max@1 1775
Chris@56 1776 vec indices;
Chris@37 1777 // int idx=0;
max@1 1778 vector<int> values;
max@1 1779 vector<string> letters;
max@1 1780
Chris@37 1781 for (int iPart=0; iPart < (int)finalParts.size()-1; ++iPart)
max@1 1782 {
Chris@21 1783 int iInstance=0;
max@1 1784 seg.hasTimestamp = true;
max@1 1785
max@1 1786 int ind = finalParts[iPart].indices[iInstance];
max@1 1787 int ind1 = finalParts[iPart+1].indices[iInstance];
max@1 1788
Chris@19 1789 seg.timestamp = quantisedChromagram[ind].timestamp;
max@1 1790 seg.hasDuration = true;
Chris@19 1791 seg.duration = quantisedChromagram[ind1].timestamp-quantisedChromagram[ind].timestamp;
max@1 1792 seg.values.clear();
max@1 1793 seg.values.push_back(finalParts[iPart].value);
max@1 1794 seg.label = finalParts[iPart].letter;
max@1 1795
max@1 1796 results.push_back(seg);
max@1 1797 }
max@1 1798
Chris@37 1799 if (finalParts.size() > 0) {
Chris@37 1800 int ind = finalParts[finalParts.size()-1].indices[0];
Chris@37 1801 seg.hasTimestamp = true;
Chris@37 1802 seg.timestamp = quantisedChromagram[ind].timestamp;
Chris@37 1803 seg.hasDuration = true;
Chris@37 1804 seg.duration = quantisedChromagram[quantisedChromagram.size()-1].timestamp-quantisedChromagram[ind].timestamp;
Chris@37 1805 seg.values.clear();
Chris@37 1806 seg.values.push_back(finalParts[finalParts.size()-1].value);
Chris@37 1807 seg.label = finalParts[finalParts.size()-1].letter;
max@1 1808
Chris@37 1809 results.push_back(seg);
Chris@37 1810 }
max@1 1811
max@1 1812 return results;
max@1 1813 }
max@1 1814
max@1 1815
max@1 1816
max@1 1817
max@1 1818
max@1 1819
max@1 1820
max@1 1821
max@1 1822
max@1 1823
max@1 1824
max@1 1825
max@1 1826
max@1 1827
max@1 1828
max@1 1829
max@1 1830