annotate songparts/SongParts.cpp @ 22:d28d79fe22b3 slimline

Tabs to spaces
author Chris Cannam
date Thu, 10 May 2012 10:52:41 +0100
parents 1985617603e2
children cb9003bb0251
rev   line source
max@1 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
max@1 2
max@1 3 /*
max@1 4 QM Vamp Plugin Set
max@1 5
max@1 6 Centre for Digital Music, Queen Mary, University of London.
max@1 7
max@1 8 This program is free software; you can redistribute it and/or
max@1 9 modify it under the terms of the GNU General Public License as
max@1 10 published by the Free Software Foundation; either version 2 of the
max@1 11 License, or (at your option) any later version. See the file
max@1 12 COPYING included with this distribution for more information.
max@1 13 */
max@1 14
max@1 15 #include "SongParts.h"
max@1 16
max@1 17 #include <base/Window.h>
max@1 18 #include <dsp/onsets/DetectionFunction.h>
max@1 19 #include <dsp/onsets/PeakPicking.h>
max@1 20 #include <dsp/transforms/FFT.h>
max@1 21 #include <dsp/tempotracking/TempoTrackV2.h>
max@1 22 #include <dsp/tempotracking/DownBeat.h>
max@1 23 #include <chromamethods.h>
max@1 24 #include <maths/MathUtilities.h>
max@1 25 #include <boost/numeric/ublas/matrix.hpp>
max@1 26 #include <boost/numeric/ublas/io.hpp>
max@1 27 #include <boost/math/distributions/normal.hpp>
max@1 28 #include "armadillo"
max@1 29 #include <fstream>
max@1 30 #include <sstream>
max@1 31 #include <cmath>
max@1 32 #include <vector>
max@1 33
max@1 34 #include <vamp-sdk/Plugin.h>
max@1 35
max@1 36 using namespace boost::numeric;
max@1 37 using namespace arma;
max@1 38 using std::string;
max@1 39 using std::vector;
max@1 40 using std::cerr;
max@1 41 using std::cout;
max@1 42 using std::endl;
max@1 43
max@1 44
max@1 45 #ifndef __GNUC__
max@1 46 #include <alloca.h>
max@1 47 #endif
max@1 48
max@1 49
max@1 50 // Result Struct
// Plain aggregate describing one result entry: four integer fields, a list
// of integer indices and a string label. No constructor is provided, so the
// scalar members hold indeterminate values until they are assigned.
// NOTE(review): the meaning of each field is only suggested by its name
// (e.g. nInd presumably mirrors indices.size()) -- confirm against the
// segmentation code that fills this struct in.
struct Part {
    int n;
    std::vector<int> indices;
    std::string letter;
    int value;
    int level;
    int nInd;
};
max@1 59
max@1 60
max@1 61 /* ------------------------------------ */
max@1 62 /* ----- BEAT DETECTOR CLASS ---------- */
max@1 63 /* ------------------------------------ */
max@1 64
max@1 65 class BeatTrackerData
max@1 66 {
max@1 67 /* --- ATTRIBUTES --- */
max@1 68 public:
max@1 69 DFConfig dfConfig;
max@1 70 DetectionFunction *df;
max@1 71 DownBeat *downBeat;
max@1 72 vector<double> dfOutput;
max@1 73 Vamp::RealTime origin;
max@1 74
max@1 75
max@1 76 /* --- METHODS --- */
max@1 77
max@1 78 /* --- Constructor --- */
max@1 79 public:
max@1 80 BeatTrackerData(float rate, const DFConfig &config) : dfConfig(config) {
Chris@22 81
max@1 82 df = new DetectionFunction(config);
max@1 83 // decimation factor aims at resampling to c. 3KHz; must be power of 2
max@1 84 int factor = MathUtilities::nextPowerOfTwo(rate / 3000);
max@1 85 // std::cerr << "BeatTrackerData: factor = " << factor << std::endl;
max@1 86 downBeat = new DownBeat(rate, factor, config.stepSize);
max@1 87 }
max@1 88
max@1 89 /* --- Desctructor --- */
max@1 90 ~BeatTrackerData() {
Chris@22 91 delete df;
max@1 92 delete downBeat;
max@1 93 }
max@1 94
max@1 95 void reset() {
max@1 96 delete df;
max@1 97 df = new DetectionFunction(dfConfig);
max@1 98 dfOutput.clear();
max@1 99 downBeat->resetAudioBuffer();
max@1 100 origin = Vamp::RealTime::zeroTime;
max@1 101 }
max@1 102 };
max@1 103
max@1 104
max@1 105 /* --------------------------------------- */
max@1 106 /* ----- CHROMA EXTRACTOR CLASS ---------- */
max@1 107 /* --------------------------------------- */
max@1 108
max@1 109 class ChromaData
max@1 110 {
max@1 111
max@1 112 /* --- ATTRIBUTES --- */
max@1 113
max@1 114 public:
max@1 115 int frameCount;
max@1 116 int nBPS;
max@1 117 Vamp::Plugin::FeatureList logSpectrum;
max@1 118 size_t blockSize;
max@1 119 int lengthOfNoteIndex;
max@1 120 vector<float> meanTunings;
max@1 121 vector<float> localTunings;
max@1 122 float whitening;
max@1 123 float preset;
max@1 124 float useNNLS;
max@1 125 vector<float> localTuning;
max@1 126 vector<float> kernelValue;
max@1 127 vector<int> kernelFftIndex;
max@1 128 vector<int> kernelNoteIndex;
max@1 129 float *dict;
max@1 130 bool tuneLocal;
max@1 131 float doNormalizeChroma;
max@1 132 float rollon;
max@1 133 float s;
max@1 134 vector<float> hw;
max@1 135 vector<float> sinvalues;
max@1 136 vector<float> cosvalues;
max@1 137 Window<float> window;
max@1 138 FFTReal fft;
max@1 139 size_t inputSampleRate;
max@1 140
max@1 141 /* --- METHODS --- */
max@1 142
max@1 143 /* --- Constructor --- */
max@1 144
max@1 145 public:
max@1 146 ChromaData(float inputSampleRate, size_t block_size) :
max@1 147 frameCount(0),
max@1 148 nBPS(3),
max@1 149 logSpectrum(0),
max@1 150 blockSize(0),
max@1 151 lengthOfNoteIndex(0),
max@1 152 meanTunings(0),
max@1 153 localTunings(0),
max@1 154 whitening(1.0),
max@1 155 preset(0.0),
max@1 156 useNNLS(1.0),
max@1 157 localTuning(0.0),
max@1 158 kernelValue(0),
max@1 159 kernelFftIndex(0),
max@1 160 kernelNoteIndex(0),
max@1 161 dict(0),
max@1 162 tuneLocal(0.0),
max@1 163 doNormalizeChroma(0),
max@1 164 rollon(0.0),
Chris@22 165 s(0.7),
Chris@22 166 sinvalues(0),
Chris@22 167 cosvalues(0),
Chris@22 168 window(HanningWindow, block_size),
Chris@22 169 fft(block_size),
Chris@22 170 inputSampleRate(inputSampleRate)
max@1 171 {
max@1 172 // make the *note* dictionary matrix
max@1 173 dict = new float[nNote * 84];
max@1 174 for (int i = 0; i < nNote * 84; ++i) dict[i] = 0.0;
max@1 175 blockSize = block_size;
max@1 176 }
max@1 177
max@1 178
max@1 179 /* --- Desctructor --- */
max@1 180
max@1 181 ~ChromaData() {
max@1 182 delete [] dict;
max@1 183 }
max@1 184
max@1 185 /* --- Public Methods --- */
max@1 186
max@1 187 void reset() {
max@1 188 frameCount = 0;
max@1 189 logSpectrum.clear();
max@1 190 for (int iBPS = 0; iBPS < 3; ++iBPS) {
max@1 191 meanTunings[iBPS] = 0;
max@1 192 localTunings[iBPS] = 0;
max@1 193 }
max@1 194 localTuning.clear();
max@1 195 }
max@1 196
max@1 197 void baseProcess(float *inputBuffers, Vamp::RealTime timestamp)
max@1 198 {
Chris@22 199
max@1 200 frameCount++;
max@1 201 float *magnitude = new float[blockSize/2];
max@1 202 double *fftReal = new double[blockSize];
max@1 203 double *fftImag = new double[blockSize];
max@1 204
max@1 205 // FFTReal wants doubles, so we need to make a local copy of inputBuffers
max@1 206 double *inputBuffersDouble = new double[blockSize];
max@1 207 for (size_t i = 0; i < blockSize; i++) inputBuffersDouble[i] = inputBuffers[i];
max@1 208
max@1 209 fft.process(false, inputBuffersDouble, fftReal, fftImag);
max@1 210
max@1 211 float energysum = 0;
max@1 212 // make magnitude
max@1 213 float maxmag = -10000;
max@1 214 for (int iBin = 0; iBin < static_cast<int>(blockSize/2); iBin++) {
max@1 215 magnitude[iBin] = sqrt(fftReal[iBin] * fftReal[iBin] +
max@1 216 fftImag[iBin] * fftImag[iBin]);
max@1 217 if (magnitude[iBin]>blockSize*1.0) magnitude[iBin] = blockSize;
max@1 218 // a valid audio signal (between -1 and 1) should not be limited here.
max@1 219 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
max@1 220 if (rollon > 0) {
max@1 221 energysum += pow(magnitude[iBin],2);
max@1 222 }
max@1 223 }
max@1 224
max@1 225 float cumenergy = 0;
max@1 226 if (rollon > 0) {
max@1 227 for (int iBin = 2; iBin < static_cast<int>(blockSize/2); iBin++) {
max@1 228 cumenergy += pow(magnitude[iBin],2);
max@1 229 if (cumenergy < energysum * rollon / 100) magnitude[iBin-2] = 0;
max@1 230 else break;
max@1 231 }
max@1 232 }
max@1 233
max@1 234 if (maxmag < 2) {
max@1 235 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
max@1 236 for (int iBin = 0; iBin < static_cast<int>(blockSize/2); iBin++) {
max@1 237 magnitude[iBin] = 0;
max@1 238 }
max@1 239 }
max@1 240
max@1 241 // cerr << magnitude[200] << endl;
max@1 242
max@1 243 // note magnitude mapping using pre-calculated matrix
max@1 244 float *nm = new float[nNote]; // note magnitude
max@1 245 for (int iNote = 0; iNote < nNote; iNote++) {
max@1 246 nm[iNote] = 0; // initialise as 0
max@1 247 }
max@1 248 int binCount = 0;
max@1 249 for (vector<float>::iterator it = kernelValue.begin(); it != kernelValue.end(); ++it) {
max@1 250 nm[kernelNoteIndex[binCount]] += magnitude[kernelFftIndex[binCount]] * kernelValue[binCount];
max@1 251 binCount++;
max@1 252 }
max@1 253
max@1 254 float one_over_N = 1.0/frameCount;
max@1 255 // update means of complex tuning variables
max@1 256 for (int iBPS = 0; iBPS < nBPS; ++iBPS) meanTunings[iBPS] *= float(frameCount-1)*one_over_N;
max@1 257
max@1 258 for (int iTone = 0; iTone < round(nNote*0.62/nBPS)*nBPS+1; iTone = iTone + nBPS) {
max@1 259 for (int iBPS = 0; iBPS < nBPS; ++iBPS) meanTunings[iBPS] += nm[iTone + iBPS]*one_over_N;
max@1 260 float ratioOld = 0.997;
max@1 261 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1 262 localTunings[iBPS] *= ratioOld;
max@1 263 localTunings[iBPS] += nm[iTone + iBPS] * (1 - ratioOld);
max@1 264 }
max@1 265 }
max@1 266
max@1 267 float localTuningImag = 0;
max@1 268 float localTuningReal = 0;
max@1 269 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1 270 localTuningReal += localTunings[iBPS] * cosvalues[iBPS];
max@1 271 localTuningImag += localTunings[iBPS] * sinvalues[iBPS];
max@1 272 }
max@1 273
max@1 274 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
max@1 275 localTuning.push_back(normalisedtuning);
max@1 276
max@1 277 Vamp::Plugin::Feature f1; // logfreqspec
max@1 278 f1.hasTimestamp = true;
max@1 279 f1.timestamp = timestamp;
max@1 280 for (int iNote = 0; iNote < nNote; iNote++) {
max@1 281 f1.values.push_back(nm[iNote]);
max@1 282 }
max@1 283
max@1 284 // deletes
max@1 285 delete[] inputBuffersDouble;
max@1 286 delete[] magnitude;
max@1 287 delete[] fftReal;
max@1 288 delete[] fftImag;
max@1 289 delete[] nm;
max@1 290
max@1 291 logSpectrum.push_back(f1); // remember note magnitude
max@1 292 }
max@1 293
max@1 294 bool initialise()
max@1 295 {
max@1 296 dictionaryMatrix(dict, s);
Chris@22 297
Chris@22 298 // make things for tuning estimation
Chris@22 299 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1 300 sinvalues.push_back(sin(2*M_PI*(iBPS*1.0/nBPS)));
max@1 301 cosvalues.push_back(cos(2*M_PI*(iBPS*1.0/nBPS)));
max@1 302 }
max@1 303
Chris@22 304
Chris@22 305 // make hamming window of length 1/2 octave
Chris@22 306 int hamwinlength = nBPS * 6 + 1;
max@1 307 float hamwinsum = 0;
max@1 308 for (int i = 0; i < hamwinlength; ++i) {
max@1 309 hw.push_back(0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1)));
max@1 310 hamwinsum += 0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1));
max@1 311 }
max@1 312 for (int i = 0; i < hamwinlength; ++i) hw[i] = hw[i] / hamwinsum;
max@1 313
max@1 314
max@1 315 // initialise the tuning
max@1 316 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1 317 meanTunings.push_back(0);
max@1 318 localTunings.push_back(0);
max@1 319 }
Chris@22 320
max@1 321 blockSize = blockSize;
max@1 322 frameCount = 0;
max@1 323 int tempn = nNote * blockSize/2;
max@1 324 // cerr << "length of tempkernel : " << tempn << endl;
max@1 325 float *tempkernel;
max@1 326
max@1 327 tempkernel = new float[tempn];
max@1 328
max@1 329 logFreqMatrix(inputSampleRate, blockSize, tempkernel);
max@1 330 kernelValue.clear();
max@1 331 kernelFftIndex.clear();
max@1 332 kernelNoteIndex.clear();
max@1 333 int countNonzero = 0;
max@1 334 for (int iNote = 0; iNote < nNote; ++iNote) {
max@1 335 // I don't know if this is wise: manually making a sparse matrix
max@1 336 for (int iFFT = 0; iFFT < static_cast<int>(blockSize/2); ++iFFT) {
max@1 337 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
max@1 338 kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
max@1 339 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
max@1 340 countNonzero++;
max@1 341 }
max@1 342 kernelFftIndex.push_back(iFFT);
Chris@22 343 kernelNoteIndex.push_back(iNote);
max@1 344 }
max@1 345 }
max@1 346 }
max@1 347 delete [] tempkernel;
max@1 348 }
max@1 349 };
max@1 350
max@1 351
max@1 352 /* --------------------------------- */
max@1 353 /* ----- SONG PARTITIONER ---------- */
max@1 354 /* --------------------------------- */
max@1 355
max@1 356
max@1 357 /* --- ATTRIBUTES --- */
max@1 358
max@1 359 float SongPartitioner::m_stepSecs = 0.01161; // 512 samples at 44100
max@1 360 size_t SongPartitioner::m_chromaFramesizeFactor = 16; // 16 times as long as beat tracker's
max@1 361 size_t SongPartitioner::m_chromaStepsizeFactor = 4; // 4 times as long as beat tracker's
max@1 362
max@1 363
max@1 364 /* --- METHODS --- */
max@1 365
max@1 366 /* --- Constructor --- */
max@1 367 SongPartitioner::SongPartitioner(float inputSampleRate) :
max@1 368 Vamp::Plugin(inputSampleRate),
max@1 369 m_d(0),
max@1 370 m_bpb(4),
max@1 371 m_pluginFrameCount(0)
max@1 372 {
max@1 373 }
max@1 374
max@1 375
max@1 376 /* --- Desctructor --- */
max@1 377 SongPartitioner::~SongPartitioner()
max@1 378 {
max@1 379 delete m_d;
max@1 380 }
max@1 381
max@1 382
max@1 383 /* --- Methods --- */
max@1 384 string SongPartitioner::getIdentifier() const
max@1 385 {
max@1 386 return "qm-songpartitioner";
max@1 387 }
max@1 388
max@1 389 string SongPartitioner::getName() const
max@1 390 {
max@1 391 return "Song Partitioner";
max@1 392 }
max@1 393
max@1 394 string SongPartitioner::getDescription() const
max@1 395 {
max@1 396 return "Estimate contiguous segments pertaining to song parts such as verse and chorus.";
max@1 397 }
max@1 398
max@1 399 string SongPartitioner::getMaker() const
max@1 400 {
max@1 401 return "Queen Mary, University of London";
max@1 402 }
max@1 403
max@1 404 int SongPartitioner::getPluginVersion() const
max@1 405 {
max@1 406 return 2;
max@1 407 }
max@1 408
max@1 409 string SongPartitioner::getCopyright() const
max@1 410 {
max@1 411 return "Plugin by Matthew Davies, Christian Landone, Chris Cannam, Matthias Mauch and Massimiliano Zanoni Copyright (c) 2006-2012 QMUL - All Rights Reserved";
max@1 412 }
max@1 413
max@1 414 SongPartitioner::ParameterList SongPartitioner::getParameterDescriptors() const
max@1 415 {
max@1 416 ParameterList list;
max@1 417
max@1 418 ParameterDescriptor desc;
max@1 419
max@1 420 desc.identifier = "bpb";
max@1 421 desc.name = "Beats per Bar";
max@1 422 desc.description = "The number of beats in each bar";
max@1 423 desc.minValue = 2;
max@1 424 desc.maxValue = 16;
max@1 425 desc.defaultValue = 4;
max@1 426 desc.isQuantized = true;
max@1 427 desc.quantizeStep = 1;
max@1 428 list.push_back(desc);
max@1 429
max@1 430 return list;
max@1 431 }
max@1 432
max@1 433 float SongPartitioner::getParameter(std::string name) const
max@1 434 {
max@1 435 if (name == "bpb") return m_bpb;
max@1 436 return 0.0;
max@1 437 }
max@1 438
max@1 439 void SongPartitioner::setParameter(std::string name, float value)
max@1 440 {
max@1 441 if (name == "bpb") m_bpb = lrintf(value);
max@1 442 }
max@1 443
max@1 444
max@1 445 // Return the StepSize for Chroma Extractor
max@1 446 size_t SongPartitioner::getPreferredStepSize() const
max@1 447 {
max@1 448 size_t step = size_t(m_inputSampleRate * m_stepSecs + 0.0001);
max@1 449 if (step < 1) step = 1;
max@1 450
max@1 451 return step;
max@1 452 }
max@1 453
max@1 454 // Return the BlockSize for Chroma Extractor
max@1 455 size_t SongPartitioner::getPreferredBlockSize() const
max@1 456 {
max@1 457 size_t theoretical = getPreferredStepSize() * 2;
max@1 458 theoretical *= m_chromaFramesizeFactor;
max@1 459
max@1 460 return theoretical;
max@1 461 }
max@1 462
max@1 463
max@1 464 // Initialize the plugin and define Beat Tracker and Chroma Extractor Objects
max@1 465 bool SongPartitioner::initialise(size_t channels, size_t stepSize, size_t blockSize)
max@1 466 {
max@1 467 if (m_d) {
Chris@22 468 delete m_d;
Chris@22 469 m_d = 0;
max@1 470 }
max@1 471
max@1 472 if (channels < getMinChannelCount() ||
Chris@22 473 channels > getMaxChannelCount()) {
max@1 474 std::cerr << "SongPartitioner::initialise: Unsupported channel count: "
max@1 475 << channels << std::endl;
max@1 476 return false;
max@1 477 }
max@1 478
max@1 479 if (stepSize != getPreferredStepSize()) {
max@1 480 std::cerr << "ERROR: SongPartitioner::initialise: Unsupported step size for this sample rate: "
max@1 481 << stepSize << " (wanted " << (getPreferredStepSize()) << ")" << std::endl;
max@1 482 return false;
max@1 483 }
max@1 484
max@1 485 if (blockSize != getPreferredBlockSize()) {
max@1 486 std::cerr << "WARNING: SongPartitioner::initialise: Sub-optimal block size for this sample rate: "
max@1 487 << blockSize << " (wanted " << getPreferredBlockSize() << ")" << std::endl;
max@1 488 }
max@1 489
max@1 490 // Beat tracker and Chroma extractor has two different configuration parameters
max@1 491
max@1 492 // Configuration Parameters for Beat Tracker
max@1 493 DFConfig dfConfig;
max@1 494 dfConfig.DFType = DF_COMPLEXSD;
max@1 495 dfConfig.stepSize = stepSize;
max@1 496 dfConfig.frameLength = blockSize / m_chromaFramesizeFactor;
max@1 497 dfConfig.dbRise = 3;
max@1 498 dfConfig.adaptiveWhitening = false;
max@1 499 dfConfig.whiteningRelaxCoeff = -1;
max@1 500 dfConfig.whiteningFloor = -1;
max@1 501
max@1 502 // Initialise Beat Tracker
max@1 503 m_d = new BeatTrackerData(m_inputSampleRate, dfConfig);
max@1 504 m_d->downBeat->setBeatsPerBar(m_bpb);
max@1 505
max@1 506 // Initialise Chroma Extractor
max@1 507 m_chromadata = new ChromaData(m_inputSampleRate, blockSize);
max@1 508 m_chromadata->initialise();
max@1 509
max@1 510 return true;
max@1 511 }
max@1 512
max@1 513 void SongPartitioner::reset()
max@1 514 {
max@1 515 if (m_d) m_d->reset();
max@1 516 m_pluginFrameCount = 0;
max@1 517 }
max@1 518
max@1 519 SongPartitioner::OutputList SongPartitioner::getOutputDescriptors() const
max@1 520 {
max@1 521 OutputList list;
max@1 522 size_t outputCounter = 0;
max@1 523
max@1 524 OutputDescriptor beat;
max@1 525 beat.identifier = "beats";
max@1 526 beat.name = "Beats";
max@1 527 beat.description = "Beat locations labelled with metrical position";
max@1 528 beat.unit = "";
max@1 529 beat.hasFixedBinCount = true;
max@1 530 beat.binCount = 0;
max@1 531 beat.sampleType = OutputDescriptor::VariableSampleRate;
max@1 532 beat.sampleRate = 1.0 / m_stepSecs;
max@1 533 m_beatOutputNumber = outputCounter++;
max@1 534
max@1 535 OutputDescriptor bars;
max@1 536 bars.identifier = "bars";
max@1 537 bars.name = "Bars";
max@1 538 bars.description = "Bar locations";
max@1 539 bars.unit = "";
max@1 540 bars.hasFixedBinCount = true;
max@1 541 bars.binCount = 0;
max@1 542 bars.sampleType = OutputDescriptor::VariableSampleRate;
max@1 543 bars.sampleRate = 1.0 / m_stepSecs;
max@1 544 m_barsOutputNumber = outputCounter++;
max@1 545
max@1 546 OutputDescriptor beatcounts;
max@1 547 beatcounts.identifier = "beatcounts";
max@1 548 beatcounts.name = "Beat Count";
max@1 549 beatcounts.description = "Beat counter function";
max@1 550 beatcounts.unit = "";
max@1 551 beatcounts.hasFixedBinCount = true;
max@1 552 beatcounts.binCount = 1;
max@1 553 beatcounts.sampleType = OutputDescriptor::VariableSampleRate;
max@1 554 beatcounts.sampleRate = 1.0 / m_stepSecs;
max@1 555 m_beatcountsOutputNumber = outputCounter++;
max@1 556
max@1 557 OutputDescriptor beatsd;
max@1 558 beatsd.identifier = "beatsd";
max@1 559 beatsd.name = "Beat Spectral Difference";
max@1 560 beatsd.description = "Beat spectral difference function used for bar-line detection";
max@1 561 beatsd.unit = "";
max@1 562 beatsd.hasFixedBinCount = true;
max@1 563 beatsd.binCount = 1;
max@1 564 beatsd.sampleType = OutputDescriptor::VariableSampleRate;
max@1 565 beatsd.sampleRate = 1.0 / m_stepSecs;
max@1 566 m_beatsdOutputNumber = outputCounter++;
max@1 567
max@1 568 OutputDescriptor logscalespec;
max@1 569 logscalespec.identifier = "logscalespec";
max@1 570 logscalespec.name = "Log-Frequency Spectrum";
max@1 571 logscalespec.description = "Spectrum with linear frequency on a log scale.";
max@1 572 logscalespec.unit = "";
max@1 573 logscalespec.hasFixedBinCount = true;
max@1 574 logscalespec.binCount = nNote;
max@1 575 logscalespec.hasKnownExtents = false;
max@1 576 logscalespec.isQuantized = false;
max@1 577 logscalespec.sampleType = OutputDescriptor::FixedSampleRate;
max@1 578 logscalespec.hasDuration = false;
max@1 579 logscalespec.sampleRate = m_inputSampleRate/2048;
max@1 580 m_logscalespecOutputNumber = outputCounter++;
max@1 581
max@1 582 OutputDescriptor bothchroma;
max@1 583 bothchroma.identifier = "bothchroma";
max@1 584 bothchroma.name = "Chromagram and Bass Chromagram";
max@1 585 bothchroma.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS approximate transcription.";
max@1 586 bothchroma.unit = "";
max@1 587 bothchroma.hasFixedBinCount = true;
max@1 588 bothchroma.binCount = 24;
max@1 589 bothchroma.hasKnownExtents = false;
max@1 590 bothchroma.isQuantized = false;
max@1 591 bothchroma.sampleType = OutputDescriptor::FixedSampleRate;
max@1 592 bothchroma.hasDuration = false;
max@1 593 bothchroma.sampleRate = m_inputSampleRate/2048;
max@1 594 m_bothchromaOutputNumber = outputCounter++;
max@1 595
max@1 596 OutputDescriptor qchromafw;
max@1 597 qchromafw.identifier = "qchromafw";
max@1 598 qchromafw.name = "Pseudo-Quantised Chromagram and Bass Chromagram";
max@1 599 qchromafw.description = "Pseudo-Quantised Chromagram and Bass Chromagram (frames between two beats are identical).";
max@1 600 qchromafw.unit = "";
max@1 601 qchromafw.hasFixedBinCount = true;
max@1 602 qchromafw.binCount = 24;
max@1 603 qchromafw.hasKnownExtents = false;
max@1 604 qchromafw.isQuantized = false;
max@1 605 qchromafw.sampleType = OutputDescriptor::FixedSampleRate;
max@1 606 qchromafw.hasDuration = false;
max@1 607 qchromafw.sampleRate = m_inputSampleRate/2048;
max@1 608 m_qchromafwOutputNumber = outputCounter++;
max@1 609
max@1 610 OutputDescriptor qchroma;
max@1 611 qchroma.identifier = "qchroma";
max@1 612 qchroma.name = "Quantised Chromagram and Bass Chromagram";
max@1 613 qchroma.description = "Quantised Chromagram and Bass Chromagram.";
max@1 614 qchroma.unit = "";
max@1 615 qchroma.hasFixedBinCount = true;
max@1 616 qchroma.binCount = 24;
max@1 617 qchroma.hasKnownExtents = false;
max@1 618 qchroma.isQuantized = false;
max@1 619 qchroma.sampleType = OutputDescriptor::FixedSampleRate;
max@1 620 qchroma.hasDuration = true;
Chris@17 621 qchroma.sampleRate = m_inputSampleRate/2048;
max@1 622 m_qchromaOutputNumber = outputCounter++;
max@1 623
max@1 624 OutputDescriptor segm;
Chris@15 625 segm.identifier = "segmentation";
max@1 626 segm.name = "Segmentation";
max@1 627 segm.description = "Segmentation";
max@1 628 segm.unit = "segment-type";
max@1 629 segm.hasFixedBinCount = true;
max@1 630 //segm.binCount = 24;
max@1 631 segm.binCount = 1;
max@1 632 segm.hasKnownExtents = true;
max@1 633 segm.minValue = 1;
max@1 634 segm.maxValue = 5;
max@1 635 segm.isQuantized = true;
max@1 636 segm.quantizeStep = 1;
max@1 637 segm.sampleType = OutputDescriptor::VariableSampleRate;
Chris@17 638 segm.sampleRate = 1.0 / m_stepSecs;
max@1 639 segm.hasDuration = true;
max@1 640 m_segmOutputNumber = outputCounter++;
max@1 641
max@1 642
max@1 643 /*
max@1 644 OutputList list;
max@1 645 OutputDescriptor segmentation;
max@1 646 segmentation.identifier = "segmentation";
max@1 647 segmentation.name = "Segmentation";
max@1 648 segmentation.description = "Segmentation";
max@1 649 segmentation.unit = "segment-type";
max@1 650 segmentation.hasFixedBinCount = true;
max@1 651 segmentation.binCount = 1;
max@1 652 segmentation.hasKnownExtents = true;
max@1 653 segmentation.minValue = 1;
max@1 654 segmentation.maxValue = nSegmentTypes;
max@1 655 segmentation.isQuantized = true;
max@1 656 segmentation.quantizeStep = 1;
max@1 657 segmentation.sampleType = OutputDescriptor::VariableSampleRate;
max@1 658 segmentation.sampleRate = m_inputSampleRate / getPreferredStepSize();
max@1 659 list.push_back(segmentation);
max@1 660 return list;
max@1 661 */
max@1 662
max@1 663
max@1 664 list.push_back(beat);
max@1 665 list.push_back(bars);
max@1 666 list.push_back(beatcounts);
max@1 667 list.push_back(beatsd);
max@1 668 list.push_back(logscalespec);
max@1 669 list.push_back(bothchroma);
max@1 670 list.push_back(qchromafw);
max@1 671 list.push_back(qchroma);
max@1 672 list.push_back(segm);
max@1 673
max@1 674 return list;
max@1 675 }
max@1 676
max@1 677 // Executed for each frame - called from the host
max@1 678
max@1 679 // We use time domain input, because DownBeat requires it -- so we
max@1 680 // use the time-domain version of DetectionFunction::process which
max@1 681 // does its own FFT. It requires doubles as input, so we need to
max@1 682 // make a temporary copy
max@1 683
max@1 684 // We only support a single input channel
max@1 685 SongPartitioner::FeatureSet SongPartitioner::process(const float *const *inputBuffers,Vamp::RealTime timestamp)
max@1 686 {
max@1 687 if (!m_d) {
Chris@22 688 cerr << "ERROR: SongPartitioner::process: "
Chris@22 689 << "SongPartitioner has not been initialised"
Chris@22 690 << endl;
Chris@22 691 return FeatureSet();
max@1 692 }
max@1 693
max@1 694 const int fl = m_d->dfConfig.frameLength;
max@1 695 #ifndef __GNUC__
max@1 696 double *dfinput = (double *)alloca(fl * sizeof(double));
max@1 697 #else
max@1 698 double dfinput[fl];
max@1 699 #endif
max@1 700 int sampleOffset = ((m_chromaFramesizeFactor-1) * fl) / 2;
max@1 701
max@1 702 // Since chroma needs a much longer frame size, we only ever use the very
max@1 703 // beginning of the frame for beat tracking.
max@1 704 for (int i = 0; i < fl; ++i) dfinput[i] = inputBuffers[0][i];
max@1 705 double output = m_d->df->process(dfinput);
max@1 706
max@1 707 if (m_d->dfOutput.empty()) m_d->origin = timestamp;
max@1 708
max@1 709 // std::cerr << "df[" << m_d->dfOutput.size() << "] is " << output << std::endl;
max@1 710 m_d->dfOutput.push_back(output);
max@1 711
max@1 712 // Downsample and store the incoming audio block.
max@1 713 // We have an overlap on the incoming audio stream (step size is
max@1 714 // half block size) -- this function is configured to take only a
max@1 715 // step size's worth, so effectively ignoring the overlap. Note
max@1 716 // however that this means we omit the last blocksize - stepsize
max@1 717 // samples completely for the purposes of barline detection
max@1 718 // (hopefully not a problem)
max@1 719 m_d->downBeat->pushAudioBlock(inputBuffers[0]);
max@1 720
max@1 721 // The following is not done every time, but only every m_chromaFramesizeFactor times,
max@1 722 // because the chroma does not need dense time frames.
max@1 723
max@1 724 if (m_pluginFrameCount % m_chromaStepsizeFactor == 0)
max@1 725 {
max@1 726
max@1 727 // Window the full time domain, data, FFT it and process chroma stuff.
max@1 728
max@1 729 #ifndef __GNUC__
max@1 730 float *windowedBuffers = (float *)alloca(m_chromadata->blockSize * sizeof(float));
max@1 731 #else
max@1 732 float windowedBuffers[m_chromadata->blockSize];
max@1 733 #endif
max@1 734 m_chromadata->window.cut(&inputBuffers[0][0], &windowedBuffers[0]);
max@1 735
max@1 736 // adjust timestamp (we want the middle of the frame)
max@1 737 timestamp = timestamp + Vamp::RealTime::frame2RealTime(sampleOffset, lrintf(m_inputSampleRate));
max@1 738
max@1 739 m_chromadata->baseProcess(&windowedBuffers[0], timestamp);
max@1 740
max@1 741 }
max@1 742 m_pluginFrameCount++;
max@1 743
max@1 744 FeatureSet fs;
max@1 745 fs[m_logscalespecOutputNumber].push_back(
max@1 746 m_chromadata->logSpectrum.back());
max@1 747 return fs;
max@1 748 }
max@1 749
max@1 750 SongPartitioner::FeatureSet SongPartitioner::getRemainingFeatures()
max@1 751 {
max@1 752 if (!m_d) {
Chris@22 753 cerr << "ERROR: SongPartitioner::getRemainingFeatures: "
Chris@22 754 << "SongPartitioner has not been initialised"
Chris@22 755 << endl;
Chris@22 756 return FeatureSet();
max@1 757 }
max@1 758
Chris@16 759 FeatureSet masterFeatureset = beatTrack();
Chris@16 760 FeatureList chromaList = chromaFeatures();
max@1 761
max@1 762 for (size_t i = 0; i < chromaList.size(); ++i)
max@1 763 {
max@1 764 masterFeatureset[m_bothchromaOutputNumber].push_back(chromaList[i]);
max@1 765 }
max@1 766
max@1 767 // quantised and pseudo-quantised (beat-wise) chroma
Chris@16 768 std::vector<FeatureList> quantisedChroma = beatQuantiser(chromaList, masterFeatureset[m_beatOutputNumber]);
max@1 769
max@1 770 masterFeatureset[m_qchromafwOutputNumber] = quantisedChroma[0];
max@1 771 masterFeatureset[m_qchromaOutputNumber] = quantisedChroma[1];
max@1 772
max@1 773 // Segmentation
Chris@14 774 masterFeatureset[m_segmOutputNumber] = runSegmenter(quantisedChroma[1]);
max@1 775
max@1 776 return(masterFeatureset);
max@1 777 }
max@1 778
max@1 779 /* ------ Beat Tracker ------ */
max@1 780
Chris@16 781 SongPartitioner::FeatureSet SongPartitioner::beatTrack()
max@1 782 {
max@1 783 vector<double> df;
max@1 784 vector<double> beatPeriod;
max@1 785 vector<double> tempi;
max@1 786
max@1 787 for (size_t i = 2; i < m_d->dfOutput.size(); ++i) { // discard first two elts
max@1 788 df.push_back(m_d->dfOutput[i]);
max@1 789 beatPeriod.push_back(0.0);
max@1 790 }
max@1 791 if (df.empty()) return FeatureSet();
max@1 792
max@1 793 TempoTrackV2 tt(m_inputSampleRate, m_d->dfConfig.stepSize);
max@1 794 tt.calculateBeatPeriod(df, beatPeriod, tempi);
max@1 795
max@1 796 vector<double> beats;
max@1 797 tt.calculateBeats(df, beatPeriod, beats);
max@1 798
max@1 799 vector<int> downbeats;
max@1 800 size_t downLength = 0;
max@1 801 const float *downsampled = m_d->downBeat->getBufferedAudio(downLength);
max@1 802 m_d->downBeat->findDownBeats(downsampled, downLength, beats, downbeats);
max@1 803
max@1 804 vector<double> beatsd;
max@1 805 m_d->downBeat->getBeatSD(beatsd);
max@1 806
max@1 807 /*std::cout << "BeatTracker: found downbeats at: ";
max@1 808 for (int i = 0; i < downbeats.size(); ++i) std::cout << downbeats[i] << " " << std::endl;*/
max@1 809
max@1 810 FeatureSet returnFeatures;
max@1 811
max@1 812 char label[20];
max@1 813
max@1 814 int dbi = 0;
max@1 815 int beat = 0;
max@1 816 int bar = 0;
max@1 817
max@1 818 if (!downbeats.empty()) {
max@1 819 // get the right number for the first beat; this will be
max@1 820 // incremented before use (at top of the following loop)
max@1 821 int firstDown = downbeats[0];
max@1 822 beat = m_bpb - firstDown - 1;
max@1 823 if (beat == m_bpb) beat = 0;
max@1 824 }
max@1 825
max@1 826 for (size_t i = 0; i < beats.size(); ++i) {
max@1 827
max@1 828 size_t frame = beats[i] * m_d->dfConfig.stepSize;
max@1 829
max@1 830 if (dbi < downbeats.size() && i == downbeats[dbi]) {
max@1 831 beat = 0;
max@1 832 ++bar;
max@1 833 ++dbi;
max@1 834 } else {
max@1 835 ++beat;
max@1 836 }
max@1 837
max@1 838 /* Ooutput Section */
max@1 839
max@1 840 // outputs are:
max@1 841 //
max@1 842 // 0 -> beats
max@1 843 // 1 -> bars
max@1 844 // 2 -> beat counter function
max@1 845
max@1 846 Feature feature;
max@1 847 feature.hasTimestamp = true;
max@1 848 feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime (frame, lrintf(m_inputSampleRate));
max@1 849
max@1 850 sprintf(label, "%d", beat + 1);
max@1 851 feature.label = label;
max@1 852 returnFeatures[m_beatOutputNumber].push_back(feature); // labelled beats
max@1 853
max@1 854 feature.values.push_back(beat + 1);
max@1 855 returnFeatures[m_beatcountsOutputNumber].push_back(feature); // beat function
max@1 856
max@1 857 if (i > 0 && i <= beatsd.size()) {
max@1 858 feature.values.clear();
max@1 859 feature.values.push_back(beatsd[i-1]);
max@1 860 feature.label = "";
max@1 861 returnFeatures[m_beatsdOutputNumber].push_back(feature); // beat spectral difference
max@1 862 }
max@1 863
max@1 864 if (beat == 0) {
max@1 865 feature.values.clear();
max@1 866 sprintf(label, "%d", bar);
max@1 867 feature.label = label;
max@1 868 returnFeatures[m_barsOutputNumber].push_back(feature); // bars
max@1 869 }
max@1 870 }
max@1 871
max@1 872 return returnFeatures;
max@1 873 }
max@1 874
max@1 875
max@1 876 /* ------ Chroma Extractor ------ */
max@1 877
Chris@16 878 SongPartitioner::FeatureList SongPartitioner::chromaFeatures()
max@1 879 {
max@1 880
max@1 881 FeatureList returnFeatureList;
max@1 882 FeatureList tunedlogfreqspec;
max@1 883
max@1 884 if (m_chromadata->logSpectrum.size() == 0) return returnFeatureList;
max@1 885
max@1 886 /** Calculate Tuning
max@1 887 calculate tuning from (using the angle of the complex number defined by the
max@1 888 cumulative mean real and imag values)
max@1 889 **/
max@1 890 float meanTuningImag = 0;
max@1 891 float meanTuningReal = 0;
max@1 892 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1 893 meanTuningReal += m_chromadata->meanTunings[iBPS] * m_chromadata->cosvalues[iBPS];
max@1 894 meanTuningImag += m_chromadata->meanTunings[iBPS] * m_chromadata->sinvalues[iBPS];
max@1 895 }
max@1 896 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
max@1 897 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
max@1 898 int intShift = floor(normalisedtuning * 3);
max@1 899 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
max@1 900
max@1 901 char buffer0 [50];
max@1 902
max@1 903 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
max@1 904
max@1 905 /** Tune Log-Frequency Spectrogram
max@1 906 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
max@1 907 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
max@1 908 **/
max@1 909 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
max@1 910
max@1 911 float tempValue = 0;
max@1 912
max@1 913 int count = 0;
max@1 914
max@1 915 for (FeatureList::iterator i = m_chromadata->logSpectrum.begin(); i != m_chromadata->logSpectrum.end(); ++i)
max@1 916 {
max@1 917
max@1 918 Feature f1 = *i;
max@1 919 Feature f2; // tuned log-frequency spectrum
max@1 920
max@1 921 f2.hasTimestamp = true;
max@1 922 f2.timestamp = f1.timestamp;
max@1 923
max@1 924 f2.values.push_back(0.0);
max@1 925 f2.values.push_back(0.0); // set lower edge to zero
max@1 926
max@1 927 if (m_chromadata->tuneLocal) {
max@1 928 intShift = floor(m_chromadata->localTuning[count] * 3);
max@1 929 floatShift = m_chromadata->localTuning[count] * 3 - intShift;
max@1 930 // floatShift is a really bad name for this
max@1 931 }
max@1 932
max@1 933 for (int k = 2; k < (int)f1.values.size() - 3; ++k)
max@1 934 { // interpolate all inner bins
max@1 935 tempValue = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift;
max@1 936 f2.values.push_back(tempValue);
max@1 937 }
max@1 938
max@1 939 f2.values.push_back(0.0);
max@1 940 f2.values.push_back(0.0);
max@1 941 f2.values.push_back(0.0); // upper edge
max@1 942
max@1 943 vector<float> runningmean = SpecialConvolution(f2.values,m_chromadata->hw);
max@1 944 vector<float> runningstd;
max@1 945 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
max@1 946 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
max@1 947 }
max@1 948 runningstd = SpecialConvolution(runningstd,m_chromadata->hw); // second step convolve
max@1 949 for (int i = 0; i < nNote; i++)
max@1 950 {
max@1 951
max@1 952 runningstd[i] = sqrt(runningstd[i]);
max@1 953 // square root to finally have running std
max@1 954
max@1 955 if (runningstd[i] > 0)
max@1 956 {
max@1 957 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
max@1 958 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_chromadata->whitening) : 0;
max@1 959 }
max@1 960
max@1 961 if (f2.values[i] < 0) {
max@1 962
max@1 963 cerr << "ERROR: negative value in logfreq spectrum" << endl;
max@1 964
max@1 965 }
max@1 966 }
max@1 967 tunedlogfreqspec.push_back(f2);
max@1 968 count++;
max@1 969 }
max@1 970 cerr << "done." << endl;
max@1 971 /** Semitone spectrum and chromagrams
max@1 972 Semitone-spaced log-frequency spectrum derived
max@1 973 from the tuned log-freq spectrum above. the spectrum
max@1 974 is inferred using a non-negative least squares algorithm.
max@1 975 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
max@1 976 bass and treble stacked onto each other).
max@1 977 **/
max@1 978 if (m_chromadata->useNNLS == 0) {
max@1 979 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
max@1 980 } else {
max@1 981 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
max@1 982 }
max@1 983
max@1 984 vector<float> oldchroma = vector<float>(12,0);
max@1 985 vector<float> oldbasschroma = vector<float>(12,0);
max@1 986 count = 0;
max@1 987
max@1 988 for (FeatureList::iterator it = tunedlogfreqspec.begin(); it != tunedlogfreqspec.end(); ++it) {
max@1 989 Feature logfreqsp = *it; // logfreq spectrum
max@1 990 Feature bothchroma; // treble and bass chromagram
max@1 991
max@1 992 bothchroma.hasTimestamp = true;
max@1 993 bothchroma.timestamp = logfreqsp.timestamp;
max@1 994
max@1 995 float b[nNote];
max@1 996
max@1 997 bool some_b_greater_zero = false;
max@1 998 float sumb = 0;
max@1 999 for (int i = 0; i < nNote; i++) {
max@1 1000 b[i] = logfreqsp.values[i];
max@1 1001 sumb += b[i];
max@1 1002 if (b[i] > 0) {
max@1 1003 some_b_greater_zero = true;
max@1 1004 }
max@1 1005 }
max@1 1006
max@1 1007 // here's where the non-negative least squares algorithm calculates the note activation x
max@1 1008
max@1 1009 vector<float> chroma = vector<float>(12, 0);
max@1 1010 vector<float> basschroma = vector<float>(12, 0);
max@1 1011 float currval;
max@1 1012 int iSemitone = 0;
max@1 1013
max@1 1014 if (some_b_greater_zero) {
max@1 1015 if (m_chromadata->useNNLS == 0) {
max@1 1016 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
max@1 1017 currval = 0;
max@1 1018 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
max@1 1019 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
max@1 1020 }
max@1 1021 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
max@1 1022 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
max@1 1023 iSemitone++;
max@1 1024 }
max@1 1025
max@1 1026 } else {
max@1 1027 float x[84+1000];
max@1 1028 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
max@1 1029 vector<int> signifIndex;
max@1 1030 int index=0;
max@1 1031 sumb /= 84.0;
max@1 1032 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
max@1 1033 float currval = 0;
max@1 1034 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
max@1 1035 currval += b[iNote + iBPS];
max@1 1036 }
max@1 1037 if (currval > 0) signifIndex.push_back(index);
max@1 1038 index++;
max@1 1039 }
max@1 1040 float rnorm;
max@1 1041 float w[84+1000];
max@1 1042 float zz[84+1000];
max@1 1043 int indx[84+1000];
max@1 1044 int mode;
max@1 1045 int dictsize = nNote*signifIndex.size();
max@1 1046
max@1 1047 float *curr_dict = new float[dictsize];
max@1 1048 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
max@1 1049 for (int iBin = 0; iBin < nNote; iBin++) {
max@1 1050 curr_dict[iNote * nNote + iBin] =
max@1 1051 1.0 * m_chromadata->dict[signifIndex[iNote] * nNote + iBin];
max@1 1052 }
max@1 1053 }
max@1 1054 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
max@1 1055 delete [] curr_dict;
max@1 1056 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
max@1 1057 // cerr << mode << endl;
max@1 1058 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
max@1 1059 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
max@1 1060 }
max@1 1061 }
max@1 1062 }
max@1 1063
max@1 1064 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end());
max@1 1065 // just stack the both chromas
max@1 1066
max@1 1067 bothchroma.values = chroma;
max@1 1068 returnFeatureList.push_back(bothchroma);
max@1 1069 count++;
max@1 1070 }
max@1 1071 cerr << "done." << endl;
max@1 1072
max@1 1073 return returnFeatureList;
max@1 1074 }
max@1 1075
max@1 1076 /* ------ Beat Quantizer ------ */
max@1 1077
max@4 1078 std::vector<Vamp::Plugin::FeatureList>
Chris@16 1079 SongPartitioner::beatQuantiser(Vamp::Plugin::FeatureList chromagram, Vamp::Plugin::FeatureList beats)
max@1 1080 {
max@1 1081 std::vector<FeatureList> returnVector;
max@1 1082
max@1 1083 FeatureList fwQchromagram; // frame-wise beat-quantised chroma
max@1 1084 FeatureList bwQchromagram; // beat-wise beat-quantised chroma
max@1 1085
max@4 1086 int nChromaFrame = (int) chromagram.size();
max@4 1087 int nBeat = (int) beats.size();
max@1 1088
max@1 1089 if (nBeat == 0 && nChromaFrame == 0) return returnVector;
max@1 1090
max@1 1091 size_t nBin = chromagram[0].values.size();
max@1 1092
max@1 1093 vector<float> tempChroma = vector<float>(nBin);
max@1 1094
max@1 1095 Vamp::RealTime beatTimestamp = Vamp::RealTime::zeroTime;
max@1 1096 int currBeatCount = -1; // start before first beat
max@1 1097 int framesInBeat = 0;
max@1 1098
max@4 1099 for (int iChroma = 0; iChroma < nChromaFrame; ++iChroma)
max@1 1100 {
max@4 1101 Vamp::RealTime frameTimestamp = chromagram[iChroma].timestamp;
Chris@22 1102 Vamp::RealTime tempBeatTimestamp;
Chris@22 1103
Chris@22 1104 if (currBeatCount != beats.size()-1) tempBeatTimestamp = beats[currBeatCount+1].timestamp;
Chris@22 1105 else tempBeatTimestamp = chromagram[nChromaFrame-1].timestamp;
Chris@22 1106
max@4 1107 if (frameTimestamp > tempBeatTimestamp ||
max@1 1108 iChroma == nChromaFrame-1)
max@1 1109 {
max@1 1110 // new beat (or last chroma frame)
max@1 1111 // 1. finish all the old beat processing
Chris@22 1112 if (framesInBeat > 0)
Chris@22 1113 {
Chris@22 1114 for (int i = 0; i < nBin; ++i) tempChroma[i] /= framesInBeat; // average
Chris@22 1115 }
max@1 1116
max@1 1117 Feature bwQchromaFrame;
max@1 1118 bwQchromaFrame.hasTimestamp = true;
max@1 1119 bwQchromaFrame.timestamp = beatTimestamp;
max@1 1120 bwQchromaFrame.values = tempChroma;
max@1 1121 bwQchromaFrame.duration = beats[currBeatCount+1].timestamp - beats[currBeatCount].timestamp;
max@1 1122 bwQchromagram.push_back(bwQchromaFrame);
max@1 1123
max@1 1124 for (int iFrame = -framesInBeat; iFrame < 0; ++iFrame)
max@1 1125 {
max@1 1126 Feature fwQchromaFrame;
max@1 1127 fwQchromaFrame.hasTimestamp = true;
max@1 1128 fwQchromaFrame.timestamp = chromagram[iChroma+iFrame].timestamp;
max@1 1129 fwQchromaFrame.values = tempChroma; // all between two beats get the same
max@1 1130 fwQchromagram.push_back(fwQchromaFrame);
max@1 1131 }
max@1 1132
max@1 1133 // 2. increments / resets for current (new) beat
max@1 1134 currBeatCount++;
max@1 1135 beatTimestamp = beats[currBeatCount].timestamp;
max@1 1136 for (size_t i = 0; i < nBin; ++i) tempChroma[i] = 0; // average
max@1 1137 framesInBeat = 0;
max@1 1138 }
max@1 1139 framesInBeat++;
max@1 1140 for (size_t i = 0; i < nBin; ++i) tempChroma[i] += chromagram[iChroma].values[i];
max@1 1141 }
max@1 1142 returnVector.push_back(fwQchromagram);
max@1 1143 returnVector.push_back(bwQchromagram);
max@1 1144 }
max@1 1145
max@1 1146 /* -------------------------------- */
max@1 1147 /* ------ Support Functions ------ */
max@1 1148 /* -------------------------------- */
max@1 1149
max@1 1150 // one-dimesion median filter
max@1 1151 arma::vec medfilt1(arma::vec v, int medfilt_length)
max@1 1152 {
max@1 1153 int halfWin = medfilt_length/2;
max@1 1154
max@1 1155 // result vector
max@1 1156 arma::vec res = arma::zeros<arma::vec>(v.size());
max@1 1157
max@1 1158 // padding
max@1 1159 arma::vec padV = arma::zeros<arma::vec>(v.size()+medfilt_length-1);
max@1 1160
Chris@21 1161 for (int i=medfilt_length/2; i < medfilt_length/2+v.size(); ++ i)
max@1 1162 {
max@1 1163 padV(i) = v(i-medfilt_length/2);
max@1 1164 }
max@1 1165
max@1 1166 // Median filter
max@1 1167 arma::vec win = arma::zeros<arma::vec>(medfilt_length);
max@1 1168
Chris@21 1169 for (int i=0; i < v.size(); ++i)
max@1 1170 {
max@1 1171 win = padV.subvec(i,i+halfWin*2);
max@1 1172 win = sort(win);
max@1 1173 res(i) = win(halfWin);
max@1 1174 }
max@1 1175
max@1 1176 return res;
max@1 1177 }
max@1 1178
max@1 1179
max@1 1180 // Quantile
max@1 1181 double quantile(arma::vec v, double p)
max@1 1182 {
max@1 1183 arma::vec sortV = arma::sort(v);
max@1 1184 int n = sortV.size();
max@1 1185 arma::vec x = arma::zeros<vec>(n+2);
max@1 1186 arma::vec y = arma::zeros<vec>(n+2);
max@1 1187
max@1 1188 x(0) = 0;
max@1 1189 x(n+1) = 100;
max@1 1190
Chris@21 1191 for (int i=1; i<n+1; ++i)
max@1 1192 x(i) = 100*(0.5+(i-1))/n;
max@1 1193
max@1 1194 y(0) = sortV(0);
max@1 1195 y.subvec(1,n) = sortV;
max@1 1196 y(n+1) = sortV(n-1);
max@1 1197
max@1 1198 arma::uvec x2index = find(x>=p*100);
max@1 1199
max@1 1200 // Interpolation
max@1 1201 double x1 = x(x2index(0)-1);
max@1 1202 double x2 = x(x2index(0));
max@1 1203 double y1 = y(x2index(0)-1);
max@1 1204 double y2 = y(x2index(0));
max@1 1205
max@1 1206 double res = (y2-y1)/(x2-x1)*(p*100-x1)+y1;
max@1 1207
max@1 1208 return res;
max@1 1209 }
max@1 1210
max@1 1211 // Max Filtering
max@1 1212 arma::mat maxfilt1(arma::mat inmat, int len)
max@1 1213 {
max@1 1214 arma::mat outmat = inmat;
max@1 1215
max@1 1216 for (int i=0; i<inmat.n_rows; ++i)
max@1 1217 {
max@1 1218 if (arma::sum(inmat.row(i)) > 0)
max@1 1219 {
max@1 1220 // Take a window of rows
max@1 1221 int startWin;
max@1 1222 int endWin;
max@1 1223
max@1 1224 if (0 > i-len)
max@1 1225 startWin = 0;
max@1 1226 else
max@1 1227 startWin = i-len;
max@1 1228
max@1 1229 if (inmat.n_rows-1 < i+len-1)
max@1 1230 endWin = inmat.n_rows-1;
max@1 1231 else
max@1 1232 endWin = i+len-1;
max@1 1233
max@1 1234 outmat(i,span::all) = arma::max(inmat(span(startWin,endWin),span::all));
max@1 1235 }
max@1 1236 }
max@1 1237
max@1 1238 return outmat;
max@1 1239
max@1 1240 }
max@1 1241
max@1 1242 // Null Parts
max@1 1243 Part nullpart(vector<Part> parts, arma::vec barline)
max@1 1244 {
max@1 1245 arma::uvec nullindices = arma::ones<arma::uvec>(barline.size());
Chris@21 1246 for (int iPart=0; iPart<parts.size(); ++iPart)
max@1 1247 {
Chris@21 1248 //for (int iIndex=0; iIndex < parts[0].indices.size(); ++iIndex)
Chris@21 1249 for (int iIndex=0; iIndex < parts[iPart].indices.size(); ++iIndex)
Chris@21 1250 for (int i=0; i<parts[iPart].n; ++i)
max@1 1251 {
Chris@21 1252 int ind = parts[iPart].indices[iIndex]+i;
max@1 1253 nullindices(ind) = 0;
max@1 1254 }
max@1 1255 }
max@7 1256
max@1 1257 Part newPart;
max@1 1258 newPart.n = 1;
max@1 1259 uvec q = find(nullindices > 0);
max@1 1260
Chris@21 1261 for (int i=0; i<q.size();++i)
max@1 1262 newPart.indices.push_back(q(i));
max@7 1263
max@1 1264 newPart.letter = '-';
max@1 1265 newPart.value = 0;
max@1 1266 newPart.level = 0;
max@1 1267
max@1 1268 return newPart;
max@1 1269 }
max@1 1270
max@1 1271
max@1 1272 // Merge Nulls
max@1 1273 void mergenulls(vector<Part> &parts)
max@1 1274 {
Chris@21 1275 for (int iPart=0; iPart<parts.size(); ++iPart)
max@1 1276 {
max@1 1277
max@1 1278 vector<Part> newVectorPart;
max@1 1279
max@1 1280 if (parts[iPart].letter.compare("-")==0)
max@1 1281 {
max@1 1282 sort (parts[iPart].indices.begin(), parts[iPart].indices.end());
Chris@21 1283 int newpartind = -1;
max@1 1284
max@1 1285 vector<int> indices;
max@1 1286 indices.push_back(-2);
max@1 1287
Chris@21 1288 for (int iIndex=0; iIndex<parts[iPart].indices.size(); ++iIndex)
max@1 1289 indices.push_back(parts[iPart].indices[iIndex]);
max@1 1290
Chris@21 1291 for (int iInd=1; iInd < indices.size(); ++iInd)
max@1 1292 {
max@1 1293 if (indices[iInd] - indices[iInd-1] > 1)
max@1 1294 {
max@1 1295 newpartind++;
max@1 1296
max@1 1297 Part newPart;
max@1 1298 newPart.letter = 'n';
max@1 1299 std::stringstream out;
max@1 1300 out << newpartind+1;
max@1 1301 newPart.letter.append(out.str());
max@1 1302 newPart.value = 20+newpartind+1;
max@1 1303 newPart.n = 1;
max@1 1304 newPart.indices.push_back(indices[iInd]);
max@1 1305 newPart.level = 0;
max@1 1306
max@1 1307 newVectorPart.push_back(newPart);
max@1 1308 }
max@1 1309 else
max@1 1310 {
max@1 1311 newVectorPart[newpartind].n = newVectorPart[newpartind].n+1;
max@1 1312 }
max@1 1313 }
max@1 1314 parts.erase (parts.end());
max@1 1315
Chris@21 1316 for (int i=0; i<newVectorPart.size(); ++i)
max@1 1317 parts.push_back(newVectorPart[i]);
max@1 1318 }
max@1 1319 }
max@1 1320 }
max@1 1321
max@1 1322 /* ------ Segmentation ------ */
max@1 1323
Chris@19 1324 vector<Part> songSegment(Vamp::Plugin::FeatureList quantisedChromagram)
max@1 1325 {
max@1 1326
max@1 1327
max@1 1328 /* ------ Parameters ------ */
max@1 1329 double thresh_beat = 0.85;
max@1 1330 double thresh_seg = 0.80;
max@1 1331 int medfilt_length = 5;
max@1 1332 int minlength = 28;
max@1 1333 int maxlength = 128;
max@1 1334 double quantilePerc = 0.1;
max@1 1335 /* ------------------------ */
max@1 1336
max@1 1337
max@1 1338 // Collect Info
Chris@19 1339 int nBeat = quantisedChromagram.size(); // Number of feature vector
Chris@19 1340 int nFeatValues = quantisedChromagram[0].values.size(); // Number of values for each feature vector
max@1 1341
max@1 1342 arma::irowvec timeStamp = arma::zeros<arma::imat>(1,nBeat); // Vector of Time Stamps
max@1 1343
Chris@22 1344 // Save time stamp as a Vector
Chris@19 1345 if (quantisedChromagram[0].hasTimestamp)
max@1 1346 {
Chris@21 1347 for (int i = 0; i < nBeat; ++ i)
Chris@19 1348 timeStamp[i] = quantisedChromagram[i].timestamp.nsec;
max@1 1349 }
max@1 1350
max@1 1351
max@1 1352 // Build a ObservationTOFeatures Matrix
max@1 1353 arma::mat featVal = arma::zeros<mat>(nBeat,nFeatValues/2);
max@1 1354
Chris@21 1355 for (int i = 0; i < nBeat; ++ i)
Chris@21 1356 for (int j = 0; j < nFeatValues/2; ++ j)
max@1 1357 {
Chris@19 1358 featVal(i,j) = (quantisedChromagram[i].values[j]+quantisedChromagram[i].values[j+12]) * 0.8;
max@1 1359 }
max@1 1360
max@1 1361 // Set to arbitrary value to feature vectors with low std
max@1 1362 arma::mat a = stddev(featVal,1,1);
max@1 1363
max@1 1364 // Feature Colleration Matrix
max@1 1365 arma::mat simmat0 = 1-arma::cor(arma::trans(featVal));
max@1 1366
max@1 1367
Chris@21 1368 for (int i = 0; i < nBeat; ++ i)
max@1 1369 {
max@1 1370 if (a(i)<0.000001)
max@1 1371 {
max@1 1372 featVal(i,1) = 1000; // arbitrary
max@1 1373
Chris@21 1374 for (int j = 0; j < nFeatValues/2; ++j)
max@1 1375 {
max@1 1376 simmat0(i,j) = 1;
max@1 1377 simmat0(j,i) = 1;
max@1 1378 }
max@1 1379 }
max@1 1380 }
max@1 1381
max@1 1382 arma::mat simmat = 1-simmat0/2;
max@1 1383
max@1 1384 // -------- To delate when the proble with the add of beat will be solved -------
Chris@21 1385 for (int i = 0; i < nBeat; ++ i)
Chris@21 1386 for (int j = 0; j < nBeat; ++ j)
max@1 1387 if (!std::isfinite(simmat(i,j)))
max@1 1388 simmat(i,j)=0;
max@1 1389 // ------------------------------------------------------------------------------
max@1 1390
max@1 1391 // Median Filtering applied to the Correlation Matrix
max@1 1392 // The median filter is for each diagonal of the Matrix
max@1 1393 arma::mat median_simmat = arma::zeros<arma::mat>(nBeat,nBeat);
max@1 1394
Chris@21 1395 for (int i = 0; i < nBeat; ++ i)
max@1 1396 {
max@1 1397 arma::vec temp = medfilt1(simmat.diag(i),medfilt_length);
max@1 1398 median_simmat.diag(i) = temp;
max@1 1399 median_simmat.diag(-i) = temp;
max@1 1400 }
max@1 1401
Chris@21 1402 for (int i = 0; i < nBeat; ++ i)
Chris@21 1403 for (int j = 0; j < nBeat; ++ j)
max@1 1404 if (!std::isfinite(median_simmat(i,j)))
max@1 1405 median_simmat(i,j) = 0;
max@1 1406
max@1 1407 // -------------- NOT CONVERTED -------------------------------------
max@1 1408 // if param.seg.standardise
max@1 1409 // med_median_simmat = repmat(median(median_simmat),nBeat,1);
max@1 1410 // std_median_simmat = repmat(std(median_simmat),nBeat,1);
max@1 1411 // median_simmat = (median_simmat - med_median_simmat) ./ std_median_simmat;
max@1 1412 // end
max@1 1413 // --------------------------------------------------------
max@1 1414
max@1 1415 // Retrieve Bar Bounderies
max@1 1416 arma::uvec dup = find(median_simmat > thresh_beat);
max@1 1417 arma::mat potential_duplicates = arma::zeros<arma::mat>(nBeat,nBeat);
max@1 1418 potential_duplicates.elem(dup) = arma::ones<arma::vec>(dup.size());
max@1 1419 potential_duplicates = trimatu(potential_duplicates);
max@1 1420
Chris@21 1421 int nPartlengths = round((maxlength-minlength)/4)+1;
max@1 1422 arma::vec partlengths = zeros<arma::vec>(nPartlengths);
max@1 1423
Chris@21 1424 for (int i = 0; i < nPartlengths; ++ i)
max@1 1425 partlengths(i) = (i*4)+ minlength;
max@1 1426
max@1 1427 // initialise arrays
max@1 1428 arma::cube simArray = zeros<arma::cube>(nBeat,nBeat,nPartlengths);
max@1 1429 arma::cube decisionArray2 = zeros<arma::cube>(nBeat,nBeat,nPartlengths);
max@1 1430
max@1 1431 int conta = 0;
max@1 1432
Chris@21 1433 //for (int iLength = 0; iLength < nPartlengths; ++ iLength)
Chris@21 1434 for (int iLength = 0; iLength < 20; ++ iLength)
max@1 1435 {
Chris@21 1436 int len = partlengths(iLength);
Chris@21 1437 int nUsedBeat = nBeat - len + 1; // number of potential rep beginnings: they can't overlap at the end of the song
max@1 1438
Chris@21 1439 for (int iBeat = 0; iBeat < nUsedBeat; ++ iBeat) // looping over all columns (arbitrarily chosen columns)
max@1 1440 {
max@1 1441 arma::uvec help2 = find(potential_duplicates(span(0,nUsedBeat-1),iBeat)==1);
max@1 1442
Chris@21 1443 for (int i=0; i<help2.size(); ++i)
max@1 1444 {
max@1 1445
max@1 1446 // measure how well two length len segments go together
max@1 1447 int kBeat = help2(i);
max@1 1448 arma::vec distrib = median_simmat(span(iBeat,iBeat+len-1),span(kBeat,kBeat+len-1)).diag(0);
max@1 1449 simArray(iBeat,kBeat,iLength) = quantile(distrib,quantilePerc);
max@1 1450 }
max@1 1451 }
max@1 1452
max@1 1453 arma::mat tempM = simArray(span(0,nUsedBeat-1),span(0,nUsedBeat-1),span(iLength,iLength));
max@1 1454 simArray.slice(iLength)(span(0,nUsedBeat-1),span(0,nUsedBeat-1)) = tempM + arma::trans(tempM) - (eye<mat>(nUsedBeat,nUsedBeat)%tempM);
max@1 1455
max@1 1456 // convolution
max@1 1457 arma::vec K = arma::zeros<vec>(3);
max@1 1458 K << 0.01 << 0.98 << 0.01;
max@1 1459
max@1 1460
Chris@21 1461 for (int i=0; i<simArray.n_rows; ++i)
max@1 1462 {
max@1 1463 arma::rowvec t = arma::conv((arma::rowvec)simArray.slice(iLength).row(i),K);
max@1 1464 simArray.slice(iLength)(i,span::all) = t.subvec(1,t.size()-2);
max@1 1465 }
max@1 1466
max@1 1467 // take only over-average bars that do not overlap
max@1 1468
max@1 1469 arma::mat temp = arma::zeros<mat>(simArray.n_rows, simArray.n_cols);
max@1 1470 temp(span::all, span(0,nUsedBeat-1)) = simArray.slice(iLength)(span::all,span(0,nUsedBeat-1));
max@1 1471
Chris@21 1472 for (int i=0; i<temp.n_rows; ++i)
Chris@21 1473 for (int j=0; j<nUsedBeat; ++j)
max@1 1474 if (temp(i,j) < thresh_seg)
max@1 1475 temp(i,j) = 0;
max@1 1476
max@1 1477 decisionArray2.slice(iLength) = temp;
max@1 1478
max@1 1479 arma::mat maxMat = maxfilt1(decisionArray2.slice(iLength),len-1);
max@1 1480
Chris@21 1481 for (int i=0; i<decisionArray2.n_rows; ++i)
Chris@21 1482 for (int j=0; j<decisionArray2.n_cols; ++j)
max@1 1483 if (decisionArray2.slice(iLength)(i,j) < maxMat(i,j))
max@1 1484 decisionArray2.slice(iLength)(i,j) = 0;
max@1 1485
max@1 1486 decisionArray2.slice(iLength) = decisionArray2.slice(iLength) % arma::trans(decisionArray2.slice(iLength));
max@1 1487
Chris@21 1488 for (int i=0; i<simArray.n_rows; ++i)
Chris@21 1489 for (int j=0; j<simArray.n_cols; ++j)
max@1 1490 if (simArray.slice(iLength)(i,j) < thresh_seg)
max@1 1491 potential_duplicates(i,j) = 0;
max@1 1492 }
max@1 1493
max@1 1494 // Milk the data
max@1 1495
max@1 1496 arma::mat bestval;
max@1 1497
Chris@21 1498 for (int iLength=0; iLength<nPartlengths; ++iLength)
max@1 1499 {
max@1 1500 arma::mat temp = arma::zeros<arma::mat>(decisionArray2.n_rows,decisionArray2.n_cols);
max@1 1501
Chris@21 1502 for (int rows=0; rows<decisionArray2.n_rows; ++rows)
Chris@21 1503 for (int cols=0; cols<decisionArray2.n_cols; ++cols)
max@1 1504 if (decisionArray2.slice(iLength)(rows,cols) > 0)
max@1 1505 temp(rows,cols) = 1;
max@1 1506
max@1 1507 arma::vec currLogicSum = arma::sum(temp,1);
max@1 1508
Chris@21 1509 for (int iBeat=0; iBeat<nBeat; ++iBeat)
max@1 1510 if (currLogicSum(iBeat) > 1)
max@1 1511 {
max@1 1512 arma::vec t = decisionArray2.slice(iLength)(span::all,iBeat);
max@1 1513 double currSum = sum(t);
max@1 1514
Chris@21 1515 int count = 0;
Chris@21 1516 for (int i=0; i<t.size(); ++i)
max@1 1517 if (t(i)>0)
max@1 1518 count++;
max@1 1519
max@1 1520 currSum = (currSum/count)/2;
max@1 1521
max@1 1522 arma::rowvec t1;
max@1 1523 t1 << (currLogicSum(iBeat)-1) * partlengths(iLength) << currSum << iLength << iBeat << currLogicSum(iBeat);
max@1 1524
max@1 1525 bestval = join_cols(bestval,t1);
max@1 1526 }
max@1 1527 }
max@1 1528
max@1 1529 // Definition of the resulting vector
max@1 1530 vector<Part> parts;
max@1 1531
max@1 1532 // make a table of all valid sets of parts
max@1 1533
max@1 1534 char partletters[] = {'A','B','C','D','E','F','G', 'H','I','J','K','L','M','N','O','P','Q','R','S'};
Chris@21 1535 int partvalues[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19};
max@1 1536 arma::vec valid_sets = arma::ones<arma::vec>(bestval.n_rows);
max@1 1537
max@1 1538 if (!bestval.is_empty())
max@1 1539 {
max@1 1540
max@1 1541 // In questo punto viene introdotto un errore alla 3 cifra decimale
max@1 1542
max@1 1543 arma::colvec t = arma::zeros<arma::colvec>(bestval.n_rows);
Chris@21 1544 for (int i=0; i<bestval.n_rows; ++i)
max@1 1545 {
max@1 1546 t(i) = bestval(i,1)*2;
max@1 1547 }
max@1 1548
max@1 1549 double m = t.max();
max@1 1550
max@1 1551 bestval(span::all,1) = bestval(span::all,1) / m;
max@1 1552 bestval(span::all,0) = bestval(span::all,0) + bestval(span::all,1);
max@1 1553
max@1 1554 arma::mat bestval2;
Chris@21 1555 for (int i=0; i<bestval.n_cols; ++i)
max@1 1556 if (i!=1)
max@1 1557 bestval2 = join_rows(bestval2,bestval.col(i));
max@1 1558
Chris@21 1559 for (int kSeg=0; kSeg<6; ++kSeg)
max@1 1560 {
max@1 1561 arma::mat currbestvals = arma::zeros<arma::mat>(bestval2.n_rows, bestval2.n_cols);
Chris@21 1562 for (int i=0; i<bestval2.n_rows; ++i)
Chris@21 1563 for (int j=0; j<bestval2.n_cols; ++j)
max@1 1564 if (valid_sets(i))
max@1 1565 currbestvals(i,j) = bestval2(i,j);
max@1 1566
max@1 1567 arma::vec t1 = currbestvals.col(0);
max@1 1568 double ma;
max@1 1569 uword maIdx;
max@1 1570 ma = t1.max(maIdx);
max@6 1571
max@6 1572 if ((maIdx == 0)&&(ma == 0))
max@6 1573 break;
max@1 1574
max@1 1575 double bestLength = partlengths(currbestvals(maIdx,1));
max@1 1576 arma::rowvec bestIndices = decisionArray2.slice(currbestvals(maIdx,1))(currbestvals(maIdx,2),span::all);
max@1 1577
max@1 1578 arma::rowvec bestIndicesMap = arma::zeros<arma::rowvec>(bestIndices.size());
Chris@21 1579 for (int i=0; i<bestIndices.size(); ++i)
max@1 1580 if (bestIndices(i)>0)
max@1 1581 bestIndicesMap(i) = 1;
max@1 1582
max@1 1583 arma::rowvec mask = arma::zeros<arma::rowvec>(bestLength*2-1);
Chris@21 1584 for (int i=0; i<bestLength; ++i)
max@1 1585 mask(i+bestLength-1) = 1;
max@1 1586
max@1 1587 arma::rowvec t2 = arma::conv(bestIndicesMap,mask);
max@1 1588 arma::rowvec island = t2.subvec(mask.size()/2,t2.size()-1-mask.size()/2);
max@1 1589
max@1 1590 // Save results in the structure
max@1 1591 Part newPart;
max@1 1592 newPart.n = bestLength;
max@1 1593 uvec q1 = find(bestIndices > 0);
max@1 1594
Chris@21 1595 for (int i=0; i<q1.size();++i)
max@1 1596 newPart.indices.push_back(q1(i));
max@1 1597
max@1 1598 newPart.letter = partletters[kSeg];
max@1 1599 newPart.value = partvalues[kSeg];
max@1 1600 newPart.level = kSeg+1;
max@1 1601 parts.push_back(newPart);
max@1 1602
max@1 1603 uvec q2 = find(valid_sets==1);
max@1 1604
Chris@21 1605 for (int i=0; i<q2.size(); ++i)
max@1 1606 {
Chris@21 1607 int iSet = q2(i);
Chris@21 1608 int s = partlengths(bestval2(iSet,1));
max@1 1609
max@1 1610 arma::rowvec mask1 = arma::zeros<arma::rowvec>(s*2-1);
Chris@21 1611 for (int i=0; i<s; ++i)
max@1 1612 mask1(i+s-1) = 1;
max@1 1613
max@1 1614 arma::rowvec Ind = decisionArray2.slice(bestval2(iSet,1))(bestval2(iSet,2),span::all);
max@1 1615 arma::rowvec IndMap = arma::zeros<arma::rowvec>(Ind.size());
Chris@21 1616 for (int i=0; i<Ind.size(); ++i)
max@1 1617 if (Ind(i)>0)
max@1 1618 IndMap(i) = 2;
max@1 1619
max@1 1620 arma::rowvec t3 = arma::conv(IndMap,mask1);
max@6 1621 arma::rowvec currislands = t3.subvec(mask1.size()/2,t3.size()-1-mask1.size()/2);
max@1 1622 arma::rowvec islandsdMult = currislands%island;
max@6 1623
max@1 1624 arma::uvec islandsIndex = find(islandsdMult > 0);
max@1 1625
max@6 1626 if (islandsIndex.size() > 0)
max@1 1627 valid_sets(iSet) = 0;
max@1 1628 }
max@1 1629 }
max@1 1630 }
max@1 1631 else
max@1 1632 {
max@1 1633 Part newPart;
max@1 1634 newPart.n = nBeat;
max@1 1635 newPart.indices.push_back(1);
max@1 1636 newPart.letter = 'A';
max@1 1637 newPart.value = 1;
max@1 1638 newPart.level = 1;
max@1 1639 parts.push_back(newPart);
max@1 1640 }
max@6 1641
max@1 1642 arma::vec bar = linspace(1,nBeat,nBeat);
max@1 1643 Part np = nullpart(parts,bar);
max@7 1644
max@1 1645 parts.push_back(np);
max@1 1646
max@1 1647 // -------------- NOT CONVERTED -------------------------------------
max@1 1648 // if param.seg.editor
max@1 1649 // [pa, ta] = partarray(parts);
max@1 1650 // parts = editorssearch(pa, ta, parts);
max@1 1651 // parts = [parts, nullpart(parts,1:nBeat)];
max@1 1652 // end
max@1 1653 // ------------------------------------------------------------------
max@1 1654
max@1 1655
max@1 1656 mergenulls(parts);
max@1 1657
max@1 1658
max@1 1659 // -------------- NOT CONVERTED -------------------------------------
max@1 1660 // if param.seg.editor
max@1 1661 // [pa, ta] = partarray(parts);
max@1 1662 // parts = editorssearch(pa, ta, parts);
max@1 1663 // parts = [parts, nullpart(parts,1:nBeat)];
max@1 1664 // end
max@1 1665 // ------------------------------------------------------------------
max@1 1666
max@1 1667 return parts;
max@1 1668 }
max@1 1669
max@1 1670
max@1 1671
Chris@19 1672 void songSegmentChroma(Vamp::Plugin::FeatureList quantisedChromagram, vector<Part> &parts)
max@1 1673 {
max@1 1674 // Collect Info
Chris@19 1675 int nBeat = quantisedChromagram.size(); // Number of feature vector
Chris@19 1676 int nFeatValues = quantisedChromagram[0].values.size(); // Number of values for each feature vector
max@1 1677
max@1 1678 arma::mat synchTreble = arma::zeros<mat>(nBeat,nFeatValues/2);
max@1 1679
Chris@21 1680 for (int i = 0; i < nBeat; ++ i)
Chris@21 1681 for (int j = 0; j < nFeatValues/2; ++ j)
max@1 1682 {
Chris@19 1683 synchTreble(i,j) = quantisedChromagram[i].values[j];
max@1 1684 }
max@1 1685
max@1 1686 arma::mat synchBass = arma::zeros<mat>(nBeat,nFeatValues/2);
max@1 1687
Chris@21 1688 for (int i = 0; i < nBeat; ++ i)
Chris@21 1689 for (int j = 0; j < nFeatValues/2; ++ j)
max@1 1690 {
Chris@19 1691 synchBass(i,j) = quantisedChromagram[i].values[j+12];
max@1 1692 }
max@1 1693
max@1 1694 // Process
max@1 1695
Chris@19 1696 arma::mat segTreble = arma::zeros<arma::mat>(quantisedChromagram.size(),quantisedChromagram[0].values.size()/2);
Chris@19 1697 arma::mat segBass = arma::zeros<arma::mat>(quantisedChromagram.size(),quantisedChromagram[0].values.size()/2);
max@1 1698
Chris@21 1699 for (int iPart=0; iPart<parts.size(); ++iPart)
max@1 1700 {
max@1 1701 parts[iPart].nInd = parts[iPart].indices.size();
max@1 1702
Chris@21 1703 for (int kOccur=0; kOccur<parts[iPart].nInd; ++kOccur)
max@1 1704 {
max@1 1705 int kStartIndex = parts[iPart].indices[kOccur];
max@1 1706 int kEndIndex = kStartIndex + parts[iPart].n-1;
max@1 1707
max@1 1708 segTreble.rows(kStartIndex,kEndIndex) = segTreble.rows(kStartIndex,kEndIndex) + synchTreble.rows(kStartIndex,kEndIndex);
max@1 1709 segBass.rows(kStartIndex,kEndIndex) = segBass.rows(kStartIndex,kEndIndex) + synchBass.rows(kStartIndex,kEndIndex);
max@1 1710 }
max@1 1711 }
max@1 1712 }
max@1 1713
max@1 1714
max@1 1715 // Segment Integration
max@1 1716 vector<Part> songSegmentIntegration(vector<Part> &parts)
max@1 1717 {
max@1 1718 // Break up parts (every part will have one instance)
max@1 1719 vector<Part> newPartVector;
max@1 1720 vector<int> partindices;
max@1 1721
Chris@21 1722 for (int iPart=0; iPart<parts.size(); ++iPart)
max@1 1723 {
max@1 1724 parts[iPart].nInd = parts[iPart].indices.size();
Chris@21 1725 for (int iInstance=0; iInstance<parts[iPart].nInd; ++iInstance)
max@1 1726 {
max@1 1727 Part newPart;
max@1 1728 newPart.n = parts[iPart].n;
max@1 1729 newPart.letter = parts[iPart].letter;
max@1 1730 newPart.value = parts[iPart].value;
max@1 1731 newPart.level = parts[iPart].level;
max@1 1732 newPart.indices.push_back(parts[iPart].indices[iInstance]);
max@1 1733 newPart.nInd = 1;
max@1 1734 partindices.push_back(parts[iPart].indices[iInstance]);
max@1 1735
max@1 1736 newPartVector.push_back(newPart);
max@1 1737 }
max@1 1738 }
max@1 1739
max@1 1740
max@1 1741 // Sort the parts in order of occurrence
max@1 1742 sort (partindices.begin(), partindices.end());
max@1 1743
Chris@21 1744 for (int i=0; i<partindices.size(); ++i)
max@1 1745 {
max@1 1746 bool found = false;
max@1 1747 int in=0;
max@1 1748 while (!found)
max@1 1749 {
max@1 1750 if (newPartVector[in].indices[0] == partindices[i])
max@1 1751 {
max@1 1752 newPartVector.push_back(newPartVector[in]);
max@1 1753 newPartVector.erase(newPartVector.begin()+in);
max@1 1754 found = true;
max@1 1755 }
max@1 1756 else
max@1 1757 in++;
max@1 1758 }
max@1 1759 }
max@1 1760
max@1 1761 // Clear the vector
Chris@21 1762 for (int iNewpart=1; iNewpart < newPartVector.size(); ++iNewpart)
max@1 1763 {
max@1 1764 if (newPartVector[iNewpart].n < 12)
max@1 1765 {
max@1 1766 newPartVector[iNewpart-1].n = newPartVector[iNewpart-1].n + newPartVector[iNewpart].n;
max@1 1767 newPartVector.erase(newPartVector.begin()+iNewpart);
max@1 1768 }
max@1 1769 }
max@1 1770
max@1 1771 return newPartVector;
max@1 1772 }
max@1 1773
max@1 1774 // Segmenter
Chris@19 1775 Vamp::Plugin::FeatureList SongPartitioner::runSegmenter(Vamp::Plugin::FeatureList quantisedChromagram)
max@1 1776 {
max@1 1777 /* --- Display Information --- */
Chris@19 1778 int numBeat = quantisedChromagram.size();
Chris@19 1779 int numFeats = quantisedChromagram[0].values.size();
max@1 1780
max@1 1781 vector<Part> parts;
max@1 1782 vector<Part> finalParts;
max@1 1783
Chris@19 1784 parts = songSegment(quantisedChromagram);
Chris@19 1785 songSegmentChroma(quantisedChromagram,parts);
max@7 1786
max@1 1787 finalParts = songSegmentIntegration(parts);
max@1 1788
max@1 1789
max@1 1790 // TEMP ----
Chris@21 1791 /*for (int i=0;i<finalParts.size(); ++i)
max@1 1792 {
max@6 1793 std::cout << "Parts n° " << i << std::endl;
max@6 1794 std::cout << "n°: " << finalParts[i].n << std::endl;
max@6 1795 std::cout << "letter: " << finalParts[i].letter << std::endl;
max@1 1796
max@6 1797 std::cout << "indices: ";
Chris@21 1798 for (int j=0;j<finalParts[i].indices.size(); ++j)
max@6 1799 std::cout << finalParts[i].indices[j] << " ";
max@6 1800
max@6 1801 std::cout << std::endl;
max@6 1802 std::cout << "level: " << finalParts[i].level << std::endl;
max@1 1803 }*/
max@1 1804
max@1 1805 // ---------
max@1 1806
max@1 1807
max@1 1808 // Output
max@1 1809
max@1 1810 Vamp::Plugin::FeatureList results;
max@1 1811
max@1 1812
max@1 1813 Feature seg;
max@1 1814
max@1 1815 arma::vec indices;
Chris@21 1816 int idx=0;
max@1 1817 vector<int> values;
max@1 1818 vector<string> letters;
max@1 1819
Chris@21 1820 for (int iPart=0; iPart<finalParts.size()-1; ++iPart)
max@1 1821 {
Chris@21 1822 int iInstance=0;
max@1 1823 seg.hasTimestamp = true;
max@1 1824
max@1 1825 int ind = finalParts[iPart].indices[iInstance];
max@1 1826 int ind1 = finalParts[iPart+1].indices[iInstance];
max@1 1827
Chris@19 1828 seg.timestamp = quantisedChromagram[ind].timestamp;
max@1 1829 seg.hasDuration = true;
Chris@19 1830 seg.duration = quantisedChromagram[ind1].timestamp-quantisedChromagram[ind].timestamp;
max@1 1831 seg.values.clear();
max@1 1832 seg.values.push_back(finalParts[iPart].value);
max@1 1833 seg.label = finalParts[iPart].letter;
max@1 1834
max@1 1835 results.push_back(seg);
max@1 1836 }
max@1 1837
max@1 1838 int ind = finalParts[finalParts.size()-1].indices[0];
Chris@19 1839 seg.timestamp = quantisedChromagram[ind].timestamp;
max@1 1840 seg.hasDuration = true;
Chris@19 1841 seg.duration = quantisedChromagram[quantisedChromagram.size()-1].timestamp-quantisedChromagram[ind].timestamp;
max@1 1842 seg.values.clear();
max@1 1843 seg.values.push_back(finalParts[finalParts.size()-1].value);
max@1 1844 seg.label = finalParts[finalParts.size()-1].letter;
max@1 1845
max@1 1846 results.push_back(seg);
max@1 1847
max@1 1848 return results;
max@1 1849 }
max@1 1850
max@1 1851
max@1 1852
max@1 1853
max@1 1854
max@1 1855
max@1 1856
max@1 1857
max@1 1858
max@1 1859
max@1 1860
max@1 1861
max@1 1862
max@1 1863
max@1 1864
max@1 1865
max@1 1866