comparison segmentino/Segmentino.cpp @ 48:69251e11a913

Rename SongParts/songpartitioner to Segmentino throughout
author Chris Cannam
date Thu, 13 Jun 2013 09:43:01 +0100
parents songparts/SongParts.cpp@f59ff6a22f8e
children 1ec0e2823891
comparison
equal deleted inserted replaced
47:5ead8717a618 48:69251e11a913
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3 /*
4 Segmentino
5
6 Code by Massimiliano Zanoni and Matthias Mauch
7 Centre for Digital Music, Queen Mary, University of London
8
9 Copyright 2009-2013 Queen Mary, University of London.
10
11 This program is free software; you can redistribute it and/or
12 modify it under the terms of the GNU General Public License as
13 published by the Free Software Foundation; either version 2 of the
14 License, or (at your option) any later version. See the file
15 COPYING included with this distribution for more information.
16 */
17
18 #include "Segmentino.h"
19
20 #include <base/Window.h>
21 #include <dsp/onsets/DetectionFunction.h>
22 #include <dsp/onsets/PeakPicking.h>
23 #include <dsp/transforms/FFT.h>
24 #include <dsp/tempotracking/TempoTrackV2.h>
25 #include <dsp/tempotracking/DownBeat.h>
26 #include <chromamethods.h>
27 #include <maths/MathUtilities.h>
28 #include <boost/numeric/ublas/matrix.hpp>
29 #include <boost/numeric/ublas/io.hpp>
30 #include <boost/math/distributions/normal.hpp>
31 #include "armadillo"
32 #include <fstream>
33 #include <sstream>
34 #include <cmath>
35 #include <vector>
36
37 #include <vamp-sdk/Plugin.h>
38
39 using namespace boost::numeric;
40 using namespace arma;
41 using std::string;
42 using std::vector;
43 using std::cerr;
44 using std::cout;
45 using std::endl;
46
47
48 #ifndef __GNUC__
49 #include <alloca.h>
50 #endif
51
52
53 // Result Struct
54 typedef struct Part {
55 int n;
56 vector<int> indices;
57 string letter;
58 int value;
59 int level;
60 int nInd;
61 }Part;
62
63
64
65 /* ------------------------------------ */
66 /* ----- BEAT DETECTOR CLASS ---------- */
67 /* ------------------------------------ */
68
69 class BeatTrackerData
70 {
71 /* --- ATTRIBUTES --- */
72 public:
73 DFConfig dfConfig;
74 DetectionFunction *df;
75 DownBeat *downBeat;
76 vector<double> dfOutput;
77 Vamp::RealTime origin;
78
79
80 /* --- METHODS --- */
81
82 /* --- Constructor --- */
83 public:
84 BeatTrackerData(float rate, const DFConfig &config) : dfConfig(config) {
85
86 df = new DetectionFunction(config);
87 // decimation factor aims at resampling to c. 3KHz; must be power of 2
88 int factor = MathUtilities::nextPowerOfTwo(rate / 3000);
89 // std::cerr << "BeatTrackerData: factor = " << factor << std::endl;
90 downBeat = new DownBeat(rate, factor, config.stepSize);
91 }
92
93 /* --- Desctructor --- */
94 ~BeatTrackerData() {
95 delete df;
96 delete downBeat;
97 }
98
99 void reset() {
100 delete df;
101 df = new DetectionFunction(dfConfig);
102 dfOutput.clear();
103 downBeat->resetAudioBuffer();
104 origin = Vamp::RealTime::zeroTime;
105 }
106 };
107
108
109 /* --------------------------------------- */
110 /* ----- CHROMA EXTRACTOR CLASS ---------- */
111 /* --------------------------------------- */
112
113 class ChromaData
114 {
115
116 /* --- ATTRIBUTES --- */
117
118 public:
119 int frameCount;
120 int nBPS;
121 Vamp::Plugin::FeatureList logSpectrum;
122 int blockSize;
123 int lengthOfNoteIndex;
124 vector<float> meanTunings;
125 vector<float> localTunings;
126 float whitening;
127 float preset;
128 float useNNLS;
129 vector<float> localTuning;
130 vector<float> kernelValue;
131 vector<int> kernelFftIndex;
132 vector<int> kernelNoteIndex;
133 float *dict;
134 bool tuneLocal;
135 float doNormalizeChroma;
136 float rollon;
137 float s;
138 vector<float> hw;
139 vector<float> sinvalues;
140 vector<float> cosvalues;
141 Window<float> window;
142 FFTReal fft;
143 int inputSampleRate;
144
145 /* --- METHODS --- */
146
147 /* --- Constructor --- */
148
149 public:
150 ChromaData(float inputSampleRate, size_t block_size) :
151 frameCount(0),
152 nBPS(3),
153 logSpectrum(0),
154 blockSize(0),
155 lengthOfNoteIndex(0),
156 meanTunings(0),
157 localTunings(0),
158 whitening(1.0),
159 preset(0.0),
160 useNNLS(1.0),
161 localTuning(0.0),
162 kernelValue(0),
163 kernelFftIndex(0),
164 kernelNoteIndex(0),
165 dict(0),
166 tuneLocal(0.0),
167 doNormalizeChroma(0),
168 rollon(0.0),
169 s(0.7),
170 sinvalues(0),
171 cosvalues(0),
172 window(HanningWindow, block_size),
173 fft(block_size),
174 inputSampleRate(inputSampleRate)
175 {
176 // make the *note* dictionary matrix
177 dict = new float[nNote * 84];
178 for (int i = 0; i < nNote * 84; ++i) dict[i] = 0.0;
179 blockSize = block_size;
180 }
181
182
183 /* --- Desctructor --- */
184
185 ~ChromaData() {
186 delete [] dict;
187 }
188
189 /* --- Public Methods --- */
190
191 void reset() {
192 frameCount = 0;
193 logSpectrum.clear();
194 for (int iBPS = 0; iBPS < 3; ++iBPS) {
195 meanTunings[iBPS] = 0;
196 localTunings[iBPS] = 0;
197 }
198 localTuning.clear();
199 }
200
201 void baseProcess(float *inputBuffers, Vamp::RealTime timestamp)
202 {
203
204 frameCount++;
205 float *magnitude = new float[blockSize/2];
206 double *fftReal = new double[blockSize];
207 double *fftImag = new double[blockSize];
208
209 // FFTReal wants doubles, so we need to make a local copy of inputBuffers
210 double *inputBuffersDouble = new double[blockSize];
211 for (int i = 0; i < blockSize; i++) inputBuffersDouble[i] = inputBuffers[i];
212
213 fft.process(false, inputBuffersDouble, fftReal, fftImag);
214
215 float energysum = 0;
216 // make magnitude
217 float maxmag = -10000;
218 for (int iBin = 0; iBin < static_cast<int>(blockSize/2); iBin++) {
219 magnitude[iBin] = sqrt(fftReal[iBin] * fftReal[iBin] +
220 fftImag[iBin] * fftImag[iBin]);
221 if (magnitude[iBin]>blockSize*1.0) magnitude[iBin] = blockSize;
222 // a valid audio signal (between -1 and 1) should not be limited here.
223 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
224 if (rollon > 0) {
225 energysum += pow(magnitude[iBin],2);
226 }
227 }
228
229 float cumenergy = 0;
230 if (rollon > 0) {
231 for (int iBin = 2; iBin < static_cast<int>(blockSize/2); iBin++) {
232 cumenergy += pow(magnitude[iBin],2);
233 if (cumenergy < energysum * rollon / 100) magnitude[iBin-2] = 0;
234 else break;
235 }
236 }
237
238 if (maxmag < 2) {
239 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
240 for (int iBin = 0; iBin < static_cast<int>(blockSize/2); iBin++) {
241 magnitude[iBin] = 0;
242 }
243 }
244
245 // cerr << magnitude[200] << endl;
246
247 // note magnitude mapping using pre-calculated matrix
248 float *nm = new float[nNote]; // note magnitude
249 for (int iNote = 0; iNote < nNote; iNote++) {
250 nm[iNote] = 0; // initialise as 0
251 }
252 int binCount = 0;
253 for (vector<float>::iterator it = kernelValue.begin(); it != kernelValue.end(); ++it) {
254 nm[kernelNoteIndex[binCount]] += magnitude[kernelFftIndex[binCount]] * kernelValue[binCount];
255 binCount++;
256 }
257
258 float one_over_N = 1.0/frameCount;
259 // update means of complex tuning variables
260 for (int iBPS = 0; iBPS < nBPS; ++iBPS) meanTunings[iBPS] *= float(frameCount-1)*one_over_N;
261
262 for (int iTone = 0; iTone < round(nNote*0.62/nBPS)*nBPS+1; iTone = iTone + nBPS) {
263 for (int iBPS = 0; iBPS < nBPS; ++iBPS) meanTunings[iBPS] += nm[iTone + iBPS]*one_over_N;
264 float ratioOld = 0.997;
265 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
266 localTunings[iBPS] *= ratioOld;
267 localTunings[iBPS] += nm[iTone + iBPS] * (1 - ratioOld);
268 }
269 }
270
271 float localTuningImag = 0;
272 float localTuningReal = 0;
273 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
274 localTuningReal += localTunings[iBPS] * cosvalues[iBPS];
275 localTuningImag += localTunings[iBPS] * sinvalues[iBPS];
276 }
277
278 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
279 localTuning.push_back(normalisedtuning);
280
281 Vamp::Plugin::Feature f1; // logfreqspec
282 f1.hasTimestamp = true;
283 f1.timestamp = timestamp;
284 for (int iNote = 0; iNote < nNote; iNote++) {
285 f1.values.push_back(nm[iNote]);
286 }
287
288 // deletes
289 delete[] inputBuffersDouble;
290 delete[] magnitude;
291 delete[] fftReal;
292 delete[] fftImag;
293 delete[] nm;
294
295 logSpectrum.push_back(f1); // remember note magnitude
296 }
297
298 bool initialise()
299 {
300 dictionaryMatrix(dict, s);
301
302 // make things for tuning estimation
303 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
304 sinvalues.push_back(sin(2*M_PI*(iBPS*1.0/nBPS)));
305 cosvalues.push_back(cos(2*M_PI*(iBPS*1.0/nBPS)));
306 }
307
308
309 // make hamming window of length 1/2 octave
310 int hamwinlength = nBPS * 6 + 1;
311 float hamwinsum = 0;
312 for (int i = 0; i < hamwinlength; ++i) {
313 hw.push_back(0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1)));
314 hamwinsum += 0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1));
315 }
316 for (int i = 0; i < hamwinlength; ++i) hw[i] = hw[i] / hamwinsum;
317
318
319 // initialise the tuning
320 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
321 meanTunings.push_back(0);
322 localTunings.push_back(0);
323 }
324
325 blockSize = blockSize;
326 frameCount = 0;
327 int tempn = nNote * blockSize/2;
328 // cerr << "length of tempkernel : " << tempn << endl;
329 float *tempkernel;
330
331 tempkernel = new float[tempn];
332
333 logFreqMatrix(inputSampleRate, blockSize, tempkernel);
334 kernelValue.clear();
335 kernelFftIndex.clear();
336 kernelNoteIndex.clear();
337 int countNonzero = 0;
338 for (int iNote = 0; iNote < nNote; ++iNote) {
339 // I don't know if this is wise: manually making a sparse matrix
340 for (int iFFT = 0; iFFT < static_cast<int>(blockSize/2); ++iFFT) {
341 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
342 kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
343 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
344 countNonzero++;
345 }
346 kernelFftIndex.push_back(iFFT);
347 kernelNoteIndex.push_back(iNote);
348 }
349 }
350 }
351 delete [] tempkernel;
352
353 return true;
354 }
355 };
356
357
358 /* --------------------------------- */
359 /* ----- SONG PARTITIONER ---------- */
360 /* --------------------------------- */
361
362
363 /* --- ATTRIBUTES --- */
364
365 float Segmentino::m_stepSecs = 0.01161; // 512 samples at 44100
366 int Segmentino::m_chromaFramesizeFactor = 16; // 16 times as long as beat tracker's
367 int Segmentino::m_chromaStepsizeFactor = 4; // 4 times as long as beat tracker's
368
369
370 /* --- METHODS --- */
371
372 /* --- Constructor --- */
373 Segmentino::Segmentino(float inputSampleRate) :
374 Vamp::Plugin(inputSampleRate),
375 m_d(0),
376 m_chromadata(0),
377 m_bpb(4),
378 m_pluginFrameCount(0)
379 {
380 }
381
382
383 /* --- Desctructor --- */
384 Segmentino::~Segmentino()
385 {
386 delete m_d;
387 delete m_chromadata;
388 }
389
390
391 /* --- Methods --- */
392 string Segmentino::getIdentifier() const
393 {
394 return "qm-songpartitioner";
395 }
396
397 string Segmentino::getName() const
398 {
399 return "Song Partitioner";
400 }
401
402 string Segmentino::getDescription() const
403 {
404 return "Estimate contiguous segments pertaining to song parts such as verse and chorus.";
405 }
406
407 string Segmentino::getMaker() const
408 {
409 return "Queen Mary, University of London";
410 }
411
412 int Segmentino::getPluginVersion() const
413 {
414 return 2;
415 }
416
417 string Segmentino::getCopyright() const
418 {
419 return "Plugin by Matthew Davies, Christian Landone, Chris Cannam, Matthias Mauch and Massimiliano Zanoni Copyright (c) 2006-2012 QMUL - All Rights Reserved";
420 }
421
422 Segmentino::ParameterList Segmentino::getParameterDescriptors() const
423 {
424 ParameterList list;
425
426 ParameterDescriptor desc;
427
428 // desc.identifier = "bpb";
429 // desc.name = "Beats per Bar";
430 // desc.description = "The number of beats in each bar";
431 // desc.minValue = 2;
432 // desc.maxValue = 16;
433 // desc.defaultValue = 4;
434 // desc.isQuantized = true;
435 // desc.quantizeStep = 1;
436 // list.push_back(desc);
437
438 return list;
439 }
440
441 float Segmentino::getParameter(std::string name) const
442 {
443 if (name == "bpb") return m_bpb;
444 return 0.0;
445 }
446
447 void Segmentino::setParameter(std::string name, float value)
448 {
449 if (name == "bpb") m_bpb = lrintf(value);
450 }
451
452
453 // Return the StepSize for Chroma Extractor
454 size_t Segmentino::getPreferredStepSize() const
455 {
456 size_t step = size_t(m_inputSampleRate * m_stepSecs + 0.0001);
457 if (step < 1) step = 1;
458
459 return step;
460 }
461
462 // Return the BlockSize for Chroma Extractor
463 size_t Segmentino::getPreferredBlockSize() const
464 {
465 size_t theoretical = getPreferredStepSize() * 2;
466 theoretical *= m_chromaFramesizeFactor;
467
468 return theoretical;
469 }
470
471
472 // Initialize the plugin and define Beat Tracker and Chroma Extractor Objects
473 bool Segmentino::initialise(size_t channels, size_t stepSize, size_t blockSize)
474 {
475 if (m_d) {
476 delete m_d;
477 m_d = 0;
478 }
479 if (m_chromadata) {
480 delete m_chromadata;
481 m_chromadata = 0;
482 }
483
484 if (channels < getMinChannelCount() ||
485 channels > getMaxChannelCount()) {
486 std::cerr << "Segmentino::initialise: Unsupported channel count: "
487 << channels << std::endl;
488 return false;
489 }
490
491 if (stepSize != getPreferredStepSize()) {
492 std::cerr << "ERROR: Segmentino::initialise: Unsupported step size for this sample rate: "
493 << stepSize << " (wanted " << (getPreferredStepSize()) << ")" << std::endl;
494 return false;
495 }
496
497 if (blockSize != getPreferredBlockSize()) {
498 std::cerr << "WARNING: Segmentino::initialise: Sub-optimal block size for this sample rate: "
499 << blockSize << " (wanted " << getPreferredBlockSize() << ")" << std::endl;
500 }
501
502 // Beat tracker and Chroma extractor has two different configuration parameters
503
504 // Configuration Parameters for Beat Tracker
505 DFConfig dfConfig;
506 dfConfig.DFType = DF_COMPLEXSD;
507 dfConfig.stepSize = stepSize;
508 dfConfig.frameLength = blockSize / m_chromaFramesizeFactor;
509 dfConfig.dbRise = 3;
510 dfConfig.adaptiveWhitening = false;
511 dfConfig.whiteningRelaxCoeff = -1;
512 dfConfig.whiteningFloor = -1;
513
514 // Initialise Beat Tracker
515 m_d = new BeatTrackerData(m_inputSampleRate, dfConfig);
516 m_d->downBeat->setBeatsPerBar(m_bpb);
517
518 // Initialise Chroma Extractor
519 m_chromadata = new ChromaData(m_inputSampleRate, blockSize);
520 m_chromadata->initialise();
521
522 return true;
523 }
524
525 void Segmentino::reset()
526 {
527 if (m_d) m_d->reset();
528 if (m_chromadata) m_chromadata->reset();
529 m_pluginFrameCount = 0;
530 }
531
532 Segmentino::OutputList Segmentino::getOutputDescriptors() const
533 {
534 OutputList list;
535 int outputCounter = 0;
536
537 OutputDescriptor beat;
538 beat.identifier = "beats";
539 beat.name = "Beats";
540 beat.description = "Beat locations labelled with metrical position";
541 beat.unit = "";
542 beat.hasFixedBinCount = true;
543 beat.binCount = 0;
544 beat.sampleType = OutputDescriptor::VariableSampleRate;
545 beat.sampleRate = 1.0 / m_stepSecs;
546 m_beatOutputNumber = outputCounter++;
547
548 OutputDescriptor bars;
549 bars.identifier = "bars";
550 bars.name = "Bars";
551 bars.description = "Bar locations";
552 bars.unit = "";
553 bars.hasFixedBinCount = true;
554 bars.binCount = 0;
555 bars.sampleType = OutputDescriptor::VariableSampleRate;
556 bars.sampleRate = 1.0 / m_stepSecs;
557 m_barsOutputNumber = outputCounter++;
558
559 OutputDescriptor beatcounts;
560 beatcounts.identifier = "beatcounts";
561 beatcounts.name = "Beat Count";
562 beatcounts.description = "Beat counter function";
563 beatcounts.unit = "";
564 beatcounts.hasFixedBinCount = true;
565 beatcounts.binCount = 1;
566 beatcounts.sampleType = OutputDescriptor::VariableSampleRate;
567 beatcounts.sampleRate = 1.0 / m_stepSecs;
568 m_beatcountsOutputNumber = outputCounter++;
569
570 OutputDescriptor beatsd;
571 beatsd.identifier = "beatsd";
572 beatsd.name = "Beat Spectral Difference";
573 beatsd.description = "Beat spectral difference function used for bar-line detection";
574 beatsd.unit = "";
575 beatsd.hasFixedBinCount = true;
576 beatsd.binCount = 1;
577 beatsd.sampleType = OutputDescriptor::VariableSampleRate;
578 beatsd.sampleRate = 1.0 / m_stepSecs;
579 m_beatsdOutputNumber = outputCounter++;
580
581 OutputDescriptor logscalespec;
582 logscalespec.identifier = "logscalespec";
583 logscalespec.name = "Log-Frequency Spectrum";
584 logscalespec.description = "Spectrum with linear frequency on a log scale.";
585 logscalespec.unit = "";
586 logscalespec.hasFixedBinCount = true;
587 logscalespec.binCount = nNote;
588 logscalespec.hasKnownExtents = false;
589 logscalespec.isQuantized = false;
590 logscalespec.sampleType = OutputDescriptor::FixedSampleRate;
591 logscalespec.hasDuration = false;
592 logscalespec.sampleRate = m_inputSampleRate/2048;
593 m_logscalespecOutputNumber = outputCounter++;
594
595 OutputDescriptor bothchroma;
596 bothchroma.identifier = "bothchroma";
597 bothchroma.name = "Chromagram and Bass Chromagram";
598 bothchroma.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS approximate transcription.";
599 bothchroma.unit = "";
600 bothchroma.hasFixedBinCount = true;
601 bothchroma.binCount = 24;
602 bothchroma.hasKnownExtents = false;
603 bothchroma.isQuantized = false;
604 bothchroma.sampleType = OutputDescriptor::FixedSampleRate;
605 bothchroma.hasDuration = false;
606 bothchroma.sampleRate = m_inputSampleRate/2048;
607 m_bothchromaOutputNumber = outputCounter++;
608
609 OutputDescriptor qchromafw;
610 qchromafw.identifier = "qchromafw";
611 qchromafw.name = "Pseudo-Quantised Chromagram and Bass Chromagram";
612 qchromafw.description = "Pseudo-Quantised Chromagram and Bass Chromagram (frames between two beats are identical).";
613 qchromafw.unit = "";
614 qchromafw.hasFixedBinCount = true;
615 qchromafw.binCount = 24;
616 qchromafw.hasKnownExtents = false;
617 qchromafw.isQuantized = false;
618 qchromafw.sampleType = OutputDescriptor::FixedSampleRate;
619 qchromafw.hasDuration = false;
620 qchromafw.sampleRate = m_inputSampleRate/2048;
621 m_qchromafwOutputNumber = outputCounter++;
622
623 OutputDescriptor qchroma;
624 qchroma.identifier = "qchroma";
625 qchroma.name = "Quantised Chromagram and Bass Chromagram";
626 qchroma.description = "Quantised Chromagram and Bass Chromagram.";
627 qchroma.unit = "";
628 qchroma.hasFixedBinCount = true;
629 qchroma.binCount = 24;
630 qchroma.hasKnownExtents = false;
631 qchroma.isQuantized = false;
632 qchroma.sampleType = OutputDescriptor::FixedSampleRate;
633 qchroma.hasDuration = true;
634 qchroma.sampleRate = m_inputSampleRate/2048;
635 m_qchromaOutputNumber = outputCounter++;
636
637 OutputDescriptor segm;
638 segm.identifier = "segmentation";
639 segm.name = "Segmentation";
640 segm.description = "Segmentation";
641 segm.unit = "segment-type";
642 segm.hasFixedBinCount = true;
643 //segm.binCount = 24;
644 segm.binCount = 1;
645 segm.hasKnownExtents = true;
646 segm.minValue = 1;
647 segm.maxValue = 5;
648 segm.isQuantized = true;
649 segm.quantizeStep = 1;
650 segm.sampleType = OutputDescriptor::VariableSampleRate;
651 segm.sampleRate = 1.0 / m_stepSecs;
652 segm.hasDuration = true;
653 m_segmOutputNumber = outputCounter++;
654
655
656 /*
657 OutputList list;
658 OutputDescriptor segmentation;
659 segmentation.identifier = "segmentation";
660 segmentation.name = "Segmentation";
661 segmentation.description = "Segmentation";
662 segmentation.unit = "segment-type";
663 segmentation.hasFixedBinCount = true;
664 segmentation.binCount = 1;
665 segmentation.hasKnownExtents = true;
666 segmentation.minValue = 1;
667 segmentation.maxValue = nSegmentTypes;
668 segmentation.isQuantized = true;
669 segmentation.quantizeStep = 1;
670 segmentation.sampleType = OutputDescriptor::VariableSampleRate;
671 segmentation.sampleRate = m_inputSampleRate / getPreferredStepSize();
672 list.push_back(segmentation);
673 return list;
674 */
675
676
677 list.push_back(beat);
678 list.push_back(bars);
679 list.push_back(beatcounts);
680 list.push_back(beatsd);
681 list.push_back(logscalespec);
682 list.push_back(bothchroma);
683 list.push_back(qchromafw);
684 list.push_back(qchroma);
685 list.push_back(segm);
686
687 return list;
688 }
689
690 // Executed for each frame - called from the host
691
692 // We use time domain input, because DownBeat requires it -- so we
693 // use the time-domain version of DetectionFunction::process which
694 // does its own FFT. It requires doubles as input, so we need to
695 // make a temporary copy
696
697 // We only support a single input channel
698 Segmentino::FeatureSet Segmentino::process(const float *const *inputBuffers,Vamp::RealTime timestamp)
699 {
700 if (!m_d) {
701 cerr << "ERROR: Segmentino::process: "
702 << "Segmentino has not been initialised"
703 << endl;
704 return FeatureSet();
705 }
706
707 const int fl = m_d->dfConfig.frameLength;
708 #ifndef __GNUC__
709 double *dfinput = (double *)alloca(fl * sizeof(double));
710 #else
711 double dfinput[fl];
712 #endif
713 int sampleOffset = ((m_chromaFramesizeFactor-1) * fl) / 2;
714
715 // Since chroma needs a much longer frame size, we only ever use the very
716 // beginning of the frame for beat tracking.
717 for (int i = 0; i < fl; ++i) dfinput[i] = inputBuffers[0][i];
718 double output = m_d->df->process(dfinput);
719
720 if (m_d->dfOutput.empty()) m_d->origin = timestamp;
721
722 // std::cerr << "df[" << m_d->dfOutput.size() << "] is " << output << std::endl;
723 m_d->dfOutput.push_back(output);
724
725 // Downsample and store the incoming audio block.
726 // We have an overlap on the incoming audio stream (step size is
727 // half block size) -- this function is configured to take only a
728 // step size's worth, so effectively ignoring the overlap. Note
729 // however that this means we omit the last blocksize - stepsize
730 // samples completely for the purposes of barline detection
731 // (hopefully not a problem)
732 m_d->downBeat->pushAudioBlock(inputBuffers[0]);
733
734 // The following is not done every time, but only every m_chromaFramesizeFactor times,
735 // because the chroma does not need dense time frames.
736
737 if (m_pluginFrameCount % m_chromaStepsizeFactor == 0)
738 {
739
740 // Window the full time domain, data, FFT it and process chroma stuff.
741
742 #ifndef __GNUC__
743 float *windowedBuffers = (float *)alloca(m_chromadata->blockSize * sizeof(float));
744 #else
745 float windowedBuffers[m_chromadata->blockSize];
746 #endif
747 m_chromadata->window.cut(&inputBuffers[0][0], &windowedBuffers[0]);
748
749 // adjust timestamp (we want the middle of the frame)
750 timestamp = timestamp + Vamp::RealTime::frame2RealTime(sampleOffset, lrintf(m_inputSampleRate));
751
752 m_chromadata->baseProcess(&windowedBuffers[0], timestamp);
753
754 }
755 m_pluginFrameCount++;
756
757 FeatureSet fs;
758 fs[m_logscalespecOutputNumber].push_back(
759 m_chromadata->logSpectrum.back());
760 return fs;
761 }
762
763 Segmentino::FeatureSet Segmentino::getRemainingFeatures()
764 {
765 if (!m_d) {
766 cerr << "ERROR: Segmentino::getRemainingFeatures: "
767 << "Segmentino has not been initialised"
768 << endl;
769 return FeatureSet();
770 }
771
772 FeatureSet masterFeatureset = beatTrack();
773 Vamp::RealTime last_beattime = masterFeatureset[m_beatOutputNumber][masterFeatureset[m_beatOutputNumber].size()-1].timestamp;
774 masterFeatureset[m_beatOutputNumber].clear();
775 Vamp::RealTime beattime = Vamp::RealTime::fromSeconds(1.0);
776 while (beattime < last_beattime)
777 {
778 Feature beatfeature;
779 beatfeature.hasTimestamp = true;
780 beatfeature.timestamp = beattime;
781 masterFeatureset[m_beatOutputNumber].push_back(beatfeature);
782 beattime = beattime + Vamp::RealTime::fromSeconds(0.5);
783 }
784
785
786 FeatureList chromaList = chromaFeatures();
787
788 for (int i = 0; i < (int)chromaList.size(); ++i)
789 {
790 masterFeatureset[m_bothchromaOutputNumber].push_back(chromaList[i]);
791 }
792
793 // quantised and pseudo-quantised (beat-wise) chroma
794 std::vector<FeatureList> quantisedChroma = beatQuantiser(chromaList, masterFeatureset[m_beatOutputNumber]);
795
796 if (quantisedChroma.empty()) return masterFeatureset;
797
798 masterFeatureset[m_qchromafwOutputNumber] = quantisedChroma[0];
799 masterFeatureset[m_qchromaOutputNumber] = quantisedChroma[1];
800
801 // Segmentation
802 try {
803 masterFeatureset[m_segmOutputNumber] = runSegmenter(quantisedChroma[1]);
804 } catch (std::bad_alloc &a) {
805 cerr << "ERROR: Segmentino::getRemainingFeatures: Failed to run segmenter, not enough memory (song too long?)" << endl;
806 }
807
808 return(masterFeatureset);
809 }
810
811 /* ------ Beat Tracker ------ */
812
813 Segmentino::FeatureSet Segmentino::beatTrack()
814 {
815 vector<double> df;
816 vector<double> beatPeriod;
817 vector<double> tempi;
818
819 for (int i = 2; i < (int)m_d->dfOutput.size(); ++i) { // discard first two elts
820 df.push_back(m_d->dfOutput[i]);
821 beatPeriod.push_back(0.0);
822 }
823 if (df.empty()) return FeatureSet();
824
825 TempoTrackV2 tt(m_inputSampleRate, m_d->dfConfig.stepSize);
826 tt.calculateBeatPeriod(df, beatPeriod, tempi);
827
828 vector<double> beats;
829 tt.calculateBeats(df, beatPeriod, beats);
830
831 vector<int> downbeats;
832 size_t downLength = 0;
833 const float *downsampled = m_d->downBeat->getBufferedAudio(downLength);
834 m_d->downBeat->findDownBeats(downsampled, downLength, beats, downbeats);
835
836 vector<double> beatsd;
837 m_d->downBeat->getBeatSD(beatsd);
838
839 /*std::cout << "BeatTracker: found downbeats at: ";
840 for (int i = 0; i < downbeats.size(); ++i) std::cout << downbeats[i] << " " << std::endl;*/
841
842 FeatureSet returnFeatures;
843
844 char label[20];
845
846 int dbi = 0;
847 int beat = 0;
848 int bar = 0;
849
850 if (!downbeats.empty()) {
851 // get the right number for the first beat; this will be
852 // incremented before use (at top of the following loop)
853 int firstDown = downbeats[0];
854 beat = m_bpb - firstDown - 1;
855 if (beat == m_bpb) beat = 0;
856 }
857
858 for (int i = 0; i < (int)beats.size(); ++i) {
859
860 int frame = beats[i] * m_d->dfConfig.stepSize;
861
862 if (dbi < (int)downbeats.size() && i == downbeats[dbi]) {
863 beat = 0;
864 ++bar;
865 ++dbi;
866 } else {
867 ++beat;
868 }
869
870 /* Ooutput Section */
871
872 // outputs are:
873 //
874 // 0 -> beats
875 // 1 -> bars
876 // 2 -> beat counter function
877
878 Feature feature;
879 feature.hasTimestamp = true;
880 feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime (frame, lrintf(m_inputSampleRate));
881
882 sprintf(label, "%d", beat + 1);
883 feature.label = label;
884 returnFeatures[m_beatOutputNumber].push_back(feature); // labelled beats
885
886 feature.values.push_back(beat + 1);
887 returnFeatures[m_beatcountsOutputNumber].push_back(feature); // beat function
888
889 if (i > 0 && i <= (int)beatsd.size()) {
890 feature.values.clear();
891 feature.values.push_back(beatsd[i-1]);
892 feature.label = "";
893 returnFeatures[m_beatsdOutputNumber].push_back(feature); // beat spectral difference
894 }
895
896 if (beat == 0) {
897 feature.values.clear();
898 sprintf(label, "%d", bar);
899 feature.label = label;
900 returnFeatures[m_barsOutputNumber].push_back(feature); // bars
901 }
902 }
903
904 return returnFeatures;
905 }
906
907
908 /* ------ Chroma Extractor ------ */
909
910 Segmentino::FeatureList Segmentino::chromaFeatures()
911 {
912
913 FeatureList returnFeatureList;
914 FeatureList tunedlogfreqspec;
915
916 if (m_chromadata->logSpectrum.size() == 0) return returnFeatureList;
917
918 /** Calculate Tuning
919 calculate tuning from (using the angle of the complex number defined by the
920 cumulative mean real and imag values)
921 **/
922 float meanTuningImag = 0;
923 float meanTuningReal = 0;
924 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
925 meanTuningReal += m_chromadata->meanTunings[iBPS] * m_chromadata->cosvalues[iBPS];
926 meanTuningImag += m_chromadata->meanTunings[iBPS] * m_chromadata->sinvalues[iBPS];
927 }
928 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
929 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
930 int intShift = floor(normalisedtuning * 3);
931 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
932
933 char buffer0 [50];
934
935 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
936
937 /** Tune Log-Frequency Spectrogram
938 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
939 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
940 **/
941 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
942
943 float tempValue = 0;
944
945 int count = 0;
946
947 for (FeatureList::iterator i = m_chromadata->logSpectrum.begin(); i != m_chromadata->logSpectrum.end(); ++i)
948 {
949
950 Feature f1 = *i;
951 Feature f2; // tuned log-frequency spectrum
952
953 f2.hasTimestamp = true;
954 f2.timestamp = f1.timestamp;
955
956 f2.values.push_back(0.0);
957 f2.values.push_back(0.0); // set lower edge to zero
958
959 if (m_chromadata->tuneLocal) {
960 intShift = floor(m_chromadata->localTuning[count] * 3);
961 floatShift = m_chromadata->localTuning[count] * 3 - intShift;
962 // floatShift is a really bad name for this
963 }
964
965 for (int k = 2; k < (int)f1.values.size() - 3; ++k)
966 { // interpolate all inner bins
967 tempValue = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift;
968 f2.values.push_back(tempValue);
969 }
970
971 f2.values.push_back(0.0);
972 f2.values.push_back(0.0);
973 f2.values.push_back(0.0); // upper edge
974
975 vector<float> runningmean = SpecialConvolution(f2.values,m_chromadata->hw);
976 vector<float> runningstd;
977 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
978 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
979 }
980 runningstd = SpecialConvolution(runningstd,m_chromadata->hw); // second step convolve
981 for (int i = 0; i < nNote; i++)
982 {
983
984 runningstd[i] = sqrt(runningstd[i]);
985 // square root to finally have running std
986
987 if (runningstd[i] > 0)
988 {
989 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
990 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_chromadata->whitening) : 0;
991 }
992
993 if (f2.values[i] < 0) {
994
995 cerr << "ERROR: negative value in logfreq spectrum" << endl;
996
997 }
998 }
999 tunedlogfreqspec.push_back(f2);
1000 count++;
1001 }
1002 cerr << "done." << endl;
1003 /** Semitone spectrum and chromagrams
1004 Semitone-spaced log-frequency spectrum derived
1005 from the tuned log-freq spectrum above. the spectrum
1006 is inferred using a non-negative least squares algorithm.
1007 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
1008 bass and treble stacked onto each other).
1009 **/
1010 if (m_chromadata->useNNLS == 0) {
1011 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
1012 } else {
1013 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
1014 }
1015
1016 vector<float> oldchroma = vector<float>(12,0);
1017 vector<float> oldbasschroma = vector<float>(12,0);
1018 count = 0;
1019
1020 for (FeatureList::iterator it = tunedlogfreqspec.begin(); it != tunedlogfreqspec.end(); ++it) {
1021 Feature logfreqsp = *it; // logfreq spectrum
1022 Feature bothchroma; // treble and bass chromagram
1023
1024 bothchroma.hasTimestamp = true;
1025 bothchroma.timestamp = logfreqsp.timestamp;
1026
1027 float b[nNote];
1028
1029 bool some_b_greater_zero = false;
1030 float sumb = 0;
1031 for (int i = 0; i < nNote; i++) {
1032 b[i] = logfreqsp.values[i];
1033 sumb += b[i];
1034 if (b[i] > 0) {
1035 some_b_greater_zero = true;
1036 }
1037 }
1038
1039 // here's where the non-negative least squares algorithm calculates the note activation x
1040
1041 vector<float> chroma = vector<float>(12, 0);
1042 vector<float> basschroma = vector<float>(12, 0);
1043 float currval;
1044 int iSemitone = 0;
1045
1046 if (some_b_greater_zero) {
1047 if (m_chromadata->useNNLS == 0) {
1048 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
1049 currval = 0;
1050 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
1051 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
1052 }
1053 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
1054 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
1055 iSemitone++;
1056 }
1057
1058 } else {
1059 float x[84+1000];
1060 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
1061 vector<int> signifIndex;
1062 int index=0;
1063 sumb /= 84.0;
1064 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
1065 float currval = 0;
1066 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
1067 currval += b[iNote + iBPS];
1068 }
1069 if (currval > 0) signifIndex.push_back(index);
1070 index++;
1071 }
1072 float rnorm;
1073 float w[84+1000];
1074 float zz[84+1000];
1075 int indx[84+1000];
1076 int mode;
1077 int dictsize = nNote*signifIndex.size();
1078
1079 float *curr_dict = new float[dictsize];
1080 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
1081 for (int iBin = 0; iBin < nNote; iBin++) {
1082 curr_dict[iNote * nNote + iBin] =
1083 1.0 * m_chromadata->dict[signifIndex[iNote] * nNote + iBin];
1084 }
1085 }
1086 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
1087 delete [] curr_dict;
1088 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
1089 // cerr << mode << endl;
1090 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
1091 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
1092 }
1093 }
1094 }
1095
1096 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end());
1097 // just stack the both chromas
1098
1099 bothchroma.values = chroma;
1100 returnFeatureList.push_back(bothchroma);
1101 count++;
1102 }
1103 cerr << "done." << endl;
1104
1105 return returnFeatureList;
1106 }
1107
1108 /* ------ Beat Quantizer ------ */
1109
1110 std::vector<Vamp::Plugin::FeatureList>
1111 Segmentino::beatQuantiser(Vamp::Plugin::FeatureList chromagram, Vamp::Plugin::FeatureList beats)
1112 {
1113 std::vector<FeatureList> returnVector;
1114
1115 FeatureList fwQchromagram; // frame-wise beat-quantised chroma
1116 FeatureList bwQchromagram; // beat-wise beat-quantised chroma
1117
1118
1119 size_t nChromaFrame = chromagram.size();
1120 size_t nBeat = beats.size();
1121
1122 if (nBeat == 0 && nChromaFrame == 0) return returnVector;
1123
1124 int nBin = chromagram[0].values.size();
1125
1126 vector<float> tempChroma = vector<float>(nBin);
1127
1128 Vamp::RealTime beatTimestamp = Vamp::RealTime::zeroTime;
1129 int currBeatCount = -1; // start before first beat
1130 int framesInBeat = 0;
1131
1132 for (size_t iChroma = 0; iChroma < nChromaFrame; ++iChroma)
1133 {
1134 Vamp::RealTime frameTimestamp = chromagram[iChroma].timestamp;
1135 Vamp::RealTime newBeatTimestamp;
1136
1137 if (currBeatCount != (int)beats.size() - 1) {
1138 newBeatTimestamp = beats[currBeatCount+1].timestamp;
1139 } else {
1140 newBeatTimestamp = chromagram[nChromaFrame-1].timestamp;
1141 }
1142
1143 if (frameTimestamp > newBeatTimestamp ||
1144 iChroma == nChromaFrame-1)
1145 {
1146 // new beat (or last chroma frame)
1147 // 1. finish all the old beat processing
1148 if (framesInBeat > 0)
1149 {
1150 for (int i = 0; i < nBin; ++i) tempChroma[i] /= framesInBeat; // average
1151 }
1152
1153 Feature bwQchromaFrame;
1154 bwQchromaFrame.hasTimestamp = true;
1155 bwQchromaFrame.timestamp = beatTimestamp;
1156 bwQchromaFrame.values = tempChroma;
1157 bwQchromaFrame.duration = newBeatTimestamp - beatTimestamp;
1158 bwQchromagram.push_back(bwQchromaFrame);
1159
1160 for (int iFrame = -framesInBeat; iFrame < 0; ++iFrame)
1161 {
1162 Feature fwQchromaFrame;
1163 fwQchromaFrame.hasTimestamp = true;
1164 fwQchromaFrame.timestamp = chromagram[iChroma+iFrame].timestamp;
1165 fwQchromaFrame.values = tempChroma; // all between two beats get the same
1166 fwQchromagram.push_back(fwQchromaFrame);
1167 }
1168
1169 // 2. increments / resets for current (new) beat
1170 currBeatCount++;
1171 beatTimestamp = newBeatTimestamp;
1172 for (int i = 0; i < nBin; ++i) tempChroma[i] = 0; // average
1173 framesInBeat = 0;
1174 }
1175 framesInBeat++;
1176 for (int i = 0; i < nBin; ++i) tempChroma[i] += chromagram[iChroma].values[i];
1177 }
1178 returnVector.push_back(fwQchromagram);
1179 returnVector.push_back(bwQchromagram);
1180 return returnVector;
1181 }
1182
1183
1184
1185 /* -------------------------------- */
1186 /* ------ Support Functions ------ */
1187 /* -------------------------------- */
1188
1189 // one-dimesion median filter
1190 arma::vec medfilt1(arma::vec v, int medfilt_length)
1191 {
1192 // TODO: check if this works with odd and even medfilt_length !!!
1193 int halfWin = medfilt_length/2;
1194
1195 // result vector
1196 arma::vec res = arma::zeros<arma::vec>(v.size());
1197
1198 // padding
1199 arma::vec padV = arma::zeros<arma::vec>(v.size()+medfilt_length-1);
1200
1201 for (int i=medfilt_length/2; i < medfilt_length/2+(int)v.size(); ++ i)
1202 {
1203 padV(i) = v(i-medfilt_length/2);
1204 }
1205
1206 // the above loop leaves the boundaries at 0,
1207 // the two loops below fill them with the start or end values of v at start and end
1208 for (int i = 0; i < halfWin; ++i) padV(i) = v(0);
1209 for (int i = halfWin+(int)v.size(); i < (int)v.size()+2*halfWin; ++i) padV(i) = v(v.size()-1);
1210
1211
1212
1213 // Median filter
1214 arma::vec win = arma::zeros<arma::vec>(medfilt_length);
1215
1216 for (int i=0; i < (int)v.size(); ++i)
1217 {
1218 win = padV.subvec(i,i+halfWin*2);
1219 win = sort(win);
1220 res(i) = win(halfWin);
1221 }
1222
1223 return res;
1224 }
1225
1226
1227 // Quantile
1228 double quantile(arma::vec v, double p)
1229 {
1230 arma::vec sortV = arma::sort(v);
1231 int n = sortV.size();
1232 arma::vec x = arma::zeros<vec>(n+2);
1233 arma::vec y = arma::zeros<vec>(n+2);
1234
1235 x(0) = 0;
1236 x(n+1) = 100;
1237
1238 for (int i=1; i<n+1; ++i)
1239 x(i) = 100*(0.5+(i-1))/n;
1240
1241 y(0) = sortV(0);
1242 y.subvec(1,n) = sortV;
1243 y(n+1) = sortV(n-1);
1244
1245 arma::uvec x2index = find(x>=p*100);
1246
1247 // Interpolation
1248 double x1 = x(x2index(0)-1);
1249 double x2 = x(x2index(0));
1250 double y1 = y(x2index(0)-1);
1251 double y2 = y(x2index(0));
1252
1253 double res = (y2-y1)/(x2-x1)*(p*100-x1)+y1;
1254
1255 return res;
1256 }
1257
1258 // Max Filtering
1259 arma::mat maxfilt1(arma::mat inmat, int len)
1260 {
1261 arma::mat outmat = inmat;
1262
1263 for (int i=0; i < (int)inmat.n_rows; ++i)
1264 {
1265 if (arma::sum(inmat.row(i)) > 0)
1266 {
1267 // Take a window of rows
1268 int startWin;
1269 int endWin;
1270
1271 if (0 > i-len)
1272 startWin = 0;
1273 else
1274 startWin = i-len;
1275
1276 if ((int)inmat.n_rows-1 < i+len-1)
1277 endWin = inmat.n_rows-1;
1278 else
1279 endWin = i+len-1;
1280
1281 outmat(i,span::all) = arma::max(inmat(span(startWin,endWin),span::all));
1282 }
1283 }
1284
1285 return outmat;
1286
1287 }
1288
1289 // Null Parts
1290 Part nullpart(vector<Part> parts, arma::vec barline)
1291 {
1292 arma::uvec nullindices = arma::ones<arma::uvec>(barline.size());
1293 for (int iPart=0; iPart<(int)parts.size(); ++iPart)
1294 {
1295 //for (int iIndex=0; iIndex < parts[0].indices.size(); ++iIndex)
1296 for (int iIndex=0; iIndex < (int)parts[iPart].indices.size(); ++iIndex)
1297 for (int i=0; i<parts[iPart].n; ++i)
1298 {
1299 int ind = parts[iPart].indices[iIndex]+i;
1300 nullindices(ind) = 0;
1301 }
1302 }
1303
1304 Part newPart;
1305 newPart.n = 1;
1306 uvec q = find(nullindices > 0);
1307
1308 for (int i=0; i<(int)q.size();++i)
1309 newPart.indices.push_back(q(i));
1310
1311 newPart.letter = '-';
1312 newPart.value = 0;
1313 newPart.level = 0;
1314
1315 return newPart;
1316 }
1317
1318
1319 // Merge Nulls
1320 void mergenulls(vector<Part> &parts)
1321 {
1322 for (int iPart=0; iPart<(int)parts.size(); ++iPart)
1323 {
1324
1325 vector<Part> newVectorPart;
1326
1327 if (parts[iPart].letter.compare("-")==0)
1328 {
1329 sort (parts[iPart].indices.begin(), parts[iPart].indices.end());
1330 int newpartind = -1;
1331
1332 vector<int> indices;
1333 indices.push_back(-2);
1334
1335 for (int iIndex=0; iIndex<(int)parts[iPart].indices.size(); ++iIndex)
1336 indices.push_back(parts[iPart].indices[iIndex]);
1337
1338 for (int iInd=1; iInd < (int)indices.size(); ++iInd)
1339 {
1340 if (indices[iInd] - indices[iInd-1] > 1)
1341 {
1342 newpartind++;
1343
1344 Part newPart;
1345 newPart.letter = 'N';
1346 std::stringstream out;
1347 out << newpartind+1;
1348 newPart.letter.append(out.str());
1349 // newPart.value = 20+newpartind+1;
1350 newPart.value = 0;
1351 newPart.n = 1;
1352 newPart.indices.push_back(indices[iInd]);
1353 newPart.level = 0;
1354
1355 newVectorPart.push_back(newPart);
1356 }
1357 else
1358 {
1359 newVectorPart[newpartind].n = newVectorPart[newpartind].n+1;
1360 }
1361 }
1362 parts.erase (parts.end());
1363
1364 for (int i=0; i<(int)newVectorPart.size(); ++i)
1365 parts.push_back(newVectorPart[i]);
1366 }
1367 }
1368 }
1369
1370 /* ------ Segmentation ------ */
1371
1372 vector<Part> songSegment(Vamp::Plugin::FeatureList quantisedChromagram)
1373 {
1374
1375
1376 /* ------ Parameters ------ */
1377 double thresh_beat = 0.85;
1378 double thresh_seg = 0.80;
1379 int medfilt_length = 5;
1380 int minlength = 28;
1381 int maxlength = 2*128;
1382 double quantilePerc = 0.1;
1383 /* ------------------------ */
1384
1385
1386 // Collect Info
1387 int nBeat = quantisedChromagram.size(); // Number of feature vector
1388 int nFeatValues = quantisedChromagram[0].values.size(); // Number of values for each feature vector
1389
1390 if (nBeat < minlength) {
1391 // return a single part
1392 vector<Part> parts;
1393 Part newPart;
1394 newPart.n = 1;
1395 newPart.indices.push_back(0);
1396 newPart.letter = "n1";
1397 newPart.value = 20;
1398 newPart.level = 0;
1399 parts.push_back(newPart);
1400 return parts;
1401 }
1402
1403 arma::irowvec timeStamp = arma::zeros<arma::imat>(1,nBeat); // Vector of Time Stamps
1404
1405 // Save time stamp as a Vector
1406 if (quantisedChromagram[0].hasTimestamp)
1407 {
1408 for (int i = 0; i < nBeat; ++ i)
1409 timeStamp[i] = quantisedChromagram[i].timestamp.nsec;
1410 }
1411
1412
1413 // Build a ObservationTOFeatures Matrix
1414 arma::mat featVal = arma::zeros<mat>(nBeat,nFeatValues/2);
1415
1416 for (int i = 0; i < nBeat; ++ i)
1417 for (int j = 0; j < nFeatValues/2; ++ j)
1418 {
1419 featVal(i,j) = 0.8 * quantisedChromagram[i].values[j] + quantisedChromagram[i].values[j+12]; // bass attenuated
1420 }
1421
1422 // Set to arbitrary value to feature vectors with low std
1423 arma::mat a = stddev(featVal,1,1);
1424
1425 // Feature Correlation Matrix
1426 arma::mat simmat0 = 1-arma::cor(arma::trans(featVal));
1427
1428
1429 for (int i = 0; i < nBeat; ++ i)
1430 {
1431 if (a(i)<0.000001)
1432 {
1433 featVal(i,1) = 1000; // arbitrary
1434
1435 for (int j = 0; j < nFeatValues/2; ++j)
1436 {
1437 simmat0(i,j) = 1;
1438 simmat0(j,i) = 1;
1439 }
1440 }
1441 }
1442
1443 arma::mat simmat = 1-simmat0/2;
1444
1445 // -------- To delate when the proble with the add of beat will be solved -------
1446 for (int i = 0; i < nBeat; ++ i)
1447 for (int j = 0; j < nBeat; ++ j)
1448 if (!std::isfinite(simmat(i,j)))
1449 simmat(i,j)=0;
1450 // ------------------------------------------------------------------------------
1451
1452 // Median Filtering applied to the Correlation Matrix
1453 // The median filter is for each diagonal of the Matrix
1454 arma::mat median_simmat = arma::zeros<arma::mat>(nBeat,nBeat);
1455
1456 for (int i = 0; i < nBeat; ++ i)
1457 {
1458 arma::vec temp = medfilt1(simmat.diag(i),medfilt_length);
1459 median_simmat.diag(i) = temp;
1460 median_simmat.diag(-i) = temp;
1461 }
1462
1463 for (int i = 0; i < nBeat; ++ i)
1464 for (int j = 0; j < nBeat; ++ j)
1465 if (!std::isfinite(median_simmat(i,j)))
1466 median_simmat(i,j) = 0;
1467
1468 // -------------- NOT CONVERTED -------------------------------------
1469 // if param.seg.standardise
1470 // med_median_simmat = repmat(median(median_simmat),nBeat,1);
1471 // std_median_simmat = repmat(std(median_simmat),nBeat,1);
1472 // median_simmat = (median_simmat - med_median_simmat) ./ std_median_simmat;
1473 // end
1474 // --------------------------------------------------------
1475
1476 // Retrieve Bar Bounderies
1477 arma::uvec dup = find(median_simmat > thresh_beat);
1478 arma::mat potential_duplicates = arma::zeros<arma::mat>(nBeat,nBeat);
1479 potential_duplicates.elem(dup) = arma::ones<arma::vec>(dup.size());
1480 potential_duplicates = trimatu(potential_duplicates);
1481
1482 int nPartlengths = round((maxlength-minlength)/4)+1;
1483 arma::vec partlengths = zeros<arma::vec>(nPartlengths);
1484
1485 for (int i = 0; i < nPartlengths; ++ i)
1486 partlengths(i) = (i*4) + minlength;
1487
1488 // initialise arrays
1489 arma::cube simArray = zeros<arma::cube>(nBeat,nBeat,nPartlengths);
1490 arma::cube decisionArray2 = zeros<arma::cube>(nBeat,nBeat,nPartlengths);
1491
1492 for (int iLength = 0; iLength < nPartlengths; ++ iLength)
1493 // for (int iLength = 0; iLength < 20; ++ iLength)
1494 {
1495 int len = partlengths(iLength);
1496 int nUsedBeat = nBeat - len + 1; // number of potential rep beginnings: they can't overlap at the end of the song
1497
1498 if (nUsedBeat < 1) continue;
1499
1500 for (int iBeat = 0; iBeat < nUsedBeat; ++ iBeat) // looping over all columns (arbitrarily chosen columns)
1501 {
1502 arma::uvec help2 = find(potential_duplicates(span(0,nUsedBeat-1),iBeat)==1);
1503
1504 for (int i=0; i < (int)help2.size(); ++i)
1505 {
1506
1507 // measure how well two length len segments go together
1508 int kBeat = help2(i);
1509 arma::vec distrib = median_simmat(span(iBeat,iBeat+len-1),span(kBeat,kBeat+len-1)).diag(0);
1510 simArray(iBeat,kBeat,iLength) = quantile(distrib,quantilePerc);
1511 }
1512 }
1513
1514 arma::mat tempM = simArray(span(0,nUsedBeat-1),span(0,nUsedBeat-1),span(iLength,iLength));
1515 simArray.slice(iLength)(span(0,nUsedBeat-1),span(0,nUsedBeat-1)) = tempM + arma::trans(tempM) - (eye<mat>(nUsedBeat,nUsedBeat)%tempM);
1516
1517 // convolution
1518 arma::vec K = arma::zeros<vec>(3);
1519 K << 0.01 << 0.98 << 0.01;
1520
1521
1522 for (int i=0; i < (int)simArray.n_rows; ++i)
1523 {
1524 arma::rowvec t = arma::conv((arma::rowvec)simArray.slice(iLength).row(i),K);
1525 simArray.slice(iLength)(i,span::all) = t.subvec(1,t.size()-2);
1526 }
1527
1528 // take only over-average bars that do not overlap
1529
1530 arma::mat temp = arma::zeros<mat>(simArray.n_rows, simArray.n_cols);
1531 temp(span::all, span(0,nUsedBeat-1)) = simArray.slice(iLength)(span::all,span(0,nUsedBeat-1));
1532
1533 for (int i=0; i < (int)temp.n_rows; ++i)
1534 for (int j=0; j < nUsedBeat; ++j)
1535 if (temp(i,j) < thresh_seg)
1536 temp(i,j) = 0;
1537
1538 decisionArray2.slice(iLength) = temp;
1539
1540 arma::mat maxMat = maxfilt1(decisionArray2.slice(iLength),len-1);
1541
1542 for (int i=0; i < (int)decisionArray2.n_rows; ++i)
1543 for (int j=0; j < (int)decisionArray2.n_cols; ++j)
1544 if (decisionArray2.slice(iLength)(i,j) < maxMat(i,j))
1545 decisionArray2.slice(iLength)(i,j) = 0;
1546
1547 decisionArray2.slice(iLength) = decisionArray2.slice(iLength) % arma::trans(decisionArray2.slice(iLength));
1548
1549 for (int i=0; i < (int)simArray.n_rows; ++i)
1550 for (int j=0; j < (int)simArray.n_cols; ++j)
1551 if (simArray.slice(iLength)(i,j) < thresh_seg)
1552 potential_duplicates(i,j) = 0;
1553 }
1554
1555 // Milk the data
1556
1557 arma::mat bestval;
1558
1559 for (int iLength=0; iLength<nPartlengths; ++iLength)
1560 {
1561 arma::mat temp = arma::zeros<arma::mat>(decisionArray2.n_rows,decisionArray2.n_cols);
1562
1563 for (int rows=0; rows < (int)decisionArray2.n_rows; ++rows)
1564 for (int cols=0; cols < (int)decisionArray2.n_cols; ++cols)
1565 if (decisionArray2.slice(iLength)(rows,cols) > 0)
1566 temp(rows,cols) = 1;
1567
1568 arma::vec currLogicSum = arma::sum(temp,1);
1569
1570 for (int iBeat=0; iBeat < nBeat; ++iBeat)
1571 if (currLogicSum(iBeat) > 1)
1572 {
1573 arma::vec t = decisionArray2.slice(iLength)(span::all,iBeat);
1574 double currSum = sum(t);
1575
1576 int count = 0;
1577 for (int i=0; i < (int)t.size(); ++i)
1578 if (t(i)>0)
1579 count++;
1580
1581 currSum = (currSum/count)/2;
1582
1583 arma::rowvec t1;
1584 t1 << (currLogicSum(iBeat)-1) * partlengths(iLength) << currSum << iLength << iBeat << currLogicSum(iBeat);
1585
1586 bestval = join_cols(bestval,t1);
1587 }
1588 }
1589
1590 // Definition of the resulting vector
1591 vector<Part> parts;
1592
1593 // make a table of all valid sets of parts
1594
1595 char partletters[] = {'A','B','C','D','E','F','G', 'H','I','J','K','L','M','N','O','P','Q','R','S'};
1596 int partvalues[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19};
1597 arma::vec valid_sets = arma::ones<arma::vec>(bestval.n_rows);
1598
1599 if (!bestval.is_empty())
1600 {
1601
1602 // In questo punto viene introdotto un errore alla 3 cifra decimale
1603
1604 arma::colvec t = arma::zeros<arma::colvec>(bestval.n_rows);
1605 for (int i=0; i < (int)bestval.n_rows; ++i)
1606 {
1607 t(i) = bestval(i,1)*2;
1608 }
1609
1610 double m = t.max();
1611
1612 bestval(span::all,1) = bestval(span::all,1) / m;
1613 bestval(span::all,0) = bestval(span::all,0) + bestval(span::all,1);
1614
1615 arma::mat bestval2;
1616 for (int i=0; i < (int)bestval.n_cols; ++i)
1617 if (i!=1)
1618 bestval2 = join_rows(bestval2,bestval.col(i));
1619
1620 for (int kSeg=0; kSeg<6; ++kSeg)
1621 {
1622 arma::mat currbestvals = arma::zeros<arma::mat>(bestval2.n_rows, bestval2.n_cols);
1623 for (int i=0; i < (int)bestval2.n_rows; ++i)
1624 for (int j=0; j < (int)bestval2.n_cols; ++j)
1625 if (valid_sets(i))
1626 currbestvals(i,j) = bestval2(i,j);
1627
1628 arma::vec t1 = currbestvals.col(0);
1629 double ma;
1630 uword maIdx;
1631 ma = t1.max(maIdx);
1632
1633 if ((maIdx == 0)&&(ma == 0))
1634 break;
1635
1636 int bestLength = lrint(partlengths(currbestvals(maIdx,1)));
1637 arma::rowvec bestIndices = decisionArray2.slice(currbestvals(maIdx,1))(currbestvals(maIdx,2),span::all);
1638
1639 arma::rowvec bestIndicesMap = arma::zeros<arma::rowvec>(bestIndices.size());
1640 for (int i=0; i < (int)bestIndices.size(); ++i)
1641 if (bestIndices(i)>0)
1642 bestIndicesMap(i) = 1;
1643
1644 arma::rowvec mask = arma::zeros<arma::rowvec>(bestLength*2-1);
1645 for (int i=0; i<bestLength; ++i)
1646 mask(i+bestLength-1) = 1;
1647
1648 arma::rowvec t2 = arma::conv(bestIndicesMap,mask);
1649 arma::rowvec island = t2.subvec(mask.size()/2,t2.size()-1-mask.size()/2);
1650
1651 // Save results in the structure
1652 Part newPart;
1653 newPart.n = bestLength;
1654 uvec q1 = find(bestIndices > 0);
1655
1656 for (int i=0; i < (int)q1.size();++i)
1657 newPart.indices.push_back(q1(i));
1658
1659 newPart.letter = partletters[kSeg];
1660 newPart.value = partvalues[kSeg];
1661 newPart.level = kSeg+1;
1662 parts.push_back(newPart);
1663
1664 uvec q2 = find(valid_sets==1);
1665
1666 for (int i=0; i < (int)q2.size(); ++i)
1667 {
1668 int iSet = q2(i);
1669 int s = partlengths(bestval2(iSet,1));
1670
1671 arma::rowvec mask1 = arma::zeros<arma::rowvec>(s*2-1);
1672 for (int i=0; i<s; ++i)
1673 mask1(i+s-1) = 1;
1674
1675 arma::rowvec Ind = decisionArray2.slice(bestval2(iSet,1))(bestval2(iSet,2),span::all);
1676 arma::rowvec IndMap = arma::zeros<arma::rowvec>(Ind.size());
1677 for (int i=0; i < (int)Ind.size(); ++i)
1678 if (Ind(i)>0)
1679 IndMap(i) = 2;
1680
1681 arma::rowvec t3 = arma::conv(IndMap,mask1);
1682 arma::rowvec currislands = t3.subvec(mask1.size()/2,t3.size()-1-mask1.size()/2);
1683 arma::rowvec islandsdMult = currislands%island;
1684
1685 arma::uvec islandsIndex = find(islandsdMult > 0);
1686
1687 if (islandsIndex.size() > 0)
1688 valid_sets(iSet) = 0;
1689 }
1690 }
1691 }
1692 else
1693 {
1694 Part newPart;
1695 newPart.n = nBeat;
1696 newPart.indices.push_back(0);
1697 newPart.letter = 'A';
1698 newPart.value = 1;
1699 newPart.level = 1;
1700 parts.push_back(newPart);
1701 }
1702
1703 arma::vec bar = linspace(1,nBeat,nBeat);
1704 Part np = nullpart(parts,bar);
1705
1706 parts.push_back(np);
1707
1708 // -------------- NOT CONVERTED -------------------------------------
1709 // if param.seg.editor
1710 // [pa, ta] = partarray(parts);
1711 // parts = editorssearch(pa, ta, parts);
1712 // parts = [parts, nullpart(parts,1:nBeat)];
1713 // end
1714 // ------------------------------------------------------------------
1715
1716
1717 mergenulls(parts);
1718
1719
1720 // -------------- NOT CONVERTED -------------------------------------
1721 // if param.seg.editor
1722 // [pa, ta] = partarray(parts);
1723 // parts = editorssearch(pa, ta, parts);
1724 // parts = [parts, nullpart(parts,1:nBeat)];
1725 // end
1726 // ------------------------------------------------------------------
1727
1728 return parts;
1729 }
1730
1731
1732
1733 void songSegmentChroma(Vamp::Plugin::FeatureList quantisedChromagram, vector<Part> &parts)
1734 {
1735 // Collect Info
1736 int nBeat = quantisedChromagram.size(); // Number of feature vector
1737 int nFeatValues = quantisedChromagram[0].values.size(); // Number of values for each feature vector
1738
1739 arma::mat synchTreble = arma::zeros<mat>(nBeat,nFeatValues/2);
1740
1741 for (int i = 0; i < nBeat; ++ i)
1742 for (int j = 0; j < nFeatValues/2; ++ j)
1743 {
1744 synchTreble(i,j) = quantisedChromagram[i].values[j];
1745 }
1746
1747 arma::mat synchBass = arma::zeros<mat>(nBeat,nFeatValues/2);
1748
1749 for (int i = 0; i < nBeat; ++ i)
1750 for (int j = 0; j < nFeatValues/2; ++ j)
1751 {
1752 synchBass(i,j) = quantisedChromagram[i].values[j+12];
1753 }
1754
1755 // Process
1756
1757 arma::mat segTreble = arma::zeros<arma::mat>(quantisedChromagram.size(),quantisedChromagram[0].values.size()/2);
1758 arma::mat segBass = arma::zeros<arma::mat>(quantisedChromagram.size(),quantisedChromagram[0].values.size()/2);
1759
1760 for (int iPart=0; iPart < (int)parts.size(); ++iPart)
1761 {
1762 parts[iPart].nInd = parts[iPart].indices.size();
1763
1764 for (int kOccur=0; kOccur<parts[iPart].nInd; ++kOccur)
1765 {
1766 int kStartIndex = parts[iPart].indices[kOccur];
1767 int kEndIndex = kStartIndex + parts[iPart].n-1;
1768
1769 segTreble.rows(kStartIndex,kEndIndex) = segTreble.rows(kStartIndex,kEndIndex) + synchTreble.rows(kStartIndex,kEndIndex);
1770 segBass.rows(kStartIndex,kEndIndex) = segBass.rows(kStartIndex,kEndIndex) + synchBass.rows(kStartIndex,kEndIndex);
1771 }
1772 }
1773 }
1774
1775
1776 // Segment Integration
1777 vector<Part> songSegmentIntegration(vector<Part> &parts)
1778 {
1779 // Break up parts (every part will have one instance)
1780 vector<Part> newPartVector;
1781 vector<int> partindices;
1782
1783 for (int iPart=0; iPart < (int)parts.size(); ++iPart)
1784 {
1785 parts[iPart].nInd = parts[iPart].indices.size();
1786 for (int iInstance=0; iInstance<parts[iPart].nInd; ++iInstance)
1787 {
1788 Part newPart;
1789 newPart.n = parts[iPart].n;
1790 newPart.letter = parts[iPart].letter;
1791 newPart.value = parts[iPart].value;
1792 newPart.level = parts[iPart].level;
1793 newPart.indices.push_back(parts[iPart].indices[iInstance]);
1794 newPart.nInd = 1;
1795 partindices.push_back(parts[iPart].indices[iInstance]);
1796
1797 newPartVector.push_back(newPart);
1798 }
1799 }
1800
1801
1802 // Sort the parts in order of occurrence
1803 sort (partindices.begin(), partindices.end());
1804
1805 for (int i=0; i < (int)partindices.size(); ++i)
1806 {
1807 bool found = false;
1808 int in=0;
1809 while (!found)
1810 {
1811 if (newPartVector[in].indices[0] == partindices[i])
1812 {
1813 newPartVector.push_back(newPartVector[in]);
1814 newPartVector.erase(newPartVector.begin()+in);
1815 found = true;
1816 }
1817 else
1818 in++;
1819 }
1820 }
1821
1822 // Clear the vector
1823 for (int iNewpart=1; iNewpart < (int)newPartVector.size(); ++iNewpart)
1824 {
1825 if (newPartVector[iNewpart].n < 12)
1826 {
1827 newPartVector[iNewpart-1].n = newPartVector[iNewpart-1].n + newPartVector[iNewpart].n;
1828 newPartVector.erase(newPartVector.begin()+iNewpart);
1829 }
1830 }
1831
1832 return newPartVector;
1833 }
1834
1835 // Segmenter
1836 Vamp::Plugin::FeatureList Segmentino::runSegmenter(Vamp::Plugin::FeatureList quantisedChromagram)
1837 {
1838 /* --- Display Information --- */
1839 // int numBeat = quantisedChromagram.size();
1840 // int numFeats = quantisedChromagram[0].values.size();
1841
1842 vector<Part> parts;
1843 vector<Part> finalParts;
1844
1845 parts = songSegment(quantisedChromagram);
1846 songSegmentChroma(quantisedChromagram,parts);
1847
1848 finalParts = songSegmentIntegration(parts);
1849
1850
1851 // TEMP ----
1852 /*for (int i=0;i<finalParts.size(); ++i)
1853 {
1854 std::cout << "Parts n° " << i << std::endl;
1855 std::cout << "n°: " << finalParts[i].n << std::endl;
1856 std::cout << "letter: " << finalParts[i].letter << std::endl;
1857
1858 std::cout << "indices: ";
1859 for (int j=0;j<finalParts[i].indices.size(); ++j)
1860 std::cout << finalParts[i].indices[j] << " ";
1861
1862 std::cout << std::endl;
1863 std::cout << "level: " << finalParts[i].level << std::endl;
1864 }*/
1865
1866 // ---------
1867
1868
1869 // Output
1870
1871 Vamp::Plugin::FeatureList results;
1872
1873
1874 Feature seg;
1875
1876 arma::vec indices;
1877 // int idx=0;
1878 vector<int> values;
1879 vector<string> letters;
1880
1881 for (int iPart=0; iPart < (int)finalParts.size()-1; ++iPart)
1882 {
1883 int iInstance=0;
1884 seg.hasTimestamp = true;
1885
1886 int ind = finalParts[iPart].indices[iInstance];
1887 int ind1 = finalParts[iPart+1].indices[iInstance];
1888
1889 seg.timestamp = quantisedChromagram[ind].timestamp;
1890 seg.hasDuration = true;
1891 seg.duration = quantisedChromagram[ind1].timestamp-quantisedChromagram[ind].timestamp;
1892 seg.values.clear();
1893 seg.values.push_back(finalParts[iPart].value);
1894 seg.label = finalParts[iPart].letter;
1895
1896 results.push_back(seg);
1897 }
1898
1899 if (finalParts.size() > 0) {
1900 int ind = finalParts[finalParts.size()-1].indices[0];
1901 seg.hasTimestamp = true;
1902 seg.timestamp = quantisedChromagram[ind].timestamp;
1903 seg.hasDuration = true;
1904 seg.duration = quantisedChromagram[quantisedChromagram.size()-1].timestamp-quantisedChromagram[ind].timestamp;
1905 seg.values.clear();
1906 seg.values.push_back(finalParts[finalParts.size()-1].value);
1907 seg.label = finalParts[finalParts.size()-1].letter;
1908
1909 results.push_back(seg);
1910 }
1911
1912 return results;
1913 }
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930