segmenter-vamp-plugin: songparts/plugins/SongParts.cpp annotate

annotate songparts/plugins/SongParts.cpp @ 1:f44aa6d29642

Plugin Code - The main file is songparts.cpp

author	maxzanoni76 <max.zanoni@eecs.qmul.ac.uk>
date	Wed, 11 Apr 2012 09:31:28 +0100
parents
children	fa8450e6f5a9

rev	line source
max@1	1 /* -- c-basic-offset: 4 indent-tabs-mode: nil -- vi:set ts=8 sts=4 sw=4: */
max@1	2
max@1	3 /*
max@1	4 QM Vamp Plugin Set
max@1	5
max@1	6 Centre for Digital Music, Queen Mary, University of London.
max@1	7
max@1	8 This program is free software; you can redistribute it and/or
max@1	9 modify it under the terms of the GNU General Public License as
max@1	10 published by the Free Software Foundation; either version 2 of the
max@1	11 License, or (at your option) any later version. See the file
max@1	12 COPYING included with this distribution for more information.
max@1	13 */
max@1	14
max@1	15 #include "SongParts.h"
max@1	16
max@1	17 #include <base/Window.h>
max@1	18 #include <dsp/onsets/DetectionFunction.h>
max@1	19 #include <dsp/onsets/PeakPicking.h>
max@1	20 #include <dsp/transforms/FFT.h>
max@1	21 #include <dsp/tempotracking/TempoTrackV2.h>
max@1	22 #include <dsp/tempotracking/DownBeat.h>
max@1	23 #include <chromamethods.h>
max@1	24 #include <maths/MathUtilities.h>
max@1	25 #include <boost/numeric/ublas/matrix.hpp>
max@1	26 #include <boost/numeric/ublas/io.hpp>
max@1	27 #include <boost/math/distributions/normal.hpp>
max@1	28 #include "armadillo"
max@1	29 #include <fstream>
max@1	30 #include <sstream>
max@1	31 #include <cmath>
max@1	32 #include <vector>
max@1	33
max@1	34 #include <vamp-sdk/Plugin.h>
max@1	35
max@1	36 using namespace boost::numeric;
max@1	37 using namespace arma;
max@1	38 using std::string;
max@1	39 using std::vector;
max@1	40 using std::cerr;
max@1	41 using std::cout;
max@1	42 using std::endl;
max@1	43
max@1	44
max@1	45 #ifndef __GNUC__
max@1	46 #include <alloca.h>
max@1	47 #endif
max@1	48
max@1	49
// Result record. Plain aggregate: the scalar members are NOT initialised by
// default construction, so callers must set them before reading.
// NOTE(review): the meaning of the individual fields is not established in
// this part of the file -- confirm against the segmentation code that fills
// them in.
struct Part {
    int n;
    std::vector<unsigned> indices;
    std::string letter;
    unsigned value;
    int level;
    int nInd;
};
max@1	59
max@1	60
max@1	61 /* ------------------------------------ */
max@1	62 /* ----- BEAT DETECTOR CLASS ---------- */
max@1	63 /* ------------------------------------ */
max@1	64
max@1	65 class BeatTrackerData
max@1	66 {
max@1	67 /* --- ATTRIBUTES --- */
max@1	68 public:
max@1	69 DFConfig dfConfig;
max@1	70 DetectionFunction *df;
max@1	71 DownBeat *downBeat;
max@1	72 vector<double> dfOutput;
max@1	73 Vamp::RealTime origin;
max@1	74
max@1	75
max@1	76 /* --- METHODS --- */
max@1	77
max@1	78 /* --- Constructor --- */
max@1	79 public:
max@1	80 BeatTrackerData(float rate, const DFConfig &config) : dfConfig(config) {
max@1	81
max@1	82 df = new DetectionFunction(config);
max@1	83 // decimation factor aims at resampling to c. 3KHz; must be power of 2
max@1	84 int factor = MathUtilities::nextPowerOfTwo(rate / 3000);
max@1	85 // std::cerr << "BeatTrackerData: factor = " << factor << std::endl;
max@1	86 downBeat = new DownBeat(rate, factor, config.stepSize);
max@1	87 }
max@1	88
max@1	89 /* --- Desctructor --- */
max@1	90 ~BeatTrackerData() {
max@1	91 delete df;
max@1	92 delete downBeat;
max@1	93 }
max@1	94
max@1	95 void reset() {
max@1	96 delete df;
max@1	97 df = new DetectionFunction(dfConfig);
max@1	98 dfOutput.clear();
max@1	99 downBeat->resetAudioBuffer();
max@1	100 origin = Vamp::RealTime::zeroTime;
max@1	101 }
max@1	102 };
max@1	103
max@1	104
max@1	105 /* --------------------------------------- */
max@1	106 /* ----- CHROMA EXTRACTOR CLASS ---------- */
max@1	107 /* --------------------------------------- */
max@1	108
max@1	109 class ChromaData
max@1	110 {
max@1	111
max@1	112 /* --- ATTRIBUTES --- */
max@1	113
max@1	114 public:
max@1	115 int frameCount;
max@1	116 int nBPS;
max@1	117 Vamp::Plugin::FeatureList logSpectrum;
max@1	118 size_t blockSize;
max@1	119 int lengthOfNoteIndex;
max@1	120 vector<float> meanTunings;
max@1	121 vector<float> localTunings;
max@1	122 float whitening;
max@1	123 float preset;
max@1	124 float useNNLS;
max@1	125 vector<float> localTuning;
max@1	126 vector<float> kernelValue;
max@1	127 vector<int> kernelFftIndex;
max@1	128 vector<int> kernelNoteIndex;
max@1	129 float *dict;
max@1	130 bool tuneLocal;
max@1	131 float doNormalizeChroma;
max@1	132 float rollon;
max@1	133 float s;
max@1	134 vector<float> hw;
max@1	135 vector<float> sinvalues;
max@1	136 vector<float> cosvalues;
max@1	137 Window<float> window;
max@1	138 FFTReal fft;
max@1	139 size_t inputSampleRate;
max@1	140
max@1	141 /* --- METHODS --- */
max@1	142
max@1	143 /* --- Constructor --- */
max@1	144
max@1	145 public:
max@1	146 ChromaData(float inputSampleRate, size_t block_size) :
max@1	147 frameCount(0),
max@1	148 nBPS(3),
max@1	149 logSpectrum(0),
max@1	150 blockSize(0),
max@1	151 lengthOfNoteIndex(0),
max@1	152 meanTunings(0),
max@1	153 localTunings(0),
max@1	154 whitening(1.0),
max@1	155 preset(0.0),
max@1	156 useNNLS(1.0),
max@1	157 localTuning(0.0),
max@1	158 kernelValue(0),
max@1	159 kernelFftIndex(0),
max@1	160 kernelNoteIndex(0),
max@1	161 dict(0),
max@1	162 tuneLocal(0.0),
max@1	163 doNormalizeChroma(0),
max@1	164 rollon(0.0),
max@1	165 s(0.7),
max@1	166 sinvalues(0),
max@1	167 cosvalues(0),
max@1	168 window(HanningWindow, block_size),
max@1	169 fft(block_size),
max@1	170 inputSampleRate(inputSampleRate)
max@1	171 {
max@1	172 // make the note dictionary matrix
max@1	173 dict = new float[nNote * 84];
max@1	174 for (int i = 0; i < nNote * 84; ++i) dict[i] = 0.0;
max@1	175 blockSize = block_size;
max@1	176 }
max@1	177
max@1	178
max@1	179 /* --- Desctructor --- */
max@1	180
max@1	181 ~ChromaData() {
max@1	182 delete [] dict;
max@1	183 }
max@1	184
max@1	185 /* --- Public Methods --- */
max@1	186
max@1	187 void reset() {
max@1	188 frameCount = 0;
max@1	189 logSpectrum.clear();
max@1	190 for (int iBPS = 0; iBPS < 3; ++iBPS) {
max@1	191 meanTunings[iBPS] = 0;
max@1	192 localTunings[iBPS] = 0;
max@1	193 }
max@1	194 localTuning.clear();
max@1	195 }
max@1	196
max@1	197 void baseProcess(float *inputBuffers, Vamp::RealTime timestamp)
max@1	198 {
max@1	199
max@1	200 frameCount++;
max@1	201 float *magnitude = new float[blockSize/2];
max@1	202 double *fftReal = new double[blockSize];
max@1	203 double *fftImag = new double[blockSize];
max@1	204
max@1	205 // FFTReal wants doubles, so we need to make a local copy of inputBuffers
max@1	206 double *inputBuffersDouble = new double[blockSize];
max@1	207 for (size_t i = 0; i < blockSize; i++) inputBuffersDouble[i] = inputBuffers[i];
max@1	208
max@1	209 fft.process(false, inputBuffersDouble, fftReal, fftImag);
max@1	210
max@1	211 float energysum = 0;
max@1	212 // make magnitude
max@1	213 float maxmag = -10000;
max@1	214 for (int iBin = 0; iBin < static_cast<int>(blockSize/2); iBin++) {
max@1	215 magnitude[iBin] = sqrt(fftReal[iBin] * fftReal[iBin] +
max@1	216 fftImag[iBin] * fftImag[iBin]);
max@1	217 if (magnitude[iBin]>blockSize*1.0) magnitude[iBin] = blockSize;
max@1	218 // a valid audio signal (between -1 and 1) should not be limited here.
max@1	219 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
max@1	220 if (rollon > 0) {
max@1	221 energysum += pow(magnitude[iBin],2);
max@1	222 }
max@1	223 }
max@1	224
max@1	225 float cumenergy = 0;
max@1	226 if (rollon > 0) {
max@1	227 for (int iBin = 2; iBin < static_cast<int>(blockSize/2); iBin++) {
max@1	228 cumenergy += pow(magnitude[iBin],2);
max@1	229 if (cumenergy < energysum * rollon / 100) magnitude[iBin-2] = 0;
max@1	230 else break;
max@1	231 }
max@1	232 }
max@1	233
max@1	234 if (maxmag < 2) {
max@1	235 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
max@1	236 for (int iBin = 0; iBin < static_cast<int>(blockSize/2); iBin++) {
max@1	237 magnitude[iBin] = 0;
max@1	238 }
max@1	239 }
max@1	240
max@1	241 // cerr << magnitude[200] << endl;
max@1	242
max@1	243 // note magnitude mapping using pre-calculated matrix
max@1	244 float *nm = new float[nNote]; // note magnitude
max@1	245 for (int iNote = 0; iNote < nNote; iNote++) {
max@1	246 nm[iNote] = 0; // initialise as 0
max@1	247 }
max@1	248 int binCount = 0;
max@1	249 for (vector<float>::iterator it = kernelValue.begin(); it != kernelValue.end(); ++it) {
max@1	250 nm[kernelNoteIndex[binCount]] += magnitude[kernelFftIndex[binCount]] * kernelValue[binCount];
max@1	251 binCount++;
max@1	252 }
max@1	253
max@1	254 float one_over_N = 1.0/frameCount;
max@1	255 // update means of complex tuning variables
max@1	256 for (int iBPS = 0; iBPS < nBPS; ++iBPS) meanTunings[iBPS] = float(frameCount-1)one_over_N;
max@1	257
max@1	258 for (int iTone = 0; iTone < round(nNote0.62/nBPS)nBPS+1; iTone = iTone + nBPS) {
max@1	259 for (int iBPS = 0; iBPS < nBPS; ++iBPS) meanTunings[iBPS] += nm[iTone + iBPS]*one_over_N;
max@1	260 float ratioOld = 0.997;
max@1	261 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1	262 localTunings[iBPS] *= ratioOld;
max@1	263 localTunings[iBPS] += nm[iTone + iBPS] * (1 - ratioOld);
max@1	264 }
max@1	265 }
max@1	266
max@1	267 float localTuningImag = 0;
max@1	268 float localTuningReal = 0;
max@1	269 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1	270 localTuningReal += localTunings[iBPS] * cosvalues[iBPS];
max@1	271 localTuningImag += localTunings[iBPS] * sinvalues[iBPS];
max@1	272 }
max@1	273
max@1	274 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
max@1	275 localTuning.push_back(normalisedtuning);
max@1	276
max@1	277 Vamp::Plugin::Feature f1; // logfreqspec
max@1	278 f1.hasTimestamp = true;
max@1	279 f1.timestamp = timestamp;
max@1	280 for (int iNote = 0; iNote < nNote; iNote++) {
max@1	281 f1.values.push_back(nm[iNote]);
max@1	282 }
max@1	283
max@1	284 // deletes
max@1	285 delete[] inputBuffersDouble;
max@1	286 delete[] magnitude;
max@1	287 delete[] fftReal;
max@1	288 delete[] fftImag;
max@1	289 delete[] nm;
max@1	290
max@1	291 logSpectrum.push_back(f1); // remember note magnitude
max@1	292 }
max@1	293
max@1	294 bool initialise()
max@1	295 {
max@1	296 dictionaryMatrix(dict, s);
max@1	297
max@1	298 // make things for tuning estimation
max@1	299 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1	300 sinvalues.push_back(sin(2M_PI(iBPS*1.0/nBPS)));
max@1	301 cosvalues.push_back(cos(2M_PI(iBPS*1.0/nBPS)));
max@1	302 }
max@1	303
max@1	304
max@1	305 // make hamming window of length 1/2 octave
max@1	306 int hamwinlength = nBPS * 6 + 1;
max@1	307 float hamwinsum = 0;
max@1	308 for (int i = 0; i < hamwinlength; ++i) {
max@1	309 hw.push_back(0.54 - 0.46 * cos((2M_PIi)/(hamwinlength-1)));
max@1	310 hamwinsum += 0.54 - 0.46 * cos((2M_PIi)/(hamwinlength-1));
max@1	311 }
max@1	312 for (int i = 0; i < hamwinlength; ++i) hw[i] = hw[i] / hamwinsum;
max@1	313
max@1	314
max@1	315 // initialise the tuning
max@1	316 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1	317 meanTunings.push_back(0);
max@1	318 localTunings.push_back(0);
max@1	319 }
max@1	320
max@1	321 blockSize = blockSize;
max@1	322 frameCount = 0;
max@1	323 int tempn = nNote * blockSize/2;
max@1	324 // cerr << "length of tempkernel : " << tempn << endl;
max@1	325 float *tempkernel;
max@1	326
max@1	327 tempkernel = new float[tempn];
max@1	328
max@1	329 logFreqMatrix(inputSampleRate, blockSize, tempkernel);
max@1	330 kernelValue.clear();
max@1	331 kernelFftIndex.clear();
max@1	332 kernelNoteIndex.clear();
max@1	333 int countNonzero = 0;
max@1	334 for (int iNote = 0; iNote < nNote; ++iNote) {
max@1	335 // I don't know if this is wise: manually making a sparse matrix
max@1	336 for (int iFFT = 0; iFFT < static_cast<int>(blockSize/2); ++iFFT) {
max@1	337 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
max@1	338 kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
max@1	339 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
max@1	340 countNonzero++;
max@1	341 }
max@1	342 kernelFftIndex.push_back(iFFT);
max@1	343 kernelNoteIndex.push_back(iNote);
max@1	344 }
max@1	345 }
max@1	346 }
max@1	347 delete [] tempkernel;
max@1	348 }
max@1	349 };
max@1	350
max@1	351
max@1	352 /* --------------------------------- */
max@1	353 /* ----- SONG PARTITIONER ---------- */
max@1	354 /* --------------------------------- */
max@1	355
max@1	356
max@1	357 /* --- ATTRIBUTES --- */
max@1	358
max@1	359 float SongPartitioner::m_stepSecs = 0.01161; // 512 samples at 44100
max@1	360 size_t SongPartitioner::m_chromaFramesizeFactor = 16; // 16 times as long as beat tracker's
max@1	361 size_t SongPartitioner::m_chromaStepsizeFactor = 4; // 4 times as long as beat tracker's
max@1	362
max@1	363
max@1	364 /* --- METHODS --- */
max@1	365
max@1	366 /* --- Constructor --- */
max@1	367 SongPartitioner::SongPartitioner(float inputSampleRate) :
max@1	368 Vamp::Plugin(inputSampleRate),
max@1	369 m_d(0),
max@1	370 m_bpb(4),
max@1	371 m_pluginFrameCount(0)
max@1	372 {
max@1	373 }
max@1	374
max@1	375
max@1	376 /* --- Desctructor --- */
max@1	377 SongPartitioner::~SongPartitioner()
max@1	378 {
max@1	379 delete m_d;
max@1	380 }
max@1	381
max@1	382
max@1	383 /* --- Methods --- */
max@1	384 string SongPartitioner::getIdentifier() const
max@1	385 {
max@1	386 return "qm-songpartitioner";
max@1	387 }
max@1	388
max@1	389 string SongPartitioner::getName() const
max@1	390 {
max@1	391 return "Song Partitioner";
max@1	392 }
max@1	393
max@1	394 string SongPartitioner::getDescription() const
max@1	395 {
max@1	396 return "Estimate contiguous segments pertaining to song parts such as verse and chorus.";
max@1	397 }
max@1	398
max@1	399 string SongPartitioner::getMaker() const
max@1	400 {
max@1	401 return "Queen Mary, University of London";
max@1	402 }
max@1	403
max@1	404 int SongPartitioner::getPluginVersion() const
max@1	405 {
max@1	406 return 2;
max@1	407 }
max@1	408
max@1	409 string SongPartitioner::getCopyright() const
max@1	410 {
max@1	411 return "Plugin by Matthew Davies, Christian Landone, Chris Cannam, Matthias Mauch and Massimiliano Zanoni Copyright (c) 2006-2012 QMUL - All Rights Reserved";
max@1	412 }
max@1	413
max@1	414 SongPartitioner::ParameterList SongPartitioner::getParameterDescriptors() const
max@1	415 {
max@1	416 ParameterList list;
max@1	417
max@1	418 ParameterDescriptor desc;
max@1	419
max@1	420 desc.identifier = "bpb";
max@1	421 desc.name = "Beats per Bar";
max@1	422 desc.description = "The number of beats in each bar";
max@1	423 desc.minValue = 2;
max@1	424 desc.maxValue = 16;
max@1	425 desc.defaultValue = 4;
max@1	426 desc.isQuantized = true;
max@1	427 desc.quantizeStep = 1;
max@1	428 list.push_back(desc);
max@1	429
max@1	430 return list;
max@1	431 }
max@1	432
max@1	433 float SongPartitioner::getParameter(std::string name) const
max@1	434 {
max@1	435 if (name == "bpb") return m_bpb;
max@1	436 return 0.0;
max@1	437 }
max@1	438
max@1	439 void SongPartitioner::setParameter(std::string name, float value)
max@1	440 {
max@1	441 if (name == "bpb") m_bpb = lrintf(value);
max@1	442 }
max@1	443
max@1	444
max@1	445 // Return the StepSize for Chroma Extractor
max@1	446 size_t SongPartitioner::getPreferredStepSize() const
max@1	447 {
max@1	448 size_t step = size_t(m_inputSampleRate * m_stepSecs + 0.0001);
max@1	449 if (step < 1) step = 1;
max@1	450
max@1	451 return step;
max@1	452 }
max@1	453
max@1	454 // Return the BlockSize for Chroma Extractor
max@1	455 size_t SongPartitioner::getPreferredBlockSize() const
max@1	456 {
max@1	457 size_t theoretical = getPreferredStepSize() * 2;
max@1	458 theoretical *= m_chromaFramesizeFactor;
max@1	459
max@1	460 return theoretical;
max@1	461 }
max@1	462
max@1	463
max@1	464 // Initialize the plugin and define Beat Tracker and Chroma Extractor Objects
max@1	465 bool SongPartitioner::initialise(size_t channels, size_t stepSize, size_t blockSize)
max@1	466 {
max@1	467 if (m_d) {
max@1	468 delete m_d;
max@1	469 m_d = 0;
max@1	470 }
max@1	471
max@1	472 if (channels < getMinChannelCount() \|\|
max@1	473 channels > getMaxChannelCount()) {
max@1	474 std::cerr << "SongPartitioner::initialise: Unsupported channel count: "
max@1	475 << channels << std::endl;
max@1	476 return false;
max@1	477 }
max@1	478
max@1	479 if (stepSize != getPreferredStepSize()) {
max@1	480 std::cerr << "ERROR: SongPartitioner::initialise: Unsupported step size for this sample rate: "
max@1	481 << stepSize << " (wanted " << (getPreferredStepSize()) << ")" << std::endl;
max@1	482 return false;
max@1	483 }
max@1	484
max@1	485 if (blockSize != getPreferredBlockSize()) {
max@1	486 std::cerr << "WARNING: SongPartitioner::initialise: Sub-optimal block size for this sample rate: "
max@1	487 << blockSize << " (wanted " << getPreferredBlockSize() << ")" << std::endl;
max@1	488 }
max@1	489
max@1	490 // Beat tracker and Chroma extractor has two different configuration parameters
max@1	491
max@1	492 // Configuration Parameters for Beat Tracker
max@1	493 DFConfig dfConfig;
max@1	494 dfConfig.DFType = DF_COMPLEXSD;
max@1	495 dfConfig.stepSize = stepSize;
max@1	496 dfConfig.frameLength = blockSize / m_chromaFramesizeFactor;
max@1	497 dfConfig.dbRise = 3;
max@1	498 dfConfig.adaptiveWhitening = false;
max@1	499 dfConfig.whiteningRelaxCoeff = -1;
max@1	500 dfConfig.whiteningFloor = -1;
max@1	501
max@1	502 // Initialise Beat Tracker
max@1	503 m_d = new BeatTrackerData(m_inputSampleRate, dfConfig);
max@1	504 m_d->downBeat->setBeatsPerBar(m_bpb);
max@1	505
max@1	506 // Initialise Chroma Extractor
max@1	507 m_chromadata = new ChromaData(m_inputSampleRate, blockSize);
max@1	508 m_chromadata->initialise();
max@1	509
max@1	510 return true;
max@1	511 }
max@1	512
max@1	513 void SongPartitioner::reset()
max@1	514 {
max@1	515 if (m_d) m_d->reset();
max@1	516 m_pluginFrameCount = 0;
max@1	517 }
max@1	518
max@1	519 SongPartitioner::OutputList SongPartitioner::getOutputDescriptors() const
max@1	520 {
max@1	521 OutputList list;
max@1	522 size_t outputCounter = 0;
max@1	523
max@1	524 OutputDescriptor beat;
max@1	525 beat.identifier = "beats";
max@1	526 beat.name = "Beats";
max@1	527 beat.description = "Beat locations labelled with metrical position";
max@1	528 beat.unit = "";
max@1	529 beat.hasFixedBinCount = true;
max@1	530 beat.binCount = 0;
max@1	531 beat.sampleType = OutputDescriptor::VariableSampleRate;
max@1	532 beat.sampleRate = 1.0 / m_stepSecs;
max@1	533 m_beatOutputNumber = outputCounter++;
max@1	534
max@1	535 OutputDescriptor bars;
max@1	536 bars.identifier = "bars";
max@1	537 bars.name = "Bars";
max@1	538 bars.description = "Bar locations";
max@1	539 bars.unit = "";
max@1	540 bars.hasFixedBinCount = true;
max@1	541 bars.binCount = 0;
max@1	542 bars.sampleType = OutputDescriptor::VariableSampleRate;
max@1	543 bars.sampleRate = 1.0 / m_stepSecs;
max@1	544 m_barsOutputNumber = outputCounter++;
max@1	545
max@1	546 OutputDescriptor beatcounts;
max@1	547 beatcounts.identifier = "beatcounts";
max@1	548 beatcounts.name = "Beat Count";
max@1	549 beatcounts.description = "Beat counter function";
max@1	550 beatcounts.unit = "";
max@1	551 beatcounts.hasFixedBinCount = true;
max@1	552 beatcounts.binCount = 1;
max@1	553 beatcounts.sampleType = OutputDescriptor::VariableSampleRate;
max@1	554 beatcounts.sampleRate = 1.0 / m_stepSecs;
max@1	555 m_beatcountsOutputNumber = outputCounter++;
max@1	556
max@1	557 OutputDescriptor beatsd;
max@1	558 beatsd.identifier = "beatsd";
max@1	559 beatsd.name = "Beat Spectral Difference";
max@1	560 beatsd.description = "Beat spectral difference function used for bar-line detection";
max@1	561 beatsd.unit = "";
max@1	562 beatsd.hasFixedBinCount = true;
max@1	563 beatsd.binCount = 1;
max@1	564 beatsd.sampleType = OutputDescriptor::VariableSampleRate;
max@1	565 beatsd.sampleRate = 1.0 / m_stepSecs;
max@1	566 m_beatsdOutputNumber = outputCounter++;
max@1	567
max@1	568 OutputDescriptor logscalespec;
max@1	569 logscalespec.identifier = "logscalespec";
max@1	570 logscalespec.name = "Log-Frequency Spectrum";
max@1	571 logscalespec.description = "Spectrum with linear frequency on a log scale.";
max@1	572 logscalespec.unit = "";
max@1	573 logscalespec.hasFixedBinCount = true;
max@1	574 logscalespec.binCount = nNote;
max@1	575 logscalespec.hasKnownExtents = false;
max@1	576 logscalespec.isQuantized = false;
max@1	577 logscalespec.sampleType = OutputDescriptor::FixedSampleRate;
max@1	578 logscalespec.hasDuration = false;
max@1	579 logscalespec.sampleRate = m_inputSampleRate/2048;
max@1	580 m_logscalespecOutputNumber = outputCounter++;
max@1	581
max@1	582 OutputDescriptor bothchroma;
max@1	583 bothchroma.identifier = "bothchroma";
max@1	584 bothchroma.name = "Chromagram and Bass Chromagram";
max@1	585 bothchroma.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS approximate transcription.";
max@1	586 bothchroma.unit = "";
max@1	587 bothchroma.hasFixedBinCount = true;
max@1	588 bothchroma.binCount = 24;
max@1	589 bothchroma.hasKnownExtents = false;
max@1	590 bothchroma.isQuantized = false;
max@1	591 bothchroma.sampleType = OutputDescriptor::FixedSampleRate;
max@1	592 bothchroma.hasDuration = false;
max@1	593 bothchroma.sampleRate = m_inputSampleRate/2048;
max@1	594 m_bothchromaOutputNumber = outputCounter++;
max@1	595
max@1	596 OutputDescriptor qchromafw;
max@1	597 qchromafw.identifier = "qchromafw";
max@1	598 qchromafw.name = "Pseudo-Quantised Chromagram and Bass Chromagram";
max@1	599 qchromafw.description = "Pseudo-Quantised Chromagram and Bass Chromagram (frames between two beats are identical).";
max@1	600 qchromafw.unit = "";
max@1	601 qchromafw.hasFixedBinCount = true;
max@1	602 qchromafw.binCount = 24;
max@1	603 qchromafw.hasKnownExtents = false;
max@1	604 qchromafw.isQuantized = false;
max@1	605 qchromafw.sampleType = OutputDescriptor::FixedSampleRate;
max@1	606 qchromafw.hasDuration = false;
max@1	607 qchromafw.sampleRate = m_inputSampleRate/2048;
max@1	608 m_qchromafwOutputNumber = outputCounter++;
max@1	609
max@1	610 OutputDescriptor qchroma;
max@1	611 qchroma.identifier = "qchroma";
max@1	612 qchroma.name = "Quantised Chromagram and Bass Chromagram";
max@1	613 qchroma.description = "Quantised Chromagram and Bass Chromagram.";
max@1	614 qchroma.unit = "";
max@1	615 qchroma.hasFixedBinCount = true;
max@1	616 qchroma.binCount = 24;
max@1	617 qchroma.hasKnownExtents = false;
max@1	618 qchroma.isQuantized = false;
max@1	619 qchroma.sampleType = OutputDescriptor::FixedSampleRate;
max@1	620 qchroma.hasDuration = true;
max@1	621 m_qchromaOutputNumber = outputCounter++;
max@1	622
max@1	623 OutputDescriptor segm;
max@1	624 segm.identifier = "segm";
max@1	625 segm.name = "Segmentation";
max@1	626 segm.description = "Segmentation";
max@1	627 segm.unit = "segment-type";
max@1	628 segm.hasFixedBinCount = true;
max@1	629 //segm.binCount = 24;
max@1	630 segm.binCount = 1;
max@1	631 segm.hasKnownExtents = true;
max@1	632 segm.minValue = 1;
max@1	633 segm.maxValue = 5;
max@1	634 segm.isQuantized = true;
max@1	635 segm.quantizeStep = 1;
max@1	636 segm.sampleType = OutputDescriptor::VariableSampleRate;
max@1	637 segm.hasDuration = true;
max@1	638 m_segmOutputNumber = outputCounter++;
max@1	639
max@1	640
max@1	641 /*
max@1	642 OutputList list;
max@1	643 OutputDescriptor segmentation;
max@1	644 segmentation.identifier = "segmentation";
max@1	645 segmentation.name = "Segmentation";
max@1	646 segmentation.description = "Segmentation";
max@1	647 segmentation.unit = "segment-type";
max@1	648 segmentation.hasFixedBinCount = true;
max@1	649 segmentation.binCount = 1;
max@1	650 segmentation.hasKnownExtents = true;
max@1	651 segmentation.minValue = 1;
max@1	652 segmentation.maxValue = nSegmentTypes;
max@1	653 segmentation.isQuantized = true;
max@1	654 segmentation.quantizeStep = 1;
max@1	655 segmentation.sampleType = OutputDescriptor::VariableSampleRate;
max@1	656 segmentation.sampleRate = m_inputSampleRate / getPreferredStepSize();
max@1	657 list.push_back(segmentation);
max@1	658 return list;
max@1	659 */
max@1	660
max@1	661
max@1	662 list.push_back(beat);
max@1	663 list.push_back(bars);
max@1	664 list.push_back(beatcounts);
max@1	665 list.push_back(beatsd);
max@1	666 list.push_back(logscalespec);
max@1	667 list.push_back(bothchroma);
max@1	668 list.push_back(qchromafw);
max@1	669 list.push_back(qchroma);
max@1	670 list.push_back(segm);
max@1	671
max@1	672 return list;
max@1	673 }
max@1	674
max@1	675 // Executed for each frame - called from the host
max@1	676
max@1	677 // We use time domain input, because DownBeat requires it -- so we
max@1	678 // use the time-domain version of DetectionFunction::process which
max@1	679 // does its own FFT. It requires doubles as input, so we need to
max@1	680 // make a temporary copy
max@1	681
max@1	682 // We only support a single input channel
max@1	683 SongPartitioner::FeatureSet SongPartitioner::process(const float const inputBuffers,Vamp::RealTime timestamp)
max@1	684 {
max@1	685 if (!m_d) {
max@1	686 cerr << "ERROR: SongPartitioner::process: "
max@1	687 << "SongPartitioner has not been initialised"
max@1	688 << endl;
max@1	689 return FeatureSet();
max@1	690 }
max@1	691
max@1	692 const int fl = m_d->dfConfig.frameLength;
max@1	693 #ifndef __GNUC__
max@1	694 double dfinput = (double )alloca(fl * sizeof(double));
max@1	695 #else
max@1	696 double dfinput[fl];
max@1	697 #endif
max@1	698 int sampleOffset = ((m_chromaFramesizeFactor-1) * fl) / 2;
max@1	699
max@1	700 // Since chroma needs a much longer frame size, we only ever use the very
max@1	701 // beginning of the frame for beat tracking.
max@1	702 for (int i = 0; i < fl; ++i) dfinput[i] = inputBuffers[0][i];
max@1	703 double output = m_d->df->process(dfinput);
max@1	704
max@1	705 if (m_d->dfOutput.empty()) m_d->origin = timestamp;
max@1	706
max@1	707 // std::cerr << "df[" << m_d->dfOutput.size() << "] is " << output << std::endl;
max@1	708 m_d->dfOutput.push_back(output);
max@1	709
max@1	710 // Downsample and store the incoming audio block.
max@1	711 // We have an overlap on the incoming audio stream (step size is
max@1	712 // half block size) -- this function is configured to take only a
max@1	713 // step size's worth, so effectively ignoring the overlap. Note
max@1	714 // however that this means we omit the last blocksize - stepsize
max@1	715 // samples completely for the purposes of barline detection
max@1	716 // (hopefully not a problem)
max@1	717 m_d->downBeat->pushAudioBlock(inputBuffers[0]);
max@1	718
max@1	719 // The following is not done every time, but only every m_chromaFramesizeFactor times,
max@1	720 // because the chroma does not need dense time frames.
max@1	721
max@1	722 if (m_pluginFrameCount % m_chromaStepsizeFactor == 0)
max@1	723 {
max@1	724
max@1	725 // Window the full time domain, data, FFT it and process chroma stuff.
max@1	726
max@1	727 #ifndef __GNUC__
max@1	728 float windowedBuffers = (float )alloca(m_chromadata->blockSize * sizeof(float));
max@1	729 #else
max@1	730 float windowedBuffers[m_chromadata->blockSize];
max@1	731 #endif
max@1	732 m_chromadata->window.cut(&inputBuffers[0][0], &windowedBuffers[0]);
max@1	733
max@1	734 // adjust timestamp (we want the middle of the frame)
max@1	735 timestamp = timestamp + Vamp::RealTime::frame2RealTime(sampleOffset, lrintf(m_inputSampleRate));
max@1	736
max@1	737 m_chromadata->baseProcess(&windowedBuffers[0], timestamp);
max@1	738
max@1	739 }
max@1	740 m_pluginFrameCount++;
max@1	741
max@1	742 FeatureSet fs;
max@1	743 fs[m_logscalespecOutputNumber].push_back(
max@1	744 m_chromadata->logSpectrum.back());
max@1	745 return fs;
max@1	746 }
max@1	747
max@1	748 SongPartitioner::FeatureSet SongPartitioner::getRemainingFeatures()
max@1	749 {
max@1	750 if (!m_d) {
max@1	751 cerr << "ERROR: SongPartitioner::getRemainingFeatures: "
max@1	752 << "SongPartitioner has not been initialised"
max@1	753 << endl;
max@1	754 return FeatureSet();
max@1	755 }
max@1	756
max@1	757 FeatureSet masterFeatureset = BeatTrack();
max@1	758 FeatureList chromaList = ChromaFeatures();
max@1	759
max@1	760 for (size_t i = 0; i < chromaList.size(); ++i)
max@1	761 {
max@1	762 masterFeatureset[m_bothchromaOutputNumber].push_back(chromaList[i]);
max@1	763 }
max@1	764
max@1	765 // quantised and pseudo-quantised (beat-wise) chroma
max@1	766 std::vector<FeatureList> quantisedChroma = BeatQuantiser(chromaList, masterFeatureset[m_beatOutputNumber]);
max@1	767
max@1	768 masterFeatureset[m_qchromafwOutputNumber] = quantisedChroma[0];
max@1	769 masterFeatureset[m_qchromaOutputNumber] = quantisedChroma[1];
max@1	770
max@1	771 // Segmentation
max@1	772 masterFeatureset[m_segmOutputNumber] = Segmenter(quantisedChroma[1]);
max@1	773
max@1	774 return(masterFeatureset);
max@1	775 }
max@1	776
max@1	777 /* ------ Beat Tracker ------ */
max@1	778
max@1	779 SongPartitioner::FeatureSet SongPartitioner::BeatTrack()
max@1	780 {
max@1	781 vector<double> df;
max@1	782 vector<double> beatPeriod;
max@1	783 vector<double> tempi;
max@1	784
max@1	785 for (size_t i = 2; i < m_d->dfOutput.size(); ++i) { // discard first two elts
max@1	786 df.push_back(m_d->dfOutput[i]);
max@1	787 beatPeriod.push_back(0.0);
max@1	788 }
max@1	789 if (df.empty()) return FeatureSet();
max@1	790
max@1	791 TempoTrackV2 tt(m_inputSampleRate, m_d->dfConfig.stepSize);
max@1	792 tt.calculateBeatPeriod(df, beatPeriod, tempi);
max@1	793
max@1	794 vector<double> beats;
max@1	795 tt.calculateBeats(df, beatPeriod, beats);
max@1	796
max@1	797 vector<int> downbeats;
max@1	798 size_t downLength = 0;
max@1	799 const float *downsampled = m_d->downBeat->getBufferedAudio(downLength);
max@1	800 m_d->downBeat->findDownBeats(downsampled, downLength, beats, downbeats);
max@1	801
max@1	802 vector<double> beatsd;
max@1	803 m_d->downBeat->getBeatSD(beatsd);
max@1	804
max@1	805 /*std::cout << "BeatTracker: found downbeats at: ";
max@1	806 for (int i = 0; i < downbeats.size(); ++i) std::cout << downbeats[i] << " " << std::endl;*/
max@1	807
max@1	808 FeatureSet returnFeatures;
max@1	809
max@1	810 char label[20];
max@1	811
max@1	812 int dbi = 0;
max@1	813 int beat = 0;
max@1	814 int bar = 0;
max@1	815
max@1	816 if (!downbeats.empty()) {
max@1	817 // get the right number for the first beat; this will be
max@1	818 // incremented before use (at top of the following loop)
max@1	819 int firstDown = downbeats[0];
max@1	820 beat = m_bpb - firstDown - 1;
max@1	821 if (beat == m_bpb) beat = 0;
max@1	822 }
max@1	823
max@1	824 for (size_t i = 0; i < beats.size(); ++i) {
max@1	825
max@1	826 size_t frame = beats[i] * m_d->dfConfig.stepSize;
max@1	827
max@1	828 if (dbi < downbeats.size() && i == downbeats[dbi]) {
max@1	829 beat = 0;
max@1	830 ++bar;
max@1	831 ++dbi;
max@1	832 } else {
max@1	833 ++beat;
max@1	834 }
max@1	835
max@1	836 /* Ooutput Section */
max@1	837
max@1	838 // outputs are:
max@1	839 //
max@1	840 // 0 -> beats
max@1	841 // 1 -> bars
max@1	842 // 2 -> beat counter function
max@1	843
max@1	844 Feature feature;
max@1	845 feature.hasTimestamp = true;
max@1	846 feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime (frame, lrintf(m_inputSampleRate));
max@1	847
max@1	848 sprintf(label, "%d", beat + 1);
max@1	849 feature.label = label;
max@1	850 returnFeatures[m_beatOutputNumber].push_back(feature); // labelled beats
max@1	851
max@1	852 feature.values.push_back(beat + 1);
max@1	853 returnFeatures[m_beatcountsOutputNumber].push_back(feature); // beat function
max@1	854
max@1	855 if (i > 0 && i <= beatsd.size()) {
max@1	856 feature.values.clear();
max@1	857 feature.values.push_back(beatsd[i-1]);
max@1	858 feature.label = "";
max@1	859 returnFeatures[m_beatsdOutputNumber].push_back(feature); // beat spectral difference
max@1	860 }
max@1	861
max@1	862 if (beat == 0) {
max@1	863 feature.values.clear();
max@1	864 sprintf(label, "%d", bar);
max@1	865 feature.label = label;
max@1	866 returnFeatures[m_barsOutputNumber].push_back(feature); // bars
max@1	867 }
max@1	868 }
max@1	869
max@1	870 return returnFeatures;
max@1	871 }
max@1	872
max@1	873
max@1	874 /* ------ Chroma Extractor ------ */
max@1	875
max@1	876 SongPartitioner::FeatureList SongPartitioner::ChromaFeatures()
max@1	877 {
max@1	878
max@1	879 FeatureList returnFeatureList;
max@1	880 FeatureList tunedlogfreqspec;
max@1	881
max@1	882 if (m_chromadata->logSpectrum.size() == 0) return returnFeatureList;
max@1	883
max@1	884 /** Calculate Tuning
max@1	885 calculate tuning from (using the angle of the complex number defined by the
max@1	886 cumulative mean real and imag values)
max@1	887 **/
max@1	888 float meanTuningImag = 0;
max@1	889 float meanTuningReal = 0;
max@1	890 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1	891 meanTuningReal += m_chromadata->meanTunings[iBPS] * m_chromadata->cosvalues[iBPS];
max@1	892 meanTuningImag += m_chromadata->meanTunings[iBPS] * m_chromadata->sinvalues[iBPS];
max@1	893 }
max@1	894 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
max@1	895 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
max@1	896 int intShift = floor(normalisedtuning * 3);
max@1	897 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
max@1	898
max@1	899 char buffer0 [50];
max@1	900
max@1	901 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
max@1	902
max@1	903 /** Tune Log-Frequency Spectrogram
max@1	904 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
max@1	905 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
max@1	906 **/
max@1	907 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
max@1	908
max@1	909 float tempValue = 0;
max@1	910
max@1	911 int count = 0;
max@1	912
max@1	913 for (FeatureList::iterator i = m_chromadata->logSpectrum.begin(); i != m_chromadata->logSpectrum.end(); ++i)
max@1	914 {
max@1	915
max@1	916 Feature f1 = *i;
max@1	917 Feature f2; // tuned log-frequency spectrum
max@1	918
max@1	919 f2.hasTimestamp = true;
max@1	920 f2.timestamp = f1.timestamp;
max@1	921
max@1	922 f2.values.push_back(0.0);
max@1	923 f2.values.push_back(0.0); // set lower edge to zero
max@1	924
max@1	925 if (m_chromadata->tuneLocal) {
max@1	926 intShift = floor(m_chromadata->localTuning[count] * 3);
max@1	927 floatShift = m_chromadata->localTuning[count] * 3 - intShift;
max@1	928 // floatShift is a really bad name for this
max@1	929 }
max@1	930
max@1	931 for (int k = 2; k < (int)f1.values.size() - 3; ++k)
max@1	932 { // interpolate all inner bins
max@1	933 tempValue = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift;
max@1	934 f2.values.push_back(tempValue);
max@1	935 }
max@1	936
max@1	937 f2.values.push_back(0.0);
max@1	938 f2.values.push_back(0.0);
max@1	939 f2.values.push_back(0.0); // upper edge
max@1	940
max@1	941 vector<float> runningmean = SpecialConvolution(f2.values,m_chromadata->hw);
max@1	942 vector<float> runningstd;
max@1	943 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
max@1	944 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
max@1	945 }
max@1	946 runningstd = SpecialConvolution(runningstd,m_chromadata->hw); // second step convolve
max@1	947 for (int i = 0; i < nNote; i++)
max@1	948 {
max@1	949
max@1	950 runningstd[i] = sqrt(runningstd[i]);
max@1	951 // square root to finally have running std
max@1	952
max@1	953 if (runningstd[i] > 0)
max@1	954 {
max@1	955 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
max@1	956 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_chromadata->whitening) : 0;
max@1	957 }
max@1	958
max@1	959 if (f2.values[i] < 0) {
max@1	960
max@1	961 cerr << "ERROR: negative value in logfreq spectrum" << endl;
max@1	962
max@1	963 }
max@1	964 }
max@1	965 tunedlogfreqspec.push_back(f2);
max@1	966 count++;
max@1	967 }
max@1	968 cerr << "done." << endl;
max@1	969 /** Semitone spectrum and chromagrams
max@1	970 Semitone-spaced log-frequency spectrum derived
max@1	971 from the tuned log-freq spectrum above. the spectrum
max@1	972 is inferred using a non-negative least squares algorithm.
max@1	973 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
max@1	974 bass and treble stacked onto each other).
max@1	975 **/
max@1	976 if (m_chromadata->useNNLS == 0) {
max@1	977 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
max@1	978 } else {
max@1	979 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
max@1	980 }
max@1	981
max@1	982 vector<float> oldchroma = vector<float>(12,0);
max@1	983 vector<float> oldbasschroma = vector<float>(12,0);
max@1	984 count = 0;
max@1	985
max@1	986 for (FeatureList::iterator it = tunedlogfreqspec.begin(); it != tunedlogfreqspec.end(); ++it) {
max@1	987 Feature logfreqsp = *it; // logfreq spectrum
max@1	988 Feature bothchroma; // treble and bass chromagram
max@1	989
max@1	990 bothchroma.hasTimestamp = true;
max@1	991 bothchroma.timestamp = logfreqsp.timestamp;
max@1	992
max@1	993 float b[nNote];
max@1	994
max@1	995 bool some_b_greater_zero = false;
max@1	996 float sumb = 0;
max@1	997 for (int i = 0; i < nNote; i++) {
max@1	998 b[i] = logfreqsp.values[i];
max@1	999 sumb += b[i];
max@1	1000 if (b[i] > 0) {
max@1	1001 some_b_greater_zero = true;
max@1	1002 }
max@1	1003 }
max@1	1004
max@1	1005 // here's where the non-negative least squares algorithm calculates the note activation x
max@1	1006
max@1	1007 vector<float> chroma = vector<float>(12, 0);
max@1	1008 vector<float> basschroma = vector<float>(12, 0);
max@1	1009 float currval;
max@1	1010 int iSemitone = 0;
max@1	1011
max@1	1012 if (some_b_greater_zero) {
max@1	1013 if (m_chromadata->useNNLS == 0) {
max@1	1014 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
max@1	1015 currval = 0;
max@1	1016 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
max@1	1017 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
max@1	1018 }
max@1	1019 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
max@1	1020 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
max@1	1021 iSemitone++;
max@1	1022 }
max@1	1023
max@1	1024 } else {
max@1	1025 float x[84+1000];
max@1	1026 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
max@1	1027 vector<int> signifIndex;
max@1	1028 int index=0;
max@1	1029 sumb /= 84.0;
max@1	1030 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
max@1	1031 float currval = 0;
max@1	1032 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
max@1	1033 currval += b[iNote + iBPS];
max@1	1034 }
max@1	1035 if (currval > 0) signifIndex.push_back(index);
max@1	1036 index++;
max@1	1037 }
max@1	1038 float rnorm;
max@1	1039 float w[84+1000];
max@1	1040 float zz[84+1000];
max@1	1041 int indx[84+1000];
max@1	1042 int mode;
max@1	1043 int dictsize = nNote*signifIndex.size();
max@1	1044
max@1	1045 float *curr_dict = new float[dictsize];
max@1	1046 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
max@1	1047 for (int iBin = 0; iBin < nNote; iBin++) {
max@1	1048 curr_dict[iNote * nNote + iBin] =
max@1	1049 1.0 * m_chromadata->dict[signifIndex[iNote] * nNote + iBin];
max@1	1050 }
max@1	1051 }
max@1	1052 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
max@1	1053 delete [] curr_dict;
max@1	1054 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
max@1	1055 // cerr << mode << endl;
max@1	1056 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
max@1	1057 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
max@1	1058 }
max@1	1059 }
max@1	1060 }
max@1	1061
max@1	1062 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end());
max@1	1063 // just stack the both chromas
max@1	1064
max@1	1065 bothchroma.values = chroma;
max@1	1066 returnFeatureList.push_back(bothchroma);
max@1	1067 count++;
max@1	1068 }
max@1	1069 cerr << "done." << endl;
max@1	1070
max@1	1071 return returnFeatureList;
max@1	1072 }
max@1	1073
max@1	1074 /* ------ Beat Quantizer ------ */
max@1	1075
max@1	1076 std::vector<Vamp::Plugin::FeatureList> SongPartitioner::BeatQuantiser(Vamp::Plugin::FeatureList chromagram, Vamp::Plugin::FeatureList beats)
max@1	1077 {
max@1	1078 std::vector<FeatureList> returnVector;
max@1	1079
max@1	1080 FeatureList fwQchromagram; // frame-wise beat-quantised chroma
max@1	1081 FeatureList bwQchromagram; // beat-wise beat-quantised chroma
max@1	1082
max@1	1083 size_t nChromaFrame = chromagram.size();
max@1	1084 size_t nBeat = beats.size();
max@1	1085
max@1	1086 if (nBeat == 0 && nChromaFrame == 0) return returnVector;
max@1	1087
max@1	1088 size_t nBin = chromagram[0].values.size();
max@1	1089
max@1	1090 vector<float> tempChroma = vector<float>(nBin);
max@1	1091
max@1	1092 Vamp::RealTime beatTimestamp = Vamp::RealTime::zeroTime;
max@1	1093 int currBeatCount = -1; // start before first beat
max@1	1094 int framesInBeat = 0;
max@1	1095
max@1	1096 for (size_t iChroma = 0; iChroma < nChromaFrame; ++iChroma)
max@1	1097 {
max@1	1098 Vamp::RealTime chromaTimestamp = chromagram[iChroma].timestamp;
max@1	1099 if (chromaTimestamp > beats[currBeatCount+1].timestamp \|\|
max@1	1100 iChroma == nChromaFrame-1)
max@1	1101 {
max@1	1102 // new beat (or last chroma frame)
max@1	1103 // 1. finish all the old beat processing
max@1	1104 for (size_t i = 0; i < nBin; ++i) tempChroma[i] /= framesInBeat; // average
max@1	1105
max@1	1106 Feature bwQchromaFrame;
max@1	1107 bwQchromaFrame.hasTimestamp = true;
max@1	1108 bwQchromaFrame.timestamp = beatTimestamp;
max@1	1109 bwQchromaFrame.values = tempChroma;
max@1	1110 bwQchromaFrame.duration = beats[currBeatCount+1].timestamp - beats[currBeatCount].timestamp;
max@1	1111 bwQchromagram.push_back(bwQchromaFrame);
max@1	1112
max@1	1113 for (int iFrame = -framesInBeat; iFrame < 0; ++iFrame)
max@1	1114 {
max@1	1115 Feature fwQchromaFrame;
max@1	1116 fwQchromaFrame.hasTimestamp = true;
max@1	1117 fwQchromaFrame.timestamp = chromagram[iChroma+iFrame].timestamp;
max@1	1118 fwQchromaFrame.values = tempChroma; // all between two beats get the same
max@1	1119 fwQchromagram.push_back(fwQchromaFrame);
max@1	1120 }
max@1	1121
max@1	1122 // 2. increments / resets for current (new) beat
max@1	1123 currBeatCount++;
max@1	1124 beatTimestamp = beats[currBeatCount].timestamp;
max@1	1125 for (size_t i = 0; i < nBin; ++i) tempChroma[i] = 0; // average
max@1	1126 framesInBeat = 0;
max@1	1127 }
max@1	1128 framesInBeat++;
max@1	1129 for (size_t i = 0; i < nBin; ++i) tempChroma[i] += chromagram[iChroma].values[i];
max@1	1130 }
max@1	1131 returnVector.push_back(fwQchromagram);
max@1	1132 returnVector.push_back(bwQchromagram);
max@1	1133 }
max@1	1134
max@1	1135
max@1	1136
max@1	1137 /* -------------------------------- */
max@1	1138 /* ------ Support Functions ------ */
max@1	1139 /* -------------------------------- */
max@1	1140
max@1	1141 // one-dimesion median filter
max@1	1142 arma::vec medfilt1(arma::vec v, int medfilt_length)
max@1	1143 {
max@1	1144 int halfWin = medfilt_length/2;
max@1	1145
max@1	1146 // result vector
max@1	1147 arma::vec res = arma::zeros<arma::vec>(v.size());
max@1	1148
max@1	1149 // padding
max@1	1150 arma::vec padV = arma::zeros<arma::vec>(v.size()+medfilt_length-1);
max@1	1151
max@1	1152 for (unsigned i=medfilt_length/2; i < medfilt_length/2+v.size(); ++ i)
max@1	1153 {
max@1	1154 padV(i) = v(i-medfilt_length/2);
max@1	1155 }
max@1	1156
max@1	1157 // Median filter
max@1	1158 arma::vec win = arma::zeros<arma::vec>(medfilt_length);
max@1	1159
max@1	1160 for (unsigned i=0; i < v.size(); ++i)
max@1	1161 {
max@1	1162 win = padV.subvec(i,i+halfWin*2);
max@1	1163 win = sort(win);
max@1	1164 res(i) = win(halfWin);
max@1	1165 }
max@1	1166
max@1	1167 return res;
max@1	1168 }
max@1	1169
max@1	1170
max@1	1171 // Quantile
max@1	1172 double quantile(arma::vec v, double p)
max@1	1173 {
max@1	1174 arma::vec sortV = arma::sort(v);
max@1	1175 int n = sortV.size();
max@1	1176 arma::vec x = arma::zeros<vec>(n+2);
max@1	1177 arma::vec y = arma::zeros<vec>(n+2);
max@1	1178
max@1	1179 x(0) = 0;
max@1	1180 x(n+1) = 100;
max@1	1181
max@1	1182 for (unsigned i=1; i<n+1; ++i)
max@1	1183 x(i) = 100*(0.5+(i-1))/n;
max@1	1184
max@1	1185 y(0) = sortV(0);
max@1	1186 y.subvec(1,n) = sortV;
max@1	1187 y(n+1) = sortV(n-1);
max@1	1188
max@1	1189 arma::uvec x2index = find(x>=p*100);
max@1	1190
max@1	1191 // Interpolation
max@1	1192 double x1 = x(x2index(0)-1);
max@1	1193 double x2 = x(x2index(0));
max@1	1194 double y1 = y(x2index(0)-1);
max@1	1195 double y2 = y(x2index(0));
max@1	1196
max@1	1197 double res = (y2-y1)/(x2-x1)(p100-x1)+y1;
max@1	1198
max@1	1199 return res;
max@1	1200 }
max@1	1201
max@1	1202 // Max Filtering
max@1	1203 arma::mat maxfilt1(arma::mat inmat, int len)
max@1	1204 {
max@1	1205 arma::mat outmat = inmat;
max@1	1206
max@1	1207 for (int i=0; i<inmat.n_rows; ++i)
max@1	1208 {
max@1	1209 if (arma::sum(inmat.row(i)) > 0)
max@1	1210 {
max@1	1211 // Take a window of rows
max@1	1212 int startWin;
max@1	1213 int endWin;
max@1	1214
max@1	1215 if (0 > i-len)
max@1	1216 startWin = 0;
max@1	1217 else
max@1	1218 startWin = i-len;
max@1	1219
max@1	1220 if (inmat.n_rows-1 < i+len-1)
max@1	1221 endWin = inmat.n_rows-1;
max@1	1222 else
max@1	1223 endWin = i+len-1;
max@1	1224
max@1	1225 outmat(i,span::all) = arma::max(inmat(span(startWin,endWin),span::all));
max@1	1226 }
max@1	1227 }
max@1	1228
max@1	1229 return outmat;
max@1	1230
max@1	1231 }
max@1	1232
max@1	1233 // Null Parts
max@1	1234 Part nullpart(vector<Part> parts, arma::vec barline)
max@1	1235 {
max@1	1236 arma::uvec nullindices = arma::ones<arma::uvec>(barline.size());
max@1	1237 for (unsigned iPart=0; iPart<parts.size(); ++iPart)
max@1	1238 {
max@1	1239 for (unsigned iIndex=0; iIndex<parts[0].indices.size(); ++iIndex)
max@1	1240 {
max@1	1241 for (unsigned i=0; i<parts[iPart].n; ++i)
max@1	1242 {
max@1	1243 unsigned ind = parts[iPart].indices[iIndex]+i;
max@1	1244 nullindices(ind) = 0;
max@1	1245 }
max@1	1246 }
max@1	1247 }
max@1	1248
max@1	1249 Part newPart;
max@1	1250 newPart.n = 1;
max@1	1251 uvec q = find(nullindices > 0);
max@1	1252
max@1	1253 for (unsigned i=0; i<q.size();++i)
max@1	1254 newPart.indices.push_back(q(i));
max@1	1255
max@1	1256 newPart.letter = '-';
max@1	1257 newPart.value = 0;
max@1	1258 newPart.level = 0;
max@1	1259
max@1	1260 return newPart;
max@1	1261 }
max@1	1262
max@1	1263
max@1	1264 // Merge Nulls
max@1	1265 void mergenulls(vector<Part> &parts)
max@1	1266 {
max@1	1267 for (unsigned iPart=0; iPart<parts.size(); ++iPart)
max@1	1268 {
max@1	1269
max@1	1270 vector<Part> newVectorPart;
max@1	1271
max@1	1272 if (parts[iPart].letter.compare("-")==0)
max@1	1273 {
max@1	1274 sort (parts[iPart].indices.begin(), parts[iPart].indices.end());
max@1	1275 unsigned newpartind = -1;
max@1	1276
max@1	1277 vector<int> indices;
max@1	1278 indices.push_back(-2);
max@1	1279
max@1	1280 for (unsigned iIndex=0; iIndex<parts[iPart].indices.size(); ++iIndex)
max@1	1281 indices.push_back(parts[iPart].indices[iIndex]);
max@1	1282
max@1	1283 for (unsigned iInd=1; iInd < indices.size(); ++iInd)
max@1	1284 {
max@1	1285 if (indices[iInd] - indices[iInd-1] > 1)
max@1	1286 {
max@1	1287 newpartind++;
max@1	1288
max@1	1289 Part newPart;
max@1	1290 newPart.letter = 'n';
max@1	1291 std::stringstream out;
max@1	1292 out << newpartind+1;
max@1	1293 newPart.letter.append(out.str());
max@1	1294 newPart.value = 20+newpartind+1;
max@1	1295 newPart.n = 1;
max@1	1296 newPart.indices.push_back(indices[iInd]);
max@1	1297 newPart.level = 0;
max@1	1298
max@1	1299 newVectorPart.push_back(newPart);
max@1	1300 }
max@1	1301 else
max@1	1302 {
max@1	1303 newVectorPart[newpartind].n = newVectorPart[newpartind].n+1;
max@1	1304 }
max@1	1305 }
max@1	1306 parts.erase (parts.end());
max@1	1307
max@1	1308 for (unsigned i=0; i<newVectorPart.size(); ++i)
max@1	1309 parts.push_back(newVectorPart[i]);
max@1	1310 }
max@1	1311 }
max@1	1312 }
max@1	1313
max@1	1314 /* ------ Segmentation ------ */
max@1	1315
max@1	1316 vector<Part> songSegment(Vamp::Plugin::FeatureList quatisedChromagram)
max@1	1317 {
max@1	1318
max@1	1319
max@1	1320 /* ------ Parameters ------ */
max@1	1321 double thresh_beat = 0.85;
max@1	1322 double thresh_seg = 0.80;
max@1	1323 int medfilt_length = 5;
max@1	1324 int minlength = 28;
max@1	1325 int maxlength = 128;
max@1	1326 double quantilePerc = 0.1;
max@1	1327 /* ------------------------ */
max@1	1328
max@1	1329
max@1	1330 // Collect Info
max@1	1331 int nBeat = quatisedChromagram.size(); // Number of feature vector
max@1	1332 int nFeatValues = quatisedChromagram[0].values.size(); // Number of values for each feature vector
max@1	1333
max@1	1334 // ----- TEMP ------
max@1	1335 /*if (nBeat > 255)
max@1	1336 nBeat = 255;
max@1	1337 std::cout << "CUT THE ERROR BEAT -> " << nBeat << std::endl;*/
max@1	1338 // -----------------
max@1	1339
max@1	1340 arma::irowvec timeStamp = arma::zeros<arma::imat>(1,nBeat); // Vector of Time Stamps
max@1	1341
max@1	1342 // Save time stamp as a Vector
max@1	1343 if (quatisedChromagram[0].hasTimestamp)
max@1	1344 {
max@1	1345 for (unsigned i = 0; i < nBeat; ++ i)
max@1	1346 timeStamp[i] = quatisedChromagram[i].timestamp.nsec;
max@1	1347 }
max@1	1348
max@1	1349
max@1	1350 // Build a ObservationTOFeatures Matrix
max@1	1351 arma::mat featVal = arma::zeros<mat>(nBeat,nFeatValues/2);
max@1	1352
max@1	1353 for (unsigned i = 0; i < nBeat; ++ i)
max@1	1354 for (unsigned j = 0; j < nFeatValues/2; ++ j)
max@1	1355 {
max@1	1356 featVal(i,j) = (quatisedChromagram[i].values[j]+quatisedChromagram[i].values[j+12]) * 0.8;
max@1	1357 }
max@1	1358
max@1	1359 // Set to arbitrary value to feature vectors with low std
max@1	1360 arma::mat a = stddev(featVal,1,1);
max@1	1361
max@1	1362 // Feature Colleration Matrix
max@1	1363 arma::mat simmat0 = 1-arma::cor(arma::trans(featVal));
max@1	1364
max@1	1365
max@1	1366 for (unsigned i = 0; i < nBeat; ++ i)
max@1	1367 {
max@1	1368 if (a(i)<0.000001)
max@1	1369 {
max@1	1370 featVal(i,1) = 1000; // arbitrary
max@1	1371
max@1	1372 for (unsigned j = 0; j < nFeatValues/2; ++j)
max@1	1373 {
max@1	1374 simmat0(i,j) = 1;
max@1	1375 simmat0(j,i) = 1;
max@1	1376 }
max@1	1377 }
max@1	1378 }
max@1	1379
max@1	1380 arma::mat simmat = 1-simmat0/2;
max@1	1381
max@1	1382 // -------- To delate when the proble with the add of beat will be solved -------
max@1	1383 for (unsigned i = 0; i < nBeat; ++ i)
max@1	1384 for (unsigned j = 0; j < nBeat; ++ j)
max@1	1385 if (!std::isfinite(simmat(i,j)))
max@1	1386 simmat(i,j)=0;
max@1	1387 // ------------------------------------------------------------------------------
max@1	1388
max@1	1389 // Median Filtering applied to the Correlation Matrix
max@1	1390 // The median filter is for each diagonal of the Matrix
max@1	1391 arma::mat median_simmat = arma::zeros<arma::mat>(nBeat,nBeat);
max@1	1392
max@1	1393 for (unsigned i = 0; i < nBeat; ++ i)
max@1	1394 {
max@1	1395 arma::vec temp = medfilt1(simmat.diag(i),medfilt_length);
max@1	1396 median_simmat.diag(i) = temp;
max@1	1397 median_simmat.diag(-i) = temp;
max@1	1398 }
max@1	1399
max@1	1400 for (unsigned i = 0; i < nBeat; ++ i)
max@1	1401 for (unsigned j = 0; j < nBeat; ++ j)
max@1	1402 if (!std::isfinite(median_simmat(i,j)))
max@1	1403 median_simmat(i,j) = 0;
max@1	1404
max@1	1405 // -------------- NOT CONVERTED -------------------------------------
max@1	1406 // if param.seg.standardise
max@1	1407 // med_median_simmat = repmat(median(median_simmat),nBeat,1);
max@1	1408 // std_median_simmat = repmat(std(median_simmat),nBeat,1);
max@1	1409 // median_simmat = (median_simmat - med_median_simmat) ./ std_median_simmat;
max@1	1410 // end
max@1	1411 // --------------------------------------------------------
max@1	1412
max@1	1413 // Retrieve Bar Bounderies
max@1	1414 arma::uvec dup = find(median_simmat > thresh_beat);
max@1	1415 arma::mat potential_duplicates = arma::zeros<arma::mat>(nBeat,nBeat);
max@1	1416 potential_duplicates.elem(dup) = arma::ones<arma::vec>(dup.size());
max@1	1417 potential_duplicates = trimatu(potential_duplicates);
max@1	1418
max@1	1419 unsigned nPartlengths = round((maxlength-minlength)/4)+1;
max@1	1420 arma::vec partlengths = zeros<arma::vec>(nPartlengths);
max@1	1421
max@1	1422 for (unsigned i = 0; i < nPartlengths; ++ i)
max@1	1423 partlengths(i) = (i*4)+ minlength;
max@1	1424
max@1	1425 // initialise arrays
max@1	1426 arma::cube simArray = zeros<arma::cube>(nBeat,nBeat,nPartlengths);
max@1	1427 arma::cube decisionArray2 = zeros<arma::cube>(nBeat,nBeat,nPartlengths);
max@1	1428
max@1	1429 int conta = 0;
max@1	1430
max@1	1431 //for (unsigned iLength = 0; iLength < nPartlengths; ++ iLength)
max@1	1432 for (unsigned iLength = 0; iLength < 20; ++ iLength)
max@1	1433 {
max@1	1434 unsigned len = partlengths(iLength);
max@1	1435 unsigned nUsedBeat = nBeat - len + 1; // number of potential rep beginnings: they can't overlap at the end of the song
max@1	1436
max@1	1437 for (unsigned iBeat = 0; iBeat < nUsedBeat; ++ iBeat) // looping over all columns (arbitrarily chosen columns)
max@1	1438 {
max@1	1439 arma::uvec help2 = find(potential_duplicates(span(0,nUsedBeat-1),iBeat)==1);
max@1	1440
max@1	1441 for (unsigned i=0; i<help2.size(); ++i)
max@1	1442 {
max@1	1443
max@1	1444 // measure how well two length len segments go together
max@1	1445 int kBeat = help2(i);
max@1	1446 arma::vec distrib = median_simmat(span(iBeat,iBeat+len-1),span(kBeat,kBeat+len-1)).diag(0);
max@1	1447 simArray(iBeat,kBeat,iLength) = quantile(distrib,quantilePerc);
max@1	1448 }
max@1	1449 }
max@1	1450
max@1	1451 arma::mat tempM = simArray(span(0,nUsedBeat-1),span(0,nUsedBeat-1),span(iLength,iLength));
max@1	1452 simArray.slice(iLength)(span(0,nUsedBeat-1),span(0,nUsedBeat-1)) = tempM + arma::trans(tempM) - (eye<mat>(nUsedBeat,nUsedBeat)%tempM);
max@1	1453
max@1	1454 // convolution
max@1	1455 arma::vec K = arma::zeros<vec>(3);
max@1	1456 K << 0.01 << 0.98 << 0.01;
max@1	1457
max@1	1458
max@1	1459 for (unsigned i=0; i<simArray.n_rows; ++i)
max@1	1460 {
max@1	1461 arma::rowvec t = arma::conv((arma::rowvec)simArray.slice(iLength).row(i),K);
max@1	1462 simArray.slice(iLength)(i,span::all) = t.subvec(1,t.size()-2);
max@1	1463 }
max@1	1464
max@1	1465 // take only over-average bars that do not overlap
max@1	1466
max@1	1467 arma::mat temp = arma::zeros<mat>(simArray.n_rows, simArray.n_cols);
max@1	1468 temp(span::all, span(0,nUsedBeat-1)) = simArray.slice(iLength)(span::all,span(0,nUsedBeat-1));
max@1	1469
max@1	1470 for (unsigned i=0; i<temp.n_rows; ++i)
max@1	1471 for (unsigned j=0; j<nUsedBeat; ++j)
max@1	1472 if (temp(i,j) < thresh_seg)
max@1	1473 temp(i,j) = 0;
max@1	1474
max@1	1475 decisionArray2.slice(iLength) = temp;
max@1	1476
max@1	1477 arma::mat maxMat = maxfilt1(decisionArray2.slice(iLength),len-1);
max@1	1478
max@1	1479 for (unsigned i=0; i<decisionArray2.n_rows; ++i)
max@1	1480 for (unsigned j=0; j<decisionArray2.n_cols; ++j)
max@1	1481 if (decisionArray2.slice(iLength)(i,j) < maxMat(i,j))
max@1	1482 decisionArray2.slice(iLength)(i,j) = 0;
max@1	1483
max@1	1484 decisionArray2.slice(iLength) = decisionArray2.slice(iLength) % arma::trans(decisionArray2.slice(iLength));
max@1	1485
max@1	1486 for (unsigned i=0; i<simArray.n_rows; ++i)
max@1	1487 for (unsigned j=0; j<simArray.n_cols; ++j)
max@1	1488 if (simArray.slice(iLength)(i,j) < thresh_seg)
max@1	1489 potential_duplicates(i,j) = 0;
max@1	1490 }
max@1	1491
max@1	1492 // Milk the data
max@1	1493
max@1	1494 arma::mat bestval;
max@1	1495
max@1	1496 for (unsigned iLength=0; iLength<nPartlengths; ++iLength)
max@1	1497 {
max@1	1498 arma::mat temp = arma::zeros<arma::mat>(decisionArray2.n_rows,decisionArray2.n_cols);
max@1	1499
max@1	1500 for (unsigned rows=0; rows<decisionArray2.n_rows; ++rows)
max@1	1501 for (unsigned cols=0; cols<decisionArray2.n_cols; ++cols)
max@1	1502 if (decisionArray2.slice(iLength)(rows,cols) > 0)
max@1	1503 temp(rows,cols) = 1;
max@1	1504
max@1	1505 arma::vec currLogicSum = arma::sum(temp,1);
max@1	1506
max@1	1507 for (unsigned iBeat=0; iBeat<nBeat; ++iBeat)
max@1	1508 if (currLogicSum(iBeat) > 1)
max@1	1509 {
max@1	1510 arma::vec t = decisionArray2.slice(iLength)(span::all,iBeat);
max@1	1511 double currSum = sum(t);
max@1	1512
max@1	1513 unsigned count = 0;
max@1	1514 for (unsigned i=0; i<t.size(); ++i)
max@1	1515 if (t(i)>0)
max@1	1516 count++;
max@1	1517
max@1	1518 currSum = (currSum/count)/2;
max@1	1519
max@1	1520 arma::rowvec t1;
max@1	1521 t1 << (currLogicSum(iBeat)-1) * partlengths(iLength) << currSum << iLength << iBeat << currLogicSum(iBeat);
max@1	1522
max@1	1523 bestval = join_cols(bestval,t1);
max@1	1524 }
max@1	1525 }
max@1	1526
max@1	1527 // Definition of the resulting vector
max@1	1528 vector<Part> parts;
max@1	1529
max@1	1530 // make a table of all valid sets of parts
max@1	1531
max@1	1532 char partletters[] = {'A','B','C','D','E','F','G', 'H','I','J','K','L','M','N','O','P','Q','R','S'};
max@1	1533 unsigned partvalues[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19};
max@1	1534 arma::vec valid_sets = arma::ones<arma::vec>(bestval.n_rows);
max@1	1535
max@1	1536 if (!bestval.is_empty())
max@1	1537 {
max@1	1538
max@1	1539 // In questo punto viene introdotto un errore alla 3 cifra decimale
max@1	1540
max@1	1541 arma::colvec t = arma::zeros<arma::colvec>(bestval.n_rows);
max@1	1542 for (unsigned i=0; i<bestval.n_rows; ++i)
max@1	1543 {
max@1	1544 t(i) = bestval(i,1)*2;
max@1	1545 }
max@1	1546
max@1	1547 double m = t.max();
max@1	1548
max@1	1549 bestval(span::all,1) = bestval(span::all,1) / m;
max@1	1550 bestval(span::all,0) = bestval(span::all,0) + bestval(span::all,1);
max@1	1551
max@1	1552 arma::mat bestval2;
max@1	1553 for (unsigned i=0; i<bestval.n_cols; ++i)
max@1	1554 if (i!=1)
max@1	1555 bestval2 = join_rows(bestval2,bestval.col(i));
max@1	1556
max@1	1557 for (unsigned kSeg=0; kSeg<1; ++kSeg)
max@1	1558 {
max@1	1559
max@1	1560 arma::mat currbestvals = arma::zeros<arma::mat>(bestval2.n_rows, bestval2.n_cols);
max@1	1561 for (unsigned i=0; i<bestval2.n_rows; ++i)
max@1	1562 for (unsigned j=0; j<bestval2.n_cols; ++j)
max@1	1563 if (valid_sets(i))
max@1	1564 currbestvals(i,j) = bestval2(i,j);
max@1	1565
max@1	1566 arma::vec t1 = currbestvals.col(0);
max@1	1567 double ma;
max@1	1568 uword maIdx;
max@1	1569 ma = t1.max(maIdx);
max@1	1570
max@1	1571 // -------------- NOT CONVERTED -------------------------------------
max@1	1572 //if isempty(m)
max@1	1573 // break
max@1	1574 // end
max@1	1575 // ------------------------------------------------------------------
max@1	1576
max@1	1577 double bestLength = partlengths(currbestvals(maIdx,1));
max@1	1578 arma::rowvec bestIndices = decisionArray2.slice(currbestvals(maIdx,1))(currbestvals(maIdx,2),span::all);
max@1	1579
max@1	1580 arma::rowvec bestIndicesMap = arma::zeros<arma::rowvec>(bestIndices.size());
max@1	1581 for (unsigned i=0; i<bestIndices.size(); ++i)
max@1	1582 if (bestIndices(i)>0)
max@1	1583 bestIndicesMap(i) = 1;
max@1	1584
max@1	1585 arma::rowvec mask = arma::zeros<arma::rowvec>(bestLength*2-1);
max@1	1586 for (unsigned i=0; i<bestLength; ++i)
max@1	1587 mask(i+bestLength-1) = 1;
max@1	1588
max@1	1589 arma::rowvec t2 = arma::conv(bestIndicesMap,mask);
max@1	1590 arma::rowvec island = t2.subvec(mask.size()/2,t2.size()-1-mask.size()/2);
max@1	1591
max@1	1592 // Save results in the structure
max@1	1593 Part newPart;
max@1	1594 newPart.n = bestLength;
max@1	1595 uvec q1 = find(bestIndices > 0);
max@1	1596
max@1	1597 for (unsigned i=0; i<q1.size();++i)
max@1	1598 newPart.indices.push_back(q1(i));
max@1	1599
max@1	1600 newPart.letter = partletters[kSeg];
max@1	1601 newPart.value = partvalues[kSeg];
max@1	1602 newPart.level = kSeg+1;
max@1	1603 parts.push_back(newPart);
max@1	1604
max@1	1605 uvec q2 = find(valid_sets==1);
max@1	1606
max@1	1607 for (unsigned i=0; i<q2.size(); ++i)
max@1	1608 {
max@1	1609 unsigned iSet = q2(i);
max@1	1610 unsigned s = partlengths(bestval2(iSet,1));
max@1	1611
max@1	1612 arma::rowvec mask1 = arma::zeros<arma::rowvec>(s*2-1);
max@1	1613 for (unsigned i=0; i<s; ++i)
max@1	1614 mask1(i+s-1) = 1;
max@1	1615
max@1	1616 arma::rowvec Ind = decisionArray2.slice(bestval2(iSet,1))(bestval2(iSet,2),span::all);
max@1	1617 arma::rowvec IndMap = arma::zeros<arma::rowvec>(Ind.size());
max@1	1618 for (unsigned i=0; i<Ind.size(); ++i)
max@1	1619 if (Ind(i)>0)
max@1	1620 IndMap(i) = 2;
max@1	1621
max@1	1622 arma::rowvec t3 = arma::conv(IndMap,mask1);
max@1	1623 arma::rowvec currislands = t3.subvec(mask1.size()/2,t3.size()-1-mask1.size()/2);
max@1	1624
max@1	1625 arma::rowvec islandsdMult = currislands%island;
max@1	1626 arma::uvec islandsIndex = find(islandsdMult > 0);
max@1	1627
max@1	1628 if (islandsIndex.size() == currislands.size())
max@1	1629 valid_sets(iSet) = 0;
max@1	1630 }
max@1	1631 }
max@1	1632 }
max@1	1633 else
max@1	1634 {
max@1	1635 Part newPart;
max@1	1636 newPart.n = nBeat;
max@1	1637 newPart.indices.push_back(1);
max@1	1638 newPart.letter = 'A';
max@1	1639 newPart.value = 1;
max@1	1640 newPart.level = 1;
max@1	1641 parts.push_back(newPart);
max@1	1642 }
max@1	1643
max@1	1644
max@1	1645 arma::vec bar = linspace(1,nBeat,nBeat);
max@1	1646 Part np = nullpart(parts,bar);
max@1	1647 parts.push_back(np);
max@1	1648
max@1	1649 // -------------- NOT CONVERTED -------------------------------------
max@1	1650 // if param.seg.editor
max@1	1651 // [pa, ta] = partarray(parts);
max@1	1652 // parts = editorssearch(pa, ta, parts);
max@1	1653 // parts = [parts, nullpart(parts,1:nBeat)];
max@1	1654 // end
max@1	1655 // ------------------------------------------------------------------
max@1	1656
max@1	1657
max@1	1658 mergenulls(parts);
max@1	1659
max@1	1660
max@1	1661 // -------------- NOT CONVERTED -------------------------------------
max@1	1662 // if param.seg.editor
max@1	1663 // [pa, ta] = partarray(parts);
max@1	1664 // parts = editorssearch(pa, ta, parts);
max@1	1665 // parts = [parts, nullpart(parts,1:nBeat)];
max@1	1666 // end
max@1	1667 // ------------------------------------------------------------------
max@1	1668
max@1	1669 return parts;
max@1	1670 }
max@1	1671
max@1	1672
max@1	1673
max@1	1674 void songSegmentChroma(Vamp::Plugin::FeatureList quatisedChromagram, vector<Part> &parts)
max@1	1675 {
max@1	1676 // Collect Info
max@1	1677 int nBeat = quatisedChromagram.size(); // Number of feature vector
max@1	1678 int nFeatValues = quatisedChromagram[0].values.size(); // Number of values for each feature vector
max@1	1679
max@1	1680 arma::mat synchTreble = arma::zeros<mat>(nBeat,nFeatValues/2);
max@1	1681
max@1	1682 for (unsigned i = 0; i < nBeat; ++ i)
max@1	1683 for (unsigned j = 0; j < nFeatValues/2; ++ j)
max@1	1684 {
max@1	1685 synchTreble(i,j) = quatisedChromagram[i].values[j];
max@1	1686 }
max@1	1687
max@1	1688 arma::mat synchBass = arma::zeros<mat>(nBeat,nFeatValues/2);
max@1	1689
max@1	1690 for (unsigned i = 0; i < nBeat; ++ i)
max@1	1691 for (unsigned j = 0; j < nFeatValues/2; ++ j)
max@1	1692 {
max@1	1693 synchBass(i,j) = quatisedChromagram[i].values[j+12];
max@1	1694 }
max@1	1695
max@1	1696 // Process
max@1	1697
max@1	1698 arma::mat segTreble = arma::zeros<arma::mat>(quatisedChromagram.size(),quatisedChromagram[0].values.size()/2);
max@1	1699 arma::mat segBass = arma::zeros<arma::mat>(quatisedChromagram.size(),quatisedChromagram[0].values.size()/2);
max@1	1700
max@1	1701 for (unsigned iPart=0; iPart<parts.size(); ++iPart)
max@1	1702 {
max@1	1703 parts[iPart].nInd = parts[iPart].indices.size();
max@1	1704
max@1	1705 for (unsigned kOccur=0; kOccur<parts[iPart].nInd; ++kOccur)
max@1	1706 {
max@1	1707 int kStartIndex = parts[iPart].indices[kOccur];
max@1	1708 int kEndIndex = kStartIndex + parts[iPart].n-1;
max@1	1709
max@1	1710 segTreble.rows(kStartIndex,kEndIndex) = segTreble.rows(kStartIndex,kEndIndex) + synchTreble.rows(kStartIndex,kEndIndex);
max@1	1711 segBass.rows(kStartIndex,kEndIndex) = segBass.rows(kStartIndex,kEndIndex) + synchBass.rows(kStartIndex,kEndIndex);
max@1	1712 }
max@1	1713 }
max@1	1714 }
max@1	1715
max@1	1716
max@1	1717 // Segment Integration
max@1	1718 vector<Part> songSegmentIntegration(vector<Part> &parts)
max@1	1719 {
max@1	1720 // Break up parts (every part will have one instance)
max@1	1721 vector<Part> newPartVector;
max@1	1722 vector<int> partindices;
max@1	1723
max@1	1724 for (unsigned iPart=0; iPart<parts.size(); ++iPart)
max@1	1725 {
max@1	1726 parts[iPart].nInd = parts[iPart].indices.size();
max@1	1727 for (unsigned iInstance=0; iInstance<parts[iPart].nInd; ++iInstance)
max@1	1728 {
max@1	1729 Part newPart;
max@1	1730 newPart.n = parts[iPart].n;
max@1	1731 newPart.letter = parts[iPart].letter;
max@1	1732 newPart.value = parts[iPart].value;
max@1	1733 newPart.level = parts[iPart].level;
max@1	1734 newPart.indices.push_back(parts[iPart].indices[iInstance]);
max@1	1735 newPart.nInd = 1;
max@1	1736 partindices.push_back(parts[iPart].indices[iInstance]);
max@1	1737
max@1	1738 newPartVector.push_back(newPart);
max@1	1739 }
max@1	1740 }
max@1	1741
max@1	1742
max@1	1743 // Sort the parts in order of occurrence
max@1	1744 sort (partindices.begin(), partindices.end());
max@1	1745
max@1	1746 for (unsigned i=0; i<partindices.size(); ++i)
max@1	1747 {
max@1	1748 bool found = false;
max@1	1749 int in=0;
max@1	1750 while (!found)
max@1	1751 {
max@1	1752 if (newPartVector[in].indices[0] == partindices[i])
max@1	1753 {
max@1	1754 newPartVector.push_back(newPartVector[in]);
max@1	1755 newPartVector.erase(newPartVector.begin()+in);
max@1	1756 found = true;
max@1	1757 }
max@1	1758 else
max@1	1759 in++;
max@1	1760 }
max@1	1761 }
max@1	1762
max@1	1763 // Clear the vector
max@1	1764 for (unsigned iNewpart=1; iNewpart < newPartVector.size(); ++iNewpart)
max@1	1765 {
max@1	1766 if (newPartVector[iNewpart].n < 12)
max@1	1767 {
max@1	1768 newPartVector[iNewpart-1].n = newPartVector[iNewpart-1].n + newPartVector[iNewpart].n;
max@1	1769 newPartVector.erase(newPartVector.begin()+iNewpart);
max@1	1770 }
max@1	1771 }
max@1	1772
max@1	1773 return newPartVector;
max@1	1774 }
max@1	1775
max@1	1776 // Segmenter
max@1	1777 Vamp::Plugin::FeatureList SongPartitioner::Segmenter(Vamp::Plugin::FeatureList quatisedChromagram)
max@1	1778 {
max@1	1779 /* --- Display Information --- */
max@1	1780 int numBeat = quatisedChromagram.size();
max@1	1781 int numFeats = quatisedChromagram[0].values.size();
max@1	1782
max@1	1783 vector<Part> parts;
max@1	1784 vector<Part> finalParts;
max@1	1785
max@1	1786 parts = songSegment(quatisedChromagram);
max@1	1787
max@1	1788 songSegmentChroma(quatisedChromagram,parts);
max@1	1789 finalParts = songSegmentIntegration(parts);
max@1	1790
max@1	1791
max@1	1792 // TEMP ----
max@1	1793 /*for (unsigned i=0;i<finalParts.size(); ++i)
max@1	1794 {
max@1	1795 std::cout << "Parts n° " << i << std::endl;
max@1	1796 std::cout << finalParts[i].n << std::endl;
max@1	1797 std::cout << finalParts[i].letter << std::endl;
max@1	1798
max@1	1799 for (unsigned j=0;j<finalParts[i].indices.size(); ++j)
max@1	1800 std::cout << finalParts[i].indices[j] << std::endl;
max@1	1801 std::cout << finalParts[i].level << std::endl;
max@1	1802 }*/
max@1	1803
max@1	1804
max@1	1805 // ---------
max@1	1806
max@1	1807
max@1	1808 // Output
max@1	1809
max@1	1810 Vamp::Plugin::FeatureList results;
max@1	1811
max@1	1812
max@1	1813 Feature seg;
max@1	1814
max@1	1815 arma::vec indices;
max@1	1816 unsigned idx=0;
max@1	1817 vector<int> values;
max@1	1818 vector<string> letters;
max@1	1819
max@1	1820 for (unsigned iPart=0; iPart<finalParts.size()-1; ++iPart)
max@1	1821 {
max@1	1822 unsigned iInstance=0;
max@1	1823 seg.hasTimestamp = true;
max@1	1824
max@1	1825 int ind = finalParts[iPart].indices[iInstance];
max@1	1826 int ind1 = finalParts[iPart+1].indices[iInstance];
max@1	1827
max@1	1828 seg.timestamp = quatisedChromagram[ind].timestamp;
max@1	1829 seg.hasDuration = true;
max@1	1830 seg.duration = quatisedChromagram[ind1].timestamp-quatisedChromagram[ind].timestamp;
max@1	1831 seg.values.clear();
max@1	1832 seg.values.push_back(finalParts[iPart].value);
max@1	1833 seg.label = finalParts[iPart].letter;
max@1	1834
max@1	1835 results.push_back(seg);
max@1	1836 }
max@1	1837
max@1	1838 int ind = finalParts[finalParts.size()-1].indices[0];
max@1	1839 seg.timestamp = quatisedChromagram[ind].timestamp;
max@1	1840 seg.hasDuration = true;
max@1	1841 seg.duration = quatisedChromagram[quatisedChromagram.size()-1].timestamp-quatisedChromagram[ind].timestamp;
max@1	1842 seg.values.clear();
max@1	1843 seg.values.push_back(finalParts[finalParts.size()-1].value);
max@1	1844 seg.label = finalParts[finalParts.size()-1].letter;
max@1	1845
max@1	1846 results.push_back(seg);
max@1	1847
max@1	1848 return results;
max@1	1849 }
max@1	1850
max@1	1851
max@1	1852
max@1	1853
max@1	1854
max@1	1855
max@1	1856
max@1	1857
max@1	1858
max@1	1859
max@1	1860
max@1	1861
max@1	1862
max@1	1863
max@1	1864
max@1	1865
max@1	1866

Mercurial > hg > segmenter-vamp-plugin

annotate songparts/plugins/SongParts.cpp @ 1:f44aa6d29642