segmenter-vamp-plugin: songparts/SongParts.cpp annotate

annotate songparts/SongParts.cpp @ 35:d6bb46a1e70c slimline

Fix big memory leak

author	Chris Cannam
date	Fri, 11 May 2012 17:37:49 +0100
parents	325cc75df35a
children	cc18e9a13fe8

rev	line source
max@1	1 /* -- c-basic-offset: 4 indent-tabs-mode: nil -- vi:set ts=8 sts=4 sw=4: */
max@1	2
max@1	3 /*
max@1	4 QM Vamp Plugin Set
max@1	5
max@1	6 Centre for Digital Music, Queen Mary, University of London.
max@1	7
max@1	8 This program is free software; you can redistribute it and/or
max@1	9 modify it under the terms of the GNU General Public License as
max@1	10 published by the Free Software Foundation; either version 2 of the
max@1	11 License, or (at your option) any later version. See the file
max@1	12 COPYING included with this distribution for more information.
max@1	13 */
max@1	14
max@1	15 #include "SongParts.h"
max@1	16
max@1	17 #include <base/Window.h>
max@1	18 #include <dsp/onsets/DetectionFunction.h>
max@1	19 #include <dsp/onsets/PeakPicking.h>
max@1	20 #include <dsp/transforms/FFT.h>
max@1	21 #include <dsp/tempotracking/TempoTrackV2.h>
max@1	22 #include <dsp/tempotracking/DownBeat.h>
max@1	23 #include <chromamethods.h>
max@1	24 #include <maths/MathUtilities.h>
max@1	25 #include <boost/numeric/ublas/matrix.hpp>
max@1	26 #include <boost/numeric/ublas/io.hpp>
max@1	27 #include <boost/math/distributions/normal.hpp>
max@1	28 #include "armadillo"
max@1	29 #include <fstream>
max@1	30 #include <sstream>
max@1	31 #include <cmath>
max@1	32 #include <vector>
max@1	33
max@1	34 #include <vamp-sdk/Plugin.h>
max@1	35
max@1	36 using namespace boost::numeric;
max@1	37 using namespace arma;
max@1	38 using std::string;
max@1	39 using std::vector;
max@1	40 using std::cerr;
max@1	41 using std::cout;
max@1	42 using std::endl;
max@1	43
max@1	44
max@1	45 #ifndef __GNUC__
max@1	46 #include <alloca.h>
max@1	47 #endif
max@1	48
max@1	49
max@1	50 // Result Struct
// Result record for one song part.
// The member order is kept exactly as it was so that any positional
// (aggregate) initialisation elsewhere keeps working.
// NOTE(review): the meaning of the individual fields is not visible in
// this part of the file -- confirm against the segmenter code.
struct Part {
    int n;
    std::vector<int> indices;
    std::string letter;
    int value;
    int level;
    int nInd;
};
max@1	59
max@1	60
max@8	61
max@1	62 /* ------------------------------------ */
max@1	63 /* ----- BEAT DETECTOR CLASS ---------- */
max@1	64 /* ------------------------------------ */
max@1	65
max@1	66 class BeatTrackerData
max@1	67 {
max@1	68 /* --- ATTRIBUTES --- */
max@1	69 public:
max@1	70 DFConfig dfConfig;
max@1	71 DetectionFunction *df;
max@1	72 DownBeat *downBeat;
max@1	73 vector<double> dfOutput;
max@1	74 Vamp::RealTime origin;
max@1	75
max@1	76
max@1	77 /* --- METHODS --- */
max@1	78
max@1	79 /* --- Constructor --- */
max@1	80 public:
max@1	81 BeatTrackerData(float rate, const DFConfig &config) : dfConfig(config) {
Chris@22	82
max@1	83 df = new DetectionFunction(config);
max@1	84 // decimation factor aims at resampling to c. 3KHz; must be power of 2
max@1	85 int factor = MathUtilities::nextPowerOfTwo(rate / 3000);
max@1	86 // std::cerr << "BeatTrackerData: factor = " << factor << std::endl;
max@1	87 downBeat = new DownBeat(rate, factor, config.stepSize);
max@1	88 }
max@1	89
max@1	90 /* --- Desctructor --- */
max@1	91 ~BeatTrackerData() {
Chris@22	92 delete df;
max@1	93 delete downBeat;
max@1	94 }
max@1	95
max@1	96 void reset() {
max@1	97 delete df;
max@1	98 df = new DetectionFunction(dfConfig);
max@1	99 dfOutput.clear();
max@1	100 downBeat->resetAudioBuffer();
max@1	101 origin = Vamp::RealTime::zeroTime;
max@1	102 }
max@1	103 };
max@1	104
max@1	105
max@1	106 /* --------------------------------------- */
max@1	107 /* ----- CHROMA EXTRACTOR CLASS ---------- */
max@1	108 /* --------------------------------------- */
max@1	109
max@1	110 class ChromaData
max@1	111 {
max@1	112
max@1	113 /* --- ATTRIBUTES --- */
max@1	114
max@1	115 public:
max@1	116 int frameCount;
max@1	117 int nBPS;
max@1	118 Vamp::Plugin::FeatureList logSpectrum;
max@1	119 size_t blockSize;
max@1	120 int lengthOfNoteIndex;
max@1	121 vector<float> meanTunings;
max@1	122 vector<float> localTunings;
max@1	123 float whitening;
max@1	124 float preset;
max@1	125 float useNNLS;
max@1	126 vector<float> localTuning;
max@1	127 vector<float> kernelValue;
max@1	128 vector<int> kernelFftIndex;
max@1	129 vector<int> kernelNoteIndex;
max@1	130 float *dict;
max@1	131 bool tuneLocal;
max@1	132 float doNormalizeChroma;
max@1	133 float rollon;
max@1	134 float s;
max@1	135 vector<float> hw;
max@1	136 vector<float> sinvalues;
max@1	137 vector<float> cosvalues;
max@1	138 Window<float> window;
max@1	139 FFTReal fft;
max@1	140 size_t inputSampleRate;
max@1	141
max@1	142 /* --- METHODS --- */
max@1	143
max@1	144 /* --- Constructor --- */
max@1	145
max@1	146 public:
max@1	147 ChromaData(float inputSampleRate, size_t block_size) :
max@1	148 frameCount(0),
max@1	149 nBPS(3),
max@1	150 logSpectrum(0),
max@1	151 blockSize(0),
max@1	152 lengthOfNoteIndex(0),
max@1	153 meanTunings(0),
max@1	154 localTunings(0),
max@1	155 whitening(1.0),
max@1	156 preset(0.0),
max@1	157 useNNLS(1.0),
max@1	158 localTuning(0.0),
max@1	159 kernelValue(0),
max@1	160 kernelFftIndex(0),
max@1	161 kernelNoteIndex(0),
max@1	162 dict(0),
max@1	163 tuneLocal(0.0),
max@1	164 doNormalizeChroma(0),
max@1	165 rollon(0.0),
Chris@35	166 s(0.7),
Chris@35	167 sinvalues(0),
Chris@35	168 cosvalues(0),
Chris@35	169 window(HanningWindow, block_size),
Chris@35	170 fft(block_size),
Chris@35	171 inputSampleRate(inputSampleRate)
max@1	172 {
max@1	173 // make the note dictionary matrix
max@1	174 dict = new float[nNote * 84];
max@1	175 for (int i = 0; i < nNote * 84; ++i) dict[i] = 0.0;
max@1	176 blockSize = block_size;
max@1	177 }
max@1	178
max@1	179
max@1	180 /* --- Desctructor --- */
max@1	181
max@1	182 ~ChromaData() {
max@1	183 delete [] dict;
max@1	184 }
max@1	185
max@1	186 /* --- Public Methods --- */
max@1	187
max@1	188 void reset() {
max@1	189 frameCount = 0;
max@1	190 logSpectrum.clear();
max@1	191 for (int iBPS = 0; iBPS < 3; ++iBPS) {
max@1	192 meanTunings[iBPS] = 0;
max@1	193 localTunings[iBPS] = 0;
max@1	194 }
max@1	195 localTuning.clear();
max@1	196 }
max@1	197
max@1	198 void baseProcess(float *inputBuffers, Vamp::RealTime timestamp)
max@1	199 {
Chris@22	200
max@1	201 frameCount++;
max@1	202 float *magnitude = new float[blockSize/2];
max@1	203 double *fftReal = new double[blockSize];
max@1	204 double *fftImag = new double[blockSize];
max@1	205
max@1	206 // FFTReal wants doubles, so we need to make a local copy of inputBuffers
max@1	207 double *inputBuffersDouble = new double[blockSize];
max@1	208 for (size_t i = 0; i < blockSize; i++) inputBuffersDouble[i] = inputBuffers[i];
max@1	209
max@1	210 fft.process(false, inputBuffersDouble, fftReal, fftImag);
max@1	211
max@1	212 float energysum = 0;
max@1	213 // make magnitude
max@1	214 float maxmag = -10000;
max@1	215 for (int iBin = 0; iBin < static_cast<int>(blockSize/2); iBin++) {
max@1	216 magnitude[iBin] = sqrt(fftReal[iBin] * fftReal[iBin] +
max@1	217 fftImag[iBin] * fftImag[iBin]);
max@1	218 if (magnitude[iBin]>blockSize*1.0) magnitude[iBin] = blockSize;
max@1	219 // a valid audio signal (between -1 and 1) should not be limited here.
max@1	220 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
max@1	221 if (rollon > 0) {
max@1	222 energysum += pow(magnitude[iBin],2);
max@1	223 }
max@1	224 }
max@1	225
max@1	226 float cumenergy = 0;
max@1	227 if (rollon > 0) {
max@1	228 for (int iBin = 2; iBin < static_cast<int>(blockSize/2); iBin++) {
max@1	229 cumenergy += pow(magnitude[iBin],2);
max@1	230 if (cumenergy < energysum * rollon / 100) magnitude[iBin-2] = 0;
max@1	231 else break;
max@1	232 }
max@1	233 }
max@1	234
max@1	235 if (maxmag < 2) {
max@1	236 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
max@1	237 for (int iBin = 0; iBin < static_cast<int>(blockSize/2); iBin++) {
max@1	238 magnitude[iBin] = 0;
max@1	239 }
max@1	240 }
max@1	241
max@1	242 // cerr << magnitude[200] << endl;
max@1	243
max@1	244 // note magnitude mapping using pre-calculated matrix
max@1	245 float *nm = new float[nNote]; // note magnitude
max@1	246 for (int iNote = 0; iNote < nNote; iNote++) {
max@1	247 nm[iNote] = 0; // initialise as 0
max@1	248 }
max@1	249 int binCount = 0;
max@1	250 for (vector<float>::iterator it = kernelValue.begin(); it != kernelValue.end(); ++it) {
max@1	251 nm[kernelNoteIndex[binCount]] += magnitude[kernelFftIndex[binCount]] * kernelValue[binCount];
max@1	252 binCount++;
max@1	253 }
max@1	254
max@1	255 float one_over_N = 1.0/frameCount;
max@1	256 // update means of complex tuning variables
max@1	257 for (int iBPS = 0; iBPS < nBPS; ++iBPS) meanTunings[iBPS] = float(frameCount-1)one_over_N;
max@1	258
max@1	259 for (int iTone = 0; iTone < round(nNote0.62/nBPS)nBPS+1; iTone = iTone + nBPS) {
max@1	260 for (int iBPS = 0; iBPS < nBPS; ++iBPS) meanTunings[iBPS] += nm[iTone + iBPS]*one_over_N;
max@1	261 float ratioOld = 0.997;
max@1	262 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1	263 localTunings[iBPS] *= ratioOld;
max@1	264 localTunings[iBPS] += nm[iTone + iBPS] * (1 - ratioOld);
max@1	265 }
max@1	266 }
max@1	267
max@1	268 float localTuningImag = 0;
max@1	269 float localTuningReal = 0;
max@1	270 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1	271 localTuningReal += localTunings[iBPS] * cosvalues[iBPS];
max@1	272 localTuningImag += localTunings[iBPS] * sinvalues[iBPS];
max@1	273 }
max@1	274
max@1	275 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
max@1	276 localTuning.push_back(normalisedtuning);
max@1	277
max@1	278 Vamp::Plugin::Feature f1; // logfreqspec
max@1	279 f1.hasTimestamp = true;
max@1	280 f1.timestamp = timestamp;
max@1	281 for (int iNote = 0; iNote < nNote; iNote++) {
max@1	282 f1.values.push_back(nm[iNote]);
max@1	283 }
max@1	284
max@1	285 // deletes
max@1	286 delete[] inputBuffersDouble;
max@1	287 delete[] magnitude;
max@1	288 delete[] fftReal;
max@1	289 delete[] fftImag;
max@1	290 delete[] nm;
max@1	291
max@1	292 logSpectrum.push_back(f1); // remember note magnitude
max@1	293 }
max@1	294
max@1	295 bool initialise()
max@1	296 {
max@1	297 dictionaryMatrix(dict, s);
Chris@22	298
Chris@22	299 // make things for tuning estimation
Chris@22	300 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1	301 sinvalues.push_back(sin(2M_PI(iBPS*1.0/nBPS)));
max@1	302 cosvalues.push_back(cos(2M_PI(iBPS*1.0/nBPS)));
max@1	303 }
max@1	304
Chris@22	305
Chris@22	306 // make hamming window of length 1/2 octave
Chris@22	307 int hamwinlength = nBPS * 6 + 1;
max@1	308 float hamwinsum = 0;
max@1	309 for (int i = 0; i < hamwinlength; ++i) {
max@1	310 hw.push_back(0.54 - 0.46 * cos((2M_PIi)/(hamwinlength-1)));
max@1	311 hamwinsum += 0.54 - 0.46 * cos((2M_PIi)/(hamwinlength-1));
max@1	312 }
max@1	313 for (int i = 0; i < hamwinlength; ++i) hw[i] = hw[i] / hamwinsum;
max@1	314
max@1	315
max@1	316 // initialise the tuning
max@1	317 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1	318 meanTunings.push_back(0);
max@1	319 localTunings.push_back(0);
max@1	320 }
Chris@22	321
max@1	322 blockSize = blockSize;
max@1	323 frameCount = 0;
max@1	324 int tempn = nNote * blockSize/2;
max@1	325 // cerr << "length of tempkernel : " << tempn << endl;
max@1	326 float *tempkernel;
max@1	327
max@1	328 tempkernel = new float[tempn];
max@1	329
max@1	330 logFreqMatrix(inputSampleRate, blockSize, tempkernel);
max@1	331 kernelValue.clear();
max@1	332 kernelFftIndex.clear();
max@1	333 kernelNoteIndex.clear();
max@1	334 int countNonzero = 0;
max@1	335 for (int iNote = 0; iNote < nNote; ++iNote) {
max@1	336 // I don't know if this is wise: manually making a sparse matrix
max@1	337 for (int iFFT = 0; iFFT < static_cast<int>(blockSize/2); ++iFFT) {
max@1	338 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
max@1	339 kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
max@1	340 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
max@1	341 countNonzero++;
max@1	342 }
max@1	343 kernelFftIndex.push_back(iFFT);
Chris@23	344 kernelNoteIndex.push_back(iNote);
max@1	345 }
max@1	346 }
max@1	347 }
max@1	348 delete [] tempkernel;
max@1	349 }
max@1	350 };
max@1	351
max@1	352
max@1	353 /* --------------------------------- */
max@1	354 /* ----- SONG PARTITIONER ---------- */
max@1	355 /* --------------------------------- */
max@1	356
max@1	357
max@1	358 /* --- ATTRIBUTES --- */
max@1	359
max@1	360 float SongPartitioner::m_stepSecs = 0.01161; // 512 samples at 44100
max@1	361 size_t SongPartitioner::m_chromaFramesizeFactor = 16; // 16 times as long as beat tracker's
max@1	362 size_t SongPartitioner::m_chromaStepsizeFactor = 4; // 4 times as long as beat tracker's
max@1	363
max@1	364
max@1	365 /* --- METHODS --- */
max@1	366
max@1	367 /* --- Constructor --- */
max@1	368 SongPartitioner::SongPartitioner(float inputSampleRate) :
max@1	369 Vamp::Plugin(inputSampleRate),
max@1	370 m_d(0),
Chris@35	371 m_chromadata(0),
max@1	372 m_bpb(4),
max@1	373 m_pluginFrameCount(0)
max@1	374 {
max@1	375 }
max@1	376
max@1	377
max@1	378 /* --- Destructor --- */
max@1	379 SongPartitioner::~SongPartitioner()
max@1	380 {
max@1	381 delete m_d;
Chris@35	382 delete m_chromadata;
max@1	383 }
max@1	384
max@1	385
max@1	386 /* --- Methods --- */
max@1	387 string SongPartitioner::getIdentifier() const
max@1	388 {
max@1	389 return "qm-songpartitioner";
max@1	390 }
max@1	391
max@1	392 string SongPartitioner::getName() const
max@1	393 {
max@1	394 return "Song Partitioner";
max@1	395 }
max@1	396
max@1	397 string SongPartitioner::getDescription() const
max@1	398 {
max@1	399 return "Estimate contiguous segments pertaining to song parts such as verse and chorus.";
max@1	400 }
max@1	401
max@1	402 string SongPartitioner::getMaker() const
max@1	403 {
max@1	404 return "Queen Mary, University of London";
max@1	405 }
max@1	406
max@1	407 int SongPartitioner::getPluginVersion() const
max@1	408 {
max@1	409 return 2;
max@1	410 }
max@1	411
max@1	412 string SongPartitioner::getCopyright() const
max@1	413 {
max@1	414 return "Plugin by Matthew Davies, Christian Landone, Chris Cannam, Matthias Mauch and Massimiliano Zanoni Copyright (c) 2006-2012 QMUL - All Rights Reserved";
max@1	415 }
max@1	416
max@1	417 SongPartitioner::ParameterList SongPartitioner::getParameterDescriptors() const
max@1	418 {
max@1	419 ParameterList list;
max@1	420
max@1	421 ParameterDescriptor desc;
max@1	422
max@1	423 desc.identifier = "bpb";
max@1	424 desc.name = "Beats per Bar";
max@1	425 desc.description = "The number of beats in each bar";
max@1	426 desc.minValue = 2;
max@1	427 desc.maxValue = 16;
max@1	428 desc.defaultValue = 4;
max@1	429 desc.isQuantized = true;
max@1	430 desc.quantizeStep = 1;
max@1	431 list.push_back(desc);
max@1	432
max@1	433 return list;
max@1	434 }
max@1	435
max@1	436 float SongPartitioner::getParameter(std::string name) const
max@1	437 {
max@1	438 if (name == "bpb") return m_bpb;
max@1	439 return 0.0;
max@1	440 }
max@1	441
max@1	442 void SongPartitioner::setParameter(std::string name, float value)
max@1	443 {
max@1	444 if (name == "bpb") m_bpb = lrintf(value);
max@1	445 }
max@1	446
max@1	447
max@1	448 // Return the StepSize for Chroma Extractor
max@1	449 size_t SongPartitioner::getPreferredStepSize() const
max@1	450 {
max@1	451 size_t step = size_t(m_inputSampleRate * m_stepSecs + 0.0001);
max@1	452 if (step < 1) step = 1;
max@1	453
max@1	454 return step;
max@1	455 }
max@1	456
max@1	457 // Return the BlockSize for Chroma Extractor
max@1	458 size_t SongPartitioner::getPreferredBlockSize() const
max@1	459 {
max@1	460 size_t theoretical = getPreferredStepSize() * 2;
max@1	461 theoretical *= m_chromaFramesizeFactor;
max@1	462
max@1	463 return theoretical;
max@1	464 }
max@1	465
max@1	466
max@1	467 // Initialize the plugin and define Beat Tracker and Chroma Extractor Objects
max@1	468 bool SongPartitioner::initialise(size_t channels, size_t stepSize, size_t blockSize)
max@1	469 {
max@1	470 if (m_d) {
Chris@22	471 delete m_d;
Chris@22	472 m_d = 0;
max@1	473 }
Chris@35	474 if (m_chromadata) {
Chris@35	475 delete m_chromadata;
Chris@35	476 m_chromadata = 0;
Chris@35	477 }
max@1	478
max@1	479 if (channels < getMinChannelCount() \|\|
Chris@22	480 channels > getMaxChannelCount()) {
max@1	481 std::cerr << "SongPartitioner::initialise: Unsupported channel count: "
max@1	482 << channels << std::endl;
max@1	483 return false;
max@1	484 }
max@1	485
max@1	486 if (stepSize != getPreferredStepSize()) {
max@1	487 std::cerr << "ERROR: SongPartitioner::initialise: Unsupported step size for this sample rate: "
max@1	488 << stepSize << " (wanted " << (getPreferredStepSize()) << ")" << std::endl;
max@1	489 return false;
max@1	490 }
max@1	491
max@1	492 if (blockSize != getPreferredBlockSize()) {
max@1	493 std::cerr << "WARNING: SongPartitioner::initialise: Sub-optimal block size for this sample rate: "
max@1	494 << blockSize << " (wanted " << getPreferredBlockSize() << ")" << std::endl;
max@1	495 }
max@1	496
max@1	497 // Beat tracker and Chroma extractor has two different configuration parameters
max@1	498
max@1	499 // Configuration Parameters for Beat Tracker
max@1	500 DFConfig dfConfig;
max@1	501 dfConfig.DFType = DF_COMPLEXSD;
max@1	502 dfConfig.stepSize = stepSize;
max@1	503 dfConfig.frameLength = blockSize / m_chromaFramesizeFactor;
max@1	504 dfConfig.dbRise = 3;
max@1	505 dfConfig.adaptiveWhitening = false;
max@1	506 dfConfig.whiteningRelaxCoeff = -1;
max@1	507 dfConfig.whiteningFloor = -1;
max@1	508
max@1	509 // Initialise Beat Tracker
max@1	510 m_d = new BeatTrackerData(m_inputSampleRate, dfConfig);
max@1	511 m_d->downBeat->setBeatsPerBar(m_bpb);
max@1	512
max@1	513 // Initialise Chroma Extractor
max@1	514 m_chromadata = new ChromaData(m_inputSampleRate, blockSize);
max@1	515 m_chromadata->initialise();
max@1	516
max@1	517 return true;
max@1	518 }
max@1	519
max@1	520 void SongPartitioner::reset()
max@1	521 {
max@1	522 if (m_d) m_d->reset();
max@1	523 m_pluginFrameCount = 0;
max@1	524 }
max@1	525
max@1	526 SongPartitioner::OutputList SongPartitioner::getOutputDescriptors() const
max@1	527 {
max@1	528 OutputList list;
max@1	529 size_t outputCounter = 0;
max@1	530
max@1	531 OutputDescriptor beat;
max@1	532 beat.identifier = "beats";
max@1	533 beat.name = "Beats";
max@1	534 beat.description = "Beat locations labelled with metrical position";
max@1	535 beat.unit = "";
max@1	536 beat.hasFixedBinCount = true;
max@1	537 beat.binCount = 0;
max@1	538 beat.sampleType = OutputDescriptor::VariableSampleRate;
max@1	539 beat.sampleRate = 1.0 / m_stepSecs;
max@1	540 m_beatOutputNumber = outputCounter++;
max@1	541
max@1	542 OutputDescriptor bars;
max@1	543 bars.identifier = "bars";
max@1	544 bars.name = "Bars";
max@1	545 bars.description = "Bar locations";
max@1	546 bars.unit = "";
max@1	547 bars.hasFixedBinCount = true;
max@1	548 bars.binCount = 0;
max@1	549 bars.sampleType = OutputDescriptor::VariableSampleRate;
max@1	550 bars.sampleRate = 1.0 / m_stepSecs;
max@1	551 m_barsOutputNumber = outputCounter++;
max@1	552
max@1	553 OutputDescriptor beatcounts;
max@1	554 beatcounts.identifier = "beatcounts";
max@1	555 beatcounts.name = "Beat Count";
max@1	556 beatcounts.description = "Beat counter function";
max@1	557 beatcounts.unit = "";
max@1	558 beatcounts.hasFixedBinCount = true;
max@1	559 beatcounts.binCount = 1;
max@1	560 beatcounts.sampleType = OutputDescriptor::VariableSampleRate;
max@1	561 beatcounts.sampleRate = 1.0 / m_stepSecs;
max@1	562 m_beatcountsOutputNumber = outputCounter++;
max@1	563
max@1	564 OutputDescriptor beatsd;
max@1	565 beatsd.identifier = "beatsd";
max@1	566 beatsd.name = "Beat Spectral Difference";
max@1	567 beatsd.description = "Beat spectral difference function used for bar-line detection";
max@1	568 beatsd.unit = "";
max@1	569 beatsd.hasFixedBinCount = true;
max@1	570 beatsd.binCount = 1;
max@1	571 beatsd.sampleType = OutputDescriptor::VariableSampleRate;
max@1	572 beatsd.sampleRate = 1.0 / m_stepSecs;
max@1	573 m_beatsdOutputNumber = outputCounter++;
max@1	574
max@1	575 OutputDescriptor logscalespec;
max@1	576 logscalespec.identifier = "logscalespec";
max@1	577 logscalespec.name = "Log-Frequency Spectrum";
max@1	578 logscalespec.description = "Spectrum with linear frequency on a log scale.";
max@1	579 logscalespec.unit = "";
max@1	580 logscalespec.hasFixedBinCount = true;
max@1	581 logscalespec.binCount = nNote;
max@1	582 logscalespec.hasKnownExtents = false;
max@1	583 logscalespec.isQuantized = false;
max@1	584 logscalespec.sampleType = OutputDescriptor::FixedSampleRate;
max@1	585 logscalespec.hasDuration = false;
max@1	586 logscalespec.sampleRate = m_inputSampleRate/2048;
max@1	587 m_logscalespecOutputNumber = outputCounter++;
max@1	588
max@1	589 OutputDescriptor bothchroma;
max@1	590 bothchroma.identifier = "bothchroma";
max@1	591 bothchroma.name = "Chromagram and Bass Chromagram";
max@1	592 bothchroma.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS approximate transcription.";
max@1	593 bothchroma.unit = "";
max@1	594 bothchroma.hasFixedBinCount = true;
max@1	595 bothchroma.binCount = 24;
max@1	596 bothchroma.hasKnownExtents = false;
max@1	597 bothchroma.isQuantized = false;
max@1	598 bothchroma.sampleType = OutputDescriptor::FixedSampleRate;
max@1	599 bothchroma.hasDuration = false;
max@1	600 bothchroma.sampleRate = m_inputSampleRate/2048;
max@1	601 m_bothchromaOutputNumber = outputCounter++;
max@1	602
max@1	603 OutputDescriptor qchromafw;
max@1	604 qchromafw.identifier = "qchromafw";
max@1	605 qchromafw.name = "Pseudo-Quantised Chromagram and Bass Chromagram";
max@1	606 qchromafw.description = "Pseudo-Quantised Chromagram and Bass Chromagram (frames between two beats are identical).";
max@1	607 qchromafw.unit = "";
max@1	608 qchromafw.hasFixedBinCount = true;
max@1	609 qchromafw.binCount = 24;
max@1	610 qchromafw.hasKnownExtents = false;
max@1	611 qchromafw.isQuantized = false;
max@1	612 qchromafw.sampleType = OutputDescriptor::FixedSampleRate;
max@1	613 qchromafw.hasDuration = false;
max@1	614 qchromafw.sampleRate = m_inputSampleRate/2048;
max@1	615 m_qchromafwOutputNumber = outputCounter++;
max@1	616
max@1	617 OutputDescriptor qchroma;
max@1	618 qchroma.identifier = "qchroma";
max@1	619 qchroma.name = "Quantised Chromagram and Bass Chromagram";
max@1	620 qchroma.description = "Quantised Chromagram and Bass Chromagram.";
max@1	621 qchroma.unit = "";
max@1	622 qchroma.hasFixedBinCount = true;
max@1	623 qchroma.binCount = 24;
max@1	624 qchroma.hasKnownExtents = false;
max@1	625 qchroma.isQuantized = false;
max@1	626 qchroma.sampleType = OutputDescriptor::FixedSampleRate;
max@1	627 qchroma.hasDuration = true;
Chris@17	628 qchroma.sampleRate = m_inputSampleRate/2048;
max@1	629 m_qchromaOutputNumber = outputCounter++;
max@1	630
max@1	631 OutputDescriptor segm;
Chris@15	632 segm.identifier = "segmentation";
max@1	633 segm.name = "Segmentation";
max@1	634 segm.description = "Segmentation";
max@1	635 segm.unit = "segment-type";
max@1	636 segm.hasFixedBinCount = true;
max@1	637 //segm.binCount = 24;
max@1	638 segm.binCount = 1;
max@1	639 segm.hasKnownExtents = true;
max@1	640 segm.minValue = 1;
max@1	641 segm.maxValue = 5;
max@1	642 segm.isQuantized = true;
max@1	643 segm.quantizeStep = 1;
max@1	644 segm.sampleType = OutputDescriptor::VariableSampleRate;
Chris@17	645 segm.sampleRate = 1.0 / m_stepSecs;
max@1	646 segm.hasDuration = true;
max@1	647 m_segmOutputNumber = outputCounter++;
max@1	648
max@1	649
max@1	650 /*
max@1	651 OutputList list;
max@1	652 OutputDescriptor segmentation;
max@1	653 segmentation.identifier = "segmentation";
max@1	654 segmentation.name = "Segmentation";
max@1	655 segmentation.description = "Segmentation";
max@1	656 segmentation.unit = "segment-type";
max@1	657 segmentation.hasFixedBinCount = true;
max@1	658 segmentation.binCount = 1;
max@1	659 segmentation.hasKnownExtents = true;
max@1	660 segmentation.minValue = 1;
max@1	661 segmentation.maxValue = nSegmentTypes;
max@1	662 segmentation.isQuantized = true;
max@1	663 segmentation.quantizeStep = 1;
max@1	664 segmentation.sampleType = OutputDescriptor::VariableSampleRate;
max@1	665 segmentation.sampleRate = m_inputSampleRate / getPreferredStepSize();
max@1	666 list.push_back(segmentation);
max@1	667 return list;
max@1	668 */
max@1	669
max@1	670
max@1	671 list.push_back(beat);
max@1	672 list.push_back(bars);
max@1	673 list.push_back(beatcounts);
max@1	674 list.push_back(beatsd);
max@1	675 list.push_back(logscalespec);
max@1	676 list.push_back(bothchroma);
max@1	677 list.push_back(qchromafw);
max@1	678 list.push_back(qchroma);
max@1	679 list.push_back(segm);
max@1	680
max@1	681 return list;
max@1	682 }
max@1	683
max@1	684 // Executed for each frame - called from the host
max@1	685
max@1	686 // We use time domain input, because DownBeat requires it -- so we
max@1	687 // use the time-domain version of DetectionFunction::process which
max@1	688 // does its own FFT. It requires doubles as input, so we need to
max@1	689 // make a temporary copy
max@1	690
max@1	691 // We only support a single input channel
max@1	692 SongPartitioner::FeatureSet SongPartitioner::process(const float const inputBuffers,Vamp::RealTime timestamp)
max@1	693 {
max@1	694 if (!m_d) {
Chris@22	695 cerr << "ERROR: SongPartitioner::process: "
Chris@22	696 << "SongPartitioner has not been initialised"
Chris@22	697 << endl;
Chris@22	698 return FeatureSet();
max@1	699 }
max@1	700
max@1	701 const int fl = m_d->dfConfig.frameLength;
max@1	702 #ifndef __GNUC__
max@1	703 double dfinput = (double )alloca(fl * sizeof(double));
max@1	704 #else
max@1	705 double dfinput[fl];
max@1	706 #endif
max@1	707 int sampleOffset = ((m_chromaFramesizeFactor-1) * fl) / 2;
max@1	708
max@1	709 // Since chroma needs a much longer frame size, we only ever use the very
max@1	710 // beginning of the frame for beat tracking.
max@1	711 for (int i = 0; i < fl; ++i) dfinput[i] = inputBuffers[0][i];
max@1	712 double output = m_d->df->process(dfinput);
max@1	713
max@1	714 if (m_d->dfOutput.empty()) m_d->origin = timestamp;
max@1	715
max@1	716 // std::cerr << "df[" << m_d->dfOutput.size() << "] is " << output << std::endl;
max@1	717 m_d->dfOutput.push_back(output);
max@1	718
max@1	719 // Downsample and store the incoming audio block.
max@1	720 // We have an overlap on the incoming audio stream (step size is
max@1	721 // half block size) -- this function is configured to take only a
max@1	722 // step size's worth, so effectively ignoring the overlap. Note
max@1	723 // however that this means we omit the last blocksize - stepsize
max@1	724 // samples completely for the purposes of barline detection
max@1	725 // (hopefully not a problem)
max@1	726 m_d->downBeat->pushAudioBlock(inputBuffers[0]);
max@1	727
max@1	728 // The following is not done every time, but only every m_chromaFramesizeFactor times,
max@1	729 // because the chroma does not need dense time frames.
max@1	730
max@1	731 if (m_pluginFrameCount % m_chromaStepsizeFactor == 0)
max@1	732 {
max@1	733
max@1	734 // Window the full time domain, data, FFT it and process chroma stuff.
max@1	735
max@1	736 #ifndef __GNUC__
max@1	737 float windowedBuffers = (float )alloca(m_chromadata->blockSize * sizeof(float));
max@1	738 #else
max@1	739 float windowedBuffers[m_chromadata->blockSize];
max@1	740 #endif
max@1	741 m_chromadata->window.cut(&inputBuffers[0][0], &windowedBuffers[0]);
max@1	742
max@1	743 // adjust timestamp (we want the middle of the frame)
max@1	744 timestamp = timestamp + Vamp::RealTime::frame2RealTime(sampleOffset, lrintf(m_inputSampleRate));
max@1	745
max@1	746 m_chromadata->baseProcess(&windowedBuffers[0], timestamp);
max@1	747
max@1	748 }
max@1	749 m_pluginFrameCount++;
max@1	750
max@1	751 FeatureSet fs;
max@1	752 fs[m_logscalespecOutputNumber].push_back(
max@1	753 m_chromadata->logSpectrum.back());
max@1	754 return fs;
max@1	755 }
max@1	756
max@1	757 SongPartitioner::FeatureSet SongPartitioner::getRemainingFeatures()
max@1	758 {
max@1	759 if (!m_d) {
Chris@22	760 cerr << "ERROR: SongPartitioner::getRemainingFeatures: "
Chris@22	761 << "SongPartitioner has not been initialised"
Chris@22	762 << endl;
Chris@22	763 return FeatureSet();
max@1	764 }
max@1	765
Chris@16	766 FeatureSet masterFeatureset = beatTrack();
Chris@16	767 FeatureList chromaList = chromaFeatures();
max@1	768
max@1	769 for (size_t i = 0; i < chromaList.size(); ++i)
max@1	770 {
max@1	771 masterFeatureset[m_bothchromaOutputNumber].push_back(chromaList[i]);
max@1	772 }
max@1	773
max@1	774 // quantised and pseudo-quantised (beat-wise) chroma
Chris@16	775 std::vector<FeatureList> quantisedChroma = beatQuantiser(chromaList, masterFeatureset[m_beatOutputNumber]);
Chris@32	776
Chris@32	777 if (quantisedChroma.empty()) return masterFeatureset;
max@1	778
max@1	779 masterFeatureset[m_qchromafwOutputNumber] = quantisedChroma[0];
max@1	780 masterFeatureset[m_qchromaOutputNumber] = quantisedChroma[1];
max@1	781
max@1	782 // Segmentation
Chris@14	783 masterFeatureset[m_segmOutputNumber] = runSegmenter(quantisedChroma[1]);
max@1	784
max@1	785 return(masterFeatureset);
max@1	786 }
max@1	787
max@1	788 /* ------ Beat Tracker ------ */
max@1	789
Chris@16	790 SongPartitioner::FeatureSet SongPartitioner::beatTrack()
max@1	791 {
max@1	792 vector<double> df;
max@1	793 vector<double> beatPeriod;
max@1	794 vector<double> tempi;
max@1	795
max@1	796 for (size_t i = 2; i < m_d->dfOutput.size(); ++i) { // discard first two elts
max@1	797 df.push_back(m_d->dfOutput[i]);
max@1	798 beatPeriod.push_back(0.0);
max@1	799 }
max@1	800 if (df.empty()) return FeatureSet();
max@1	801
max@1	802 TempoTrackV2 tt(m_inputSampleRate, m_d->dfConfig.stepSize);
max@1	803 tt.calculateBeatPeriod(df, beatPeriod, tempi);
max@1	804
max@1	805 vector<double> beats;
max@1	806 tt.calculateBeats(df, beatPeriod, beats);
max@1	807
max@1	808 vector<int> downbeats;
max@1	809 size_t downLength = 0;
max@1	810 const float *downsampled = m_d->downBeat->getBufferedAudio(downLength);
max@1	811 m_d->downBeat->findDownBeats(downsampled, downLength, beats, downbeats);
max@1	812
max@1	813 vector<double> beatsd;
max@1	814 m_d->downBeat->getBeatSD(beatsd);
max@1	815
max@1	816 /*std::cout << "BeatTracker: found downbeats at: ";
max@1	817 for (int i = 0; i < downbeats.size(); ++i) std::cout << downbeats[i] << " " << std::endl;*/
max@1	818
max@1	819 FeatureSet returnFeatures;
max@1	820
max@1	821 char label[20];
max@1	822
max@1	823 int dbi = 0;
max@1	824 int beat = 0;
max@1	825 int bar = 0;
max@1	826
max@1	827 if (!downbeats.empty()) {
max@1	828 // get the right number for the first beat; this will be
max@1	829 // incremented before use (at top of the following loop)
max@1	830 int firstDown = downbeats[0];
max@1	831 beat = m_bpb - firstDown - 1;
max@1	832 if (beat == m_bpb) beat = 0;
max@1	833 }
max@1	834
max@1	835 for (size_t i = 0; i < beats.size(); ++i) {
max@1	836
max@1	837 size_t frame = beats[i] * m_d->dfConfig.stepSize;
max@1	838
max@1	839 if (dbi < downbeats.size() && i == downbeats[dbi]) {
max@1	840 beat = 0;
max@1	841 ++bar;
max@1	842 ++dbi;
max@1	843 } else {
max@1	844 ++beat;
max@1	845 }
max@1	846
max@1	847 /* Ooutput Section */
max@1	848
max@1	849 // outputs are:
max@1	850 //
max@1	851 // 0 -> beats
max@1	852 // 1 -> bars
max@1	853 // 2 -> beat counter function
max@1	854
max@1	855 Feature feature;
max@1	856 feature.hasTimestamp = true;
max@1	857 feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime (frame, lrintf(m_inputSampleRate));
max@1	858
max@1	859 sprintf(label, "%d", beat + 1);
max@1	860 feature.label = label;
max@1	861 returnFeatures[m_beatOutputNumber].push_back(feature); // labelled beats
max@1	862
max@1	863 feature.values.push_back(beat + 1);
max@1	864 returnFeatures[m_beatcountsOutputNumber].push_back(feature); // beat function
max@1	865
max@1	866 if (i > 0 && i <= beatsd.size()) {
max@1	867 feature.values.clear();
max@1	868 feature.values.push_back(beatsd[i-1]);
max@1	869 feature.label = "";
max@1	870 returnFeatures[m_beatsdOutputNumber].push_back(feature); // beat spectral difference
max@1	871 }
max@1	872
max@1	873 if (beat == 0) {
max@1	874 feature.values.clear();
max@1	875 sprintf(label, "%d", bar);
max@1	876 feature.label = label;
max@1	877 returnFeatures[m_barsOutputNumber].push_back(feature); // bars
max@1	878 }
max@1	879 }
max@1	880
max@1	881 return returnFeatures;
max@1	882 }
max@1	883
max@1	884
max@1	885 /* ------ Chroma Extractor ------ */
max@1	886
Chris@16	887 SongPartitioner::FeatureList SongPartitioner::chromaFeatures()
max@1	888 {
max@1	889
max@1	890 FeatureList returnFeatureList;
max@1	891 FeatureList tunedlogfreqspec;
max@1	892
max@1	893 if (m_chromadata->logSpectrum.size() == 0) return returnFeatureList;
max@1	894
max@1	895 /** Calculate Tuning
max@1	896 calculate tuning from (using the angle of the complex number defined by the
max@1	897 cumulative mean real and imag values)
max@1	898 **/
max@1	899 float meanTuningImag = 0;
max@1	900 float meanTuningReal = 0;
max@1	901 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1	902 meanTuningReal += m_chromadata->meanTunings[iBPS] * m_chromadata->cosvalues[iBPS];
max@1	903 meanTuningImag += m_chromadata->meanTunings[iBPS] * m_chromadata->sinvalues[iBPS];
max@1	904 }
max@1	905 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
max@1	906 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
max@1	907 int intShift = floor(normalisedtuning * 3);
max@1	908 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
max@1	909
max@1	910 char buffer0 [50];
max@1	911
max@1	912 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
max@1	913
max@1	914 /** Tune Log-Frequency Spectrogram
max@1	915 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
max@1	916 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
max@1	917 **/
max@1	918 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
max@1	919
max@1	920 float tempValue = 0;
max@1	921
max@1	922 int count = 0;
max@1	923
max@1	924 for (FeatureList::iterator i = m_chromadata->logSpectrum.begin(); i != m_chromadata->logSpectrum.end(); ++i)
max@1	925 {
max@1	926
max@1	927 Feature f1 = *i;
max@1	928 Feature f2; // tuned log-frequency spectrum
max@1	929
max@1	930 f2.hasTimestamp = true;
max@1	931 f2.timestamp = f1.timestamp;
max@1	932
max@1	933 f2.values.push_back(0.0);
max@1	934 f2.values.push_back(0.0); // set lower edge to zero
max@1	935
max@1	936 if (m_chromadata->tuneLocal) {
max@1	937 intShift = floor(m_chromadata->localTuning[count] * 3);
max@1	938 floatShift = m_chromadata->localTuning[count] * 3 - intShift;
max@1	939 // floatShift is a really bad name for this
max@1	940 }
max@1	941
max@1	942 for (int k = 2; k < (int)f1.values.size() - 3; ++k)
max@1	943 { // interpolate all inner bins
max@1	944 tempValue = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift;
max@1	945 f2.values.push_back(tempValue);
max@1	946 }
max@1	947
max@1	948 f2.values.push_back(0.0);
max@1	949 f2.values.push_back(0.0);
max@1	950 f2.values.push_back(0.0); // upper edge
max@1	951
max@1	952 vector<float> runningmean = SpecialConvolution(f2.values,m_chromadata->hw);
max@1	953 vector<float> runningstd;
max@1	954 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
max@1	955 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
max@1	956 }
max@1	957 runningstd = SpecialConvolution(runningstd,m_chromadata->hw); // second step convolve
max@1	958 for (int i = 0; i < nNote; i++)
max@1	959 {
max@1	960
max@1	961 runningstd[i] = sqrt(runningstd[i]);
max@1	962 // square root to finally have running std
max@1	963
max@1	964 if (runningstd[i] > 0)
max@1	965 {
max@1	966 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
max@1	967 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_chromadata->whitening) : 0;
max@1	968 }
max@1	969
max@1	970 if (f2.values[i] < 0) {
max@1	971
max@1	972 cerr << "ERROR: negative value in logfreq spectrum" << endl;
max@1	973
max@1	974 }
max@1	975 }
max@1	976 tunedlogfreqspec.push_back(f2);
max@1	977 count++;
max@1	978 }
max@1	979 cerr << "done." << endl;
max@1	980 /** Semitone spectrum and chromagrams
max@1	981 Semitone-spaced log-frequency spectrum derived
max@1	982 from the tuned log-freq spectrum above. the spectrum
max@1	983 is inferred using a non-negative least squares algorithm.
max@1	984 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
max@1	985 bass and treble stacked onto each other).
max@1	986 **/
max@1	987 if (m_chromadata->useNNLS == 0) {
max@1	988 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
max@1	989 } else {
max@1	990 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
max@1	991 }
max@1	992
max@1	993 vector<float> oldchroma = vector<float>(12,0);
max@1	994 vector<float> oldbasschroma = vector<float>(12,0);
max@1	995 count = 0;
max@1	996
max@1	997 for (FeatureList::iterator it = tunedlogfreqspec.begin(); it != tunedlogfreqspec.end(); ++it) {
max@1	998 Feature logfreqsp = *it; // logfreq spectrum
max@1	999 Feature bothchroma; // treble and bass chromagram
max@1	1000
max@1	1001 bothchroma.hasTimestamp = true;
max@1	1002 bothchroma.timestamp = logfreqsp.timestamp;
max@1	1003
max@1	1004 float b[nNote];
max@1	1005
max@1	1006 bool some_b_greater_zero = false;
max@1	1007 float sumb = 0;
max@1	1008 for (int i = 0; i < nNote; i++) {
max@1	1009 b[i] = logfreqsp.values[i];
max@1	1010 sumb += b[i];
max@1	1011 if (b[i] > 0) {
max@1	1012 some_b_greater_zero = true;
max@1	1013 }
max@1	1014 }
max@1	1015
max@1	1016 // here's where the non-negative least squares algorithm calculates the note activation x
max@1	1017
max@1	1018 vector<float> chroma = vector<float>(12, 0);
max@1	1019 vector<float> basschroma = vector<float>(12, 0);
max@1	1020 float currval;
max@1	1021 int iSemitone = 0;
max@1	1022
max@1	1023 if (some_b_greater_zero) {
max@1	1024 if (m_chromadata->useNNLS == 0) {
max@1	1025 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
max@1	1026 currval = 0;
max@1	1027 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
max@1	1028 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
max@1	1029 }
max@1	1030 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
max@1	1031 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
max@1	1032 iSemitone++;
max@1	1033 }
max@1	1034
max@1	1035 } else {
max@1	1036 float x[84+1000];
max@1	1037 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
max@1	1038 vector<int> signifIndex;
max@1	1039 int index=0;
max@1	1040 sumb /= 84.0;
max@1	1041 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
max@1	1042 float currval = 0;
max@1	1043 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
max@1	1044 currval += b[iNote + iBPS];
max@1	1045 }
max@1	1046 if (currval > 0) signifIndex.push_back(index);
max@1	1047 index++;
max@1	1048 }
max@1	1049 float rnorm;
max@1	1050 float w[84+1000];
max@1	1051 float zz[84+1000];
max@1	1052 int indx[84+1000];
max@1	1053 int mode;
max@1	1054 int dictsize = nNote*signifIndex.size();
max@1	1055
max@1	1056 float *curr_dict = new float[dictsize];
max@1	1057 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
max@1	1058 for (int iBin = 0; iBin < nNote; iBin++) {
max@1	1059 curr_dict[iNote * nNote + iBin] =
max@1	1060 1.0 * m_chromadata->dict[signifIndex[iNote] * nNote + iBin];
max@1	1061 }
max@1	1062 }
max@1	1063 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
max@1	1064 delete [] curr_dict;
max@1	1065 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
max@1	1066 // cerr << mode << endl;
max@1	1067 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
max@1	1068 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
max@1	1069 }
max@1	1070 }
max@1	1071 }
max@1	1072
max@1	1073 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end());
max@1	1074 // just stack the both chromas
max@1	1075
max@1	1076 bothchroma.values = chroma;
max@1	1077 returnFeatureList.push_back(bothchroma);
max@1	1078 count++;
max@1	1079 }
max@1	1080 cerr << "done." << endl;
max@1	1081
max@1	1082 return returnFeatureList;
max@1	1083 }
max@1	1084
max@1	1085 /* ------ Beat Quantizer ------ */
max@1	1086
max@4	1087 std::vector<Vamp::Plugin::FeatureList>
Chris@16	1088 SongPartitioner::beatQuantiser(Vamp::Plugin::FeatureList chromagram, Vamp::Plugin::FeatureList beats)
max@1	1089 {
max@1	1090 std::vector<FeatureList> returnVector;
max@1	1091
max@1	1092 FeatureList fwQchromagram; // frame-wise beat-quantised chroma
max@1	1093 FeatureList bwQchromagram; // beat-wise beat-quantised chroma
max@1	1094
max@4	1095 int nChromaFrame = (int) chromagram.size();
max@4	1096 int nBeat = (int) beats.size();
max@1	1097
max@1	1098 if (nBeat == 0 && nChromaFrame == 0) return returnVector;
max@1	1099
max@1	1100 size_t nBin = chromagram[0].values.size();
max@1	1101
max@1	1102 vector<float> tempChroma = vector<float>(nBin);
max@1	1103
max@1	1104 Vamp::RealTime beatTimestamp = Vamp::RealTime::zeroTime;
max@1	1105 int currBeatCount = -1; // start before first beat
max@1	1106 int framesInBeat = 0;
max@1	1107
max@4	1108 for (int iChroma = 0; iChroma < nChromaFrame; ++iChroma)
max@1	1109 {
max@4	1110 Vamp::RealTime frameTimestamp = chromagram[iChroma].timestamp;
Chris@24	1111 Vamp::RealTime newBeatTimestamp;
Chris@22	1112
Chris@24	1113 if (currBeatCount != beats.size()-1) newBeatTimestamp = beats[currBeatCount+1].timestamp;
Chris@24	1114 else newBeatTimestamp = chromagram[nChromaFrame-1].timestamp;
Chris@22	1115
Chris@24	1116 if (frameTimestamp > newBeatTimestamp \|\|
max@1	1117 iChroma == nChromaFrame-1)
max@1	1118 {
max@1	1119 // new beat (or last chroma frame)
max@1	1120 // 1. finish all the old beat processing
Chris@23	1121 if (framesInBeat > 0)
Chris@23	1122 {
Chris@23	1123 for (int i = 0; i < nBin; ++i) tempChroma[i] /= framesInBeat; // average
Chris@23	1124 }
max@1	1125
max@1	1126 Feature bwQchromaFrame;
max@1	1127 bwQchromaFrame.hasTimestamp = true;
max@1	1128 bwQchromaFrame.timestamp = beatTimestamp;
max@1	1129 bwQchromaFrame.values = tempChroma;
Chris@24	1130 bwQchromaFrame.duration = newBeatTimestamp - beatTimestamp;
max@1	1131 bwQchromagram.push_back(bwQchromaFrame);
max@1	1132
max@1	1133 for (int iFrame = -framesInBeat; iFrame < 0; ++iFrame)
max@1	1134 {
max@1	1135 Feature fwQchromaFrame;
max@1	1136 fwQchromaFrame.hasTimestamp = true;
max@1	1137 fwQchromaFrame.timestamp = chromagram[iChroma+iFrame].timestamp;
max@1	1138 fwQchromaFrame.values = tempChroma; // all between two beats get the same
max@1	1139 fwQchromagram.push_back(fwQchromaFrame);
max@1	1140 }
max@1	1141
max@1	1142 // 2. increments / resets for current (new) beat
max@1	1143 currBeatCount++;
Chris@24	1144 beatTimestamp = newBeatTimestamp;
max@1	1145 for (size_t i = 0; i < nBin; ++i) tempChroma[i] = 0; // average
max@1	1146 framesInBeat = 0;
max@1	1147 }
max@1	1148 framesInBeat++;
max@1	1149 for (size_t i = 0; i < nBin; ++i) tempChroma[i] += chromagram[iChroma].values[i];
max@1	1150 }
max@1	1151 returnVector.push_back(fwQchromagram);
max@1	1152 returnVector.push_back(bwQchromagram);
Chris@30	1153 return returnVector;
max@1	1154 }
max@1	1155
max@1	1156 /* -------------------------------- */
max@1	1157 /* ------ Support Functions ------ */
max@1	1158 /* -------------------------------- */
max@1	1159
max@1	1160 // one-dimesion median filter
max@1	1161 arma::vec medfilt1(arma::vec v, int medfilt_length)
max@1	1162 {
max@1	1163 int halfWin = medfilt_length/2;
max@1	1164
max@1	1165 // result vector
max@1	1166 arma::vec res = arma::zeros<arma::vec>(v.size());
max@1	1167
max@1	1168 // padding
max@1	1169 arma::vec padV = arma::zeros<arma::vec>(v.size()+medfilt_length-1);
max@1	1170
Chris@21	1171 for (int i=medfilt_length/2; i < medfilt_length/2+v.size(); ++ i)
max@1	1172 {
max@1	1173 padV(i) = v(i-medfilt_length/2);
max@1	1174 }
max@1	1175
max@1	1176 // Median filter
max@1	1177 arma::vec win = arma::zeros<arma::vec>(medfilt_length);
max@1	1178
Chris@21	1179 for (int i=0; i < v.size(); ++i)
max@1	1180 {
max@1	1181 win = padV.subvec(i,i+halfWin*2);
max@1	1182 win = sort(win);
max@1	1183 res(i) = win(halfWin);
max@1	1184 }
max@1	1185
max@1	1186 return res;
max@1	1187 }
max@1	1188
max@1	1189
max@1	1190 // Quantile
max@1	1191 double quantile(arma::vec v, double p)
max@1	1192 {
max@1	1193 arma::vec sortV = arma::sort(v);
max@1	1194 int n = sortV.size();
max@1	1195 arma::vec x = arma::zeros<vec>(n+2);
max@1	1196 arma::vec y = arma::zeros<vec>(n+2);
max@1	1197
max@1	1198 x(0) = 0;
max@1	1199 x(n+1) = 100;
max@1	1200
Chris@21	1201 for (int i=1; i<n+1; ++i)
max@1	1202 x(i) = 100*(0.5+(i-1))/n;
max@1	1203
max@1	1204 y(0) = sortV(0);
max@1	1205 y.subvec(1,n) = sortV;
max@1	1206 y(n+1) = sortV(n-1);
max@1	1207
max@1	1208 arma::uvec x2index = find(x>=p*100);
max@1	1209
max@1	1210 // Interpolation
max@1	1211 double x1 = x(x2index(0)-1);
max@1	1212 double x2 = x(x2index(0));
max@1	1213 double y1 = y(x2index(0)-1);
max@1	1214 double y2 = y(x2index(0));
max@1	1215
max@1	1216 double res = (y2-y1)/(x2-x1)(p100-x1)+y1;
max@1	1217
max@1	1218 return res;
max@1	1219 }
max@1	1220
max@1	1221 // Max Filtering
max@1	1222 arma::mat maxfilt1(arma::mat inmat, int len)
max@1	1223 {
max@1	1224 arma::mat outmat = inmat;
max@1	1225
max@1	1226 for (int i=0; i<inmat.n_rows; ++i)
max@1	1227 {
max@1	1228 if (arma::sum(inmat.row(i)) > 0)
max@1	1229 {
max@1	1230 // Take a window of rows
max@1	1231 int startWin;
max@1	1232 int endWin;
max@1	1233
max@1	1234 if (0 > i-len)
max@1	1235 startWin = 0;
max@1	1236 else
max@1	1237 startWin = i-len;
max@1	1238
max@1	1239 if (inmat.n_rows-1 < i+len-1)
max@1	1240 endWin = inmat.n_rows-1;
max@1	1241 else
max@1	1242 endWin = i+len-1;
max@1	1243
max@1	1244 outmat(i,span::all) = arma::max(inmat(span(startWin,endWin),span::all));
max@1	1245 }
max@1	1246 }
max@1	1247
max@1	1248 return outmat;
max@1	1249
max@1	1250 }
max@1	1251
max@1	1252 // Null Parts
max@1	1253 Part nullpart(vector<Part> parts, arma::vec barline)
max@1	1254 {
max@1	1255 arma::uvec nullindices = arma::ones<arma::uvec>(barline.size());
Chris@21	1256 for (int iPart=0; iPart<parts.size(); ++iPart)
max@1	1257 {
Chris@21	1258 //for (int iIndex=0; iIndex < parts[0].indices.size(); ++iIndex)
Chris@21	1259 for (int iIndex=0; iIndex < parts[iPart].indices.size(); ++iIndex)
Chris@21	1260 for (int i=0; i<parts[iPart].n; ++i)
max@1	1261 {
Chris@21	1262 int ind = parts[iPart].indices[iIndex]+i;
max@1	1263 nullindices(ind) = 0;
max@1	1264 }
max@1	1265 }
max@7	1266
max@1	1267 Part newPart;
max@1	1268 newPart.n = 1;
max@1	1269 uvec q = find(nullindices > 0);
max@1	1270
Chris@21	1271 for (int i=0; i<q.size();++i)
max@1	1272 newPart.indices.push_back(q(i));
max@7	1273
max@1	1274 newPart.letter = '-';
max@1	1275 newPart.value = 0;
max@1	1276 newPart.level = 0;
max@1	1277
max@1	1278 return newPart;
max@1	1279 }
max@1	1280
max@1	1281
max@1	1282 // Merge Nulls
max@1	1283 void mergenulls(vector<Part> &parts)
max@1	1284 {
Chris@21	1285 for (int iPart=0; iPart<parts.size(); ++iPart)
max@1	1286 {
max@1	1287
max@1	1288 vector<Part> newVectorPart;
max@1	1289
max@1	1290 if (parts[iPart].letter.compare("-")==0)
max@1	1291 {
max@1	1292 sort (parts[iPart].indices.begin(), parts[iPart].indices.end());
Chris@21	1293 int newpartind = -1;
max@1	1294
max@1	1295 vector<int> indices;
max@1	1296 indices.push_back(-2);
max@1	1297
Chris@21	1298 for (int iIndex=0; iIndex<parts[iPart].indices.size(); ++iIndex)
max@1	1299 indices.push_back(parts[iPart].indices[iIndex]);
max@1	1300
Chris@21	1301 for (int iInd=1; iInd < indices.size(); ++iInd)
max@1	1302 {
max@1	1303 if (indices[iInd] - indices[iInd-1] > 1)
max@1	1304 {
max@1	1305 newpartind++;
max@1	1306
max@1	1307 Part newPart;
max@1	1308 newPart.letter = 'n';
max@1	1309 std::stringstream out;
max@1	1310 out << newpartind+1;
max@1	1311 newPart.letter.append(out.str());
max@1	1312 newPart.value = 20+newpartind+1;
max@1	1313 newPart.n = 1;
max@1	1314 newPart.indices.push_back(indices[iInd]);
max@1	1315 newPart.level = 0;
max@1	1316
max@1	1317 newVectorPart.push_back(newPart);
max@1	1318 }
max@1	1319 else
max@1	1320 {
max@1	1321 newVectorPart[newpartind].n = newVectorPart[newpartind].n+1;
max@1	1322 }
max@1	1323 }
max@1	1324 parts.erase (parts.end());
max@1	1325
Chris@21	1326 for (int i=0; i<newVectorPart.size(); ++i)
max@1	1327 parts.push_back(newVectorPart[i]);
max@1	1328 }
max@1	1329 }
max@1	1330 }
max@1	1331
max@1	1332 /* ------ Segmentation ------ */
max@1	1333
Chris@19	1334 vector<Part> songSegment(Vamp::Plugin::FeatureList quantisedChromagram)
max@1	1335 {
max@1	1336
max@1	1337
max@1	1338 /* ------ Parameters ------ */
max@1	1339 double thresh_beat = 0.85;
max@1	1340 double thresh_seg = 0.80;
max@1	1341 int medfilt_length = 5;
max@1	1342 int minlength = 28;
max@1	1343 int maxlength = 128;
max@1	1344 double quantilePerc = 0.1;
max@1	1345 /* ------------------------ */
max@1	1346
max@1	1347
max@1	1348 // Collect Info
Chris@19	1349 int nBeat = quantisedChromagram.size(); // Number of feature vector
Chris@19	1350 int nFeatValues = quantisedChromagram[0].values.size(); // Number of values for each feature vector
max@1	1351
Chris@27	1352 if (nBeat < minlength) {
Chris@27	1353 // return a single part
Chris@27	1354 vector<Part> parts;
Chris@27	1355 Part newPart;
Chris@27	1356 newPart.n = 1;
Chris@27	1357 newPart.indices.push_back(0);
Chris@27	1358 newPart.letter = "n1";
Chris@27	1359 newPart.value = 20;
Chris@27	1360 newPart.level = 0;
Chris@27	1361 parts.push_back(newPart);
Chris@27	1362 return parts;
Chris@27	1363 }
Chris@27	1364
max@1	1365 arma::irowvec timeStamp = arma::zeros<arma::imat>(1,nBeat); // Vector of Time Stamps
max@1	1366
Chris@22	1367 // Save time stamp as a Vector
Chris@19	1368 if (quantisedChromagram[0].hasTimestamp)
max@1	1369 {
Chris@21	1370 for (int i = 0; i < nBeat; ++ i)
Chris@19	1371 timeStamp[i] = quantisedChromagram[i].timestamp.nsec;
max@1	1372 }
max@1	1373
max@1	1374
max@1	1375 // Build a ObservationTOFeatures Matrix
max@1	1376 arma::mat featVal = arma::zeros<mat>(nBeat,nFeatValues/2);
max@1	1377
Chris@21	1378 for (int i = 0; i < nBeat; ++ i)
Chris@21	1379 for (int j = 0; j < nFeatValues/2; ++ j)
max@1	1380 {
Chris@19	1381 featVal(i,j) = (quantisedChromagram[i].values[j]+quantisedChromagram[i].values[j+12]) * 0.8;
max@1	1382 }
max@1	1383
max@1	1384 // Set to arbitrary value to feature vectors with low std
max@1	1385 arma::mat a = stddev(featVal,1,1);
max@1	1386
max@1	1387 // Feature Colleration Matrix
max@1	1388 arma::mat simmat0 = 1-arma::cor(arma::trans(featVal));
max@1	1389
max@1	1390
Chris@21	1391 for (int i = 0; i < nBeat; ++ i)
max@1	1392 {
max@1	1393 if (a(i)<0.000001)
max@1	1394 {
max@1	1395 featVal(i,1) = 1000; // arbitrary
max@1	1396
Chris@21	1397 for (int j = 0; j < nFeatValues/2; ++j)
max@1	1398 {
max@1	1399 simmat0(i,j) = 1;
max@1	1400 simmat0(j,i) = 1;
max@1	1401 }
max@1	1402 }
max@1	1403 }
max@1	1404
max@1	1405 arma::mat simmat = 1-simmat0/2;
max@1	1406
max@1	1407 // -------- To delate when the proble with the add of beat will be solved -------
Chris@21	1408 for (int i = 0; i < nBeat; ++ i)
Chris@21	1409 for (int j = 0; j < nBeat; ++ j)
max@1	1410 if (!std::isfinite(simmat(i,j)))
max@1	1411 simmat(i,j)=0;
max@1	1412 // ------------------------------------------------------------------------------
max@1	1413
max@1	1414 // Median Filtering applied to the Correlation Matrix
max@1	1415 // The median filter is for each diagonal of the Matrix
max@1	1416 arma::mat median_simmat = arma::zeros<arma::mat>(nBeat,nBeat);
max@1	1417
Chris@21	1418 for (int i = 0; i < nBeat; ++ i)
max@1	1419 {
max@1	1420 arma::vec temp = medfilt1(simmat.diag(i),medfilt_length);
max@1	1421 median_simmat.diag(i) = temp;
max@1	1422 median_simmat.diag(-i) = temp;
max@1	1423 }
max@1	1424
Chris@21	1425 for (int i = 0; i < nBeat; ++ i)
Chris@21	1426 for (int j = 0; j < nBeat; ++ j)
max@1	1427 if (!std::isfinite(median_simmat(i,j)))
max@1	1428 median_simmat(i,j) = 0;
max@1	1429
max@1	1430 // -------------- NOT CONVERTED -------------------------------------
max@1	1431 // if param.seg.standardise
max@1	1432 // med_median_simmat = repmat(median(median_simmat),nBeat,1);
max@1	1433 // std_median_simmat = repmat(std(median_simmat),nBeat,1);
max@1	1434 // median_simmat = (median_simmat - med_median_simmat) ./ std_median_simmat;
max@1	1435 // end
max@1	1436 // --------------------------------------------------------
max@1	1437
max@1	1438 // Retrieve Bar Bounderies
max@1	1439 arma::uvec dup = find(median_simmat > thresh_beat);
max@1	1440 arma::mat potential_duplicates = arma::zeros<arma::mat>(nBeat,nBeat);
max@1	1441 potential_duplicates.elem(dup) = arma::ones<arma::vec>(dup.size());
max@1	1442 potential_duplicates = trimatu(potential_duplicates);
max@1	1443
Chris@21	1444 int nPartlengths = round((maxlength-minlength)/4)+1;
max@1	1445 arma::vec partlengths = zeros<arma::vec>(nPartlengths);
max@1	1446
Chris@21	1447 for (int i = 0; i < nPartlengths; ++ i)
max@1	1448 partlengths(i) = (i*4)+ minlength;
max@1	1449
max@1	1450 // initialise arrays
max@1	1451 arma::cube simArray = zeros<arma::cube>(nBeat,nBeat,nPartlengths);
max@1	1452 arma::cube decisionArray2 = zeros<arma::cube>(nBeat,nBeat,nPartlengths);
max@1	1453
max@1	1454 int conta = 0;
max@1	1455
Chris@21	1456 //for (int iLength = 0; iLength < nPartlengths; ++ iLength)
Chris@21	1457 for (int iLength = 0; iLength < 20; ++ iLength)
max@1	1458 {
Chris@21	1459 int len = partlengths(iLength);
Chris@21	1460 int nUsedBeat = nBeat - len + 1; // number of potential rep beginnings: they can't overlap at the end of the song
Chris@33	1461
Chris@33	1462 if (nUsedBeat < 1) continue;
max@1	1463
Chris@21	1464 for (int iBeat = 0; iBeat < nUsedBeat; ++ iBeat) // looping over all columns (arbitrarily chosen columns)
max@1	1465 {
max@1	1466 arma::uvec help2 = find(potential_duplicates(span(0,nUsedBeat-1),iBeat)==1);
max@1	1467
Chris@21	1468 for (int i=0; i<help2.size(); ++i)
max@1	1469 {
max@1	1470
max@1	1471 // measure how well two length len segments go together
max@1	1472 int kBeat = help2(i);
max@1	1473 arma::vec distrib = median_simmat(span(iBeat,iBeat+len-1),span(kBeat,kBeat+len-1)).diag(0);
max@1	1474 simArray(iBeat,kBeat,iLength) = quantile(distrib,quantilePerc);
max@1	1475 }
max@1	1476 }
max@1	1477
max@1	1478 arma::mat tempM = simArray(span(0,nUsedBeat-1),span(0,nUsedBeat-1),span(iLength,iLength));
max@1	1479 simArray.slice(iLength)(span(0,nUsedBeat-1),span(0,nUsedBeat-1)) = tempM + arma::trans(tempM) - (eye<mat>(nUsedBeat,nUsedBeat)%tempM);
max@1	1480
max@1	1481 // convolution
max@1	1482 arma::vec K = arma::zeros<vec>(3);
max@1	1483 K << 0.01 << 0.98 << 0.01;
max@1	1484
max@1	1485
Chris@21	1486 for (int i=0; i<simArray.n_rows; ++i)
max@1	1487 {
max@1	1488 arma::rowvec t = arma::conv((arma::rowvec)simArray.slice(iLength).row(i),K);
max@1	1489 simArray.slice(iLength)(i,span::all) = t.subvec(1,t.size()-2);
max@1	1490 }
max@1	1491
max@1	1492 // take only over-average bars that do not overlap
max@1	1493
max@1	1494 arma::mat temp = arma::zeros<mat>(simArray.n_rows, simArray.n_cols);
max@1	1495 temp(span::all, span(0,nUsedBeat-1)) = simArray.slice(iLength)(span::all,span(0,nUsedBeat-1));
max@1	1496
Chris@21	1497 for (int i=0; i<temp.n_rows; ++i)
Chris@21	1498 for (int j=0; j<nUsedBeat; ++j)
max@1	1499 if (temp(i,j) < thresh_seg)
max@1	1500 temp(i,j) = 0;
max@1	1501
max@1	1502 decisionArray2.slice(iLength) = temp;
max@1	1503
max@1	1504 arma::mat maxMat = maxfilt1(decisionArray2.slice(iLength),len-1);
max@1	1505
Chris@21	1506 for (int i=0; i<decisionArray2.n_rows; ++i)
Chris@21	1507 for (int j=0; j<decisionArray2.n_cols; ++j)
max@1	1508 if (decisionArray2.slice(iLength)(i,j) < maxMat(i,j))
max@1	1509 decisionArray2.slice(iLength)(i,j) = 0;
max@1	1510
max@1	1511 decisionArray2.slice(iLength) = decisionArray2.slice(iLength) % arma::trans(decisionArray2.slice(iLength));
max@1	1512
Chris@21	1513 for (int i=0; i<simArray.n_rows; ++i)
Chris@21	1514 for (int j=0; j<simArray.n_cols; ++j)
max@1	1515 if (simArray.slice(iLength)(i,j) < thresh_seg)
max@1	1516 potential_duplicates(i,j) = 0;
max@1	1517 }
max@1	1518
max@1	1519 // Milk the data
max@1	1520
max@1	1521 arma::mat bestval;
max@1	1522
Chris@21	1523 for (int iLength=0; iLength<nPartlengths; ++iLength)
max@1	1524 {
max@1	1525 arma::mat temp = arma::zeros<arma::mat>(decisionArray2.n_rows,decisionArray2.n_cols);
max@1	1526
Chris@21	1527 for (int rows=0; rows<decisionArray2.n_rows; ++rows)
Chris@21	1528 for (int cols=0; cols<decisionArray2.n_cols; ++cols)
max@1	1529 if (decisionArray2.slice(iLength)(rows,cols) > 0)
max@1	1530 temp(rows,cols) = 1;
max@1	1531
max@1	1532 arma::vec currLogicSum = arma::sum(temp,1);
max@1	1533
Chris@21	1534 for (int iBeat=0; iBeat<nBeat; ++iBeat)
max@1	1535 if (currLogicSum(iBeat) > 1)
max@1	1536 {
max@1	1537 arma::vec t = decisionArray2.slice(iLength)(span::all,iBeat);
max@1	1538 double currSum = sum(t);
max@1	1539
Chris@21	1540 int count = 0;
Chris@21	1541 for (int i=0; i<t.size(); ++i)
max@1	1542 if (t(i)>0)
max@1	1543 count++;
max@1	1544
max@1	1545 currSum = (currSum/count)/2;
max@1	1546
max@1	1547 arma::rowvec t1;
max@1	1548 t1 << (currLogicSum(iBeat)-1) * partlengths(iLength) << currSum << iLength << iBeat << currLogicSum(iBeat);
max@1	1549
max@1	1550 bestval = join_cols(bestval,t1);
max@1	1551 }
max@1	1552 }
max@1	1553
max@1	1554 // Definition of the resulting vector
max@1	1555 vector<Part> parts;
max@1	1556
max@1	1557 // make a table of all valid sets of parts
max@1	1558
max@1	1559 char partletters[] = {'A','B','C','D','E','F','G', 'H','I','J','K','L','M','N','O','P','Q','R','S'};
Chris@21	1560 int partvalues[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19};
max@1	1561 arma::vec valid_sets = arma::ones<arma::vec>(bestval.n_rows);
max@1	1562
max@1	1563 if (!bestval.is_empty())
max@1	1564 {
max@1	1565
max@1	1566 // In questo punto viene introdotto un errore alla 3 cifra decimale
max@1	1567
max@1	1568 arma::colvec t = arma::zeros<arma::colvec>(bestval.n_rows);
Chris@21	1569 for (int i=0; i<bestval.n_rows; ++i)
max@1	1570 {
max@1	1571 t(i) = bestval(i,1)*2;
max@1	1572 }
max@1	1573
max@1	1574 double m = t.max();
max@1	1575
max@1	1576 bestval(span::all,1) = bestval(span::all,1) / m;
max@1	1577 bestval(span::all,0) = bestval(span::all,0) + bestval(span::all,1);
max@1	1578
max@1	1579 arma::mat bestval2;
Chris@21	1580 for (int i=0; i<bestval.n_cols; ++i)
max@1	1581 if (i!=1)
max@1	1582 bestval2 = join_rows(bestval2,bestval.col(i));
max@1	1583
Chris@21	1584 for (int kSeg=0; kSeg<6; ++kSeg)
max@1	1585 {
max@1	1586 arma::mat currbestvals = arma::zeros<arma::mat>(bestval2.n_rows, bestval2.n_cols);
Chris@21	1587 for (int i=0; i<bestval2.n_rows; ++i)
Chris@21	1588 for (int j=0; j<bestval2.n_cols; ++j)
max@1	1589 if (valid_sets(i))
max@1	1590 currbestvals(i,j) = bestval2(i,j);
max@1	1591
max@1	1592 arma::vec t1 = currbestvals.col(0);
max@1	1593 double ma;
max@1	1594 uword maIdx;
max@1	1595 ma = t1.max(maIdx);
max@6	1596
max@6	1597 if ((maIdx == 0)&&(ma == 0))
max@6	1598 break;
max@1	1599
Chris@28	1600 int bestLength = lrint(partlengths(currbestvals(maIdx,1)));
max@1	1601 arma::rowvec bestIndices = decisionArray2.slice(currbestvals(maIdx,1))(currbestvals(maIdx,2),span::all);
max@1	1602
max@1	1603 arma::rowvec bestIndicesMap = arma::zeros<arma::rowvec>(bestIndices.size());
Chris@21	1604 for (int i=0; i<bestIndices.size(); ++i)
max@1	1605 if (bestIndices(i)>0)
max@1	1606 bestIndicesMap(i) = 1;
max@1	1607
max@1	1608 arma::rowvec mask = arma::zeros<arma::rowvec>(bestLength*2-1);
Chris@21	1609 for (int i=0; i<bestLength; ++i)
max@1	1610 mask(i+bestLength-1) = 1;
max@1	1611
max@1	1612 arma::rowvec t2 = arma::conv(bestIndicesMap,mask);
max@1	1613 arma::rowvec island = t2.subvec(mask.size()/2,t2.size()-1-mask.size()/2);
max@1	1614
max@1	1615 // Save results in the structure
max@1	1616 Part newPart;
max@1	1617 newPart.n = bestLength;
max@1	1618 uvec q1 = find(bestIndices > 0);
max@1	1619
Chris@21	1620 for (int i=0; i<q1.size();++i)
max@1	1621 newPart.indices.push_back(q1(i));
max@1	1622
max@1	1623 newPart.letter = partletters[kSeg];
max@1	1624 newPart.value = partvalues[kSeg];
max@1	1625 newPart.level = kSeg+1;
max@1	1626 parts.push_back(newPart);
max@1	1627
max@1	1628 uvec q2 = find(valid_sets==1);
max@1	1629
Chris@21	1630 for (int i=0; i<q2.size(); ++i)
max@1	1631 {
Chris@21	1632 int iSet = q2(i);
Chris@21	1633 int s = partlengths(bestval2(iSet,1));
max@1	1634
max@1	1635 arma::rowvec mask1 = arma::zeros<arma::rowvec>(s*2-1);
Chris@21	1636 for (int i=0; i<s; ++i)
max@1	1637 mask1(i+s-1) = 1;
max@1	1638
max@1	1639 arma::rowvec Ind = decisionArray2.slice(bestval2(iSet,1))(bestval2(iSet,2),span::all);
max@1	1640 arma::rowvec IndMap = arma::zeros<arma::rowvec>(Ind.size());
Chris@21	1641 for (int i=0; i<Ind.size(); ++i)
max@1	1642 if (Ind(i)>0)
max@1	1643 IndMap(i) = 2;
max@1	1644
max@1	1645 arma::rowvec t3 = arma::conv(IndMap,mask1);
max@6	1646 arma::rowvec currislands = t3.subvec(mask1.size()/2,t3.size()-1-mask1.size()/2);
max@1	1647 arma::rowvec islandsdMult = currislands%island;
max@6	1648
max@1	1649 arma::uvec islandsIndex = find(islandsdMult > 0);
max@1	1650
max@6	1651 if (islandsIndex.size() > 0)
max@1	1652 valid_sets(iSet) = 0;
max@1	1653 }
max@1	1654 }
max@1	1655 }
max@1	1656 else
max@1	1657 {
max@1	1658 Part newPart;
max@1	1659 newPart.n = nBeat;
Chris@33	1660 newPart.indices.push_back(0);
max@1	1661 newPart.letter = 'A';
max@1	1662 newPart.value = 1;
max@1	1663 newPart.level = 1;
max@1	1664 parts.push_back(newPart);
max@1	1665 }
max@6	1666
max@1	1667 arma::vec bar = linspace(1,nBeat,nBeat);
max@1	1668 Part np = nullpart(parts,bar);
max@7	1669
max@1	1670 parts.push_back(np);
max@1	1671
max@1	1672 // -------------- NOT CONVERTED -------------------------------------
max@1	1673 // if param.seg.editor
max@1	1674 // [pa, ta] = partarray(parts);
max@1	1675 // parts = editorssearch(pa, ta, parts);
max@1	1676 // parts = [parts, nullpart(parts,1:nBeat)];
max@1	1677 // end
max@1	1678 // ------------------------------------------------------------------
max@1	1679
max@1	1680
max@1	1681 mergenulls(parts);
max@1	1682
max@1	1683
max@1	1684 // -------------- NOT CONVERTED -------------------------------------
max@1	1685 // if param.seg.editor
max@1	1686 // [pa, ta] = partarray(parts);
max@1	1687 // parts = editorssearch(pa, ta, parts);
max@1	1688 // parts = [parts, nullpart(parts,1:nBeat)];
max@1	1689 // end
max@1	1690 // ------------------------------------------------------------------
max@1	1691
max@1	1692 return parts;
max@1	1693 }
max@1	1694
max@1	1695
max@1	1696
Chris@19	1697 void songSegmentChroma(Vamp::Plugin::FeatureList quantisedChromagram, vector<Part> &parts)
max@1	1698 {
max@1	1699 // Collect Info
Chris@19	1700 int nBeat = quantisedChromagram.size(); // Number of feature vector
Chris@19	1701 int nFeatValues = quantisedChromagram[0].values.size(); // Number of values for each feature vector
max@1	1702
max@1	1703 arma::mat synchTreble = arma::zeros<mat>(nBeat,nFeatValues/2);
max@1	1704
Chris@21	1705 for (int i = 0; i < nBeat; ++ i)
Chris@21	1706 for (int j = 0; j < nFeatValues/2; ++ j)
max@1	1707 {
Chris@19	1708 synchTreble(i,j) = quantisedChromagram[i].values[j];
max@1	1709 }
max@1	1710
max@1	1711 arma::mat synchBass = arma::zeros<mat>(nBeat,nFeatValues/2);
max@1	1712
Chris@21	1713 for (int i = 0; i < nBeat; ++ i)
Chris@21	1714 for (int j = 0; j < nFeatValues/2; ++ j)
max@1	1715 {
Chris@19	1716 synchBass(i,j) = quantisedChromagram[i].values[j+12];
max@1	1717 }
max@1	1718
max@1	1719 // Process
max@1	1720
Chris@19	1721 arma::mat segTreble = arma::zeros<arma::mat>(quantisedChromagram.size(),quantisedChromagram[0].values.size()/2);
Chris@19	1722 arma::mat segBass = arma::zeros<arma::mat>(quantisedChromagram.size(),quantisedChromagram[0].values.size()/2);
max@1	1723
Chris@21	1724 for (int iPart=0; iPart<parts.size(); ++iPart)
max@1	1725 {
max@1	1726 parts[iPart].nInd = parts[iPart].indices.size();
max@1	1727
Chris@21	1728 for (int kOccur=0; kOccur<parts[iPart].nInd; ++kOccur)
max@1	1729 {
max@1	1730 int kStartIndex = parts[iPart].indices[kOccur];
max@1	1731 int kEndIndex = kStartIndex + parts[iPart].n-1;
max@1	1732
max@1	1733 segTreble.rows(kStartIndex,kEndIndex) = segTreble.rows(kStartIndex,kEndIndex) + synchTreble.rows(kStartIndex,kEndIndex);
max@1	1734 segBass.rows(kStartIndex,kEndIndex) = segBass.rows(kStartIndex,kEndIndex) + synchBass.rows(kStartIndex,kEndIndex);
max@1	1735 }
max@1	1736 }
max@1	1737 }
max@1	1738
max@1	1739
max@1	1740 // Segment Integration
max@1	1741 vector<Part> songSegmentIntegration(vector<Part> &parts)
max@1	1742 {
max@1	1743 // Break up parts (every part will have one instance)
max@1	1744 vector<Part> newPartVector;
max@1	1745 vector<int> partindices;
max@1	1746
Chris@21	1747 for (int iPart=0; iPart<parts.size(); ++iPart)
max@1	1748 {
max@1	1749 parts[iPart].nInd = parts[iPart].indices.size();
Chris@21	1750 for (int iInstance=0; iInstance<parts[iPart].nInd; ++iInstance)
max@1	1751 {
max@1	1752 Part newPart;
max@1	1753 newPart.n = parts[iPart].n;
max@1	1754 newPart.letter = parts[iPart].letter;
max@1	1755 newPart.value = parts[iPart].value;
max@1	1756 newPart.level = parts[iPart].level;
max@1	1757 newPart.indices.push_back(parts[iPart].indices[iInstance]);
max@1	1758 newPart.nInd = 1;
max@1	1759 partindices.push_back(parts[iPart].indices[iInstance]);
max@1	1760
max@1	1761 newPartVector.push_back(newPart);
max@1	1762 }
max@1	1763 }
max@1	1764
max@1	1765
max@1	1766 // Sort the parts in order of occurrence
max@1	1767 sort (partindices.begin(), partindices.end());
max@1	1768
Chris@21	1769 for (int i=0; i<partindices.size(); ++i)
max@1	1770 {
max@1	1771 bool found = false;
max@1	1772 int in=0;
max@1	1773 while (!found)
max@1	1774 {
max@1	1775 if (newPartVector[in].indices[0] == partindices[i])
max@1	1776 {
max@1	1777 newPartVector.push_back(newPartVector[in]);
max@1	1778 newPartVector.erase(newPartVector.begin()+in);
max@1	1779 found = true;
max@1	1780 }
max@1	1781 else
max@1	1782 in++;
max@1	1783 }
max@1	1784 }
max@1	1785
max@1	1786 // Clear the vector
Chris@21	1787 for (int iNewpart=1; iNewpart < newPartVector.size(); ++iNewpart)
max@1	1788 {
max@1	1789 if (newPartVector[iNewpart].n < 12)
max@1	1790 {
max@1	1791 newPartVector[iNewpart-1].n = newPartVector[iNewpart-1].n + newPartVector[iNewpart].n;
max@1	1792 newPartVector.erase(newPartVector.begin()+iNewpart);
max@1	1793 }
max@1	1794 }
max@1	1795
max@1	1796 return newPartVector;
max@1	1797 }
max@1	1798
max@1	1799 // Segmenter
Chris@19	1800 Vamp::Plugin::FeatureList SongPartitioner::runSegmenter(Vamp::Plugin::FeatureList quantisedChromagram)
max@1	1801 {
max@1	1802 /* --- Display Information --- */
Chris@19	1803 int numBeat = quantisedChromagram.size();
Chris@19	1804 int numFeats = quantisedChromagram[0].values.size();
max@1	1805
max@1	1806 vector<Part> parts;
max@1	1807 vector<Part> finalParts;
max@1	1808
Chris@19	1809 parts = songSegment(quantisedChromagram);
Chris@19	1810 songSegmentChroma(quantisedChromagram,parts);
max@7	1811
max@1	1812 finalParts = songSegmentIntegration(parts);
max@1	1813
max@1	1814
max@1	1815 // TEMP ----
Chris@21	1816 /*for (int i=0;i<finalParts.size(); ++i)
max@1	1817 {
max@6	1818 std::cout << "Parts n° " << i << std::endl;
max@6	1819 std::cout << "n°: " << finalParts[i].n << std::endl;
max@6	1820 std::cout << "letter: " << finalParts[i].letter << std::endl;
max@1	1821
max@6	1822 std::cout << "indices: ";
Chris@21	1823 for (int j=0;j<finalParts[i].indices.size(); ++j)
max@6	1824 std::cout << finalParts[i].indices[j] << " ";
max@6	1825
max@6	1826 std::cout << std::endl;
max@6	1827 std::cout << "level: " << finalParts[i].level << std::endl;
max@1	1828 }*/
max@1	1829
max@1	1830 // ---------
max@1	1831
max@1	1832
max@1	1833 // Output
max@1	1834
max@1	1835 Vamp::Plugin::FeatureList results;
max@1	1836
max@1	1837
max@1	1838 Feature seg;
max@1	1839
max@1	1840 arma::vec indices;
Chris@21	1841 int idx=0;
max@1	1842 vector<int> values;
max@1	1843 vector<string> letters;
max@1	1844
Chris@21	1845 for (int iPart=0; iPart<finalParts.size()-1; ++iPart)
max@1	1846 {
Chris@21	1847 int iInstance=0;
max@1	1848 seg.hasTimestamp = true;
max@1	1849
max@1	1850 int ind = finalParts[iPart].indices[iInstance];
max@1	1851 int ind1 = finalParts[iPart+1].indices[iInstance];
max@1	1852
Chris@19	1853 seg.timestamp = quantisedChromagram[ind].timestamp;
max@1	1854 seg.hasDuration = true;
Chris@19	1855 seg.duration = quantisedChromagram[ind1].timestamp-quantisedChromagram[ind].timestamp;
max@1	1856 seg.values.clear();
max@1	1857 seg.values.push_back(finalParts[iPart].value);
max@1	1858 seg.label = finalParts[iPart].letter;
max@1	1859
max@1	1860 results.push_back(seg);
max@1	1861 }
max@1	1862
max@1	1863 int ind = finalParts[finalParts.size()-1].indices[0];
Chris@19	1864 seg.timestamp = quantisedChromagram[ind].timestamp;
max@1	1865 seg.hasDuration = true;
Chris@19	1866 seg.duration = quantisedChromagram[quantisedChromagram.size()-1].timestamp-quantisedChromagram[ind].timestamp;
max@1	1867 seg.values.clear();
max@1	1868 seg.values.push_back(finalParts[finalParts.size()-1].value);
max@1	1869 seg.label = finalParts[finalParts.size()-1].letter;
max@1	1870
max@1	1871 results.push_back(seg);
max@1	1872
max@1	1873 return results;
max@1	1874 }
max@1	1875
max@1	1876
max@1	1877
max@1	1878
max@1	1879
max@1	1880
max@1	1881
max@1	1882
max@1	1883
max@1	1884
max@1	1885
max@1	1886
max@1	1887
max@1	1888
max@1	1889
max@1	1890
max@1	1891

Mercurial > hg > segmenter-vamp-plugin

annotate songparts/SongParts.cpp @ 35:d6bb46a1e70c slimline