segmenter-vamp-plugin: segmentino/Segmentino.cpp annotate

annotate segmentino/Segmentino.cpp @ 84:55a047986812 tip

Update library URI so as not to be document-local

author	Chris Cannam
date	Wed, 22 Apr 2020 14:21:57 +0100
parents	29892906421f
children

rev	line source
max@1	1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
max@1	2
max@1	3 /*
Chris@48	4 Segmentino
max@1	5
Chris@48	6 Code by Massimiliano Zanoni and Matthias Mauch
Chris@48	7 Centre for Digital Music, Queen Mary, University of London
Chris@48	8
Chris@48	9 Copyright 2009-2013 Queen Mary, University of London.
max@1	10
Chris@65	11 This program is free software: you can redistribute it and/or
Chris@65	12 modify it under the terms of the GNU Affero General Public License
Chris@65	13 as published by the Free Software Foundation, either version 3 of
Chris@65	14 the License, or (at your option) any later version. See the file
max@1	15 COPYING included with this distribution for more information.
max@1	16 */
max@1	17
Chris@48	18 #include "Segmentino.h"
max@1	19
Chris@49	20 #include <qm-dsp/base/Window.h>
Chris@49	21 #include <qm-dsp/dsp/onsets/DetectionFunction.h>
Chris@49	22 #include <qm-dsp/dsp/onsets/PeakPicking.h>
Chris@49	23 #include <qm-dsp/dsp/transforms/FFT.h>
Chris@49	24 #include <qm-dsp/dsp/tempotracking/TempoTrackV2.h>
Chris@49	25 #include <qm-dsp/dsp/tempotracking/DownBeat.h>
Chris@49	26 #include <qm-dsp/maths/MathUtilities.h>
Chris@49	27
Chris@49	28 #include <nnls-chroma/chromamethods.h>
Chris@49	29
Chris@49	30 #include <armadillo>
Chris@49	31
max@1	32 #include <fstream>
max@1	33 #include <sstream>
max@1	34 #include <cmath>
max@1	35 #include <vector>
max@1	36
max@1	37 #include <vamp-sdk/Plugin.h>
max@1	38
Chris@56	39 using arma::colvec;
Chris@56	40 using arma::conv;
Chris@56	41 using arma::cor;
Chris@56	42 using arma::cube;
Chris@56	43 using arma::eye;
Chris@56	44 using arma::imat;
Chris@56	45 using arma::irowvec;
Chris@56	46 using arma::linspace;
Chris@56	47 using arma::mat;
Chris@56	48 using arma::max;
Chris@56	49 using arma::ones;
Chris@56	50 using arma::rowvec;
Chris@56	51 using arma::sort;
Chris@56	52 using arma::span;
Chris@56	53 using arma::sum;
Chris@56	54 using arma::trans;
Chris@56	55 using arma::uvec;
Chris@56	56 using arma::uword;
Chris@56	57 using arma::vec;
Chris@56	58 using arma::zeros;
Chris@56	59
max@1	60 using std::string;
max@1	61 using std::vector;
max@1	62 using std::cerr;
max@1	63 using std::cout;
max@1	64 using std::endl;
max@1	65
max@1	66 // Result struct: plain data record with no methods; the code that fills it in is not in this part of the file
max@1	67 typedef struct Part {
max@1	68 int n; // NOTE(review): meaning not established here; confirm at the use site
Chris@21	69 vector<int> indices; // list of integer indices associated with this part
max@1	70 string letter; // text label for this part
Chris@21	71 int value; // NOTE(review): presumably a numeric counterpart of `letter`; confirm at the use site
max@1	72 int level; // NOTE(review): meaning not established here
max@1	73 int nInd; // presumably the number of entries in `indices`; TODO confirm
max@1	74 }Part;
max@1	75
max@1	76
max@8	77
max@1	78 /* ------------------------------------ */
max@1	79 /* ----- BEAT DETECTOR CLASS ---------- */
max@1	80 /* ------------------------------------ */
max@1	81
max@1	82 class BeatTrackerData // bundles the onset detection function, the downbeat tracker and the collected detection-function values
max@1	83 {
max@1	84 /* --- ATTRIBUTES (all public; accessed directly by Segmentino) --- */
max@1	85 public:
max@1	86 DFConfig dfConfig; // copy of the configuration, kept so reset() can rebuild df
max@1	87 DetectionFunction *df; // owned: new in constructor/reset(), delete in destructor/reset()
max@1	88 DownBeat *downBeat; // owned: new in constructor, delete in destructor
max@1	89 vector<double> dfOutput; // detection function values; Segmentino::process appends one per call
max@1	90 Vamp::RealTime origin; // Segmentino::process stores the timestamp of the first frame here
max@1	91
max@1	92
max@1	93 /* --- METHODS --- */
max@1	94
max@1	95 /* --- Constructor: rate is the input sample rate, config the detection function setup --- */
max@1	96 public:
max@1	97 BeatTrackerData(float rate, const DFConfig &config) : dfConfig(config) {
Chris@22	98
max@1	99 df = new DetectionFunction(config);
max@1	100 // decimation factor aims at resampling to c. 3KHz; must be power of 2
max@1	101 int factor = MathUtilities::nextPowerOfTwo(rate / 3000);
max@1	102 // std::cerr << "BeatTrackerData: factor = " << factor << std::endl;
max@1	103 downBeat = new DownBeat(rate, factor, config.stepSize);
max@1	104 }
max@1	105
max@1	106 /* --- Destructor: releases both owned objects. NOTE(review): no copy constructor/assignment is declared, so copying an instance would delete df and downBeat twice --- */
max@1	107 ~BeatTrackerData() {
Chris@22	108 delete df;
max@1	109 delete downBeat;
max@1	110 }
max@1	111
max@1	112 void reset() { // return to the freshly constructed state, keeping the same configuration
max@1	113 delete df;
max@1	114 df = new DetectionFunction(dfConfig); // rebuilt from the stored config rather than reset in place
max@1	115 dfOutput.clear();
max@1	116 downBeat->resetAudioBuffer();
max@1	117 origin = Vamp::RealTime::zeroTime;
max@1	118 }
max@1	119 };
max@1	120
max@1	121
max@1	122 /* --------------------------------------- */
max@1	123 /* ----- CHROMA EXTRACTOR CLASS ---------- */
max@1	124 /* --------------------------------------- */
max@1	125
max@1	126 class ChromaData // per-frame log-frequency (note) spectrum and tuning accumulator; one frame is added per baseProcess() call
max@1	127 {
max@1	128
max@1	129 /* --- ATTRIBUTES --- */
max@1	130
max@1	131 public:
max@1	132 int frameCount; // frames passed to baseProcess() since construction, initialise() or reset()
max@1	133 int nBPS; // bins per semitone (initialised to 3)
max@1	134 Vamp::Plugin::FeatureList logSpectrum; // one timestamped feature of nNote values per processed frame
Chris@37	135 int blockSize; // time-domain frame length in samples
max@1	136 int lengthOfNoteIndex;
max@1	137 vector<float> meanTunings; // running mean over all frames, nBPS entries (filled by initialise())
max@1	138 vector<float> localTunings; // exponentially decaying counterpart of meanTunings, nBPS entries
max@1	139 float whitening;
max@1	140 float preset;
max@1	141 float useNNLS;
max@1	142 vector<float> localTuning; // per-frame normalised tuning angle, appended by baseProcess()
max@1	143 vector<float> kernelValue; // sparse log-frequency kernel: the positive weights
max@1	144 vector<int> kernelFftIndex; // FFT bin index of each kernelValue entry
max@1	145 vector<int> kernelNoteIndex; // note bin index of each kernelValue entry
max@1	146 float *dict; // note dictionary of nNote * 84 floats; owned (new[] in constructor, delete[] in destructor)
max@1	147 bool tuneLocal;
max@1	148 float doNormalizeChroma;
max@1	149 float rollon; // percentage of spectral energy to zero out from the bottom; 0 disables
max@1	150 float s;
max@1	151 vector<float> hw; // normalised Hamming window of length nBPS * 6 + 1 (built by initialise())
max@1	152 vector<float> sinvalues; // sin(2*pi*k/nBPS) for k in [0, nBPS)
max@1	153 vector<float> cosvalues; // cos(2*pi*k/nBPS) for k in [0, nBPS)
max@1	154 Window<float> window; // Hanning window of block_size samples, applied by the caller before baseProcess()
max@1	155 FFTReal fft;
Chris@37	156 int inputSampleRate;
max@1	157
max@1	158 /* --- METHODS --- */
max@1	159
max@1	160 /* --- Constructor: sets defaults and allocates the zeroed note dictionary; initialise() must be called before use --- */
max@1	161
max@1	162 public:
max@1	163 ChromaData(float inputSampleRate, size_t block_size) :
max@1	164 frameCount(0),
max@1	165 nBPS(3),
max@1	166 logSpectrum(0),
max@1	167 blockSize(0),
max@1	168 lengthOfNoteIndex(0),
max@1	169 meanTunings(0),
max@1	170 localTunings(0),
max@1	171 whitening(1.0),
max@1	172 preset(0.0),
max@1	173 useNNLS(1.0),
max@1	174 localTuning(0.0),
max@1	175 kernelValue(0),
max@1	176 kernelFftIndex(0),
max@1	177 kernelNoteIndex(0),
max@1	178 dict(0),
max@1	179 tuneLocal(0.0),
max@1	180 doNormalizeChroma(0),
max@1	181 rollon(0.0),
Chris@35	182 s(0.7),
Chris@35	183 sinvalues(0),
Chris@35	184 cosvalues(0),
Chris@35	185 window(HanningWindow, block_size),
Chris@35	186 fft(block_size),
Chris@35	187 inputSampleRate(inputSampleRate)
max@1	188 {
max@1	189 // make the note dictionary matrix
max@1	190 dict = new float[nNote * 84];
max@1	191 for (int i = 0; i < nNote * 84; ++i) dict[i] = 0.0;
max@1	192 blockSize = block_size;
max@1	193 }
max@1	194
max@1	195
max@1	196 /* --- Destructor --- */
max@1	197
max@1	198 ~ChromaData() {
max@1	199 delete [] dict;
max@1	200 }
max@1	201
max@1	202 /* --- Public Methods --- */
max@1	203
max@1	204 void reset() { // clear accumulated frames and tuning; indexes the tuning vectors, so initialise() must have run first
max@1	205 frameCount = 0;
max@1	206 logSpectrum.clear();
max@1	207 for (int iBPS = 0; iBPS < 3; ++iBPS) {
max@1	208 meanTunings[iBPS] = 0;
max@1	209 localTunings[iBPS] = 0;
max@1	210 }
max@1	211 localTuning.clear();
max@1	212 }
max@1	213
max@1	214 void baseProcess(float *inputBuffers, Vamp::RealTime timestamp) // inputBuffers: blockSize samples; appends one feature to logSpectrum and one value to localTuning
max@1	215 {
Chris@22	216
max@1	217 frameCount++;
max@1	218 float *magnitude = new float[blockSize/2];
max@1	219 double *fftReal = new double[blockSize];
max@1	220 double *fftImag = new double[blockSize];
max@1	221
max@1	222 // FFTReal wants doubles, so we need to make a local copy of inputBuffers
max@1	223 double *inputBuffersDouble = new double[blockSize];
Chris@37	224 for (int i = 0; i < blockSize; i++) inputBuffersDouble[i] = inputBuffers[i];
max@1	225
Chris@65	226 fft.forward(inputBuffersDouble, fftReal, fftImag);
max@1	227
max@1	228 float energysum = 0;
max@1	229 // make magnitude
max@1	230 float maxmag = -10000;
max@1	231 for (int iBin = 0; iBin < static_cast<int>(blockSize/2); iBin++) {
max@1	232 magnitude[iBin] = sqrt(fftReal[iBin] * fftReal[iBin] +
max@1	233 fftImag[iBin] * fftImag[iBin]);
max@1	234 if (magnitude[iBin]>blockSize*1.0) magnitude[iBin] = blockSize;
max@1	235 // a valid audio signal (between -1 and 1) should not be limited here.
max@1	236 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
max@1	237 if (rollon > 0) {
max@1	238 energysum += pow(magnitude[iBin],2);
max@1	239 }
max@1	240 }
max@1	241
max@1	242 float cumenergy = 0;
max@1	243 if (rollon > 0) { // zero the lowest bins until rollon percent of the total energy has been passed
max@1	244 for (int iBin = 2; iBin < static_cast<int>(blockSize/2); iBin++) {
max@1	245 cumenergy += pow(magnitude[iBin],2);
max@1	246 if (cumenergy < energysum * rollon / 100) magnitude[iBin-2] = 0;
max@1	247 else break;
max@1	248 }
max@1	249 }
max@1	250
max@1	251 if (maxmag < 2) {
max@1	252 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
max@1	253 for (int iBin = 0; iBin < static_cast<int>(blockSize/2); iBin++) {
max@1	254 magnitude[iBin] = 0;
max@1	255 }
max@1	256 }
max@1	257
max@1	258 // cerr << magnitude[200] << endl;
max@1	259
max@1	260 // note magnitude mapping using pre-calculated matrix
max@1	261 float *nm = new float[nNote]; // note magnitude
max@1	262 for (int iNote = 0; iNote < nNote; iNote++) {
max@1	263 nm[iNote] = 0; // initialise as 0
max@1	264 }
max@1	265 int binCount = 0; // index into the three parallel kernel vectors
max@1	266 for (vector<float>::iterator it = kernelValue.begin(); it != kernelValue.end(); ++it) {
max@1	267 nm[kernelNoteIndex[binCount]] += magnitude[kernelFftIndex[binCount]] * kernelValue[binCount];
max@1	268 binCount++;
max@1	269 }
max@1	270
max@1	271 float one_over_N = 1.0/frameCount;
max@1	272 // update means of complex tuning variables: scale the old mean by (N-1)/N here, the new frame's share is added below
max@1	273 for (int iBPS = 0; iBPS < nBPS; ++iBPS) meanTunings[iBPS] *= float(frameCount-1)*one_over_N;
max@1	274
max@1	275 for (int iTone = 0; iTone < round(nNote*0.62/nBPS)*nBPS+1; iTone = iTone + nBPS) {
max@1	276 for (int iBPS = 0; iBPS < nBPS; ++iBPS) meanTunings[iBPS] += nm[iTone + iBPS]*one_over_N;
max@1	277 float ratioOld = 0.997; // weight kept from the previous local tuning estimate
max@1	278 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1	279 localTunings[iBPS] *= ratioOld;
max@1	280 localTunings[iBPS] += nm[iTone + iBPS] * (1 - ratioOld);
max@1	281 }
max@1	282 }
max@1	283
max@1	284 float localTuningImag = 0;
max@1	285 float localTuningReal = 0;
max@1	286 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1	287 localTuningReal += localTunings[iBPS] * cosvalues[iBPS];
max@1	288 localTuningImag += localTunings[iBPS] * sinvalues[iBPS];
max@1	289 }
max@1	290
max@1	291 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
max@1	292 localTuning.push_back(normalisedtuning);
max@1	293
max@1	294 Vamp::Plugin::Feature f1; // logfreqspec
max@1	295 f1.hasTimestamp = true;
max@1	296 f1.timestamp = timestamp;
max@1	297 for (int iNote = 0; iNote < nNote; iNote++) {
max@1	298 f1.values.push_back(nm[iNote]);
max@1	299 }
max@1	300
max@1	301 // deletes
max@1	302 delete[] inputBuffersDouble;
max@1	303 delete[] magnitude;
max@1	304 delete[] fftReal;
max@1	305 delete[] fftImag;
max@1	306 delete[] nm;
max@1	307
max@1	308 logSpectrum.push_back(f1); // remember note magnitude
max@1	309 }
max@1	310
max@1	311 bool initialise() // builds dictionary, tuning tables, Hamming window and sparse kernel; always returns true. It appends to sinvalues, cosvalues, hw and the tuning vectors, so call it once per instance
max@1	312 {
max@1	313 dictionaryMatrix(dict, s);
Chris@22	314
Chris@37	315 // make things for tuning estimation
Chris@37	316 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1	317 sinvalues.push_back(sin(2*M_PI*(iBPS*1.0/nBPS)));
max@1	318 cosvalues.push_back(cos(2*M_PI*(iBPS*1.0/nBPS)));
max@1	319 }
max@1	320
Chris@22	321
Chris@37	322 // make hamming window of length 1/2 octave
Chris@37	323 int hamwinlength = nBPS * 6 + 1;
max@1	324 float hamwinsum = 0;
max@1	325 for (int i = 0; i < hamwinlength; ++i) {
max@1	326 hw.push_back(0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1)));
max@1	327 hamwinsum += 0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1));
max@1	328 }
max@1	329 for (int i = 0; i < hamwinlength; ++i) hw[i] = hw[i] / hamwinsum; // normalise so the window sums to 1
max@1	330
max@1	331
max@1	332 // initialise the tuning
max@1	333 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1	334 meanTunings.push_back(0);
max@1	335 localTunings.push_back(0);
max@1	336 }
Chris@22	337
max@1	338 frameCount = 0;
max@1	339 int tempn = nNote * blockSize/2;
max@1	340 // cerr << "length of tempkernel : " << tempn << endl;
max@1	341 float *tempkernel;
max@1	342
max@1	343 tempkernel = new float[tempn];
max@1	344
max@1	345 logFreqMatrix(inputSampleRate, blockSize, tempkernel);
max@1	346 kernelValue.clear();
max@1	347 kernelFftIndex.clear();
max@1	348 kernelNoteIndex.clear();
max@1	349 int countNonzero = 0;
max@1	350 for (int iNote = 0; iNote < nNote; ++iNote) {
max@1	351 // I don't know if this is wise: manually making a sparse matrix
max@1	352 for (int iFFT = 0; iFFT < static_cast<int>(blockSize/2); ++iFFT) {
max@1	353 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
max@1	354 kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
max@1	355 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { // same test as the enclosing if, so always true here
max@1	356 countNonzero++;
max@1	357 }
max@1	358 kernelFftIndex.push_back(iFFT);
Chris@23	359 kernelNoteIndex.push_back(iNote);
max@1	360 }
max@1	361 }
max@1	362 }
max@1	363 delete [] tempkernel;
Chris@37	364
Chris@37	365 return true;
max@1	366 }
max@1	367 };
max@1	368
max@1	369
max@1	370 /* --------------------------------- */
max@1	371 /* ----- SONG PARTITIONER ---------- */
max@1	372 /* --------------------------------- */
max@1	373
max@1	374
max@1	375 /* --- ATTRIBUTES --- */
max@1	376
Chris@48	377 float Segmentino::m_stepSecs = 0.01161; // step length in seconds: 512 samples at 44100; getPreferredStepSize() scales it by the input rate
Chris@48	378 int Segmentino::m_chromaFramesizeFactor = 16; // chroma frame is 16 times as long as beat tracker's
Chris@48	379 int Segmentino::m_chromaStepsizeFactor = 4; // chroma step is 4 times as long as beat tracker's (process() runs chroma on every 4th call)
max@1	380
max@1	381
max@1	382 /* --- METHODS --- */
max@1	383
max@1	384 /* --- Constructor: no helper objects yet (m_d and m_chromadata stay null until initialise()), 4 beats per bar, frame counter at 0 --- */
Chris@48	385 Segmentino::Segmentino(float inputSampleRate) :
max@1	386 Vamp::Plugin(inputSampleRate),
max@1	387 m_d(0),
Chris@35	388 m_chromadata(0),
max@1	389 m_bpb(4),
max@1	390 m_pluginFrameCount(0)
max@1	391 {
max@1	392 }
max@1	393
max@1	394
max@1	395 /* --- Destructor: frees the beat tracker and chroma helpers (either may still be null) --- */
Chris@48	396 Segmentino::~Segmentino()
max@1	397 {
max@1	398 delete m_d;
Chris@35	399 delete m_chromadata;
max@1	400 }
max@1	401
max@1	402
max@1	403 /* --- Methods --- */
Chris@48	404 string Segmentino::getIdentifier() const // fixed identifier string for this plugin
max@1	405 {
Chris@54	406 return "segmentino";
max@1	407 }
max@1	408
Chris@48	409 string Segmentino::getName() const // display name of the plugin
max@1	410 {
Chris@54	411 return "Segmentino";
max@1	412 }
max@1	413
Chris@48	414 string Segmentino::getDescription() const // one-sentence description of what the plugin estimates
max@1	415 {
max@1	416 return "Estimate contiguous segments pertaining to song parts such as verse and chorus.";
max@1	417 }
max@1	418
Chris@48	419 string Segmentino::getMaker() const // name of the institution that made the plugin
max@1	420 {
max@1	421 return "Queen Mary, University of London";
max@1	422 }
max@1	423
Chris@48	424 int Segmentino::getPluginVersion() const // integer plugin version, currently 3
max@1	425 {
Chris@81	426 return 3;
max@1	427 }
max@1	428
Chris@48	429 string Segmentino::getCopyright() const // authorship, copyright years and licence as one string
max@1	430 {
Chris@81	431 return "Plugin by Matthew Davies, Christian Landone, Chris Cannam, Matthias Mauch and Massimiliano Zanoni Copyright (c) 2006-2019 QMUL - Affero GPL";
max@1	432 }
max@1	433
Chris@48	434 Segmentino::ParameterList Segmentino::getParameterDescriptors() const // returns an empty list: the only descriptor ("bpb") is commented out below
max@1	435 {
max@1	436 ParameterList list;
max@1	437
max@1	438 ParameterDescriptor desc; // unused while the "bpb" block below stays disabled
max@1	439
matthiasm@46	440 // desc.identifier = "bpb";
matthiasm@46	441 // desc.name = "Beats per Bar";
matthiasm@46	442 // desc.description = "The number of beats in each bar";
matthiasm@46	443 // desc.minValue = 2;
matthiasm@46	444 // desc.maxValue = 16;
matthiasm@46	445 // desc.defaultValue = 4;
matthiasm@46	446 // desc.isQuantized = true;
matthiasm@46	447 // desc.quantizeStep = 1;
matthiasm@46	448 // list.push_back(desc);
max@1	449
max@1	450 return list;
max@1	451 }
max@1	452
Chris@48	453 float Segmentino::getParameter(std::string name) const // returns beats per bar for "bpb", 0.0 for any other name
max@1	454 {
max@1	455 if (name == "bpb") return m_bpb;
max@1	456 return 0.0;
max@1	457 }
max@1	458
Chris@48	459 void Segmentino::setParameter(std::string name, float value) // "bpb" stores value rounded to an integer via lrintf; other names are ignored
max@1	460 {
max@1	461 if (name == "bpb") m_bpb = lrintf(value);
max@1	462 }
max@1	463
max@1	464
max@1	465 // Preferred step size in samples: m_stepSecs of audio at the input sample rate, never less than 1
Chris@48	466 size_t Segmentino::getPreferredStepSize() const
max@1	467 {
max@1	468 size_t step = size_t(m_inputSampleRate * m_stepSecs + 0.0001); // small epsilon before truncating to an integer
max@1	469 if (step < 1) step = 1;
max@1	470
max@1	471 return step;
max@1	472 }
max@1	473
max@1	474 // Preferred block size in samples: twice the step size times m_chromaFramesizeFactor, passed through MathUtilities::nextPowerOfTwo
Chris@48	475 size_t Segmentino::getPreferredBlockSize() const
max@1	476 {
Chris@50	477 int theoretical = getPreferredStepSize() * 2;
max@1	478 theoretical *= m_chromaFramesizeFactor;
Chris@50	479 return MathUtilities::nextPowerOfTwo(theoretical);
max@1	480 }
max@1	481
max@1	482
max@1	483 // Initialize the plugin and define Beat Tracker and Chroma Extractor Objects
Chris@48	484 bool Segmentino::initialise(size_t channels, size_t stepSize, size_t blockSize)
max@1	485 {
max@1	486 if (m_d) {
Chris@22	487 delete m_d;
Chris@22	488 m_d = 0;
max@1	489 }
Chris@35	490 if (m_chromadata) {
Chris@35	491 delete m_chromadata;
Chris@35	492 m_chromadata = 0;
Chris@35	493 }
max@1	494
max@1	495 if (channels < getMinChannelCount() \|\|
Chris@22	496 channels > getMaxChannelCount()) {
Chris@48	497 std::cerr << "Segmentino::initialise: Unsupported channel count: "
max@1	498 << channels << std::endl;
max@1	499 return false;
max@1	500 }
max@1	501
max@1	502 if (stepSize != getPreferredStepSize()) {
Chris@48	503 std::cerr << "ERROR: Segmentino::initialise: Unsupported step size for this sample rate: "
max@1	504 << stepSize << " (wanted " << (getPreferredStepSize()) << ")" << std::endl;
max@1	505 return false;
max@1	506 }
max@1	507
max@1	508 if (blockSize != getPreferredBlockSize()) {
Chris@48	509 std::cerr << "WARNING: Segmentino::initialise: Sub-optimal block size for this sample rate: "
max@1	510 << blockSize << " (wanted " << getPreferredBlockSize() << ")" << std::endl;
max@1	511 }
max@1	512
max@1	513 // Beat tracker and Chroma extractor has two different configuration parameters
max@1	514
max@1	515 // Configuration Parameters for Beat Tracker
max@1	516 DFConfig dfConfig;
max@1	517 dfConfig.DFType = DF_COMPLEXSD;
max@1	518 dfConfig.stepSize = stepSize;
max@1	519 dfConfig.frameLength = blockSize / m_chromaFramesizeFactor;
max@1	520 dfConfig.dbRise = 3;
max@1	521 dfConfig.adaptiveWhitening = false;
max@1	522 dfConfig.whiteningRelaxCoeff = -1;
max@1	523 dfConfig.whiteningFloor = -1;
max@1	524
max@1	525 // Initialise Beat Tracker
max@1	526 m_d = new BeatTrackerData(m_inputSampleRate, dfConfig);
max@1	527 m_d->downBeat->setBeatsPerBar(m_bpb);
max@1	528
max@1	529 // Initialise Chroma Extractor
max@1	530 m_chromadata = new ChromaData(m_inputSampleRate, blockSize);
max@1	531 m_chromadata->initialise();
max@1	532
matthiasm@59	533 // definition of outputs numbers used internally
matthiasm@59	534 int outputCounter = 1;
matthiasm@59	535 m_beatOutputNumber = outputCounter++;
matthiasm@59	536 m_barsOutputNumber = outputCounter++;
matthiasm@59	537 m_beatcountsOutputNumber = outputCounter++;
matthiasm@59	538 m_beatsdOutputNumber = outputCounter++;
matthiasm@59	539 m_logscalespecOutputNumber = outputCounter++;
matthiasm@59	540 m_bothchromaOutputNumber = outputCounter++;
matthiasm@59	541 m_qchromafwOutputNumber = outputCounter++;
matthiasm@59	542 m_qchromaOutputNumber = outputCounter++;
matthiasm@59	543
max@1	544 return true;
max@1	545 }
max@1	546
Chris@48	547 void Segmentino::reset() // resets whichever helpers exist and restarts the frame counter; safe to call before initialise()
max@1	548 {
max@1	549 if (m_d) m_d->reset();
Chris@38	550 if (m_chromadata) m_chromadata->reset();
max@1	551 m_pluginFrameCount = 0;
max@1	552 }
max@1	553
Chris@48	554 Segmentino::OutputList Segmentino::getOutputDescriptors() const // describes the single published output, "segmentation"
max@1	555 {
matthiasm@59	556
max@1	557 OutputList list;
max@1	558
max@1	559 OutputDescriptor segm;
Chris@15	560 segm.identifier = "segmentation";
max@1	561 segm.name = "Segmentation";
max@1	562 segm.description = "Segmentation";
max@1	563 segm.unit = "segment-type";
max@1	564 segm.hasFixedBinCount = true;
max@1	565 //segm.binCount = 24;
max@1	566 segm.binCount = 1; // one value per feature
max@1	567 segm.hasKnownExtents = true;
max@1	568 segm.minValue = 1;
max@1	569 segm.maxValue = 5;
max@1	570 segm.isQuantized = true;
max@1	571 segm.quantizeStep = 1;
max@1	572 segm.sampleType = OutputDescriptor::VariableSampleRate;
Chris@17	573 segm.sampleRate = 1.0 / m_stepSecs;
max@1	574 segm.hasDuration = true;
matthiasm@59	575 m_segmOutputNumber = 0; // assigned inside a const method; NOTE(review): presumably declared mutable in the header, confirm
matthiasm@59	576
max@1	577 list.push_back(segm);
max@1	578
max@1	579 return list;
max@1	580 }
max@1	581
max@1	582 // Executed for each frame - called from the host. Only accumulates state; always returns an empty FeatureSet (results come from getRemainingFeatures())
max@1	583
max@1	584 // We use time domain input, because DownBeat requires it -- so we
max@1	585 // use the time-domain version of DetectionFunction::process which
max@1	586 // does its own FFT. It requires doubles as input, so we need to
max@1	587 // make a temporary copy
max@1	588
max@1	589 // We only support a single input channel
Chris@65	590 Segmentino::FeatureSet Segmentino::process(const float *const *inputBuffers,
Chris@65	591 Vamp::RealTime timestamp)
max@1	592 {
max@1	593 if (!m_d) {
Chris@48	594 cerr << "ERROR: Segmentino::process: "
Chris@48	595 << "Segmentino has not been initialised"
Chris@22	596 << endl;
Chris@22	597 return FeatureSet();
max@1	598 }
max@1	599
max@1	600 const int fl = m_d->dfConfig.frameLength; // beat tracker frame length in samples
Chris@67	601
max@1	602 int sampleOffset = ((m_chromaFramesizeFactor-1) * fl) / 2; // used below to shift the chroma timestamp
max@1	603
Chris@67	604 double *dfinput = new double[fl];
Chris@67	605
max@1	606 // Since chroma needs a much longer frame size, we only ever use the very
max@1	607 // beginning of the frame for beat tracking.
max@1	608 for (int i = 0; i < fl; ++i) dfinput[i] = inputBuffers[0][i];
Chris@65	609 double output = m_d->df->processTimeDomain(dfinput);
max@1	610
Chris@67	611 delete[] dfinput;
Chris@67	612
max@1	613 if (m_d->dfOutput.empty()) m_d->origin = timestamp; // remember the time of the first frame
max@1	614
max@1	615 // std::cerr << "df[" << m_d->dfOutput.size() << "] is " << output << std::endl;
max@1	616 m_d->dfOutput.push_back(output);
max@1	617
max@1	618 // Downsample and store the incoming audio block.
max@1	619 // We have an overlap on the incoming audio stream (step size is
max@1	620 // half block size) -- this function is configured to take only a
max@1	621 // step size's worth, so effectively ignoring the overlap. Note
max@1	622 // however that this means we omit the last blocksize - stepsize
max@1	623 // samples completely for the purposes of barline detection
max@1	624 // (hopefully not a problem)
max@1	625 m_d->downBeat->pushAudioBlock(inputBuffers[0]);
max@1	626
max@1	627 // The following is not done every time, but only every m_chromaStepsizeFactor times,
max@1	628 // because the chroma does not need dense time frames.
max@1	629
max@1	630 if (m_pluginFrameCount % m_chromaStepsizeFactor == 0)
max@1	631 {
max@1	632
max@1	633 // Window the full time domain data, FFT it and process chroma stuff.
max@1	634
Chris@67	635 float *windowedBuffers = new float[m_chromadata->blockSize];
Chris@67	636
max@1	637 m_chromadata->window.cut(&inputBuffers[0][0], &windowedBuffers[0]);
max@1	638
max@1	639 // adjust timestamp (we want the middle of the frame)
Chris@67	640 timestamp = timestamp +
Chris@67	641 Vamp::RealTime::frame2RealTime(sampleOffset, lrintf(m_inputSampleRate));
max@1	642
max@1	643 m_chromadata->baseProcess(&windowedBuffers[0], timestamp);
Chris@67	644
Chris@67	645 delete[] windowedBuffers;
max@1	646 }
Chris@67	647
max@1	648 m_pluginFrameCount++;
max@1	649
max@1	650 FeatureSet fs;
max@1	651 return fs;
max@1	652 }
max@1	653
Chris@48	654 Segmentino::FeatureSet Segmentino::getRemainingFeatures() // runs beat tracking, chroma extraction, beat quantisation and segmentation; only the segmentation output is returned
max@1	655 {
max@1	656 if (!m_d) {
Chris@48	657 cerr << "ERROR: Segmentino::getRemainingFeatures: "
Chris@48	658 << "Segmentino has not been initialised"
Chris@22	659 << endl;
Chris@22	660 return FeatureSet();
max@1	661 }
max@1	662
matthiasm@59	663 FeatureSet masterFeatureset; // what the host receives
matthiasm@59	664 FeatureSet internalFeatureset = beatTrack(); // intermediate results, never returned
matthiasm@59	665
matthiasm@59	666 int beatcount = internalFeatureset[m_beatOutputNumber].size();
Chris@49	667 if (beatcount == 0) return Segmentino::FeatureSet(); // no beats found: nothing to segment
matthiasm@59	668 Vamp::RealTime last_beattime = internalFeatureset[m_beatOutputNumber][beatcount-1].timestamp; // only referenced by the disabled code below
matthiasm@59	669
matthiasm@60	670 // // THIS FOLLOWING BIT IS WEIRD! REPLACES BEAT-TRACKED BEATS WITH
matthiasm@60	671 // // UNIFORM 0.5 SEC BEATS
matthiasm@59	672 // internalFeatureset[m_beatOutputNumber].clear();
matthiasm@59	673 // Vamp::RealTime beattime = Vamp::RealTime::fromSeconds(1.0);
matthiasm@59	674 // while (beattime < last_beattime)
matthiasm@59	675 // {
matthiasm@59	676 // Feature beatfeature;
matthiasm@59	677 // beatfeature.hasTimestamp = true;
matthiasm@59	678 // beatfeature.timestamp = beattime;
matthiasm@59	679 // masterFeatureset[m_beatOutputNumber].push_back(beatfeature);
matthiasm@59	680 // beattime = beattime + Vamp::RealTime::fromSeconds(0.5);
matthiasm@59	681 // }
matthiasm@46	682
Chris@16	683 FeatureList chromaList = chromaFeatures();
max@1	684
Chris@37	685 for (int i = 0; i < (int)chromaList.size(); ++i)
max@1	686 {
matthiasm@59	687 internalFeatureset[m_bothchromaOutputNumber].push_back(chromaList[i]);
max@1	688 }
max@1	689
max@1	690 // quantised and pseudo-quantised (beat-wise) chroma
matthiasm@59	691 std::vector<FeatureList> quantisedChroma = beatQuantiser(chromaList, internalFeatureset[m_beatOutputNumber]);
Chris@32	692
Chris@32	693 if (quantisedChroma.empty()) return masterFeatureset; // still empty at this point
max@1	694
matthiasm@59	695 internalFeatureset[m_qchromafwOutputNumber] = quantisedChroma[0];
matthiasm@59	696 internalFeatureset[m_qchromaOutputNumber] = quantisedChroma[1]; // assumes beatQuantiser returns at least two lists when non-empty; TODO confirm
max@1	697
max@1	698 // Segmentation
Chris@39	699 try {
Chris@39	700 masterFeatureset[m_segmOutputNumber] = runSegmenter(quantisedChroma[1]);
Chris@39	701 } catch (std::bad_alloc &a) { // out of memory is logged and the result is returned without a segmentation
Chris@48	702 cerr << "ERROR: Segmentino::getRemainingFeatures: Failed to run segmenter, not enough memory (song too long?)" << endl;
Chris@39	703 }
max@1	704
max@1	705 return(masterFeatureset);
max@1	706 }
max@1	707
max@1	708 /* ------ Beat Tracker: turns the collected detection function into labelled beat, bar, beat-count and beat spectral difference features ------ */
max@1	709
Chris@48	710 Segmentino::FeatureSet Segmentino::beatTrack()
max@1	711 {
max@1	712 vector<double> df;
max@1	713 vector<double> beatPeriod;
max@1	714 vector<double> tempi;
max@1	715
Chris@37	716 for (int i = 2; i < (int)m_d->dfOutput.size(); ++i) { // discard first two elts
max@1	717 df.push_back(m_d->dfOutput[i]);
max@1	718 beatPeriod.push_back(0.0); // same length as df, filled in by calculateBeatPeriod
max@1	719 }
max@1	720 if (df.empty()) return FeatureSet(); // fewer than three frames were processed
max@1	721
max@1	722 TempoTrackV2 tt(m_inputSampleRate, m_d->dfConfig.stepSize);
max@1	723 tt.calculateBeatPeriod(df, beatPeriod, tempi);
max@1	724
max@1	725 vector<double> beats;
max@1	726 tt.calculateBeats(df, beatPeriod, beats);
max@1	727
max@1	728 vector<int> downbeats;
max@1	729 size_t downLength = 0;
max@1	730 const float *downsampled = m_d->downBeat->getBufferedAudio(downLength);
max@1	731 m_d->downBeat->findDownBeats(downsampled, downLength, beats, downbeats);
max@1	732
max@1	733 vector<double> beatsd;
max@1	734 m_d->downBeat->getBeatSD(beatsd);
max@1	735
max@1	736 /*std::cout << "BeatTracker: found downbeats at: ";
max@1	737 for (int i = 0; i < downbeats.size(); ++i) std::cout << downbeats[i] << " " << std::endl;*/
max@1	738
max@1	739 FeatureSet returnFeatures;
max@1	740
max@1	741 char label[20]; // scratch buffer for the decimal beat and bar labels
max@1	742
max@1	743 int dbi = 0; // index of the next downbeat to match
max@1	744 int beat = 0; // position within the bar, 0 on a downbeat
max@1	745 int bar = 0; // number of downbeats seen so far
max@1	746
max@1	747 if (!downbeats.empty()) {
max@1	748 // get the right number for the first beat; this will be
max@1	749 // incremented before use (at top of the following loop)
max@1	750 int firstDown = downbeats[0];
max@1	751 beat = m_bpb - firstDown - 1;
max@1	752 if (beat == m_bpb) beat = 0;
max@1	753 }
max@1	754
Chris@37	755 for (int i = 0; i < (int)beats.size(); ++i) {
max@1	756
Chris@37	757 int frame = beats[i] * m_d->dfConfig.stepSize; // beat position converted from detection-function steps to samples
max@1	758
Chris@37	759 if (dbi < (int)downbeats.size() && i == downbeats[dbi]) { // downbeats holds indices into beats
max@1	760 beat = 0;
max@1	761 ++bar;
max@1	762 ++dbi;
max@1	763 } else {
max@1	764 ++beat;
max@1	765 }
max@1	766
max@1	767 /* Output Section */
max@1	768
max@1	769 // features are stored under the internal output numbers assigned in initialise():
max@1	770 //
max@1	771 // m_beatOutputNumber -> beats
max@1	772 // m_barsOutputNumber -> bars
max@1	773 // m_beatcountsOutputNumber -> beat counter function
max@1	774
max@1	775 Feature feature;
max@1	776 feature.hasTimestamp = true;
max@1	777 feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime (frame, lrintf(m_inputSampleRate));
max@1	778
max@1	779 sprintf(label, "%d", beat + 1); // labels are 1-based
max@1	780 feature.label = label;
max@1	781 returnFeatures[m_beatOutputNumber].push_back(feature); // labelled beats
max@1	782
max@1	783 feature.values.push_back(beat + 1);
max@1	784 returnFeatures[m_beatcountsOutputNumber].push_back(feature); // beat function
max@1	785
Chris@37	786 if (i > 0 && i <= (int)beatsd.size()) { // beatsd[i-1] is attached to beat i, so the first beat gets none
max@1	787 feature.values.clear();
max@1	788 feature.values.push_back(beatsd[i-1]);
max@1	789 feature.label = "";
max@1	790 returnFeatures[m_beatsdOutputNumber].push_back(feature); // beat spectral difference
max@1	791 }
max@1	792
max@1	793 if (beat == 0) {
max@1	794 feature.values.clear();
max@1	795 sprintf(label, "%d", bar);
max@1	796 feature.label = label;
max@1	797 returnFeatures[m_barsOutputNumber].push_back(feature); // bars
max@1	798 }
max@1	799 }
max@1	800
max@1	801 return returnFeatures;
max@1	802 }
max@1	803
max@1	804
max@1	805 /* ------ Chroma Extractor ------ */
max@1	806
Chris@48	807 Segmentino::FeatureList Segmentino::chromaFeatures()
max@1	808 {
max@1	809
max@1	810 FeatureList returnFeatureList;
max@1	811 FeatureList tunedlogfreqspec;
max@1	812
max@1	813 if (m_chromadata->logSpectrum.size() == 0) return returnFeatureList;
max@1	814
max@1	815 /** Calculate Tuning
max@1	816 calculate tuning from (using the angle of the complex number defined by the
max@1	817 cumulative mean real and imag values)
max@1	818 **/
max@1	819 float meanTuningImag = 0;
max@1	820 float meanTuningReal = 0;
max@1	821 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1	822 meanTuningReal += m_chromadata->meanTunings[iBPS] * m_chromadata->cosvalues[iBPS];
max@1	823 meanTuningImag += m_chromadata->meanTunings[iBPS] * m_chromadata->sinvalues[iBPS];
max@1	824 }
max@1	825 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
max@1	826 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
max@1	827 int intShift = floor(normalisedtuning * 3);
max@1	828 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
max@1	829
max@1	830 char buffer0 [50];
max@1	831
max@1	832 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
max@1	833
max@1	834 /** Tune Log-Frequency Spectrogram
max@1	835 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
max@1	836 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
max@1	837 **/
Chris@50	838 // cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
max@1	839
max@1	840 float tempValue = 0;
max@1	841
max@1	842 int count = 0;
max@1	843
max@1	844 for (FeatureList::iterator i = m_chromadata->logSpectrum.begin(); i != m_chromadata->logSpectrum.end(); ++i)
max@1	845 {
max@1	846
max@1	847 Feature f1 = *i;
max@1	848 Feature f2; // tuned log-frequency spectrum
max@1	849
max@1	850 f2.hasTimestamp = true;
max@1	851 f2.timestamp = f1.timestamp;
max@1	852
max@1	853 f2.values.push_back(0.0);
max@1	854 f2.values.push_back(0.0); // set lower edge to zero
max@1	855
max@1	856 if (m_chromadata->tuneLocal) {
max@1	857 intShift = floor(m_chromadata->localTuning[count] * 3);
max@1	858 floatShift = m_chromadata->localTuning[count] * 3 - intShift;
max@1	859 // floatShift is a really bad name for this
max@1	860 }
max@1	861
max@1	862 for (int k = 2; k < (int)f1.values.size() - 3; ++k)
max@1	863 { // interpolate all inner bins
max@1	864 tempValue = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift;
max@1	865 f2.values.push_back(tempValue);
max@1	866 }
max@1	867
max@1	868 f2.values.push_back(0.0);
max@1	869 f2.values.push_back(0.0);
max@1	870 f2.values.push_back(0.0); // upper edge
max@1	871
max@1	872 vector<float> runningmean = SpecialConvolution(f2.values,m_chromadata->hw);
max@1	873 vector<float> runningstd;
max@1	874 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
max@1	875 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
max@1	876 }
max@1	877 runningstd = SpecialConvolution(runningstd,m_chromadata->hw); // second step convolve
max@1	878 for (int i = 0; i < nNote; i++)
max@1	879 {
max@1	880
max@1	881 runningstd[i] = sqrt(runningstd[i]);
max@1	882 // square root to finally have running std
max@1	883
max@1	884 if (runningstd[i] > 0)
max@1	885 {
max@1	886 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
max@1	887 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_chromadata->whitening) : 0;
max@1	888 }
max@1	889
max@1	890 if (f2.values[i] < 0) {
max@1	891
max@1	892 cerr << "ERROR: negative value in logfreq spectrum" << endl;
max@1	893
max@1	894 }
max@1	895 }
max@1	896 tunedlogfreqspec.push_back(f2);
max@1	897 count++;
max@1	898 }
Chris@50	899 // cerr << "done." << endl;
max@1	900 /** Semitone spectrum and chromagrams
max@1	901 Semitone-spaced log-frequency spectrum derived
max@1	902 from the tuned log-freq spectrum above. the spectrum
max@1	903 is inferred using a non-negative least squares algorithm.
max@1	904 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
max@1	905 bass and treble stacked onto each other).
max@1	906 **/
Chris@50	907 /*
max@1	908 if (m_chromadata->useNNLS == 0) {
max@1	909 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
max@1	910 } else {
max@1	911 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
max@1	912 }
Chris@50	913 */
max@1	914 vector<float> oldchroma = vector<float>(12,0);
max@1	915 vector<float> oldbasschroma = vector<float>(12,0);
max@1	916 count = 0;
max@1	917
max@1	918 for (FeatureList::iterator it = tunedlogfreqspec.begin(); it != tunedlogfreqspec.end(); ++it) {
max@1	919 Feature logfreqsp = *it; // logfreq spectrum
max@1	920 Feature bothchroma; // treble and bass chromagram
max@1	921
max@1	922 bothchroma.hasTimestamp = true;
max@1	923 bothchroma.timestamp = logfreqsp.timestamp;
max@1	924
max@1	925 float b[nNote];
max@1	926
max@1	927 bool some_b_greater_zero = false;
max@1	928 float sumb = 0;
max@1	929 for (int i = 0; i < nNote; i++) {
max@1	930 b[i] = logfreqsp.values[i];
max@1	931 sumb += b[i];
max@1	932 if (b[i] > 0) {
max@1	933 some_b_greater_zero = true;
max@1	934 }
max@1	935 }
max@1	936
max@1	937 // here's where the non-negative least squares algorithm calculates the note activation x
max@1	938
max@1	939 vector<float> chroma = vector<float>(12, 0);
max@1	940 vector<float> basschroma = vector<float>(12, 0);
max@1	941 float currval;
max@1	942 int iSemitone = 0;
max@1	943
max@1	944 if (some_b_greater_zero) {
max@1	945 if (m_chromadata->useNNLS == 0) {
max@1	946 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
max@1	947 currval = 0;
max@1	948 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
max@1	949 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
max@1	950 }
max@1	951 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
max@1	952 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
max@1	953 iSemitone++;
max@1	954 }
max@1	955
max@1	956 } else {
max@1	957 float x[84+1000];
max@1	958 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
max@1	959 vector<int> signifIndex;
max@1	960 int index=0;
max@1	961 sumb /= 84.0;
max@1	962 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
max@1	963 float currval = 0;
max@1	964 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
max@1	965 currval += b[iNote + iBPS];
max@1	966 }
max@1	967 if (currval > 0) signifIndex.push_back(index);
max@1	968 index++;
max@1	969 }
max@1	970 float rnorm;
max@1	971 float w[84+1000];
max@1	972 float zz[84+1000];
max@1	973 int indx[84+1000];
max@1	974 int mode;
max@1	975 int dictsize = nNote*signifIndex.size();
max@1	976
max@1	977 float *curr_dict = new float[dictsize];
max@1	978 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
max@1	979 for (int iBin = 0; iBin < nNote; iBin++) {
max@1	980 curr_dict[iNote * nNote + iBin] =
max@1	981 1.0 * m_chromadata->dict[signifIndex[iNote] * nNote + iBin];
max@1	982 }
max@1	983 }
max@1	984 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
max@1	985 delete [] curr_dict;
max@1	986 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
max@1	987 // cerr << mode << endl;
max@1	988 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
max@1	989 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
max@1	990 }
max@1	991 }
max@1	992 }
max@1	993
max@1	994 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end());
max@1	995 // just stack the both chromas
max@1	996
max@1	997 bothchroma.values = chroma;
max@1	998 returnFeatureList.push_back(bothchroma);
max@1	999 count++;
max@1	1000 }
Chris@50	1001 // cerr << "done." << endl;
max@1	1002
max@1	1003 return returnFeatureList;
max@1	1004 }
max@1	1005
max@1	1006 /* ------ Beat Quantizer ------ */
max@1	1007
max@4	1008 std::vector<Vamp::Plugin::FeatureList>
Chris@48	1009 Segmentino::beatQuantiser(Vamp::Plugin::FeatureList chromagram, Vamp::Plugin::FeatureList beats)
max@1	1010 {
max@1	1011 std::vector<FeatureList> returnVector;
max@1	1012
max@1	1013 FeatureList fwQchromagram; // frame-wise beat-quantised chroma
max@1	1014 FeatureList bwQchromagram; // beat-wise beat-quantised chroma
matthiasm@43	1015
matthiasm@43	1016
matthiasm@43	1017 size_t nChromaFrame = chromagram.size();
matthiasm@43	1018 size_t nBeat = beats.size();
max@1	1019
max@1	1020 if (nBeat == 0 && nChromaFrame == 0) return returnVector;
max@1	1021
Chris@37	1022 int nBin = chromagram[0].values.size();
max@1	1023
max@1	1024 vector<float> tempChroma = vector<float>(nBin);
max@1	1025
max@1	1026 Vamp::RealTime beatTimestamp = Vamp::RealTime::zeroTime;
max@1	1027 int currBeatCount = -1; // start before first beat
max@1	1028 int framesInBeat = 0;
max@1	1029
matthiasm@43	1030 for (size_t iChroma = 0; iChroma < nChromaFrame; ++iChroma)
max@1	1031 {
max@4	1032 Vamp::RealTime frameTimestamp = chromagram[iChroma].timestamp;
Chris@24	1033 Vamp::RealTime newBeatTimestamp;
Chris@22	1034
Chris@37	1035 if (currBeatCount != (int)beats.size() - 1) {
Chris@37	1036 newBeatTimestamp = beats[currBeatCount+1].timestamp;
Chris@37	1037 } else {
Chris@37	1038 newBeatTimestamp = chromagram[nChromaFrame-1].timestamp;
Chris@37	1039 }
Chris@22	1040
Chris@24	1041 if (frameTimestamp > newBeatTimestamp \|\|
max@1	1042 iChroma == nChromaFrame-1)
max@1	1043 {
max@1	1044 // new beat (or last chroma frame)
max@1	1045 // 1. finish all the old beat processing
Chris@23	1046 if (framesInBeat > 0)
Chris@23	1047 {
Chris@23	1048 for (int i = 0; i < nBin; ++i) tempChroma[i] /= framesInBeat; // average
Chris@23	1049 }
max@1	1050
max@1	1051 Feature bwQchromaFrame;
max@1	1052 bwQchromaFrame.hasTimestamp = true;
max@1	1053 bwQchromaFrame.timestamp = beatTimestamp;
max@1	1054 bwQchromaFrame.values = tempChroma;
Chris@24	1055 bwQchromaFrame.duration = newBeatTimestamp - beatTimestamp;
max@1	1056 bwQchromagram.push_back(bwQchromaFrame);
max@1	1057
max@1	1058 for (int iFrame = -framesInBeat; iFrame < 0; ++iFrame)
max@1	1059 {
max@1	1060 Feature fwQchromaFrame;
max@1	1061 fwQchromaFrame.hasTimestamp = true;
max@1	1062 fwQchromaFrame.timestamp = chromagram[iChroma+iFrame].timestamp;
max@1	1063 fwQchromaFrame.values = tempChroma; // all between two beats get the same
max@1	1064 fwQchromagram.push_back(fwQchromaFrame);
max@1	1065 }
max@1	1066
max@1	1067 // 2. increments / resets for current (new) beat
max@1	1068 currBeatCount++;
Chris@24	1069 beatTimestamp = newBeatTimestamp;
Chris@37	1070 for (int i = 0; i < nBin; ++i) tempChroma[i] = 0; // average
max@1	1071 framesInBeat = 0;
max@1	1072 }
max@1	1073 framesInBeat++;
Chris@37	1074 for (int i = 0; i < nBin; ++i) tempChroma[i] += chromagram[iChroma].values[i];
max@1	1075 }
max@1	1076 returnVector.push_back(fwQchromagram);
max@1	1077 returnVector.push_back(bwQchromagram);
Chris@30	1078 return returnVector;
max@1	1079 }
max@1	1080
matthiasm@43	1081
matthiasm@43	1082
max@1	1083 /* -------------------------------- */
max@1	1084 /* ------ Support Functions ------ */
max@1	1085 /* -------------------------------- */
max@1	1086
max@1	1087 // one-dimesion median filter
Chris@56	1088 vec medfilt1(vec v, int medfilt_length)
max@1	1089 {
matthiasm@46	1090 // TODO: check if this works with odd and even medfilt_length !!!
max@1	1091 int halfWin = medfilt_length/2;
max@1	1092
max@1	1093 // result vector
Chris@56	1094 vec res = zeros<vec>(v.size());
max@1	1095
max@1	1096 // padding
Chris@56	1097 vec padV = zeros<vec>(v.size()+medfilt_length-1);
max@1	1098
Chris@37	1099 for (int i=medfilt_length/2; i < medfilt_length/2+(int)v.size(); ++ i)
max@1	1100 {
max@1	1101 padV(i) = v(i-medfilt_length/2);
matthiasm@46	1102 }
matthiasm@46	1103
matthiasm@46	1104 // the above loop leaves the boundaries at 0,
matthiasm@46	1105 // the two loops below fill them with the start or end values of v at start and end
matthiasm@46	1106 for (int i = 0; i < halfWin; ++i) padV(i) = v(0);
matthiasm@46	1107 for (int i = halfWin+(int)v.size(); i < (int)v.size()+2*halfWin; ++i) padV(i) = v(v.size()-1);
matthiasm@46	1108
matthiasm@46	1109
max@1	1110
max@1	1111 // Median filter
Chris@56	1112 vec win = zeros<vec>(medfilt_length);
max@1	1113
Chris@37	1114 for (int i=0; i < (int)v.size(); ++i)
max@1	1115 {
max@1	1116 win = padV.subvec(i,i+halfWin*2);
max@1	1117 win = sort(win);
max@1	1118 res(i) = win(halfWin);
max@1	1119 }
max@1	1120
max@1	1121 return res;
max@1	1122 }
max@1	1123
max@1	1124
max@1	1125 // Quantile
Chris@56	1126 double quantile(vec v, double p)
max@1	1127 {
Chris@56	1128 vec sortV = sort(v);
max@1	1129 int n = sortV.size();
Chris@56	1130 vec x = zeros<vec>(n+2);
Chris@56	1131 vec y = zeros<vec>(n+2);
max@1	1132
max@1	1133 x(0) = 0;
max@1	1134 x(n+1) = 100;
max@1	1135
Chris@21	1136 for (int i=1; i<n+1; ++i)
max@1	1137 x(i) = 100*(0.5+(i-1))/n;
max@1	1138
max@1	1139 y(0) = sortV(0);
max@1	1140 y.subvec(1,n) = sortV;
max@1	1141 y(n+1) = sortV(n-1);
max@1	1142
Chris@56	1143 uvec x2index = find(x>=p*100);
max@1	1144
max@1	1145 // Interpolation
max@1	1146 double x1 = x(x2index(0)-1);
max@1	1147 double x2 = x(x2index(0));
max@1	1148 double y1 = y(x2index(0)-1);
max@1	1149 double y2 = y(x2index(0));
max@1	1150
max@1	1151 double res = (y2-y1)/(x2-x1)(p100-x1)+y1;
max@1	1152
max@1	1153 return res;
max@1	1154 }
max@1	1155
max@1	1156 // Max Filtering
Chris@56	1157 mat maxfilt1(mat inmat, int len)
max@1	1158 {
Chris@56	1159 mat outmat = inmat;
max@1	1160
Chris@37	1161 for (int i=0; i < (int)inmat.n_rows; ++i)
max@1	1162 {
Chris@56	1163 if (sum(inmat.row(i)) > 0)
max@1	1164 {
max@1	1165 // Take a window of rows
max@1	1166 int startWin;
max@1	1167 int endWin;
max@1	1168
max@1	1169 if (0 > i-len)
max@1	1170 startWin = 0;
max@1	1171 else
max@1	1172 startWin = i-len;
max@1	1173
Chris@37	1174 if ((int)inmat.n_rows-1 < i+len-1)
max@1	1175 endWin = inmat.n_rows-1;
max@1	1176 else
max@1	1177 endWin = i+len-1;
max@1	1178
Chris@56	1179 outmat(i,span::all) =
Chris@56	1180 max(inmat(span(startWin,endWin),span::all));
max@1	1181 }
max@1	1182 }
max@1	1183
max@1	1184 return outmat;
Chris@56	1185
max@1	1186 }
max@1	1187
max@1	1188 // Null Parts
Chris@56	1189 Part nullpart(vector<Part> parts, vec barline)
max@1	1190 {
Chris@56	1191 uvec nullindices = ones<uvec>(barline.size());
Chris@37	1192 for (int iPart=0; iPart<(int)parts.size(); ++iPart)
max@1	1193 {
Chris@21	1194 //for (int iIndex=0; iIndex < parts[0].indices.size(); ++iIndex)
Chris@37	1195 for (int iIndex=0; iIndex < (int)parts[iPart].indices.size(); ++iIndex)
Chris@21	1196 for (int i=0; i<parts[iPart].n; ++i)
max@1	1197 {
Chris@21	1198 int ind = parts[iPart].indices[iIndex]+i;
max@1	1199 nullindices(ind) = 0;
max@1	1200 }
max@1	1201 }
max@7	1202
max@1	1203 Part newPart;
max@1	1204 newPart.n = 1;
Chris@56	1205 uvec q = find(nullindices > 0);
max@1	1206
Chris@37	1207 for (int i=0; i<(int)q.size();++i)
max@1	1208 newPart.indices.push_back(q(i));
max@7	1209
max@1	1210 newPart.letter = '-';
max@1	1211 newPart.value = 0;
max@1	1212 newPart.level = 0;
max@1	1213
max@1	1214 return newPart;
max@1	1215 }
max@1	1216
max@1	1217
max@1	1218 // Merge Nulls
max@1	1219 void mergenulls(vector<Part> &parts)
max@1	1220 {
Chris@76	1221 /*
Chris@76	1222 cerr << "Segmentino: mergenulls: before: "<< endl;
Chris@76	1223 for (int iPart=0; iPart<(int)parts.size(); ++iPart) {
Chris@76	1224 cerr << parts[iPart].letter << ": ";
Chris@76	1225 for (int iIndex=0; iIndex<(int)parts[iPart].indices.size(); ++iIndex) {
Chris@76	1226 cerr << parts[iPart].indices[iIndex];
Chris@76	1227 if (iIndex+1 < (int)parts[iPart].indices.size()) {
Chris@76	1228 cerr << ", ";
Chris@76	1229 }
Chris@76	1230 }
Chris@76	1231 cerr << endl;
Chris@76	1232 }
Chris@76	1233 */
Chris@37	1234 for (int iPart=0; iPart<(int)parts.size(); ++iPart)
max@1	1235 {
max@1	1236
max@1	1237 vector<Part> newVectorPart;
max@1	1238
max@1	1239 if (parts[iPart].letter.compare("-")==0)
max@1	1240 {
max@1	1241 sort (parts[iPart].indices.begin(), parts[iPart].indices.end());
Chris@21	1242 int newpartind = -1;
max@1	1243
max@1	1244 vector<int> indices;
max@1	1245 indices.push_back(-2);
max@1	1246
Chris@37	1247 for (int iIndex=0; iIndex<(int)parts[iPart].indices.size(); ++iIndex)
max@1	1248 indices.push_back(parts[iPart].indices[iIndex]);
max@1	1249
Chris@37	1250 for (int iInd=1; iInd < (int)indices.size(); ++iInd)
max@1	1251 {
max@1	1252 if (indices[iInd] - indices[iInd-1] > 1)
max@1	1253 {
max@1	1254 newpartind++;
max@1	1255
max@1	1256 Part newPart;
matthiasm@46	1257 newPart.letter = 'N';
max@1	1258 std::stringstream out;
max@1	1259 out << newpartind+1;
max@1	1260 newPart.letter.append(out.str());
matthiasm@44	1261 // newPart.value = 20+newpartind+1;
matthiasm@44	1262 newPart.value = 0;
max@1	1263 newPart.n = 1;
max@1	1264 newPart.indices.push_back(indices[iInd]);
max@1	1265 newPart.level = 0;
max@1	1266
max@1	1267 newVectorPart.push_back(newPart);
max@1	1268 }
max@1	1269 else
max@1	1270 {
max@1	1271 newVectorPart[newpartind].n = newVectorPart[newpartind].n+1;
max@1	1272 }
max@1	1273 }
Chris@76	1274 parts.erase (parts.begin() + iPart);
max@1	1275
Chris@37	1276 for (int i=0; i<(int)newVectorPart.size(); ++i)
max@1	1277 parts.push_back(newVectorPart[i]);
Chris@76	1278
Chris@76	1279 break;
max@1	1280 }
max@1	1281 }
Chris@76	1282 /*
Chris@76	1283 cerr << "Segmentino: mergenulls: after: "<< endl;
Chris@76	1284 for (int iPart=0; iPart<(int)parts.size(); ++iPart) {
Chris@76	1285 cerr << parts[iPart].letter << ": ";
Chris@76	1286 for (int iIndex=0; iIndex<(int)parts[iPart].indices.size(); ++iIndex) {
Chris@76	1287 cerr << parts[iPart].indices[iIndex];
Chris@76	1288 if (iIndex+1 < (int)parts[iPart].indices.size()) {
Chris@76	1289 cerr << ", ";
Chris@76	1290 }
Chris@76	1291 }
Chris@76	1292 cerr << endl;
Chris@76	1293 }
Chris@76	1294 */
max@1	1295 }
max@1	1296
max@1	1297 /* ------ Segmentation ------ */
max@1	1298
Chris@19	1299 vector<Part> songSegment(Vamp::Plugin::FeatureList quantisedChromagram)
max@1	1300 {
max@1	1301
max@1	1302
max@1	1303 /* ------ Parameters ------ */
max@1	1304 double thresh_beat = 0.85;
max@1	1305 double thresh_seg = 0.80;
matthiasm@46	1306 int medfilt_length = 5;
max@1	1307 int minlength = 28;
matthiasm@46	1308 int maxlength = 2*128;
max@1	1309 double quantilePerc = 0.1;
max@1	1310 /* ------------------------ */
max@1	1311
max@1	1312
max@1	1313 // Collect Info
Chris@19	1314 int nBeat = quantisedChromagram.size(); // Number of feature vector
Chris@19	1315 int nFeatValues = quantisedChromagram[0].values.size(); // Number of values for each feature vector
max@1	1316
Chris@27	1317 if (nBeat < minlength) {
Chris@27	1318 // return a single part
Chris@27	1319 vector<Part> parts;
Chris@27	1320 Part newPart;
Chris@27	1321 newPart.n = 1;
Chris@27	1322 newPart.indices.push_back(0);
Chris@27	1323 newPart.letter = "n1";
Chris@27	1324 newPart.value = 20;
Chris@27	1325 newPart.level = 0;
Chris@27	1326 parts.push_back(newPart);
Chris@27	1327 return parts;
Chris@27	1328 }
Chris@27	1329
Chris@56	1330 irowvec timeStamp = zeros<imat>(1,nBeat); // Vector of Time Stamps
max@1	1331
Chris@22	1332 // Save time stamp as a Vector
Chris@19	1333 if (quantisedChromagram[0].hasTimestamp)
max@1	1334 {
Chris@21	1335 for (int i = 0; i < nBeat; ++ i)
Chris@19	1336 timeStamp[i] = quantisedChromagram[i].timestamp.nsec;
max@1	1337 }
max@1	1338
max@1	1339
max@1	1340 // Build a ObservationTOFeatures Matrix
Chris@56	1341 mat featVal = zeros<mat>(nBeat,nFeatValues/2);
max@1	1342
Chris@21	1343 for (int i = 0; i < nBeat; ++ i)
Chris@21	1344 for (int j = 0; j < nFeatValues/2; ++ j)
max@1	1345 {
matthiasm@44	1346 featVal(i,j) = 0.8 * quantisedChromagram[i].values[j] + quantisedChromagram[i].values[j+12]; // bass attenuated
max@1	1347 }
max@1	1348
max@1	1349 // Set to arbitrary value to feature vectors with low std
Chris@56	1350 mat a = stddev(featVal,1,1);
max@1	1351
matthiasm@44	1352 // Feature Correlation Matrix
Chris@56	1353 mat simmat0 = 1-cor(trans(featVal));
max@1	1354
max@1	1355
Chris@21	1356 for (int i = 0; i < nBeat; ++ i)
max@1	1357 {
max@1	1358 if (a(i)<0.000001)
max@1	1359 {
max@1	1360 featVal(i,1) = 1000; // arbitrary
max@1	1361
Chris@21	1362 for (int j = 0; j < nFeatValues/2; ++j)
max@1	1363 {
max@1	1364 simmat0(i,j) = 1;
max@1	1365 simmat0(j,i) = 1;
max@1	1366 }
max@1	1367 }
max@1	1368 }
max@1	1369
Chris@56	1370 mat simmat = 1-simmat0/2;
max@1	1371
max@1	1372 // -------- To delate when the proble with the add of beat will be solved -------
matthiasm@45	1373 for (int i = 0; i < nBeat; ++ i)
matthiasm@45	1374 for (int j = 0; j < nBeat; ++ j)
matthiasm@45	1375 if (!std::isfinite(simmat(i,j)))
matthiasm@45	1376 simmat(i,j)=0;
max@1	1377 // ------------------------------------------------------------------------------
max@1	1378
max@1	1379 // Median Filtering applied to the Correlation Matrix
max@1	1380 // The median filter is for each diagonal of the Matrix
Chris@56	1381 mat median_simmat = zeros<mat>(nBeat,nBeat);
max@1	1382
Chris@21	1383 for (int i = 0; i < nBeat; ++ i)
max@1	1384 {
Chris@56	1385 vec temp = medfilt1(simmat.diag(i),medfilt_length);
max@1	1386 median_simmat.diag(i) = temp;
max@1	1387 median_simmat.diag(-i) = temp;
max@1	1388 }
max@1	1389
Chris@21	1390 for (int i = 0; i < nBeat; ++ i)
Chris@21	1391 for (int j = 0; j < nBeat; ++ j)
max@1	1392 if (!std::isfinite(median_simmat(i,j)))
max@1	1393 median_simmat(i,j) = 0;
max@1	1394
max@1	1395 // -------------- NOT CONVERTED -------------------------------------
max@1	1396 // if param.seg.standardise
max@1	1397 // med_median_simmat = repmat(median(median_simmat),nBeat,1);
max@1	1398 // std_median_simmat = repmat(std(median_simmat),nBeat,1);
max@1	1399 // median_simmat = (median_simmat - med_median_simmat) ./ std_median_simmat;
max@1	1400 // end
max@1	1401 // --------------------------------------------------------
max@1	1402
max@1	1403 // Retrieve Bar Bounderies
Chris@56	1404 uvec dup = find(median_simmat > thresh_beat);
Chris@56	1405 mat potential_duplicates = zeros<mat>(nBeat,nBeat);
Chris@56	1406 potential_duplicates.elem(dup) = ones<vec>(dup.size());
max@1	1407 potential_duplicates = trimatu(potential_duplicates);
max@1	1408
Chris@21	1409 int nPartlengths = round((maxlength-minlength)/4)+1;
Chris@56	1410 vec partlengths = zeros<vec>(nPartlengths);
max@1	1411
Chris@21	1412 for (int i = 0; i < nPartlengths; ++ i)
matthiasm@46	1413 partlengths(i) = (i*4) + minlength;
max@1	1414
max@1	1415 // initialise arrays
Chris@56	1416 cube simArray = zeros<cube>(nBeat,nBeat,nPartlengths);
Chris@56	1417 cube decisionArray2 = zeros<cube>(nBeat,nBeat,nPartlengths);
max@1	1418
matthiasm@46	1419 for (int iLength = 0; iLength < nPartlengths; ++ iLength)
matthiasm@46	1420 // for (int iLength = 0; iLength < 20; ++ iLength)
max@1	1421 {
Chris@21	1422 int len = partlengths(iLength);
Chris@21	1423 int nUsedBeat = nBeat - len + 1; // number of potential rep beginnings: they can't overlap at the end of the song
Chris@33	1424
Chris@33	1425 if (nUsedBeat < 1) continue;
max@1	1426
Chris@21	1427 for (int iBeat = 0; iBeat < nUsedBeat; ++ iBeat) // looping over all columns (arbitrarily chosen columns)
max@1	1428 {
Chris@56	1429 uvec help2 = find(potential_duplicates(span(0,nUsedBeat-1),iBeat)==1);
max@1	1430
Chris@37	1431 for (int i=0; i < (int)help2.size(); ++i)
max@1	1432 {
max@1	1433
max@1	1434 // measure how well two length len segments go together
max@1	1435 int kBeat = help2(i);
Chris@56	1436 vec distrib = median_simmat(span(iBeat,iBeat+len-1), span(kBeat,kBeat+len-1)).diag(0);
max@1	1437 simArray(iBeat,kBeat,iLength) = quantile(distrib,quantilePerc);
max@1	1438 }
max@1	1439 }
max@1	1440
Chris@56	1441 mat tempM = simArray(span(0,nUsedBeat-1), span(0,nUsedBeat-1), span(iLength,iLength));
Chris@56	1442 simArray.slice(iLength)(span(0,nUsedBeat-1), span(0,nUsedBeat-1)) = tempM + trans(tempM) - (eye<mat>(nUsedBeat,nUsedBeat)%tempM);
max@1	1443
max@1	1444 // convolution
Chris@56	1445 vec K = zeros<vec>(3);
max@1	1446 K << 0.01 << 0.98 << 0.01;
max@1	1447
max@1	1448
Chris@37	1449 for (int i=0; i < (int)simArray.n_rows; ++i)
max@1	1450 {
Chris@56	1451 rowvec t = conv((rowvec)simArray.slice(iLength).row(i),K);
Chris@56	1452 simArray.slice(iLength)(i, span::all) = t.subvec(1,t.size()-2);
max@1	1453 }
max@1	1454
max@1	1455 // take only over-average bars that do not overlap
max@1	1456
Chris@56	1457 mat temp = zeros<mat>(simArray.n_rows, simArray.n_cols);
Chris@56	1458 temp(span::all, span(0,nUsedBeat-1)) = simArray.slice(iLength)(span::all, span(0,nUsedBeat-1));
max@1	1459
Chris@37	1460 for (int i=0; i < (int)temp.n_rows; ++i)
Chris@37	1461 for (int j=0; j < nUsedBeat; ++j)
max@1	1462 if (temp(i,j) < thresh_seg)
max@1	1463 temp(i,j) = 0;
max@1	1464
max@1	1465 decisionArray2.slice(iLength) = temp;
max@1	1466
Chris@56	1467 mat maxMat = maxfilt1(decisionArray2.slice(iLength),len-1);
max@1	1468
Chris@37	1469 for (int i=0; i < (int)decisionArray2.n_rows; ++i)
Chris@37	1470 for (int j=0; j < (int)decisionArray2.n_cols; ++j)
max@1	1471 if (decisionArray2.slice(iLength)(i,j) < maxMat(i,j))
max@1	1472 decisionArray2.slice(iLength)(i,j) = 0;
max@1	1473
Chris@56	1474 decisionArray2.slice(iLength) = decisionArray2.slice(iLength) % trans(decisionArray2.slice(iLength));
max@1	1475
Chris@37	1476 for (int i=0; i < (int)simArray.n_rows; ++i)
Chris@37	1477 for (int j=0; j < (int)simArray.n_cols; ++j)
max@1	1478 if (simArray.slice(iLength)(i,j) < thresh_seg)
max@1	1479 potential_duplicates(i,j) = 0;
max@1	1480 }
max@1	1481
max@1	1482 // Milk the data
max@1	1483
Chris@56	1484 mat bestval;
max@1	1485
Chris@21	1486 for (int iLength=0; iLength<nPartlengths; ++iLength)
max@1	1487 {
Chris@56	1488 mat temp = zeros<mat>(decisionArray2.n_rows,decisionArray2.n_cols);
max@1	1489
Chris@37	1490 for (int rows=0; rows < (int)decisionArray2.n_rows; ++rows)
Chris@37	1491 for (int cols=0; cols < (int)decisionArray2.n_cols; ++cols)
max@1	1492 if (decisionArray2.slice(iLength)(rows,cols) > 0)
max@1	1493 temp(rows,cols) = 1;
max@1	1494
Chris@56	1495 vec currLogicSum = sum(temp,1);
max@1	1496
Chris@37	1497 for (int iBeat=0; iBeat < nBeat; ++iBeat)
max@1	1498 if (currLogicSum(iBeat) > 1)
max@1	1499 {
Chris@56	1500 vec t = decisionArray2.slice(iLength)(span::all,iBeat);
max@1	1501 double currSum = sum(t);
max@1	1502
Chris@21	1503 int count = 0;
Chris@37	1504 for (int i=0; i < (int)t.size(); ++i)
max@1	1505 if (t(i)>0)
max@1	1506 count++;
max@1	1507
max@1	1508 currSum = (currSum/count)/2;
max@1	1509
Chris@56	1510 rowvec t1;
max@1	1511 t1 << (currLogicSum(iBeat)-1) * partlengths(iLength) << currSum << iLength << iBeat << currLogicSum(iBeat);
max@1	1512
max@1	1513 bestval = join_cols(bestval,t1);
max@1	1514 }
max@1	1515 }
max@1	1516
max@1	1517 // Definition of the resulting vector
max@1	1518 vector<Part> parts;
max@1	1519
max@1	1520 // make a table of all valid sets of parts
max@1	1521
max@1	1522 char partletters[] = {'A','B','C','D','E','F','G', 'H','I','J','K','L','M','N','O','P','Q','R','S'};
Chris@21	1523 int partvalues[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19};
Chris@56	1524 vec valid_sets = ones<vec>(bestval.n_rows);
max@1	1525
max@1	1526 if (!bestval.is_empty())
max@1	1527 {
max@1	1528
max@1	1529 // In questo punto viene introdotto un errore alla 3 cifra decimale
max@1	1530
Chris@56	1531 colvec t = zeros<colvec>(bestval.n_rows);
Chris@37	1532 for (int i=0; i < (int)bestval.n_rows; ++i)
max@1	1533 {
max@1	1534 t(i) = bestval(i,1)*2;
max@1	1535 }
max@1	1536
max@1	1537 double m = t.max();
max@1	1538
Chris@56	1539 bestval(span::all,1) = bestval(span::all,1) / m;
Chris@56	1540 bestval(span::all,0) = bestval(span::all,0) + bestval(span::all,1);
max@1	1541
Chris@56	1542 mat bestval2;
Chris@37	1543 for (int i=0; i < (int)bestval.n_cols; ++i)
max@1	1544 if (i!=1)
max@1	1545 bestval2 = join_rows(bestval2,bestval.col(i));
max@1	1546
Chris@21	1547 for (int kSeg=0; kSeg<6; ++kSeg)
max@1	1548 {
Chris@56	1549 mat currbestvals = zeros<mat>(bestval2.n_rows, bestval2.n_cols);
Chris@37	1550 for (int i=0; i < (int)bestval2.n_rows; ++i)
Chris@37	1551 for (int j=0; j < (int)bestval2.n_cols; ++j)
max@1	1552 if (valid_sets(i))
max@1	1553 currbestvals(i,j) = bestval2(i,j);
max@1	1554
Chris@56	1555 vec t1 = currbestvals.col(0);
max@1	1556 double ma;
Chris@56	1557 uword maIdx;
max@1	1558 ma = t1.max(maIdx);
max@6	1559
max@6	1560 if ((maIdx == 0)&&(ma == 0))
max@6	1561 break;
max@1	1562
Chris@28	1563 int bestLength = lrint(partlengths(currbestvals(maIdx,1)));
Chris@56	1564 rowvec bestIndices = decisionArray2.slice(currbestvals(maIdx,1))(currbestvals(maIdx,2), span::all);
max@1	1565
Chris@56	1566 rowvec bestIndicesMap = zeros<rowvec>(bestIndices.size());
Chris@37	1567 for (int i=0; i < (int)bestIndices.size(); ++i)
max@1	1568 if (bestIndices(i)>0)
max@1	1569 bestIndicesMap(i) = 1;
max@1	1570
Chris@56	1571 rowvec mask = zeros<rowvec>(bestLength*2-1);
Chris@21	1572 for (int i=0; i<bestLength; ++i)
max@1	1573 mask(i+bestLength-1) = 1;
max@1	1574
Chris@56	1575 rowvec t2 = conv(bestIndicesMap,mask);
Chris@56	1576 rowvec island = t2.subvec(mask.size()/2,t2.size()-1-mask.size()/2);
max@1	1577
max@1	1578 // Save results in the structure
max@1	1579 Part newPart;
max@1	1580 newPart.n = bestLength;
Chris@56	1581 uvec q1 = find(bestIndices > 0);
max@1	1582
Chris@37	1583 for (int i=0; i < (int)q1.size();++i)
max@1	1584 newPart.indices.push_back(q1(i));
max@1	1585
max@1	1586 newPart.letter = partletters[kSeg];
max@1	1587 newPart.value = partvalues[kSeg];
max@1	1588 newPart.level = kSeg+1;
max@1	1589 parts.push_back(newPart);
max@1	1590
Chris@56	1591 uvec q2 = find(valid_sets==1);
max@1	1592
Chris@37	1593 for (int i=0; i < (int)q2.size(); ++i)
max@1	1594 {
Chris@21	1595 int iSet = q2(i);
Chris@21	1596 int s = partlengths(bestval2(iSet,1));
max@1	1597
Chris@56	1598 rowvec mask1 = zeros<rowvec>(s*2-1);
Chris@21	1599 for (int i=0; i<s; ++i)
max@1	1600 mask1(i+s-1) = 1;
max@1	1601
Chris@56	1602 rowvec Ind = decisionArray2.slice(bestval2(iSet,1))(bestval2(iSet,2), span::all);
Chris@56	1603 rowvec IndMap = zeros<rowvec>(Ind.size());
Chris@37	1604 for (int i=0; i < (int)Ind.size(); ++i)
max@1	1605 if (Ind(i)>0)
max@1	1606 IndMap(i) = 2;
max@1	1607
Chris@56	1608 rowvec t3 = conv(IndMap,mask1);
Chris@56	1609 rowvec currislands = t3.subvec(mask1.size()/2,t3.size()-1-mask1.size()/2);
Chris@56	1610 rowvec islandsdMult = currislands%island;
max@6	1611
Chris@56	1612 uvec islandsIndex = find(islandsdMult > 0);
max@1	1613
max@6	1614 if (islandsIndex.size() > 0)
max@1	1615 valid_sets(iSet) = 0;
max@1	1616 }
max@1	1617 }
max@1	1618 }
max@1	1619 else
max@1	1620 {
max@1	1621 Part newPart;
max@1	1622 newPart.n = nBeat;
Chris@33	1623 newPart.indices.push_back(0);
max@1	1624 newPart.letter = 'A';
max@1	1625 newPart.value = 1;
max@1	1626 newPart.level = 1;
max@1	1627 parts.push_back(newPart);
max@1	1628 }
max@6	1629
Chris@56	1630 vec bar = linspace(1,nBeat,nBeat);
max@1	1631 Part np = nullpart(parts,bar);
max@7	1632
max@1	1633 parts.push_back(np);
max@1	1634
max@1	1635 // -------------- NOT CONVERTED -------------------------------------
max@1	1636 // if param.seg.editor
max@1	1637 // [pa, ta] = partarray(parts);
max@1	1638 // parts = editorssearch(pa, ta, parts);
max@1	1639 // parts = [parts, nullpart(parts,1:nBeat)];
max@1	1640 // end
max@1	1641 // ------------------------------------------------------------------
max@1	1642
max@1	1643
max@1	1644 mergenulls(parts);
max@1	1645
max@1	1646
max@1	1647 // -------------- NOT CONVERTED -------------------------------------
max@1	1648 // if param.seg.editor
max@1	1649 // [pa, ta] = partarray(parts);
max@1	1650 // parts = editorssearch(pa, ta, parts);
max@1	1651 // parts = [parts, nullpart(parts,1:nBeat)];
max@1	1652 // end
max@1	1653 // ------------------------------------------------------------------
max@1	1654
max@1	1655 return parts;
max@1	1656 }
max@1	1657
max@1	1658
max@1	1659
Chris@19	1660 void songSegmentChroma(Vamp::Plugin::FeatureList quantisedChromagram, vector<Part> &parts)
max@1	1661 {
max@1	1662 // Collect Info
Chris@19	1663 int nBeat = quantisedChromagram.size(); // Number of feature vector
Chris@19	1664 int nFeatValues = quantisedChromagram[0].values.size(); // Number of values for each feature vector
max@1	1665
Chris@56	1666 mat synchTreble = zeros<mat>(nBeat,nFeatValues/2);
max@1	1667
Chris@21	1668 for (int i = 0; i < nBeat; ++ i)
Chris@21	1669 for (int j = 0; j < nFeatValues/2; ++ j)
max@1	1670 {
Chris@19	1671 synchTreble(i,j) = quantisedChromagram[i].values[j];
max@1	1672 }
max@1	1673
Chris@56	1674 mat synchBass = zeros<mat>(nBeat,nFeatValues/2);
max@1	1675
Chris@21	1676 for (int i = 0; i < nBeat; ++ i)
Chris@21	1677 for (int j = 0; j < nFeatValues/2; ++ j)
max@1	1678 {
Chris@19	1679 synchBass(i,j) = quantisedChromagram[i].values[j+12];
max@1	1680 }
max@1	1681
max@1	1682 // Process
max@1	1683
Chris@56	1684 mat segTreble = zeros<mat>(quantisedChromagram.size(),quantisedChromagram[0].values.size()/2);
Chris@56	1685 mat segBass = zeros<mat>(quantisedChromagram.size(),quantisedChromagram[0].values.size()/2);
max@1	1686
Chris@37	1687 for (int iPart=0; iPart < (int)parts.size(); ++iPart)
max@1	1688 {
max@1	1689 parts[iPart].nInd = parts[iPart].indices.size();
max@1	1690
Chris@21	1691 for (int kOccur=0; kOccur<parts[iPart].nInd; ++kOccur)
max@1	1692 {
max@1	1693 int kStartIndex = parts[iPart].indices[kOccur];
max@1	1694 int kEndIndex = kStartIndex + parts[iPart].n-1;
max@1	1695
max@1	1696 segTreble.rows(kStartIndex,kEndIndex) = segTreble.rows(kStartIndex,kEndIndex) + synchTreble.rows(kStartIndex,kEndIndex);
max@1	1697 segBass.rows(kStartIndex,kEndIndex) = segBass.rows(kStartIndex,kEndIndex) + synchBass.rows(kStartIndex,kEndIndex);
max@1	1698 }
max@1	1699 }
max@1	1700 }
max@1	1701
max@1	1702
max@1	1703 // Segment Integration
max@1	1704 vector<Part> songSegmentIntegration(vector<Part> &parts)
max@1	1705 {
max@1	1706 // Break up parts (every part will have one instance)
max@1	1707 vector<Part> newPartVector;
max@1	1708 vector<int> partindices;
max@1	1709
Chris@37	1710 for (int iPart=0; iPart < (int)parts.size(); ++iPart)
max@1	1711 {
max@1	1712 parts[iPart].nInd = parts[iPart].indices.size();
Chris@21	1713 for (int iInstance=0; iInstance<parts[iPart].nInd; ++iInstance)
max@1	1714 {
max@1	1715 Part newPart;
max@1	1716 newPart.n = parts[iPart].n;
max@1	1717 newPart.letter = parts[iPart].letter;
max@1	1718 newPart.value = parts[iPart].value;
max@1	1719 newPart.level = parts[iPart].level;
max@1	1720 newPart.indices.push_back(parts[iPart].indices[iInstance]);
max@1	1721 newPart.nInd = 1;
max@1	1722 partindices.push_back(parts[iPart].indices[iInstance]);
max@1	1723
max@1	1724 newPartVector.push_back(newPart);
max@1	1725 }
max@1	1726 }
max@1	1727
max@1	1728
max@1	1729 // Sort the parts in order of occurrence
max@1	1730 sort (partindices.begin(), partindices.end());
max@1	1731
Chris@37	1732 for (int i=0; i < (int)partindices.size(); ++i)
max@1	1733 {
max@1	1734 bool found = false;
max@1	1735 int in=0;
max@1	1736 while (!found)
max@1	1737 {
max@1	1738 if (newPartVector[in].indices[0] == partindices[i])
max@1	1739 {
max@1	1740 newPartVector.push_back(newPartVector[in]);
max@1	1741 newPartVector.erase(newPartVector.begin()+in);
max@1	1742 found = true;
max@1	1743 }
max@1	1744 else
max@1	1745 in++;
max@1	1746 }
max@1	1747 }
max@1	1748
max@1	1749 // Clear the vector
Chris@37	1750 for (int iNewpart=1; iNewpart < (int)newPartVector.size(); ++iNewpart)
max@1	1751 {
max@1	1752 if (newPartVector[iNewpart].n < 12)
max@1	1753 {
max@1	1754 newPartVector[iNewpart-1].n = newPartVector[iNewpart-1].n + newPartVector[iNewpart].n;
max@1	1755 newPartVector.erase(newPartVector.begin()+iNewpart);
max@1	1756 }
max@1	1757 }
max@1	1758
max@1	1759 return newPartVector;
max@1	1760 }
max@1	1761
max@1	1762 // Segmenter
Chris@48	1763 Vamp::Plugin::FeatureList Segmentino::runSegmenter(Vamp::Plugin::FeatureList quantisedChromagram)
max@1	1764 {
max@1	1765 /* --- Display Information --- */
Chris@37	1766 // int numBeat = quantisedChromagram.size();
Chris@37	1767 // int numFeats = quantisedChromagram[0].values.size();
max@1	1768
max@1	1769 vector<Part> parts;
max@1	1770 vector<Part> finalParts;
max@1	1771
Chris@19	1772 parts = songSegment(quantisedChromagram);
Chris@19	1773 songSegmentChroma(quantisedChromagram,parts);
max@7	1774
max@1	1775 finalParts = songSegmentIntegration(parts);
max@1	1776
max@1	1777
max@1	1778 // TEMP ----
Chris@21	1779 /*for (int i=0;i<finalParts.size(); ++i)
max@1	1780 {
max@6	1781 std::cout << "Parts n° " << i << std::endl;
max@6	1782 std::cout << "n°: " << finalParts[i].n << std::endl;
max@6	1783 std::cout << "letter: " << finalParts[i].letter << std::endl;
max@1	1784
max@6	1785 std::cout << "indices: ";
Chris@21	1786 for (int j=0;j<finalParts[i].indices.size(); ++j)
max@6	1787 std::cout << finalParts[i].indices[j] << " ";
max@6	1788
max@6	1789 std::cout << std::endl;
max@6	1790 std::cout << "level: " << finalParts[i].level << std::endl;
max@1	1791 }*/
max@1	1792
max@1	1793 // ---------
max@1	1794
max@1	1795
max@1	1796 // Output
max@1	1797
max@1	1798 Vamp::Plugin::FeatureList results;
max@1	1799
max@1	1800
max@1	1801 Feature seg;
max@1	1802
Chris@56	1803 vec indices;
Chris@37	1804 // int idx=0;
max@1	1805 vector<int> values;
max@1	1806 vector<string> letters;
max@1	1807
Chris@37	1808 for (int iPart=0; iPart < (int)finalParts.size()-1; ++iPart)
max@1	1809 {
Chris@21	1810 int iInstance=0;
max@1	1811 seg.hasTimestamp = true;
max@1	1812
max@1	1813 int ind = finalParts[iPart].indices[iInstance];
max@1	1814 int ind1 = finalParts[iPart+1].indices[iInstance];
max@1	1815
Chris@19	1816 seg.timestamp = quantisedChromagram[ind].timestamp;
max@1	1817 seg.hasDuration = true;
Chris@19	1818 seg.duration = quantisedChromagram[ind1].timestamp-quantisedChromagram[ind].timestamp;
max@1	1819 seg.values.clear();
max@1	1820 seg.values.push_back(finalParts[iPart].value);
max@1	1821 seg.label = finalParts[iPart].letter;
max@1	1822
max@1	1823 results.push_back(seg);
max@1	1824 }
max@1	1825
Chris@37	1826 if (finalParts.size() > 0) {
Chris@37	1827 int ind = finalParts[finalParts.size()-1].indices[0];
Chris@37	1828 seg.hasTimestamp = true;
Chris@37	1829 seg.timestamp = quantisedChromagram[ind].timestamp;
Chris@37	1830 seg.hasDuration = true;
Chris@37	1831 seg.duration = quantisedChromagram[quantisedChromagram.size()-1].timestamp-quantisedChromagram[ind].timestamp;
Chris@37	1832 seg.values.clear();
Chris@37	1833 seg.values.push_back(finalParts[finalParts.size()-1].value);
Chris@37	1834 seg.label = finalParts[finalParts.size()-1].letter;
max@1	1835
Chris@37	1836 results.push_back(seg);
Chris@37	1837 }
max@1	1838
max@1	1839 return results;
max@1	1840 }
max@1	1841
max@1	1842
max@1	1843
max@1	1844
max@1	1845
max@1	1846
max@1	1847
max@1	1848
max@1	1849
max@1	1850
max@1	1851
max@1	1852
max@1	1853
max@1	1854
max@1	1855
max@1	1856
max@1	1857

Mercurial > hg > segmenter-vamp-plugin

annotate segmentino/Segmentino.cpp @ 84:55a047986812 tip