max@1
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
max@1
|
2
|
max@1
|
3 /*
|
Chris@48
|
4 Segmentino
|
max@1
|
5
|
Chris@48
|
6 Code by Massimiliano Zanoni and Matthias Mauch
|
Chris@48
|
7 Centre for Digital Music, Queen Mary, University of London
|
Chris@48
|
8
|
Chris@48
|
9 Copyright 2009-2013 Queen Mary, University of London.
|
max@1
|
10
|
max@1
|
11 This program is free software; you can redistribute it and/or
|
max@1
|
12 modify it under the terms of the GNU General Public License as
|
max@1
|
13 published by the Free Software Foundation; either version 2 of the
|
max@1
|
14 License, or (at your option) any later version. See the file
|
max@1
|
15 COPYING included with this distribution for more information.
|
max@1
|
16 */
|
max@1
|
17
|
Chris@48
|
18 #include "Segmentino.h"
|
max@1
|
19
|
Chris@49
|
20 #include <qm-dsp/base/Window.h>
|
Chris@49
|
21 #include <qm-dsp/dsp/onsets/DetectionFunction.h>
|
Chris@49
|
22 #include <qm-dsp/dsp/onsets/PeakPicking.h>
|
Chris@49
|
23 #include <qm-dsp/dsp/transforms/FFT.h>
|
Chris@49
|
24 #include <qm-dsp/dsp/tempotracking/TempoTrackV2.h>
|
Chris@49
|
25 #include <qm-dsp/dsp/tempotracking/DownBeat.h>
|
Chris@49
|
26 #include <qm-dsp/maths/MathUtilities.h>
|
Chris@49
|
27
|
Chris@49
|
28 #include <nnls-chroma/chromamethods.h>
|
Chris@49
|
29
|
max@1
|
30 #include <boost/numeric/ublas/matrix.hpp>
|
max@1
|
31 #include <boost/numeric/ublas/io.hpp>
|
max@1
|
32 #include <boost/math/distributions/normal.hpp>
|
Chris@49
|
33
|
Chris@49
|
34 #include <armadillo>
|
Chris@49
|
35
|
max@1
|
36 #include <fstream>
|
max@1
|
37 #include <sstream>
|
max@1
|
38 #include <cmath>
|
max@1
|
39 #include <vector>
|
max@1
|
40
|
max@1
|
41 #include <vamp-sdk/Plugin.h>
|
max@1
|
42
|
max@1
|
43 using namespace boost::numeric;
|
max@1
|
44 using namespace arma;
|
max@1
|
45 using std::string;
|
max@1
|
46 using std::vector;
|
max@1
|
47 using std::cerr;
|
max@1
|
48 using std::cout;
|
max@1
|
49 using std::endl;
|
max@1
|
50
|
max@1
|
51
|
max@1
|
52 #ifndef __GNUC__
|
max@1
|
53 #include <alloca.h>
|
max@1
|
54 #endif
|
max@1
|
55
|
max@1
|
56
|
max@1
|
57 // Result Struct
|
max@1
|
// Result record for one part.
// NOTE(review): the meaning of the individual fields is not visible in this
// part of the file; confirm against the code that fills them in.
struct Part {
    int n;
    vector<int> indices;   // list of integer indices belonging to this part
    string letter;         // textual label
    int value;
    int level;
    int nInd;              // presumably the number of entries in `indices` -- TODO confirm
};
|
max@1
|
66
|
max@1
|
67
|
max@8
|
68
|
max@1
|
69 /* ------------------------------------ */
|
max@1
|
70 /* ----- BEAT DETECTOR CLASS ---------- */
|
max@1
|
71 /* ------------------------------------ */
|
max@1
|
72
|
max@1
|
73 class BeatTrackerData
|
max@1
|
74 {
|
max@1
|
75 /* --- ATTRIBUTES --- */
|
max@1
|
76 public:
|
max@1
|
77 DFConfig dfConfig;
|
max@1
|
78 DetectionFunction *df;
|
max@1
|
79 DownBeat *downBeat;
|
max@1
|
80 vector<double> dfOutput;
|
max@1
|
81 Vamp::RealTime origin;
|
max@1
|
82
|
max@1
|
83
|
max@1
|
84 /* --- METHODS --- */
|
max@1
|
85
|
max@1
|
86 /* --- Constructor --- */
|
max@1
|
87 public:
|
max@1
|
88 BeatTrackerData(float rate, const DFConfig &config) : dfConfig(config) {
|
Chris@22
|
89
|
max@1
|
90 df = new DetectionFunction(config);
|
max@1
|
91 // decimation factor aims at resampling to c. 3KHz; must be power of 2
|
max@1
|
92 int factor = MathUtilities::nextPowerOfTwo(rate / 3000);
|
max@1
|
93 // std::cerr << "BeatTrackerData: factor = " << factor << std::endl;
|
max@1
|
94 downBeat = new DownBeat(rate, factor, config.stepSize);
|
max@1
|
95 }
|
max@1
|
96
|
max@1
|
97 /* --- Desctructor --- */
|
max@1
|
98 ~BeatTrackerData() {
|
Chris@22
|
99 delete df;
|
max@1
|
100 delete downBeat;
|
max@1
|
101 }
|
max@1
|
102
|
max@1
|
103 void reset() {
|
max@1
|
104 delete df;
|
max@1
|
105 df = new DetectionFunction(dfConfig);
|
max@1
|
106 dfOutput.clear();
|
max@1
|
107 downBeat->resetAudioBuffer();
|
max@1
|
108 origin = Vamp::RealTime::zeroTime;
|
max@1
|
109 }
|
max@1
|
110 };
|
max@1
|
111
|
max@1
|
112
|
max@1
|
113 /* --------------------------------------- */
|
max@1
|
114 /* ----- CHROMA EXTRACTOR CLASS ---------- */
|
max@1
|
115 /* --------------------------------------- */
|
max@1
|
116
|
max@1
|
117 class ChromaData
|
max@1
|
118 {
|
max@1
|
119
|
max@1
|
120 /* --- ATTRIBUTES --- */
|
max@1
|
121
|
max@1
|
122 public:
|
max@1
|
123 int frameCount;
|
max@1
|
124 int nBPS;
|
max@1
|
125 Vamp::Plugin::FeatureList logSpectrum;
|
Chris@37
|
126 int blockSize;
|
max@1
|
127 int lengthOfNoteIndex;
|
max@1
|
128 vector<float> meanTunings;
|
max@1
|
129 vector<float> localTunings;
|
max@1
|
130 float whitening;
|
max@1
|
131 float preset;
|
max@1
|
132 float useNNLS;
|
max@1
|
133 vector<float> localTuning;
|
max@1
|
134 vector<float> kernelValue;
|
max@1
|
135 vector<int> kernelFftIndex;
|
max@1
|
136 vector<int> kernelNoteIndex;
|
max@1
|
137 float *dict;
|
max@1
|
138 bool tuneLocal;
|
max@1
|
139 float doNormalizeChroma;
|
max@1
|
140 float rollon;
|
max@1
|
141 float s;
|
max@1
|
142 vector<float> hw;
|
max@1
|
143 vector<float> sinvalues;
|
max@1
|
144 vector<float> cosvalues;
|
max@1
|
145 Window<float> window;
|
max@1
|
146 FFTReal fft;
|
Chris@37
|
147 int inputSampleRate;
|
max@1
|
148
|
max@1
|
149 /* --- METHODS --- */
|
max@1
|
150
|
max@1
|
151 /* --- Constructor --- */
|
max@1
|
152
|
max@1
|
153 public:
|
max@1
|
154 ChromaData(float inputSampleRate, size_t block_size) :
|
max@1
|
155 frameCount(0),
|
max@1
|
156 nBPS(3),
|
max@1
|
157 logSpectrum(0),
|
max@1
|
158 blockSize(0),
|
max@1
|
159 lengthOfNoteIndex(0),
|
max@1
|
160 meanTunings(0),
|
max@1
|
161 localTunings(0),
|
max@1
|
162 whitening(1.0),
|
max@1
|
163 preset(0.0),
|
max@1
|
164 useNNLS(1.0),
|
max@1
|
165 localTuning(0.0),
|
max@1
|
166 kernelValue(0),
|
max@1
|
167 kernelFftIndex(0),
|
max@1
|
168 kernelNoteIndex(0),
|
max@1
|
169 dict(0),
|
max@1
|
170 tuneLocal(0.0),
|
max@1
|
171 doNormalizeChroma(0),
|
max@1
|
172 rollon(0.0),
|
Chris@35
|
173 s(0.7),
|
Chris@35
|
174 sinvalues(0),
|
Chris@35
|
175 cosvalues(0),
|
Chris@35
|
176 window(HanningWindow, block_size),
|
Chris@35
|
177 fft(block_size),
|
Chris@35
|
178 inputSampleRate(inputSampleRate)
|
max@1
|
179 {
|
max@1
|
180 // make the *note* dictionary matrix
|
max@1
|
181 dict = new float[nNote * 84];
|
max@1
|
182 for (int i = 0; i < nNote * 84; ++i) dict[i] = 0.0;
|
max@1
|
183 blockSize = block_size;
|
max@1
|
184 }
|
max@1
|
185
|
max@1
|
186
|
max@1
|
187 /* --- Desctructor --- */
|
max@1
|
188
|
max@1
|
189 ~ChromaData() {
|
max@1
|
190 delete [] dict;
|
max@1
|
191 }
|
max@1
|
192
|
max@1
|
193 /* --- Public Methods --- */
|
max@1
|
194
|
max@1
|
195 void reset() {
|
max@1
|
196 frameCount = 0;
|
max@1
|
197 logSpectrum.clear();
|
max@1
|
198 for (int iBPS = 0; iBPS < 3; ++iBPS) {
|
max@1
|
199 meanTunings[iBPS] = 0;
|
max@1
|
200 localTunings[iBPS] = 0;
|
max@1
|
201 }
|
max@1
|
202 localTuning.clear();
|
max@1
|
203 }
|
max@1
|
204
|
max@1
|
205 void baseProcess(float *inputBuffers, Vamp::RealTime timestamp)
|
max@1
|
206 {
|
Chris@22
|
207
|
max@1
|
208 frameCount++;
|
max@1
|
209 float *magnitude = new float[blockSize/2];
|
max@1
|
210 double *fftReal = new double[blockSize];
|
max@1
|
211 double *fftImag = new double[blockSize];
|
max@1
|
212
|
max@1
|
213 // FFTReal wants doubles, so we need to make a local copy of inputBuffers
|
max@1
|
214 double *inputBuffersDouble = new double[blockSize];
|
Chris@37
|
215 for (int i = 0; i < blockSize; i++) inputBuffersDouble[i] = inputBuffers[i];
|
max@1
|
216
|
max@1
|
217 fft.process(false, inputBuffersDouble, fftReal, fftImag);
|
max@1
|
218
|
max@1
|
219 float energysum = 0;
|
max@1
|
220 // make magnitude
|
max@1
|
221 float maxmag = -10000;
|
max@1
|
222 for (int iBin = 0; iBin < static_cast<int>(blockSize/2); iBin++) {
|
max@1
|
223 magnitude[iBin] = sqrt(fftReal[iBin] * fftReal[iBin] +
|
max@1
|
224 fftImag[iBin] * fftImag[iBin]);
|
max@1
|
225 if (magnitude[iBin]>blockSize*1.0) magnitude[iBin] = blockSize;
|
max@1
|
226 // a valid audio signal (between -1 and 1) should not be limited here.
|
max@1
|
227 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
|
max@1
|
228 if (rollon > 0) {
|
max@1
|
229 energysum += pow(magnitude[iBin],2);
|
max@1
|
230 }
|
max@1
|
231 }
|
max@1
|
232
|
max@1
|
233 float cumenergy = 0;
|
max@1
|
234 if (rollon > 0) {
|
max@1
|
235 for (int iBin = 2; iBin < static_cast<int>(blockSize/2); iBin++) {
|
max@1
|
236 cumenergy += pow(magnitude[iBin],2);
|
max@1
|
237 if (cumenergy < energysum * rollon / 100) magnitude[iBin-2] = 0;
|
max@1
|
238 else break;
|
max@1
|
239 }
|
max@1
|
240 }
|
max@1
|
241
|
max@1
|
242 if (maxmag < 2) {
|
max@1
|
243 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
|
max@1
|
244 for (int iBin = 0; iBin < static_cast<int>(blockSize/2); iBin++) {
|
max@1
|
245 magnitude[iBin] = 0;
|
max@1
|
246 }
|
max@1
|
247 }
|
max@1
|
248
|
max@1
|
249 // cerr << magnitude[200] << endl;
|
max@1
|
250
|
max@1
|
251 // note magnitude mapping using pre-calculated matrix
|
max@1
|
252 float *nm = new float[nNote]; // note magnitude
|
max@1
|
253 for (int iNote = 0; iNote < nNote; iNote++) {
|
max@1
|
254 nm[iNote] = 0; // initialise as 0
|
max@1
|
255 }
|
max@1
|
256 int binCount = 0;
|
max@1
|
257 for (vector<float>::iterator it = kernelValue.begin(); it != kernelValue.end(); ++it) {
|
max@1
|
258 nm[kernelNoteIndex[binCount]] += magnitude[kernelFftIndex[binCount]] * kernelValue[binCount];
|
max@1
|
259 binCount++;
|
max@1
|
260 }
|
max@1
|
261
|
max@1
|
262 float one_over_N = 1.0/frameCount;
|
max@1
|
263 // update means of complex tuning variables
|
max@1
|
264 for (int iBPS = 0; iBPS < nBPS; ++iBPS) meanTunings[iBPS] *= float(frameCount-1)*one_over_N;
|
max@1
|
265
|
max@1
|
266 for (int iTone = 0; iTone < round(nNote*0.62/nBPS)*nBPS+1; iTone = iTone + nBPS) {
|
max@1
|
267 for (int iBPS = 0; iBPS < nBPS; ++iBPS) meanTunings[iBPS] += nm[iTone + iBPS]*one_over_N;
|
max@1
|
268 float ratioOld = 0.997;
|
max@1
|
269 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
max@1
|
270 localTunings[iBPS] *= ratioOld;
|
max@1
|
271 localTunings[iBPS] += nm[iTone + iBPS] * (1 - ratioOld);
|
max@1
|
272 }
|
max@1
|
273 }
|
max@1
|
274
|
max@1
|
275 float localTuningImag = 0;
|
max@1
|
276 float localTuningReal = 0;
|
max@1
|
277 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
max@1
|
278 localTuningReal += localTunings[iBPS] * cosvalues[iBPS];
|
max@1
|
279 localTuningImag += localTunings[iBPS] * sinvalues[iBPS];
|
max@1
|
280 }
|
max@1
|
281
|
max@1
|
282 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
|
max@1
|
283 localTuning.push_back(normalisedtuning);
|
max@1
|
284
|
max@1
|
285 Vamp::Plugin::Feature f1; // logfreqspec
|
max@1
|
286 f1.hasTimestamp = true;
|
max@1
|
287 f1.timestamp = timestamp;
|
max@1
|
288 for (int iNote = 0; iNote < nNote; iNote++) {
|
max@1
|
289 f1.values.push_back(nm[iNote]);
|
max@1
|
290 }
|
max@1
|
291
|
max@1
|
292 // deletes
|
max@1
|
293 delete[] inputBuffersDouble;
|
max@1
|
294 delete[] magnitude;
|
max@1
|
295 delete[] fftReal;
|
max@1
|
296 delete[] fftImag;
|
max@1
|
297 delete[] nm;
|
max@1
|
298
|
max@1
|
299 logSpectrum.push_back(f1); // remember note magnitude
|
max@1
|
300 }
|
max@1
|
301
|
max@1
|
302 bool initialise()
|
max@1
|
303 {
|
max@1
|
304 dictionaryMatrix(dict, s);
|
Chris@22
|
305
|
Chris@37
|
306 // make things for tuning estimation
|
Chris@37
|
307 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
max@1
|
308 sinvalues.push_back(sin(2*M_PI*(iBPS*1.0/nBPS)));
|
max@1
|
309 cosvalues.push_back(cos(2*M_PI*(iBPS*1.0/nBPS)));
|
max@1
|
310 }
|
max@1
|
311
|
Chris@22
|
312
|
Chris@37
|
313 // make hamming window of length 1/2 octave
|
Chris@37
|
314 int hamwinlength = nBPS * 6 + 1;
|
max@1
|
315 float hamwinsum = 0;
|
max@1
|
316 for (int i = 0; i < hamwinlength; ++i) {
|
max@1
|
317 hw.push_back(0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1)));
|
max@1
|
318 hamwinsum += 0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1));
|
max@1
|
319 }
|
max@1
|
320 for (int i = 0; i < hamwinlength; ++i) hw[i] = hw[i] / hamwinsum;
|
max@1
|
321
|
max@1
|
322
|
max@1
|
323 // initialise the tuning
|
max@1
|
324 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
max@1
|
325 meanTunings.push_back(0);
|
max@1
|
326 localTunings.push_back(0);
|
max@1
|
327 }
|
Chris@22
|
328
|
max@1
|
329 blockSize = blockSize;
|
max@1
|
330 frameCount = 0;
|
max@1
|
331 int tempn = nNote * blockSize/2;
|
max@1
|
332 // cerr << "length of tempkernel : " << tempn << endl;
|
max@1
|
333 float *tempkernel;
|
max@1
|
334
|
max@1
|
335 tempkernel = new float[tempn];
|
max@1
|
336
|
max@1
|
337 logFreqMatrix(inputSampleRate, blockSize, tempkernel);
|
max@1
|
338 kernelValue.clear();
|
max@1
|
339 kernelFftIndex.clear();
|
max@1
|
340 kernelNoteIndex.clear();
|
max@1
|
341 int countNonzero = 0;
|
max@1
|
342 for (int iNote = 0; iNote < nNote; ++iNote) {
|
max@1
|
343 // I don't know if this is wise: manually making a sparse matrix
|
max@1
|
344 for (int iFFT = 0; iFFT < static_cast<int>(blockSize/2); ++iFFT) {
|
max@1
|
345 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
max@1
|
346 kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
|
max@1
|
347 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
max@1
|
348 countNonzero++;
|
max@1
|
349 }
|
max@1
|
350 kernelFftIndex.push_back(iFFT);
|
Chris@23
|
351 kernelNoteIndex.push_back(iNote);
|
max@1
|
352 }
|
max@1
|
353 }
|
max@1
|
354 }
|
max@1
|
355 delete [] tempkernel;
|
Chris@37
|
356
|
Chris@37
|
357 return true;
|
max@1
|
358 }
|
max@1
|
359 };
|
max@1
|
360
|
max@1
|
361
|
max@1
|
362 /* --------------------------------- */
|
max@1
|
363 /* ----- SONG PARTITIONER ---------- */
|
max@1
|
364 /* --------------------------------- */
|
max@1
|
365
|
max@1
|
366
|
max@1
|
367 /* --- ATTRIBUTES --- */
|
max@1
|
368
|
Chris@48
|
369 float Segmentino::m_stepSecs = 0.01161; // 512 samples at 44100
|
Chris@48
|
370 int Segmentino::m_chromaFramesizeFactor = 16; // 16 times as long as beat tracker's
|
Chris@48
|
371 int Segmentino::m_chromaStepsizeFactor = 4; // 4 times as long as beat tracker's
|
max@1
|
372
|
max@1
|
373
|
max@1
|
374 /* --- METHODS --- */
|
max@1
|
375
|
max@1
|
376 /* --- Constructor --- */
|
Chris@48
|
377 Segmentino::Segmentino(float inputSampleRate) :
|
max@1
|
378 Vamp::Plugin(inputSampleRate),
|
max@1
|
379 m_d(0),
|
Chris@35
|
380 m_chromadata(0),
|
max@1
|
381 m_bpb(4),
|
max@1
|
382 m_pluginFrameCount(0)
|
max@1
|
383 {
|
max@1
|
384 }
|
max@1
|
385
|
max@1
|
386
|
max@1
|
387 /* --- Destructor --- */
|
Chris@48
|
388 Segmentino::~Segmentino()
|
max@1
|
389 {
|
max@1
|
390 delete m_d;
|
Chris@35
|
391 delete m_chromadata;
|
max@1
|
392 }
|
max@1
|
393
|
max@1
|
394
|
max@1
|
395 /* --- Methods --- */
|
Chris@48
|
396 string Segmentino::getIdentifier() const
|
max@1
|
397 {
|
Chris@54
|
398 return "segmentino";
|
max@1
|
399 }
|
max@1
|
400
|
Chris@48
|
401 string Segmentino::getName() const
|
max@1
|
402 {
|
Chris@54
|
403 return "Segmentino";
|
max@1
|
404 }
|
max@1
|
405
|
Chris@48
|
406 string Segmentino::getDescription() const
|
max@1
|
407 {
|
max@1
|
408 return "Estimate contiguous segments pertaining to song parts such as verse and chorus.";
|
max@1
|
409 }
|
max@1
|
410
|
Chris@48
|
411 string Segmentino::getMaker() const
|
max@1
|
412 {
|
max@1
|
413 return "Queen Mary, University of London";
|
max@1
|
414 }
|
max@1
|
415
|
Chris@48
|
416 int Segmentino::getPluginVersion() const
|
max@1
|
417 {
|
max@1
|
418 return 2;
|
max@1
|
419 }
|
max@1
|
420
|
Chris@48
|
421 string Segmentino::getCopyright() const
|
max@1
|
422 {
|
max@1
|
423 return "Plugin by Matthew Davies, Christian Landone, Chris Cannam, Matthias Mauch and Massimiliano Zanoni Copyright (c) 2006-2012 QMUL - All Rights Reserved";
|
max@1
|
424 }
|
max@1
|
425
|
Chris@48
|
426 Segmentino::ParameterList Segmentino::getParameterDescriptors() const
|
max@1
|
427 {
|
max@1
|
428 ParameterList list;
|
max@1
|
429
|
max@1
|
430 ParameterDescriptor desc;
|
max@1
|
431
|
matthiasm@46
|
432 // desc.identifier = "bpb";
|
matthiasm@46
|
433 // desc.name = "Beats per Bar";
|
matthiasm@46
|
434 // desc.description = "The number of beats in each bar";
|
matthiasm@46
|
435 // desc.minValue = 2;
|
matthiasm@46
|
436 // desc.maxValue = 16;
|
matthiasm@46
|
437 // desc.defaultValue = 4;
|
matthiasm@46
|
438 // desc.isQuantized = true;
|
matthiasm@46
|
439 // desc.quantizeStep = 1;
|
matthiasm@46
|
440 // list.push_back(desc);
|
max@1
|
441
|
max@1
|
442 return list;
|
max@1
|
443 }
|
max@1
|
444
|
Chris@48
|
445 float Segmentino::getParameter(std::string name) const
|
max@1
|
446 {
|
max@1
|
447 if (name == "bpb") return m_bpb;
|
max@1
|
448 return 0.0;
|
max@1
|
449 }
|
max@1
|
450
|
Chris@48
|
451 void Segmentino::setParameter(std::string name, float value)
|
max@1
|
452 {
|
max@1
|
453 if (name == "bpb") m_bpb = lrintf(value);
|
max@1
|
454 }
|
max@1
|
455
|
max@1
|
456
|
max@1
|
457 // Return the StepSize for Chroma Extractor
|
Chris@48
|
458 size_t Segmentino::getPreferredStepSize() const
|
max@1
|
459 {
|
max@1
|
460 size_t step = size_t(m_inputSampleRate * m_stepSecs + 0.0001);
|
max@1
|
461 if (step < 1) step = 1;
|
max@1
|
462
|
max@1
|
463 return step;
|
max@1
|
464 }
|
max@1
|
465
|
max@1
|
466 // Return the BlockSize for Chroma Extractor
|
Chris@48
|
467 size_t Segmentino::getPreferredBlockSize() const
|
max@1
|
468 {
|
Chris@50
|
469 int theoretical = getPreferredStepSize() * 2;
|
max@1
|
470 theoretical *= m_chromaFramesizeFactor;
|
Chris@50
|
471 return MathUtilities::nextPowerOfTwo(theoretical);
|
max@1
|
472 }
|
max@1
|
473
|
max@1
|
474
|
max@1
|
475 // Initialize the plugin and define Beat Tracker and Chroma Extractor Objects
|
Chris@48
|
476 bool Segmentino::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
max@1
|
477 {
|
max@1
|
478 if (m_d) {
|
Chris@22
|
479 delete m_d;
|
Chris@22
|
480 m_d = 0;
|
max@1
|
481 }
|
Chris@35
|
482 if (m_chromadata) {
|
Chris@35
|
483 delete m_chromadata;
|
Chris@35
|
484 m_chromadata = 0;
|
Chris@35
|
485 }
|
max@1
|
486
|
max@1
|
487 if (channels < getMinChannelCount() ||
|
Chris@22
|
488 channels > getMaxChannelCount()) {
|
Chris@48
|
489 std::cerr << "Segmentino::initialise: Unsupported channel count: "
|
max@1
|
490 << channels << std::endl;
|
max@1
|
491 return false;
|
max@1
|
492 }
|
max@1
|
493
|
max@1
|
494 if (stepSize != getPreferredStepSize()) {
|
Chris@48
|
495 std::cerr << "ERROR: Segmentino::initialise: Unsupported step size for this sample rate: "
|
max@1
|
496 << stepSize << " (wanted " << (getPreferredStepSize()) << ")" << std::endl;
|
max@1
|
497 return false;
|
max@1
|
498 }
|
max@1
|
499
|
max@1
|
500 if (blockSize != getPreferredBlockSize()) {
|
Chris@48
|
501 std::cerr << "WARNING: Segmentino::initialise: Sub-optimal block size for this sample rate: "
|
max@1
|
502 << blockSize << " (wanted " << getPreferredBlockSize() << ")" << std::endl;
|
max@1
|
503 }
|
max@1
|
504
|
max@1
|
505 // Beat tracker and Chroma extractor has two different configuration parameters
|
max@1
|
506
|
max@1
|
507 // Configuration Parameters for Beat Tracker
|
max@1
|
508 DFConfig dfConfig;
|
max@1
|
509 dfConfig.DFType = DF_COMPLEXSD;
|
max@1
|
510 dfConfig.stepSize = stepSize;
|
max@1
|
511 dfConfig.frameLength = blockSize / m_chromaFramesizeFactor;
|
max@1
|
512 dfConfig.dbRise = 3;
|
max@1
|
513 dfConfig.adaptiveWhitening = false;
|
max@1
|
514 dfConfig.whiteningRelaxCoeff = -1;
|
max@1
|
515 dfConfig.whiteningFloor = -1;
|
max@1
|
516
|
max@1
|
517 // Initialise Beat Tracker
|
max@1
|
518 m_d = new BeatTrackerData(m_inputSampleRate, dfConfig);
|
max@1
|
519 m_d->downBeat->setBeatsPerBar(m_bpb);
|
max@1
|
520
|
max@1
|
521 // Initialise Chroma Extractor
|
max@1
|
522 m_chromadata = new ChromaData(m_inputSampleRate, blockSize);
|
max@1
|
523 m_chromadata->initialise();
|
max@1
|
524
|
max@1
|
525 return true;
|
max@1
|
526 }
|
max@1
|
527
|
Chris@48
|
528 void Segmentino::reset()
|
max@1
|
529 {
|
max@1
|
530 if (m_d) m_d->reset();
|
Chris@38
|
531 if (m_chromadata) m_chromadata->reset();
|
max@1
|
532 m_pluginFrameCount = 0;
|
max@1
|
533 }
|
max@1
|
534
|
Chris@48
|
535 Segmentino::OutputList Segmentino::getOutputDescriptors() const
|
max@1
|
536 {
|
max@1
|
537 OutputList list;
|
Chris@37
|
538 int outputCounter = 0;
|
max@1
|
539
|
max@1
|
540 OutputDescriptor beat;
|
max@1
|
541 beat.identifier = "beats";
|
max@1
|
542 beat.name = "Beats";
|
max@1
|
543 beat.description = "Beat locations labelled with metrical position";
|
max@1
|
544 beat.unit = "";
|
max@1
|
545 beat.hasFixedBinCount = true;
|
max@1
|
546 beat.binCount = 0;
|
max@1
|
547 beat.sampleType = OutputDescriptor::VariableSampleRate;
|
max@1
|
548 beat.sampleRate = 1.0 / m_stepSecs;
|
max@1
|
549 m_beatOutputNumber = outputCounter++;
|
matthiasm@51
|
550
|
max@1
|
551 OutputDescriptor bars;
|
max@1
|
552 bars.identifier = "bars";
|
max@1
|
553 bars.name = "Bars";
|
max@1
|
554 bars.description = "Bar locations";
|
max@1
|
555 bars.unit = "";
|
max@1
|
556 bars.hasFixedBinCount = true;
|
max@1
|
557 bars.binCount = 0;
|
max@1
|
558 bars.sampleType = OutputDescriptor::VariableSampleRate;
|
max@1
|
559 bars.sampleRate = 1.0 / m_stepSecs;
|
max@1
|
560 m_barsOutputNumber = outputCounter++;
|
matthiasm@51
|
561
|
max@1
|
562 OutputDescriptor beatcounts;
|
max@1
|
563 beatcounts.identifier = "beatcounts";
|
max@1
|
564 beatcounts.name = "Beat Count";
|
max@1
|
565 beatcounts.description = "Beat counter function";
|
max@1
|
566 beatcounts.unit = "";
|
max@1
|
567 beatcounts.hasFixedBinCount = true;
|
max@1
|
568 beatcounts.binCount = 1;
|
max@1
|
569 beatcounts.sampleType = OutputDescriptor::VariableSampleRate;
|
max@1
|
570 beatcounts.sampleRate = 1.0 / m_stepSecs;
|
max@1
|
571 m_beatcountsOutputNumber = outputCounter++;
|
matthiasm@51
|
572
|
max@1
|
573 OutputDescriptor beatsd;
|
max@1
|
574 beatsd.identifier = "beatsd";
|
max@1
|
575 beatsd.name = "Beat Spectral Difference";
|
max@1
|
576 beatsd.description = "Beat spectral difference function used for bar-line detection";
|
max@1
|
577 beatsd.unit = "";
|
max@1
|
578 beatsd.hasFixedBinCount = true;
|
max@1
|
579 beatsd.binCount = 1;
|
max@1
|
580 beatsd.sampleType = OutputDescriptor::VariableSampleRate;
|
max@1
|
581 beatsd.sampleRate = 1.0 / m_stepSecs;
|
max@1
|
582 m_beatsdOutputNumber = outputCounter++;
|
max@1
|
583
|
max@1
|
584 OutputDescriptor logscalespec;
|
max@1
|
585 logscalespec.identifier = "logscalespec";
|
max@1
|
586 logscalespec.name = "Log-Frequency Spectrum";
|
max@1
|
587 logscalespec.description = "Spectrum with linear frequency on a log scale.";
|
max@1
|
588 logscalespec.unit = "";
|
max@1
|
589 logscalespec.hasFixedBinCount = true;
|
max@1
|
590 logscalespec.binCount = nNote;
|
max@1
|
591 logscalespec.hasKnownExtents = false;
|
max@1
|
592 logscalespec.isQuantized = false;
|
max@1
|
593 logscalespec.sampleType = OutputDescriptor::FixedSampleRate;
|
max@1
|
594 logscalespec.hasDuration = false;
|
max@1
|
595 logscalespec.sampleRate = m_inputSampleRate/2048;
|
max@1
|
596 m_logscalespecOutputNumber = outputCounter++;
|
max@1
|
597
|
max@1
|
598 OutputDescriptor bothchroma;
|
max@1
|
599 bothchroma.identifier = "bothchroma";
|
max@1
|
600 bothchroma.name = "Chromagram and Bass Chromagram";
|
max@1
|
601 bothchroma.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS approximate transcription.";
|
max@1
|
602 bothchroma.unit = "";
|
max@1
|
603 bothchroma.hasFixedBinCount = true;
|
max@1
|
604 bothchroma.binCount = 24;
|
max@1
|
605 bothchroma.hasKnownExtents = false;
|
max@1
|
606 bothchroma.isQuantized = false;
|
max@1
|
607 bothchroma.sampleType = OutputDescriptor::FixedSampleRate;
|
max@1
|
608 bothchroma.hasDuration = false;
|
max@1
|
609 bothchroma.sampleRate = m_inputSampleRate/2048;
|
max@1
|
610 m_bothchromaOutputNumber = outputCounter++;
|
max@1
|
611
|
max@1
|
612 OutputDescriptor qchromafw;
|
max@1
|
613 qchromafw.identifier = "qchromafw";
|
max@1
|
614 qchromafw.name = "Pseudo-Quantised Chromagram and Bass Chromagram";
|
max@1
|
615 qchromafw.description = "Pseudo-Quantised Chromagram and Bass Chromagram (frames between two beats are identical).";
|
max@1
|
616 qchromafw.unit = "";
|
max@1
|
617 qchromafw.hasFixedBinCount = true;
|
max@1
|
618 qchromafw.binCount = 24;
|
max@1
|
619 qchromafw.hasKnownExtents = false;
|
max@1
|
620 qchromafw.isQuantized = false;
|
max@1
|
621 qchromafw.sampleType = OutputDescriptor::FixedSampleRate;
|
max@1
|
622 qchromafw.hasDuration = false;
|
max@1
|
623 qchromafw.sampleRate = m_inputSampleRate/2048;
|
max@1
|
624 m_qchromafwOutputNumber = outputCounter++;
|
max@1
|
625
|
max@1
|
626 OutputDescriptor qchroma;
|
max@1
|
627 qchroma.identifier = "qchroma";
|
max@1
|
628 qchroma.name = "Quantised Chromagram and Bass Chromagram";
|
max@1
|
629 qchroma.description = "Quantised Chromagram and Bass Chromagram.";
|
max@1
|
630 qchroma.unit = "";
|
max@1
|
631 qchroma.hasFixedBinCount = true;
|
max@1
|
632 qchroma.binCount = 24;
|
max@1
|
633 qchroma.hasKnownExtents = false;
|
max@1
|
634 qchroma.isQuantized = false;
|
max@1
|
635 qchroma.sampleType = OutputDescriptor::FixedSampleRate;
|
max@1
|
636 qchroma.hasDuration = true;
|
Chris@17
|
637 qchroma.sampleRate = m_inputSampleRate/2048;
|
max@1
|
638 m_qchromaOutputNumber = outputCounter++;
|
max@1
|
639
|
max@1
|
640 OutputDescriptor segm;
|
Chris@15
|
641 segm.identifier = "segmentation";
|
max@1
|
642 segm.name = "Segmentation";
|
max@1
|
643 segm.description = "Segmentation";
|
max@1
|
644 segm.unit = "segment-type";
|
max@1
|
645 segm.hasFixedBinCount = true;
|
max@1
|
646 //segm.binCount = 24;
|
max@1
|
647 segm.binCount = 1;
|
max@1
|
648 segm.hasKnownExtents = true;
|
max@1
|
649 segm.minValue = 1;
|
max@1
|
650 segm.maxValue = 5;
|
max@1
|
651 segm.isQuantized = true;
|
max@1
|
652 segm.quantizeStep = 1;
|
max@1
|
653 segm.sampleType = OutputDescriptor::VariableSampleRate;
|
Chris@17
|
654 segm.sampleRate = 1.0 / m_stepSecs;
|
max@1
|
655 segm.hasDuration = true;
|
max@1
|
656 m_segmOutputNumber = outputCounter++;
|
max@1
|
657
|
max@1
|
658
|
max@1
|
659 /*
|
max@1
|
660 OutputList list;
|
max@1
|
661 OutputDescriptor segmentation;
|
max@1
|
662 segmentation.identifier = "segmentation";
|
max@1
|
663 segmentation.name = "Segmentation";
|
max@1
|
664 segmentation.description = "Segmentation";
|
max@1
|
665 segmentation.unit = "segment-type";
|
max@1
|
666 segmentation.hasFixedBinCount = true;
|
max@1
|
667 segmentation.binCount = 1;
|
max@1
|
668 segmentation.hasKnownExtents = true;
|
max@1
|
669 segmentation.minValue = 1;
|
max@1
|
670 segmentation.maxValue = nSegmentTypes;
|
max@1
|
671 segmentation.isQuantized = true;
|
max@1
|
672 segmentation.quantizeStep = 1;
|
max@1
|
673 segmentation.sampleType = OutputDescriptor::VariableSampleRate;
|
max@1
|
674 segmentation.sampleRate = m_inputSampleRate / getPreferredStepSize();
|
max@1
|
675 list.push_back(segmentation);
|
max@1
|
676 return list;
|
max@1
|
677 */
|
max@1
|
678
|
max@1
|
679
|
max@1
|
680 list.push_back(beat);
|
max@1
|
681 list.push_back(bars);
|
max@1
|
682 list.push_back(beatcounts);
|
max@1
|
683 list.push_back(beatsd);
|
max@1
|
684 list.push_back(logscalespec);
|
max@1
|
685 list.push_back(bothchroma);
|
max@1
|
686 list.push_back(qchromafw);
|
max@1
|
687 list.push_back(qchroma);
|
max@1
|
688 list.push_back(segm);
|
max@1
|
689
|
max@1
|
690 return list;
|
max@1
|
691 }
|
max@1
|
692
|
max@1
|
693 // Executed for each frame - called from the host
|
max@1
|
694
|
max@1
|
695 // We use time domain input, because DownBeat requires it -- so we
|
max@1
|
696 // use the time-domain version of DetectionFunction::process which
|
max@1
|
697 // does its own FFT. It requires doubles as input, so we need to
|
max@1
|
698 // make a temporary copy
|
max@1
|
699
|
max@1
|
700 // We only support a single input channel
|
Chris@48
|
701 Segmentino::FeatureSet Segmentino::process(const float *const *inputBuffers,Vamp::RealTime timestamp)
|
max@1
|
702 {
|
max@1
|
703 if (!m_d) {
|
Chris@48
|
704 cerr << "ERROR: Segmentino::process: "
|
Chris@48
|
705 << "Segmentino has not been initialised"
|
Chris@22
|
706 << endl;
|
Chris@22
|
707 return FeatureSet();
|
max@1
|
708 }
|
max@1
|
709
|
max@1
|
710 const int fl = m_d->dfConfig.frameLength;
|
max@1
|
711 #ifndef __GNUC__
|
max@1
|
712 double *dfinput = (double *)alloca(fl * sizeof(double));
|
max@1
|
713 #else
|
max@1
|
714 double dfinput[fl];
|
max@1
|
715 #endif
|
max@1
|
716 int sampleOffset = ((m_chromaFramesizeFactor-1) * fl) / 2;
|
max@1
|
717
|
max@1
|
718 // Since chroma needs a much longer frame size, we only ever use the very
|
max@1
|
719 // beginning of the frame for beat tracking.
|
max@1
|
720 for (int i = 0; i < fl; ++i) dfinput[i] = inputBuffers[0][i];
|
max@1
|
721 double output = m_d->df->process(dfinput);
|
max@1
|
722
|
max@1
|
723 if (m_d->dfOutput.empty()) m_d->origin = timestamp;
|
max@1
|
724
|
max@1
|
725 // std::cerr << "df[" << m_d->dfOutput.size() << "] is " << output << std::endl;
|
max@1
|
726 m_d->dfOutput.push_back(output);
|
max@1
|
727
|
max@1
|
728 // Downsample and store the incoming audio block.
|
max@1
|
729 // We have an overlap on the incoming audio stream (step size is
|
max@1
|
730 // half block size) -- this function is configured to take only a
|
max@1
|
731 // step size's worth, so effectively ignoring the overlap. Note
|
max@1
|
732 // however that this means we omit the last blocksize - stepsize
|
max@1
|
733 // samples completely for the purposes of barline detection
|
max@1
|
734 // (hopefully not a problem)
|
max@1
|
735 m_d->downBeat->pushAudioBlock(inputBuffers[0]);
|
max@1
|
736
|
max@1
|
737 // The following is not done every time, but only every m_chromaFramesizeFactor times,
|
max@1
|
738 // because the chroma does not need dense time frames.
|
max@1
|
739
|
max@1
|
740 if (m_pluginFrameCount % m_chromaStepsizeFactor == 0)
|
max@1
|
741 {
|
max@1
|
742
|
max@1
|
743 // Window the full time domain, data, FFT it and process chroma stuff.
|
max@1
|
744
|
max@1
|
745 #ifndef __GNUC__
|
max@1
|
746 float *windowedBuffers = (float *)alloca(m_chromadata->blockSize * sizeof(float));
|
max@1
|
747 #else
|
max@1
|
748 float windowedBuffers[m_chromadata->blockSize];
|
max@1
|
749 #endif
|
max@1
|
750 m_chromadata->window.cut(&inputBuffers[0][0], &windowedBuffers[0]);
|
max@1
|
751
|
max@1
|
752 // adjust timestamp (we want the middle of the frame)
|
max@1
|
753 timestamp = timestamp + Vamp::RealTime::frame2RealTime(sampleOffset, lrintf(m_inputSampleRate));
|
max@1
|
754
|
max@1
|
755 m_chromadata->baseProcess(&windowedBuffers[0], timestamp);
|
max@1
|
756
|
max@1
|
757 }
|
max@1
|
758 m_pluginFrameCount++;
|
max@1
|
759
|
max@1
|
760 FeatureSet fs;
|
max@1
|
761 fs[m_logscalespecOutputNumber].push_back(
|
max@1
|
762 m_chromadata->logSpectrum.back());
|
max@1
|
763 return fs;
|
max@1
|
764 }
|
max@1
|
765
|
Chris@48
|
766 Segmentino::FeatureSet Segmentino::getRemainingFeatures()
|
max@1
|
767 {
|
max@1
|
768 if (!m_d) {
|
Chris@48
|
769 cerr << "ERROR: Segmentino::getRemainingFeatures: "
|
Chris@48
|
770 << "Segmentino has not been initialised"
|
Chris@22
|
771 << endl;
|
Chris@22
|
772 return FeatureSet();
|
max@1
|
773 }
|
max@1
|
774
|
Chris@16
|
775 FeatureSet masterFeatureset = beatTrack();
|
Chris@49
|
776 int beatcount = masterFeatureset[m_beatOutputNumber].size();
|
Chris@49
|
777 if (beatcount == 0) return Segmentino::FeatureSet();
|
Chris@49
|
778 Vamp::RealTime last_beattime = masterFeatureset[m_beatOutputNumber][beatcount-1].timestamp;
|
matthiasm@46
|
779 masterFeatureset[m_beatOutputNumber].clear();
|
matthiasm@46
|
780 Vamp::RealTime beattime = Vamp::RealTime::fromSeconds(1.0);
|
matthiasm@46
|
781 while (beattime < last_beattime)
|
matthiasm@46
|
782 {
|
matthiasm@46
|
783 Feature beatfeature;
|
matthiasm@46
|
784 beatfeature.hasTimestamp = true;
|
matthiasm@46
|
785 beatfeature.timestamp = beattime;
|
matthiasm@46
|
786 masterFeatureset[m_beatOutputNumber].push_back(beatfeature);
|
matthiasm@46
|
787 beattime = beattime + Vamp::RealTime::fromSeconds(0.5);
|
matthiasm@46
|
788 }
|
matthiasm@46
|
789
|
matthiasm@46
|
790
|
Chris@16
|
791 FeatureList chromaList = chromaFeatures();
|
max@1
|
792
|
Chris@37
|
793 for (int i = 0; i < (int)chromaList.size(); ++i)
|
max@1
|
794 {
|
max@1
|
795 masterFeatureset[m_bothchromaOutputNumber].push_back(chromaList[i]);
|
max@1
|
796 }
|
max@1
|
797
|
max@1
|
798 // quantised and pseudo-quantised (beat-wise) chroma
|
Chris@16
|
799 std::vector<FeatureList> quantisedChroma = beatQuantiser(chromaList, masterFeatureset[m_beatOutputNumber]);
|
Chris@32
|
800
|
Chris@32
|
801 if (quantisedChroma.empty()) return masterFeatureset;
|
max@1
|
802
|
max@1
|
803 masterFeatureset[m_qchromafwOutputNumber] = quantisedChroma[0];
|
max@1
|
804 masterFeatureset[m_qchromaOutputNumber] = quantisedChroma[1];
|
max@1
|
805
|
max@1
|
806 // Segmentation
|
Chris@39
|
807 try {
|
Chris@39
|
808 masterFeatureset[m_segmOutputNumber] = runSegmenter(quantisedChroma[1]);
|
Chris@39
|
809 } catch (std::bad_alloc &a) {
|
Chris@48
|
810 cerr << "ERROR: Segmentino::getRemainingFeatures: Failed to run segmenter, not enough memory (song too long?)" << endl;
|
Chris@39
|
811 }
|
max@1
|
812
|
max@1
|
813 return(masterFeatureset);
|
max@1
|
814 }
|
max@1
|
815
|
max@1
|
816 /* ------ Beat Tracker ------ */
|
max@1
|
817
|
Chris@48
|
818 Segmentino::FeatureSet Segmentino::beatTrack()
|
max@1
|
819 {
|
max@1
|
820 vector<double> df;
|
max@1
|
821 vector<double> beatPeriod;
|
max@1
|
822 vector<double> tempi;
|
max@1
|
823
|
Chris@37
|
824 for (int i = 2; i < (int)m_d->dfOutput.size(); ++i) { // discard first two elts
|
max@1
|
825 df.push_back(m_d->dfOutput[i]);
|
max@1
|
826 beatPeriod.push_back(0.0);
|
max@1
|
827 }
|
max@1
|
828 if (df.empty()) return FeatureSet();
|
max@1
|
829
|
max@1
|
830 TempoTrackV2 tt(m_inputSampleRate, m_d->dfConfig.stepSize);
|
max@1
|
831 tt.calculateBeatPeriod(df, beatPeriod, tempi);
|
max@1
|
832
|
max@1
|
833 vector<double> beats;
|
max@1
|
834 tt.calculateBeats(df, beatPeriod, beats);
|
max@1
|
835
|
max@1
|
836 vector<int> downbeats;
|
max@1
|
837 size_t downLength = 0;
|
max@1
|
838 const float *downsampled = m_d->downBeat->getBufferedAudio(downLength);
|
max@1
|
839 m_d->downBeat->findDownBeats(downsampled, downLength, beats, downbeats);
|
max@1
|
840
|
max@1
|
841 vector<double> beatsd;
|
max@1
|
842 m_d->downBeat->getBeatSD(beatsd);
|
max@1
|
843
|
max@1
|
844 /*std::cout << "BeatTracker: found downbeats at: ";
|
max@1
|
845 for (int i = 0; i < downbeats.size(); ++i) std::cout << downbeats[i] << " " << std::endl;*/
|
max@1
|
846
|
max@1
|
847 FeatureSet returnFeatures;
|
max@1
|
848
|
max@1
|
849 char label[20];
|
max@1
|
850
|
max@1
|
851 int dbi = 0;
|
max@1
|
852 int beat = 0;
|
max@1
|
853 int bar = 0;
|
max@1
|
854
|
max@1
|
855 if (!downbeats.empty()) {
|
max@1
|
856 // get the right number for the first beat; this will be
|
max@1
|
857 // incremented before use (at top of the following loop)
|
max@1
|
858 int firstDown = downbeats[0];
|
max@1
|
859 beat = m_bpb - firstDown - 1;
|
max@1
|
860 if (beat == m_bpb) beat = 0;
|
max@1
|
861 }
|
max@1
|
862
|
Chris@37
|
863 for (int i = 0; i < (int)beats.size(); ++i) {
|
max@1
|
864
|
Chris@37
|
865 int frame = beats[i] * m_d->dfConfig.stepSize;
|
max@1
|
866
|
Chris@37
|
867 if (dbi < (int)downbeats.size() && i == downbeats[dbi]) {
|
max@1
|
868 beat = 0;
|
max@1
|
869 ++bar;
|
max@1
|
870 ++dbi;
|
max@1
|
871 } else {
|
max@1
|
872 ++beat;
|
max@1
|
873 }
|
max@1
|
874
|
max@1
|
875 /* Ooutput Section */
|
max@1
|
876
|
max@1
|
877 // outputs are:
|
max@1
|
878 //
|
max@1
|
879 // 0 -> beats
|
max@1
|
880 // 1 -> bars
|
max@1
|
881 // 2 -> beat counter function
|
max@1
|
882
|
max@1
|
883 Feature feature;
|
max@1
|
884 feature.hasTimestamp = true;
|
max@1
|
885 feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime (frame, lrintf(m_inputSampleRate));
|
max@1
|
886
|
max@1
|
887 sprintf(label, "%d", beat + 1);
|
max@1
|
888 feature.label = label;
|
max@1
|
889 returnFeatures[m_beatOutputNumber].push_back(feature); // labelled beats
|
max@1
|
890
|
max@1
|
891 feature.values.push_back(beat + 1);
|
max@1
|
892 returnFeatures[m_beatcountsOutputNumber].push_back(feature); // beat function
|
max@1
|
893
|
Chris@37
|
894 if (i > 0 && i <= (int)beatsd.size()) {
|
max@1
|
895 feature.values.clear();
|
max@1
|
896 feature.values.push_back(beatsd[i-1]);
|
max@1
|
897 feature.label = "";
|
max@1
|
898 returnFeatures[m_beatsdOutputNumber].push_back(feature); // beat spectral difference
|
max@1
|
899 }
|
max@1
|
900
|
max@1
|
901 if (beat == 0) {
|
max@1
|
902 feature.values.clear();
|
max@1
|
903 sprintf(label, "%d", bar);
|
max@1
|
904 feature.label = label;
|
max@1
|
905 returnFeatures[m_barsOutputNumber].push_back(feature); // bars
|
max@1
|
906 }
|
max@1
|
907 }
|
max@1
|
908
|
max@1
|
909 return returnFeatures;
|
max@1
|
910 }
|
max@1
|
911
|
max@1
|
912
|
max@1
|
913 /* ------ Chroma Extractor ------ */
|
max@1
|
914
|
Chris@48
|
915 Segmentino::FeatureList Segmentino::chromaFeatures()
|
max@1
|
916 {
|
max@1
|
917
|
max@1
|
918 FeatureList returnFeatureList;
|
max@1
|
919 FeatureList tunedlogfreqspec;
|
max@1
|
920
|
max@1
|
921 if (m_chromadata->logSpectrum.size() == 0) return returnFeatureList;
|
max@1
|
922
|
max@1
|
923 /** Calculate Tuning
|
max@1
|
924 calculate tuning from (using the angle of the complex number defined by the
|
max@1
|
925 cumulative mean real and imag values)
|
max@1
|
926 **/
|
max@1
|
927 float meanTuningImag = 0;
|
max@1
|
928 float meanTuningReal = 0;
|
max@1
|
929 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
max@1
|
930 meanTuningReal += m_chromadata->meanTunings[iBPS] * m_chromadata->cosvalues[iBPS];
|
max@1
|
931 meanTuningImag += m_chromadata->meanTunings[iBPS] * m_chromadata->sinvalues[iBPS];
|
max@1
|
932 }
|
max@1
|
933 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
max@1
|
934 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
max@1
|
935 int intShift = floor(normalisedtuning * 3);
|
max@1
|
936 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
|
max@1
|
937
|
max@1
|
938 char buffer0 [50];
|
max@1
|
939
|
max@1
|
940 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
max@1
|
941
|
max@1
|
942 /** Tune Log-Frequency Spectrogram
|
max@1
|
943 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
|
max@1
|
944 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
|
max@1
|
945 **/
|
Chris@50
|
946 // cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
|
max@1
|
947
|
max@1
|
948 float tempValue = 0;
|
max@1
|
949
|
max@1
|
950 int count = 0;
|
max@1
|
951
|
max@1
|
952 for (FeatureList::iterator i = m_chromadata->logSpectrum.begin(); i != m_chromadata->logSpectrum.end(); ++i)
|
max@1
|
953 {
|
max@1
|
954
|
max@1
|
955 Feature f1 = *i;
|
max@1
|
956 Feature f2; // tuned log-frequency spectrum
|
max@1
|
957
|
max@1
|
958 f2.hasTimestamp = true;
|
max@1
|
959 f2.timestamp = f1.timestamp;
|
max@1
|
960
|
max@1
|
961 f2.values.push_back(0.0);
|
max@1
|
962 f2.values.push_back(0.0); // set lower edge to zero
|
max@1
|
963
|
max@1
|
964 if (m_chromadata->tuneLocal) {
|
max@1
|
965 intShift = floor(m_chromadata->localTuning[count] * 3);
|
max@1
|
966 floatShift = m_chromadata->localTuning[count] * 3 - intShift;
|
max@1
|
967 // floatShift is a really bad name for this
|
max@1
|
968 }
|
max@1
|
969
|
max@1
|
970 for (int k = 2; k < (int)f1.values.size() - 3; ++k)
|
max@1
|
971 { // interpolate all inner bins
|
max@1
|
972 tempValue = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift;
|
max@1
|
973 f2.values.push_back(tempValue);
|
max@1
|
974 }
|
max@1
|
975
|
max@1
|
976 f2.values.push_back(0.0);
|
max@1
|
977 f2.values.push_back(0.0);
|
max@1
|
978 f2.values.push_back(0.0); // upper edge
|
max@1
|
979
|
max@1
|
980 vector<float> runningmean = SpecialConvolution(f2.values,m_chromadata->hw);
|
max@1
|
981 vector<float> runningstd;
|
max@1
|
982 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
|
max@1
|
983 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
|
max@1
|
984 }
|
max@1
|
985 runningstd = SpecialConvolution(runningstd,m_chromadata->hw); // second step convolve
|
max@1
|
986 for (int i = 0; i < nNote; i++)
|
max@1
|
987 {
|
max@1
|
988
|
max@1
|
989 runningstd[i] = sqrt(runningstd[i]);
|
max@1
|
990 // square root to finally have running std
|
max@1
|
991
|
max@1
|
992 if (runningstd[i] > 0)
|
max@1
|
993 {
|
max@1
|
994 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
|
max@1
|
995 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_chromadata->whitening) : 0;
|
max@1
|
996 }
|
max@1
|
997
|
max@1
|
998 if (f2.values[i] < 0) {
|
max@1
|
999
|
max@1
|
1000 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
max@1
|
1001
|
max@1
|
1002 }
|
max@1
|
1003 }
|
max@1
|
1004 tunedlogfreqspec.push_back(f2);
|
max@1
|
1005 count++;
|
max@1
|
1006 }
|
Chris@50
|
1007 // cerr << "done." << endl;
|
max@1
|
1008 /** Semitone spectrum and chromagrams
|
max@1
|
1009 Semitone-spaced log-frequency spectrum derived
|
max@1
|
1010 from the tuned log-freq spectrum above. the spectrum
|
max@1
|
1011 is inferred using a non-negative least squares algorithm.
|
max@1
|
1012 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
max@1
|
1013 bass and treble stacked onto each other).
|
max@1
|
1014 **/
|
Chris@50
|
1015 /*
|
max@1
|
1016 if (m_chromadata->useNNLS == 0) {
|
max@1
|
1017 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
|
max@1
|
1018 } else {
|
max@1
|
1019 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
|
max@1
|
1020 }
|
Chris@50
|
1021 */
|
max@1
|
1022 vector<float> oldchroma = vector<float>(12,0);
|
max@1
|
1023 vector<float> oldbasschroma = vector<float>(12,0);
|
max@1
|
1024 count = 0;
|
max@1
|
1025
|
max@1
|
1026 for (FeatureList::iterator it = tunedlogfreqspec.begin(); it != tunedlogfreqspec.end(); ++it) {
|
max@1
|
1027 Feature logfreqsp = *it; // logfreq spectrum
|
max@1
|
1028 Feature bothchroma; // treble and bass chromagram
|
max@1
|
1029
|
max@1
|
1030 bothchroma.hasTimestamp = true;
|
max@1
|
1031 bothchroma.timestamp = logfreqsp.timestamp;
|
max@1
|
1032
|
max@1
|
1033 float b[nNote];
|
max@1
|
1034
|
max@1
|
1035 bool some_b_greater_zero = false;
|
max@1
|
1036 float sumb = 0;
|
max@1
|
1037 for (int i = 0; i < nNote; i++) {
|
max@1
|
1038 b[i] = logfreqsp.values[i];
|
max@1
|
1039 sumb += b[i];
|
max@1
|
1040 if (b[i] > 0) {
|
max@1
|
1041 some_b_greater_zero = true;
|
max@1
|
1042 }
|
max@1
|
1043 }
|
max@1
|
1044
|
max@1
|
1045 // here's where the non-negative least squares algorithm calculates the note activation x
|
max@1
|
1046
|
max@1
|
1047 vector<float> chroma = vector<float>(12, 0);
|
max@1
|
1048 vector<float> basschroma = vector<float>(12, 0);
|
max@1
|
1049 float currval;
|
max@1
|
1050 int iSemitone = 0;
|
max@1
|
1051
|
max@1
|
1052 if (some_b_greater_zero) {
|
max@1
|
1053 if (m_chromadata->useNNLS == 0) {
|
max@1
|
1054 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
|
max@1
|
1055 currval = 0;
|
max@1
|
1056 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
|
max@1
|
1057 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
|
max@1
|
1058 }
|
max@1
|
1059 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
max@1
|
1060 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
max@1
|
1061 iSemitone++;
|
max@1
|
1062 }
|
max@1
|
1063
|
max@1
|
1064 } else {
|
max@1
|
1065 float x[84+1000];
|
max@1
|
1066 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
max@1
|
1067 vector<int> signifIndex;
|
max@1
|
1068 int index=0;
|
max@1
|
1069 sumb /= 84.0;
|
max@1
|
1070 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
|
max@1
|
1071 float currval = 0;
|
max@1
|
1072 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
|
max@1
|
1073 currval += b[iNote + iBPS];
|
max@1
|
1074 }
|
max@1
|
1075 if (currval > 0) signifIndex.push_back(index);
|
max@1
|
1076 index++;
|
max@1
|
1077 }
|
max@1
|
1078 float rnorm;
|
max@1
|
1079 float w[84+1000];
|
max@1
|
1080 float zz[84+1000];
|
max@1
|
1081 int indx[84+1000];
|
max@1
|
1082 int mode;
|
max@1
|
1083 int dictsize = nNote*signifIndex.size();
|
max@1
|
1084
|
max@1
|
1085 float *curr_dict = new float[dictsize];
|
max@1
|
1086 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
|
max@1
|
1087 for (int iBin = 0; iBin < nNote; iBin++) {
|
max@1
|
1088 curr_dict[iNote * nNote + iBin] =
|
max@1
|
1089 1.0 * m_chromadata->dict[signifIndex[iNote] * nNote + iBin];
|
max@1
|
1090 }
|
max@1
|
1091 }
|
max@1
|
1092 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
max@1
|
1093 delete [] curr_dict;
|
max@1
|
1094 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
|
max@1
|
1095 // cerr << mode << endl;
|
max@1
|
1096 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
max@1
|
1097 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
max@1
|
1098 }
|
max@1
|
1099 }
|
max@1
|
1100 }
|
max@1
|
1101
|
max@1
|
1102 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end());
|
max@1
|
1103 // just stack the both chromas
|
max@1
|
1104
|
max@1
|
1105 bothchroma.values = chroma;
|
max@1
|
1106 returnFeatureList.push_back(bothchroma);
|
max@1
|
1107 count++;
|
max@1
|
1108 }
|
Chris@50
|
1109 // cerr << "done." << endl;
|
max@1
|
1110
|
max@1
|
1111 return returnFeatureList;
|
max@1
|
1112 }
|
max@1
|
1113
|
max@1
|
1114 /* ------ Beat Quantizer ------ */
|
max@1
|
1115
|
max@4
|
1116 std::vector<Vamp::Plugin::FeatureList>
|
Chris@48
|
1117 Segmentino::beatQuantiser(Vamp::Plugin::FeatureList chromagram, Vamp::Plugin::FeatureList beats)
|
max@1
|
1118 {
|
max@1
|
1119 std::vector<FeatureList> returnVector;
|
max@1
|
1120
|
max@1
|
1121 FeatureList fwQchromagram; // frame-wise beat-quantised chroma
|
max@1
|
1122 FeatureList bwQchromagram; // beat-wise beat-quantised chroma
|
matthiasm@43
|
1123
|
matthiasm@43
|
1124
|
matthiasm@43
|
1125 size_t nChromaFrame = chromagram.size();
|
matthiasm@43
|
1126 size_t nBeat = beats.size();
|
max@1
|
1127
|
max@1
|
1128 if (nBeat == 0 && nChromaFrame == 0) return returnVector;
|
max@1
|
1129
|
Chris@37
|
1130 int nBin = chromagram[0].values.size();
|
max@1
|
1131
|
max@1
|
1132 vector<float> tempChroma = vector<float>(nBin);
|
max@1
|
1133
|
max@1
|
1134 Vamp::RealTime beatTimestamp = Vamp::RealTime::zeroTime;
|
max@1
|
1135 int currBeatCount = -1; // start before first beat
|
max@1
|
1136 int framesInBeat = 0;
|
max@1
|
1137
|
matthiasm@43
|
1138 for (size_t iChroma = 0; iChroma < nChromaFrame; ++iChroma)
|
max@1
|
1139 {
|
max@4
|
1140 Vamp::RealTime frameTimestamp = chromagram[iChroma].timestamp;
|
Chris@24
|
1141 Vamp::RealTime newBeatTimestamp;
|
Chris@22
|
1142
|
Chris@37
|
1143 if (currBeatCount != (int)beats.size() - 1) {
|
Chris@37
|
1144 newBeatTimestamp = beats[currBeatCount+1].timestamp;
|
Chris@37
|
1145 } else {
|
Chris@37
|
1146 newBeatTimestamp = chromagram[nChromaFrame-1].timestamp;
|
Chris@37
|
1147 }
|
Chris@22
|
1148
|
Chris@24
|
1149 if (frameTimestamp > newBeatTimestamp ||
|
max@1
|
1150 iChroma == nChromaFrame-1)
|
max@1
|
1151 {
|
max@1
|
1152 // new beat (or last chroma frame)
|
max@1
|
1153 // 1. finish all the old beat processing
|
Chris@23
|
1154 if (framesInBeat > 0)
|
Chris@23
|
1155 {
|
Chris@23
|
1156 for (int i = 0; i < nBin; ++i) tempChroma[i] /= framesInBeat; // average
|
Chris@23
|
1157 }
|
max@1
|
1158
|
max@1
|
1159 Feature bwQchromaFrame;
|
max@1
|
1160 bwQchromaFrame.hasTimestamp = true;
|
max@1
|
1161 bwQchromaFrame.timestamp = beatTimestamp;
|
max@1
|
1162 bwQchromaFrame.values = tempChroma;
|
Chris@24
|
1163 bwQchromaFrame.duration = newBeatTimestamp - beatTimestamp;
|
max@1
|
1164 bwQchromagram.push_back(bwQchromaFrame);
|
max@1
|
1165
|
max@1
|
1166 for (int iFrame = -framesInBeat; iFrame < 0; ++iFrame)
|
max@1
|
1167 {
|
max@1
|
1168 Feature fwQchromaFrame;
|
max@1
|
1169 fwQchromaFrame.hasTimestamp = true;
|
max@1
|
1170 fwQchromaFrame.timestamp = chromagram[iChroma+iFrame].timestamp;
|
max@1
|
1171 fwQchromaFrame.values = tempChroma; // all between two beats get the same
|
max@1
|
1172 fwQchromagram.push_back(fwQchromaFrame);
|
max@1
|
1173 }
|
max@1
|
1174
|
max@1
|
1175 // 2. increments / resets for current (new) beat
|
max@1
|
1176 currBeatCount++;
|
Chris@24
|
1177 beatTimestamp = newBeatTimestamp;
|
Chris@37
|
1178 for (int i = 0; i < nBin; ++i) tempChroma[i] = 0; // average
|
max@1
|
1179 framesInBeat = 0;
|
max@1
|
1180 }
|
max@1
|
1181 framesInBeat++;
|
Chris@37
|
1182 for (int i = 0; i < nBin; ++i) tempChroma[i] += chromagram[iChroma].values[i];
|
max@1
|
1183 }
|
max@1
|
1184 returnVector.push_back(fwQchromagram);
|
max@1
|
1185 returnVector.push_back(bwQchromagram);
|
Chris@30
|
1186 return returnVector;
|
max@1
|
1187 }
|
max@1
|
1188
|
matthiasm@43
|
1189
|
matthiasm@43
|
1190
|
max@1
|
1191 /* -------------------------------- */
|
max@1
|
1192 /* ------ Support Functions ------ */
|
max@1
|
1193 /* -------------------------------- */
|
max@1
|
1194
|
max@1
|
1195 // one-dimensional median filter
|
max@1
|
1196 arma::vec medfilt1(arma::vec v, int medfilt_length)
|
max@1
|
1197 {
|
matthiasm@46
|
1198 // TODO: check if this works with odd and even medfilt_length !!!
|
max@1
|
1199 int halfWin = medfilt_length/2;
|
max@1
|
1200
|
max@1
|
1201 // result vector
|
max@1
|
1202 arma::vec res = arma::zeros<arma::vec>(v.size());
|
max@1
|
1203
|
max@1
|
1204 // padding
|
max@1
|
1205 arma::vec padV = arma::zeros<arma::vec>(v.size()+medfilt_length-1);
|
max@1
|
1206
|
Chris@37
|
1207 for (int i=medfilt_length/2; i < medfilt_length/2+(int)v.size(); ++ i)
|
max@1
|
1208 {
|
max@1
|
1209 padV(i) = v(i-medfilt_length/2);
|
matthiasm@46
|
1210 }
|
matthiasm@46
|
1211
|
matthiasm@46
|
1212 // the above loop leaves the boundaries at 0,
|
matthiasm@46
|
1213 // the two loops below fill them with the start or end values of v at start and end
|
matthiasm@46
|
1214 for (int i = 0; i < halfWin; ++i) padV(i) = v(0);
|
matthiasm@46
|
1215 for (int i = halfWin+(int)v.size(); i < (int)v.size()+2*halfWin; ++i) padV(i) = v(v.size()-1);
|
matthiasm@46
|
1216
|
matthiasm@46
|
1217
|
max@1
|
1218
|
max@1
|
1219 // Median filter
|
max@1
|
1220 arma::vec win = arma::zeros<arma::vec>(medfilt_length);
|
max@1
|
1221
|
Chris@37
|
1222 for (int i=0; i < (int)v.size(); ++i)
|
max@1
|
1223 {
|
max@1
|
1224 win = padV.subvec(i,i+halfWin*2);
|
max@1
|
1225 win = sort(win);
|
max@1
|
1226 res(i) = win(halfWin);
|
max@1
|
1227 }
|
max@1
|
1228
|
max@1
|
1229 return res;
|
max@1
|
1230 }
|
max@1
|
1231
|
max@1
|
1232
|
max@1
|
1233 // Quantile
|
max@1
|
1234 double quantile(arma::vec v, double p)
|
max@1
|
1235 {
|
max@1
|
1236 arma::vec sortV = arma::sort(v);
|
max@1
|
1237 int n = sortV.size();
|
max@1
|
1238 arma::vec x = arma::zeros<vec>(n+2);
|
max@1
|
1239 arma::vec y = arma::zeros<vec>(n+2);
|
max@1
|
1240
|
max@1
|
1241 x(0) = 0;
|
max@1
|
1242 x(n+1) = 100;
|
max@1
|
1243
|
Chris@21
|
1244 for (int i=1; i<n+1; ++i)
|
max@1
|
1245 x(i) = 100*(0.5+(i-1))/n;
|
max@1
|
1246
|
max@1
|
1247 y(0) = sortV(0);
|
max@1
|
1248 y.subvec(1,n) = sortV;
|
max@1
|
1249 y(n+1) = sortV(n-1);
|
max@1
|
1250
|
max@1
|
1251 arma::uvec x2index = find(x>=p*100);
|
max@1
|
1252
|
max@1
|
1253 // Interpolation
|
max@1
|
1254 double x1 = x(x2index(0)-1);
|
max@1
|
1255 double x2 = x(x2index(0));
|
max@1
|
1256 double y1 = y(x2index(0)-1);
|
max@1
|
1257 double y2 = y(x2index(0));
|
max@1
|
1258
|
max@1
|
1259 double res = (y2-y1)/(x2-x1)*(p*100-x1)+y1;
|
max@1
|
1260
|
max@1
|
1261 return res;
|
max@1
|
1262 }
|
max@1
|
1263
|
max@1
|
1264 // Max Filtering
|
max@1
|
1265 arma::mat maxfilt1(arma::mat inmat, int len)
|
max@1
|
1266 {
|
max@1
|
1267 arma::mat outmat = inmat;
|
max@1
|
1268
|
Chris@37
|
1269 for (int i=0; i < (int)inmat.n_rows; ++i)
|
max@1
|
1270 {
|
max@1
|
1271 if (arma::sum(inmat.row(i)) > 0)
|
max@1
|
1272 {
|
max@1
|
1273 // Take a window of rows
|
max@1
|
1274 int startWin;
|
max@1
|
1275 int endWin;
|
max@1
|
1276
|
max@1
|
1277 if (0 > i-len)
|
max@1
|
1278 startWin = 0;
|
max@1
|
1279 else
|
max@1
|
1280 startWin = i-len;
|
max@1
|
1281
|
Chris@37
|
1282 if ((int)inmat.n_rows-1 < i+len-1)
|
max@1
|
1283 endWin = inmat.n_rows-1;
|
max@1
|
1284 else
|
max@1
|
1285 endWin = i+len-1;
|
max@1
|
1286
|
max@1
|
1287 outmat(i,span::all) = arma::max(inmat(span(startWin,endWin),span::all));
|
max@1
|
1288 }
|
max@1
|
1289 }
|
max@1
|
1290
|
max@1
|
1291 return outmat;
|
max@1
|
1292
|
max@1
|
1293 }
|
max@1
|
1294
|
max@1
|
1295 // Null Parts
|
max@1
|
1296 Part nullpart(vector<Part> parts, arma::vec barline)
|
max@1
|
1297 {
|
max@1
|
1298 arma::uvec nullindices = arma::ones<arma::uvec>(barline.size());
|
Chris@37
|
1299 for (int iPart=0; iPart<(int)parts.size(); ++iPart)
|
max@1
|
1300 {
|
Chris@21
|
1301 //for (int iIndex=0; iIndex < parts[0].indices.size(); ++iIndex)
|
Chris@37
|
1302 for (int iIndex=0; iIndex < (int)parts[iPart].indices.size(); ++iIndex)
|
Chris@21
|
1303 for (int i=0; i<parts[iPart].n; ++i)
|
max@1
|
1304 {
|
Chris@21
|
1305 int ind = parts[iPart].indices[iIndex]+i;
|
max@1
|
1306 nullindices(ind) = 0;
|
max@1
|
1307 }
|
max@1
|
1308 }
|
max@7
|
1309
|
max@1
|
1310 Part newPart;
|
max@1
|
1311 newPart.n = 1;
|
max@1
|
1312 uvec q = find(nullindices > 0);
|
max@1
|
1313
|
Chris@37
|
1314 for (int i=0; i<(int)q.size();++i)
|
max@1
|
1315 newPart.indices.push_back(q(i));
|
max@7
|
1316
|
max@1
|
1317 newPart.letter = '-';
|
max@1
|
1318 newPart.value = 0;
|
max@1
|
1319 newPart.level = 0;
|
max@1
|
1320
|
max@1
|
1321 return newPart;
|
max@1
|
1322 }
|
max@1
|
1323
|
max@1
|
1324
|
max@1
|
1325 // Merge Nulls
|
max@1
|
1326 void mergenulls(vector<Part> &parts)
|
max@1
|
1327 {
|
Chris@37
|
1328 for (int iPart=0; iPart<(int)parts.size(); ++iPart)
|
max@1
|
1329 {
|
max@1
|
1330
|
max@1
|
1331 vector<Part> newVectorPart;
|
max@1
|
1332
|
max@1
|
1333 if (parts[iPart].letter.compare("-")==0)
|
max@1
|
1334 {
|
max@1
|
1335 sort (parts[iPart].indices.begin(), parts[iPart].indices.end());
|
Chris@21
|
1336 int newpartind = -1;
|
max@1
|
1337
|
max@1
|
1338 vector<int> indices;
|
max@1
|
1339 indices.push_back(-2);
|
max@1
|
1340
|
Chris@37
|
1341 for (int iIndex=0; iIndex<(int)parts[iPart].indices.size(); ++iIndex)
|
max@1
|
1342 indices.push_back(parts[iPart].indices[iIndex]);
|
max@1
|
1343
|
Chris@37
|
1344 for (int iInd=1; iInd < (int)indices.size(); ++iInd)
|
max@1
|
1345 {
|
max@1
|
1346 if (indices[iInd] - indices[iInd-1] > 1)
|
max@1
|
1347 {
|
max@1
|
1348 newpartind++;
|
max@1
|
1349
|
max@1
|
1350 Part newPart;
|
matthiasm@46
|
1351 newPart.letter = 'N';
|
max@1
|
1352 std::stringstream out;
|
max@1
|
1353 out << newpartind+1;
|
max@1
|
1354 newPart.letter.append(out.str());
|
matthiasm@44
|
1355 // newPart.value = 20+newpartind+1;
|
matthiasm@44
|
1356 newPart.value = 0;
|
max@1
|
1357 newPart.n = 1;
|
max@1
|
1358 newPart.indices.push_back(indices[iInd]);
|
max@1
|
1359 newPart.level = 0;
|
max@1
|
1360
|
max@1
|
1361 newVectorPart.push_back(newPart);
|
max@1
|
1362 }
|
max@1
|
1363 else
|
max@1
|
1364 {
|
max@1
|
1365 newVectorPart[newpartind].n = newVectorPart[newpartind].n+1;
|
max@1
|
1366 }
|
max@1
|
1367 }
|
max@1
|
1368 parts.erase (parts.end());
|
max@1
|
1369
|
Chris@37
|
1370 for (int i=0; i<(int)newVectorPart.size(); ++i)
|
max@1
|
1371 parts.push_back(newVectorPart[i]);
|
max@1
|
1372 }
|
max@1
|
1373 }
|
max@1
|
1374 }
|
max@1
|
1375
|
max@1
|
1376 /* ------ Segmentation ------ */
|
max@1
|
1377
|
Chris@19
|
1378 vector<Part> songSegment(Vamp::Plugin::FeatureList quantisedChromagram)
|
max@1
|
1379 {
|
max@1
|
1380
|
max@1
|
1381
|
max@1
|
1382 /* ------ Parameters ------ */
|
max@1
|
1383 double thresh_beat = 0.85;
|
max@1
|
1384 double thresh_seg = 0.80;
|
matthiasm@46
|
1385 int medfilt_length = 5;
|
max@1
|
1386 int minlength = 28;
|
matthiasm@46
|
1387 int maxlength = 2*128;
|
max@1
|
1388 double quantilePerc = 0.1;
|
max@1
|
1389 /* ------------------------ */
|
max@1
|
1390
|
max@1
|
1391
|
max@1
|
1392 // Collect Info
|
Chris@19
|
1393 int nBeat = quantisedChromagram.size(); // Number of feature vector
|
Chris@19
|
1394 int nFeatValues = quantisedChromagram[0].values.size(); // Number of values for each feature vector
|
max@1
|
1395
|
Chris@27
|
1396 if (nBeat < minlength) {
|
Chris@27
|
1397 // return a single part
|
Chris@27
|
1398 vector<Part> parts;
|
Chris@27
|
1399 Part newPart;
|
Chris@27
|
1400 newPart.n = 1;
|
Chris@27
|
1401 newPart.indices.push_back(0);
|
Chris@27
|
1402 newPart.letter = "n1";
|
Chris@27
|
1403 newPart.value = 20;
|
Chris@27
|
1404 newPart.level = 0;
|
Chris@27
|
1405 parts.push_back(newPart);
|
Chris@27
|
1406 return parts;
|
Chris@27
|
1407 }
|
Chris@27
|
1408
|
max@1
|
1409 arma::irowvec timeStamp = arma::zeros<arma::imat>(1,nBeat); // Vector of Time Stamps
|
max@1
|
1410
|
Chris@22
|
1411 // Save time stamp as a Vector
|
Chris@19
|
1412 if (quantisedChromagram[0].hasTimestamp)
|
max@1
|
1413 {
|
Chris@21
|
1414 for (int i = 0; i < nBeat; ++ i)
|
Chris@19
|
1415 timeStamp[i] = quantisedChromagram[i].timestamp.nsec;
|
max@1
|
1416 }
|
max@1
|
1417
|
max@1
|
1418
|
max@1
|
1419 // Build a ObservationTOFeatures Matrix
|
max@1
|
1420 arma::mat featVal = arma::zeros<mat>(nBeat,nFeatValues/2);
|
max@1
|
1421
|
Chris@21
|
1422 for (int i = 0; i < nBeat; ++ i)
|
Chris@21
|
1423 for (int j = 0; j < nFeatValues/2; ++ j)
|
max@1
|
1424 {
|
matthiasm@44
|
1425 featVal(i,j) = 0.8 * quantisedChromagram[i].values[j] + quantisedChromagram[i].values[j+12]; // bass attenuated
|
max@1
|
1426 }
|
max@1
|
1427
|
max@1
|
1428 // Set to arbitrary value to feature vectors with low std
|
max@1
|
1429 arma::mat a = stddev(featVal,1,1);
|
max@1
|
1430
|
matthiasm@44
|
1431 // Feature Correlation Matrix
|
max@1
|
1432 arma::mat simmat0 = 1-arma::cor(arma::trans(featVal));
|
max@1
|
1433
|
max@1
|
1434
|
Chris@21
|
1435 for (int i = 0; i < nBeat; ++ i)
|
max@1
|
1436 {
|
max@1
|
1437 if (a(i)<0.000001)
|
max@1
|
1438 {
|
max@1
|
1439 featVal(i,1) = 1000; // arbitrary
|
max@1
|
1440
|
Chris@21
|
1441 for (int j = 0; j < nFeatValues/2; ++j)
|
max@1
|
1442 {
|
max@1
|
1443 simmat0(i,j) = 1;
|
max@1
|
1444 simmat0(j,i) = 1;
|
max@1
|
1445 }
|
max@1
|
1446 }
|
max@1
|
1447 }
|
max@1
|
1448
|
max@1
|
1449 arma::mat simmat = 1-simmat0/2;
|
max@1
|
1450
|
max@1
|
1451 // -------- To delate when the proble with the add of beat will be solved -------
|
matthiasm@45
|
1452 for (int i = 0; i < nBeat; ++ i)
|
matthiasm@45
|
1453 for (int j = 0; j < nBeat; ++ j)
|
matthiasm@45
|
1454 if (!std::isfinite(simmat(i,j)))
|
matthiasm@45
|
1455 simmat(i,j)=0;
|
max@1
|
1456 // ------------------------------------------------------------------------------
|
max@1
|
1457
|
max@1
|
1458 // Median Filtering applied to the Correlation Matrix
|
max@1
|
1459 // The median filter is for each diagonal of the Matrix
|
max@1
|
1460 arma::mat median_simmat = arma::zeros<arma::mat>(nBeat,nBeat);
|
max@1
|
1461
|
Chris@21
|
1462 for (int i = 0; i < nBeat; ++ i)
|
max@1
|
1463 {
|
max@1
|
1464 arma::vec temp = medfilt1(simmat.diag(i),medfilt_length);
|
max@1
|
1465 median_simmat.diag(i) = temp;
|
max@1
|
1466 median_simmat.diag(-i) = temp;
|
max@1
|
1467 }
|
max@1
|
1468
|
Chris@21
|
1469 for (int i = 0; i < nBeat; ++ i)
|
Chris@21
|
1470 for (int j = 0; j < nBeat; ++ j)
|
max@1
|
1471 if (!std::isfinite(median_simmat(i,j)))
|
max@1
|
1472 median_simmat(i,j) = 0;
|
max@1
|
1473
|
max@1
|
1474 // -------------- NOT CONVERTED -------------------------------------
|
max@1
|
1475 // if param.seg.standardise
|
max@1
|
1476 // med_median_simmat = repmat(median(median_simmat),nBeat,1);
|
max@1
|
1477 // std_median_simmat = repmat(std(median_simmat),nBeat,1);
|
max@1
|
1478 // median_simmat = (median_simmat - med_median_simmat) ./ std_median_simmat;
|
max@1
|
1479 // end
|
max@1
|
1480 // --------------------------------------------------------
|
max@1
|
1481
|
max@1
|
1482 // Retrieve Bar Bounderies
|
max@1
|
1483 arma::uvec dup = find(median_simmat > thresh_beat);
|
max@1
|
1484 arma::mat potential_duplicates = arma::zeros<arma::mat>(nBeat,nBeat);
|
max@1
|
1485 potential_duplicates.elem(dup) = arma::ones<arma::vec>(dup.size());
|
max@1
|
1486 potential_duplicates = trimatu(potential_duplicates);
|
max@1
|
1487
|
Chris@21
|
1488 int nPartlengths = round((maxlength-minlength)/4)+1;
|
max@1
|
1489 arma::vec partlengths = zeros<arma::vec>(nPartlengths);
|
max@1
|
1490
|
Chris@21
|
1491 for (int i = 0; i < nPartlengths; ++ i)
|
matthiasm@46
|
1492 partlengths(i) = (i*4) + minlength;
|
max@1
|
1493
|
max@1
|
1494 // initialise arrays
|
max@1
|
1495 arma::cube simArray = zeros<arma::cube>(nBeat,nBeat,nPartlengths);
|
max@1
|
1496 arma::cube decisionArray2 = zeros<arma::cube>(nBeat,nBeat,nPartlengths);
|
max@1
|
1497
|
matthiasm@46
|
1498 for (int iLength = 0; iLength < nPartlengths; ++ iLength)
|
matthiasm@46
|
1499 // for (int iLength = 0; iLength < 20; ++ iLength)
|
max@1
|
1500 {
|
Chris@21
|
1501 int len = partlengths(iLength);
|
Chris@21
|
1502 int nUsedBeat = nBeat - len + 1; // number of potential rep beginnings: they can't overlap at the end of the song
|
Chris@33
|
1503
|
Chris@33
|
1504 if (nUsedBeat < 1) continue;
|
max@1
|
1505
|
Chris@21
|
1506 for (int iBeat = 0; iBeat < nUsedBeat; ++ iBeat) // looping over all columns (arbitrarily chosen columns)
|
max@1
|
1507 {
|
max@1
|
1508 arma::uvec help2 = find(potential_duplicates(span(0,nUsedBeat-1),iBeat)==1);
|
max@1
|
1509
|
Chris@37
|
1510 for (int i=0; i < (int)help2.size(); ++i)
|
max@1
|
1511 {
|
max@1
|
1512
|
max@1
|
1513 // measure how well two length len segments go together
|
max@1
|
1514 int kBeat = help2(i);
|
max@1
|
1515 arma::vec distrib = median_simmat(span(iBeat,iBeat+len-1),span(kBeat,kBeat+len-1)).diag(0);
|
max@1
|
1516 simArray(iBeat,kBeat,iLength) = quantile(distrib,quantilePerc);
|
max@1
|
1517 }
|
max@1
|
1518 }
|
max@1
|
1519
|
max@1
|
1520 arma::mat tempM = simArray(span(0,nUsedBeat-1),span(0,nUsedBeat-1),span(iLength,iLength));
|
max@1
|
1521 simArray.slice(iLength)(span(0,nUsedBeat-1),span(0,nUsedBeat-1)) = tempM + arma::trans(tempM) - (eye<mat>(nUsedBeat,nUsedBeat)%tempM);
|
max@1
|
1522
|
max@1
|
1523 // convolution
|
max@1
|
1524 arma::vec K = arma::zeros<vec>(3);
|
max@1
|
1525 K << 0.01 << 0.98 << 0.01;
|
max@1
|
1526
|
max@1
|
1527
|
Chris@37
|
1528 for (int i=0; i < (int)simArray.n_rows; ++i)
|
max@1
|
1529 {
|
max@1
|
1530 arma::rowvec t = arma::conv((arma::rowvec)simArray.slice(iLength).row(i),K);
|
max@1
|
1531 simArray.slice(iLength)(i,span::all) = t.subvec(1,t.size()-2);
|
max@1
|
1532 }
|
max@1
|
1533
|
max@1
|
1534 // take only over-average bars that do not overlap
|
max@1
|
1535
|
max@1
|
1536 arma::mat temp = arma::zeros<mat>(simArray.n_rows, simArray.n_cols);
|
max@1
|
1537 temp(span::all, span(0,nUsedBeat-1)) = simArray.slice(iLength)(span::all,span(0,nUsedBeat-1));
|
max@1
|
1538
|
Chris@37
|
1539 for (int i=0; i < (int)temp.n_rows; ++i)
|
Chris@37
|
1540 for (int j=0; j < nUsedBeat; ++j)
|
max@1
|
1541 if (temp(i,j) < thresh_seg)
|
max@1
|
1542 temp(i,j) = 0;
|
max@1
|
1543
|
max@1
|
1544 decisionArray2.slice(iLength) = temp;
|
max@1
|
1545
|
max@1
|
1546 arma::mat maxMat = maxfilt1(decisionArray2.slice(iLength),len-1);
|
max@1
|
1547
|
Chris@37
|
1548 for (int i=0; i < (int)decisionArray2.n_rows; ++i)
|
Chris@37
|
1549 for (int j=0; j < (int)decisionArray2.n_cols; ++j)
|
max@1
|
1550 if (decisionArray2.slice(iLength)(i,j) < maxMat(i,j))
|
max@1
|
1551 decisionArray2.slice(iLength)(i,j) = 0;
|
max@1
|
1552
|
max@1
|
1553 decisionArray2.slice(iLength) = decisionArray2.slice(iLength) % arma::trans(decisionArray2.slice(iLength));
|
max@1
|
1554
|
Chris@37
|
1555 for (int i=0; i < (int)simArray.n_rows; ++i)
|
Chris@37
|
1556 for (int j=0; j < (int)simArray.n_cols; ++j)
|
max@1
|
1557 if (simArray.slice(iLength)(i,j) < thresh_seg)
|
max@1
|
1558 potential_duplicates(i,j) = 0;
|
max@1
|
1559 }
|
max@1
|
1560
|
max@1
|
1561 // Milk the data
|
max@1
|
1562
|
max@1
|
1563 arma::mat bestval;
|
max@1
|
1564
|
Chris@21
|
1565 for (int iLength=0; iLength<nPartlengths; ++iLength)
|
max@1
|
1566 {
|
max@1
|
1567 arma::mat temp = arma::zeros<arma::mat>(decisionArray2.n_rows,decisionArray2.n_cols);
|
max@1
|
1568
|
Chris@37
|
1569 for (int rows=0; rows < (int)decisionArray2.n_rows; ++rows)
|
Chris@37
|
1570 for (int cols=0; cols < (int)decisionArray2.n_cols; ++cols)
|
max@1
|
1571 if (decisionArray2.slice(iLength)(rows,cols) > 0)
|
max@1
|
1572 temp(rows,cols) = 1;
|
max@1
|
1573
|
max@1
|
1574 arma::vec currLogicSum = arma::sum(temp,1);
|
max@1
|
1575
|
Chris@37
|
1576 for (int iBeat=0; iBeat < nBeat; ++iBeat)
|
max@1
|
1577 if (currLogicSum(iBeat) > 1)
|
max@1
|
1578 {
|
max@1
|
1579 arma::vec t = decisionArray2.slice(iLength)(span::all,iBeat);
|
max@1
|
1580 double currSum = sum(t);
|
max@1
|
1581
|
Chris@21
|
1582 int count = 0;
|
Chris@37
|
1583 for (int i=0; i < (int)t.size(); ++i)
|
max@1
|
1584 if (t(i)>0)
|
max@1
|
1585 count++;
|
max@1
|
1586
|
max@1
|
1587 currSum = (currSum/count)/2;
|
max@1
|
1588
|
max@1
|
1589 arma::rowvec t1;
|
max@1
|
1590 t1 << (currLogicSum(iBeat)-1) * partlengths(iLength) << currSum << iLength << iBeat << currLogicSum(iBeat);
|
max@1
|
1591
|
max@1
|
1592 bestval = join_cols(bestval,t1);
|
max@1
|
1593 }
|
max@1
|
1594 }
|
max@1
|
1595
|
max@1
|
1596 // Definition of the resulting vector
|
max@1
|
1597 vector<Part> parts;
|
max@1
|
1598
|
max@1
|
1599 // make a table of all valid sets of parts
|
max@1
|
1600
|
max@1
|
1601 char partletters[] = {'A','B','C','D','E','F','G', 'H','I','J','K','L','M','N','O','P','Q','R','S'};
|
Chris@21
|
1602 int partvalues[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19};
|
max@1
|
1603 arma::vec valid_sets = arma::ones<arma::vec>(bestval.n_rows);
|
max@1
|
1604
|
max@1
|
1605 if (!bestval.is_empty())
|
max@1
|
1606 {
|
max@1
|
1607
|
max@1
|
1608 // At this point an error is introduced at the third decimal place
|
max@1
|
1609
|
max@1
|
1610 arma::colvec t = arma::zeros<arma::colvec>(bestval.n_rows);
|
Chris@37
|
1611 for (int i=0; i < (int)bestval.n_rows; ++i)
|
max@1
|
1612 {
|
max@1
|
1613 t(i) = bestval(i,1)*2;
|
max@1
|
1614 }
|
max@1
|
1615
|
max@1
|
1616 double m = t.max();
|
max@1
|
1617
|
max@1
|
1618 bestval(span::all,1) = bestval(span::all,1) / m;
|
max@1
|
1619 bestval(span::all,0) = bestval(span::all,0) + bestval(span::all,1);
|
max@1
|
1620
|
max@1
|
1621 arma::mat bestval2;
|
Chris@37
|
1622 for (int i=0; i < (int)bestval.n_cols; ++i)
|
max@1
|
1623 if (i!=1)
|
max@1
|
1624 bestval2 = join_rows(bestval2,bestval.col(i));
|
max@1
|
1625
|
Chris@21
|
1626 for (int kSeg=0; kSeg<6; ++kSeg)
|
max@1
|
1627 {
|
max@1
|
1628 arma::mat currbestvals = arma::zeros<arma::mat>(bestval2.n_rows, bestval2.n_cols);
|
Chris@37
|
1629 for (int i=0; i < (int)bestval2.n_rows; ++i)
|
Chris@37
|
1630 for (int j=0; j < (int)bestval2.n_cols; ++j)
|
max@1
|
1631 if (valid_sets(i))
|
max@1
|
1632 currbestvals(i,j) = bestval2(i,j);
|
max@1
|
1633
|
max@1
|
1634 arma::vec t1 = currbestvals.col(0);
|
max@1
|
1635 double ma;
|
max@1
|
1636 uword maIdx;
|
max@1
|
1637 ma = t1.max(maIdx);
|
max@6
|
1638
|
max@6
|
1639 if ((maIdx == 0)&&(ma == 0))
|
max@6
|
1640 break;
|
max@1
|
1641
|
Chris@28
|
1642 int bestLength = lrint(partlengths(currbestvals(maIdx,1)));
|
max@1
|
1643 arma::rowvec bestIndices = decisionArray2.slice(currbestvals(maIdx,1))(currbestvals(maIdx,2),span::all);
|
max@1
|
1644
|
max@1
|
1645 arma::rowvec bestIndicesMap = arma::zeros<arma::rowvec>(bestIndices.size());
|
Chris@37
|
1646 for (int i=0; i < (int)bestIndices.size(); ++i)
|
max@1
|
1647 if (bestIndices(i)>0)
|
max@1
|
1648 bestIndicesMap(i) = 1;
|
max@1
|
1649
|
max@1
|
1650 arma::rowvec mask = arma::zeros<arma::rowvec>(bestLength*2-1);
|
Chris@21
|
1651 for (int i=0; i<bestLength; ++i)
|
max@1
|
1652 mask(i+bestLength-1) = 1;
|
max@1
|
1653
|
max@1
|
1654 arma::rowvec t2 = arma::conv(bestIndicesMap,mask);
|
max@1
|
1655 arma::rowvec island = t2.subvec(mask.size()/2,t2.size()-1-mask.size()/2);
|
max@1
|
1656
|
max@1
|
1657 // Save results in the structure
|
max@1
|
1658 Part newPart;
|
max@1
|
1659 newPart.n = bestLength;
|
max@1
|
1660 uvec q1 = find(bestIndices > 0);
|
max@1
|
1661
|
Chris@37
|
1662 for (int i=0; i < (int)q1.size();++i)
|
max@1
|
1663 newPart.indices.push_back(q1(i));
|
max@1
|
1664
|
max@1
|
1665 newPart.letter = partletters[kSeg];
|
max@1
|
1666 newPart.value = partvalues[kSeg];
|
max@1
|
1667 newPart.level = kSeg+1;
|
max@1
|
1668 parts.push_back(newPart);
|
max@1
|
1669
|
max@1
|
1670 uvec q2 = find(valid_sets==1);
|
max@1
|
1671
|
Chris@37
|
1672 for (int i=0; i < (int)q2.size(); ++i)
|
max@1
|
1673 {
|
Chris@21
|
1674 int iSet = q2(i);
|
Chris@21
|
1675 int s = partlengths(bestval2(iSet,1));
|
max@1
|
1676
|
max@1
|
1677 arma::rowvec mask1 = arma::zeros<arma::rowvec>(s*2-1);
|
Chris@21
|
1678 for (int i=0; i<s; ++i)
|
max@1
|
1679 mask1(i+s-1) = 1;
|
max@1
|
1680
|
max@1
|
1681 arma::rowvec Ind = decisionArray2.slice(bestval2(iSet,1))(bestval2(iSet,2),span::all);
|
max@1
|
1682 arma::rowvec IndMap = arma::zeros<arma::rowvec>(Ind.size());
|
Chris@37
|
1683 for (int i=0; i < (int)Ind.size(); ++i)
|
max@1
|
1684 if (Ind(i)>0)
|
max@1
|
1685 IndMap(i) = 2;
|
max@1
|
1686
|
max@1
|
1687 arma::rowvec t3 = arma::conv(IndMap,mask1);
|
max@6
|
1688 arma::rowvec currislands = t3.subvec(mask1.size()/2,t3.size()-1-mask1.size()/2);
|
max@1
|
1689 arma::rowvec islandsdMult = currislands%island;
|
max@6
|
1690
|
max@1
|
1691 arma::uvec islandsIndex = find(islandsdMult > 0);
|
max@1
|
1692
|
max@6
|
1693 if (islandsIndex.size() > 0)
|
max@1
|
1694 valid_sets(iSet) = 0;
|
max@1
|
1695 }
|
max@1
|
1696 }
|
max@1
|
1697 }
|
max@1
|
1698 else
|
max@1
|
1699 {
|
max@1
|
1700 Part newPart;
|
max@1
|
1701 newPart.n = nBeat;
|
Chris@33
|
1702 newPart.indices.push_back(0);
|
max@1
|
1703 newPart.letter = 'A';
|
max@1
|
1704 newPart.value = 1;
|
max@1
|
1705 newPart.level = 1;
|
max@1
|
1706 parts.push_back(newPart);
|
max@1
|
1707 }
|
max@6
|
1708
|
max@1
|
1709 arma::vec bar = linspace(1,nBeat,nBeat);
|
max@1
|
1710 Part np = nullpart(parts,bar);
|
max@7
|
1711
|
max@1
|
1712 parts.push_back(np);
|
max@1
|
1713
|
max@1
|
1714 // -------------- NOT CONVERTED -------------------------------------
|
max@1
|
1715 // if param.seg.editor
|
max@1
|
1716 // [pa, ta] = partarray(parts);
|
max@1
|
1717 // parts = editorssearch(pa, ta, parts);
|
max@1
|
1718 // parts = [parts, nullpart(parts,1:nBeat)];
|
max@1
|
1719 // end
|
max@1
|
1720 // ------------------------------------------------------------------
|
max@1
|
1721
|
max@1
|
1722
|
max@1
|
1723 mergenulls(parts);
|
max@1
|
1724
|
max@1
|
1725
|
max@1
|
1726 // -------------- NOT CONVERTED -------------------------------------
|
max@1
|
1727 // if param.seg.editor
|
max@1
|
1728 // [pa, ta] = partarray(parts);
|
max@1
|
1729 // parts = editorssearch(pa, ta, parts);
|
max@1
|
1730 // parts = [parts, nullpart(parts,1:nBeat)];
|
max@1
|
1731 // end
|
max@1
|
1732 // ------------------------------------------------------------------
|
max@1
|
1733
|
max@1
|
1734 return parts;
|
max@1
|
1735 }
|
max@1
|
1736
|
max@1
|
1737
|
max@1
|
1738
|
Chris@19
|
1739 void songSegmentChroma(Vamp::Plugin::FeatureList quantisedChromagram, vector<Part> &parts)
|
max@1
|
1740 {
|
max@1
|
1741 // Collect Info
|
Chris@19
|
1742 int nBeat = quantisedChromagram.size(); // Number of feature vector
|
Chris@19
|
1743 int nFeatValues = quantisedChromagram[0].values.size(); // Number of values for each feature vector
|
max@1
|
1744
|
max@1
|
1745 arma::mat synchTreble = arma::zeros<mat>(nBeat,nFeatValues/2);
|
max@1
|
1746
|
Chris@21
|
1747 for (int i = 0; i < nBeat; ++ i)
|
Chris@21
|
1748 for (int j = 0; j < nFeatValues/2; ++ j)
|
max@1
|
1749 {
|
Chris@19
|
1750 synchTreble(i,j) = quantisedChromagram[i].values[j];
|
max@1
|
1751 }
|
max@1
|
1752
|
max@1
|
1753 arma::mat synchBass = arma::zeros<mat>(nBeat,nFeatValues/2);
|
max@1
|
1754
|
Chris@21
|
1755 for (int i = 0; i < nBeat; ++ i)
|
Chris@21
|
1756 for (int j = 0; j < nFeatValues/2; ++ j)
|
max@1
|
1757 {
|
Chris@19
|
1758 synchBass(i,j) = quantisedChromagram[i].values[j+12];
|
max@1
|
1759 }
|
max@1
|
1760
|
max@1
|
1761 // Process
|
max@1
|
1762
|
Chris@19
|
1763 arma::mat segTreble = arma::zeros<arma::mat>(quantisedChromagram.size(),quantisedChromagram[0].values.size()/2);
|
Chris@19
|
1764 arma::mat segBass = arma::zeros<arma::mat>(quantisedChromagram.size(),quantisedChromagram[0].values.size()/2);
|
max@1
|
1765
|
Chris@37
|
1766 for (int iPart=0; iPart < (int)parts.size(); ++iPart)
|
max@1
|
1767 {
|
max@1
|
1768 parts[iPart].nInd = parts[iPart].indices.size();
|
max@1
|
1769
|
Chris@21
|
1770 for (int kOccur=0; kOccur<parts[iPart].nInd; ++kOccur)
|
max@1
|
1771 {
|
max@1
|
1772 int kStartIndex = parts[iPart].indices[kOccur];
|
max@1
|
1773 int kEndIndex = kStartIndex + parts[iPart].n-1;
|
max@1
|
1774
|
max@1
|
1775 segTreble.rows(kStartIndex,kEndIndex) = segTreble.rows(kStartIndex,kEndIndex) + synchTreble.rows(kStartIndex,kEndIndex);
|
max@1
|
1776 segBass.rows(kStartIndex,kEndIndex) = segBass.rows(kStartIndex,kEndIndex) + synchBass.rows(kStartIndex,kEndIndex);
|
max@1
|
1777 }
|
max@1
|
1778 }
|
max@1
|
1779 }
|
max@1
|
1780
|
max@1
|
1781
|
max@1
|
1782 // Segment Integration
|
max@1
|
1783 vector<Part> songSegmentIntegration(vector<Part> &parts)
|
max@1
|
1784 {
|
max@1
|
1785 // Break up parts (every part will have one instance)
|
max@1
|
1786 vector<Part> newPartVector;
|
max@1
|
1787 vector<int> partindices;
|
max@1
|
1788
|
Chris@37
|
1789 for (int iPart=0; iPart < (int)parts.size(); ++iPart)
|
max@1
|
1790 {
|
max@1
|
1791 parts[iPart].nInd = parts[iPart].indices.size();
|
Chris@21
|
1792 for (int iInstance=0; iInstance<parts[iPart].nInd; ++iInstance)
|
max@1
|
1793 {
|
max@1
|
1794 Part newPart;
|
max@1
|
1795 newPart.n = parts[iPart].n;
|
max@1
|
1796 newPart.letter = parts[iPart].letter;
|
max@1
|
1797 newPart.value = parts[iPart].value;
|
max@1
|
1798 newPart.level = parts[iPart].level;
|
max@1
|
1799 newPart.indices.push_back(parts[iPart].indices[iInstance]);
|
max@1
|
1800 newPart.nInd = 1;
|
max@1
|
1801 partindices.push_back(parts[iPart].indices[iInstance]);
|
max@1
|
1802
|
max@1
|
1803 newPartVector.push_back(newPart);
|
max@1
|
1804 }
|
max@1
|
1805 }
|
max@1
|
1806
|
max@1
|
1807
|
max@1
|
1808 // Sort the parts in order of occurrence
|
max@1
|
1809 sort (partindices.begin(), partindices.end());
|
max@1
|
1810
|
Chris@37
|
1811 for (int i=0; i < (int)partindices.size(); ++i)
|
max@1
|
1812 {
|
max@1
|
1813 bool found = false;
|
max@1
|
1814 int in=0;
|
max@1
|
1815 while (!found)
|
max@1
|
1816 {
|
max@1
|
1817 if (newPartVector[in].indices[0] == partindices[i])
|
max@1
|
1818 {
|
max@1
|
1819 newPartVector.push_back(newPartVector[in]);
|
max@1
|
1820 newPartVector.erase(newPartVector.begin()+in);
|
max@1
|
1821 found = true;
|
max@1
|
1822 }
|
max@1
|
1823 else
|
max@1
|
1824 in++;
|
max@1
|
1825 }
|
max@1
|
1826 }
|
max@1
|
1827
|
max@1
|
1828 // Clear the vector
|
Chris@37
|
1829 for (int iNewpart=1; iNewpart < (int)newPartVector.size(); ++iNewpart)
|
max@1
|
1830 {
|
max@1
|
1831 if (newPartVector[iNewpart].n < 12)
|
max@1
|
1832 {
|
max@1
|
1833 newPartVector[iNewpart-1].n = newPartVector[iNewpart-1].n + newPartVector[iNewpart].n;
|
max@1
|
1834 newPartVector.erase(newPartVector.begin()+iNewpart);
|
max@1
|
1835 }
|
max@1
|
1836 }
|
max@1
|
1837
|
max@1
|
1838 return newPartVector;
|
max@1
|
1839 }
|
max@1
|
1840
|
max@1
|
1841 // Segmenter
|
Chris@48
|
1842 Vamp::Plugin::FeatureList Segmentino::runSegmenter(Vamp::Plugin::FeatureList quantisedChromagram)
|
max@1
|
1843 {
|
max@1
|
1844 /* --- Display Information --- */
|
Chris@37
|
1845 // int numBeat = quantisedChromagram.size();
|
Chris@37
|
1846 // int numFeats = quantisedChromagram[0].values.size();
|
max@1
|
1847
|
max@1
|
1848 vector<Part> parts;
|
max@1
|
1849 vector<Part> finalParts;
|
max@1
|
1850
|
Chris@19
|
1851 parts = songSegment(quantisedChromagram);
|
Chris@19
|
1852 songSegmentChroma(quantisedChromagram,parts);
|
max@7
|
1853
|
max@1
|
1854 finalParts = songSegmentIntegration(parts);
|
max@1
|
1855
|
max@1
|
1856
|
max@1
|
1857 // TEMP ----
|
Chris@21
|
1858 /*for (int i=0;i<finalParts.size(); ++i)
|
max@1
|
1859 {
|
max@6
|
1860 std::cout << "Parts n° " << i << std::endl;
|
max@6
|
1861 std::cout << "n°: " << finalParts[i].n << std::endl;
|
max@6
|
1862 std::cout << "letter: " << finalParts[i].letter << std::endl;
|
max@1
|
1863
|
max@6
|
1864 std::cout << "indices: ";
|
Chris@21
|
1865 for (int j=0;j<finalParts[i].indices.size(); ++j)
|
max@6
|
1866 std::cout << finalParts[i].indices[j] << " ";
|
max@6
|
1867
|
max@6
|
1868 std::cout << std::endl;
|
max@6
|
1869 std::cout << "level: " << finalParts[i].level << std::endl;
|
max@1
|
1870 }*/
|
max@1
|
1871
|
max@1
|
1872 // ---------
|
max@1
|
1873
|
max@1
|
1874
|
max@1
|
1875 // Output
|
max@1
|
1876
|
max@1
|
1877 Vamp::Plugin::FeatureList results;
|
max@1
|
1878
|
max@1
|
1879
|
max@1
|
1880 Feature seg;
|
max@1
|
1881
|
max@1
|
1882 arma::vec indices;
|
Chris@37
|
1883 // int idx=0;
|
max@1
|
1884 vector<int> values;
|
max@1
|
1885 vector<string> letters;
|
max@1
|
1886
|
Chris@37
|
1887 for (int iPart=0; iPart < (int)finalParts.size()-1; ++iPart)
|
max@1
|
1888 {
|
Chris@21
|
1889 int iInstance=0;
|
max@1
|
1890 seg.hasTimestamp = true;
|
max@1
|
1891
|
max@1
|
1892 int ind = finalParts[iPart].indices[iInstance];
|
max@1
|
1893 int ind1 = finalParts[iPart+1].indices[iInstance];
|
max@1
|
1894
|
Chris@19
|
1895 seg.timestamp = quantisedChromagram[ind].timestamp;
|
max@1
|
1896 seg.hasDuration = true;
|
Chris@19
|
1897 seg.duration = quantisedChromagram[ind1].timestamp-quantisedChromagram[ind].timestamp;
|
max@1
|
1898 seg.values.clear();
|
max@1
|
1899 seg.values.push_back(finalParts[iPart].value);
|
max@1
|
1900 seg.label = finalParts[iPart].letter;
|
max@1
|
1901
|
max@1
|
1902 results.push_back(seg);
|
max@1
|
1903 }
|
max@1
|
1904
|
Chris@37
|
1905 if (finalParts.size() > 0) {
|
Chris@37
|
1906 int ind = finalParts[finalParts.size()-1].indices[0];
|
Chris@37
|
1907 seg.hasTimestamp = true;
|
Chris@37
|
1908 seg.timestamp = quantisedChromagram[ind].timestamp;
|
Chris@37
|
1909 seg.hasDuration = true;
|
Chris@37
|
1910 seg.duration = quantisedChromagram[quantisedChromagram.size()-1].timestamp-quantisedChromagram[ind].timestamp;
|
Chris@37
|
1911 seg.values.clear();
|
Chris@37
|
1912 seg.values.push_back(finalParts[finalParts.size()-1].value);
|
Chris@37
|
1913 seg.label = finalParts[finalParts.size()-1].letter;
|
max@1
|
1914
|
Chris@37
|
1915 results.push_back(seg);
|
Chris@37
|
1916 }
|
max@1
|
1917
|
max@1
|
1918 return results;
|
max@1
|
1919 }
|
max@1
|
1920
|
max@1
|
1921
|
max@1
|
1922
|
max@1
|
1923
|
max@1
|
1924
|
max@1
|
1925
|
max@1
|
1926
|
max@1
|
1927
|
max@1
|
1928
|
max@1
|
1929
|
max@1
|
1930
|
max@1
|
1931
|
max@1
|
1932
|
max@1
|
1933
|
max@1
|
1934
|
max@1
|
1935
|
max@1
|
1936
|