Mercurial > hg > segmenter-vamp-plugin
comparison segmentino/Segmentino.cpp @ 48:69251e11a913
Rename SongParts/songpartitioner to Segmentino throughout
author | Chris Cannam |
---|---|
date | Thu, 13 Jun 2013 09:43:01 +0100 |
parents | songparts/SongParts.cpp@f59ff6a22f8e |
children | 1ec0e2823891 |
comparison
equal
deleted
inserted
replaced
47:5ead8717a618 | 48:69251e11a913 |
---|---|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ | |
2 | |
3 /* | |
4 Segmentino | |
5 | |
6 Code by Massimiliano Zanoni and Matthias Mauch | |
7 Centre for Digital Music, Queen Mary, University of London | |
8 | |
9 Copyright 2009-2013 Queen Mary, University of London. | |
10 | |
11 This program is free software; you can redistribute it and/or | |
12 modify it under the terms of the GNU General Public License as | |
13 published by the Free Software Foundation; either version 2 of the | |
14 License, or (at your option) any later version. See the file | |
15 COPYING included with this distribution for more information. | |
16 */ | |
17 | |
18 #include "Segmentino.h" | |
19 | |
20 #include <base/Window.h> | |
21 #include <dsp/onsets/DetectionFunction.h> | |
22 #include <dsp/onsets/PeakPicking.h> | |
23 #include <dsp/transforms/FFT.h> | |
24 #include <dsp/tempotracking/TempoTrackV2.h> | |
25 #include <dsp/tempotracking/DownBeat.h> | |
26 #include <chromamethods.h> | |
27 #include <maths/MathUtilities.h> | |
28 #include <boost/numeric/ublas/matrix.hpp> | |
29 #include <boost/numeric/ublas/io.hpp> | |
30 #include <boost/math/distributions/normal.hpp> | |
31 #include "armadillo" | |
32 #include <fstream> | |
33 #include <sstream> | |
34 #include <cmath> | |
35 #include <vector> | |
36 | |
37 #include <vamp-sdk/Plugin.h> | |
38 | |
39 using namespace boost::numeric; | |
40 using namespace arma; | |
41 using std::string; | |
42 using std::vector; | |
43 using std::cerr; | |
44 using std::cout; | |
45 using std::endl; | |
46 | |
47 | |
48 #ifndef __GNUC__ | |
49 #include <alloca.h> | |
50 #endif | |
51 | |
52 | |
53 // Result Struct | |
54 typedef struct Part { | |
55 int n; | |
56 vector<int> indices; | |
57 string letter; | |
58 int value; | |
59 int level; | |
60 int nInd; | |
61 }Part; | |
62 | |
63 | |
64 | |
65 /* ------------------------------------ */ | |
66 /* ----- BEAT DETECTOR CLASS ---------- */ | |
67 /* ------------------------------------ */ | |
68 | |
69 class BeatTrackerData | |
70 { | |
71 /* --- ATTRIBUTES --- */ | |
72 public: | |
73 DFConfig dfConfig; | |
74 DetectionFunction *df; | |
75 DownBeat *downBeat; | |
76 vector<double> dfOutput; | |
77 Vamp::RealTime origin; | |
78 | |
79 | |
80 /* --- METHODS --- */ | |
81 | |
82 /* --- Constructor --- */ | |
83 public: | |
84 BeatTrackerData(float rate, const DFConfig &config) : dfConfig(config) { | |
85 | |
86 df = new DetectionFunction(config); | |
87 // decimation factor aims at resampling to c. 3KHz; must be power of 2 | |
88 int factor = MathUtilities::nextPowerOfTwo(rate / 3000); | |
89 // std::cerr << "BeatTrackerData: factor = " << factor << std::endl; | |
90 downBeat = new DownBeat(rate, factor, config.stepSize); | |
91 } | |
92 | |
93 /* --- Desctructor --- */ | |
94 ~BeatTrackerData() { | |
95 delete df; | |
96 delete downBeat; | |
97 } | |
98 | |
99 void reset() { | |
100 delete df; | |
101 df = new DetectionFunction(dfConfig); | |
102 dfOutput.clear(); | |
103 downBeat->resetAudioBuffer(); | |
104 origin = Vamp::RealTime::zeroTime; | |
105 } | |
106 }; | |
107 | |
108 | |
109 /* --------------------------------------- */ | |
110 /* ----- CHROMA EXTRACTOR CLASS ---------- */ | |
111 /* --------------------------------------- */ | |
112 | |
113 class ChromaData | |
114 { | |
115 | |
116 /* --- ATTRIBUTES --- */ | |
117 | |
118 public: | |
119 int frameCount; | |
120 int nBPS; | |
121 Vamp::Plugin::FeatureList logSpectrum; | |
122 int blockSize; | |
123 int lengthOfNoteIndex; | |
124 vector<float> meanTunings; | |
125 vector<float> localTunings; | |
126 float whitening; | |
127 float preset; | |
128 float useNNLS; | |
129 vector<float> localTuning; | |
130 vector<float> kernelValue; | |
131 vector<int> kernelFftIndex; | |
132 vector<int> kernelNoteIndex; | |
133 float *dict; | |
134 bool tuneLocal; | |
135 float doNormalizeChroma; | |
136 float rollon; | |
137 float s; | |
138 vector<float> hw; | |
139 vector<float> sinvalues; | |
140 vector<float> cosvalues; | |
141 Window<float> window; | |
142 FFTReal fft; | |
143 int inputSampleRate; | |
144 | |
145 /* --- METHODS --- */ | |
146 | |
147 /* --- Constructor --- */ | |
148 | |
149 public: | |
150 ChromaData(float inputSampleRate, size_t block_size) : | |
151 frameCount(0), | |
152 nBPS(3), | |
153 logSpectrum(0), | |
154 blockSize(0), | |
155 lengthOfNoteIndex(0), | |
156 meanTunings(0), | |
157 localTunings(0), | |
158 whitening(1.0), | |
159 preset(0.0), | |
160 useNNLS(1.0), | |
161 localTuning(0.0), | |
162 kernelValue(0), | |
163 kernelFftIndex(0), | |
164 kernelNoteIndex(0), | |
165 dict(0), | |
166 tuneLocal(0.0), | |
167 doNormalizeChroma(0), | |
168 rollon(0.0), | |
169 s(0.7), | |
170 sinvalues(0), | |
171 cosvalues(0), | |
172 window(HanningWindow, block_size), | |
173 fft(block_size), | |
174 inputSampleRate(inputSampleRate) | |
175 { | |
176 // make the *note* dictionary matrix | |
177 dict = new float[nNote * 84]; | |
178 for (int i = 0; i < nNote * 84; ++i) dict[i] = 0.0; | |
179 blockSize = block_size; | |
180 } | |
181 | |
182 | |
183 /* --- Desctructor --- */ | |
184 | |
185 ~ChromaData() { | |
186 delete [] dict; | |
187 } | |
188 | |
189 /* --- Public Methods --- */ | |
190 | |
191 void reset() { | |
192 frameCount = 0; | |
193 logSpectrum.clear(); | |
194 for (int iBPS = 0; iBPS < 3; ++iBPS) { | |
195 meanTunings[iBPS] = 0; | |
196 localTunings[iBPS] = 0; | |
197 } | |
198 localTuning.clear(); | |
199 } | |
200 | |
201 void baseProcess(float *inputBuffers, Vamp::RealTime timestamp) | |
202 { | |
203 | |
204 frameCount++; | |
205 float *magnitude = new float[blockSize/2]; | |
206 double *fftReal = new double[blockSize]; | |
207 double *fftImag = new double[blockSize]; | |
208 | |
209 // FFTReal wants doubles, so we need to make a local copy of inputBuffers | |
210 double *inputBuffersDouble = new double[blockSize]; | |
211 for (int i = 0; i < blockSize; i++) inputBuffersDouble[i] = inputBuffers[i]; | |
212 | |
213 fft.process(false, inputBuffersDouble, fftReal, fftImag); | |
214 | |
215 float energysum = 0; | |
216 // make magnitude | |
217 float maxmag = -10000; | |
218 for (int iBin = 0; iBin < static_cast<int>(blockSize/2); iBin++) { | |
219 magnitude[iBin] = sqrt(fftReal[iBin] * fftReal[iBin] + | |
220 fftImag[iBin] * fftImag[iBin]); | |
221 if (magnitude[iBin]>blockSize*1.0) magnitude[iBin] = blockSize; | |
222 // a valid audio signal (between -1 and 1) should not be limited here. | |
223 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin]; | |
224 if (rollon > 0) { | |
225 energysum += pow(magnitude[iBin],2); | |
226 } | |
227 } | |
228 | |
229 float cumenergy = 0; | |
230 if (rollon > 0) { | |
231 for (int iBin = 2; iBin < static_cast<int>(blockSize/2); iBin++) { | |
232 cumenergy += pow(magnitude[iBin],2); | |
233 if (cumenergy < energysum * rollon / 100) magnitude[iBin-2] = 0; | |
234 else break; | |
235 } | |
236 } | |
237 | |
238 if (maxmag < 2) { | |
239 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl; | |
240 for (int iBin = 0; iBin < static_cast<int>(blockSize/2); iBin++) { | |
241 magnitude[iBin] = 0; | |
242 } | |
243 } | |
244 | |
245 // cerr << magnitude[200] << endl; | |
246 | |
247 // note magnitude mapping using pre-calculated matrix | |
248 float *nm = new float[nNote]; // note magnitude | |
249 for (int iNote = 0; iNote < nNote; iNote++) { | |
250 nm[iNote] = 0; // initialise as 0 | |
251 } | |
252 int binCount = 0; | |
253 for (vector<float>::iterator it = kernelValue.begin(); it != kernelValue.end(); ++it) { | |
254 nm[kernelNoteIndex[binCount]] += magnitude[kernelFftIndex[binCount]] * kernelValue[binCount]; | |
255 binCount++; | |
256 } | |
257 | |
258 float one_over_N = 1.0/frameCount; | |
259 // update means of complex tuning variables | |
260 for (int iBPS = 0; iBPS < nBPS; ++iBPS) meanTunings[iBPS] *= float(frameCount-1)*one_over_N; | |
261 | |
262 for (int iTone = 0; iTone < round(nNote*0.62/nBPS)*nBPS+1; iTone = iTone + nBPS) { | |
263 for (int iBPS = 0; iBPS < nBPS; ++iBPS) meanTunings[iBPS] += nm[iTone + iBPS]*one_over_N; | |
264 float ratioOld = 0.997; | |
265 for (int iBPS = 0; iBPS < nBPS; ++iBPS) { | |
266 localTunings[iBPS] *= ratioOld; | |
267 localTunings[iBPS] += nm[iTone + iBPS] * (1 - ratioOld); | |
268 } | |
269 } | |
270 | |
271 float localTuningImag = 0; | |
272 float localTuningReal = 0; | |
273 for (int iBPS = 0; iBPS < nBPS; ++iBPS) { | |
274 localTuningReal += localTunings[iBPS] * cosvalues[iBPS]; | |
275 localTuningImag += localTunings[iBPS] * sinvalues[iBPS]; | |
276 } | |
277 | |
278 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI); | |
279 localTuning.push_back(normalisedtuning); | |
280 | |
281 Vamp::Plugin::Feature f1; // logfreqspec | |
282 f1.hasTimestamp = true; | |
283 f1.timestamp = timestamp; | |
284 for (int iNote = 0; iNote < nNote; iNote++) { | |
285 f1.values.push_back(nm[iNote]); | |
286 } | |
287 | |
288 // deletes | |
289 delete[] inputBuffersDouble; | |
290 delete[] magnitude; | |
291 delete[] fftReal; | |
292 delete[] fftImag; | |
293 delete[] nm; | |
294 | |
295 logSpectrum.push_back(f1); // remember note magnitude | |
296 } | |
297 | |
298 bool initialise() | |
299 { | |
300 dictionaryMatrix(dict, s); | |
301 | |
302 // make things for tuning estimation | |
303 for (int iBPS = 0; iBPS < nBPS; ++iBPS) { | |
304 sinvalues.push_back(sin(2*M_PI*(iBPS*1.0/nBPS))); | |
305 cosvalues.push_back(cos(2*M_PI*(iBPS*1.0/nBPS))); | |
306 } | |
307 | |
308 | |
309 // make hamming window of length 1/2 octave | |
310 int hamwinlength = nBPS * 6 + 1; | |
311 float hamwinsum = 0; | |
312 for (int i = 0; i < hamwinlength; ++i) { | |
313 hw.push_back(0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1))); | |
314 hamwinsum += 0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1)); | |
315 } | |
316 for (int i = 0; i < hamwinlength; ++i) hw[i] = hw[i] / hamwinsum; | |
317 | |
318 | |
319 // initialise the tuning | |
320 for (int iBPS = 0; iBPS < nBPS; ++iBPS) { | |
321 meanTunings.push_back(0); | |
322 localTunings.push_back(0); | |
323 } | |
324 | |
325 blockSize = blockSize; | |
326 frameCount = 0; | |
327 int tempn = nNote * blockSize/2; | |
328 // cerr << "length of tempkernel : " << tempn << endl; | |
329 float *tempkernel; | |
330 | |
331 tempkernel = new float[tempn]; | |
332 | |
333 logFreqMatrix(inputSampleRate, blockSize, tempkernel); | |
334 kernelValue.clear(); | |
335 kernelFftIndex.clear(); | |
336 kernelNoteIndex.clear(); | |
337 int countNonzero = 0; | |
338 for (int iNote = 0; iNote < nNote; ++iNote) { | |
339 // I don't know if this is wise: manually making a sparse matrix | |
340 for (int iFFT = 0; iFFT < static_cast<int>(blockSize/2); ++iFFT) { | |
341 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { | |
342 kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]); | |
343 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { | |
344 countNonzero++; | |
345 } | |
346 kernelFftIndex.push_back(iFFT); | |
347 kernelNoteIndex.push_back(iNote); | |
348 } | |
349 } | |
350 } | |
351 delete [] tempkernel; | |
352 | |
353 return true; | |
354 } | |
355 }; | |
356 | |
357 | |
358 /* --------------------------------- */ | |
359 /* ----- SONG PARTITIONER ---------- */ | |
360 /* --------------------------------- */ | |
361 | |
362 | |
363 /* --- ATTRIBUTES --- */ | |
364 | |
365 float Segmentino::m_stepSecs = 0.01161; // 512 samples at 44100 | |
366 int Segmentino::m_chromaFramesizeFactor = 16; // 16 times as long as beat tracker's | |
367 int Segmentino::m_chromaStepsizeFactor = 4; // 4 times as long as beat tracker's | |
368 | |
369 | |
370 /* --- METHODS --- */ | |
371 | |
372 /* --- Constructor --- */ | |
373 Segmentino::Segmentino(float inputSampleRate) : | |
374 Vamp::Plugin(inputSampleRate), | |
375 m_d(0), | |
376 m_chromadata(0), | |
377 m_bpb(4), | |
378 m_pluginFrameCount(0) | |
379 { | |
380 } | |
381 | |
382 | |
383 /* --- Desctructor --- */ | |
384 Segmentino::~Segmentino() | |
385 { | |
386 delete m_d; | |
387 delete m_chromadata; | |
388 } | |
389 | |
390 | |
391 /* --- Methods --- */ | |
392 string Segmentino::getIdentifier() const | |
393 { | |
394 return "qm-songpartitioner"; | |
395 } | |
396 | |
397 string Segmentino::getName() const | |
398 { | |
399 return "Song Partitioner"; | |
400 } | |
401 | |
402 string Segmentino::getDescription() const | |
403 { | |
404 return "Estimate contiguous segments pertaining to song parts such as verse and chorus."; | |
405 } | |
406 | |
407 string Segmentino::getMaker() const | |
408 { | |
409 return "Queen Mary, University of London"; | |
410 } | |
411 | |
412 int Segmentino::getPluginVersion() const | |
413 { | |
414 return 2; | |
415 } | |
416 | |
417 string Segmentino::getCopyright() const | |
418 { | |
419 return "Plugin by Matthew Davies, Christian Landone, Chris Cannam, Matthias Mauch and Massimiliano Zanoni Copyright (c) 2006-2012 QMUL - All Rights Reserved"; | |
420 } | |
421 | |
422 Segmentino::ParameterList Segmentino::getParameterDescriptors() const | |
423 { | |
424 ParameterList list; | |
425 | |
426 ParameterDescriptor desc; | |
427 | |
428 // desc.identifier = "bpb"; | |
429 // desc.name = "Beats per Bar"; | |
430 // desc.description = "The number of beats in each bar"; | |
431 // desc.minValue = 2; | |
432 // desc.maxValue = 16; | |
433 // desc.defaultValue = 4; | |
434 // desc.isQuantized = true; | |
435 // desc.quantizeStep = 1; | |
436 // list.push_back(desc); | |
437 | |
438 return list; | |
439 } | |
440 | |
441 float Segmentino::getParameter(std::string name) const | |
442 { | |
443 if (name == "bpb") return m_bpb; | |
444 return 0.0; | |
445 } | |
446 | |
447 void Segmentino::setParameter(std::string name, float value) | |
448 { | |
449 if (name == "bpb") m_bpb = lrintf(value); | |
450 } | |
451 | |
452 | |
453 // Return the StepSize for Chroma Extractor | |
454 size_t Segmentino::getPreferredStepSize() const | |
455 { | |
456 size_t step = size_t(m_inputSampleRate * m_stepSecs + 0.0001); | |
457 if (step < 1) step = 1; | |
458 | |
459 return step; | |
460 } | |
461 | |
462 // Return the BlockSize for Chroma Extractor | |
463 size_t Segmentino::getPreferredBlockSize() const | |
464 { | |
465 size_t theoretical = getPreferredStepSize() * 2; | |
466 theoretical *= m_chromaFramesizeFactor; | |
467 | |
468 return theoretical; | |
469 } | |
470 | |
471 | |
472 // Initialize the plugin and define Beat Tracker and Chroma Extractor Objects | |
473 bool Segmentino::initialise(size_t channels, size_t stepSize, size_t blockSize) | |
474 { | |
475 if (m_d) { | |
476 delete m_d; | |
477 m_d = 0; | |
478 } | |
479 if (m_chromadata) { | |
480 delete m_chromadata; | |
481 m_chromadata = 0; | |
482 } | |
483 | |
484 if (channels < getMinChannelCount() || | |
485 channels > getMaxChannelCount()) { | |
486 std::cerr << "Segmentino::initialise: Unsupported channel count: " | |
487 << channels << std::endl; | |
488 return false; | |
489 } | |
490 | |
491 if (stepSize != getPreferredStepSize()) { | |
492 std::cerr << "ERROR: Segmentino::initialise: Unsupported step size for this sample rate: " | |
493 << stepSize << " (wanted " << (getPreferredStepSize()) << ")" << std::endl; | |
494 return false; | |
495 } | |
496 | |
497 if (blockSize != getPreferredBlockSize()) { | |
498 std::cerr << "WARNING: Segmentino::initialise: Sub-optimal block size for this sample rate: " | |
499 << blockSize << " (wanted " << getPreferredBlockSize() << ")" << std::endl; | |
500 } | |
501 | |
502 // Beat tracker and Chroma extractor has two different configuration parameters | |
503 | |
504 // Configuration Parameters for Beat Tracker | |
505 DFConfig dfConfig; | |
506 dfConfig.DFType = DF_COMPLEXSD; | |
507 dfConfig.stepSize = stepSize; | |
508 dfConfig.frameLength = blockSize / m_chromaFramesizeFactor; | |
509 dfConfig.dbRise = 3; | |
510 dfConfig.adaptiveWhitening = false; | |
511 dfConfig.whiteningRelaxCoeff = -1; | |
512 dfConfig.whiteningFloor = -1; | |
513 | |
514 // Initialise Beat Tracker | |
515 m_d = new BeatTrackerData(m_inputSampleRate, dfConfig); | |
516 m_d->downBeat->setBeatsPerBar(m_bpb); | |
517 | |
518 // Initialise Chroma Extractor | |
519 m_chromadata = new ChromaData(m_inputSampleRate, blockSize); | |
520 m_chromadata->initialise(); | |
521 | |
522 return true; | |
523 } | |
524 | |
525 void Segmentino::reset() | |
526 { | |
527 if (m_d) m_d->reset(); | |
528 if (m_chromadata) m_chromadata->reset(); | |
529 m_pluginFrameCount = 0; | |
530 } | |
531 | |
532 Segmentino::OutputList Segmentino::getOutputDescriptors() const | |
533 { | |
534 OutputList list; | |
535 int outputCounter = 0; | |
536 | |
537 OutputDescriptor beat; | |
538 beat.identifier = "beats"; | |
539 beat.name = "Beats"; | |
540 beat.description = "Beat locations labelled with metrical position"; | |
541 beat.unit = ""; | |
542 beat.hasFixedBinCount = true; | |
543 beat.binCount = 0; | |
544 beat.sampleType = OutputDescriptor::VariableSampleRate; | |
545 beat.sampleRate = 1.0 / m_stepSecs; | |
546 m_beatOutputNumber = outputCounter++; | |
547 | |
548 OutputDescriptor bars; | |
549 bars.identifier = "bars"; | |
550 bars.name = "Bars"; | |
551 bars.description = "Bar locations"; | |
552 bars.unit = ""; | |
553 bars.hasFixedBinCount = true; | |
554 bars.binCount = 0; | |
555 bars.sampleType = OutputDescriptor::VariableSampleRate; | |
556 bars.sampleRate = 1.0 / m_stepSecs; | |
557 m_barsOutputNumber = outputCounter++; | |
558 | |
559 OutputDescriptor beatcounts; | |
560 beatcounts.identifier = "beatcounts"; | |
561 beatcounts.name = "Beat Count"; | |
562 beatcounts.description = "Beat counter function"; | |
563 beatcounts.unit = ""; | |
564 beatcounts.hasFixedBinCount = true; | |
565 beatcounts.binCount = 1; | |
566 beatcounts.sampleType = OutputDescriptor::VariableSampleRate; | |
567 beatcounts.sampleRate = 1.0 / m_stepSecs; | |
568 m_beatcountsOutputNumber = outputCounter++; | |
569 | |
570 OutputDescriptor beatsd; | |
571 beatsd.identifier = "beatsd"; | |
572 beatsd.name = "Beat Spectral Difference"; | |
573 beatsd.description = "Beat spectral difference function used for bar-line detection"; | |
574 beatsd.unit = ""; | |
575 beatsd.hasFixedBinCount = true; | |
576 beatsd.binCount = 1; | |
577 beatsd.sampleType = OutputDescriptor::VariableSampleRate; | |
578 beatsd.sampleRate = 1.0 / m_stepSecs; | |
579 m_beatsdOutputNumber = outputCounter++; | |
580 | |
581 OutputDescriptor logscalespec; | |
582 logscalespec.identifier = "logscalespec"; | |
583 logscalespec.name = "Log-Frequency Spectrum"; | |
584 logscalespec.description = "Spectrum with linear frequency on a log scale."; | |
585 logscalespec.unit = ""; | |
586 logscalespec.hasFixedBinCount = true; | |
587 logscalespec.binCount = nNote; | |
588 logscalespec.hasKnownExtents = false; | |
589 logscalespec.isQuantized = false; | |
590 logscalespec.sampleType = OutputDescriptor::FixedSampleRate; | |
591 logscalespec.hasDuration = false; | |
592 logscalespec.sampleRate = m_inputSampleRate/2048; | |
593 m_logscalespecOutputNumber = outputCounter++; | |
594 | |
595 OutputDescriptor bothchroma; | |
596 bothchroma.identifier = "bothchroma"; | |
597 bothchroma.name = "Chromagram and Bass Chromagram"; | |
598 bothchroma.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS approximate transcription."; | |
599 bothchroma.unit = ""; | |
600 bothchroma.hasFixedBinCount = true; | |
601 bothchroma.binCount = 24; | |
602 bothchroma.hasKnownExtents = false; | |
603 bothchroma.isQuantized = false; | |
604 bothchroma.sampleType = OutputDescriptor::FixedSampleRate; | |
605 bothchroma.hasDuration = false; | |
606 bothchroma.sampleRate = m_inputSampleRate/2048; | |
607 m_bothchromaOutputNumber = outputCounter++; | |
608 | |
609 OutputDescriptor qchromafw; | |
610 qchromafw.identifier = "qchromafw"; | |
611 qchromafw.name = "Pseudo-Quantised Chromagram and Bass Chromagram"; | |
612 qchromafw.description = "Pseudo-Quantised Chromagram and Bass Chromagram (frames between two beats are identical)."; | |
613 qchromafw.unit = ""; | |
614 qchromafw.hasFixedBinCount = true; | |
615 qchromafw.binCount = 24; | |
616 qchromafw.hasKnownExtents = false; | |
617 qchromafw.isQuantized = false; | |
618 qchromafw.sampleType = OutputDescriptor::FixedSampleRate; | |
619 qchromafw.hasDuration = false; | |
620 qchromafw.sampleRate = m_inputSampleRate/2048; | |
621 m_qchromafwOutputNumber = outputCounter++; | |
622 | |
623 OutputDescriptor qchroma; | |
624 qchroma.identifier = "qchroma"; | |
625 qchroma.name = "Quantised Chromagram and Bass Chromagram"; | |
626 qchroma.description = "Quantised Chromagram and Bass Chromagram."; | |
627 qchroma.unit = ""; | |
628 qchroma.hasFixedBinCount = true; | |
629 qchroma.binCount = 24; | |
630 qchroma.hasKnownExtents = false; | |
631 qchroma.isQuantized = false; | |
632 qchroma.sampleType = OutputDescriptor::FixedSampleRate; | |
633 qchroma.hasDuration = true; | |
634 qchroma.sampleRate = m_inputSampleRate/2048; | |
635 m_qchromaOutputNumber = outputCounter++; | |
636 | |
637 OutputDescriptor segm; | |
638 segm.identifier = "segmentation"; | |
639 segm.name = "Segmentation"; | |
640 segm.description = "Segmentation"; | |
641 segm.unit = "segment-type"; | |
642 segm.hasFixedBinCount = true; | |
643 //segm.binCount = 24; | |
644 segm.binCount = 1; | |
645 segm.hasKnownExtents = true; | |
646 segm.minValue = 1; | |
647 segm.maxValue = 5; | |
648 segm.isQuantized = true; | |
649 segm.quantizeStep = 1; | |
650 segm.sampleType = OutputDescriptor::VariableSampleRate; | |
651 segm.sampleRate = 1.0 / m_stepSecs; | |
652 segm.hasDuration = true; | |
653 m_segmOutputNumber = outputCounter++; | |
654 | |
655 | |
656 /* | |
657 OutputList list; | |
658 OutputDescriptor segmentation; | |
659 segmentation.identifier = "segmentation"; | |
660 segmentation.name = "Segmentation"; | |
661 segmentation.description = "Segmentation"; | |
662 segmentation.unit = "segment-type"; | |
663 segmentation.hasFixedBinCount = true; | |
664 segmentation.binCount = 1; | |
665 segmentation.hasKnownExtents = true; | |
666 segmentation.minValue = 1; | |
667 segmentation.maxValue = nSegmentTypes; | |
668 segmentation.isQuantized = true; | |
669 segmentation.quantizeStep = 1; | |
670 segmentation.sampleType = OutputDescriptor::VariableSampleRate; | |
671 segmentation.sampleRate = m_inputSampleRate / getPreferredStepSize(); | |
672 list.push_back(segmentation); | |
673 return list; | |
674 */ | |
675 | |
676 | |
677 list.push_back(beat); | |
678 list.push_back(bars); | |
679 list.push_back(beatcounts); | |
680 list.push_back(beatsd); | |
681 list.push_back(logscalespec); | |
682 list.push_back(bothchroma); | |
683 list.push_back(qchromafw); | |
684 list.push_back(qchroma); | |
685 list.push_back(segm); | |
686 | |
687 return list; | |
688 } | |
689 | |
690 // Executed for each frame - called from the host | |
691 | |
692 // We use time domain input, because DownBeat requires it -- so we | |
693 // use the time-domain version of DetectionFunction::process which | |
694 // does its own FFT. It requires doubles as input, so we need to | |
695 // make a temporary copy | |
696 | |
697 // We only support a single input channel | |
698 Segmentino::FeatureSet Segmentino::process(const float *const *inputBuffers,Vamp::RealTime timestamp) | |
699 { | |
700 if (!m_d) { | |
701 cerr << "ERROR: Segmentino::process: " | |
702 << "Segmentino has not been initialised" | |
703 << endl; | |
704 return FeatureSet(); | |
705 } | |
706 | |
707 const int fl = m_d->dfConfig.frameLength; | |
708 #ifndef __GNUC__ | |
709 double *dfinput = (double *)alloca(fl * sizeof(double)); | |
710 #else | |
711 double dfinput[fl]; | |
712 #endif | |
713 int sampleOffset = ((m_chromaFramesizeFactor-1) * fl) / 2; | |
714 | |
715 // Since chroma needs a much longer frame size, we only ever use the very | |
716 // beginning of the frame for beat tracking. | |
717 for (int i = 0; i < fl; ++i) dfinput[i] = inputBuffers[0][i]; | |
718 double output = m_d->df->process(dfinput); | |
719 | |
720 if (m_d->dfOutput.empty()) m_d->origin = timestamp; | |
721 | |
722 // std::cerr << "df[" << m_d->dfOutput.size() << "] is " << output << std::endl; | |
723 m_d->dfOutput.push_back(output); | |
724 | |
725 // Downsample and store the incoming audio block. | |
726 // We have an overlap on the incoming audio stream (step size is | |
727 // half block size) -- this function is configured to take only a | |
728 // step size's worth, so effectively ignoring the overlap. Note | |
729 // however that this means we omit the last blocksize - stepsize | |
730 // samples completely for the purposes of barline detection | |
731 // (hopefully not a problem) | |
732 m_d->downBeat->pushAudioBlock(inputBuffers[0]); | |
733 | |
734 // The following is not done every time, but only every m_chromaFramesizeFactor times, | |
735 // because the chroma does not need dense time frames. | |
736 | |
737 if (m_pluginFrameCount % m_chromaStepsizeFactor == 0) | |
738 { | |
739 | |
740 // Window the full time domain, data, FFT it and process chroma stuff. | |
741 | |
742 #ifndef __GNUC__ | |
743 float *windowedBuffers = (float *)alloca(m_chromadata->blockSize * sizeof(float)); | |
744 #else | |
745 float windowedBuffers[m_chromadata->blockSize]; | |
746 #endif | |
747 m_chromadata->window.cut(&inputBuffers[0][0], &windowedBuffers[0]); | |
748 | |
749 // adjust timestamp (we want the middle of the frame) | |
750 timestamp = timestamp + Vamp::RealTime::frame2RealTime(sampleOffset, lrintf(m_inputSampleRate)); | |
751 | |
752 m_chromadata->baseProcess(&windowedBuffers[0], timestamp); | |
753 | |
754 } | |
755 m_pluginFrameCount++; | |
756 | |
757 FeatureSet fs; | |
758 fs[m_logscalespecOutputNumber].push_back( | |
759 m_chromadata->logSpectrum.back()); | |
760 return fs; | |
761 } | |
762 | |
763 Segmentino::FeatureSet Segmentino::getRemainingFeatures() | |
764 { | |
765 if (!m_d) { | |
766 cerr << "ERROR: Segmentino::getRemainingFeatures: " | |
767 << "Segmentino has not been initialised" | |
768 << endl; | |
769 return FeatureSet(); | |
770 } | |
771 | |
772 FeatureSet masterFeatureset = beatTrack(); | |
773 Vamp::RealTime last_beattime = masterFeatureset[m_beatOutputNumber][masterFeatureset[m_beatOutputNumber].size()-1].timestamp; | |
774 masterFeatureset[m_beatOutputNumber].clear(); | |
775 Vamp::RealTime beattime = Vamp::RealTime::fromSeconds(1.0); | |
776 while (beattime < last_beattime) | |
777 { | |
778 Feature beatfeature; | |
779 beatfeature.hasTimestamp = true; | |
780 beatfeature.timestamp = beattime; | |
781 masterFeatureset[m_beatOutputNumber].push_back(beatfeature); | |
782 beattime = beattime + Vamp::RealTime::fromSeconds(0.5); | |
783 } | |
784 | |
785 | |
786 FeatureList chromaList = chromaFeatures(); | |
787 | |
788 for (int i = 0; i < (int)chromaList.size(); ++i) | |
789 { | |
790 masterFeatureset[m_bothchromaOutputNumber].push_back(chromaList[i]); | |
791 } | |
792 | |
793 // quantised and pseudo-quantised (beat-wise) chroma | |
794 std::vector<FeatureList> quantisedChroma = beatQuantiser(chromaList, masterFeatureset[m_beatOutputNumber]); | |
795 | |
796 if (quantisedChroma.empty()) return masterFeatureset; | |
797 | |
798 masterFeatureset[m_qchromafwOutputNumber] = quantisedChroma[0]; | |
799 masterFeatureset[m_qchromaOutputNumber] = quantisedChroma[1]; | |
800 | |
801 // Segmentation | |
802 try { | |
803 masterFeatureset[m_segmOutputNumber] = runSegmenter(quantisedChroma[1]); | |
804 } catch (std::bad_alloc &a) { | |
805 cerr << "ERROR: Segmentino::getRemainingFeatures: Failed to run segmenter, not enough memory (song too long?)" << endl; | |
806 } | |
807 | |
808 return(masterFeatureset); | |
809 } | |
810 | |
811 /* ------ Beat Tracker ------ */ | |
812 | |
813 Segmentino::FeatureSet Segmentino::beatTrack() | |
814 { | |
815 vector<double> df; | |
816 vector<double> beatPeriod; | |
817 vector<double> tempi; | |
818 | |
819 for (int i = 2; i < (int)m_d->dfOutput.size(); ++i) { // discard first two elts | |
820 df.push_back(m_d->dfOutput[i]); | |
821 beatPeriod.push_back(0.0); | |
822 } | |
823 if (df.empty()) return FeatureSet(); | |
824 | |
825 TempoTrackV2 tt(m_inputSampleRate, m_d->dfConfig.stepSize); | |
826 tt.calculateBeatPeriod(df, beatPeriod, tempi); | |
827 | |
828 vector<double> beats; | |
829 tt.calculateBeats(df, beatPeriod, beats); | |
830 | |
831 vector<int> downbeats; | |
832 size_t downLength = 0; | |
833 const float *downsampled = m_d->downBeat->getBufferedAudio(downLength); | |
834 m_d->downBeat->findDownBeats(downsampled, downLength, beats, downbeats); | |
835 | |
836 vector<double> beatsd; | |
837 m_d->downBeat->getBeatSD(beatsd); | |
838 | |
839 /*std::cout << "BeatTracker: found downbeats at: "; | |
840 for (int i = 0; i < downbeats.size(); ++i) std::cout << downbeats[i] << " " << std::endl;*/ | |
841 | |
842 FeatureSet returnFeatures; | |
843 | |
844 char label[20]; | |
845 | |
846 int dbi = 0; | |
847 int beat = 0; | |
848 int bar = 0; | |
849 | |
850 if (!downbeats.empty()) { | |
851 // get the right number for the first beat; this will be | |
852 // incremented before use (at top of the following loop) | |
853 int firstDown = downbeats[0]; | |
854 beat = m_bpb - firstDown - 1; | |
855 if (beat == m_bpb) beat = 0; | |
856 } | |
857 | |
858 for (int i = 0; i < (int)beats.size(); ++i) { | |
859 | |
860 int frame = beats[i] * m_d->dfConfig.stepSize; | |
861 | |
862 if (dbi < (int)downbeats.size() && i == downbeats[dbi]) { | |
863 beat = 0; | |
864 ++bar; | |
865 ++dbi; | |
866 } else { | |
867 ++beat; | |
868 } | |
869 | |
870 /* Ooutput Section */ | |
871 | |
872 // outputs are: | |
873 // | |
874 // 0 -> beats | |
875 // 1 -> bars | |
876 // 2 -> beat counter function | |
877 | |
878 Feature feature; | |
879 feature.hasTimestamp = true; | |
880 feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime (frame, lrintf(m_inputSampleRate)); | |
881 | |
882 sprintf(label, "%d", beat + 1); | |
883 feature.label = label; | |
884 returnFeatures[m_beatOutputNumber].push_back(feature); // labelled beats | |
885 | |
886 feature.values.push_back(beat + 1); | |
887 returnFeatures[m_beatcountsOutputNumber].push_back(feature); // beat function | |
888 | |
889 if (i > 0 && i <= (int)beatsd.size()) { | |
890 feature.values.clear(); | |
891 feature.values.push_back(beatsd[i-1]); | |
892 feature.label = ""; | |
893 returnFeatures[m_beatsdOutputNumber].push_back(feature); // beat spectral difference | |
894 } | |
895 | |
896 if (beat == 0) { | |
897 feature.values.clear(); | |
898 sprintf(label, "%d", bar); | |
899 feature.label = label; | |
900 returnFeatures[m_barsOutputNumber].push_back(feature); // bars | |
901 } | |
902 } | |
903 | |
904 return returnFeatures; | |
905 } | |
906 | |
907 | |
908 /* ------ Chroma Extractor ------ */ | |
909 | |
910 Segmentino::FeatureList Segmentino::chromaFeatures() | |
911 { | |
912 | |
913 FeatureList returnFeatureList; | |
914 FeatureList tunedlogfreqspec; | |
915 | |
916 if (m_chromadata->logSpectrum.size() == 0) return returnFeatureList; | |
917 | |
918 /** Calculate Tuning | |
919 calculate tuning from (using the angle of the complex number defined by the | |
920 cumulative mean real and imag values) | |
921 **/ | |
922 float meanTuningImag = 0; | |
923 float meanTuningReal = 0; | |
924 for (int iBPS = 0; iBPS < nBPS; ++iBPS) { | |
925 meanTuningReal += m_chromadata->meanTunings[iBPS] * m_chromadata->cosvalues[iBPS]; | |
926 meanTuningImag += m_chromadata->meanTunings[iBPS] * m_chromadata->sinvalues[iBPS]; | |
927 } | |
928 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI)); | |
929 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI); | |
930 int intShift = floor(normalisedtuning * 3); | |
931 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this | |
932 | |
933 char buffer0 [50]; | |
934 | |
935 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning); | |
936 | |
937 /** Tune Log-Frequency Spectrogram | |
938 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to | |
939 perform linear interpolation on the existing log-frequency spectrogram (kinda f1). | |
940 **/ | |
941 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... "; | |
942 | |
943 float tempValue = 0; | |
944 | |
945 int count = 0; | |
946 | |
947 for (FeatureList::iterator i = m_chromadata->logSpectrum.begin(); i != m_chromadata->logSpectrum.end(); ++i) | |
948 { | |
949 | |
950 Feature f1 = *i; | |
951 Feature f2; // tuned log-frequency spectrum | |
952 | |
953 f2.hasTimestamp = true; | |
954 f2.timestamp = f1.timestamp; | |
955 | |
956 f2.values.push_back(0.0); | |
957 f2.values.push_back(0.0); // set lower edge to zero | |
958 | |
959 if (m_chromadata->tuneLocal) { | |
960 intShift = floor(m_chromadata->localTuning[count] * 3); | |
961 floatShift = m_chromadata->localTuning[count] * 3 - intShift; | |
962 // floatShift is a really bad name for this | |
963 } | |
964 | |
965 for (int k = 2; k < (int)f1.values.size() - 3; ++k) | |
966 { // interpolate all inner bins | |
967 tempValue = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift; | |
968 f2.values.push_back(tempValue); | |
969 } | |
970 | |
971 f2.values.push_back(0.0); | |
972 f2.values.push_back(0.0); | |
973 f2.values.push_back(0.0); // upper edge | |
974 | |
975 vector<float> runningmean = SpecialConvolution(f2.values,m_chromadata->hw); | |
976 vector<float> runningstd; | |
977 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance) | |
978 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i])); | |
979 } | |
980 runningstd = SpecialConvolution(runningstd,m_chromadata->hw); // second step convolve | |
981 for (int i = 0; i < nNote; i++) | |
982 { | |
983 | |
984 runningstd[i] = sqrt(runningstd[i]); | |
985 // square root to finally have running std | |
986 | |
987 if (runningstd[i] > 0) | |
988 { | |
989 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ? | |
990 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_chromadata->whitening) : 0; | |
991 } | |
992 | |
993 if (f2.values[i] < 0) { | |
994 | |
995 cerr << "ERROR: negative value in logfreq spectrum" << endl; | |
996 | |
997 } | |
998 } | |
999 tunedlogfreqspec.push_back(f2); | |
1000 count++; | |
1001 } | |
1002 cerr << "done." << endl; | |
1003 /** Semitone spectrum and chromagrams | |
1004 Semitone-spaced log-frequency spectrum derived | |
1005 from the tuned log-freq spectrum above. the spectrum | |
1006 is inferred using a non-negative least squares algorithm. | |
1007 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means | |
1008 bass and treble stacked onto each other). | |
1009 **/ | |
1010 if (m_chromadata->useNNLS == 0) { | |
1011 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... "; | |
1012 } else { | |
1013 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... "; | |
1014 } | |
1015 | |
1016 vector<float> oldchroma = vector<float>(12,0); | |
1017 vector<float> oldbasschroma = vector<float>(12,0); | |
1018 count = 0; | |
1019 | |
1020 for (FeatureList::iterator it = tunedlogfreqspec.begin(); it != tunedlogfreqspec.end(); ++it) { | |
1021 Feature logfreqsp = *it; // logfreq spectrum | |
1022 Feature bothchroma; // treble and bass chromagram | |
1023 | |
1024 bothchroma.hasTimestamp = true; | |
1025 bothchroma.timestamp = logfreqsp.timestamp; | |
1026 | |
1027 float b[nNote]; | |
1028 | |
1029 bool some_b_greater_zero = false; | |
1030 float sumb = 0; | |
1031 for (int i = 0; i < nNote; i++) { | |
1032 b[i] = logfreqsp.values[i]; | |
1033 sumb += b[i]; | |
1034 if (b[i] > 0) { | |
1035 some_b_greater_zero = true; | |
1036 } | |
1037 } | |
1038 | |
1039 // here's where the non-negative least squares algorithm calculates the note activation x | |
1040 | |
1041 vector<float> chroma = vector<float>(12, 0); | |
1042 vector<float> basschroma = vector<float>(12, 0); | |
1043 float currval; | |
1044 int iSemitone = 0; | |
1045 | |
1046 if (some_b_greater_zero) { | |
1047 if (m_chromadata->useNNLS == 0) { | |
1048 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) { | |
1049 currval = 0; | |
1050 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) { | |
1051 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1))); | |
1052 } | |
1053 chroma[iSemitone % 12] += currval * treblewindow[iSemitone]; | |
1054 basschroma[iSemitone % 12] += currval * basswindow[iSemitone]; | |
1055 iSemitone++; | |
1056 } | |
1057 | |
1058 } else { | |
1059 float x[84+1000]; | |
1060 for (int i = 1; i < 1084; ++i) x[i] = 1.0; | |
1061 vector<int> signifIndex; | |
1062 int index=0; | |
1063 sumb /= 84.0; | |
1064 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) { | |
1065 float currval = 0; | |
1066 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) { | |
1067 currval += b[iNote + iBPS]; | |
1068 } | |
1069 if (currval > 0) signifIndex.push_back(index); | |
1070 index++; | |
1071 } | |
1072 float rnorm; | |
1073 float w[84+1000]; | |
1074 float zz[84+1000]; | |
1075 int indx[84+1000]; | |
1076 int mode; | |
1077 int dictsize = nNote*signifIndex.size(); | |
1078 | |
1079 float *curr_dict = new float[dictsize]; | |
1080 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) { | |
1081 for (int iBin = 0; iBin < nNote; iBin++) { | |
1082 curr_dict[iNote * nNote + iBin] = | |
1083 1.0 * m_chromadata->dict[signifIndex[iNote] * nNote + iBin]; | |
1084 } | |
1085 } | |
1086 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode); | |
1087 delete [] curr_dict; | |
1088 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) { | |
1089 // cerr << mode << endl; | |
1090 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]]; | |
1091 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]]; | |
1092 } | |
1093 } | |
1094 } | |
1095 | |
1096 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); | |
1097 // just stack the both chromas | |
1098 | |
1099 bothchroma.values = chroma; | |
1100 returnFeatureList.push_back(bothchroma); | |
1101 count++; | |
1102 } | |
1103 cerr << "done." << endl; | |
1104 | |
1105 return returnFeatureList; | |
1106 } | |
1107 | |
1108 /* ------ Beat Quantizer ------ */ | |
1109 | |
1110 std::vector<Vamp::Plugin::FeatureList> | |
1111 Segmentino::beatQuantiser(Vamp::Plugin::FeatureList chromagram, Vamp::Plugin::FeatureList beats) | |
1112 { | |
1113 std::vector<FeatureList> returnVector; | |
1114 | |
1115 FeatureList fwQchromagram; // frame-wise beat-quantised chroma | |
1116 FeatureList bwQchromagram; // beat-wise beat-quantised chroma | |
1117 | |
1118 | |
1119 size_t nChromaFrame = chromagram.size(); | |
1120 size_t nBeat = beats.size(); | |
1121 | |
1122 if (nBeat == 0 && nChromaFrame == 0) return returnVector; | |
1123 | |
1124 int nBin = chromagram[0].values.size(); | |
1125 | |
1126 vector<float> tempChroma = vector<float>(nBin); | |
1127 | |
1128 Vamp::RealTime beatTimestamp = Vamp::RealTime::zeroTime; | |
1129 int currBeatCount = -1; // start before first beat | |
1130 int framesInBeat = 0; | |
1131 | |
1132 for (size_t iChroma = 0; iChroma < nChromaFrame; ++iChroma) | |
1133 { | |
1134 Vamp::RealTime frameTimestamp = chromagram[iChroma].timestamp; | |
1135 Vamp::RealTime newBeatTimestamp; | |
1136 | |
1137 if (currBeatCount != (int)beats.size() - 1) { | |
1138 newBeatTimestamp = beats[currBeatCount+1].timestamp; | |
1139 } else { | |
1140 newBeatTimestamp = chromagram[nChromaFrame-1].timestamp; | |
1141 } | |
1142 | |
1143 if (frameTimestamp > newBeatTimestamp || | |
1144 iChroma == nChromaFrame-1) | |
1145 { | |
1146 // new beat (or last chroma frame) | |
1147 // 1. finish all the old beat processing | |
1148 if (framesInBeat > 0) | |
1149 { | |
1150 for (int i = 0; i < nBin; ++i) tempChroma[i] /= framesInBeat; // average | |
1151 } | |
1152 | |
1153 Feature bwQchromaFrame; | |
1154 bwQchromaFrame.hasTimestamp = true; | |
1155 bwQchromaFrame.timestamp = beatTimestamp; | |
1156 bwQchromaFrame.values = tempChroma; | |
1157 bwQchromaFrame.duration = newBeatTimestamp - beatTimestamp; | |
1158 bwQchromagram.push_back(bwQchromaFrame); | |
1159 | |
1160 for (int iFrame = -framesInBeat; iFrame < 0; ++iFrame) | |
1161 { | |
1162 Feature fwQchromaFrame; | |
1163 fwQchromaFrame.hasTimestamp = true; | |
1164 fwQchromaFrame.timestamp = chromagram[iChroma+iFrame].timestamp; | |
1165 fwQchromaFrame.values = tempChroma; // all between two beats get the same | |
1166 fwQchromagram.push_back(fwQchromaFrame); | |
1167 } | |
1168 | |
1169 // 2. increments / resets for current (new) beat | |
1170 currBeatCount++; | |
1171 beatTimestamp = newBeatTimestamp; | |
1172 for (int i = 0; i < nBin; ++i) tempChroma[i] = 0; // average | |
1173 framesInBeat = 0; | |
1174 } | |
1175 framesInBeat++; | |
1176 for (int i = 0; i < nBin; ++i) tempChroma[i] += chromagram[iChroma].values[i]; | |
1177 } | |
1178 returnVector.push_back(fwQchromagram); | |
1179 returnVector.push_back(bwQchromagram); | |
1180 return returnVector; | |
1181 } | |
1182 | |
1183 | |
1184 | |
1185 /* -------------------------------- */ | |
1186 /* ------ Support Functions ------ */ | |
1187 /* -------------------------------- */ | |
1188 | |
1189 // one-dimesion median filter | |
1190 arma::vec medfilt1(arma::vec v, int medfilt_length) | |
1191 { | |
1192 // TODO: check if this works with odd and even medfilt_length !!! | |
1193 int halfWin = medfilt_length/2; | |
1194 | |
1195 // result vector | |
1196 arma::vec res = arma::zeros<arma::vec>(v.size()); | |
1197 | |
1198 // padding | |
1199 arma::vec padV = arma::zeros<arma::vec>(v.size()+medfilt_length-1); | |
1200 | |
1201 for (int i=medfilt_length/2; i < medfilt_length/2+(int)v.size(); ++ i) | |
1202 { | |
1203 padV(i) = v(i-medfilt_length/2); | |
1204 } | |
1205 | |
1206 // the above loop leaves the boundaries at 0, | |
1207 // the two loops below fill them with the start or end values of v at start and end | |
1208 for (int i = 0; i < halfWin; ++i) padV(i) = v(0); | |
1209 for (int i = halfWin+(int)v.size(); i < (int)v.size()+2*halfWin; ++i) padV(i) = v(v.size()-1); | |
1210 | |
1211 | |
1212 | |
1213 // Median filter | |
1214 arma::vec win = arma::zeros<arma::vec>(medfilt_length); | |
1215 | |
1216 for (int i=0; i < (int)v.size(); ++i) | |
1217 { | |
1218 win = padV.subvec(i,i+halfWin*2); | |
1219 win = sort(win); | |
1220 res(i) = win(halfWin); | |
1221 } | |
1222 | |
1223 return res; | |
1224 } | |
1225 | |
1226 | |
1227 // Quantile | |
1228 double quantile(arma::vec v, double p) | |
1229 { | |
1230 arma::vec sortV = arma::sort(v); | |
1231 int n = sortV.size(); | |
1232 arma::vec x = arma::zeros<vec>(n+2); | |
1233 arma::vec y = arma::zeros<vec>(n+2); | |
1234 | |
1235 x(0) = 0; | |
1236 x(n+1) = 100; | |
1237 | |
1238 for (int i=1; i<n+1; ++i) | |
1239 x(i) = 100*(0.5+(i-1))/n; | |
1240 | |
1241 y(0) = sortV(0); | |
1242 y.subvec(1,n) = sortV; | |
1243 y(n+1) = sortV(n-1); | |
1244 | |
1245 arma::uvec x2index = find(x>=p*100); | |
1246 | |
1247 // Interpolation | |
1248 double x1 = x(x2index(0)-1); | |
1249 double x2 = x(x2index(0)); | |
1250 double y1 = y(x2index(0)-1); | |
1251 double y2 = y(x2index(0)); | |
1252 | |
1253 double res = (y2-y1)/(x2-x1)*(p*100-x1)+y1; | |
1254 | |
1255 return res; | |
1256 } | |
1257 | |
1258 // Max Filtering | |
1259 arma::mat maxfilt1(arma::mat inmat, int len) | |
1260 { | |
1261 arma::mat outmat = inmat; | |
1262 | |
1263 for (int i=0; i < (int)inmat.n_rows; ++i) | |
1264 { | |
1265 if (arma::sum(inmat.row(i)) > 0) | |
1266 { | |
1267 // Take a window of rows | |
1268 int startWin; | |
1269 int endWin; | |
1270 | |
1271 if (0 > i-len) | |
1272 startWin = 0; | |
1273 else | |
1274 startWin = i-len; | |
1275 | |
1276 if ((int)inmat.n_rows-1 < i+len-1) | |
1277 endWin = inmat.n_rows-1; | |
1278 else | |
1279 endWin = i+len-1; | |
1280 | |
1281 outmat(i,span::all) = arma::max(inmat(span(startWin,endWin),span::all)); | |
1282 } | |
1283 } | |
1284 | |
1285 return outmat; | |
1286 | |
1287 } | |
1288 | |
1289 // Null Parts | |
1290 Part nullpart(vector<Part> parts, arma::vec barline) | |
1291 { | |
1292 arma::uvec nullindices = arma::ones<arma::uvec>(barline.size()); | |
1293 for (int iPart=0; iPart<(int)parts.size(); ++iPart) | |
1294 { | |
1295 //for (int iIndex=0; iIndex < parts[0].indices.size(); ++iIndex) | |
1296 for (int iIndex=0; iIndex < (int)parts[iPart].indices.size(); ++iIndex) | |
1297 for (int i=0; i<parts[iPart].n; ++i) | |
1298 { | |
1299 int ind = parts[iPart].indices[iIndex]+i; | |
1300 nullindices(ind) = 0; | |
1301 } | |
1302 } | |
1303 | |
1304 Part newPart; | |
1305 newPart.n = 1; | |
1306 uvec q = find(nullindices > 0); | |
1307 | |
1308 for (int i=0; i<(int)q.size();++i) | |
1309 newPart.indices.push_back(q(i)); | |
1310 | |
1311 newPart.letter = '-'; | |
1312 newPart.value = 0; | |
1313 newPart.level = 0; | |
1314 | |
1315 return newPart; | |
1316 } | |
1317 | |
1318 | |
1319 // Merge Nulls | |
1320 void mergenulls(vector<Part> &parts) | |
1321 { | |
1322 for (int iPart=0; iPart<(int)parts.size(); ++iPart) | |
1323 { | |
1324 | |
1325 vector<Part> newVectorPart; | |
1326 | |
1327 if (parts[iPart].letter.compare("-")==0) | |
1328 { | |
1329 sort (parts[iPart].indices.begin(), parts[iPart].indices.end()); | |
1330 int newpartind = -1; | |
1331 | |
1332 vector<int> indices; | |
1333 indices.push_back(-2); | |
1334 | |
1335 for (int iIndex=0; iIndex<(int)parts[iPart].indices.size(); ++iIndex) | |
1336 indices.push_back(parts[iPart].indices[iIndex]); | |
1337 | |
1338 for (int iInd=1; iInd < (int)indices.size(); ++iInd) | |
1339 { | |
1340 if (indices[iInd] - indices[iInd-1] > 1) | |
1341 { | |
1342 newpartind++; | |
1343 | |
1344 Part newPart; | |
1345 newPart.letter = 'N'; | |
1346 std::stringstream out; | |
1347 out << newpartind+1; | |
1348 newPart.letter.append(out.str()); | |
1349 // newPart.value = 20+newpartind+1; | |
1350 newPart.value = 0; | |
1351 newPart.n = 1; | |
1352 newPart.indices.push_back(indices[iInd]); | |
1353 newPart.level = 0; | |
1354 | |
1355 newVectorPart.push_back(newPart); | |
1356 } | |
1357 else | |
1358 { | |
1359 newVectorPart[newpartind].n = newVectorPart[newpartind].n+1; | |
1360 } | |
1361 } | |
1362 parts.erase (parts.end()); | |
1363 | |
1364 for (int i=0; i<(int)newVectorPart.size(); ++i) | |
1365 parts.push_back(newVectorPart[i]); | |
1366 } | |
1367 } | |
1368 } | |
1369 | |
1370 /* ------ Segmentation ------ */ | |
1371 | |
1372 vector<Part> songSegment(Vamp::Plugin::FeatureList quantisedChromagram) | |
1373 { | |
1374 | |
1375 | |
1376 /* ------ Parameters ------ */ | |
1377 double thresh_beat = 0.85; | |
1378 double thresh_seg = 0.80; | |
1379 int medfilt_length = 5; | |
1380 int minlength = 28; | |
1381 int maxlength = 2*128; | |
1382 double quantilePerc = 0.1; | |
1383 /* ------------------------ */ | |
1384 | |
1385 | |
1386 // Collect Info | |
1387 int nBeat = quantisedChromagram.size(); // Number of feature vector | |
1388 int nFeatValues = quantisedChromagram[0].values.size(); // Number of values for each feature vector | |
1389 | |
1390 if (nBeat < minlength) { | |
1391 // return a single part | |
1392 vector<Part> parts; | |
1393 Part newPart; | |
1394 newPart.n = 1; | |
1395 newPart.indices.push_back(0); | |
1396 newPart.letter = "n1"; | |
1397 newPart.value = 20; | |
1398 newPart.level = 0; | |
1399 parts.push_back(newPart); | |
1400 return parts; | |
1401 } | |
1402 | |
1403 arma::irowvec timeStamp = arma::zeros<arma::imat>(1,nBeat); // Vector of Time Stamps | |
1404 | |
1405 // Save time stamp as a Vector | |
1406 if (quantisedChromagram[0].hasTimestamp) | |
1407 { | |
1408 for (int i = 0; i < nBeat; ++ i) | |
1409 timeStamp[i] = quantisedChromagram[i].timestamp.nsec; | |
1410 } | |
1411 | |
1412 | |
1413 // Build a ObservationTOFeatures Matrix | |
1414 arma::mat featVal = arma::zeros<mat>(nBeat,nFeatValues/2); | |
1415 | |
1416 for (int i = 0; i < nBeat; ++ i) | |
1417 for (int j = 0; j < nFeatValues/2; ++ j) | |
1418 { | |
1419 featVal(i,j) = 0.8 * quantisedChromagram[i].values[j] + quantisedChromagram[i].values[j+12]; // bass attenuated | |
1420 } | |
1421 | |
1422 // Set to arbitrary value to feature vectors with low std | |
1423 arma::mat a = stddev(featVal,1,1); | |
1424 | |
1425 // Feature Correlation Matrix | |
1426 arma::mat simmat0 = 1-arma::cor(arma::trans(featVal)); | |
1427 | |
1428 | |
1429 for (int i = 0; i < nBeat; ++ i) | |
1430 { | |
1431 if (a(i)<0.000001) | |
1432 { | |
1433 featVal(i,1) = 1000; // arbitrary | |
1434 | |
1435 for (int j = 0; j < nFeatValues/2; ++j) | |
1436 { | |
1437 simmat0(i,j) = 1; | |
1438 simmat0(j,i) = 1; | |
1439 } | |
1440 } | |
1441 } | |
1442 | |
1443 arma::mat simmat = 1-simmat0/2; | |
1444 | |
1445 // -------- To delate when the proble with the add of beat will be solved ------- | |
1446 for (int i = 0; i < nBeat; ++ i) | |
1447 for (int j = 0; j < nBeat; ++ j) | |
1448 if (!std::isfinite(simmat(i,j))) | |
1449 simmat(i,j)=0; | |
1450 // ------------------------------------------------------------------------------ | |
1451 | |
1452 // Median Filtering applied to the Correlation Matrix | |
1453 // The median filter is for each diagonal of the Matrix | |
1454 arma::mat median_simmat = arma::zeros<arma::mat>(nBeat,nBeat); | |
1455 | |
1456 for (int i = 0; i < nBeat; ++ i) | |
1457 { | |
1458 arma::vec temp = medfilt1(simmat.diag(i),medfilt_length); | |
1459 median_simmat.diag(i) = temp; | |
1460 median_simmat.diag(-i) = temp; | |
1461 } | |
1462 | |
1463 for (int i = 0; i < nBeat; ++ i) | |
1464 for (int j = 0; j < nBeat; ++ j) | |
1465 if (!std::isfinite(median_simmat(i,j))) | |
1466 median_simmat(i,j) = 0; | |
1467 | |
1468 // -------------- NOT CONVERTED ------------------------------------- | |
1469 // if param.seg.standardise | |
1470 // med_median_simmat = repmat(median(median_simmat),nBeat,1); | |
1471 // std_median_simmat = repmat(std(median_simmat),nBeat,1); | |
1472 // median_simmat = (median_simmat - med_median_simmat) ./ std_median_simmat; | |
1473 // end | |
1474 // -------------------------------------------------------- | |
1475 | |
1476 // Retrieve Bar Bounderies | |
1477 arma::uvec dup = find(median_simmat > thresh_beat); | |
1478 arma::mat potential_duplicates = arma::zeros<arma::mat>(nBeat,nBeat); | |
1479 potential_duplicates.elem(dup) = arma::ones<arma::vec>(dup.size()); | |
1480 potential_duplicates = trimatu(potential_duplicates); | |
1481 | |
1482 int nPartlengths = round((maxlength-minlength)/4)+1; | |
1483 arma::vec partlengths = zeros<arma::vec>(nPartlengths); | |
1484 | |
1485 for (int i = 0; i < nPartlengths; ++ i) | |
1486 partlengths(i) = (i*4) + minlength; | |
1487 | |
1488 // initialise arrays | |
1489 arma::cube simArray = zeros<arma::cube>(nBeat,nBeat,nPartlengths); | |
1490 arma::cube decisionArray2 = zeros<arma::cube>(nBeat,nBeat,nPartlengths); | |
1491 | |
1492 for (int iLength = 0; iLength < nPartlengths; ++ iLength) | |
1493 // for (int iLength = 0; iLength < 20; ++ iLength) | |
1494 { | |
1495 int len = partlengths(iLength); | |
1496 int nUsedBeat = nBeat - len + 1; // number of potential rep beginnings: they can't overlap at the end of the song | |
1497 | |
1498 if (nUsedBeat < 1) continue; | |
1499 | |
1500 for (int iBeat = 0; iBeat < nUsedBeat; ++ iBeat) // looping over all columns (arbitrarily chosen columns) | |
1501 { | |
1502 arma::uvec help2 = find(potential_duplicates(span(0,nUsedBeat-1),iBeat)==1); | |
1503 | |
1504 for (int i=0; i < (int)help2.size(); ++i) | |
1505 { | |
1506 | |
1507 // measure how well two length len segments go together | |
1508 int kBeat = help2(i); | |
1509 arma::vec distrib = median_simmat(span(iBeat,iBeat+len-1),span(kBeat,kBeat+len-1)).diag(0); | |
1510 simArray(iBeat,kBeat,iLength) = quantile(distrib,quantilePerc); | |
1511 } | |
1512 } | |
1513 | |
1514 arma::mat tempM = simArray(span(0,nUsedBeat-1),span(0,nUsedBeat-1),span(iLength,iLength)); | |
1515 simArray.slice(iLength)(span(0,nUsedBeat-1),span(0,nUsedBeat-1)) = tempM + arma::trans(tempM) - (eye<mat>(nUsedBeat,nUsedBeat)%tempM); | |
1516 | |
1517 // convolution | |
1518 arma::vec K = arma::zeros<vec>(3); | |
1519 K << 0.01 << 0.98 << 0.01; | |
1520 | |
1521 | |
1522 for (int i=0; i < (int)simArray.n_rows; ++i) | |
1523 { | |
1524 arma::rowvec t = arma::conv((arma::rowvec)simArray.slice(iLength).row(i),K); | |
1525 simArray.slice(iLength)(i,span::all) = t.subvec(1,t.size()-2); | |
1526 } | |
1527 | |
1528 // take only over-average bars that do not overlap | |
1529 | |
1530 arma::mat temp = arma::zeros<mat>(simArray.n_rows, simArray.n_cols); | |
1531 temp(span::all, span(0,nUsedBeat-1)) = simArray.slice(iLength)(span::all,span(0,nUsedBeat-1)); | |
1532 | |
1533 for (int i=0; i < (int)temp.n_rows; ++i) | |
1534 for (int j=0; j < nUsedBeat; ++j) | |
1535 if (temp(i,j) < thresh_seg) | |
1536 temp(i,j) = 0; | |
1537 | |
1538 decisionArray2.slice(iLength) = temp; | |
1539 | |
1540 arma::mat maxMat = maxfilt1(decisionArray2.slice(iLength),len-1); | |
1541 | |
1542 for (int i=0; i < (int)decisionArray2.n_rows; ++i) | |
1543 for (int j=0; j < (int)decisionArray2.n_cols; ++j) | |
1544 if (decisionArray2.slice(iLength)(i,j) < maxMat(i,j)) | |
1545 decisionArray2.slice(iLength)(i,j) = 0; | |
1546 | |
1547 decisionArray2.slice(iLength) = decisionArray2.slice(iLength) % arma::trans(decisionArray2.slice(iLength)); | |
1548 | |
1549 for (int i=0; i < (int)simArray.n_rows; ++i) | |
1550 for (int j=0; j < (int)simArray.n_cols; ++j) | |
1551 if (simArray.slice(iLength)(i,j) < thresh_seg) | |
1552 potential_duplicates(i,j) = 0; | |
1553 } | |
1554 | |
1555 // Milk the data | |
1556 | |
1557 arma::mat bestval; | |
1558 | |
1559 for (int iLength=0; iLength<nPartlengths; ++iLength) | |
1560 { | |
1561 arma::mat temp = arma::zeros<arma::mat>(decisionArray2.n_rows,decisionArray2.n_cols); | |
1562 | |
1563 for (int rows=0; rows < (int)decisionArray2.n_rows; ++rows) | |
1564 for (int cols=0; cols < (int)decisionArray2.n_cols; ++cols) | |
1565 if (decisionArray2.slice(iLength)(rows,cols) > 0) | |
1566 temp(rows,cols) = 1; | |
1567 | |
1568 arma::vec currLogicSum = arma::sum(temp,1); | |
1569 | |
1570 for (int iBeat=0; iBeat < nBeat; ++iBeat) | |
1571 if (currLogicSum(iBeat) > 1) | |
1572 { | |
1573 arma::vec t = decisionArray2.slice(iLength)(span::all,iBeat); | |
1574 double currSum = sum(t); | |
1575 | |
1576 int count = 0; | |
1577 for (int i=0; i < (int)t.size(); ++i) | |
1578 if (t(i)>0) | |
1579 count++; | |
1580 | |
1581 currSum = (currSum/count)/2; | |
1582 | |
1583 arma::rowvec t1; | |
1584 t1 << (currLogicSum(iBeat)-1) * partlengths(iLength) << currSum << iLength << iBeat << currLogicSum(iBeat); | |
1585 | |
1586 bestval = join_cols(bestval,t1); | |
1587 } | |
1588 } | |
1589 | |
1590 // Definition of the resulting vector | |
1591 vector<Part> parts; | |
1592 | |
1593 // make a table of all valid sets of parts | |
1594 | |
1595 char partletters[] = {'A','B','C','D','E','F','G', 'H','I','J','K','L','M','N','O','P','Q','R','S'}; | |
1596 int partvalues[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19}; | |
1597 arma::vec valid_sets = arma::ones<arma::vec>(bestval.n_rows); | |
1598 | |
1599 if (!bestval.is_empty()) | |
1600 { | |
1601 | |
1602 // In questo punto viene introdotto un errore alla 3 cifra decimale | |
1603 | |
1604 arma::colvec t = arma::zeros<arma::colvec>(bestval.n_rows); | |
1605 for (int i=0; i < (int)bestval.n_rows; ++i) | |
1606 { | |
1607 t(i) = bestval(i,1)*2; | |
1608 } | |
1609 | |
1610 double m = t.max(); | |
1611 | |
1612 bestval(span::all,1) = bestval(span::all,1) / m; | |
1613 bestval(span::all,0) = bestval(span::all,0) + bestval(span::all,1); | |
1614 | |
1615 arma::mat bestval2; | |
1616 for (int i=0; i < (int)bestval.n_cols; ++i) | |
1617 if (i!=1) | |
1618 bestval2 = join_rows(bestval2,bestval.col(i)); | |
1619 | |
1620 for (int kSeg=0; kSeg<6; ++kSeg) | |
1621 { | |
1622 arma::mat currbestvals = arma::zeros<arma::mat>(bestval2.n_rows, bestval2.n_cols); | |
1623 for (int i=0; i < (int)bestval2.n_rows; ++i) | |
1624 for (int j=0; j < (int)bestval2.n_cols; ++j) | |
1625 if (valid_sets(i)) | |
1626 currbestvals(i,j) = bestval2(i,j); | |
1627 | |
1628 arma::vec t1 = currbestvals.col(0); | |
1629 double ma; | |
1630 uword maIdx; | |
1631 ma = t1.max(maIdx); | |
1632 | |
1633 if ((maIdx == 0)&&(ma == 0)) | |
1634 break; | |
1635 | |
1636 int bestLength = lrint(partlengths(currbestvals(maIdx,1))); | |
1637 arma::rowvec bestIndices = decisionArray2.slice(currbestvals(maIdx,1))(currbestvals(maIdx,2),span::all); | |
1638 | |
1639 arma::rowvec bestIndicesMap = arma::zeros<arma::rowvec>(bestIndices.size()); | |
1640 for (int i=0; i < (int)bestIndices.size(); ++i) | |
1641 if (bestIndices(i)>0) | |
1642 bestIndicesMap(i) = 1; | |
1643 | |
1644 arma::rowvec mask = arma::zeros<arma::rowvec>(bestLength*2-1); | |
1645 for (int i=0; i<bestLength; ++i) | |
1646 mask(i+bestLength-1) = 1; | |
1647 | |
1648 arma::rowvec t2 = arma::conv(bestIndicesMap,mask); | |
1649 arma::rowvec island = t2.subvec(mask.size()/2,t2.size()-1-mask.size()/2); | |
1650 | |
1651 // Save results in the structure | |
1652 Part newPart; | |
1653 newPart.n = bestLength; | |
1654 uvec q1 = find(bestIndices > 0); | |
1655 | |
1656 for (int i=0; i < (int)q1.size();++i) | |
1657 newPart.indices.push_back(q1(i)); | |
1658 | |
1659 newPart.letter = partletters[kSeg]; | |
1660 newPart.value = partvalues[kSeg]; | |
1661 newPart.level = kSeg+1; | |
1662 parts.push_back(newPart); | |
1663 | |
1664 uvec q2 = find(valid_sets==1); | |
1665 | |
1666 for (int i=0; i < (int)q2.size(); ++i) | |
1667 { | |
1668 int iSet = q2(i); | |
1669 int s = partlengths(bestval2(iSet,1)); | |
1670 | |
1671 arma::rowvec mask1 = arma::zeros<arma::rowvec>(s*2-1); | |
1672 for (int i=0; i<s; ++i) | |
1673 mask1(i+s-1) = 1; | |
1674 | |
1675 arma::rowvec Ind = decisionArray2.slice(bestval2(iSet,1))(bestval2(iSet,2),span::all); | |
1676 arma::rowvec IndMap = arma::zeros<arma::rowvec>(Ind.size()); | |
1677 for (int i=0; i < (int)Ind.size(); ++i) | |
1678 if (Ind(i)>0) | |
1679 IndMap(i) = 2; | |
1680 | |
1681 arma::rowvec t3 = arma::conv(IndMap,mask1); | |
1682 arma::rowvec currislands = t3.subvec(mask1.size()/2,t3.size()-1-mask1.size()/2); | |
1683 arma::rowvec islandsdMult = currislands%island; | |
1684 | |
1685 arma::uvec islandsIndex = find(islandsdMult > 0); | |
1686 | |
1687 if (islandsIndex.size() > 0) | |
1688 valid_sets(iSet) = 0; | |
1689 } | |
1690 } | |
1691 } | |
1692 else | |
1693 { | |
1694 Part newPart; | |
1695 newPart.n = nBeat; | |
1696 newPart.indices.push_back(0); | |
1697 newPart.letter = 'A'; | |
1698 newPart.value = 1; | |
1699 newPart.level = 1; | |
1700 parts.push_back(newPart); | |
1701 } | |
1702 | |
1703 arma::vec bar = linspace(1,nBeat,nBeat); | |
1704 Part np = nullpart(parts,bar); | |
1705 | |
1706 parts.push_back(np); | |
1707 | |
1708 // -------------- NOT CONVERTED ------------------------------------- | |
1709 // if param.seg.editor | |
1710 // [pa, ta] = partarray(parts); | |
1711 // parts = editorssearch(pa, ta, parts); | |
1712 // parts = [parts, nullpart(parts,1:nBeat)]; | |
1713 // end | |
1714 // ------------------------------------------------------------------ | |
1715 | |
1716 | |
1717 mergenulls(parts); | |
1718 | |
1719 | |
1720 // -------------- NOT CONVERTED ------------------------------------- | |
1721 // if param.seg.editor | |
1722 // [pa, ta] = partarray(parts); | |
1723 // parts = editorssearch(pa, ta, parts); | |
1724 // parts = [parts, nullpart(parts,1:nBeat)]; | |
1725 // end | |
1726 // ------------------------------------------------------------------ | |
1727 | |
1728 return parts; | |
1729 } | |
1730 | |
1731 | |
1732 | |
1733 void songSegmentChroma(Vamp::Plugin::FeatureList quantisedChromagram, vector<Part> &parts) | |
1734 { | |
1735 // Collect Info | |
1736 int nBeat = quantisedChromagram.size(); // Number of feature vector | |
1737 int nFeatValues = quantisedChromagram[0].values.size(); // Number of values for each feature vector | |
1738 | |
1739 arma::mat synchTreble = arma::zeros<mat>(nBeat,nFeatValues/2); | |
1740 | |
1741 for (int i = 0; i < nBeat; ++ i) | |
1742 for (int j = 0; j < nFeatValues/2; ++ j) | |
1743 { | |
1744 synchTreble(i,j) = quantisedChromagram[i].values[j]; | |
1745 } | |
1746 | |
1747 arma::mat synchBass = arma::zeros<mat>(nBeat,nFeatValues/2); | |
1748 | |
1749 for (int i = 0; i < nBeat; ++ i) | |
1750 for (int j = 0; j < nFeatValues/2; ++ j) | |
1751 { | |
1752 synchBass(i,j) = quantisedChromagram[i].values[j+12]; | |
1753 } | |
1754 | |
1755 // Process | |
1756 | |
1757 arma::mat segTreble = arma::zeros<arma::mat>(quantisedChromagram.size(),quantisedChromagram[0].values.size()/2); | |
1758 arma::mat segBass = arma::zeros<arma::mat>(quantisedChromagram.size(),quantisedChromagram[0].values.size()/2); | |
1759 | |
1760 for (int iPart=0; iPart < (int)parts.size(); ++iPart) | |
1761 { | |
1762 parts[iPart].nInd = parts[iPart].indices.size(); | |
1763 | |
1764 for (int kOccur=0; kOccur<parts[iPart].nInd; ++kOccur) | |
1765 { | |
1766 int kStartIndex = parts[iPart].indices[kOccur]; | |
1767 int kEndIndex = kStartIndex + parts[iPart].n-1; | |
1768 | |
1769 segTreble.rows(kStartIndex,kEndIndex) = segTreble.rows(kStartIndex,kEndIndex) + synchTreble.rows(kStartIndex,kEndIndex); | |
1770 segBass.rows(kStartIndex,kEndIndex) = segBass.rows(kStartIndex,kEndIndex) + synchBass.rows(kStartIndex,kEndIndex); | |
1771 } | |
1772 } | |
1773 } | |
1774 | |
1775 | |
1776 // Segment Integration | |
1777 vector<Part> songSegmentIntegration(vector<Part> &parts) | |
1778 { | |
1779 // Break up parts (every part will have one instance) | |
1780 vector<Part> newPartVector; | |
1781 vector<int> partindices; | |
1782 | |
1783 for (int iPart=0; iPart < (int)parts.size(); ++iPart) | |
1784 { | |
1785 parts[iPart].nInd = parts[iPart].indices.size(); | |
1786 for (int iInstance=0; iInstance<parts[iPart].nInd; ++iInstance) | |
1787 { | |
1788 Part newPart; | |
1789 newPart.n = parts[iPart].n; | |
1790 newPart.letter = parts[iPart].letter; | |
1791 newPart.value = parts[iPart].value; | |
1792 newPart.level = parts[iPart].level; | |
1793 newPart.indices.push_back(parts[iPart].indices[iInstance]); | |
1794 newPart.nInd = 1; | |
1795 partindices.push_back(parts[iPart].indices[iInstance]); | |
1796 | |
1797 newPartVector.push_back(newPart); | |
1798 } | |
1799 } | |
1800 | |
1801 | |
1802 // Sort the parts in order of occurrence | |
1803 sort (partindices.begin(), partindices.end()); | |
1804 | |
1805 for (int i=0; i < (int)partindices.size(); ++i) | |
1806 { | |
1807 bool found = false; | |
1808 int in=0; | |
1809 while (!found) | |
1810 { | |
1811 if (newPartVector[in].indices[0] == partindices[i]) | |
1812 { | |
1813 newPartVector.push_back(newPartVector[in]); | |
1814 newPartVector.erase(newPartVector.begin()+in); | |
1815 found = true; | |
1816 } | |
1817 else | |
1818 in++; | |
1819 } | |
1820 } | |
1821 | |
1822 // Clear the vector | |
1823 for (int iNewpart=1; iNewpart < (int)newPartVector.size(); ++iNewpart) | |
1824 { | |
1825 if (newPartVector[iNewpart].n < 12) | |
1826 { | |
1827 newPartVector[iNewpart-1].n = newPartVector[iNewpart-1].n + newPartVector[iNewpart].n; | |
1828 newPartVector.erase(newPartVector.begin()+iNewpart); | |
1829 } | |
1830 } | |
1831 | |
1832 return newPartVector; | |
1833 } | |
1834 | |
1835 // Segmenter | |
1836 Vamp::Plugin::FeatureList Segmentino::runSegmenter(Vamp::Plugin::FeatureList quantisedChromagram) | |
1837 { | |
1838 /* --- Display Information --- */ | |
1839 // int numBeat = quantisedChromagram.size(); | |
1840 // int numFeats = quantisedChromagram[0].values.size(); | |
1841 | |
1842 vector<Part> parts; | |
1843 vector<Part> finalParts; | |
1844 | |
1845 parts = songSegment(quantisedChromagram); | |
1846 songSegmentChroma(quantisedChromagram,parts); | |
1847 | |
1848 finalParts = songSegmentIntegration(parts); | |
1849 | |
1850 | |
1851 // TEMP ---- | |
1852 /*for (int i=0;i<finalParts.size(); ++i) | |
1853 { | |
1854 std::cout << "Parts n° " << i << std::endl; | |
1855 std::cout << "n°: " << finalParts[i].n << std::endl; | |
1856 std::cout << "letter: " << finalParts[i].letter << std::endl; | |
1857 | |
1858 std::cout << "indices: "; | |
1859 for (int j=0;j<finalParts[i].indices.size(); ++j) | |
1860 std::cout << finalParts[i].indices[j] << " "; | |
1861 | |
1862 std::cout << std::endl; | |
1863 std::cout << "level: " << finalParts[i].level << std::endl; | |
1864 }*/ | |
1865 | |
1866 // --------- | |
1867 | |
1868 | |
1869 // Output | |
1870 | |
1871 Vamp::Plugin::FeatureList results; | |
1872 | |
1873 | |
1874 Feature seg; | |
1875 | |
1876 arma::vec indices; | |
1877 // int idx=0; | |
1878 vector<int> values; | |
1879 vector<string> letters; | |
1880 | |
1881 for (int iPart=0; iPart < (int)finalParts.size()-1; ++iPart) | |
1882 { | |
1883 int iInstance=0; | |
1884 seg.hasTimestamp = true; | |
1885 | |
1886 int ind = finalParts[iPart].indices[iInstance]; | |
1887 int ind1 = finalParts[iPart+1].indices[iInstance]; | |
1888 | |
1889 seg.timestamp = quantisedChromagram[ind].timestamp; | |
1890 seg.hasDuration = true; | |
1891 seg.duration = quantisedChromagram[ind1].timestamp-quantisedChromagram[ind].timestamp; | |
1892 seg.values.clear(); | |
1893 seg.values.push_back(finalParts[iPart].value); | |
1894 seg.label = finalParts[iPart].letter; | |
1895 | |
1896 results.push_back(seg); | |
1897 } | |
1898 | |
1899 if (finalParts.size() > 0) { | |
1900 int ind = finalParts[finalParts.size()-1].indices[0]; | |
1901 seg.hasTimestamp = true; | |
1902 seg.timestamp = quantisedChromagram[ind].timestamp; | |
1903 seg.hasDuration = true; | |
1904 seg.duration = quantisedChromagram[quantisedChromagram.size()-1].timestamp-quantisedChromagram[ind].timestamp; | |
1905 seg.values.clear(); | |
1906 seg.values.push_back(finalParts[finalParts.size()-1].value); | |
1907 seg.label = finalParts[finalParts.size()-1].letter; | |
1908 | |
1909 results.push_back(seg); | |
1910 } | |
1911 | |
1912 return results; | |
1913 } | |
1914 | |
1915 | |
1916 | |
1917 | |
1918 | |
1919 | |
1920 | |
1921 | |
1922 | |
1923 | |
1924 | |
1925 | |
1926 | |
1927 | |
1928 | |
1929 | |
1930 |