Mercurial > hg > silvet
comparison src/Silvet.cpp @ 342:ad45b18427e0
Merge from branch livemode
author | Chris Cannam |
---|---|
date | Mon, 06 Jul 2015 09:15:21 +0100 |
parents | 705d807ca2ca |
children | 460cabb27bf7 |
comparison
equal
deleted
inserted
replaced
313:fa2ffbb786df | 342:ad45b18427e0 |
---|---|
19 #include <cq/CQSpectrogram.h> | 19 #include <cq/CQSpectrogram.h> |
20 | 20 |
21 #include "MedianFilter.h" | 21 #include "MedianFilter.h" |
22 #include "constant-q-cpp/src/dsp/Resampler.h" | 22 #include "constant-q-cpp/src/dsp/Resampler.h" |
23 #include "flattendynamics-ladspa.h" | 23 #include "flattendynamics-ladspa.h" |
24 #include "LiveInstruments.h" | |
24 | 25 |
25 #include <vector> | 26 #include <vector> |
26 #include <future> | 27 #include <future> |
27 | 28 |
28 #include <cstdio> | 29 #include <cstdio> |
35 using std::future; | 36 using std::future; |
36 using std::async; | 37 using std::async; |
37 using Vamp::RealTime; | 38 using Vamp::RealTime; |
38 | 39 |
39 static int processingSampleRate = 44100; | 40 static int processingSampleRate = 44100; |
40 static int processingBPO = 60; | 41 |
42 static int binsPerSemitoneLive = 1; | |
43 static int binsPerSemitoneNormal = 5; | |
41 | 44 |
42 static int minInputSampleRate = 100; | 45 static int minInputSampleRate = 100; |
43 static int maxInputSampleRate = 192000; | 46 static int maxInputSampleRate = 192000; |
47 | |
48 static const Silvet::ProcessingMode defaultMode = Silvet::HighQualityMode; | |
44 | 49 |
45 Silvet::Silvet(float inputSampleRate) : | 50 Silvet::Silvet(float inputSampleRate) : |
46 Plugin(inputSampleRate), | 51 Plugin(inputSampleRate), |
47 m_instruments(InstrumentPack::listInstrumentPacks()), | 52 m_instruments(InstrumentPack::listInstrumentPacks()), |
53 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)), | |
48 m_resampler(0), | 54 m_resampler(0), |
49 m_flattener(0), | 55 m_flattener(0), |
50 m_cq(0), | 56 m_cq(0), |
51 m_hqMode(true), | 57 m_mode(defaultMode), |
52 m_fineTuning(false), | 58 m_fineTuning(false), |
53 m_instrument(0), | 59 m_instrument(0), |
54 m_colsPerSec(50), | 60 m_colsPerSec(50), |
55 m_haveStartTime(false) | 61 m_haveStartTime(false) |
56 { | 62 { |
139 | 145 |
140 ParameterDescriptor desc; | 146 ParameterDescriptor desc; |
141 desc.identifier = "mode"; | 147 desc.identifier = "mode"; |
142 desc.name = "Processing mode"; | 148 desc.name = "Processing mode"; |
143 desc.unit = ""; | 149 desc.unit = ""; |
144 desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode modifies a number of internal parameters in favour of speed. Intensive mode (the default) will almost always produce better results."; | 150 desc.description = "Sets the tradeoff of processing speed against transcription quality. Live mode is much faster and detects notes with relatively low latency; Intensive mode (the default) is slower but will almost always produce better results."; |
145 desc.minValue = 0; | 151 desc.minValue = 0; |
146 desc.maxValue = 1; | 152 desc.maxValue = 2; |
147 desc.defaultValue = 1; | 153 desc.defaultValue = int(defaultMode); |
148 desc.isQuantized = true; | 154 desc.isQuantized = true; |
149 desc.quantizeStep = 1; | 155 desc.quantizeStep = 1; |
150 desc.valueNames.push_back("Draft (faster)"); | 156 desc.valueNames.push_back("Live (faster and lower latency)"); |
151 desc.valueNames.push_back("Intensive (higher quality)"); | 157 desc.valueNames.push_back("Intensive (higher quality)"); |
152 list.push_back(desc); | 158 list.push_back(desc); |
153 | 159 |
154 desc.identifier = "instrument"; | 160 desc.identifier = "instrument"; |
155 desc.name = "Instrument"; | 161 desc.name = "Instrument"; |
183 | 189 |
184 float | 190 float |
185 Silvet::getParameter(string identifier) const | 191 Silvet::getParameter(string identifier) const |
186 { | 192 { |
187 if (identifier == "mode") { | 193 if (identifier == "mode") { |
188 return m_hqMode ? 1.f : 0.f; | 194 return (float)(int)m_mode; |
189 } else if (identifier == "finetune") { | 195 } else if (identifier == "finetune") { |
190 return m_fineTuning ? 1.f : 0.f; | 196 return m_fineTuning ? 1.f : 0.f; |
191 } else if (identifier == "instrument") { | 197 } else if (identifier == "instrument") { |
192 return m_instrument; | 198 return m_instrument; |
193 } | 199 } |
196 | 202 |
197 void | 203 void |
198 Silvet::setParameter(string identifier, float value) | 204 Silvet::setParameter(string identifier, float value) |
199 { | 205 { |
200 if (identifier == "mode") { | 206 if (identifier == "mode") { |
201 m_hqMode = (value > 0.5); | 207 m_mode = (ProcessingMode)(int)(value + 0.5); |
202 } else if (identifier == "finetune") { | 208 } else if (identifier == "finetune") { |
203 m_fineTuning = (value > 0.5); | 209 m_fineTuning = (value > 0.5); |
204 } else if (identifier == "instrument") { | 210 } else if (identifier == "instrument") { |
205 m_instrument = lrintf(value); | 211 m_instrument = lrintf(value); |
206 } | 212 } |
230 OutputList list; | 236 OutputList list; |
231 | 237 |
232 OutputDescriptor d; | 238 OutputDescriptor d; |
233 d.identifier = "notes"; | 239 d.identifier = "notes"; |
234 d.name = "Note transcription"; | 240 d.name = "Note transcription"; |
235 d.description = "Overall note transcription. Each note has time, duration, estimated pitch, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture."; | 241 d.description = "Overall note transcription. Each note has time, duration, estimated fundamental frequency, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture."; |
236 d.unit = "Hz"; | 242 d.unit = "Hz"; |
237 d.hasFixedBinCount = true; | 243 d.hasFixedBinCount = true; |
238 d.binCount = 2; | 244 d.binCount = 2; |
239 d.binNames.push_back("Frequency"); | 245 d.binNames.push_back("Frequency"); |
240 d.binNames.push_back("Velocity"); | 246 d.binNames.push_back("Velocity"); |
244 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62); | 250 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62); |
245 d.hasDuration = true; | 251 d.hasDuration = true; |
246 m_notesOutputNo = list.size(); | 252 m_notesOutputNo = list.size(); |
247 list.push_back(d); | 253 list.push_back(d); |
248 | 254 |
255 d.identifier = "onsets"; | |
256 d.name = "Note onsets"; | |
257 d.description = "Note onsets, without durations. These can be calculated sooner than complete notes, because it isn't necessary to wait for a note to finish before returning its feature. Each event has time, estimated fundamental frequency in Hz, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture."; | |
258 d.unit = "Hz"; | |
259 d.hasFixedBinCount = true; | |
260 d.binCount = 2; | |
261 d.binNames.push_back("Frequency"); | |
262 d.binNames.push_back("Velocity"); | |
263 d.hasKnownExtents = false; | |
264 d.isQuantized = false; | |
265 d.sampleType = OutputDescriptor::VariableSampleRate; | |
266 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62); | |
267 d.hasDuration = false; | |
268 m_onsetsOutputNo = list.size(); | |
269 list.push_back(d); | |
270 | |
271 d.identifier = "onoffsets"; | |
272 d.name = "Note onsets and offsets"; | |
273 d.description = "Note onsets and offsets as separate events. Each onset event has time, estimated fundamental frequency in Hz, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture. Offsets are represented in the same way but with a velocity of 0."; | |
274 d.unit = "Hz"; | |
275 d.hasFixedBinCount = true; | |
276 d.binCount = 2; | |
277 d.binNames.push_back("Frequency"); | |
278 d.binNames.push_back("Velocity"); | |
279 d.hasKnownExtents = false; | |
280 d.isQuantized = false; | |
281 d.sampleType = OutputDescriptor::VariableSampleRate; | |
282 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62); | |
283 d.hasDuration = false; | |
284 m_onOffsetsOutputNo = list.size(); | |
285 list.push_back(d); | |
286 | |
249 d.identifier = "timefreq"; | 287 d.identifier = "timefreq"; |
250 d.name = "Time-frequency distribution"; | 288 d.name = "Time-frequency distribution"; |
251 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm."; | 289 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm."; |
252 d.unit = ""; | 290 d.unit = ""; |
253 d.hasFixedBinCount = true; | 291 d.hasFixedBinCount = true; |
254 d.binCount = m_instruments[0].templateHeight; | 292 d.binCount = getPack(0).templateHeight; |
255 d.binNames.clear(); | 293 d.binNames.clear(); |
256 if (m_cq) { | 294 if (m_cq) { |
257 char name[50]; | 295 char name[50]; |
258 for (int i = 0; i < m_instruments[0].templateHeight; ++i) { | 296 for (int i = 0; i < getPack(0).templateHeight; ++i) { |
259 // We have a 600-bin (10 oct 60-bin CQ) of which the | 297 // We have a 600-bin (10 oct 60-bin CQ) of which the |
260 // lowest-frequency 55 bins have been dropped, for a | 298 // lowest-frequency 55 bins have been dropped, for a |
261 // 545-bin template. The native CQ bins go high->low | 299 // 545-bin template. The native CQ bins go high->low |
262 // frequency though, so these are still the first 545 bins | 300 // frequency though, so these are still the first 545 bins |
263 // as reported by getBinFrequency, though in reverse order | 301 // as reported by getBinFrequency, though in reverse order |
264 float freq = m_cq->getBinFrequency | 302 float freq = m_cq->getBinFrequency |
265 (m_instruments[0].templateHeight - i - 1); | 303 (getPack(0).templateHeight - i - 1); |
266 sprintf(name, "%.1f Hz", freq); | 304 sprintf(name, "%.1f Hz", freq); |
267 d.binNames.push_back(name); | 305 d.binNames.push_back(name); |
268 } | 306 } |
269 } | 307 } |
270 d.hasKnownExtents = false; | 308 d.hasKnownExtents = false; |
278 d.identifier = "pitchactivation"; | 316 d.identifier = "pitchactivation"; |
279 d.name = "Pitch activation distribution"; | 317 d.name = "Pitch activation distribution"; |
280 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction."; | 318 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction."; |
281 d.unit = ""; | 319 d.unit = ""; |
282 d.hasFixedBinCount = true; | 320 d.hasFixedBinCount = true; |
283 d.binCount = m_instruments[0].templateNoteCount; | 321 d.binCount = getPack(0).templateNoteCount; |
284 d.binNames.clear(); | 322 d.binNames.clear(); |
285 if (m_cq) { | 323 if (m_cq) { |
286 for (int i = 0; i < m_instruments[0].templateNoteCount; ++i) { | 324 for (int i = 0; i < getPack(0).templateNoteCount; ++i) { |
287 d.binNames.push_back(noteName(i, 0, 1)); | 325 d.binNames.push_back(getNoteName(i, 0)); |
288 } | 326 } |
289 } | 327 } |
290 d.hasKnownExtents = false; | 328 d.hasKnownExtents = false; |
291 d.isQuantized = false; | 329 d.isQuantized = false; |
292 d.sampleType = OutputDescriptor::FixedSampleRate; | 330 d.sampleType = OutputDescriptor::FixedSampleRate; |
302 d.hasFixedBinCount = true; | 340 d.hasFixedBinCount = true; |
303 d.binCount = 12; | 341 d.binCount = 12; |
304 d.binNames.clear(); | 342 d.binNames.clear(); |
305 if (m_cq) { | 343 if (m_cq) { |
306 for (int i = 0; i < 12; ++i) { | 344 for (int i = 0; i < 12; ++i) { |
307 d.binNames.push_back(chromaName(i)); | 345 d.binNames.push_back(getChromaName(i)); |
308 } | 346 } |
309 } | 347 } |
310 d.hasKnownExtents = false; | 348 d.hasKnownExtents = false; |
311 d.isQuantized = false; | 349 d.isQuantized = false; |
312 d.sampleType = OutputDescriptor::FixedSampleRate; | 350 d.sampleType = OutputDescriptor::FixedSampleRate; |
313 d.sampleRate = m_colsPerSec; | 351 d.sampleRate = m_colsPerSec; |
314 d.hasDuration = false; | 352 d.hasDuration = false; |
315 m_chromaOutputNo = list.size(); | 353 m_chromaOutputNo = list.size(); |
316 list.push_back(d); | 354 list.push_back(d); |
317 | 355 |
356 d.identifier = "templates"; | |
357 d.name = "Templates"; | |
358 d.description = "Constant-Q spectral templates for the selected instrument pack."; | |
359 d.unit = ""; | |
360 d.hasFixedBinCount = true; | |
361 d.binCount = getPack(0).templateHeight; | |
362 d.binNames.clear(); | |
363 if (m_cq) { | |
364 char name[50]; | |
365 for (int i = 0; i < getPack(0).templateHeight; ++i) { | |
366 // We have a 600-bin (10 oct 60-bin CQ) of which the | |
367 // lowest-frequency 55 bins have been dropped, for a | |
368 // 545-bin template. The native CQ bins go high->low | |
369 // frequency though, so these are still the first 545 bins | |
370 // as reported by getBinFrequency, though in reverse order | |
371 float freq = m_cq->getBinFrequency | |
372 (getPack(0).templateHeight - i - 1); | |
373 sprintf(name, "%.1f Hz", freq); | |
374 d.binNames.push_back(name); | |
375 } | |
376 } | |
377 d.hasKnownExtents = false; | |
378 d.isQuantized = false; | |
379 d.sampleType = OutputDescriptor::FixedSampleRate; | |
380 d.sampleRate = m_colsPerSec; | |
381 d.hasDuration = false; | |
382 m_templateOutputNo = list.size(); | |
383 list.push_back(d); | |
384 | |
318 return list; | 385 return list; |
319 } | 386 } |
320 | 387 |
321 std::string | 388 std::string |
322 Silvet::chromaName(int pitch) const | 389 Silvet::getChromaName(int pitch) const |
323 { | 390 { |
324 static const char *names[] = { | 391 static const char *names[] = { |
325 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#" | 392 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#" |
326 }; | 393 }; |
327 | 394 |
328 return names[pitch]; | 395 return names[pitch]; |
329 } | 396 } |
330 | 397 |
331 std::string | 398 std::string |
332 Silvet::noteName(int note, int shift, int shiftCount) const | 399 Silvet::getNoteName(int note, int shift) const |
333 { | 400 { |
334 string n = chromaName(note % 12); | 401 string n = getChromaName(note % 12); |
335 | 402 |
336 int oct = (note + 9) / 12; | 403 int oct = (note + 9) / 12; |
337 | 404 |
338 char buf[30]; | 405 char buf[30]; |
339 | 406 |
340 float pshift = 0.f; | 407 float pshift = 0.f; |
408 int shiftCount = getShiftCount(); | |
341 if (shiftCount > 1) { | 409 if (shiftCount > 1) { |
342 // see noteFrequency below | 410 // see getNoteFrequency below |
343 pshift = | 411 pshift = |
344 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount; | 412 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount; |
345 } | 413 } |
346 | 414 |
347 if (pshift > 0.f) { | 415 if (pshift > 0.f) { |
354 | 422 |
355 return buf; | 423 return buf; |
356 } | 424 } |
357 | 425 |
358 float | 426 float |
359 Silvet::noteFrequency(int note, int shift, int shiftCount) const | 427 Silvet::getNoteFrequency(int note, int shift) const |
360 { | 428 { |
361 // Convert shift number to a pitch shift. The given shift number | 429 // Convert shift number to a pitch shift. The given shift number |
362 // is an offset into the template array, which starts with some | 430 // is an offset into the template array, which starts with some |
363 // zeros, followed by the template, then some trailing zeros. | 431 // zeros, followed by the template, then some trailing zeros. |
364 // | 432 // |
370 // zeros at the start, which is the low-frequency end), for a | 438 // zeros at the start, which is the low-frequency end), for a |
371 // positive pitch shift; and higher values represent moving it | 439 // positive pitch shift; and higher values represent moving it |
372 // down in pitch, for a negative pitch shift. | 440 // down in pitch, for a negative pitch shift. |
373 | 441 |
374 float pshift = 0.f; | 442 float pshift = 0.f; |
443 int shiftCount = getShiftCount(); | |
375 if (shiftCount > 1) { | 444 if (shiftCount > 1) { |
376 pshift = | 445 pshift = |
377 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount; | 446 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount; |
378 } | 447 } |
379 | 448 |
380 return float(27.5 * pow(2.0, (note + pshift) / 12.0)); | 449 float freq = float(27.5 * pow(2.0, (note + pshift) / 12.0)); |
450 | |
451 // cerr << "note = " << note << ", shift = " << shift << ", shiftCount = " | |
452 // << shiftCount << ", obtained freq = " << freq << endl; | |
453 | |
454 return freq; | |
381 } | 455 } |
382 | 456 |
383 bool | 457 bool |
384 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize) | 458 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize) |
385 { | 459 { |
426 } | 500 } |
427 | 501 |
428 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling | 502 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling |
429 m_flattener->reset(); | 503 m_flattener->reset(); |
430 | 504 |
505 // This happens to be processingSampleRate / 3 (44100 / 3), and is the | |
506 // top frequency used for the EM templates: | |
507 double maxFreq = 14700; | |
508 | |
509 if (m_mode == LiveMode) { | |
510 // We only have 12 bpo rather than 60, so we need the top bin | |
511 // to be the middle one of the top 5, i.e. 2/5 of a semitone | |
512 // lower than 14700 | |
513 maxFreq *= powf(2.0, -1.0 / 30.0); | |
514 } | |
515 | |
431 double minFreq = 27.5; | 516 double minFreq = 27.5; |
432 | 517 |
433 if (!m_hqMode) { | 518 if (m_mode == LiveMode) { |
434 // We don't actually return any notes from the bottom octave, | 519 // We don't actually return any notes from the bottom octave, |
435 // so we can just pad with zeros | 520 // so we can just pad with zeros |
436 minFreq *= 2; | 521 minFreq *= 2; |
437 } | 522 } |
438 | 523 |
524 int bpo = 12 * | |
525 (m_mode == LiveMode ? binsPerSemitoneLive : binsPerSemitoneNormal); | |
526 | |
439 CQParameters params(processingSampleRate, | 527 CQParameters params(processingSampleRate, |
440 minFreq, | 528 minFreq, |
441 processingSampleRate / 3, | 529 maxFreq, |
442 processingBPO); | 530 bpo); |
443 | 531 |
444 params.q = 0.95; // MIREX code uses 0.8, but it seems 0.9 or lower | 532 params.q = 0.8; |
445 // drops the FFT size to 512 from 1024 and alters | 533 params.atomHopFactor = (m_mode == LiveMode ? 1.0 : 0.3); |
446 // some other processing parameters, making | |
447 // everything much, much slower. Could be a flaw | |
448 // in the CQ parameter calculations, must check | |
449 params.atomHopFactor = 0.3; | |
450 params.threshold = 0.0005; | 534 params.threshold = 0.0005; |
535 params.decimator = | |
536 (m_mode == LiveMode ? | |
537 CQParameters::FasterDecimator : CQParameters::BetterDecimator); | |
451 params.window = CQParameters::Hann; | 538 params.window = CQParameters::Hann; |
452 | 539 |
453 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear); | 540 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear); |
454 | 541 |
455 m_colsPerSec = m_hqMode ? 50 : 25; | 542 // cerr << "CQ bins = " << m_cq->getTotalBins() << endl; |
543 // cerr << "CQ min freq = " << m_cq->getMinFrequency() << " (and for confirmation, freq of bin 0 = " << m_cq->getBinFrequency(0) << ")" << endl; | |
544 | |
545 m_colsPerSec = 50; | |
456 | 546 |
457 for (int i = 0; i < (int)m_postFilter.size(); ++i) { | 547 for (int i = 0; i < (int)m_postFilter.size(); ++i) { |
458 delete m_postFilter[i]; | 548 delete m_postFilter[i]; |
459 } | 549 } |
460 m_postFilter.clear(); | 550 m_postFilter.clear(); |
461 for (int i = 0; i < m_instruments[0].templateNoteCount; ++i) { | 551 int postFilterLength = 3; |
462 m_postFilter.push_back(new MedianFilter<double>(3)); | 552 for (int i = 0; i < getPack(0).templateNoteCount; ++i) { |
553 m_postFilter.push_back(new MedianFilter<double>(postFilterLength)); | |
463 } | 554 } |
464 m_pianoRoll.clear(); | 555 m_pianoRoll.clear(); |
465 m_inputGains.clear(); | 556 m_inputGains.clear(); |
466 m_columnCount = 0; | 557 m_columnCount = 0; |
467 m_resampledCount = 0; | 558 m_resampledCount = 0; |
470 } | 561 } |
471 | 562 |
472 Silvet::FeatureSet | 563 Silvet::FeatureSet |
473 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp) | 564 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp) |
474 { | 565 { |
566 FeatureSet fs; | |
567 | |
475 if (!m_haveStartTime) { | 568 if (!m_haveStartTime) { |
569 | |
476 m_startTime = timestamp; | 570 m_startTime = timestamp; |
477 m_haveStartTime = true; | 571 m_haveStartTime = true; |
572 | |
573 insertTemplateFeatures(fs); | |
478 } | 574 } |
479 | 575 |
480 vector<float> flattened(m_blockSize); | 576 vector<float> flattened(m_blockSize); |
481 float gain = 1.f; | 577 float gain = 1.f; |
482 m_flattener->connectInputPort | 578 m_flattener->connectInputPort |
505 int resamplerLatency = m_resampler->getLatency(); | 601 int resamplerLatency = m_resampler->getLatency(); |
506 | 602 |
507 if (hadCount < resamplerLatency) { | 603 if (hadCount < resamplerLatency) { |
508 int stillToDrop = resamplerLatency - hadCount; | 604 int stillToDrop = resamplerLatency - hadCount; |
509 if (stillToDrop >= int(data.size())) { | 605 if (stillToDrop >= int(data.size())) { |
510 return FeatureSet(); | 606 return fs; |
511 } else { | 607 } else { |
512 data = vector<double>(data.begin() + stillToDrop, data.end()); | 608 data = vector<double>(data.begin() + stillToDrop, data.end()); |
513 } | 609 } |
514 } | 610 } |
515 } | 611 } |
516 | 612 |
517 Grid cqout = m_cq->process(data); | 613 Grid cqout = m_cq->process(data); |
518 FeatureSet fs = transcribe(cqout); | 614 transcribe(cqout, fs); |
519 return fs; | 615 return fs; |
520 } | 616 } |
521 | 617 |
522 Silvet::FeatureSet | 618 Silvet::FeatureSet |
523 Silvet::getRemainingFeatures() | 619 Silvet::getRemainingFeatures() |
524 { | 620 { |
525 Grid cqout = m_cq->getRemainingOutput(); | 621 Grid cqout = m_cq->getRemainingOutput(); |
526 FeatureSet fs = transcribe(cqout); | 622 FeatureSet fs; |
623 | |
624 if (m_columnCount == 0) { | |
625 // process() was never called, but we still want the template features | |
626 insertTemplateFeatures(fs); | |
627 } else { | |
628 | |
629 // Complete the transcription | |
630 | |
631 transcribe(cqout, fs); | |
632 | |
633 // And make sure any extant playing notes are finished and returned | |
634 | |
635 m_pianoRoll.push_back({}); | |
636 | |
637 auto events = noteTrack(); | |
638 | |
639 for (const auto &f : events.notes) { | |
640 fs[m_notesOutputNo].push_back(f); | |
641 } | |
642 | |
643 for (const auto &f : events.onsets) { | |
644 fs[m_onsetsOutputNo].push_back(f); | |
645 } | |
646 | |
647 for (const auto &f : events.onOffsets) { | |
648 fs[m_onOffsetsOutputNo].push_back(f); | |
649 } | |
650 } | |
651 | |
527 return fs; | 652 return fs; |
528 } | 653 } |
529 | 654 |
530 Silvet::FeatureSet | 655 void |
531 Silvet::transcribe(const Grid &cqout) | 656 Silvet::insertTemplateFeatures(FeatureSet &fs) |
532 { | 657 { |
533 Grid filtered = preProcess(cqout); | 658 const InstrumentPack &pack = getPack(m_instrument); |
534 | 659 for (int i = 0; i < int(pack.templates.size()) * pack.templateNoteCount; ++i) { |
535 FeatureSet fs; | 660 RealTime timestamp = RealTime::fromSeconds(double(i) / m_colsPerSec); |
536 | |
537 if (filtered.empty()) return fs; | |
538 | |
539 const InstrumentPack &pack = m_instruments[m_instrument]; | |
540 | |
541 for (int i = 0; i < (int)filtered.size(); ++i) { | |
542 Feature f; | 661 Feature f; |
543 for (int j = 0; j < pack.templateHeight; ++j) { | 662 char buffer[50]; |
544 f.values.push_back(float(filtered[i][j])); | 663 sprintf(buffer, "Note %d", i + 1); |
545 } | 664 f.label = buffer; |
546 fs[m_fcqOutputNo].push_back(f); | 665 f.hasTimestamp = true; |
547 } | 666 f.timestamp = timestamp; |
548 | 667 f.values = pack.templates[i / pack.templateNoteCount] |
549 int width = filtered.size(); | 668 .data[i % pack.templateNoteCount]; |
550 | 669 fs[m_templateOutputNo].push_back(f); |
551 Grid localPitches(width); | 670 } |
552 | 671 } |
553 bool wantShifts = m_hqMode && m_fineTuning; | 672 |
673 int | |
674 Silvet::getShiftCount() const | |
675 { | |
676 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning; | |
554 int shiftCount = 1; | 677 int shiftCount = 1; |
555 if (wantShifts) { | 678 if (wantShifts) { |
679 const InstrumentPack &pack(getPack(m_instrument)); | |
556 shiftCount = pack.templateMaxShift * 2 + 1; | 680 shiftCount = pack.templateMaxShift * 2 + 1; |
557 } | 681 } |
682 return shiftCount; | |
683 } | |
684 | |
685 void | |
686 Silvet::transcribe(const Grid &cqout, Silvet::FeatureSet &fs) | |
687 { | |
688 Grid filtered = preProcess(cqout); | |
689 | |
690 if (filtered.empty()) return; | |
691 | |
692 const InstrumentPack &pack(getPack(m_instrument)); | |
693 | |
694 int width = filtered.size(); | |
695 | |
696 double silenceThreshold = 0.01; | |
697 | |
698 for (int i = 0; i < width; ++i) { | |
699 | |
700 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1 + i); | |
701 float inputGain = getInputGainAt(timestamp); | |
702 | |
703 Feature f; | |
704 double rms = 0.0; | |
705 | |
706 for (int j = 0; j < pack.templateHeight; ++j) { | |
707 double v = filtered[i][j]; | |
708 rms += v * v; | |
709 f.values.push_back(float(v)); | |
710 } | |
711 | |
712 rms = sqrt(rms / pack.templateHeight); | |
713 if (rms / inputGain < silenceThreshold) { | |
714 filtered[i].clear(); | |
715 } | |
716 | |
717 fs[m_fcqOutputNo].push_back(f); | |
718 } | |
719 | |
720 Grid localPitches(width); | |
721 | |
722 int shiftCount = getShiftCount(); | |
723 bool wantShifts = (shiftCount > 1); | |
558 | 724 |
559 vector<vector<int> > localBestShifts; | 725 vector<vector<int> > localBestShifts; |
560 if (wantShifts) { | 726 if (wantShifts) { |
561 localBestShifts = vector<vector<int> >(width); | 727 localBestShifts = vector<vector<int> >(width); |
562 } | 728 } |
563 | 729 |
564 #ifndef MAX_EM_THREADS | 730 #ifndef MAX_EM_THREADS |
565 #define MAX_EM_THREADS 8 | 731 #define MAX_EM_THREADS 8 |
566 #endif | 732 #endif |
567 | 733 |
734 int emThreadCount = MAX_EM_THREADS; | |
735 if (m_mode == LiveMode && pack.templates.size() == 1) { | |
736 // The EM step is probably not slow enough to merit multithreading | |
737 emThreadCount = 1; | |
738 } | |
739 | |
568 #if (defined(MAX_EM_THREADS) && (MAX_EM_THREADS > 1)) | 740 #if (defined(MAX_EM_THREADS) && (MAX_EM_THREADS > 1)) |
569 for (int i = 0; i < width; ) { | 741 if (emThreadCount > 1) { |
570 typedef future<pair<vector<double>, vector<int>>> EMFuture; | 742 for (int i = 0; i < width; ) { |
571 vector<EMFuture> results; | 743 typedef future<pair<vector<double>, vector<int>>> EMFuture; |
572 for (int j = 0; j < MAX_EM_THREADS && i + j < width; ++j) { | 744 vector<EMFuture> results; |
573 results.push_back | 745 for (int j = 0; j < emThreadCount && i + j < width; ++j) { |
574 (async(std::launch::async, | 746 results.push_back |
575 [&](int index) { | 747 (async(std::launch::async, |
576 return applyEM(pack, filtered.at(index), wantShifts); | 748 [&](int index) { |
577 }, i + j)); | 749 return applyEM(pack, filtered.at(index)); |
578 } | 750 }, i + j)); |
579 for (int j = 0; j < MAX_EM_THREADS && i + j < width; ++j) { | 751 } |
580 auto out = results[j].get(); | 752 for (int j = 0; j < emThreadCount && i + j < width; ++j) { |
581 localPitches[i+j] = out.first; | 753 auto out = results[j].get(); |
582 if (wantShifts) localBestShifts[i+j] = out.second; | 754 localPitches[i+j] = out.first; |
583 } | 755 if (wantShifts) localBestShifts[i+j] = out.second; |
584 i += MAX_EM_THREADS; | 756 } |
585 } | 757 i += emThreadCount; |
586 #else | 758 } |
759 } | |
760 #endif | |
761 | |
762 if (emThreadCount == 1) { | |
763 for (int i = 0; i < width; ++i) { | |
764 auto out = applyEM(pack, filtered.at(i)); | |
765 localPitches[i] = out.first; | |
766 if (wantShifts) localBestShifts[i] = out.second; | |
767 } | |
768 } | |
769 | |
587 for (int i = 0; i < width; ++i) { | 770 for (int i = 0; i < width; ++i) { |
588 auto out = applyEM(pack, filtered.at(i), wantShifts); | 771 |
589 localPitches[i] = out.first; | 772 vector<double> filtered; |
590 if (wantShifts) localBestShifts[i] = out.second; | 773 |
591 } | 774 for (int j = 0; j < pack.templateNoteCount; ++j) { |
592 #endif | 775 m_postFilter[j]->push(localPitches[i][j]); |
593 | 776 filtered.push_back(m_postFilter[j]->get()); |
594 for (int i = 0; i < width; ++i) { | 777 } |
595 | |
596 // This returns a filtered column, and pushes the | |
597 // up-to-max-polyphony activation column to m_pianoRoll | |
598 vector<double> filtered = postProcess | |
599 (localPitches[i], localBestShifts[i], wantShifts); | |
600 | 778 |
601 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1); | 779 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1); |
602 float inputGain = getInputGainAt(timestamp); | 780 float inputGain = getInputGainAt(timestamp); |
603 | 781 |
604 Feature f; | 782 Feature f; |
613 f.values.resize(12); | 791 f.values.resize(12); |
614 for (int j = 0; j < (int)filtered.size(); ++j) { | 792 for (int j = 0; j < (int)filtered.size(); ++j) { |
615 f.values[j % 12] += filtered[j] / inputGain; | 793 f.values[j % 12] += filtered[j] / inputGain; |
616 } | 794 } |
617 fs[m_chromaOutputNo].push_back(f); | 795 fs[m_chromaOutputNo].push_back(f); |
618 | 796 |
619 FeatureList noteFeatures = noteTrack(shiftCount); | 797 // This pushes the up-to-max-polyphony activation column to |
620 | 798 // m_pianoRoll |
621 for (FeatureList::const_iterator fi = noteFeatures.begin(); | 799 postProcess(filtered, localBestShifts[i]); |
622 fi != noteFeatures.end(); ++fi) { | 800 |
623 fs[m_notesOutputNo].push_back(*fi); | 801 auto events = noteTrack(); |
624 } | 802 |
625 } | 803 for (const auto &f : events.notes) { |
626 | 804 fs[m_notesOutputNo].push_back(f); |
627 return fs; | 805 } |
806 | |
807 for (const auto &f : events.onsets) { | |
808 fs[m_onsetsOutputNo].push_back(f); | |
809 } | |
810 | |
811 for (const auto &f : events.onOffsets) { | |
812 fs[m_onOffsetsOutputNo].push_back(f); | |
813 } | |
814 } | |
628 } | 815 } |
629 | 816 |
630 pair<vector<double>, vector<int> > | 817 pair<vector<double>, vector<int> > |
631 Silvet::applyEM(const InstrumentPack &pack, | 818 Silvet::applyEM(const InstrumentPack &pack, |
632 const vector<double> &column, | 819 const vector<double> &column) |
633 bool wantShifts) | |
634 { | 820 { |
635 double columnThreshold = 1e-5; | 821 double columnThreshold = 1e-5; |
822 | |
823 if (m_mode == LiveMode) { | |
824 columnThreshold /= 15; | |
825 } | |
636 | 826 |
637 vector<double> pitches(pack.templateNoteCount, 0.0); | 827 vector<double> pitches(pack.templateNoteCount, 0.0); |
638 vector<int> bestShifts; | 828 vector<int> bestShifts; |
829 | |
830 if (column.empty()) return { pitches, bestShifts }; | |
639 | 831 |
640 double sum = 0.0; | 832 double sum = 0.0; |
641 for (int j = 0; j < pack.templateHeight; ++j) { | 833 for (int j = 0; j < pack.templateHeight; ++j) { |
642 sum += column.at(j); | 834 sum += column.at(j); |
643 } | 835 } |
644 if (sum < columnThreshold) return { pitches, bestShifts }; | 836 if (sum < columnThreshold) return { pitches, bestShifts }; |
645 | 837 |
646 EM em(&pack, m_hqMode); | 838 EM em(&pack, m_mode == HighQualityMode); |
647 | 839 |
648 em.setPitchSparsity(pack.pitchSparsity); | 840 em.setPitchSparsity(pack.pitchSparsity); |
649 em.setSourceSparsity(pack.sourceSparsity); | 841 em.setSourceSparsity(pack.sourceSparsity); |
650 | 842 |
651 int iterations = m_hqMode ? 20 : 10; | 843 int iterations = (m_mode == HighQualityMode ? 20 : 10); |
652 | 844 |
653 for (int j = 0; j < iterations; ++j) { | 845 for (int j = 0; j < iterations; ++j) { |
654 em.iterate(column.data()); | 846 em.iterate(column.data()); |
655 } | 847 } |
656 | 848 |
657 const float *pitchDist = em.getPitchDistribution(); | 849 const float *pitchDist = em.getPitchDistribution(); |
658 const float *const *shiftDist = em.getShifts(); | 850 const float *const *shiftDist = em.getShifts(); |
659 | 851 |
660 int shiftCount = 1; | 852 int shiftCount = getShiftCount(); |
661 if (wantShifts) { | |
662 shiftCount = pack.templateMaxShift * 2 + 1; | |
663 } | |
664 | 853 |
665 for (int j = 0; j < pack.templateNoteCount; ++j) { | 854 for (int j = 0; j < pack.templateNoteCount; ++j) { |
666 | 855 |
667 pitches[j] = pitchDist[j] * sum; | 856 pitches[j] = pitchDist[j] * sum; |
668 | 857 |
669 int bestShift = 0; | 858 int bestShift = 0; |
670 float bestShiftValue = 0.0; | 859 float bestShiftValue = 0.0; |
671 if (wantShifts) { | 860 if (shiftCount > 1) { |
672 for (int k = 0; k < shiftCount; ++k) { | 861 for (int k = 0; k < shiftCount; ++k) { |
673 float value = shiftDist[k][j]; | 862 float value = shiftDist[k][j]; |
674 if (k == 0 || value > bestShiftValue) { | 863 if (k == 0 || value > bestShiftValue) { |
675 bestShiftValue = value; | 864 bestShiftValue = value; |
676 bestShift = k; | 865 bestShift = k; |
700 // isn't quite accurate. But the small constant offset is | 889 // isn't quite accurate. But the small constant offset is |
701 // practically irrelevant compared to the jitter from the frame | 890 // practically irrelevant compared to the jitter from the frame |
702 // size we reduce to in a moment | 891 // size we reduce to in a moment |
703 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop(); | 892 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop(); |
704 | 893 |
705 const InstrumentPack &pack = m_instruments[m_instrument]; | 894 const InstrumentPack &pack(getPack(m_instrument)); |
706 | 895 |
707 for (int i = 0; i < width; ++i) { | 896 for (int i = 0; i < width; ++i) { |
708 | 897 |
709 if (m_columnCount < latentColumns) { | 898 if (m_columnCount < latentColumns) { |
710 ++m_columnCount; | 899 ++m_columnCount; |
719 if (select) { | 908 if (select) { |
720 vector<double> inCol = in[i]; | 909 vector<double> inCol = in[i]; |
721 vector<double> outCol(pack.templateHeight); | 910 vector<double> outCol(pack.templateHeight); |
722 | 911 |
723 // In HQ mode, the CQ returns 600 bins and we ignore the | 912 // In HQ mode, the CQ returns 600 bins and we ignore the |
724 // lowest 55 of them. | 913 // lowest 55 of them (assuming binsPerSemitone == 5). |
725 // | 914 // |
726 // In draft mode the CQ is an octave shorter, returning | 915 // In live mode the CQ is an octave shorter, returning 540 |
727 // 540 bins, so we instead pad them with an additional 5 | 916 // bins or equivalent, so we instead pad them with an |
728 // zeros. | 917 // additional 5 or equivalent zeros. |
729 // | 918 // |
730 // We also need to reverse the column as we go, since the | 919 // We also need to reverse the column as we go, since the |
731 // raw CQ has the high frequencies first and we need it | 920 // raw CQ has the high frequencies first and we need it |
732 // the other way around. | 921 // the other way around. |
733 | 922 |
734 if (m_hqMode) { | 923 int bps = (m_mode == LiveMode ? |
924 binsPerSemitoneLive : binsPerSemitoneNormal); | |
925 | |
926 if (m_mode == HighQualityMode) { | |
735 for (int j = 0; j < pack.templateHeight; ++j) { | 927 for (int j = 0; j < pack.templateHeight; ++j) { |
736 int ix = inCol.size() - j - 55; | 928 int ix = inCol.size() - j - (11 * bps); |
737 outCol[j] = inCol[ix]; | 929 outCol[j] = inCol[ix]; |
738 } | 930 } |
739 } else { | 931 } else { |
740 for (int j = 0; j < 5; ++j) { | 932 for (int j = 0; j < bps; ++j) { |
741 outCol[j] = 0.0; | 933 outCol[j] = 0.0; |
742 } | 934 } |
743 for (int j = 5; j < pack.templateHeight; ++j) { | 935 for (int j = bps; j < pack.templateHeight; ++j) { |
744 int ix = inCol.size() - j + 4; | 936 int ix = inCol.size() - j + (bps-1); |
745 outCol[j] = inCol[ix]; | 937 outCol[j] = inCol[ix]; |
746 } | 938 } |
747 } | 939 } |
748 | 940 |
749 vector<double> noiseLevel1 = | 941 vector<double> noiseLevel1 = |
750 MedianFilter<double>::filter(40, outCol); | 942 MedianFilter<double>::filter(8 * bps, outCol); |
751 for (int j = 0; j < pack.templateHeight; ++j) { | 943 for (int j = 0; j < pack.templateHeight; ++j) { |
752 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]); | 944 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]); |
753 } | 945 } |
754 | 946 |
755 vector<double> noiseLevel2 = | 947 vector<double> noiseLevel2 = |
756 MedianFilter<double>::filter(40, noiseLevel1); | 948 MedianFilter<double>::filter(8 * bps, noiseLevel1); |
757 for (int j = 0; j < pack.templateHeight; ++j) { | 949 for (int j = 0; j < pack.templateHeight; ++j) { |
758 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0); | 950 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0); |
759 } | 951 } |
760 | 952 |
761 out.push_back(outCol); | 953 out.push_back(outCol); |
765 } | 957 } |
766 | 958 |
767 return out; | 959 return out; |
768 } | 960 } |
769 | 961 |
770 vector<double> | 962 void |
771 Silvet::postProcess(const vector<double> &pitches, | 963 Silvet::postProcess(const vector<double> &pitches, |
772 const vector<int> &bestShifts, | 964 const vector<int> &bestShifts) |
773 bool wantShifts) | 965 { |
774 { | 966 const InstrumentPack &pack(getPack(m_instrument)); |
775 const InstrumentPack &pack = m_instruments[m_instrument]; | 967 |
776 | 968 // Threshold for level and reduce number of candidate pitches |
777 vector<double> filtered; | 969 |
970 typedef std::multimap<double, int> ValueIndexMap; | |
971 | |
972 ValueIndexMap strengths; | |
778 | 973 |
779 for (int j = 0; j < pack.templateNoteCount; ++j) { | 974 for (int j = 0; j < pack.templateNoteCount; ++j) { |
780 m_postFilter[j]->push(pitches[j]); | 975 |
781 filtered.push_back(m_postFilter[j]->get()); | 976 double strength = pitches[j]; |
782 } | |
783 | |
784 // Threshold for level and reduce number of candidate pitches | |
785 | |
786 typedef std::multimap<double, int> ValueIndexMap; | |
787 | |
788 ValueIndexMap strengths; | |
789 | |
790 for (int j = 0; j < pack.templateNoteCount; ++j) { | |
791 double strength = filtered[j]; | |
792 if (strength < pack.levelThreshold) continue; | 977 if (strength < pack.levelThreshold) continue; |
978 | |
979 // In live mode with only a 12-bpo CQ, we are very likely to | |
980 // get clusters of two or three high scores at a time for | |
981 // neighbouring semitones. Eliminate these by picking only the | |
982 // peaks (except that we never eliminate a note that has | |
983 // already been established as currently playing). This means | |
984 // we can't recognise actual semitone chords if they ever | |
985 // appear, but it's not as if live mode is good enough for | |
986 // that to be a big deal anyway. | |
987 if (m_mode == LiveMode) { | |
988 if (m_current.find(j) == m_current.end() && | |
989 (j == 0 || | |
990 j + 1 == pack.templateNoteCount || | |
991 pitches[j] < pitches[j-1] || | |
992 pitches[j] < pitches[j+1])) { | |
993 // not a peak or a currently-playing note: skip it | |
994 continue; | |
995 } | |
996 } | |
997 | |
793 strengths.insert(ValueIndexMap::value_type(strength, j)); | 998 strengths.insert(ValueIndexMap::value_type(strength, j)); |
794 } | 999 } |
795 | 1000 |
796 ValueIndexMap::const_iterator si = strengths.end(); | 1001 ValueIndexMap::const_iterator si = strengths.end(); |
797 | 1002 |
798 map<int, double> active; | 1003 map<int, double> active; |
799 map<int, int> activeShifts; | 1004 map<int, int> activeShifts; |
800 | 1005 |
1006 int shiftCount = getShiftCount(); | |
1007 | |
801 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) { | 1008 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) { |
802 | 1009 |
803 --si; | 1010 --si; |
804 | 1011 |
805 double strength = si->first; | 1012 double strength = si->first; |
806 int j = si->second; | 1013 int j = si->second; |
807 | 1014 |
808 active[j] = strength; | 1015 active[j] = strength; |
809 | 1016 |
810 if (wantShifts) { | 1017 if (shiftCount > 1) { |
811 activeShifts[j] = bestShifts[j]; | 1018 activeShifts[j] = bestShifts[j]; |
812 } | 1019 } |
813 } | 1020 } |
814 | 1021 |
815 m_pianoRoll.push_back(active); | 1022 m_pianoRoll.push_back(active); |
816 | 1023 |
817 if (wantShifts) { | 1024 if (shiftCount > 1) { |
818 m_pianoRollShifts.push_back(activeShifts); | 1025 m_pianoRollShifts.push_back(activeShifts); |
819 } | 1026 } |
820 | 1027 |
821 return filtered; | 1028 return; |
822 } | 1029 } |
823 | 1030 |
824 Vamp::Plugin::FeatureList | 1031 Silvet::FeatureChunk |
825 Silvet::noteTrack(int shiftCount) | 1032 Silvet::noteTrack() |
826 { | 1033 { |
827 // Minimum duration pruning, and conversion to notes. We can only | 1034 // Minimum duration pruning, and conversion to notes. We can only |
828 // report notes that have just ended (i.e. that are absent in the | 1035 // report notes that have just ended (i.e. that are absent in the |
829 // latest active set but present in the prior set in the piano | 1036 // latest active set but present in the prior set in the piano |
830 // roll) -- any notes that ended earlier will have been reported | 1037 // roll) -- any notes that ended earlier will have been reported |
836 const map<int, double> &active = m_pianoRoll[width]; | 1043 const map<int, double> &active = m_pianoRoll[width]; |
837 | 1044 |
838 double columnDuration = 1.0 / m_colsPerSec; | 1045 double columnDuration = 1.0 / m_colsPerSec; |
839 | 1046 |
840 // only keep notes >= 100ms or thereabouts | 1047 // only keep notes >= 100ms or thereabouts |
841 int durationThreshold = floor(0.1 / columnDuration); // columns | 1048 double durationThrSec = 0.1; |
1049 int durationThreshold = floor(durationThrSec / columnDuration); // in cols | |
842 if (durationThreshold < 1) durationThreshold = 1; | 1050 if (durationThreshold < 1) durationThreshold = 1; |
843 | 1051 |
844 FeatureList noteFeatures; | 1052 FeatureList noteFeatures, onsetFeatures, onOffsetFeatures; |
845 | 1053 |
846 if (width < durationThreshold + 1) { | 1054 if (width < durationThreshold + 1) { |
847 return noteFeatures; | 1055 return { noteFeatures, onsetFeatures, onOffsetFeatures }; |
848 } | 1056 } |
849 | 1057 |
850 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix) | |
851 | |
852 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin(); | 1058 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin(); |
853 ni != m_pianoRoll[width-1].end(); ++ni) { | 1059 ni != m_pianoRoll[width-1].end(); ++ni) { |
854 | 1060 |
855 int note = ni->first; | 1061 int note = ni->first; |
856 | 1062 |
857 if (active.find(note) != active.end()) { | |
858 // the note is still playing | |
859 continue; | |
860 } | |
861 | |
862 // the note was playing but just ended | |
863 int end = width; | 1063 int end = width; |
864 int start = end-1; | 1064 int start = end-1; |
865 | 1065 |
866 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) { | 1066 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) { |
867 --start; | 1067 --start; |
868 } | 1068 } |
869 ++start; | 1069 ++start; |
870 | 1070 |
871 if ((end - start) < durationThreshold) { | 1071 int duration = end - start; |
1072 | |
1073 if (duration < durationThreshold) { | |
872 continue; | 1074 continue; |
873 } | 1075 } |
874 | 1076 |
875 emitNote(start, end, note, shiftCount, noteFeatures); | 1077 if (duration == durationThreshold) { |
1078 m_current.insert(note); | |
1079 emitOnset(start, note, onsetFeatures); | |
1080 emitOnset(start, note, onOffsetFeatures); | |
1081 } | |
1082 | |
1083 if (active.find(note) == active.end()) { | |
1084 // the note was playing but just ended | |
1085 m_current.erase(note); | |
1086 emitNote(start, end, note, noteFeatures); | |
1087 emitOffset(start, end, note, onOffsetFeatures); | |
1088 } else { // still playing | |
1089 // repeated note detection: if level is greater than this | |
1090 // multiple of its previous value, then we end the note and | |
1091 // restart it with the same pitch | |
1092 double restartFactor = 1.5; | |
1093 if (duration >= durationThreshold * 2 && | |
1094 (active.find(note)->second > | |
1095 restartFactor * m_pianoRoll[width-1][note])) { | |
1096 m_current.erase(note); | |
1097 emitNote(start, end-1, note, noteFeatures); | |
1098 emitOffset(start, end-1, note, onOffsetFeatures); | |
1099 // and remove this so that we start counting the new | |
1100 // note's duration from the current position | |
1101 m_pianoRoll[width-1].erase(note); | |
1102 } | |
1103 } | |
876 } | 1104 } |
877 | 1105 |
878 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl; | 1106 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl; |
879 | 1107 |
880 return noteFeatures; | 1108 return { noteFeatures, onsetFeatures, onOffsetFeatures }; |
881 } | 1109 } |
882 | 1110 |
883 void | 1111 void |
884 Silvet::emitNote(int start, int end, int note, int shiftCount, | 1112 Silvet::emitNote(int start, int end, int note, FeatureList &noteFeatures) |
885 FeatureList &noteFeatures) | |
886 { | 1113 { |
887 int partStart = start; | 1114 int partStart = start; |
888 int partShift = 0; | 1115 int partShift = 0; |
889 int partVelocity = 0; | 1116 double partStrength = 0; |
890 | 1117 |
891 int partThreshold = floor(0.05 * m_colsPerSec); | 1118 int partThreshold = floor(0.05 * m_colsPerSec); |
892 | 1119 |
893 for (int i = start; i != end; ++i) { | 1120 for (int i = start; i != end; ++i) { |
894 | 1121 |
895 double strength = m_pianoRoll[i][note]; | 1122 double strength = m_pianoRoll[i][note]; |
896 | 1123 |
897 int shift = 0; | 1124 int shift = 0; |
898 | 1125 |
899 if (shiftCount > 1) { | 1126 if (getShiftCount() > 1) { |
900 | 1127 |
901 shift = m_pianoRollShifts[i][note]; | 1128 shift = m_pianoRollShifts[i][note]; |
902 | 1129 |
903 if (i == partStart) { | 1130 if (i == partStart) { |
904 partShift = shift; | 1131 partShift = shift; |
911 // pitch has changed, emit an intermediate note | 1138 // pitch has changed, emit an intermediate note |
912 noteFeatures.push_back(makeNoteFeature(partStart, | 1139 noteFeatures.push_back(makeNoteFeature(partStart, |
913 i, | 1140 i, |
914 note, | 1141 note, |
915 partShift, | 1142 partShift, |
916 shiftCount, | 1143 partStrength)); |
917 partVelocity)); | |
918 partStart = i; | 1144 partStart = i; |
919 partShift = shift; | 1145 partShift = shift; |
920 partVelocity = 0; | 1146 partStrength = 0; |
921 } | 1147 } |
922 } | 1148 } |
923 | 1149 |
924 int v = round(strength * 2); | 1150 if (strength > partStrength) { |
925 if (v > partVelocity) { | 1151 partStrength = strength; |
926 partVelocity = v; | |
927 } | 1152 } |
928 } | 1153 } |
929 | 1154 |
930 if (end >= partStart + partThreshold) { | 1155 if (end >= partStart + partThreshold) { |
931 noteFeatures.push_back(makeNoteFeature(partStart, | 1156 noteFeatures.push_back(makeNoteFeature(partStart, |
932 end, | 1157 end, |
933 note, | 1158 note, |
934 partShift, | 1159 partShift, |
935 shiftCount, | 1160 partStrength)); |
936 partVelocity)); | 1161 } |
937 } | 1162 } |
1163 | |
1164 void | |
1165 Silvet::emitOnset(int start, int note, FeatureList &onOffsetFeatures) | |
1166 { | |
1167 int len = int(m_pianoRoll.size()); | |
1168 | |
1169 double onsetStrength = 0; | |
1170 | |
1171 int shift = 0; | |
1172 if (getShiftCount() > 1) { | |
1173 shift = m_pianoRollShifts[start][note]; | |
1174 } | |
1175 | |
1176 for (int i = start; i < len; ++i) { | |
1177 double strength = m_pianoRoll[i][note]; | |
1178 if (strength > onsetStrength) { | |
1179 onsetStrength = strength; | |
1180 } | |
1181 } | |
1182 | |
1183 if (onsetStrength == 0) return; | |
1184 | |
1185 onOffsetFeatures.push_back(makeOnsetFeature(start, | |
1186 note, | |
1187 shift, | |
1188 onsetStrength)); | |
1189 } | |
1190 | |
1191 void | |
1192 Silvet::emitOffset(int start, int end, int note, FeatureList &onOffsetFeatures) | |
1193 { | |
1194 int shift = 0; | |
1195 if (getShiftCount() > 1) { | |
1196 shift = m_pianoRollShifts[start][note]; | |
1197 } | |
1198 | |
1199 onOffsetFeatures.push_back(makeOffsetFeature(end, | |
1200 note, | |
1201 shift)); | |
938 } | 1202 } |
939 | 1203 |
940 RealTime | 1204 RealTime |
941 Silvet::getColumnTimestamp(int column) | 1205 Silvet::getColumnTimestamp(int column) |
942 { | 1206 { |
950 Silvet::Feature | 1214 Silvet::Feature |
951 Silvet::makeNoteFeature(int start, | 1215 Silvet::makeNoteFeature(int start, |
952 int end, | 1216 int end, |
953 int note, | 1217 int note, |
954 int shift, | 1218 int shift, |
955 int shiftCount, | 1219 double strength) |
956 int velocity) | |
957 { | 1220 { |
958 Feature f; | 1221 Feature f; |
959 | 1222 |
960 f.hasTimestamp = true; | 1223 f.hasTimestamp = true; |
961 f.timestamp = getColumnTimestamp(start); | 1224 f.timestamp = getColumnTimestamp(start); |
962 | 1225 |
963 f.hasDuration = true; | 1226 f.hasDuration = true; |
964 f.duration = getColumnTimestamp(end) - f.timestamp; | 1227 f.duration = getColumnTimestamp(end) - f.timestamp; |
965 | 1228 |
966 f.values.clear(); | 1229 f.values.clear(); |
967 | 1230 f.values.push_back(getNoteFrequency(note, shift)); |
968 f.values.push_back | 1231 f.values.push_back(getVelocityFor(strength, start)); |
969 (noteFrequency(note, shift, shiftCount)); | 1232 |
970 | 1233 f.label = getNoteName(note, shift); |
971 float inputGain = getInputGainAt(f.timestamp); | |
972 // cerr << "adjusting velocity from " << velocity << " to " << round(velocity/inputGain) << endl; | |
973 velocity = round(velocity / inputGain); | |
974 if (velocity > 127) velocity = 127; | |
975 if (velocity < 1) velocity = 1; | |
976 f.values.push_back(velocity); | |
977 | |
978 f.label = noteName(note, shift, shiftCount); | |
979 | 1234 |
980 return f; | 1235 return f; |
1236 } | |
1237 | |
1238 Silvet::Feature | |
1239 Silvet::makeOnsetFeature(int start, | |
1240 int note, | |
1241 int shift, | |
1242 double strength) | |
1243 { | |
1244 Feature f; | |
1245 | |
1246 f.hasTimestamp = true; | |
1247 f.timestamp = getColumnTimestamp(start); | |
1248 | |
1249 f.hasDuration = false; | |
1250 | |
1251 f.values.clear(); | |
1252 f.values.push_back(getNoteFrequency(note, shift)); | |
1253 f.values.push_back(getVelocityFor(strength, start)); | |
1254 | |
1255 f.label = getNoteName(note, shift); | |
1256 | |
1257 return f; | |
1258 } | |
1259 | |
1260 Silvet::Feature | |
1261 Silvet::makeOffsetFeature(int col, | |
1262 int note, | |
1263 int shift) | |
1264 { | |
1265 Feature f; | |
1266 | |
1267 f.hasTimestamp = true; | |
1268 f.timestamp = getColumnTimestamp(col); | |
1269 | |
1270 f.hasDuration = false; | |
1271 | |
1272 f.values.clear(); | |
1273 f.values.push_back(getNoteFrequency(note, shift)); | |
1274 f.values.push_back(0); // velocity 0 for offset | |
1275 | |
1276 f.label = getNoteName(note, shift) + " off"; | |
1277 | |
1278 return f; | |
1279 } | |
1280 | |
1281 int | |
1282 Silvet::getVelocityFor(double strength, int column) | |
1283 { | |
1284 RealTime rt = getColumnTimestamp(column + 1); | |
1285 | |
1286 float inputGain = getInputGainAt(rt); | |
1287 | |
1288 double scale = 2.0; | |
1289 if (m_mode == LiveMode) scale = 20.0; | |
1290 | |
1291 double velocity = round((strength * scale) / inputGain); | |
1292 | |
1293 if (velocity > 127.0) velocity = 127.0; | |
1294 if (velocity < 1.0) velocity = 1.0; // assume surpassed 0 threshold already | |
1295 | |
1296 return int(velocity); | |
981 } | 1297 } |
982 | 1298 |
983 float | 1299 float |
984 Silvet::getInputGainAt(RealTime t) | 1300 Silvet::getInputGainAt(RealTime t) |
985 { | 1301 { |