annotate src/Silvet.cpp @ 372:af71cbdab621 tip

Update bqvec code
author Chris Cannam
date Tue, 19 Nov 2019 10:13:32 +0000
parents 78ed74fa177b
children
rev   line source
Chris@31 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@31 2
Chris@31 3 /*
Chris@31 4 Silvet
Chris@31 5
Chris@31 6 A Vamp plugin for note transcription.
Chris@31 7 Centre for Digital Music, Queen Mary University of London.
Chris@31 8
Chris@31 9 This program is free software; you can redistribute it and/or
Chris@31 10 modify it under the terms of the GNU General Public License as
Chris@31 11 published by the Free Software Foundation; either version 2 of the
Chris@31 12 License, or (at your option) any later version. See the file
Chris@31 13 COPYING included with this distribution for more information.
Chris@31 14 */
Chris@31 15
Chris@31 16 #include "Silvet.h"
Chris@34 17 #include "EM.h"
Chris@31 18
Chris@152 19 #include <cq/CQSpectrogram.h>
Chris@31 20
Chris@152 21 #include "MedianFilter.h"
Chris@152 22 #include "constant-q-cpp/src/dsp/Resampler.h"
Chris@246 23 #include "flattendynamics-ladspa.h"
Chris@298 24 #include "LiveInstruments.h"
Chris@31 25
Chris@31 26 #include <vector>
Chris@31 27
Chris@32 28 #include <cstdio>
Chris@32 29
Chris@356 30 #if (defined(MAX_EM_THREADS) && (MAX_EM_THREADS > 1))
Chris@356 31 #include <future>
Chris@356 32 using std::future;
Chris@356 33 using std::async;
Chris@356 34 #endif
Chris@356 35
Chris@31 36 using std::vector;
Chris@48 37 using std::cout;
Chris@31 38 using std::cerr;
Chris@31 39 using std::endl;
Chris@311 40 using std::pair;
Chris@356 41
Chris@40 42 using Vamp::RealTime;
Chris@31 43
// Sample rate, in Hz, at which audio is handed to the constant-Q
// transform. reset() creates a resampler whenever the plugin's input
// rate differs from this.
static const int processingSampleRate = 44100;

// Constant-Q bins per semitone for each processing mode; reset()
// multiplies the selected value by 12 to obtain bins per octave.
static const int binsPerSemitoneLive = 1;
static const int binsPerSemitoneNormal = 5;

// Inclusive range of input sample rates accepted by initialise().
static const int minInputSampleRate = 100;
static const int maxInputSampleRate = 192000;
Chris@272 51
Chris@316 52 static const Silvet::ProcessingMode defaultMode = Silvet::HighQualityMode;
Chris@316 53
Chris@31 54 Silvet::Silvet(float inputSampleRate) :
Chris@31 55 Plugin(inputSampleRate),
Chris@161 56 m_instruments(InstrumentPack::listInstrumentPacks()),
Chris@298 57 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)),
Chris@31 58 m_resampler(0),
Chris@246 59 m_flattener(0),
Chris@110 60 m_cq(0),
Chris@316 61 m_mode(defaultMode),
Chris@166 62 m_fineTuning(false),
Chris@178 63 m_instrument(0),
Chris@313 64 m_colsPerSec(50),
Chris@313 65 m_haveStartTime(false)
Chris@31 66 {
Chris@31 67 }
Chris@31 68
Chris@31 69 Silvet::~Silvet()
Chris@31 70 {
Chris@31 71 delete m_resampler;
Chris@246 72 delete m_flattener;
Chris@31 73 delete m_cq;
Chris@41 74 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 75 delete m_postFilter[i];
Chris@41 76 }
Chris@31 77 }
Chris@31 78
Chris@31 79 string
Chris@31 80 Silvet::getIdentifier() const
Chris@31 81 {
Chris@31 82 return "silvet";
Chris@31 83 }
Chris@31 84
Chris@31 85 string
Chris@31 86 Silvet::getName() const
Chris@31 87 {
Chris@31 88 return "Silvet Note Transcription";
Chris@31 89 }
Chris@31 90
Chris@31 91 string
Chris@31 92 Silvet::getDescription() const
Chris@31 93 {
Chris@191 94 return "Estimate the note onsets, pitches, and durations that make up a music recording.";
Chris@31 95 }
Chris@31 96
Chris@31 97 string
Chris@31 98 Silvet::getMaker() const
Chris@31 99 {
Chris@191 100 return "Queen Mary, University of London";
Chris@31 101 }
Chris@31 102
Chris@31 103 int
Chris@31 104 Silvet::getPluginVersion() const
Chris@31 105 {
Chris@309 106 return 3;
Chris@31 107 }
Chris@31 108
Chris@31 109 string
Chris@31 110 Silvet::getCopyright() const
Chris@31 111 {
Chris@191 112 return "Method by Emmanouil Benetos and Simon Dixon; plugin by Chris Cannam and Emmanouil Benetos. GPL licence.";
Chris@31 113 }
Chris@31 114
Chris@31 115 Silvet::InputDomain
Chris@31 116 Silvet::getInputDomain() const
Chris@31 117 {
Chris@31 118 return TimeDomain;
Chris@31 119 }
Chris@31 120
Chris@31 121 size_t
Chris@31 122 Silvet::getPreferredBlockSize() const
Chris@31 123 {
Chris@31 124 return 0;
Chris@31 125 }
Chris@31 126
Chris@31 127 size_t
Chris@31 128 Silvet::getPreferredStepSize() const
Chris@31 129 {
Chris@31 130 return 0;
Chris@31 131 }
Chris@31 132
Chris@31 133 size_t
Chris@31 134 Silvet::getMinChannelCount() const
Chris@31 135 {
Chris@31 136 return 1;
Chris@31 137 }
Chris@31 138
Chris@31 139 size_t
Chris@31 140 Silvet::getMaxChannelCount() const
Chris@31 141 {
Chris@31 142 return 1;
Chris@31 143 }
Chris@31 144
Chris@31 145 Silvet::ParameterList
Chris@31 146 Silvet::getParameterDescriptors() const
Chris@31 147 {
Chris@31 148 ParameterList list;
Chris@110 149
Chris@110 150 ParameterDescriptor desc;
Chris@110 151 desc.identifier = "mode";
Chris@110 152 desc.name = "Processing mode";
Chris@110 153 desc.unit = "";
Chris@341 154 desc.description = "Sets the tradeoff of processing speed against transcription quality. Live mode is much faster and detects notes with relatively low latency; Intensive mode (the default) is slower but will almost always produce better results.";
Chris@110 155 desc.minValue = 0;
Chris@344 156 desc.maxValue = 1;
Chris@316 157 desc.defaultValue = int(defaultMode);
Chris@110 158 desc.isQuantized = true;
Chris@110 159 desc.quantizeStep = 1;
Chris@341 160 desc.valueNames.push_back("Live (faster and lower latency)");
Chris@165 161 desc.valueNames.push_back("Intensive (higher quality)");
Chris@161 162 list.push_back(desc);
Chris@161 163
Chris@176 164 desc.identifier = "instrument";
Chris@176 165 desc.name = "Instrument";
Chris@161 166 desc.unit = "";
Chris@271 167 desc.description = "The instrument or instruments known to be present in the recording. This affects the set of instrument templates used, as well as the expected level of polyphony in the output. Using a more limited set of instruments than the default will also make the plugin run faster.\nNote that this plugin cannot isolate instruments: you can't use this setting to request notes from only one instrument in a recording with several. Instead, use this as a hint to the plugin about which instruments are actually present.";
Chris@161 168 desc.minValue = 0;
Chris@162 169 desc.maxValue = m_instruments.size()-1;
Chris@162 170 desc.defaultValue = 0;
Chris@161 171 desc.isQuantized = true;
Chris@161 172 desc.quantizeStep = 1;
Chris@161 173 desc.valueNames.clear();
Chris@162 174 for (int i = 0; i < int(m_instruments.size()); ++i) {
Chris@162 175 desc.valueNames.push_back(m_instruments[i].name);
Chris@162 176 }
Chris@166 177 list.push_back(desc);
Chris@161 178
Chris@166 179 desc.identifier = "finetune";
Chris@166 180 desc.name = "Return fine pitch estimates";
Chris@166 181 desc.unit = "";
Chris@271 182 desc.description = "Return pitch estimates at finer than semitone resolution. This works only in Intensive mode. Notes that appear to drift in pitch will be split up into shorter notes with individually finer pitches.";
Chris@166 183 desc.minValue = 0;
Chris@166 184 desc.maxValue = 1;
Chris@166 185 desc.defaultValue = 0;
Chris@166 186 desc.isQuantized = true;
Chris@166 187 desc.quantizeStep = 1;
Chris@166 188 desc.valueNames.clear();
Chris@110 189 list.push_back(desc);
Chris@110 190
Chris@31 191 return list;
Chris@31 192 }
Chris@31 193
Chris@31 194 float
Chris@31 195 Silvet::getParameter(string identifier) const
Chris@31 196 {
Chris@110 197 if (identifier == "mode") {
Chris@297 198 return (float)(int)m_mode;
Chris@166 199 } else if (identifier == "finetune") {
Chris@166 200 return m_fineTuning ? 1.f : 0.f;
Chris@176 201 } else if (identifier == "instrument") {
Chris@162 202 return m_instrument;
Chris@110 203 }
Chris@31 204 return 0;
Chris@31 205 }
Chris@31 206
Chris@31 207 void
Chris@31 208 Silvet::setParameter(string identifier, float value)
Chris@31 209 {
Chris@110 210 if (identifier == "mode") {
Chris@297 211 m_mode = (ProcessingMode)(int)(value + 0.5);
Chris@166 212 } else if (identifier == "finetune") {
Chris@166 213 m_fineTuning = (value > 0.5);
Chris@176 214 } else if (identifier == "instrument") {
Chris@162 215 m_instrument = lrintf(value);
Chris@110 216 }
Chris@31 217 }
Chris@31 218
Chris@31 219 Silvet::ProgramList
Chris@31 220 Silvet::getPrograms() const
Chris@31 221 {
Chris@31 222 ProgramList list;
Chris@31 223 return list;
Chris@31 224 }
Chris@31 225
Chris@31 226 string
Chris@31 227 Silvet::getCurrentProgram() const
Chris@31 228 {
Chris@31 229 return "";
Chris@31 230 }
Chris@31 231
Chris@31 232 void
Chris@31 233 Silvet::selectProgram(string name)
Chris@31 234 {
Chris@31 235 }
Chris@31 236
Chris@31 237 Silvet::OutputList
Chris@31 238 Silvet::getOutputDescriptors() const
Chris@31 239 {
Chris@31 240 OutputList list;
Chris@31 241
Chris@31 242 OutputDescriptor d;
Chris@51 243 d.identifier = "notes";
Chris@51 244 d.name = "Note transcription";
Chris@329 245 d.description = "Overall note transcription. Each note has time, duration, estimated fundamental frequency, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
Chris@41 246 d.unit = "Hz";
Chris@31 247 d.hasFixedBinCount = true;
Chris@31 248 d.binCount = 2;
Chris@41 249 d.binNames.push_back("Frequency");
Chris@31 250 d.binNames.push_back("Velocity");
Chris@31 251 d.hasKnownExtents = false;
Chris@31 252 d.isQuantized = false;
Chris@31 253 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@246 254 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@31 255 d.hasDuration = true;
Chris@32 256 m_notesOutputNo = list.size();
Chris@32 257 list.push_back(d);
Chris@32 258
Chris@319 259 d.identifier = "onsets";
Chris@319 260 d.name = "Note onsets";
Chris@323 261 d.description = "Note onsets, without durations. These can be calculated sooner than complete notes, because it isn't necessary to wait for a note to finish before returning its feature. Each event has time, estimated fundamental frequency in Hz, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
Chris@319 262 d.unit = "Hz";
Chris@319 263 d.hasFixedBinCount = true;
Chris@319 264 d.binCount = 2;
Chris@319 265 d.binNames.push_back("Frequency");
Chris@319 266 d.binNames.push_back("Velocity");
Chris@319 267 d.hasKnownExtents = false;
Chris@319 268 d.isQuantized = false;
Chris@319 269 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@319 270 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@319 271 d.hasDuration = false;
Chris@319 272 m_onsetsOutputNo = list.size();
Chris@319 273 list.push_back(d);
Chris@319 274
Chris@336 275 d.identifier = "onoffsets";
Chris@336 276 d.name = "Note onsets and offsets";
Chris@336 277 d.description = "Note onsets and offsets as separate events. Each onset event has time, estimated fundamental frequency in Hz, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture. Offsets are represented in the same way but with a velocity of 0.";
Chris@336 278 d.unit = "Hz";
Chris@336 279 d.hasFixedBinCount = true;
Chris@336 280 d.binCount = 2;
Chris@336 281 d.binNames.push_back("Frequency");
Chris@336 282 d.binNames.push_back("Velocity");
Chris@336 283 d.hasKnownExtents = false;
Chris@336 284 d.isQuantized = false;
Chris@336 285 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@336 286 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@336 287 d.hasDuration = false;
Chris@336 288 m_onOffsetsOutputNo = list.size();
Chris@336 289 list.push_back(d);
Chris@336 290
Chris@178 291 d.identifier = "timefreq";
Chris@178 292 d.name = "Time-frequency distribution";
Chris@271 293 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
Chris@178 294 d.unit = "";
Chris@178 295 d.hasFixedBinCount = true;
Chris@298 296 d.binCount = getPack(0).templateHeight;
Chris@178 297 d.binNames.clear();
Chris@178 298 if (m_cq) {
Chris@294 299 char name[50];
Chris@298 300 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@178 301 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@178 302 // lowest-frequency 55 bins have been dropped, for a
Chris@178 303 // 545-bin template. The native CQ bins go high->low
Chris@178 304 // frequency though, so these are still the first 545 bins
Chris@178 305 // as reported by getBinFrequency, though in reverse order
Chris@178 306 float freq = m_cq->getBinFrequency
Chris@298 307 (getPack(0).templateHeight - i - 1);
Chris@178 308 sprintf(name, "%.1f Hz", freq);
Chris@178 309 d.binNames.push_back(name);
Chris@178 310 }
Chris@178 311 }
Chris@178 312 d.hasKnownExtents = false;
Chris@178 313 d.isQuantized = false;
Chris@178 314 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@178 315 d.sampleRate = m_colsPerSec;
Chris@178 316 d.hasDuration = false;
Chris@178 317 m_fcqOutputNo = list.size();
Chris@178 318 list.push_back(d);
Chris@178 319
Chris@294 320 d.identifier = "pitchactivation";
Chris@294 321 d.name = "Pitch activation distribution";
Chris@294 322 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction.";
Chris@294 323 d.unit = "";
Chris@294 324 d.hasFixedBinCount = true;
Chris@298 325 d.binCount = getPack(0).templateNoteCount;
Chris@294 326 d.binNames.clear();
Chris@294 327 if (m_cq) {
Chris@298 328 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@336 329 d.binNames.push_back(getNoteName(i, 0));
Chris@294 330 }
Chris@294 331 }
Chris@294 332 d.hasKnownExtents = false;
Chris@294 333 d.isQuantized = false;
Chris@294 334 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@294 335 d.sampleRate = m_colsPerSec;
Chris@294 336 d.hasDuration = false;
Chris@294 337 m_pitchOutputNo = list.size();
Chris@294 338 list.push_back(d);
Chris@294 339
Chris@309 340 d.identifier = "chroma";
Chris@309 341 d.name = "Pitch chroma distribution";
Chris@309 342 d.description = "Pitch chroma distribution formed by wrapping the un-thresholded pitch activation distribution into a single octave of semitone bins.";
Chris@309 343 d.unit = "";
Chris@309 344 d.hasFixedBinCount = true;
Chris@309 345 d.binCount = 12;
Chris@309 346 d.binNames.clear();
Chris@309 347 if (m_cq) {
Chris@309 348 for (int i = 0; i < 12; ++i) {
Chris@320 349 d.binNames.push_back(getChromaName(i));
Chris@309 350 }
Chris@309 351 }
Chris@309 352 d.hasKnownExtents = false;
Chris@309 353 d.isQuantized = false;
Chris@309 354 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@309 355 d.sampleRate = m_colsPerSec;
Chris@309 356 d.hasDuration = false;
Chris@309 357 m_chromaOutputNo = list.size();
Chris@309 358 list.push_back(d);
Chris@309 359
Chris@302 360 d.identifier = "templates";
Chris@302 361 d.name = "Templates";
Chris@302 362 d.description = "Constant-Q spectral templates for the selected instrument pack.";
Chris@302 363 d.unit = "";
Chris@302 364 d.hasFixedBinCount = true;
Chris@302 365 d.binCount = getPack(0).templateHeight;
Chris@302 366 d.binNames.clear();
Chris@302 367 if (m_cq) {
Chris@302 368 char name[50];
Chris@302 369 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@302 370 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@302 371 // lowest-frequency 55 bins have been dropped, for a
Chris@302 372 // 545-bin template. The native CQ bins go high->low
Chris@302 373 // frequency though, so these are still the first 545 bins
Chris@302 374 // as reported by getBinFrequency, though in reverse order
Chris@302 375 float freq = m_cq->getBinFrequency
Chris@302 376 (getPack(0).templateHeight - i - 1);
Chris@302 377 sprintf(name, "%.1f Hz", freq);
Chris@302 378 d.binNames.push_back(name);
Chris@302 379 }
Chris@302 380 }
Chris@302 381 d.hasKnownExtents = false;
Chris@302 382 d.isQuantized = false;
Chris@302 383 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@302 384 d.sampleRate = m_colsPerSec;
Chris@302 385 d.hasDuration = false;
Chris@302 386 m_templateOutputNo = list.size();
Chris@302 387 list.push_back(d);
Chris@302 388
Chris@31 389 return list;
Chris@31 390 }
Chris@31 391
Chris@38 392 std::string
Chris@320 393 Silvet::getChromaName(int pitch) const
Chris@38 394 {
Chris@38 395 static const char *names[] = {
Chris@38 396 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
Chris@38 397 };
Chris@38 398
Chris@309 399 return names[pitch];
Chris@309 400 }
Chris@309 401
Chris@309 402 std::string
Chris@336 403 Silvet::getNoteName(int note, int shift) const
Chris@309 404 {
Chris@320 405 string n = getChromaName(note % 12);
Chris@38 406
Chris@175 407 int oct = (note + 9) / 12;
Chris@38 408
Chris@175 409 char buf[30];
Chris@175 410
Chris@175 411 float pshift = 0.f;
Chris@336 412 int shiftCount = getShiftCount();
Chris@175 413 if (shiftCount > 1) {
Chris@320 414 // see getNoteFrequency below
Chris@175 415 pshift =
Chris@175 416 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 417 }
Chris@175 418
Chris@175 419 if (pshift > 0.f) {
Chris@309 420 sprintf(buf, "%s%d+%dc", n.c_str(), oct, int(round(pshift * 100)));
Chris@175 421 } else if (pshift < 0.f) {
Chris@309 422 sprintf(buf, "%s%d-%dc", n.c_str(), oct, int(round((-pshift) * 100)));
Chris@175 423 } else {
Chris@309 424 sprintf(buf, "%s%d", n.c_str(), oct);
Chris@175 425 }
Chris@38 426
Chris@38 427 return buf;
Chris@38 428 }
Chris@38 429
Chris@41 430 float
Chris@336 431 Silvet::getNoteFrequency(int note, int shift) const
Chris@41 432 {
Chris@169 433 // Convert shift number to a pitch shift. The given shift number
Chris@169 434 // is an offset into the template array, which starts with some
Chris@169 435 // zeros, followed by the template, then some trailing zeros.
Chris@169 436 //
Chris@169 437 // Example: if we have templateMaxShift == 2 and thus shiftCount
Chris@169 438 // == 5, then the number will be in the range 0-4 and the template
Chris@169 439 // will have 2 zeros at either end. Thus number 2 represents the
Chris@169 440 // template "as recorded", for a pitch shift of 0; smaller indices
Chris@169 441 // represent moving the template *up* in pitch (by introducing
Chris@169 442 // zeros at the start, which is the low-frequency end), for a
Chris@169 443 // positive pitch shift; and higher values represent moving it
Chris@169 444 // down in pitch, for a negative pitch shift.
Chris@169 445
Chris@175 446 float pshift = 0.f;
Chris@336 447 int shiftCount = getShiftCount();
Chris@175 448 if (shiftCount > 1) {
Chris@175 449 pshift =
Chris@175 450 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 451 }
Chris@169 452
Chris@301 453 float freq = float(27.5 * pow(2.0, (note + pshift) / 12.0));
Chris@301 454
Chris@303 455 // cerr << "note = " << note << ", shift = " << shift << ", shiftCount = "
Chris@303 456 // << shiftCount << ", obtained freq = " << freq << endl;
Chris@301 457
Chris@301 458 return freq;
Chris@41 459 }
Chris@41 460
Chris@31 461 bool
Chris@31 462 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@31 463 {
Chris@272 464 if (m_inputSampleRate < minInputSampleRate ||
Chris@272 465 m_inputSampleRate > maxInputSampleRate) {
Chris@272 466 cerr << "Silvet::initialise: Unsupported input sample rate "
Chris@272 467 << m_inputSampleRate << " (supported min " << minInputSampleRate
Chris@272 468 << ", max " << maxInputSampleRate << ")" << endl;
Chris@272 469 return false;
Chris@272 470 }
Chris@272 471
Chris@31 472 if (channels < getMinChannelCount() ||
Chris@272 473 channels > getMaxChannelCount()) {
Chris@272 474 cerr << "Silvet::initialise: Unsupported channel count " << channels
Chris@272 475 << " (supported min " << getMinChannelCount() << ", max "
Chris@272 476 << getMaxChannelCount() << ")" << endl;
Chris@272 477 return false;
Chris@272 478 }
Chris@31 479
Chris@31 480 if (stepSize != blockSize) {
Chris@31 481 cerr << "Silvet::initialise: Step size must be the same as block size ("
Chris@31 482 << stepSize << " != " << blockSize << ")" << endl;
Chris@31 483 return false;
Chris@31 484 }
Chris@31 485
Chris@31 486 m_blockSize = blockSize;
Chris@31 487
Chris@31 488 reset();
Chris@31 489
Chris@31 490 return true;
Chris@31 491 }
Chris@31 492
Chris@31 493 void
Chris@31 494 Silvet::reset()
Chris@31 495 {
Chris@31 496 delete m_resampler;
Chris@246 497 delete m_flattener;
Chris@31 498 delete m_cq;
Chris@31 499
Chris@31 500 if (m_inputSampleRate != processingSampleRate) {
Chris@31 501 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
Chris@31 502 } else {
Chris@31 503 m_resampler = 0;
Chris@31 504 }
Chris@31 505
Chris@246 506 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
Chris@246 507 m_flattener->reset();
Chris@246 508
Chris@301 509 // this happens to be processingSampleRate / 3, and is the top
Chris@301 510 // freq used for the EM templates:
Chris@301 511 double maxFreq = 14700;
Chris@301 512
Chris@301 513 if (m_mode == LiveMode) {
Chris@301 514 // We only have 12 bpo rather than 60, so we need the top bin
Chris@301 515 // to be the middle one of the top 5, i.e. 2/5 of a semitone
Chris@301 516 // lower than 14700
Chris@301 517 maxFreq *= powf(2.0, -1.0 / 30.0);
Chris@301 518 }
Chris@301 519
Chris@173 520 double minFreq = 27.5;
Chris@173 521
Chris@341 522 if (m_mode == LiveMode) {
Chris@173 523 // We don't actually return any notes from the bottom octave,
Chris@173 524 // so we can just pad with zeros
Chris@173 525 minFreq *= 2;
Chris@173 526 }
Chris@173 527
Chris@298 528 int bpo = 12 *
Chris@298 529 (m_mode == LiveMode ? binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@301 530
Chris@154 531 CQParameters params(processingSampleRate,
Chris@173 532 minFreq,
Chris@303 533 maxFreq,
Chris@298 534 bpo);
Chris@154 535
Chris@325 536 params.q = 0.8;
Chris@325 537 params.atomHopFactor = (m_mode == LiveMode ? 1.0 : 0.3);
Chris@154 538 params.threshold = 0.0005;
Chris@317 539 params.decimator =
Chris@317 540 (m_mode == LiveMode ?
Chris@317 541 CQParameters::FasterDecimator : CQParameters::BetterDecimator);
Chris@172 542 params.window = CQParameters::Hann;
Chris@154 543
Chris@154 544 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
Chris@31 545
Chris@303 546 // cerr << "CQ bins = " << m_cq->getTotalBins() << endl;
Chris@303 547 // cerr << "CQ min freq = " << m_cq->getMinFrequency() << " (and for confirmation, freq of bin 0 = " << m_cq->getBinFrequency(0) << ")" << endl;
Chris@297 548
Chris@341 549 m_colsPerSec = 50;
Chris@165 550
Chris@41 551 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 552 delete m_postFilter[i];
Chris@41 553 }
Chris@41 554 m_postFilter.clear();
Chris@303 555 int postFilterLength = 3;
Chris@298 556 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@303 557 m_postFilter.push_back(new MedianFilter<double>(postFilterLength));
Chris@41 558 }
Chris@41 559 m_pianoRoll.clear();
Chris@246 560 m_inputGains.clear();
Chris@32 561 m_columnCount = 0;
Chris@272 562 m_resampledCount = 0;
Chris@40 563 m_startTime = RealTime::zeroTime;
Chris@313 564 m_haveStartTime = false;
Chris@31 565 }
Chris@31 566
Chris@31 567 Silvet::FeatureSet
Chris@31 568 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
Chris@31 569 {
Chris@302 570 FeatureSet fs;
Chris@302 571
Chris@313 572 if (!m_haveStartTime) {
Chris@314 573
Chris@40 574 m_startTime = timestamp;
Chris@313 575 m_haveStartTime = true;
Chris@314 576
Chris@302 577 insertTemplateFeatures(fs);
Chris@40 578 }
Chris@246 579
Chris@246 580 vector<float> flattened(m_blockSize);
Chris@246 581 float gain = 1.f;
Chris@246 582 m_flattener->connectInputPort
Chris@246 583 (FlattenDynamics::AudioInputPort, inputBuffers[0]);
Chris@246 584 m_flattener->connectOutputPort
Chris@246 585 (FlattenDynamics::AudioOutputPort, &flattened[0]);
Chris@246 586 m_flattener->connectOutputPort
Chris@246 587 (FlattenDynamics::GainOutputPort, &gain);
Chris@246 588 m_flattener->process(m_blockSize);
Chris@246 589
Chris@252 590 m_inputGains[timestamp] = gain;
Chris@40 591
Chris@31 592 vector<double> data;
Chris@40 593 for (int i = 0; i < m_blockSize; ++i) {
Chris@246 594 double d = flattened[i];
Chris@235 595 data.push_back(d);
Chris@40 596 }
Chris@31 597
Chris@31 598 if (m_resampler) {
Chris@272 599
Chris@31 600 data = m_resampler->process(data.data(), data.size());
Chris@272 601
Chris@272 602 int hadCount = m_resampledCount;
Chris@272 603 m_resampledCount += data.size();
Chris@272 604
Chris@272 605 int resamplerLatency = m_resampler->getLatency();
Chris@272 606
Chris@272 607 if (hadCount < resamplerLatency) {
Chris@272 608 int stillToDrop = resamplerLatency - hadCount;
Chris@272 609 if (stillToDrop >= int(data.size())) {
Chris@302 610 return fs;
Chris@272 611 } else {
Chris@272 612 data = vector<double>(data.begin() + stillToDrop, data.end());
Chris@272 613 }
Chris@272 614 }
Chris@31 615 }
Chris@272 616
Chris@32 617 Grid cqout = m_cq->process(data);
Chris@302 618 transcribe(cqout, fs);
Chris@51 619 return fs;
Chris@34 620 }
Chris@34 621
Chris@34 622 Silvet::FeatureSet
Chris@34 623 Silvet::getRemainingFeatures()
Chris@34 624 {
Chris@145 625 Grid cqout = m_cq->getRemainingOutput();
Chris@302 626 FeatureSet fs;
Chris@336 627
Chris@302 628 if (m_columnCount == 0) {
Chris@302 629 // process() was never called, but we still want these
Chris@302 630 insertTemplateFeatures(fs);
Chris@302 631 } else {
Chris@336 632
Chris@336 633 // Complete the transcription
Chris@336 634
Chris@302 635 transcribe(cqout, fs);
Chris@336 636
Chris@336 637 // And make sure any extant playing notes are finished and returned
Chris@336 638
Chris@336 639 m_pianoRoll.push_back({});
Chris@336 640
Chris@336 641 auto events = noteTrack();
Chris@336 642
Chris@336 643 for (const auto &f : events.notes) {
Chris@336 644 fs[m_notesOutputNo].push_back(f);
Chris@336 645 }
Chris@336 646
Chris@336 647 for (const auto &f : events.onsets) {
Chris@336 648 fs[m_onsetsOutputNo].push_back(f);
Chris@336 649 }
Chris@336 650
Chris@336 651 for (const auto &f : events.onOffsets) {
Chris@336 652 fs[m_onOffsetsOutputNo].push_back(f);
Chris@336 653 }
Chris@302 654 }
Chris@336 655
Chris@51 656 return fs;
Chris@34 657 }
Chris@34 658
Chris@302 659 void
Chris@302 660 Silvet::insertTemplateFeatures(FeatureSet &fs)
Chris@302 661 {
Chris@302 662 const InstrumentPack &pack = getPack(m_instrument);
Chris@302 663 for (int i = 0; i < int(pack.templates.size()) * pack.templateNoteCount; ++i) {
Chris@302 664 RealTime timestamp = RealTime::fromSeconds(double(i) / m_colsPerSec);
Chris@302 665 Feature f;
Chris@302 666 char buffer[50];
Chris@302 667 sprintf(buffer, "Note %d", i + 1);
Chris@302 668 f.label = buffer;
Chris@302 669 f.hasTimestamp = true;
Chris@302 670 f.timestamp = timestamp;
Chris@302 671 f.values = pack.templates[i / pack.templateNoteCount]
Chris@302 672 .data[i % pack.templateNoteCount];
Chris@302 673 fs[m_templateOutputNo].push_back(f);
Chris@302 674 }
Chris@302 675 }
Chris@302 676
Chris@336 677 int
Chris@336 678 Silvet::getShiftCount() const
Chris@336 679 {
Chris@336 680 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning;
Chris@336 681 int shiftCount = 1;
Chris@336 682 if (wantShifts) {
Chris@336 683 const InstrumentPack &pack(getPack(m_instrument));
Chris@336 684 shiftCount = pack.templateMaxShift * 2 + 1;
Chris@336 685 }
Chris@336 686 return shiftCount;
Chris@336 687 }
Chris@336 688
Chris@302 689 void
Chris@302 690 Silvet::transcribe(const Grid &cqout, Silvet::FeatureSet &fs)
Chris@34 691 {
Chris@32 692 Grid filtered = preProcess(cqout);
Chris@31 693
Chris@302 694 if (filtered.empty()) return;
Chris@170 695
Chris@298 696 const InstrumentPack &pack(getPack(m_instrument));
Chris@104 697
Chris@325 698 int width = filtered.size();
Chris@325 699
Chris@325 700 double silenceThreshold = 0.01;
Chris@325 701
Chris@325 702 for (int i = 0; i < width; ++i) {
Chris@325 703
Chris@325 704 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1 + i);
Chris@325 705 float inputGain = getInputGainAt(timestamp);
Chris@325 706
Chris@178 707 Feature f;
Chris@325 708 double rms = 0.0;
Chris@325 709
Chris@178 710 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@325 711 double v = filtered[i][j];
Chris@325 712 rms += v * v;
Chris@325 713 f.values.push_back(float(v));
Chris@178 714 }
Chris@325 715
Chris@325 716 rms = sqrt(rms / pack.templateHeight);
Chris@325 717 if (rms / inputGain < silenceThreshold) {
Chris@325 718 filtered[i].clear();
Chris@325 719 }
Chris@325 720
Chris@178 721 fs[m_fcqOutputNo].push_back(f);
Chris@178 722 }
Chris@325 723
Chris@311 724 Grid localPitches(width);
Chris@170 725
Chris@336 726 int shiftCount = getShiftCount();
Chris@336 727 bool wantShifts = (shiftCount > 1);
Chris@170 728
Chris@170 729 vector<vector<int> > localBestShifts;
Chris@170 730 if (wantShifts) {
Chris@311 731 localBestShifts = vector<vector<int> >(width);
Chris@170 732 }
Chris@170 733
Chris@356 734 int emThreadCount = 1;
Chris@356 735
Chris@356 736 #if (defined(MAX_EM_THREADS) && (MAX_EM_THREADS > 1))
Chris@356 737 emThreadCount = MAX_EM_THREADS;
Chris@356 738
Chris@352 739 if (emThreadCount > int(std::thread::hardware_concurrency())) {
Chris@352 740 emThreadCount = std::thread::hardware_concurrency();
Chris@352 741 }
Chris@317 742 if (m_mode == LiveMode && pack.templates.size() == 1) {
Chris@317 743 // The EM step is probably not slow enough to merit it
Chris@317 744 emThreadCount = 1;
Chris@317 745 }
Chris@317 746
Chris@317 747 if (emThreadCount > 1) {
Chris@317 748 for (int i = 0; i < width; ) {
Chris@317 749 typedef future<pair<vector<double>, vector<int>>> EMFuture;
Chris@317 750 vector<EMFuture> results;
Chris@317 751 for (int j = 0; j < emThreadCount && i + j < width; ++j) {
Chris@352 752 const vector<double> &column = filtered.at(i + j);
Chris@317 753 results.push_back
Chris@317 754 (async(std::launch::async,
Chris@352 755 [&]() { return applyEM(pack, column); }));
Chris@317 756 }
Chris@317 757 for (int j = 0; j < emThreadCount && i + j < width; ++j) {
Chris@317 758 auto out = results[j].get();
Chris@317 759 localPitches[i+j] = out.first;
Chris@317 760 if (wantShifts) localBestShifts[i+j] = out.second;
Chris@317 761 }
Chris@317 762 i += emThreadCount;
Chris@312 763 }
Chris@123 764 }
Chris@312 765 #endif
Chris@317 766
Chris@317 767 if (emThreadCount == 1) {
Chris@317 768 for (int i = 0; i < width; ++i) {
Chris@336 769 auto out = applyEM(pack, filtered.at(i));
Chris@317 770 localPitches[i] = out.first;
Chris@317 771 if (wantShifts) localBestShifts[i] = out.second;
Chris@317 772 }
Chris@317 773 }
Chris@305 774
Chris@166 775 for (int i = 0; i < width; ++i) {
Chris@37 776
Chris@321 777 vector<double> filtered;
Chris@321 778
Chris@321 779 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@321 780 m_postFilter[j]->push(localPitches[i][j]);
Chris@321 781 filtered.push_back(m_postFilter[j]->get());
Chris@321 782 }
Chris@294 783
Chris@309 784 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1);
Chris@309 785 float inputGain = getInputGainAt(timestamp);
Chris@309 786
Chris@294 787 Feature f;
Chris@294 788 for (int j = 0; j < (int)filtered.size(); ++j) {
Chris@309 789 float v = filtered[j];
Chris@294 790 if (v < pack.levelThreshold) v = 0.f;
Chris@309 791 f.values.push_back(v / inputGain);
Chris@294 792 }
Chris@294 793 fs[m_pitchOutputNo].push_back(f);
Chris@309 794
Chris@309 795 f.values.clear();
Chris@309 796 f.values.resize(12);
Chris@309 797 for (int j = 0; j < (int)filtered.size(); ++j) {
Chris@309 798 f.values[j % 12] += filtered[j] / inputGain;
Chris@309 799 }
Chris@309 800 fs[m_chromaOutputNo].push_back(f);
Chris@38 801
Chris@321 802 // This pushes the up-to-max-polyphony activation column to
Chris@321 803 // m_pianoRoll
Chris@336 804 postProcess(filtered, localBestShifts[i]);
Chris@321 805
Chris@336 806 auto events = noteTrack();
Chris@319 807
Chris@336 808 for (const auto &f : events.notes) {
Chris@336 809 fs[m_notesOutputNo].push_back(f);
Chris@40 810 }
Chris@319 811
Chris@336 812 for (const auto &f : events.onsets) {
Chris@336 813 fs[m_onsetsOutputNo].push_back(f);
Chris@336 814 }
Chris@336 815
Chris@336 816 for (const auto &f : events.onOffsets) {
Chris@336 817 fs[m_onOffsetsOutputNo].push_back(f);
Chris@319 818 }
Chris@34 819 }
Chris@31 820 }
Chris@31 821
Chris@311 822 pair<vector<double>, vector<int> >
Chris@311 823 Silvet::applyEM(const InstrumentPack &pack,
Chris@336 824 const vector<double> &column)
Chris@311 825 {
Chris@311 826 double columnThreshold = 1e-5;
Chris@311 827
Chris@314 828 if (m_mode == LiveMode) {
Chris@325 829 columnThreshold /= 15;
Chris@314 830 }
Chris@314 831
Chris@311 832 vector<double> pitches(pack.templateNoteCount, 0.0);
Chris@311 833 vector<int> bestShifts;
Chris@325 834
Chris@325 835 if (column.empty()) return { pitches, bestShifts };
Chris@311 836
Chris@311 837 double sum = 0.0;
Chris@311 838 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@311 839 sum += column.at(j);
Chris@311 840 }
Chris@311 841 if (sum < columnThreshold) return { pitches, bestShifts };
Chris@311 842
Chris@314 843 EM em(&pack, m_mode == HighQualityMode);
Chris@311 844
Chris@311 845 em.setPitchSparsity(pack.pitchSparsity);
Chris@311 846 em.setSourceSparsity(pack.sourceSparsity);
Chris@311 847
Chris@314 848 int iterations = (m_mode == HighQualityMode ? 20 : 10);
Chris@311 849
Chris@311 850 for (int j = 0; j < iterations; ++j) {
Chris@311 851 em.iterate(column.data());
Chris@311 852 }
Chris@311 853
Chris@311 854 const float *pitchDist = em.getPitchDistribution();
Chris@311 855 const float *const *shiftDist = em.getShifts();
Chris@311 856
Chris@336 857 int shiftCount = getShiftCount();
Chris@311 858
Chris@311 859 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@311 860
Chris@311 861 pitches[j] = pitchDist[j] * sum;
Chris@311 862
Chris@311 863 int bestShift = 0;
Chris@311 864 float bestShiftValue = 0.0;
Chris@336 865 if (shiftCount > 1) {
Chris@311 866 for (int k = 0; k < shiftCount; ++k) {
Chris@311 867 float value = shiftDist[k][j];
Chris@311 868 if (k == 0 || value > bestShiftValue) {
Chris@311 869 bestShiftValue = value;
Chris@311 870 bestShift = k;
Chris@311 871 }
Chris@311 872 }
Chris@311 873 bestShifts.push_back(bestShift);
Chris@311 874 }
Chris@311 875 }
Chris@311 876
Chris@311 877 return { pitches, bestShifts };
Chris@311 878 }
Chris@311 879
Chris@32 880 Silvet::Grid
Chris@32 881 Silvet::preProcess(const Grid &in)
Chris@32 882 {
Chris@32 883 int width = in.size();
Chris@32 884
Chris@165 885 int spacing = processingSampleRate / m_colsPerSec;
Chris@32 886
Chris@165 887 // need to be careful that col spacing is an integer number of samples!
Chris@165 888 assert(spacing * m_colsPerSec == processingSampleRate);
Chris@32 889
Chris@32 890 Grid out;
Chris@32 891
Chris@58 892 // We count the CQ latency in terms of processing hops, but
Chris@58 893 // actually it probably isn't an exact number of hops so this
Chris@58 894 // isn't quite accurate. But the small constant offset is
Chris@165 895 // practically irrelevant compared to the jitter from the frame
Chris@165 896 // size we reduce to in a moment
Chris@33 897 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
Chris@33 898
Chris@298 899 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 900
Chris@32 901 for (int i = 0; i < width; ++i) {
Chris@32 902
Chris@33 903 if (m_columnCount < latentColumns) {
Chris@33 904 ++m_columnCount;
Chris@33 905 continue;
Chris@33 906 }
Chris@33 907
Chris@32 908 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop();
Chris@32 909 int sampleNo = m_columnCount * m_cq->getColumnHop();
Chris@32 910
Chris@32 911 bool select = (sampleNo / spacing != prevSampleNo / spacing);
Chris@32 912
Chris@32 913 if (select) {
Chris@32 914 vector<double> inCol = in[i];
Chris@176 915 vector<double> outCol(pack.templateHeight);
Chris@32 916
Chris@178 917 // In HQ mode, the CQ returns 600 bins and we ignore the
Chris@298 918 // lowest 55 of them (assuming binsPerSemitone == 5).
Chris@178 919 //
Chris@341 920 // In live mode the CQ is an octave shorter, returning 540
Chris@341 921 // bins or equivalent, so we instead pad them with an
Chris@341 922 // additional 5 or equivalent zeros.
Chris@178 923 //
Chris@178 924 // We also need to reverse the column as we go, since the
Chris@178 925 // raw CQ has the high frequencies first and we need it
Chris@178 926 // the other way around.
Chris@32 927
Chris@298 928 int bps = (m_mode == LiveMode ?
Chris@298 929 binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@298 930
Chris@297 931 if (m_mode == HighQualityMode) {
Chris@178 932 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@298 933 int ix = inCol.size() - j - (11 * bps);
Chris@178 934 outCol[j] = inCol[ix];
Chris@178 935 }
Chris@178 936 } else {
Chris@298 937 for (int j = 0; j < bps; ++j) {
Chris@178 938 outCol[j] = 0.0;
Chris@178 939 }
Chris@298 940 for (int j = bps; j < pack.templateHeight; ++j) {
Chris@298 941 int ix = inCol.size() - j + (bps-1);
Chris@178 942 outCol[j] = inCol[ix];
Chris@178 943 }
Chris@46 944 }
Chris@32 945
Chris@46 946 vector<double> noiseLevel1 =
Chris@298 947 MedianFilter<double>::filter(8 * bps, outCol);
Chris@176 948 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 949 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
Chris@46 950 }
Chris@32 951
Chris@46 952 vector<double> noiseLevel2 =
Chris@298 953 MedianFilter<double>::filter(8 * bps, noiseLevel1);
Chris@176 954 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 955 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
Chris@32 956 }
Chris@32 957
Chris@165 958 out.push_back(outCol);
Chris@32 959 }
Chris@32 960
Chris@32 961 ++m_columnCount;
Chris@32 962 }
Chris@32 963
Chris@32 964 return out;
Chris@32 965 }
Chris@32 966
Chris@321 967 void
Chris@170 968 Silvet::postProcess(const vector<double> &pitches,
Chris@336 969 const vector<int> &bestShifts)
Chris@166 970 {
Chris@298 971 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 972
Chris@41 973 // Threshold for level and reduce number of candidate pitches
Chris@41 974
Chris@41 975 typedef std::multimap<double, int> ValueIndexMap;
Chris@41 976
Chris@41 977 ValueIndexMap strengths;
Chris@166 978
Chris@176 979 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@321 980
Chris@321 981 double strength = pitches[j];
Chris@183 982 if (strength < pack.levelThreshold) continue;
Chris@321 983
Chris@321 984 // In live mode with only a 12-bpo CQ, we are very likely to
Chris@321 985 // get clusters of two or three high scores at a time for
Chris@321 986 // neighbouring semitones. Eliminate these by picking only the
Chris@325 987 // peaks (except that we never eliminate a note that has
Chris@325 988 // already been established as currently playing). This means
Chris@325 989 // we can't recognise actual semitone chords if they ever
Chris@325 990 // appear, but it's not as if live mode is good enough for
Chris@325 991 // that to be a big deal anyway.
Chris@321 992 if (m_mode == LiveMode) {
Chris@325 993 if (m_current.find(j) == m_current.end() &&
Chris@325 994 (j == 0 ||
Chris@325 995 j + 1 == pack.templateNoteCount ||
Chris@325 996 pitches[j] < pitches[j-1] ||
Chris@325 997 pitches[j] < pitches[j+1])) {
Chris@325 998 // not a peak or a currently-playing note: skip it
Chris@321 999 continue;
Chris@321 1000 }
Chris@321 1001 }
Chris@323 1002
Chris@168 1003 strengths.insert(ValueIndexMap::value_type(strength, j));
Chris@168 1004 }
Chris@166 1005
Chris@168 1006 ValueIndexMap::const_iterator si = strengths.end();
Chris@167 1007
Chris@168 1008 map<int, double> active;
Chris@168 1009 map<int, int> activeShifts;
Chris@168 1010
Chris@336 1011 int shiftCount = getShiftCount();
Chris@336 1012
Chris@183 1013 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) {
Chris@168 1014
Chris@168 1015 --si;
Chris@168 1016
Chris@168 1017 double strength = si->first;
Chris@168 1018 int j = si->second;
Chris@168 1019
Chris@168 1020 active[j] = strength;
Chris@168 1021
Chris@336 1022 if (shiftCount > 1) {
Chris@364 1023 if (!bestShifts.empty()) {
Chris@364 1024 activeShifts[j] = bestShifts[j];
Chris@364 1025 } else {
Chris@364 1026 // can happen if column was below threshold
Chris@364 1027 activeShifts[j] = 0;
Chris@364 1028 }
Chris@167 1029 }
Chris@41 1030 }
Chris@41 1031
Chris@168 1032 m_pianoRoll.push_back(active);
Chris@170 1033
Chris@336 1034 if (shiftCount > 1) {
Chris@168 1035 m_pianoRollShifts.push_back(activeShifts);
Chris@41 1036 }
Chris@294 1037
Chris@321 1038 return;
Chris@166 1039 }
Chris@166 1040
Chris@336 1041 Silvet::FeatureChunk
Chris@336 1042 Silvet::noteTrack()
Chris@166 1043 {
Chris@41 1044 // Minimum duration pruning, and conversion to notes. We can only
Chris@41 1045 // report notes that have just ended (i.e. that are absent in the
Chris@168 1046 // latest active set but present in the prior set in the piano
Chris@41 1047 // roll) -- any notes that ended earlier will have been reported
Chris@41 1048 // already, and if they haven't ended, we don't know their
Chris@41 1049 // duration.
Chris@41 1050
Chris@168 1051 int width = m_pianoRoll.size() - 1;
Chris@168 1052
Chris@168 1053 const map<int, double> &active = m_pianoRoll[width];
Chris@41 1054
Chris@165 1055 double columnDuration = 1.0 / m_colsPerSec;
Chris@165 1056
Chris@165 1057 // only keep notes >= 100ms or thereabouts
Chris@323 1058 double durationThrSec = 0.1;
Chris@323 1059 int durationThreshold = floor(durationThrSec / columnDuration); // in cols
Chris@165 1060 if (durationThreshold < 1) durationThreshold = 1;
Chris@41 1061
Chris@336 1062 FeatureList noteFeatures, onsetFeatures, onOffsetFeatures;
Chris@41 1063
Chris@41 1064 if (width < durationThreshold + 1) {
Chris@336 1065 return { noteFeatures, onsetFeatures, onOffsetFeatures };
Chris@41 1066 }
Chris@357 1067
Chris@357 1068 // Make a copy of the latest column. We need a copy because it is
Chris@357 1069 // possible we may erase from the "live" column within the loop.
Chris@357 1070 map<int, double> latest = m_pianoRoll[width-1];
Chris@41 1071
Chris@357 1072 for (const auto &ni: latest) {
Chris@41 1073
Chris@357 1074 int note = ni.first;
Chris@41 1075
Chris@41 1076 int end = width;
Chris@41 1077 int start = end-1;
Chris@41 1078
Chris@41 1079 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
Chris@41 1080 --start;
Chris@41 1081 }
Chris@41 1082 ++start;
Chris@41 1083
Chris@319 1084 int duration = end - start;
Chris@319 1085
Chris@319 1086 if (duration < durationThreshold) {
Chris@41 1087 continue;
Chris@41 1088 }
Chris@41 1089
Chris@319 1090 if (duration == durationThreshold) {
Chris@325 1091 m_current.insert(note);
Chris@336 1092 emitOnset(start, note, onsetFeatures);
Chris@336 1093 emitOnset(start, note, onOffsetFeatures);
Chris@319 1094 }
Chris@319 1095
Chris@319 1096 if (active.find(note) == active.end()) {
Chris@319 1097 // the note was playing but just ended
Chris@325 1098 m_current.erase(note);
Chris@343 1099 emitNoteAndOffset(start, end, note, noteFeatures, onOffsetFeatures);
Chris@334 1100 } else { // still playing
Chris@334 1101 // repeated note detection: if level is greater than this
Chris@334 1102 // multiple of its previous value, then we end the note and
Chris@334 1103 // restart it with the same pitch
Chris@334 1104 double restartFactor = 1.5;
Chris@334 1105 if (duration >= durationThreshold * 2 &&
Chris@334 1106 (active.find(note)->second >
Chris@334 1107 restartFactor * m_pianoRoll[width-1][note])) {
Chris@334 1108 m_current.erase(note);
Chris@343 1109 emitNoteAndOffset(start, end-1, note, noteFeatures, onOffsetFeatures);
Chris@334 1110 // and remove this so that we start counting the new
Chris@357 1111 // note's duration from the current position. (This
Chris@357 1112 // erase is why we needed to copy this column at the
Chris@357 1113 // top of the loop.)
Chris@334 1114 m_pianoRoll[width-1].erase(note);
Chris@334 1115 }
Chris@319 1116 }
Chris@41 1117 }
Chris@41 1118
Chris@62 1119 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
Chris@41 1120
Chris@336 1121 return { noteFeatures, onsetFeatures, onOffsetFeatures };
Chris@41 1122 }
Chris@41 1123
Chris@169 1124 void
Chris@343 1125 Silvet::emitNoteAndOffset(int start, int end, int note,
Chris@343 1126 FeatureList &noteFeatures,
Chris@343 1127 FeatureList &onOffsetFeatures)
Chris@169 1128 {
Chris@343 1129 // Emit the complete note-event feature, and its offset. We have
Chris@343 1130 // already emitted the note onset when it started -- that process
Chris@343 1131 // is separated out in order to get a faster response during live
Chris@343 1132 // tracking. However, if the note shift changes within the note
Chris@343 1133 // (which can happen only if we have fine-tuning switched on), we
Chris@343 1134 // emit an offset and then a new onset with the new shift.
Chris@343 1135
Chris@169 1136 int partStart = start;
Chris@169 1137 int partShift = 0;
Chris@320 1138 double partStrength = 0;
Chris@169 1139
Chris@343 1140 // NB this *must* be less than durationThreshold above
Chris@252 1141 int partThreshold = floor(0.05 * m_colsPerSec);
Chris@169 1142
Chris@169 1143 for (int i = start; i != end; ++i) {
Chris@169 1144
Chris@169 1145 double strength = m_pianoRoll[i][note];
Chris@169 1146
Chris@169 1147 int shift = 0;
Chris@169 1148
Chris@336 1149 if (getShiftCount() > 1) {
Chris@169 1150
Chris@169 1151 shift = m_pianoRollShifts[i][note];
Chris@169 1152
Chris@169 1153 if (i == partStart) {
Chris@169 1154 partShift = shift;
Chris@169 1155 }
Chris@169 1156
Chris@169 1157 if (i > partStart + partThreshold && shift != partShift) {
Chris@169 1158
Chris@169 1159 // pitch has changed, emit an intermediate note
Chris@252 1160 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 1161 i,
Chris@252 1162 note,
Chris@252 1163 partShift,
Chris@320 1164 partStrength));
Chris@343 1165
Chris@343 1166 onOffsetFeatures.push_back(makeOffsetFeature(i,
Chris@343 1167 note,
Chris@343 1168 partShift));
Chris@343 1169
Chris@169 1170 partStart = i;
Chris@169 1171 partShift = shift;
Chris@343 1172
Chris@343 1173 onOffsetFeatures.push_back(makeOnsetFeature(i,
Chris@343 1174 note,
Chris@343 1175 partShift,
Chris@343 1176 partStrength));
Chris@343 1177
Chris@320 1178 partStrength = 0;
Chris@169 1179 }
Chris@169 1180 }
Chris@169 1181
Chris@320 1182 if (strength > partStrength) {
Chris@320 1183 partStrength = strength;
Chris@169 1184 }
Chris@169 1185 }
Chris@169 1186
Chris@169 1187 if (end >= partStart + partThreshold) {
Chris@343 1188
Chris@252 1189 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 1190 end,
Chris@252 1191 note,
Chris@252 1192 partShift,
Chris@320 1193 partStrength));
Chris@343 1194
Chris@343 1195 onOffsetFeatures.push_back(makeOffsetFeature(end,
Chris@343 1196 note,
Chris@343 1197 partShift));
Chris@349 1198
Chris@349 1199 } else if (partStart > start) {
Chris@349 1200
Chris@349 1201 // we have emitted an onset for this, so must add an offset
Chris@349 1202 onOffsetFeatures.push_back(makeOffsetFeature(end,
Chris@349 1203 note,
Chris@349 1204 partShift));
Chris@169 1205 }
Chris@169 1206 }
Chris@252 1207
Chris@319 1208 void
Chris@336 1209 Silvet::emitOnset(int start, int note, FeatureList &onOffsetFeatures)
Chris@319 1210 {
Chris@319 1211 int len = int(m_pianoRoll.size());
Chris@320 1212
Chris@320 1213 double onsetStrength = 0;
Chris@319 1214
Chris@319 1215 int shift = 0;
Chris@336 1216 if (getShiftCount() > 1) {
Chris@319 1217 shift = m_pianoRollShifts[start][note];
Chris@319 1218 }
Chris@319 1219
Chris@319 1220 for (int i = start; i < len; ++i) {
Chris@319 1221 double strength = m_pianoRoll[i][note];
Chris@320 1222 if (strength > onsetStrength) {
Chris@320 1223 onsetStrength = strength;
Chris@319 1224 }
Chris@319 1225 }
Chris@319 1226
Chris@336 1227 if (onsetStrength == 0) return;
Chris@336 1228
Chris@336 1229 onOffsetFeatures.push_back(makeOnsetFeature(start,
Chris@336 1230 note,
Chris@336 1231 shift,
Chris@336 1232 onsetStrength));
Chris@336 1233 }
Chris@336 1234
Chris@309 1235 RealTime
Chris@309 1236 Silvet::getColumnTimestamp(int column)
Chris@309 1237 {
Chris@309 1238 double columnDuration = 1.0 / m_colsPerSec;
Chris@309 1239 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
Chris@309 1240
Chris@309 1241 return m_startTime + RealTime::fromSeconds
Chris@309 1242 (columnDuration * (column - postFilterLatency) + 0.02);
Chris@309 1243 }
Chris@309 1244
Chris@252 1245 Silvet::Feature
Chris@252 1246 Silvet::makeNoteFeature(int start,
Chris@252 1247 int end,
Chris@252 1248 int note,
Chris@252 1249 int shift,
Chris@320 1250 double strength)
Chris@252 1251 {
Chris@252 1252 Feature f;
Chris@252 1253
Chris@252 1254 f.hasTimestamp = true;
Chris@309 1255 f.timestamp = getColumnTimestamp(start);
Chris@252 1256
Chris@252 1257 f.hasDuration = true;
Chris@309 1258 f.duration = getColumnTimestamp(end) - f.timestamp;
Chris@252 1259
Chris@252 1260 f.values.clear();
Chris@336 1261 f.values.push_back(getNoteFrequency(note, shift));
Chris@320 1262 f.values.push_back(getVelocityFor(strength, start));
Chris@252 1263
Chris@336 1264 f.label = getNoteName(note, shift);
Chris@252 1265
Chris@252 1266 return f;
Chris@252 1267 }
Chris@252 1268
Chris@319 1269 Silvet::Feature
Chris@319 1270 Silvet::makeOnsetFeature(int start,
Chris@319 1271 int note,
Chris@319 1272 int shift,
Chris@320 1273 double strength)
Chris@319 1274 {
Chris@319 1275 Feature f;
Chris@319 1276
Chris@319 1277 f.hasTimestamp = true;
Chris@319 1278 f.timestamp = getColumnTimestamp(start);
Chris@319 1279
Chris@319 1280 f.hasDuration = false;
Chris@319 1281
Chris@319 1282 f.values.clear();
Chris@336 1283 f.values.push_back(getNoteFrequency(note, shift));
Chris@320 1284 f.values.push_back(getVelocityFor(strength, start));
Chris@319 1285
Chris@336 1286 f.label = getNoteName(note, shift);
Chris@336 1287
Chris@336 1288 return f;
Chris@336 1289 }
Chris@336 1290
Chris@336 1291 Silvet::Feature
Chris@336 1292 Silvet::makeOffsetFeature(int col,
Chris@336 1293 int note,
Chris@336 1294 int shift)
Chris@336 1295 {
Chris@336 1296 Feature f;
Chris@336 1297
Chris@336 1298 f.hasTimestamp = true;
Chris@336 1299 f.timestamp = getColumnTimestamp(col);
Chris@336 1300
Chris@336 1301 f.hasDuration = false;
Chris@336 1302
Chris@336 1303 f.values.clear();
Chris@336 1304 f.values.push_back(getNoteFrequency(note, shift));
Chris@336 1305 f.values.push_back(0); // velocity 0 for offset
Chris@336 1306
Chris@336 1307 f.label = getNoteName(note, shift) + " off";
Chris@319 1308
Chris@319 1309 return f;
Chris@319 1310 }
Chris@319 1311
Chris@320 1312 int
Chris@320 1313 Silvet::getVelocityFor(double strength, int column)
Chris@320 1314 {
Chris@320 1315 RealTime rt = getColumnTimestamp(column + 1);
Chris@320 1316
Chris@320 1317 float inputGain = getInputGainAt(rt);
Chris@320 1318
Chris@320 1319 double scale = 2.0;
Chris@320 1320 if (m_mode == LiveMode) scale = 20.0;
Chris@320 1321
Chris@320 1322 double velocity = round((strength * scale) / inputGain);
Chris@320 1323
Chris@320 1324 if (velocity > 127.0) velocity = 127.0;
Chris@320 1325 if (velocity < 1.0) velocity = 1.0; // assume surpassed 0 threshold already
Chris@320 1326
Chris@320 1327 return int(velocity);
Chris@320 1328 }
Chris@320 1329
Chris@252 1330 float
Chris@252 1331 Silvet::getInputGainAt(RealTime t)
Chris@252 1332 {
Chris@252 1333 map<RealTime, float>::const_iterator i = m_inputGains.lower_bound(t);
Chris@252 1334
Chris@252 1335 if (i == m_inputGains.end()) {
Chris@252 1336 if (i != m_inputGains.begin()) {
Chris@252 1337 --i;
Chris@252 1338 } else {
Chris@252 1339 return 1.f; // no data
Chris@252 1340 }
Chris@252 1341 }
Chris@252 1342
Chris@252 1343 // cerr << "gain at time " << t << " = " << i->second << endl;
Chris@252 1344
Chris@252 1345 return i->second;
Chris@252 1346 }
Chris@252 1347