annotate src/Silvet.cpp @ 357:0af00da90a40

Copy the column before potentially erasing while iterating through it
author Chris Cannam
date Mon, 07 Sep 2015 16:23:55 +0100
parents 7dcff010d9cd
children 78ed74fa177b
rev   line source
Chris@31 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@31 2
Chris@31 3 /*
Chris@31 4 Silvet
Chris@31 5
Chris@31 6 A Vamp plugin for note transcription.
Chris@31 7 Centre for Digital Music, Queen Mary University of London.
Chris@31 8
Chris@31 9 This program is free software; you can redistribute it and/or
Chris@31 10 modify it under the terms of the GNU General Public License as
Chris@31 11 published by the Free Software Foundation; either version 2 of the
Chris@31 12 License, or (at your option) any later version. See the file
Chris@31 13 COPYING included with this distribution for more information.
Chris@31 14 */
Chris@31 15
Chris@31 16 #include "Silvet.h"
Chris@34 17 #include "EM.h"
Chris@31 18
Chris@152 19 #include <cq/CQSpectrogram.h>
Chris@31 20
Chris@152 21 #include "MedianFilter.h"
Chris@152 22 #include "constant-q-cpp/src/dsp/Resampler.h"
Chris@246 23 #include "flattendynamics-ladspa.h"
Chris@298 24 #include "LiveInstruments.h"
Chris@31 25
Chris@31 26 #include <vector>
Chris@31 27
Chris@32 28 #include <cstdio>
Chris@32 29
Chris@356 30 #if (defined(MAX_EM_THREADS) && (MAX_EM_THREADS > 1))
Chris@356 31 #include <future>
Chris@356 32 using std::future;
Chris@356 33 using std::async;
Chris@356 34 #endif
Chris@356 35
Chris@31 36 using std::vector;
Chris@48 37 using std::cout;
Chris@31 38 using std::cerr;
Chris@31 39 using std::endl;
Chris@311 40 using std::pair;
Chris@356 41
Chris@40 42 using Vamp::RealTime;
Chris@31 43
Chris@31 44 static int processingSampleRate = 44100;
Chris@298 45
Chris@298 46 static int binsPerSemitoneLive = 1;
Chris@298 47 static int binsPerSemitoneNormal = 5;
Chris@170 48
Chris@272 49 static int minInputSampleRate = 100;
Chris@272 50 static int maxInputSampleRate = 192000;
Chris@272 51
Chris@316 52 static const Silvet::ProcessingMode defaultMode = Silvet::HighQualityMode;
Chris@316 53
Chris@31 54 Silvet::Silvet(float inputSampleRate) :
Chris@31 55 Plugin(inputSampleRate),
Chris@161 56 m_instruments(InstrumentPack::listInstrumentPacks()),
Chris@298 57 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)),
Chris@31 58 m_resampler(0),
Chris@246 59 m_flattener(0),
Chris@110 60 m_cq(0),
Chris@316 61 m_mode(defaultMode),
Chris@166 62 m_fineTuning(false),
Chris@178 63 m_instrument(0),
Chris@313 64 m_colsPerSec(50),
Chris@313 65 m_haveStartTime(false)
Chris@31 66 {
Chris@31 67 }
Chris@31 68
Chris@31 69 Silvet::~Silvet()
Chris@31 70 {
Chris@31 71 delete m_resampler;
Chris@246 72 delete m_flattener;
Chris@31 73 delete m_cq;
Chris@41 74 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 75 delete m_postFilter[i];
Chris@41 76 }
Chris@31 77 }
Chris@31 78
Chris@31 79 string
Chris@31 80 Silvet::getIdentifier() const
Chris@31 81 {
Chris@31 82 return "silvet";
Chris@31 83 }
Chris@31 84
Chris@31 85 string
Chris@31 86 Silvet::getName() const
Chris@31 87 {
Chris@31 88 return "Silvet Note Transcription";
Chris@31 89 }
Chris@31 90
Chris@31 91 string
Chris@31 92 Silvet::getDescription() const
Chris@31 93 {
Chris@191 94 return "Estimate the note onsets, pitches, and durations that make up a music recording.";
Chris@31 95 }
Chris@31 96
Chris@31 97 string
Chris@31 98 Silvet::getMaker() const
Chris@31 99 {
Chris@191 100 return "Queen Mary, University of London";
Chris@31 101 }
Chris@31 102
Chris@31 103 int
Chris@31 104 Silvet::getPluginVersion() const
Chris@31 105 {
Chris@309 106 return 3;
Chris@31 107 }
Chris@31 108
Chris@31 109 string
Chris@31 110 Silvet::getCopyright() const
Chris@31 111 {
Chris@191 112 return "Method by Emmanouil Benetos and Simon Dixon; plugin by Chris Cannam and Emmanouil Benetos. GPL licence.";
Chris@31 113 }
Chris@31 114
Chris@31 115 Silvet::InputDomain
Chris@31 116 Silvet::getInputDomain() const
Chris@31 117 {
Chris@31 118 return TimeDomain;
Chris@31 119 }
Chris@31 120
Chris@31 121 size_t
Chris@31 122 Silvet::getPreferredBlockSize() const
Chris@31 123 {
Chris@31 124 return 0;
Chris@31 125 }
Chris@31 126
Chris@31 127 size_t
Chris@31 128 Silvet::getPreferredStepSize() const
Chris@31 129 {
Chris@31 130 return 0;
Chris@31 131 }
Chris@31 132
Chris@31 133 size_t
Chris@31 134 Silvet::getMinChannelCount() const
Chris@31 135 {
Chris@31 136 return 1;
Chris@31 137 }
Chris@31 138
Chris@31 139 size_t
Chris@31 140 Silvet::getMaxChannelCount() const
Chris@31 141 {
Chris@31 142 return 1;
Chris@31 143 }
Chris@31 144
Chris@31 145 Silvet::ParameterList
Chris@31 146 Silvet::getParameterDescriptors() const
Chris@31 147 {
Chris@31 148 ParameterList list;
Chris@110 149
Chris@110 150 ParameterDescriptor desc;
Chris@110 151 desc.identifier = "mode";
Chris@110 152 desc.name = "Processing mode";
Chris@110 153 desc.unit = "";
Chris@341 154 desc.description = "Sets the tradeoff of processing speed against transcription quality. Live mode is much faster and detects notes with relatively low latency; Intensive mode (the default) is slower but will almost always produce better results.";
Chris@110 155 desc.minValue = 0;
Chris@344 156 desc.maxValue = 1;
Chris@316 157 desc.defaultValue = int(defaultMode);
Chris@110 158 desc.isQuantized = true;
Chris@110 159 desc.quantizeStep = 1;
Chris@341 160 desc.valueNames.push_back("Live (faster and lower latency)");
Chris@165 161 desc.valueNames.push_back("Intensive (higher quality)");
Chris@161 162 list.push_back(desc);
Chris@161 163
Chris@176 164 desc.identifier = "instrument";
Chris@176 165 desc.name = "Instrument";
Chris@161 166 desc.unit = "";
Chris@271 167 desc.description = "The instrument or instruments known to be present in the recording. This affects the set of instrument templates used, as well as the expected level of polyphony in the output. Using a more limited set of instruments than the default will also make the plugin run faster.\nNote that this plugin cannot isolate instruments: you can't use this setting to request notes from only one instrument in a recording with several. Instead, use this as a hint to the plugin about which instruments are actually present.";
Chris@161 168 desc.minValue = 0;
Chris@162 169 desc.maxValue = m_instruments.size()-1;
Chris@162 170 desc.defaultValue = 0;
Chris@161 171 desc.isQuantized = true;
Chris@161 172 desc.quantizeStep = 1;
Chris@161 173 desc.valueNames.clear();
Chris@162 174 for (int i = 0; i < int(m_instruments.size()); ++i) {
Chris@162 175 desc.valueNames.push_back(m_instruments[i].name);
Chris@162 176 }
Chris@166 177 list.push_back(desc);
Chris@161 178
Chris@166 179 desc.identifier = "finetune";
Chris@166 180 desc.name = "Return fine pitch estimates";
Chris@166 181 desc.unit = "";
Chris@271 182 desc.description = "Return pitch estimates at finer than semitone resolution. This works only in Intensive mode. Notes that appear to drift in pitch will be split up into shorter notes with individually finer pitches.";
Chris@166 183 desc.minValue = 0;
Chris@166 184 desc.maxValue = 1;
Chris@166 185 desc.defaultValue = 0;
Chris@166 186 desc.isQuantized = true;
Chris@166 187 desc.quantizeStep = 1;
Chris@166 188 desc.valueNames.clear();
Chris@110 189 list.push_back(desc);
Chris@110 190
Chris@31 191 return list;
Chris@31 192 }
Chris@31 193
Chris@31 194 float
Chris@31 195 Silvet::getParameter(string identifier) const
Chris@31 196 {
Chris@110 197 if (identifier == "mode") {
Chris@297 198 return (float)(int)m_mode;
Chris@166 199 } else if (identifier == "finetune") {
Chris@166 200 return m_fineTuning ? 1.f : 0.f;
Chris@176 201 } else if (identifier == "instrument") {
Chris@162 202 return m_instrument;
Chris@110 203 }
Chris@31 204 return 0;
Chris@31 205 }
Chris@31 206
Chris@31 207 void
Chris@31 208 Silvet::setParameter(string identifier, float value)
Chris@31 209 {
Chris@110 210 if (identifier == "mode") {
Chris@297 211 m_mode = (ProcessingMode)(int)(value + 0.5);
Chris@166 212 } else if (identifier == "finetune") {
Chris@166 213 m_fineTuning = (value > 0.5);
Chris@176 214 } else if (identifier == "instrument") {
Chris@162 215 m_instrument = lrintf(value);
Chris@110 216 }
Chris@31 217 }
Chris@31 218
Chris@31 219 Silvet::ProgramList
Chris@31 220 Silvet::getPrograms() const
Chris@31 221 {
Chris@31 222 ProgramList list;
Chris@31 223 return list;
Chris@31 224 }
Chris@31 225
Chris@31 226 string
Chris@31 227 Silvet::getCurrentProgram() const
Chris@31 228 {
Chris@31 229 return "";
Chris@31 230 }
Chris@31 231
Chris@31 232 void
Chris@31 233 Silvet::selectProgram(string name)
Chris@31 234 {
Chris@31 235 }
Chris@31 236
Chris@31 237 Silvet::OutputList
Chris@31 238 Silvet::getOutputDescriptors() const
Chris@31 239 {
Chris@31 240 OutputList list;
Chris@31 241
Chris@31 242 OutputDescriptor d;
Chris@51 243 d.identifier = "notes";
Chris@51 244 d.name = "Note transcription";
Chris@329 245 d.description = "Overall note transcription. Each note has time, duration, estimated fundamental frequency, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
Chris@41 246 d.unit = "Hz";
Chris@31 247 d.hasFixedBinCount = true;
Chris@31 248 d.binCount = 2;
Chris@41 249 d.binNames.push_back("Frequency");
Chris@31 250 d.binNames.push_back("Velocity");
Chris@31 251 d.hasKnownExtents = false;
Chris@31 252 d.isQuantized = false;
Chris@31 253 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@246 254 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@31 255 d.hasDuration = true;
Chris@32 256 m_notesOutputNo = list.size();
Chris@32 257 list.push_back(d);
Chris@32 258
Chris@319 259 d.identifier = "onsets";
Chris@319 260 d.name = "Note onsets";
Chris@323 261 d.description = "Note onsets, without durations. These can be calculated sooner than complete notes, because it isn't necessary to wait for a note to finish before returning its feature. Each event has time, estimated fundamental frequency in Hz, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
Chris@319 262 d.unit = "Hz";
Chris@319 263 d.hasFixedBinCount = true;
Chris@319 264 d.binCount = 2;
Chris@319 265 d.binNames.push_back("Frequency");
Chris@319 266 d.binNames.push_back("Velocity");
Chris@319 267 d.hasKnownExtents = false;
Chris@319 268 d.isQuantized = false;
Chris@319 269 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@319 270 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@319 271 d.hasDuration = false;
Chris@319 272 m_onsetsOutputNo = list.size();
Chris@319 273 list.push_back(d);
Chris@319 274
Chris@336 275 d.identifier = "onoffsets";
Chris@336 276 d.name = "Note onsets and offsets";
Chris@336 277 d.description = "Note onsets and offsets as separate events. Each onset event has time, estimated fundamental frequency in Hz, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture. Offsets are represented in the same way but with a velocity of 0.";
Chris@336 278 d.unit = "Hz";
Chris@336 279 d.hasFixedBinCount = true;
Chris@336 280 d.binCount = 2;
Chris@336 281 d.binNames.push_back("Frequency");
Chris@336 282 d.binNames.push_back("Velocity");
Chris@336 283 d.hasKnownExtents = false;
Chris@336 284 d.isQuantized = false;
Chris@336 285 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@336 286 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@336 287 d.hasDuration = false;
Chris@336 288 m_onOffsetsOutputNo = list.size();
Chris@336 289 list.push_back(d);
Chris@336 290
Chris@178 291 d.identifier = "timefreq";
Chris@178 292 d.name = "Time-frequency distribution";
Chris@271 293 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
Chris@178 294 d.unit = "";
Chris@178 295 d.hasFixedBinCount = true;
Chris@298 296 d.binCount = getPack(0).templateHeight;
Chris@178 297 d.binNames.clear();
Chris@178 298 if (m_cq) {
Chris@294 299 char name[50];
Chris@298 300 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@178 301 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@178 302 // lowest-frequency 55 bins have been dropped, for a
Chris@178 303 // 545-bin template. The native CQ bins go high->low
Chris@178 304 // frequency though, so these are still the first 545 bins
Chris@178 305 // as reported by getBinFrequency, though in reverse order
Chris@178 306 float freq = m_cq->getBinFrequency
Chris@298 307 (getPack(0).templateHeight - i - 1);
Chris@178 308 sprintf(name, "%.1f Hz", freq);
Chris@178 309 d.binNames.push_back(name);
Chris@178 310 }
Chris@178 311 }
Chris@178 312 d.hasKnownExtents = false;
Chris@178 313 d.isQuantized = false;
Chris@178 314 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@178 315 d.sampleRate = m_colsPerSec;
Chris@178 316 d.hasDuration = false;
Chris@178 317 m_fcqOutputNo = list.size();
Chris@178 318 list.push_back(d);
Chris@178 319
Chris@294 320 d.identifier = "pitchactivation";
Chris@294 321 d.name = "Pitch activation distribution";
Chris@294 322 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction.";
Chris@294 323 d.unit = "";
Chris@294 324 d.hasFixedBinCount = true;
Chris@298 325 d.binCount = getPack(0).templateNoteCount;
Chris@294 326 d.binNames.clear();
Chris@294 327 if (m_cq) {
Chris@298 328 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@336 329 d.binNames.push_back(getNoteName(i, 0));
Chris@294 330 }
Chris@294 331 }
Chris@294 332 d.hasKnownExtents = false;
Chris@294 333 d.isQuantized = false;
Chris@294 334 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@294 335 d.sampleRate = m_colsPerSec;
Chris@294 336 d.hasDuration = false;
Chris@294 337 m_pitchOutputNo = list.size();
Chris@294 338 list.push_back(d);
Chris@294 339
Chris@309 340 d.identifier = "chroma";
Chris@309 341 d.name = "Pitch chroma distribution";
Chris@309 342 d.description = "Pitch chroma distribution formed by wrapping the un-thresholded pitch activation distribution into a single octave of semitone bins.";
Chris@309 343 d.unit = "";
Chris@309 344 d.hasFixedBinCount = true;
Chris@309 345 d.binCount = 12;
Chris@309 346 d.binNames.clear();
Chris@309 347 if (m_cq) {
Chris@309 348 for (int i = 0; i < 12; ++i) {
Chris@320 349 d.binNames.push_back(getChromaName(i));
Chris@309 350 }
Chris@309 351 }
Chris@309 352 d.hasKnownExtents = false;
Chris@309 353 d.isQuantized = false;
Chris@309 354 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@309 355 d.sampleRate = m_colsPerSec;
Chris@309 356 d.hasDuration = false;
Chris@309 357 m_chromaOutputNo = list.size();
Chris@309 358 list.push_back(d);
Chris@309 359
Chris@302 360 d.identifier = "templates";
Chris@302 361 d.name = "Templates";
Chris@302 362 d.description = "Constant-Q spectral templates for the selected instrument pack.";
Chris@302 363 d.unit = "";
Chris@302 364 d.hasFixedBinCount = true;
Chris@302 365 d.binCount = getPack(0).templateHeight;
Chris@302 366 d.binNames.clear();
Chris@302 367 if (m_cq) {
Chris@302 368 char name[50];
Chris@302 369 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@302 370 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@302 371 // lowest-frequency 55 bins have been dropped, for a
Chris@302 372 // 545-bin template. The native CQ bins go high->low
Chris@302 373 // frequency though, so these are still the first 545 bins
Chris@302 374 // as reported by getBinFrequency, though in reverse order
Chris@302 375 float freq = m_cq->getBinFrequency
Chris@302 376 (getPack(0).templateHeight - i - 1);
Chris@302 377 sprintf(name, "%.1f Hz", freq);
Chris@302 378 d.binNames.push_back(name);
Chris@302 379 }
Chris@302 380 }
Chris@302 381 d.hasKnownExtents = false;
Chris@302 382 d.isQuantized = false;
Chris@302 383 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@302 384 d.sampleRate = m_colsPerSec;
Chris@302 385 d.hasDuration = false;
Chris@302 386 m_templateOutputNo = list.size();
Chris@302 387 list.push_back(d);
Chris@302 388
Chris@31 389 return list;
Chris@31 390 }
Chris@31 391
Chris@38 392 std::string
Chris@320 393 Silvet::getChromaName(int pitch) const
Chris@38 394 {
Chris@38 395 static const char *names[] = {
Chris@38 396 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
Chris@38 397 };
Chris@38 398
Chris@309 399 return names[pitch];
Chris@309 400 }
Chris@309 401
Chris@309 402 std::string
Chris@336 403 Silvet::getNoteName(int note, int shift) const
Chris@309 404 {
Chris@320 405 string n = getChromaName(note % 12);
Chris@38 406
Chris@175 407 int oct = (note + 9) / 12;
Chris@38 408
Chris@175 409 char buf[30];
Chris@175 410
Chris@175 411 float pshift = 0.f;
Chris@336 412 int shiftCount = getShiftCount();
Chris@175 413 if (shiftCount > 1) {
Chris@320 414 // see getNoteFrequency below
Chris@175 415 pshift =
Chris@175 416 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 417 }
Chris@175 418
Chris@175 419 if (pshift > 0.f) {
Chris@309 420 sprintf(buf, "%s%d+%dc", n.c_str(), oct, int(round(pshift * 100)));
Chris@175 421 } else if (pshift < 0.f) {
Chris@309 422 sprintf(buf, "%s%d-%dc", n.c_str(), oct, int(round((-pshift) * 100)));
Chris@175 423 } else {
Chris@309 424 sprintf(buf, "%s%d", n.c_str(), oct);
Chris@175 425 }
Chris@38 426
Chris@38 427 return buf;
Chris@38 428 }
Chris@38 429
Chris@41 430 float
Chris@336 431 Silvet::getNoteFrequency(int note, int shift) const
Chris@41 432 {
Chris@169 433 // Convert shift number to a pitch shift. The given shift number
Chris@169 434 // is an offset into the template array, which starts with some
Chris@169 435 // zeros, followed by the template, then some trailing zeros.
Chris@169 436 //
Chris@169 437 // Example: if we have templateMaxShift == 2 and thus shiftCount
Chris@169 438 // == 5, then the number will be in the range 0-4 and the template
Chris@169 439 // will have 2 zeros at either end. Thus number 2 represents the
Chris@169 440 // template "as recorded", for a pitch shift of 0; smaller indices
Chris@169 441 // represent moving the template *up* in pitch (by introducing
Chris@169 442 // zeros at the start, which is the low-frequency end), for a
Chris@169 443 // positive pitch shift; and higher values represent moving it
Chris@169 444 // down in pitch, for a negative pitch shift.
Chris@169 445
Chris@175 446 float pshift = 0.f;
Chris@336 447 int shiftCount = getShiftCount();
Chris@175 448 if (shiftCount > 1) {
Chris@175 449 pshift =
Chris@175 450 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 451 }
Chris@169 452
Chris@301 453 float freq = float(27.5 * pow(2.0, (note + pshift) / 12.0));
Chris@301 454
Chris@303 455 // cerr << "note = " << note << ", shift = " << shift << ", shiftCount = "
Chris@303 456 // << shiftCount << ", obtained freq = " << freq << endl;
Chris@301 457
Chris@301 458 return freq;
Chris@41 459 }
Chris@41 460
Chris@31 461 bool
Chris@31 462 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@31 463 {
Chris@272 464 if (m_inputSampleRate < minInputSampleRate ||
Chris@272 465 m_inputSampleRate > maxInputSampleRate) {
Chris@272 466 cerr << "Silvet::initialise: Unsupported input sample rate "
Chris@272 467 << m_inputSampleRate << " (supported min " << minInputSampleRate
Chris@272 468 << ", max " << maxInputSampleRate << ")" << endl;
Chris@272 469 return false;
Chris@272 470 }
Chris@272 471
Chris@31 472 if (channels < getMinChannelCount() ||
Chris@272 473 channels > getMaxChannelCount()) {
Chris@272 474 cerr << "Silvet::initialise: Unsupported channel count " << channels
Chris@272 475 << " (supported min " << getMinChannelCount() << ", max "
Chris@272 476 << getMaxChannelCount() << ")" << endl;
Chris@272 477 return false;
Chris@272 478 }
Chris@31 479
Chris@31 480 if (stepSize != blockSize) {
Chris@31 481 cerr << "Silvet::initialise: Step size must be the same as block size ("
Chris@31 482 << stepSize << " != " << blockSize << ")" << endl;
Chris@31 483 return false;
Chris@31 484 }
Chris@31 485
Chris@31 486 m_blockSize = blockSize;
Chris@31 487
Chris@31 488 reset();
Chris@31 489
Chris@31 490 return true;
Chris@31 491 }
Chris@31 492
Chris@31 493 void
Chris@31 494 Silvet::reset()
Chris@31 495 {
Chris@31 496 delete m_resampler;
Chris@246 497 delete m_flattener;
Chris@31 498 delete m_cq;
Chris@31 499
Chris@31 500 if (m_inputSampleRate != processingSampleRate) {
Chris@31 501 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
Chris@31 502 } else {
Chris@31 503 m_resampler = 0;
Chris@31 504 }
Chris@31 505
Chris@246 506 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
Chris@246 507 m_flattener->reset();
Chris@246 508
Chris@301 509 // this happens to be processingSampleRate / 3, and is the top
Chris@301 510 // freq used for the EM templates:
Chris@301 511 double maxFreq = 14700;
Chris@301 512
Chris@301 513 if (m_mode == LiveMode) {
Chris@301 514 // We only have 12 bpo rather than 60, so we need the top bin
Chris@301 515 // to be the middle one of the top 5, i.e. 2/5 of a semitone
Chris@301 516 // lower than 14700
Chris@301 517 maxFreq *= powf(2.0, -1.0 / 30.0);
Chris@301 518 }
Chris@301 519
Chris@173 520 double minFreq = 27.5;
Chris@173 521
Chris@341 522 if (m_mode == LiveMode) {
Chris@173 523 // We don't actually return any notes from the bottom octave,
Chris@173 524 // so we can just pad with zeros
Chris@173 525 minFreq *= 2;
Chris@173 526 }
Chris@173 527
Chris@298 528 int bpo = 12 *
Chris@298 529 (m_mode == LiveMode ? binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@301 530
Chris@154 531 CQParameters params(processingSampleRate,
Chris@173 532 minFreq,
Chris@303 533 maxFreq,
Chris@298 534 bpo);
Chris@154 535
Chris@325 536 params.q = 0.8;
Chris@325 537 params.atomHopFactor = (m_mode == LiveMode ? 1.0 : 0.3);
Chris@154 538 params.threshold = 0.0005;
Chris@317 539 params.decimator =
Chris@317 540 (m_mode == LiveMode ?
Chris@317 541 CQParameters::FasterDecimator : CQParameters::BetterDecimator);
Chris@172 542 params.window = CQParameters::Hann;
Chris@154 543
Chris@154 544 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
Chris@31 545
Chris@303 546 // cerr << "CQ bins = " << m_cq->getTotalBins() << endl;
Chris@303 547 // cerr << "CQ min freq = " << m_cq->getMinFrequency() << " (and for confirmation, freq of bin 0 = " << m_cq->getBinFrequency(0) << ")" << endl;
Chris@297 548
Chris@341 549 m_colsPerSec = 50;
Chris@165 550
Chris@41 551 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 552 delete m_postFilter[i];
Chris@41 553 }
Chris@41 554 m_postFilter.clear();
Chris@303 555 int postFilterLength = 3;
Chris@298 556 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@303 557 m_postFilter.push_back(new MedianFilter<double>(postFilterLength));
Chris@41 558 }
Chris@41 559 m_pianoRoll.clear();
Chris@246 560 m_inputGains.clear();
Chris@32 561 m_columnCount = 0;
Chris@272 562 m_resampledCount = 0;
Chris@40 563 m_startTime = RealTime::zeroTime;
Chris@313 564 m_haveStartTime = false;
Chris@31 565 }
Chris@31 566
Chris@31 567 Silvet::FeatureSet
Chris@31 568 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
Chris@31 569 {
Chris@302 570 FeatureSet fs;
Chris@302 571
Chris@313 572 if (!m_haveStartTime) {
Chris@314 573
Chris@40 574 m_startTime = timestamp;
Chris@313 575 m_haveStartTime = true;
Chris@314 576
Chris@302 577 insertTemplateFeatures(fs);
Chris@40 578 }
Chris@246 579
Chris@246 580 vector<float> flattened(m_blockSize);
Chris@246 581 float gain = 1.f;
Chris@246 582 m_flattener->connectInputPort
Chris@246 583 (FlattenDynamics::AudioInputPort, inputBuffers[0]);
Chris@246 584 m_flattener->connectOutputPort
Chris@246 585 (FlattenDynamics::AudioOutputPort, &flattened[0]);
Chris@246 586 m_flattener->connectOutputPort
Chris@246 587 (FlattenDynamics::GainOutputPort, &gain);
Chris@246 588 m_flattener->process(m_blockSize);
Chris@246 589
Chris@252 590 m_inputGains[timestamp] = gain;
Chris@40 591
Chris@31 592 vector<double> data;
Chris@40 593 for (int i = 0; i < m_blockSize; ++i) {
Chris@246 594 double d = flattened[i];
Chris@235 595 data.push_back(d);
Chris@40 596 }
Chris@31 597
Chris@31 598 if (m_resampler) {
Chris@272 599
Chris@31 600 data = m_resampler->process(data.data(), data.size());
Chris@272 601
Chris@272 602 int hadCount = m_resampledCount;
Chris@272 603 m_resampledCount += data.size();
Chris@272 604
Chris@272 605 int resamplerLatency = m_resampler->getLatency();
Chris@272 606
Chris@272 607 if (hadCount < resamplerLatency) {
Chris@272 608 int stillToDrop = resamplerLatency - hadCount;
Chris@272 609 if (stillToDrop >= int(data.size())) {
Chris@302 610 return fs;
Chris@272 611 } else {
Chris@272 612 data = vector<double>(data.begin() + stillToDrop, data.end());
Chris@272 613 }
Chris@272 614 }
Chris@31 615 }
Chris@272 616
Chris@32 617 Grid cqout = m_cq->process(data);
Chris@302 618 transcribe(cqout, fs);
Chris@51 619 return fs;
Chris@34 620 }
Chris@34 621
Chris@34 622 Silvet::FeatureSet
Chris@34 623 Silvet::getRemainingFeatures()
Chris@34 624 {
Chris@145 625 Grid cqout = m_cq->getRemainingOutput();
Chris@302 626 FeatureSet fs;
Chris@336 627
Chris@302 628 if (m_columnCount == 0) {
Chris@302 629 // process() was never called, but we still want these
Chris@302 630 insertTemplateFeatures(fs);
Chris@302 631 } else {
Chris@336 632
Chris@336 633 // Complete the transcription
Chris@336 634
Chris@302 635 transcribe(cqout, fs);
Chris@336 636
Chris@336 637 // And make sure any extant playing notes are finished and returned
Chris@336 638
Chris@336 639 m_pianoRoll.push_back({});
Chris@336 640
Chris@336 641 auto events = noteTrack();
Chris@336 642
Chris@336 643 for (const auto &f : events.notes) {
Chris@336 644 fs[m_notesOutputNo].push_back(f);
Chris@336 645 }
Chris@336 646
Chris@336 647 for (const auto &f : events.onsets) {
Chris@336 648 fs[m_onsetsOutputNo].push_back(f);
Chris@336 649 }
Chris@336 650
Chris@336 651 for (const auto &f : events.onOffsets) {
Chris@336 652 fs[m_onOffsetsOutputNo].push_back(f);
Chris@336 653 }
Chris@302 654 }
Chris@336 655
Chris@51 656 return fs;
Chris@34 657 }
Chris@34 658
Chris@302 659 void
Chris@302 660 Silvet::insertTemplateFeatures(FeatureSet &fs)
Chris@302 661 {
Chris@302 662 const InstrumentPack &pack = getPack(m_instrument);
Chris@302 663 for (int i = 0; i < int(pack.templates.size()) * pack.templateNoteCount; ++i) {
Chris@302 664 RealTime timestamp = RealTime::fromSeconds(double(i) / m_colsPerSec);
Chris@302 665 Feature f;
Chris@302 666 char buffer[50];
Chris@302 667 sprintf(buffer, "Note %d", i + 1);
Chris@302 668 f.label = buffer;
Chris@302 669 f.hasTimestamp = true;
Chris@302 670 f.timestamp = timestamp;
Chris@302 671 f.values = pack.templates[i / pack.templateNoteCount]
Chris@302 672 .data[i % pack.templateNoteCount];
Chris@302 673 fs[m_templateOutputNo].push_back(f);
Chris@302 674 }
Chris@302 675 }
Chris@302 676
Chris@336 677 int
Chris@336 678 Silvet::getShiftCount() const
Chris@336 679 {
Chris@336 680 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning;
Chris@336 681 int shiftCount = 1;
Chris@336 682 if (wantShifts) {
Chris@336 683 const InstrumentPack &pack(getPack(m_instrument));
Chris@336 684 shiftCount = pack.templateMaxShift * 2 + 1;
Chris@336 685 }
Chris@336 686 return shiftCount;
Chris@336 687 }
Chris@336 688
Chris@302 689 void
Chris@302 690 Silvet::transcribe(const Grid &cqout, Silvet::FeatureSet &fs)
Chris@34 691 {
Chris@32 692 Grid filtered = preProcess(cqout);
Chris@31 693
Chris@302 694 if (filtered.empty()) return;
Chris@170 695
Chris@298 696 const InstrumentPack &pack(getPack(m_instrument));
Chris@104 697
Chris@325 698 int width = filtered.size();
Chris@325 699
Chris@325 700 double silenceThreshold = 0.01;
Chris@325 701
Chris@325 702 for (int i = 0; i < width; ++i) {
Chris@325 703
Chris@325 704 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1 + i);
Chris@325 705 float inputGain = getInputGainAt(timestamp);
Chris@325 706
Chris@178 707 Feature f;
Chris@325 708 double rms = 0.0;
Chris@325 709
Chris@178 710 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@325 711 double v = filtered[i][j];
Chris@325 712 rms += v * v;
Chris@325 713 f.values.push_back(float(v));
Chris@178 714 }
Chris@325 715
Chris@325 716 rms = sqrt(rms / pack.templateHeight);
Chris@325 717 if (rms / inputGain < silenceThreshold) {
Chris@325 718 filtered[i].clear();
Chris@325 719 }
Chris@325 720
Chris@178 721 fs[m_fcqOutputNo].push_back(f);
Chris@178 722 }
Chris@325 723
Chris@311 724 Grid localPitches(width);
Chris@170 725
Chris@336 726 int shiftCount = getShiftCount();
Chris@336 727 bool wantShifts = (shiftCount > 1);
Chris@170 728
Chris@170 729 vector<vector<int> > localBestShifts;
Chris@170 730 if (wantShifts) {
Chris@311 731 localBestShifts = vector<vector<int> >(width);
Chris@170 732 }
Chris@170 733
Chris@356 734 int emThreadCount = 1;
Chris@356 735
Chris@356 736 #if (defined(MAX_EM_THREADS) && (MAX_EM_THREADS > 1))
Chris@356 737 emThreadCount = MAX_EM_THREADS;
Chris@356 738
Chris@352 739 if (emThreadCount > int(std::thread::hardware_concurrency())) {
Chris@352 740 emThreadCount = std::thread::hardware_concurrency();
Chris@352 741 }
Chris@317 742 if (m_mode == LiveMode && pack.templates.size() == 1) {
Chris@317 743 // The EM step is probably not slow enough to merit it
Chris@317 744 emThreadCount = 1;
Chris@317 745 }
Chris@317 746
Chris@317 747 if (emThreadCount > 1) {
Chris@317 748 for (int i = 0; i < width; ) {
Chris@317 749 typedef future<pair<vector<double>, vector<int>>> EMFuture;
Chris@317 750 vector<EMFuture> results;
Chris@317 751 for (int j = 0; j < emThreadCount && i + j < width; ++j) {
Chris@352 752 const vector<double> &column = filtered.at(i + j);
Chris@317 753 results.push_back
Chris@317 754 (async(std::launch::async,
Chris@352 755 [&]() { return applyEM(pack, column); }));
Chris@317 756 }
Chris@317 757 for (int j = 0; j < emThreadCount && i + j < width; ++j) {
Chris@317 758 auto out = results[j].get();
Chris@317 759 localPitches[i+j] = out.first;
Chris@317 760 if (wantShifts) localBestShifts[i+j] = out.second;
Chris@317 761 }
Chris@317 762 i += emThreadCount;
Chris@312 763 }
Chris@123 764 }
Chris@312 765 #endif
Chris@317 766
Chris@317 767 if (emThreadCount == 1) {
Chris@317 768 for (int i = 0; i < width; ++i) {
Chris@336 769 auto out = applyEM(pack, filtered.at(i));
Chris@317 770 localPitches[i] = out.first;
Chris@317 771 if (wantShifts) localBestShifts[i] = out.second;
Chris@317 772 }
Chris@317 773 }
Chris@305 774
Chris@166 775 for (int i = 0; i < width; ++i) {
Chris@37 776
Chris@321 777 vector<double> filtered;
Chris@321 778
Chris@321 779 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@321 780 m_postFilter[j]->push(localPitches[i][j]);
Chris@321 781 filtered.push_back(m_postFilter[j]->get());
Chris@321 782 }
Chris@294 783
Chris@309 784 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1);
Chris@309 785 float inputGain = getInputGainAt(timestamp);
Chris@309 786
Chris@294 787 Feature f;
Chris@294 788 for (int j = 0; j < (int)filtered.size(); ++j) {
Chris@309 789 float v = filtered[j];
Chris@294 790 if (v < pack.levelThreshold) v = 0.f;
Chris@309 791 f.values.push_back(v / inputGain);
Chris@294 792 }
Chris@294 793 fs[m_pitchOutputNo].push_back(f);
Chris@309 794
Chris@309 795 f.values.clear();
Chris@309 796 f.values.resize(12);
Chris@309 797 for (int j = 0; j < (int)filtered.size(); ++j) {
Chris@309 798 f.values[j % 12] += filtered[j] / inputGain;
Chris@309 799 }
Chris@309 800 fs[m_chromaOutputNo].push_back(f);
Chris@38 801
Chris@321 802 // This pushes the up-to-max-polyphony activation column to
Chris@321 803 // m_pianoRoll
Chris@336 804 postProcess(filtered, localBestShifts[i]);
Chris@321 805
Chris@336 806 auto events = noteTrack();
Chris@319 807
Chris@336 808 for (const auto &f : events.notes) {
Chris@336 809 fs[m_notesOutputNo].push_back(f);
Chris@40 810 }
Chris@319 811
Chris@336 812 for (const auto &f : events.onsets) {
Chris@336 813 fs[m_onsetsOutputNo].push_back(f);
Chris@336 814 }
Chris@336 815
Chris@336 816 for (const auto &f : events.onOffsets) {
Chris@336 817 fs[m_onOffsetsOutputNo].push_back(f);
Chris@319 818 }
Chris@34 819 }
Chris@31 820 }
Chris@31 821
Chris@311 822 pair<vector<double>, vector<int> >
Chris@311 823 Silvet::applyEM(const InstrumentPack &pack,
Chris@336 824 const vector<double> &column)
Chris@311 825 {
Chris@311 826 double columnThreshold = 1e-5;
Chris@311 827
Chris@314 828 if (m_mode == LiveMode) {
Chris@325 829 columnThreshold /= 15;
Chris@314 830 }
Chris@314 831
Chris@311 832 vector<double> pitches(pack.templateNoteCount, 0.0);
Chris@311 833 vector<int> bestShifts;
Chris@325 834
Chris@325 835 if (column.empty()) return { pitches, bestShifts };
Chris@311 836
Chris@311 837 double sum = 0.0;
Chris@311 838 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@311 839 sum += column.at(j);
Chris@311 840 }
Chris@311 841 if (sum < columnThreshold) return { pitches, bestShifts };
Chris@311 842
Chris@314 843 EM em(&pack, m_mode == HighQualityMode);
Chris@311 844
Chris@311 845 em.setPitchSparsity(pack.pitchSparsity);
Chris@311 846 em.setSourceSparsity(pack.sourceSparsity);
Chris@311 847
Chris@314 848 int iterations = (m_mode == HighQualityMode ? 20 : 10);
Chris@311 849
Chris@311 850 for (int j = 0; j < iterations; ++j) {
Chris@311 851 em.iterate(column.data());
Chris@311 852 }
Chris@311 853
Chris@311 854 const float *pitchDist = em.getPitchDistribution();
Chris@311 855 const float *const *shiftDist = em.getShifts();
Chris@311 856
Chris@336 857 int shiftCount = getShiftCount();
Chris@311 858
Chris@311 859 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@311 860
Chris@311 861 pitches[j] = pitchDist[j] * sum;
Chris@311 862
Chris@311 863 int bestShift = 0;
Chris@311 864 float bestShiftValue = 0.0;
Chris@336 865 if (shiftCount > 1) {
Chris@311 866 for (int k = 0; k < shiftCount; ++k) {
Chris@311 867 float value = shiftDist[k][j];
Chris@311 868 if (k == 0 || value > bestShiftValue) {
Chris@311 869 bestShiftValue = value;
Chris@311 870 bestShift = k;
Chris@311 871 }
Chris@311 872 }
Chris@311 873 bestShifts.push_back(bestShift);
Chris@311 874 }
Chris@311 875 }
Chris@311 876
Chris@311 877 return { pitches, bestShifts };
Chris@311 878 }
Chris@311 879
Chris@32 880 Silvet::Grid
Chris@32 881 Silvet::preProcess(const Grid &in)
Chris@32 882 {
Chris@32 883 int width = in.size();
Chris@32 884
Chris@165 885 int spacing = processingSampleRate / m_colsPerSec;
Chris@32 886
Chris@165 887 // need to be careful that col spacing is an integer number of samples!
Chris@165 888 assert(spacing * m_colsPerSec == processingSampleRate);
Chris@32 889
Chris@32 890 Grid out;
Chris@32 891
Chris@58 892 // We count the CQ latency in terms of processing hops, but
Chris@58 893 // actually it probably isn't an exact number of hops so this
Chris@58 894 // isn't quite accurate. But the small constant offset is
Chris@165 895 // practically irrelevant compared to the jitter from the frame
Chris@165 896 // size we reduce to in a moment
Chris@33 897 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
Chris@33 898
Chris@298 899 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 900
Chris@32 901 for (int i = 0; i < width; ++i) {
Chris@32 902
Chris@33 903 if (m_columnCount < latentColumns) {
Chris@33 904 ++m_columnCount;
Chris@33 905 continue;
Chris@33 906 }
Chris@33 907
Chris@32 908 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop();
Chris@32 909 int sampleNo = m_columnCount * m_cq->getColumnHop();
Chris@32 910
Chris@32 911 bool select = (sampleNo / spacing != prevSampleNo / spacing);
Chris@32 912
Chris@32 913 if (select) {
Chris@32 914 vector<double> inCol = in[i];
Chris@176 915 vector<double> outCol(pack.templateHeight);
Chris@32 916
Chris@178 917 // In HQ mode, the CQ returns 600 bins and we ignore the
Chris@298 918 // lowest 55 of them (assuming binsPerSemitone == 5).
Chris@178 919 //
Chris@341 920 // In live mode the CQ is an octave shorter, returning 540
Chris@341 921 // bins or equivalent, so we instead pad them with an
Chris@341 922 // additional 5 or equivalent zeros.
Chris@178 923 //
Chris@178 924 // We also need to reverse the column as we go, since the
Chris@178 925 // raw CQ has the high frequencies first and we need it
Chris@178 926 // the other way around.
Chris@32 927
Chris@298 928 int bps = (m_mode == LiveMode ?
Chris@298 929 binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@298 930
Chris@297 931 if (m_mode == HighQualityMode) {
Chris@178 932 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@298 933 int ix = inCol.size() - j - (11 * bps);
Chris@178 934 outCol[j] = inCol[ix];
Chris@178 935 }
Chris@178 936 } else {
Chris@298 937 for (int j = 0; j < bps; ++j) {
Chris@178 938 outCol[j] = 0.0;
Chris@178 939 }
Chris@298 940 for (int j = bps; j < pack.templateHeight; ++j) {
Chris@298 941 int ix = inCol.size() - j + (bps-1);
Chris@178 942 outCol[j] = inCol[ix];
Chris@178 943 }
Chris@46 944 }
Chris@32 945
Chris@46 946 vector<double> noiseLevel1 =
Chris@298 947 MedianFilter<double>::filter(8 * bps, outCol);
Chris@176 948 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 949 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
Chris@46 950 }
Chris@32 951
Chris@46 952 vector<double> noiseLevel2 =
Chris@298 953 MedianFilter<double>::filter(8 * bps, noiseLevel1);
Chris@176 954 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 955 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
Chris@32 956 }
Chris@32 957
Chris@165 958 out.push_back(outCol);
Chris@32 959 }
Chris@32 960
Chris@32 961 ++m_columnCount;
Chris@32 962 }
Chris@32 963
Chris@32 964 return out;
Chris@32 965 }
Chris@32 966
Chris@321 967 void
Chris@170 968 Silvet::postProcess(const vector<double> &pitches,
Chris@336 969 const vector<int> &bestShifts)
Chris@166 970 {
Chris@298 971 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 972
Chris@41 973 // Threshold for level and reduce number of candidate pitches
Chris@41 974
Chris@41 975 typedef std::multimap<double, int> ValueIndexMap;
Chris@41 976
Chris@41 977 ValueIndexMap strengths;
Chris@166 978
Chris@176 979 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@321 980
Chris@321 981 double strength = pitches[j];
Chris@183 982 if (strength < pack.levelThreshold) continue;
Chris@321 983
Chris@321 984 // In live mode with only a 12-bpo CQ, we are very likely to
Chris@321 985 // get clusters of two or three high scores at a time for
Chris@321 986 // neighbouring semitones. Eliminate these by picking only the
Chris@325 987 // peaks (except that we never eliminate a note that has
Chris@325 988 // already been established as currently playing). This means
Chris@325 989 // we can't recognise actual semitone chords if they ever
Chris@325 990 // appear, but it's not as if live mode is good enough for
Chris@325 991 // that to be a big deal anyway.
Chris@321 992 if (m_mode == LiveMode) {
Chris@325 993 if (m_current.find(j) == m_current.end() &&
Chris@325 994 (j == 0 ||
Chris@325 995 j + 1 == pack.templateNoteCount ||
Chris@325 996 pitches[j] < pitches[j-1] ||
Chris@325 997 pitches[j] < pitches[j+1])) {
Chris@325 998 // not a peak or a currently-playing note: skip it
Chris@321 999 continue;
Chris@321 1000 }
Chris@321 1001 }
Chris@323 1002
Chris@168 1003 strengths.insert(ValueIndexMap::value_type(strength, j));
Chris@168 1004 }
Chris@166 1005
Chris@168 1006 ValueIndexMap::const_iterator si = strengths.end();
Chris@167 1007
Chris@168 1008 map<int, double> active;
Chris@168 1009 map<int, int> activeShifts;
Chris@168 1010
Chris@336 1011 int shiftCount = getShiftCount();
Chris@336 1012
Chris@183 1013 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) {
Chris@168 1014
Chris@168 1015 --si;
Chris@168 1016
Chris@168 1017 double strength = si->first;
Chris@168 1018 int j = si->second;
Chris@168 1019
Chris@168 1020 active[j] = strength;
Chris@168 1021
Chris@336 1022 if (shiftCount > 1) {
Chris@170 1023 activeShifts[j] = bestShifts[j];
Chris@167 1024 }
Chris@41 1025 }
Chris@41 1026
Chris@168 1027 m_pianoRoll.push_back(active);
Chris@170 1028
Chris@336 1029 if (shiftCount > 1) {
Chris@168 1030 m_pianoRollShifts.push_back(activeShifts);
Chris@41 1031 }
Chris@294 1032
Chris@321 1033 return;
Chris@166 1034 }
Chris@166 1035
Chris@336 1036 Silvet::FeatureChunk
Chris@336 1037 Silvet::noteTrack()
Chris@166 1038 {
Chris@41 1039 // Minimum duration pruning, and conversion to notes. We can only
Chris@41 1040 // report notes that have just ended (i.e. that are absent in the
Chris@168 1041 // latest active set but present in the prior set in the piano
Chris@41 1042 // roll) -- any notes that ended earlier will have been reported
Chris@41 1043 // already, and if they haven't ended, we don't know their
Chris@41 1044 // duration.
Chris@41 1045
Chris@168 1046 int width = m_pianoRoll.size() - 1;
Chris@168 1047
Chris@168 1048 const map<int, double> &active = m_pianoRoll[width];
Chris@41 1049
Chris@165 1050 double columnDuration = 1.0 / m_colsPerSec;
Chris@165 1051
Chris@165 1052 // only keep notes >= 100ms or thereabouts
Chris@323 1053 double durationThrSec = 0.1;
Chris@323 1054 int durationThreshold = floor(durationThrSec / columnDuration); // in cols
Chris@165 1055 if (durationThreshold < 1) durationThreshold = 1;
Chris@41 1056
Chris@336 1057 FeatureList noteFeatures, onsetFeatures, onOffsetFeatures;
Chris@41 1058
Chris@41 1059 if (width < durationThreshold + 1) {
Chris@336 1060 return { noteFeatures, onsetFeatures, onOffsetFeatures };
Chris@41 1061 }
Chris@357 1062
Chris@357 1063 // Make a copy of the latest column. We need a copy because it is
Chris@357 1064 // possible we may erase from the "live" column within the loop.
Chris@357 1065 map<int, double> latest = m_pianoRoll[width-1];
Chris@41 1066
Chris@357 1067 for (const auto &ni: latest) {
Chris@41 1068
Chris@357 1069 int note = ni.first;
Chris@41 1070
Chris@41 1071 int end = width;
Chris@41 1072 int start = end-1;
Chris@41 1073
Chris@41 1074 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
Chris@41 1075 --start;
Chris@41 1076 }
Chris@41 1077 ++start;
Chris@41 1078
Chris@319 1079 int duration = end - start;
Chris@319 1080
Chris@319 1081 if (duration < durationThreshold) {
Chris@41 1082 continue;
Chris@41 1083 }
Chris@41 1084
Chris@319 1085 if (duration == durationThreshold) {
Chris@325 1086 m_current.insert(note);
Chris@336 1087 emitOnset(start, note, onsetFeatures);
Chris@336 1088 emitOnset(start, note, onOffsetFeatures);
Chris@319 1089 }
Chris@319 1090
Chris@319 1091 if (active.find(note) == active.end()) {
Chris@319 1092 // the note was playing but just ended
Chris@325 1093 m_current.erase(note);
Chris@343 1094 emitNoteAndOffset(start, end, note, noteFeatures, onOffsetFeatures);
Chris@334 1095 } else { // still playing
Chris@334 1096 // repeated note detection: if level is greater than this
Chris@334 1097 // multiple of its previous value, then we end the note and
Chris@334 1098 // restart it with the same pitch
Chris@334 1099 double restartFactor = 1.5;
Chris@334 1100 if (duration >= durationThreshold * 2 &&
Chris@334 1101 (active.find(note)->second >
Chris@334 1102 restartFactor * m_pianoRoll[width-1][note])) {
Chris@334 1103 m_current.erase(note);
Chris@343 1104 emitNoteAndOffset(start, end-1, note, noteFeatures, onOffsetFeatures);
Chris@334 1105 // and remove this so that we start counting the new
Chris@357 1106 // note's duration from the current position. (This
Chris@357 1107 // erase is why we needed to copy this column at the
Chris@357 1108 // top of the loop.)
Chris@334 1109 m_pianoRoll[width-1].erase(note);
Chris@334 1110 }
Chris@319 1111 }
Chris@41 1112 }
Chris@41 1113
Chris@62 1114 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
Chris@41 1115
Chris@336 1116 return { noteFeatures, onsetFeatures, onOffsetFeatures };
Chris@41 1117 }
Chris@41 1118
Chris@169 1119 void
Chris@343 1120 Silvet::emitNoteAndOffset(int start, int end, int note,
Chris@343 1121 FeatureList &noteFeatures,
Chris@343 1122 FeatureList &onOffsetFeatures)
Chris@169 1123 {
Chris@343 1124 // Emit the complete note-event feature, and its offset. We have
Chris@343 1125 // already emitted the note onset when it started -- that process
Chris@343 1126 // is separated out in order to get a faster response during live
Chris@343 1127 // tracking. However, if the note shift changes within the note
Chris@343 1128 // (which can happen only if we have fine-tuning switched on), we
Chris@343 1129 // emit an offset and then a new onset with the new shift.
Chris@343 1130
Chris@169 1131 int partStart = start;
Chris@169 1132 int partShift = 0;
Chris@320 1133 double partStrength = 0;
Chris@169 1134
Chris@343 1135 // NB this *must* be less than durationThreshold above
Chris@252 1136 int partThreshold = floor(0.05 * m_colsPerSec);
Chris@169 1137
Chris@169 1138 for (int i = start; i != end; ++i) {
Chris@169 1139
Chris@169 1140 double strength = m_pianoRoll[i][note];
Chris@169 1141
Chris@169 1142 int shift = 0;
Chris@169 1143
Chris@336 1144 if (getShiftCount() > 1) {
Chris@169 1145
Chris@169 1146 shift = m_pianoRollShifts[i][note];
Chris@169 1147
Chris@169 1148 if (i == partStart) {
Chris@169 1149 partShift = shift;
Chris@169 1150 }
Chris@169 1151
Chris@169 1152 if (i > partStart + partThreshold && shift != partShift) {
Chris@169 1153
Chris@169 1154 // pitch has changed, emit an intermediate note
Chris@252 1155 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 1156 i,
Chris@252 1157 note,
Chris@252 1158 partShift,
Chris@320 1159 partStrength));
Chris@343 1160
Chris@343 1161 onOffsetFeatures.push_back(makeOffsetFeature(i,
Chris@343 1162 note,
Chris@343 1163 partShift));
Chris@343 1164
Chris@169 1165 partStart = i;
Chris@169 1166 partShift = shift;
Chris@343 1167
Chris@343 1168 onOffsetFeatures.push_back(makeOnsetFeature(i,
Chris@343 1169 note,
Chris@343 1170 partShift,
Chris@343 1171 partStrength));
Chris@343 1172
Chris@320 1173 partStrength = 0;
Chris@169 1174 }
Chris@169 1175 }
Chris@169 1176
Chris@320 1177 if (strength > partStrength) {
Chris@320 1178 partStrength = strength;
Chris@169 1179 }
Chris@169 1180 }
Chris@169 1181
Chris@169 1182 if (end >= partStart + partThreshold) {
Chris@343 1183
Chris@252 1184 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 1185 end,
Chris@252 1186 note,
Chris@252 1187 partShift,
Chris@320 1188 partStrength));
Chris@343 1189
Chris@343 1190 onOffsetFeatures.push_back(makeOffsetFeature(end,
Chris@343 1191 note,
Chris@343 1192 partShift));
Chris@349 1193
Chris@349 1194 } else if (partStart > start) {
Chris@349 1195
Chris@349 1196 // we have emitted an onset for this, so must add an offset
Chris@349 1197 onOffsetFeatures.push_back(makeOffsetFeature(end,
Chris@349 1198 note,
Chris@349 1199 partShift));
Chris@169 1200 }
Chris@169 1201 }
Chris@252 1202
Chris@319 1203 void
Chris@336 1204 Silvet::emitOnset(int start, int note, FeatureList &onOffsetFeatures)
Chris@319 1205 {
Chris@319 1206 int len = int(m_pianoRoll.size());
Chris@320 1207
Chris@320 1208 double onsetStrength = 0;
Chris@319 1209
Chris@319 1210 int shift = 0;
Chris@336 1211 if (getShiftCount() > 1) {
Chris@319 1212 shift = m_pianoRollShifts[start][note];
Chris@319 1213 }
Chris@319 1214
Chris@319 1215 for (int i = start; i < len; ++i) {
Chris@319 1216 double strength = m_pianoRoll[i][note];
Chris@320 1217 if (strength > onsetStrength) {
Chris@320 1218 onsetStrength = strength;
Chris@319 1219 }
Chris@319 1220 }
Chris@319 1221
Chris@336 1222 if (onsetStrength == 0) return;
Chris@336 1223
Chris@336 1224 onOffsetFeatures.push_back(makeOnsetFeature(start,
Chris@336 1225 note,
Chris@336 1226 shift,
Chris@336 1227 onsetStrength));
Chris@336 1228 }
Chris@336 1229
Chris@309 1230 RealTime
Chris@309 1231 Silvet::getColumnTimestamp(int column)
Chris@309 1232 {
Chris@309 1233 double columnDuration = 1.0 / m_colsPerSec;
Chris@309 1234 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
Chris@309 1235
Chris@309 1236 return m_startTime + RealTime::fromSeconds
Chris@309 1237 (columnDuration * (column - postFilterLatency) + 0.02);
Chris@309 1238 }
Chris@309 1239
Chris@252 1240 Silvet::Feature
Chris@252 1241 Silvet::makeNoteFeature(int start,
Chris@252 1242 int end,
Chris@252 1243 int note,
Chris@252 1244 int shift,
Chris@320 1245 double strength)
Chris@252 1246 {
Chris@252 1247 Feature f;
Chris@252 1248
Chris@252 1249 f.hasTimestamp = true;
Chris@309 1250 f.timestamp = getColumnTimestamp(start);
Chris@252 1251
Chris@252 1252 f.hasDuration = true;
Chris@309 1253 f.duration = getColumnTimestamp(end) - f.timestamp;
Chris@252 1254
Chris@252 1255 f.values.clear();
Chris@336 1256 f.values.push_back(getNoteFrequency(note, shift));
Chris@320 1257 f.values.push_back(getVelocityFor(strength, start));
Chris@252 1258
Chris@336 1259 f.label = getNoteName(note, shift);
Chris@252 1260
Chris@252 1261 return f;
Chris@252 1262 }
Chris@252 1263
Chris@319 1264 Silvet::Feature
Chris@319 1265 Silvet::makeOnsetFeature(int start,
Chris@319 1266 int note,
Chris@319 1267 int shift,
Chris@320 1268 double strength)
Chris@319 1269 {
Chris@319 1270 Feature f;
Chris@319 1271
Chris@319 1272 f.hasTimestamp = true;
Chris@319 1273 f.timestamp = getColumnTimestamp(start);
Chris@319 1274
Chris@319 1275 f.hasDuration = false;
Chris@319 1276
Chris@319 1277 f.values.clear();
Chris@336 1278 f.values.push_back(getNoteFrequency(note, shift));
Chris@320 1279 f.values.push_back(getVelocityFor(strength, start));
Chris@319 1280
Chris@336 1281 f.label = getNoteName(note, shift);
Chris@336 1282
Chris@336 1283 return f;
Chris@336 1284 }
Chris@336 1285
Chris@336 1286 Silvet::Feature
Chris@336 1287 Silvet::makeOffsetFeature(int col,
Chris@336 1288 int note,
Chris@336 1289 int shift)
Chris@336 1290 {
Chris@336 1291 Feature f;
Chris@336 1292
Chris@336 1293 f.hasTimestamp = true;
Chris@336 1294 f.timestamp = getColumnTimestamp(col);
Chris@336 1295
Chris@336 1296 f.hasDuration = false;
Chris@336 1297
Chris@336 1298 f.values.clear();
Chris@336 1299 f.values.push_back(getNoteFrequency(note, shift));
Chris@336 1300 f.values.push_back(0); // velocity 0 for offset
Chris@336 1301
Chris@336 1302 f.label = getNoteName(note, shift) + " off";
Chris@319 1303
Chris@319 1304 return f;
Chris@319 1305 }
Chris@319 1306
Chris@320 1307 int
Chris@320 1308 Silvet::getVelocityFor(double strength, int column)
Chris@320 1309 {
Chris@320 1310 RealTime rt = getColumnTimestamp(column + 1);
Chris@320 1311
Chris@320 1312 float inputGain = getInputGainAt(rt);
Chris@320 1313
Chris@320 1314 double scale = 2.0;
Chris@320 1315 if (m_mode == LiveMode) scale = 20.0;
Chris@320 1316
Chris@320 1317 double velocity = round((strength * scale) / inputGain);
Chris@320 1318
Chris@320 1319 if (velocity > 127.0) velocity = 127.0;
Chris@320 1320 if (velocity < 1.0) velocity = 1.0; // assume surpassed 0 threshold already
Chris@320 1321
Chris@320 1322 return int(velocity);
Chris@320 1323 }
Chris@320 1324
Chris@252 1325 float
Chris@252 1326 Silvet::getInputGainAt(RealTime t)
Chris@252 1327 {
Chris@252 1328 map<RealTime, float>::const_iterator i = m_inputGains.lower_bound(t);
Chris@252 1329
Chris@252 1330 if (i == m_inputGains.end()) {
Chris@252 1331 if (i != m_inputGains.begin()) {
Chris@252 1332 --i;
Chris@252 1333 } else {
Chris@252 1334 return 1.f; // no data
Chris@252 1335 }
Chris@252 1336 }
Chris@252 1337
Chris@252 1338 // cerr << "gain at time " << t << " = " << i->second << endl;
Chris@252 1339
Chris@252 1340 return i->second;
Chris@252 1341 }
Chris@252 1342