annotate src/Silvet.cpp @ 349:071fd5e7b168 mirex2015

Be sure to emit offsets even after very short note-part
author Chris Cannam
date Wed, 12 Aug 2015 17:27:51 +0100
parents eee4c7fd15ab
children a3fc6e1f2d4e
rev   line source
Chris@31 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@31 2
Chris@31 3 /*
Chris@31 4 Silvet
Chris@31 5
Chris@31 6 A Vamp plugin for note transcription.
Chris@31 7 Centre for Digital Music, Queen Mary University of London.
Chris@31 8
Chris@31 9 This program is free software; you can redistribute it and/or
Chris@31 10 modify it under the terms of the GNU General Public License as
Chris@31 11 published by the Free Software Foundation; either version 2 of the
Chris@31 12 License, or (at your option) any later version. See the file
Chris@31 13 COPYING included with this distribution for more information.
Chris@31 14 */
Chris@31 15
Chris@31 16 #include "Silvet.h"
Chris@34 17 #include "EM.h"
Chris@31 18
Chris@152 19 #include <cq/CQSpectrogram.h>
Chris@31 20
Chris@152 21 #include "MedianFilter.h"
Chris@152 22 #include "constant-q-cpp/src/dsp/Resampler.h"
Chris@246 23 #include "flattendynamics-ladspa.h"
Chris@298 24 #include "LiveInstruments.h"
Chris@31 25
Chris@31 26 #include <vector>
Chris@312 27 #include <future>
Chris@31 28
Chris@32 29 #include <cstdio>
Chris@32 30
Chris@31 31 using std::vector;
Chris@48 32 using std::cout;
Chris@31 33 using std::cerr;
Chris@31 34 using std::endl;
Chris@311 35 using std::pair;
Chris@312 36 using std::future;
Chris@312 37 using std::async;
Chris@40 38 using Vamp::RealTime;
Chris@31 39
// Sample rate (Hz) at which the constant-Q analysis runs; input at any
// other rate is resampled to this rate first (see reset()).
static const int processingSampleRate = 44100;

// Constant-Q resolution in bins per semitone for each processing mode;
// multiplied by 12 to obtain bins per octave in reset().
static const int binsPerSemitoneLive = 1;
static const int binsPerSemitoneNormal = 5;

// Inclusive range of input sample rates accepted by initialise().
static const int minInputSampleRate = 100;
static const int maxInputSampleRate = 192000;
Chris@272 47
Chris@316 48 static const Silvet::ProcessingMode defaultMode = Silvet::HighQualityMode;
Chris@316 49
Chris@31 50 Silvet::Silvet(float inputSampleRate) :
Chris@31 51 Plugin(inputSampleRate),
Chris@161 52 m_instruments(InstrumentPack::listInstrumentPacks()),
Chris@298 53 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)),
Chris@31 54 m_resampler(0),
Chris@246 55 m_flattener(0),
Chris@110 56 m_cq(0),
Chris@316 57 m_mode(defaultMode),
Chris@166 58 m_fineTuning(false),
Chris@178 59 m_instrument(0),
Chris@313 60 m_colsPerSec(50),
Chris@313 61 m_haveStartTime(false)
Chris@31 62 {
Chris@31 63 }
Chris@31 64
Chris@31 65 Silvet::~Silvet()
Chris@31 66 {
Chris@31 67 delete m_resampler;
Chris@246 68 delete m_flattener;
Chris@31 69 delete m_cq;
Chris@41 70 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 71 delete m_postFilter[i];
Chris@41 72 }
Chris@31 73 }
Chris@31 74
Chris@31 75 string
Chris@31 76 Silvet::getIdentifier() const
Chris@31 77 {
Chris@31 78 return "silvet";
Chris@31 79 }
Chris@31 80
Chris@31 81 string
Chris@31 82 Silvet::getName() const
Chris@31 83 {
Chris@31 84 return "Silvet Note Transcription";
Chris@31 85 }
Chris@31 86
Chris@31 87 string
Chris@31 88 Silvet::getDescription() const
Chris@31 89 {
Chris@191 90 return "Estimate the note onsets, pitches, and durations that make up a music recording.";
Chris@31 91 }
Chris@31 92
Chris@31 93 string
Chris@31 94 Silvet::getMaker() const
Chris@31 95 {
Chris@191 96 return "Queen Mary, University of London";
Chris@31 97 }
Chris@31 98
Chris@31 99 int
Chris@31 100 Silvet::getPluginVersion() const
Chris@31 101 {
Chris@309 102 return 3;
Chris@31 103 }
Chris@31 104
Chris@31 105 string
Chris@31 106 Silvet::getCopyright() const
Chris@31 107 {
Chris@191 108 return "Method by Emmanouil Benetos and Simon Dixon; plugin by Chris Cannam and Emmanouil Benetos. GPL licence.";
Chris@31 109 }
Chris@31 110
Chris@31 111 Silvet::InputDomain
Chris@31 112 Silvet::getInputDomain() const
Chris@31 113 {
Chris@31 114 return TimeDomain;
Chris@31 115 }
Chris@31 116
Chris@31 117 size_t
Chris@31 118 Silvet::getPreferredBlockSize() const
Chris@31 119 {
Chris@31 120 return 0;
Chris@31 121 }
Chris@31 122
Chris@31 123 size_t
Chris@31 124 Silvet::getPreferredStepSize() const
Chris@31 125 {
Chris@31 126 return 0;
Chris@31 127 }
Chris@31 128
Chris@31 129 size_t
Chris@31 130 Silvet::getMinChannelCount() const
Chris@31 131 {
Chris@31 132 return 1;
Chris@31 133 }
Chris@31 134
Chris@31 135 size_t
Chris@31 136 Silvet::getMaxChannelCount() const
Chris@31 137 {
Chris@31 138 return 1;
Chris@31 139 }
Chris@31 140
Chris@31 141 Silvet::ParameterList
Chris@31 142 Silvet::getParameterDescriptors() const
Chris@31 143 {
Chris@31 144 ParameterList list;
Chris@110 145
Chris@110 146 ParameterDescriptor desc;
Chris@110 147 desc.identifier = "mode";
Chris@110 148 desc.name = "Processing mode";
Chris@110 149 desc.unit = "";
Chris@341 150 desc.description = "Sets the tradeoff of processing speed against transcription quality. Live mode is much faster and detects notes with relatively low latency; Intensive mode (the default) is slower but will almost always produce better results.";
Chris@110 151 desc.minValue = 0;
Chris@344 152 desc.maxValue = 1;
Chris@316 153 desc.defaultValue = int(defaultMode);
Chris@110 154 desc.isQuantized = true;
Chris@110 155 desc.quantizeStep = 1;
Chris@341 156 desc.valueNames.push_back("Live (faster and lower latency)");
Chris@165 157 desc.valueNames.push_back("Intensive (higher quality)");
Chris@161 158 list.push_back(desc);
Chris@161 159
Chris@176 160 desc.identifier = "instrument";
Chris@176 161 desc.name = "Instrument";
Chris@161 162 desc.unit = "";
Chris@271 163 desc.description = "The instrument or instruments known to be present in the recording. This affects the set of instrument templates used, as well as the expected level of polyphony in the output. Using a more limited set of instruments than the default will also make the plugin run faster.\nNote that this plugin cannot isolate instruments: you can't use this setting to request notes from only one instrument in a recording with several. Instead, use this as a hint to the plugin about which instruments are actually present.";
Chris@161 164 desc.minValue = 0;
Chris@162 165 desc.maxValue = m_instruments.size()-1;
Chris@162 166 desc.defaultValue = 0;
Chris@161 167 desc.isQuantized = true;
Chris@161 168 desc.quantizeStep = 1;
Chris@161 169 desc.valueNames.clear();
Chris@162 170 for (int i = 0; i < int(m_instruments.size()); ++i) {
Chris@162 171 desc.valueNames.push_back(m_instruments[i].name);
Chris@162 172 }
Chris@166 173 list.push_back(desc);
Chris@161 174
Chris@166 175 desc.identifier = "finetune";
Chris@166 176 desc.name = "Return fine pitch estimates";
Chris@166 177 desc.unit = "";
Chris@271 178 desc.description = "Return pitch estimates at finer than semitone resolution. This works only in Intensive mode. Notes that appear to drift in pitch will be split up into shorter notes with individually finer pitches.";
Chris@166 179 desc.minValue = 0;
Chris@166 180 desc.maxValue = 1;
Chris@166 181 desc.defaultValue = 0;
Chris@166 182 desc.isQuantized = true;
Chris@166 183 desc.quantizeStep = 1;
Chris@166 184 desc.valueNames.clear();
Chris@110 185 list.push_back(desc);
Chris@110 186
Chris@31 187 return list;
Chris@31 188 }
Chris@31 189
Chris@31 190 float
Chris@31 191 Silvet::getParameter(string identifier) const
Chris@31 192 {
Chris@110 193 if (identifier == "mode") {
Chris@297 194 return (float)(int)m_mode;
Chris@166 195 } else if (identifier == "finetune") {
Chris@166 196 return m_fineTuning ? 1.f : 0.f;
Chris@176 197 } else if (identifier == "instrument") {
Chris@162 198 return m_instrument;
Chris@110 199 }
Chris@31 200 return 0;
Chris@31 201 }
Chris@31 202
Chris@31 203 void
Chris@31 204 Silvet::setParameter(string identifier, float value)
Chris@31 205 {
Chris@110 206 if (identifier == "mode") {
Chris@297 207 m_mode = (ProcessingMode)(int)(value + 0.5);
Chris@166 208 } else if (identifier == "finetune") {
Chris@166 209 m_fineTuning = (value > 0.5);
Chris@176 210 } else if (identifier == "instrument") {
Chris@162 211 m_instrument = lrintf(value);
Chris@110 212 }
Chris@31 213 }
Chris@31 214
Chris@31 215 Silvet::ProgramList
Chris@31 216 Silvet::getPrograms() const
Chris@31 217 {
Chris@31 218 ProgramList list;
Chris@31 219 return list;
Chris@31 220 }
Chris@31 221
Chris@31 222 string
Chris@31 223 Silvet::getCurrentProgram() const
Chris@31 224 {
Chris@31 225 return "";
Chris@31 226 }
Chris@31 227
Chris@31 228 void
Chris@31 229 Silvet::selectProgram(string name)
Chris@31 230 {
Chris@31 231 }
Chris@31 232
Chris@31 233 Silvet::OutputList
Chris@31 234 Silvet::getOutputDescriptors() const
Chris@31 235 {
Chris@31 236 OutputList list;
Chris@31 237
Chris@31 238 OutputDescriptor d;
Chris@51 239 d.identifier = "notes";
Chris@51 240 d.name = "Note transcription";
Chris@329 241 d.description = "Overall note transcription. Each note has time, duration, estimated fundamental frequency, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
Chris@41 242 d.unit = "Hz";
Chris@31 243 d.hasFixedBinCount = true;
Chris@31 244 d.binCount = 2;
Chris@41 245 d.binNames.push_back("Frequency");
Chris@31 246 d.binNames.push_back("Velocity");
Chris@31 247 d.hasKnownExtents = false;
Chris@31 248 d.isQuantized = false;
Chris@31 249 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@246 250 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@31 251 d.hasDuration = true;
Chris@32 252 m_notesOutputNo = list.size();
Chris@32 253 list.push_back(d);
Chris@32 254
Chris@319 255 d.identifier = "onsets";
Chris@319 256 d.name = "Note onsets";
Chris@323 257 d.description = "Note onsets, without durations. These can be calculated sooner than complete notes, because it isn't necessary to wait for a note to finish before returning its feature. Each event has time, estimated fundamental frequency in Hz, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
Chris@319 258 d.unit = "Hz";
Chris@319 259 d.hasFixedBinCount = true;
Chris@319 260 d.binCount = 2;
Chris@319 261 d.binNames.push_back("Frequency");
Chris@319 262 d.binNames.push_back("Velocity");
Chris@319 263 d.hasKnownExtents = false;
Chris@319 264 d.isQuantized = false;
Chris@319 265 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@319 266 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@319 267 d.hasDuration = false;
Chris@319 268 m_onsetsOutputNo = list.size();
Chris@319 269 list.push_back(d);
Chris@319 270
Chris@336 271 d.identifier = "onoffsets";
Chris@336 272 d.name = "Note onsets and offsets";
Chris@336 273 d.description = "Note onsets and offsets as separate events. Each onset event has time, estimated fundamental frequency in Hz, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture. Offsets are represented in the same way but with a velocity of 0.";
Chris@336 274 d.unit = "Hz";
Chris@336 275 d.hasFixedBinCount = true;
Chris@336 276 d.binCount = 2;
Chris@336 277 d.binNames.push_back("Frequency");
Chris@336 278 d.binNames.push_back("Velocity");
Chris@336 279 d.hasKnownExtents = false;
Chris@336 280 d.isQuantized = false;
Chris@336 281 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@336 282 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@336 283 d.hasDuration = false;
Chris@336 284 m_onOffsetsOutputNo = list.size();
Chris@336 285 list.push_back(d);
Chris@336 286
Chris@178 287 d.identifier = "timefreq";
Chris@178 288 d.name = "Time-frequency distribution";
Chris@271 289 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
Chris@178 290 d.unit = "";
Chris@178 291 d.hasFixedBinCount = true;
Chris@298 292 d.binCount = getPack(0).templateHeight;
Chris@178 293 d.binNames.clear();
Chris@178 294 if (m_cq) {
Chris@294 295 char name[50];
Chris@298 296 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@178 297 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@178 298 // lowest-frequency 55 bins have been dropped, for a
Chris@178 299 // 545-bin template. The native CQ bins go high->low
Chris@178 300 // frequency though, so these are still the first 545 bins
Chris@178 301 // as reported by getBinFrequency, though in reverse order
Chris@178 302 float freq = m_cq->getBinFrequency
Chris@298 303 (getPack(0).templateHeight - i - 1);
Chris@178 304 sprintf(name, "%.1f Hz", freq);
Chris@178 305 d.binNames.push_back(name);
Chris@178 306 }
Chris@178 307 }
Chris@178 308 d.hasKnownExtents = false;
Chris@178 309 d.isQuantized = false;
Chris@178 310 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@178 311 d.sampleRate = m_colsPerSec;
Chris@178 312 d.hasDuration = false;
Chris@178 313 m_fcqOutputNo = list.size();
Chris@178 314 list.push_back(d);
Chris@178 315
Chris@294 316 d.identifier = "pitchactivation";
Chris@294 317 d.name = "Pitch activation distribution";
Chris@294 318 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction.";
Chris@294 319 d.unit = "";
Chris@294 320 d.hasFixedBinCount = true;
Chris@298 321 d.binCount = getPack(0).templateNoteCount;
Chris@294 322 d.binNames.clear();
Chris@294 323 if (m_cq) {
Chris@298 324 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@336 325 d.binNames.push_back(getNoteName(i, 0));
Chris@294 326 }
Chris@294 327 }
Chris@294 328 d.hasKnownExtents = false;
Chris@294 329 d.isQuantized = false;
Chris@294 330 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@294 331 d.sampleRate = m_colsPerSec;
Chris@294 332 d.hasDuration = false;
Chris@294 333 m_pitchOutputNo = list.size();
Chris@294 334 list.push_back(d);
Chris@294 335
Chris@309 336 d.identifier = "chroma";
Chris@309 337 d.name = "Pitch chroma distribution";
Chris@309 338 d.description = "Pitch chroma distribution formed by wrapping the un-thresholded pitch activation distribution into a single octave of semitone bins.";
Chris@309 339 d.unit = "";
Chris@309 340 d.hasFixedBinCount = true;
Chris@309 341 d.binCount = 12;
Chris@309 342 d.binNames.clear();
Chris@309 343 if (m_cq) {
Chris@309 344 for (int i = 0; i < 12; ++i) {
Chris@320 345 d.binNames.push_back(getChromaName(i));
Chris@309 346 }
Chris@309 347 }
Chris@309 348 d.hasKnownExtents = false;
Chris@309 349 d.isQuantized = false;
Chris@309 350 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@309 351 d.sampleRate = m_colsPerSec;
Chris@309 352 d.hasDuration = false;
Chris@309 353 m_chromaOutputNo = list.size();
Chris@309 354 list.push_back(d);
Chris@309 355
Chris@302 356 d.identifier = "templates";
Chris@302 357 d.name = "Templates";
Chris@302 358 d.description = "Constant-Q spectral templates for the selected instrument pack.";
Chris@302 359 d.unit = "";
Chris@302 360 d.hasFixedBinCount = true;
Chris@302 361 d.binCount = getPack(0).templateHeight;
Chris@302 362 d.binNames.clear();
Chris@302 363 if (m_cq) {
Chris@302 364 char name[50];
Chris@302 365 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@302 366 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@302 367 // lowest-frequency 55 bins have been dropped, for a
Chris@302 368 // 545-bin template. The native CQ bins go high->low
Chris@302 369 // frequency though, so these are still the first 545 bins
Chris@302 370 // as reported by getBinFrequency, though in reverse order
Chris@302 371 float freq = m_cq->getBinFrequency
Chris@302 372 (getPack(0).templateHeight - i - 1);
Chris@302 373 sprintf(name, "%.1f Hz", freq);
Chris@302 374 d.binNames.push_back(name);
Chris@302 375 }
Chris@302 376 }
Chris@302 377 d.hasKnownExtents = false;
Chris@302 378 d.isQuantized = false;
Chris@302 379 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@302 380 d.sampleRate = m_colsPerSec;
Chris@302 381 d.hasDuration = false;
Chris@302 382 m_templateOutputNo = list.size();
Chris@302 383 list.push_back(d);
Chris@302 384
Chris@31 385 return list;
Chris@31 386 }
Chris@31 387
Chris@38 388 std::string
Chris@320 389 Silvet::getChromaName(int pitch) const
Chris@38 390 {
Chris@38 391 static const char *names[] = {
Chris@38 392 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
Chris@38 393 };
Chris@38 394
Chris@309 395 return names[pitch];
Chris@309 396 }
Chris@309 397
Chris@309 398 std::string
Chris@336 399 Silvet::getNoteName(int note, int shift) const
Chris@309 400 {
Chris@320 401 string n = getChromaName(note % 12);
Chris@38 402
Chris@175 403 int oct = (note + 9) / 12;
Chris@38 404
Chris@175 405 char buf[30];
Chris@175 406
Chris@175 407 float pshift = 0.f;
Chris@336 408 int shiftCount = getShiftCount();
Chris@175 409 if (shiftCount > 1) {
Chris@320 410 // see getNoteFrequency below
Chris@175 411 pshift =
Chris@175 412 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 413 }
Chris@175 414
Chris@175 415 if (pshift > 0.f) {
Chris@309 416 sprintf(buf, "%s%d+%dc", n.c_str(), oct, int(round(pshift * 100)));
Chris@175 417 } else if (pshift < 0.f) {
Chris@309 418 sprintf(buf, "%s%d-%dc", n.c_str(), oct, int(round((-pshift) * 100)));
Chris@175 419 } else {
Chris@309 420 sprintf(buf, "%s%d", n.c_str(), oct);
Chris@175 421 }
Chris@38 422
Chris@38 423 return buf;
Chris@38 424 }
Chris@38 425
Chris@41 426 float
Chris@336 427 Silvet::getNoteFrequency(int note, int shift) const
Chris@41 428 {
Chris@169 429 // Convert shift number to a pitch shift. The given shift number
Chris@169 430 // is an offset into the template array, which starts with some
Chris@169 431 // zeros, followed by the template, then some trailing zeros.
Chris@169 432 //
Chris@169 433 // Example: if we have templateMaxShift == 2 and thus shiftCount
Chris@169 434 // == 5, then the number will be in the range 0-4 and the template
Chris@169 435 // will have 2 zeros at either end. Thus number 2 represents the
Chris@169 436 // template "as recorded", for a pitch shift of 0; smaller indices
Chris@169 437 // represent moving the template *up* in pitch (by introducing
Chris@169 438 // zeros at the start, which is the low-frequency end), for a
Chris@169 439 // positive pitch shift; and higher values represent moving it
Chris@169 440 // down in pitch, for a negative pitch shift.
Chris@169 441
Chris@175 442 float pshift = 0.f;
Chris@336 443 int shiftCount = getShiftCount();
Chris@175 444 if (shiftCount > 1) {
Chris@175 445 pshift =
Chris@175 446 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 447 }
Chris@169 448
Chris@301 449 float freq = float(27.5 * pow(2.0, (note + pshift) / 12.0));
Chris@301 450
Chris@303 451 // cerr << "note = " << note << ", shift = " << shift << ", shiftCount = "
Chris@303 452 // << shiftCount << ", obtained freq = " << freq << endl;
Chris@301 453
Chris@301 454 return freq;
Chris@41 455 }
Chris@41 456
Chris@31 457 bool
Chris@31 458 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@31 459 {
Chris@272 460 if (m_inputSampleRate < minInputSampleRate ||
Chris@272 461 m_inputSampleRate > maxInputSampleRate) {
Chris@272 462 cerr << "Silvet::initialise: Unsupported input sample rate "
Chris@272 463 << m_inputSampleRate << " (supported min " << minInputSampleRate
Chris@272 464 << ", max " << maxInputSampleRate << ")" << endl;
Chris@272 465 return false;
Chris@272 466 }
Chris@272 467
Chris@31 468 if (channels < getMinChannelCount() ||
Chris@272 469 channels > getMaxChannelCount()) {
Chris@272 470 cerr << "Silvet::initialise: Unsupported channel count " << channels
Chris@272 471 << " (supported min " << getMinChannelCount() << ", max "
Chris@272 472 << getMaxChannelCount() << ")" << endl;
Chris@272 473 return false;
Chris@272 474 }
Chris@31 475
Chris@31 476 if (stepSize != blockSize) {
Chris@31 477 cerr << "Silvet::initialise: Step size must be the same as block size ("
Chris@31 478 << stepSize << " != " << blockSize << ")" << endl;
Chris@31 479 return false;
Chris@31 480 }
Chris@31 481
Chris@31 482 m_blockSize = blockSize;
Chris@31 483
Chris@31 484 reset();
Chris@31 485
Chris@31 486 return true;
Chris@31 487 }
Chris@31 488
Chris@31 489 void
Chris@31 490 Silvet::reset()
Chris@31 491 {
Chris@31 492 delete m_resampler;
Chris@246 493 delete m_flattener;
Chris@31 494 delete m_cq;
Chris@31 495
Chris@31 496 if (m_inputSampleRate != processingSampleRate) {
Chris@31 497 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
Chris@31 498 } else {
Chris@31 499 m_resampler = 0;
Chris@31 500 }
Chris@31 501
Chris@246 502 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
Chris@246 503 m_flattener->reset();
Chris@246 504
Chris@301 505 // this happens to be processingSampleRate / 3, and is the top
Chris@301 506 // freq used for the EM templates:
Chris@301 507 double maxFreq = 14700;
Chris@301 508
Chris@301 509 if (m_mode == LiveMode) {
Chris@301 510 // We only have 12 bpo rather than 60, so we need the top bin
Chris@301 511 // to be the middle one of the top 5, i.e. 2/5 of a semitone
Chris@301 512 // lower than 14700
Chris@301 513 maxFreq *= powf(2.0, -1.0 / 30.0);
Chris@301 514 }
Chris@301 515
Chris@173 516 double minFreq = 27.5;
Chris@173 517
Chris@341 518 if (m_mode == LiveMode) {
Chris@173 519 // We don't actually return any notes from the bottom octave,
Chris@173 520 // so we can just pad with zeros
Chris@173 521 minFreq *= 2;
Chris@173 522 }
Chris@173 523
Chris@298 524 int bpo = 12 *
Chris@298 525 (m_mode == LiveMode ? binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@301 526
Chris@154 527 CQParameters params(processingSampleRate,
Chris@173 528 minFreq,
Chris@303 529 maxFreq,
Chris@298 530 bpo);
Chris@154 531
Chris@325 532 params.q = 0.8;
Chris@325 533 params.atomHopFactor = (m_mode == LiveMode ? 1.0 : 0.3);
Chris@154 534 params.threshold = 0.0005;
Chris@317 535 params.decimator =
Chris@317 536 (m_mode == LiveMode ?
Chris@317 537 CQParameters::FasterDecimator : CQParameters::BetterDecimator);
Chris@172 538 params.window = CQParameters::Hann;
Chris@154 539
Chris@154 540 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
Chris@31 541
Chris@303 542 // cerr << "CQ bins = " << m_cq->getTotalBins() << endl;
Chris@303 543 // cerr << "CQ min freq = " << m_cq->getMinFrequency() << " (and for confirmation, freq of bin 0 = " << m_cq->getBinFrequency(0) << ")" << endl;
Chris@297 544
Chris@341 545 m_colsPerSec = 50;
Chris@165 546
Chris@41 547 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 548 delete m_postFilter[i];
Chris@41 549 }
Chris@41 550 m_postFilter.clear();
Chris@303 551 int postFilterLength = 3;
Chris@298 552 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@303 553 m_postFilter.push_back(new MedianFilter<double>(postFilterLength));
Chris@41 554 }
Chris@41 555 m_pianoRoll.clear();
Chris@246 556 m_inputGains.clear();
Chris@32 557 m_columnCount = 0;
Chris@272 558 m_resampledCount = 0;
Chris@40 559 m_startTime = RealTime::zeroTime;
Chris@313 560 m_haveStartTime = false;
Chris@31 561 }
Chris@31 562
Chris@31 563 Silvet::FeatureSet
Chris@31 564 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
Chris@31 565 {
Chris@302 566 FeatureSet fs;
Chris@302 567
Chris@313 568 if (!m_haveStartTime) {
Chris@314 569
Chris@40 570 m_startTime = timestamp;
Chris@313 571 m_haveStartTime = true;
Chris@314 572
Chris@302 573 insertTemplateFeatures(fs);
Chris@40 574 }
Chris@246 575
Chris@246 576 vector<float> flattened(m_blockSize);
Chris@246 577 float gain = 1.f;
Chris@246 578 m_flattener->connectInputPort
Chris@246 579 (FlattenDynamics::AudioInputPort, inputBuffers[0]);
Chris@246 580 m_flattener->connectOutputPort
Chris@246 581 (FlattenDynamics::AudioOutputPort, &flattened[0]);
Chris@246 582 m_flattener->connectOutputPort
Chris@246 583 (FlattenDynamics::GainOutputPort, &gain);
Chris@246 584 m_flattener->process(m_blockSize);
Chris@246 585
Chris@252 586 m_inputGains[timestamp] = gain;
Chris@40 587
Chris@31 588 vector<double> data;
Chris@40 589 for (int i = 0; i < m_blockSize; ++i) {
Chris@246 590 double d = flattened[i];
Chris@235 591 data.push_back(d);
Chris@40 592 }
Chris@31 593
Chris@31 594 if (m_resampler) {
Chris@272 595
Chris@31 596 data = m_resampler->process(data.data(), data.size());
Chris@272 597
Chris@272 598 int hadCount = m_resampledCount;
Chris@272 599 m_resampledCount += data.size();
Chris@272 600
Chris@272 601 int resamplerLatency = m_resampler->getLatency();
Chris@272 602
Chris@272 603 if (hadCount < resamplerLatency) {
Chris@272 604 int stillToDrop = resamplerLatency - hadCount;
Chris@272 605 if (stillToDrop >= int(data.size())) {
Chris@302 606 return fs;
Chris@272 607 } else {
Chris@272 608 data = vector<double>(data.begin() + stillToDrop, data.end());
Chris@272 609 }
Chris@272 610 }
Chris@31 611 }
Chris@272 612
Chris@32 613 Grid cqout = m_cq->process(data);
Chris@302 614 transcribe(cqout, fs);
Chris@51 615 return fs;
Chris@34 616 }
Chris@34 617
Chris@34 618 Silvet::FeatureSet
Chris@34 619 Silvet::getRemainingFeatures()
Chris@34 620 {
Chris@145 621 Grid cqout = m_cq->getRemainingOutput();
Chris@302 622 FeatureSet fs;
Chris@336 623
Chris@302 624 if (m_columnCount == 0) {
Chris@302 625 // process() was never called, but we still want these
Chris@302 626 insertTemplateFeatures(fs);
Chris@302 627 } else {
Chris@336 628
Chris@336 629 // Complete the transcription
Chris@336 630
Chris@302 631 transcribe(cqout, fs);
Chris@336 632
Chris@336 633 // And make sure any extant playing notes are finished and returned
Chris@336 634
Chris@336 635 m_pianoRoll.push_back({});
Chris@336 636
Chris@336 637 auto events = noteTrack();
Chris@336 638
Chris@336 639 for (const auto &f : events.notes) {
Chris@336 640 fs[m_notesOutputNo].push_back(f);
Chris@336 641 }
Chris@336 642
Chris@336 643 for (const auto &f : events.onsets) {
Chris@336 644 fs[m_onsetsOutputNo].push_back(f);
Chris@336 645 }
Chris@336 646
Chris@336 647 for (const auto &f : events.onOffsets) {
Chris@336 648 fs[m_onOffsetsOutputNo].push_back(f);
Chris@336 649 }
Chris@302 650 }
Chris@336 651
Chris@51 652 return fs;
Chris@34 653 }
Chris@34 654
Chris@302 655 void
Chris@302 656 Silvet::insertTemplateFeatures(FeatureSet &fs)
Chris@302 657 {
Chris@302 658 const InstrumentPack &pack = getPack(m_instrument);
Chris@302 659 for (int i = 0; i < int(pack.templates.size()) * pack.templateNoteCount; ++i) {
Chris@302 660 RealTime timestamp = RealTime::fromSeconds(double(i) / m_colsPerSec);
Chris@302 661 Feature f;
Chris@302 662 char buffer[50];
Chris@302 663 sprintf(buffer, "Note %d", i + 1);
Chris@302 664 f.label = buffer;
Chris@302 665 f.hasTimestamp = true;
Chris@302 666 f.timestamp = timestamp;
Chris@302 667 f.values = pack.templates[i / pack.templateNoteCount]
Chris@302 668 .data[i % pack.templateNoteCount];
Chris@302 669 fs[m_templateOutputNo].push_back(f);
Chris@302 670 }
Chris@302 671 }
Chris@302 672
Chris@336 673 int
Chris@336 674 Silvet::getShiftCount() const
Chris@336 675 {
Chris@336 676 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning;
Chris@336 677 int shiftCount = 1;
Chris@336 678 if (wantShifts) {
Chris@336 679 const InstrumentPack &pack(getPack(m_instrument));
Chris@336 680 shiftCount = pack.templateMaxShift * 2 + 1;
Chris@336 681 }
Chris@336 682 return shiftCount;
Chris@336 683 }
Chris@336 684
Chris@302 685 void
Chris@302 686 Silvet::transcribe(const Grid &cqout, Silvet::FeatureSet &fs)
Chris@34 687 {
Chris@32 688 Grid filtered = preProcess(cqout);
Chris@31 689
Chris@302 690 if (filtered.empty()) return;
Chris@170 691
Chris@298 692 const InstrumentPack &pack(getPack(m_instrument));
Chris@104 693
Chris@325 694 int width = filtered.size();
Chris@325 695
Chris@325 696 double silenceThreshold = 0.01;
Chris@325 697
Chris@325 698 for (int i = 0; i < width; ++i) {
Chris@325 699
Chris@325 700 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1 + i);
Chris@325 701 float inputGain = getInputGainAt(timestamp);
Chris@325 702
Chris@178 703 Feature f;
Chris@325 704 double rms = 0.0;
Chris@325 705
Chris@178 706 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@325 707 double v = filtered[i][j];
Chris@325 708 rms += v * v;
Chris@325 709 f.values.push_back(float(v));
Chris@178 710 }
Chris@325 711
Chris@325 712 rms = sqrt(rms / pack.templateHeight);
Chris@325 713 if (rms / inputGain < silenceThreshold) {
Chris@325 714 filtered[i].clear();
Chris@325 715 }
Chris@325 716
Chris@178 717 fs[m_fcqOutputNo].push_back(f);
Chris@178 718 }
Chris@325 719
Chris@311 720 Grid localPitches(width);
Chris@170 721
Chris@336 722 int shiftCount = getShiftCount();
Chris@336 723 bool wantShifts = (shiftCount > 1);
Chris@170 724
Chris@170 725 vector<vector<int> > localBestShifts;
Chris@170 726 if (wantShifts) {
Chris@311 727 localBestShifts = vector<vector<int> >(width);
Chris@170 728 }
Chris@170 729
Chris@312 730 #ifndef MAX_EM_THREADS
Chris@312 731 #define MAX_EM_THREADS 8
Chris@312 732 #endif
Chris@312 733
Chris@317 734 int emThreadCount = MAX_EM_THREADS;
Chris@317 735 if (m_mode == LiveMode && pack.templates.size() == 1) {
Chris@317 736 // The EM step is probably not slow enough to merit it
Chris@317 737 emThreadCount = 1;
Chris@317 738 }
Chris@317 739
Chris@312 740 #if (defined(MAX_EM_THREADS) && (MAX_EM_THREADS > 1))
Chris@317 741 if (emThreadCount > 1) {
Chris@317 742 for (int i = 0; i < width; ) {
Chris@317 743 typedef future<pair<vector<double>, vector<int>>> EMFuture;
Chris@317 744 vector<EMFuture> results;
Chris@317 745 for (int j = 0; j < emThreadCount && i + j < width; ++j) {
Chris@347 746 cerr << "creating future " << j << " (i = " << i << ", width = " << width << ")" << endl;
Chris@317 747 results.push_back
Chris@317 748 (async(std::launch::async,
Chris@317 749 [&](int index) {
Chris@336 750 return applyEM(pack, filtered.at(index));
Chris@317 751 }, i + j));
Chris@317 752 }
Chris@317 753 for (int j = 0; j < emThreadCount && i + j < width; ++j) {
Chris@347 754 cerr << "reaping future " << j << " (i = " << i << ", width = " << width << ")" << endl;
Chris@317 755 auto out = results[j].get();
Chris@317 756 localPitches[i+j] = out.first;
Chris@317 757 if (wantShifts) localBestShifts[i+j] = out.second;
Chris@317 758 }
Chris@317 759 i += emThreadCount;
Chris@312 760 }
Chris@123 761 }
Chris@312 762 #endif
Chris@317 763
Chris@317 764 if (emThreadCount == 1) {
Chris@317 765 for (int i = 0; i < width; ++i) {
Chris@336 766 auto out = applyEM(pack, filtered.at(i));
Chris@317 767 localPitches[i] = out.first;
Chris@317 768 if (wantShifts) localBestShifts[i] = out.second;
Chris@317 769 }
Chris@317 770 }
Chris@305 771
Chris@166 772 for (int i = 0; i < width; ++i) {
Chris@37 773
Chris@321 774 vector<double> filtered;
Chris@321 775
Chris@321 776 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@321 777 m_postFilter[j]->push(localPitches[i][j]);
Chris@321 778 filtered.push_back(m_postFilter[j]->get());
Chris@321 779 }
Chris@294 780
Chris@309 781 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1);
Chris@309 782 float inputGain = getInputGainAt(timestamp);
Chris@309 783
Chris@294 784 Feature f;
Chris@294 785 for (int j = 0; j < (int)filtered.size(); ++j) {
Chris@309 786 float v = filtered[j];
Chris@294 787 if (v < pack.levelThreshold) v = 0.f;
Chris@309 788 f.values.push_back(v / inputGain);
Chris@294 789 }
Chris@294 790 fs[m_pitchOutputNo].push_back(f);
Chris@309 791
Chris@309 792 f.values.clear();
Chris@309 793 f.values.resize(12);
Chris@309 794 for (int j = 0; j < (int)filtered.size(); ++j) {
Chris@309 795 f.values[j % 12] += filtered[j] / inputGain;
Chris@309 796 }
Chris@309 797 fs[m_chromaOutputNo].push_back(f);
Chris@38 798
Chris@321 799 // This pushes the up-to-max-polyphony activation column to
Chris@321 800 // m_pianoRoll
Chris@336 801 postProcess(filtered, localBestShifts[i]);
Chris@321 802
Chris@336 803 auto events = noteTrack();
Chris@319 804
Chris@336 805 for (const auto &f : events.notes) {
Chris@336 806 fs[m_notesOutputNo].push_back(f);
Chris@40 807 }
Chris@319 808
Chris@336 809 for (const auto &f : events.onsets) {
Chris@336 810 fs[m_onsetsOutputNo].push_back(f);
Chris@336 811 }
Chris@336 812
Chris@336 813 for (const auto &f : events.onOffsets) {
Chris@336 814 fs[m_onOffsetsOutputNo].push_back(f);
Chris@319 815 }
Chris@34 816 }
Chris@31 817 }
Chris@31 818
Chris@311 819 pair<vector<double>, vector<int> >
Chris@311 820 Silvet::applyEM(const InstrumentPack &pack,
Chris@336 821 const vector<double> &column)
Chris@311 822 {
Chris@311 823 double columnThreshold = 1e-5;
Chris@311 824
Chris@314 825 if (m_mode == LiveMode) {
Chris@325 826 columnThreshold /= 15;
Chris@314 827 }
Chris@314 828
Chris@311 829 vector<double> pitches(pack.templateNoteCount, 0.0);
Chris@311 830 vector<int> bestShifts;
Chris@325 831
Chris@325 832 if (column.empty()) return { pitches, bestShifts };
Chris@311 833
Chris@311 834 double sum = 0.0;
Chris@311 835 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@311 836 sum += column.at(j);
Chris@311 837 }
Chris@311 838 if (sum < columnThreshold) return { pitches, bestShifts };
Chris@311 839
Chris@314 840 EM em(&pack, m_mode == HighQualityMode);
Chris@311 841
Chris@311 842 em.setPitchSparsity(pack.pitchSparsity);
Chris@311 843 em.setSourceSparsity(pack.sourceSparsity);
Chris@311 844
Chris@314 845 int iterations = (m_mode == HighQualityMode ? 20 : 10);
Chris@311 846
Chris@311 847 for (int j = 0; j < iterations; ++j) {
Chris@311 848 em.iterate(column.data());
Chris@311 849 }
Chris@311 850
Chris@311 851 const float *pitchDist = em.getPitchDistribution();
Chris@311 852 const float *const *shiftDist = em.getShifts();
Chris@311 853
Chris@336 854 int shiftCount = getShiftCount();
Chris@311 855
Chris@311 856 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@311 857
Chris@311 858 pitches[j] = pitchDist[j] * sum;
Chris@311 859
Chris@311 860 int bestShift = 0;
Chris@311 861 float bestShiftValue = 0.0;
Chris@336 862 if (shiftCount > 1) {
Chris@311 863 for (int k = 0; k < shiftCount; ++k) {
Chris@311 864 float value = shiftDist[k][j];
Chris@311 865 if (k == 0 || value > bestShiftValue) {
Chris@311 866 bestShiftValue = value;
Chris@311 867 bestShift = k;
Chris@311 868 }
Chris@311 869 }
Chris@311 870 bestShifts.push_back(bestShift);
Chris@311 871 }
Chris@311 872 }
Chris@311 873
Chris@311 874 return { pitches, bestShifts };
Chris@311 875 }
Chris@311 876
Chris@32 877 Silvet::Grid
Chris@32 878 Silvet::preProcess(const Grid &in)
Chris@32 879 {
Chris@32 880 int width = in.size();
Chris@32 881
Chris@165 882 int spacing = processingSampleRate / m_colsPerSec;
Chris@32 883
Chris@165 884 // need to be careful that col spacing is an integer number of samples!
Chris@165 885 assert(spacing * m_colsPerSec == processingSampleRate);
Chris@32 886
Chris@32 887 Grid out;
Chris@32 888
Chris@58 889 // We count the CQ latency in terms of processing hops, but
Chris@58 890 // actually it probably isn't an exact number of hops so this
Chris@58 891 // isn't quite accurate. But the small constant offset is
Chris@165 892 // practically irrelevant compared to the jitter from the frame
Chris@165 893 // size we reduce to in a moment
Chris@33 894 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
Chris@33 895
Chris@298 896 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 897
Chris@32 898 for (int i = 0; i < width; ++i) {
Chris@32 899
Chris@33 900 if (m_columnCount < latentColumns) {
Chris@33 901 ++m_columnCount;
Chris@33 902 continue;
Chris@33 903 }
Chris@33 904
Chris@32 905 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop();
Chris@32 906 int sampleNo = m_columnCount * m_cq->getColumnHop();
Chris@32 907
Chris@32 908 bool select = (sampleNo / spacing != prevSampleNo / spacing);
Chris@32 909
Chris@32 910 if (select) {
Chris@32 911 vector<double> inCol = in[i];
Chris@176 912 vector<double> outCol(pack.templateHeight);
Chris@32 913
Chris@178 914 // In HQ mode, the CQ returns 600 bins and we ignore the
Chris@298 915 // lowest 55 of them (assuming binsPerSemitone == 5).
Chris@178 916 //
Chris@341 917 // In live mode the CQ is an octave shorter, returning 540
Chris@341 918 // bins or equivalent, so we instead pad them with an
Chris@341 919 // additional 5 or equivalent zeros.
Chris@178 920 //
Chris@178 921 // We also need to reverse the column as we go, since the
Chris@178 922 // raw CQ has the high frequencies first and we need it
Chris@178 923 // the other way around.
Chris@32 924
Chris@298 925 int bps = (m_mode == LiveMode ?
Chris@298 926 binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@298 927
Chris@297 928 if (m_mode == HighQualityMode) {
Chris@178 929 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@298 930 int ix = inCol.size() - j - (11 * bps);
Chris@178 931 outCol[j] = inCol[ix];
Chris@178 932 }
Chris@178 933 } else {
Chris@298 934 for (int j = 0; j < bps; ++j) {
Chris@178 935 outCol[j] = 0.0;
Chris@178 936 }
Chris@298 937 for (int j = bps; j < pack.templateHeight; ++j) {
Chris@298 938 int ix = inCol.size() - j + (bps-1);
Chris@178 939 outCol[j] = inCol[ix];
Chris@178 940 }
Chris@46 941 }
Chris@32 942
Chris@46 943 vector<double> noiseLevel1 =
Chris@298 944 MedianFilter<double>::filter(8 * bps, outCol);
Chris@176 945 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 946 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
Chris@46 947 }
Chris@32 948
Chris@46 949 vector<double> noiseLevel2 =
Chris@298 950 MedianFilter<double>::filter(8 * bps, noiseLevel1);
Chris@176 951 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 952 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
Chris@32 953 }
Chris@32 954
Chris@165 955 out.push_back(outCol);
Chris@32 956 }
Chris@32 957
Chris@32 958 ++m_columnCount;
Chris@32 959 }
Chris@32 960
Chris@32 961 return out;
Chris@32 962 }
Chris@32 963
Chris@321 964 void
Chris@170 965 Silvet::postProcess(const vector<double> &pitches,
Chris@336 966 const vector<int> &bestShifts)
Chris@166 967 {
Chris@298 968 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 969
Chris@41 970 // Threshold for level and reduce number of candidate pitches
Chris@41 971
Chris@41 972 typedef std::multimap<double, int> ValueIndexMap;
Chris@41 973
Chris@41 974 ValueIndexMap strengths;
Chris@166 975
Chris@176 976 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@321 977
Chris@321 978 double strength = pitches[j];
Chris@183 979 if (strength < pack.levelThreshold) continue;
Chris@321 980
Chris@321 981 // In live mode with only a 12-bpo CQ, we are very likely to
Chris@321 982 // get clusters of two or three high scores at a time for
Chris@321 983 // neighbouring semitones. Eliminate these by picking only the
Chris@325 984 // peaks (except that we never eliminate a note that has
Chris@325 985 // already been established as currently playing). This means
Chris@325 986 // we can't recognise actual semitone chords if they ever
Chris@325 987 // appear, but it's not as if live mode is good enough for
Chris@325 988 // that to be a big deal anyway.
Chris@321 989 if (m_mode == LiveMode) {
Chris@325 990 if (m_current.find(j) == m_current.end() &&
Chris@325 991 (j == 0 ||
Chris@325 992 j + 1 == pack.templateNoteCount ||
Chris@325 993 pitches[j] < pitches[j-1] ||
Chris@325 994 pitches[j] < pitches[j+1])) {
Chris@325 995 // not a peak or a currently-playing note: skip it
Chris@321 996 continue;
Chris@321 997 }
Chris@321 998 }
Chris@323 999
Chris@168 1000 strengths.insert(ValueIndexMap::value_type(strength, j));
Chris@168 1001 }
Chris@166 1002
Chris@168 1003 ValueIndexMap::const_iterator si = strengths.end();
Chris@167 1004
Chris@168 1005 map<int, double> active;
Chris@168 1006 map<int, int> activeShifts;
Chris@168 1007
Chris@336 1008 int shiftCount = getShiftCount();
Chris@336 1009
Chris@183 1010 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) {
Chris@168 1011
Chris@168 1012 --si;
Chris@168 1013
Chris@168 1014 double strength = si->first;
Chris@168 1015 int j = si->second;
Chris@168 1016
Chris@168 1017 active[j] = strength;
Chris@168 1018
Chris@336 1019 if (shiftCount > 1) {
Chris@170 1020 activeShifts[j] = bestShifts[j];
Chris@167 1021 }
Chris@41 1022 }
Chris@41 1023
Chris@168 1024 m_pianoRoll.push_back(active);
Chris@170 1025
Chris@336 1026 if (shiftCount > 1) {
Chris@168 1027 m_pianoRollShifts.push_back(activeShifts);
Chris@41 1028 }
Chris@294 1029
Chris@321 1030 return;
Chris@166 1031 }
Chris@166 1032
Chris@336 1033 Silvet::FeatureChunk
Chris@336 1034 Silvet::noteTrack()
Chris@166 1035 {
Chris@41 1036 // Minimum duration pruning, and conversion to notes. We can only
Chris@41 1037 // report notes that have just ended (i.e. that are absent in the
Chris@168 1038 // latest active set but present in the prior set in the piano
Chris@41 1039 // roll) -- any notes that ended earlier will have been reported
Chris@41 1040 // already, and if they haven't ended, we don't know their
Chris@41 1041 // duration.
Chris@41 1042
Chris@168 1043 int width = m_pianoRoll.size() - 1;
Chris@168 1044
Chris@168 1045 const map<int, double> &active = m_pianoRoll[width];
Chris@41 1046
Chris@165 1047 double columnDuration = 1.0 / m_colsPerSec;
Chris@165 1048
Chris@165 1049 // only keep notes >= 100ms or thereabouts
Chris@323 1050 double durationThrSec = 0.1;
Chris@323 1051 int durationThreshold = floor(durationThrSec / columnDuration); // in cols
Chris@165 1052 if (durationThreshold < 1) durationThreshold = 1;
Chris@41 1053
Chris@336 1054 FeatureList noteFeatures, onsetFeatures, onOffsetFeatures;
Chris@41 1055
Chris@41 1056 if (width < durationThreshold + 1) {
Chris@336 1057 return { noteFeatures, onsetFeatures, onOffsetFeatures };
Chris@41 1058 }
Chris@41 1059
Chris@55 1060 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
Chris@41 1061 ni != m_pianoRoll[width-1].end(); ++ni) {
Chris@41 1062
Chris@55 1063 int note = ni->first;
Chris@41 1064
Chris@41 1065 int end = width;
Chris@41 1066 int start = end-1;
Chris@41 1067
Chris@41 1068 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
Chris@41 1069 --start;
Chris@41 1070 }
Chris@41 1071 ++start;
Chris@41 1072
Chris@319 1073 int duration = end - start;
Chris@319 1074
Chris@319 1075 if (duration < durationThreshold) {
Chris@41 1076 continue;
Chris@41 1077 }
Chris@41 1078
Chris@319 1079 if (duration == durationThreshold) {
Chris@325 1080 m_current.insert(note);
Chris@336 1081 emitOnset(start, note, onsetFeatures);
Chris@336 1082 emitOnset(start, note, onOffsetFeatures);
Chris@319 1083 }
Chris@319 1084
Chris@319 1085 if (active.find(note) == active.end()) {
Chris@319 1086 // the note was playing but just ended
Chris@325 1087 m_current.erase(note);
Chris@343 1088 emitNoteAndOffset(start, end, note, noteFeatures, onOffsetFeatures);
Chris@334 1089 } else { // still playing
Chris@334 1090 // repeated note detection: if level is greater than this
Chris@334 1091 // multiple of its previous value, then we end the note and
Chris@334 1092 // restart it with the same pitch
Chris@334 1093 double restartFactor = 1.5;
Chris@334 1094 if (duration >= durationThreshold * 2 &&
Chris@334 1095 (active.find(note)->second >
Chris@334 1096 restartFactor * m_pianoRoll[width-1][note])) {
Chris@334 1097 m_current.erase(note);
Chris@343 1098 emitNoteAndOffset(start, end-1, note, noteFeatures, onOffsetFeatures);
Chris@334 1099 // and remove this so that we start counting the new
Chris@334 1100 // note's duration from the current position
Chris@334 1101 m_pianoRoll[width-1].erase(note);
Chris@334 1102 }
Chris@319 1103 }
Chris@41 1104 }
Chris@41 1105
Chris@62 1106 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
Chris@41 1107
Chris@336 1108 return { noteFeatures, onsetFeatures, onOffsetFeatures };
Chris@41 1109 }
Chris@41 1110
Chris@169 1111 void
Chris@343 1112 Silvet::emitNoteAndOffset(int start, int end, int note,
Chris@343 1113 FeatureList &noteFeatures,
Chris@343 1114 FeatureList &onOffsetFeatures)
Chris@169 1115 {
Chris@343 1116 // Emit the complete note-event feature, and its offset. We have
Chris@343 1117 // already emitted the note onset when it started -- that process
Chris@343 1118 // is separated out in order to get a faster response during live
Chris@343 1119 // tracking. However, if the note shift changes within the note
Chris@343 1120 // (which can happen only if we have fine-tuning switched on), we
Chris@343 1121 // emit an offset and then a new onset with the new shift.
Chris@343 1122
Chris@169 1123 int partStart = start;
Chris@169 1124 int partShift = 0;
Chris@320 1125 double partStrength = 0;
Chris@169 1126
Chris@343 1127 // NB this *must* be less than durationThreshold above
Chris@252 1128 int partThreshold = floor(0.05 * m_colsPerSec);
Chris@169 1129
Chris@169 1130 for (int i = start; i != end; ++i) {
Chris@169 1131
Chris@169 1132 double strength = m_pianoRoll[i][note];
Chris@169 1133
Chris@169 1134 int shift = 0;
Chris@169 1135
Chris@336 1136 if (getShiftCount() > 1) {
Chris@169 1137
Chris@169 1138 shift = m_pianoRollShifts[i][note];
Chris@169 1139
Chris@169 1140 if (i == partStart) {
Chris@169 1141 partShift = shift;
Chris@169 1142 }
Chris@169 1143
Chris@169 1144 if (i > partStart + partThreshold && shift != partShift) {
Chris@169 1145
Chris@169 1146 // pitch has changed, emit an intermediate note
Chris@252 1147 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 1148 i,
Chris@252 1149 note,
Chris@252 1150 partShift,
Chris@320 1151 partStrength));
Chris@343 1152
Chris@343 1153 onOffsetFeatures.push_back(makeOffsetFeature(i,
Chris@343 1154 note,
Chris@343 1155 partShift));
Chris@343 1156
Chris@169 1157 partStart = i;
Chris@169 1158 partShift = shift;
Chris@343 1159
Chris@343 1160 onOffsetFeatures.push_back(makeOnsetFeature(i,
Chris@343 1161 note,
Chris@343 1162 partShift,
Chris@343 1163 partStrength));
Chris@343 1164
Chris@320 1165 partStrength = 0;
Chris@169 1166 }
Chris@169 1167 }
Chris@169 1168
Chris@320 1169 if (strength > partStrength) {
Chris@320 1170 partStrength = strength;
Chris@169 1171 }
Chris@169 1172 }
Chris@169 1173
Chris@169 1174 if (end >= partStart + partThreshold) {
Chris@343 1175
Chris@252 1176 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 1177 end,
Chris@252 1178 note,
Chris@252 1179 partShift,
Chris@320 1180 partStrength));
Chris@343 1181
Chris@343 1182 onOffsetFeatures.push_back(makeOffsetFeature(end,
Chris@343 1183 note,
Chris@343 1184 partShift));
Chris@349 1185
Chris@349 1186 } else if (partStart > start) {
Chris@349 1187
Chris@349 1188 // we have emitted an onset for this, so must add an offset
Chris@349 1189 onOffsetFeatures.push_back(makeOffsetFeature(end,
Chris@349 1190 note,
Chris@349 1191 partShift));
Chris@169 1192 }
Chris@169 1193 }
Chris@252 1194
Chris@319 1195 void
Chris@336 1196 Silvet::emitOnset(int start, int note, FeatureList &onOffsetFeatures)
Chris@319 1197 {
Chris@319 1198 int len = int(m_pianoRoll.size());
Chris@320 1199
Chris@320 1200 double onsetStrength = 0;
Chris@319 1201
Chris@319 1202 int shift = 0;
Chris@336 1203 if (getShiftCount() > 1) {
Chris@319 1204 shift = m_pianoRollShifts[start][note];
Chris@319 1205 }
Chris@319 1206
Chris@319 1207 for (int i = start; i < len; ++i) {
Chris@319 1208 double strength = m_pianoRoll[i][note];
Chris@320 1209 if (strength > onsetStrength) {
Chris@320 1210 onsetStrength = strength;
Chris@319 1211 }
Chris@319 1212 }
Chris@319 1213
Chris@336 1214 if (onsetStrength == 0) return;
Chris@336 1215
Chris@336 1216 onOffsetFeatures.push_back(makeOnsetFeature(start,
Chris@336 1217 note,
Chris@336 1218 shift,
Chris@336 1219 onsetStrength));
Chris@336 1220 }
Chris@336 1221
Chris@309 1222 RealTime
Chris@309 1223 Silvet::getColumnTimestamp(int column)
Chris@309 1224 {
Chris@309 1225 double columnDuration = 1.0 / m_colsPerSec;
Chris@309 1226 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
Chris@309 1227
Chris@309 1228 return m_startTime + RealTime::fromSeconds
Chris@309 1229 (columnDuration * (column - postFilterLatency) + 0.02);
Chris@309 1230 }
Chris@309 1231
Chris@252 1232 Silvet::Feature
Chris@252 1233 Silvet::makeNoteFeature(int start,
Chris@252 1234 int end,
Chris@252 1235 int note,
Chris@252 1236 int shift,
Chris@320 1237 double strength)
Chris@252 1238 {
Chris@252 1239 Feature f;
Chris@252 1240
Chris@252 1241 f.hasTimestamp = true;
Chris@309 1242 f.timestamp = getColumnTimestamp(start);
Chris@252 1243
Chris@252 1244 f.hasDuration = true;
Chris@309 1245 f.duration = getColumnTimestamp(end) - f.timestamp;
Chris@252 1246
Chris@252 1247 f.values.clear();
Chris@336 1248 f.values.push_back(getNoteFrequency(note, shift));
Chris@320 1249 f.values.push_back(getVelocityFor(strength, start));
Chris@252 1250
Chris@336 1251 f.label = getNoteName(note, shift);
Chris@252 1252
Chris@252 1253 return f;
Chris@252 1254 }
Chris@252 1255
Chris@319 1256 Silvet::Feature
Chris@319 1257 Silvet::makeOnsetFeature(int start,
Chris@319 1258 int note,
Chris@319 1259 int shift,
Chris@320 1260 double strength)
Chris@319 1261 {
Chris@319 1262 Feature f;
Chris@319 1263
Chris@319 1264 f.hasTimestamp = true;
Chris@319 1265 f.timestamp = getColumnTimestamp(start);
Chris@319 1266
Chris@319 1267 f.hasDuration = false;
Chris@319 1268
Chris@319 1269 f.values.clear();
Chris@336 1270 f.values.push_back(getNoteFrequency(note, shift));
Chris@320 1271 f.values.push_back(getVelocityFor(strength, start));
Chris@319 1272
Chris@336 1273 f.label = getNoteName(note, shift);
Chris@336 1274
Chris@336 1275 return f;
Chris@336 1276 }
Chris@336 1277
Chris@336 1278 Silvet::Feature
Chris@336 1279 Silvet::makeOffsetFeature(int col,
Chris@336 1280 int note,
Chris@336 1281 int shift)
Chris@336 1282 {
Chris@336 1283 Feature f;
Chris@336 1284
Chris@336 1285 f.hasTimestamp = true;
Chris@336 1286 f.timestamp = getColumnTimestamp(col);
Chris@336 1287
Chris@336 1288 f.hasDuration = false;
Chris@336 1289
Chris@336 1290 f.values.clear();
Chris@336 1291 f.values.push_back(getNoteFrequency(note, shift));
Chris@336 1292 f.values.push_back(0); // velocity 0 for offset
Chris@336 1293
Chris@336 1294 f.label = getNoteName(note, shift) + " off";
Chris@319 1295
Chris@319 1296 return f;
Chris@319 1297 }
Chris@319 1298
Chris@320 1299 int
Chris@320 1300 Silvet::getVelocityFor(double strength, int column)
Chris@320 1301 {
Chris@320 1302 RealTime rt = getColumnTimestamp(column + 1);
Chris@320 1303
Chris@320 1304 float inputGain = getInputGainAt(rt);
Chris@320 1305
Chris@320 1306 double scale = 2.0;
Chris@320 1307 if (m_mode == LiveMode) scale = 20.0;
Chris@320 1308
Chris@320 1309 double velocity = round((strength * scale) / inputGain);
Chris@320 1310
Chris@320 1311 if (velocity > 127.0) velocity = 127.0;
Chris@320 1312 if (velocity < 1.0) velocity = 1.0; // assume surpassed 0 threshold already
Chris@320 1313
Chris@320 1314 return int(velocity);
Chris@320 1315 }
Chris@320 1316
Chris@252 1317 float
Chris@252 1318 Silvet::getInputGainAt(RealTime t)
Chris@252 1319 {
Chris@252 1320 map<RealTime, float>::const_iterator i = m_inputGains.lower_bound(t);
Chris@252 1321
Chris@252 1322 if (i == m_inputGains.end()) {
Chris@252 1323 if (i != m_inputGains.begin()) {
Chris@252 1324 --i;
Chris@252 1325 } else {
Chris@252 1326 return 1.f; // no data
Chris@252 1327 }
Chris@252 1328 }
Chris@252 1329
Chris@252 1330 // cerr << "gain at time " << t << " = " << i->second << endl;
Chris@252 1331
Chris@252 1332 return i->second;
Chris@252 1333 }
Chris@252 1334