annotate src/Silvet.cpp @ 336:d25e4aee73d7 livemode

Add onset+offset output; look up shift counts rather than passing them around
author Chris Cannam
date Fri, 26 Jun 2015 10:23:54 +0100
parents d861f86f2b17
children 3c6f5d2d33e8 705d807ca2ca
rev   line source
Chris@31 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@31 2
Chris@31 3 /*
Chris@31 4 Silvet
Chris@31 5
Chris@31 6 A Vamp plugin for note transcription.
Chris@31 7 Centre for Digital Music, Queen Mary University of London.
Chris@31 8
Chris@31 9 This program is free software; you can redistribute it and/or
Chris@31 10 modify it under the terms of the GNU General Public License as
Chris@31 11 published by the Free Software Foundation; either version 2 of the
Chris@31 12 License, or (at your option) any later version. See the file
Chris@31 13 COPYING included with this distribution for more information.
Chris@31 14 */
Chris@31 15
Chris@31 16 #include "Silvet.h"
Chris@34 17 #include "EM.h"
Chris@31 18
Chris@152 19 #include <cq/CQSpectrogram.h>
Chris@31 20
Chris@152 21 #include "MedianFilter.h"
Chris@152 22 #include "constant-q-cpp/src/dsp/Resampler.h"
Chris@246 23 #include "flattendynamics-ladspa.h"
Chris@298 24 #include "LiveInstruments.h"
Chris@31 25
Chris@31 26 #include <vector>
Chris@312 27 #include <future>
Chris@31 28
Chris@32 29 #include <cstdio>
Chris@32 30
Chris@31 31 using std::vector;
Chris@48 32 using std::cout;
Chris@31 33 using std::cerr;
Chris@31 34 using std::endl;
Chris@311 35 using std::pair;
Chris@312 36 using std::future;
Chris@312 37 using std::async;
Chris@40 38 using Vamp::RealTime;
Chris@31 39
Chris@31 40 static int processingSampleRate = 44100;
Chris@298 41
Chris@298 42 static int binsPerSemitoneLive = 1;
Chris@298 43 static int binsPerSemitoneNormal = 5;
Chris@170 44
Chris@272 45 static int minInputSampleRate = 100;
Chris@272 46 static int maxInputSampleRate = 192000;
Chris@272 47
Chris@316 48 static const Silvet::ProcessingMode defaultMode = Silvet::HighQualityMode;
Chris@316 49
Chris@31 50 Silvet::Silvet(float inputSampleRate) :
Chris@31 51 Plugin(inputSampleRate),
Chris@161 52 m_instruments(InstrumentPack::listInstrumentPacks()),
Chris@298 53 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)),
Chris@31 54 m_resampler(0),
Chris@246 55 m_flattener(0),
Chris@110 56 m_cq(0),
Chris@316 57 m_mode(defaultMode),
Chris@166 58 m_fineTuning(false),
Chris@178 59 m_instrument(0),
Chris@313 60 m_colsPerSec(50),
Chris@313 61 m_haveStartTime(false)
Chris@31 62 {
Chris@31 63 }
Chris@31 64
Chris@31 65 Silvet::~Silvet()
Chris@31 66 {
Chris@31 67 delete m_resampler;
Chris@246 68 delete m_flattener;
Chris@31 69 delete m_cq;
Chris@41 70 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 71 delete m_postFilter[i];
Chris@41 72 }
Chris@31 73 }
Chris@31 74
Chris@31 75 string
Chris@31 76 Silvet::getIdentifier() const
Chris@31 77 {
Chris@31 78 return "silvet";
Chris@31 79 }
Chris@31 80
Chris@31 81 string
Chris@31 82 Silvet::getName() const
Chris@31 83 {
Chris@31 84 return "Silvet Note Transcription";
Chris@31 85 }
Chris@31 86
Chris@31 87 string
Chris@31 88 Silvet::getDescription() const
Chris@31 89 {
Chris@191 90 return "Estimate the note onsets, pitches, and durations that make up a music recording.";
Chris@31 91 }
Chris@31 92
Chris@31 93 string
Chris@31 94 Silvet::getMaker() const
Chris@31 95 {
Chris@191 96 return "Queen Mary, University of London";
Chris@31 97 }
Chris@31 98
Chris@31 99 int
Chris@31 100 Silvet::getPluginVersion() const
Chris@31 101 {
Chris@309 102 return 3;
Chris@31 103 }
Chris@31 104
Chris@31 105 string
Chris@31 106 Silvet::getCopyright() const
Chris@31 107 {
Chris@191 108 return "Method by Emmanouil Benetos and Simon Dixon; plugin by Chris Cannam and Emmanouil Benetos. GPL licence.";
Chris@31 109 }
Chris@31 110
Chris@31 111 Silvet::InputDomain
Chris@31 112 Silvet::getInputDomain() const
Chris@31 113 {
Chris@31 114 return TimeDomain;
Chris@31 115 }
Chris@31 116
Chris@31 117 size_t
Chris@31 118 Silvet::getPreferredBlockSize() const
Chris@31 119 {
Chris@31 120 return 0;
Chris@31 121 }
Chris@31 122
Chris@31 123 size_t
Chris@31 124 Silvet::getPreferredStepSize() const
Chris@31 125 {
Chris@31 126 return 0;
Chris@31 127 }
Chris@31 128
Chris@31 129 size_t
Chris@31 130 Silvet::getMinChannelCount() const
Chris@31 131 {
Chris@31 132 return 1;
Chris@31 133 }
Chris@31 134
Chris@31 135 size_t
Chris@31 136 Silvet::getMaxChannelCount() const
Chris@31 137 {
Chris@31 138 return 1;
Chris@31 139 }
Chris@31 140
Chris@31 141 Silvet::ParameterList
Chris@31 142 Silvet::getParameterDescriptors() const
Chris@31 143 {
Chris@31 144 ParameterList list;
Chris@110 145
Chris@110 146 ParameterDescriptor desc;
Chris@110 147 desc.identifier = "mode";
Chris@110 148 desc.name = "Processing mode";
Chris@110 149 desc.unit = "";
Chris@297 150 desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode is tuned in favour of overall speed; Live mode is tuned in favour of lower latency; while Intensive mode (the default) will almost always produce the best results.";
Chris@110 151 desc.minValue = 0;
Chris@297 152 desc.maxValue = 2;
Chris@316 153 desc.defaultValue = int(defaultMode);
Chris@110 154 desc.isQuantized = true;
Chris@110 155 desc.quantizeStep = 1;
Chris@166 156 desc.valueNames.push_back("Draft (faster)");
Chris@165 157 desc.valueNames.push_back("Intensive (higher quality)");
Chris@297 158 desc.valueNames.push_back("Live (lower latency)");
Chris@161 159 list.push_back(desc);
Chris@161 160
Chris@176 161 desc.identifier = "instrument";
Chris@176 162 desc.name = "Instrument";
Chris@161 163 desc.unit = "";
Chris@271 164 desc.description = "The instrument or instruments known to be present in the recording. This affects the set of instrument templates used, as well as the expected level of polyphony in the output. Using a more limited set of instruments than the default will also make the plugin run faster.\nNote that this plugin cannot isolate instruments: you can't use this setting to request notes from only one instrument in a recording with several. Instead, use this as a hint to the plugin about which instruments are actually present.";
Chris@161 165 desc.minValue = 0;
Chris@162 166 desc.maxValue = m_instruments.size()-1;
Chris@162 167 desc.defaultValue = 0;
Chris@161 168 desc.isQuantized = true;
Chris@161 169 desc.quantizeStep = 1;
Chris@161 170 desc.valueNames.clear();
Chris@162 171 for (int i = 0; i < int(m_instruments.size()); ++i) {
Chris@162 172 desc.valueNames.push_back(m_instruments[i].name);
Chris@162 173 }
Chris@166 174 list.push_back(desc);
Chris@161 175
Chris@166 176 desc.identifier = "finetune";
Chris@166 177 desc.name = "Return fine pitch estimates";
Chris@166 178 desc.unit = "";
Chris@271 179 desc.description = "Return pitch estimates at finer than semitone resolution. This works only in Intensive mode. Notes that appear to drift in pitch will be split up into shorter notes with individually finer pitches.";
Chris@166 180 desc.minValue = 0;
Chris@166 181 desc.maxValue = 1;
Chris@166 182 desc.defaultValue = 0;
Chris@166 183 desc.isQuantized = true;
Chris@166 184 desc.quantizeStep = 1;
Chris@166 185 desc.valueNames.clear();
Chris@110 186 list.push_back(desc);
Chris@110 187
Chris@31 188 return list;
Chris@31 189 }
Chris@31 190
Chris@31 191 float
Chris@31 192 Silvet::getParameter(string identifier) const
Chris@31 193 {
Chris@110 194 if (identifier == "mode") {
Chris@297 195 return (float)(int)m_mode;
Chris@166 196 } else if (identifier == "finetune") {
Chris@166 197 return m_fineTuning ? 1.f : 0.f;
Chris@176 198 } else if (identifier == "instrument") {
Chris@162 199 return m_instrument;
Chris@110 200 }
Chris@31 201 return 0;
Chris@31 202 }
Chris@31 203
Chris@31 204 void
Chris@31 205 Silvet::setParameter(string identifier, float value)
Chris@31 206 {
Chris@110 207 if (identifier == "mode") {
Chris@297 208 m_mode = (ProcessingMode)(int)(value + 0.5);
Chris@166 209 } else if (identifier == "finetune") {
Chris@166 210 m_fineTuning = (value > 0.5);
Chris@176 211 } else if (identifier == "instrument") {
Chris@162 212 m_instrument = lrintf(value);
Chris@110 213 }
Chris@31 214 }
Chris@31 215
Chris@31 216 Silvet::ProgramList
Chris@31 217 Silvet::getPrograms() const
Chris@31 218 {
Chris@31 219 ProgramList list;
Chris@31 220 return list;
Chris@31 221 }
Chris@31 222
Chris@31 223 string
Chris@31 224 Silvet::getCurrentProgram() const
Chris@31 225 {
Chris@31 226 return "";
Chris@31 227 }
Chris@31 228
Chris@31 229 void
Chris@31 230 Silvet::selectProgram(string name)
Chris@31 231 {
Chris@31 232 }
Chris@31 233
Chris@31 234 Silvet::OutputList
Chris@31 235 Silvet::getOutputDescriptors() const
Chris@31 236 {
Chris@31 237 OutputList list;
Chris@31 238
Chris@31 239 OutputDescriptor d;
Chris@51 240 d.identifier = "notes";
Chris@51 241 d.name = "Note transcription";
Chris@329 242 d.description = "Overall note transcription. Each note has time, duration, estimated fundamental frequency, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
Chris@41 243 d.unit = "Hz";
Chris@31 244 d.hasFixedBinCount = true;
Chris@31 245 d.binCount = 2;
Chris@41 246 d.binNames.push_back("Frequency");
Chris@31 247 d.binNames.push_back("Velocity");
Chris@31 248 d.hasKnownExtents = false;
Chris@31 249 d.isQuantized = false;
Chris@31 250 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@246 251 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@31 252 d.hasDuration = true;
Chris@32 253 m_notesOutputNo = list.size();
Chris@32 254 list.push_back(d);
Chris@32 255
Chris@319 256 d.identifier = "onsets";
Chris@319 257 d.name = "Note onsets";
Chris@323 258 d.description = "Note onsets, without durations. These can be calculated sooner than complete notes, because it isn't necessary to wait for a note to finish before returning its feature. Each event has time, estimated fundamental frequency in Hz, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
Chris@319 259 d.unit = "Hz";
Chris@319 260 d.hasFixedBinCount = true;
Chris@319 261 d.binCount = 2;
Chris@319 262 d.binNames.push_back("Frequency");
Chris@319 263 d.binNames.push_back("Velocity");
Chris@319 264 d.hasKnownExtents = false;
Chris@319 265 d.isQuantized = false;
Chris@319 266 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@319 267 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@319 268 d.hasDuration = false;
Chris@319 269 m_onsetsOutputNo = list.size();
Chris@319 270 list.push_back(d);
Chris@319 271
Chris@336 272 d.identifier = "onoffsets";
Chris@336 273 d.name = "Note onsets and offsets";
Chris@336 274 d.description = "Note onsets and offsets as separate events. Each onset event has time, estimated fundamental frequency in Hz, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture. Offsets are represented in the same way but with a velocity of 0.";
Chris@336 275 d.unit = "Hz";
Chris@336 276 d.hasFixedBinCount = true;
Chris@336 277 d.binCount = 2;
Chris@336 278 d.binNames.push_back("Frequency");
Chris@336 279 d.binNames.push_back("Velocity");
Chris@336 280 d.hasKnownExtents = false;
Chris@336 281 d.isQuantized = false;
Chris@336 282 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@336 283 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@336 284 d.hasDuration = false;
Chris@336 285 m_onOffsetsOutputNo = list.size();
Chris@336 286 list.push_back(d);
Chris@336 287
Chris@178 288 d.identifier = "timefreq";
Chris@178 289 d.name = "Time-frequency distribution";
Chris@271 290 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
Chris@178 291 d.unit = "";
Chris@178 292 d.hasFixedBinCount = true;
Chris@298 293 d.binCount = getPack(0).templateHeight;
Chris@178 294 d.binNames.clear();
Chris@178 295 if (m_cq) {
Chris@294 296 char name[50];
Chris@298 297 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@178 298 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@178 299 // lowest-frequency 55 bins have been dropped, for a
Chris@178 300 // 545-bin template. The native CQ bins go high->low
Chris@178 301 // frequency though, so these are still the first 545 bins
Chris@178 302 // as reported by getBinFrequency, though in reverse order
Chris@178 303 float freq = m_cq->getBinFrequency
Chris@298 304 (getPack(0).templateHeight - i - 1);
Chris@178 305 sprintf(name, "%.1f Hz", freq);
Chris@178 306 d.binNames.push_back(name);
Chris@178 307 }
Chris@178 308 }
Chris@178 309 d.hasKnownExtents = false;
Chris@178 310 d.isQuantized = false;
Chris@178 311 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@178 312 d.sampleRate = m_colsPerSec;
Chris@178 313 d.hasDuration = false;
Chris@178 314 m_fcqOutputNo = list.size();
Chris@178 315 list.push_back(d);
Chris@178 316
Chris@294 317 d.identifier = "pitchactivation";
Chris@294 318 d.name = "Pitch activation distribution";
Chris@294 319 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction.";
Chris@294 320 d.unit = "";
Chris@294 321 d.hasFixedBinCount = true;
Chris@298 322 d.binCount = getPack(0).templateNoteCount;
Chris@294 323 d.binNames.clear();
Chris@294 324 if (m_cq) {
Chris@298 325 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@336 326 d.binNames.push_back(getNoteName(i, 0));
Chris@294 327 }
Chris@294 328 }
Chris@294 329 d.hasKnownExtents = false;
Chris@294 330 d.isQuantized = false;
Chris@294 331 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@294 332 d.sampleRate = m_colsPerSec;
Chris@294 333 d.hasDuration = false;
Chris@294 334 m_pitchOutputNo = list.size();
Chris@294 335 list.push_back(d);
Chris@294 336
Chris@309 337 d.identifier = "chroma";
Chris@309 338 d.name = "Pitch chroma distribution";
Chris@309 339 d.description = "Pitch chroma distribution formed by wrapping the un-thresholded pitch activation distribution into a single octave of semitone bins.";
Chris@309 340 d.unit = "";
Chris@309 341 d.hasFixedBinCount = true;
Chris@309 342 d.binCount = 12;
Chris@309 343 d.binNames.clear();
Chris@309 344 if (m_cq) {
Chris@309 345 for (int i = 0; i < 12; ++i) {
Chris@320 346 d.binNames.push_back(getChromaName(i));
Chris@309 347 }
Chris@309 348 }
Chris@309 349 d.hasKnownExtents = false;
Chris@309 350 d.isQuantized = false;
Chris@309 351 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@309 352 d.sampleRate = m_colsPerSec;
Chris@309 353 d.hasDuration = false;
Chris@309 354 m_chromaOutputNo = list.size();
Chris@309 355 list.push_back(d);
Chris@309 356
Chris@302 357 d.identifier = "templates";
Chris@302 358 d.name = "Templates";
Chris@302 359 d.description = "Constant-Q spectral templates for the selected instrument pack.";
Chris@302 360 d.unit = "";
Chris@302 361 d.hasFixedBinCount = true;
Chris@302 362 d.binCount = getPack(0).templateHeight;
Chris@302 363 d.binNames.clear();
Chris@302 364 if (m_cq) {
Chris@302 365 char name[50];
Chris@302 366 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@302 367 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@302 368 // lowest-frequency 55 bins have been dropped, for a
Chris@302 369 // 545-bin template. The native CQ bins go high->low
Chris@302 370 // frequency though, so these are still the first 545 bins
Chris@302 371 // as reported by getBinFrequency, though in reverse order
Chris@302 372 float freq = m_cq->getBinFrequency
Chris@302 373 (getPack(0).templateHeight - i - 1);
Chris@302 374 sprintf(name, "%.1f Hz", freq);
Chris@302 375 d.binNames.push_back(name);
Chris@302 376 }
Chris@302 377 }
Chris@302 378 d.hasKnownExtents = false;
Chris@302 379 d.isQuantized = false;
Chris@302 380 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@302 381 d.sampleRate = m_colsPerSec;
Chris@302 382 d.hasDuration = false;
Chris@302 383 m_templateOutputNo = list.size();
Chris@302 384 list.push_back(d);
Chris@302 385
Chris@31 386 return list;
Chris@31 387 }
Chris@31 388
Chris@38 389 std::string
Chris@320 390 Silvet::getChromaName(int pitch) const
Chris@38 391 {
Chris@38 392 static const char *names[] = {
Chris@38 393 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
Chris@38 394 };
Chris@38 395
Chris@309 396 return names[pitch];
Chris@309 397 }
Chris@309 398
Chris@309 399 std::string
Chris@336 400 Silvet::getNoteName(int note, int shift) const
Chris@309 401 {
Chris@320 402 string n = getChromaName(note % 12);
Chris@38 403
Chris@175 404 int oct = (note + 9) / 12;
Chris@38 405
Chris@175 406 char buf[30];
Chris@175 407
Chris@175 408 float pshift = 0.f;
Chris@336 409 int shiftCount = getShiftCount();
Chris@175 410 if (shiftCount > 1) {
Chris@320 411 // see getNoteFrequency below
Chris@175 412 pshift =
Chris@175 413 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 414 }
Chris@175 415
Chris@175 416 if (pshift > 0.f) {
Chris@309 417 sprintf(buf, "%s%d+%dc", n.c_str(), oct, int(round(pshift * 100)));
Chris@175 418 } else if (pshift < 0.f) {
Chris@309 419 sprintf(buf, "%s%d-%dc", n.c_str(), oct, int(round((-pshift) * 100)));
Chris@175 420 } else {
Chris@309 421 sprintf(buf, "%s%d", n.c_str(), oct);
Chris@175 422 }
Chris@38 423
Chris@38 424 return buf;
Chris@38 425 }
Chris@38 426
Chris@41 427 float
Chris@336 428 Silvet::getNoteFrequency(int note, int shift) const
Chris@41 429 {
Chris@169 430 // Convert shift number to a pitch shift. The given shift number
Chris@169 431 // is an offset into the template array, which starts with some
Chris@169 432 // zeros, followed by the template, then some trailing zeros.
Chris@169 433 //
Chris@169 434 // Example: if we have templateMaxShift == 2 and thus shiftCount
Chris@169 435 // == 5, then the number will be in the range 0-4 and the template
Chris@169 436 // will have 2 zeros at either end. Thus number 2 represents the
Chris@169 437 // template "as recorded", for a pitch shift of 0; smaller indices
Chris@169 438 // represent moving the template *up* in pitch (by introducing
Chris@169 439 // zeros at the start, which is the low-frequency end), for a
Chris@169 440 // positive pitch shift; and higher values represent moving it
Chris@169 441 // down in pitch, for a negative pitch shift.
Chris@169 442
Chris@175 443 float pshift = 0.f;
Chris@336 444 int shiftCount = getShiftCount();
Chris@175 445 if (shiftCount > 1) {
Chris@175 446 pshift =
Chris@175 447 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 448 }
Chris@169 449
Chris@301 450 float freq = float(27.5 * pow(2.0, (note + pshift) / 12.0));
Chris@301 451
Chris@303 452 // cerr << "note = " << note << ", shift = " << shift << ", shiftCount = "
Chris@303 453 // << shiftCount << ", obtained freq = " << freq << endl;
Chris@301 454
Chris@301 455 return freq;
Chris@41 456 }
Chris@41 457
Chris@31 458 bool
Chris@31 459 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@31 460 {
Chris@272 461 if (m_inputSampleRate < minInputSampleRate ||
Chris@272 462 m_inputSampleRate > maxInputSampleRate) {
Chris@272 463 cerr << "Silvet::initialise: Unsupported input sample rate "
Chris@272 464 << m_inputSampleRate << " (supported min " << minInputSampleRate
Chris@272 465 << ", max " << maxInputSampleRate << ")" << endl;
Chris@272 466 return false;
Chris@272 467 }
Chris@272 468
Chris@31 469 if (channels < getMinChannelCount() ||
Chris@272 470 channels > getMaxChannelCount()) {
Chris@272 471 cerr << "Silvet::initialise: Unsupported channel count " << channels
Chris@272 472 << " (supported min " << getMinChannelCount() << ", max "
Chris@272 473 << getMaxChannelCount() << ")" << endl;
Chris@272 474 return false;
Chris@272 475 }
Chris@31 476
Chris@31 477 if (stepSize != blockSize) {
Chris@31 478 cerr << "Silvet::initialise: Step size must be the same as block size ("
Chris@31 479 << stepSize << " != " << blockSize << ")" << endl;
Chris@31 480 return false;
Chris@31 481 }
Chris@31 482
Chris@31 483 m_blockSize = blockSize;
Chris@31 484
Chris@31 485 reset();
Chris@31 486
Chris@31 487 return true;
Chris@31 488 }
Chris@31 489
Chris@31 490 void
Chris@31 491 Silvet::reset()
Chris@31 492 {
Chris@31 493 delete m_resampler;
Chris@246 494 delete m_flattener;
Chris@31 495 delete m_cq;
Chris@31 496
Chris@31 497 if (m_inputSampleRate != processingSampleRate) {
Chris@31 498 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
Chris@31 499 } else {
Chris@31 500 m_resampler = 0;
Chris@31 501 }
Chris@31 502
Chris@246 503 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
Chris@246 504 m_flattener->reset();
Chris@246 505
Chris@301 506 // this happens to be processingSampleRate / 3, and is the top
Chris@301 507 // freq used for the EM templates:
Chris@301 508 double maxFreq = 14700;
Chris@301 509
Chris@301 510 if (m_mode == LiveMode) {
Chris@301 511 // We only have 12 bpo rather than 60, so we need the top bin
Chris@301 512 // to be the middle one of the top 5, i.e. 2/5 of a semitone
Chris@301 513 // lower than 14700
Chris@301 514 maxFreq *= powf(2.0, -1.0 / 30.0);
Chris@301 515 }
Chris@301 516
Chris@173 517 double minFreq = 27.5;
Chris@173 518
Chris@297 519 if (m_mode != HighQualityMode) {
Chris@173 520 // We don't actually return any notes from the bottom octave,
Chris@173 521 // so we can just pad with zeros
Chris@173 522 minFreq *= 2;
Chris@173 523 }
Chris@173 524
Chris@298 525 int bpo = 12 *
Chris@298 526 (m_mode == LiveMode ? binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@301 527
Chris@154 528 CQParameters params(processingSampleRate,
Chris@173 529 minFreq,
Chris@303 530 maxFreq,
Chris@298 531 bpo);
Chris@154 532
Chris@325 533 params.q = 0.8;
Chris@325 534 params.atomHopFactor = (m_mode == LiveMode ? 1.0 : 0.3);
Chris@154 535 params.threshold = 0.0005;
Chris@317 536 params.decimator =
Chris@317 537 (m_mode == LiveMode ?
Chris@317 538 CQParameters::FasterDecimator : CQParameters::BetterDecimator);
Chris@172 539 params.window = CQParameters::Hann;
Chris@154 540
Chris@154 541 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
Chris@31 542
Chris@303 543 // cerr << "CQ bins = " << m_cq->getTotalBins() << endl;
Chris@303 544 // cerr << "CQ min freq = " << m_cq->getMinFrequency() << " (and for confirmation, freq of bin 0 = " << m_cq->getBinFrequency(0) << ")" << endl;
Chris@297 545
Chris@297 546 m_colsPerSec = (m_mode == DraftMode ? 25 : 50);
Chris@165 547
Chris@41 548 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 549 delete m_postFilter[i];
Chris@41 550 }
Chris@41 551 m_postFilter.clear();
Chris@303 552 int postFilterLength = 3;
Chris@298 553 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@303 554 m_postFilter.push_back(new MedianFilter<double>(postFilterLength));
Chris@41 555 }
Chris@41 556 m_pianoRoll.clear();
Chris@246 557 m_inputGains.clear();
Chris@32 558 m_columnCount = 0;
Chris@272 559 m_resampledCount = 0;
Chris@40 560 m_startTime = RealTime::zeroTime;
Chris@313 561 m_haveStartTime = false;
Chris@31 562 }
Chris@31 563
Chris@31 564 Silvet::FeatureSet
Chris@31 565 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
Chris@31 566 {
Chris@302 567 FeatureSet fs;
Chris@302 568
Chris@313 569 if (!m_haveStartTime) {
Chris@314 570
Chris@40 571 m_startTime = timestamp;
Chris@313 572 m_haveStartTime = true;
Chris@314 573
Chris@302 574 insertTemplateFeatures(fs);
Chris@40 575 }
Chris@246 576
Chris@246 577 vector<float> flattened(m_blockSize);
Chris@246 578 float gain = 1.f;
Chris@246 579 m_flattener->connectInputPort
Chris@246 580 (FlattenDynamics::AudioInputPort, inputBuffers[0]);
Chris@246 581 m_flattener->connectOutputPort
Chris@246 582 (FlattenDynamics::AudioOutputPort, &flattened[0]);
Chris@246 583 m_flattener->connectOutputPort
Chris@246 584 (FlattenDynamics::GainOutputPort, &gain);
Chris@246 585 m_flattener->process(m_blockSize);
Chris@246 586
Chris@252 587 m_inputGains[timestamp] = gain;
Chris@40 588
Chris@31 589 vector<double> data;
Chris@40 590 for (int i = 0; i < m_blockSize; ++i) {
Chris@246 591 double d = flattened[i];
Chris@235 592 data.push_back(d);
Chris@40 593 }
Chris@31 594
Chris@31 595 if (m_resampler) {
Chris@272 596
Chris@31 597 data = m_resampler->process(data.data(), data.size());
Chris@272 598
Chris@272 599 int hadCount = m_resampledCount;
Chris@272 600 m_resampledCount += data.size();
Chris@272 601
Chris@272 602 int resamplerLatency = m_resampler->getLatency();
Chris@272 603
Chris@272 604 if (hadCount < resamplerLatency) {
Chris@272 605 int stillToDrop = resamplerLatency - hadCount;
Chris@272 606 if (stillToDrop >= int(data.size())) {
Chris@302 607 return fs;
Chris@272 608 } else {
Chris@272 609 data = vector<double>(data.begin() + stillToDrop, data.end());
Chris@272 610 }
Chris@272 611 }
Chris@31 612 }
Chris@272 613
Chris@32 614 Grid cqout = m_cq->process(data);
Chris@302 615 transcribe(cqout, fs);
Chris@51 616 return fs;
Chris@34 617 }
Chris@34 618
Chris@34 619 Silvet::FeatureSet
Chris@34 620 Silvet::getRemainingFeatures()
Chris@34 621 {
Chris@145 622 Grid cqout = m_cq->getRemainingOutput();
Chris@302 623 FeatureSet fs;
Chris@336 624
Chris@302 625 if (m_columnCount == 0) {
Chris@302 626 // process() was never called, but we still want these
Chris@302 627 insertTemplateFeatures(fs);
Chris@302 628 } else {
Chris@336 629
Chris@336 630 // Complete the transcription
Chris@336 631
Chris@302 632 transcribe(cqout, fs);
Chris@336 633
Chris@336 634 // And make sure any extant playing notes are finished and returned
Chris@336 635
Chris@336 636 m_pianoRoll.push_back({});
Chris@336 637
Chris@336 638 auto events = noteTrack();
Chris@336 639
Chris@336 640 for (const auto &f : events.notes) {
Chris@336 641 fs[m_notesOutputNo].push_back(f);
Chris@336 642 }
Chris@336 643
Chris@336 644 for (const auto &f : events.onsets) {
Chris@336 645 fs[m_onsetsOutputNo].push_back(f);
Chris@336 646 }
Chris@336 647
Chris@336 648 for (const auto &f : events.onOffsets) {
Chris@336 649 fs[m_onOffsetsOutputNo].push_back(f);
Chris@336 650 }
Chris@302 651 }
Chris@336 652
Chris@51 653 return fs;
Chris@34 654 }
Chris@34 655
Chris@302 656 void
Chris@302 657 Silvet::insertTemplateFeatures(FeatureSet &fs)
Chris@302 658 {
Chris@302 659 const InstrumentPack &pack = getPack(m_instrument);
Chris@302 660 for (int i = 0; i < int(pack.templates.size()) * pack.templateNoteCount; ++i) {
Chris@302 661 RealTime timestamp = RealTime::fromSeconds(double(i) / m_colsPerSec);
Chris@302 662 Feature f;
Chris@302 663 char buffer[50];
Chris@302 664 sprintf(buffer, "Note %d", i + 1);
Chris@302 665 f.label = buffer;
Chris@302 666 f.hasTimestamp = true;
Chris@302 667 f.timestamp = timestamp;
Chris@302 668 f.values = pack.templates[i / pack.templateNoteCount]
Chris@302 669 .data[i % pack.templateNoteCount];
Chris@302 670 fs[m_templateOutputNo].push_back(f);
Chris@302 671 }
Chris@302 672 }
Chris@302 673
Chris@336 674 int
Chris@336 675 Silvet::getShiftCount() const
Chris@336 676 {
Chris@336 677 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning;
Chris@336 678 int shiftCount = 1;
Chris@336 679 if (wantShifts) {
Chris@336 680 const InstrumentPack &pack(getPack(m_instrument));
Chris@336 681 shiftCount = pack.templateMaxShift * 2 + 1;
Chris@336 682 }
Chris@336 683 return shiftCount;
Chris@336 684 }
Chris@336 685
Chris@302 686 void
Chris@302 687 Silvet::transcribe(const Grid &cqout, Silvet::FeatureSet &fs)
Chris@34 688 {
Chris@32 689 Grid filtered = preProcess(cqout);
Chris@31 690
Chris@302 691 if (filtered.empty()) return;
Chris@170 692
Chris@298 693 const InstrumentPack &pack(getPack(m_instrument));
Chris@104 694
Chris@325 695 int width = filtered.size();
Chris@325 696
Chris@325 697 double silenceThreshold = 0.01;
Chris@325 698
Chris@325 699 for (int i = 0; i < width; ++i) {
Chris@325 700
Chris@325 701 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1 + i);
Chris@325 702 float inputGain = getInputGainAt(timestamp);
Chris@325 703
Chris@178 704 Feature f;
Chris@325 705 double rms = 0.0;
Chris@325 706
Chris@178 707 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@325 708 double v = filtered[i][j];
Chris@325 709 rms += v * v;
Chris@325 710 f.values.push_back(float(v));
Chris@178 711 }
Chris@325 712
Chris@325 713 rms = sqrt(rms / pack.templateHeight);
Chris@325 714 if (rms / inputGain < silenceThreshold) {
Chris@325 715 filtered[i].clear();
Chris@325 716 }
Chris@325 717
Chris@178 718 fs[m_fcqOutputNo].push_back(f);
Chris@178 719 }
Chris@325 720
Chris@311 721 Grid localPitches(width);
Chris@170 722
Chris@336 723 int shiftCount = getShiftCount();
Chris@336 724 bool wantShifts = (shiftCount > 1);
Chris@170 725
Chris@170 726 vector<vector<int> > localBestShifts;
Chris@170 727 if (wantShifts) {
Chris@311 728 localBestShifts = vector<vector<int> >(width);
Chris@170 729 }
Chris@170 730
Chris@312 731 #ifndef MAX_EM_THREADS
Chris@312 732 #define MAX_EM_THREADS 8
Chris@312 733 #endif
Chris@312 734
Chris@317 735 int emThreadCount = MAX_EM_THREADS;
Chris@317 736 if (m_mode == LiveMode && pack.templates.size() == 1) {
Chris@317 737 // The EM step is probably not slow enough to merit it
Chris@317 738 emThreadCount = 1;
Chris@317 739 }
Chris@317 740
Chris@312 741 #if (defined(MAX_EM_THREADS) && (MAX_EM_THREADS > 1))
Chris@317 742 if (emThreadCount > 1) {
Chris@317 743 for (int i = 0; i < width; ) {
Chris@317 744 typedef future<pair<vector<double>, vector<int>>> EMFuture;
Chris@317 745 vector<EMFuture> results;
Chris@317 746 for (int j = 0; j < emThreadCount && i + j < width; ++j) {
Chris@317 747 results.push_back
Chris@317 748 (async(std::launch::async,
Chris@317 749 [&](int index) {
Chris@336 750 return applyEM(pack, filtered.at(index));
Chris@317 751 }, i + j));
Chris@317 752 }
Chris@317 753 for (int j = 0; j < emThreadCount && i + j < width; ++j) {
Chris@317 754 auto out = results[j].get();
Chris@317 755 localPitches[i+j] = out.first;
Chris@317 756 if (wantShifts) localBestShifts[i+j] = out.second;
Chris@317 757 }
Chris@317 758 i += emThreadCount;
Chris@312 759 }
Chris@123 760 }
Chris@312 761 #endif
Chris@317 762
Chris@317 763 if (emThreadCount == 1) {
Chris@317 764 for (int i = 0; i < width; ++i) {
Chris@336 765 auto out = applyEM(pack, filtered.at(i));
Chris@317 766 localPitches[i] = out.first;
Chris@317 767 if (wantShifts) localBestShifts[i] = out.second;
Chris@317 768 }
Chris@317 769 }
Chris@305 770
Chris@166 771 for (int i = 0; i < width; ++i) {
Chris@37 772
Chris@321 773 vector<double> filtered;
Chris@321 774
Chris@321 775 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@321 776 m_postFilter[j]->push(localPitches[i][j]);
Chris@321 777 filtered.push_back(m_postFilter[j]->get());
Chris@321 778 }
Chris@294 779
Chris@309 780 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1);
Chris@309 781 float inputGain = getInputGainAt(timestamp);
Chris@309 782
Chris@294 783 Feature f;
Chris@294 784 for (int j = 0; j < (int)filtered.size(); ++j) {
Chris@309 785 float v = filtered[j];
Chris@294 786 if (v < pack.levelThreshold) v = 0.f;
Chris@309 787 f.values.push_back(v / inputGain);
Chris@294 788 }
Chris@294 789 fs[m_pitchOutputNo].push_back(f);
Chris@309 790
Chris@309 791 f.values.clear();
Chris@309 792 f.values.resize(12);
Chris@309 793 for (int j = 0; j < (int)filtered.size(); ++j) {
Chris@309 794 f.values[j % 12] += filtered[j] / inputGain;
Chris@309 795 }
Chris@309 796 fs[m_chromaOutputNo].push_back(f);
Chris@38 797
Chris@321 798 // This pushes the up-to-max-polyphony activation column to
Chris@321 799 // m_pianoRoll
Chris@336 800 postProcess(filtered, localBestShifts[i]);
Chris@321 801
Chris@336 802 auto events = noteTrack();
Chris@319 803
Chris@336 804 for (const auto &f : events.notes) {
Chris@336 805 fs[m_notesOutputNo].push_back(f);
Chris@40 806 }
Chris@319 807
Chris@336 808 for (const auto &f : events.onsets) {
Chris@336 809 fs[m_onsetsOutputNo].push_back(f);
Chris@336 810 }
Chris@336 811
Chris@336 812 for (const auto &f : events.onOffsets) {
Chris@336 813 fs[m_onOffsetsOutputNo].push_back(f);
Chris@319 814 }
Chris@34 815 }
Chris@31 816 }
Chris@31 817
Chris@311 818 pair<vector<double>, vector<int> >
Chris@311 819 Silvet::applyEM(const InstrumentPack &pack,
Chris@336 820 const vector<double> &column)
Chris@311 821 {
Chris@311 822 double columnThreshold = 1e-5;
Chris@311 823
Chris@314 824 if (m_mode == LiveMode) {
Chris@325 825 columnThreshold /= 15;
Chris@314 826 }
Chris@314 827
Chris@311 828 vector<double> pitches(pack.templateNoteCount, 0.0);
Chris@311 829 vector<int> bestShifts;
Chris@325 830
Chris@325 831 if (column.empty()) return { pitches, bestShifts };
Chris@311 832
Chris@311 833 double sum = 0.0;
Chris@311 834 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@311 835 sum += column.at(j);
Chris@311 836 }
Chris@311 837 if (sum < columnThreshold) return { pitches, bestShifts };
Chris@311 838
Chris@314 839 EM em(&pack, m_mode == HighQualityMode);
Chris@311 840
Chris@311 841 em.setPitchSparsity(pack.pitchSparsity);
Chris@311 842 em.setSourceSparsity(pack.sourceSparsity);
Chris@311 843
Chris@314 844 int iterations = (m_mode == HighQualityMode ? 20 : 10);
Chris@311 845
Chris@311 846 for (int j = 0; j < iterations; ++j) {
Chris@311 847 em.iterate(column.data());
Chris@311 848 }
Chris@311 849
Chris@311 850 const float *pitchDist = em.getPitchDistribution();
Chris@311 851 const float *const *shiftDist = em.getShifts();
Chris@311 852
Chris@336 853 int shiftCount = getShiftCount();
Chris@311 854
Chris@311 855 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@311 856
Chris@311 857 pitches[j] = pitchDist[j] * sum;
Chris@311 858
Chris@311 859 int bestShift = 0;
Chris@311 860 float bestShiftValue = 0.0;
Chris@336 861 if (shiftCount > 1) {
Chris@311 862 for (int k = 0; k < shiftCount; ++k) {
Chris@311 863 float value = shiftDist[k][j];
Chris@311 864 if (k == 0 || value > bestShiftValue) {
Chris@311 865 bestShiftValue = value;
Chris@311 866 bestShift = k;
Chris@311 867 }
Chris@311 868 }
Chris@311 869 bestShifts.push_back(bestShift);
Chris@311 870 }
Chris@311 871 }
Chris@311 872
Chris@311 873 return { pitches, bestShifts };
Chris@311 874 }
Chris@311 875
Chris@32 876 Silvet::Grid
Chris@32 877 Silvet::preProcess(const Grid &in)
Chris@32 878 {
Chris@32 879 int width = in.size();
Chris@32 880
Chris@165 881 int spacing = processingSampleRate / m_colsPerSec;
Chris@32 882
Chris@165 883 // need to be careful that col spacing is an integer number of samples!
Chris@165 884 assert(spacing * m_colsPerSec == processingSampleRate);
Chris@32 885
Chris@32 886 Grid out;
Chris@32 887
Chris@58 888 // We count the CQ latency in terms of processing hops, but
Chris@58 889 // actually it probably isn't an exact number of hops so this
Chris@58 890 // isn't quite accurate. But the small constant offset is
Chris@165 891 // practically irrelevant compared to the jitter from the frame
Chris@165 892 // size we reduce to in a moment
Chris@33 893 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
Chris@33 894
Chris@298 895 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 896
Chris@32 897 for (int i = 0; i < width; ++i) {
Chris@32 898
Chris@33 899 if (m_columnCount < latentColumns) {
Chris@33 900 ++m_columnCount;
Chris@33 901 continue;
Chris@33 902 }
Chris@33 903
Chris@32 904 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop();
Chris@32 905 int sampleNo = m_columnCount * m_cq->getColumnHop();
Chris@32 906
Chris@32 907 bool select = (sampleNo / spacing != prevSampleNo / spacing);
Chris@32 908
Chris@32 909 if (select) {
Chris@32 910 vector<double> inCol = in[i];
Chris@176 911 vector<double> outCol(pack.templateHeight);
Chris@32 912
Chris@178 913 // In HQ mode, the CQ returns 600 bins and we ignore the
Chris@298 914 // lowest 55 of them (assuming binsPerSemitone == 5).
Chris@178 915 //
Chris@297 916 // In draft and live mode the CQ is an octave shorter,
Chris@300 917 // returning 540 bins or equivalent, so we instead pad
Chris@300 918 // them with an additional 5 or equivalent zeros.
Chris@178 919 //
Chris@178 920 // We also need to reverse the column as we go, since the
Chris@178 921 // raw CQ has the high frequencies first and we need it
Chris@178 922 // the other way around.
Chris@32 923
Chris@298 924 int bps = (m_mode == LiveMode ?
Chris@298 925 binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@298 926
Chris@297 927 if (m_mode == HighQualityMode) {
Chris@178 928 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@298 929 int ix = inCol.size() - j - (11 * bps);
Chris@178 930 outCol[j] = inCol[ix];
Chris@178 931 }
Chris@178 932 } else {
Chris@298 933 for (int j = 0; j < bps; ++j) {
Chris@178 934 outCol[j] = 0.0;
Chris@178 935 }
Chris@298 936 for (int j = bps; j < pack.templateHeight; ++j) {
Chris@298 937 int ix = inCol.size() - j + (bps-1);
Chris@178 938 outCol[j] = inCol[ix];
Chris@178 939 }
Chris@46 940 }
Chris@32 941
Chris@46 942 vector<double> noiseLevel1 =
Chris@298 943 MedianFilter<double>::filter(8 * bps, outCol);
Chris@176 944 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 945 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
Chris@46 946 }
Chris@32 947
Chris@46 948 vector<double> noiseLevel2 =
Chris@298 949 MedianFilter<double>::filter(8 * bps, noiseLevel1);
Chris@176 950 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 951 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
Chris@32 952 }
Chris@32 953
Chris@165 954 out.push_back(outCol);
Chris@32 955 }
Chris@32 956
Chris@32 957 ++m_columnCount;
Chris@32 958 }
Chris@32 959
Chris@32 960 return out;
Chris@32 961 }
Chris@32 962
Chris@321 963 void
Chris@170 964 Silvet::postProcess(const vector<double> &pitches,
Chris@336 965 const vector<int> &bestShifts)
Chris@166 966 {
Chris@298 967 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 968
Chris@41 969 // Threshold for level and reduce number of candidate pitches
Chris@41 970
Chris@41 971 typedef std::multimap<double, int> ValueIndexMap;
Chris@41 972
Chris@41 973 ValueIndexMap strengths;
Chris@166 974
Chris@176 975 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@321 976
Chris@321 977 double strength = pitches[j];
Chris@183 978 if (strength < pack.levelThreshold) continue;
Chris@321 979
Chris@321 980 // In live mode with only a 12-bpo CQ, we are very likely to
Chris@321 981 // get clusters of two or three high scores at a time for
Chris@321 982 // neighbouring semitones. Eliminate these by picking only the
Chris@325 983 // peaks (except that we never eliminate a note that has
Chris@325 984 // already been established as currently playing). This means
Chris@325 985 // we can't recognise actual semitone chords if they ever
Chris@325 986 // appear, but it's not as if live mode is good enough for
Chris@325 987 // that to be a big deal anyway.
Chris@321 988 if (m_mode == LiveMode) {
Chris@325 989 if (m_current.find(j) == m_current.end() &&
Chris@325 990 (j == 0 ||
Chris@325 991 j + 1 == pack.templateNoteCount ||
Chris@325 992 pitches[j] < pitches[j-1] ||
Chris@325 993 pitches[j] < pitches[j+1])) {
Chris@325 994 // not a peak or a currently-playing note: skip it
Chris@321 995 continue;
Chris@321 996 }
Chris@321 997 }
Chris@323 998
Chris@168 999 strengths.insert(ValueIndexMap::value_type(strength, j));
Chris@168 1000 }
Chris@166 1001
Chris@168 1002 ValueIndexMap::const_iterator si = strengths.end();
Chris@167 1003
Chris@168 1004 map<int, double> active;
Chris@168 1005 map<int, int> activeShifts;
Chris@168 1006
Chris@336 1007 int shiftCount = getShiftCount();
Chris@336 1008
Chris@183 1009 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) {
Chris@168 1010
Chris@168 1011 --si;
Chris@168 1012
Chris@168 1013 double strength = si->first;
Chris@168 1014 int j = si->second;
Chris@168 1015
Chris@168 1016 active[j] = strength;
Chris@168 1017
Chris@336 1018 if (shiftCount > 1) {
Chris@170 1019 activeShifts[j] = bestShifts[j];
Chris@167 1020 }
Chris@41 1021 }
Chris@41 1022
Chris@168 1023 m_pianoRoll.push_back(active);
Chris@170 1024
Chris@336 1025 if (shiftCount > 1) {
Chris@168 1026 m_pianoRollShifts.push_back(activeShifts);
Chris@41 1027 }
Chris@294 1028
Chris@321 1029 return;
Chris@166 1030 }
Chris@166 1031
Chris@336 1032 Silvet::FeatureChunk
Chris@336 1033 Silvet::noteTrack()
Chris@166 1034 {
Chris@41 1035 // Minimum duration pruning, and conversion to notes. We can only
Chris@41 1036 // report notes that have just ended (i.e. that are absent in the
Chris@168 1037 // latest active set but present in the prior set in the piano
Chris@41 1038 // roll) -- any notes that ended earlier will have been reported
Chris@41 1039 // already, and if they haven't ended, we don't know their
Chris@41 1040 // duration.
Chris@41 1041
Chris@168 1042 int width = m_pianoRoll.size() - 1;
Chris@168 1043
Chris@168 1044 const map<int, double> &active = m_pianoRoll[width];
Chris@41 1045
Chris@165 1046 double columnDuration = 1.0 / m_colsPerSec;
Chris@165 1047
Chris@165 1048 // only keep notes >= 100ms or thereabouts
Chris@323 1049 double durationThrSec = 0.1;
Chris@323 1050 int durationThreshold = floor(durationThrSec / columnDuration); // in cols
Chris@165 1051 if (durationThreshold < 1) durationThreshold = 1;
Chris@41 1052
Chris@336 1053 FeatureList noteFeatures, onsetFeatures, onOffsetFeatures;
Chris@41 1054
Chris@41 1055 if (width < durationThreshold + 1) {
Chris@336 1056 return { noteFeatures, onsetFeatures, onOffsetFeatures };
Chris@41 1057 }
Chris@41 1058
Chris@55 1059 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
Chris@41 1060 ni != m_pianoRoll[width-1].end(); ++ni) {
Chris@41 1061
Chris@55 1062 int note = ni->first;
Chris@41 1063
Chris@41 1064 int end = width;
Chris@41 1065 int start = end-1;
Chris@41 1066
Chris@41 1067 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
Chris@41 1068 --start;
Chris@41 1069 }
Chris@41 1070 ++start;
Chris@41 1071
Chris@319 1072 int duration = end - start;
Chris@319 1073
Chris@319 1074 if (duration < durationThreshold) {
Chris@41 1075 continue;
Chris@41 1076 }
Chris@41 1077
Chris@319 1078 if (duration == durationThreshold) {
Chris@325 1079 m_current.insert(note);
Chris@336 1080 emitOnset(start, note, onsetFeatures);
Chris@336 1081 emitOnset(start, note, onOffsetFeatures);
Chris@319 1082 }
Chris@319 1083
Chris@319 1084 if (active.find(note) == active.end()) {
Chris@319 1085 // the note was playing but just ended
Chris@325 1086 m_current.erase(note);
Chris@336 1087 emitNote(start, end, note, noteFeatures);
Chris@336 1088 emitOffset(start, end, note, onOffsetFeatures);
Chris@334 1089 } else { // still playing
Chris@334 1090 // repeated note detection: if level is greater than this
Chris@334 1091 // multiple of its previous value, then we end the note and
Chris@334 1092 // restart it with the same pitch
Chris@334 1093 double restartFactor = 1.5;
Chris@334 1094 if (duration >= durationThreshold * 2 &&
Chris@334 1095 (active.find(note)->second >
Chris@334 1096 restartFactor * m_pianoRoll[width-1][note])) {
Chris@334 1097 m_current.erase(note);
Chris@336 1098 emitNote(start, end-1, note, noteFeatures);
Chris@336 1099 emitOffset(start, end-1, note, onOffsetFeatures);
Chris@334 1100 // and remove this so that we start counting the new
Chris@334 1101 // note's duration from the current position
Chris@334 1102 m_pianoRoll[width-1].erase(note);
Chris@334 1103 }
Chris@319 1104 }
Chris@41 1105 }
Chris@41 1106
Chris@62 1107 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
Chris@41 1108
Chris@336 1109 return { noteFeatures, onsetFeatures, onOffsetFeatures };
Chris@41 1110 }
Chris@41 1111
Chris@169 1112 void
Chris@336 1113 Silvet::emitNote(int start, int end, int note, FeatureList &noteFeatures)
Chris@169 1114 {
Chris@169 1115 int partStart = start;
Chris@169 1116 int partShift = 0;
Chris@320 1117 double partStrength = 0;
Chris@169 1118
Chris@252 1119 int partThreshold = floor(0.05 * m_colsPerSec);
Chris@169 1120
Chris@169 1121 for (int i = start; i != end; ++i) {
Chris@169 1122
Chris@169 1123 double strength = m_pianoRoll[i][note];
Chris@169 1124
Chris@169 1125 int shift = 0;
Chris@169 1126
Chris@336 1127 if (getShiftCount() > 1) {
Chris@169 1128
Chris@169 1129 shift = m_pianoRollShifts[i][note];
Chris@169 1130
Chris@169 1131 if (i == partStart) {
Chris@169 1132 partShift = shift;
Chris@169 1133 }
Chris@169 1134
Chris@169 1135 if (i > partStart + partThreshold && shift != partShift) {
Chris@169 1136
Chris@169 1137 // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl;
Chris@169 1138
Chris@169 1139 // pitch has changed, emit an intermediate note
Chris@252 1140 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 1141 i,
Chris@252 1142 note,
Chris@252 1143 partShift,
Chris@320 1144 partStrength));
Chris@169 1145 partStart = i;
Chris@169 1146 partShift = shift;
Chris@320 1147 partStrength = 0;
Chris@169 1148 }
Chris@169 1149 }
Chris@169 1150
Chris@320 1151 if (strength > partStrength) {
Chris@320 1152 partStrength = strength;
Chris@169 1153 }
Chris@169 1154 }
Chris@169 1155
Chris@169 1156 if (end >= partStart + partThreshold) {
Chris@252 1157 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 1158 end,
Chris@252 1159 note,
Chris@252 1160 partShift,
Chris@320 1161 partStrength));
Chris@169 1162 }
Chris@169 1163 }
Chris@252 1164
Chris@319 1165 void
Chris@336 1166 Silvet::emitOnset(int start, int note, FeatureList &onOffsetFeatures)
Chris@319 1167 {
Chris@319 1168 int len = int(m_pianoRoll.size());
Chris@320 1169
Chris@320 1170 double onsetStrength = 0;
Chris@319 1171
Chris@319 1172 int shift = 0;
Chris@336 1173 if (getShiftCount() > 1) {
Chris@319 1174 shift = m_pianoRollShifts[start][note];
Chris@319 1175 }
Chris@319 1176
Chris@319 1177 for (int i = start; i < len; ++i) {
Chris@319 1178 double strength = m_pianoRoll[i][note];
Chris@320 1179 if (strength > onsetStrength) {
Chris@320 1180 onsetStrength = strength;
Chris@319 1181 }
Chris@319 1182 }
Chris@319 1183
Chris@336 1184 if (onsetStrength == 0) return;
Chris@336 1185
Chris@336 1186 onOffsetFeatures.push_back(makeOnsetFeature(start,
Chris@336 1187 note,
Chris@336 1188 shift,
Chris@336 1189 onsetStrength));
Chris@336 1190 }
Chris@336 1191
Chris@336 1192 void
Chris@336 1193 Silvet::emitOffset(int start, int end, int note, FeatureList &onOffsetFeatures)
Chris@336 1194 {
Chris@336 1195 int shift = 0;
Chris@336 1196 if (getShiftCount() > 1) {
Chris@336 1197 shift = m_pianoRollShifts[start][note];
Chris@336 1198 }
Chris@336 1199
Chris@336 1200 onOffsetFeatures.push_back(makeOffsetFeature(end,
Chris@336 1201 note,
Chris@336 1202 shift));
Chris@319 1203 }
Chris@319 1204
Chris@309 1205 RealTime
Chris@309 1206 Silvet::getColumnTimestamp(int column)
Chris@309 1207 {
Chris@309 1208 double columnDuration = 1.0 / m_colsPerSec;
Chris@309 1209 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
Chris@309 1210
Chris@309 1211 return m_startTime + RealTime::fromSeconds
Chris@309 1212 (columnDuration * (column - postFilterLatency) + 0.02);
Chris@309 1213 }
Chris@309 1214
Chris@252 1215 Silvet::Feature
Chris@252 1216 Silvet::makeNoteFeature(int start,
Chris@252 1217 int end,
Chris@252 1218 int note,
Chris@252 1219 int shift,
Chris@320 1220 double strength)
Chris@252 1221 {
Chris@252 1222 Feature f;
Chris@252 1223
Chris@252 1224 f.hasTimestamp = true;
Chris@309 1225 f.timestamp = getColumnTimestamp(start);
Chris@252 1226
Chris@252 1227 f.hasDuration = true;
Chris@309 1228 f.duration = getColumnTimestamp(end) - f.timestamp;
Chris@252 1229
Chris@252 1230 f.values.clear();
Chris@336 1231 f.values.push_back(getNoteFrequency(note, shift));
Chris@320 1232 f.values.push_back(getVelocityFor(strength, start));
Chris@252 1233
Chris@336 1234 f.label = getNoteName(note, shift);
Chris@252 1235
Chris@252 1236 return f;
Chris@252 1237 }
Chris@252 1238
Chris@319 1239 Silvet::Feature
Chris@319 1240 Silvet::makeOnsetFeature(int start,
Chris@319 1241 int note,
Chris@319 1242 int shift,
Chris@320 1243 double strength)
Chris@319 1244 {
Chris@319 1245 Feature f;
Chris@319 1246
Chris@319 1247 f.hasTimestamp = true;
Chris@319 1248 f.timestamp = getColumnTimestamp(start);
Chris@319 1249
Chris@319 1250 f.hasDuration = false;
Chris@319 1251
Chris@319 1252 f.values.clear();
Chris@336 1253 f.values.push_back(getNoteFrequency(note, shift));
Chris@320 1254 f.values.push_back(getVelocityFor(strength, start));
Chris@319 1255
Chris@336 1256 f.label = getNoteName(note, shift);
Chris@336 1257
Chris@336 1258 return f;
Chris@336 1259 }
Chris@336 1260
Chris@336 1261 Silvet::Feature
Chris@336 1262 Silvet::makeOffsetFeature(int col,
Chris@336 1263 int note,
Chris@336 1264 int shift)
Chris@336 1265 {
Chris@336 1266 Feature f;
Chris@336 1267
Chris@336 1268 f.hasTimestamp = true;
Chris@336 1269 f.timestamp = getColumnTimestamp(col);
Chris@336 1270
Chris@336 1271 f.hasDuration = false;
Chris@336 1272
Chris@336 1273 f.values.clear();
Chris@336 1274 f.values.push_back(getNoteFrequency(note, shift));
Chris@336 1275 f.values.push_back(0); // velocity 0 for offset
Chris@336 1276
Chris@336 1277 f.label = getNoteName(note, shift) + " off";
Chris@319 1278
Chris@319 1279 return f;
Chris@319 1280 }
Chris@319 1281
Chris@320 1282 int
Chris@320 1283 Silvet::getVelocityFor(double strength, int column)
Chris@320 1284 {
Chris@320 1285 RealTime rt = getColumnTimestamp(column + 1);
Chris@320 1286
Chris@320 1287 float inputGain = getInputGainAt(rt);
Chris@320 1288
Chris@320 1289 double scale = 2.0;
Chris@320 1290 if (m_mode == LiveMode) scale = 20.0;
Chris@320 1291
Chris@320 1292 double velocity = round((strength * scale) / inputGain);
Chris@320 1293
Chris@320 1294 if (velocity > 127.0) velocity = 127.0;
Chris@320 1295 if (velocity < 1.0) velocity = 1.0; // assume surpassed 0 threshold already
Chris@320 1296
Chris@320 1297 return int(velocity);
Chris@320 1298 }
Chris@320 1299
Chris@252 1300 float
Chris@252 1301 Silvet::getInputGainAt(RealTime t)
Chris@252 1302 {
Chris@252 1303 map<RealTime, float>::const_iterator i = m_inputGains.lower_bound(t);
Chris@252 1304
Chris@252 1305 if (i == m_inputGains.end()) {
Chris@252 1306 if (i != m_inputGains.begin()) {
Chris@252 1307 --i;
Chris@252 1308 } else {
Chris@252 1309 return 1.f; // no data
Chris@252 1310 }
Chris@252 1311 }
Chris@252 1312
Chris@252 1313 // cerr << "gain at time " << t << " = " << i->second << endl;
Chris@252 1314
Chris@252 1315 return i->second;
Chris@252 1316 }
Chris@252 1317