annotate src/Silvet.cpp @ 341:705d807ca2ca livemode

Replace Draft with Live mode
author Chris Cannam
date Mon, 06 Jul 2015 09:15:09 +0100
parents d25e4aee73d7
children 460cabb27bf7
rev   line source
Chris@31 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@31 2
Chris@31 3 /*
Chris@31 4 Silvet
Chris@31 5
Chris@31 6 A Vamp plugin for note transcription.
Chris@31 7 Centre for Digital Music, Queen Mary University of London.
Chris@31 8
Chris@31 9 This program is free software; you can redistribute it and/or
Chris@31 10 modify it under the terms of the GNU General Public License as
Chris@31 11 published by the Free Software Foundation; either version 2 of the
Chris@31 12 License, or (at your option) any later version. See the file
Chris@31 13 COPYING included with this distribution for more information.
Chris@31 14 */
Chris@31 15
Chris@31 16 #include "Silvet.h"
Chris@34 17 #include "EM.h"
Chris@31 18
Chris@152 19 #include <cq/CQSpectrogram.h>
Chris@31 20
Chris@152 21 #include "MedianFilter.h"
Chris@152 22 #include "constant-q-cpp/src/dsp/Resampler.h"
Chris@246 23 #include "flattendynamics-ladspa.h"
Chris@298 24 #include "LiveInstruments.h"
Chris@31 25
Chris@31 26 #include <vector>
Chris@312 27 #include <future>
Chris@31 28
Chris@32 29 #include <cstdio>
Chris@32 30
Chris@31 31 using std::vector;
Chris@48 32 using std::cout;
Chris@31 33 using std::cerr;
Chris@31 34 using std::endl;
Chris@311 35 using std::pair;
Chris@312 36 using std::future;
Chris@312 37 using std::async;
Chris@40 38 using Vamp::RealTime;
Chris@31 39
Chris@31 40 static int processingSampleRate = 44100;
Chris@298 41
Chris@298 42 static int binsPerSemitoneLive = 1;
Chris@298 43 static int binsPerSemitoneNormal = 5;
Chris@170 44
Chris@272 45 static int minInputSampleRate = 100;
Chris@272 46 static int maxInputSampleRate = 192000;
Chris@272 47
Chris@316 48 static const Silvet::ProcessingMode defaultMode = Silvet::HighQualityMode;
Chris@316 49
Chris@31 50 Silvet::Silvet(float inputSampleRate) :
Chris@31 51 Plugin(inputSampleRate),
Chris@161 52 m_instruments(InstrumentPack::listInstrumentPacks()),
Chris@298 53 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)),
Chris@31 54 m_resampler(0),
Chris@246 55 m_flattener(0),
Chris@110 56 m_cq(0),
Chris@316 57 m_mode(defaultMode),
Chris@166 58 m_fineTuning(false),
Chris@178 59 m_instrument(0),
Chris@313 60 m_colsPerSec(50),
Chris@313 61 m_haveStartTime(false)
Chris@31 62 {
Chris@31 63 }
Chris@31 64
Chris@31 65 Silvet::~Silvet()
Chris@31 66 {
Chris@31 67 delete m_resampler;
Chris@246 68 delete m_flattener;
Chris@31 69 delete m_cq;
Chris@41 70 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 71 delete m_postFilter[i];
Chris@41 72 }
Chris@31 73 }
Chris@31 74
Chris@31 75 string
Chris@31 76 Silvet::getIdentifier() const
Chris@31 77 {
Chris@31 78 return "silvet";
Chris@31 79 }
Chris@31 80
Chris@31 81 string
Chris@31 82 Silvet::getName() const
Chris@31 83 {
Chris@31 84 return "Silvet Note Transcription";
Chris@31 85 }
Chris@31 86
Chris@31 87 string
Chris@31 88 Silvet::getDescription() const
Chris@31 89 {
Chris@191 90 return "Estimate the note onsets, pitches, and durations that make up a music recording.";
Chris@31 91 }
Chris@31 92
Chris@31 93 string
Chris@31 94 Silvet::getMaker() const
Chris@31 95 {
Chris@191 96 return "Queen Mary, University of London";
Chris@31 97 }
Chris@31 98
Chris@31 99 int
Chris@31 100 Silvet::getPluginVersion() const
Chris@31 101 {
Chris@309 102 return 3;
Chris@31 103 }
Chris@31 104
Chris@31 105 string
Chris@31 106 Silvet::getCopyright() const
Chris@31 107 {
Chris@191 108 return "Method by Emmanouil Benetos and Simon Dixon; plugin by Chris Cannam and Emmanouil Benetos. GPL licence.";
Chris@31 109 }
Chris@31 110
Chris@31 111 Silvet::InputDomain
Chris@31 112 Silvet::getInputDomain() const
Chris@31 113 {
Chris@31 114 return TimeDomain;
Chris@31 115 }
Chris@31 116
Chris@31 117 size_t
Chris@31 118 Silvet::getPreferredBlockSize() const
Chris@31 119 {
Chris@31 120 return 0;
Chris@31 121 }
Chris@31 122
Chris@31 123 size_t
Chris@31 124 Silvet::getPreferredStepSize() const
Chris@31 125 {
Chris@31 126 return 0;
Chris@31 127 }
Chris@31 128
Chris@31 129 size_t
Chris@31 130 Silvet::getMinChannelCount() const
Chris@31 131 {
Chris@31 132 return 1;
Chris@31 133 }
Chris@31 134
Chris@31 135 size_t
Chris@31 136 Silvet::getMaxChannelCount() const
Chris@31 137 {
Chris@31 138 return 1;
Chris@31 139 }
Chris@31 140
Chris@31 141 Silvet::ParameterList
Chris@31 142 Silvet::getParameterDescriptors() const
Chris@31 143 {
Chris@31 144 ParameterList list;
Chris@110 145
Chris@110 146 ParameterDescriptor desc;
Chris@110 147 desc.identifier = "mode";
Chris@110 148 desc.name = "Processing mode";
Chris@110 149 desc.unit = "";
Chris@341 150 desc.description = "Sets the tradeoff of processing speed against transcription quality. Live mode is much faster and detects notes with relatively low latency; Intensive mode (the default) is slower but will almost always produce better results.";
Chris@110 151 desc.minValue = 0;
Chris@297 152 desc.maxValue = 2;
Chris@316 153 desc.defaultValue = int(defaultMode);
Chris@110 154 desc.isQuantized = true;
Chris@110 155 desc.quantizeStep = 1;
Chris@341 156 desc.valueNames.push_back("Live (faster and lower latency)");
Chris@165 157 desc.valueNames.push_back("Intensive (higher quality)");
Chris@161 158 list.push_back(desc);
Chris@161 159
Chris@176 160 desc.identifier = "instrument";
Chris@176 161 desc.name = "Instrument";
Chris@161 162 desc.unit = "";
Chris@271 163 desc.description = "The instrument or instruments known to be present in the recording. This affects the set of instrument templates used, as well as the expected level of polyphony in the output. Using a more limited set of instruments than the default will also make the plugin run faster.\nNote that this plugin cannot isolate instruments: you can't use this setting to request notes from only one instrument in a recording with several. Instead, use this as a hint to the plugin about which instruments are actually present.";
Chris@161 164 desc.minValue = 0;
Chris@162 165 desc.maxValue = m_instruments.size()-1;
Chris@162 166 desc.defaultValue = 0;
Chris@161 167 desc.isQuantized = true;
Chris@161 168 desc.quantizeStep = 1;
Chris@161 169 desc.valueNames.clear();
Chris@162 170 for (int i = 0; i < int(m_instruments.size()); ++i) {
Chris@162 171 desc.valueNames.push_back(m_instruments[i].name);
Chris@162 172 }
Chris@166 173 list.push_back(desc);
Chris@161 174
Chris@166 175 desc.identifier = "finetune";
Chris@166 176 desc.name = "Return fine pitch estimates";
Chris@166 177 desc.unit = "";
Chris@271 178 desc.description = "Return pitch estimates at finer than semitone resolution. This works only in Intensive mode. Notes that appear to drift in pitch will be split up into shorter notes with individually finer pitches.";
Chris@166 179 desc.minValue = 0;
Chris@166 180 desc.maxValue = 1;
Chris@166 181 desc.defaultValue = 0;
Chris@166 182 desc.isQuantized = true;
Chris@166 183 desc.quantizeStep = 1;
Chris@166 184 desc.valueNames.clear();
Chris@110 185 list.push_back(desc);
Chris@110 186
Chris@31 187 return list;
Chris@31 188 }
Chris@31 189
Chris@31 190 float
Chris@31 191 Silvet::getParameter(string identifier) const
Chris@31 192 {
Chris@110 193 if (identifier == "mode") {
Chris@297 194 return (float)(int)m_mode;
Chris@166 195 } else if (identifier == "finetune") {
Chris@166 196 return m_fineTuning ? 1.f : 0.f;
Chris@176 197 } else if (identifier == "instrument") {
Chris@162 198 return m_instrument;
Chris@110 199 }
Chris@31 200 return 0;
Chris@31 201 }
Chris@31 202
Chris@31 203 void
Chris@31 204 Silvet::setParameter(string identifier, float value)
Chris@31 205 {
Chris@110 206 if (identifier == "mode") {
Chris@297 207 m_mode = (ProcessingMode)(int)(value + 0.5);
Chris@166 208 } else if (identifier == "finetune") {
Chris@166 209 m_fineTuning = (value > 0.5);
Chris@176 210 } else if (identifier == "instrument") {
Chris@162 211 m_instrument = lrintf(value);
Chris@110 212 }
Chris@31 213 }
Chris@31 214
Chris@31 215 Silvet::ProgramList
Chris@31 216 Silvet::getPrograms() const
Chris@31 217 {
Chris@31 218 ProgramList list;
Chris@31 219 return list;
Chris@31 220 }
Chris@31 221
Chris@31 222 string
Chris@31 223 Silvet::getCurrentProgram() const
Chris@31 224 {
Chris@31 225 return "";
Chris@31 226 }
Chris@31 227
Chris@31 228 void
Chris@31 229 Silvet::selectProgram(string name)
Chris@31 230 {
Chris@31 231 }
Chris@31 232
Chris@31 233 Silvet::OutputList
Chris@31 234 Silvet::getOutputDescriptors() const
Chris@31 235 {
Chris@31 236 OutputList list;
Chris@31 237
Chris@31 238 OutputDescriptor d;
Chris@51 239 d.identifier = "notes";
Chris@51 240 d.name = "Note transcription";
Chris@329 241 d.description = "Overall note transcription. Each note has time, duration, estimated fundamental frequency, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
Chris@41 242 d.unit = "Hz";
Chris@31 243 d.hasFixedBinCount = true;
Chris@31 244 d.binCount = 2;
Chris@41 245 d.binNames.push_back("Frequency");
Chris@31 246 d.binNames.push_back("Velocity");
Chris@31 247 d.hasKnownExtents = false;
Chris@31 248 d.isQuantized = false;
Chris@31 249 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@246 250 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@31 251 d.hasDuration = true;
Chris@32 252 m_notesOutputNo = list.size();
Chris@32 253 list.push_back(d);
Chris@32 254
Chris@319 255 d.identifier = "onsets";
Chris@319 256 d.name = "Note onsets";
Chris@323 257 d.description = "Note onsets, without durations. These can be calculated sooner than complete notes, because it isn't necessary to wait for a note to finish before returning its feature. Each event has time, estimated fundamental frequency in Hz, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
Chris@319 258 d.unit = "Hz";
Chris@319 259 d.hasFixedBinCount = true;
Chris@319 260 d.binCount = 2;
Chris@319 261 d.binNames.push_back("Frequency");
Chris@319 262 d.binNames.push_back("Velocity");
Chris@319 263 d.hasKnownExtents = false;
Chris@319 264 d.isQuantized = false;
Chris@319 265 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@319 266 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@319 267 d.hasDuration = false;
Chris@319 268 m_onsetsOutputNo = list.size();
Chris@319 269 list.push_back(d);
Chris@319 270
Chris@336 271 d.identifier = "onoffsets";
Chris@336 272 d.name = "Note onsets and offsets";
Chris@336 273 d.description = "Note onsets and offsets as separate events. Each onset event has time, estimated fundamental frequency in Hz, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture. Offsets are represented in the same way but with a velocity of 0.";
Chris@336 274 d.unit = "Hz";
Chris@336 275 d.hasFixedBinCount = true;
Chris@336 276 d.binCount = 2;
Chris@336 277 d.binNames.push_back("Frequency");
Chris@336 278 d.binNames.push_back("Velocity");
Chris@336 279 d.hasKnownExtents = false;
Chris@336 280 d.isQuantized = false;
Chris@336 281 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@336 282 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@336 283 d.hasDuration = false;
Chris@336 284 m_onOffsetsOutputNo = list.size();
Chris@336 285 list.push_back(d);
Chris@336 286
Chris@178 287 d.identifier = "timefreq";
Chris@178 288 d.name = "Time-frequency distribution";
Chris@271 289 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
Chris@178 290 d.unit = "";
Chris@178 291 d.hasFixedBinCount = true;
Chris@298 292 d.binCount = getPack(0).templateHeight;
Chris@178 293 d.binNames.clear();
Chris@178 294 if (m_cq) {
Chris@294 295 char name[50];
Chris@298 296 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@178 297 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@178 298 // lowest-frequency 55 bins have been dropped, for a
Chris@178 299 // 545-bin template. The native CQ bins go high->low
Chris@178 300 // frequency though, so these are still the first 545 bins
Chris@178 301 // as reported by getBinFrequency, though in reverse order
Chris@178 302 float freq = m_cq->getBinFrequency
Chris@298 303 (getPack(0).templateHeight - i - 1);
Chris@178 304 sprintf(name, "%.1f Hz", freq);
Chris@178 305 d.binNames.push_back(name);
Chris@178 306 }
Chris@178 307 }
Chris@178 308 d.hasKnownExtents = false;
Chris@178 309 d.isQuantized = false;
Chris@178 310 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@178 311 d.sampleRate = m_colsPerSec;
Chris@178 312 d.hasDuration = false;
Chris@178 313 m_fcqOutputNo = list.size();
Chris@178 314 list.push_back(d);
Chris@178 315
Chris@294 316 d.identifier = "pitchactivation";
Chris@294 317 d.name = "Pitch activation distribution";
Chris@294 318 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction.";
Chris@294 319 d.unit = "";
Chris@294 320 d.hasFixedBinCount = true;
Chris@298 321 d.binCount = getPack(0).templateNoteCount;
Chris@294 322 d.binNames.clear();
Chris@294 323 if (m_cq) {
Chris@298 324 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@336 325 d.binNames.push_back(getNoteName(i, 0));
Chris@294 326 }
Chris@294 327 }
Chris@294 328 d.hasKnownExtents = false;
Chris@294 329 d.isQuantized = false;
Chris@294 330 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@294 331 d.sampleRate = m_colsPerSec;
Chris@294 332 d.hasDuration = false;
Chris@294 333 m_pitchOutputNo = list.size();
Chris@294 334 list.push_back(d);
Chris@294 335
Chris@309 336 d.identifier = "chroma";
Chris@309 337 d.name = "Pitch chroma distribution";
Chris@309 338 d.description = "Pitch chroma distribution formed by wrapping the un-thresholded pitch activation distribution into a single octave of semitone bins.";
Chris@309 339 d.unit = "";
Chris@309 340 d.hasFixedBinCount = true;
Chris@309 341 d.binCount = 12;
Chris@309 342 d.binNames.clear();
Chris@309 343 if (m_cq) {
Chris@309 344 for (int i = 0; i < 12; ++i) {
Chris@320 345 d.binNames.push_back(getChromaName(i));
Chris@309 346 }
Chris@309 347 }
Chris@309 348 d.hasKnownExtents = false;
Chris@309 349 d.isQuantized = false;
Chris@309 350 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@309 351 d.sampleRate = m_colsPerSec;
Chris@309 352 d.hasDuration = false;
Chris@309 353 m_chromaOutputNo = list.size();
Chris@309 354 list.push_back(d);
Chris@309 355
Chris@302 356 d.identifier = "templates";
Chris@302 357 d.name = "Templates";
Chris@302 358 d.description = "Constant-Q spectral templates for the selected instrument pack.";
Chris@302 359 d.unit = "";
Chris@302 360 d.hasFixedBinCount = true;
Chris@302 361 d.binCount = getPack(0).templateHeight;
Chris@302 362 d.binNames.clear();
Chris@302 363 if (m_cq) {
Chris@302 364 char name[50];
Chris@302 365 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@302 366 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@302 367 // lowest-frequency 55 bins have been dropped, for a
Chris@302 368 // 545-bin template. The native CQ bins go high->low
Chris@302 369 // frequency though, so these are still the first 545 bins
Chris@302 370 // as reported by getBinFrequency, though in reverse order
Chris@302 371 float freq = m_cq->getBinFrequency
Chris@302 372 (getPack(0).templateHeight - i - 1);
Chris@302 373 sprintf(name, "%.1f Hz", freq);
Chris@302 374 d.binNames.push_back(name);
Chris@302 375 }
Chris@302 376 }
Chris@302 377 d.hasKnownExtents = false;
Chris@302 378 d.isQuantized = false;
Chris@302 379 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@302 380 d.sampleRate = m_colsPerSec;
Chris@302 381 d.hasDuration = false;
Chris@302 382 m_templateOutputNo = list.size();
Chris@302 383 list.push_back(d);
Chris@302 384
Chris@31 385 return list;
Chris@31 386 }
Chris@31 387
Chris@38 388 std::string
Chris@320 389 Silvet::getChromaName(int pitch) const
Chris@38 390 {
Chris@38 391 static const char *names[] = {
Chris@38 392 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
Chris@38 393 };
Chris@38 394
Chris@309 395 return names[pitch];
Chris@309 396 }
Chris@309 397
Chris@309 398 std::string
Chris@336 399 Silvet::getNoteName(int note, int shift) const
Chris@309 400 {
Chris@320 401 string n = getChromaName(note % 12);
Chris@38 402
Chris@175 403 int oct = (note + 9) / 12;
Chris@38 404
Chris@175 405 char buf[30];
Chris@175 406
Chris@175 407 float pshift = 0.f;
Chris@336 408 int shiftCount = getShiftCount();
Chris@175 409 if (shiftCount > 1) {
Chris@320 410 // see getNoteFrequency below
Chris@175 411 pshift =
Chris@175 412 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 413 }
Chris@175 414
Chris@175 415 if (pshift > 0.f) {
Chris@309 416 sprintf(buf, "%s%d+%dc", n.c_str(), oct, int(round(pshift * 100)));
Chris@175 417 } else if (pshift < 0.f) {
Chris@309 418 sprintf(buf, "%s%d-%dc", n.c_str(), oct, int(round((-pshift) * 100)));
Chris@175 419 } else {
Chris@309 420 sprintf(buf, "%s%d", n.c_str(), oct);
Chris@175 421 }
Chris@38 422
Chris@38 423 return buf;
Chris@38 424 }
Chris@38 425
Chris@41 426 float
Chris@336 427 Silvet::getNoteFrequency(int note, int shift) const
Chris@41 428 {
Chris@169 429 // Convert shift number to a pitch shift. The given shift number
Chris@169 430 // is an offset into the template array, which starts with some
Chris@169 431 // zeros, followed by the template, then some trailing zeros.
Chris@169 432 //
Chris@169 433 // Example: if we have templateMaxShift == 2 and thus shiftCount
Chris@169 434 // == 5, then the number will be in the range 0-4 and the template
Chris@169 435 // will have 2 zeros at either end. Thus number 2 represents the
Chris@169 436 // template "as recorded", for a pitch shift of 0; smaller indices
Chris@169 437 // represent moving the template *up* in pitch (by introducing
Chris@169 438 // zeros at the start, which is the low-frequency end), for a
Chris@169 439 // positive pitch shift; and higher values represent moving it
Chris@169 440 // down in pitch, for a negative pitch shift.
Chris@169 441
Chris@175 442 float pshift = 0.f;
Chris@336 443 int shiftCount = getShiftCount();
Chris@175 444 if (shiftCount > 1) {
Chris@175 445 pshift =
Chris@175 446 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 447 }
Chris@169 448
Chris@301 449 float freq = float(27.5 * pow(2.0, (note + pshift) / 12.0));
Chris@301 450
Chris@303 451 // cerr << "note = " << note << ", shift = " << shift << ", shiftCount = "
Chris@303 452 // << shiftCount << ", obtained freq = " << freq << endl;
Chris@301 453
Chris@301 454 return freq;
Chris@41 455 }
Chris@41 456
Chris@31 457 bool
Chris@31 458 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@31 459 {
Chris@272 460 if (m_inputSampleRate < minInputSampleRate ||
Chris@272 461 m_inputSampleRate > maxInputSampleRate) {
Chris@272 462 cerr << "Silvet::initialise: Unsupported input sample rate "
Chris@272 463 << m_inputSampleRate << " (supported min " << minInputSampleRate
Chris@272 464 << ", max " << maxInputSampleRate << ")" << endl;
Chris@272 465 return false;
Chris@272 466 }
Chris@272 467
Chris@31 468 if (channels < getMinChannelCount() ||
Chris@272 469 channels > getMaxChannelCount()) {
Chris@272 470 cerr << "Silvet::initialise: Unsupported channel count " << channels
Chris@272 471 << " (supported min " << getMinChannelCount() << ", max "
Chris@272 472 << getMaxChannelCount() << ")" << endl;
Chris@272 473 return false;
Chris@272 474 }
Chris@31 475
Chris@31 476 if (stepSize != blockSize) {
Chris@31 477 cerr << "Silvet::initialise: Step size must be the same as block size ("
Chris@31 478 << stepSize << " != " << blockSize << ")" << endl;
Chris@31 479 return false;
Chris@31 480 }
Chris@31 481
Chris@31 482 m_blockSize = blockSize;
Chris@31 483
Chris@31 484 reset();
Chris@31 485
Chris@31 486 return true;
Chris@31 487 }
Chris@31 488
Chris@31 489 void
Chris@31 490 Silvet::reset()
Chris@31 491 {
Chris@31 492 delete m_resampler;
Chris@246 493 delete m_flattener;
Chris@31 494 delete m_cq;
Chris@31 495
Chris@31 496 if (m_inputSampleRate != processingSampleRate) {
Chris@31 497 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
Chris@31 498 } else {
Chris@31 499 m_resampler = 0;
Chris@31 500 }
Chris@31 501
Chris@246 502 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
Chris@246 503 m_flattener->reset();
Chris@246 504
Chris@301 505 // this happens to be processingSampleRate / 3, and is the top
Chris@301 506 // freq used for the EM templates:
Chris@301 507 double maxFreq = 14700;
Chris@301 508
Chris@301 509 if (m_mode == LiveMode) {
Chris@301 510 // We only have 12 bpo rather than 60, so we need the top bin
Chris@301 511 // to be the middle one of the top 5, i.e. 2/5 of a semitone
Chris@301 512 // lower than 14700
Chris@301 513 maxFreq *= powf(2.0, -1.0 / 30.0);
Chris@301 514 }
Chris@301 515
Chris@173 516 double minFreq = 27.5;
Chris@173 517
Chris@341 518 if (m_mode == LiveMode) {
Chris@173 519 // We don't actually return any notes from the bottom octave,
Chris@173 520 // so we can just pad with zeros
Chris@173 521 minFreq *= 2;
Chris@173 522 }
Chris@173 523
Chris@298 524 int bpo = 12 *
Chris@298 525 (m_mode == LiveMode ? binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@301 526
Chris@154 527 CQParameters params(processingSampleRate,
Chris@173 528 minFreq,
Chris@303 529 maxFreq,
Chris@298 530 bpo);
Chris@154 531
Chris@325 532 params.q = 0.8;
Chris@325 533 params.atomHopFactor = (m_mode == LiveMode ? 1.0 : 0.3);
Chris@154 534 params.threshold = 0.0005;
Chris@317 535 params.decimator =
Chris@317 536 (m_mode == LiveMode ?
Chris@317 537 CQParameters::FasterDecimator : CQParameters::BetterDecimator);
Chris@172 538 params.window = CQParameters::Hann;
Chris@154 539
Chris@154 540 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
Chris@31 541
Chris@303 542 // cerr << "CQ bins = " << m_cq->getTotalBins() << endl;
Chris@303 543 // cerr << "CQ min freq = " << m_cq->getMinFrequency() << " (and for confirmation, freq of bin 0 = " << m_cq->getBinFrequency(0) << ")" << endl;
Chris@297 544
Chris@341 545 m_colsPerSec = 50;
Chris@165 546
Chris@41 547 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 548 delete m_postFilter[i];
Chris@41 549 }
Chris@41 550 m_postFilter.clear();
Chris@303 551 int postFilterLength = 3;
Chris@298 552 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@303 553 m_postFilter.push_back(new MedianFilter<double>(postFilterLength));
Chris@41 554 }
Chris@41 555 m_pianoRoll.clear();
Chris@246 556 m_inputGains.clear();
Chris@32 557 m_columnCount = 0;
Chris@272 558 m_resampledCount = 0;
Chris@40 559 m_startTime = RealTime::zeroTime;
Chris@313 560 m_haveStartTime = false;
Chris@31 561 }
Chris@31 562
Chris@31 563 Silvet::FeatureSet
Chris@31 564 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
Chris@31 565 {
Chris@302 566 FeatureSet fs;
Chris@302 567
Chris@313 568 if (!m_haveStartTime) {
Chris@314 569
Chris@40 570 m_startTime = timestamp;
Chris@313 571 m_haveStartTime = true;
Chris@314 572
Chris@302 573 insertTemplateFeatures(fs);
Chris@40 574 }
Chris@246 575
Chris@246 576 vector<float> flattened(m_blockSize);
Chris@246 577 float gain = 1.f;
Chris@246 578 m_flattener->connectInputPort
Chris@246 579 (FlattenDynamics::AudioInputPort, inputBuffers[0]);
Chris@246 580 m_flattener->connectOutputPort
Chris@246 581 (FlattenDynamics::AudioOutputPort, &flattened[0]);
Chris@246 582 m_flattener->connectOutputPort
Chris@246 583 (FlattenDynamics::GainOutputPort, &gain);
Chris@246 584 m_flattener->process(m_blockSize);
Chris@246 585
Chris@252 586 m_inputGains[timestamp] = gain;
Chris@40 587
Chris@31 588 vector<double> data;
Chris@40 589 for (int i = 0; i < m_blockSize; ++i) {
Chris@246 590 double d = flattened[i];
Chris@235 591 data.push_back(d);
Chris@40 592 }
Chris@31 593
Chris@31 594 if (m_resampler) {
Chris@272 595
Chris@31 596 data = m_resampler->process(data.data(), data.size());
Chris@272 597
Chris@272 598 int hadCount = m_resampledCount;
Chris@272 599 m_resampledCount += data.size();
Chris@272 600
Chris@272 601 int resamplerLatency = m_resampler->getLatency();
Chris@272 602
Chris@272 603 if (hadCount < resamplerLatency) {
Chris@272 604 int stillToDrop = resamplerLatency - hadCount;
Chris@272 605 if (stillToDrop >= int(data.size())) {
Chris@302 606 return fs;
Chris@272 607 } else {
Chris@272 608 data = vector<double>(data.begin() + stillToDrop, data.end());
Chris@272 609 }
Chris@272 610 }
Chris@31 611 }
Chris@272 612
Chris@32 613 Grid cqout = m_cq->process(data);
Chris@302 614 transcribe(cqout, fs);
Chris@51 615 return fs;
Chris@34 616 }
Chris@34 617
Chris@34 618 Silvet::FeatureSet
Chris@34 619 Silvet::getRemainingFeatures()
Chris@34 620 {
Chris@145 621 Grid cqout = m_cq->getRemainingOutput();
Chris@302 622 FeatureSet fs;
Chris@336 623
Chris@302 624 if (m_columnCount == 0) {
Chris@302 625 // process() was never called, but we still want these
Chris@302 626 insertTemplateFeatures(fs);
Chris@302 627 } else {
Chris@336 628
Chris@336 629 // Complete the transcription
Chris@336 630
Chris@302 631 transcribe(cqout, fs);
Chris@336 632
Chris@336 633 // And make sure any extant playing notes are finished and returned
Chris@336 634
Chris@336 635 m_pianoRoll.push_back({});
Chris@336 636
Chris@336 637 auto events = noteTrack();
Chris@336 638
Chris@336 639 for (const auto &f : events.notes) {
Chris@336 640 fs[m_notesOutputNo].push_back(f);
Chris@336 641 }
Chris@336 642
Chris@336 643 for (const auto &f : events.onsets) {
Chris@336 644 fs[m_onsetsOutputNo].push_back(f);
Chris@336 645 }
Chris@336 646
Chris@336 647 for (const auto &f : events.onOffsets) {
Chris@336 648 fs[m_onOffsetsOutputNo].push_back(f);
Chris@336 649 }
Chris@302 650 }
Chris@336 651
Chris@51 652 return fs;
Chris@34 653 }
Chris@34 654
Chris@302 655 void
Chris@302 656 Silvet::insertTemplateFeatures(FeatureSet &fs)
Chris@302 657 {
Chris@302 658 const InstrumentPack &pack = getPack(m_instrument);
Chris@302 659 for (int i = 0; i < int(pack.templates.size()) * pack.templateNoteCount; ++i) {
Chris@302 660 RealTime timestamp = RealTime::fromSeconds(double(i) / m_colsPerSec);
Chris@302 661 Feature f;
Chris@302 662 char buffer[50];
Chris@302 663 sprintf(buffer, "Note %d", i + 1);
Chris@302 664 f.label = buffer;
Chris@302 665 f.hasTimestamp = true;
Chris@302 666 f.timestamp = timestamp;
Chris@302 667 f.values = pack.templates[i / pack.templateNoteCount]
Chris@302 668 .data[i % pack.templateNoteCount];
Chris@302 669 fs[m_templateOutputNo].push_back(f);
Chris@302 670 }
Chris@302 671 }
Chris@302 672
Chris@336 673 int
Chris@336 674 Silvet::getShiftCount() const
Chris@336 675 {
Chris@336 676 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning;
Chris@336 677 int shiftCount = 1;
Chris@336 678 if (wantShifts) {
Chris@336 679 const InstrumentPack &pack(getPack(m_instrument));
Chris@336 680 shiftCount = pack.templateMaxShift * 2 + 1;
Chris@336 681 }
Chris@336 682 return shiftCount;
Chris@336 683 }
Chris@336 684
Chris@302 685 void
Chris@302 686 Silvet::transcribe(const Grid &cqout, Silvet::FeatureSet &fs)
Chris@34 687 {
Chris@32 688 Grid filtered = preProcess(cqout);
Chris@31 689
Chris@302 690 if (filtered.empty()) return;
Chris@170 691
Chris@298 692 const InstrumentPack &pack(getPack(m_instrument));
Chris@104 693
Chris@325 694 int width = filtered.size();
Chris@325 695
Chris@325 696 double silenceThreshold = 0.01;
Chris@325 697
Chris@325 698 for (int i = 0; i < width; ++i) {
Chris@325 699
Chris@325 700 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1 + i);
Chris@325 701 float inputGain = getInputGainAt(timestamp);
Chris@325 702
Chris@178 703 Feature f;
Chris@325 704 double rms = 0.0;
Chris@325 705
Chris@178 706 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@325 707 double v = filtered[i][j];
Chris@325 708 rms += v * v;
Chris@325 709 f.values.push_back(float(v));
Chris@178 710 }
Chris@325 711
Chris@325 712 rms = sqrt(rms / pack.templateHeight);
Chris@325 713 if (rms / inputGain < silenceThreshold) {
Chris@325 714 filtered[i].clear();
Chris@325 715 }
Chris@325 716
Chris@178 717 fs[m_fcqOutputNo].push_back(f);
Chris@178 718 }
Chris@325 719
Chris@311 720 Grid localPitches(width);
Chris@170 721
Chris@336 722 int shiftCount = getShiftCount();
Chris@336 723 bool wantShifts = (shiftCount > 1);
Chris@170 724
Chris@170 725 vector<vector<int> > localBestShifts;
Chris@170 726 if (wantShifts) {
Chris@311 727 localBestShifts = vector<vector<int> >(width);
Chris@170 728 }
Chris@170 729
Chris@312 730 #ifndef MAX_EM_THREADS
Chris@312 731 #define MAX_EM_THREADS 8
Chris@312 732 #endif
Chris@312 733
Chris@317 734 int emThreadCount = MAX_EM_THREADS;
Chris@317 735 if (m_mode == LiveMode && pack.templates.size() == 1) {
Chris@317 736 // The EM step is probably not slow enough to merit it
Chris@317 737 emThreadCount = 1;
Chris@317 738 }
Chris@317 739
Chris@312 740 #if (defined(MAX_EM_THREADS) && (MAX_EM_THREADS > 1))
Chris@317 741 if (emThreadCount > 1) {
Chris@317 742 for (int i = 0; i < width; ) {
Chris@317 743 typedef future<pair<vector<double>, vector<int>>> EMFuture;
Chris@317 744 vector<EMFuture> results;
Chris@317 745 for (int j = 0; j < emThreadCount && i + j < width; ++j) {
Chris@317 746 results.push_back
Chris@317 747 (async(std::launch::async,
Chris@317 748 [&](int index) {
Chris@336 749 return applyEM(pack, filtered.at(index));
Chris@317 750 }, i + j));
Chris@317 751 }
Chris@317 752 for (int j = 0; j < emThreadCount && i + j < width; ++j) {
Chris@317 753 auto out = results[j].get();
Chris@317 754 localPitches[i+j] = out.first;
Chris@317 755 if (wantShifts) localBestShifts[i+j] = out.second;
Chris@317 756 }
Chris@317 757 i += emThreadCount;
Chris@312 758 }
Chris@123 759 }
Chris@312 760 #endif
Chris@317 761
Chris@317 762 if (emThreadCount == 1) {
Chris@317 763 for (int i = 0; i < width; ++i) {
Chris@336 764 auto out = applyEM(pack, filtered.at(i));
Chris@317 765 localPitches[i] = out.first;
Chris@317 766 if (wantShifts) localBestShifts[i] = out.second;
Chris@317 767 }
Chris@317 768 }
Chris@305 769
Chris@166 770 for (int i = 0; i < width; ++i) {
Chris@37 771
Chris@321 772 vector<double> filtered;
Chris@321 773
Chris@321 774 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@321 775 m_postFilter[j]->push(localPitches[i][j]);
Chris@321 776 filtered.push_back(m_postFilter[j]->get());
Chris@321 777 }
Chris@294 778
Chris@309 779 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1);
Chris@309 780 float inputGain = getInputGainAt(timestamp);
Chris@309 781
Chris@294 782 Feature f;
Chris@294 783 for (int j = 0; j < (int)filtered.size(); ++j) {
Chris@309 784 float v = filtered[j];
Chris@294 785 if (v < pack.levelThreshold) v = 0.f;
Chris@309 786 f.values.push_back(v / inputGain);
Chris@294 787 }
Chris@294 788 fs[m_pitchOutputNo].push_back(f);
Chris@309 789
Chris@309 790 f.values.clear();
Chris@309 791 f.values.resize(12);
Chris@309 792 for (int j = 0; j < (int)filtered.size(); ++j) {
Chris@309 793 f.values[j % 12] += filtered[j] / inputGain;
Chris@309 794 }
Chris@309 795 fs[m_chromaOutputNo].push_back(f);
Chris@38 796
Chris@321 797 // This pushes the up-to-max-polyphony activation column to
Chris@321 798 // m_pianoRoll
Chris@336 799 postProcess(filtered, localBestShifts[i]);
Chris@321 800
Chris@336 801 auto events = noteTrack();
Chris@319 802
Chris@336 803 for (const auto &f : events.notes) {
Chris@336 804 fs[m_notesOutputNo].push_back(f);
Chris@40 805 }
Chris@319 806
Chris@336 807 for (const auto &f : events.onsets) {
Chris@336 808 fs[m_onsetsOutputNo].push_back(f);
Chris@336 809 }
Chris@336 810
Chris@336 811 for (const auto &f : events.onOffsets) {
Chris@336 812 fs[m_onOffsetsOutputNo].push_back(f);
Chris@319 813 }
Chris@34 814 }
Chris@31 815 }
Chris@31 816
Chris@311 817 pair<vector<double>, vector<int> >
Chris@311 818 Silvet::applyEM(const InstrumentPack &pack,
Chris@336 819 const vector<double> &column)
Chris@311 820 {
Chris@311 821 double columnThreshold = 1e-5;
Chris@311 822
Chris@314 823 if (m_mode == LiveMode) {
Chris@325 824 columnThreshold /= 15;
Chris@314 825 }
Chris@314 826
Chris@311 827 vector<double> pitches(pack.templateNoteCount, 0.0);
Chris@311 828 vector<int> bestShifts;
Chris@325 829
Chris@325 830 if (column.empty()) return { pitches, bestShifts };
Chris@311 831
Chris@311 832 double sum = 0.0;
Chris@311 833 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@311 834 sum += column.at(j);
Chris@311 835 }
Chris@311 836 if (sum < columnThreshold) return { pitches, bestShifts };
Chris@311 837
Chris@314 838 EM em(&pack, m_mode == HighQualityMode);
Chris@311 839
Chris@311 840 em.setPitchSparsity(pack.pitchSparsity);
Chris@311 841 em.setSourceSparsity(pack.sourceSparsity);
Chris@311 842
Chris@314 843 int iterations = (m_mode == HighQualityMode ? 20 : 10);
Chris@311 844
Chris@311 845 for (int j = 0; j < iterations; ++j) {
Chris@311 846 em.iterate(column.data());
Chris@311 847 }
Chris@311 848
Chris@311 849 const float *pitchDist = em.getPitchDistribution();
Chris@311 850 const float *const *shiftDist = em.getShifts();
Chris@311 851
Chris@336 852 int shiftCount = getShiftCount();
Chris@311 853
Chris@311 854 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@311 855
Chris@311 856 pitches[j] = pitchDist[j] * sum;
Chris@311 857
Chris@311 858 int bestShift = 0;
Chris@311 859 float bestShiftValue = 0.0;
Chris@336 860 if (shiftCount > 1) {
Chris@311 861 for (int k = 0; k < shiftCount; ++k) {
Chris@311 862 float value = shiftDist[k][j];
Chris@311 863 if (k == 0 || value > bestShiftValue) {
Chris@311 864 bestShiftValue = value;
Chris@311 865 bestShift = k;
Chris@311 866 }
Chris@311 867 }
Chris@311 868 bestShifts.push_back(bestShift);
Chris@311 869 }
Chris@311 870 }
Chris@311 871
Chris@311 872 return { pitches, bestShifts };
Chris@311 873 }
Chris@311 874
Chris@32 875 Silvet::Grid
Chris@32 876 Silvet::preProcess(const Grid &in)
Chris@32 877 {
Chris@32 878 int width = in.size();
Chris@32 879
Chris@165 880 int spacing = processingSampleRate / m_colsPerSec;
Chris@32 881
Chris@165 882 // need to be careful that col spacing is an integer number of samples!
Chris@165 883 assert(spacing * m_colsPerSec == processingSampleRate);
Chris@32 884
Chris@32 885 Grid out;
Chris@32 886
Chris@58 887 // We count the CQ latency in terms of processing hops, but
Chris@58 888 // actually it probably isn't an exact number of hops so this
Chris@58 889 // isn't quite accurate. But the small constant offset is
Chris@165 890 // practically irrelevant compared to the jitter from the frame
Chris@165 891 // size we reduce to in a moment
Chris@33 892 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
Chris@33 893
Chris@298 894 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 895
Chris@32 896 for (int i = 0; i < width; ++i) {
Chris@32 897
Chris@33 898 if (m_columnCount < latentColumns) {
Chris@33 899 ++m_columnCount;
Chris@33 900 continue;
Chris@33 901 }
Chris@33 902
Chris@32 903 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop();
Chris@32 904 int sampleNo = m_columnCount * m_cq->getColumnHop();
Chris@32 905
Chris@32 906 bool select = (sampleNo / spacing != prevSampleNo / spacing);
Chris@32 907
Chris@32 908 if (select) {
Chris@32 909 vector<double> inCol = in[i];
Chris@176 910 vector<double> outCol(pack.templateHeight);
Chris@32 911
Chris@178 912 // In HQ mode, the CQ returns 600 bins and we ignore the
Chris@298 913 // lowest 55 of them (assuming binsPerSemitone == 5).
Chris@178 914 //
Chris@341 915 // In live mode the CQ is an octave shorter, returning 540
Chris@341 916 // bins or equivalent, so we instead pad them with an
Chris@341 917 // additional 5 or equivalent zeros.
Chris@178 918 //
Chris@178 919 // We also need to reverse the column as we go, since the
Chris@178 920 // raw CQ has the high frequencies first and we need it
Chris@178 921 // the other way around.
Chris@32 922
Chris@298 923 int bps = (m_mode == LiveMode ?
Chris@298 924 binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@298 925
Chris@297 926 if (m_mode == HighQualityMode) {
Chris@178 927 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@298 928 int ix = inCol.size() - j - (11 * bps);
Chris@178 929 outCol[j] = inCol[ix];
Chris@178 930 }
Chris@178 931 } else {
Chris@298 932 for (int j = 0; j < bps; ++j) {
Chris@178 933 outCol[j] = 0.0;
Chris@178 934 }
Chris@298 935 for (int j = bps; j < pack.templateHeight; ++j) {
Chris@298 936 int ix = inCol.size() - j + (bps-1);
Chris@178 937 outCol[j] = inCol[ix];
Chris@178 938 }
Chris@46 939 }
Chris@32 940
Chris@46 941 vector<double> noiseLevel1 =
Chris@298 942 MedianFilter<double>::filter(8 * bps, outCol);
Chris@176 943 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 944 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
Chris@46 945 }
Chris@32 946
Chris@46 947 vector<double> noiseLevel2 =
Chris@298 948 MedianFilter<double>::filter(8 * bps, noiseLevel1);
Chris@176 949 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 950 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
Chris@32 951 }
Chris@32 952
Chris@165 953 out.push_back(outCol);
Chris@32 954 }
Chris@32 955
Chris@32 956 ++m_columnCount;
Chris@32 957 }
Chris@32 958
Chris@32 959 return out;
Chris@32 960 }
Chris@32 961
Chris@321 962 void
Chris@170 963 Silvet::postProcess(const vector<double> &pitches,
Chris@336 964 const vector<int> &bestShifts)
Chris@166 965 {
Chris@298 966 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 967
Chris@41 968 // Threshold for level and reduce number of candidate pitches
Chris@41 969
Chris@41 970 typedef std::multimap<double, int> ValueIndexMap;
Chris@41 971
Chris@41 972 ValueIndexMap strengths;
Chris@166 973
Chris@176 974 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@321 975
Chris@321 976 double strength = pitches[j];
Chris@183 977 if (strength < pack.levelThreshold) continue;
Chris@321 978
Chris@321 979 // In live mode with only a 12-bpo CQ, we are very likely to
Chris@321 980 // get clusters of two or three high scores at a time for
Chris@321 981 // neighbouring semitones. Eliminate these by picking only the
Chris@325 982 // peaks (except that we never eliminate a note that has
Chris@325 983 // already been established as currently playing). This means
Chris@325 984 // we can't recognise actual semitone chords if they ever
Chris@325 985 // appear, but it's not as if live mode is good enough for
Chris@325 986 // that to be a big deal anyway.
Chris@321 987 if (m_mode == LiveMode) {
Chris@325 988 if (m_current.find(j) == m_current.end() &&
Chris@325 989 (j == 0 ||
Chris@325 990 j + 1 == pack.templateNoteCount ||
Chris@325 991 pitches[j] < pitches[j-1] ||
Chris@325 992 pitches[j] < pitches[j+1])) {
Chris@325 993 // not a peak or a currently-playing note: skip it
Chris@321 994 continue;
Chris@321 995 }
Chris@321 996 }
Chris@323 997
Chris@168 998 strengths.insert(ValueIndexMap::value_type(strength, j));
Chris@168 999 }
Chris@166 1000
Chris@168 1001 ValueIndexMap::const_iterator si = strengths.end();
Chris@167 1002
Chris@168 1003 map<int, double> active;
Chris@168 1004 map<int, int> activeShifts;
Chris@168 1005
Chris@336 1006 int shiftCount = getShiftCount();
Chris@336 1007
Chris@183 1008 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) {
Chris@168 1009
Chris@168 1010 --si;
Chris@168 1011
Chris@168 1012 double strength = si->first;
Chris@168 1013 int j = si->second;
Chris@168 1014
Chris@168 1015 active[j] = strength;
Chris@168 1016
Chris@336 1017 if (shiftCount > 1) {
Chris@170 1018 activeShifts[j] = bestShifts[j];
Chris@167 1019 }
Chris@41 1020 }
Chris@41 1021
Chris@168 1022 m_pianoRoll.push_back(active);
Chris@170 1023
Chris@336 1024 if (shiftCount > 1) {
Chris@168 1025 m_pianoRollShifts.push_back(activeShifts);
Chris@41 1026 }
Chris@294 1027
Chris@321 1028 return;
Chris@166 1029 }
Chris@166 1030
Chris@336 1031 Silvet::FeatureChunk
Chris@336 1032 Silvet::noteTrack()
Chris@166 1033 {
Chris@41 1034 // Minimum duration pruning, and conversion to notes. We can only
Chris@41 1035 // report notes that have just ended (i.e. that are absent in the
Chris@168 1036 // latest active set but present in the prior set in the piano
Chris@41 1037 // roll) -- any notes that ended earlier will have been reported
Chris@41 1038 // already, and if they haven't ended, we don't know their
Chris@41 1039 // duration.
Chris@41 1040
Chris@168 1041 int width = m_pianoRoll.size() - 1;
Chris@168 1042
Chris@168 1043 const map<int, double> &active = m_pianoRoll[width];
Chris@41 1044
Chris@165 1045 double columnDuration = 1.0 / m_colsPerSec;
Chris@165 1046
Chris@165 1047 // only keep notes >= 100ms or thereabouts
Chris@323 1048 double durationThrSec = 0.1;
Chris@323 1049 int durationThreshold = floor(durationThrSec / columnDuration); // in cols
Chris@165 1050 if (durationThreshold < 1) durationThreshold = 1;
Chris@41 1051
Chris@336 1052 FeatureList noteFeatures, onsetFeatures, onOffsetFeatures;
Chris@41 1053
Chris@41 1054 if (width < durationThreshold + 1) {
Chris@336 1055 return { noteFeatures, onsetFeatures, onOffsetFeatures };
Chris@41 1056 }
Chris@41 1057
Chris@55 1058 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
Chris@41 1059 ni != m_pianoRoll[width-1].end(); ++ni) {
Chris@41 1060
Chris@55 1061 int note = ni->first;
Chris@41 1062
Chris@41 1063 int end = width;
Chris@41 1064 int start = end-1;
Chris@41 1065
Chris@41 1066 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
Chris@41 1067 --start;
Chris@41 1068 }
Chris@41 1069 ++start;
Chris@41 1070
Chris@319 1071 int duration = end - start;
Chris@319 1072
Chris@319 1073 if (duration < durationThreshold) {
Chris@41 1074 continue;
Chris@41 1075 }
Chris@41 1076
Chris@319 1077 if (duration == durationThreshold) {
Chris@325 1078 m_current.insert(note);
Chris@336 1079 emitOnset(start, note, onsetFeatures);
Chris@336 1080 emitOnset(start, note, onOffsetFeatures);
Chris@319 1081 }
Chris@319 1082
Chris@319 1083 if (active.find(note) == active.end()) {
Chris@319 1084 // the note was playing but just ended
Chris@325 1085 m_current.erase(note);
Chris@336 1086 emitNote(start, end, note, noteFeatures);
Chris@336 1087 emitOffset(start, end, note, onOffsetFeatures);
Chris@334 1088 } else { // still playing
Chris@334 1089 // repeated note detection: if level is greater than this
Chris@334 1090 // multiple of its previous value, then we end the note and
Chris@334 1091 // restart it with the same pitch
Chris@334 1092 double restartFactor = 1.5;
Chris@334 1093 if (duration >= durationThreshold * 2 &&
Chris@334 1094 (active.find(note)->second >
Chris@334 1095 restartFactor * m_pianoRoll[width-1][note])) {
Chris@334 1096 m_current.erase(note);
Chris@336 1097 emitNote(start, end-1, note, noteFeatures);
Chris@336 1098 emitOffset(start, end-1, note, onOffsetFeatures);
Chris@334 1099 // and remove this so that we start counting the new
Chris@334 1100 // note's duration from the current position
Chris@334 1101 m_pianoRoll[width-1].erase(note);
Chris@334 1102 }
Chris@319 1103 }
Chris@41 1104 }
Chris@41 1105
Chris@62 1106 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
Chris@41 1107
Chris@336 1108 return { noteFeatures, onsetFeatures, onOffsetFeatures };
Chris@41 1109 }
Chris@41 1110
Chris@169 1111 void
Chris@336 1112 Silvet::emitNote(int start, int end, int note, FeatureList &noteFeatures)
Chris@169 1113 {
Chris@169 1114 int partStart = start;
Chris@169 1115 int partShift = 0;
Chris@320 1116 double partStrength = 0;
Chris@169 1117
Chris@252 1118 int partThreshold = floor(0.05 * m_colsPerSec);
Chris@169 1119
Chris@169 1120 for (int i = start; i != end; ++i) {
Chris@169 1121
Chris@169 1122 double strength = m_pianoRoll[i][note];
Chris@169 1123
Chris@169 1124 int shift = 0;
Chris@169 1125
Chris@336 1126 if (getShiftCount() > 1) {
Chris@169 1127
Chris@169 1128 shift = m_pianoRollShifts[i][note];
Chris@169 1129
Chris@169 1130 if (i == partStart) {
Chris@169 1131 partShift = shift;
Chris@169 1132 }
Chris@169 1133
Chris@169 1134 if (i > partStart + partThreshold && shift != partShift) {
Chris@169 1135
Chris@169 1136 // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl;
Chris@169 1137
Chris@169 1138 // pitch has changed, emit an intermediate note
Chris@252 1139 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 1140 i,
Chris@252 1141 note,
Chris@252 1142 partShift,
Chris@320 1143 partStrength));
Chris@169 1144 partStart = i;
Chris@169 1145 partShift = shift;
Chris@320 1146 partStrength = 0;
Chris@169 1147 }
Chris@169 1148 }
Chris@169 1149
Chris@320 1150 if (strength > partStrength) {
Chris@320 1151 partStrength = strength;
Chris@169 1152 }
Chris@169 1153 }
Chris@169 1154
Chris@169 1155 if (end >= partStart + partThreshold) {
Chris@252 1156 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 1157 end,
Chris@252 1158 note,
Chris@252 1159 partShift,
Chris@320 1160 partStrength));
Chris@169 1161 }
Chris@169 1162 }
Chris@252 1163
Chris@319 1164 void
Chris@336 1165 Silvet::emitOnset(int start, int note, FeatureList &onOffsetFeatures)
Chris@319 1166 {
Chris@319 1167 int len = int(m_pianoRoll.size());
Chris@320 1168
Chris@320 1169 double onsetStrength = 0;
Chris@319 1170
Chris@319 1171 int shift = 0;
Chris@336 1172 if (getShiftCount() > 1) {
Chris@319 1173 shift = m_pianoRollShifts[start][note];
Chris@319 1174 }
Chris@319 1175
Chris@319 1176 for (int i = start; i < len; ++i) {
Chris@319 1177 double strength = m_pianoRoll[i][note];
Chris@320 1178 if (strength > onsetStrength) {
Chris@320 1179 onsetStrength = strength;
Chris@319 1180 }
Chris@319 1181 }
Chris@319 1182
Chris@336 1183 if (onsetStrength == 0) return;
Chris@336 1184
Chris@336 1185 onOffsetFeatures.push_back(makeOnsetFeature(start,
Chris@336 1186 note,
Chris@336 1187 shift,
Chris@336 1188 onsetStrength));
Chris@336 1189 }
Chris@336 1190
Chris@336 1191 void
Chris@336 1192 Silvet::emitOffset(int start, int end, int note, FeatureList &onOffsetFeatures)
Chris@336 1193 {
Chris@336 1194 int shift = 0;
Chris@336 1195 if (getShiftCount() > 1) {
Chris@336 1196 shift = m_pianoRollShifts[start][note];
Chris@336 1197 }
Chris@336 1198
Chris@336 1199 onOffsetFeatures.push_back(makeOffsetFeature(end,
Chris@336 1200 note,
Chris@336 1201 shift));
Chris@319 1202 }
Chris@319 1203
Chris@309 1204 RealTime
Chris@309 1205 Silvet::getColumnTimestamp(int column)
Chris@309 1206 {
Chris@309 1207 double columnDuration = 1.0 / m_colsPerSec;
Chris@309 1208 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
Chris@309 1209
Chris@309 1210 return m_startTime + RealTime::fromSeconds
Chris@309 1211 (columnDuration * (column - postFilterLatency) + 0.02);
Chris@309 1212 }
Chris@309 1213
Chris@252 1214 Silvet::Feature
Chris@252 1215 Silvet::makeNoteFeature(int start,
Chris@252 1216 int end,
Chris@252 1217 int note,
Chris@252 1218 int shift,
Chris@320 1219 double strength)
Chris@252 1220 {
Chris@252 1221 Feature f;
Chris@252 1222
Chris@252 1223 f.hasTimestamp = true;
Chris@309 1224 f.timestamp = getColumnTimestamp(start);
Chris@252 1225
Chris@252 1226 f.hasDuration = true;
Chris@309 1227 f.duration = getColumnTimestamp(end) - f.timestamp;
Chris@252 1228
Chris@252 1229 f.values.clear();
Chris@336 1230 f.values.push_back(getNoteFrequency(note, shift));
Chris@320 1231 f.values.push_back(getVelocityFor(strength, start));
Chris@252 1232
Chris@336 1233 f.label = getNoteName(note, shift);
Chris@252 1234
Chris@252 1235 return f;
Chris@252 1236 }
Chris@252 1237
Chris@319 1238 Silvet::Feature
Chris@319 1239 Silvet::makeOnsetFeature(int start,
Chris@319 1240 int note,
Chris@319 1241 int shift,
Chris@320 1242 double strength)
Chris@319 1243 {
Chris@319 1244 Feature f;
Chris@319 1245
Chris@319 1246 f.hasTimestamp = true;
Chris@319 1247 f.timestamp = getColumnTimestamp(start);
Chris@319 1248
Chris@319 1249 f.hasDuration = false;
Chris@319 1250
Chris@319 1251 f.values.clear();
Chris@336 1252 f.values.push_back(getNoteFrequency(note, shift));
Chris@320 1253 f.values.push_back(getVelocityFor(strength, start));
Chris@319 1254
Chris@336 1255 f.label = getNoteName(note, shift);
Chris@336 1256
Chris@336 1257 return f;
Chris@336 1258 }
Chris@336 1259
Chris@336 1260 Silvet::Feature
Chris@336 1261 Silvet::makeOffsetFeature(int col,
Chris@336 1262 int note,
Chris@336 1263 int shift)
Chris@336 1264 {
Chris@336 1265 Feature f;
Chris@336 1266
Chris@336 1267 f.hasTimestamp = true;
Chris@336 1268 f.timestamp = getColumnTimestamp(col);
Chris@336 1269
Chris@336 1270 f.hasDuration = false;
Chris@336 1271
Chris@336 1272 f.values.clear();
Chris@336 1273 f.values.push_back(getNoteFrequency(note, shift));
Chris@336 1274 f.values.push_back(0); // velocity 0 for offset
Chris@336 1275
Chris@336 1276 f.label = getNoteName(note, shift) + " off";
Chris@319 1277
Chris@319 1278 return f;
Chris@319 1279 }
Chris@319 1280
Chris@320 1281 int
Chris@320 1282 Silvet::getVelocityFor(double strength, int column)
Chris@320 1283 {
Chris@320 1284 RealTime rt = getColumnTimestamp(column + 1);
Chris@320 1285
Chris@320 1286 float inputGain = getInputGainAt(rt);
Chris@320 1287
Chris@320 1288 double scale = 2.0;
Chris@320 1289 if (m_mode == LiveMode) scale = 20.0;
Chris@320 1290
Chris@320 1291 double velocity = round((strength * scale) / inputGain);
Chris@320 1292
Chris@320 1293 if (velocity > 127.0) velocity = 127.0;
Chris@320 1294 if (velocity < 1.0) velocity = 1.0; // assume surpassed 0 threshold already
Chris@320 1295
Chris@320 1296 return int(velocity);
Chris@320 1297 }
Chris@320 1298
Chris@252 1299 float
Chris@252 1300 Silvet::getInputGainAt(RealTime t)
Chris@252 1301 {
Chris@252 1302 map<RealTime, float>::const_iterator i = m_inputGains.lower_bound(t);
Chris@252 1303
Chris@252 1304 if (i == m_inputGains.end()) {
Chris@252 1305 if (i != m_inputGains.begin()) {
Chris@252 1306 --i;
Chris@252 1307 } else {
Chris@252 1308 return 1.f; // no data
Chris@252 1309 }
Chris@252 1310 }
Chris@252 1311
Chris@252 1312 // cerr << "gain at time " << t << " = " << i->second << endl;
Chris@252 1313
Chris@252 1314 return i->second;
Chris@252 1315 }
Chris@252 1316