annotate src/Silvet.cpp @ 352:a3fc6e1f2d4e

Restore threads stuff -- the host needs to be compiled with threading
author Chris Cannam
date Thu, 03 Sep 2015 12:06:31 +0100
parents 071fd5e7b168
children 7dcff010d9cd
rev   line source
Chris@31 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@31 2
Chris@31 3 /*
Chris@31 4 Silvet
Chris@31 5
Chris@31 6 A Vamp plugin for note transcription.
Chris@31 7 Centre for Digital Music, Queen Mary University of London.
Chris@31 8
Chris@31 9 This program is free software; you can redistribute it and/or
Chris@31 10 modify it under the terms of the GNU General Public License as
Chris@31 11 published by the Free Software Foundation; either version 2 of the
Chris@31 12 License, or (at your option) any later version. See the file
Chris@31 13 COPYING included with this distribution for more information.
Chris@31 14 */
Chris@31 15
Chris@31 16 #include "Silvet.h"
Chris@34 17 #include "EM.h"
Chris@31 18
Chris@152 19 #include <cq/CQSpectrogram.h>
Chris@31 20
Chris@152 21 #include "MedianFilter.h"
Chris@152 22 #include "constant-q-cpp/src/dsp/Resampler.h"
Chris@246 23 #include "flattendynamics-ladspa.h"
Chris@298 24 #include "LiveInstruments.h"
Chris@31 25
Chris@31 26 #include <vector>
Chris@312 27 #include <future>
Chris@31 28
Chris@32 29 #include <cstdio>
Chris@32 30
Chris@31 31 using std::vector;
Chris@48 32 using std::cout;
Chris@31 33 using std::cerr;
Chris@31 34 using std::endl;
Chris@311 35 using std::pair;
Chris@312 36 using std::future;
Chris@312 37 using std::async;
Chris@40 38 using Vamp::RealTime;
Chris@31 39
Chris@31 40 static int processingSampleRate = 44100;
Chris@298 41
Chris@298 42 static int binsPerSemitoneLive = 1;
Chris@298 43 static int binsPerSemitoneNormal = 5;
Chris@170 44
Chris@272 45 static int minInputSampleRate = 100;
Chris@272 46 static int maxInputSampleRate = 192000;
Chris@272 47
Chris@316 48 static const Silvet::ProcessingMode defaultMode = Silvet::HighQualityMode;
Chris@316 49
Chris@31 50 Silvet::Silvet(float inputSampleRate) :
Chris@31 51 Plugin(inputSampleRate),
Chris@161 52 m_instruments(InstrumentPack::listInstrumentPacks()),
Chris@298 53 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)),
Chris@31 54 m_resampler(0),
Chris@246 55 m_flattener(0),
Chris@110 56 m_cq(0),
Chris@316 57 m_mode(defaultMode),
Chris@166 58 m_fineTuning(false),
Chris@178 59 m_instrument(0),
Chris@313 60 m_colsPerSec(50),
Chris@313 61 m_haveStartTime(false)
Chris@31 62 {
Chris@31 63 }
Chris@31 64
Chris@31 65 Silvet::~Silvet()
Chris@31 66 {
Chris@31 67 delete m_resampler;
Chris@246 68 delete m_flattener;
Chris@31 69 delete m_cq;
Chris@41 70 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 71 delete m_postFilter[i];
Chris@41 72 }
Chris@31 73 }
Chris@31 74
Chris@31 75 string
Chris@31 76 Silvet::getIdentifier() const
Chris@31 77 {
Chris@31 78 return "silvet";
Chris@31 79 }
Chris@31 80
Chris@31 81 string
Chris@31 82 Silvet::getName() const
Chris@31 83 {
Chris@31 84 return "Silvet Note Transcription";
Chris@31 85 }
Chris@31 86
Chris@31 87 string
Chris@31 88 Silvet::getDescription() const
Chris@31 89 {
Chris@191 90 return "Estimate the note onsets, pitches, and durations that make up a music recording.";
Chris@31 91 }
Chris@31 92
Chris@31 93 string
Chris@31 94 Silvet::getMaker() const
Chris@31 95 {
Chris@191 96 return "Queen Mary, University of London";
Chris@31 97 }
Chris@31 98
Chris@31 99 int
Chris@31 100 Silvet::getPluginVersion() const
Chris@31 101 {
Chris@309 102 return 3;
Chris@31 103 }
Chris@31 104
Chris@31 105 string
Chris@31 106 Silvet::getCopyright() const
Chris@31 107 {
Chris@191 108 return "Method by Emmanouil Benetos and Simon Dixon; plugin by Chris Cannam and Emmanouil Benetos. GPL licence.";
Chris@31 109 }
Chris@31 110
Chris@31 111 Silvet::InputDomain
Chris@31 112 Silvet::getInputDomain() const
Chris@31 113 {
Chris@31 114 return TimeDomain;
Chris@31 115 }
Chris@31 116
Chris@31 117 size_t
Chris@31 118 Silvet::getPreferredBlockSize() const
Chris@31 119 {
Chris@31 120 return 0;
Chris@31 121 }
Chris@31 122
Chris@31 123 size_t
Chris@31 124 Silvet::getPreferredStepSize() const
Chris@31 125 {
Chris@31 126 return 0;
Chris@31 127 }
Chris@31 128
Chris@31 129 size_t
Chris@31 130 Silvet::getMinChannelCount() const
Chris@31 131 {
Chris@31 132 return 1;
Chris@31 133 }
Chris@31 134
Chris@31 135 size_t
Chris@31 136 Silvet::getMaxChannelCount() const
Chris@31 137 {
Chris@31 138 return 1;
Chris@31 139 }
Chris@31 140
Chris@31 141 Silvet::ParameterList
Chris@31 142 Silvet::getParameterDescriptors() const
Chris@31 143 {
Chris@31 144 ParameterList list;
Chris@110 145
Chris@110 146 ParameterDescriptor desc;
Chris@110 147 desc.identifier = "mode";
Chris@110 148 desc.name = "Processing mode";
Chris@110 149 desc.unit = "";
Chris@341 150 desc.description = "Sets the tradeoff of processing speed against transcription quality. Live mode is much faster and detects notes with relatively low latency; Intensive mode (the default) is slower but will almost always produce better results.";
Chris@110 151 desc.minValue = 0;
Chris@344 152 desc.maxValue = 1;
Chris@316 153 desc.defaultValue = int(defaultMode);
Chris@110 154 desc.isQuantized = true;
Chris@110 155 desc.quantizeStep = 1;
Chris@341 156 desc.valueNames.push_back("Live (faster and lower latency)");
Chris@165 157 desc.valueNames.push_back("Intensive (higher quality)");
Chris@161 158 list.push_back(desc);
Chris@161 159
Chris@176 160 desc.identifier = "instrument";
Chris@176 161 desc.name = "Instrument";
Chris@161 162 desc.unit = "";
Chris@271 163 desc.description = "The instrument or instruments known to be present in the recording. This affects the set of instrument templates used, as well as the expected level of polyphony in the output. Using a more limited set of instruments than the default will also make the plugin run faster.\nNote that this plugin cannot isolate instruments: you can't use this setting to request notes from only one instrument in a recording with several. Instead, use this as a hint to the plugin about which instruments are actually present.";
Chris@161 164 desc.minValue = 0;
Chris@162 165 desc.maxValue = m_instruments.size()-1;
Chris@162 166 desc.defaultValue = 0;
Chris@161 167 desc.isQuantized = true;
Chris@161 168 desc.quantizeStep = 1;
Chris@161 169 desc.valueNames.clear();
Chris@162 170 for (int i = 0; i < int(m_instruments.size()); ++i) {
Chris@162 171 desc.valueNames.push_back(m_instruments[i].name);
Chris@162 172 }
Chris@166 173 list.push_back(desc);
Chris@161 174
Chris@166 175 desc.identifier = "finetune";
Chris@166 176 desc.name = "Return fine pitch estimates";
Chris@166 177 desc.unit = "";
Chris@271 178 desc.description = "Return pitch estimates at finer than semitone resolution. This works only in Intensive mode. Notes that appear to drift in pitch will be split up into shorter notes with individually finer pitches.";
Chris@166 179 desc.minValue = 0;
Chris@166 180 desc.maxValue = 1;
Chris@166 181 desc.defaultValue = 0;
Chris@166 182 desc.isQuantized = true;
Chris@166 183 desc.quantizeStep = 1;
Chris@166 184 desc.valueNames.clear();
Chris@110 185 list.push_back(desc);
Chris@110 186
Chris@31 187 return list;
Chris@31 188 }
Chris@31 189
Chris@31 190 float
Chris@31 191 Silvet::getParameter(string identifier) const
Chris@31 192 {
Chris@110 193 if (identifier == "mode") {
Chris@297 194 return (float)(int)m_mode;
Chris@166 195 } else if (identifier == "finetune") {
Chris@166 196 return m_fineTuning ? 1.f : 0.f;
Chris@176 197 } else if (identifier == "instrument") {
Chris@162 198 return m_instrument;
Chris@110 199 }
Chris@31 200 return 0;
Chris@31 201 }
Chris@31 202
Chris@31 203 void
Chris@31 204 Silvet::setParameter(string identifier, float value)
Chris@31 205 {
Chris@110 206 if (identifier == "mode") {
Chris@297 207 m_mode = (ProcessingMode)(int)(value + 0.5);
Chris@166 208 } else if (identifier == "finetune") {
Chris@166 209 m_fineTuning = (value > 0.5);
Chris@176 210 } else if (identifier == "instrument") {
Chris@162 211 m_instrument = lrintf(value);
Chris@110 212 }
Chris@31 213 }
Chris@31 214
Chris@31 215 Silvet::ProgramList
Chris@31 216 Silvet::getPrograms() const
Chris@31 217 {
Chris@31 218 ProgramList list;
Chris@31 219 return list;
Chris@31 220 }
Chris@31 221
Chris@31 222 string
Chris@31 223 Silvet::getCurrentProgram() const
Chris@31 224 {
Chris@31 225 return "";
Chris@31 226 }
Chris@31 227
Chris@31 228 void
Chris@31 229 Silvet::selectProgram(string name)
Chris@31 230 {
Chris@31 231 }
Chris@31 232
Chris@31 233 Silvet::OutputList
Chris@31 234 Silvet::getOutputDescriptors() const
Chris@31 235 {
Chris@31 236 OutputList list;
Chris@31 237
Chris@31 238 OutputDescriptor d;
Chris@51 239 d.identifier = "notes";
Chris@51 240 d.name = "Note transcription";
Chris@329 241 d.description = "Overall note transcription. Each note has time, duration, estimated fundamental frequency, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
Chris@41 242 d.unit = "Hz";
Chris@31 243 d.hasFixedBinCount = true;
Chris@31 244 d.binCount = 2;
Chris@41 245 d.binNames.push_back("Frequency");
Chris@31 246 d.binNames.push_back("Velocity");
Chris@31 247 d.hasKnownExtents = false;
Chris@31 248 d.isQuantized = false;
Chris@31 249 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@246 250 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@31 251 d.hasDuration = true;
Chris@32 252 m_notesOutputNo = list.size();
Chris@32 253 list.push_back(d);
Chris@32 254
Chris@319 255 d.identifier = "onsets";
Chris@319 256 d.name = "Note onsets";
Chris@323 257 d.description = "Note onsets, without durations. These can be calculated sooner than complete notes, because it isn't necessary to wait for a note to finish before returning its feature. Each event has time, estimated fundamental frequency in Hz, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
Chris@319 258 d.unit = "Hz";
Chris@319 259 d.hasFixedBinCount = true;
Chris@319 260 d.binCount = 2;
Chris@319 261 d.binNames.push_back("Frequency");
Chris@319 262 d.binNames.push_back("Velocity");
Chris@319 263 d.hasKnownExtents = false;
Chris@319 264 d.isQuantized = false;
Chris@319 265 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@319 266 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@319 267 d.hasDuration = false;
Chris@319 268 m_onsetsOutputNo = list.size();
Chris@319 269 list.push_back(d);
Chris@319 270
Chris@336 271 d.identifier = "onoffsets";
Chris@336 272 d.name = "Note onsets and offsets";
Chris@336 273 d.description = "Note onsets and offsets as separate events. Each onset event has time, estimated fundamental frequency in Hz, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture. Offsets are represented in the same way but with a velocity of 0.";
Chris@336 274 d.unit = "Hz";
Chris@336 275 d.hasFixedBinCount = true;
Chris@336 276 d.binCount = 2;
Chris@336 277 d.binNames.push_back("Frequency");
Chris@336 278 d.binNames.push_back("Velocity");
Chris@336 279 d.hasKnownExtents = false;
Chris@336 280 d.isQuantized = false;
Chris@336 281 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@336 282 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@336 283 d.hasDuration = false;
Chris@336 284 m_onOffsetsOutputNo = list.size();
Chris@336 285 list.push_back(d);
Chris@336 286
Chris@178 287 d.identifier = "timefreq";
Chris@178 288 d.name = "Time-frequency distribution";
Chris@271 289 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
Chris@178 290 d.unit = "";
Chris@178 291 d.hasFixedBinCount = true;
Chris@298 292 d.binCount = getPack(0).templateHeight;
Chris@178 293 d.binNames.clear();
Chris@178 294 if (m_cq) {
Chris@294 295 char name[50];
Chris@298 296 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@178 297 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@178 298 // lowest-frequency 55 bins have been dropped, for a
Chris@178 299 // 545-bin template. The native CQ bins go high->low
Chris@178 300 // frequency though, so these are still the first 545 bins
Chris@178 301 // as reported by getBinFrequency, though in reverse order
Chris@178 302 float freq = m_cq->getBinFrequency
Chris@298 303 (getPack(0).templateHeight - i - 1);
Chris@178 304 sprintf(name, "%.1f Hz", freq);
Chris@178 305 d.binNames.push_back(name);
Chris@178 306 }
Chris@178 307 }
Chris@178 308 d.hasKnownExtents = false;
Chris@178 309 d.isQuantized = false;
Chris@178 310 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@178 311 d.sampleRate = m_colsPerSec;
Chris@178 312 d.hasDuration = false;
Chris@178 313 m_fcqOutputNo = list.size();
Chris@178 314 list.push_back(d);
Chris@178 315
Chris@294 316 d.identifier = "pitchactivation";
Chris@294 317 d.name = "Pitch activation distribution";
Chris@294 318 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction.";
Chris@294 319 d.unit = "";
Chris@294 320 d.hasFixedBinCount = true;
Chris@298 321 d.binCount = getPack(0).templateNoteCount;
Chris@294 322 d.binNames.clear();
Chris@294 323 if (m_cq) {
Chris@298 324 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@336 325 d.binNames.push_back(getNoteName(i, 0));
Chris@294 326 }
Chris@294 327 }
Chris@294 328 d.hasKnownExtents = false;
Chris@294 329 d.isQuantized = false;
Chris@294 330 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@294 331 d.sampleRate = m_colsPerSec;
Chris@294 332 d.hasDuration = false;
Chris@294 333 m_pitchOutputNo = list.size();
Chris@294 334 list.push_back(d);
Chris@294 335
Chris@309 336 d.identifier = "chroma";
Chris@309 337 d.name = "Pitch chroma distribution";
Chris@309 338 d.description = "Pitch chroma distribution formed by wrapping the un-thresholded pitch activation distribution into a single octave of semitone bins.";
Chris@309 339 d.unit = "";
Chris@309 340 d.hasFixedBinCount = true;
Chris@309 341 d.binCount = 12;
Chris@309 342 d.binNames.clear();
Chris@309 343 if (m_cq) {
Chris@309 344 for (int i = 0; i < 12; ++i) {
Chris@320 345 d.binNames.push_back(getChromaName(i));
Chris@309 346 }
Chris@309 347 }
Chris@309 348 d.hasKnownExtents = false;
Chris@309 349 d.isQuantized = false;
Chris@309 350 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@309 351 d.sampleRate = m_colsPerSec;
Chris@309 352 d.hasDuration = false;
Chris@309 353 m_chromaOutputNo = list.size();
Chris@309 354 list.push_back(d);
Chris@309 355
Chris@302 356 d.identifier = "templates";
Chris@302 357 d.name = "Templates";
Chris@302 358 d.description = "Constant-Q spectral templates for the selected instrument pack.";
Chris@302 359 d.unit = "";
Chris@302 360 d.hasFixedBinCount = true;
Chris@302 361 d.binCount = getPack(0).templateHeight;
Chris@302 362 d.binNames.clear();
Chris@302 363 if (m_cq) {
Chris@302 364 char name[50];
Chris@302 365 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@302 366 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@302 367 // lowest-frequency 55 bins have been dropped, for a
Chris@302 368 // 545-bin template. The native CQ bins go high->low
Chris@302 369 // frequency though, so these are still the first 545 bins
Chris@302 370 // as reported by getBinFrequency, though in reverse order
Chris@302 371 float freq = m_cq->getBinFrequency
Chris@302 372 (getPack(0).templateHeight - i - 1);
Chris@302 373 sprintf(name, "%.1f Hz", freq);
Chris@302 374 d.binNames.push_back(name);
Chris@302 375 }
Chris@302 376 }
Chris@302 377 d.hasKnownExtents = false;
Chris@302 378 d.isQuantized = false;
Chris@302 379 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@302 380 d.sampleRate = m_colsPerSec;
Chris@302 381 d.hasDuration = false;
Chris@302 382 m_templateOutputNo = list.size();
Chris@302 383 list.push_back(d);
Chris@302 384
Chris@31 385 return list;
Chris@31 386 }
Chris@31 387
Chris@38 388 std::string
Chris@320 389 Silvet::getChromaName(int pitch) const
Chris@38 390 {
Chris@38 391 static const char *names[] = {
Chris@38 392 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
Chris@38 393 };
Chris@38 394
Chris@309 395 return names[pitch];
Chris@309 396 }
Chris@309 397
Chris@309 398 std::string
Chris@336 399 Silvet::getNoteName(int note, int shift) const
Chris@309 400 {
Chris@320 401 string n = getChromaName(note % 12);
Chris@38 402
Chris@175 403 int oct = (note + 9) / 12;
Chris@38 404
Chris@175 405 char buf[30];
Chris@175 406
Chris@175 407 float pshift = 0.f;
Chris@336 408 int shiftCount = getShiftCount();
Chris@175 409 if (shiftCount > 1) {
Chris@320 410 // see getNoteFrequency below
Chris@175 411 pshift =
Chris@175 412 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 413 }
Chris@175 414
Chris@175 415 if (pshift > 0.f) {
Chris@309 416 sprintf(buf, "%s%d+%dc", n.c_str(), oct, int(round(pshift * 100)));
Chris@175 417 } else if (pshift < 0.f) {
Chris@309 418 sprintf(buf, "%s%d-%dc", n.c_str(), oct, int(round((-pshift) * 100)));
Chris@175 419 } else {
Chris@309 420 sprintf(buf, "%s%d", n.c_str(), oct);
Chris@175 421 }
Chris@38 422
Chris@38 423 return buf;
Chris@38 424 }
Chris@38 425
Chris@41 426 float
Chris@336 427 Silvet::getNoteFrequency(int note, int shift) const
Chris@41 428 {
Chris@169 429 // Convert shift number to a pitch shift. The given shift number
Chris@169 430 // is an offset into the template array, which starts with some
Chris@169 431 // zeros, followed by the template, then some trailing zeros.
Chris@169 432 //
Chris@169 433 // Example: if we have templateMaxShift == 2 and thus shiftCount
Chris@169 434 // == 5, then the number will be in the range 0-4 and the template
Chris@169 435 // will have 2 zeros at either end. Thus number 2 represents the
Chris@169 436 // template "as recorded", for a pitch shift of 0; smaller indices
Chris@169 437 // represent moving the template *up* in pitch (by introducing
Chris@169 438 // zeros at the start, which is the low-frequency end), for a
Chris@169 439 // positive pitch shift; and higher values represent moving it
Chris@169 440 // down in pitch, for a negative pitch shift.
Chris@169 441
Chris@175 442 float pshift = 0.f;
Chris@336 443 int shiftCount = getShiftCount();
Chris@175 444 if (shiftCount > 1) {
Chris@175 445 pshift =
Chris@175 446 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 447 }
Chris@169 448
Chris@301 449 float freq = float(27.5 * pow(2.0, (note + pshift) / 12.0));
Chris@301 450
Chris@303 451 // cerr << "note = " << note << ", shift = " << shift << ", shiftCount = "
Chris@303 452 // << shiftCount << ", obtained freq = " << freq << endl;
Chris@301 453
Chris@301 454 return freq;
Chris@41 455 }
Chris@41 456
Chris@31 457 bool
Chris@31 458 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@31 459 {
Chris@272 460 if (m_inputSampleRate < minInputSampleRate ||
Chris@272 461 m_inputSampleRate > maxInputSampleRate) {
Chris@272 462 cerr << "Silvet::initialise: Unsupported input sample rate "
Chris@272 463 << m_inputSampleRate << " (supported min " << minInputSampleRate
Chris@272 464 << ", max " << maxInputSampleRate << ")" << endl;
Chris@272 465 return false;
Chris@272 466 }
Chris@272 467
Chris@31 468 if (channels < getMinChannelCount() ||
Chris@272 469 channels > getMaxChannelCount()) {
Chris@272 470 cerr << "Silvet::initialise: Unsupported channel count " << channels
Chris@272 471 << " (supported min " << getMinChannelCount() << ", max "
Chris@272 472 << getMaxChannelCount() << ")" << endl;
Chris@272 473 return false;
Chris@272 474 }
Chris@31 475
Chris@31 476 if (stepSize != blockSize) {
Chris@31 477 cerr << "Silvet::initialise: Step size must be the same as block size ("
Chris@31 478 << stepSize << " != " << blockSize << ")" << endl;
Chris@31 479 return false;
Chris@31 480 }
Chris@31 481
Chris@31 482 m_blockSize = blockSize;
Chris@31 483
Chris@31 484 reset();
Chris@31 485
Chris@31 486 return true;
Chris@31 487 }
Chris@31 488
Chris@31 489 void
Chris@31 490 Silvet::reset()
Chris@31 491 {
Chris@31 492 delete m_resampler;
Chris@246 493 delete m_flattener;
Chris@31 494 delete m_cq;
Chris@31 495
Chris@31 496 if (m_inputSampleRate != processingSampleRate) {
Chris@31 497 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
Chris@31 498 } else {
Chris@31 499 m_resampler = 0;
Chris@31 500 }
Chris@31 501
Chris@246 502 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
Chris@246 503 m_flattener->reset();
Chris@246 504
Chris@301 505 // this happens to be processingSampleRate / 3, and is the top
Chris@301 506 // freq used for the EM templates:
Chris@301 507 double maxFreq = 14700;
Chris@301 508
Chris@301 509 if (m_mode == LiveMode) {
Chris@301 510 // We only have 12 bpo rather than 60, so we need the top bin
Chris@301 511 // to be the middle one of the top 5, i.e. 2/5 of a semitone
Chris@301 512 // lower than 14700
Chris@301 513 maxFreq *= powf(2.0, -1.0 / 30.0);
Chris@301 514 }
Chris@301 515
Chris@173 516 double minFreq = 27.5;
Chris@173 517
Chris@341 518 if (m_mode == LiveMode) {
Chris@173 519 // We don't actually return any notes from the bottom octave,
Chris@173 520 // so we can just pad with zeros
Chris@173 521 minFreq *= 2;
Chris@173 522 }
Chris@173 523
Chris@298 524 int bpo = 12 *
Chris@298 525 (m_mode == LiveMode ? binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@301 526
Chris@154 527 CQParameters params(processingSampleRate,
Chris@173 528 minFreq,
Chris@303 529 maxFreq,
Chris@298 530 bpo);
Chris@154 531
Chris@325 532 params.q = 0.8;
Chris@325 533 params.atomHopFactor = (m_mode == LiveMode ? 1.0 : 0.3);
Chris@154 534 params.threshold = 0.0005;
Chris@317 535 params.decimator =
Chris@317 536 (m_mode == LiveMode ?
Chris@317 537 CQParameters::FasterDecimator : CQParameters::BetterDecimator);
Chris@172 538 params.window = CQParameters::Hann;
Chris@154 539
Chris@154 540 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
Chris@31 541
Chris@303 542 // cerr << "CQ bins = " << m_cq->getTotalBins() << endl;
Chris@303 543 // cerr << "CQ min freq = " << m_cq->getMinFrequency() << " (and for confirmation, freq of bin 0 = " << m_cq->getBinFrequency(0) << ")" << endl;
Chris@297 544
Chris@341 545 m_colsPerSec = 50;
Chris@165 546
Chris@41 547 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 548 delete m_postFilter[i];
Chris@41 549 }
Chris@41 550 m_postFilter.clear();
Chris@303 551 int postFilterLength = 3;
Chris@298 552 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@303 553 m_postFilter.push_back(new MedianFilter<double>(postFilterLength));
Chris@41 554 }
Chris@41 555 m_pianoRoll.clear();
Chris@246 556 m_inputGains.clear();
Chris@32 557 m_columnCount = 0;
Chris@272 558 m_resampledCount = 0;
Chris@40 559 m_startTime = RealTime::zeroTime;
Chris@313 560 m_haveStartTime = false;
Chris@31 561 }
Chris@31 562
Chris@31 563 Silvet::FeatureSet
Chris@31 564 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
Chris@31 565 {
Chris@302 566 FeatureSet fs;
Chris@302 567
Chris@313 568 if (!m_haveStartTime) {
Chris@314 569
Chris@40 570 m_startTime = timestamp;
Chris@313 571 m_haveStartTime = true;
Chris@314 572
Chris@302 573 insertTemplateFeatures(fs);
Chris@40 574 }
Chris@246 575
Chris@246 576 vector<float> flattened(m_blockSize);
Chris@246 577 float gain = 1.f;
Chris@246 578 m_flattener->connectInputPort
Chris@246 579 (FlattenDynamics::AudioInputPort, inputBuffers[0]);
Chris@246 580 m_flattener->connectOutputPort
Chris@246 581 (FlattenDynamics::AudioOutputPort, &flattened[0]);
Chris@246 582 m_flattener->connectOutputPort
Chris@246 583 (FlattenDynamics::GainOutputPort, &gain);
Chris@246 584 m_flattener->process(m_blockSize);
Chris@246 585
Chris@252 586 m_inputGains[timestamp] = gain;
Chris@40 587
Chris@31 588 vector<double> data;
Chris@40 589 for (int i = 0; i < m_blockSize; ++i) {
Chris@246 590 double d = flattened[i];
Chris@235 591 data.push_back(d);
Chris@40 592 }
Chris@31 593
Chris@31 594 if (m_resampler) {
Chris@272 595
Chris@31 596 data = m_resampler->process(data.data(), data.size());
Chris@272 597
Chris@272 598 int hadCount = m_resampledCount;
Chris@272 599 m_resampledCount += data.size();
Chris@272 600
Chris@272 601 int resamplerLatency = m_resampler->getLatency();
Chris@272 602
Chris@272 603 if (hadCount < resamplerLatency) {
Chris@272 604 int stillToDrop = resamplerLatency - hadCount;
Chris@272 605 if (stillToDrop >= int(data.size())) {
Chris@302 606 return fs;
Chris@272 607 } else {
Chris@272 608 data = vector<double>(data.begin() + stillToDrop, data.end());
Chris@272 609 }
Chris@272 610 }
Chris@31 611 }
Chris@272 612
Chris@32 613 Grid cqout = m_cq->process(data);
Chris@302 614 transcribe(cqout, fs);
Chris@51 615 return fs;
Chris@34 616 }
Chris@34 617
Chris@34 618 Silvet::FeatureSet
Chris@34 619 Silvet::getRemainingFeatures()
Chris@34 620 {
Chris@145 621 Grid cqout = m_cq->getRemainingOutput();
Chris@302 622 FeatureSet fs;
Chris@336 623
Chris@302 624 if (m_columnCount == 0) {
Chris@302 625 // process() was never called, but we still want these
Chris@302 626 insertTemplateFeatures(fs);
Chris@302 627 } else {
Chris@336 628
Chris@336 629 // Complete the transcription
Chris@336 630
Chris@302 631 transcribe(cqout, fs);
Chris@336 632
Chris@336 633 // And make sure any extant playing notes are finished and returned
Chris@336 634
Chris@336 635 m_pianoRoll.push_back({});
Chris@336 636
Chris@336 637 auto events = noteTrack();
Chris@336 638
Chris@336 639 for (const auto &f : events.notes) {
Chris@336 640 fs[m_notesOutputNo].push_back(f);
Chris@336 641 }
Chris@336 642
Chris@336 643 for (const auto &f : events.onsets) {
Chris@336 644 fs[m_onsetsOutputNo].push_back(f);
Chris@336 645 }
Chris@336 646
Chris@336 647 for (const auto &f : events.onOffsets) {
Chris@336 648 fs[m_onOffsetsOutputNo].push_back(f);
Chris@336 649 }
Chris@302 650 }
Chris@336 651
Chris@51 652 return fs;
Chris@34 653 }
Chris@34 654
Chris@302 655 void
Chris@302 656 Silvet::insertTemplateFeatures(FeatureSet &fs)
Chris@302 657 {
Chris@302 658 const InstrumentPack &pack = getPack(m_instrument);
Chris@302 659 for (int i = 0; i < int(pack.templates.size()) * pack.templateNoteCount; ++i) {
Chris@302 660 RealTime timestamp = RealTime::fromSeconds(double(i) / m_colsPerSec);
Chris@302 661 Feature f;
Chris@302 662 char buffer[50];
Chris@302 663 sprintf(buffer, "Note %d", i + 1);
Chris@302 664 f.label = buffer;
Chris@302 665 f.hasTimestamp = true;
Chris@302 666 f.timestamp = timestamp;
Chris@302 667 f.values = pack.templates[i / pack.templateNoteCount]
Chris@302 668 .data[i % pack.templateNoteCount];
Chris@302 669 fs[m_templateOutputNo].push_back(f);
Chris@302 670 }
Chris@302 671 }
Chris@302 672
Chris@336 673 int
Chris@336 674 Silvet::getShiftCount() const
Chris@336 675 {
Chris@336 676 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning;
Chris@336 677 int shiftCount = 1;
Chris@336 678 if (wantShifts) {
Chris@336 679 const InstrumentPack &pack(getPack(m_instrument));
Chris@336 680 shiftCount = pack.templateMaxShift * 2 + 1;
Chris@336 681 }
Chris@336 682 return shiftCount;
Chris@336 683 }
Chris@336 684
Chris@302 685 void
Chris@302 686 Silvet::transcribe(const Grid &cqout, Silvet::FeatureSet &fs)
Chris@34 687 {
Chris@32 688 Grid filtered = preProcess(cqout);
Chris@31 689
Chris@302 690 if (filtered.empty()) return;
Chris@170 691
Chris@298 692 const InstrumentPack &pack(getPack(m_instrument));
Chris@104 693
Chris@325 694 int width = filtered.size();
Chris@325 695
Chris@325 696 double silenceThreshold = 0.01;
Chris@325 697
Chris@325 698 for (int i = 0; i < width; ++i) {
Chris@325 699
Chris@325 700 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1 + i);
Chris@325 701 float inputGain = getInputGainAt(timestamp);
Chris@325 702
Chris@178 703 Feature f;
Chris@325 704 double rms = 0.0;
Chris@325 705
Chris@178 706 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@325 707 double v = filtered[i][j];
Chris@325 708 rms += v * v;
Chris@325 709 f.values.push_back(float(v));
Chris@178 710 }
Chris@325 711
Chris@325 712 rms = sqrt(rms / pack.templateHeight);
Chris@325 713 if (rms / inputGain < silenceThreshold) {
Chris@325 714 filtered[i].clear();
Chris@325 715 }
Chris@325 716
Chris@178 717 fs[m_fcqOutputNo].push_back(f);
Chris@178 718 }
Chris@325 719
Chris@311 720 Grid localPitches(width);
Chris@170 721
Chris@336 722 int shiftCount = getShiftCount();
Chris@336 723 bool wantShifts = (shiftCount > 1);
Chris@170 724
Chris@170 725 vector<vector<int> > localBestShifts;
Chris@170 726 if (wantShifts) {
Chris@311 727 localBestShifts = vector<vector<int> >(width);
Chris@170 728 }
Chris@170 729
Chris@317 730 int emThreadCount = MAX_EM_THREADS;
Chris@352 731 if (emThreadCount > int(std::thread::hardware_concurrency())) {
Chris@352 732 emThreadCount = std::thread::hardware_concurrency();
Chris@352 733 }
Chris@317 734 if (m_mode == LiveMode && pack.templates.size() == 1) {
Chris@317 735 // The EM step is probably not slow enough to merit it
Chris@317 736 emThreadCount = 1;
Chris@317 737 }
Chris@317 738
Chris@312 739 #if (defined(MAX_EM_THREADS) && (MAX_EM_THREADS > 1))
Chris@317 740 if (emThreadCount > 1) {
Chris@317 741 for (int i = 0; i < width; ) {
Chris@317 742 typedef future<pair<vector<double>, vector<int>>> EMFuture;
Chris@317 743 vector<EMFuture> results;
Chris@317 744 for (int j = 0; j < emThreadCount && i + j < width; ++j) {
Chris@352 745 const vector<double> &column = filtered.at(i + j);
Chris@317 746 results.push_back
Chris@317 747 (async(std::launch::async,
Chris@352 748 [&]() { return applyEM(pack, column); }));
Chris@317 749 }
Chris@317 750 for (int j = 0; j < emThreadCount && i + j < width; ++j) {
Chris@317 751 auto out = results[j].get();
Chris@317 752 localPitches[i+j] = out.first;
Chris@317 753 if (wantShifts) localBestShifts[i+j] = out.second;
Chris@317 754 }
Chris@317 755 i += emThreadCount;
Chris@312 756 }
Chris@123 757 }
Chris@312 758 #endif
Chris@317 759
Chris@317 760 if (emThreadCount == 1) {
Chris@317 761 for (int i = 0; i < width; ++i) {
Chris@336 762 auto out = applyEM(pack, filtered.at(i));
Chris@317 763 localPitches[i] = out.first;
Chris@317 764 if (wantShifts) localBestShifts[i] = out.second;
Chris@317 765 }
Chris@317 766 }
Chris@305 767
Chris@166 768 for (int i = 0; i < width; ++i) {
Chris@37 769
Chris@321 770 vector<double> filtered;
Chris@321 771
Chris@321 772 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@321 773 m_postFilter[j]->push(localPitches[i][j]);
Chris@321 774 filtered.push_back(m_postFilter[j]->get());
Chris@321 775 }
Chris@294 776
Chris@309 777 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1);
Chris@309 778 float inputGain = getInputGainAt(timestamp);
Chris@309 779
Chris@294 780 Feature f;
Chris@294 781 for (int j = 0; j < (int)filtered.size(); ++j) {
Chris@309 782 float v = filtered[j];
Chris@294 783 if (v < pack.levelThreshold) v = 0.f;
Chris@309 784 f.values.push_back(v / inputGain);
Chris@294 785 }
Chris@294 786 fs[m_pitchOutputNo].push_back(f);
Chris@309 787
Chris@309 788 f.values.clear();
Chris@309 789 f.values.resize(12);
Chris@309 790 for (int j = 0; j < (int)filtered.size(); ++j) {
Chris@309 791 f.values[j % 12] += filtered[j] / inputGain;
Chris@309 792 }
Chris@309 793 fs[m_chromaOutputNo].push_back(f);
Chris@38 794
Chris@321 795 // This pushes the up-to-max-polyphony activation column to
Chris@321 796 // m_pianoRoll
Chris@336 797 postProcess(filtered, localBestShifts[i]);
Chris@321 798
Chris@336 799 auto events = noteTrack();
Chris@319 800
Chris@336 801 for (const auto &f : events.notes) {
Chris@336 802 fs[m_notesOutputNo].push_back(f);
Chris@40 803 }
Chris@319 804
Chris@336 805 for (const auto &f : events.onsets) {
Chris@336 806 fs[m_onsetsOutputNo].push_back(f);
Chris@336 807 }
Chris@336 808
Chris@336 809 for (const auto &f : events.onOffsets) {
Chris@336 810 fs[m_onOffsetsOutputNo].push_back(f);
Chris@319 811 }
Chris@34 812 }
Chris@31 813 }
Chris@31 814
Chris@311 815 pair<vector<double>, vector<int> >
Chris@311 816 Silvet::applyEM(const InstrumentPack &pack,
Chris@336 817 const vector<double> &column)
Chris@311 818 {
Chris@311 819 double columnThreshold = 1e-5;
Chris@311 820
Chris@314 821 if (m_mode == LiveMode) {
Chris@325 822 columnThreshold /= 15;
Chris@314 823 }
Chris@314 824
Chris@311 825 vector<double> pitches(pack.templateNoteCount, 0.0);
Chris@311 826 vector<int> bestShifts;
Chris@325 827
Chris@325 828 if (column.empty()) return { pitches, bestShifts };
Chris@311 829
Chris@311 830 double sum = 0.0;
Chris@311 831 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@311 832 sum += column.at(j);
Chris@311 833 }
Chris@311 834 if (sum < columnThreshold) return { pitches, bestShifts };
Chris@311 835
Chris@314 836 EM em(&pack, m_mode == HighQualityMode);
Chris@311 837
Chris@311 838 em.setPitchSparsity(pack.pitchSparsity);
Chris@311 839 em.setSourceSparsity(pack.sourceSparsity);
Chris@311 840
Chris@314 841 int iterations = (m_mode == HighQualityMode ? 20 : 10);
Chris@311 842
Chris@311 843 for (int j = 0; j < iterations; ++j) {
Chris@311 844 em.iterate(column.data());
Chris@311 845 }
Chris@311 846
Chris@311 847 const float *pitchDist = em.getPitchDistribution();
Chris@311 848 const float *const *shiftDist = em.getShifts();
Chris@311 849
Chris@336 850 int shiftCount = getShiftCount();
Chris@311 851
Chris@311 852 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@311 853
Chris@311 854 pitches[j] = pitchDist[j] * sum;
Chris@311 855
Chris@311 856 int bestShift = 0;
Chris@311 857 float bestShiftValue = 0.0;
Chris@336 858 if (shiftCount > 1) {
Chris@311 859 for (int k = 0; k < shiftCount; ++k) {
Chris@311 860 float value = shiftDist[k][j];
Chris@311 861 if (k == 0 || value > bestShiftValue) {
Chris@311 862 bestShiftValue = value;
Chris@311 863 bestShift = k;
Chris@311 864 }
Chris@311 865 }
Chris@311 866 bestShifts.push_back(bestShift);
Chris@311 867 }
Chris@311 868 }
Chris@311 869
Chris@311 870 return { pitches, bestShifts };
Chris@311 871 }
Chris@311 872
Chris@32 873 Silvet::Grid
Chris@32 874 Silvet::preProcess(const Grid &in)
Chris@32 875 {
Chris@32 876 int width = in.size();
Chris@32 877
Chris@165 878 int spacing = processingSampleRate / m_colsPerSec;
Chris@32 879
Chris@165 880 // need to be careful that col spacing is an integer number of samples!
Chris@165 881 assert(spacing * m_colsPerSec == processingSampleRate);
Chris@32 882
Chris@32 883 Grid out;
Chris@32 884
Chris@58 885 // We count the CQ latency in terms of processing hops, but
Chris@58 886 // actually it probably isn't an exact number of hops so this
Chris@58 887 // isn't quite accurate. But the small constant offset is
Chris@165 888 // practically irrelevant compared to the jitter from the frame
Chris@165 889 // size we reduce to in a moment
Chris@33 890 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
Chris@33 891
Chris@298 892 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 893
Chris@32 894 for (int i = 0; i < width; ++i) {
Chris@32 895
Chris@33 896 if (m_columnCount < latentColumns) {
Chris@33 897 ++m_columnCount;
Chris@33 898 continue;
Chris@33 899 }
Chris@33 900
Chris@32 901 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop();
Chris@32 902 int sampleNo = m_columnCount * m_cq->getColumnHop();
Chris@32 903
Chris@32 904 bool select = (sampleNo / spacing != prevSampleNo / spacing);
Chris@32 905
Chris@32 906 if (select) {
Chris@32 907 vector<double> inCol = in[i];
Chris@176 908 vector<double> outCol(pack.templateHeight);
Chris@32 909
Chris@178 910 // In HQ mode, the CQ returns 600 bins and we ignore the
Chris@298 911 // lowest 55 of them (assuming binsPerSemitone == 5).
Chris@178 912 //
Chris@341 913 // In live mode the CQ is an octave shorter, returning 540
Chris@341 914 // bins or equivalent, so we instead pad them with an
Chris@341 915 // additional 5 or equivalent zeros.
Chris@178 916 //
Chris@178 917 // We also need to reverse the column as we go, since the
Chris@178 918 // raw CQ has the high frequencies first and we need it
Chris@178 919 // the other way around.
Chris@32 920
Chris@298 921 int bps = (m_mode == LiveMode ?
Chris@298 922 binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@298 923
Chris@297 924 if (m_mode == HighQualityMode) {
Chris@178 925 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@298 926 int ix = inCol.size() - j - (11 * bps);
Chris@178 927 outCol[j] = inCol[ix];
Chris@178 928 }
Chris@178 929 } else {
Chris@298 930 for (int j = 0; j < bps; ++j) {
Chris@178 931 outCol[j] = 0.0;
Chris@178 932 }
Chris@298 933 for (int j = bps; j < pack.templateHeight; ++j) {
Chris@298 934 int ix = inCol.size() - j + (bps-1);
Chris@178 935 outCol[j] = inCol[ix];
Chris@178 936 }
Chris@46 937 }
Chris@32 938
Chris@46 939 vector<double> noiseLevel1 =
Chris@298 940 MedianFilter<double>::filter(8 * bps, outCol);
Chris@176 941 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 942 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
Chris@46 943 }
Chris@32 944
Chris@46 945 vector<double> noiseLevel2 =
Chris@298 946 MedianFilter<double>::filter(8 * bps, noiseLevel1);
Chris@176 947 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 948 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
Chris@32 949 }
Chris@32 950
Chris@165 951 out.push_back(outCol);
Chris@32 952 }
Chris@32 953
Chris@32 954 ++m_columnCount;
Chris@32 955 }
Chris@32 956
Chris@32 957 return out;
Chris@32 958 }
Chris@32 959
Chris@321 960 void
Chris@170 961 Silvet::postProcess(const vector<double> &pitches,
Chris@336 962 const vector<int> &bestShifts)
Chris@166 963 {
Chris@298 964 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 965
Chris@41 966 // Threshold for level and reduce number of candidate pitches
Chris@41 967
Chris@41 968 typedef std::multimap<double, int> ValueIndexMap;
Chris@41 969
Chris@41 970 ValueIndexMap strengths;
Chris@166 971
Chris@176 972 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@321 973
Chris@321 974 double strength = pitches[j];
Chris@183 975 if (strength < pack.levelThreshold) continue;
Chris@321 976
Chris@321 977 // In live mode with only a 12-bpo CQ, we are very likely to
Chris@321 978 // get clusters of two or three high scores at a time for
Chris@321 979 // neighbouring semitones. Eliminate these by picking only the
Chris@325 980 // peaks (except that we never eliminate a note that has
Chris@325 981 // already been established as currently playing). This means
Chris@325 982 // we can't recognise actual semitone chords if they ever
Chris@325 983 // appear, but it's not as if live mode is good enough for
Chris@325 984 // that to be a big deal anyway.
Chris@321 985 if (m_mode == LiveMode) {
Chris@325 986 if (m_current.find(j) == m_current.end() &&
Chris@325 987 (j == 0 ||
Chris@325 988 j + 1 == pack.templateNoteCount ||
Chris@325 989 pitches[j] < pitches[j-1] ||
Chris@325 990 pitches[j] < pitches[j+1])) {
Chris@325 991 // not a peak or a currently-playing note: skip it
Chris@321 992 continue;
Chris@321 993 }
Chris@321 994 }
Chris@323 995
Chris@168 996 strengths.insert(ValueIndexMap::value_type(strength, j));
Chris@168 997 }
Chris@166 998
Chris@168 999 ValueIndexMap::const_iterator si = strengths.end();
Chris@167 1000
Chris@168 1001 map<int, double> active;
Chris@168 1002 map<int, int> activeShifts;
Chris@168 1003
Chris@336 1004 int shiftCount = getShiftCount();
Chris@336 1005
Chris@183 1006 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) {
Chris@168 1007
Chris@168 1008 --si;
Chris@168 1009
Chris@168 1010 double strength = si->first;
Chris@168 1011 int j = si->second;
Chris@168 1012
Chris@168 1013 active[j] = strength;
Chris@168 1014
Chris@336 1015 if (shiftCount > 1) {
Chris@170 1016 activeShifts[j] = bestShifts[j];
Chris@167 1017 }
Chris@41 1018 }
Chris@41 1019
Chris@168 1020 m_pianoRoll.push_back(active);
Chris@170 1021
Chris@336 1022 if (shiftCount > 1) {
Chris@168 1023 m_pianoRollShifts.push_back(activeShifts);
Chris@41 1024 }
Chris@294 1025
Chris@321 1026 return;
Chris@166 1027 }
Chris@166 1028
Chris@336 1029 Silvet::FeatureChunk
Chris@336 1030 Silvet::noteTrack()
Chris@166 1031 {
Chris@41 1032 // Minimum duration pruning, and conversion to notes. We can only
Chris@41 1033 // report notes that have just ended (i.e. that are absent in the
Chris@168 1034 // latest active set but present in the prior set in the piano
Chris@41 1035 // roll) -- any notes that ended earlier will have been reported
Chris@41 1036 // already, and if they haven't ended, we don't know their
Chris@41 1037 // duration.
Chris@41 1038
Chris@168 1039 int width = m_pianoRoll.size() - 1;
Chris@168 1040
Chris@168 1041 const map<int, double> &active = m_pianoRoll[width];
Chris@41 1042
Chris@165 1043 double columnDuration = 1.0 / m_colsPerSec;
Chris@165 1044
Chris@165 1045 // only keep notes >= 100ms or thereabouts
Chris@323 1046 double durationThrSec = 0.1;
Chris@323 1047 int durationThreshold = floor(durationThrSec / columnDuration); // in cols
Chris@165 1048 if (durationThreshold < 1) durationThreshold = 1;
Chris@41 1049
Chris@336 1050 FeatureList noteFeatures, onsetFeatures, onOffsetFeatures;
Chris@41 1051
Chris@41 1052 if (width < durationThreshold + 1) {
Chris@336 1053 return { noteFeatures, onsetFeatures, onOffsetFeatures };
Chris@41 1054 }
Chris@41 1055
Chris@55 1056 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
Chris@41 1057 ni != m_pianoRoll[width-1].end(); ++ni) {
Chris@41 1058
Chris@55 1059 int note = ni->first;
Chris@41 1060
Chris@41 1061 int end = width;
Chris@41 1062 int start = end-1;
Chris@41 1063
Chris@41 1064 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
Chris@41 1065 --start;
Chris@41 1066 }
Chris@41 1067 ++start;
Chris@41 1068
Chris@319 1069 int duration = end - start;
Chris@319 1070
Chris@319 1071 if (duration < durationThreshold) {
Chris@41 1072 continue;
Chris@41 1073 }
Chris@41 1074
Chris@319 1075 if (duration == durationThreshold) {
Chris@325 1076 m_current.insert(note);
Chris@336 1077 emitOnset(start, note, onsetFeatures);
Chris@336 1078 emitOnset(start, note, onOffsetFeatures);
Chris@319 1079 }
Chris@319 1080
Chris@319 1081 if (active.find(note) == active.end()) {
Chris@319 1082 // the note was playing but just ended
Chris@325 1083 m_current.erase(note);
Chris@343 1084 emitNoteAndOffset(start, end, note, noteFeatures, onOffsetFeatures);
Chris@334 1085 } else { // still playing
Chris@334 1086 // repeated note detection: if level is greater than this
Chris@334 1087 // multiple of its previous value, then we end the note and
Chris@334 1088 // restart it with the same pitch
Chris@334 1089 double restartFactor = 1.5;
Chris@334 1090 if (duration >= durationThreshold * 2 &&
Chris@334 1091 (active.find(note)->second >
Chris@334 1092 restartFactor * m_pianoRoll[width-1][note])) {
Chris@334 1093 m_current.erase(note);
Chris@343 1094 emitNoteAndOffset(start, end-1, note, noteFeatures, onOffsetFeatures);
Chris@334 1095 // and remove this so that we start counting the new
Chris@334 1096 // note's duration from the current position
Chris@334 1097 m_pianoRoll[width-1].erase(note);
Chris@334 1098 }
Chris@319 1099 }
Chris@41 1100 }
Chris@41 1101
Chris@62 1102 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
Chris@41 1103
Chris@336 1104 return { noteFeatures, onsetFeatures, onOffsetFeatures };
Chris@41 1105 }
Chris@41 1106
Chris@169 1107 void
Chris@343 1108 Silvet::emitNoteAndOffset(int start, int end, int note,
Chris@343 1109 FeatureList &noteFeatures,
Chris@343 1110 FeatureList &onOffsetFeatures)
Chris@169 1111 {
Chris@343 1112 // Emit the complete note-event feature, and its offset. We have
Chris@343 1113 // already emitted the note onset when it started -- that process
Chris@343 1114 // is separated out in order to get a faster response during live
Chris@343 1115 // tracking. However, if the note shift changes within the note
Chris@343 1116 // (which can happen only if we have fine-tuning switched on), we
Chris@343 1117 // emit an offset and then a new onset with the new shift.
Chris@343 1118
Chris@169 1119 int partStart = start;
Chris@169 1120 int partShift = 0;
Chris@320 1121 double partStrength = 0;
Chris@169 1122
Chris@343 1123 // NB this *must* be less than durationThreshold above
Chris@252 1124 int partThreshold = floor(0.05 * m_colsPerSec);
Chris@169 1125
Chris@169 1126 for (int i = start; i != end; ++i) {
Chris@169 1127
Chris@169 1128 double strength = m_pianoRoll[i][note];
Chris@169 1129
Chris@169 1130 int shift = 0;
Chris@169 1131
Chris@336 1132 if (getShiftCount() > 1) {
Chris@169 1133
Chris@169 1134 shift = m_pianoRollShifts[i][note];
Chris@169 1135
Chris@169 1136 if (i == partStart) {
Chris@169 1137 partShift = shift;
Chris@169 1138 }
Chris@169 1139
Chris@169 1140 if (i > partStart + partThreshold && shift != partShift) {
Chris@169 1141
Chris@169 1142 // pitch has changed, emit an intermediate note
Chris@252 1143 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 1144 i,
Chris@252 1145 note,
Chris@252 1146 partShift,
Chris@320 1147 partStrength));
Chris@343 1148
Chris@343 1149 onOffsetFeatures.push_back(makeOffsetFeature(i,
Chris@343 1150 note,
Chris@343 1151 partShift));
Chris@343 1152
Chris@169 1153 partStart = i;
Chris@169 1154 partShift = shift;
Chris@343 1155
Chris@343 1156 onOffsetFeatures.push_back(makeOnsetFeature(i,
Chris@343 1157 note,
Chris@343 1158 partShift,
Chris@343 1159 partStrength));
Chris@343 1160
Chris@320 1161 partStrength = 0;
Chris@169 1162 }
Chris@169 1163 }
Chris@169 1164
Chris@320 1165 if (strength > partStrength) {
Chris@320 1166 partStrength = strength;
Chris@169 1167 }
Chris@169 1168 }
Chris@169 1169
Chris@169 1170 if (end >= partStart + partThreshold) {
Chris@343 1171
Chris@252 1172 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 1173 end,
Chris@252 1174 note,
Chris@252 1175 partShift,
Chris@320 1176 partStrength));
Chris@343 1177
Chris@343 1178 onOffsetFeatures.push_back(makeOffsetFeature(end,
Chris@343 1179 note,
Chris@343 1180 partShift));
Chris@349 1181
Chris@349 1182 } else if (partStart > start) {
Chris@349 1183
Chris@349 1184 // we have emitted an onset for this, so must add an offset
Chris@349 1185 onOffsetFeatures.push_back(makeOffsetFeature(end,
Chris@349 1186 note,
Chris@349 1187 partShift));
Chris@169 1188 }
Chris@169 1189 }
Chris@252 1190
Chris@319 1191 void
Chris@336 1192 Silvet::emitOnset(int start, int note, FeatureList &onOffsetFeatures)
Chris@319 1193 {
Chris@319 1194 int len = int(m_pianoRoll.size());
Chris@320 1195
Chris@320 1196 double onsetStrength = 0;
Chris@319 1197
Chris@319 1198 int shift = 0;
Chris@336 1199 if (getShiftCount() > 1) {
Chris@319 1200 shift = m_pianoRollShifts[start][note];
Chris@319 1201 }
Chris@319 1202
Chris@319 1203 for (int i = start; i < len; ++i) {
Chris@319 1204 double strength = m_pianoRoll[i][note];
Chris@320 1205 if (strength > onsetStrength) {
Chris@320 1206 onsetStrength = strength;
Chris@319 1207 }
Chris@319 1208 }
Chris@319 1209
Chris@336 1210 if (onsetStrength == 0) return;
Chris@336 1211
Chris@336 1212 onOffsetFeatures.push_back(makeOnsetFeature(start,
Chris@336 1213 note,
Chris@336 1214 shift,
Chris@336 1215 onsetStrength));
Chris@336 1216 }
Chris@336 1217
Chris@309 1218 RealTime
Chris@309 1219 Silvet::getColumnTimestamp(int column)
Chris@309 1220 {
Chris@309 1221 double columnDuration = 1.0 / m_colsPerSec;
Chris@309 1222 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
Chris@309 1223
Chris@309 1224 return m_startTime + RealTime::fromSeconds
Chris@309 1225 (columnDuration * (column - postFilterLatency) + 0.02);
Chris@309 1226 }
Chris@309 1227
Chris@252 1228 Silvet::Feature
Chris@252 1229 Silvet::makeNoteFeature(int start,
Chris@252 1230 int end,
Chris@252 1231 int note,
Chris@252 1232 int shift,
Chris@320 1233 double strength)
Chris@252 1234 {
Chris@252 1235 Feature f;
Chris@252 1236
Chris@252 1237 f.hasTimestamp = true;
Chris@309 1238 f.timestamp = getColumnTimestamp(start);
Chris@252 1239
Chris@252 1240 f.hasDuration = true;
Chris@309 1241 f.duration = getColumnTimestamp(end) - f.timestamp;
Chris@252 1242
Chris@252 1243 f.values.clear();
Chris@336 1244 f.values.push_back(getNoteFrequency(note, shift));
Chris@320 1245 f.values.push_back(getVelocityFor(strength, start));
Chris@252 1246
Chris@336 1247 f.label = getNoteName(note, shift);
Chris@252 1248
Chris@252 1249 return f;
Chris@252 1250 }
Chris@252 1251
Chris@319 1252 Silvet::Feature
Chris@319 1253 Silvet::makeOnsetFeature(int start,
Chris@319 1254 int note,
Chris@319 1255 int shift,
Chris@320 1256 double strength)
Chris@319 1257 {
Chris@319 1258 Feature f;
Chris@319 1259
Chris@319 1260 f.hasTimestamp = true;
Chris@319 1261 f.timestamp = getColumnTimestamp(start);
Chris@319 1262
Chris@319 1263 f.hasDuration = false;
Chris@319 1264
Chris@319 1265 f.values.clear();
Chris@336 1266 f.values.push_back(getNoteFrequency(note, shift));
Chris@320 1267 f.values.push_back(getVelocityFor(strength, start));
Chris@319 1268
Chris@336 1269 f.label = getNoteName(note, shift);
Chris@336 1270
Chris@336 1271 return f;
Chris@336 1272 }
Chris@336 1273
Chris@336 1274 Silvet::Feature
Chris@336 1275 Silvet::makeOffsetFeature(int col,
Chris@336 1276 int note,
Chris@336 1277 int shift)
Chris@336 1278 {
Chris@336 1279 Feature f;
Chris@336 1280
Chris@336 1281 f.hasTimestamp = true;
Chris@336 1282 f.timestamp = getColumnTimestamp(col);
Chris@336 1283
Chris@336 1284 f.hasDuration = false;
Chris@336 1285
Chris@336 1286 f.values.clear();
Chris@336 1287 f.values.push_back(getNoteFrequency(note, shift));
Chris@336 1288 f.values.push_back(0); // velocity 0 for offset
Chris@336 1289
Chris@336 1290 f.label = getNoteName(note, shift) + " off";
Chris@319 1291
Chris@319 1292 return f;
Chris@319 1293 }
Chris@319 1294
Chris@320 1295 int
Chris@320 1296 Silvet::getVelocityFor(double strength, int column)
Chris@320 1297 {
Chris@320 1298 RealTime rt = getColumnTimestamp(column + 1);
Chris@320 1299
Chris@320 1300 float inputGain = getInputGainAt(rt);
Chris@320 1301
Chris@320 1302 double scale = 2.0;
Chris@320 1303 if (m_mode == LiveMode) scale = 20.0;
Chris@320 1304
Chris@320 1305 double velocity = round((strength * scale) / inputGain);
Chris@320 1306
Chris@320 1307 if (velocity > 127.0) velocity = 127.0;
Chris@320 1308 if (velocity < 1.0) velocity = 1.0; // assume surpassed 0 threshold already
Chris@320 1309
Chris@320 1310 return int(velocity);
Chris@320 1311 }
Chris@320 1312
Chris@252 1313 float
Chris@252 1314 Silvet::getInputGainAt(RealTime t)
Chris@252 1315 {
Chris@252 1316 map<RealTime, float>::const_iterator i = m_inputGains.lower_bound(t);
Chris@252 1317
Chris@252 1318 if (i == m_inputGains.end()) {
Chris@252 1319 if (i != m_inputGains.begin()) {
Chris@252 1320 --i;
Chris@252 1321 } else {
Chris@252 1322 return 1.f; // no data
Chris@252 1323 }
Chris@252 1324 }
Chris@252 1325
Chris@252 1326 // cerr << "gain at time " << t << " = " << i->second << endl;
Chris@252 1327
Chris@252 1328 return i->second;
Chris@252 1329 }
Chris@252 1330