annotate src/Silvet.cpp @ 334:806b2ea65416 livemode

Detect repeated notes
author Chris Cannam
date Thu, 25 Jun 2015 14:18:44 +0100
parents 19c17cd0c7d8
children d861f86f2b17
rev   line source
Chris@31 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@31 2
Chris@31 3 /*
Chris@31 4 Silvet
Chris@31 5
Chris@31 6 A Vamp plugin for note transcription.
Chris@31 7 Centre for Digital Music, Queen Mary University of London.
Chris@31 8
Chris@31 9 This program is free software; you can redistribute it and/or
Chris@31 10 modify it under the terms of the GNU General Public License as
Chris@31 11 published by the Free Software Foundation; either version 2 of the
Chris@31 12 License, or (at your option) any later version. See the file
Chris@31 13 COPYING included with this distribution for more information.
Chris@31 14 */
Chris@31 15
Chris@31 16 #include "Silvet.h"
Chris@34 17 #include "EM.h"
Chris@31 18
Chris@152 19 #include <cq/CQSpectrogram.h>
Chris@31 20
Chris@152 21 #include "MedianFilter.h"
Chris@152 22 #include "constant-q-cpp/src/dsp/Resampler.h"
Chris@246 23 #include "flattendynamics-ladspa.h"
Chris@298 24 #include "LiveInstruments.h"
Chris@31 25
Chris@31 26 #include <vector>
Chris@312 27 #include <future>
Chris@31 28
Chris@32 29 #include <cstdio>
Chris@32 30
Chris@31 31 using std::vector;
Chris@48 32 using std::cout;
Chris@31 33 using std::cerr;
Chris@31 34 using std::endl;
Chris@311 35 using std::pair;
Chris@312 36 using std::future;
Chris@312 37 using std::async;
Chris@40 38 using Vamp::RealTime;
Chris@31 39
// Sample rate at which the transcription pipeline runs. reset() creates
// a Resampler whenever the plugin's input rate differs from this value.
Chris@31 40 static int processingSampleRate = 44100;
Chris@298 41
// Constant-Q bins per semitone; reset() multiplies the chosen value by
// 12 to obtain the bins-per-octave figure (Live mode uses the first,
// the other modes use the second).
Chris@298 42 static int binsPerSemitoneLive = 1;
Chris@298 43 static int binsPerSemitoneNormal = 5;
Chris@170 44
// Inclusive range of input sample rates accepted by initialise().
Chris@272 45 static int minInputSampleRate = 100;
Chris@272 46 static int maxInputSampleRate = 192000;
Chris@272 47
// Processing mode used by the constructor and advertised as the default
// value of the "mode" parameter.
Chris@316 48 static const Silvet::ProcessingMode defaultMode = Silvet::HighQualityMode;
Chris@316 49
Chris@31 50 Silvet::Silvet(float inputSampleRate) :
Chris@31 51 Plugin(inputSampleRate),
Chris@161 52 m_instruments(InstrumentPack::listInstrumentPacks()),
Chris@298 53 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)),
Chris@31 54 m_resampler(0),
Chris@246 55 m_flattener(0),
Chris@110 56 m_cq(0),
Chris@316 57 m_mode(defaultMode),
Chris@166 58 m_fineTuning(false),
Chris@178 59 m_instrument(0),
Chris@313 60 m_colsPerSec(50),
Chris@313 61 m_haveStartTime(false)
Chris@31 62 {
Chris@31 63 }
Chris@31 64
Chris@31 65 Silvet::~Silvet()
Chris@31 66 {
Chris@31 67 delete m_resampler;
Chris@246 68 delete m_flattener;
Chris@31 69 delete m_cq;
Chris@41 70 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 71 delete m_postFilter[i];
Chris@41 72 }
Chris@31 73 }
Chris@31 74
Chris@31 75 string
Chris@31 76 Silvet::getIdentifier() const
Chris@31 77 {
Chris@31 78 return "silvet";
Chris@31 79 }
Chris@31 80
Chris@31 81 string
Chris@31 82 Silvet::getName() const
Chris@31 83 {
Chris@31 84 return "Silvet Note Transcription";
Chris@31 85 }
Chris@31 86
Chris@31 87 string
Chris@31 88 Silvet::getDescription() const
Chris@31 89 {
Chris@191 90 return "Estimate the note onsets, pitches, and durations that make up a music recording.";
Chris@31 91 }
Chris@31 92
Chris@31 93 string
Chris@31 94 Silvet::getMaker() const
Chris@31 95 {
Chris@191 96 return "Queen Mary, University of London";
Chris@31 97 }
Chris@31 98
Chris@31 99 int
Chris@31 100 Silvet::getPluginVersion() const
Chris@31 101 {
Chris@309 102 return 3;
Chris@31 103 }
Chris@31 104
Chris@31 105 string
Chris@31 106 Silvet::getCopyright() const
Chris@31 107 {
Chris@191 108 return "Method by Emmanouil Benetos and Simon Dixon; plugin by Chris Cannam and Emmanouil Benetos. GPL licence.";
Chris@31 109 }
Chris@31 110
Chris@31 111 Silvet::InputDomain
Chris@31 112 Silvet::getInputDomain() const
Chris@31 113 {
Chris@31 114 return TimeDomain;
Chris@31 115 }
Chris@31 116
Chris@31 117 size_t
Chris@31 118 Silvet::getPreferredBlockSize() const
Chris@31 119 {
Chris@31 120 return 0;
Chris@31 121 }
Chris@31 122
Chris@31 123 size_t
Chris@31 124 Silvet::getPreferredStepSize() const
Chris@31 125 {
Chris@31 126 return 0;
Chris@31 127 }
Chris@31 128
Chris@31 129 size_t
Chris@31 130 Silvet::getMinChannelCount() const
Chris@31 131 {
Chris@31 132 return 1;
Chris@31 133 }
Chris@31 134
Chris@31 135 size_t
Chris@31 136 Silvet::getMaxChannelCount() const
Chris@31 137 {
Chris@31 138 return 1;
Chris@31 139 }
Chris@31 140
Chris@31 141 Silvet::ParameterList
Chris@31 142 Silvet::getParameterDescriptors() const
Chris@31 143 {
Chris@31 144 ParameterList list;
Chris@110 145
Chris@110 146 ParameterDescriptor desc;
Chris@110 147 desc.identifier = "mode";
Chris@110 148 desc.name = "Processing mode";
Chris@110 149 desc.unit = "";
Chris@297 150 desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode is tuned in favour of overall speed; Live mode is tuned in favour of lower latency; while Intensive mode (the default) will almost always produce the best results.";
Chris@110 151 desc.minValue = 0;
Chris@297 152 desc.maxValue = 2;
Chris@316 153 desc.defaultValue = int(defaultMode);
Chris@110 154 desc.isQuantized = true;
Chris@110 155 desc.quantizeStep = 1;
Chris@166 156 desc.valueNames.push_back("Draft (faster)");
Chris@165 157 desc.valueNames.push_back("Intensive (higher quality)");
Chris@297 158 desc.valueNames.push_back("Live (lower latency)");
Chris@161 159 list.push_back(desc);
Chris@161 160
Chris@176 161 desc.identifier = "instrument";
Chris@176 162 desc.name = "Instrument";
Chris@161 163 desc.unit = "";
Chris@271 164 desc.description = "The instrument or instruments known to be present in the recording. This affects the set of instrument templates used, as well as the expected level of polyphony in the output. Using a more limited set of instruments than the default will also make the plugin run faster.\nNote that this plugin cannot isolate instruments: you can't use this setting to request notes from only one instrument in a recording with several. Instead, use this as a hint to the plugin about which instruments are actually present.";
Chris@161 165 desc.minValue = 0;
Chris@162 166 desc.maxValue = m_instruments.size()-1;
Chris@162 167 desc.defaultValue = 0;
Chris@161 168 desc.isQuantized = true;
Chris@161 169 desc.quantizeStep = 1;
Chris@161 170 desc.valueNames.clear();
Chris@162 171 for (int i = 0; i < int(m_instruments.size()); ++i) {
Chris@162 172 desc.valueNames.push_back(m_instruments[i].name);
Chris@162 173 }
Chris@166 174 list.push_back(desc);
Chris@161 175
Chris@166 176 desc.identifier = "finetune";
Chris@166 177 desc.name = "Return fine pitch estimates";
Chris@166 178 desc.unit = "";
Chris@271 179 desc.description = "Return pitch estimates at finer than semitone resolution. This works only in Intensive mode. Notes that appear to drift in pitch will be split up into shorter notes with individually finer pitches.";
Chris@166 180 desc.minValue = 0;
Chris@166 181 desc.maxValue = 1;
Chris@166 182 desc.defaultValue = 0;
Chris@166 183 desc.isQuantized = true;
Chris@166 184 desc.quantizeStep = 1;
Chris@166 185 desc.valueNames.clear();
Chris@110 186 list.push_back(desc);
Chris@110 187
Chris@31 188 return list;
Chris@31 189 }
Chris@31 190
Chris@31 191 float
Chris@31 192 Silvet::getParameter(string identifier) const
Chris@31 193 {
Chris@110 194 if (identifier == "mode") {
Chris@297 195 return (float)(int)m_mode;
Chris@166 196 } else if (identifier == "finetune") {
Chris@166 197 return m_fineTuning ? 1.f : 0.f;
Chris@176 198 } else if (identifier == "instrument") {
Chris@162 199 return m_instrument;
Chris@110 200 }
Chris@31 201 return 0;
Chris@31 202 }
Chris@31 203
Chris@31 204 void
Chris@31 205 Silvet::setParameter(string identifier, float value)
Chris@31 206 {
Chris@110 207 if (identifier == "mode") {
Chris@297 208 m_mode = (ProcessingMode)(int)(value + 0.5);
Chris@166 209 } else if (identifier == "finetune") {
Chris@166 210 m_fineTuning = (value > 0.5);
Chris@176 211 } else if (identifier == "instrument") {
Chris@162 212 m_instrument = lrintf(value);
Chris@110 213 }
Chris@31 214 }
Chris@31 215
Chris@31 216 Silvet::ProgramList
Chris@31 217 Silvet::getPrograms() const
Chris@31 218 {
Chris@31 219 ProgramList list;
Chris@31 220 return list;
Chris@31 221 }
Chris@31 222
Chris@31 223 string
Chris@31 224 Silvet::getCurrentProgram() const
Chris@31 225 {
Chris@31 226 return "";
Chris@31 227 }
Chris@31 228
Chris@31 229 void
Chris@31 230 Silvet::selectProgram(string name)
Chris@31 231 {
Chris@31 232 }
Chris@31 233
Chris@31 234 Silvet::OutputList
Chris@31 235 Silvet::getOutputDescriptors() const
Chris@31 236 {
Chris@31 237 OutputList list;
Chris@31 238
Chris@31 239 OutputDescriptor d;
Chris@51 240 d.identifier = "notes";
Chris@51 241 d.name = "Note transcription";
Chris@329 242 d.description = "Overall note transcription. Each note has time, duration, estimated fundamental frequency, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
Chris@41 243 d.unit = "Hz";
Chris@31 244 d.hasFixedBinCount = true;
Chris@31 245 d.binCount = 2;
Chris@41 246 d.binNames.push_back("Frequency");
Chris@31 247 d.binNames.push_back("Velocity");
Chris@31 248 d.hasKnownExtents = false;
Chris@31 249 d.isQuantized = false;
Chris@31 250 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@246 251 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@31 252 d.hasDuration = true;
Chris@32 253 m_notesOutputNo = list.size();
Chris@32 254 list.push_back(d);
Chris@32 255
Chris@319 256 d.identifier = "onsets";
Chris@319 257 d.name = "Note onsets";
Chris@323 258 d.description = "Note onsets, without durations. These can be calculated sooner than complete notes, because it isn't necessary to wait for a note to finish before returning its feature. Each event has time, estimated fundamental frequency in Hz, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
Chris@319 259 d.unit = "Hz";
Chris@319 260 d.hasFixedBinCount = true;
Chris@319 261 d.binCount = 2;
Chris@319 262 d.binNames.push_back("Frequency");
Chris@319 263 d.binNames.push_back("Velocity");
Chris@319 264 d.hasKnownExtents = false;
Chris@319 265 d.isQuantized = false;
Chris@319 266 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@319 267 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@319 268 d.hasDuration = false;
Chris@319 269 m_onsetsOutputNo = list.size();
Chris@319 270 list.push_back(d);
Chris@319 271
Chris@178 272 d.identifier = "timefreq";
Chris@178 273 d.name = "Time-frequency distribution";
Chris@271 274 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
Chris@178 275 d.unit = "";
Chris@178 276 d.hasFixedBinCount = true;
Chris@298 277 d.binCount = getPack(0).templateHeight;
Chris@178 278 d.binNames.clear();
Chris@178 279 if (m_cq) {
Chris@294 280 char name[50];
Chris@298 281 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@178 282 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@178 283 // lowest-frequency 55 bins have been dropped, for a
Chris@178 284 // 545-bin template. The native CQ bins go high->low
Chris@178 285 // frequency though, so these are still the first 545 bins
Chris@178 286 // as reported by getBinFrequency, though in reverse order
Chris@178 287 float freq = m_cq->getBinFrequency
Chris@298 288 (getPack(0).templateHeight - i - 1);
Chris@178 289 sprintf(name, "%.1f Hz", freq);
Chris@178 290 d.binNames.push_back(name);
Chris@178 291 }
Chris@178 292 }
Chris@178 293 d.hasKnownExtents = false;
Chris@178 294 d.isQuantized = false;
Chris@178 295 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@178 296 d.sampleRate = m_colsPerSec;
Chris@178 297 d.hasDuration = false;
Chris@178 298 m_fcqOutputNo = list.size();
Chris@178 299 list.push_back(d);
Chris@178 300
Chris@294 301 d.identifier = "pitchactivation";
Chris@294 302 d.name = "Pitch activation distribution";
Chris@294 303 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction.";
Chris@294 304 d.unit = "";
Chris@294 305 d.hasFixedBinCount = true;
Chris@298 306 d.binCount = getPack(0).templateNoteCount;
Chris@294 307 d.binNames.clear();
Chris@294 308 if (m_cq) {
Chris@298 309 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@320 310 d.binNames.push_back(getNoteName(i, 0, 1));
Chris@294 311 }
Chris@294 312 }
Chris@294 313 d.hasKnownExtents = false;
Chris@294 314 d.isQuantized = false;
Chris@294 315 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@294 316 d.sampleRate = m_colsPerSec;
Chris@294 317 d.hasDuration = false;
Chris@294 318 m_pitchOutputNo = list.size();
Chris@294 319 list.push_back(d);
Chris@294 320
Chris@309 321 d.identifier = "chroma";
Chris@309 322 d.name = "Pitch chroma distribution";
Chris@309 323 d.description = "Pitch chroma distribution formed by wrapping the un-thresholded pitch activation distribution into a single octave of semitone bins.";
Chris@309 324 d.unit = "";
Chris@309 325 d.hasFixedBinCount = true;
Chris@309 326 d.binCount = 12;
Chris@309 327 d.binNames.clear();
Chris@309 328 if (m_cq) {
Chris@309 329 for (int i = 0; i < 12; ++i) {
Chris@320 330 d.binNames.push_back(getChromaName(i));
Chris@309 331 }
Chris@309 332 }
Chris@309 333 d.hasKnownExtents = false;
Chris@309 334 d.isQuantized = false;
Chris@309 335 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@309 336 d.sampleRate = m_colsPerSec;
Chris@309 337 d.hasDuration = false;
Chris@309 338 m_chromaOutputNo = list.size();
Chris@309 339 list.push_back(d);
Chris@309 340
Chris@302 341 d.identifier = "templates";
Chris@302 342 d.name = "Templates";
Chris@302 343 d.description = "Constant-Q spectral templates for the selected instrument pack.";
Chris@302 344 d.unit = "";
Chris@302 345 d.hasFixedBinCount = true;
Chris@302 346 d.binCount = getPack(0).templateHeight;
Chris@302 347 d.binNames.clear();
Chris@302 348 if (m_cq) {
Chris@302 349 char name[50];
Chris@302 350 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@302 351 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@302 352 // lowest-frequency 55 bins have been dropped, for a
Chris@302 353 // 545-bin template. The native CQ bins go high->low
Chris@302 354 // frequency though, so these are still the first 545 bins
Chris@302 355 // as reported by getBinFrequency, though in reverse order
Chris@302 356 float freq = m_cq->getBinFrequency
Chris@302 357 (getPack(0).templateHeight - i - 1);
Chris@302 358 sprintf(name, "%.1f Hz", freq);
Chris@302 359 d.binNames.push_back(name);
Chris@302 360 }
Chris@302 361 }
Chris@302 362 d.hasKnownExtents = false;
Chris@302 363 d.isQuantized = false;
Chris@302 364 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@302 365 d.sampleRate = m_colsPerSec;
Chris@302 366 d.hasDuration = false;
Chris@302 367 m_templateOutputNo = list.size();
Chris@302 368 list.push_back(d);
Chris@302 369
Chris@31 370 return list;
Chris@31 371 }
Chris@31 372
Chris@38 373 std::string
Chris@320 374 Silvet::getChromaName(int pitch) const
Chris@38 375 {
Chris@38 376 static const char *names[] = {
Chris@38 377 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
Chris@38 378 };
Chris@38 379
Chris@309 380 return names[pitch];
Chris@309 381 }
Chris@309 382
Chris@309 383 std::string
Chris@320 384 Silvet::getNoteName(int note, int shift, int shiftCount) const
Chris@309 385 {
Chris@320 386 string n = getChromaName(note % 12);
Chris@38 387
Chris@175 388 int oct = (note + 9) / 12;
Chris@38 389
Chris@175 390 char buf[30];
Chris@175 391
Chris@175 392 float pshift = 0.f;
Chris@175 393 if (shiftCount > 1) {
Chris@320 394 // see getNoteFrequency below
Chris@175 395 pshift =
Chris@175 396 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 397 }
Chris@175 398
Chris@175 399 if (pshift > 0.f) {
Chris@309 400 sprintf(buf, "%s%d+%dc", n.c_str(), oct, int(round(pshift * 100)));
Chris@175 401 } else if (pshift < 0.f) {
Chris@309 402 sprintf(buf, "%s%d-%dc", n.c_str(), oct, int(round((-pshift) * 100)));
Chris@175 403 } else {
Chris@309 404 sprintf(buf, "%s%d", n.c_str(), oct);
Chris@175 405 }
Chris@38 406
Chris@38 407 return buf;
Chris@38 408 }
Chris@38 409
Chris@41 410 float
Chris@320 411 Silvet::getNoteFrequency(int note, int shift, int shiftCount) const
Chris@41 412 {
Chris@169 413 // Convert shift number to a pitch shift. The given shift number
Chris@169 414 // is an offset into the template array, which starts with some
Chris@169 415 // zeros, followed by the template, then some trailing zeros.
Chris@169 416 //
Chris@169 417 // Example: if we have templateMaxShift == 2 and thus shiftCount
Chris@169 418 // == 5, then the number will be in the range 0-4 and the template
Chris@169 419 // will have 2 zeros at either end. Thus number 2 represents the
Chris@169 420 // template "as recorded", for a pitch shift of 0; smaller indices
Chris@169 421 // represent moving the template *up* in pitch (by introducing
Chris@169 422 // zeros at the start, which is the low-frequency end), for a
Chris@169 423 // positive pitch shift; and higher values represent moving it
Chris@169 424 // down in pitch, for a negative pitch shift.
Chris@169 425
Chris@175 426 float pshift = 0.f;
Chris@175 427 if (shiftCount > 1) {
Chris@175 428 pshift =
Chris@175 429 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 430 }
Chris@169 431
Chris@301 432 float freq = float(27.5 * pow(2.0, (note + pshift) / 12.0));
Chris@301 433
Chris@303 434 // cerr << "note = " << note << ", shift = " << shift << ", shiftCount = "
Chris@303 435 // << shiftCount << ", obtained freq = " << freq << endl;
Chris@301 436
Chris@301 437 return freq;
Chris@41 438 }
Chris@41 439
Chris@31 440 bool
Chris@31 441 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@31 442 {
Chris@272 443 if (m_inputSampleRate < minInputSampleRate ||
Chris@272 444 m_inputSampleRate > maxInputSampleRate) {
Chris@272 445 cerr << "Silvet::initialise: Unsupported input sample rate "
Chris@272 446 << m_inputSampleRate << " (supported min " << minInputSampleRate
Chris@272 447 << ", max " << maxInputSampleRate << ")" << endl;
Chris@272 448 return false;
Chris@272 449 }
Chris@272 450
Chris@31 451 if (channels < getMinChannelCount() ||
Chris@272 452 channels > getMaxChannelCount()) {
Chris@272 453 cerr << "Silvet::initialise: Unsupported channel count " << channels
Chris@272 454 << " (supported min " << getMinChannelCount() << ", max "
Chris@272 455 << getMaxChannelCount() << ")" << endl;
Chris@272 456 return false;
Chris@272 457 }
Chris@31 458
Chris@31 459 if (stepSize != blockSize) {
Chris@31 460 cerr << "Silvet::initialise: Step size must be the same as block size ("
Chris@31 461 << stepSize << " != " << blockSize << ")" << endl;
Chris@31 462 return false;
Chris@31 463 }
Chris@31 464
Chris@31 465 m_blockSize = blockSize;
Chris@31 466
Chris@31 467 reset();
Chris@31 468
Chris@31 469 return true;
Chris@31 470 }
Chris@31 471
Chris@31 472 void
Chris@31 473 Silvet::reset()
Chris@31 474 {
Chris@31 475 delete m_resampler;
Chris@246 476 delete m_flattener;
Chris@31 477 delete m_cq;
Chris@31 478
Chris@31 479 if (m_inputSampleRate != processingSampleRate) {
Chris@31 480 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
Chris@31 481 } else {
Chris@31 482 m_resampler = 0;
Chris@31 483 }
Chris@31 484
Chris@246 485 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
Chris@246 486 m_flattener->reset();
Chris@246 487
Chris@301 488 // this happens to be processingSampleRate / 3, and is the top
Chris@301 489 // freq used for the EM templates:
Chris@301 490 double maxFreq = 14700;
Chris@301 491
Chris@301 492 if (m_mode == LiveMode) {
Chris@301 493 // We only have 12 bpo rather than 60, so we need the top bin
Chris@301 494 // to be the middle one of the top 5, i.e. 2/5 of a semitone
Chris@301 495 // lower than 14700
Chris@301 496 maxFreq *= powf(2.0, -1.0 / 30.0);
Chris@301 497 }
Chris@301 498
Chris@173 499 double minFreq = 27.5;
Chris@173 500
Chris@297 501 if (m_mode != HighQualityMode) {
Chris@173 502 // We don't actually return any notes from the bottom octave,
Chris@173 503 // so we can just pad with zeros
Chris@173 504 minFreq *= 2;
Chris@173 505 }
Chris@173 506
Chris@298 507 int bpo = 12 *
Chris@298 508 (m_mode == LiveMode ? binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@301 509
Chris@154 510 CQParameters params(processingSampleRate,
Chris@173 511 minFreq,
Chris@303 512 maxFreq,
Chris@298 513 bpo);
Chris@154 514
Chris@325 515 params.q = 0.8;
Chris@325 516 params.atomHopFactor = (m_mode == LiveMode ? 1.0 : 0.3);
Chris@154 517 params.threshold = 0.0005;
Chris@317 518 params.decimator =
Chris@317 519 (m_mode == LiveMode ?
Chris@317 520 CQParameters::FasterDecimator : CQParameters::BetterDecimator);
Chris@172 521 params.window = CQParameters::Hann;
Chris@154 522
Chris@154 523 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
Chris@31 524
Chris@303 525 // cerr << "CQ bins = " << m_cq->getTotalBins() << endl;
Chris@303 526 // cerr << "CQ min freq = " << m_cq->getMinFrequency() << " (and for confirmation, freq of bin 0 = " << m_cq->getBinFrequency(0) << ")" << endl;
Chris@297 527
Chris@297 528 m_colsPerSec = (m_mode == DraftMode ? 25 : 50);
Chris@165 529
Chris@41 530 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 531 delete m_postFilter[i];
Chris@41 532 }
Chris@41 533 m_postFilter.clear();
Chris@303 534 int postFilterLength = 3;
Chris@298 535 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@303 536 m_postFilter.push_back(new MedianFilter<double>(postFilterLength));
Chris@41 537 }
Chris@41 538 m_pianoRoll.clear();
Chris@246 539 m_inputGains.clear();
Chris@32 540 m_columnCount = 0;
Chris@272 541 m_resampledCount = 0;
Chris@40 542 m_startTime = RealTime::zeroTime;
Chris@313 543 m_haveStartTime = false;
Chris@31 544 }
Chris@31 545
Chris@31 546 Silvet::FeatureSet
Chris@31 547 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
Chris@31 548 {
Chris@302 549 FeatureSet fs;
Chris@302 550
Chris@313 551 if (!m_haveStartTime) {
Chris@314 552
Chris@40 553 m_startTime = timestamp;
Chris@313 554 m_haveStartTime = true;
Chris@314 555
Chris@302 556 insertTemplateFeatures(fs);
Chris@40 557 }
Chris@246 558
Chris@246 559 vector<float> flattened(m_blockSize);
Chris@246 560 float gain = 1.f;
Chris@246 561 m_flattener->connectInputPort
Chris@246 562 (FlattenDynamics::AudioInputPort, inputBuffers[0]);
Chris@246 563 m_flattener->connectOutputPort
Chris@246 564 (FlattenDynamics::AudioOutputPort, &flattened[0]);
Chris@246 565 m_flattener->connectOutputPort
Chris@246 566 (FlattenDynamics::GainOutputPort, &gain);
Chris@246 567 m_flattener->process(m_blockSize);
Chris@246 568
Chris@252 569 m_inputGains[timestamp] = gain;
Chris@40 570
Chris@31 571 vector<double> data;
Chris@40 572 for (int i = 0; i < m_blockSize; ++i) {
Chris@246 573 double d = flattened[i];
Chris@235 574 data.push_back(d);
Chris@40 575 }
Chris@31 576
Chris@31 577 if (m_resampler) {
Chris@272 578
Chris@31 579 data = m_resampler->process(data.data(), data.size());
Chris@272 580
Chris@272 581 int hadCount = m_resampledCount;
Chris@272 582 m_resampledCount += data.size();
Chris@272 583
Chris@272 584 int resamplerLatency = m_resampler->getLatency();
Chris@272 585
Chris@272 586 if (hadCount < resamplerLatency) {
Chris@272 587 int stillToDrop = resamplerLatency - hadCount;
Chris@272 588 if (stillToDrop >= int(data.size())) {
Chris@302 589 return fs;
Chris@272 590 } else {
Chris@272 591 data = vector<double>(data.begin() + stillToDrop, data.end());
Chris@272 592 }
Chris@272 593 }
Chris@31 594 }
Chris@272 595
Chris@32 596 Grid cqout = m_cq->process(data);
Chris@302 597 transcribe(cqout, fs);
Chris@51 598 return fs;
Chris@34 599 }
Chris@34 600
Chris@34 601 Silvet::FeatureSet
Chris@34 602 Silvet::getRemainingFeatures()
Chris@34 603 {
Chris@145 604 Grid cqout = m_cq->getRemainingOutput();
Chris@302 605 FeatureSet fs;
Chris@302 606 if (m_columnCount == 0) {
Chris@302 607 // process() was never called, but we still want these
Chris@302 608 insertTemplateFeatures(fs);
Chris@302 609 } else {
Chris@302 610 transcribe(cqout, fs);
Chris@302 611 }
Chris@51 612 return fs;
Chris@34 613 }
Chris@34 614
Chris@302 615 void
Chris@302 616 Silvet::insertTemplateFeatures(FeatureSet &fs)
Chris@302 617 {
Chris@302 618 const InstrumentPack &pack = getPack(m_instrument);
Chris@302 619 for (int i = 0; i < int(pack.templates.size()) * pack.templateNoteCount; ++i) {
Chris@302 620 RealTime timestamp = RealTime::fromSeconds(double(i) / m_colsPerSec);
Chris@302 621 Feature f;
Chris@302 622 char buffer[50];
Chris@302 623 sprintf(buffer, "Note %d", i + 1);
Chris@302 624 f.label = buffer;
Chris@302 625 f.hasTimestamp = true;
Chris@302 626 f.timestamp = timestamp;
Chris@302 627 f.values = pack.templates[i / pack.templateNoteCount]
Chris@302 628 .data[i % pack.templateNoteCount];
Chris@302 629 fs[m_templateOutputNo].push_back(f);
Chris@302 630 }
Chris@302 631 }
Chris@302 632
Chris@302 633 void
Chris@302 634 Silvet::transcribe(const Grid &cqout, Silvet::FeatureSet &fs)
Chris@34 635 {
Chris@32 636 Grid filtered = preProcess(cqout);
Chris@31 637
Chris@302 638 if (filtered.empty()) return;
Chris@170 639
Chris@298 640 const InstrumentPack &pack(getPack(m_instrument));
Chris@104 641
Chris@325 642 int width = filtered.size();
Chris@325 643
Chris@325 644 double silenceThreshold = 0.01;
Chris@325 645
Chris@325 646 for (int i = 0; i < width; ++i) {
Chris@325 647
Chris@325 648 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1 + i);
Chris@325 649 float inputGain = getInputGainAt(timestamp);
Chris@325 650
Chris@178 651 Feature f;
Chris@325 652 double rms = 0.0;
Chris@325 653
Chris@178 654 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@325 655 double v = filtered[i][j];
Chris@325 656 rms += v * v;
Chris@325 657 f.values.push_back(float(v));
Chris@178 658 }
Chris@325 659
Chris@325 660 rms = sqrt(rms / pack.templateHeight);
Chris@325 661 if (rms / inputGain < silenceThreshold) {
Chris@325 662 filtered[i].clear();
Chris@325 663 }
Chris@325 664
Chris@178 665 fs[m_fcqOutputNo].push_back(f);
Chris@178 666 }
Chris@325 667
Chris@311 668 Grid localPitches(width);
Chris@170 669
Chris@297 670 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning;
Chris@170 671 int shiftCount = 1;
Chris@170 672 if (wantShifts) {
Chris@170 673 shiftCount = pack.templateMaxShift * 2 + 1;
Chris@170 674 }
Chris@170 675
Chris@170 676 vector<vector<int> > localBestShifts;
Chris@170 677 if (wantShifts) {
Chris@311 678 localBestShifts = vector<vector<int> >(width);
Chris@170 679 }
Chris@170 680
Chris@312 681 #ifndef MAX_EM_THREADS
Chris@312 682 #define MAX_EM_THREADS 8
Chris@312 683 #endif
Chris@312 684
Chris@317 685 int emThreadCount = MAX_EM_THREADS;
Chris@317 686 if (m_mode == LiveMode && pack.templates.size() == 1) {
Chris@317 687 // The EM step is probably not slow enough to merit it
Chris@317 688 emThreadCount = 1;
Chris@317 689 }
Chris@317 690
Chris@312 691 #if (defined(MAX_EM_THREADS) && (MAX_EM_THREADS > 1))
Chris@317 692 if (emThreadCount > 1) {
Chris@317 693 for (int i = 0; i < width; ) {
Chris@317 694 typedef future<pair<vector<double>, vector<int>>> EMFuture;
Chris@317 695 vector<EMFuture> results;
Chris@317 696 for (int j = 0; j < emThreadCount && i + j < width; ++j) {
Chris@317 697 results.push_back
Chris@317 698 (async(std::launch::async,
Chris@317 699 [&](int index) {
Chris@325 700 return applyEM
Chris@325 701 (pack, filtered.at(index), wantShifts);
Chris@317 702 }, i + j));
Chris@317 703 }
Chris@317 704 for (int j = 0; j < emThreadCount && i + j < width; ++j) {
Chris@317 705 auto out = results[j].get();
Chris@317 706 localPitches[i+j] = out.first;
Chris@317 707 if (wantShifts) localBestShifts[i+j] = out.second;
Chris@317 708 }
Chris@317 709 i += emThreadCount;
Chris@312 710 }
Chris@123 711 }
Chris@312 712 #endif
Chris@317 713
Chris@317 714 if (emThreadCount == 1) {
Chris@317 715 for (int i = 0; i < width; ++i) {
Chris@317 716 auto out = applyEM(pack, filtered.at(i), wantShifts);
Chris@317 717 localPitches[i] = out.first;
Chris@317 718 if (wantShifts) localBestShifts[i] = out.second;
Chris@317 719 }
Chris@317 720 }
Chris@305 721
Chris@166 722 for (int i = 0; i < width; ++i) {
Chris@37 723
Chris@321 724 vector<double> filtered;
Chris@321 725
Chris@321 726 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@321 727 m_postFilter[j]->push(localPitches[i][j]);
Chris@321 728 filtered.push_back(m_postFilter[j]->get());
Chris@321 729 }
Chris@294 730
Chris@309 731 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1);
Chris@309 732 float inputGain = getInputGainAt(timestamp);
Chris@309 733
Chris@294 734 Feature f;
Chris@294 735 for (int j = 0; j < (int)filtered.size(); ++j) {
Chris@309 736 float v = filtered[j];
Chris@294 737 if (v < pack.levelThreshold) v = 0.f;
Chris@309 738 f.values.push_back(v / inputGain);
Chris@294 739 }
Chris@294 740 fs[m_pitchOutputNo].push_back(f);
Chris@309 741
Chris@309 742 f.values.clear();
Chris@309 743 f.values.resize(12);
Chris@309 744 for (int j = 0; j < (int)filtered.size(); ++j) {
Chris@309 745 f.values[j % 12] += filtered[j] / inputGain;
Chris@309 746 }
Chris@309 747 fs[m_chromaOutputNo].push_back(f);
Chris@38 748
Chris@321 749 // This pushes the up-to-max-polyphony activation column to
Chris@321 750 // m_pianoRoll
Chris@323 751 postProcess(filtered, localBestShifts[i], wantShifts);
Chris@321 752
Chris@319 753 auto events = noteTrack(shiftCount);
Chris@319 754
Chris@319 755 FeatureList noteFeatures = events.first;
Chris@123 756 for (FeatureList::const_iterator fi = noteFeatures.begin();
Chris@123 757 fi != noteFeatures.end(); ++fi) {
Chris@123 758 fs[m_notesOutputNo].push_back(*fi);
Chris@40 759 }
Chris@319 760
Chris@319 761 FeatureList onsetFeatures = events.second;
Chris@319 762 for (FeatureList::const_iterator fi = onsetFeatures.begin();
Chris@319 763 fi != onsetFeatures.end(); ++fi) {
Chris@319 764 fs[m_onsetsOutputNo].push_back(*fi);
Chris@319 765 }
Chris@34 766 }
Chris@31 767 }
Chris@31 768
Chris@311 769 pair<vector<double>, vector<int> >
Chris@311 770 Silvet::applyEM(const InstrumentPack &pack,
Chris@311 771 const vector<double> &column,
Chris@311 772 bool wantShifts)
Chris@311 773 {
Chris@311 774 double columnThreshold = 1e-5;
Chris@311 775
Chris@314 776 if (m_mode == LiveMode) {
Chris@325 777 columnThreshold /= 15;
Chris@314 778 }
Chris@314 779
Chris@311 780 vector<double> pitches(pack.templateNoteCount, 0.0);
Chris@311 781 vector<int> bestShifts;
Chris@325 782
Chris@325 783 if (column.empty()) return { pitches, bestShifts };
Chris@311 784
Chris@311 785 double sum = 0.0;
Chris@311 786 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@311 787 sum += column.at(j);
Chris@311 788 }
Chris@311 789 if (sum < columnThreshold) return { pitches, bestShifts };
Chris@311 790
Chris@314 791 EM em(&pack, m_mode == HighQualityMode);
Chris@311 792
Chris@311 793 em.setPitchSparsity(pack.pitchSparsity);
Chris@311 794 em.setSourceSparsity(pack.sourceSparsity);
Chris@311 795
Chris@314 796 int iterations = (m_mode == HighQualityMode ? 20 : 10);
Chris@311 797
Chris@311 798 for (int j = 0; j < iterations; ++j) {
Chris@311 799 em.iterate(column.data());
Chris@311 800 }
Chris@311 801
Chris@311 802 const float *pitchDist = em.getPitchDistribution();
Chris@311 803 const float *const *shiftDist = em.getShifts();
Chris@311 804
Chris@311 805 int shiftCount = 1;
Chris@311 806 if (wantShifts) {
Chris@311 807 shiftCount = pack.templateMaxShift * 2 + 1;
Chris@311 808 }
Chris@311 809
Chris@311 810 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@311 811
Chris@311 812 pitches[j] = pitchDist[j] * sum;
Chris@311 813
Chris@311 814 int bestShift = 0;
Chris@311 815 float bestShiftValue = 0.0;
Chris@311 816 if (wantShifts) {
Chris@311 817 for (int k = 0; k < shiftCount; ++k) {
Chris@311 818 float value = shiftDist[k][j];
Chris@311 819 if (k == 0 || value > bestShiftValue) {
Chris@311 820 bestShiftValue = value;
Chris@311 821 bestShift = k;
Chris@311 822 }
Chris@311 823 }
Chris@311 824 bestShifts.push_back(bestShift);
Chris@311 825 }
Chris@311 826 }
Chris@311 827
Chris@311 828 return { pitches, bestShifts };
Chris@311 829 }
Chris@311 830
Chris@32 831 Silvet::Grid
Chris@32 832 Silvet::preProcess(const Grid &in)
Chris@32 833 {
Chris@32 834 int width = in.size();
Chris@32 835
Chris@165 836 int spacing = processingSampleRate / m_colsPerSec;
Chris@32 837
Chris@165 838 // need to be careful that col spacing is an integer number of samples!
Chris@165 839 assert(spacing * m_colsPerSec == processingSampleRate);
Chris@32 840
Chris@32 841 Grid out;
Chris@32 842
Chris@58 843 // We count the CQ latency in terms of processing hops, but
Chris@58 844 // actually it probably isn't an exact number of hops so this
Chris@58 845 // isn't quite accurate. But the small constant offset is
Chris@165 846 // practically irrelevant compared to the jitter from the frame
Chris@165 847 // size we reduce to in a moment
Chris@33 848 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
Chris@33 849
Chris@298 850 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 851
Chris@32 852 for (int i = 0; i < width; ++i) {
Chris@32 853
Chris@33 854 if (m_columnCount < latentColumns) {
Chris@33 855 ++m_columnCount;
Chris@33 856 continue;
Chris@33 857 }
Chris@33 858
Chris@32 859 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop();
Chris@32 860 int sampleNo = m_columnCount * m_cq->getColumnHop();
Chris@32 861
Chris@32 862 bool select = (sampleNo / spacing != prevSampleNo / spacing);
Chris@32 863
Chris@32 864 if (select) {
Chris@32 865 vector<double> inCol = in[i];
Chris@176 866 vector<double> outCol(pack.templateHeight);
Chris@32 867
Chris@178 868 // In HQ mode, the CQ returns 600 bins and we ignore the
Chris@298 869 // lowest 55 of them (assuming binsPerSemitone == 5).
Chris@178 870 //
Chris@297 871 // In draft and live mode the CQ is an octave shorter,
Chris@300 872 // returning 540 bins or equivalent, so we instead pad
Chris@300 873 // them with an additional 5 or equivalent zeros.
Chris@178 874 //
Chris@178 875 // We also need to reverse the column as we go, since the
Chris@178 876 // raw CQ has the high frequencies first and we need it
Chris@178 877 // the other way around.
Chris@32 878
Chris@298 879 int bps = (m_mode == LiveMode ?
Chris@298 880 binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@298 881
Chris@297 882 if (m_mode == HighQualityMode) {
Chris@178 883 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@298 884 int ix = inCol.size() - j - (11 * bps);
Chris@178 885 outCol[j] = inCol[ix];
Chris@178 886 }
Chris@178 887 } else {
Chris@298 888 for (int j = 0; j < bps; ++j) {
Chris@178 889 outCol[j] = 0.0;
Chris@178 890 }
Chris@298 891 for (int j = bps; j < pack.templateHeight; ++j) {
Chris@298 892 int ix = inCol.size() - j + (bps-1);
Chris@178 893 outCol[j] = inCol[ix];
Chris@178 894 }
Chris@46 895 }
Chris@32 896
Chris@46 897 vector<double> noiseLevel1 =
Chris@298 898 MedianFilter<double>::filter(8 * bps, outCol);
Chris@176 899 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 900 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
Chris@46 901 }
Chris@32 902
Chris@46 903 vector<double> noiseLevel2 =
Chris@298 904 MedianFilter<double>::filter(8 * bps, noiseLevel1);
Chris@176 905 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 906 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
Chris@32 907 }
Chris@32 908
Chris@165 909 out.push_back(outCol);
Chris@32 910 }
Chris@32 911
Chris@32 912 ++m_columnCount;
Chris@32 913 }
Chris@32 914
Chris@32 915 return out;
Chris@32 916 }
Chris@32 917
Chris@321 918 void
Chris@170 919 Silvet::postProcess(const vector<double> &pitches,
Chris@170 920 const vector<int> &bestShifts,
Chris@170 921 bool wantShifts)
Chris@166 922 {
Chris@298 923 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 924
Chris@41 925 // Threshold for level and reduce number of candidate pitches
Chris@41 926
Chris@41 927 typedef std::multimap<double, int> ValueIndexMap;
Chris@41 928
Chris@41 929 ValueIndexMap strengths;
Chris@166 930
Chris@176 931 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@321 932
Chris@321 933 double strength = pitches[j];
Chris@183 934 if (strength < pack.levelThreshold) continue;
Chris@321 935
Chris@321 936 // In live mode with only a 12-bpo CQ, we are very likely to
Chris@321 937 // get clusters of two or three high scores at a time for
Chris@321 938 // neighbouring semitones. Eliminate these by picking only the
Chris@325 939 // peaks (except that we never eliminate a note that has
Chris@325 940 // already been established as currently playing). This means
Chris@325 941 // we can't recognise actual semitone chords if they ever
Chris@325 942 // appear, but it's not as if live mode is good enough for
Chris@325 943 // that to be a big deal anyway.
Chris@321 944 if (m_mode == LiveMode) {
Chris@325 945 if (m_current.find(j) == m_current.end() &&
Chris@325 946 (j == 0 ||
Chris@325 947 j + 1 == pack.templateNoteCount ||
Chris@325 948 pitches[j] < pitches[j-1] ||
Chris@325 949 pitches[j] < pitches[j+1])) {
Chris@325 950 // not a peak or a currently-playing note: skip it
Chris@321 951 continue;
Chris@321 952 }
Chris@321 953 }
Chris@323 954
Chris@168 955 strengths.insert(ValueIndexMap::value_type(strength, j));
Chris@168 956 }
Chris@166 957
Chris@168 958 ValueIndexMap::const_iterator si = strengths.end();
Chris@167 959
Chris@168 960 map<int, double> active;
Chris@168 961 map<int, int> activeShifts;
Chris@168 962
Chris@183 963 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) {
Chris@168 964
Chris@168 965 --si;
Chris@168 966
Chris@168 967 double strength = si->first;
Chris@168 968 int j = si->second;
Chris@168 969
Chris@168 970 active[j] = strength;
Chris@168 971
Chris@170 972 if (wantShifts) {
Chris@170 973 activeShifts[j] = bestShifts[j];
Chris@167 974 }
Chris@41 975 }
Chris@41 976
Chris@168 977 m_pianoRoll.push_back(active);
Chris@170 978
Chris@170 979 if (wantShifts) {
Chris@168 980 m_pianoRollShifts.push_back(activeShifts);
Chris@41 981 }
Chris@294 982
Chris@321 983 return;
Chris@166 984 }
Chris@166 985
Chris@319 986 pair<Vamp::Plugin::FeatureList, Vamp::Plugin::FeatureList>
Chris@168 987 Silvet::noteTrack(int shiftCount)
Chris@166 988 {
Chris@41 989 // Minimum duration pruning, and conversion to notes. We can only
Chris@41 990 // report notes that have just ended (i.e. that are absent in the
Chris@168 991 // latest active set but present in the prior set in the piano
Chris@41 992 // roll) -- any notes that ended earlier will have been reported
Chris@41 993 // already, and if they haven't ended, we don't know their
Chris@41 994 // duration.
Chris@41 995
Chris@168 996 int width = m_pianoRoll.size() - 1;
Chris@168 997
Chris@168 998 const map<int, double> &active = m_pianoRoll[width];
Chris@41 999
Chris@165 1000 double columnDuration = 1.0 / m_colsPerSec;
Chris@165 1001
Chris@165 1002 // only keep notes >= 100ms or thereabouts
Chris@323 1003 double durationThrSec = 0.1;
Chris@323 1004 if (m_mode == LiveMode) durationThrSec = 0.07;
Chris@323 1005 int durationThreshold = floor(durationThrSec / columnDuration); // in cols
Chris@165 1006 if (durationThreshold < 1) durationThreshold = 1;
Chris@41 1007
Chris@319 1008 FeatureList noteFeatures, onsetFeatures;
Chris@41 1009
Chris@41 1010 if (width < durationThreshold + 1) {
Chris@319 1011 return { noteFeatures, onsetFeatures };
Chris@41 1012 }
Chris@41 1013
Chris@55 1014 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
Chris@41 1015 ni != m_pianoRoll[width-1].end(); ++ni) {
Chris@41 1016
Chris@55 1017 int note = ni->first;
Chris@41 1018
Chris@41 1019 int end = width;
Chris@41 1020 int start = end-1;
Chris@41 1021
Chris@41 1022 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
Chris@41 1023 --start;
Chris@41 1024 }
Chris@41 1025 ++start;
Chris@41 1026
Chris@319 1027 int duration = end - start;
Chris@319 1028
Chris@319 1029 if (duration < durationThreshold) {
Chris@41 1030 continue;
Chris@41 1031 }
Chris@41 1032
Chris@319 1033 if (duration == durationThreshold) {
Chris@325 1034 m_current.insert(note);
Chris@319 1035 emitOnset(start, note, shiftCount, onsetFeatures);
Chris@319 1036 }
Chris@319 1037
Chris@319 1038 if (active.find(note) == active.end()) {
Chris@319 1039 // the note was playing but just ended
Chris@325 1040 m_current.erase(note);
Chris@319 1041 emitNote(start, end, note, shiftCount, noteFeatures);
Chris@334 1042 } else { // still playing
Chris@334 1043 // repeated note detection: if level is greater than this
Chris@334 1044 // multiple of its previous value, then we end the note and
Chris@334 1045 // restart it with the same pitch
Chris@334 1046 double restartFactor = 1.5;
Chris@334 1047 if (duration >= durationThreshold * 2 &&
Chris@334 1048 (active.find(note)->second >
Chris@334 1049 restartFactor * m_pianoRoll[width-1][note])) {
Chris@334 1050 m_current.erase(note);
Chris@334 1051 emitNote(start, end-1, note, shiftCount, noteFeatures);
Chris@334 1052 // and remove this so that we start counting the new
Chris@334 1053 // note's duration from the current position
Chris@334 1054 m_pianoRoll[width-1].erase(note);
Chris@334 1055 }
Chris@319 1056 }
Chris@41 1057 }
Chris@41 1058
Chris@62 1059 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
Chris@41 1060
Chris@319 1061 return { noteFeatures, onsetFeatures };
Chris@41 1062 }
Chris@41 1063
Chris@169 1064 void
Chris@169 1065 Silvet::emitNote(int start, int end, int note, int shiftCount,
Chris@169 1066 FeatureList &noteFeatures)
Chris@169 1067 {
Chris@169 1068 int partStart = start;
Chris@169 1069 int partShift = 0;
Chris@320 1070 double partStrength = 0;
Chris@169 1071
Chris@252 1072 int partThreshold = floor(0.05 * m_colsPerSec);
Chris@169 1073
Chris@169 1074 for (int i = start; i != end; ++i) {
Chris@169 1075
Chris@169 1076 double strength = m_pianoRoll[i][note];
Chris@169 1077
Chris@169 1078 int shift = 0;
Chris@169 1079
Chris@169 1080 if (shiftCount > 1) {
Chris@169 1081
Chris@169 1082 shift = m_pianoRollShifts[i][note];
Chris@169 1083
Chris@169 1084 if (i == partStart) {
Chris@169 1085 partShift = shift;
Chris@169 1086 }
Chris@169 1087
Chris@169 1088 if (i > partStart + partThreshold && shift != partShift) {
Chris@169 1089
Chris@169 1090 // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl;
Chris@169 1091
Chris@169 1092 // pitch has changed, emit an intermediate note
Chris@252 1093 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 1094 i,
Chris@252 1095 note,
Chris@252 1096 partShift,
Chris@252 1097 shiftCount,
Chris@320 1098 partStrength));
Chris@169 1099 partStart = i;
Chris@169 1100 partShift = shift;
Chris@320 1101 partStrength = 0;
Chris@169 1102 }
Chris@169 1103 }
Chris@169 1104
Chris@320 1105 if (strength > partStrength) {
Chris@320 1106 partStrength = strength;
Chris@169 1107 }
Chris@169 1108 }
Chris@169 1109
Chris@169 1110 if (end >= partStart + partThreshold) {
Chris@252 1111 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 1112 end,
Chris@252 1113 note,
Chris@252 1114 partShift,
Chris@252 1115 shiftCount,
Chris@320 1116 partStrength));
Chris@169 1117 }
Chris@169 1118 }
Chris@252 1119
Chris@319 1120 void
Chris@319 1121 Silvet::emitOnset(int start, int note, int shiftCount,
Chris@319 1122 FeatureList &onsetFeatures)
Chris@319 1123 {
Chris@319 1124 int len = int(m_pianoRoll.size());
Chris@320 1125
Chris@320 1126 double onsetStrength = 0;
Chris@319 1127
Chris@319 1128 int shift = 0;
Chris@319 1129 if (shiftCount > 1) {
Chris@319 1130 shift = m_pianoRollShifts[start][note];
Chris@319 1131 }
Chris@319 1132
Chris@319 1133 for (int i = start; i < len; ++i) {
Chris@319 1134 double strength = m_pianoRoll[i][note];
Chris@320 1135 if (strength > onsetStrength) {
Chris@320 1136 onsetStrength = strength;
Chris@319 1137 }
Chris@319 1138 }
Chris@319 1139
Chris@319 1140 onsetFeatures.push_back(makeOnsetFeature(start,
Chris@319 1141 note,
Chris@319 1142 shift,
Chris@319 1143 shiftCount,
Chris@320 1144 onsetStrength));
Chris@319 1145 }
Chris@319 1146
Chris@309 1147 RealTime
Chris@309 1148 Silvet::getColumnTimestamp(int column)
Chris@309 1149 {
Chris@309 1150 double columnDuration = 1.0 / m_colsPerSec;
Chris@309 1151 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
Chris@309 1152
Chris@309 1153 return m_startTime + RealTime::fromSeconds
Chris@309 1154 (columnDuration * (column - postFilterLatency) + 0.02);
Chris@309 1155 }
Chris@309 1156
Chris@252 1157 Silvet::Feature
Chris@252 1158 Silvet::makeNoteFeature(int start,
Chris@252 1159 int end,
Chris@252 1160 int note,
Chris@252 1161 int shift,
Chris@252 1162 int shiftCount,
Chris@320 1163 double strength)
Chris@252 1164 {
Chris@252 1165 Feature f;
Chris@252 1166
Chris@252 1167 f.hasTimestamp = true;
Chris@309 1168 f.timestamp = getColumnTimestamp(start);
Chris@252 1169
Chris@252 1170 f.hasDuration = true;
Chris@309 1171 f.duration = getColumnTimestamp(end) - f.timestamp;
Chris@252 1172
Chris@252 1173 f.values.clear();
Chris@320 1174 f.values.push_back(getNoteFrequency(note, shift, shiftCount));
Chris@320 1175 f.values.push_back(getVelocityFor(strength, start));
Chris@252 1176
Chris@320 1177 f.label = getNoteName(note, shift, shiftCount);
Chris@252 1178
Chris@252 1179 return f;
Chris@252 1180 }
Chris@252 1181
Chris@319 1182 Silvet::Feature
Chris@319 1183 Silvet::makeOnsetFeature(int start,
Chris@319 1184 int note,
Chris@319 1185 int shift,
Chris@319 1186 int shiftCount,
Chris@320 1187 double strength)
Chris@319 1188 {
Chris@319 1189 Feature f;
Chris@319 1190
Chris@319 1191 f.hasTimestamp = true;
Chris@319 1192 f.timestamp = getColumnTimestamp(start);
Chris@319 1193
Chris@319 1194 f.hasDuration = false;
Chris@319 1195
Chris@319 1196 f.values.clear();
Chris@320 1197 f.values.push_back(getNoteFrequency(note, shift, shiftCount));
Chris@320 1198 f.values.push_back(getVelocityFor(strength, start));
Chris@319 1199
Chris@320 1200 f.label = getNoteName(note, shift, shiftCount);
Chris@319 1201
Chris@319 1202 return f;
Chris@319 1203 }
Chris@319 1204
Chris@320 1205 int
Chris@320 1206 Silvet::getVelocityFor(double strength, int column)
Chris@320 1207 {
Chris@320 1208 RealTime rt = getColumnTimestamp(column + 1);
Chris@320 1209
Chris@320 1210 float inputGain = getInputGainAt(rt);
Chris@320 1211
Chris@320 1212 double scale = 2.0;
Chris@320 1213 if (m_mode == LiveMode) scale = 20.0;
Chris@320 1214
Chris@320 1215 double velocity = round((strength * scale) / inputGain);
Chris@320 1216
Chris@320 1217 if (velocity > 127.0) velocity = 127.0;
Chris@320 1218 if (velocity < 1.0) velocity = 1.0; // assume surpassed 0 threshold already
Chris@320 1219
Chris@320 1220 return int(velocity);
Chris@320 1221 }
Chris@320 1222
Chris@252 1223 float
Chris@252 1224 Silvet::getInputGainAt(RealTime t)
Chris@252 1225 {
Chris@252 1226 map<RealTime, float>::const_iterator i = m_inputGains.lower_bound(t);
Chris@252 1227
Chris@252 1228 if (i == m_inputGains.end()) {
Chris@252 1229 if (i != m_inputGains.begin()) {
Chris@252 1230 --i;
Chris@252 1231 } else {
Chris@252 1232 return 1.f; // no data
Chris@252 1233 }
Chris@252 1234 }
Chris@252 1235
Chris@252 1236 // cerr << "gain at time " << t << " = " << i->second << endl;
Chris@252 1237
Chris@252 1238 return i->second;
Chris@252 1239 }
Chris@252 1240