annotate src/Silvet.cpp @ 337:3c6f5d2d33e8 simultaneities

An abortive attempt at this (doesn't compile)
author Chris Cannam
date Sat, 27 Jun 2015 14:37:13 +0100
parents d25e4aee73d7
children
rev   line source
Chris@31 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@31 2
Chris@31 3 /*
Chris@31 4 Silvet
Chris@31 5
Chris@31 6 A Vamp plugin for note transcription.
Chris@31 7 Centre for Digital Music, Queen Mary University of London.
Chris@31 8
Chris@31 9 This program is free software; you can redistribute it and/or
Chris@31 10 modify it under the terms of the GNU General Public License as
Chris@31 11 published by the Free Software Foundation; either version 2 of the
Chris@31 12 License, or (at your option) any later version. See the file
Chris@31 13 COPYING included with this distribution for more information.
Chris@31 14 */
Chris@31 15
Chris@31 16 #include "Silvet.h"
Chris@34 17 #include "EM.h"
Chris@31 18
Chris@152 19 #include <cq/CQSpectrogram.h>
Chris@31 20
Chris@152 21 #include "MedianFilter.h"
Chris@152 22 #include "constant-q-cpp/src/dsp/Resampler.h"
Chris@246 23 #include "flattendynamics-ladspa.h"
Chris@298 24 #include "LiveInstruments.h"
Chris@31 25
Chris@31 26 #include <vector>
Chris@312 27 #include <future>
Chris@31 28
Chris@32 29 #include <cstdio>
Chris@32 30
Chris@31 31 using std::vector;
Chris@48 32 using std::cout;
Chris@31 33 using std::cerr;
Chris@31 34 using std::endl;
Chris@311 35 using std::pair;
Chris@312 36 using std::future;
Chris@312 37 using std::async;
Chris@40 38 using Vamp::RealTime;
Chris@31 39
// Sample rate (Hz) at which audio is analysed; input at any other rate
// is resampled to this before the constant-Q transform.
static const int processingSampleRate = 44100;

// Constant-Q resolution, in bins per semitone, used in Live mode and in
// the other processing modes respectively.
static const int binsPerSemitoneLive = 1;
static const int binsPerSemitoneNormal = 5;

// Range of input sample rates (Hz) accepted by Silvet::initialise().
static const int minInputSampleRate = 100;
static const int maxInputSampleRate = 192000;
Chris@272 47
Chris@316 48 static const Silvet::ProcessingMode defaultMode = Silvet::HighQualityMode;
Chris@316 49
Chris@31 50 Silvet::Silvet(float inputSampleRate) :
Chris@31 51 Plugin(inputSampleRate),
Chris@161 52 m_instruments(InstrumentPack::listInstrumentPacks()),
Chris@298 53 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)),
Chris@31 54 m_resampler(0),
Chris@246 55 m_flattener(0),
Chris@110 56 m_cq(0),
Chris@316 57 m_mode(defaultMode),
Chris@166 58 m_fineTuning(false),
Chris@178 59 m_instrument(0),
Chris@313 60 m_colsPerSec(50),
Chris@313 61 m_haveStartTime(false)
Chris@31 62 {
Chris@31 63 }
Chris@31 64
Chris@31 65 Silvet::~Silvet()
Chris@31 66 {
Chris@31 67 delete m_resampler;
Chris@246 68 delete m_flattener;
Chris@31 69 delete m_cq;
Chris@41 70 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 71 delete m_postFilter[i];
Chris@41 72 }
Chris@31 73 }
Chris@31 74
Chris@31 75 string
Chris@31 76 Silvet::getIdentifier() const
Chris@31 77 {
Chris@31 78 return "silvet";
Chris@31 79 }
Chris@31 80
Chris@31 81 string
Chris@31 82 Silvet::getName() const
Chris@31 83 {
Chris@31 84 return "Silvet Note Transcription";
Chris@31 85 }
Chris@31 86
Chris@31 87 string
Chris@31 88 Silvet::getDescription() const
Chris@31 89 {
Chris@191 90 return "Estimate the note onsets, pitches, and durations that make up a music recording.";
Chris@31 91 }
Chris@31 92
Chris@31 93 string
Chris@31 94 Silvet::getMaker() const
Chris@31 95 {
Chris@191 96 return "Queen Mary, University of London";
Chris@31 97 }
Chris@31 98
Chris@31 99 int
Chris@31 100 Silvet::getPluginVersion() const
Chris@31 101 {
Chris@309 102 return 3;
Chris@31 103 }
Chris@31 104
Chris@31 105 string
Chris@31 106 Silvet::getCopyright() const
Chris@31 107 {
Chris@191 108 return "Method by Emmanouil Benetos and Simon Dixon; plugin by Chris Cannam and Emmanouil Benetos. GPL licence.";
Chris@31 109 }
Chris@31 110
Chris@31 111 Silvet::InputDomain
Chris@31 112 Silvet::getInputDomain() const
Chris@31 113 {
Chris@31 114 return TimeDomain;
Chris@31 115 }
Chris@31 116
Chris@31 117 size_t
Chris@31 118 Silvet::getPreferredBlockSize() const
Chris@31 119 {
Chris@31 120 return 0;
Chris@31 121 }
Chris@31 122
Chris@31 123 size_t
Chris@31 124 Silvet::getPreferredStepSize() const
Chris@31 125 {
Chris@31 126 return 0;
Chris@31 127 }
Chris@31 128
Chris@31 129 size_t
Chris@31 130 Silvet::getMinChannelCount() const
Chris@31 131 {
Chris@31 132 return 1;
Chris@31 133 }
Chris@31 134
Chris@31 135 size_t
Chris@31 136 Silvet::getMaxChannelCount() const
Chris@31 137 {
Chris@31 138 return 1;
Chris@31 139 }
Chris@31 140
Chris@31 141 Silvet::ParameterList
Chris@31 142 Silvet::getParameterDescriptors() const
Chris@31 143 {
Chris@31 144 ParameterList list;
Chris@110 145
Chris@110 146 ParameterDescriptor desc;
Chris@110 147 desc.identifier = "mode";
Chris@110 148 desc.name = "Processing mode";
Chris@110 149 desc.unit = "";
Chris@297 150 desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode is tuned in favour of overall speed; Live mode is tuned in favour of lower latency; while Intensive mode (the default) will almost always produce the best results.";
Chris@110 151 desc.minValue = 0;
Chris@297 152 desc.maxValue = 2;
Chris@316 153 desc.defaultValue = int(defaultMode);
Chris@110 154 desc.isQuantized = true;
Chris@110 155 desc.quantizeStep = 1;
Chris@166 156 desc.valueNames.push_back("Draft (faster)");
Chris@165 157 desc.valueNames.push_back("Intensive (higher quality)");
Chris@297 158 desc.valueNames.push_back("Live (lower latency)");
Chris@161 159 list.push_back(desc);
Chris@161 160
Chris@176 161 desc.identifier = "instrument";
Chris@176 162 desc.name = "Instrument";
Chris@161 163 desc.unit = "";
Chris@271 164 desc.description = "The instrument or instruments known to be present in the recording. This affects the set of instrument templates used, as well as the expected level of polyphony in the output. Using a more limited set of instruments than the default will also make the plugin run faster.\nNote that this plugin cannot isolate instruments: you can't use this setting to request notes from only one instrument in a recording with several. Instead, use this as a hint to the plugin about which instruments are actually present.";
Chris@161 165 desc.minValue = 0;
Chris@162 166 desc.maxValue = m_instruments.size()-1;
Chris@162 167 desc.defaultValue = 0;
Chris@161 168 desc.isQuantized = true;
Chris@161 169 desc.quantizeStep = 1;
Chris@161 170 desc.valueNames.clear();
Chris@162 171 for (int i = 0; i < int(m_instruments.size()); ++i) {
Chris@162 172 desc.valueNames.push_back(m_instruments[i].name);
Chris@162 173 }
Chris@166 174 list.push_back(desc);
Chris@161 175
Chris@166 176 desc.identifier = "finetune";
Chris@166 177 desc.name = "Return fine pitch estimates";
Chris@166 178 desc.unit = "";
Chris@271 179 desc.description = "Return pitch estimates at finer than semitone resolution. This works only in Intensive mode. Notes that appear to drift in pitch will be split up into shorter notes with individually finer pitches.";
Chris@166 180 desc.minValue = 0;
Chris@166 181 desc.maxValue = 1;
Chris@166 182 desc.defaultValue = 0;
Chris@166 183 desc.isQuantized = true;
Chris@166 184 desc.quantizeStep = 1;
Chris@166 185 desc.valueNames.clear();
Chris@110 186 list.push_back(desc);
Chris@110 187
Chris@31 188 return list;
Chris@31 189 }
Chris@31 190
Chris@31 191 float
Chris@31 192 Silvet::getParameter(string identifier) const
Chris@31 193 {
Chris@110 194 if (identifier == "mode") {
Chris@297 195 return (float)(int)m_mode;
Chris@166 196 } else if (identifier == "finetune") {
Chris@166 197 return m_fineTuning ? 1.f : 0.f;
Chris@176 198 } else if (identifier == "instrument") {
Chris@162 199 return m_instrument;
Chris@110 200 }
Chris@31 201 return 0;
Chris@31 202 }
Chris@31 203
Chris@31 204 void
Chris@31 205 Silvet::setParameter(string identifier, float value)
Chris@31 206 {
Chris@110 207 if (identifier == "mode") {
Chris@297 208 m_mode = (ProcessingMode)(int)(value + 0.5);
Chris@166 209 } else if (identifier == "finetune") {
Chris@166 210 m_fineTuning = (value > 0.5);
Chris@176 211 } else if (identifier == "instrument") {
Chris@162 212 m_instrument = lrintf(value);
Chris@110 213 }
Chris@31 214 }
Chris@31 215
Chris@31 216 Silvet::ProgramList
Chris@31 217 Silvet::getPrograms() const
Chris@31 218 {
Chris@31 219 ProgramList list;
Chris@31 220 return list;
Chris@31 221 }
Chris@31 222
Chris@31 223 string
Chris@31 224 Silvet::getCurrentProgram() const
Chris@31 225 {
Chris@31 226 return "";
Chris@31 227 }
Chris@31 228
Chris@31 229 void
Chris@31 230 Silvet::selectProgram(string name)
Chris@31 231 {
Chris@31 232 }
Chris@31 233
Chris@31 234 Silvet::OutputList
Chris@31 235 Silvet::getOutputDescriptors() const
Chris@31 236 {
Chris@31 237 OutputList list;
Chris@31 238
Chris@31 239 OutputDescriptor d;
Chris@51 240 d.identifier = "notes";
Chris@51 241 d.name = "Note transcription";
Chris@329 242 d.description = "Overall note transcription. Each note has time, duration, estimated fundamental frequency, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
Chris@41 243 d.unit = "Hz";
Chris@31 244 d.hasFixedBinCount = true;
Chris@31 245 d.binCount = 2;
Chris@41 246 d.binNames.push_back("Frequency");
Chris@31 247 d.binNames.push_back("Velocity");
Chris@31 248 d.hasKnownExtents = false;
Chris@31 249 d.isQuantized = false;
Chris@31 250 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@246 251 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@31 252 d.hasDuration = true;
Chris@32 253 m_notesOutputNo = list.size();
Chris@32 254 list.push_back(d);
Chris@32 255
Chris@319 256 d.identifier = "onsets";
Chris@319 257 d.name = "Note onsets";
Chris@323 258 d.description = "Note onsets, without durations. These can be calculated sooner than complete notes, because it isn't necessary to wait for a note to finish before returning its feature. Each event has time, estimated fundamental frequency in Hz, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
Chris@319 259 d.unit = "Hz";
Chris@319 260 d.hasFixedBinCount = true;
Chris@319 261 d.binCount = 2;
Chris@319 262 d.binNames.push_back("Frequency");
Chris@319 263 d.binNames.push_back("Velocity");
Chris@319 264 d.hasKnownExtents = false;
Chris@319 265 d.isQuantized = false;
Chris@319 266 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@319 267 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@319 268 d.hasDuration = false;
Chris@319 269 m_onsetsOutputNo = list.size();
Chris@319 270 list.push_back(d);
Chris@319 271
Chris@336 272 d.identifier = "onoffsets";
Chris@336 273 d.name = "Note onsets and offsets";
Chris@336 274 d.description = "Note onsets and offsets as separate events. Each onset event has time, estimated fundamental frequency in Hz, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture. Offsets are represented in the same way but with a velocity of 0.";
Chris@336 275 d.unit = "Hz";
Chris@336 276 d.hasFixedBinCount = true;
Chris@336 277 d.binCount = 2;
Chris@336 278 d.binNames.push_back("Frequency");
Chris@336 279 d.binNames.push_back("Velocity");
Chris@336 280 d.hasKnownExtents = false;
Chris@336 281 d.isQuantized = false;
Chris@336 282 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@336 283 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@336 284 d.hasDuration = false;
Chris@336 285 m_onOffsetsOutputNo = list.size();
Chris@336 286 list.push_back(d);
Chris@336 287
Chris@337 288 d.identifier = "simultaneities";
Chris@337 289 d.name = "Simultaneities";
Chris@337 290 d.description = "Events indicating which notes are active together. Whenever a note begins, it is collected with any other notes which begin during a short period of time immediately after, and the set of currently playing notes is reported as an event with the timestamp of the first note. Each feature has a variable number of values, depending on how many simultaneous notes are active.";
Chris@337 291 d.unit = "Hz";
Chris@337 292 d.hasFixedBinCount = false;
Chris@337 293 d.binNames.clear();
Chris@337 294 d.hasKnownExtents = false;
Chris@337 295 d.isQuantized = false;
Chris@337 296 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@337 297 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@337 298 d.hasDuration = false;
Chris@337 299 m_simultaneitiesOutputNo = list.size();
Chris@337 300 list.push_back(d);
Chris@337 301
Chris@178 302 d.identifier = "timefreq";
Chris@178 303 d.name = "Time-frequency distribution";
Chris@271 304 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
Chris@178 305 d.unit = "";
Chris@178 306 d.hasFixedBinCount = true;
Chris@298 307 d.binCount = getPack(0).templateHeight;
Chris@178 308 d.binNames.clear();
Chris@178 309 if (m_cq) {
Chris@294 310 char name[50];
Chris@298 311 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@178 312 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@178 313 // lowest-frequency 55 bins have been dropped, for a
Chris@178 314 // 545-bin template. The native CQ bins go high->low
Chris@178 315 // frequency though, so these are still the first 545 bins
Chris@178 316 // as reported by getBinFrequency, though in reverse order
Chris@178 317 float freq = m_cq->getBinFrequency
Chris@298 318 (getPack(0).templateHeight - i - 1);
Chris@178 319 sprintf(name, "%.1f Hz", freq);
Chris@178 320 d.binNames.push_back(name);
Chris@178 321 }
Chris@178 322 }
Chris@178 323 d.hasKnownExtents = false;
Chris@178 324 d.isQuantized = false;
Chris@178 325 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@178 326 d.sampleRate = m_colsPerSec;
Chris@178 327 d.hasDuration = false;
Chris@178 328 m_fcqOutputNo = list.size();
Chris@178 329 list.push_back(d);
Chris@178 330
Chris@294 331 d.identifier = "pitchactivation";
Chris@294 332 d.name = "Pitch activation distribution";
Chris@294 333 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction.";
Chris@294 334 d.unit = "";
Chris@294 335 d.hasFixedBinCount = true;
Chris@298 336 d.binCount = getPack(0).templateNoteCount;
Chris@294 337 d.binNames.clear();
Chris@294 338 if (m_cq) {
Chris@298 339 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@336 340 d.binNames.push_back(getNoteName(i, 0));
Chris@294 341 }
Chris@294 342 }
Chris@294 343 d.hasKnownExtents = false;
Chris@294 344 d.isQuantized = false;
Chris@294 345 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@294 346 d.sampleRate = m_colsPerSec;
Chris@294 347 d.hasDuration = false;
Chris@294 348 m_pitchOutputNo = list.size();
Chris@294 349 list.push_back(d);
Chris@294 350
Chris@309 351 d.identifier = "chroma";
Chris@309 352 d.name = "Pitch chroma distribution";
Chris@309 353 d.description = "Pitch chroma distribution formed by wrapping the un-thresholded pitch activation distribution into a single octave of semitone bins.";
Chris@309 354 d.unit = "";
Chris@309 355 d.hasFixedBinCount = true;
Chris@309 356 d.binCount = 12;
Chris@309 357 d.binNames.clear();
Chris@309 358 if (m_cq) {
Chris@309 359 for (int i = 0; i < 12; ++i) {
Chris@320 360 d.binNames.push_back(getChromaName(i));
Chris@309 361 }
Chris@309 362 }
Chris@309 363 d.hasKnownExtents = false;
Chris@309 364 d.isQuantized = false;
Chris@309 365 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@309 366 d.sampleRate = m_colsPerSec;
Chris@309 367 d.hasDuration = false;
Chris@309 368 m_chromaOutputNo = list.size();
Chris@309 369 list.push_back(d);
Chris@309 370
Chris@302 371 d.identifier = "templates";
Chris@302 372 d.name = "Templates";
Chris@302 373 d.description = "Constant-Q spectral templates for the selected instrument pack.";
Chris@302 374 d.unit = "";
Chris@302 375 d.hasFixedBinCount = true;
Chris@302 376 d.binCount = getPack(0).templateHeight;
Chris@302 377 d.binNames.clear();
Chris@302 378 if (m_cq) {
Chris@302 379 char name[50];
Chris@302 380 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@302 381 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@302 382 // lowest-frequency 55 bins have been dropped, for a
Chris@302 383 // 545-bin template. The native CQ bins go high->low
Chris@302 384 // frequency though, so these are still the first 545 bins
Chris@302 385 // as reported by getBinFrequency, though in reverse order
Chris@302 386 float freq = m_cq->getBinFrequency
Chris@302 387 (getPack(0).templateHeight - i - 1);
Chris@302 388 sprintf(name, "%.1f Hz", freq);
Chris@302 389 d.binNames.push_back(name);
Chris@302 390 }
Chris@302 391 }
Chris@302 392 d.hasKnownExtents = false;
Chris@302 393 d.isQuantized = false;
Chris@302 394 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@302 395 d.sampleRate = m_colsPerSec;
Chris@302 396 d.hasDuration = false;
Chris@302 397 m_templateOutputNo = list.size();
Chris@302 398 list.push_back(d);
Chris@302 399
Chris@31 400 return list;
Chris@31 401 }
Chris@31 402
Chris@38 403 std::string
Chris@320 404 Silvet::getChromaName(int pitch) const
Chris@38 405 {
Chris@38 406 static const char *names[] = {
Chris@38 407 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
Chris@38 408 };
Chris@38 409
Chris@309 410 return names[pitch];
Chris@309 411 }
Chris@309 412
Chris@309 413 std::string
Chris@336 414 Silvet::getNoteName(int note, int shift) const
Chris@309 415 {
Chris@320 416 string n = getChromaName(note % 12);
Chris@38 417
Chris@175 418 int oct = (note + 9) / 12;
Chris@38 419
Chris@175 420 char buf[30];
Chris@175 421
Chris@175 422 float pshift = 0.f;
Chris@336 423 int shiftCount = getShiftCount();
Chris@175 424 if (shiftCount > 1) {
Chris@320 425 // see getNoteFrequency below
Chris@175 426 pshift =
Chris@175 427 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 428 }
Chris@175 429
Chris@175 430 if (pshift > 0.f) {
Chris@309 431 sprintf(buf, "%s%d+%dc", n.c_str(), oct, int(round(pshift * 100)));
Chris@175 432 } else if (pshift < 0.f) {
Chris@309 433 sprintf(buf, "%s%d-%dc", n.c_str(), oct, int(round((-pshift) * 100)));
Chris@175 434 } else {
Chris@309 435 sprintf(buf, "%s%d", n.c_str(), oct);
Chris@175 436 }
Chris@38 437
Chris@38 438 return buf;
Chris@38 439 }
Chris@38 440
Chris@41 441 float
Chris@336 442 Silvet::getNoteFrequency(int note, int shift) const
Chris@41 443 {
Chris@169 444 // Convert shift number to a pitch shift. The given shift number
Chris@169 445 // is an offset into the template array, which starts with some
Chris@169 446 // zeros, followed by the template, then some trailing zeros.
Chris@169 447 //
Chris@169 448 // Example: if we have templateMaxShift == 2 and thus shiftCount
Chris@169 449 // == 5, then the number will be in the range 0-4 and the template
Chris@169 450 // will have 2 zeros at either end. Thus number 2 represents the
Chris@169 451 // template "as recorded", for a pitch shift of 0; smaller indices
Chris@169 452 // represent moving the template *up* in pitch (by introducing
Chris@169 453 // zeros at the start, which is the low-frequency end), for a
Chris@169 454 // positive pitch shift; and higher values represent moving it
Chris@169 455 // down in pitch, for a negative pitch shift.
Chris@169 456
Chris@175 457 float pshift = 0.f;
Chris@336 458 int shiftCount = getShiftCount();
Chris@175 459 if (shiftCount > 1) {
Chris@175 460 pshift =
Chris@175 461 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 462 }
Chris@169 463
Chris@301 464 float freq = float(27.5 * pow(2.0, (note + pshift) / 12.0));
Chris@301 465
Chris@303 466 // cerr << "note = " << note << ", shift = " << shift << ", shiftCount = "
Chris@303 467 // << shiftCount << ", obtained freq = " << freq << endl;
Chris@301 468
Chris@301 469 return freq;
Chris@41 470 }
Chris@41 471
Chris@31 472 bool
Chris@31 473 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@31 474 {
Chris@272 475 if (m_inputSampleRate < minInputSampleRate ||
Chris@272 476 m_inputSampleRate > maxInputSampleRate) {
Chris@272 477 cerr << "Silvet::initialise: Unsupported input sample rate "
Chris@272 478 << m_inputSampleRate << " (supported min " << minInputSampleRate
Chris@272 479 << ", max " << maxInputSampleRate << ")" << endl;
Chris@272 480 return false;
Chris@272 481 }
Chris@272 482
Chris@31 483 if (channels < getMinChannelCount() ||
Chris@272 484 channels > getMaxChannelCount()) {
Chris@272 485 cerr << "Silvet::initialise: Unsupported channel count " << channels
Chris@272 486 << " (supported min " << getMinChannelCount() << ", max "
Chris@272 487 << getMaxChannelCount() << ")" << endl;
Chris@272 488 return false;
Chris@272 489 }
Chris@31 490
Chris@31 491 if (stepSize != blockSize) {
Chris@31 492 cerr << "Silvet::initialise: Step size must be the same as block size ("
Chris@31 493 << stepSize << " != " << blockSize << ")" << endl;
Chris@31 494 return false;
Chris@31 495 }
Chris@31 496
Chris@31 497 m_blockSize = blockSize;
Chris@31 498
Chris@31 499 reset();
Chris@31 500
Chris@31 501 return true;
Chris@31 502 }
Chris@31 503
Chris@31 504 void
Chris@31 505 Silvet::reset()
Chris@31 506 {
Chris@31 507 delete m_resampler;
Chris@246 508 delete m_flattener;
Chris@31 509 delete m_cq;
Chris@31 510
Chris@31 511 if (m_inputSampleRate != processingSampleRate) {
Chris@31 512 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
Chris@31 513 } else {
Chris@31 514 m_resampler = 0;
Chris@31 515 }
Chris@31 516
Chris@246 517 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
Chris@246 518 m_flattener->reset();
Chris@246 519
Chris@301 520 // this happens to be processingSampleRate / 3, and is the top
Chris@301 521 // freq used for the EM templates:
Chris@301 522 double maxFreq = 14700;
Chris@301 523
Chris@301 524 if (m_mode == LiveMode) {
Chris@301 525 // We only have 12 bpo rather than 60, so we need the top bin
Chris@301 526 // to be the middle one of the top 5, i.e. 2/5 of a semitone
Chris@301 527 // lower than 14700
Chris@301 528 maxFreq *= powf(2.0, -1.0 / 30.0);
Chris@301 529 }
Chris@301 530
Chris@173 531 double minFreq = 27.5;
Chris@173 532
Chris@297 533 if (m_mode != HighQualityMode) {
Chris@173 534 // We don't actually return any notes from the bottom octave,
Chris@173 535 // so we can just pad with zeros
Chris@173 536 minFreq *= 2;
Chris@173 537 }
Chris@173 538
Chris@298 539 int bpo = 12 *
Chris@298 540 (m_mode == LiveMode ? binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@301 541
Chris@154 542 CQParameters params(processingSampleRate,
Chris@173 543 minFreq,
Chris@303 544 maxFreq,
Chris@298 545 bpo);
Chris@154 546
Chris@325 547 params.q = 0.8;
Chris@325 548 params.atomHopFactor = (m_mode == LiveMode ? 1.0 : 0.3);
Chris@154 549 params.threshold = 0.0005;
Chris@317 550 params.decimator =
Chris@317 551 (m_mode == LiveMode ?
Chris@317 552 CQParameters::FasterDecimator : CQParameters::BetterDecimator);
Chris@172 553 params.window = CQParameters::Hann;
Chris@154 554
Chris@154 555 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
Chris@31 556
Chris@303 557 // cerr << "CQ bins = " << m_cq->getTotalBins() << endl;
Chris@303 558 // cerr << "CQ min freq = " << m_cq->getMinFrequency() << " (and for confirmation, freq of bin 0 = " << m_cq->getBinFrequency(0) << ")" << endl;
Chris@297 559
Chris@297 560 m_colsPerSec = (m_mode == DraftMode ? 25 : 50);
Chris@165 561
Chris@41 562 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 563 delete m_postFilter[i];
Chris@41 564 }
Chris@41 565 m_postFilter.clear();
Chris@303 566 int postFilterLength = 3;
Chris@298 567 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@303 568 m_postFilter.push_back(new MedianFilter<double>(postFilterLength));
Chris@41 569 }
Chris@41 570 m_pianoRoll.clear();
Chris@246 571 m_inputGains.clear();
Chris@337 572 m_simultaneity = Simultaneity();
Chris@32 573 m_columnCount = 0;
Chris@272 574 m_resampledCount = 0;
Chris@40 575 m_startTime = RealTime::zeroTime;
Chris@313 576 m_haveStartTime = false;
Chris@31 577 }
Chris@31 578
Chris@31 579 Silvet::FeatureSet
Chris@31 580 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
Chris@31 581 {
Chris@302 582 FeatureSet fs;
Chris@302 583
Chris@313 584 if (!m_haveStartTime) {
Chris@314 585
Chris@40 586 m_startTime = timestamp;
Chris@313 587 m_haveStartTime = true;
Chris@314 588
Chris@302 589 insertTemplateFeatures(fs);
Chris@40 590 }
Chris@246 591
Chris@246 592 vector<float> flattened(m_blockSize);
Chris@246 593 float gain = 1.f;
Chris@246 594 m_flattener->connectInputPort
Chris@246 595 (FlattenDynamics::AudioInputPort, inputBuffers[0]);
Chris@246 596 m_flattener->connectOutputPort
Chris@246 597 (FlattenDynamics::AudioOutputPort, &flattened[0]);
Chris@246 598 m_flattener->connectOutputPort
Chris@246 599 (FlattenDynamics::GainOutputPort, &gain);
Chris@246 600 m_flattener->process(m_blockSize);
Chris@246 601
Chris@252 602 m_inputGains[timestamp] = gain;
Chris@40 603
Chris@31 604 vector<double> data;
Chris@40 605 for (int i = 0; i < m_blockSize; ++i) {
Chris@246 606 double d = flattened[i];
Chris@235 607 data.push_back(d);
Chris@40 608 }
Chris@31 609
Chris@31 610 if (m_resampler) {
Chris@272 611
Chris@31 612 data = m_resampler->process(data.data(), data.size());
Chris@272 613
Chris@272 614 int hadCount = m_resampledCount;
Chris@272 615 m_resampledCount += data.size();
Chris@272 616
Chris@272 617 int resamplerLatency = m_resampler->getLatency();
Chris@272 618
Chris@272 619 if (hadCount < resamplerLatency) {
Chris@272 620 int stillToDrop = resamplerLatency - hadCount;
Chris@272 621 if (stillToDrop >= int(data.size())) {
Chris@302 622 return fs;
Chris@272 623 } else {
Chris@272 624 data = vector<double>(data.begin() + stillToDrop, data.end());
Chris@272 625 }
Chris@272 626 }
Chris@31 627 }
Chris@272 628
Chris@32 629 Grid cqout = m_cq->process(data);
Chris@302 630 transcribe(cqout, fs);
Chris@51 631 return fs;
Chris@34 632 }
Chris@34 633
Chris@34 634 Silvet::FeatureSet
Chris@34 635 Silvet::getRemainingFeatures()
Chris@34 636 {
Chris@145 637 Grid cqout = m_cq->getRemainingOutput();
Chris@302 638 FeatureSet fs;
Chris@336 639
Chris@302 640 if (m_columnCount == 0) {
Chris@302 641 // process() was never called, but we still want these
Chris@302 642 insertTemplateFeatures(fs);
Chris@302 643 } else {
Chris@336 644
Chris@336 645 // Complete the transcription
Chris@336 646
Chris@302 647 transcribe(cqout, fs);
Chris@336 648
Chris@336 649 // And make sure any extant playing notes are finished and returned
Chris@336 650
Chris@336 651 m_pianoRoll.push_back({});
Chris@336 652
Chris@336 653 auto events = noteTrack();
Chris@336 654
Chris@336 655 for (const auto &f : events.notes) {
Chris@336 656 fs[m_notesOutputNo].push_back(f);
Chris@336 657 }
Chris@336 658
Chris@336 659 for (const auto &f : events.onsets) {
Chris@336 660 fs[m_onsetsOutputNo].push_back(f);
Chris@336 661 }
Chris@336 662
Chris@336 663 for (const auto &f : events.onOffsets) {
Chris@336 664 fs[m_onOffsetsOutputNo].push_back(f);
Chris@336 665 }
Chris@337 666
Chris@337 667 for (const auto &f : events.simultaneities) {
Chris@337 668 fs[m_simultaneitiesOutputNo].push_back(f);
Chris@337 669 }
Chris@302 670 }
Chris@336 671
Chris@51 672 return fs;
Chris@34 673 }
Chris@34 674
Chris@302 675 void
Chris@302 676 Silvet::insertTemplateFeatures(FeatureSet &fs)
Chris@302 677 {
Chris@302 678 const InstrumentPack &pack = getPack(m_instrument);
Chris@302 679 for (int i = 0; i < int(pack.templates.size()) * pack.templateNoteCount; ++i) {
Chris@302 680 RealTime timestamp = RealTime::fromSeconds(double(i) / m_colsPerSec);
Chris@302 681 Feature f;
Chris@302 682 char buffer[50];
Chris@302 683 sprintf(buffer, "Note %d", i + 1);
Chris@302 684 f.label = buffer;
Chris@302 685 f.hasTimestamp = true;
Chris@302 686 f.timestamp = timestamp;
Chris@302 687 f.values = pack.templates[i / pack.templateNoteCount]
Chris@302 688 .data[i % pack.templateNoteCount];
Chris@302 689 fs[m_templateOutputNo].push_back(f);
Chris@302 690 }
Chris@302 691 }
Chris@302 692
Chris@336 693 int
Chris@336 694 Silvet::getShiftCount() const
Chris@336 695 {
Chris@336 696 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning;
Chris@336 697 int shiftCount = 1;
Chris@336 698 if (wantShifts) {
Chris@336 699 const InstrumentPack &pack(getPack(m_instrument));
Chris@336 700 shiftCount = pack.templateMaxShift * 2 + 1;
Chris@336 701 }
Chris@336 702 return shiftCount;
Chris@336 703 }
Chris@336 704
Chris@302 705 void
Chris@302 706 Silvet::transcribe(const Grid &cqout, Silvet::FeatureSet &fs)
Chris@34 707 {
Chris@32 708 Grid filtered = preProcess(cqout);
Chris@31 709
Chris@302 710 if (filtered.empty()) return;
Chris@170 711
Chris@298 712 const InstrumentPack &pack(getPack(m_instrument));
Chris@104 713
Chris@325 714 int width = filtered.size();
Chris@325 715
Chris@325 716 double silenceThreshold = 0.01;
Chris@325 717
Chris@325 718 for (int i = 0; i < width; ++i) {
Chris@325 719
Chris@325 720 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1 + i);
Chris@325 721 float inputGain = getInputGainAt(timestamp);
Chris@325 722
Chris@178 723 Feature f;
Chris@325 724 double rms = 0.0;
Chris@325 725
Chris@178 726 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@325 727 double v = filtered[i][j];
Chris@325 728 rms += v * v;
Chris@325 729 f.values.push_back(float(v));
Chris@178 730 }
Chris@325 731
Chris@325 732 rms = sqrt(rms / pack.templateHeight);
Chris@325 733 if (rms / inputGain < silenceThreshold) {
Chris@325 734 filtered[i].clear();
Chris@325 735 }
Chris@325 736
Chris@178 737 fs[m_fcqOutputNo].push_back(f);
Chris@178 738 }
Chris@325 739
Chris@311 740 Grid localPitches(width);
Chris@170 741
Chris@336 742 int shiftCount = getShiftCount();
Chris@336 743 bool wantShifts = (shiftCount > 1);
Chris@170 744
Chris@170 745 vector<vector<int> > localBestShifts;
Chris@170 746 if (wantShifts) {
Chris@311 747 localBestShifts = vector<vector<int> >(width);
Chris@170 748 }
Chris@170 749
Chris@312 750 #ifndef MAX_EM_THREADS
Chris@312 751 #define MAX_EM_THREADS 8
Chris@312 752 #endif
Chris@312 753
Chris@317 754 int emThreadCount = MAX_EM_THREADS;
Chris@317 755 if (m_mode == LiveMode && pack.templates.size() == 1) {
Chris@317 756 // The EM step is probably not slow enough to merit it
Chris@317 757 emThreadCount = 1;
Chris@317 758 }
Chris@317 759
Chris@312 760 #if (defined(MAX_EM_THREADS) && (MAX_EM_THREADS > 1))
Chris@317 761 if (emThreadCount > 1) {
Chris@317 762 for (int i = 0; i < width; ) {
Chris@317 763 typedef future<pair<vector<double>, vector<int>>> EMFuture;
Chris@317 764 vector<EMFuture> results;
Chris@317 765 for (int j = 0; j < emThreadCount && i + j < width; ++j) {
Chris@317 766 results.push_back
Chris@317 767 (async(std::launch::async,
Chris@317 768 [&](int index) {
Chris@336 769 return applyEM(pack, filtered.at(index));
Chris@317 770 }, i + j));
Chris@317 771 }
Chris@317 772 for (int j = 0; j < emThreadCount && i + j < width; ++j) {
Chris@317 773 auto out = results[j].get();
Chris@317 774 localPitches[i+j] = out.first;
Chris@317 775 if (wantShifts) localBestShifts[i+j] = out.second;
Chris@317 776 }
Chris@317 777 i += emThreadCount;
Chris@312 778 }
Chris@123 779 }
Chris@312 780 #endif
Chris@317 781
Chris@317 782 if (emThreadCount == 1) {
Chris@317 783 for (int i = 0; i < width; ++i) {
Chris@336 784 auto out = applyEM(pack, filtered.at(i));
Chris@317 785 localPitches[i] = out.first;
Chris@317 786 if (wantShifts) localBestShifts[i] = out.second;
Chris@317 787 }
Chris@317 788 }
Chris@305 789
Chris@166 790 for (int i = 0; i < width; ++i) {
Chris@37 791
Chris@321 792 vector<double> filtered;
Chris@321 793
Chris@321 794 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@321 795 m_postFilter[j]->push(localPitches[i][j]);
Chris@321 796 filtered.push_back(m_postFilter[j]->get());
Chris@321 797 }
Chris@294 798
Chris@309 799 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1);
Chris@309 800 float inputGain = getInputGainAt(timestamp);
Chris@309 801
Chris@294 802 Feature f;
Chris@294 803 for (int j = 0; j < (int)filtered.size(); ++j) {
Chris@309 804 float v = filtered[j];
Chris@294 805 if (v < pack.levelThreshold) v = 0.f;
Chris@309 806 f.values.push_back(v / inputGain);
Chris@294 807 }
Chris@294 808 fs[m_pitchOutputNo].push_back(f);
Chris@309 809
Chris@309 810 f.values.clear();
Chris@309 811 f.values.resize(12);
Chris@309 812 for (int j = 0; j < (int)filtered.size(); ++j) {
Chris@309 813 f.values[j % 12] += filtered[j] / inputGain;
Chris@309 814 }
Chris@309 815 fs[m_chromaOutputNo].push_back(f);
Chris@38 816
Chris@321 817 // This pushes the up-to-max-polyphony activation column to
Chris@321 818 // m_pianoRoll
Chris@336 819 postProcess(filtered, localBestShifts[i]);
Chris@321 820
Chris@336 821 auto events = noteTrack();
Chris@319 822
Chris@336 823 for (const auto &f : events.notes) {
Chris@336 824 fs[m_notesOutputNo].push_back(f);
Chris@40 825 }
Chris@319 826
Chris@336 827 for (const auto &f : events.onsets) {
Chris@336 828 fs[m_onsetsOutputNo].push_back(f);
Chris@336 829 }
Chris@336 830
Chris@336 831 for (const auto &f : events.onOffsets) {
Chris@336 832 fs[m_onOffsetsOutputNo].push_back(f);
Chris@319 833 }
Chris@337 834
Chris@337 835 for (const auto &f : events.simultaneities) {
Chris@337 836 fs[m_simultaneitiesOutputNo].push_back(f);
Chris@337 837 }
Chris@34 838 }
Chris@31 839 }
Chris@31 840
Chris@311 841 pair<vector<double>, vector<int> >
Chris@311 842 Silvet::applyEM(const InstrumentPack &pack,
Chris@336 843 const vector<double> &column)
Chris@311 844 {
Chris@311 845 double columnThreshold = 1e-5;
Chris@311 846
Chris@314 847 if (m_mode == LiveMode) {
Chris@325 848 columnThreshold /= 15;
Chris@314 849 }
Chris@314 850
Chris@311 851 vector<double> pitches(pack.templateNoteCount, 0.0);
Chris@311 852 vector<int> bestShifts;
Chris@325 853
Chris@325 854 if (column.empty()) return { pitches, bestShifts };
Chris@311 855
Chris@311 856 double sum = 0.0;
Chris@311 857 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@311 858 sum += column.at(j);
Chris@311 859 }
Chris@311 860 if (sum < columnThreshold) return { pitches, bestShifts };
Chris@311 861
Chris@314 862 EM em(&pack, m_mode == HighQualityMode);
Chris@311 863
Chris@311 864 em.setPitchSparsity(pack.pitchSparsity);
Chris@311 865 em.setSourceSparsity(pack.sourceSparsity);
Chris@311 866
Chris@314 867 int iterations = (m_mode == HighQualityMode ? 20 : 10);
Chris@311 868
Chris@311 869 for (int j = 0; j < iterations; ++j) {
Chris@311 870 em.iterate(column.data());
Chris@311 871 }
Chris@311 872
Chris@311 873 const float *pitchDist = em.getPitchDistribution();
Chris@311 874 const float *const *shiftDist = em.getShifts();
Chris@311 875
Chris@336 876 int shiftCount = getShiftCount();
Chris@311 877
Chris@311 878 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@311 879
Chris@311 880 pitches[j] = pitchDist[j] * sum;
Chris@311 881
Chris@311 882 int bestShift = 0;
Chris@311 883 float bestShiftValue = 0.0;
Chris@336 884 if (shiftCount > 1) {
Chris@311 885 for (int k = 0; k < shiftCount; ++k) {
Chris@311 886 float value = shiftDist[k][j];
Chris@311 887 if (k == 0 || value > bestShiftValue) {
Chris@311 888 bestShiftValue = value;
Chris@311 889 bestShift = k;
Chris@311 890 }
Chris@311 891 }
Chris@311 892 bestShifts.push_back(bestShift);
Chris@311 893 }
Chris@311 894 }
Chris@311 895
Chris@311 896 return { pitches, bestShifts };
Chris@311 897 }
Chris@311 898
Chris@32 899 Silvet::Grid
Chris@32 900 Silvet::preProcess(const Grid &in)
Chris@32 901 {
Chris@32 902 int width = in.size();
Chris@32 903
Chris@165 904 int spacing = processingSampleRate / m_colsPerSec;
Chris@32 905
Chris@165 906 // need to be careful that col spacing is an integer number of samples!
Chris@165 907 assert(spacing * m_colsPerSec == processingSampleRate);
Chris@32 908
Chris@32 909 Grid out;
Chris@32 910
Chris@58 911 // We count the CQ latency in terms of processing hops, but
Chris@58 912 // actually it probably isn't an exact number of hops so this
Chris@58 913 // isn't quite accurate. But the small constant offset is
Chris@165 914 // practically irrelevant compared to the jitter from the frame
Chris@165 915 // size we reduce to in a moment
Chris@33 916 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
Chris@33 917
Chris@298 918 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 919
Chris@32 920 for (int i = 0; i < width; ++i) {
Chris@32 921
Chris@33 922 if (m_columnCount < latentColumns) {
Chris@33 923 ++m_columnCount;
Chris@33 924 continue;
Chris@33 925 }
Chris@33 926
Chris@32 927 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop();
Chris@32 928 int sampleNo = m_columnCount * m_cq->getColumnHop();
Chris@32 929
Chris@32 930 bool select = (sampleNo / spacing != prevSampleNo / spacing);
Chris@32 931
Chris@32 932 if (select) {
Chris@32 933 vector<double> inCol = in[i];
Chris@176 934 vector<double> outCol(pack.templateHeight);
Chris@32 935
Chris@178 936 // In HQ mode, the CQ returns 600 bins and we ignore the
Chris@298 937 // lowest 55 of them (assuming binsPerSemitone == 5).
Chris@178 938 //
Chris@297 939 // In draft and live mode the CQ is an octave shorter,
Chris@300 940 // returning 540 bins or equivalent, so we instead pad
Chris@300 941 // them with an additional 5 or equivalent zeros.
Chris@178 942 //
Chris@178 943 // We also need to reverse the column as we go, since the
Chris@178 944 // raw CQ has the high frequencies first and we need it
Chris@178 945 // the other way around.
Chris@32 946
Chris@298 947 int bps = (m_mode == LiveMode ?
Chris@298 948 binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@298 949
Chris@297 950 if (m_mode == HighQualityMode) {
Chris@178 951 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@298 952 int ix = inCol.size() - j - (11 * bps);
Chris@178 953 outCol[j] = inCol[ix];
Chris@178 954 }
Chris@178 955 } else {
Chris@298 956 for (int j = 0; j < bps; ++j) {
Chris@178 957 outCol[j] = 0.0;
Chris@178 958 }
Chris@298 959 for (int j = bps; j < pack.templateHeight; ++j) {
Chris@298 960 int ix = inCol.size() - j + (bps-1);
Chris@178 961 outCol[j] = inCol[ix];
Chris@178 962 }
Chris@46 963 }
Chris@32 964
Chris@46 965 vector<double> noiseLevel1 =
Chris@298 966 MedianFilter<double>::filter(8 * bps, outCol);
Chris@176 967 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 968 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
Chris@46 969 }
Chris@32 970
Chris@46 971 vector<double> noiseLevel2 =
Chris@298 972 MedianFilter<double>::filter(8 * bps, noiseLevel1);
Chris@176 973 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 974 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
Chris@32 975 }
Chris@32 976
Chris@165 977 out.push_back(outCol);
Chris@32 978 }
Chris@32 979
Chris@32 980 ++m_columnCount;
Chris@32 981 }
Chris@32 982
Chris@32 983 return out;
Chris@32 984 }
Chris@32 985
Chris@321 986 void
Chris@170 987 Silvet::postProcess(const vector<double> &pitches,
Chris@336 988 const vector<int> &bestShifts)
Chris@166 989 {
Chris@298 990 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 991
Chris@41 992 // Threshold for level and reduce number of candidate pitches
Chris@41 993
Chris@41 994 typedef std::multimap<double, int> ValueIndexMap;
Chris@41 995
Chris@41 996 ValueIndexMap strengths;
Chris@166 997
Chris@176 998 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@321 999
Chris@321 1000 double strength = pitches[j];
Chris@183 1001 if (strength < pack.levelThreshold) continue;
Chris@321 1002
Chris@321 1003 // In live mode with only a 12-bpo CQ, we are very likely to
Chris@321 1004 // get clusters of two or three high scores at a time for
Chris@321 1005 // neighbouring semitones. Eliminate these by picking only the
Chris@325 1006 // peaks (except that we never eliminate a note that has
Chris@325 1007 // already been established as currently playing). This means
Chris@325 1008 // we can't recognise actual semitone chords if they ever
Chris@325 1009 // appear, but it's not as if live mode is good enough for
Chris@325 1010 // that to be a big deal anyway.
Chris@321 1011 if (m_mode == LiveMode) {
Chris@325 1012 if (m_current.find(j) == m_current.end() &&
Chris@325 1013 (j == 0 ||
Chris@325 1014 j + 1 == pack.templateNoteCount ||
Chris@325 1015 pitches[j] < pitches[j-1] ||
Chris@325 1016 pitches[j] < pitches[j+1])) {
Chris@325 1017 // not a peak or a currently-playing note: skip it
Chris@321 1018 continue;
Chris@321 1019 }
Chris@321 1020 }
Chris@323 1021
Chris@168 1022 strengths.insert(ValueIndexMap::value_type(strength, j));
Chris@168 1023 }
Chris@166 1024
Chris@168 1025 ValueIndexMap::const_iterator si = strengths.end();
Chris@167 1026
Chris@168 1027 map<int, double> active;
Chris@168 1028 map<int, int> activeShifts;
Chris@168 1029
Chris@336 1030 int shiftCount = getShiftCount();
Chris@336 1031
Chris@183 1032 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) {
Chris@168 1033
Chris@168 1034 --si;
Chris@168 1035
Chris@168 1036 double strength = si->first;
Chris@168 1037 int j = si->second;
Chris@168 1038
Chris@168 1039 active[j] = strength;
Chris@168 1040
Chris@336 1041 if (shiftCount > 1) {
Chris@170 1042 activeShifts[j] = bestShifts[j];
Chris@167 1043 }
Chris@41 1044 }
Chris@41 1045
Chris@168 1046 m_pianoRoll.push_back(active);
Chris@170 1047
Chris@336 1048 if (shiftCount > 1) {
Chris@168 1049 m_pianoRollShifts.push_back(activeShifts);
Chris@41 1050 }
Chris@294 1051
Chris@321 1052 return;
Chris@166 1053 }
Chris@166 1054
Chris@336 1055 Silvet::FeatureChunk
Chris@336 1056 Silvet::noteTrack()
Chris@166 1057 {
Chris@41 1058 // Minimum duration pruning, and conversion to notes. We can only
Chris@41 1059 // report notes that have just ended (i.e. that are absent in the
Chris@168 1060 // latest active set but present in the prior set in the piano
Chris@41 1061 // roll) -- any notes that ended earlier will have been reported
Chris@41 1062 // already, and if they haven't ended, we don't know their
Chris@41 1063 // duration.
Chris@41 1064
Chris@168 1065 int width = m_pianoRoll.size() - 1;
Chris@168 1066
Chris@168 1067 const map<int, double> &active = m_pianoRoll[width];
Chris@41 1068
Chris@165 1069 double columnDuration = 1.0 / m_colsPerSec;
Chris@165 1070
Chris@165 1071 // only keep notes >= 100ms or thereabouts
Chris@323 1072 double durationThrSec = 0.1;
Chris@323 1073 int durationThreshold = floor(durationThrSec / columnDuration); // in cols
Chris@165 1074 if (durationThreshold < 1) durationThreshold = 1;
Chris@41 1075
Chris@337 1076 FeatureList noteFeatures, onsetFeatures, onOffsetFeatures, simultaneousFeatures;
Chris@41 1077
Chris@41 1078 if (width < durationThreshold + 1) {
Chris@337 1079 return { noteFeatures, onsetFeatures, onOffsetFeatures, simultaneousFeatures };
Chris@41 1080 }
Chris@41 1081
Chris@55 1082 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
Chris@41 1083 ni != m_pianoRoll[width-1].end(); ++ni) {
Chris@41 1084
Chris@55 1085 int note = ni->first;
Chris@41 1086
Chris@41 1087 int end = width;
Chris@41 1088 int start = end-1;
Chris@41 1089
Chris@41 1090 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
Chris@41 1091 --start;
Chris@41 1092 }
Chris@41 1093 ++start;
Chris@41 1094
Chris@319 1095 int duration = end - start;
Chris@319 1096
Chris@319 1097 if (duration < durationThreshold) {
Chris@41 1098 continue;
Chris@41 1099 }
Chris@41 1100
Chris@319 1101 if (duration == durationThreshold) {
Chris@325 1102 m_current.insert(note);
Chris@336 1103 emitOnset(start, note, onsetFeatures);
Chris@336 1104 emitOnset(start, note, onOffsetFeatures);
Chris@319 1105 }
Chris@319 1106
Chris@319 1107 if (active.find(note) == active.end()) {
Chris@319 1108 // the note was playing but just ended
Chris@325 1109 m_current.erase(note);
Chris@336 1110 emitNote(start, end, note, noteFeatures);
Chris@336 1111 emitOffset(start, end, note, onOffsetFeatures);
Chris@334 1112 } else { // still playing
Chris@334 1113 // repeated note detection: if level is greater than this
Chris@334 1114 // multiple of its previous value, then we end the note and
Chris@334 1115 // restart it with the same pitch
Chris@334 1116 double restartFactor = 1.5;
Chris@334 1117 if (duration >= durationThreshold * 2 &&
Chris@334 1118 (active.find(note)->second >
Chris@334 1119 restartFactor * m_pianoRoll[width-1][note])) {
Chris@334 1120 m_current.erase(note);
Chris@336 1121 emitNote(start, end-1, note, noteFeatures);
Chris@336 1122 emitOffset(start, end-1, note, onOffsetFeatures);
Chris@334 1123 // and remove this so that we start counting the new
Chris@334 1124 // note's duration from the current position
Chris@334 1125 m_pianoRoll[width-1].erase(note);
Chris@334 1126 }
Chris@319 1127 }
Chris@41 1128 }
Chris@41 1129
Chris@62 1130 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
Chris@41 1131
Chris@337 1132 return { noteFeatures, onsetFeatures, onOffsetFeatures, simultaneousFeatures };
Chris@41 1133 }
Chris@41 1134
Chris@169 1135 void
Chris@336 1136 Silvet::emitNote(int start, int end, int note, FeatureList &noteFeatures)
Chris@169 1137 {
Chris@169 1138 int partStart = start;
Chris@169 1139 int partShift = 0;
Chris@320 1140 double partStrength = 0;
Chris@169 1141
Chris@252 1142 int partThreshold = floor(0.05 * m_colsPerSec);
Chris@169 1143
Chris@169 1144 for (int i = start; i != end; ++i) {
Chris@169 1145
Chris@169 1146 double strength = m_pianoRoll[i][note];
Chris@169 1147
Chris@169 1148 int shift = 0;
Chris@169 1149
Chris@336 1150 if (getShiftCount() > 1) {
Chris@169 1151
Chris@169 1152 shift = m_pianoRollShifts[i][note];
Chris@169 1153
Chris@169 1154 if (i == partStart) {
Chris@169 1155 partShift = shift;
Chris@169 1156 }
Chris@169 1157
Chris@169 1158 if (i > partStart + partThreshold && shift != partShift) {
Chris@169 1159
Chris@169 1160 // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl;
Chris@169 1161
Chris@169 1162 // pitch has changed, emit an intermediate note
Chris@252 1163 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 1164 i,
Chris@252 1165 note,
Chris@252 1166 partShift,
Chris@320 1167 partStrength));
Chris@169 1168 partStart = i;
Chris@169 1169 partShift = shift;
Chris@320 1170 partStrength = 0;
Chris@169 1171 }
Chris@169 1172 }
Chris@169 1173
Chris@320 1174 if (strength > partStrength) {
Chris@320 1175 partStrength = strength;
Chris@169 1176 }
Chris@169 1177 }
Chris@169 1178
Chris@169 1179 if (end >= partStart + partThreshold) {
Chris@252 1180 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 1181 end,
Chris@252 1182 note,
Chris@252 1183 partShift,
Chris@320 1184 partStrength));
Chris@169 1185 }
Chris@169 1186 }
Chris@252 1187
Chris@319 1188 void
Chris@336 1189 Silvet::emitOnset(int start, int note, FeatureList &onOffsetFeatures)
Chris@319 1190 {
Chris@319 1191 int len = int(m_pianoRoll.size());
Chris@320 1192
Chris@320 1193 double onsetStrength = 0;
Chris@319 1194
Chris@319 1195 int shift = 0;
Chris@336 1196 if (getShiftCount() > 1) {
Chris@319 1197 shift = m_pianoRollShifts[start][note];
Chris@319 1198 }
Chris@319 1199
Chris@319 1200 for (int i = start; i < len; ++i) {
Chris@319 1201 double strength = m_pianoRoll[i][note];
Chris@320 1202 if (strength > onsetStrength) {
Chris@320 1203 onsetStrength = strength;
Chris@319 1204 }
Chris@319 1205 }
Chris@319 1206
Chris@336 1207 if (onsetStrength == 0) return;
Chris@336 1208
Chris@336 1209 onOffsetFeatures.push_back(makeOnsetFeature(start,
Chris@336 1210 note,
Chris@336 1211 shift,
Chris@336 1212 onsetStrength));
Chris@336 1213 }
Chris@336 1214
Chris@336 1215 void
Chris@336 1216 Silvet::emitOffset(int start, int end, int note, FeatureList &onOffsetFeatures)
Chris@336 1217 {
Chris@336 1218 int shift = 0;
Chris@336 1219 if (getShiftCount() > 1) {
Chris@336 1220 shift = m_pianoRollShifts[start][note];
Chris@336 1221 }
Chris@336 1222
Chris@336 1223 onOffsetFeatures.push_back(makeOffsetFeature(end,
Chris@336 1224 note,
Chris@336 1225 shift));
Chris@319 1226 }
Chris@319 1227
Chris@337 1228 void
Chris@337 1229 Silvet::emitSimultaneity(int start, FeatureList &simultaneousFeatures)
Chris@337 1230 {
Chris@337 1231 Feature f;
Chris@337 1232 f.hasTimestamp = true;
Chris@337 1233 f.timestamp = getColumnTimestamp(start);
Chris@337 1234 f.hasDuration = false;
Chris@337 1235 f.values.clear();
Chris@337 1236 for (auto &noteShift : m_simultaneity.notesShifts) {
Chris@337 1237 f.values.push_back(getNoteFrequency(noteShift.first, noteShift.second));
Chris@337 1238 }
Chris@337 1239 f.label = "";
Chris@337 1240 for (auto &noteShift : m_simultaneity.notesShifts) {
Chris@337 1241 if (f.label != "") f.label += ",";
Chris@337 1242 f.label += getNoteName(noteShift.first, noteShift.second);
Chris@337 1243 }
Chris@337 1244 simultaneousFeatures.push_back(f);
Chris@337 1245 }
Chris@337 1246
Chris@309 1247 RealTime
Chris@309 1248 Silvet::getColumnTimestamp(int column)
Chris@309 1249 {
Chris@309 1250 double columnDuration = 1.0 / m_colsPerSec;
Chris@309 1251 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
Chris@309 1252
Chris@309 1253 return m_startTime + RealTime::fromSeconds
Chris@309 1254 (columnDuration * (column - postFilterLatency) + 0.02);
Chris@309 1255 }
Chris@309 1256
Chris@252 1257 Silvet::Feature
Chris@252 1258 Silvet::makeNoteFeature(int start,
Chris@252 1259 int end,
Chris@252 1260 int note,
Chris@252 1261 int shift,
Chris@320 1262 double strength)
Chris@252 1263 {
Chris@252 1264 Feature f;
Chris@252 1265
Chris@252 1266 f.hasTimestamp = true;
Chris@309 1267 f.timestamp = getColumnTimestamp(start);
Chris@252 1268
Chris@252 1269 f.hasDuration = true;
Chris@309 1270 f.duration = getColumnTimestamp(end) - f.timestamp;
Chris@252 1271
Chris@252 1272 f.values.clear();
Chris@336 1273 f.values.push_back(getNoteFrequency(note, shift));
Chris@320 1274 f.values.push_back(getVelocityFor(strength, start));
Chris@252 1275
Chris@336 1276 f.label = getNoteName(note, shift);
Chris@252 1277
Chris@252 1278 return f;
Chris@252 1279 }
Chris@252 1280
Chris@319 1281 Silvet::Feature
Chris@319 1282 Silvet::makeOnsetFeature(int start,
Chris@319 1283 int note,
Chris@319 1284 int shift,
Chris@320 1285 double strength)
Chris@319 1286 {
Chris@319 1287 Feature f;
Chris@319 1288
Chris@319 1289 f.hasTimestamp = true;
Chris@319 1290 f.timestamp = getColumnTimestamp(start);
Chris@319 1291
Chris@319 1292 f.hasDuration = false;
Chris@319 1293
Chris@319 1294 f.values.clear();
Chris@336 1295 f.values.push_back(getNoteFrequency(note, shift));
Chris@320 1296 f.values.push_back(getVelocityFor(strength, start));
Chris@319 1297
Chris@336 1298 f.label = getNoteName(note, shift);
Chris@336 1299
Chris@336 1300 return f;
Chris@336 1301 }
Chris@336 1302
Chris@336 1303 Silvet::Feature
Chris@336 1304 Silvet::makeOffsetFeature(int col,
Chris@336 1305 int note,
Chris@336 1306 int shift)
Chris@336 1307 {
Chris@336 1308 Feature f;
Chris@336 1309
Chris@336 1310 f.hasTimestamp = true;
Chris@336 1311 f.timestamp = getColumnTimestamp(col);
Chris@336 1312
Chris@336 1313 f.hasDuration = false;
Chris@336 1314
Chris@336 1315 f.values.clear();
Chris@336 1316 f.values.push_back(getNoteFrequency(note, shift));
Chris@336 1317 f.values.push_back(0); // velocity 0 for offset
Chris@336 1318
Chris@336 1319 f.label = getNoteName(note, shift) + " off";
Chris@319 1320
Chris@319 1321 return f;
Chris@319 1322 }
Chris@319 1323
Chris@320 1324 int
Chris@320 1325 Silvet::getVelocityFor(double strength, int column)
Chris@320 1326 {
Chris@320 1327 RealTime rt = getColumnTimestamp(column + 1);
Chris@320 1328
Chris@320 1329 float inputGain = getInputGainAt(rt);
Chris@320 1330
Chris@320 1331 double scale = 2.0;
Chris@320 1332 if (m_mode == LiveMode) scale = 20.0;
Chris@320 1333
Chris@320 1334 double velocity = round((strength * scale) / inputGain);
Chris@320 1335
Chris@320 1336 if (velocity > 127.0) velocity = 127.0;
Chris@320 1337 if (velocity < 1.0) velocity = 1.0; // assume surpassed 0 threshold already
Chris@320 1338
Chris@320 1339 return int(velocity);
Chris@320 1340 }
Chris@320 1341
Chris@252 1342 float
Chris@252 1343 Silvet::getInputGainAt(RealTime t)
Chris@252 1344 {
Chris@252 1345 map<RealTime, float>::const_iterator i = m_inputGains.lower_bound(t);
Chris@252 1346
Chris@252 1347 if (i == m_inputGains.end()) {
Chris@252 1348 if (i != m_inputGains.begin()) {
Chris@252 1349 --i;
Chris@252 1350 } else {
Chris@252 1351 return 1.f; // no data
Chris@252 1352 }
Chris@252 1353 }
Chris@252 1354
Chris@252 1355 // cerr << "gain at time " << t << " = " << i->second << endl;
Chris@252 1356
Chris@252 1357 return i->second;
Chris@252 1358 }
Chris@252 1359