annotate src/Silvet.cpp @ 298:ebe5e0942bb8 livemode

More toward a possible live mode
author Chris Cannam
date Fri, 28 Nov 2014 10:18:22 +0000
parents d6ab1b4918bd
children ba5f3b084466
rev   line source
Chris@31 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@31 2
Chris@31 3 /*
Chris@31 4 Silvet
Chris@31 5
Chris@31 6 A Vamp plugin for note transcription.
Chris@31 7 Centre for Digital Music, Queen Mary University of London.
Chris@31 8
Chris@31 9 This program is free software; you can redistribute it and/or
Chris@31 10 modify it under the terms of the GNU General Public License as
Chris@31 11 published by the Free Software Foundation; either version 2 of the
Chris@31 12 License, or (at your option) any later version. See the file
Chris@31 13 COPYING included with this distribution for more information.
Chris@31 14 */
Chris@31 15
Chris@31 16 #include "Silvet.h"
Chris@34 17 #include "EM.h"
Chris@31 18
Chris@152 19 #include <cq/CQSpectrogram.h>
Chris@31 20
Chris@152 21 #include "MedianFilter.h"
Chris@152 22 #include "constant-q-cpp/src/dsp/Resampler.h"
Chris@246 23 #include "flattendynamics-ladspa.h"
Chris@298 24 #include "LiveInstruments.h"
Chris@31 25
Chris@31 26 #include <vector>
Chris@31 27
Chris@32 28 #include <cstdio>
Chris@32 29
Chris@31 30 using std::vector;
Chris@48 31 using std::cout;
Chris@31 32 using std::cerr;
Chris@31 33 using std::endl;
Chris@40 34 using Vamp::RealTime;
Chris@31 35
// Sample rate (Hz) used for the constant-Q analysis; input at any
// other rate is resampled to this rate first (see reset()).
static const int processingSampleRate = 44100;

// Constant-Q bins per semitone. reset() multiplies the selected value
// by 12 to obtain the bins-per-octave figure for the transform: the
// "live" value is used in LiveMode, the "normal" value otherwise.
static const int binsPerSemitoneLive = 1;
static const int binsPerSemitoneNormal = 5;

// Inclusive range of input sample rates accepted by initialise().
static const int minInputSampleRate = 100;
static const int maxInputSampleRate = 192000;
Chris@272 43
Chris@31 44 Silvet::Silvet(float inputSampleRate) :
Chris@31 45 Plugin(inputSampleRate),
Chris@161 46 m_instruments(InstrumentPack::listInstrumentPacks()),
Chris@298 47 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)),
Chris@31 48 m_resampler(0),
Chris@246 49 m_flattener(0),
Chris@110 50 m_cq(0),
Chris@297 51 m_mode(HighQualityMode),
Chris@166 52 m_fineTuning(false),
Chris@178 53 m_instrument(0),
Chris@178 54 m_colsPerSec(50)
Chris@31 55 {
Chris@31 56 }
Chris@31 57
Chris@31 58 Silvet::~Silvet()
Chris@31 59 {
Chris@31 60 delete m_resampler;
Chris@246 61 delete m_flattener;
Chris@31 62 delete m_cq;
Chris@41 63 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 64 delete m_postFilter[i];
Chris@41 65 }
Chris@31 66 }
Chris@31 67
Chris@31 68 string
Chris@31 69 Silvet::getIdentifier() const
Chris@31 70 {
Chris@31 71 return "silvet";
Chris@31 72 }
Chris@31 73
Chris@31 74 string
Chris@31 75 Silvet::getName() const
Chris@31 76 {
Chris@31 77 return "Silvet Note Transcription";
Chris@31 78 }
Chris@31 79
Chris@31 80 string
Chris@31 81 Silvet::getDescription() const
Chris@31 82 {
Chris@191 83 return "Estimate the note onsets, pitches, and durations that make up a music recording.";
Chris@31 84 }
Chris@31 85
Chris@31 86 string
Chris@31 87 Silvet::getMaker() const
Chris@31 88 {
Chris@191 89 return "Queen Mary, University of London";
Chris@31 90 }
Chris@31 91
Chris@31 92 int
Chris@31 93 Silvet::getPluginVersion() const
Chris@31 94 {
Chris@295 95 return 2;
Chris@31 96 }
Chris@31 97
Chris@31 98 string
Chris@31 99 Silvet::getCopyright() const
Chris@31 100 {
Chris@191 101 return "Method by Emmanouil Benetos and Simon Dixon; plugin by Chris Cannam and Emmanouil Benetos. GPL licence.";
Chris@31 102 }
Chris@31 103
Chris@31 104 Silvet::InputDomain
Chris@31 105 Silvet::getInputDomain() const
Chris@31 106 {
Chris@31 107 return TimeDomain;
Chris@31 108 }
Chris@31 109
Chris@31 110 size_t
Chris@31 111 Silvet::getPreferredBlockSize() const
Chris@31 112 {
Chris@31 113 return 0;
Chris@31 114 }
Chris@31 115
Chris@31 116 size_t
Chris@31 117 Silvet::getPreferredStepSize() const
Chris@31 118 {
Chris@31 119 return 0;
Chris@31 120 }
Chris@31 121
Chris@31 122 size_t
Chris@31 123 Silvet::getMinChannelCount() const
Chris@31 124 {
Chris@31 125 return 1;
Chris@31 126 }
Chris@31 127
Chris@31 128 size_t
Chris@31 129 Silvet::getMaxChannelCount() const
Chris@31 130 {
Chris@31 131 return 1;
Chris@31 132 }
Chris@31 133
Chris@31 134 Silvet::ParameterList
Chris@31 135 Silvet::getParameterDescriptors() const
Chris@31 136 {
Chris@31 137 ParameterList list;
Chris@110 138
Chris@110 139 ParameterDescriptor desc;
Chris@110 140 desc.identifier = "mode";
Chris@110 141 desc.name = "Processing mode";
Chris@110 142 desc.unit = "";
Chris@297 143 desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode is tuned in favour of overall speed; Live mode is tuned in favour of lower latency; while Intensive mode (the default) will almost always produce the best results.";
Chris@110 144 desc.minValue = 0;
Chris@297 145 desc.maxValue = 2;
Chris@113 146 desc.defaultValue = 1;
Chris@110 147 desc.isQuantized = true;
Chris@110 148 desc.quantizeStep = 1;
Chris@166 149 desc.valueNames.push_back("Draft (faster)");
Chris@165 150 desc.valueNames.push_back("Intensive (higher quality)");
Chris@297 151 desc.valueNames.push_back("Live (lower latency)");
Chris@161 152 list.push_back(desc);
Chris@161 153
Chris@176 154 desc.identifier = "instrument";
Chris@176 155 desc.name = "Instrument";
Chris@161 156 desc.unit = "";
Chris@271 157 desc.description = "The instrument or instruments known to be present in the recording. This affects the set of instrument templates used, as well as the expected level of polyphony in the output. Using a more limited set of instruments than the default will also make the plugin run faster.\nNote that this plugin cannot isolate instruments: you can't use this setting to request notes from only one instrument in a recording with several. Instead, use this as a hint to the plugin about which instruments are actually present.";
Chris@161 158 desc.minValue = 0;
Chris@162 159 desc.maxValue = m_instruments.size()-1;
Chris@162 160 desc.defaultValue = 0;
Chris@161 161 desc.isQuantized = true;
Chris@161 162 desc.quantizeStep = 1;
Chris@161 163 desc.valueNames.clear();
Chris@162 164 for (int i = 0; i < int(m_instruments.size()); ++i) {
Chris@162 165 desc.valueNames.push_back(m_instruments[i].name);
Chris@162 166 }
Chris@166 167 list.push_back(desc);
Chris@161 168
Chris@166 169 desc.identifier = "finetune";
Chris@166 170 desc.name = "Return fine pitch estimates";
Chris@166 171 desc.unit = "";
Chris@271 172 desc.description = "Return pitch estimates at finer than semitone resolution. This works only in Intensive mode. Notes that appear to drift in pitch will be split up into shorter notes with individually finer pitches.";
Chris@166 173 desc.minValue = 0;
Chris@166 174 desc.maxValue = 1;
Chris@166 175 desc.defaultValue = 0;
Chris@166 176 desc.isQuantized = true;
Chris@166 177 desc.quantizeStep = 1;
Chris@166 178 desc.valueNames.clear();
Chris@110 179 list.push_back(desc);
Chris@110 180
Chris@31 181 return list;
Chris@31 182 }
Chris@31 183
Chris@31 184 float
Chris@31 185 Silvet::getParameter(string identifier) const
Chris@31 186 {
Chris@110 187 if (identifier == "mode") {
Chris@297 188 return (float)(int)m_mode;
Chris@166 189 } else if (identifier == "finetune") {
Chris@166 190 return m_fineTuning ? 1.f : 0.f;
Chris@176 191 } else if (identifier == "instrument") {
Chris@162 192 return m_instrument;
Chris@110 193 }
Chris@31 194 return 0;
Chris@31 195 }
Chris@31 196
Chris@31 197 void
Chris@31 198 Silvet::setParameter(string identifier, float value)
Chris@31 199 {
Chris@110 200 if (identifier == "mode") {
Chris@297 201 m_mode = (ProcessingMode)(int)(value + 0.5);
Chris@166 202 } else if (identifier == "finetune") {
Chris@166 203 m_fineTuning = (value > 0.5);
Chris@176 204 } else if (identifier == "instrument") {
Chris@162 205 m_instrument = lrintf(value);
Chris@110 206 }
Chris@31 207 }
Chris@31 208
Chris@31 209 Silvet::ProgramList
Chris@31 210 Silvet::getPrograms() const
Chris@31 211 {
Chris@31 212 ProgramList list;
Chris@31 213 return list;
Chris@31 214 }
Chris@31 215
Chris@31 216 string
Chris@31 217 Silvet::getCurrentProgram() const
Chris@31 218 {
Chris@31 219 return "";
Chris@31 220 }
Chris@31 221
Chris@31 222 void
Chris@31 223 Silvet::selectProgram(string name)
Chris@31 224 {
Chris@31 225 }
Chris@31 226
Chris@31 227 Silvet::OutputList
Chris@31 228 Silvet::getOutputDescriptors() const
Chris@31 229 {
Chris@31 230 OutputList list;
Chris@31 231
Chris@31 232 OutputDescriptor d;
Chris@51 233 d.identifier = "notes";
Chris@51 234 d.name = "Note transcription";
Chris@271 235 d.description = "Overall note transcription. Each note has time, duration, estimated pitch, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
Chris@41 236 d.unit = "Hz";
Chris@31 237 d.hasFixedBinCount = true;
Chris@31 238 d.binCount = 2;
Chris@41 239 d.binNames.push_back("Frequency");
Chris@31 240 d.binNames.push_back("Velocity");
Chris@31 241 d.hasKnownExtents = false;
Chris@31 242 d.isQuantized = false;
Chris@31 243 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@246 244 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@31 245 d.hasDuration = true;
Chris@32 246 m_notesOutputNo = list.size();
Chris@32 247 list.push_back(d);
Chris@32 248
Chris@178 249 d.identifier = "timefreq";
Chris@178 250 d.name = "Time-frequency distribution";
Chris@271 251 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
Chris@178 252 d.unit = "";
Chris@178 253 d.hasFixedBinCount = true;
Chris@298 254 d.binCount = getPack(0).templateHeight;
Chris@178 255 d.binNames.clear();
Chris@178 256 if (m_cq) {
Chris@294 257 char name[50];
Chris@298 258 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@178 259 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@178 260 // lowest-frequency 55 bins have been dropped, for a
Chris@178 261 // 545-bin template. The native CQ bins go high->low
Chris@178 262 // frequency though, so these are still the first 545 bins
Chris@178 263 // as reported by getBinFrequency, though in reverse order
Chris@178 264 float freq = m_cq->getBinFrequency
Chris@298 265 (getPack(0).templateHeight - i - 1);
Chris@178 266 sprintf(name, "%.1f Hz", freq);
Chris@178 267 d.binNames.push_back(name);
Chris@178 268 }
Chris@178 269 }
Chris@178 270 d.hasKnownExtents = false;
Chris@178 271 d.isQuantized = false;
Chris@178 272 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@178 273 d.sampleRate = m_colsPerSec;
Chris@178 274 d.hasDuration = false;
Chris@178 275 m_fcqOutputNo = list.size();
Chris@178 276 list.push_back(d);
Chris@178 277
Chris@294 278 d.identifier = "pitchactivation";
Chris@294 279 d.name = "Pitch activation distribution";
Chris@294 280 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction.";
Chris@294 281 d.unit = "";
Chris@294 282 d.hasFixedBinCount = true;
Chris@298 283 d.binCount = getPack(0).templateNoteCount;
Chris@294 284 d.binNames.clear();
Chris@294 285 if (m_cq) {
Chris@298 286 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@294 287 d.binNames.push_back(noteName(i, 0, 1));
Chris@294 288 }
Chris@294 289 }
Chris@294 290 d.hasKnownExtents = false;
Chris@294 291 d.isQuantized = false;
Chris@294 292 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@294 293 d.sampleRate = m_colsPerSec;
Chris@294 294 d.hasDuration = false;
Chris@294 295 m_pitchOutputNo = list.size();
Chris@294 296 list.push_back(d);
Chris@294 297
Chris@31 298 return list;
Chris@31 299 }
Chris@31 300
Chris@38 301 std::string
Chris@175 302 Silvet::noteName(int note, int shift, int shiftCount) const
Chris@38 303 {
Chris@38 304 static const char *names[] = {
Chris@38 305 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
Chris@38 306 };
Chris@38 307
Chris@175 308 const char *n = names[note % 12];
Chris@38 309
Chris@175 310 int oct = (note + 9) / 12;
Chris@38 311
Chris@175 312 char buf[30];
Chris@175 313
Chris@175 314 float pshift = 0.f;
Chris@175 315 if (shiftCount > 1) {
Chris@175 316 // see noteFrequency below
Chris@175 317 pshift =
Chris@175 318 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 319 }
Chris@175 320
Chris@175 321 if (pshift > 0.f) {
Chris@175 322 sprintf(buf, "%s%d+%dc", n, oct, int(round(pshift * 100)));
Chris@175 323 } else if (pshift < 0.f) {
Chris@175 324 sprintf(buf, "%s%d-%dc", n, oct, int(round((-pshift) * 100)));
Chris@175 325 } else {
Chris@175 326 sprintf(buf, "%s%d", n, oct);
Chris@175 327 }
Chris@38 328
Chris@38 329 return buf;
Chris@38 330 }
Chris@38 331
Chris@41 332 float
Chris@168 333 Silvet::noteFrequency(int note, int shift, int shiftCount) const
Chris@41 334 {
Chris@169 335 // Convert shift number to a pitch shift. The given shift number
Chris@169 336 // is an offset into the template array, which starts with some
Chris@169 337 // zeros, followed by the template, then some trailing zeros.
Chris@169 338 //
Chris@169 339 // Example: if we have templateMaxShift == 2 and thus shiftCount
Chris@169 340 // == 5, then the number will be in the range 0-4 and the template
Chris@169 341 // will have 2 zeros at either end. Thus number 2 represents the
Chris@169 342 // template "as recorded", for a pitch shift of 0; smaller indices
Chris@169 343 // represent moving the template *up* in pitch (by introducing
Chris@169 344 // zeros at the start, which is the low-frequency end), for a
Chris@169 345 // positive pitch shift; and higher values represent moving it
Chris@169 346 // down in pitch, for a negative pitch shift.
Chris@169 347
Chris@175 348 float pshift = 0.f;
Chris@175 349 if (shiftCount > 1) {
Chris@175 350 pshift =
Chris@175 351 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 352 }
Chris@169 353
Chris@169 354 return float(27.5 * pow(2.0, (note + pshift) / 12.0));
Chris@41 355 }
Chris@41 356
Chris@31 357 bool
Chris@31 358 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@31 359 {
Chris@272 360 if (m_inputSampleRate < minInputSampleRate ||
Chris@272 361 m_inputSampleRate > maxInputSampleRate) {
Chris@272 362 cerr << "Silvet::initialise: Unsupported input sample rate "
Chris@272 363 << m_inputSampleRate << " (supported min " << minInputSampleRate
Chris@272 364 << ", max " << maxInputSampleRate << ")" << endl;
Chris@272 365 return false;
Chris@272 366 }
Chris@272 367
Chris@31 368 if (channels < getMinChannelCount() ||
Chris@272 369 channels > getMaxChannelCount()) {
Chris@272 370 cerr << "Silvet::initialise: Unsupported channel count " << channels
Chris@272 371 << " (supported min " << getMinChannelCount() << ", max "
Chris@272 372 << getMaxChannelCount() << ")" << endl;
Chris@272 373 return false;
Chris@272 374 }
Chris@31 375
Chris@31 376 if (stepSize != blockSize) {
Chris@31 377 cerr << "Silvet::initialise: Step size must be the same as block size ("
Chris@31 378 << stepSize << " != " << blockSize << ")" << endl;
Chris@31 379 return false;
Chris@31 380 }
Chris@31 381
Chris@31 382 m_blockSize = blockSize;
Chris@31 383
Chris@31 384 reset();
Chris@31 385
Chris@31 386 return true;
Chris@31 387 }
Chris@31 388
Chris@31 389 void
Chris@31 390 Silvet::reset()
Chris@31 391 {
Chris@31 392 delete m_resampler;
Chris@246 393 delete m_flattener;
Chris@31 394 delete m_cq;
Chris@31 395
Chris@31 396 if (m_inputSampleRate != processingSampleRate) {
Chris@31 397 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
Chris@31 398 } else {
Chris@31 399 m_resampler = 0;
Chris@31 400 }
Chris@31 401
Chris@246 402 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
Chris@246 403 m_flattener->reset();
Chris@246 404
Chris@173 405 double minFreq = 27.5;
Chris@173 406
Chris@297 407 if (m_mode != HighQualityMode) {
Chris@173 408 // We don't actually return any notes from the bottom octave,
Chris@173 409 // so we can just pad with zeros
Chris@173 410 minFreq *= 2;
Chris@173 411 }
Chris@173 412
Chris@298 413 int bpo = 12 *
Chris@298 414 (m_mode == LiveMode ? binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@298 415
Chris@154 416 CQParameters params(processingSampleRate,
Chris@173 417 minFreq,
Chris@154 418 processingSampleRate / 3,
Chris@298 419 bpo);
Chris@154 420
Chris@155 421 params.q = 0.95; // MIREX code uses 0.8, but it seems 0.9 or lower
Chris@155 422 // drops the FFT size to 512 from 1024 and alters
Chris@155 423 // some other processing parameters, making
Chris@155 424 // everything much, much slower. Could be a flaw
Chris@155 425 // in the CQ parameter calculations, must check
Chris@154 426 params.atomHopFactor = 0.3;
Chris@154 427 params.threshold = 0.0005;
Chris@172 428 params.window = CQParameters::Hann;
Chris@154 429
Chris@154 430 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
Chris@31 431
Chris@297 432 cerr << "cq latency = " << m_cq->getLatency() << endl;
Chris@297 433
Chris@297 434 m_colsPerSec = (m_mode == DraftMode ? 25 : 50);
Chris@165 435
Chris@41 436 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 437 delete m_postFilter[i];
Chris@41 438 }
Chris@41 439 m_postFilter.clear();
Chris@298 440 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@41 441 m_postFilter.push_back(new MedianFilter<double>(3));
Chris@41 442 }
Chris@41 443 m_pianoRoll.clear();
Chris@246 444 m_inputGains.clear();
Chris@32 445 m_columnCount = 0;
Chris@272 446 m_resampledCount = 0;
Chris@40 447 m_startTime = RealTime::zeroTime;
Chris@31 448 }
Chris@31 449
Chris@31 450 Silvet::FeatureSet
Chris@31 451 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
Chris@31 452 {
Chris@40 453 if (m_columnCount == 0) {
Chris@40 454 m_startTime = timestamp;
Chris@40 455 }
Chris@246 456
Chris@246 457 vector<float> flattened(m_blockSize);
Chris@246 458 float gain = 1.f;
Chris@246 459 m_flattener->connectInputPort
Chris@246 460 (FlattenDynamics::AudioInputPort, inputBuffers[0]);
Chris@246 461 m_flattener->connectOutputPort
Chris@246 462 (FlattenDynamics::AudioOutputPort, &flattened[0]);
Chris@246 463 m_flattener->connectOutputPort
Chris@246 464 (FlattenDynamics::GainOutputPort, &gain);
Chris@246 465 m_flattener->process(m_blockSize);
Chris@246 466
Chris@252 467 m_inputGains[timestamp] = gain;
Chris@40 468
Chris@31 469 vector<double> data;
Chris@40 470 for (int i = 0; i < m_blockSize; ++i) {
Chris@246 471 double d = flattened[i];
Chris@235 472 data.push_back(d);
Chris@40 473 }
Chris@31 474
Chris@31 475 if (m_resampler) {
Chris@272 476
Chris@31 477 data = m_resampler->process(data.data(), data.size());
Chris@272 478
Chris@272 479 int hadCount = m_resampledCount;
Chris@272 480 m_resampledCount += data.size();
Chris@272 481
Chris@272 482 int resamplerLatency = m_resampler->getLatency();
Chris@272 483
Chris@272 484 if (hadCount < resamplerLatency) {
Chris@272 485 int stillToDrop = resamplerLatency - hadCount;
Chris@272 486 if (stillToDrop >= int(data.size())) {
Chris@272 487 return FeatureSet();
Chris@272 488 } else {
Chris@272 489 data = vector<double>(data.begin() + stillToDrop, data.end());
Chris@272 490 }
Chris@272 491 }
Chris@31 492 }
Chris@272 493
Chris@32 494 Grid cqout = m_cq->process(data);
Chris@51 495 FeatureSet fs = transcribe(cqout);
Chris@51 496 return fs;
Chris@34 497 }
Chris@34 498
Chris@34 499 Silvet::FeatureSet
Chris@34 500 Silvet::getRemainingFeatures()
Chris@34 501 {
Chris@145 502 Grid cqout = m_cq->getRemainingOutput();
Chris@51 503 FeatureSet fs = transcribe(cqout);
Chris@51 504 return fs;
Chris@34 505 }
Chris@34 506
Chris@34 507 Silvet::FeatureSet
Chris@34 508 Silvet::transcribe(const Grid &cqout)
Chris@34 509 {
Chris@32 510 Grid filtered = preProcess(cqout);
Chris@31 511
Chris@32 512 FeatureSet fs;
Chris@32 513
Chris@104 514 if (filtered.empty()) return fs;
Chris@170 515
Chris@298 516 const InstrumentPack &pack(getPack(m_instrument));
Chris@104 517
Chris@178 518 for (int i = 0; i < (int)filtered.size(); ++i) {
Chris@178 519 Feature f;
Chris@178 520 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@178 521 f.values.push_back(float(filtered[i][j]));
Chris@178 522 }
Chris@178 523 fs[m_fcqOutputNo].push_back(f);
Chris@178 524 }
Chris@178 525
Chris@34 526 int width = filtered.size();
Chris@34 527
Chris@297 528 int iterations = (m_mode == HighQualityMode ? 20 : 10);
Chris@34 529
Chris@176 530 Grid localPitches(width, vector<double>(pack.templateNoteCount, 0.0));
Chris@170 531
Chris@297 532 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning;
Chris@170 533 int shiftCount = 1;
Chris@170 534 if (wantShifts) {
Chris@170 535 shiftCount = pack.templateMaxShift * 2 + 1;
Chris@170 536 }
Chris@170 537
Chris@170 538 vector<vector<int> > localBestShifts;
Chris@170 539 if (wantShifts) {
Chris@170 540 localBestShifts =
Chris@176 541 vector<vector<int> >(width, vector<int>(pack.templateNoteCount, 0));
Chris@170 542 }
Chris@170 543
Chris@170 544 vector<bool> present(width, false);
Chris@37 545
Chris@123 546 #pragma omp parallel for
Chris@123 547 for (int i = 0; i < width; ++i) {
Chris@104 548
Chris@170 549 double sum = 0.0;
Chris@176 550 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@170 551 sum += filtered.at(i).at(j);
Chris@170 552 }
Chris@170 553 if (sum < 1e-5) continue;
Chris@170 554
Chris@170 555 present[i] = true;
Chris@170 556
Chris@297 557 EM em(&pack, m_mode == HighQualityMode);
Chris@170 558
Chris@183 559 em.setPitchSparsity(pack.pitchSparsity);
Chris@213 560 em.setSourceSparsity(pack.sourceSparsity);
Chris@183 561
Chris@170 562 for (int j = 0; j < iterations; ++j) {
Chris@170 563 em.iterate(filtered.at(i).data());
Chris@37 564 }
Chris@37 565
Chris@170 566 const float *pitchDist = em.getPitchDistribution();
Chris@170 567 const float *const *shiftDist = em.getShifts();
Chris@37 568
Chris@176 569 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@104 570
Chris@170 571 localPitches[i][j] = pitchDist[j] * sum;
Chris@170 572
Chris@170 573 int bestShift = 0;
Chris@179 574 float bestShiftValue = 0.0;
Chris@170 575 if (wantShifts) {
Chris@170 576 for (int k = 0; k < shiftCount; ++k) {
Chris@179 577 float value = shiftDist[k][j];
Chris@179 578 if (k == 0 || value > bestShiftValue) {
Chris@179 579 bestShiftValue = value;
Chris@170 580 bestShift = k;
Chris@170 581 }
Chris@170 582 }
Chris@170 583 localBestShifts[i][j] = bestShift;
Chris@170 584 }
Chris@123 585 }
Chris@123 586 }
Chris@166 587
Chris@166 588 for (int i = 0; i < width; ++i) {
Chris@37 589
Chris@170 590 if (!present[i]) {
Chris@170 591 // silent column
Chris@176 592 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@170 593 m_postFilter[j]->push(0.0);
Chris@170 594 }
Chris@168 595 m_pianoRoll.push_back(map<int, double>());
Chris@170 596 if (wantShifts) {
Chris@168 597 m_pianoRollShifts.push_back(map<int, int>());
Chris@168 598 }
Chris@166 599 continue;
Chris@166 600 }
Chris@166 601
Chris@294 602 vector<double> filtered = postProcess
Chris@294 603 (localPitches[i], localBestShifts[i], wantShifts);
Chris@294 604
Chris@294 605 Feature f;
Chris@294 606 for (int j = 0; j < (int)filtered.size(); ++j) {
Chris@294 607 float v(filtered[j]);
Chris@294 608 if (v < pack.levelThreshold) v = 0.f;
Chris@294 609 f.values.push_back(v);
Chris@294 610 }
Chris@294 611 fs[m_pitchOutputNo].push_back(f);
Chris@166 612
Chris@168 613 FeatureList noteFeatures = noteTrack(shiftCount);
Chris@38 614
Chris@123 615 for (FeatureList::const_iterator fi = noteFeatures.begin();
Chris@123 616 fi != noteFeatures.end(); ++fi) {
Chris@123 617 fs[m_notesOutputNo].push_back(*fi);
Chris@40 618 }
Chris@34 619 }
Chris@34 620
Chris@32 621 return fs;
Chris@31 622 }
Chris@31 623
Chris@32 624 Silvet::Grid
Chris@32 625 Silvet::preProcess(const Grid &in)
Chris@32 626 {
Chris@32 627 int width = in.size();
Chris@32 628
Chris@165 629 int spacing = processingSampleRate / m_colsPerSec;
Chris@32 630
Chris@165 631 // need to be careful that col spacing is an integer number of samples!
Chris@165 632 assert(spacing * m_colsPerSec == processingSampleRate);
Chris@32 633
Chris@32 634 Grid out;
Chris@32 635
Chris@58 636 // We count the CQ latency in terms of processing hops, but
Chris@58 637 // actually it probably isn't an exact number of hops so this
Chris@58 638 // isn't quite accurate. But the small constant offset is
Chris@165 639 // practically irrelevant compared to the jitter from the frame
Chris@165 640 // size we reduce to in a moment
Chris@33 641 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
Chris@33 642
Chris@298 643 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 644
Chris@32 645 for (int i = 0; i < width; ++i) {
Chris@32 646
Chris@33 647 if (m_columnCount < latentColumns) {
Chris@33 648 ++m_columnCount;
Chris@33 649 continue;
Chris@33 650 }
Chris@33 651
Chris@32 652 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop();
Chris@32 653 int sampleNo = m_columnCount * m_cq->getColumnHop();
Chris@32 654
Chris@32 655 bool select = (sampleNo / spacing != prevSampleNo / spacing);
Chris@32 656
Chris@32 657 if (select) {
Chris@32 658 vector<double> inCol = in[i];
Chris@176 659 vector<double> outCol(pack.templateHeight);
Chris@32 660
Chris@178 661 // In HQ mode, the CQ returns 600 bins and we ignore the
Chris@298 662 // lowest 55 of them (assuming binsPerSemitone == 5).
Chris@178 663 //
Chris@297 664 // In draft and live mode the CQ is an octave shorter,
Chris@297 665 // returning 540 bins, so we instead pad them with an
Chris@297 666 // additional 5 zeros.
Chris@178 667 //
Chris@178 668 // We also need to reverse the column as we go, since the
Chris@178 669 // raw CQ has the high frequencies first and we need it
Chris@178 670 // the other way around.
Chris@32 671
Chris@298 672 int bps = (m_mode == LiveMode ?
Chris@298 673 binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@298 674
Chris@297 675 if (m_mode == HighQualityMode) {
Chris@178 676 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@298 677 int ix = inCol.size() - j - (11 * bps);
Chris@178 678 outCol[j] = inCol[ix];
Chris@178 679 }
Chris@178 680 } else {
Chris@298 681 for (int j = 0; j < bps; ++j) {
Chris@178 682 outCol[j] = 0.0;
Chris@178 683 }
Chris@298 684 for (int j = bps; j < pack.templateHeight; ++j) {
Chris@298 685 int ix = inCol.size() - j + (bps-1);
Chris@178 686 outCol[j] = inCol[ix];
Chris@178 687 }
Chris@46 688 }
Chris@32 689
Chris@46 690 vector<double> noiseLevel1 =
Chris@298 691 MedianFilter<double>::filter(8 * bps, outCol);
Chris@176 692 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 693 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
Chris@46 694 }
Chris@32 695
Chris@46 696 vector<double> noiseLevel2 =
Chris@298 697 MedianFilter<double>::filter(8 * bps, noiseLevel1);
Chris@176 698 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 699 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
Chris@32 700 }
Chris@32 701
Chris@165 702 out.push_back(outCol);
Chris@32 703 }
Chris@32 704
Chris@32 705 ++m_columnCount;
Chris@32 706 }
Chris@32 707
Chris@32 708 return out;
Chris@32 709 }
Chris@32 710
Chris@294 711 vector<double>
Chris@170 712 Silvet::postProcess(const vector<double> &pitches,
Chris@170 713 const vector<int> &bestShifts,
Chris@170 714 bool wantShifts)
Chris@166 715 {
Chris@298 716 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 717
Chris@41 718 vector<double> filtered;
Chris@41 719
Chris@176 720 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@170 721 m_postFilter[j]->push(pitches[j]);
Chris@41 722 filtered.push_back(m_postFilter[j]->get());
Chris@41 723 }
Chris@41 724
Chris@41 725 // Threshold for level and reduce number of candidate pitches
Chris@41 726
Chris@41 727 typedef std::multimap<double, int> ValueIndexMap;
Chris@41 728
Chris@41 729 ValueIndexMap strengths;
Chris@166 730
Chris@176 731 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@166 732 double strength = filtered[j];
Chris@183 733 if (strength < pack.levelThreshold) continue;
Chris@168 734 strengths.insert(ValueIndexMap::value_type(strength, j));
Chris@168 735 }
Chris@166 736
Chris@168 737 ValueIndexMap::const_iterator si = strengths.end();
Chris@167 738
Chris@168 739 map<int, double> active;
Chris@168 740 map<int, int> activeShifts;
Chris@168 741
Chris@183 742 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) {
Chris@168 743
Chris@168 744 --si;
Chris@168 745
Chris@168 746 double strength = si->first;
Chris@168 747 int j = si->second;
Chris@168 748
Chris@168 749 active[j] = strength;
Chris@168 750
Chris@170 751 if (wantShifts) {
Chris@170 752 activeShifts[j] = bestShifts[j];
Chris@167 753 }
Chris@41 754 }
Chris@41 755
Chris@168 756 m_pianoRoll.push_back(active);
Chris@170 757
Chris@170 758 if (wantShifts) {
Chris@168 759 m_pianoRollShifts.push_back(activeShifts);
Chris@41 760 }
Chris@294 761
Chris@294 762 return filtered;
Chris@166 763 }
Chris@166 764
Chris@166 765 Vamp::Plugin::FeatureList
Chris@168 766 Silvet::noteTrack(int shiftCount)
Chris@166 767 {
Chris@41 768 // Minimum duration pruning, and conversion to notes. We can only
Chris@41 769 // report notes that have just ended (i.e. that are absent in the
Chris@168 770 // latest active set but present in the prior set in the piano
Chris@41 771 // roll) -- any notes that ended earlier will have been reported
Chris@41 772 // already, and if they haven't ended, we don't know their
Chris@41 773 // duration.
Chris@41 774
Chris@168 775 int width = m_pianoRoll.size() - 1;
Chris@168 776
Chris@168 777 const map<int, double> &active = m_pianoRoll[width];
Chris@41 778
Chris@165 779 double columnDuration = 1.0 / m_colsPerSec;
Chris@165 780
Chris@165 781 // only keep notes >= 100ms or thereabouts
Chris@165 782 int durationThreshold = floor(0.1 / columnDuration); // columns
Chris@165 783 if (durationThreshold < 1) durationThreshold = 1;
Chris@41 784
Chris@41 785 FeatureList noteFeatures;
Chris@41 786
Chris@41 787 if (width < durationThreshold + 1) {
Chris@41 788 return noteFeatures;
Chris@41 789 }
Chris@41 790
Chris@150 791 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix)
Chris@150 792
Chris@55 793 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
Chris@41 794 ni != m_pianoRoll[width-1].end(); ++ni) {
Chris@41 795
Chris@55 796 int note = ni->first;
Chris@41 797
Chris@41 798 if (active.find(note) != active.end()) {
Chris@41 799 // the note is still playing
Chris@41 800 continue;
Chris@41 801 }
Chris@41 802
Chris@41 803 // the note was playing but just ended
Chris@41 804 int end = width;
Chris@41 805 int start = end-1;
Chris@41 806
Chris@41 807 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
Chris@41 808 --start;
Chris@41 809 }
Chris@41 810 ++start;
Chris@41 811
Chris@169 812 if ((end - start) < durationThreshold) {
Chris@41 813 continue;
Chris@41 814 }
Chris@41 815
Chris@169 816 emitNote(start, end, note, shiftCount, noteFeatures);
Chris@41 817 }
Chris@41 818
Chris@62 819 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
Chris@41 820
Chris@41 821 return noteFeatures;
Chris@41 822 }
Chris@41 823
Chris@169 824 void
Chris@169 825 Silvet::emitNote(int start, int end, int note, int shiftCount,
Chris@169 826 FeatureList &noteFeatures)
Chris@169 827 {
Chris@169 828 int partStart = start;
Chris@169 829 int partShift = 0;
Chris@169 830 int partVelocity = 0;
Chris@169 831
Chris@252 832 int partThreshold = floor(0.05 * m_colsPerSec);
Chris@169 833
Chris@169 834 for (int i = start; i != end; ++i) {
Chris@169 835
Chris@169 836 double strength = m_pianoRoll[i][note];
Chris@169 837
Chris@169 838 int shift = 0;
Chris@169 839
Chris@169 840 if (shiftCount > 1) {
Chris@169 841
Chris@169 842 shift = m_pianoRollShifts[i][note];
Chris@169 843
Chris@169 844 if (i == partStart) {
Chris@169 845 partShift = shift;
Chris@169 846 }
Chris@169 847
Chris@169 848 if (i > partStart + partThreshold && shift != partShift) {
Chris@169 849
Chris@169 850 // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl;
Chris@169 851
Chris@169 852 // pitch has changed, emit an intermediate note
Chris@252 853 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 854 i,
Chris@252 855 note,
Chris@252 856 partShift,
Chris@252 857 shiftCount,
Chris@252 858 partVelocity));
Chris@169 859 partStart = i;
Chris@169 860 partShift = shift;
Chris@169 861 partVelocity = 0;
Chris@169 862 }
Chris@169 863 }
Chris@169 864
Chris@246 865 int v = round(strength * 2);
Chris@169 866 if (v > partVelocity) {
Chris@169 867 partVelocity = v;
Chris@169 868 }
Chris@169 869 }
Chris@169 870
Chris@169 871 if (end >= partStart + partThreshold) {
Chris@252 872 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 873 end,
Chris@252 874 note,
Chris@252 875 partShift,
Chris@252 876 shiftCount,
Chris@252 877 partVelocity));
Chris@169 878 }
Chris@169 879 }
Chris@252 880
Chris@252 881 Silvet::Feature
Chris@252 882 Silvet::makeNoteFeature(int start,
Chris@252 883 int end,
Chris@252 884 int note,
Chris@252 885 int shift,
Chris@252 886 int shiftCount,
Chris@252 887 int velocity)
Chris@252 888 {
Chris@252 889 double columnDuration = 1.0 / m_colsPerSec;
Chris@252 890 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
Chris@252 891
Chris@252 892 Feature f;
Chris@252 893
Chris@252 894 f.hasTimestamp = true;
Chris@285 895 f.timestamp = m_startTime + RealTime::fromSeconds
Chris@252 896 (columnDuration * (start - postFilterLatency) + 0.02);
Chris@252 897
Chris@252 898 f.hasDuration = true;
Chris@252 899 f.duration = RealTime::fromSeconds
Chris@252 900 (columnDuration * (end - start));
Chris@252 901
Chris@252 902 f.values.clear();
Chris@252 903
Chris@252 904 f.values.push_back
Chris@252 905 (noteFrequency(note, shift, shiftCount));
Chris@252 906
Chris@252 907 float inputGain = getInputGainAt(f.timestamp);
Chris@252 908 // cerr << "adjusting velocity from " << velocity << " to " << round(velocity/inputGain) << endl;
Chris@252 909 velocity = round(velocity / inputGain);
Chris@252 910 if (velocity > 127) velocity = 127;
Chris@252 911 if (velocity < 1) velocity = 1;
Chris@252 912 f.values.push_back(velocity);
Chris@252 913
Chris@252 914 f.label = noteName(note, shift, shiftCount);
Chris@252 915
Chris@252 916 return f;
Chris@252 917 }
Chris@252 918
Chris@252 919 float
Chris@252 920 Silvet::getInputGainAt(RealTime t)
Chris@252 921 {
Chris@252 922 map<RealTime, float>::const_iterator i = m_inputGains.lower_bound(t);
Chris@252 923
Chris@252 924 if (i == m_inputGains.end()) {
Chris@252 925 if (i != m_inputGains.begin()) {
Chris@252 926 --i;
Chris@252 927 } else {
Chris@252 928 return 1.f; // no data
Chris@252 929 }
Chris@252 930 }
Chris@252 931
Chris@252 932 // cerr << "gain at time " << t << " = " << i->second << endl;
Chris@252 933
Chris@252 934 return i->second;
Chris@252 935 }
Chris@252 936