annotate src/Silvet.cpp @ 329:447ccdbfc6c0 livemode

Minor text fix
author Chris Cannam
date Tue, 28 Apr 2015 18:56:34 +0100
parents 5a181a427ac8
children 8f5cfd7dbaa5
rev   line source
Chris@31 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@31 2
Chris@31 3 /*
Chris@31 4 Silvet
Chris@31 5
Chris@31 6 A Vamp plugin for note transcription.
Chris@31 7 Centre for Digital Music, Queen Mary University of London.
Chris@31 8
Chris@31 9 This program is free software; you can redistribute it and/or
Chris@31 10 modify it under the terms of the GNU General Public License as
Chris@31 11 published by the Free Software Foundation; either version 2 of the
Chris@31 12 License, or (at your option) any later version. See the file
Chris@31 13 COPYING included with this distribution for more information.
Chris@31 14 */
Chris@31 15
Chris@31 16 #include "Silvet.h"
Chris@34 17 #include "EM.h"
Chris@31 18
Chris@152 19 #include <cq/CQSpectrogram.h>
Chris@31 20
Chris@152 21 #include "MedianFilter.h"
Chris@152 22 #include "constant-q-cpp/src/dsp/Resampler.h"
Chris@246 23 #include "flattendynamics-ladspa.h"
Chris@298 24 #include "LiveInstruments.h"
Chris@31 25
Chris@31 26 #include <vector>
Chris@31 27
Chris@32 28 #include <cstdio>
Chris@32 29
Chris@31 30 using std::vector;
Chris@48 31 using std::cout;
Chris@31 32 using std::cerr;
Chris@31 33 using std::endl;
Chris@40 34 using Vamp::RealTime;
Chris@31 35
// Sample rate (Hz) at which all internal processing takes place. Input
// arriving at any other rate is resampled to this rate in process().
static const int processingSampleRate = 44100;

// Constant-Q bins per semitone: one in Live mode, five in the other
// (Draft and Intensive) modes.
static const int binsPerSemitoneLive = 1;
static const int binsPerSemitoneNormal = 5;

// Inclusive range of input sample rates (Hz) accepted by initialise().
static const int minInputSampleRate = 100;
static const int maxInputSampleRate = 192000;
Chris@272 43
Chris@31 44 Silvet::Silvet(float inputSampleRate) :
Chris@31 45 Plugin(inputSampleRate),
Chris@161 46 m_instruments(InstrumentPack::listInstrumentPacks()),
Chris@298 47 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)),
Chris@31 48 m_resampler(0),
Chris@246 49 m_flattener(0),
Chris@110 50 m_cq(0),
Chris@297 51 m_mode(HighQualityMode),
Chris@166 52 m_fineTuning(false),
Chris@178 53 m_instrument(0),
Chris@178 54 m_colsPerSec(50)
Chris@31 55 {
Chris@31 56 }
Chris@31 57
Chris@31 58 Silvet::~Silvet()
Chris@31 59 {
Chris@31 60 delete m_resampler;
Chris@246 61 delete m_flattener;
Chris@31 62 delete m_cq;
Chris@41 63 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 64 delete m_postFilter[i];
Chris@41 65 }
Chris@31 66 }
Chris@31 67
Chris@31 68 string
Chris@31 69 Silvet::getIdentifier() const
Chris@31 70 {
Chris@31 71 return "silvet";
Chris@31 72 }
Chris@31 73
Chris@31 74 string
Chris@31 75 Silvet::getName() const
Chris@31 76 {
Chris@31 77 return "Silvet Note Transcription";
Chris@31 78 }
Chris@31 79
Chris@31 80 string
Chris@31 81 Silvet::getDescription() const
Chris@31 82 {
Chris@191 83 return "Estimate the note onsets, pitches, and durations that make up a music recording.";
Chris@31 84 }
Chris@31 85
Chris@31 86 string
Chris@31 87 Silvet::getMaker() const
Chris@31 88 {
Chris@191 89 return "Queen Mary, University of London";
Chris@31 90 }
Chris@31 91
Chris@31 92 int
Chris@31 93 Silvet::getPluginVersion() const
Chris@31 94 {
Chris@304 95 return 3;
Chris@31 96 }
Chris@31 97
Chris@31 98 string
Chris@31 99 Silvet::getCopyright() const
Chris@31 100 {
Chris@191 101 return "Method by Emmanouil Benetos and Simon Dixon; plugin by Chris Cannam and Emmanouil Benetos. GPL licence.";
Chris@31 102 }
Chris@31 103
Chris@31 104 Silvet::InputDomain
Chris@31 105 Silvet::getInputDomain() const
Chris@31 106 {
Chris@31 107 return TimeDomain;
Chris@31 108 }
Chris@31 109
Chris@31 110 size_t
Chris@31 111 Silvet::getPreferredBlockSize() const
Chris@31 112 {
Chris@31 113 return 0;
Chris@31 114 }
Chris@31 115
Chris@31 116 size_t
Chris@31 117 Silvet::getPreferredStepSize() const
Chris@31 118 {
Chris@31 119 return 0;
Chris@31 120 }
Chris@31 121
Chris@31 122 size_t
Chris@31 123 Silvet::getMinChannelCount() const
Chris@31 124 {
Chris@31 125 return 1;
Chris@31 126 }
Chris@31 127
Chris@31 128 size_t
Chris@31 129 Silvet::getMaxChannelCount() const
Chris@31 130 {
Chris@31 131 return 1;
Chris@31 132 }
Chris@31 133
Chris@31 134 Silvet::ParameterList
Chris@31 135 Silvet::getParameterDescriptors() const
Chris@31 136 {
Chris@31 137 ParameterList list;
Chris@110 138
Chris@110 139 ParameterDescriptor desc;
Chris@110 140 desc.identifier = "mode";
Chris@110 141 desc.name = "Processing mode";
Chris@110 142 desc.unit = "";
Chris@297 143 desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode is tuned in favour of overall speed; Live mode is tuned in favour of lower latency; while Intensive mode (the default) will almost always produce the best results.";
Chris@110 144 desc.minValue = 0;
Chris@297 145 desc.maxValue = 2;
Chris@113 146 desc.defaultValue = 1;
Chris@110 147 desc.isQuantized = true;
Chris@110 148 desc.quantizeStep = 1;
Chris@166 149 desc.valueNames.push_back("Draft (faster)");
Chris@165 150 desc.valueNames.push_back("Intensive (higher quality)");
Chris@297 151 desc.valueNames.push_back("Live (lower latency)");
Chris@161 152 list.push_back(desc);
Chris@161 153
Chris@176 154 desc.identifier = "instrument";
Chris@176 155 desc.name = "Instrument";
Chris@161 156 desc.unit = "";
Chris@271 157 desc.description = "The instrument or instruments known to be present in the recording. This affects the set of instrument templates used, as well as the expected level of polyphony in the output. Using a more limited set of instruments than the default will also make the plugin run faster.\nNote that this plugin cannot isolate instruments: you can't use this setting to request notes from only one instrument in a recording with several. Instead, use this as a hint to the plugin about which instruments are actually present.";
Chris@161 158 desc.minValue = 0;
Chris@162 159 desc.maxValue = m_instruments.size()-1;
Chris@162 160 desc.defaultValue = 0;
Chris@161 161 desc.isQuantized = true;
Chris@161 162 desc.quantizeStep = 1;
Chris@161 163 desc.valueNames.clear();
Chris@162 164 for (int i = 0; i < int(m_instruments.size()); ++i) {
Chris@162 165 desc.valueNames.push_back(m_instruments[i].name);
Chris@162 166 }
Chris@166 167 list.push_back(desc);
Chris@161 168
Chris@166 169 desc.identifier = "finetune";
Chris@166 170 desc.name = "Return fine pitch estimates";
Chris@166 171 desc.unit = "";
Chris@271 172 desc.description = "Return pitch estimates at finer than semitone resolution. This works only in Intensive mode. Notes that appear to drift in pitch will be split up into shorter notes with individually finer pitches.";
Chris@166 173 desc.minValue = 0;
Chris@166 174 desc.maxValue = 1;
Chris@166 175 desc.defaultValue = 0;
Chris@166 176 desc.isQuantized = true;
Chris@166 177 desc.quantizeStep = 1;
Chris@166 178 desc.valueNames.clear();
Chris@110 179 list.push_back(desc);
Chris@110 180
Chris@31 181 return list;
Chris@31 182 }
Chris@31 183
Chris@31 184 float
Chris@31 185 Silvet::getParameter(string identifier) const
Chris@31 186 {
Chris@110 187 if (identifier == "mode") {
Chris@297 188 return (float)(int)m_mode;
Chris@166 189 } else if (identifier == "finetune") {
Chris@166 190 return m_fineTuning ? 1.f : 0.f;
Chris@176 191 } else if (identifier == "instrument") {
Chris@162 192 return m_instrument;
Chris@110 193 }
Chris@31 194 return 0;
Chris@31 195 }
Chris@31 196
Chris@31 197 void
Chris@31 198 Silvet::setParameter(string identifier, float value)
Chris@31 199 {
Chris@110 200 if (identifier == "mode") {
Chris@297 201 m_mode = (ProcessingMode)(int)(value + 0.5);
Chris@166 202 } else if (identifier == "finetune") {
Chris@166 203 m_fineTuning = (value > 0.5);
Chris@176 204 } else if (identifier == "instrument") {
Chris@162 205 m_instrument = lrintf(value);
Chris@110 206 }
Chris@31 207 }
Chris@31 208
Chris@31 209 Silvet::ProgramList
Chris@31 210 Silvet::getPrograms() const
Chris@31 211 {
Chris@31 212 ProgramList list;
Chris@31 213 return list;
Chris@31 214 }
Chris@31 215
Chris@31 216 string
Chris@31 217 Silvet::getCurrentProgram() const
Chris@31 218 {
Chris@31 219 return "";
Chris@31 220 }
Chris@31 221
Chris@31 222 void
Chris@31 223 Silvet::selectProgram(string name)
Chris@31 224 {
Chris@31 225 }
Chris@31 226
Chris@31 227 Silvet::OutputList
Chris@31 228 Silvet::getOutputDescriptors() const
Chris@31 229 {
Chris@31 230 OutputList list;
Chris@31 231
Chris@31 232 OutputDescriptor d;
Chris@51 233 d.identifier = "notes";
Chris@51 234 d.name = "Note transcription";
Chris@329 235 d.description = "Overall note transcription. Each note has time, duration, estimated fundamental frequency, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
Chris@41 236 d.unit = "Hz";
Chris@31 237 d.hasFixedBinCount = true;
Chris@31 238 d.binCount = 2;
Chris@41 239 d.binNames.push_back("Frequency");
Chris@31 240 d.binNames.push_back("Velocity");
Chris@31 241 d.hasKnownExtents = false;
Chris@31 242 d.isQuantized = false;
Chris@31 243 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@246 244 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@31 245 d.hasDuration = true;
Chris@32 246 m_notesOutputNo = list.size();
Chris@32 247 list.push_back(d);
Chris@32 248
Chris@178 249 d.identifier = "timefreq";
Chris@178 250 d.name = "Time-frequency distribution";
Chris@271 251 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
Chris@178 252 d.unit = "";
Chris@178 253 d.hasFixedBinCount = true;
Chris@298 254 d.binCount = getPack(0).templateHeight;
Chris@178 255 d.binNames.clear();
Chris@178 256 if (m_cq) {
Chris@294 257 char name[50];
Chris@298 258 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@178 259 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@178 260 // lowest-frequency 55 bins have been dropped, for a
Chris@178 261 // 545-bin template. The native CQ bins go high->low
Chris@178 262 // frequency though, so these are still the first 545 bins
Chris@178 263 // as reported by getBinFrequency, though in reverse order
Chris@178 264 float freq = m_cq->getBinFrequency
Chris@298 265 (getPack(0).templateHeight - i - 1);
Chris@178 266 sprintf(name, "%.1f Hz", freq);
Chris@178 267 d.binNames.push_back(name);
Chris@178 268 }
Chris@178 269 }
Chris@178 270 d.hasKnownExtents = false;
Chris@178 271 d.isQuantized = false;
Chris@178 272 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@178 273 d.sampleRate = m_colsPerSec;
Chris@178 274 d.hasDuration = false;
Chris@178 275 m_fcqOutputNo = list.size();
Chris@178 276 list.push_back(d);
Chris@178 277
Chris@294 278 d.identifier = "pitchactivation";
Chris@294 279 d.name = "Pitch activation distribution";
Chris@294 280 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction.";
Chris@294 281 d.unit = "";
Chris@294 282 d.hasFixedBinCount = true;
Chris@298 283 d.binCount = getPack(0).templateNoteCount;
Chris@294 284 d.binNames.clear();
Chris@294 285 if (m_cq) {
Chris@298 286 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@294 287 d.binNames.push_back(noteName(i, 0, 1));
Chris@294 288 }
Chris@294 289 }
Chris@294 290 d.hasKnownExtents = false;
Chris@294 291 d.isQuantized = false;
Chris@294 292 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@294 293 d.sampleRate = m_colsPerSec;
Chris@294 294 d.hasDuration = false;
Chris@294 295 m_pitchOutputNo = list.size();
Chris@294 296 list.push_back(d);
Chris@294 297
Chris@302 298 d.identifier = "templates";
Chris@302 299 d.name = "Templates";
Chris@302 300 d.description = "Constant-Q spectral templates for the selected instrument pack.";
Chris@302 301 d.unit = "";
Chris@302 302 d.hasFixedBinCount = true;
Chris@302 303 d.binCount = getPack(0).templateHeight;
Chris@302 304 d.binNames.clear();
Chris@302 305 if (m_cq) {
Chris@302 306 char name[50];
Chris@302 307 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@302 308 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@302 309 // lowest-frequency 55 bins have been dropped, for a
Chris@302 310 // 545-bin template. The native CQ bins go high->low
Chris@302 311 // frequency though, so these are still the first 545 bins
Chris@302 312 // as reported by getBinFrequency, though in reverse order
Chris@302 313 float freq = m_cq->getBinFrequency
Chris@302 314 (getPack(0).templateHeight - i - 1);
Chris@302 315 sprintf(name, "%.1f Hz", freq);
Chris@302 316 d.binNames.push_back(name);
Chris@302 317 }
Chris@302 318 }
Chris@302 319 d.hasKnownExtents = false;
Chris@302 320 d.isQuantized = false;
Chris@302 321 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@302 322 d.sampleRate = m_colsPerSec;
Chris@302 323 d.hasDuration = false;
Chris@302 324 m_templateOutputNo = list.size();
Chris@302 325 list.push_back(d);
Chris@302 326
Chris@31 327 return list;
Chris@31 328 }
Chris@31 329
Chris@38 330 std::string
Chris@175 331 Silvet::noteName(int note, int shift, int shiftCount) const
Chris@38 332 {
Chris@38 333 static const char *names[] = {
Chris@38 334 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
Chris@38 335 };
Chris@38 336
Chris@175 337 const char *n = names[note % 12];
Chris@38 338
Chris@175 339 int oct = (note + 9) / 12;
Chris@38 340
Chris@175 341 char buf[30];
Chris@175 342
Chris@175 343 float pshift = 0.f;
Chris@175 344 if (shiftCount > 1) {
Chris@175 345 // see noteFrequency below
Chris@175 346 pshift =
Chris@175 347 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 348 }
Chris@175 349
Chris@175 350 if (pshift > 0.f) {
Chris@175 351 sprintf(buf, "%s%d+%dc", n, oct, int(round(pshift * 100)));
Chris@175 352 } else if (pshift < 0.f) {
Chris@175 353 sprintf(buf, "%s%d-%dc", n, oct, int(round((-pshift) * 100)));
Chris@175 354 } else {
Chris@175 355 sprintf(buf, "%s%d", n, oct);
Chris@175 356 }
Chris@38 357
Chris@38 358 return buf;
Chris@38 359 }
Chris@38 360
Chris@41 361 float
Chris@168 362 Silvet::noteFrequency(int note, int shift, int shiftCount) const
Chris@41 363 {
Chris@169 364 // Convert shift number to a pitch shift. The given shift number
Chris@169 365 // is an offset into the template array, which starts with some
Chris@169 366 // zeros, followed by the template, then some trailing zeros.
Chris@169 367 //
Chris@169 368 // Example: if we have templateMaxShift == 2 and thus shiftCount
Chris@169 369 // == 5, then the number will be in the range 0-4 and the template
Chris@169 370 // will have 2 zeros at either end. Thus number 2 represents the
Chris@169 371 // template "as recorded", for a pitch shift of 0; smaller indices
Chris@169 372 // represent moving the template *up* in pitch (by introducing
Chris@169 373 // zeros at the start, which is the low-frequency end), for a
Chris@169 374 // positive pitch shift; and higher values represent moving it
Chris@169 375 // down in pitch, for a negative pitch shift.
Chris@169 376
Chris@175 377 float pshift = 0.f;
Chris@175 378 if (shiftCount > 1) {
Chris@175 379 pshift =
Chris@175 380 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 381 }
Chris@169 382
Chris@301 383 float freq = float(27.5 * pow(2.0, (note + pshift) / 12.0));
Chris@301 384
Chris@303 385 // cerr << "note = " << note << ", shift = " << shift << ", shiftCount = "
Chris@303 386 // << shiftCount << ", obtained freq = " << freq << endl;
Chris@301 387
Chris@301 388 return freq;
Chris@41 389 }
Chris@41 390
Chris@31 391 bool
Chris@31 392 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@31 393 {
Chris@272 394 if (m_inputSampleRate < minInputSampleRate ||
Chris@272 395 m_inputSampleRate > maxInputSampleRate) {
Chris@272 396 cerr << "Silvet::initialise: Unsupported input sample rate "
Chris@272 397 << m_inputSampleRate << " (supported min " << minInputSampleRate
Chris@272 398 << ", max " << maxInputSampleRate << ")" << endl;
Chris@272 399 return false;
Chris@272 400 }
Chris@272 401
Chris@31 402 if (channels < getMinChannelCount() ||
Chris@272 403 channels > getMaxChannelCount()) {
Chris@272 404 cerr << "Silvet::initialise: Unsupported channel count " << channels
Chris@272 405 << " (supported min " << getMinChannelCount() << ", max "
Chris@272 406 << getMaxChannelCount() << ")" << endl;
Chris@272 407 return false;
Chris@272 408 }
Chris@31 409
Chris@31 410 if (stepSize != blockSize) {
Chris@31 411 cerr << "Silvet::initialise: Step size must be the same as block size ("
Chris@31 412 << stepSize << " != " << blockSize << ")" << endl;
Chris@31 413 return false;
Chris@31 414 }
Chris@31 415
Chris@31 416 m_blockSize = blockSize;
Chris@31 417
Chris@31 418 reset();
Chris@31 419
Chris@31 420 return true;
Chris@31 421 }
Chris@31 422
Chris@31 423 void
Chris@31 424 Silvet::reset()
Chris@31 425 {
Chris@31 426 delete m_resampler;
Chris@246 427 delete m_flattener;
Chris@31 428 delete m_cq;
Chris@31 429
Chris@31 430 if (m_inputSampleRate != processingSampleRate) {
Chris@31 431 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
Chris@31 432 } else {
Chris@31 433 m_resampler = 0;
Chris@31 434 }
Chris@31 435
Chris@246 436 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
Chris@246 437 m_flattener->reset();
Chris@246 438
Chris@301 439 // this happens to be processingSampleRate / 3, and is the top
Chris@301 440 // freq used for the EM templates:
Chris@301 441 double maxFreq = 14700;
Chris@301 442
Chris@301 443 if (m_mode == LiveMode) {
Chris@301 444 // We only have 12 bpo rather than 60, so we need the top bin
Chris@301 445 // to be the middle one of the top 5, i.e. 2/5 of a semitone
Chris@301 446 // lower than 14700
Chris@301 447 maxFreq *= powf(2.0, -1.0 / 30.0);
Chris@301 448 }
Chris@301 449
Chris@173 450 double minFreq = 27.5;
Chris@173 451
Chris@297 452 if (m_mode != HighQualityMode) {
Chris@173 453 // We don't actually return any notes from the bottom octave,
Chris@173 454 // so we can just pad with zeros
Chris@173 455 minFreq *= 2;
Chris@173 456 }
Chris@173 457
Chris@298 458 int bpo = 12 *
Chris@298 459 (m_mode == LiveMode ? binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@301 460
Chris@154 461 CQParameters params(processingSampleRate,
Chris@173 462 minFreq,
Chris@303 463 maxFreq,
Chris@298 464 bpo);
Chris@154 465
Chris@155 466 params.q = 0.95; // MIREX code uses 0.8, but it seems 0.9 or lower
Chris@155 467 // drops the FFT size to 512 from 1024 and alters
Chris@155 468 // some other processing parameters, making
Chris@155 469 // everything much, much slower. Could be a flaw
Chris@155 470 // in the CQ parameter calculations, must check
Chris@154 471 params.atomHopFactor = 0.3;
Chris@154 472 params.threshold = 0.0005;
Chris@172 473 params.window = CQParameters::Hann;
Chris@154 474
Chris@154 475 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
Chris@31 476
Chris@303 477 // cerr << "CQ bins = " << m_cq->getTotalBins() << endl;
Chris@303 478 // cerr << "CQ min freq = " << m_cq->getMinFrequency() << " (and for confirmation, freq of bin 0 = " << m_cq->getBinFrequency(0) << ")" << endl;
Chris@297 479
Chris@297 480 m_colsPerSec = (m_mode == DraftMode ? 25 : 50);
Chris@165 481
Chris@41 482 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 483 delete m_postFilter[i];
Chris@41 484 }
Chris@41 485 m_postFilter.clear();
Chris@303 486 int postFilterLength = 3;
Chris@298 487 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@303 488 m_postFilter.push_back(new MedianFilter<double>(postFilterLength));
Chris@41 489 }
Chris@41 490 m_pianoRoll.clear();
Chris@246 491 m_inputGains.clear();
Chris@32 492 m_columnCount = 0;
Chris@272 493 m_resampledCount = 0;
Chris@40 494 m_startTime = RealTime::zeroTime;
Chris@31 495 }
Chris@31 496
Chris@31 497 Silvet::FeatureSet
Chris@31 498 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
Chris@31 499 {
Chris@302 500 FeatureSet fs;
Chris@302 501
Chris@40 502 if (m_columnCount == 0) {
Chris@40 503 m_startTime = timestamp;
Chris@302 504 insertTemplateFeatures(fs);
Chris@40 505 }
Chris@246 506
Chris@246 507 vector<float> flattened(m_blockSize);
Chris@246 508 float gain = 1.f;
Chris@246 509 m_flattener->connectInputPort
Chris@246 510 (FlattenDynamics::AudioInputPort, inputBuffers[0]);
Chris@246 511 m_flattener->connectOutputPort
Chris@246 512 (FlattenDynamics::AudioOutputPort, &flattened[0]);
Chris@246 513 m_flattener->connectOutputPort
Chris@246 514 (FlattenDynamics::GainOutputPort, &gain);
Chris@246 515 m_flattener->process(m_blockSize);
Chris@246 516
Chris@252 517 m_inputGains[timestamp] = gain;
Chris@40 518
Chris@31 519 vector<double> data;
Chris@40 520 for (int i = 0; i < m_blockSize; ++i) {
Chris@246 521 double d = flattened[i];
Chris@235 522 data.push_back(d);
Chris@40 523 }
Chris@31 524
Chris@31 525 if (m_resampler) {
Chris@272 526
Chris@31 527 data = m_resampler->process(data.data(), data.size());
Chris@272 528
Chris@272 529 int hadCount = m_resampledCount;
Chris@272 530 m_resampledCount += data.size();
Chris@272 531
Chris@272 532 int resamplerLatency = m_resampler->getLatency();
Chris@272 533
Chris@272 534 if (hadCount < resamplerLatency) {
Chris@272 535 int stillToDrop = resamplerLatency - hadCount;
Chris@272 536 if (stillToDrop >= int(data.size())) {
Chris@302 537 return fs;
Chris@272 538 } else {
Chris@272 539 data = vector<double>(data.begin() + stillToDrop, data.end());
Chris@272 540 }
Chris@272 541 }
Chris@31 542 }
Chris@272 543
Chris@32 544 Grid cqout = m_cq->process(data);
Chris@302 545 transcribe(cqout, fs);
Chris@51 546 return fs;
Chris@34 547 }
Chris@34 548
Chris@34 549 Silvet::FeatureSet
Chris@34 550 Silvet::getRemainingFeatures()
Chris@34 551 {
Chris@145 552 Grid cqout = m_cq->getRemainingOutput();
Chris@302 553 FeatureSet fs;
Chris@302 554 if (m_columnCount == 0) {
Chris@302 555 // process() was never called, but we still want these
Chris@302 556 insertTemplateFeatures(fs);
Chris@302 557 } else {
Chris@302 558 transcribe(cqout, fs);
Chris@302 559 }
Chris@51 560 return fs;
Chris@34 561 }
Chris@34 562
Chris@302 563 void
Chris@302 564 Silvet::insertTemplateFeatures(FeatureSet &fs)
Chris@302 565 {
Chris@302 566 const InstrumentPack &pack = getPack(m_instrument);
Chris@302 567 for (int i = 0; i < int(pack.templates.size()) * pack.templateNoteCount; ++i) {
Chris@302 568 RealTime timestamp = RealTime::fromSeconds(double(i) / m_colsPerSec);
Chris@302 569 Feature f;
Chris@302 570 char buffer[50];
Chris@302 571 sprintf(buffer, "Note %d", i + 1);
Chris@302 572 f.label = buffer;
Chris@302 573 f.hasTimestamp = true;
Chris@302 574 f.timestamp = timestamp;
Chris@302 575 f.values = pack.templates[i / pack.templateNoteCount]
Chris@302 576 .data[i % pack.templateNoteCount];
Chris@302 577 fs[m_templateOutputNo].push_back(f);
Chris@302 578 }
Chris@302 579 }
Chris@302 580
Chris@302 581 void
Chris@302 582 Silvet::transcribe(const Grid &cqout, Silvet::FeatureSet &fs)
Chris@34 583 {
Chris@32 584 Grid filtered = preProcess(cqout);
Chris@31 585
Chris@302 586 if (filtered.empty()) return;
Chris@170 587
Chris@298 588 const InstrumentPack &pack(getPack(m_instrument));
Chris@104 589
Chris@178 590 for (int i = 0; i < (int)filtered.size(); ++i) {
Chris@178 591 Feature f;
Chris@178 592 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@178 593 f.values.push_back(float(filtered[i][j]));
Chris@178 594 }
Chris@178 595 fs[m_fcqOutputNo].push_back(f);
Chris@178 596 }
Chris@178 597
Chris@34 598 int width = filtered.size();
Chris@34 599
Chris@297 600 int iterations = (m_mode == HighQualityMode ? 20 : 10);
Chris@34 601
Chris@176 602 Grid localPitches(width, vector<double>(pack.templateNoteCount, 0.0));
Chris@170 603
Chris@297 604 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning;
Chris@170 605 int shiftCount = 1;
Chris@170 606 if (wantShifts) {
Chris@170 607 shiftCount = pack.templateMaxShift * 2 + 1;
Chris@170 608 }
Chris@170 609
Chris@170 610 vector<vector<int> > localBestShifts;
Chris@170 611 if (wantShifts) {
Chris@170 612 localBestShifts =
Chris@176 613 vector<vector<int> >(width, vector<int>(pack.templateNoteCount, 0));
Chris@170 614 }
Chris@170 615
Chris@305 616 double columnThreshold = 1e-5;
Chris@307 617
Chris@307 618 if (m_mode == LiveMode) {
Chris@307 619 columnThreshold /= 20;
Chris@307 620 }
Chris@305 621
Chris@123 622 #pragma omp parallel for
Chris@123 623 for (int i = 0; i < width; ++i) {
Chris@104 624
Chris@170 625 double sum = 0.0;
Chris@176 626 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@170 627 sum += filtered.at(i).at(j);
Chris@170 628 }
Chris@305 629 if (sum < columnThreshold) continue;
Chris@170 630
Chris@297 631 EM em(&pack, m_mode == HighQualityMode);
Chris@170 632
Chris@183 633 em.setPitchSparsity(pack.pitchSparsity);
Chris@213 634 em.setSourceSparsity(pack.sourceSparsity);
Chris@183 635
Chris@170 636 for (int j = 0; j < iterations; ++j) {
Chris@170 637 em.iterate(filtered.at(i).data());
Chris@37 638 }
Chris@37 639
Chris@170 640 const float *pitchDist = em.getPitchDistribution();
Chris@170 641 const float *const *shiftDist = em.getShifts();
Chris@37 642
Chris@176 643 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@104 644
Chris@170 645 localPitches[i][j] = pitchDist[j] * sum;
Chris@170 646
Chris@170 647 int bestShift = 0;
Chris@179 648 float bestShiftValue = 0.0;
Chris@170 649 if (wantShifts) {
Chris@170 650 for (int k = 0; k < shiftCount; ++k) {
Chris@179 651 float value = shiftDist[k][j];
Chris@179 652 if (k == 0 || value > bestShiftValue) {
Chris@179 653 bestShiftValue = value;
Chris@170 654 bestShift = k;
Chris@170 655 }
Chris@170 656 }
Chris@170 657 localBestShifts[i][j] = bestShift;
Chris@170 658 }
Chris@123 659 }
Chris@123 660 }
Chris@166 661
Chris@166 662 for (int i = 0; i < width; ++i) {
Chris@37 663
Chris@294 664 vector<double> filtered = postProcess
Chris@294 665 (localPitches[i], localBestShifts[i], wantShifts);
Chris@294 666
Chris@294 667 Feature f;
Chris@294 668 for (int j = 0; j < (int)filtered.size(); ++j) {
Chris@294 669 float v(filtered[j]);
Chris@294 670 if (v < pack.levelThreshold) v = 0.f;
Chris@294 671 f.values.push_back(v);
Chris@294 672 }
Chris@294 673 fs[m_pitchOutputNo].push_back(f);
Chris@166 674
Chris@168 675 FeatureList noteFeatures = noteTrack(shiftCount);
Chris@38 676
Chris@123 677 for (FeatureList::const_iterator fi = noteFeatures.begin();
Chris@123 678 fi != noteFeatures.end(); ++fi) {
Chris@123 679 fs[m_notesOutputNo].push_back(*fi);
Chris@40 680 }
Chris@34 681 }
Chris@31 682 }
Chris@31 683
Chris@32 684 Silvet::Grid
Chris@32 685 Silvet::preProcess(const Grid &in)
Chris@32 686 {
Chris@32 687 int width = in.size();
Chris@32 688
Chris@165 689 int spacing = processingSampleRate / m_colsPerSec;
Chris@32 690
Chris@165 691 // need to be careful that col spacing is an integer number of samples!
Chris@165 692 assert(spacing * m_colsPerSec == processingSampleRate);
Chris@32 693
Chris@32 694 Grid out;
Chris@32 695
Chris@58 696 // We count the CQ latency in terms of processing hops, but
Chris@58 697 // actually it probably isn't an exact number of hops so this
Chris@58 698 // isn't quite accurate. But the small constant offset is
Chris@165 699 // practically irrelevant compared to the jitter from the frame
Chris@165 700 // size we reduce to in a moment
Chris@33 701 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
Chris@33 702
Chris@298 703 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 704
Chris@32 705 for (int i = 0; i < width; ++i) {
Chris@32 706
Chris@33 707 if (m_columnCount < latentColumns) {
Chris@33 708 ++m_columnCount;
Chris@33 709 continue;
Chris@33 710 }
Chris@33 711
Chris@32 712 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop();
Chris@32 713 int sampleNo = m_columnCount * m_cq->getColumnHop();
Chris@32 714
Chris@32 715 bool select = (sampleNo / spacing != prevSampleNo / spacing);
Chris@32 716
Chris@32 717 if (select) {
Chris@32 718 vector<double> inCol = in[i];
Chris@176 719 vector<double> outCol(pack.templateHeight);
Chris@32 720
Chris@178 721 // In HQ mode, the CQ returns 600 bins and we ignore the
Chris@298 722 // lowest 55 of them (assuming binsPerSemitone == 5).
Chris@178 723 //
Chris@297 724 // In draft and live mode the CQ is an octave shorter,
Chris@300 725 // returning 540 bins or equivalent, so we instead pad
Chris@300 726 // them with an additional 5 or equivalent zeros.
Chris@178 727 //
Chris@178 728 // We also need to reverse the column as we go, since the
Chris@178 729 // raw CQ has the high frequencies first and we need it
Chris@178 730 // the other way around.
Chris@32 731
Chris@298 732 int bps = (m_mode == LiveMode ?
Chris@298 733 binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@298 734
Chris@297 735 if (m_mode == HighQualityMode) {
Chris@178 736 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@298 737 int ix = inCol.size() - j - (11 * bps);
Chris@178 738 outCol[j] = inCol[ix];
Chris@178 739 }
Chris@178 740 } else {
Chris@298 741 for (int j = 0; j < bps; ++j) {
Chris@178 742 outCol[j] = 0.0;
Chris@178 743 }
Chris@298 744 for (int j = bps; j < pack.templateHeight; ++j) {
Chris@298 745 int ix = inCol.size() - j + (bps-1);
Chris@178 746 outCol[j] = inCol[ix];
Chris@178 747 }
Chris@46 748 }
Chris@32 749
Chris@46 750 vector<double> noiseLevel1 =
Chris@298 751 MedianFilter<double>::filter(8 * bps, outCol);
Chris@176 752 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 753 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
Chris@46 754 }
Chris@32 755
Chris@46 756 vector<double> noiseLevel2 =
Chris@298 757 MedianFilter<double>::filter(8 * bps, noiseLevel1);
Chris@176 758 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 759 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
Chris@32 760 }
Chris@32 761
Chris@165 762 out.push_back(outCol);
Chris@32 763 }
Chris@32 764
Chris@32 765 ++m_columnCount;
Chris@32 766 }
Chris@32 767
Chris@32 768 return out;
Chris@32 769 }
Chris@32 770
Chris@294 771 vector<double>
Chris@170 772 Silvet::postProcess(const vector<double> &pitches,
Chris@170 773 const vector<int> &bestShifts,
Chris@170 774 bool wantShifts)
Chris@166 775 {
Chris@298 776 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 777
Chris@41 778 vector<double> filtered;
Chris@41 779
Chris@176 780 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@170 781 m_postFilter[j]->push(pitches[j]);
Chris@41 782 filtered.push_back(m_postFilter[j]->get());
Chris@41 783 }
Chris@41 784
Chris@41 785 // Threshold for level and reduce number of candidate pitches
Chris@41 786
Chris@41 787 typedef std::multimap<double, int> ValueIndexMap;
Chris@41 788
Chris@41 789 ValueIndexMap strengths;
Chris@166 790
Chris@176 791 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@166 792 double strength = filtered[j];
Chris@183 793 if (strength < pack.levelThreshold) continue;
Chris@168 794 strengths.insert(ValueIndexMap::value_type(strength, j));
Chris@168 795 }
Chris@166 796
Chris@168 797 ValueIndexMap::const_iterator si = strengths.end();
Chris@167 798
Chris@168 799 map<int, double> active;
Chris@168 800 map<int, int> activeShifts;
Chris@168 801
Chris@183 802 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) {
Chris@168 803
Chris@168 804 --si;
Chris@168 805
Chris@168 806 double strength = si->first;
Chris@168 807 int j = si->second;
Chris@168 808
Chris@168 809 active[j] = strength;
Chris@168 810
Chris@170 811 if (wantShifts) {
Chris@170 812 activeShifts[j] = bestShifts[j];
Chris@167 813 }
Chris@41 814 }
Chris@41 815
Chris@168 816 m_pianoRoll.push_back(active);
Chris@170 817
Chris@170 818 if (wantShifts) {
Chris@168 819 m_pianoRollShifts.push_back(activeShifts);
Chris@41 820 }
Chris@294 821
Chris@294 822 return filtered;
Chris@166 823 }
Chris@166 824
Chris@166 825 Vamp::Plugin::FeatureList
Chris@168 826 Silvet::noteTrack(int shiftCount)
Chris@166 827 {
Chris@41 828 // Minimum duration pruning, and conversion to notes. We can only
Chris@41 829 // report notes that have just ended (i.e. that are absent in the
Chris@168 830 // latest active set but present in the prior set in the piano
Chris@41 831 // roll) -- any notes that ended earlier will have been reported
Chris@41 832 // already, and if they haven't ended, we don't know their
Chris@41 833 // duration.
Chris@41 834
Chris@168 835 int width = m_pianoRoll.size() - 1;
Chris@168 836
Chris@168 837 const map<int, double> &active = m_pianoRoll[width];
Chris@41 838
Chris@165 839 double columnDuration = 1.0 / m_colsPerSec;
Chris@165 840
Chris@165 841 // only keep notes >= 100ms or thereabouts
Chris@165 842 int durationThreshold = floor(0.1 / columnDuration); // columns
Chris@165 843 if (durationThreshold < 1) durationThreshold = 1;
Chris@41 844
Chris@41 845 FeatureList noteFeatures;
Chris@41 846
Chris@41 847 if (width < durationThreshold + 1) {
Chris@41 848 return noteFeatures;
Chris@41 849 }
Chris@41 850
Chris@150 851 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix)
Chris@150 852
Chris@55 853 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
Chris@41 854 ni != m_pianoRoll[width-1].end(); ++ni) {
Chris@41 855
Chris@55 856 int note = ni->first;
Chris@41 857
Chris@41 858 if (active.find(note) != active.end()) {
Chris@41 859 // the note is still playing
Chris@41 860 continue;
Chris@41 861 }
Chris@41 862
Chris@41 863 // the note was playing but just ended
Chris@41 864 int end = width;
Chris@41 865 int start = end-1;
Chris@41 866
Chris@41 867 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
Chris@41 868 --start;
Chris@41 869 }
Chris@41 870 ++start;
Chris@41 871
Chris@169 872 if ((end - start) < durationThreshold) {
Chris@41 873 continue;
Chris@41 874 }
Chris@41 875
Chris@169 876 emitNote(start, end, note, shiftCount, noteFeatures);
Chris@41 877 }
Chris@41 878
Chris@62 879 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
Chris@41 880
Chris@41 881 return noteFeatures;
Chris@41 882 }
Chris@41 883
Chris@169 884 void
Chris@169 885 Silvet::emitNote(int start, int end, int note, int shiftCount,
Chris@169 886 FeatureList &noteFeatures)
Chris@169 887 {
Chris@169 888 int partStart = start;
Chris@169 889 int partShift = 0;
Chris@169 890 int partVelocity = 0;
Chris@169 891
Chris@252 892 int partThreshold = floor(0.05 * m_colsPerSec);
Chris@169 893
Chris@169 894 for (int i = start; i != end; ++i) {
Chris@169 895
Chris@169 896 double strength = m_pianoRoll[i][note];
Chris@169 897
Chris@169 898 int shift = 0;
Chris@169 899
Chris@169 900 if (shiftCount > 1) {
Chris@169 901
Chris@169 902 shift = m_pianoRollShifts[i][note];
Chris@169 903
Chris@169 904 if (i == partStart) {
Chris@169 905 partShift = shift;
Chris@169 906 }
Chris@169 907
Chris@169 908 if (i > partStart + partThreshold && shift != partShift) {
Chris@169 909
Chris@169 910 // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl;
Chris@169 911
Chris@169 912 // pitch has changed, emit an intermediate note
Chris@252 913 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 914 i,
Chris@252 915 note,
Chris@252 916 partShift,
Chris@252 917 shiftCount,
Chris@252 918 partVelocity));
Chris@169 919 partStart = i;
Chris@169 920 partShift = shift;
Chris@169 921 partVelocity = 0;
Chris@169 922 }
Chris@169 923 }
Chris@169 924
Chris@303 925 int v;
Chris@303 926 if (m_mode == LiveMode) {
Chris@303 927 v = round(strength * 30);
Chris@303 928 } else {
Chris@303 929 v = round(strength * 2);
Chris@303 930 }
Chris@169 931 if (v > partVelocity) {
Chris@169 932 partVelocity = v;
Chris@169 933 }
Chris@169 934 }
Chris@169 935
Chris@169 936 if (end >= partStart + partThreshold) {
Chris@252 937 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 938 end,
Chris@252 939 note,
Chris@252 940 partShift,
Chris@252 941 shiftCount,
Chris@252 942 partVelocity));
Chris@169 943 }
Chris@169 944 }
Chris@252 945
Chris@252 946 Silvet::Feature
Chris@252 947 Silvet::makeNoteFeature(int start,
Chris@252 948 int end,
Chris@252 949 int note,
Chris@252 950 int shift,
Chris@252 951 int shiftCount,
Chris@252 952 int velocity)
Chris@252 953 {
Chris@252 954 double columnDuration = 1.0 / m_colsPerSec;
Chris@252 955 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
Chris@252 956
Chris@252 957 Feature f;
Chris@252 958
Chris@252 959 f.hasTimestamp = true;
Chris@285 960 f.timestamp = m_startTime + RealTime::fromSeconds
Chris@252 961 (columnDuration * (start - postFilterLatency) + 0.02);
Chris@252 962
Chris@252 963 f.hasDuration = true;
Chris@252 964 f.duration = RealTime::fromSeconds
Chris@252 965 (columnDuration * (end - start));
Chris@252 966
Chris@252 967 f.values.clear();
Chris@252 968
Chris@252 969 f.values.push_back
Chris@252 970 (noteFrequency(note, shift, shiftCount));
Chris@252 971
Chris@252 972 float inputGain = getInputGainAt(f.timestamp);
Chris@252 973 // cerr << "adjusting velocity from " << velocity << " to " << round(velocity/inputGain) << endl;
Chris@252 974 velocity = round(velocity / inputGain);
Chris@252 975 if (velocity > 127) velocity = 127;
Chris@252 976 if (velocity < 1) velocity = 1;
Chris@252 977 f.values.push_back(velocity);
Chris@252 978
Chris@252 979 f.label = noteName(note, shift, shiftCount);
Chris@252 980
Chris@252 981 return f;
Chris@252 982 }
Chris@252 983
Chris@252 984 float
Chris@252 985 Silvet::getInputGainAt(RealTime t)
Chris@252 986 {
Chris@252 987 map<RealTime, float>::const_iterator i = m_inputGains.lower_bound(t);
Chris@252 988
Chris@252 989 if (i == m_inputGains.end()) {
Chris@252 990 if (i != m_inputGains.begin()) {
Chris@252 991 --i;
Chris@252 992 } else {
Chris@252 993 return 1.f; // no data
Chris@252 994 }
Chris@252 995 }
Chris@252 996
Chris@252 997 // cerr << "gain at time " << t << " = " << i->second << endl;
Chris@252 998
Chris@252 999 return i->second;
Chris@252 1000 }
Chris@252 1001