annotate src/Silvet.cpp @ 306:af19bee9e53b livemode

Merge from default branch
author Chris Cannam
date Fri, 05 Dec 2014 16:47:06 +0000
parents f5f3b50b2b9f 04a3c152e590
children 5a181a427ac8
rev   line source
Chris@31 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@31 2
Chris@31 3 /*
Chris@31 4 Silvet
Chris@31 5
Chris@31 6 A Vamp plugin for note transcription.
Chris@31 7 Centre for Digital Music, Queen Mary University of London.
Chris@31 8
Chris@31 9 This program is free software; you can redistribute it and/or
Chris@31 10 modify it under the terms of the GNU General Public License as
Chris@31 11 published by the Free Software Foundation; either version 2 of the
Chris@31 12 License, or (at your option) any later version. See the file
Chris@31 13 COPYING included with this distribution for more information.
Chris@31 14 */
Chris@31 15
Chris@31 16 #include "Silvet.h"
Chris@34 17 #include "EM.h"
Chris@31 18
Chris@152 19 #include <cq/CQSpectrogram.h>
Chris@31 20
Chris@152 21 #include "MedianFilter.h"
Chris@152 22 #include "constant-q-cpp/src/dsp/Resampler.h"
Chris@246 23 #include "flattendynamics-ladspa.h"
Chris@298 24 #include "LiveInstruments.h"
Chris@31 25
Chris@31 26 #include <vector>
Chris@31 27
Chris@32 28 #include <cstdio>
Chris@32 29
Chris@31 30 using std::vector;
Chris@48 31 using std::cout;
Chris@31 32 using std::cerr;
Chris@31 33 using std::endl;
Chris@40 34 using Vamp::RealTime;
Chris@31 35
// Sample rate (Hz) at which all analysis is carried out. Input at any
// other rate is resampled to this rate in process().
static const int processingSampleRate = 44100;

// Constant-Q bins per semitone: one in live mode, five in the other
// (draft and high-quality) modes.
static const int binsPerSemitoneLive = 1;
static const int binsPerSemitoneNormal = 5;

// Range of input sample rates (Hz) accepted by initialise().
static const int minInputSampleRate = 100;
static const int maxInputSampleRate = 192000;
Chris@272 43
Chris@31 44 Silvet::Silvet(float inputSampleRate) :
Chris@31 45 Plugin(inputSampleRate),
Chris@161 46 m_instruments(InstrumentPack::listInstrumentPacks()),
Chris@298 47 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)),
Chris@31 48 m_resampler(0),
Chris@246 49 m_flattener(0),
Chris@110 50 m_cq(0),
Chris@297 51 m_mode(HighQualityMode),
Chris@166 52 m_fineTuning(false),
Chris@178 53 m_instrument(0),
Chris@178 54 m_colsPerSec(50)
Chris@31 55 {
Chris@31 56 }
Chris@31 57
Chris@31 58 Silvet::~Silvet()
Chris@31 59 {
Chris@31 60 delete m_resampler;
Chris@246 61 delete m_flattener;
Chris@31 62 delete m_cq;
Chris@41 63 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 64 delete m_postFilter[i];
Chris@41 65 }
Chris@31 66 }
Chris@31 67
Chris@31 68 string
Chris@31 69 Silvet::getIdentifier() const
Chris@31 70 {
Chris@31 71 return "silvet";
Chris@31 72 }
Chris@31 73
Chris@31 74 string
Chris@31 75 Silvet::getName() const
Chris@31 76 {
Chris@31 77 return "Silvet Note Transcription";
Chris@31 78 }
Chris@31 79
Chris@31 80 string
Chris@31 81 Silvet::getDescription() const
Chris@31 82 {
Chris@191 83 return "Estimate the note onsets, pitches, and durations that make up a music recording.";
Chris@31 84 }
Chris@31 85
Chris@31 86 string
Chris@31 87 Silvet::getMaker() const
Chris@31 88 {
Chris@191 89 return "Queen Mary, University of London";
Chris@31 90 }
Chris@31 91
Chris@31 92 int
Chris@31 93 Silvet::getPluginVersion() const
Chris@31 94 {
Chris@304 95 return 3;
Chris@31 96 }
Chris@31 97
Chris@31 98 string
Chris@31 99 Silvet::getCopyright() const
Chris@31 100 {
Chris@191 101 return "Method by Emmanouil Benetos and Simon Dixon; plugin by Chris Cannam and Emmanouil Benetos. GPL licence.";
Chris@31 102 }
Chris@31 103
Chris@31 104 Silvet::InputDomain
Chris@31 105 Silvet::getInputDomain() const
Chris@31 106 {
Chris@31 107 return TimeDomain;
Chris@31 108 }
Chris@31 109
Chris@31 110 size_t
Chris@31 111 Silvet::getPreferredBlockSize() const
Chris@31 112 {
Chris@31 113 return 0;
Chris@31 114 }
Chris@31 115
Chris@31 116 size_t
Chris@31 117 Silvet::getPreferredStepSize() const
Chris@31 118 {
Chris@31 119 return 0;
Chris@31 120 }
Chris@31 121
Chris@31 122 size_t
Chris@31 123 Silvet::getMinChannelCount() const
Chris@31 124 {
Chris@31 125 return 1;
Chris@31 126 }
Chris@31 127
Chris@31 128 size_t
Chris@31 129 Silvet::getMaxChannelCount() const
Chris@31 130 {
Chris@31 131 return 1;
Chris@31 132 }
Chris@31 133
Chris@31 134 Silvet::ParameterList
Chris@31 135 Silvet::getParameterDescriptors() const
Chris@31 136 {
Chris@31 137 ParameterList list;
Chris@110 138
Chris@110 139 ParameterDescriptor desc;
Chris@110 140 desc.identifier = "mode";
Chris@110 141 desc.name = "Processing mode";
Chris@110 142 desc.unit = "";
Chris@297 143 desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode is tuned in favour of overall speed; Live mode is tuned in favour of lower latency; while Intensive mode (the default) will almost always produce the best results.";
Chris@110 144 desc.minValue = 0;
Chris@297 145 desc.maxValue = 2;
Chris@113 146 desc.defaultValue = 1;
Chris@110 147 desc.isQuantized = true;
Chris@110 148 desc.quantizeStep = 1;
Chris@166 149 desc.valueNames.push_back("Draft (faster)");
Chris@165 150 desc.valueNames.push_back("Intensive (higher quality)");
Chris@297 151 desc.valueNames.push_back("Live (lower latency)");
Chris@161 152 list.push_back(desc);
Chris@161 153
Chris@176 154 desc.identifier = "instrument";
Chris@176 155 desc.name = "Instrument";
Chris@161 156 desc.unit = "";
Chris@271 157 desc.description = "The instrument or instruments known to be present in the recording. This affects the set of instrument templates used, as well as the expected level of polyphony in the output. Using a more limited set of instruments than the default will also make the plugin run faster.\nNote that this plugin cannot isolate instruments: you can't use this setting to request notes from only one instrument in a recording with several. Instead, use this as a hint to the plugin about which instruments are actually present.";
Chris@161 158 desc.minValue = 0;
Chris@162 159 desc.maxValue = m_instruments.size()-1;
Chris@162 160 desc.defaultValue = 0;
Chris@161 161 desc.isQuantized = true;
Chris@161 162 desc.quantizeStep = 1;
Chris@161 163 desc.valueNames.clear();
Chris@162 164 for (int i = 0; i < int(m_instruments.size()); ++i) {
Chris@162 165 desc.valueNames.push_back(m_instruments[i].name);
Chris@162 166 }
Chris@166 167 list.push_back(desc);
Chris@161 168
Chris@166 169 desc.identifier = "finetune";
Chris@166 170 desc.name = "Return fine pitch estimates";
Chris@166 171 desc.unit = "";
Chris@271 172 desc.description = "Return pitch estimates at finer than semitone resolution. This works only in Intensive mode. Notes that appear to drift in pitch will be split up into shorter notes with individually finer pitches.";
Chris@166 173 desc.minValue = 0;
Chris@166 174 desc.maxValue = 1;
Chris@166 175 desc.defaultValue = 0;
Chris@166 176 desc.isQuantized = true;
Chris@166 177 desc.quantizeStep = 1;
Chris@166 178 desc.valueNames.clear();
Chris@110 179 list.push_back(desc);
Chris@110 180
Chris@31 181 return list;
Chris@31 182 }
Chris@31 183
Chris@31 184 float
Chris@31 185 Silvet::getParameter(string identifier) const
Chris@31 186 {
Chris@110 187 if (identifier == "mode") {
Chris@297 188 return (float)(int)m_mode;
Chris@166 189 } else if (identifier == "finetune") {
Chris@166 190 return m_fineTuning ? 1.f : 0.f;
Chris@176 191 } else if (identifier == "instrument") {
Chris@162 192 return m_instrument;
Chris@110 193 }
Chris@31 194 return 0;
Chris@31 195 }
Chris@31 196
Chris@31 197 void
Chris@31 198 Silvet::setParameter(string identifier, float value)
Chris@31 199 {
Chris@110 200 if (identifier == "mode") {
Chris@297 201 m_mode = (ProcessingMode)(int)(value + 0.5);
Chris@166 202 } else if (identifier == "finetune") {
Chris@166 203 m_fineTuning = (value > 0.5);
Chris@176 204 } else if (identifier == "instrument") {
Chris@162 205 m_instrument = lrintf(value);
Chris@110 206 }
Chris@31 207 }
Chris@31 208
Chris@31 209 Silvet::ProgramList
Chris@31 210 Silvet::getPrograms() const
Chris@31 211 {
Chris@31 212 ProgramList list;
Chris@31 213 return list;
Chris@31 214 }
Chris@31 215
Chris@31 216 string
Chris@31 217 Silvet::getCurrentProgram() const
Chris@31 218 {
Chris@31 219 return "";
Chris@31 220 }
Chris@31 221
Chris@31 222 void
Chris@31 223 Silvet::selectProgram(string name)
Chris@31 224 {
Chris@31 225 }
Chris@31 226
Chris@31 227 Silvet::OutputList
Chris@31 228 Silvet::getOutputDescriptors() const
Chris@31 229 {
Chris@31 230 OutputList list;
Chris@31 231
Chris@31 232 OutputDescriptor d;
Chris@51 233 d.identifier = "notes";
Chris@51 234 d.name = "Note transcription";
Chris@271 235 d.description = "Overall note transcription. Each note has time, duration, estimated pitch, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
Chris@41 236 d.unit = "Hz";
Chris@31 237 d.hasFixedBinCount = true;
Chris@31 238 d.binCount = 2;
Chris@41 239 d.binNames.push_back("Frequency");
Chris@31 240 d.binNames.push_back("Velocity");
Chris@31 241 d.hasKnownExtents = false;
Chris@31 242 d.isQuantized = false;
Chris@31 243 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@246 244 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@31 245 d.hasDuration = true;
Chris@32 246 m_notesOutputNo = list.size();
Chris@32 247 list.push_back(d);
Chris@32 248
Chris@178 249 d.identifier = "timefreq";
Chris@178 250 d.name = "Time-frequency distribution";
Chris@271 251 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
Chris@178 252 d.unit = "";
Chris@178 253 d.hasFixedBinCount = true;
Chris@298 254 d.binCount = getPack(0).templateHeight;
Chris@178 255 d.binNames.clear();
Chris@178 256 if (m_cq) {
Chris@294 257 char name[50];
Chris@298 258 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@178 259 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@178 260 // lowest-frequency 55 bins have been dropped, for a
Chris@178 261 // 545-bin template. The native CQ bins go high->low
Chris@178 262 // frequency though, so these are still the first 545 bins
Chris@178 263 // as reported by getBinFrequency, though in reverse order
Chris@178 264 float freq = m_cq->getBinFrequency
Chris@298 265 (getPack(0).templateHeight - i - 1);
Chris@178 266 sprintf(name, "%.1f Hz", freq);
Chris@178 267 d.binNames.push_back(name);
Chris@178 268 }
Chris@178 269 }
Chris@178 270 d.hasKnownExtents = false;
Chris@178 271 d.isQuantized = false;
Chris@178 272 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@178 273 d.sampleRate = m_colsPerSec;
Chris@178 274 d.hasDuration = false;
Chris@178 275 m_fcqOutputNo = list.size();
Chris@178 276 list.push_back(d);
Chris@178 277
Chris@294 278 d.identifier = "pitchactivation";
Chris@294 279 d.name = "Pitch activation distribution";
Chris@294 280 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction.";
Chris@294 281 d.unit = "";
Chris@294 282 d.hasFixedBinCount = true;
Chris@298 283 d.binCount = getPack(0).templateNoteCount;
Chris@294 284 d.binNames.clear();
Chris@294 285 if (m_cq) {
Chris@298 286 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@294 287 d.binNames.push_back(noteName(i, 0, 1));
Chris@294 288 }
Chris@294 289 }
Chris@294 290 d.hasKnownExtents = false;
Chris@294 291 d.isQuantized = false;
Chris@294 292 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@294 293 d.sampleRate = m_colsPerSec;
Chris@294 294 d.hasDuration = false;
Chris@294 295 m_pitchOutputNo = list.size();
Chris@294 296 list.push_back(d);
Chris@294 297
Chris@302 298 d.identifier = "templates";
Chris@302 299 d.name = "Templates";
Chris@302 300 d.description = "Constant-Q spectral templates for the selected instrument pack.";
Chris@302 301 d.unit = "";
Chris@302 302 d.hasFixedBinCount = true;
Chris@302 303 d.binCount = getPack(0).templateHeight;
Chris@302 304 d.binNames.clear();
Chris@302 305 if (m_cq) {
Chris@302 306 char name[50];
Chris@302 307 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@302 308 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@302 309 // lowest-frequency 55 bins have been dropped, for a
Chris@302 310 // 545-bin template. The native CQ bins go high->low
Chris@302 311 // frequency though, so these are still the first 545 bins
Chris@302 312 // as reported by getBinFrequency, though in reverse order
Chris@302 313 float freq = m_cq->getBinFrequency
Chris@302 314 (getPack(0).templateHeight - i - 1);
Chris@302 315 sprintf(name, "%.1f Hz", freq);
Chris@302 316 d.binNames.push_back(name);
Chris@302 317 }
Chris@302 318 }
Chris@302 319 d.hasKnownExtents = false;
Chris@302 320 d.isQuantized = false;
Chris@302 321 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@302 322 d.sampleRate = m_colsPerSec;
Chris@302 323 d.hasDuration = false;
Chris@302 324 m_templateOutputNo = list.size();
Chris@302 325 list.push_back(d);
Chris@302 326
Chris@31 327 return list;
Chris@31 328 }
Chris@31 329
Chris@38 330 std::string
Chris@175 331 Silvet::noteName(int note, int shift, int shiftCount) const
Chris@38 332 {
Chris@38 333 static const char *names[] = {
Chris@38 334 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
Chris@38 335 };
Chris@38 336
Chris@175 337 const char *n = names[note % 12];
Chris@38 338
Chris@175 339 int oct = (note + 9) / 12;
Chris@38 340
Chris@175 341 char buf[30];
Chris@175 342
Chris@175 343 float pshift = 0.f;
Chris@175 344 if (shiftCount > 1) {
Chris@175 345 // see noteFrequency below
Chris@175 346 pshift =
Chris@175 347 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 348 }
Chris@175 349
Chris@175 350 if (pshift > 0.f) {
Chris@175 351 sprintf(buf, "%s%d+%dc", n, oct, int(round(pshift * 100)));
Chris@175 352 } else if (pshift < 0.f) {
Chris@175 353 sprintf(buf, "%s%d-%dc", n, oct, int(round((-pshift) * 100)));
Chris@175 354 } else {
Chris@175 355 sprintf(buf, "%s%d", n, oct);
Chris@175 356 }
Chris@38 357
Chris@38 358 return buf;
Chris@38 359 }
Chris@38 360
Chris@41 361 float
Chris@168 362 Silvet::noteFrequency(int note, int shift, int shiftCount) const
Chris@41 363 {
Chris@169 364 // Convert shift number to a pitch shift. The given shift number
Chris@169 365 // is an offset into the template array, which starts with some
Chris@169 366 // zeros, followed by the template, then some trailing zeros.
Chris@169 367 //
Chris@169 368 // Example: if we have templateMaxShift == 2 and thus shiftCount
Chris@169 369 // == 5, then the number will be in the range 0-4 and the template
Chris@169 370 // will have 2 zeros at either end. Thus number 2 represents the
Chris@169 371 // template "as recorded", for a pitch shift of 0; smaller indices
Chris@169 372 // represent moving the template *up* in pitch (by introducing
Chris@169 373 // zeros at the start, which is the low-frequency end), for a
Chris@169 374 // positive pitch shift; and higher values represent moving it
Chris@169 375 // down in pitch, for a negative pitch shift.
Chris@169 376
Chris@175 377 float pshift = 0.f;
Chris@175 378 if (shiftCount > 1) {
Chris@175 379 pshift =
Chris@175 380 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 381 }
Chris@169 382
Chris@301 383 float freq = float(27.5 * pow(2.0, (note + pshift) / 12.0));
Chris@301 384
Chris@303 385 // cerr << "note = " << note << ", shift = " << shift << ", shiftCount = "
Chris@303 386 // << shiftCount << ", obtained freq = " << freq << endl;
Chris@301 387
Chris@301 388 return freq;
Chris@41 389 }
Chris@41 390
Chris@31 391 bool
Chris@31 392 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@31 393 {
Chris@272 394 if (m_inputSampleRate < minInputSampleRate ||
Chris@272 395 m_inputSampleRate > maxInputSampleRate) {
Chris@272 396 cerr << "Silvet::initialise: Unsupported input sample rate "
Chris@272 397 << m_inputSampleRate << " (supported min " << minInputSampleRate
Chris@272 398 << ", max " << maxInputSampleRate << ")" << endl;
Chris@272 399 return false;
Chris@272 400 }
Chris@272 401
Chris@31 402 if (channels < getMinChannelCount() ||
Chris@272 403 channels > getMaxChannelCount()) {
Chris@272 404 cerr << "Silvet::initialise: Unsupported channel count " << channels
Chris@272 405 << " (supported min " << getMinChannelCount() << ", max "
Chris@272 406 << getMaxChannelCount() << ")" << endl;
Chris@272 407 return false;
Chris@272 408 }
Chris@31 409
Chris@31 410 if (stepSize != blockSize) {
Chris@31 411 cerr << "Silvet::initialise: Step size must be the same as block size ("
Chris@31 412 << stepSize << " != " << blockSize << ")" << endl;
Chris@31 413 return false;
Chris@31 414 }
Chris@31 415
Chris@31 416 m_blockSize = blockSize;
Chris@31 417
Chris@31 418 reset();
Chris@31 419
Chris@31 420 return true;
Chris@31 421 }
Chris@31 422
Chris@31 423 void
Chris@31 424 Silvet::reset()
Chris@31 425 {
Chris@31 426 delete m_resampler;
Chris@246 427 delete m_flattener;
Chris@31 428 delete m_cq;
Chris@31 429
Chris@31 430 if (m_inputSampleRate != processingSampleRate) {
Chris@31 431 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
Chris@31 432 } else {
Chris@31 433 m_resampler = 0;
Chris@31 434 }
Chris@31 435
Chris@246 436 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
Chris@246 437 m_flattener->reset();
Chris@246 438
Chris@301 439 // this happens to be processingSampleRate / 3, and is the top
Chris@301 440 // freq used for the EM templates:
Chris@301 441 double maxFreq = 14700;
Chris@301 442
Chris@301 443 if (m_mode == LiveMode) {
Chris@301 444 // We only have 12 bpo rather than 60, so we need the top bin
Chris@301 445 // to be the middle one of the top 5, i.e. 2/5 of a semitone
Chris@301 446 // lower than 14700
Chris@301 447 maxFreq *= powf(2.0, -1.0 / 30.0);
Chris@301 448 }
Chris@301 449
Chris@173 450 double minFreq = 27.5;
Chris@173 451
Chris@297 452 if (m_mode != HighQualityMode) {
Chris@173 453 // We don't actually return any notes from the bottom octave,
Chris@173 454 // so we can just pad with zeros
Chris@173 455 minFreq *= 2;
Chris@173 456 }
Chris@173 457
Chris@298 458 int bpo = 12 *
Chris@298 459 (m_mode == LiveMode ? binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@301 460
Chris@154 461 CQParameters params(processingSampleRate,
Chris@173 462 minFreq,
Chris@303 463 maxFreq,
Chris@298 464 bpo);
Chris@154 465
Chris@155 466 params.q = 0.95; // MIREX code uses 0.8, but it seems 0.9 or lower
Chris@155 467 // drops the FFT size to 512 from 1024 and alters
Chris@155 468 // some other processing parameters, making
Chris@155 469 // everything much, much slower. Could be a flaw
Chris@155 470 // in the CQ parameter calculations, must check
Chris@154 471 params.atomHopFactor = 0.3;
Chris@154 472 params.threshold = 0.0005;
Chris@172 473 params.window = CQParameters::Hann;
Chris@154 474
Chris@154 475 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
Chris@31 476
Chris@303 477 // cerr << "CQ bins = " << m_cq->getTotalBins() << endl;
Chris@303 478 // cerr << "CQ min freq = " << m_cq->getMinFrequency() << " (and for confirmation, freq of bin 0 = " << m_cq->getBinFrequency(0) << ")" << endl;
Chris@297 479
Chris@297 480 m_colsPerSec = (m_mode == DraftMode ? 25 : 50);
Chris@165 481
Chris@41 482 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 483 delete m_postFilter[i];
Chris@41 484 }
Chris@41 485 m_postFilter.clear();
Chris@303 486 int postFilterLength = 3;
Chris@298 487 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@303 488 m_postFilter.push_back(new MedianFilter<double>(postFilterLength));
Chris@41 489 }
Chris@41 490 m_pianoRoll.clear();
Chris@246 491 m_inputGains.clear();
Chris@32 492 m_columnCount = 0;
Chris@272 493 m_resampledCount = 0;
Chris@40 494 m_startTime = RealTime::zeroTime;
Chris@31 495 }
Chris@31 496
Chris@31 497 Silvet::FeatureSet
Chris@31 498 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
Chris@31 499 {
Chris@302 500 FeatureSet fs;
Chris@302 501
Chris@40 502 if (m_columnCount == 0) {
Chris@40 503 m_startTime = timestamp;
Chris@302 504 insertTemplateFeatures(fs);
Chris@40 505 }
Chris@246 506
Chris@246 507 vector<float> flattened(m_blockSize);
Chris@246 508 float gain = 1.f;
Chris@246 509 m_flattener->connectInputPort
Chris@246 510 (FlattenDynamics::AudioInputPort, inputBuffers[0]);
Chris@246 511 m_flattener->connectOutputPort
Chris@246 512 (FlattenDynamics::AudioOutputPort, &flattened[0]);
Chris@246 513 m_flattener->connectOutputPort
Chris@246 514 (FlattenDynamics::GainOutputPort, &gain);
Chris@246 515 m_flattener->process(m_blockSize);
Chris@246 516
Chris@252 517 m_inputGains[timestamp] = gain;
Chris@40 518
Chris@31 519 vector<double> data;
Chris@40 520 for (int i = 0; i < m_blockSize; ++i) {
Chris@246 521 double d = flattened[i];
Chris@235 522 data.push_back(d);
Chris@40 523 }
Chris@31 524
Chris@31 525 if (m_resampler) {
Chris@272 526
Chris@31 527 data = m_resampler->process(data.data(), data.size());
Chris@272 528
Chris@272 529 int hadCount = m_resampledCount;
Chris@272 530 m_resampledCount += data.size();
Chris@272 531
Chris@272 532 int resamplerLatency = m_resampler->getLatency();
Chris@272 533
Chris@272 534 if (hadCount < resamplerLatency) {
Chris@272 535 int stillToDrop = resamplerLatency - hadCount;
Chris@272 536 if (stillToDrop >= int(data.size())) {
Chris@302 537 return fs;
Chris@272 538 } else {
Chris@272 539 data = vector<double>(data.begin() + stillToDrop, data.end());
Chris@272 540 }
Chris@272 541 }
Chris@31 542 }
Chris@272 543
Chris@32 544 Grid cqout = m_cq->process(data);
Chris@302 545 transcribe(cqout, fs);
Chris@51 546 return fs;
Chris@34 547 }
Chris@34 548
Chris@34 549 Silvet::FeatureSet
Chris@34 550 Silvet::getRemainingFeatures()
Chris@34 551 {
Chris@145 552 Grid cqout = m_cq->getRemainingOutput();
Chris@302 553 FeatureSet fs;
Chris@302 554 if (m_columnCount == 0) {
Chris@302 555 // process() was never called, but we still want these
Chris@302 556 insertTemplateFeatures(fs);
Chris@302 557 } else {
Chris@302 558 transcribe(cqout, fs);
Chris@302 559 }
Chris@51 560 return fs;
Chris@34 561 }
Chris@34 562
Chris@302 563 void
Chris@302 564 Silvet::insertTemplateFeatures(FeatureSet &fs)
Chris@302 565 {
Chris@302 566 const InstrumentPack &pack = getPack(m_instrument);
Chris@302 567 for (int i = 0; i < int(pack.templates.size()) * pack.templateNoteCount; ++i) {
Chris@302 568 RealTime timestamp = RealTime::fromSeconds(double(i) / m_colsPerSec);
Chris@302 569 Feature f;
Chris@302 570 char buffer[50];
Chris@302 571 sprintf(buffer, "Note %d", i + 1);
Chris@302 572 f.label = buffer;
Chris@302 573 f.hasTimestamp = true;
Chris@302 574 f.timestamp = timestamp;
Chris@302 575 f.values = pack.templates[i / pack.templateNoteCount]
Chris@302 576 .data[i % pack.templateNoteCount];
Chris@302 577 fs[m_templateOutputNo].push_back(f);
Chris@302 578 }
Chris@302 579 }
Chris@302 580
Chris@302 581 void
Chris@302 582 Silvet::transcribe(const Grid &cqout, Silvet::FeatureSet &fs)
Chris@34 583 {
Chris@32 584 Grid filtered = preProcess(cqout);
Chris@31 585
Chris@302 586 if (filtered.empty()) return;
Chris@170 587
Chris@298 588 const InstrumentPack &pack(getPack(m_instrument));
Chris@104 589
Chris@178 590 for (int i = 0; i < (int)filtered.size(); ++i) {
Chris@178 591 Feature f;
Chris@178 592 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@178 593 f.values.push_back(float(filtered[i][j]));
Chris@178 594 }
Chris@178 595 fs[m_fcqOutputNo].push_back(f);
Chris@178 596 }
Chris@178 597
Chris@34 598 int width = filtered.size();
Chris@34 599
Chris@297 600 int iterations = (m_mode == HighQualityMode ? 20 : 10);
Chris@34 601
Chris@176 602 Grid localPitches(width, vector<double>(pack.templateNoteCount, 0.0));
Chris@170 603
Chris@297 604 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning;
Chris@170 605 int shiftCount = 1;
Chris@170 606 if (wantShifts) {
Chris@170 607 shiftCount = pack.templateMaxShift * 2 + 1;
Chris@170 608 }
Chris@170 609
Chris@170 610 vector<vector<int> > localBestShifts;
Chris@170 611 if (wantShifts) {
Chris@170 612 localBestShifts =
Chris@176 613 vector<vector<int> >(width, vector<int>(pack.templateNoteCount, 0));
Chris@170 614 }
Chris@170 615
Chris@305 616 double columnThreshold = 1e-5;
Chris@305 617
Chris@123 618 #pragma omp parallel for
Chris@123 619 for (int i = 0; i < width; ++i) {
Chris@104 620
Chris@170 621 double sum = 0.0;
Chris@176 622 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@170 623 sum += filtered.at(i).at(j);
Chris@170 624 }
Chris@305 625 if (sum < columnThreshold) continue;
Chris@170 626
Chris@297 627 EM em(&pack, m_mode == HighQualityMode);
Chris@170 628
Chris@183 629 em.setPitchSparsity(pack.pitchSparsity);
Chris@213 630 em.setSourceSparsity(pack.sourceSparsity);
Chris@183 631
Chris@170 632 for (int j = 0; j < iterations; ++j) {
Chris@170 633 em.iterate(filtered.at(i).data());
Chris@37 634 }
Chris@37 635
Chris@170 636 const float *pitchDist = em.getPitchDistribution();
Chris@170 637 const float *const *shiftDist = em.getShifts();
Chris@37 638
Chris@176 639 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@104 640
Chris@170 641 localPitches[i][j] = pitchDist[j] * sum;
Chris@170 642
Chris@170 643 int bestShift = 0;
Chris@179 644 float bestShiftValue = 0.0;
Chris@170 645 if (wantShifts) {
Chris@170 646 for (int k = 0; k < shiftCount; ++k) {
Chris@179 647 float value = shiftDist[k][j];
Chris@179 648 if (k == 0 || value > bestShiftValue) {
Chris@179 649 bestShiftValue = value;
Chris@170 650 bestShift = k;
Chris@170 651 }
Chris@170 652 }
Chris@170 653 localBestShifts[i][j] = bestShift;
Chris@170 654 }
Chris@123 655 }
Chris@123 656 }
Chris@166 657
Chris@166 658 for (int i = 0; i < width; ++i) {
Chris@37 659
Chris@294 660 vector<double> filtered = postProcess
Chris@294 661 (localPitches[i], localBestShifts[i], wantShifts);
Chris@294 662
Chris@294 663 Feature f;
Chris@294 664 for (int j = 0; j < (int)filtered.size(); ++j) {
Chris@294 665 float v(filtered[j]);
Chris@294 666 if (v < pack.levelThreshold) v = 0.f;
Chris@294 667 f.values.push_back(v);
Chris@294 668 }
Chris@294 669 fs[m_pitchOutputNo].push_back(f);
Chris@166 670
Chris@168 671 FeatureList noteFeatures = noteTrack(shiftCount);
Chris@38 672
Chris@123 673 for (FeatureList::const_iterator fi = noteFeatures.begin();
Chris@123 674 fi != noteFeatures.end(); ++fi) {
Chris@123 675 fs[m_notesOutputNo].push_back(*fi);
Chris@40 676 }
Chris@34 677 }
Chris@31 678 }
Chris@31 679
Chris@32 680 Silvet::Grid
Chris@32 681 Silvet::preProcess(const Grid &in)
Chris@32 682 {
Chris@32 683 int width = in.size();
Chris@32 684
Chris@165 685 int spacing = processingSampleRate / m_colsPerSec;
Chris@32 686
Chris@165 687 // need to be careful that col spacing is an integer number of samples!
Chris@165 688 assert(spacing * m_colsPerSec == processingSampleRate);
Chris@32 689
Chris@32 690 Grid out;
Chris@32 691
Chris@58 692 // We count the CQ latency in terms of processing hops, but
Chris@58 693 // actually it probably isn't an exact number of hops so this
Chris@58 694 // isn't quite accurate. But the small constant offset is
Chris@165 695 // practically irrelevant compared to the jitter from the frame
Chris@165 696 // size we reduce to in a moment
Chris@33 697 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
Chris@33 698
Chris@298 699 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 700
Chris@32 701 for (int i = 0; i < width; ++i) {
Chris@32 702
Chris@33 703 if (m_columnCount < latentColumns) {
Chris@33 704 ++m_columnCount;
Chris@33 705 continue;
Chris@33 706 }
Chris@33 707
Chris@32 708 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop();
Chris@32 709 int sampleNo = m_columnCount * m_cq->getColumnHop();
Chris@32 710
Chris@32 711 bool select = (sampleNo / spacing != prevSampleNo / spacing);
Chris@32 712
Chris@32 713 if (select) {
Chris@32 714 vector<double> inCol = in[i];
Chris@176 715 vector<double> outCol(pack.templateHeight);
Chris@32 716
Chris@178 717 // In HQ mode, the CQ returns 600 bins and we ignore the
Chris@298 718 // lowest 55 of them (assuming binsPerSemitone == 5).
Chris@178 719 //
Chris@297 720 // In draft and live mode the CQ is an octave shorter,
Chris@300 721 // returning 540 bins or equivalent, so we instead pad
Chris@300 722 // them with an additional 5 or equivalent zeros.
Chris@178 723 //
Chris@178 724 // We also need to reverse the column as we go, since the
Chris@178 725 // raw CQ has the high frequencies first and we need it
Chris@178 726 // the other way around.
Chris@32 727
Chris@298 728 int bps = (m_mode == LiveMode ?
Chris@298 729 binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@298 730
Chris@297 731 if (m_mode == HighQualityMode) {
Chris@178 732 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@298 733 int ix = inCol.size() - j - (11 * bps);
Chris@178 734 outCol[j] = inCol[ix];
Chris@178 735 }
Chris@178 736 } else {
Chris@298 737 for (int j = 0; j < bps; ++j) {
Chris@178 738 outCol[j] = 0.0;
Chris@178 739 }
Chris@298 740 for (int j = bps; j < pack.templateHeight; ++j) {
Chris@298 741 int ix = inCol.size() - j + (bps-1);
Chris@178 742 outCol[j] = inCol[ix];
Chris@178 743 }
Chris@46 744 }
Chris@32 745
Chris@46 746 vector<double> noiseLevel1 =
Chris@298 747 MedianFilter<double>::filter(8 * bps, outCol);
Chris@176 748 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 749 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
Chris@46 750 }
Chris@32 751
Chris@46 752 vector<double> noiseLevel2 =
Chris@298 753 MedianFilter<double>::filter(8 * bps, noiseLevel1);
Chris@176 754 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 755 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
Chris@32 756 }
Chris@32 757
Chris@165 758 out.push_back(outCol);
Chris@32 759 }
Chris@32 760
Chris@32 761 ++m_columnCount;
Chris@32 762 }
Chris@32 763
Chris@32 764 return out;
Chris@32 765 }
Chris@32 766
Chris@294 767 vector<double>
Chris@170 768 Silvet::postProcess(const vector<double> &pitches,
Chris@170 769 const vector<int> &bestShifts,
Chris@170 770 bool wantShifts)
Chris@166 771 {
Chris@298 772 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 773
Chris@41 774 vector<double> filtered;
Chris@41 775
Chris@176 776 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@170 777 m_postFilter[j]->push(pitches[j]);
Chris@41 778 filtered.push_back(m_postFilter[j]->get());
Chris@41 779 }
Chris@41 780
Chris@41 781 // Threshold for level and reduce number of candidate pitches
Chris@41 782
Chris@41 783 typedef std::multimap<double, int> ValueIndexMap;
Chris@41 784
Chris@41 785 ValueIndexMap strengths;
Chris@166 786
Chris@176 787 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@166 788 double strength = filtered[j];
Chris@183 789 if (strength < pack.levelThreshold) continue;
Chris@168 790 strengths.insert(ValueIndexMap::value_type(strength, j));
Chris@168 791 }
Chris@166 792
Chris@168 793 ValueIndexMap::const_iterator si = strengths.end();
Chris@167 794
Chris@168 795 map<int, double> active;
Chris@168 796 map<int, int> activeShifts;
Chris@168 797
Chris@183 798 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) {
Chris@168 799
Chris@168 800 --si;
Chris@168 801
Chris@168 802 double strength = si->first;
Chris@168 803 int j = si->second;
Chris@168 804
Chris@168 805 active[j] = strength;
Chris@168 806
Chris@170 807 if (wantShifts) {
Chris@170 808 activeShifts[j] = bestShifts[j];
Chris@167 809 }
Chris@41 810 }
Chris@41 811
Chris@168 812 m_pianoRoll.push_back(active);
Chris@170 813
Chris@170 814 if (wantShifts) {
Chris@168 815 m_pianoRollShifts.push_back(activeShifts);
Chris@41 816 }
Chris@294 817
Chris@294 818 return filtered;
Chris@166 819 }
Chris@166 820
Chris@166 821 Vamp::Plugin::FeatureList
Chris@168 822 Silvet::noteTrack(int shiftCount)
Chris@166 823 {
Chris@41 824 // Minimum duration pruning, and conversion to notes. We can only
Chris@41 825 // report notes that have just ended (i.e. that are absent in the
Chris@168 826 // latest active set but present in the prior set in the piano
Chris@41 827 // roll) -- any notes that ended earlier will have been reported
Chris@41 828 // already, and if they haven't ended, we don't know their
Chris@41 829 // duration.
Chris@41 830
Chris@168 831 int width = m_pianoRoll.size() - 1;
Chris@168 832
Chris@168 833 const map<int, double> &active = m_pianoRoll[width];
Chris@41 834
Chris@165 835 double columnDuration = 1.0 / m_colsPerSec;
Chris@165 836
Chris@165 837 // only keep notes >= 100ms or thereabouts
Chris@165 838 int durationThreshold = floor(0.1 / columnDuration); // columns
Chris@165 839 if (durationThreshold < 1) durationThreshold = 1;
Chris@41 840
Chris@41 841 FeatureList noteFeatures;
Chris@41 842
Chris@41 843 if (width < durationThreshold + 1) {
Chris@41 844 return noteFeatures;
Chris@41 845 }
Chris@41 846
Chris@150 847 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix)
Chris@150 848
Chris@55 849 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
Chris@41 850 ni != m_pianoRoll[width-1].end(); ++ni) {
Chris@41 851
Chris@55 852 int note = ni->first;
Chris@41 853
Chris@41 854 if (active.find(note) != active.end()) {
Chris@41 855 // the note is still playing
Chris@41 856 continue;
Chris@41 857 }
Chris@41 858
Chris@41 859 // the note was playing but just ended
Chris@41 860 int end = width;
Chris@41 861 int start = end-1;
Chris@41 862
Chris@41 863 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
Chris@41 864 --start;
Chris@41 865 }
Chris@41 866 ++start;
Chris@41 867
Chris@169 868 if ((end - start) < durationThreshold) {
Chris@41 869 continue;
Chris@41 870 }
Chris@41 871
Chris@169 872 emitNote(start, end, note, shiftCount, noteFeatures);
Chris@41 873 }
Chris@41 874
Chris@62 875 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
Chris@41 876
Chris@41 877 return noteFeatures;
Chris@41 878 }
Chris@41 879
Chris@169 880 void
Chris@169 881 Silvet::emitNote(int start, int end, int note, int shiftCount,
Chris@169 882 FeatureList &noteFeatures)
Chris@169 883 {
Chris@169 884 int partStart = start;
Chris@169 885 int partShift = 0;
Chris@169 886 int partVelocity = 0;
Chris@169 887
Chris@252 888 int partThreshold = floor(0.05 * m_colsPerSec);
Chris@169 889
Chris@169 890 for (int i = start; i != end; ++i) {
Chris@169 891
Chris@169 892 double strength = m_pianoRoll[i][note];
Chris@169 893
Chris@169 894 int shift = 0;
Chris@169 895
Chris@169 896 if (shiftCount > 1) {
Chris@169 897
Chris@169 898 shift = m_pianoRollShifts[i][note];
Chris@169 899
Chris@169 900 if (i == partStart) {
Chris@169 901 partShift = shift;
Chris@169 902 }
Chris@169 903
Chris@169 904 if (i > partStart + partThreshold && shift != partShift) {
Chris@169 905
Chris@169 906 // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl;
Chris@169 907
Chris@169 908 // pitch has changed, emit an intermediate note
Chris@252 909 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 910 i,
Chris@252 911 note,
Chris@252 912 partShift,
Chris@252 913 shiftCount,
Chris@252 914 partVelocity));
Chris@169 915 partStart = i;
Chris@169 916 partShift = shift;
Chris@169 917 partVelocity = 0;
Chris@169 918 }
Chris@169 919 }
Chris@169 920
Chris@303 921 int v;
Chris@303 922 if (m_mode == LiveMode) {
Chris@303 923 v = round(strength * 30);
Chris@303 924 } else {
Chris@303 925 v = round(strength * 2);
Chris@303 926 }
Chris@169 927 if (v > partVelocity) {
Chris@169 928 partVelocity = v;
Chris@169 929 }
Chris@169 930 }
Chris@169 931
Chris@169 932 if (end >= partStart + partThreshold) {
Chris@252 933 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 934 end,
Chris@252 935 note,
Chris@252 936 partShift,
Chris@252 937 shiftCount,
Chris@252 938 partVelocity));
Chris@169 939 }
Chris@169 940 }
Chris@252 941
Chris@252 942 Silvet::Feature
Chris@252 943 Silvet::makeNoteFeature(int start,
Chris@252 944 int end,
Chris@252 945 int note,
Chris@252 946 int shift,
Chris@252 947 int shiftCount,
Chris@252 948 int velocity)
Chris@252 949 {
Chris@252 950 double columnDuration = 1.0 / m_colsPerSec;
Chris@252 951 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
Chris@252 952
Chris@252 953 Feature f;
Chris@252 954
Chris@252 955 f.hasTimestamp = true;
Chris@285 956 f.timestamp = m_startTime + RealTime::fromSeconds
Chris@252 957 (columnDuration * (start - postFilterLatency) + 0.02);
Chris@252 958
Chris@252 959 f.hasDuration = true;
Chris@252 960 f.duration = RealTime::fromSeconds
Chris@252 961 (columnDuration * (end - start));
Chris@252 962
Chris@252 963 f.values.clear();
Chris@252 964
Chris@252 965 f.values.push_back
Chris@252 966 (noteFrequency(note, shift, shiftCount));
Chris@252 967
Chris@252 968 float inputGain = getInputGainAt(f.timestamp);
Chris@252 969 // cerr << "adjusting velocity from " << velocity << " to " << round(velocity/inputGain) << endl;
Chris@252 970 velocity = round(velocity / inputGain);
Chris@252 971 if (velocity > 127) velocity = 127;
Chris@252 972 if (velocity < 1) velocity = 1;
Chris@252 973 f.values.push_back(velocity);
Chris@252 974
Chris@252 975 f.label = noteName(note, shift, shiftCount);
Chris@252 976
Chris@252 977 return f;
Chris@252 978 }
Chris@252 979
Chris@252 980 float
Chris@252 981 Silvet::getInputGainAt(RealTime t)
Chris@252 982 {
Chris@252 983 map<RealTime, float>::const_iterator i = m_inputGains.lower_bound(t);
Chris@252 984
Chris@252 985 if (i == m_inputGains.end()) {
Chris@252 986 if (i != m_inputGains.begin()) {
Chris@252 987 --i;
Chris@252 988 } else {
Chris@252 989 return 1.f; // no data
Chris@252 990 }
Chris@252 991 }
Chris@252 992
Chris@252 993 // cerr << "gain at time " << t << " = " << i->second << endl;
Chris@252 994
Chris@252 995 return i->second;
Chris@252 996 }
Chris@252 997