annotate src/Silvet.cpp @ 271:10d8bd634a77

Docs, fix to temporary file removal in test script
author Chris Cannam
date Sat, 26 Jul 2014 10:18:01 +0100
parents 34e69544691b
children e5f897b2d5e8
rev   line source
Chris@31 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@31 2
Chris@31 3 /*
Chris@31 4 Silvet
Chris@31 5
Chris@31 6 A Vamp plugin for note transcription.
Chris@31 7 Centre for Digital Music, Queen Mary University of London.
Chris@31 8
Chris@31 9 This program is free software; you can redistribute it and/or
Chris@31 10 modify it under the terms of the GNU General Public License as
Chris@31 11 published by the Free Software Foundation; either version 2 of the
Chris@31 12 License, or (at your option) any later version. See the file
Chris@31 13 COPYING included with this distribution for more information.
Chris@31 14 */
Chris@31 15
Chris@31 16 #include "Silvet.h"
Chris@34 17 #include "EM.h"
Chris@31 18
Chris@152 19 #include <cq/CQSpectrogram.h>
Chris@31 20
Chris@152 21 #include "MedianFilter.h"
Chris@152 22 #include "constant-q-cpp/src/dsp/Resampler.h"
Chris@246 23 #include "flattendynamics-ladspa.h"
Chris@31 24
Chris@31 25 #include <vector>
Chris@31 26
Chris@32 27 #include <cstdio>
Chris@32 28
Chris@31 29 using std::vector;
Chris@48 30 using std::cout;
Chris@31 31 using std::cerr;
Chris@31 32 using std::endl;
Chris@40 33 using Vamp::RealTime;
Chris@31 34
// Sample rate (Hz) at which the constant-Q analysis runs. reset()
// installs a resampler whenever the plugin's input rate differs.
static int processingSampleRate = 44100;

// Constant-Q resolution in bins per octave (60 / 12 = 5 bins per
// semitone).
static int processingBPO = 60;
Chris@170 37
Chris@31 38 Silvet::Silvet(float inputSampleRate) :
Chris@31 39 Plugin(inputSampleRate),
Chris@161 40 m_instruments(InstrumentPack::listInstrumentPacks()),
Chris@31 41 m_resampler(0),
Chris@246 42 m_flattener(0),
Chris@110 43 m_cq(0),
Chris@162 44 m_hqMode(true),
Chris@166 45 m_fineTuning(false),
Chris@178 46 m_instrument(0),
Chris@178 47 m_colsPerSec(50)
Chris@31 48 {
Chris@31 49 }
Chris@31 50
Chris@31 51 Silvet::~Silvet()
Chris@31 52 {
Chris@31 53 delete m_resampler;
Chris@246 54 delete m_flattener;
Chris@31 55 delete m_cq;
Chris@41 56 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 57 delete m_postFilter[i];
Chris@41 58 }
Chris@31 59 }
Chris@31 60
Chris@31 61 string
Chris@31 62 Silvet::getIdentifier() const
Chris@31 63 {
Chris@31 64 return "silvet";
Chris@31 65 }
Chris@31 66
Chris@31 67 string
Chris@31 68 Silvet::getName() const
Chris@31 69 {
Chris@31 70 return "Silvet Note Transcription";
Chris@31 71 }
Chris@31 72
Chris@31 73 string
Chris@31 74 Silvet::getDescription() const
Chris@31 75 {
Chris@191 76 return "Estimate the note onsets, pitches, and durations that make up a music recording.";
Chris@31 77 }
Chris@31 78
Chris@31 79 string
Chris@31 80 Silvet::getMaker() const
Chris@31 81 {
Chris@191 82 return "Queen Mary, University of London";
Chris@31 83 }
Chris@31 84
Chris@31 85 int
Chris@31 86 Silvet::getPluginVersion() const
Chris@31 87 {
Chris@31 88 return 1;
Chris@31 89 }
Chris@31 90
Chris@31 91 string
Chris@31 92 Silvet::getCopyright() const
Chris@31 93 {
Chris@191 94 return "Method by Emmanouil Benetos and Simon Dixon; plugin by Chris Cannam and Emmanouil Benetos. GPL licence.";
Chris@31 95 }
Chris@31 96
Chris@31 97 Silvet::InputDomain
Chris@31 98 Silvet::getInputDomain() const
Chris@31 99 {
Chris@31 100 return TimeDomain;
Chris@31 101 }
Chris@31 102
Chris@31 103 size_t
Chris@31 104 Silvet::getPreferredBlockSize() const
Chris@31 105 {
Chris@31 106 return 0;
Chris@31 107 }
Chris@31 108
Chris@31 109 size_t
Chris@31 110 Silvet::getPreferredStepSize() const
Chris@31 111 {
Chris@31 112 return 0;
Chris@31 113 }
Chris@31 114
Chris@31 115 size_t
Chris@31 116 Silvet::getMinChannelCount() const
Chris@31 117 {
Chris@31 118 return 1;
Chris@31 119 }
Chris@31 120
Chris@31 121 size_t
Chris@31 122 Silvet::getMaxChannelCount() const
Chris@31 123 {
Chris@31 124 return 1;
Chris@31 125 }
Chris@31 126
Chris@31 127 Silvet::ParameterList
Chris@31 128 Silvet::getParameterDescriptors() const
Chris@31 129 {
Chris@31 130 ParameterList list;
Chris@110 131
Chris@110 132 ParameterDescriptor desc;
Chris@110 133 desc.identifier = "mode";
Chris@110 134 desc.name = "Processing mode";
Chris@110 135 desc.unit = "";
Chris@271 136 desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode modifies a number of internal parameters in favour of speed. Intensive mode (the default) will almost always produce better results.";
Chris@110 137 desc.minValue = 0;
Chris@110 138 desc.maxValue = 1;
Chris@113 139 desc.defaultValue = 1;
Chris@110 140 desc.isQuantized = true;
Chris@110 141 desc.quantizeStep = 1;
Chris@166 142 desc.valueNames.push_back("Draft (faster)");
Chris@165 143 desc.valueNames.push_back("Intensive (higher quality)");
Chris@161 144 list.push_back(desc);
Chris@161 145
Chris@176 146 desc.identifier = "instrument";
Chris@176 147 desc.name = "Instrument";
Chris@161 148 desc.unit = "";
Chris@271 149 desc.description = "The instrument or instruments known to be present in the recording. This affects the set of instrument templates used, as well as the expected level of polyphony in the output. Using a more limited set of instruments than the default will also make the plugin run faster.\nNote that this plugin cannot isolate instruments: you can't use this setting to request notes from only one instrument in a recording with several. Instead, use this as a hint to the plugin about which instruments are actually present.";
Chris@161 150 desc.minValue = 0;
Chris@162 151 desc.maxValue = m_instruments.size()-1;
Chris@162 152 desc.defaultValue = 0;
Chris@161 153 desc.isQuantized = true;
Chris@161 154 desc.quantizeStep = 1;
Chris@161 155 desc.valueNames.clear();
Chris@162 156 for (int i = 0; i < int(m_instruments.size()); ++i) {
Chris@162 157 desc.valueNames.push_back(m_instruments[i].name);
Chris@162 158 }
Chris@166 159 list.push_back(desc);
Chris@161 160
Chris@166 161 desc.identifier = "finetune";
Chris@166 162 desc.name = "Return fine pitch estimates";
Chris@166 163 desc.unit = "";
Chris@271 164 desc.description = "Return pitch estimates at finer than semitone resolution. This works only in Intensive mode. Notes that appear to drift in pitch will be split up into shorter notes with individually finer pitches.";
Chris@166 165 desc.minValue = 0;
Chris@166 166 desc.maxValue = 1;
Chris@166 167 desc.defaultValue = 0;
Chris@166 168 desc.isQuantized = true;
Chris@166 169 desc.quantizeStep = 1;
Chris@166 170 desc.valueNames.clear();
Chris@110 171 list.push_back(desc);
Chris@110 172
Chris@31 173 return list;
Chris@31 174 }
Chris@31 175
Chris@31 176 float
Chris@31 177 Silvet::getParameter(string identifier) const
Chris@31 178 {
Chris@110 179 if (identifier == "mode") {
Chris@110 180 return m_hqMode ? 1.f : 0.f;
Chris@166 181 } else if (identifier == "finetune") {
Chris@166 182 return m_fineTuning ? 1.f : 0.f;
Chris@176 183 } else if (identifier == "instrument") {
Chris@162 184 return m_instrument;
Chris@110 185 }
Chris@31 186 return 0;
Chris@31 187 }
Chris@31 188
Chris@31 189 void
Chris@31 190 Silvet::setParameter(string identifier, float value)
Chris@31 191 {
Chris@110 192 if (identifier == "mode") {
Chris@110 193 m_hqMode = (value > 0.5);
Chris@166 194 } else if (identifier == "finetune") {
Chris@166 195 m_fineTuning = (value > 0.5);
Chris@176 196 } else if (identifier == "instrument") {
Chris@162 197 m_instrument = lrintf(value);
Chris@110 198 }
Chris@31 199 }
Chris@31 200
Chris@31 201 Silvet::ProgramList
Chris@31 202 Silvet::getPrograms() const
Chris@31 203 {
Chris@31 204 ProgramList list;
Chris@31 205 return list;
Chris@31 206 }
Chris@31 207
Chris@31 208 string
Chris@31 209 Silvet::getCurrentProgram() const
Chris@31 210 {
Chris@31 211 return "";
Chris@31 212 }
Chris@31 213
Chris@31 214 void
Chris@31 215 Silvet::selectProgram(string name)
Chris@31 216 {
Chris@31 217 }
Chris@31 218
Chris@31 219 Silvet::OutputList
Chris@31 220 Silvet::getOutputDescriptors() const
Chris@31 221 {
Chris@31 222 OutputList list;
Chris@31 223
Chris@31 224 OutputDescriptor d;
Chris@51 225 d.identifier = "notes";
Chris@51 226 d.name = "Note transcription";
Chris@271 227 d.description = "Overall note transcription. Each note has time, duration, estimated pitch, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
Chris@41 228 d.unit = "Hz";
Chris@31 229 d.hasFixedBinCount = true;
Chris@31 230 d.binCount = 2;
Chris@41 231 d.binNames.push_back("Frequency");
Chris@31 232 d.binNames.push_back("Velocity");
Chris@31 233 d.hasKnownExtents = false;
Chris@31 234 d.isQuantized = false;
Chris@31 235 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@246 236 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@31 237 d.hasDuration = true;
Chris@32 238 m_notesOutputNo = list.size();
Chris@32 239 list.push_back(d);
Chris@32 240
Chris@178 241 d.identifier = "timefreq";
Chris@178 242 d.name = "Time-frequency distribution";
Chris@271 243 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
Chris@178 244 d.unit = "";
Chris@178 245 d.hasFixedBinCount = true;
Chris@178 246 d.binCount = m_instruments[0].templateHeight;
Chris@178 247 d.binNames.clear();
Chris@178 248 if (m_cq) {
Chris@178 249 char name[20];
Chris@178 250 for (int i = 0; i < m_instruments[0].templateHeight; ++i) {
Chris@178 251 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@178 252 // lowest-frequency 55 bins have been dropped, for a
Chris@178 253 // 545-bin template. The native CQ bins go high->low
Chris@178 254 // frequency though, so these are still the first 545 bins
Chris@178 255 // as reported by getBinFrequency, though in reverse order
Chris@178 256 float freq = m_cq->getBinFrequency
Chris@178 257 (m_instruments[0].templateHeight - i - 1);
Chris@178 258 sprintf(name, "%.1f Hz", freq);
Chris@178 259 d.binNames.push_back(name);
Chris@178 260 }
Chris@178 261 }
Chris@178 262 d.hasKnownExtents = false;
Chris@178 263 d.isQuantized = false;
Chris@178 264 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@178 265 d.sampleRate = m_colsPerSec;
Chris@178 266 d.hasDuration = false;
Chris@178 267 m_fcqOutputNo = list.size();
Chris@178 268 list.push_back(d);
Chris@178 269
Chris@31 270 return list;
Chris@31 271 }
Chris@31 272
Chris@38 273 std::string
Chris@175 274 Silvet::noteName(int note, int shift, int shiftCount) const
Chris@38 275 {
Chris@38 276 static const char *names[] = {
Chris@38 277 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
Chris@38 278 };
Chris@38 279
Chris@175 280 const char *n = names[note % 12];
Chris@38 281
Chris@175 282 int oct = (note + 9) / 12;
Chris@38 283
Chris@175 284 char buf[30];
Chris@175 285
Chris@175 286 float pshift = 0.f;
Chris@175 287 if (shiftCount > 1) {
Chris@175 288 // see noteFrequency below
Chris@175 289 pshift =
Chris@175 290 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 291 }
Chris@175 292
Chris@175 293 if (pshift > 0.f) {
Chris@175 294 sprintf(buf, "%s%d+%dc", n, oct, int(round(pshift * 100)));
Chris@175 295 } else if (pshift < 0.f) {
Chris@175 296 sprintf(buf, "%s%d-%dc", n, oct, int(round((-pshift) * 100)));
Chris@175 297 } else {
Chris@175 298 sprintf(buf, "%s%d", n, oct);
Chris@175 299 }
Chris@38 300
Chris@38 301 return buf;
Chris@38 302 }
Chris@38 303
Chris@41 304 float
Chris@168 305 Silvet::noteFrequency(int note, int shift, int shiftCount) const
Chris@41 306 {
Chris@169 307 // Convert shift number to a pitch shift. The given shift number
Chris@169 308 // is an offset into the template array, which starts with some
Chris@169 309 // zeros, followed by the template, then some trailing zeros.
Chris@169 310 //
Chris@169 311 // Example: if we have templateMaxShift == 2 and thus shiftCount
Chris@169 312 // == 5, then the number will be in the range 0-4 and the template
Chris@169 313 // will have 2 zeros at either end. Thus number 2 represents the
Chris@169 314 // template "as recorded", for a pitch shift of 0; smaller indices
Chris@169 315 // represent moving the template *up* in pitch (by introducing
Chris@169 316 // zeros at the start, which is the low-frequency end), for a
Chris@169 317 // positive pitch shift; and higher values represent moving it
Chris@169 318 // down in pitch, for a negative pitch shift.
Chris@169 319
Chris@175 320 float pshift = 0.f;
Chris@175 321 if (shiftCount > 1) {
Chris@175 322 pshift =
Chris@175 323 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 324 }
Chris@169 325
Chris@169 326 return float(27.5 * pow(2.0, (note + pshift) / 12.0));
Chris@41 327 }
Chris@41 328
Chris@31 329 bool
Chris@31 330 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@31 331 {
Chris@31 332 if (channels < getMinChannelCount() ||
Chris@31 333 channels > getMaxChannelCount()) return false;
Chris@31 334
Chris@31 335 if (stepSize != blockSize) {
Chris@31 336 cerr << "Silvet::initialise: Step size must be the same as block size ("
Chris@31 337 << stepSize << " != " << blockSize << ")" << endl;
Chris@31 338 return false;
Chris@31 339 }
Chris@31 340
Chris@31 341 m_blockSize = blockSize;
Chris@31 342
Chris@31 343 reset();
Chris@31 344
Chris@31 345 return true;
Chris@31 346 }
Chris@31 347
Chris@31 348 void
Chris@31 349 Silvet::reset()
Chris@31 350 {
Chris@31 351 delete m_resampler;
Chris@246 352 delete m_flattener;
Chris@31 353 delete m_cq;
Chris@31 354
Chris@31 355 if (m_inputSampleRate != processingSampleRate) {
Chris@31 356 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
Chris@31 357 } else {
Chris@31 358 m_resampler = 0;
Chris@31 359 }
Chris@31 360
Chris@246 361 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
Chris@246 362 m_flattener->reset();
Chris@246 363
Chris@173 364 double minFreq = 27.5;
Chris@173 365
Chris@173 366 if (!m_hqMode) {
Chris@173 367 // We don't actually return any notes from the bottom octave,
Chris@173 368 // so we can just pad with zeros
Chris@173 369 minFreq *= 2;
Chris@173 370 }
Chris@173 371
Chris@154 372 CQParameters params(processingSampleRate,
Chris@173 373 minFreq,
Chris@154 374 processingSampleRate / 3,
Chris@154 375 processingBPO);
Chris@154 376
Chris@155 377 params.q = 0.95; // MIREX code uses 0.8, but it seems 0.9 or lower
Chris@155 378 // drops the FFT size to 512 from 1024 and alters
Chris@155 379 // some other processing parameters, making
Chris@155 380 // everything much, much slower. Could be a flaw
Chris@155 381 // in the CQ parameter calculations, must check
Chris@154 382 params.atomHopFactor = 0.3;
Chris@154 383 params.threshold = 0.0005;
Chris@172 384 params.window = CQParameters::Hann;
Chris@154 385
Chris@154 386 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
Chris@31 387
Chris@165 388 m_colsPerSec = m_hqMode ? 50 : 25;
Chris@165 389
Chris@41 390 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 391 delete m_postFilter[i];
Chris@41 392 }
Chris@41 393 m_postFilter.clear();
Chris@176 394 for (int i = 0; i < m_instruments[0].templateNoteCount; ++i) {
Chris@41 395 m_postFilter.push_back(new MedianFilter<double>(3));
Chris@41 396 }
Chris@41 397 m_pianoRoll.clear();
Chris@246 398 m_inputGains.clear();
Chris@32 399 m_columnCount = 0;
Chris@40 400 m_startTime = RealTime::zeroTime;
Chris@31 401 }
Chris@31 402
Chris@31 403 Silvet::FeatureSet
Chris@31 404 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
Chris@31 405 {
Chris@40 406 if (m_columnCount == 0) {
Chris@40 407 m_startTime = timestamp;
Chris@40 408 }
Chris@246 409
Chris@246 410 vector<float> flattened(m_blockSize);
Chris@246 411 float gain = 1.f;
Chris@246 412 m_flattener->connectInputPort
Chris@246 413 (FlattenDynamics::AudioInputPort, inputBuffers[0]);
Chris@246 414 m_flattener->connectOutputPort
Chris@246 415 (FlattenDynamics::AudioOutputPort, &flattened[0]);
Chris@246 416 m_flattener->connectOutputPort
Chris@246 417 (FlattenDynamics::GainOutputPort, &gain);
Chris@246 418 m_flattener->process(m_blockSize);
Chris@246 419
Chris@252 420 m_inputGains[timestamp] = gain;
Chris@40 421
Chris@31 422 vector<double> data;
Chris@40 423 for (int i = 0; i < m_blockSize; ++i) {
Chris@246 424 double d = flattened[i];
Chris@235 425 data.push_back(d);
Chris@40 426 }
Chris@31 427
Chris@31 428 if (m_resampler) {
Chris@31 429 data = m_resampler->process(data.data(), data.size());
Chris@31 430 }
Chris@246 431
Chris@32 432 Grid cqout = m_cq->process(data);
Chris@51 433 FeatureSet fs = transcribe(cqout);
Chris@51 434 return fs;
Chris@34 435 }
Chris@34 436
Chris@34 437 Silvet::FeatureSet
Chris@34 438 Silvet::getRemainingFeatures()
Chris@34 439 {
Chris@145 440 Grid cqout = m_cq->getRemainingOutput();
Chris@51 441 FeatureSet fs = transcribe(cqout);
Chris@51 442 return fs;
Chris@34 443 }
Chris@34 444
Chris@34 445 Silvet::FeatureSet
Chris@34 446 Silvet::transcribe(const Grid &cqout)
Chris@34 447 {
Chris@32 448 Grid filtered = preProcess(cqout);
Chris@31 449
Chris@32 450 FeatureSet fs;
Chris@32 451
Chris@104 452 if (filtered.empty()) return fs;
Chris@170 453
Chris@170 454 const InstrumentPack &pack = m_instruments[m_instrument];
Chris@104 455
Chris@178 456 for (int i = 0; i < (int)filtered.size(); ++i) {
Chris@178 457 Feature f;
Chris@178 458 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@178 459 f.values.push_back(float(filtered[i][j]));
Chris@178 460 }
Chris@178 461 fs[m_fcqOutputNo].push_back(f);
Chris@178 462 }
Chris@178 463
Chris@34 464 int width = filtered.size();
Chris@34 465
Chris@164 466 int iterations = m_hqMode ? 20 : 10;
Chris@34 467
Chris@170 468 //!!! pitches or notes? [terminology]
Chris@176 469 Grid localPitches(width, vector<double>(pack.templateNoteCount, 0.0));
Chris@170 470
Chris@170 471 bool wantShifts = m_hqMode && m_fineTuning;
Chris@170 472 int shiftCount = 1;
Chris@170 473 if (wantShifts) {
Chris@170 474 shiftCount = pack.templateMaxShift * 2 + 1;
Chris@170 475 }
Chris@170 476
Chris@170 477 vector<vector<int> > localBestShifts;
Chris@170 478 if (wantShifts) {
Chris@170 479 localBestShifts =
Chris@176 480 vector<vector<int> >(width, vector<int>(pack.templateNoteCount, 0));
Chris@170 481 }
Chris@170 482
Chris@170 483 vector<bool> present(width, false);
Chris@37 484
Chris@123 485 #pragma omp parallel for
Chris@123 486 for (int i = 0; i < width; ++i) {
Chris@104 487
Chris@170 488 double sum = 0.0;
Chris@176 489 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@170 490 sum += filtered.at(i).at(j);
Chris@170 491 }
Chris@170 492 if (sum < 1e-5) continue;
Chris@170 493
Chris@170 494 present[i] = true;
Chris@170 495
Chris@170 496 EM em(&pack, m_hqMode);
Chris@170 497
Chris@183 498 em.setPitchSparsity(pack.pitchSparsity);
Chris@213 499 em.setSourceSparsity(pack.sourceSparsity);
Chris@183 500
Chris@170 501 for (int j = 0; j < iterations; ++j) {
Chris@170 502 em.iterate(filtered.at(i).data());
Chris@37 503 }
Chris@37 504
Chris@170 505 const float *pitchDist = em.getPitchDistribution();
Chris@170 506 const float *const *shiftDist = em.getShifts();
Chris@37 507
Chris@176 508 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@104 509
Chris@170 510 localPitches[i][j] = pitchDist[j] * sum;
Chris@170 511
Chris@170 512 int bestShift = 0;
Chris@179 513 float bestShiftValue = 0.0;
Chris@170 514 if (wantShifts) {
Chris@170 515 for (int k = 0; k < shiftCount; ++k) {
Chris@179 516 float value = shiftDist[k][j];
Chris@179 517 if (k == 0 || value > bestShiftValue) {
Chris@179 518 bestShiftValue = value;
Chris@170 519 bestShift = k;
Chris@170 520 }
Chris@170 521 }
Chris@170 522 localBestShifts[i][j] = bestShift;
Chris@170 523 }
Chris@123 524 }
Chris@123 525 }
Chris@166 526
Chris@166 527 for (int i = 0; i < width; ++i) {
Chris@37 528
Chris@170 529 if (!present[i]) {
Chris@170 530 // silent column
Chris@176 531 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@170 532 m_postFilter[j]->push(0.0);
Chris@170 533 }
Chris@168 534 m_pianoRoll.push_back(map<int, double>());
Chris@170 535 if (wantShifts) {
Chris@168 536 m_pianoRollShifts.push_back(map<int, int>());
Chris@168 537 }
Chris@166 538 continue;
Chris@166 539 }
Chris@166 540
Chris@170 541 postProcess(localPitches[i], localBestShifts[i], wantShifts);
Chris@166 542
Chris@168 543 FeatureList noteFeatures = noteTrack(shiftCount);
Chris@38 544
Chris@123 545 for (FeatureList::const_iterator fi = noteFeatures.begin();
Chris@123 546 fi != noteFeatures.end(); ++fi) {
Chris@123 547 fs[m_notesOutputNo].push_back(*fi);
Chris@40 548 }
Chris@34 549 }
Chris@34 550
Chris@32 551 return fs;
Chris@31 552 }
Chris@31 553
Chris@32 554 Silvet::Grid
Chris@32 555 Silvet::preProcess(const Grid &in)
Chris@32 556 {
Chris@32 557 int width = in.size();
Chris@32 558
Chris@165 559 int spacing = processingSampleRate / m_colsPerSec;
Chris@32 560
Chris@165 561 // need to be careful that col spacing is an integer number of samples!
Chris@165 562 assert(spacing * m_colsPerSec == processingSampleRate);
Chris@32 563
Chris@32 564 Grid out;
Chris@32 565
Chris@58 566 // We count the CQ latency in terms of processing hops, but
Chris@58 567 // actually it probably isn't an exact number of hops so this
Chris@58 568 // isn't quite accurate. But the small constant offset is
Chris@165 569 // practically irrelevant compared to the jitter from the frame
Chris@165 570 // size we reduce to in a moment
Chris@33 571 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
Chris@33 572
Chris@176 573 const InstrumentPack &pack = m_instruments[m_instrument];
Chris@176 574
Chris@32 575 for (int i = 0; i < width; ++i) {
Chris@32 576
Chris@33 577 if (m_columnCount < latentColumns) {
Chris@33 578 ++m_columnCount;
Chris@33 579 continue;
Chris@33 580 }
Chris@33 581
Chris@32 582 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop();
Chris@32 583 int sampleNo = m_columnCount * m_cq->getColumnHop();
Chris@32 584
Chris@32 585 bool select = (sampleNo / spacing != prevSampleNo / spacing);
Chris@32 586
Chris@32 587 if (select) {
Chris@32 588 vector<double> inCol = in[i];
Chris@176 589 vector<double> outCol(pack.templateHeight);
Chris@32 590
Chris@178 591 // In HQ mode, the CQ returns 600 bins and we ignore the
Chris@178 592 // lowest 55 of them.
Chris@178 593 //
Chris@178 594 // In draft mode the CQ is an octave shorter, returning
Chris@178 595 // 540 bins, so we instead pad them with an additional 5
Chris@178 596 // zeros.
Chris@178 597 //
Chris@178 598 // We also need to reverse the column as we go, since the
Chris@178 599 // raw CQ has the high frequencies first and we need it
Chris@178 600 // the other way around.
Chris@32 601
Chris@178 602 if (m_hqMode) {
Chris@178 603 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@178 604 int ix = inCol.size() - j - 55;
Chris@178 605 outCol[j] = inCol[ix];
Chris@178 606 }
Chris@178 607 } else {
Chris@178 608 for (int j = 0; j < 5; ++j) {
Chris@178 609 outCol[j] = 0.0;
Chris@178 610 }
Chris@178 611 for (int j = 5; j < pack.templateHeight; ++j) {
Chris@178 612 int ix = inCol.size() - j + 4;
Chris@178 613 outCol[j] = inCol[ix];
Chris@178 614 }
Chris@46 615 }
Chris@32 616
Chris@46 617 vector<double> noiseLevel1 =
Chris@46 618 MedianFilter<double>::filter(40, outCol);
Chris@176 619 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 620 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
Chris@46 621 }
Chris@32 622
Chris@46 623 vector<double> noiseLevel2 =
Chris@46 624 MedianFilter<double>::filter(40, noiseLevel1);
Chris@176 625 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 626 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
Chris@32 627 }
Chris@32 628
Chris@165 629 out.push_back(outCol);
Chris@32 630 }
Chris@32 631
Chris@32 632 ++m_columnCount;
Chris@32 633 }
Chris@32 634
Chris@32 635 return out;
Chris@32 636 }
Chris@32 637
Chris@168 638 void
Chris@170 639 Silvet::postProcess(const vector<double> &pitches,
Chris@170 640 const vector<int> &bestShifts,
Chris@170 641 bool wantShifts)
Chris@166 642 {
Chris@176 643 const InstrumentPack &pack = m_instruments[m_instrument];
Chris@176 644
Chris@41 645 vector<double> filtered;
Chris@41 646
Chris@176 647 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@170 648 m_postFilter[j]->push(pitches[j]);
Chris@41 649 filtered.push_back(m_postFilter[j]->get());
Chris@41 650 }
Chris@41 651
Chris@41 652 // Threshold for level and reduce number of candidate pitches
Chris@41 653
Chris@41 654 typedef std::multimap<double, int> ValueIndexMap;
Chris@41 655
Chris@41 656 ValueIndexMap strengths;
Chris@166 657
Chris@176 658 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@166 659 double strength = filtered[j];
Chris@183 660 if (strength < pack.levelThreshold) continue;
Chris@168 661 strengths.insert(ValueIndexMap::value_type(strength, j));
Chris@168 662 }
Chris@166 663
Chris@168 664 ValueIndexMap::const_iterator si = strengths.end();
Chris@167 665
Chris@168 666 map<int, double> active;
Chris@168 667 map<int, int> activeShifts;
Chris@168 668
Chris@183 669 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) {
Chris@168 670
Chris@168 671 --si;
Chris@168 672
Chris@168 673 double strength = si->first;
Chris@168 674 int j = si->second;
Chris@168 675
Chris@168 676 active[j] = strength;
Chris@168 677
Chris@170 678 if (wantShifts) {
Chris@170 679 activeShifts[j] = bestShifts[j];
Chris@167 680 }
Chris@41 681 }
Chris@41 682
Chris@168 683 m_pianoRoll.push_back(active);
Chris@170 684
Chris@170 685 if (wantShifts) {
Chris@168 686 m_pianoRollShifts.push_back(activeShifts);
Chris@41 687 }
Chris@166 688 }
Chris@166 689
Chris@166 690 Vamp::Plugin::FeatureList
Chris@168 691 Silvet::noteTrack(int shiftCount)
Chris@166 692 {
Chris@41 693 // Minimum duration pruning, and conversion to notes. We can only
Chris@41 694 // report notes that have just ended (i.e. that are absent in the
Chris@168 695 // latest active set but present in the prior set in the piano
Chris@41 696 // roll) -- any notes that ended earlier will have been reported
Chris@41 697 // already, and if they haven't ended, we don't know their
Chris@41 698 // duration.
Chris@41 699
Chris@168 700 int width = m_pianoRoll.size() - 1;
Chris@168 701
Chris@168 702 const map<int, double> &active = m_pianoRoll[width];
Chris@41 703
Chris@165 704 double columnDuration = 1.0 / m_colsPerSec;
Chris@165 705
Chris@165 706 // only keep notes >= 100ms or thereabouts
Chris@165 707 int durationThreshold = floor(0.1 / columnDuration); // columns
Chris@165 708 if (durationThreshold < 1) durationThreshold = 1;
Chris@41 709
Chris@41 710 FeatureList noteFeatures;
Chris@41 711
Chris@41 712 if (width < durationThreshold + 1) {
Chris@41 713 return noteFeatures;
Chris@41 714 }
Chris@41 715
Chris@150 716 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix)
Chris@150 717
Chris@55 718 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
Chris@41 719 ni != m_pianoRoll[width-1].end(); ++ni) {
Chris@41 720
Chris@55 721 int note = ni->first;
Chris@41 722
Chris@41 723 if (active.find(note) != active.end()) {
Chris@41 724 // the note is still playing
Chris@41 725 continue;
Chris@41 726 }
Chris@41 727
Chris@41 728 // the note was playing but just ended
Chris@41 729 int end = width;
Chris@41 730 int start = end-1;
Chris@41 731
Chris@41 732 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
Chris@41 733 --start;
Chris@41 734 }
Chris@41 735 ++start;
Chris@41 736
Chris@169 737 if ((end - start) < durationThreshold) {
Chris@41 738 continue;
Chris@41 739 }
Chris@41 740
Chris@169 741 emitNote(start, end, note, shiftCount, noteFeatures);
Chris@41 742 }
Chris@41 743
Chris@62 744 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
Chris@41 745
Chris@41 746 return noteFeatures;
Chris@41 747 }
Chris@41 748
Chris@169 749 void
Chris@169 750 Silvet::emitNote(int start, int end, int note, int shiftCount,
Chris@169 751 FeatureList &noteFeatures)
Chris@169 752 {
Chris@169 753 int partStart = start;
Chris@169 754 int partShift = 0;
Chris@169 755 int partVelocity = 0;
Chris@169 756
Chris@252 757 int partThreshold = floor(0.05 * m_colsPerSec);
Chris@169 758
Chris@169 759 for (int i = start; i != end; ++i) {
Chris@169 760
Chris@169 761 double strength = m_pianoRoll[i][note];
Chris@169 762
Chris@169 763 int shift = 0;
Chris@169 764
Chris@169 765 if (shiftCount > 1) {
Chris@169 766
Chris@169 767 shift = m_pianoRollShifts[i][note];
Chris@169 768
Chris@169 769 if (i == partStart) {
Chris@169 770 partShift = shift;
Chris@169 771 }
Chris@169 772
Chris@169 773 if (i > partStart + partThreshold && shift != partShift) {
Chris@169 774
Chris@169 775 // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl;
Chris@169 776
Chris@169 777 // pitch has changed, emit an intermediate note
Chris@252 778 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 779 i,
Chris@252 780 note,
Chris@252 781 partShift,
Chris@252 782 shiftCount,
Chris@252 783 partVelocity));
Chris@169 784 partStart = i;
Chris@169 785 partShift = shift;
Chris@169 786 partVelocity = 0;
Chris@169 787 }
Chris@169 788 }
Chris@169 789
Chris@246 790 int v = round(strength * 2);
Chris@169 791 if (v > partVelocity) {
Chris@169 792 partVelocity = v;
Chris@169 793 }
Chris@169 794 }
Chris@169 795
Chris@169 796 if (end >= partStart + partThreshold) {
Chris@252 797 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 798 end,
Chris@252 799 note,
Chris@252 800 partShift,
Chris@252 801 shiftCount,
Chris@252 802 partVelocity));
Chris@169 803 }
Chris@169 804 }
Chris@252 805
Chris@252 806 Silvet::Feature
Chris@252 807 Silvet::makeNoteFeature(int start,
Chris@252 808 int end,
Chris@252 809 int note,
Chris@252 810 int shift,
Chris@252 811 int shiftCount,
Chris@252 812 int velocity)
Chris@252 813 {
Chris@252 814 double columnDuration = 1.0 / m_colsPerSec;
Chris@252 815 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
Chris@252 816
Chris@252 817 Feature f;
Chris@252 818
Chris@252 819 f.hasTimestamp = true;
Chris@252 820 f.timestamp = RealTime::fromSeconds
Chris@252 821 (columnDuration * (start - postFilterLatency) + 0.02);
Chris@252 822
Chris@252 823 f.hasDuration = true;
Chris@252 824 f.duration = RealTime::fromSeconds
Chris@252 825 (columnDuration * (end - start));
Chris@252 826
Chris@252 827 f.values.clear();
Chris@252 828
Chris@252 829 f.values.push_back
Chris@252 830 (noteFrequency(note, shift, shiftCount));
Chris@252 831
Chris@252 832 float inputGain = getInputGainAt(f.timestamp);
Chris@252 833 // cerr << "adjusting velocity from " << velocity << " to " << round(velocity/inputGain) << endl;
Chris@252 834 velocity = round(velocity / inputGain);
Chris@252 835 if (velocity > 127) velocity = 127;
Chris@252 836 if (velocity < 1) velocity = 1;
Chris@252 837 f.values.push_back(velocity);
Chris@252 838
Chris@252 839 f.label = noteName(note, shift, shiftCount);
Chris@252 840
Chris@252 841 return f;
Chris@252 842 }
Chris@252 843
Chris@252 844 float
Chris@252 845 Silvet::getInputGainAt(RealTime t)
Chris@252 846 {
Chris@252 847 map<RealTime, float>::const_iterator i = m_inputGains.lower_bound(t);
Chris@252 848
Chris@252 849 if (i == m_inputGains.end()) {
Chris@252 850 if (i != m_inputGains.begin()) {
Chris@252 851 --i;
Chris@252 852 } else {
Chris@252 853 return 1.f; // no data
Chris@252 854 }
Chris@252 855 }
Chris@252 856
Chris@252 857 // cerr << "gain at time " << t << " = " << i->second << endl;
Chris@252 858
Chris@252 859 return i->second;
Chris@252 860 }
Chris@252 861