annotate src/Silvet.cpp @ 189:3de7c871d9c8 noteagent

Fixes to mono feeder; use it for monophonic instruments
author Chris Cannam
date Thu, 29 May 2014 10:30:08 +0100
parents 462b165c8c0f
children 28cbc7eaf415
rev   line source
Chris@31 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@31 2
Chris@31 3 /*
Chris@31 4 Silvet
Chris@31 5
Chris@31 6 A Vamp plugin for note transcription.
Chris@31 7 Centre for Digital Music, Queen Mary University of London.
Chris@31 8
Chris@31 9 This program is free software; you can redistribute it and/or
Chris@31 10 modify it under the terms of the GNU General Public License as
Chris@31 11 published by the Free Software Foundation; either version 2 of the
Chris@31 12 License, or (at your option) any later version. See the file
Chris@31 13 COPYING included with this distribution for more information.
Chris@31 14 */
Chris@31 15
Chris@31 16 #include "Silvet.h"
Chris@34 17 #include "EM.h"
Chris@31 18
Chris@152 19 #include <cq/CQSpectrogram.h>
Chris@31 20
Chris@152 21 #include "MedianFilter.h"
Chris@184 22 #include "AgentFeederPoly.h"
Chris@189 23 #include "AgentFeederMono.h"
Chris@184 24 #include "NoteHypothesis.h"
Chris@184 25
Chris@152 26 #include "constant-q-cpp/src/dsp/Resampler.h"
Chris@31 27
Chris@31 28 #include <vector>
Chris@31 29
Chris@32 30 #include <cstdio>
Chris@32 31
Chris@31 32 using std::vector;
Chris@48 33 using std::cout;
Chris@31 34 using std::cerr;
Chris@31 35 using std::endl;
Chris@40 36 using Vamp::RealTime;
Chris@31 37
// Sample rate (Hz) at which the constant-Q transform is run. Input at
// any other rate is resampled to this rate (see reset() and process()).
// Never modified at run time, hence const.
static const int processingSampleRate = 44100;

// Bins-per-octave value passed to CQParameters in reset().
static const int processingBPO = 60;
Chris@170 40
Chris@31 41 Silvet::Silvet(float inputSampleRate) :
Chris@31 42 Plugin(inputSampleRate),
Chris@161 43 m_instruments(InstrumentPack::listInstrumentPacks()),
Chris@31 44 m_resampler(0),
Chris@110 45 m_cq(0),
Chris@162 46 m_hqMode(true),
Chris@166 47 m_fineTuning(false),
Chris@178 48 m_instrument(0),
Chris@184 49 m_colsPerSec(50),
Chris@184 50 m_agentFeeder(0)
Chris@31 51 {
Chris@31 52 }
Chris@31 53
Chris@31 54 Silvet::~Silvet()
Chris@31 55 {
Chris@31 56 delete m_resampler;
Chris@31 57 delete m_cq;
Chris@41 58 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 59 delete m_postFilter[i];
Chris@41 60 }
Chris@184 61 delete m_agentFeeder;
Chris@31 62 }
Chris@31 63
Chris@31 64 string
Chris@31 65 Silvet::getIdentifier() const
Chris@31 66 {
Chris@31 67 return "silvet";
Chris@31 68 }
Chris@31 69
Chris@31 70 string
Chris@31 71 Silvet::getName() const
Chris@31 72 {
Chris@31 73 return "Silvet Note Transcription";
Chris@31 74 }
Chris@31 75
Chris@31 76 string
Chris@31 77 Silvet::getDescription() const
Chris@31 78 {
Chris@31 79 // Return something helpful here!
Chris@31 80 return "";
Chris@31 81 }
Chris@31 82
Chris@31 83 string
Chris@31 84 Silvet::getMaker() const
Chris@31 85 {
Chris@31 86 // Your name here
Chris@31 87 return "";
Chris@31 88 }
Chris@31 89
Chris@31 90 int
Chris@31 91 Silvet::getPluginVersion() const
Chris@31 92 {
Chris@31 93 return 1;
Chris@31 94 }
Chris@31 95
Chris@31 96 string
Chris@31 97 Silvet::getCopyright() const
Chris@31 98 {
Chris@31 99 // This function is not ideally named. It does not necessarily
Chris@31 100 // need to say who made the plugin -- getMaker does that -- but it
Chris@31 101 // should indicate the terms under which it is distributed. For
Chris@31 102 // example, "Copyright (year). All Rights Reserved", or "GPL"
Chris@31 103 return "";
Chris@31 104 }
Chris@31 105
Chris@31 106 Silvet::InputDomain
Chris@31 107 Silvet::getInputDomain() const
Chris@31 108 {
Chris@31 109 return TimeDomain;
Chris@31 110 }
Chris@31 111
Chris@31 112 size_t
Chris@31 113 Silvet::getPreferredBlockSize() const
Chris@31 114 {
Chris@31 115 return 0;
Chris@31 116 }
Chris@31 117
Chris@31 118 size_t
Chris@31 119 Silvet::getPreferredStepSize() const
Chris@31 120 {
Chris@31 121 return 0;
Chris@31 122 }
Chris@31 123
Chris@31 124 size_t
Chris@31 125 Silvet::getMinChannelCount() const
Chris@31 126 {
Chris@31 127 return 1;
Chris@31 128 }
Chris@31 129
Chris@31 130 size_t
Chris@31 131 Silvet::getMaxChannelCount() const
Chris@31 132 {
Chris@31 133 return 1;
Chris@31 134 }
Chris@31 135
Chris@31 136 Silvet::ParameterList
Chris@31 137 Silvet::getParameterDescriptors() const
Chris@31 138 {
Chris@31 139 ParameterList list;
Chris@110 140
Chris@110 141 ParameterDescriptor desc;
Chris@110 142 desc.identifier = "mode";
Chris@110 143 desc.name = "Processing mode";
Chris@110 144 desc.unit = "";
Chris@110 145 desc.description = "Determines the tradeoff of processing speed against transcription quality";
Chris@110 146 desc.minValue = 0;
Chris@110 147 desc.maxValue = 1;
Chris@113 148 desc.defaultValue = 1;
Chris@110 149 desc.isQuantized = true;
Chris@110 150 desc.quantizeStep = 1;
Chris@166 151 desc.valueNames.push_back("Draft (faster)");
Chris@165 152 desc.valueNames.push_back("Intensive (higher quality)");
Chris@161 153 list.push_back(desc);
Chris@161 154
Chris@176 155 desc.identifier = "instrument";
Chris@176 156 desc.name = "Instrument";
Chris@161 157 desc.unit = "";
Chris@162 158 desc.description = "The instrument known to be present in the recording, if there is only one";
Chris@161 159 desc.minValue = 0;
Chris@162 160 desc.maxValue = m_instruments.size()-1;
Chris@162 161 desc.defaultValue = 0;
Chris@161 162 desc.isQuantized = true;
Chris@161 163 desc.quantizeStep = 1;
Chris@161 164 desc.valueNames.clear();
Chris@162 165 for (int i = 0; i < int(m_instruments.size()); ++i) {
Chris@162 166 desc.valueNames.push_back(m_instruments[i].name);
Chris@162 167 }
Chris@166 168 list.push_back(desc);
Chris@161 169
Chris@166 170 desc.identifier = "finetune";
Chris@166 171 desc.name = "Return fine pitch estimates";
Chris@166 172 desc.unit = "";
Chris@166 173 desc.description = "Return pitch estimates at finer than semitone resolution (works only in Intensive mode)";
Chris@166 174 desc.minValue = 0;
Chris@166 175 desc.maxValue = 1;
Chris@166 176 desc.defaultValue = 0;
Chris@166 177 desc.isQuantized = true;
Chris@166 178 desc.quantizeStep = 1;
Chris@166 179 desc.valueNames.clear();
Chris@110 180 list.push_back(desc);
Chris@110 181
Chris@31 182 return list;
Chris@31 183 }
Chris@31 184
Chris@31 185 float
Chris@31 186 Silvet::getParameter(string identifier) const
Chris@31 187 {
Chris@110 188 if (identifier == "mode") {
Chris@110 189 return m_hqMode ? 1.f : 0.f;
Chris@166 190 } else if (identifier == "finetune") {
Chris@166 191 return m_fineTuning ? 1.f : 0.f;
Chris@176 192 } else if (identifier == "instrument") {
Chris@162 193 return m_instrument;
Chris@110 194 }
Chris@31 195 return 0;
Chris@31 196 }
Chris@31 197
Chris@31 198 void
Chris@31 199 Silvet::setParameter(string identifier, float value)
Chris@31 200 {
Chris@110 201 if (identifier == "mode") {
Chris@110 202 m_hqMode = (value > 0.5);
Chris@166 203 } else if (identifier == "finetune") {
Chris@166 204 m_fineTuning = (value > 0.5);
Chris@176 205 } else if (identifier == "instrument") {
Chris@162 206 m_instrument = lrintf(value);
Chris@110 207 }
Chris@31 208 }
Chris@31 209
Chris@31 210 Silvet::ProgramList
Chris@31 211 Silvet::getPrograms() const
Chris@31 212 {
Chris@31 213 ProgramList list;
Chris@31 214 return list;
Chris@31 215 }
Chris@31 216
Chris@31 217 string
Chris@31 218 Silvet::getCurrentProgram() const
Chris@31 219 {
Chris@31 220 return "";
Chris@31 221 }
Chris@31 222
Chris@31 223 void
Chris@31 224 Silvet::selectProgram(string name)
Chris@31 225 {
Chris@31 226 }
Chris@31 227
Chris@31 228 Silvet::OutputList
Chris@31 229 Silvet::getOutputDescriptors() const
Chris@31 230 {
Chris@31 231 OutputList list;
Chris@31 232
Chris@31 233 OutputDescriptor d;
Chris@51 234 d.identifier = "notes";
Chris@51 235 d.name = "Note transcription";
Chris@162 236 d.description = "Overall note transcription across selected instruments";
Chris@41 237 d.unit = "Hz";
Chris@31 238 d.hasFixedBinCount = true;
Chris@31 239 d.binCount = 2;
Chris@41 240 d.binNames.push_back("Frequency");
Chris@31 241 d.binNames.push_back("Velocity");
Chris@31 242 d.hasKnownExtents = false;
Chris@31 243 d.isQuantized = false;
Chris@31 244 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@51 245 d.sampleRate = m_inputSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@31 246 d.hasDuration = true;
Chris@32 247 m_notesOutputNo = list.size();
Chris@32 248 list.push_back(d);
Chris@32 249
Chris@178 250 d.identifier = "timefreq";
Chris@178 251 d.name = "Time-frequency distribution";
Chris@178 252 d.description = "Filtered constant-Q time-frequency distribution used as input to the expectation-maximisation algorithm";
Chris@178 253 d.unit = "";
Chris@178 254 d.hasFixedBinCount = true;
Chris@178 255 d.binCount = m_instruments[0].templateHeight;
Chris@178 256 d.binNames.clear();
Chris@178 257 if (m_cq) {
Chris@178 258 char name[20];
Chris@178 259 for (int i = 0; i < m_instruments[0].templateHeight; ++i) {
Chris@178 260 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@178 261 // lowest-frequency 55 bins have been dropped, for a
Chris@178 262 // 545-bin template. The native CQ bins go high->low
Chris@178 263 // frequency though, so these are still the first 545 bins
Chris@178 264 // as reported by getBinFrequency, though in reverse order
Chris@178 265 float freq = m_cq->getBinFrequency
Chris@178 266 (m_instruments[0].templateHeight - i - 1);
Chris@178 267 sprintf(name, "%.1f Hz", freq);
Chris@178 268 d.binNames.push_back(name);
Chris@178 269 }
Chris@178 270 }
Chris@178 271 d.hasKnownExtents = false;
Chris@178 272 d.isQuantized = false;
Chris@178 273 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@178 274 d.sampleRate = m_colsPerSec;
Chris@178 275 d.hasDuration = false;
Chris@178 276 m_fcqOutputNo = list.size();
Chris@178 277 list.push_back(d);
Chris@178 278
Chris@31 279 return list;
Chris@31 280 }
Chris@31 281
Chris@38 282 std::string
Chris@175 283 Silvet::noteName(int note, int shift, int shiftCount) const
Chris@38 284 {
Chris@38 285 static const char *names[] = {
Chris@38 286 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
Chris@38 287 };
Chris@38 288
Chris@175 289 const char *n = names[note % 12];
Chris@38 290
Chris@175 291 int oct = (note + 9) / 12;
Chris@38 292
Chris@175 293 char buf[30];
Chris@175 294
Chris@175 295 float pshift = 0.f;
Chris@175 296 if (shiftCount > 1) {
Chris@175 297 // see noteFrequency below
Chris@175 298 pshift =
Chris@175 299 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 300 }
Chris@175 301
Chris@175 302 if (pshift > 0.f) {
Chris@175 303 sprintf(buf, "%s%d+%dc", n, oct, int(round(pshift * 100)));
Chris@175 304 } else if (pshift < 0.f) {
Chris@175 305 sprintf(buf, "%s%d-%dc", n, oct, int(round((-pshift) * 100)));
Chris@175 306 } else {
Chris@175 307 sprintf(buf, "%s%d", n, oct);
Chris@175 308 }
Chris@38 309
Chris@38 310 return buf;
Chris@38 311 }
Chris@38 312
Chris@41 313 float
Chris@168 314 Silvet::noteFrequency(int note, int shift, int shiftCount) const
Chris@41 315 {
Chris@169 316 // Convert shift number to a pitch shift. The given shift number
Chris@169 317 // is an offset into the template array, which starts with some
Chris@169 318 // zeros, followed by the template, then some trailing zeros.
Chris@169 319 //
Chris@169 320 // Example: if we have templateMaxShift == 2 and thus shiftCount
Chris@169 321 // == 5, then the number will be in the range 0-4 and the template
Chris@169 322 // will have 2 zeros at either end. Thus number 2 represents the
Chris@169 323 // template "as recorded", for a pitch shift of 0; smaller indices
Chris@169 324 // represent moving the template *up* in pitch (by introducing
Chris@169 325 // zeros at the start, which is the low-frequency end), for a
Chris@169 326 // positive pitch shift; and higher values represent moving it
Chris@169 327 // down in pitch, for a negative pitch shift.
Chris@169 328
Chris@175 329 float pshift = 0.f;
Chris@175 330 if (shiftCount > 1) {
Chris@175 331 pshift =
Chris@175 332 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 333 }
Chris@169 334
Chris@169 335 return float(27.5 * pow(2.0, (note + pshift) / 12.0));
Chris@41 336 }
Chris@41 337
Chris@188 338 float
Chris@188 339 Silvet::roundToMidiFrequency(float freq) const
Chris@188 340 {
Chris@188 341 // n is our note number, not actually MIDI note number as we have
Chris@188 342 // a different origin
Chris@188 343 float n = 12.0 * (log(freq / 27.5) / log(2.0));
Chris@188 344 return 27.5 * pow(2.0, round(n) / 12.0);
Chris@188 345 }
Chris@188 346
Chris@31 347 bool
Chris@31 348 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@31 349 {
Chris@31 350 if (channels < getMinChannelCount() ||
Chris@31 351 channels > getMaxChannelCount()) return false;
Chris@31 352
Chris@31 353 if (stepSize != blockSize) {
Chris@31 354 cerr << "Silvet::initialise: Step size must be the same as block size ("
Chris@31 355 << stepSize << " != " << blockSize << ")" << endl;
Chris@31 356 return false;
Chris@31 357 }
Chris@31 358
Chris@31 359 m_blockSize = blockSize;
Chris@31 360
Chris@31 361 reset();
Chris@31 362
Chris@31 363 return true;
Chris@31 364 }
Chris@31 365
Chris@31 366 void
Chris@31 367 Silvet::reset()
Chris@31 368 {
Chris@31 369 delete m_resampler;
Chris@31 370 delete m_cq;
Chris@184 371 delete m_agentFeeder;
Chris@31 372
Chris@31 373 if (m_inputSampleRate != processingSampleRate) {
Chris@31 374 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
Chris@31 375 } else {
Chris@31 376 m_resampler = 0;
Chris@31 377 }
Chris@31 378
Chris@173 379 double minFreq = 27.5;
Chris@173 380
Chris@173 381 if (!m_hqMode) {
Chris@173 382 // We don't actually return any notes from the bottom octave,
Chris@173 383 // so we can just pad with zeros
Chris@173 384 minFreq *= 2;
Chris@173 385 }
Chris@173 386
Chris@154 387 CQParameters params(processingSampleRate,
Chris@173 388 minFreq,
Chris@154 389 processingSampleRate / 3,
Chris@154 390 processingBPO);
Chris@154 391
Chris@155 392 params.q = 0.95; // MIREX code uses 0.8, but it seems 0.9 or lower
Chris@155 393 // drops the FFT size to 512 from 1024 and alters
Chris@155 394 // some other processing parameters, making
Chris@155 395 // everything much, much slower. Could be a flaw
Chris@155 396 // in the CQ parameter calculations, must check
Chris@154 397 params.atomHopFactor = 0.3;
Chris@154 398 params.threshold = 0.0005;
Chris@172 399 params.window = CQParameters::Hann;
Chris@154 400
Chris@154 401 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
Chris@31 402
Chris@165 403 m_colsPerSec = m_hqMode ? 50 : 25;
Chris@165 404
Chris@41 405 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 406 delete m_postFilter[i];
Chris@41 407 }
Chris@41 408 m_postFilter.clear();
Chris@189 409 for (int i = 0; i < m_instruments[m_instrument].templateNoteCount; ++i) {
Chris@188 410 //!!! m_postFilter.push_back(new MedianFilter<double>(3));
Chris@188 411 m_postFilter.push_back(new MedianFilter<double>(1));//!!!
Chris@41 412 }
Chris@184 413
Chris@184 414 m_columnCountIn = 0;
Chris@184 415 m_columnCountOut = 0;
Chris@40 416 m_startTime = RealTime::zeroTime;
Chris@184 417
Chris@189 418 if (m_instruments[m_instrument].maxPolyphony == 1) {
Chris@189 419 m_agentFeeder = new AgentFeederMono<NoteHypothesis>();
Chris@189 420 } else {
Chris@189 421 m_agentFeeder = new AgentFeederPoly<NoteHypothesis>();
Chris@189 422 }
Chris@31 423 }
Chris@31 424
Chris@31 425 Silvet::FeatureSet
Chris@31 426 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
Chris@31 427 {
Chris@184 428 if (m_columnCountIn == 0) {
Chris@40 429 m_startTime = timestamp;
Chris@40 430 }
Chris@40 431
Chris@31 432 vector<double> data;
Chris@40 433 for (int i = 0; i < m_blockSize; ++i) {
Chris@40 434 data.push_back(inputBuffers[0][i]);
Chris@40 435 }
Chris@31 436
Chris@31 437 if (m_resampler) {
Chris@31 438 data = m_resampler->process(data.data(), data.size());
Chris@31 439 }
Chris@31 440
Chris@32 441 Grid cqout = m_cq->process(data);
Chris@51 442 FeatureSet fs = transcribe(cqout);
Chris@51 443 return fs;
Chris@34 444 }
Chris@34 445
Chris@34 446 Silvet::FeatureSet
Chris@34 447 Silvet::getRemainingFeatures()
Chris@34 448 {
Chris@145 449 Grid cqout = m_cq->getRemainingOutput();
Chris@184 450
Chris@51 451 FeatureSet fs = transcribe(cqout);
Chris@184 452
Chris@184 453 m_agentFeeder->finish();
Chris@184 454
Chris@184 455 FeatureList noteFeatures = obtainNotes();
Chris@184 456 for (FeatureList::const_iterator fi = noteFeatures.begin();
Chris@184 457 fi != noteFeatures.end(); ++fi) {
Chris@184 458 fs[m_notesOutputNo].push_back(*fi);
Chris@184 459 }
Chris@184 460
Chris@51 461 return fs;
Chris@34 462 }
Chris@34 463
Chris@34 464 Silvet::FeatureSet
Chris@34 465 Silvet::transcribe(const Grid &cqout)
Chris@34 466 {
Chris@32 467 Grid filtered = preProcess(cqout);
Chris@31 468
Chris@32 469 FeatureSet fs;
Chris@32 470
Chris@104 471 if (filtered.empty()) return fs;
Chris@170 472
Chris@170 473 const InstrumentPack &pack = m_instruments[m_instrument];
Chris@104 474
Chris@178 475 for (int i = 0; i < (int)filtered.size(); ++i) {
Chris@178 476 Feature f;
Chris@178 477 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@178 478 f.values.push_back(float(filtered[i][j]));
Chris@178 479 }
Chris@178 480 fs[m_fcqOutputNo].push_back(f);
Chris@178 481 }
Chris@178 482
Chris@34 483 int width = filtered.size();
Chris@34 484
Chris@164 485 int iterations = m_hqMode ? 20 : 10;
Chris@34 486
Chris@170 487 //!!! pitches or notes? [terminology]
Chris@176 488 Grid localPitches(width, vector<double>(pack.templateNoteCount, 0.0));
Chris@170 489
Chris@184 490 bool wantShifts = m_hqMode;
Chris@170 491 int shiftCount = 1;
Chris@170 492 if (wantShifts) {
Chris@170 493 shiftCount = pack.templateMaxShift * 2 + 1;
Chris@170 494 }
Chris@170 495
Chris@170 496 vector<vector<int> > localBestShifts;
Chris@170 497 if (wantShifts) {
Chris@170 498 localBestShifts =
Chris@176 499 vector<vector<int> >(width, vector<int>(pack.templateNoteCount, 0));
Chris@170 500 }
Chris@170 501
Chris@170 502 vector<bool> present(width, false);
Chris@37 503
Chris@123 504 #pragma omp parallel for
Chris@123 505 for (int i = 0; i < width; ++i) {
Chris@104 506
Chris@170 507 double sum = 0.0;
Chris@176 508 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@170 509 sum += filtered.at(i).at(j);
Chris@170 510 }
Chris@170 511 if (sum < 1e-5) continue;
Chris@170 512
Chris@170 513 present[i] = true;
Chris@170 514
Chris@170 515 EM em(&pack, m_hqMode);
Chris@170 516
Chris@183 517 em.setPitchSparsity(pack.pitchSparsity);
Chris@183 518
Chris@170 519 for (int j = 0; j < iterations; ++j) {
Chris@170 520 em.iterate(filtered.at(i).data());
Chris@37 521 }
Chris@37 522
Chris@170 523 const float *pitchDist = em.getPitchDistribution();
Chris@170 524 const float *const *shiftDist = em.getShifts();
Chris@37 525
Chris@176 526 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@104 527
Chris@170 528 localPitches[i][j] = pitchDist[j] * sum;
Chris@170 529
Chris@170 530 int bestShift = 0;
Chris@179 531 float bestShiftValue = 0.0;
Chris@170 532 if (wantShifts) {
Chris@170 533 for (int k = 0; k < shiftCount; ++k) {
Chris@179 534 float value = shiftDist[k][j];
Chris@179 535 if (k == 0 || value > bestShiftValue) {
Chris@179 536 bestShiftValue = value;
Chris@170 537 bestShift = k;
Chris@170 538 }
Chris@170 539 }
Chris@170 540 localBestShifts[i][j] = bestShift;
Chris@170 541 }
Chris@123 542 }
Chris@123 543 }
Chris@166 544
Chris@166 545 for (int i = 0; i < width; ++i) {
Chris@37 546
Chris@170 547 if (!present[i]) {
Chris@170 548 // silent column
Chris@176 549 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@170 550 m_postFilter[j]->push(0.0);
Chris@170 551 }
Chris@186 552 } else {
Chris@186 553
Chris@186 554 postProcess(localPitches[i], localBestShifts[i],
Chris@186 555 wantShifts, shiftCount);
Chris@186 556
Chris@186 557 FeatureList noteFeatures = obtainNotes();
Chris@186 558
Chris@186 559 for (FeatureList::const_iterator fi = noteFeatures.begin();
Chris@186 560 fi != noteFeatures.end(); ++fi) {
Chris@186 561 fs[m_notesOutputNo].push_back(*fi);
Chris@186 562 }
Chris@166 563 }
Chris@166 564
Chris@186 565 ++m_columnCountOut;
Chris@34 566 }
Chris@34 567
Chris@32 568 return fs;
Chris@31 569 }
Chris@31 570
Chris@32 571 Silvet::Grid
Chris@32 572 Silvet::preProcess(const Grid &in)
Chris@32 573 {
Chris@32 574 int width = in.size();
Chris@32 575
Chris@165 576 int spacing = processingSampleRate / m_colsPerSec;
Chris@32 577
Chris@165 578 // need to be careful that col spacing is an integer number of samples!
Chris@165 579 assert(spacing * m_colsPerSec == processingSampleRate);
Chris@32 580
Chris@32 581 Grid out;
Chris@32 582
Chris@58 583 // We count the CQ latency in terms of processing hops, but
Chris@58 584 // actually it probably isn't an exact number of hops so this
Chris@58 585 // isn't quite accurate. But the small constant offset is
Chris@165 586 // practically irrelevant compared to the jitter from the frame
Chris@165 587 // size we reduce to in a moment
Chris@33 588 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
Chris@33 589
Chris@176 590 const InstrumentPack &pack = m_instruments[m_instrument];
Chris@176 591
Chris@32 592 for (int i = 0; i < width; ++i) {
Chris@32 593
Chris@184 594 if (m_columnCountIn < latentColumns) {
Chris@184 595 ++m_columnCountIn;
Chris@33 596 continue;
Chris@33 597 }
Chris@33 598
Chris@184 599 int prevSampleNo = (m_columnCountIn - 1) * m_cq->getColumnHop();
Chris@184 600 int sampleNo = m_columnCountIn * m_cq->getColumnHop();
Chris@32 601
Chris@32 602 bool select = (sampleNo / spacing != prevSampleNo / spacing);
Chris@32 603
Chris@32 604 if (select) {
Chris@32 605 vector<double> inCol = in[i];
Chris@176 606 vector<double> outCol(pack.templateHeight);
Chris@32 607
Chris@178 608 // In HQ mode, the CQ returns 600 bins and we ignore the
Chris@178 609 // lowest 55 of them.
Chris@178 610 //
Chris@178 611 // In draft mode the CQ is an octave shorter, returning
Chris@178 612 // 540 bins, so we instead pad them with an additional 5
Chris@178 613 // zeros.
Chris@178 614 //
Chris@178 615 // We also need to reverse the column as we go, since the
Chris@178 616 // raw CQ has the high frequencies first and we need it
Chris@178 617 // the other way around.
Chris@32 618
Chris@178 619 if (m_hqMode) {
Chris@178 620 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@178 621 int ix = inCol.size() - j - 55;
Chris@178 622 outCol[j] = inCol[ix];
Chris@178 623 }
Chris@178 624 } else {
Chris@178 625 for (int j = 0; j < 5; ++j) {
Chris@178 626 outCol[j] = 0.0;
Chris@178 627 }
Chris@178 628 for (int j = 5; j < pack.templateHeight; ++j) {
Chris@178 629 int ix = inCol.size() - j + 4;
Chris@178 630 outCol[j] = inCol[ix];
Chris@178 631 }
Chris@46 632 }
Chris@32 633
Chris@46 634 vector<double> noiseLevel1 =
Chris@46 635 MedianFilter<double>::filter(40, outCol);
Chris@176 636 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 637 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
Chris@46 638 }
Chris@32 639
Chris@46 640 vector<double> noiseLevel2 =
Chris@46 641 MedianFilter<double>::filter(40, noiseLevel1);
Chris@176 642 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 643 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
Chris@32 644 }
Chris@32 645
Chris@165 646 out.push_back(outCol);
Chris@32 647 }
Chris@32 648
Chris@184 649 ++m_columnCountIn;
Chris@32 650 }
Chris@32 651
Chris@32 652 return out;
Chris@32 653 }
Chris@32 654
Chris@168 655 void
Chris@170 656 Silvet::postProcess(const vector<double> &pitches,
Chris@170 657 const vector<int> &bestShifts,
Chris@184 658 bool wantShifts,
Chris@184 659 int shiftCount)
Chris@166 660 {
Chris@176 661 const InstrumentPack &pack = m_instruments[m_instrument];
Chris@176 662
Chris@41 663 vector<double> filtered;
Chris@41 664
Chris@176 665 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@170 666 m_postFilter[j]->push(pitches[j]);
Chris@41 667 filtered.push_back(m_postFilter[j]->get());
Chris@41 668 }
Chris@41 669
Chris@185 670 double threshold = 1; //!!! pack.levelThreshold
Chris@41 671
Chris@184 672 double columnDuration = 1.0 / m_colsPerSec;
Chris@184 673 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
Chris@184 674 RealTime t = RealTime::fromSeconds
Chris@184 675 (columnDuration * (m_columnCountOut - postFilterLatency) + 0.02);
Chris@166 676
Chris@176 677 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@184 678
Chris@166 679 double strength = filtered[j];
Chris@184 680 if (strength < threshold) {
Chris@184 681 continue;
Chris@184 682 }
Chris@184 683
Chris@184 684 double freq;
Chris@184 685 if (wantShifts) {
Chris@184 686 freq = noteFrequency(j, bestShifts[j], shiftCount);
Chris@184 687 } else {
Chris@184 688 freq = noteFrequency(j, 0, shiftCount);
Chris@184 689 }
Chris@184 690
Chris@184 691 double confidence = strength / 50.0; //!!!???
Chris@184 692 if (confidence > 1.0) confidence = 1.0;
Chris@184 693
Chris@184 694 AgentHypothesis::Observation obs(freq, t, confidence);
Chris@184 695 m_agentFeeder->feed(obs);
Chris@168 696 }
Chris@166 697 }
Chris@166 698
Chris@166 699 Vamp::Plugin::FeatureList
Chris@184 700 Silvet::obtainNotes()
Chris@166 701 {
Chris@41 702 FeatureList noteFeatures;
Chris@41 703
Chris@189 704 std::set<NoteHypothesis> hh;
Chris@184 705
Chris@189 706 AgentFeederPoly<NoteHypothesis> *polyFeeder =
Chris@189 707 dynamic_cast<AgentFeederPoly<NoteHypothesis> *>(m_agentFeeder);
Chris@184 708
Chris@189 709 AgentFeederMono<NoteHypothesis> *monoFeeder =
Chris@189 710 dynamic_cast<AgentFeederMono<NoteHypothesis> *>(m_agentFeeder);
Chris@189 711
Chris@189 712 if (polyFeeder) {
Chris@189 713
Chris@189 714 hh = polyFeeder->retrieveAcceptedHypotheses();
Chris@189 715
Chris@189 716 } else if (monoFeeder) {
Chris@189 717
Chris@189 718 hh = monoFeeder->retrieveAcceptedHypotheses();
Chris@189 719
Chris@189 720 } else {
Chris@189 721
Chris@189 722 cerr << "INTERNAL ERROR: Feeder is neither poly- nor "
Chris@189 723 << "mono-note-hypothesis-feeder!" << endl;
Chris@41 724 return noteFeatures;
Chris@41 725 }
Chris@150 726
Chris@184 727 for (std::set<NoteHypothesis>::const_iterator hi = hh.begin();
Chris@184 728 hi != hh.end(); ++hi) {
Chris@184 729
Chris@184 730 NoteHypothesis h(*hi);
Chris@184 731
Chris@184 732 NoteHypothesis::Note n = h.getAveragedNote();
Chris@41 733
Chris@184 734 int velocity = n.confidence * 127;
Chris@184 735 if (velocity > 127) velocity = 127;
Chris@41 736
Chris@188 737 float freq = n.freq;
Chris@188 738 if (!m_fineTuning) {
Chris@188 739 freq = roundToMidiFrequency(freq);
Chris@188 740 }
Chris@188 741
Chris@184 742 Feature f;
Chris@184 743 f.hasTimestamp = true;
Chris@184 744 f.hasDuration = true;
Chris@184 745 f.timestamp = n.time;
Chris@184 746 f.duration = n.duration;
Chris@184 747 f.values.clear();
Chris@188 748 f.values.push_back(freq);
Chris@184 749 f.values.push_back(velocity);
Chris@184 750 // f.label = noteName(note, partShift, shiftCount);
Chris@184 751 noteFeatures.push_back(f);
Chris@41 752 }
Chris@41 753
Chris@41 754 return noteFeatures;
Chris@41 755 }