annotate src/Silvet.cpp @ 316:f3e10617a60d livemode

Live mode enhancements: Adjust processing parameters (for speed) and peak-pick pitch activations across frequency (to avoid neighbouring-semitone clusters)
author Chris Cannam
date Tue, 28 Apr 2015 12:21:40 +0100
parents f98ba4f47e49
children 92293058368a
rev   line source
Chris@31 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@31 2
Chris@31 3 /*
Chris@31 4 Silvet
Chris@31 5
Chris@31 6 A Vamp plugin for note transcription.
Chris@31 7 Centre for Digital Music, Queen Mary University of London.
Chris@31 8
Chris@31 9 This program is free software; you can redistribute it and/or
Chris@31 10 modify it under the terms of the GNU General Public License as
Chris@31 11 published by the Free Software Foundation; either version 2 of the
Chris@31 12 License, or (at your option) any later version. See the file
Chris@31 13 COPYING included with this distribution for more information.
Chris@31 14 */
Chris@31 15
Chris@31 16 #include "Silvet.h"
Chris@34 17 #include "EM.h"
Chris@31 18
Chris@152 19 #include <cq/CQSpectrogram.h>
Chris@31 20
Chris@152 21 #include "MedianFilter.h"
Chris@152 22 #include "constant-q-cpp/src/dsp/Resampler.h"
Chris@246 23 #include "flattendynamics-ladspa.h"
Chris@298 24 #include "LiveInstruments.h"
Chris@31 25
Chris@31 26 #include <vector>
Chris@312 27 #include <future>
Chris@31 28
Chris@32 29 #include <cstdio>
Chris@32 30
Chris@31 31 using std::vector;
Chris@48 32 using std::cout;
Chris@31 33 using std::cerr;
Chris@31 34 using std::endl;
Chris@311 35 using std::pair;
Chris@312 36 using std::future;
Chris@312 37 using std::async;
Chris@40 38 using Vamp::RealTime;
Chris@31 39
// Sample rate at which the analysis runs; reset() creates a resampler
// whenever the input sample rate differs from this.
static const int processingSampleRate = 44100;

// Constant-Q bins per semitone used in live mode and in the other modes
// respectively (multiplied by 12 to give bins per octave).
static const int binsPerSemitoneLive = 1;
static const int binsPerSemitoneNormal = 5;

// Inclusive range of input sample rates accepted by initialise().
static const int minInputSampleRate = 100;
static const int maxInputSampleRate = 192000;
Chris@272 47
// Processing mode a newly constructed plugin starts in; also reported as
// the default value of the "mode" parameter descriptor.
static const Silvet::ProcessingMode defaultMode = Silvet::HighQualityMode;
Chris@316 49
Chris@31 50 Silvet::Silvet(float inputSampleRate) :
Chris@31 51 Plugin(inputSampleRate),
Chris@161 52 m_instruments(InstrumentPack::listInstrumentPacks()),
Chris@298 53 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)),
Chris@31 54 m_resampler(0),
Chris@246 55 m_flattener(0),
Chris@110 56 m_cq(0),
Chris@316 57 m_mode(defaultMode),
Chris@166 58 m_fineTuning(false),
Chris@178 59 m_instrument(0),
Chris@313 60 m_colsPerSec(50),
Chris@313 61 m_haveStartTime(false)
Chris@31 62 {
Chris@31 63 }
Chris@31 64
Chris@31 65 Silvet::~Silvet()
Chris@31 66 {
Chris@31 67 delete m_resampler;
Chris@246 68 delete m_flattener;
Chris@31 69 delete m_cq;
Chris@41 70 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 71 delete m_postFilter[i];
Chris@41 72 }
Chris@31 73 }
Chris@31 74
Chris@31 75 string
Chris@31 76 Silvet::getIdentifier() const
Chris@31 77 {
Chris@31 78 return "silvet";
Chris@31 79 }
Chris@31 80
Chris@31 81 string
Chris@31 82 Silvet::getName() const
Chris@31 83 {
Chris@31 84 return "Silvet Note Transcription";
Chris@31 85 }
Chris@31 86
Chris@31 87 string
Chris@31 88 Silvet::getDescription() const
Chris@31 89 {
Chris@191 90 return "Estimate the note onsets, pitches, and durations that make up a music recording.";
Chris@31 91 }
Chris@31 92
Chris@31 93 string
Chris@31 94 Silvet::getMaker() const
Chris@31 95 {
Chris@191 96 return "Queen Mary, University of London";
Chris@31 97 }
Chris@31 98
Chris@31 99 int
Chris@31 100 Silvet::getPluginVersion() const
Chris@31 101 {
Chris@309 102 return 3;
Chris@31 103 }
Chris@31 104
Chris@31 105 string
Chris@31 106 Silvet::getCopyright() const
Chris@31 107 {
Chris@191 108 return "Method by Emmanouil Benetos and Simon Dixon; plugin by Chris Cannam and Emmanouil Benetos. GPL licence.";
Chris@31 109 }
Chris@31 110
Chris@31 111 Silvet::InputDomain
Chris@31 112 Silvet::getInputDomain() const
Chris@31 113 {
Chris@31 114 return TimeDomain;
Chris@31 115 }
Chris@31 116
Chris@31 117 size_t
Chris@31 118 Silvet::getPreferredBlockSize() const
Chris@31 119 {
Chris@31 120 return 0;
Chris@31 121 }
Chris@31 122
Chris@31 123 size_t
Chris@31 124 Silvet::getPreferredStepSize() const
Chris@31 125 {
Chris@31 126 return 0;
Chris@31 127 }
Chris@31 128
Chris@31 129 size_t
Chris@31 130 Silvet::getMinChannelCount() const
Chris@31 131 {
Chris@31 132 return 1;
Chris@31 133 }
Chris@31 134
Chris@31 135 size_t
Chris@31 136 Silvet::getMaxChannelCount() const
Chris@31 137 {
Chris@31 138 return 1;
Chris@31 139 }
Chris@31 140
Chris@31 141 Silvet::ParameterList
Chris@31 142 Silvet::getParameterDescriptors() const
Chris@31 143 {
Chris@31 144 ParameterList list;
Chris@110 145
Chris@110 146 ParameterDescriptor desc;
Chris@110 147 desc.identifier = "mode";
Chris@110 148 desc.name = "Processing mode";
Chris@110 149 desc.unit = "";
Chris@297 150 desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode is tuned in favour of overall speed; Live mode is tuned in favour of lower latency; while Intensive mode (the default) will almost always produce the best results.";
Chris@110 151 desc.minValue = 0;
Chris@297 152 desc.maxValue = 2;
Chris@316 153 desc.defaultValue = int(defaultMode);
Chris@110 154 desc.isQuantized = true;
Chris@110 155 desc.quantizeStep = 1;
Chris@166 156 desc.valueNames.push_back("Draft (faster)");
Chris@165 157 desc.valueNames.push_back("Intensive (higher quality)");
Chris@297 158 desc.valueNames.push_back("Live (lower latency)");
Chris@161 159 list.push_back(desc);
Chris@161 160
Chris@176 161 desc.identifier = "instrument";
Chris@176 162 desc.name = "Instrument";
Chris@161 163 desc.unit = "";
Chris@271 164 desc.description = "The instrument or instruments known to be present in the recording. This affects the set of instrument templates used, as well as the expected level of polyphony in the output. Using a more limited set of instruments than the default will also make the plugin run faster.\nNote that this plugin cannot isolate instruments: you can't use this setting to request notes from only one instrument in a recording with several. Instead, use this as a hint to the plugin about which instruments are actually present.";
Chris@161 165 desc.minValue = 0;
Chris@162 166 desc.maxValue = m_instruments.size()-1;
Chris@162 167 desc.defaultValue = 0;
Chris@161 168 desc.isQuantized = true;
Chris@161 169 desc.quantizeStep = 1;
Chris@161 170 desc.valueNames.clear();
Chris@162 171 for (int i = 0; i < int(m_instruments.size()); ++i) {
Chris@162 172 desc.valueNames.push_back(m_instruments[i].name);
Chris@162 173 }
Chris@166 174 list.push_back(desc);
Chris@161 175
Chris@166 176 desc.identifier = "finetune";
Chris@166 177 desc.name = "Return fine pitch estimates";
Chris@166 178 desc.unit = "";
Chris@271 179 desc.description = "Return pitch estimates at finer than semitone resolution. This works only in Intensive mode. Notes that appear to drift in pitch will be split up into shorter notes with individually finer pitches.";
Chris@166 180 desc.minValue = 0;
Chris@166 181 desc.maxValue = 1;
Chris@166 182 desc.defaultValue = 0;
Chris@166 183 desc.isQuantized = true;
Chris@166 184 desc.quantizeStep = 1;
Chris@166 185 desc.valueNames.clear();
Chris@110 186 list.push_back(desc);
Chris@110 187
Chris@31 188 return list;
Chris@31 189 }
Chris@31 190
Chris@31 191 float
Chris@31 192 Silvet::getParameter(string identifier) const
Chris@31 193 {
Chris@110 194 if (identifier == "mode") {
Chris@297 195 return (float)(int)m_mode;
Chris@166 196 } else if (identifier == "finetune") {
Chris@166 197 return m_fineTuning ? 1.f : 0.f;
Chris@176 198 } else if (identifier == "instrument") {
Chris@162 199 return m_instrument;
Chris@110 200 }
Chris@31 201 return 0;
Chris@31 202 }
Chris@31 203
Chris@31 204 void
Chris@31 205 Silvet::setParameter(string identifier, float value)
Chris@31 206 {
Chris@110 207 if (identifier == "mode") {
Chris@297 208 m_mode = (ProcessingMode)(int)(value + 0.5);
Chris@166 209 } else if (identifier == "finetune") {
Chris@166 210 m_fineTuning = (value > 0.5);
Chris@176 211 } else if (identifier == "instrument") {
Chris@162 212 m_instrument = lrintf(value);
Chris@110 213 }
Chris@31 214 }
Chris@31 215
Chris@31 216 Silvet::ProgramList
Chris@31 217 Silvet::getPrograms() const
Chris@31 218 {
Chris@31 219 ProgramList list;
Chris@31 220 return list;
Chris@31 221 }
Chris@31 222
Chris@31 223 string
Chris@31 224 Silvet::getCurrentProgram() const
Chris@31 225 {
Chris@31 226 return "";
Chris@31 227 }
Chris@31 228
Chris@31 229 void
Chris@31 230 Silvet::selectProgram(string name)
Chris@31 231 {
Chris@31 232 }
Chris@31 233
Chris@31 234 Silvet::OutputList
Chris@31 235 Silvet::getOutputDescriptors() const
Chris@31 236 {
Chris@31 237 OutputList list;
Chris@31 238
Chris@31 239 OutputDescriptor d;
Chris@51 240 d.identifier = "notes";
Chris@51 241 d.name = "Note transcription";
Chris@271 242 d.description = "Overall note transcription. Each note has time, duration, estimated pitch, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
Chris@41 243 d.unit = "Hz";
Chris@31 244 d.hasFixedBinCount = true;
Chris@31 245 d.binCount = 2;
Chris@41 246 d.binNames.push_back("Frequency");
Chris@31 247 d.binNames.push_back("Velocity");
Chris@31 248 d.hasKnownExtents = false;
Chris@31 249 d.isQuantized = false;
Chris@31 250 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@246 251 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@31 252 d.hasDuration = true;
Chris@32 253 m_notesOutputNo = list.size();
Chris@32 254 list.push_back(d);
Chris@32 255
Chris@178 256 d.identifier = "timefreq";
Chris@178 257 d.name = "Time-frequency distribution";
Chris@271 258 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
Chris@178 259 d.unit = "";
Chris@178 260 d.hasFixedBinCount = true;
Chris@298 261 d.binCount = getPack(0).templateHeight;
Chris@178 262 d.binNames.clear();
Chris@178 263 if (m_cq) {
Chris@294 264 char name[50];
Chris@298 265 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@178 266 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@178 267 // lowest-frequency 55 bins have been dropped, for a
Chris@178 268 // 545-bin template. The native CQ bins go high->low
Chris@178 269 // frequency though, so these are still the first 545 bins
Chris@178 270 // as reported by getBinFrequency, though in reverse order
Chris@178 271 float freq = m_cq->getBinFrequency
Chris@298 272 (getPack(0).templateHeight - i - 1);
Chris@178 273 sprintf(name, "%.1f Hz", freq);
Chris@178 274 d.binNames.push_back(name);
Chris@178 275 }
Chris@178 276 }
Chris@178 277 d.hasKnownExtents = false;
Chris@178 278 d.isQuantized = false;
Chris@178 279 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@178 280 d.sampleRate = m_colsPerSec;
Chris@178 281 d.hasDuration = false;
Chris@178 282 m_fcqOutputNo = list.size();
Chris@178 283 list.push_back(d);
Chris@178 284
Chris@294 285 d.identifier = "pitchactivation";
Chris@294 286 d.name = "Pitch activation distribution";
Chris@294 287 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction.";
Chris@294 288 d.unit = "";
Chris@294 289 d.hasFixedBinCount = true;
Chris@298 290 d.binCount = getPack(0).templateNoteCount;
Chris@294 291 d.binNames.clear();
Chris@294 292 if (m_cq) {
Chris@298 293 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@294 294 d.binNames.push_back(noteName(i, 0, 1));
Chris@294 295 }
Chris@294 296 }
Chris@294 297 d.hasKnownExtents = false;
Chris@294 298 d.isQuantized = false;
Chris@294 299 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@294 300 d.sampleRate = m_colsPerSec;
Chris@294 301 d.hasDuration = false;
Chris@294 302 m_pitchOutputNo = list.size();
Chris@294 303 list.push_back(d);
Chris@294 304
Chris@309 305 d.identifier = "chroma";
Chris@309 306 d.name = "Pitch chroma distribution";
Chris@309 307 d.description = "Pitch chroma distribution formed by wrapping the un-thresholded pitch activation distribution into a single octave of semitone bins.";
Chris@309 308 d.unit = "";
Chris@309 309 d.hasFixedBinCount = true;
Chris@309 310 d.binCount = 12;
Chris@309 311 d.binNames.clear();
Chris@309 312 if (m_cq) {
Chris@309 313 for (int i = 0; i < 12; ++i) {
Chris@309 314 d.binNames.push_back(chromaName(i));
Chris@309 315 }
Chris@309 316 }
Chris@309 317 d.hasKnownExtents = false;
Chris@309 318 d.isQuantized = false;
Chris@309 319 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@309 320 d.sampleRate = m_colsPerSec;
Chris@309 321 d.hasDuration = false;
Chris@309 322 m_chromaOutputNo = list.size();
Chris@309 323 list.push_back(d);
Chris@309 324
Chris@302 325 d.identifier = "templates";
Chris@302 326 d.name = "Templates";
Chris@302 327 d.description = "Constant-Q spectral templates for the selected instrument pack.";
Chris@302 328 d.unit = "";
Chris@302 329 d.hasFixedBinCount = true;
Chris@302 330 d.binCount = getPack(0).templateHeight;
Chris@302 331 d.binNames.clear();
Chris@302 332 if (m_cq) {
Chris@302 333 char name[50];
Chris@302 334 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@302 335 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@302 336 // lowest-frequency 55 bins have been dropped, for a
Chris@302 337 // 545-bin template. The native CQ bins go high->low
Chris@302 338 // frequency though, so these are still the first 545 bins
Chris@302 339 // as reported by getBinFrequency, though in reverse order
Chris@302 340 float freq = m_cq->getBinFrequency
Chris@302 341 (getPack(0).templateHeight - i - 1);
Chris@302 342 sprintf(name, "%.1f Hz", freq);
Chris@302 343 d.binNames.push_back(name);
Chris@302 344 }
Chris@302 345 }
Chris@302 346 d.hasKnownExtents = false;
Chris@302 347 d.isQuantized = false;
Chris@302 348 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@302 349 d.sampleRate = m_colsPerSec;
Chris@302 350 d.hasDuration = false;
Chris@302 351 m_templateOutputNo = list.size();
Chris@302 352 list.push_back(d);
Chris@302 353
Chris@31 354 return list;
Chris@31 355 }
Chris@31 356
Chris@38 357 std::string
Chris@309 358 Silvet::chromaName(int pitch) const
Chris@38 359 {
Chris@38 360 static const char *names[] = {
Chris@38 361 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
Chris@38 362 };
Chris@38 363
Chris@309 364 return names[pitch];
Chris@309 365 }
Chris@309 366
Chris@309 367 std::string
Chris@309 368 Silvet::noteName(int note, int shift, int shiftCount) const
Chris@309 369 {
Chris@309 370 string n = chromaName(note % 12);
Chris@38 371
Chris@175 372 int oct = (note + 9) / 12;
Chris@38 373
Chris@175 374 char buf[30];
Chris@175 375
Chris@175 376 float pshift = 0.f;
Chris@175 377 if (shiftCount > 1) {
Chris@175 378 // see noteFrequency below
Chris@175 379 pshift =
Chris@175 380 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 381 }
Chris@175 382
Chris@175 383 if (pshift > 0.f) {
Chris@309 384 sprintf(buf, "%s%d+%dc", n.c_str(), oct, int(round(pshift * 100)));
Chris@175 385 } else if (pshift < 0.f) {
Chris@309 386 sprintf(buf, "%s%d-%dc", n.c_str(), oct, int(round((-pshift) * 100)));
Chris@175 387 } else {
Chris@309 388 sprintf(buf, "%s%d", n.c_str(), oct);
Chris@175 389 }
Chris@38 390
Chris@38 391 return buf;
Chris@38 392 }
Chris@38 393
Chris@41 394 float
Chris@168 395 Silvet::noteFrequency(int note, int shift, int shiftCount) const
Chris@41 396 {
Chris@169 397 // Convert shift number to a pitch shift. The given shift number
Chris@169 398 // is an offset into the template array, which starts with some
Chris@169 399 // zeros, followed by the template, then some trailing zeros.
Chris@169 400 //
Chris@169 401 // Example: if we have templateMaxShift == 2 and thus shiftCount
Chris@169 402 // == 5, then the number will be in the range 0-4 and the template
Chris@169 403 // will have 2 zeros at either end. Thus number 2 represents the
Chris@169 404 // template "as recorded", for a pitch shift of 0; smaller indices
Chris@169 405 // represent moving the template *up* in pitch (by introducing
Chris@169 406 // zeros at the start, which is the low-frequency end), for a
Chris@169 407 // positive pitch shift; and higher values represent moving it
Chris@169 408 // down in pitch, for a negative pitch shift.
Chris@169 409
Chris@175 410 float pshift = 0.f;
Chris@175 411 if (shiftCount > 1) {
Chris@175 412 pshift =
Chris@175 413 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 414 }
Chris@169 415
Chris@301 416 float freq = float(27.5 * pow(2.0, (note + pshift) / 12.0));
Chris@301 417
Chris@303 418 // cerr << "note = " << note << ", shift = " << shift << ", shiftCount = "
Chris@303 419 // << shiftCount << ", obtained freq = " << freq << endl;
Chris@301 420
Chris@301 421 return freq;
Chris@41 422 }
Chris@41 423
Chris@31 424 bool
Chris@31 425 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@31 426 {
Chris@272 427 if (m_inputSampleRate < minInputSampleRate ||
Chris@272 428 m_inputSampleRate > maxInputSampleRate) {
Chris@272 429 cerr << "Silvet::initialise: Unsupported input sample rate "
Chris@272 430 << m_inputSampleRate << " (supported min " << minInputSampleRate
Chris@272 431 << ", max " << maxInputSampleRate << ")" << endl;
Chris@272 432 return false;
Chris@272 433 }
Chris@272 434
Chris@31 435 if (channels < getMinChannelCount() ||
Chris@272 436 channels > getMaxChannelCount()) {
Chris@272 437 cerr << "Silvet::initialise: Unsupported channel count " << channels
Chris@272 438 << " (supported min " << getMinChannelCount() << ", max "
Chris@272 439 << getMaxChannelCount() << ")" << endl;
Chris@272 440 return false;
Chris@272 441 }
Chris@31 442
Chris@31 443 if (stepSize != blockSize) {
Chris@31 444 cerr << "Silvet::initialise: Step size must be the same as block size ("
Chris@31 445 << stepSize << " != " << blockSize << ")" << endl;
Chris@31 446 return false;
Chris@31 447 }
Chris@31 448
Chris@31 449 m_blockSize = blockSize;
Chris@31 450
Chris@31 451 reset();
Chris@31 452
Chris@31 453 return true;
Chris@31 454 }
Chris@31 455
Chris@31 456 void
Chris@31 457 Silvet::reset()
Chris@31 458 {
Chris@31 459 delete m_resampler;
Chris@246 460 delete m_flattener;
Chris@31 461 delete m_cq;
Chris@31 462
Chris@31 463 if (m_inputSampleRate != processingSampleRate) {
Chris@31 464 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
Chris@31 465 } else {
Chris@31 466 m_resampler = 0;
Chris@31 467 }
Chris@31 468
Chris@246 469 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
Chris@246 470 m_flattener->reset();
Chris@246 471
Chris@301 472 // this happens to be processingSampleRate / 3, and is the top
Chris@301 473 // freq used for the EM templates:
Chris@301 474 double maxFreq = 14700;
Chris@301 475
Chris@301 476 if (m_mode == LiveMode) {
Chris@301 477 // We only have 12 bpo rather than 60, so we need the top bin
Chris@301 478 // to be the middle one of the top 5, i.e. 2/5 of a semitone
Chris@301 479 // lower than 14700
Chris@301 480 maxFreq *= powf(2.0, -1.0 / 30.0);
Chris@301 481 }
Chris@301 482
Chris@173 483 double minFreq = 27.5;
Chris@173 484
Chris@297 485 if (m_mode != HighQualityMode) {
Chris@173 486 // We don't actually return any notes from the bottom octave,
Chris@173 487 // so we can just pad with zeros
Chris@173 488 minFreq *= 2;
Chris@173 489 }
Chris@173 490
Chris@298 491 int bpo = 12 *
Chris@298 492 (m_mode == LiveMode ? binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@301 493
Chris@154 494 CQParameters params(processingSampleRate,
Chris@173 495 minFreq,
Chris@303 496 maxFreq,
Chris@298 497 bpo);
Chris@154 498
Chris@316 499 // For params.q, the MIREX code uses 0.8, but it seems that with
Chris@316 500 // atomHopFactor of 0.3, using q == 0.9 or lower drops the FFT
Chris@316 501 // size to 512 from 1024 and alters some other processing
Chris@316 502 // parameters, making everything much, much slower. Could be a
Chris@316 503 // flaw in the CQ parameter calculations, must check. For
Chris@316 504 // atomHopFactor == 1, q == 0.8 is fine
Chris@316 505 params.q = (m_mode == HighQualityMode ? 0.95 : 0.8);
Chris@316 506 params.atomHopFactor = (m_mode == HighQualityMode ? 0.3 : 1.0);
Chris@154 507 params.threshold = 0.0005;
Chris@172 508 params.window = CQParameters::Hann;
Chris@154 509
Chris@154 510 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
Chris@31 511
Chris@303 512 // cerr << "CQ bins = " << m_cq->getTotalBins() << endl;
Chris@303 513 // cerr << "CQ min freq = " << m_cq->getMinFrequency() << " (and for confirmation, freq of bin 0 = " << m_cq->getBinFrequency(0) << ")" << endl;
Chris@297 514
Chris@297 515 m_colsPerSec = (m_mode == DraftMode ? 25 : 50);
Chris@165 516
Chris@41 517 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 518 delete m_postFilter[i];
Chris@41 519 }
Chris@41 520 m_postFilter.clear();
Chris@303 521 int postFilterLength = 3;
Chris@298 522 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@303 523 m_postFilter.push_back(new MedianFilter<double>(postFilterLength));
Chris@41 524 }
Chris@41 525 m_pianoRoll.clear();
Chris@246 526 m_inputGains.clear();
Chris@32 527 m_columnCount = 0;
Chris@272 528 m_resampledCount = 0;
Chris@40 529 m_startTime = RealTime::zeroTime;
Chris@313 530 m_haveStartTime = false;
Chris@31 531 }
Chris@31 532
Chris@31 533 Silvet::FeatureSet
Chris@31 534 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
Chris@31 535 {
Chris@302 536 FeatureSet fs;
Chris@302 537
Chris@313 538 if (!m_haveStartTime) {
Chris@314 539
Chris@40 540 m_startTime = timestamp;
Chris@313 541 m_haveStartTime = true;
Chris@314 542
Chris@302 543 insertTemplateFeatures(fs);
Chris@40 544 }
Chris@246 545
Chris@246 546 vector<float> flattened(m_blockSize);
Chris@246 547 float gain = 1.f;
Chris@246 548 m_flattener->connectInputPort
Chris@246 549 (FlattenDynamics::AudioInputPort, inputBuffers[0]);
Chris@246 550 m_flattener->connectOutputPort
Chris@246 551 (FlattenDynamics::AudioOutputPort, &flattened[0]);
Chris@246 552 m_flattener->connectOutputPort
Chris@246 553 (FlattenDynamics::GainOutputPort, &gain);
Chris@246 554 m_flattener->process(m_blockSize);
Chris@246 555
Chris@252 556 m_inputGains[timestamp] = gain;
Chris@40 557
Chris@31 558 vector<double> data;
Chris@40 559 for (int i = 0; i < m_blockSize; ++i) {
Chris@246 560 double d = flattened[i];
Chris@235 561 data.push_back(d);
Chris@40 562 }
Chris@31 563
Chris@31 564 if (m_resampler) {
Chris@272 565
Chris@31 566 data = m_resampler->process(data.data(), data.size());
Chris@272 567
Chris@272 568 int hadCount = m_resampledCount;
Chris@272 569 m_resampledCount += data.size();
Chris@272 570
Chris@272 571 int resamplerLatency = m_resampler->getLatency();
Chris@272 572
Chris@272 573 if (hadCount < resamplerLatency) {
Chris@272 574 int stillToDrop = resamplerLatency - hadCount;
Chris@272 575 if (stillToDrop >= int(data.size())) {
Chris@302 576 return fs;
Chris@272 577 } else {
Chris@272 578 data = vector<double>(data.begin() + stillToDrop, data.end());
Chris@272 579 }
Chris@272 580 }
Chris@31 581 }
Chris@272 582
Chris@32 583 Grid cqout = m_cq->process(data);
Chris@302 584 transcribe(cqout, fs);
Chris@51 585 return fs;
Chris@34 586 }
Chris@34 587
Chris@34 588 Silvet::FeatureSet
Chris@34 589 Silvet::getRemainingFeatures()
Chris@34 590 {
Chris@145 591 Grid cqout = m_cq->getRemainingOutput();
Chris@302 592 FeatureSet fs;
Chris@302 593 if (m_columnCount == 0) {
Chris@302 594 // process() was never called, but we still want these
Chris@302 595 insertTemplateFeatures(fs);
Chris@302 596 } else {
Chris@302 597 transcribe(cqout, fs);
Chris@302 598 }
Chris@51 599 return fs;
Chris@34 600 }
Chris@34 601
Chris@302 602 void
Chris@302 603 Silvet::insertTemplateFeatures(FeatureSet &fs)
Chris@302 604 {
Chris@302 605 const InstrumentPack &pack = getPack(m_instrument);
Chris@302 606 for (int i = 0; i < int(pack.templates.size()) * pack.templateNoteCount; ++i) {
Chris@302 607 RealTime timestamp = RealTime::fromSeconds(double(i) / m_colsPerSec);
Chris@302 608 Feature f;
Chris@302 609 char buffer[50];
Chris@302 610 sprintf(buffer, "Note %d", i + 1);
Chris@302 611 f.label = buffer;
Chris@302 612 f.hasTimestamp = true;
Chris@302 613 f.timestamp = timestamp;
Chris@302 614 f.values = pack.templates[i / pack.templateNoteCount]
Chris@302 615 .data[i % pack.templateNoteCount];
Chris@302 616 fs[m_templateOutputNo].push_back(f);
Chris@302 617 }
Chris@302 618 }
Chris@302 619
Chris@302 620 void
Chris@302 621 Silvet::transcribe(const Grid &cqout, Silvet::FeatureSet &fs)
Chris@34 622 {
Chris@32 623 Grid filtered = preProcess(cqout);
Chris@31 624
Chris@302 625 if (filtered.empty()) return;
Chris@170 626
Chris@298 627 const InstrumentPack &pack(getPack(m_instrument));
Chris@104 628
Chris@178 629 for (int i = 0; i < (int)filtered.size(); ++i) {
Chris@178 630 Feature f;
Chris@178 631 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@178 632 f.values.push_back(float(filtered[i][j]));
Chris@178 633 }
Chris@178 634 fs[m_fcqOutputNo].push_back(f);
Chris@178 635 }
Chris@178 636
Chris@34 637 int width = filtered.size();
Chris@34 638
Chris@311 639 Grid localPitches(width);
Chris@170 640
Chris@297 641 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning;
Chris@170 642 int shiftCount = 1;
Chris@170 643 if (wantShifts) {
Chris@170 644 shiftCount = pack.templateMaxShift * 2 + 1;
Chris@170 645 }
Chris@170 646
Chris@170 647 vector<vector<int> > localBestShifts;
Chris@170 648 if (wantShifts) {
Chris@311 649 localBestShifts = vector<vector<int> >(width);
Chris@170 650 }
Chris@170 651
Chris@312 652 #ifndef MAX_EM_THREADS
Chris@312 653 #define MAX_EM_THREADS 8
Chris@312 654 #endif
Chris@312 655
Chris@312 656 #if (defined(MAX_EM_THREADS) && (MAX_EM_THREADS > 1))
Chris@312 657 for (int i = 0; i < width; ) {
Chris@312 658 typedef future<pair<vector<double>, vector<int>>> EMFuture;
Chris@312 659 vector<EMFuture> results;
Chris@312 660 for (int j = 0; j < MAX_EM_THREADS && i + j < width; ++j) {
Chris@312 661 results.push_back
Chris@312 662 (async(std::launch::async,
Chris@312 663 [&](int index) {
Chris@312 664 return applyEM(pack, filtered.at(index), wantShifts);
Chris@312 665 }, i + j));
Chris@312 666 }
Chris@312 667 for (int j = 0; j < MAX_EM_THREADS && i + j < width; ++j) {
Chris@312 668 auto out = results[j].get();
Chris@312 669 localPitches[i+j] = out.first;
Chris@312 670 if (wantShifts) localBestShifts[i+j] = out.second;
Chris@312 671 }
Chris@312 672 i += MAX_EM_THREADS;
Chris@312 673 }
Chris@312 674 #else
Chris@123 675 for (int i = 0; i < width; ++i) {
Chris@311 676 auto out = applyEM(pack, filtered.at(i), wantShifts);
Chris@311 677 localPitches[i] = out.first;
Chris@311 678 if (wantShifts) localBestShifts[i] = out.second;
Chris@123 679 }
Chris@312 680 #endif
Chris@305 681
Chris@166 682 for (int i = 0; i < width; ++i) {
Chris@37 683
Chris@309 684 // This returns a filtered column, and pushes the
Chris@309 685 // up-to-max-polyphony activation column to m_pianoRoll
Chris@294 686 vector<double> filtered = postProcess
Chris@294 687 (localPitches[i], localBestShifts[i], wantShifts);
Chris@294 688
Chris@309 689 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1);
Chris@309 690 float inputGain = getInputGainAt(timestamp);
Chris@309 691
Chris@294 692 Feature f;
Chris@294 693 for (int j = 0; j < (int)filtered.size(); ++j) {
Chris@309 694 float v = filtered[j];
Chris@294 695 if (v < pack.levelThreshold) v = 0.f;
Chris@309 696 f.values.push_back(v / inputGain);
Chris@294 697 }
Chris@294 698 fs[m_pitchOutputNo].push_back(f);
Chris@309 699
Chris@309 700 f.values.clear();
Chris@309 701 f.values.resize(12);
Chris@309 702 for (int j = 0; j < (int)filtered.size(); ++j) {
Chris@309 703 f.values[j % 12] += filtered[j] / inputGain;
Chris@309 704 }
Chris@309 705 fs[m_chromaOutputNo].push_back(f);
Chris@166 706
Chris@168 707 FeatureList noteFeatures = noteTrack(shiftCount);
Chris@38 708
Chris@123 709 for (FeatureList::const_iterator fi = noteFeatures.begin();
Chris@123 710 fi != noteFeatures.end(); ++fi) {
Chris@123 711 fs[m_notesOutputNo].push_back(*fi);
Chris@40 712 }
Chris@34 713 }
Chris@31 714 }
Chris@31 715
Chris@311 716 pair<vector<double>, vector<int> >
Chris@311 717 Silvet::applyEM(const InstrumentPack &pack,
Chris@311 718 const vector<double> &column,
Chris@311 719 bool wantShifts)
Chris@311 720 {
Chris@311 721 double columnThreshold = 1e-5;
Chris@311 722
Chris@314 723 if (m_mode == LiveMode) {
Chris@314 724 columnThreshold /= 20;
Chris@314 725 }
Chris@314 726
Chris@311 727 vector<double> pitches(pack.templateNoteCount, 0.0);
Chris@311 728 vector<int> bestShifts;
Chris@311 729
Chris@311 730 double sum = 0.0;
Chris@311 731 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@311 732 sum += column.at(j);
Chris@311 733 }
Chris@311 734 if (sum < columnThreshold) return { pitches, bestShifts };
Chris@311 735
Chris@314 736 EM em(&pack, m_mode == HighQualityMode);
Chris@311 737
Chris@311 738 em.setPitchSparsity(pack.pitchSparsity);
Chris@311 739 em.setSourceSparsity(pack.sourceSparsity);
Chris@311 740
Chris@314 741 int iterations = (m_mode == HighQualityMode ? 20 : 10);
Chris@311 742
Chris@311 743 for (int j = 0; j < iterations; ++j) {
Chris@311 744 em.iterate(column.data());
Chris@311 745 }
Chris@311 746
Chris@311 747 const float *pitchDist = em.getPitchDistribution();
Chris@311 748 const float *const *shiftDist = em.getShifts();
Chris@311 749
Chris@311 750 int shiftCount = 1;
Chris@311 751 if (wantShifts) {
Chris@311 752 shiftCount = pack.templateMaxShift * 2 + 1;
Chris@311 753 }
Chris@311 754
Chris@311 755 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@311 756
Chris@311 757 pitches[j] = pitchDist[j] * sum;
Chris@311 758
Chris@311 759 int bestShift = 0;
Chris@311 760 float bestShiftValue = 0.0;
Chris@311 761 if (wantShifts) {
Chris@311 762 for (int k = 0; k < shiftCount; ++k) {
Chris@311 763 float value = shiftDist[k][j];
Chris@311 764 if (k == 0 || value > bestShiftValue) {
Chris@311 765 bestShiftValue = value;
Chris@311 766 bestShift = k;
Chris@311 767 }
Chris@311 768 }
Chris@311 769 bestShifts.push_back(bestShift);
Chris@311 770 }
Chris@311 771 }
Chris@311 772
Chris@311 773 return { pitches, bestShifts };
Chris@311 774 }
Chris@311 775
Chris@32 776 Silvet::Grid
Chris@32 777 Silvet::preProcess(const Grid &in)
Chris@32 778 {
Chris@32 779 int width = in.size();
Chris@32 780
Chris@165 781 int spacing = processingSampleRate / m_colsPerSec;
Chris@32 782
Chris@165 783 // need to be careful that col spacing is an integer number of samples!
Chris@165 784 assert(spacing * m_colsPerSec == processingSampleRate);
Chris@32 785
Chris@32 786 Grid out;
Chris@32 787
Chris@58 788 // We count the CQ latency in terms of processing hops, but
Chris@58 789 // actually it probably isn't an exact number of hops so this
Chris@58 790 // isn't quite accurate. But the small constant offset is
Chris@165 791 // practically irrelevant compared to the jitter from the frame
Chris@165 792 // size we reduce to in a moment
Chris@33 793 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
Chris@33 794
Chris@298 795 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 796
Chris@32 797 for (int i = 0; i < width; ++i) {
Chris@32 798
Chris@33 799 if (m_columnCount < latentColumns) {
Chris@33 800 ++m_columnCount;
Chris@33 801 continue;
Chris@33 802 }
Chris@33 803
Chris@32 804 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop();
Chris@32 805 int sampleNo = m_columnCount * m_cq->getColumnHop();
Chris@32 806
Chris@32 807 bool select = (sampleNo / spacing != prevSampleNo / spacing);
Chris@32 808
Chris@32 809 if (select) {
Chris@32 810 vector<double> inCol = in[i];
Chris@176 811 vector<double> outCol(pack.templateHeight);
Chris@32 812
Chris@178 813 // In HQ mode, the CQ returns 600 bins and we ignore the
Chris@298 814 // lowest 55 of them (assuming binsPerSemitone == 5).
Chris@178 815 //
Chris@297 816 // In draft and live mode the CQ is an octave shorter,
Chris@300 817 // returning 540 bins or equivalent, so we instead pad
Chris@300 818 // them with an additional 5 or equivalent zeros.
Chris@178 819 //
Chris@178 820 // We also need to reverse the column as we go, since the
Chris@178 821 // raw CQ has the high frequencies first and we need it
Chris@178 822 // the other way around.
Chris@32 823
Chris@298 824 int bps = (m_mode == LiveMode ?
Chris@298 825 binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@298 826
Chris@297 827 if (m_mode == HighQualityMode) {
Chris@178 828 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@298 829 int ix = inCol.size() - j - (11 * bps);
Chris@178 830 outCol[j] = inCol[ix];
Chris@178 831 }
Chris@178 832 } else {
Chris@298 833 for (int j = 0; j < bps; ++j) {
Chris@178 834 outCol[j] = 0.0;
Chris@178 835 }
Chris@298 836 for (int j = bps; j < pack.templateHeight; ++j) {
Chris@298 837 int ix = inCol.size() - j + (bps-1);
Chris@178 838 outCol[j] = inCol[ix];
Chris@178 839 }
Chris@46 840 }
Chris@32 841
Chris@46 842 vector<double> noiseLevel1 =
Chris@298 843 MedianFilter<double>::filter(8 * bps, outCol);
Chris@176 844 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 845 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
Chris@46 846 }
Chris@32 847
Chris@46 848 vector<double> noiseLevel2 =
Chris@298 849 MedianFilter<double>::filter(8 * bps, noiseLevel1);
Chris@176 850 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 851 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
Chris@32 852 }
Chris@32 853
Chris@165 854 out.push_back(outCol);
Chris@32 855 }
Chris@32 856
Chris@32 857 ++m_columnCount;
Chris@32 858 }
Chris@32 859
Chris@32 860 return out;
Chris@32 861 }
Chris@32 862
Chris@294 863 vector<double>
Chris@170 864 Silvet::postProcess(const vector<double> &pitches,
Chris@170 865 const vector<int> &bestShifts,
Chris@170 866 bool wantShifts)
Chris@166 867 {
Chris@298 868 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 869
Chris@41 870 vector<double> filtered;
Chris@41 871
Chris@176 872 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@170 873 m_postFilter[j]->push(pitches[j]);
Chris@41 874 filtered.push_back(m_postFilter[j]->get());
Chris@41 875 }
Chris@41 876
Chris@316 877 if (m_mode == LiveMode) {
Chris@316 878 // In live mode with only a 12-bpo CQ, we are very likely to
Chris@316 879 // get clusters of two or three high scores at a time for
Chris@316 880 // neighbouring semitones. Eliminate these by picking only the
Chris@316 881 // peaks. This means we can't recognise actual semitone chords
Chris@316 882 // if they ever appear, but it's not as if live mode is good
Chris@316 883 // enough for that to be a big deal anyway.
Chris@316 884 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@316 885 if (j > 0 && j + 1 < pack.templateNoteCount &&
Chris@316 886 filtered[j] >= filtered[j-1] &&
Chris@316 887 filtered[j] >= filtered[j+1]) {
Chris@316 888 } else {
Chris@316 889 filtered[j] = 0.0;
Chris@316 890 }
Chris@316 891 }
Chris@316 892 }
Chris@316 893
Chris@41 894 // Threshold for level and reduce number of candidate pitches
Chris@41 895
Chris@41 896 typedef std::multimap<double, int> ValueIndexMap;
Chris@41 897
Chris@41 898 ValueIndexMap strengths;
Chris@166 899
Chris@176 900 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@166 901 double strength = filtered[j];
Chris@183 902 if (strength < pack.levelThreshold) continue;
Chris@168 903 strengths.insert(ValueIndexMap::value_type(strength, j));
Chris@168 904 }
Chris@166 905
Chris@168 906 ValueIndexMap::const_iterator si = strengths.end();
Chris@167 907
Chris@168 908 map<int, double> active;
Chris@168 909 map<int, int> activeShifts;
Chris@168 910
Chris@183 911 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) {
Chris@168 912
Chris@168 913 --si;
Chris@168 914
Chris@168 915 double strength = si->first;
Chris@168 916 int j = si->second;
Chris@168 917
Chris@168 918 active[j] = strength;
Chris@168 919
Chris@170 920 if (wantShifts) {
Chris@170 921 activeShifts[j] = bestShifts[j];
Chris@167 922 }
Chris@41 923 }
Chris@41 924
Chris@168 925 m_pianoRoll.push_back(active);
Chris@170 926
Chris@170 927 if (wantShifts) {
Chris@168 928 m_pianoRollShifts.push_back(activeShifts);
Chris@41 929 }
Chris@294 930
Chris@294 931 return filtered;
Chris@166 932 }
Chris@166 933
Chris@166 934 Vamp::Plugin::FeatureList
Chris@168 935 Silvet::noteTrack(int shiftCount)
Chris@166 936 {
Chris@41 937 // Minimum duration pruning, and conversion to notes. We can only
Chris@41 938 // report notes that have just ended (i.e. that are absent in the
Chris@168 939 // latest active set but present in the prior set in the piano
Chris@41 940 // roll) -- any notes that ended earlier will have been reported
Chris@41 941 // already, and if they haven't ended, we don't know their
Chris@41 942 // duration.
Chris@41 943
Chris@168 944 int width = m_pianoRoll.size() - 1;
Chris@168 945
Chris@168 946 const map<int, double> &active = m_pianoRoll[width];
Chris@41 947
Chris@165 948 double columnDuration = 1.0 / m_colsPerSec;
Chris@165 949
Chris@165 950 // only keep notes >= 100ms or thereabouts
Chris@165 951 int durationThreshold = floor(0.1 / columnDuration); // columns
Chris@165 952 if (durationThreshold < 1) durationThreshold = 1;
Chris@41 953
Chris@41 954 FeatureList noteFeatures;
Chris@41 955
Chris@41 956 if (width < durationThreshold + 1) {
Chris@41 957 return noteFeatures;
Chris@41 958 }
Chris@41 959
Chris@150 960 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix)
Chris@150 961
Chris@55 962 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
Chris@41 963 ni != m_pianoRoll[width-1].end(); ++ni) {
Chris@41 964
Chris@55 965 int note = ni->first;
Chris@41 966
Chris@41 967 if (active.find(note) != active.end()) {
Chris@41 968 // the note is still playing
Chris@41 969 continue;
Chris@41 970 }
Chris@41 971
Chris@41 972 // the note was playing but just ended
Chris@41 973 int end = width;
Chris@41 974 int start = end-1;
Chris@41 975
Chris@41 976 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
Chris@41 977 --start;
Chris@41 978 }
Chris@41 979 ++start;
Chris@41 980
Chris@169 981 if ((end - start) < durationThreshold) {
Chris@41 982 continue;
Chris@41 983 }
Chris@41 984
Chris@169 985 emitNote(start, end, note, shiftCount, noteFeatures);
Chris@41 986 }
Chris@41 987
Chris@62 988 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
Chris@41 989
Chris@41 990 return noteFeatures;
Chris@41 991 }
Chris@41 992
Chris@169 993 void
Chris@169 994 Silvet::emitNote(int start, int end, int note, int shiftCount,
Chris@169 995 FeatureList &noteFeatures)
Chris@169 996 {
Chris@169 997 int partStart = start;
Chris@169 998 int partShift = 0;
Chris@169 999 int partVelocity = 0;
Chris@169 1000
Chris@252 1001 int partThreshold = floor(0.05 * m_colsPerSec);
Chris@169 1002
Chris@169 1003 for (int i = start; i != end; ++i) {
Chris@169 1004
Chris@169 1005 double strength = m_pianoRoll[i][note];
Chris@169 1006
Chris@169 1007 int shift = 0;
Chris@169 1008
Chris@169 1009 if (shiftCount > 1) {
Chris@169 1010
Chris@169 1011 shift = m_pianoRollShifts[i][note];
Chris@169 1012
Chris@169 1013 if (i == partStart) {
Chris@169 1014 partShift = shift;
Chris@169 1015 }
Chris@169 1016
Chris@169 1017 if (i > partStart + partThreshold && shift != partShift) {
Chris@169 1018
Chris@169 1019 // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl;
Chris@169 1020
Chris@169 1021 // pitch has changed, emit an intermediate note
Chris@252 1022 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 1023 i,
Chris@252 1024 note,
Chris@252 1025 partShift,
Chris@252 1026 shiftCount,
Chris@252 1027 partVelocity));
Chris@169 1028 partStart = i;
Chris@169 1029 partShift = shift;
Chris@169 1030 partVelocity = 0;
Chris@169 1031 }
Chris@169 1032 }
Chris@169 1033
Chris@303 1034 int v;
Chris@303 1035 if (m_mode == LiveMode) {
Chris@316 1036 v = round(strength * 20);
Chris@303 1037 } else {
Chris@303 1038 v = round(strength * 2);
Chris@303 1039 }
Chris@169 1040 if (v > partVelocity) {
Chris@169 1041 partVelocity = v;
Chris@169 1042 }
Chris@169 1043 }
Chris@169 1044
Chris@169 1045 if (end >= partStart + partThreshold) {
Chris@252 1046 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 1047 end,
Chris@252 1048 note,
Chris@252 1049 partShift,
Chris@252 1050 shiftCount,
Chris@252 1051 partVelocity));
Chris@169 1052 }
Chris@169 1053 }
Chris@252 1054
Chris@309 1055 RealTime
Chris@309 1056 Silvet::getColumnTimestamp(int column)
Chris@309 1057 {
Chris@309 1058 double columnDuration = 1.0 / m_colsPerSec;
Chris@309 1059 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
Chris@309 1060
Chris@309 1061 return m_startTime + RealTime::fromSeconds
Chris@309 1062 (columnDuration * (column - postFilterLatency) + 0.02);
Chris@309 1063 }
Chris@309 1064
Chris@252 1065 Silvet::Feature
Chris@252 1066 Silvet::makeNoteFeature(int start,
Chris@252 1067 int end,
Chris@252 1068 int note,
Chris@252 1069 int shift,
Chris@252 1070 int shiftCount,
Chris@252 1071 int velocity)
Chris@252 1072 {
Chris@252 1073 Feature f;
Chris@252 1074
Chris@252 1075 f.hasTimestamp = true;
Chris@309 1076 f.timestamp = getColumnTimestamp(start);
Chris@252 1077
Chris@252 1078 f.hasDuration = true;
Chris@309 1079 f.duration = getColumnTimestamp(end) - f.timestamp;
Chris@252 1080
Chris@252 1081 f.values.clear();
Chris@252 1082
Chris@252 1083 f.values.push_back
Chris@252 1084 (noteFrequency(note, shift, shiftCount));
Chris@252 1085
Chris@252 1086 float inputGain = getInputGainAt(f.timestamp);
Chris@252 1087 // cerr << "adjusting velocity from " << velocity << " to " << round(velocity/inputGain) << endl;
Chris@252 1088 velocity = round(velocity / inputGain);
Chris@252 1089 if (velocity > 127) velocity = 127;
Chris@252 1090 if (velocity < 1) velocity = 1;
Chris@252 1091 f.values.push_back(velocity);
Chris@252 1092
Chris@252 1093 f.label = noteName(note, shift, shiftCount);
Chris@252 1094
Chris@252 1095 return f;
Chris@252 1096 }
Chris@252 1097
Chris@252 1098 float
Chris@252 1099 Silvet::getInputGainAt(RealTime t)
Chris@252 1100 {
Chris@252 1101 map<RealTime, float>::const_iterator i = m_inputGains.lower_bound(t);
Chris@252 1102
Chris@252 1103 if (i == m_inputGains.end()) {
Chris@252 1104 if (i != m_inputGains.begin()) {
Chris@252 1105 --i;
Chris@252 1106 } else {
Chris@252 1107 return 1.f; // no data
Chris@252 1108 }
Chris@252 1109 }
Chris@252 1110
Chris@252 1111 // cerr << "gain at time " << t << " = " << i->second << endl;
Chris@252 1112
Chris@252 1113 return i->second;
Chris@252 1114 }
Chris@252 1115