annotate src/Silvet.cpp @ 317:92293058368a livemode

Some small speed improvements for live mode (+ don't use async for it when there's only one template in the EM process: the overhead isn't worth it)
author Chris Cannam
date Tue, 28 Apr 2015 13:55:32 +0100
parents f3e10617a60d
children c37da62ba4e5 8f5cfd7dbaa5
rev   line source
Chris@31 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@31 2
Chris@31 3 /*
Chris@31 4 Silvet
Chris@31 5
Chris@31 6 A Vamp plugin for note transcription.
Chris@31 7 Centre for Digital Music, Queen Mary University of London.
Chris@31 8
Chris@31 9 This program is free software; you can redistribute it and/or
Chris@31 10 modify it under the terms of the GNU General Public License as
Chris@31 11 published by the Free Software Foundation; either version 2 of the
Chris@31 12 License, or (at your option) any later version. See the file
Chris@31 13 COPYING included with this distribution for more information.
Chris@31 14 */
Chris@31 15
Chris@31 16 #include "Silvet.h"
Chris@34 17 #include "EM.h"
Chris@31 18
Chris@152 19 #include <cq/CQSpectrogram.h>
Chris@31 20
Chris@152 21 #include "MedianFilter.h"
Chris@152 22 #include "constant-q-cpp/src/dsp/Resampler.h"
Chris@246 23 #include "flattendynamics-ladspa.h"
Chris@298 24 #include "LiveInstruments.h"
Chris@31 25
Chris@31 26 #include <vector>
Chris@312 27 #include <future>
Chris@31 28
Chris@32 29 #include <cstdio>
Chris@32 30
Chris@31 31 using std::vector;
Chris@48 32 using std::cout;
Chris@31 33 using std::cerr;
Chris@31 34 using std::endl;
Chris@311 35 using std::pair;
Chris@312 36 using std::future;
Chris@312 37 using std::async;
Chris@40 38 using Vamp::RealTime;
Chris@31 39
Chris@31 40 static int processingSampleRate = 44100;
Chris@298 41
Chris@298 42 static int binsPerSemitoneLive = 1;
Chris@298 43 static int binsPerSemitoneNormal = 5;
Chris@170 44
Chris@272 45 static int minInputSampleRate = 100;
Chris@272 46 static int maxInputSampleRate = 192000;
Chris@272 47
Chris@316 48 static const Silvet::ProcessingMode defaultMode = Silvet::HighQualityMode;
Chris@316 49
Chris@31 50 Silvet::Silvet(float inputSampleRate) :
Chris@31 51 Plugin(inputSampleRate),
Chris@161 52 m_instruments(InstrumentPack::listInstrumentPacks()),
Chris@298 53 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)),
Chris@31 54 m_resampler(0),
Chris@246 55 m_flattener(0),
Chris@110 56 m_cq(0),
Chris@316 57 m_mode(defaultMode),
Chris@166 58 m_fineTuning(false),
Chris@178 59 m_instrument(0),
Chris@313 60 m_colsPerSec(50),
Chris@313 61 m_haveStartTime(false)
Chris@31 62 {
Chris@31 63 }
Chris@31 64
Chris@31 65 Silvet::~Silvet()
Chris@31 66 {
Chris@31 67 delete m_resampler;
Chris@246 68 delete m_flattener;
Chris@31 69 delete m_cq;
Chris@41 70 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 71 delete m_postFilter[i];
Chris@41 72 }
Chris@31 73 }
Chris@31 74
Chris@31 75 string
Chris@31 76 Silvet::getIdentifier() const
Chris@31 77 {
Chris@31 78 return "silvet";
Chris@31 79 }
Chris@31 80
Chris@31 81 string
Chris@31 82 Silvet::getName() const
Chris@31 83 {
Chris@31 84 return "Silvet Note Transcription";
Chris@31 85 }
Chris@31 86
Chris@31 87 string
Chris@31 88 Silvet::getDescription() const
Chris@31 89 {
Chris@191 90 return "Estimate the note onsets, pitches, and durations that make up a music recording.";
Chris@31 91 }
Chris@31 92
Chris@31 93 string
Chris@31 94 Silvet::getMaker() const
Chris@31 95 {
Chris@191 96 return "Queen Mary, University of London";
Chris@31 97 }
Chris@31 98
Chris@31 99 int
Chris@31 100 Silvet::getPluginVersion() const
Chris@31 101 {
Chris@309 102 return 3;
Chris@31 103 }
Chris@31 104
Chris@31 105 string
Chris@31 106 Silvet::getCopyright() const
Chris@31 107 {
Chris@191 108 return "Method by Emmanouil Benetos and Simon Dixon; plugin by Chris Cannam and Emmanouil Benetos. GPL licence.";
Chris@31 109 }
Chris@31 110
Chris@31 111 Silvet::InputDomain
Chris@31 112 Silvet::getInputDomain() const
Chris@31 113 {
Chris@31 114 return TimeDomain;
Chris@31 115 }
Chris@31 116
Chris@31 117 size_t
Chris@31 118 Silvet::getPreferredBlockSize() const
Chris@31 119 {
Chris@31 120 return 0;
Chris@31 121 }
Chris@31 122
Chris@31 123 size_t
Chris@31 124 Silvet::getPreferredStepSize() const
Chris@31 125 {
Chris@31 126 return 0;
Chris@31 127 }
Chris@31 128
Chris@31 129 size_t
Chris@31 130 Silvet::getMinChannelCount() const
Chris@31 131 {
Chris@31 132 return 1;
Chris@31 133 }
Chris@31 134
Chris@31 135 size_t
Chris@31 136 Silvet::getMaxChannelCount() const
Chris@31 137 {
Chris@31 138 return 1;
Chris@31 139 }
Chris@31 140
Chris@31 141 Silvet::ParameterList
Chris@31 142 Silvet::getParameterDescriptors() const
Chris@31 143 {
Chris@31 144 ParameterList list;
Chris@110 145
Chris@110 146 ParameterDescriptor desc;
Chris@110 147 desc.identifier = "mode";
Chris@110 148 desc.name = "Processing mode";
Chris@110 149 desc.unit = "";
Chris@297 150 desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode is tuned in favour of overall speed; Live mode is tuned in favour of lower latency; while Intensive mode (the default) will almost always produce the best results.";
Chris@110 151 desc.minValue = 0;
Chris@297 152 desc.maxValue = 2;
Chris@316 153 desc.defaultValue = int(defaultMode);
Chris@110 154 desc.isQuantized = true;
Chris@110 155 desc.quantizeStep = 1;
Chris@166 156 desc.valueNames.push_back("Draft (faster)");
Chris@165 157 desc.valueNames.push_back("Intensive (higher quality)");
Chris@297 158 desc.valueNames.push_back("Live (lower latency)");
Chris@161 159 list.push_back(desc);
Chris@161 160
Chris@176 161 desc.identifier = "instrument";
Chris@176 162 desc.name = "Instrument";
Chris@161 163 desc.unit = "";
Chris@271 164 desc.description = "The instrument or instruments known to be present in the recording. This affects the set of instrument templates used, as well as the expected level of polyphony in the output. Using a more limited set of instruments than the default will also make the plugin run faster.\nNote that this plugin cannot isolate instruments: you can't use this setting to request notes from only one instrument in a recording with several. Instead, use this as a hint to the plugin about which instruments are actually present.";
Chris@161 165 desc.minValue = 0;
Chris@162 166 desc.maxValue = m_instruments.size()-1;
Chris@162 167 desc.defaultValue = 0;
Chris@161 168 desc.isQuantized = true;
Chris@161 169 desc.quantizeStep = 1;
Chris@161 170 desc.valueNames.clear();
Chris@162 171 for (int i = 0; i < int(m_instruments.size()); ++i) {
Chris@162 172 desc.valueNames.push_back(m_instruments[i].name);
Chris@162 173 }
Chris@166 174 list.push_back(desc);
Chris@161 175
Chris@166 176 desc.identifier = "finetune";
Chris@166 177 desc.name = "Return fine pitch estimates";
Chris@166 178 desc.unit = "";
Chris@271 179 desc.description = "Return pitch estimates at finer than semitone resolution. This works only in Intensive mode. Notes that appear to drift in pitch will be split up into shorter notes with individually finer pitches.";
Chris@166 180 desc.minValue = 0;
Chris@166 181 desc.maxValue = 1;
Chris@166 182 desc.defaultValue = 0;
Chris@166 183 desc.isQuantized = true;
Chris@166 184 desc.quantizeStep = 1;
Chris@166 185 desc.valueNames.clear();
Chris@110 186 list.push_back(desc);
Chris@110 187
Chris@31 188 return list;
Chris@31 189 }
Chris@31 190
Chris@31 191 float
Chris@31 192 Silvet::getParameter(string identifier) const
Chris@31 193 {
Chris@110 194 if (identifier == "mode") {
Chris@297 195 return (float)(int)m_mode;
Chris@166 196 } else if (identifier == "finetune") {
Chris@166 197 return m_fineTuning ? 1.f : 0.f;
Chris@176 198 } else if (identifier == "instrument") {
Chris@162 199 return m_instrument;
Chris@110 200 }
Chris@31 201 return 0;
Chris@31 202 }
Chris@31 203
Chris@31 204 void
Chris@31 205 Silvet::setParameter(string identifier, float value)
Chris@31 206 {
Chris@110 207 if (identifier == "mode") {
Chris@297 208 m_mode = (ProcessingMode)(int)(value + 0.5);
Chris@166 209 } else if (identifier == "finetune") {
Chris@166 210 m_fineTuning = (value > 0.5);
Chris@176 211 } else if (identifier == "instrument") {
Chris@162 212 m_instrument = lrintf(value);
Chris@110 213 }
Chris@31 214 }
Chris@31 215
Chris@31 216 Silvet::ProgramList
Chris@31 217 Silvet::getPrograms() const
Chris@31 218 {
Chris@31 219 ProgramList list;
Chris@31 220 return list;
Chris@31 221 }
Chris@31 222
Chris@31 223 string
Chris@31 224 Silvet::getCurrentProgram() const
Chris@31 225 {
Chris@31 226 return "";
Chris@31 227 }
Chris@31 228
Chris@31 229 void
Chris@31 230 Silvet::selectProgram(string name)
Chris@31 231 {
Chris@31 232 }
Chris@31 233
Chris@31 234 Silvet::OutputList
Chris@31 235 Silvet::getOutputDescriptors() const
Chris@31 236 {
Chris@31 237 OutputList list;
Chris@31 238
Chris@31 239 OutputDescriptor d;
Chris@51 240 d.identifier = "notes";
Chris@51 241 d.name = "Note transcription";
Chris@271 242 d.description = "Overall note transcription. Each note has time, duration, estimated pitch, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
Chris@41 243 d.unit = "Hz";
Chris@31 244 d.hasFixedBinCount = true;
Chris@31 245 d.binCount = 2;
Chris@41 246 d.binNames.push_back("Frequency");
Chris@31 247 d.binNames.push_back("Velocity");
Chris@31 248 d.hasKnownExtents = false;
Chris@31 249 d.isQuantized = false;
Chris@31 250 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@246 251 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@31 252 d.hasDuration = true;
Chris@32 253 m_notesOutputNo = list.size();
Chris@32 254 list.push_back(d);
Chris@32 255
Chris@178 256 d.identifier = "timefreq";
Chris@178 257 d.name = "Time-frequency distribution";
Chris@271 258 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
Chris@178 259 d.unit = "";
Chris@178 260 d.hasFixedBinCount = true;
Chris@298 261 d.binCount = getPack(0).templateHeight;
Chris@178 262 d.binNames.clear();
Chris@178 263 if (m_cq) {
Chris@294 264 char name[50];
Chris@298 265 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@178 266 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@178 267 // lowest-frequency 55 bins have been dropped, for a
Chris@178 268 // 545-bin template. The native CQ bins go high->low
Chris@178 269 // frequency though, so these are still the first 545 bins
Chris@178 270 // as reported by getBinFrequency, though in reverse order
Chris@178 271 float freq = m_cq->getBinFrequency
Chris@298 272 (getPack(0).templateHeight - i - 1);
Chris@178 273 sprintf(name, "%.1f Hz", freq);
Chris@178 274 d.binNames.push_back(name);
Chris@178 275 }
Chris@178 276 }
Chris@178 277 d.hasKnownExtents = false;
Chris@178 278 d.isQuantized = false;
Chris@178 279 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@178 280 d.sampleRate = m_colsPerSec;
Chris@178 281 d.hasDuration = false;
Chris@178 282 m_fcqOutputNo = list.size();
Chris@178 283 list.push_back(d);
Chris@178 284
Chris@294 285 d.identifier = "pitchactivation";
Chris@294 286 d.name = "Pitch activation distribution";
Chris@294 287 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction.";
Chris@294 288 d.unit = "";
Chris@294 289 d.hasFixedBinCount = true;
Chris@298 290 d.binCount = getPack(0).templateNoteCount;
Chris@294 291 d.binNames.clear();
Chris@294 292 if (m_cq) {
Chris@298 293 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@294 294 d.binNames.push_back(noteName(i, 0, 1));
Chris@294 295 }
Chris@294 296 }
Chris@294 297 d.hasKnownExtents = false;
Chris@294 298 d.isQuantized = false;
Chris@294 299 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@294 300 d.sampleRate = m_colsPerSec;
Chris@294 301 d.hasDuration = false;
Chris@294 302 m_pitchOutputNo = list.size();
Chris@294 303 list.push_back(d);
Chris@294 304
Chris@309 305 d.identifier = "chroma";
Chris@309 306 d.name = "Pitch chroma distribution";
Chris@309 307 d.description = "Pitch chroma distribution formed by wrapping the un-thresholded pitch activation distribution into a single octave of semitone bins.";
Chris@309 308 d.unit = "";
Chris@309 309 d.hasFixedBinCount = true;
Chris@309 310 d.binCount = 12;
Chris@309 311 d.binNames.clear();
Chris@309 312 if (m_cq) {
Chris@309 313 for (int i = 0; i < 12; ++i) {
Chris@309 314 d.binNames.push_back(chromaName(i));
Chris@309 315 }
Chris@309 316 }
Chris@309 317 d.hasKnownExtents = false;
Chris@309 318 d.isQuantized = false;
Chris@309 319 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@309 320 d.sampleRate = m_colsPerSec;
Chris@309 321 d.hasDuration = false;
Chris@309 322 m_chromaOutputNo = list.size();
Chris@309 323 list.push_back(d);
Chris@309 324
Chris@302 325 d.identifier = "templates";
Chris@302 326 d.name = "Templates";
Chris@302 327 d.description = "Constant-Q spectral templates for the selected instrument pack.";
Chris@302 328 d.unit = "";
Chris@302 329 d.hasFixedBinCount = true;
Chris@302 330 d.binCount = getPack(0).templateHeight;
Chris@302 331 d.binNames.clear();
Chris@302 332 if (m_cq) {
Chris@302 333 char name[50];
Chris@302 334 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@302 335 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@302 336 // lowest-frequency 55 bins have been dropped, for a
Chris@302 337 // 545-bin template. The native CQ bins go high->low
Chris@302 338 // frequency though, so these are still the first 545 bins
Chris@302 339 // as reported by getBinFrequency, though in reverse order
Chris@302 340 float freq = m_cq->getBinFrequency
Chris@302 341 (getPack(0).templateHeight - i - 1);
Chris@302 342 sprintf(name, "%.1f Hz", freq);
Chris@302 343 d.binNames.push_back(name);
Chris@302 344 }
Chris@302 345 }
Chris@302 346 d.hasKnownExtents = false;
Chris@302 347 d.isQuantized = false;
Chris@302 348 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@302 349 d.sampleRate = m_colsPerSec;
Chris@302 350 d.hasDuration = false;
Chris@302 351 m_templateOutputNo = list.size();
Chris@302 352 list.push_back(d);
Chris@302 353
Chris@31 354 return list;
Chris@31 355 }
Chris@31 356
Chris@38 357 std::string
Chris@309 358 Silvet::chromaName(int pitch) const
Chris@38 359 {
Chris@38 360 static const char *names[] = {
Chris@38 361 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
Chris@38 362 };
Chris@38 363
Chris@309 364 return names[pitch];
Chris@309 365 }
Chris@309 366
Chris@309 367 std::string
Chris@309 368 Silvet::noteName(int note, int shift, int shiftCount) const
Chris@309 369 {
Chris@309 370 string n = chromaName(note % 12);
Chris@38 371
Chris@175 372 int oct = (note + 9) / 12;
Chris@38 373
Chris@175 374 char buf[30];
Chris@175 375
Chris@175 376 float pshift = 0.f;
Chris@175 377 if (shiftCount > 1) {
Chris@175 378 // see noteFrequency below
Chris@175 379 pshift =
Chris@175 380 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 381 }
Chris@175 382
Chris@175 383 if (pshift > 0.f) {
Chris@309 384 sprintf(buf, "%s%d+%dc", n.c_str(), oct, int(round(pshift * 100)));
Chris@175 385 } else if (pshift < 0.f) {
Chris@309 386 sprintf(buf, "%s%d-%dc", n.c_str(), oct, int(round((-pshift) * 100)));
Chris@175 387 } else {
Chris@309 388 sprintf(buf, "%s%d", n.c_str(), oct);
Chris@175 389 }
Chris@38 390
Chris@38 391 return buf;
Chris@38 392 }
Chris@38 393
Chris@41 394 float
Chris@168 395 Silvet::noteFrequency(int note, int shift, int shiftCount) const
Chris@41 396 {
Chris@169 397 // Convert shift number to a pitch shift. The given shift number
Chris@169 398 // is an offset into the template array, which starts with some
Chris@169 399 // zeros, followed by the template, then some trailing zeros.
Chris@169 400 //
Chris@169 401 // Example: if we have templateMaxShift == 2 and thus shiftCount
Chris@169 402 // == 5, then the number will be in the range 0-4 and the template
Chris@169 403 // will have 2 zeros at either end. Thus number 2 represents the
Chris@169 404 // template "as recorded", for a pitch shift of 0; smaller indices
Chris@169 405 // represent moving the template *up* in pitch (by introducing
Chris@169 406 // zeros at the start, which is the low-frequency end), for a
Chris@169 407 // positive pitch shift; and higher values represent moving it
Chris@169 408 // down in pitch, for a negative pitch shift.
Chris@169 409
Chris@175 410 float pshift = 0.f;
Chris@175 411 if (shiftCount > 1) {
Chris@175 412 pshift =
Chris@175 413 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 414 }
Chris@169 415
Chris@301 416 float freq = float(27.5 * pow(2.0, (note + pshift) / 12.0));
Chris@301 417
Chris@303 418 // cerr << "note = " << note << ", shift = " << shift << ", shiftCount = "
Chris@303 419 // << shiftCount << ", obtained freq = " << freq << endl;
Chris@301 420
Chris@301 421 return freq;
Chris@41 422 }
Chris@41 423
Chris@31 424 bool
Chris@31 425 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@31 426 {
Chris@272 427 if (m_inputSampleRate < minInputSampleRate ||
Chris@272 428 m_inputSampleRate > maxInputSampleRate) {
Chris@272 429 cerr << "Silvet::initialise: Unsupported input sample rate "
Chris@272 430 << m_inputSampleRate << " (supported min " << minInputSampleRate
Chris@272 431 << ", max " << maxInputSampleRate << ")" << endl;
Chris@272 432 return false;
Chris@272 433 }
Chris@272 434
Chris@31 435 if (channels < getMinChannelCount() ||
Chris@272 436 channels > getMaxChannelCount()) {
Chris@272 437 cerr << "Silvet::initialise: Unsupported channel count " << channels
Chris@272 438 << " (supported min " << getMinChannelCount() << ", max "
Chris@272 439 << getMaxChannelCount() << ")" << endl;
Chris@272 440 return false;
Chris@272 441 }
Chris@31 442
Chris@31 443 if (stepSize != blockSize) {
Chris@31 444 cerr << "Silvet::initialise: Step size must be the same as block size ("
Chris@31 445 << stepSize << " != " << blockSize << ")" << endl;
Chris@31 446 return false;
Chris@31 447 }
Chris@31 448
Chris@31 449 m_blockSize = blockSize;
Chris@31 450
Chris@31 451 reset();
Chris@31 452
Chris@31 453 return true;
Chris@31 454 }
Chris@31 455
Chris@31 456 void
Chris@31 457 Silvet::reset()
Chris@31 458 {
Chris@31 459 delete m_resampler;
Chris@246 460 delete m_flattener;
Chris@31 461 delete m_cq;
Chris@31 462
Chris@31 463 if (m_inputSampleRate != processingSampleRate) {
Chris@31 464 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
Chris@31 465 } else {
Chris@31 466 m_resampler = 0;
Chris@31 467 }
Chris@31 468
Chris@246 469 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
Chris@246 470 m_flattener->reset();
Chris@246 471
Chris@301 472 // this happens to be processingSampleRate / 3, and is the top
Chris@301 473 // freq used for the EM templates:
Chris@301 474 double maxFreq = 14700;
Chris@301 475
Chris@301 476 if (m_mode == LiveMode) {
Chris@301 477 // We only have 12 bpo rather than 60, so we need the top bin
Chris@301 478 // to be the middle one of the top 5, i.e. 2/5 of a semitone
Chris@301 479 // lower than 14700
Chris@301 480 maxFreq *= powf(2.0, -1.0 / 30.0);
Chris@301 481 }
Chris@301 482
Chris@173 483 double minFreq = 27.5;
Chris@173 484
Chris@297 485 if (m_mode != HighQualityMode) {
Chris@173 486 // We don't actually return any notes from the bottom octave,
Chris@173 487 // so we can just pad with zeros
Chris@173 488 minFreq *= 2;
Chris@173 489 }
Chris@173 490
Chris@298 491 int bpo = 12 *
Chris@298 492 (m_mode == LiveMode ? binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@301 493
Chris@154 494 CQParameters params(processingSampleRate,
Chris@173 495 minFreq,
Chris@303 496 maxFreq,
Chris@298 497 bpo);
Chris@154 498
Chris@316 499 // For params.q, the MIREX code uses 0.8, but it seems that with
Chris@316 500 // atomHopFactor of 0.3, using q == 0.9 or lower drops the FFT
Chris@316 501 // size to 512 from 1024 and alters some other processing
Chris@316 502 // parameters, making everything much, much slower. Could be a
Chris@316 503 // flaw in the CQ parameter calculations, must check. For
Chris@316 504 // atomHopFactor == 1, q == 0.8 is fine
Chris@316 505 params.q = (m_mode == HighQualityMode ? 0.95 : 0.8);
Chris@316 506 params.atomHopFactor = (m_mode == HighQualityMode ? 0.3 : 1.0);
Chris@154 507 params.threshold = 0.0005;
Chris@317 508 params.decimator =
Chris@317 509 (m_mode == LiveMode ?
Chris@317 510 CQParameters::FasterDecimator : CQParameters::BetterDecimator);
Chris@172 511 params.window = CQParameters::Hann;
Chris@154 512
Chris@154 513 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
Chris@31 514
Chris@303 515 // cerr << "CQ bins = " << m_cq->getTotalBins() << endl;
Chris@303 516 // cerr << "CQ min freq = " << m_cq->getMinFrequency() << " (and for confirmation, freq of bin 0 = " << m_cq->getBinFrequency(0) << ")" << endl;
Chris@297 517
Chris@297 518 m_colsPerSec = (m_mode == DraftMode ? 25 : 50);
Chris@165 519
Chris@41 520 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 521 delete m_postFilter[i];
Chris@41 522 }
Chris@41 523 m_postFilter.clear();
Chris@303 524 int postFilterLength = 3;
Chris@298 525 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@303 526 m_postFilter.push_back(new MedianFilter<double>(postFilterLength));
Chris@41 527 }
Chris@41 528 m_pianoRoll.clear();
Chris@246 529 m_inputGains.clear();
Chris@32 530 m_columnCount = 0;
Chris@272 531 m_resampledCount = 0;
Chris@40 532 m_startTime = RealTime::zeroTime;
Chris@313 533 m_haveStartTime = false;
Chris@31 534 }
Chris@31 535
Chris@31 536 Silvet::FeatureSet
Chris@31 537 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
Chris@31 538 {
Chris@302 539 FeatureSet fs;
Chris@302 540
Chris@313 541 if (!m_haveStartTime) {
Chris@314 542
Chris@40 543 m_startTime = timestamp;
Chris@313 544 m_haveStartTime = true;
Chris@314 545
Chris@302 546 insertTemplateFeatures(fs);
Chris@40 547 }
Chris@246 548
Chris@246 549 vector<float> flattened(m_blockSize);
Chris@246 550 float gain = 1.f;
Chris@246 551 m_flattener->connectInputPort
Chris@246 552 (FlattenDynamics::AudioInputPort, inputBuffers[0]);
Chris@246 553 m_flattener->connectOutputPort
Chris@246 554 (FlattenDynamics::AudioOutputPort, &flattened[0]);
Chris@246 555 m_flattener->connectOutputPort
Chris@246 556 (FlattenDynamics::GainOutputPort, &gain);
Chris@246 557 m_flattener->process(m_blockSize);
Chris@246 558
Chris@252 559 m_inputGains[timestamp] = gain;
Chris@40 560
Chris@31 561 vector<double> data;
Chris@40 562 for (int i = 0; i < m_blockSize; ++i) {
Chris@246 563 double d = flattened[i];
Chris@235 564 data.push_back(d);
Chris@40 565 }
Chris@31 566
Chris@31 567 if (m_resampler) {
Chris@272 568
Chris@31 569 data = m_resampler->process(data.data(), data.size());
Chris@272 570
Chris@272 571 int hadCount = m_resampledCount;
Chris@272 572 m_resampledCount += data.size();
Chris@272 573
Chris@272 574 int resamplerLatency = m_resampler->getLatency();
Chris@272 575
Chris@272 576 if (hadCount < resamplerLatency) {
Chris@272 577 int stillToDrop = resamplerLatency - hadCount;
Chris@272 578 if (stillToDrop >= int(data.size())) {
Chris@302 579 return fs;
Chris@272 580 } else {
Chris@272 581 data = vector<double>(data.begin() + stillToDrop, data.end());
Chris@272 582 }
Chris@272 583 }
Chris@31 584 }
Chris@272 585
Chris@32 586 Grid cqout = m_cq->process(data);
Chris@302 587 transcribe(cqout, fs);
Chris@51 588 return fs;
Chris@34 589 }
Chris@34 590
Chris@34 591 Silvet::FeatureSet
Chris@34 592 Silvet::getRemainingFeatures()
Chris@34 593 {
Chris@145 594 Grid cqout = m_cq->getRemainingOutput();
Chris@302 595 FeatureSet fs;
Chris@302 596 if (m_columnCount == 0) {
Chris@302 597 // process() was never called, but we still want these
Chris@302 598 insertTemplateFeatures(fs);
Chris@302 599 } else {
Chris@302 600 transcribe(cqout, fs);
Chris@302 601 }
Chris@51 602 return fs;
Chris@34 603 }
Chris@34 604
Chris@302 605 void
Chris@302 606 Silvet::insertTemplateFeatures(FeatureSet &fs)
Chris@302 607 {
Chris@302 608 const InstrumentPack &pack = getPack(m_instrument);
Chris@302 609 for (int i = 0; i < int(pack.templates.size()) * pack.templateNoteCount; ++i) {
Chris@302 610 RealTime timestamp = RealTime::fromSeconds(double(i) / m_colsPerSec);
Chris@302 611 Feature f;
Chris@302 612 char buffer[50];
Chris@302 613 sprintf(buffer, "Note %d", i + 1);
Chris@302 614 f.label = buffer;
Chris@302 615 f.hasTimestamp = true;
Chris@302 616 f.timestamp = timestamp;
Chris@302 617 f.values = pack.templates[i / pack.templateNoteCount]
Chris@302 618 .data[i % pack.templateNoteCount];
Chris@302 619 fs[m_templateOutputNo].push_back(f);
Chris@302 620 }
Chris@302 621 }
Chris@302 622
Chris@302 623 void
Chris@302 624 Silvet::transcribe(const Grid &cqout, Silvet::FeatureSet &fs)
Chris@34 625 {
Chris@32 626 Grid filtered = preProcess(cqout);
Chris@31 627
Chris@302 628 if (filtered.empty()) return;
Chris@170 629
Chris@298 630 const InstrumentPack &pack(getPack(m_instrument));
Chris@104 631
Chris@178 632 for (int i = 0; i < (int)filtered.size(); ++i) {
Chris@178 633 Feature f;
Chris@178 634 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@178 635 f.values.push_back(float(filtered[i][j]));
Chris@178 636 }
Chris@178 637 fs[m_fcqOutputNo].push_back(f);
Chris@178 638 }
Chris@178 639
Chris@34 640 int width = filtered.size();
Chris@34 641
Chris@311 642 Grid localPitches(width);
Chris@170 643
Chris@297 644 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning;
Chris@170 645 int shiftCount = 1;
Chris@170 646 if (wantShifts) {
Chris@170 647 shiftCount = pack.templateMaxShift * 2 + 1;
Chris@170 648 }
Chris@170 649
Chris@170 650 vector<vector<int> > localBestShifts;
Chris@170 651 if (wantShifts) {
Chris@311 652 localBestShifts = vector<vector<int> >(width);
Chris@170 653 }
Chris@170 654
Chris@312 655 #ifndef MAX_EM_THREADS
Chris@312 656 #define MAX_EM_THREADS 8
Chris@312 657 #endif
Chris@312 658
Chris@317 659 int emThreadCount = MAX_EM_THREADS;
Chris@317 660 if (m_mode == LiveMode && pack.templates.size() == 1) {
Chris@317 661 // The EM step is probably not slow enough to merit it
Chris@317 662 emThreadCount = 1;
Chris@317 663 }
Chris@317 664
Chris@312 665 #if (defined(MAX_EM_THREADS) && (MAX_EM_THREADS > 1))
Chris@317 666 if (emThreadCount > 1) {
Chris@317 667 for (int i = 0; i < width; ) {
Chris@317 668 typedef future<pair<vector<double>, vector<int>>> EMFuture;
Chris@317 669 vector<EMFuture> results;
Chris@317 670 for (int j = 0; j < emThreadCount && i + j < width; ++j) {
Chris@317 671 results.push_back
Chris@317 672 (async(std::launch::async,
Chris@317 673 [&](int index) {
Chris@317 674 return applyEM(pack, filtered.at(index), wantShifts);
Chris@317 675 }, i + j));
Chris@317 676 }
Chris@317 677 for (int j = 0; j < emThreadCount && i + j < width; ++j) {
Chris@317 678 auto out = results[j].get();
Chris@317 679 localPitches[i+j] = out.first;
Chris@317 680 if (wantShifts) localBestShifts[i+j] = out.second;
Chris@317 681 }
Chris@317 682 i += emThreadCount;
Chris@312 683 }
Chris@123 684 }
Chris@312 685 #endif
Chris@317 686
Chris@317 687 if (emThreadCount == 1) {
Chris@317 688 for (int i = 0; i < width; ++i) {
Chris@317 689 auto out = applyEM(pack, filtered.at(i), wantShifts);
Chris@317 690 localPitches[i] = out.first;
Chris@317 691 if (wantShifts) localBestShifts[i] = out.second;
Chris@317 692 }
Chris@317 693 }
Chris@305 694
Chris@166 695 for (int i = 0; i < width; ++i) {
Chris@37 696
Chris@309 697 // This returns a filtered column, and pushes the
Chris@309 698 // up-to-max-polyphony activation column to m_pianoRoll
Chris@294 699 vector<double> filtered = postProcess
Chris@294 700 (localPitches[i], localBestShifts[i], wantShifts);
Chris@294 701
Chris@309 702 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1);
Chris@309 703 float inputGain = getInputGainAt(timestamp);
Chris@309 704
Chris@294 705 Feature f;
Chris@294 706 for (int j = 0; j < (int)filtered.size(); ++j) {
Chris@309 707 float v = filtered[j];
Chris@294 708 if (v < pack.levelThreshold) v = 0.f;
Chris@309 709 f.values.push_back(v / inputGain);
Chris@294 710 }
Chris@294 711 fs[m_pitchOutputNo].push_back(f);
Chris@309 712
Chris@309 713 f.values.clear();
Chris@309 714 f.values.resize(12);
Chris@309 715 for (int j = 0; j < (int)filtered.size(); ++j) {
Chris@309 716 f.values[j % 12] += filtered[j] / inputGain;
Chris@309 717 }
Chris@309 718 fs[m_chromaOutputNo].push_back(f);
Chris@166 719
Chris@168 720 FeatureList noteFeatures = noteTrack(shiftCount);
Chris@38 721
Chris@123 722 for (FeatureList::const_iterator fi = noteFeatures.begin();
Chris@123 723 fi != noteFeatures.end(); ++fi) {
Chris@123 724 fs[m_notesOutputNo].push_back(*fi);
Chris@40 725 }
Chris@34 726 }
Chris@31 727 }
Chris@31 728
Chris@311 729 pair<vector<double>, vector<int> >
Chris@311 730 Silvet::applyEM(const InstrumentPack &pack,
Chris@311 731 const vector<double> &column,
Chris@311 732 bool wantShifts)
Chris@311 733 {
Chris@311 734 double columnThreshold = 1e-5;
Chris@311 735
Chris@314 736 if (m_mode == LiveMode) {
Chris@314 737 columnThreshold /= 20;
Chris@314 738 }
Chris@314 739
Chris@311 740 vector<double> pitches(pack.templateNoteCount, 0.0);
Chris@311 741 vector<int> bestShifts;
Chris@311 742
Chris@311 743 double sum = 0.0;
Chris@311 744 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@311 745 sum += column.at(j);
Chris@311 746 }
Chris@311 747 if (sum < columnThreshold) return { pitches, bestShifts };
Chris@311 748
Chris@314 749 EM em(&pack, m_mode == HighQualityMode);
Chris@311 750
Chris@311 751 em.setPitchSparsity(pack.pitchSparsity);
Chris@311 752 em.setSourceSparsity(pack.sourceSparsity);
Chris@311 753
Chris@314 754 int iterations = (m_mode == HighQualityMode ? 20 : 10);
Chris@311 755
Chris@311 756 for (int j = 0; j < iterations; ++j) {
Chris@311 757 em.iterate(column.data());
Chris@311 758 }
Chris@311 759
Chris@311 760 const float *pitchDist = em.getPitchDistribution();
Chris@311 761 const float *const *shiftDist = em.getShifts();
Chris@311 762
Chris@311 763 int shiftCount = 1;
Chris@311 764 if (wantShifts) {
Chris@311 765 shiftCount = pack.templateMaxShift * 2 + 1;
Chris@311 766 }
Chris@311 767
Chris@311 768 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@311 769
Chris@311 770 pitches[j] = pitchDist[j] * sum;
Chris@311 771
Chris@311 772 int bestShift = 0;
Chris@311 773 float bestShiftValue = 0.0;
Chris@311 774 if (wantShifts) {
Chris@311 775 for (int k = 0; k < shiftCount; ++k) {
Chris@311 776 float value = shiftDist[k][j];
Chris@311 777 if (k == 0 || value > bestShiftValue) {
Chris@311 778 bestShiftValue = value;
Chris@311 779 bestShift = k;
Chris@311 780 }
Chris@311 781 }
Chris@311 782 bestShifts.push_back(bestShift);
Chris@311 783 }
Chris@311 784 }
Chris@311 785
Chris@311 786 return { pitches, bestShifts };
Chris@311 787 }
Chris@311 788
Chris@32 789 Silvet::Grid
Chris@32 790 Silvet::preProcess(const Grid &in)
Chris@32 791 {
Chris@32 792 int width = in.size();
Chris@32 793
Chris@165 794 int spacing = processingSampleRate / m_colsPerSec;
Chris@32 795
Chris@165 796 // need to be careful that col spacing is an integer number of samples!
Chris@165 797 assert(spacing * m_colsPerSec == processingSampleRate);
Chris@32 798
Chris@32 799 Grid out;
Chris@32 800
Chris@58 801 // We count the CQ latency in terms of processing hops, but
Chris@58 802 // actually it probably isn't an exact number of hops so this
Chris@58 803 // isn't quite accurate. But the small constant offset is
Chris@165 804 // practically irrelevant compared to the jitter from the frame
Chris@165 805 // size we reduce to in a moment
Chris@33 806 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
Chris@33 807
Chris@298 808 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 809
Chris@32 810 for (int i = 0; i < width; ++i) {
Chris@32 811
Chris@33 812 if (m_columnCount < latentColumns) {
Chris@33 813 ++m_columnCount;
Chris@33 814 continue;
Chris@33 815 }
Chris@33 816
Chris@32 817 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop();
Chris@32 818 int sampleNo = m_columnCount * m_cq->getColumnHop();
Chris@32 819
Chris@32 820 bool select = (sampleNo / spacing != prevSampleNo / spacing);
Chris@32 821
Chris@32 822 if (select) {
Chris@32 823 vector<double> inCol = in[i];
Chris@176 824 vector<double> outCol(pack.templateHeight);
Chris@32 825
Chris@178 826 // In HQ mode, the CQ returns 600 bins and we ignore the
Chris@298 827 // lowest 55 of them (assuming binsPerSemitone == 5).
Chris@178 828 //
Chris@297 829 // In draft and live mode the CQ is an octave shorter,
Chris@300 830 // returning 540 bins or equivalent, so we instead pad
Chris@300 831 // them with an additional 5 or equivalent zeros.
Chris@178 832 //
Chris@178 833 // We also need to reverse the column as we go, since the
Chris@178 834 // raw CQ has the high frequencies first and we need it
Chris@178 835 // the other way around.
Chris@32 836
Chris@298 837 int bps = (m_mode == LiveMode ?
Chris@298 838 binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@298 839
Chris@297 840 if (m_mode == HighQualityMode) {
Chris@178 841 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@298 842 int ix = inCol.size() - j - (11 * bps);
Chris@178 843 outCol[j] = inCol[ix];
Chris@178 844 }
Chris@178 845 } else {
Chris@298 846 for (int j = 0; j < bps; ++j) {
Chris@178 847 outCol[j] = 0.0;
Chris@178 848 }
Chris@298 849 for (int j = bps; j < pack.templateHeight; ++j) {
Chris@298 850 int ix = inCol.size() - j + (bps-1);
Chris@178 851 outCol[j] = inCol[ix];
Chris@178 852 }
Chris@46 853 }
Chris@32 854
Chris@46 855 vector<double> noiseLevel1 =
Chris@298 856 MedianFilter<double>::filter(8 * bps, outCol);
Chris@176 857 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 858 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
Chris@46 859 }
Chris@32 860
Chris@46 861 vector<double> noiseLevel2 =
Chris@298 862 MedianFilter<double>::filter(8 * bps, noiseLevel1);
Chris@176 863 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 864 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
Chris@32 865 }
Chris@32 866
Chris@165 867 out.push_back(outCol);
Chris@32 868 }
Chris@32 869
Chris@32 870 ++m_columnCount;
Chris@32 871 }
Chris@32 872
Chris@32 873 return out;
Chris@32 874 }
Chris@32 875
Chris@294 876 vector<double>
Chris@170 877 Silvet::postProcess(const vector<double> &pitches,
Chris@170 878 const vector<int> &bestShifts,
Chris@170 879 bool wantShifts)
Chris@166 880 {
Chris@298 881 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 882
Chris@41 883 vector<double> filtered;
Chris@41 884
Chris@176 885 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@170 886 m_postFilter[j]->push(pitches[j]);
Chris@41 887 filtered.push_back(m_postFilter[j]->get());
Chris@41 888 }
Chris@41 889
Chris@316 890 if (m_mode == LiveMode) {
Chris@316 891 // In live mode with only a 12-bpo CQ, we are very likely to
Chris@316 892 // get clusters of two or three high scores at a time for
Chris@316 893 // neighbouring semitones. Eliminate these by picking only the
Chris@316 894 // peaks. This means we can't recognise actual semitone chords
Chris@316 895 // if they ever appear, but it's not as if live mode is good
Chris@316 896 // enough for that to be a big deal anyway.
Chris@316 897 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@316 898 if (j > 0 && j + 1 < pack.templateNoteCount &&
Chris@316 899 filtered[j] >= filtered[j-1] &&
Chris@316 900 filtered[j] >= filtered[j+1]) {
Chris@316 901 } else {
Chris@316 902 filtered[j] = 0.0;
Chris@316 903 }
Chris@316 904 }
Chris@316 905 }
Chris@316 906
Chris@41 907 // Threshold for level and reduce number of candidate pitches
Chris@41 908
Chris@41 909 typedef std::multimap<double, int> ValueIndexMap;
Chris@41 910
Chris@41 911 ValueIndexMap strengths;
Chris@166 912
Chris@176 913 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@166 914 double strength = filtered[j];
Chris@183 915 if (strength < pack.levelThreshold) continue;
Chris@168 916 strengths.insert(ValueIndexMap::value_type(strength, j));
Chris@168 917 }
Chris@166 918
Chris@168 919 ValueIndexMap::const_iterator si = strengths.end();
Chris@167 920
Chris@168 921 map<int, double> active;
Chris@168 922 map<int, int> activeShifts;
Chris@168 923
Chris@183 924 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) {
Chris@168 925
Chris@168 926 --si;
Chris@168 927
Chris@168 928 double strength = si->first;
Chris@168 929 int j = si->second;
Chris@168 930
Chris@168 931 active[j] = strength;
Chris@168 932
Chris@170 933 if (wantShifts) {
Chris@170 934 activeShifts[j] = bestShifts[j];
Chris@167 935 }
Chris@41 936 }
Chris@41 937
Chris@168 938 m_pianoRoll.push_back(active);
Chris@170 939
Chris@170 940 if (wantShifts) {
Chris@168 941 m_pianoRollShifts.push_back(activeShifts);
Chris@41 942 }
Chris@294 943
Chris@294 944 return filtered;
Chris@166 945 }
Chris@166 946
Chris@166 947 Vamp::Plugin::FeatureList
Chris@168 948 Silvet::noteTrack(int shiftCount)
Chris@166 949 {
Chris@41 950 // Minimum duration pruning, and conversion to notes. We can only
Chris@41 951 // report notes that have just ended (i.e. that are absent in the
Chris@168 952 // latest active set but present in the prior set in the piano
Chris@41 953 // roll) -- any notes that ended earlier will have been reported
Chris@41 954 // already, and if they haven't ended, we don't know their
Chris@41 955 // duration.
Chris@41 956
Chris@168 957 int width = m_pianoRoll.size() - 1;
Chris@168 958
Chris@168 959 const map<int, double> &active = m_pianoRoll[width];
Chris@41 960
Chris@165 961 double columnDuration = 1.0 / m_colsPerSec;
Chris@165 962
Chris@165 963 // only keep notes >= 100ms or thereabouts
Chris@165 964 int durationThreshold = floor(0.1 / columnDuration); // columns
Chris@165 965 if (durationThreshold < 1) durationThreshold = 1;
Chris@41 966
Chris@41 967 FeatureList noteFeatures;
Chris@41 968
Chris@41 969 if (width < durationThreshold + 1) {
Chris@41 970 return noteFeatures;
Chris@41 971 }
Chris@41 972
Chris@150 973 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix)
Chris@150 974
Chris@55 975 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
Chris@41 976 ni != m_pianoRoll[width-1].end(); ++ni) {
Chris@41 977
Chris@55 978 int note = ni->first;
Chris@41 979
Chris@41 980 if (active.find(note) != active.end()) {
Chris@41 981 // the note is still playing
Chris@41 982 continue;
Chris@41 983 }
Chris@41 984
Chris@41 985 // the note was playing but just ended
Chris@41 986 int end = width;
Chris@41 987 int start = end-1;
Chris@41 988
Chris@41 989 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
Chris@41 990 --start;
Chris@41 991 }
Chris@41 992 ++start;
Chris@41 993
Chris@169 994 if ((end - start) < durationThreshold) {
Chris@41 995 continue;
Chris@41 996 }
Chris@41 997
Chris@169 998 emitNote(start, end, note, shiftCount, noteFeatures);
Chris@41 999 }
Chris@41 1000
Chris@62 1001 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
Chris@41 1002
Chris@41 1003 return noteFeatures;
Chris@41 1004 }
Chris@41 1005
Chris@169 1006 void
Chris@169 1007 Silvet::emitNote(int start, int end, int note, int shiftCount,
Chris@169 1008 FeatureList &noteFeatures)
Chris@169 1009 {
Chris@169 1010 int partStart = start;
Chris@169 1011 int partShift = 0;
Chris@169 1012 int partVelocity = 0;
Chris@169 1013
Chris@252 1014 int partThreshold = floor(0.05 * m_colsPerSec);
Chris@169 1015
Chris@169 1016 for (int i = start; i != end; ++i) {
Chris@169 1017
Chris@169 1018 double strength = m_pianoRoll[i][note];
Chris@169 1019
Chris@169 1020 int shift = 0;
Chris@169 1021
Chris@169 1022 if (shiftCount > 1) {
Chris@169 1023
Chris@169 1024 shift = m_pianoRollShifts[i][note];
Chris@169 1025
Chris@169 1026 if (i == partStart) {
Chris@169 1027 partShift = shift;
Chris@169 1028 }
Chris@169 1029
Chris@169 1030 if (i > partStart + partThreshold && shift != partShift) {
Chris@169 1031
Chris@169 1032 // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl;
Chris@169 1033
Chris@169 1034 // pitch has changed, emit an intermediate note
Chris@252 1035 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 1036 i,
Chris@252 1037 note,
Chris@252 1038 partShift,
Chris@252 1039 shiftCount,
Chris@252 1040 partVelocity));
Chris@169 1041 partStart = i;
Chris@169 1042 partShift = shift;
Chris@169 1043 partVelocity = 0;
Chris@169 1044 }
Chris@169 1045 }
Chris@169 1046
Chris@303 1047 int v;
Chris@303 1048 if (m_mode == LiveMode) {
Chris@316 1049 v = round(strength * 20);
Chris@303 1050 } else {
Chris@303 1051 v = round(strength * 2);
Chris@303 1052 }
Chris@169 1053 if (v > partVelocity) {
Chris@169 1054 partVelocity = v;
Chris@169 1055 }
Chris@169 1056 }
Chris@169 1057
Chris@169 1058 if (end >= partStart + partThreshold) {
Chris@252 1059 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 1060 end,
Chris@252 1061 note,
Chris@252 1062 partShift,
Chris@252 1063 shiftCount,
Chris@252 1064 partVelocity));
Chris@169 1065 }
Chris@169 1066 }
Chris@252 1067
Chris@309 1068 RealTime
Chris@309 1069 Silvet::getColumnTimestamp(int column)
Chris@309 1070 {
Chris@309 1071 double columnDuration = 1.0 / m_colsPerSec;
Chris@309 1072 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
Chris@309 1073
Chris@309 1074 return m_startTime + RealTime::fromSeconds
Chris@309 1075 (columnDuration * (column - postFilterLatency) + 0.02);
Chris@309 1076 }
Chris@309 1077
Chris@252 1078 Silvet::Feature
Chris@252 1079 Silvet::makeNoteFeature(int start,
Chris@252 1080 int end,
Chris@252 1081 int note,
Chris@252 1082 int shift,
Chris@252 1083 int shiftCount,
Chris@252 1084 int velocity)
Chris@252 1085 {
Chris@252 1086 Feature f;
Chris@252 1087
Chris@252 1088 f.hasTimestamp = true;
Chris@309 1089 f.timestamp = getColumnTimestamp(start);
Chris@252 1090
Chris@252 1091 f.hasDuration = true;
Chris@309 1092 f.duration = getColumnTimestamp(end) - f.timestamp;
Chris@252 1093
Chris@252 1094 f.values.clear();
Chris@252 1095
Chris@252 1096 f.values.push_back
Chris@252 1097 (noteFrequency(note, shift, shiftCount));
Chris@252 1098
Chris@252 1099 float inputGain = getInputGainAt(f.timestamp);
Chris@252 1100 // cerr << "adjusting velocity from " << velocity << " to " << round(velocity/inputGain) << endl;
Chris@252 1101 velocity = round(velocity / inputGain);
Chris@252 1102 if (velocity > 127) velocity = 127;
Chris@252 1103 if (velocity < 1) velocity = 1;
Chris@252 1104 f.values.push_back(velocity);
Chris@252 1105
Chris@252 1106 f.label = noteName(note, shift, shiftCount);
Chris@252 1107
Chris@252 1108 return f;
Chris@252 1109 }
Chris@252 1110
Chris@252 1111 float
Chris@252 1112 Silvet::getInputGainAt(RealTime t)
Chris@252 1113 {
Chris@252 1114 map<RealTime, float>::const_iterator i = m_inputGains.lower_bound(t);
Chris@252 1115
Chris@252 1116 if (i == m_inputGains.end()) {
Chris@252 1117 if (i != m_inputGains.begin()) {
Chris@252 1118 --i;
Chris@252 1119 } else {
Chris@252 1120 return 1.f; // no data
Chris@252 1121 }
Chris@252 1122 }
Chris@252 1123
Chris@252 1124 // cerr << "gain at time " << t << " = " << i->second << endl;
Chris@252 1125
Chris@252 1126 return i->second;
Chris@252 1127 }
Chris@252 1128