annotate src/Silvet.cpp @ 327:df9a8e16bae6 livemode-octave-higher

Experiment with dropping the bottom octave off each template (since most of the information is in higher harmonics anyway!) -- this is about 15% faster again and has half the latency, but per
author Chris Cannam
date Tue, 19 May 2015 09:29:00 +0100
parents 4cf4313d7e30
children
rev   line source
Chris@31 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@31 2
Chris@31 3 /*
Chris@31 4 Silvet
Chris@31 5
Chris@31 6 A Vamp plugin for note transcription.
Chris@31 7 Centre for Digital Music, Queen Mary University of London.
Chris@31 8
Chris@31 9 This program is free software; you can redistribute it and/or
Chris@31 10 modify it under the terms of the GNU General Public License as
Chris@31 11 published by the Free Software Foundation; either version 2 of the
Chris@31 12 License, or (at your option) any later version. See the file
Chris@31 13 COPYING included with this distribution for more information.
Chris@31 14 */
Chris@31 15
Chris@31 16 #include "Silvet.h"
Chris@34 17 #include "EM.h"
Chris@31 18
Chris@152 19 #include <cq/CQSpectrogram.h>
Chris@31 20
Chris@152 21 #include "MedianFilter.h"
Chris@152 22 #include "constant-q-cpp/src/dsp/Resampler.h"
Chris@246 23 #include "flattendynamics-ladspa.h"
Chris@298 24 #include "LiveInstruments.h"
Chris@31 25
Chris@31 26 #include <vector>
Chris@312 27 #include <future>
Chris@31 28
Chris@32 29 #include <cstdio>
Chris@32 30
Chris@31 31 using std::vector;
Chris@48 32 using std::cout;
Chris@31 33 using std::cerr;
Chris@31 34 using std::endl;
Chris@311 35 using std::pair;
Chris@312 36 using std::future;
Chris@312 37 using std::async;
Chris@40 38 using Vamp::RealTime;
Chris@31 39
Chris@31 40 static int processingSampleRate = 44100;
Chris@298 41
Chris@298 42 static int binsPerSemitoneLive = 1;
Chris@298 43 static int binsPerSemitoneNormal = 5;
Chris@170 44
Chris@272 45 static int minInputSampleRate = 100;
Chris@272 46 static int maxInputSampleRate = 192000;
Chris@272 47
Chris@316 48 static const Silvet::ProcessingMode defaultMode = Silvet::HighQualityMode;
Chris@316 49
Chris@31 50 Silvet::Silvet(float inputSampleRate) :
Chris@31 51 Plugin(inputSampleRate),
Chris@161 52 m_instruments(InstrumentPack::listInstrumentPacks()),
Chris@298 53 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)),
Chris@31 54 m_resampler(0),
Chris@246 55 m_flattener(0),
Chris@110 56 m_cq(0),
Chris@316 57 m_mode(defaultMode),
Chris@166 58 m_fineTuning(false),
Chris@178 59 m_instrument(0),
Chris@313 60 m_colsPerSec(50),
Chris@313 61 m_haveStartTime(false)
Chris@31 62 {
Chris@31 63 }
Chris@31 64
Chris@31 65 Silvet::~Silvet()
Chris@31 66 {
Chris@31 67 delete m_resampler;
Chris@246 68 delete m_flattener;
Chris@31 69 delete m_cq;
Chris@41 70 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 71 delete m_postFilter[i];
Chris@41 72 }
Chris@31 73 }
Chris@31 74
Chris@31 75 string
Chris@31 76 Silvet::getIdentifier() const
Chris@31 77 {
Chris@31 78 return "silvet";
Chris@31 79 }
Chris@31 80
Chris@31 81 string
Chris@31 82 Silvet::getName() const
Chris@31 83 {
Chris@31 84 return "Silvet Note Transcription";
Chris@31 85 }
Chris@31 86
Chris@31 87 string
Chris@31 88 Silvet::getDescription() const
Chris@31 89 {
Chris@191 90 return "Estimate the note onsets, pitches, and durations that make up a music recording.";
Chris@31 91 }
Chris@31 92
Chris@31 93 string
Chris@31 94 Silvet::getMaker() const
Chris@31 95 {
Chris@191 96 return "Queen Mary, University of London";
Chris@31 97 }
Chris@31 98
Chris@31 99 int
Chris@31 100 Silvet::getPluginVersion() const
Chris@31 101 {
Chris@309 102 return 3;
Chris@31 103 }
Chris@31 104
Chris@31 105 string
Chris@31 106 Silvet::getCopyright() const
Chris@31 107 {
Chris@191 108 return "Method by Emmanouil Benetos and Simon Dixon; plugin by Chris Cannam and Emmanouil Benetos. GPL licence.";
Chris@31 109 }
Chris@31 110
Chris@31 111 Silvet::InputDomain
Chris@31 112 Silvet::getInputDomain() const
Chris@31 113 {
Chris@31 114 return TimeDomain;
Chris@31 115 }
Chris@31 116
Chris@31 117 size_t
Chris@31 118 Silvet::getPreferredBlockSize() const
Chris@31 119 {
Chris@31 120 return 0;
Chris@31 121 }
Chris@31 122
Chris@31 123 size_t
Chris@31 124 Silvet::getPreferredStepSize() const
Chris@31 125 {
Chris@31 126 return 0;
Chris@31 127 }
Chris@31 128
Chris@31 129 size_t
Chris@31 130 Silvet::getMinChannelCount() const
Chris@31 131 {
Chris@31 132 return 1;
Chris@31 133 }
Chris@31 134
Chris@31 135 size_t
Chris@31 136 Silvet::getMaxChannelCount() const
Chris@31 137 {
Chris@31 138 return 1;
Chris@31 139 }
Chris@31 140
Chris@31 141 Silvet::ParameterList
Chris@31 142 Silvet::getParameterDescriptors() const
Chris@31 143 {
Chris@31 144 ParameterList list;
Chris@110 145
Chris@110 146 ParameterDescriptor desc;
Chris@110 147 desc.identifier = "mode";
Chris@110 148 desc.name = "Processing mode";
Chris@110 149 desc.unit = "";
Chris@297 150 desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode is tuned in favour of overall speed; Live mode is tuned in favour of lower latency; while Intensive mode (the default) will almost always produce the best results.";
Chris@110 151 desc.minValue = 0;
Chris@297 152 desc.maxValue = 2;
Chris@316 153 desc.defaultValue = int(defaultMode);
Chris@110 154 desc.isQuantized = true;
Chris@110 155 desc.quantizeStep = 1;
Chris@166 156 desc.valueNames.push_back("Draft (faster)");
Chris@165 157 desc.valueNames.push_back("Intensive (higher quality)");
Chris@297 158 desc.valueNames.push_back("Live (lower latency)");
Chris@161 159 list.push_back(desc);
Chris@161 160
Chris@176 161 desc.identifier = "instrument";
Chris@176 162 desc.name = "Instrument";
Chris@161 163 desc.unit = "";
Chris@271 164 desc.description = "The instrument or instruments known to be present in the recording. This affects the set of instrument templates used, as well as the expected level of polyphony in the output. Using a more limited set of instruments than the default will also make the plugin run faster.\nNote that this plugin cannot isolate instruments: you can't use this setting to request notes from only one instrument in a recording with several. Instead, use this as a hint to the plugin about which instruments are actually present.";
Chris@161 165 desc.minValue = 0;
Chris@162 166 desc.maxValue = m_instruments.size()-1;
Chris@162 167 desc.defaultValue = 0;
Chris@161 168 desc.isQuantized = true;
Chris@161 169 desc.quantizeStep = 1;
Chris@161 170 desc.valueNames.clear();
Chris@162 171 for (int i = 0; i < int(m_instruments.size()); ++i) {
Chris@162 172 desc.valueNames.push_back(m_instruments[i].name);
Chris@162 173 }
Chris@166 174 list.push_back(desc);
Chris@161 175
Chris@166 176 desc.identifier = "finetune";
Chris@166 177 desc.name = "Return fine pitch estimates";
Chris@166 178 desc.unit = "";
Chris@271 179 desc.description = "Return pitch estimates at finer than semitone resolution. This works only in Intensive mode. Notes that appear to drift in pitch will be split up into shorter notes with individually finer pitches.";
Chris@166 180 desc.minValue = 0;
Chris@166 181 desc.maxValue = 1;
Chris@166 182 desc.defaultValue = 0;
Chris@166 183 desc.isQuantized = true;
Chris@166 184 desc.quantizeStep = 1;
Chris@166 185 desc.valueNames.clear();
Chris@110 186 list.push_back(desc);
Chris@110 187
Chris@31 188 return list;
Chris@31 189 }
Chris@31 190
Chris@31 191 float
Chris@31 192 Silvet::getParameter(string identifier) const
Chris@31 193 {
Chris@110 194 if (identifier == "mode") {
Chris@297 195 return (float)(int)m_mode;
Chris@166 196 } else if (identifier == "finetune") {
Chris@166 197 return m_fineTuning ? 1.f : 0.f;
Chris@176 198 } else if (identifier == "instrument") {
Chris@162 199 return m_instrument;
Chris@110 200 }
Chris@31 201 return 0;
Chris@31 202 }
Chris@31 203
Chris@31 204 void
Chris@31 205 Silvet::setParameter(string identifier, float value)
Chris@31 206 {
Chris@110 207 if (identifier == "mode") {
Chris@297 208 m_mode = (ProcessingMode)(int)(value + 0.5);
Chris@166 209 } else if (identifier == "finetune") {
Chris@166 210 m_fineTuning = (value > 0.5);
Chris@176 211 } else if (identifier == "instrument") {
Chris@162 212 m_instrument = lrintf(value);
Chris@110 213 }
Chris@31 214 }
Chris@31 215
Chris@31 216 Silvet::ProgramList
Chris@31 217 Silvet::getPrograms() const
Chris@31 218 {
Chris@31 219 ProgramList list;
Chris@31 220 return list;
Chris@31 221 }
Chris@31 222
Chris@31 223 string
Chris@31 224 Silvet::getCurrentProgram() const
Chris@31 225 {
Chris@31 226 return "";
Chris@31 227 }
Chris@31 228
Chris@31 229 void
Chris@31 230 Silvet::selectProgram(string name)
Chris@31 231 {
Chris@31 232 }
Chris@31 233
Chris@31 234 Silvet::OutputList
Chris@31 235 Silvet::getOutputDescriptors() const
Chris@31 236 {
Chris@31 237 OutputList list;
Chris@31 238
Chris@31 239 OutputDescriptor d;
Chris@51 240 d.identifier = "notes";
Chris@51 241 d.name = "Note transcription";
Chris@271 242 d.description = "Overall note transcription. Each note has time, duration, estimated pitch, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
Chris@41 243 d.unit = "Hz";
Chris@31 244 d.hasFixedBinCount = true;
Chris@31 245 d.binCount = 2;
Chris@41 246 d.binNames.push_back("Frequency");
Chris@31 247 d.binNames.push_back("Velocity");
Chris@31 248 d.hasKnownExtents = false;
Chris@31 249 d.isQuantized = false;
Chris@31 250 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@246 251 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@31 252 d.hasDuration = true;
Chris@32 253 m_notesOutputNo = list.size();
Chris@32 254 list.push_back(d);
Chris@32 255
Chris@319 256 d.identifier = "onsets";
Chris@319 257 d.name = "Note onsets";
Chris@323 258 d.description = "Note onsets, without durations. These can be calculated sooner than complete notes, because it isn't necessary to wait for a note to finish before returning its feature. Each event has time, estimated fundamental frequency in Hz, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
Chris@319 259 d.unit = "Hz";
Chris@319 260 d.hasFixedBinCount = true;
Chris@319 261 d.binCount = 2;
Chris@319 262 d.binNames.push_back("Frequency");
Chris@319 263 d.binNames.push_back("Velocity");
Chris@319 264 d.hasKnownExtents = false;
Chris@319 265 d.isQuantized = false;
Chris@319 266 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@319 267 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
Chris@319 268 d.hasDuration = false;
Chris@319 269 m_onsetsOutputNo = list.size();
Chris@319 270 list.push_back(d);
Chris@319 271
Chris@178 272 d.identifier = "timefreq";
Chris@178 273 d.name = "Time-frequency distribution";
Chris@271 274 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
Chris@178 275 d.unit = "";
Chris@178 276 d.hasFixedBinCount = true;
Chris@298 277 d.binCount = getPack(0).templateHeight;
Chris@178 278 d.binNames.clear();
Chris@178 279 if (m_cq) {
Chris@294 280 char name[50];
Chris@298 281 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@178 282 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@178 283 // lowest-frequency 55 bins have been dropped, for a
Chris@178 284 // 545-bin template. The native CQ bins go high->low
Chris@178 285 // frequency though, so these are still the first 545 bins
Chris@178 286 // as reported by getBinFrequency, though in reverse order
Chris@178 287 float freq = m_cq->getBinFrequency
Chris@298 288 (getPack(0).templateHeight - i - 1);
Chris@178 289 sprintf(name, "%.1f Hz", freq);
Chris@178 290 d.binNames.push_back(name);
Chris@178 291 }
Chris@178 292 }
Chris@178 293 d.hasKnownExtents = false;
Chris@178 294 d.isQuantized = false;
Chris@178 295 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@178 296 d.sampleRate = m_colsPerSec;
Chris@178 297 d.hasDuration = false;
Chris@178 298 m_fcqOutputNo = list.size();
Chris@178 299 list.push_back(d);
Chris@178 300
Chris@294 301 d.identifier = "pitchactivation";
Chris@294 302 d.name = "Pitch activation distribution";
Chris@294 303 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction.";
Chris@294 304 d.unit = "";
Chris@294 305 d.hasFixedBinCount = true;
Chris@298 306 d.binCount = getPack(0).templateNoteCount;
Chris@294 307 d.binNames.clear();
Chris@294 308 if (m_cq) {
Chris@298 309 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@320 310 d.binNames.push_back(getNoteName(i, 0, 1));
Chris@294 311 }
Chris@294 312 }
Chris@294 313 d.hasKnownExtents = false;
Chris@294 314 d.isQuantized = false;
Chris@294 315 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@294 316 d.sampleRate = m_colsPerSec;
Chris@294 317 d.hasDuration = false;
Chris@294 318 m_pitchOutputNo = list.size();
Chris@294 319 list.push_back(d);
Chris@294 320
Chris@309 321 d.identifier = "chroma";
Chris@309 322 d.name = "Pitch chroma distribution";
Chris@309 323 d.description = "Pitch chroma distribution formed by wrapping the un-thresholded pitch activation distribution into a single octave of semitone bins.";
Chris@309 324 d.unit = "";
Chris@309 325 d.hasFixedBinCount = true;
Chris@309 326 d.binCount = 12;
Chris@309 327 d.binNames.clear();
Chris@309 328 if (m_cq) {
Chris@309 329 for (int i = 0; i < 12; ++i) {
Chris@320 330 d.binNames.push_back(getChromaName(i));
Chris@309 331 }
Chris@309 332 }
Chris@309 333 d.hasKnownExtents = false;
Chris@309 334 d.isQuantized = false;
Chris@309 335 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@309 336 d.sampleRate = m_colsPerSec;
Chris@309 337 d.hasDuration = false;
Chris@309 338 m_chromaOutputNo = list.size();
Chris@309 339 list.push_back(d);
Chris@309 340
Chris@302 341 d.identifier = "templates";
Chris@302 342 d.name = "Templates";
Chris@302 343 d.description = "Constant-Q spectral templates for the selected instrument pack.";
Chris@302 344 d.unit = "";
Chris@302 345 d.hasFixedBinCount = true;
Chris@302 346 d.binCount = getPack(0).templateHeight;
Chris@302 347 d.binNames.clear();
Chris@302 348 if (m_cq) {
Chris@302 349 char name[50];
Chris@302 350 for (int i = 0; i < getPack(0).templateHeight; ++i) {
Chris@302 351 // We have a 600-bin (10 oct 60-bin CQ) of which the
Chris@302 352 // lowest-frequency 55 bins have been dropped, for a
Chris@302 353 // 545-bin template. The native CQ bins go high->low
Chris@302 354 // frequency though, so these are still the first 545 bins
Chris@302 355 // as reported by getBinFrequency, though in reverse order
Chris@302 356 float freq = m_cq->getBinFrequency
Chris@302 357 (getPack(0).templateHeight - i - 1);
Chris@302 358 sprintf(name, "%.1f Hz", freq);
Chris@302 359 d.binNames.push_back(name);
Chris@302 360 }
Chris@302 361 }
Chris@302 362 d.hasKnownExtents = false;
Chris@302 363 d.isQuantized = false;
Chris@302 364 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@302 365 d.sampleRate = m_colsPerSec;
Chris@302 366 d.hasDuration = false;
Chris@302 367 m_templateOutputNo = list.size();
Chris@302 368 list.push_back(d);
Chris@302 369
Chris@31 370 return list;
Chris@31 371 }
Chris@31 372
Chris@38 373 std::string
Chris@320 374 Silvet::getChromaName(int pitch) const
Chris@38 375 {
Chris@38 376 static const char *names[] = {
Chris@38 377 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
Chris@38 378 };
Chris@38 379
Chris@309 380 return names[pitch];
Chris@309 381 }
Chris@309 382
Chris@309 383 std::string
Chris@320 384 Silvet::getNoteName(int note, int shift, int shiftCount) const
Chris@309 385 {
Chris@320 386 string n = getChromaName(note % 12);
Chris@38 387
Chris@175 388 int oct = (note + 9) / 12;
Chris@38 389
Chris@175 390 char buf[30];
Chris@175 391
Chris@175 392 float pshift = 0.f;
Chris@175 393 if (shiftCount > 1) {
Chris@320 394 // see getNoteFrequency below
Chris@175 395 pshift =
Chris@175 396 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 397 }
Chris@175 398
Chris@175 399 if (pshift > 0.f) {
Chris@309 400 sprintf(buf, "%s%d+%dc", n.c_str(), oct, int(round(pshift * 100)));
Chris@175 401 } else if (pshift < 0.f) {
Chris@309 402 sprintf(buf, "%s%d-%dc", n.c_str(), oct, int(round((-pshift) * 100)));
Chris@175 403 } else {
Chris@309 404 sprintf(buf, "%s%d", n.c_str(), oct);
Chris@175 405 }
Chris@38 406
Chris@38 407 return buf;
Chris@38 408 }
Chris@38 409
Chris@41 410 float
Chris@320 411 Silvet::getNoteFrequency(int note, int shift, int shiftCount) const
Chris@41 412 {
Chris@169 413 // Convert shift number to a pitch shift. The given shift number
Chris@169 414 // is an offset into the template array, which starts with some
Chris@169 415 // zeros, followed by the template, then some trailing zeros.
Chris@169 416 //
Chris@169 417 // Example: if we have templateMaxShift == 2 and thus shiftCount
Chris@169 418 // == 5, then the number will be in the range 0-4 and the template
Chris@169 419 // will have 2 zeros at either end. Thus number 2 represents the
Chris@169 420 // template "as recorded", for a pitch shift of 0; smaller indices
Chris@169 421 // represent moving the template *up* in pitch (by introducing
Chris@169 422 // zeros at the start, which is the low-frequency end), for a
Chris@169 423 // positive pitch shift; and higher values represent moving it
Chris@169 424 // down in pitch, for a negative pitch shift.
Chris@169 425
Chris@175 426 float pshift = 0.f;
Chris@175 427 if (shiftCount > 1) {
Chris@175 428 pshift =
Chris@175 429 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
Chris@175 430 }
Chris@169 431
Chris@301 432 float freq = float(27.5 * pow(2.0, (note + pshift) / 12.0));
Chris@301 433
Chris@303 434 // cerr << "note = " << note << ", shift = " << shift << ", shiftCount = "
Chris@303 435 // << shiftCount << ", obtained freq = " << freq << endl;
Chris@301 436
Chris@301 437 return freq;
Chris@41 438 }
Chris@41 439
Chris@31 440 bool
Chris@31 441 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@31 442 {
Chris@272 443 if (m_inputSampleRate < minInputSampleRate ||
Chris@272 444 m_inputSampleRate > maxInputSampleRate) {
Chris@272 445 cerr << "Silvet::initialise: Unsupported input sample rate "
Chris@272 446 << m_inputSampleRate << " (supported min " << minInputSampleRate
Chris@272 447 << ", max " << maxInputSampleRate << ")" << endl;
Chris@272 448 return false;
Chris@272 449 }
Chris@272 450
Chris@31 451 if (channels < getMinChannelCount() ||
Chris@272 452 channels > getMaxChannelCount()) {
Chris@272 453 cerr << "Silvet::initialise: Unsupported channel count " << channels
Chris@272 454 << " (supported min " << getMinChannelCount() << ", max "
Chris@272 455 << getMaxChannelCount() << ")" << endl;
Chris@272 456 return false;
Chris@272 457 }
Chris@31 458
Chris@31 459 if (stepSize != blockSize) {
Chris@31 460 cerr << "Silvet::initialise: Step size must be the same as block size ("
Chris@31 461 << stepSize << " != " << blockSize << ")" << endl;
Chris@31 462 return false;
Chris@31 463 }
Chris@31 464
Chris@31 465 m_blockSize = blockSize;
Chris@31 466
Chris@31 467 reset();
Chris@31 468
Chris@31 469 return true;
Chris@31 470 }
Chris@31 471
Chris@31 472 void
Chris@31 473 Silvet::reset()
Chris@31 474 {
Chris@31 475 delete m_resampler;
Chris@246 476 delete m_flattener;
Chris@31 477 delete m_cq;
Chris@31 478
Chris@31 479 if (m_inputSampleRate != processingSampleRate) {
Chris@31 480 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
Chris@31 481 } else {
Chris@31 482 m_resampler = 0;
Chris@31 483 }
Chris@31 484
Chris@246 485 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
Chris@246 486 m_flattener->reset();
Chris@246 487
Chris@301 488 // this happens to be processingSampleRate / 3, and is the top
Chris@301 489 // freq used for the EM templates:
Chris@301 490 double maxFreq = 14700;
Chris@301 491
Chris@301 492 if (m_mode == LiveMode) {
Chris@301 493 // We only have 12 bpo rather than 60, so we need the top bin
Chris@301 494 // to be the middle one of the top 5, i.e. 2/5 of a semitone
Chris@301 495 // lower than 14700
Chris@301 496 maxFreq *= powf(2.0, -1.0 / 30.0);
Chris@301 497 }
Chris@301 498
Chris@173 499 double minFreq = 27.5;
Chris@173 500
Chris@297 501 if (m_mode != HighQualityMode) {
Chris@173 502 // We don't actually return any notes from the bottom octave,
Chris@327 503 // so we can just pad with zeros. In live mode the template is
Chris@327 504 // an octave shorter as well. Each octave the min frequency is
Chris@327 505 // raised by halves the processing latency.
Chris@327 506 if (m_mode == LiveMode) {
Chris@327 507 minFreq *= 4;
Chris@327 508 } else {
Chris@327 509 minFreq *= 2;
Chris@327 510 }
Chris@173 511 }
Chris@173 512
Chris@298 513 int bpo = 12 *
Chris@298 514 (m_mode == LiveMode ? binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@301 515
Chris@154 516 CQParameters params(processingSampleRate,
Chris@173 517 minFreq,
Chris@303 518 maxFreq,
Chris@298 519 bpo);
Chris@154 520
Chris@325 521 params.q = 0.8;
Chris@325 522 params.atomHopFactor = (m_mode == LiveMode ? 1.0 : 0.3);
Chris@154 523 params.threshold = 0.0005;
Chris@317 524 params.decimator =
Chris@317 525 (m_mode == LiveMode ?
Chris@317 526 CQParameters::FasterDecimator : CQParameters::BetterDecimator);
Chris@172 527 params.window = CQParameters::Hann;
Chris@154 528
Chris@154 529 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
Chris@31 530
Chris@303 531 // cerr << "CQ bins = " << m_cq->getTotalBins() << endl;
Chris@303 532 // cerr << "CQ min freq = " << m_cq->getMinFrequency() << " (and for confirmation, freq of bin 0 = " << m_cq->getBinFrequency(0) << ")" << endl;
Chris@297 533
Chris@297 534 m_colsPerSec = (m_mode == DraftMode ? 25 : 50);
Chris@165 535
Chris@41 536 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
Chris@41 537 delete m_postFilter[i];
Chris@41 538 }
Chris@41 539 m_postFilter.clear();
Chris@303 540 int postFilterLength = 3;
Chris@298 541 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
Chris@303 542 m_postFilter.push_back(new MedianFilter<double>(postFilterLength));
Chris@41 543 }
Chris@41 544 m_pianoRoll.clear();
Chris@246 545 m_inputGains.clear();
Chris@32 546 m_columnCount = 0;
Chris@272 547 m_resampledCount = 0;
Chris@40 548 m_startTime = RealTime::zeroTime;
Chris@313 549 m_haveStartTime = false;
Chris@31 550 }
Chris@31 551
Chris@31 552 Silvet::FeatureSet
Chris@31 553 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
Chris@31 554 {
Chris@302 555 FeatureSet fs;
Chris@302 556
Chris@313 557 if (!m_haveStartTime) {
Chris@314 558
Chris@40 559 m_startTime = timestamp;
Chris@313 560 m_haveStartTime = true;
Chris@314 561
Chris@302 562 insertTemplateFeatures(fs);
Chris@40 563 }
Chris@246 564
Chris@246 565 vector<float> flattened(m_blockSize);
Chris@246 566 float gain = 1.f;
Chris@246 567 m_flattener->connectInputPort
Chris@246 568 (FlattenDynamics::AudioInputPort, inputBuffers[0]);
Chris@246 569 m_flattener->connectOutputPort
Chris@246 570 (FlattenDynamics::AudioOutputPort, &flattened[0]);
Chris@246 571 m_flattener->connectOutputPort
Chris@246 572 (FlattenDynamics::GainOutputPort, &gain);
Chris@246 573 m_flattener->process(m_blockSize);
Chris@246 574
Chris@252 575 m_inputGains[timestamp] = gain;
Chris@40 576
Chris@31 577 vector<double> data;
Chris@40 578 for (int i = 0; i < m_blockSize; ++i) {
Chris@246 579 double d = flattened[i];
Chris@235 580 data.push_back(d);
Chris@40 581 }
Chris@31 582
Chris@31 583 if (m_resampler) {
Chris@272 584
Chris@31 585 data = m_resampler->process(data.data(), data.size());
Chris@272 586
Chris@272 587 int hadCount = m_resampledCount;
Chris@272 588 m_resampledCount += data.size();
Chris@272 589
Chris@272 590 int resamplerLatency = m_resampler->getLatency();
Chris@272 591
Chris@272 592 if (hadCount < resamplerLatency) {
Chris@272 593 int stillToDrop = resamplerLatency - hadCount;
Chris@272 594 if (stillToDrop >= int(data.size())) {
Chris@302 595 return fs;
Chris@272 596 } else {
Chris@272 597 data = vector<double>(data.begin() + stillToDrop, data.end());
Chris@272 598 }
Chris@272 599 }
Chris@31 600 }
Chris@272 601
Chris@32 602 Grid cqout = m_cq->process(data);
Chris@302 603 transcribe(cqout, fs);
Chris@51 604 return fs;
Chris@34 605 }
Chris@34 606
Chris@34 607 Silvet::FeatureSet
Chris@34 608 Silvet::getRemainingFeatures()
Chris@34 609 {
Chris@145 610 Grid cqout = m_cq->getRemainingOutput();
Chris@302 611 FeatureSet fs;
Chris@302 612 if (m_columnCount == 0) {
Chris@302 613 // process() was never called, but we still want these
Chris@302 614 insertTemplateFeatures(fs);
Chris@302 615 } else {
Chris@302 616 transcribe(cqout, fs);
Chris@302 617 }
Chris@51 618 return fs;
Chris@34 619 }
Chris@34 620
Chris@302 621 void
Chris@302 622 Silvet::insertTemplateFeatures(FeatureSet &fs)
Chris@302 623 {
Chris@302 624 const InstrumentPack &pack = getPack(m_instrument);
Chris@302 625 for (int i = 0; i < int(pack.templates.size()) * pack.templateNoteCount; ++i) {
Chris@302 626 RealTime timestamp = RealTime::fromSeconds(double(i) / m_colsPerSec);
Chris@302 627 Feature f;
Chris@302 628 char buffer[50];
Chris@302 629 sprintf(buffer, "Note %d", i + 1);
Chris@302 630 f.label = buffer;
Chris@302 631 f.hasTimestamp = true;
Chris@302 632 f.timestamp = timestamp;
Chris@302 633 f.values = pack.templates[i / pack.templateNoteCount]
Chris@302 634 .data[i % pack.templateNoteCount];
Chris@302 635 fs[m_templateOutputNo].push_back(f);
Chris@302 636 }
Chris@302 637 }
Chris@302 638
Chris@302 639 void
Chris@302 640 Silvet::transcribe(const Grid &cqout, Silvet::FeatureSet &fs)
Chris@34 641 {
Chris@32 642 Grid filtered = preProcess(cqout);
Chris@31 643
Chris@302 644 if (filtered.empty()) return;
Chris@170 645
Chris@298 646 const InstrumentPack &pack(getPack(m_instrument));
Chris@104 647
Chris@325 648 int width = filtered.size();
Chris@325 649
Chris@325 650 double silenceThreshold = 0.01;
Chris@325 651
Chris@325 652 for (int i = 0; i < width; ++i) {
Chris@325 653
Chris@325 654 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1 + i);
Chris@325 655 float inputGain = getInputGainAt(timestamp);
Chris@325 656
Chris@178 657 Feature f;
Chris@325 658 double rms = 0.0;
Chris@325 659
Chris@178 660 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@325 661 double v = filtered[i][j];
Chris@325 662 rms += v * v;
Chris@325 663 f.values.push_back(float(v));
Chris@178 664 }
Chris@325 665
Chris@325 666 rms = sqrt(rms / pack.templateHeight);
Chris@325 667 if (rms / inputGain < silenceThreshold) {
Chris@325 668 filtered[i].clear();
Chris@325 669 }
Chris@325 670
Chris@178 671 fs[m_fcqOutputNo].push_back(f);
Chris@178 672 }
Chris@325 673
Chris@311 674 Grid localPitches(width);
Chris@170 675
Chris@297 676 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning;
Chris@170 677 int shiftCount = 1;
Chris@170 678 if (wantShifts) {
Chris@170 679 shiftCount = pack.templateMaxShift * 2 + 1;
Chris@170 680 }
Chris@170 681
Chris@170 682 vector<vector<int> > localBestShifts;
Chris@170 683 if (wantShifts) {
Chris@311 684 localBestShifts = vector<vector<int> >(width);
Chris@170 685 }
Chris@170 686
Chris@312 687 #ifndef MAX_EM_THREADS
Chris@312 688 #define MAX_EM_THREADS 8
Chris@312 689 #endif
Chris@312 690
Chris@317 691 int emThreadCount = MAX_EM_THREADS;
Chris@317 692 if (m_mode == LiveMode && pack.templates.size() == 1) {
Chris@317 693 // The EM step is probably not slow enough to merit it
Chris@317 694 emThreadCount = 1;
Chris@317 695 }
Chris@317 696
Chris@312 697 #if (defined(MAX_EM_THREADS) && (MAX_EM_THREADS > 1))
Chris@317 698 if (emThreadCount > 1) {
Chris@317 699 for (int i = 0; i < width; ) {
Chris@317 700 typedef future<pair<vector<double>, vector<int>>> EMFuture;
Chris@317 701 vector<EMFuture> results;
Chris@317 702 for (int j = 0; j < emThreadCount && i + j < width; ++j) {
Chris@317 703 results.push_back
Chris@317 704 (async(std::launch::async,
Chris@317 705 [&](int index) {
Chris@325 706 return applyEM
Chris@325 707 (pack, filtered.at(index), wantShifts);
Chris@317 708 }, i + j));
Chris@317 709 }
Chris@317 710 for (int j = 0; j < emThreadCount && i + j < width; ++j) {
Chris@317 711 auto out = results[j].get();
Chris@317 712 localPitches[i+j] = out.first;
Chris@317 713 if (wantShifts) localBestShifts[i+j] = out.second;
Chris@317 714 }
Chris@317 715 i += emThreadCount;
Chris@312 716 }
Chris@123 717 }
Chris@312 718 #endif
Chris@317 719
Chris@317 720 if (emThreadCount == 1) {
Chris@317 721 for (int i = 0; i < width; ++i) {
Chris@317 722 auto out = applyEM(pack, filtered.at(i), wantShifts);
Chris@317 723 localPitches[i] = out.first;
Chris@317 724 if (wantShifts) localBestShifts[i] = out.second;
Chris@317 725 }
Chris@317 726 }
Chris@305 727
Chris@166 728 for (int i = 0; i < width; ++i) {
Chris@37 729
Chris@321 730 vector<double> filtered;
Chris@321 731
Chris@321 732 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@321 733 m_postFilter[j]->push(localPitches[i][j]);
Chris@321 734 filtered.push_back(m_postFilter[j]->get());
Chris@321 735 }
Chris@294 736
Chris@309 737 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1);
Chris@309 738 float inputGain = getInputGainAt(timestamp);
Chris@309 739
Chris@294 740 Feature f;
Chris@294 741 for (int j = 0; j < (int)filtered.size(); ++j) {
Chris@309 742 float v = filtered[j];
Chris@294 743 if (v < pack.levelThreshold) v = 0.f;
Chris@309 744 f.values.push_back(v / inputGain);
Chris@294 745 }
Chris@294 746 fs[m_pitchOutputNo].push_back(f);
Chris@309 747
Chris@309 748 f.values.clear();
Chris@309 749 f.values.resize(12);
Chris@309 750 for (int j = 0; j < (int)filtered.size(); ++j) {
Chris@309 751 f.values[j % 12] += filtered[j] / inputGain;
Chris@309 752 }
Chris@309 753 fs[m_chromaOutputNo].push_back(f);
Chris@38 754
Chris@321 755 // This pushes the up-to-max-polyphony activation column to
Chris@321 756 // m_pianoRoll
Chris@323 757 postProcess(filtered, localBestShifts[i], wantShifts);
Chris@321 758
Chris@319 759 auto events = noteTrack(shiftCount);
Chris@319 760
Chris@319 761 FeatureList noteFeatures = events.first;
Chris@123 762 for (FeatureList::const_iterator fi = noteFeatures.begin();
Chris@123 763 fi != noteFeatures.end(); ++fi) {
Chris@123 764 fs[m_notesOutputNo].push_back(*fi);
Chris@40 765 }
Chris@319 766
Chris@319 767 FeatureList onsetFeatures = events.second;
Chris@319 768 for (FeatureList::const_iterator fi = onsetFeatures.begin();
Chris@319 769 fi != onsetFeatures.end(); ++fi) {
Chris@319 770 fs[m_onsetsOutputNo].push_back(*fi);
Chris@319 771 }
Chris@34 772 }
Chris@31 773 }
Chris@31 774
Chris@311 775 pair<vector<double>, vector<int> >
Chris@311 776 Silvet::applyEM(const InstrumentPack &pack,
Chris@311 777 const vector<double> &column,
Chris@311 778 bool wantShifts)
Chris@311 779 {
Chris@311 780 double columnThreshold = 1e-5;
Chris@311 781
Chris@314 782 if (m_mode == LiveMode) {
Chris@327 783 columnThreshold /= 20;
Chris@314 784 }
Chris@314 785
Chris@311 786 vector<double> pitches(pack.templateNoteCount, 0.0);
Chris@311 787 vector<int> bestShifts;
Chris@325 788
Chris@325 789 if (column.empty()) return { pitches, bestShifts };
Chris@311 790
Chris@311 791 double sum = 0.0;
Chris@311 792 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@311 793 sum += column.at(j);
Chris@311 794 }
Chris@311 795 if (sum < columnThreshold) return { pitches, bestShifts };
Chris@311 796
Chris@314 797 EM em(&pack, m_mode == HighQualityMode);
Chris@311 798
Chris@311 799 em.setPitchSparsity(pack.pitchSparsity);
Chris@311 800 em.setSourceSparsity(pack.sourceSparsity);
Chris@311 801
Chris@314 802 int iterations = (m_mode == HighQualityMode ? 20 : 10);
Chris@311 803
Chris@311 804 for (int j = 0; j < iterations; ++j) {
Chris@311 805 em.iterate(column.data());
Chris@311 806 }
Chris@311 807
Chris@311 808 const float *pitchDist = em.getPitchDistribution();
Chris@311 809 const float *const *shiftDist = em.getShifts();
Chris@311 810
Chris@311 811 int shiftCount = 1;
Chris@311 812 if (wantShifts) {
Chris@311 813 shiftCount = pack.templateMaxShift * 2 + 1;
Chris@311 814 }
Chris@311 815
Chris@311 816 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@311 817
Chris@311 818 pitches[j] = pitchDist[j] * sum;
Chris@311 819
Chris@311 820 int bestShift = 0;
Chris@311 821 float bestShiftValue = 0.0;
Chris@311 822 if (wantShifts) {
Chris@311 823 for (int k = 0; k < shiftCount; ++k) {
Chris@311 824 float value = shiftDist[k][j];
Chris@311 825 if (k == 0 || value > bestShiftValue) {
Chris@311 826 bestShiftValue = value;
Chris@311 827 bestShift = k;
Chris@311 828 }
Chris@311 829 }
Chris@311 830 bestShifts.push_back(bestShift);
Chris@311 831 }
Chris@311 832 }
Chris@311 833
Chris@311 834 return { pitches, bestShifts };
Chris@311 835 }
Chris@311 836
Chris@32 837 Silvet::Grid
Chris@32 838 Silvet::preProcess(const Grid &in)
Chris@32 839 {
Chris@32 840 int width = in.size();
Chris@32 841
Chris@165 842 int spacing = processingSampleRate / m_colsPerSec;
Chris@32 843
Chris@165 844 // need to be careful that col spacing is an integer number of samples!
Chris@165 845 assert(spacing * m_colsPerSec == processingSampleRate);
Chris@32 846
Chris@32 847 Grid out;
Chris@32 848
Chris@58 849 // We count the CQ latency in terms of processing hops, but
Chris@58 850 // actually it probably isn't an exact number of hops so this
Chris@58 851 // isn't quite accurate. But the small constant offset is
Chris@165 852 // practically irrelevant compared to the jitter from the frame
Chris@165 853 // size we reduce to in a moment
Chris@33 854 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
Chris@33 855
Chris@298 856 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 857
Chris@32 858 for (int i = 0; i < width; ++i) {
Chris@32 859
Chris@33 860 if (m_columnCount < latentColumns) {
Chris@33 861 ++m_columnCount;
Chris@33 862 continue;
Chris@33 863 }
Chris@33 864
Chris@32 865 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop();
Chris@32 866 int sampleNo = m_columnCount * m_cq->getColumnHop();
Chris@32 867
Chris@32 868 bool select = (sampleNo / spacing != prevSampleNo / spacing);
Chris@32 869
Chris@32 870 if (select) {
Chris@32 871 vector<double> inCol = in[i];
Chris@176 872 vector<double> outCol(pack.templateHeight);
Chris@32 873
Chris@327 874 // In HQ mode, the CQ returns 600 bins (10 octaves at 5
Chris@327 875 // bins per semitone) and we ignore the lowest 55 of them,
Chris@327 876 // giving us 545 bins total, which matches the height of
Chris@327 877 // each of our instrument templates.
Chris@327 878 //
Chris@327 879 // In draft mode the CQ is an octave shorter, returning
Chris@327 880 // 540 bins, so we instead pad with an additional 5 zeros
Chris@327 881 // at the lowest frequencies to get the same 545 bins.
Chris@327 882 //
Chris@327 883 // In live mode the CQ is two octaves shorter and only has
Chris@327 884 // 1 bin per semitone, and the template is also an octave
Chris@327 885 // shorter. So we get 96 bins (= 8 * 12) and want 97 (=
Chris@327 886 // (545 / 5) - 12), meaning we have to pad with one extra
Chris@327 887 // bin at the lowest frequency position. Essentially this
Chris@327 888 // is the same as draft mode (pad with bins-per-semitone
Chris@327 889 // bins), just that the result is a shorter vector.
Chris@178 890 //
Chris@178 891 // We also need to reverse the column as we go, since the
Chris@178 892 // raw CQ has the high frequencies first and we need it
Chris@178 893 // the other way around.
Chris@32 894
Chris@298 895 int bps = (m_mode == LiveMode ?
Chris@298 896 binsPerSemitoneLive : binsPerSemitoneNormal);
Chris@298 897
Chris@297 898 if (m_mode == HighQualityMode) {
Chris@178 899 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@298 900 int ix = inCol.size() - j - (11 * bps);
Chris@178 901 outCol[j] = inCol[ix];
Chris@178 902 }
Chris@178 903 } else {
Chris@327 904 int pad = bps;
Chris@327 905 for (int j = 0; j < pad; ++j) {
Chris@178 906 outCol[j] = 0.0;
Chris@178 907 }
Chris@327 908 for (int j = pad; j < pack.templateHeight; ++j) {
Chris@327 909 int ix = inCol.size() - j + (pad-1);
Chris@178 910 outCol[j] = inCol[ix];
Chris@178 911 }
Chris@46 912 }
Chris@32 913
Chris@46 914 vector<double> noiseLevel1 =
Chris@298 915 MedianFilter<double>::filter(8 * bps, outCol);
Chris@176 916 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 917 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
Chris@46 918 }
Chris@32 919
Chris@46 920 vector<double> noiseLevel2 =
Chris@298 921 MedianFilter<double>::filter(8 * bps, noiseLevel1);
Chris@176 922 for (int j = 0; j < pack.templateHeight; ++j) {
Chris@46 923 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
Chris@32 924 }
Chris@32 925
Chris@165 926 out.push_back(outCol);
Chris@32 927 }
Chris@32 928
Chris@32 929 ++m_columnCount;
Chris@32 930 }
Chris@32 931
Chris@32 932 return out;
Chris@32 933 }
Chris@32 934
Chris@321 935 void
Chris@170 936 Silvet::postProcess(const vector<double> &pitches,
Chris@170 937 const vector<int> &bestShifts,
Chris@170 938 bool wantShifts)
Chris@166 939 {
Chris@298 940 const InstrumentPack &pack(getPack(m_instrument));
Chris@176 941
Chris@41 942 // Threshold for level and reduce number of candidate pitches
Chris@41 943
Chris@41 944 typedef std::multimap<double, int> ValueIndexMap;
Chris@41 945
Chris@41 946 ValueIndexMap strengths;
Chris@166 947
Chris@176 948 for (int j = 0; j < pack.templateNoteCount; ++j) {
Chris@321 949
Chris@321 950 double strength = pitches[j];
Chris@183 951 if (strength < pack.levelThreshold) continue;
Chris@321 952
Chris@321 953 // In live mode with only a 12-bpo CQ, we are very likely to
Chris@321 954 // get clusters of two or three high scores at a time for
Chris@321 955 // neighbouring semitones. Eliminate these by picking only the
Chris@325 956 // peaks (except that we never eliminate a note that has
Chris@325 957 // already been established as currently playing). This means
Chris@325 958 // we can't recognise actual semitone chords if they ever
Chris@325 959 // appear, but it's not as if live mode is good enough for
Chris@325 960 // that to be a big deal anyway.
Chris@321 961 if (m_mode == LiveMode) {
Chris@325 962 if (m_current.find(j) == m_current.end() &&
Chris@325 963 (j == 0 ||
Chris@325 964 j + 1 == pack.templateNoteCount ||
Chris@325 965 pitches[j] < pitches[j-1] ||
Chris@325 966 pitches[j] < pitches[j+1])) {
Chris@325 967 // not a peak or a currently-playing note: skip it
Chris@321 968 continue;
Chris@321 969 }
Chris@321 970 }
Chris@323 971
Chris@168 972 strengths.insert(ValueIndexMap::value_type(strength, j));
Chris@168 973 }
Chris@166 974
Chris@168 975 ValueIndexMap::const_iterator si = strengths.end();
Chris@167 976
Chris@168 977 map<int, double> active;
Chris@168 978 map<int, int> activeShifts;
Chris@168 979
Chris@183 980 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) {
Chris@168 981
Chris@168 982 --si;
Chris@168 983
Chris@168 984 double strength = si->first;
Chris@168 985 int j = si->second;
Chris@168 986
Chris@168 987 active[j] = strength;
Chris@168 988
Chris@170 989 if (wantShifts) {
Chris@170 990 activeShifts[j] = bestShifts[j];
Chris@167 991 }
Chris@41 992 }
Chris@41 993
Chris@168 994 m_pianoRoll.push_back(active);
Chris@170 995
Chris@170 996 if (wantShifts) {
Chris@168 997 m_pianoRollShifts.push_back(activeShifts);
Chris@41 998 }
Chris@294 999
Chris@321 1000 return;
Chris@166 1001 }
Chris@166 1002
Chris@319 1003 pair<Vamp::Plugin::FeatureList, Vamp::Plugin::FeatureList>
Chris@168 1004 Silvet::noteTrack(int shiftCount)
Chris@166 1005 {
Chris@41 1006 // Minimum duration pruning, and conversion to notes. We can only
Chris@41 1007 // report notes that have just ended (i.e. that are absent in the
Chris@168 1008 // latest active set but present in the prior set in the piano
Chris@41 1009 // roll) -- any notes that ended earlier will have been reported
Chris@41 1010 // already, and if they haven't ended, we don't know their
Chris@41 1011 // duration.
Chris@41 1012
Chris@168 1013 int width = m_pianoRoll.size() - 1;
Chris@168 1014
Chris@168 1015 const map<int, double> &active = m_pianoRoll[width];
Chris@41 1016
Chris@165 1017 double columnDuration = 1.0 / m_colsPerSec;
Chris@165 1018
Chris@165 1019 // only keep notes >= 100ms or thereabouts
Chris@323 1020 double durationThrSec = 0.1;
Chris@323 1021 if (m_mode == LiveMode) durationThrSec = 0.07;
Chris@323 1022 int durationThreshold = floor(durationThrSec / columnDuration); // in cols
Chris@165 1023 if (durationThreshold < 1) durationThreshold = 1;
Chris@41 1024
Chris@319 1025 FeatureList noteFeatures, onsetFeatures;
Chris@41 1026
Chris@41 1027 if (width < durationThreshold + 1) {
Chris@319 1028 return { noteFeatures, onsetFeatures };
Chris@41 1029 }
Chris@41 1030
Chris@150 1031 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix)
Chris@150 1032
Chris@55 1033 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
Chris@41 1034 ni != m_pianoRoll[width-1].end(); ++ni) {
Chris@41 1035
Chris@55 1036 int note = ni->first;
Chris@41 1037
Chris@41 1038 int end = width;
Chris@41 1039 int start = end-1;
Chris@41 1040
Chris@41 1041 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
Chris@41 1042 --start;
Chris@41 1043 }
Chris@41 1044 ++start;
Chris@41 1045
Chris@319 1046 int duration = end - start;
Chris@319 1047
Chris@319 1048 if (duration < durationThreshold) {
Chris@41 1049 continue;
Chris@41 1050 }
Chris@41 1051
Chris@319 1052 if (duration == durationThreshold) {
Chris@325 1053 m_current.insert(note);
Chris@319 1054 emitOnset(start, note, shiftCount, onsetFeatures);
Chris@319 1055 }
Chris@319 1056
Chris@319 1057 if (active.find(note) == active.end()) {
Chris@319 1058 // the note was playing but just ended
Chris@325 1059 m_current.erase(note);
Chris@319 1060 emitNote(start, end, note, shiftCount, noteFeatures);
Chris@319 1061 }
Chris@41 1062 }
Chris@41 1063
Chris@62 1064 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
Chris@41 1065
Chris@319 1066 return { noteFeatures, onsetFeatures };
Chris@41 1067 }
Chris@41 1068
Chris@169 1069 void
Chris@169 1070 Silvet::emitNote(int start, int end, int note, int shiftCount,
Chris@169 1071 FeatureList &noteFeatures)
Chris@169 1072 {
Chris@169 1073 int partStart = start;
Chris@169 1074 int partShift = 0;
Chris@320 1075 double partStrength = 0;
Chris@169 1076
Chris@252 1077 int partThreshold = floor(0.05 * m_colsPerSec);
Chris@169 1078
Chris@169 1079 for (int i = start; i != end; ++i) {
Chris@169 1080
Chris@169 1081 double strength = m_pianoRoll[i][note];
Chris@169 1082
Chris@169 1083 int shift = 0;
Chris@169 1084
Chris@169 1085 if (shiftCount > 1) {
Chris@169 1086
Chris@169 1087 shift = m_pianoRollShifts[i][note];
Chris@169 1088
Chris@169 1089 if (i == partStart) {
Chris@169 1090 partShift = shift;
Chris@169 1091 }
Chris@169 1092
Chris@169 1093 if (i > partStart + partThreshold && shift != partShift) {
Chris@169 1094
Chris@169 1095 // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl;
Chris@169 1096
Chris@169 1097 // pitch has changed, emit an intermediate note
Chris@252 1098 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 1099 i,
Chris@252 1100 note,
Chris@252 1101 partShift,
Chris@252 1102 shiftCount,
Chris@320 1103 partStrength));
Chris@169 1104 partStart = i;
Chris@169 1105 partShift = shift;
Chris@320 1106 partStrength = 0;
Chris@169 1107 }
Chris@169 1108 }
Chris@169 1109
Chris@320 1110 if (strength > partStrength) {
Chris@320 1111 partStrength = strength;
Chris@169 1112 }
Chris@169 1113 }
Chris@169 1114
Chris@169 1115 if (end >= partStart + partThreshold) {
Chris@252 1116 noteFeatures.push_back(makeNoteFeature(partStart,
Chris@252 1117 end,
Chris@252 1118 note,
Chris@252 1119 partShift,
Chris@252 1120 shiftCount,
Chris@320 1121 partStrength));
Chris@169 1122 }
Chris@169 1123 }
Chris@252 1124
Chris@319 1125 void
Chris@319 1126 Silvet::emitOnset(int start, int note, int shiftCount,
Chris@319 1127 FeatureList &onsetFeatures)
Chris@319 1128 {
Chris@319 1129 int len = int(m_pianoRoll.size());
Chris@320 1130
Chris@320 1131 double onsetStrength = 0;
Chris@319 1132
Chris@319 1133 int shift = 0;
Chris@319 1134 if (shiftCount > 1) {
Chris@319 1135 shift = m_pianoRollShifts[start][note];
Chris@319 1136 }
Chris@319 1137
Chris@319 1138 for (int i = start; i < len; ++i) {
Chris@319 1139 double strength = m_pianoRoll[i][note];
Chris@320 1140 if (strength > onsetStrength) {
Chris@320 1141 onsetStrength = strength;
Chris@319 1142 }
Chris@319 1143 }
Chris@319 1144
Chris@319 1145 onsetFeatures.push_back(makeOnsetFeature(start,
Chris@319 1146 note,
Chris@319 1147 shift,
Chris@319 1148 shiftCount,
Chris@320 1149 onsetStrength));
Chris@319 1150 }
Chris@319 1151
Chris@309 1152 RealTime
Chris@309 1153 Silvet::getColumnTimestamp(int column)
Chris@309 1154 {
Chris@309 1155 double columnDuration = 1.0 / m_colsPerSec;
Chris@309 1156 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
Chris@309 1157
Chris@309 1158 return m_startTime + RealTime::fromSeconds
Chris@309 1159 (columnDuration * (column - postFilterLatency) + 0.02);
Chris@309 1160 }
Chris@309 1161
Chris@252 1162 Silvet::Feature
Chris@252 1163 Silvet::makeNoteFeature(int start,
Chris@252 1164 int end,
Chris@252 1165 int note,
Chris@252 1166 int shift,
Chris@252 1167 int shiftCount,
Chris@320 1168 double strength)
Chris@252 1169 {
Chris@252 1170 Feature f;
Chris@252 1171
Chris@252 1172 f.hasTimestamp = true;
Chris@309 1173 f.timestamp = getColumnTimestamp(start);
Chris@252 1174
Chris@252 1175 f.hasDuration = true;
Chris@309 1176 f.duration = getColumnTimestamp(end) - f.timestamp;
Chris@252 1177
Chris@252 1178 f.values.clear();
Chris@320 1179 f.values.push_back(getNoteFrequency(note, shift, shiftCount));
Chris@320 1180 f.values.push_back(getVelocityFor(strength, start));
Chris@252 1181
Chris@320 1182 f.label = getNoteName(note, shift, shiftCount);
Chris@252 1183
Chris@252 1184 return f;
Chris@252 1185 }
Chris@252 1186
Chris@319 1187 Silvet::Feature
Chris@319 1188 Silvet::makeOnsetFeature(int start,
Chris@319 1189 int note,
Chris@319 1190 int shift,
Chris@319 1191 int shiftCount,
Chris@320 1192 double strength)
Chris@319 1193 {
Chris@319 1194 Feature f;
Chris@319 1195
Chris@319 1196 f.hasTimestamp = true;
Chris@319 1197 f.timestamp = getColumnTimestamp(start);
Chris@319 1198
Chris@319 1199 f.hasDuration = false;
Chris@319 1200
Chris@319 1201 f.values.clear();
Chris@320 1202 f.values.push_back(getNoteFrequency(note, shift, shiftCount));
Chris@320 1203 f.values.push_back(getVelocityFor(strength, start));
Chris@319 1204
Chris@320 1205 f.label = getNoteName(note, shift, shiftCount);
Chris@319 1206
Chris@319 1207 return f;
Chris@319 1208 }
Chris@319 1209
Chris@320 1210 int
Chris@320 1211 Silvet::getVelocityFor(double strength, int column)
Chris@320 1212 {
Chris@320 1213 RealTime rt = getColumnTimestamp(column + 1);
Chris@320 1214
Chris@320 1215 float inputGain = getInputGainAt(rt);
Chris@320 1216
Chris@320 1217 double scale = 2.0;
Chris@320 1218 if (m_mode == LiveMode) scale = 20.0;
Chris@320 1219
Chris@320 1220 double velocity = round((strength * scale) / inputGain);
Chris@320 1221
Chris@320 1222 if (velocity > 127.0) velocity = 127.0;
Chris@320 1223 if (velocity < 1.0) velocity = 1.0; // assume surpassed 0 threshold already
Chris@320 1224
Chris@320 1225 return int(velocity);
Chris@320 1226 }
Chris@320 1227
Chris@252 1228 float
Chris@252 1229 Silvet::getInputGainAt(RealTime t)
Chris@252 1230 {
Chris@252 1231 map<RealTime, float>::const_iterator i = m_inputGains.lower_bound(t);
Chris@252 1232
Chris@252 1233 if (i == m_inputGains.end()) {
Chris@252 1234 if (i != m_inputGains.begin()) {
Chris@252 1235 --i;
Chris@252 1236 } else {
Chris@252 1237 return 1.f; // no data
Chris@252 1238 }
Chris@252 1239 }
Chris@252 1240
Chris@252 1241 // cerr << "gain at time " << t << " = " << i->second << endl;
Chris@252 1242
Chris@252 1243 return i->second;
Chris@252 1244 }
Chris@252 1245