annotate PYinVamp.cpp @ 141:72bda34e0e64 fixedlag

Merge from branch vamp-fft-revision
author Chris Cannam
date Fri, 24 Mar 2017 14:50:44 +0000
parents 83978b93aac1 d71170f5ba76
children 218bfe953159
rev   line source
matthiasm@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@9 2
matthiasm@0 3 /*
Chris@9 4 pYIN - A fundamental frequency estimator for monophonic audio
Chris@9 5 Centre for Digital Music, Queen Mary, University of London.
Chris@9 6
Chris@9 7 This program is free software; you can redistribute it and/or
Chris@9 8 modify it under the terms of the GNU General Public License as
Chris@9 9 published by the Free Software Foundation; either version 2 of the
Chris@9 10 License, or (at your option) any later version. See the file
Chris@9 11 COPYING included with this distribution for more information.
matthiasm@0 12 */
matthiasm@0 13
matthiasm@36 14 #include "PYinVamp.h"
matthiasm@0 15 #include "MonoNote.h"
mail@131 16 #include "MonoPitchHMM.h"
matthiasm@0 17
matthiasm@0 18 #include <vector>
matthiasm@0 19 #include <algorithm>
matthiasm@0 20
matthiasm@0 21 #include <cstdio>
matthiasm@0 22 #include <cmath>
matthiasm@0 23 #include <complex>
matthiasm@0 24
matthiasm@0 25 using std::string;
matthiasm@0 26 using std::vector;
matthiasm@0 27 using Vamp::RealTime;
matthiasm@0 28
matthiasm@0 29
matthiasm@36 30 PYinVamp::PYinVamp(float inputSampleRate) :
matthiasm@0 31 Plugin(inputSampleRate),
matthiasm@0 32 m_channels(0),
matthiasm@0 33 m_stepSize(256),
matthiasm@0 34 m_blockSize(2048),
matthiasm@0 35 m_fmin(40),
matthiasm@58 36 m_fmax(1600),
matthiasm@0 37 m_yin(2048, inputSampleRate, 0.0),
matthiasm@0 38 m_oF0Candidates(0),
matthiasm@0 39 m_oF0Probs(0),
matthiasm@0 40 m_oVoicedProb(0),
matthiasm@0 41 m_oCandidateSalience(0),
matthiasm@0 42 m_oSmoothedPitchTrack(0),
matthiasm@0 43 m_oNotes(0),
matthiasm@0 44 m_threshDistr(2.0f),
mail@132 45 m_fixedLag(1.0f),
matthiasm@6 46 m_outputUnvoiced(0.0f),
matthiasm@70 47 m_preciseTime(0.0f),
matthiasm@117 48 m_lowAmp(0.1f),
matthiasm@117 49 m_onsetSensitivity(0.7f),
matthiasm@117 50 m_pruneThresh(0.1f),
mail@132 51 m_pitchHmm(0),
matthiasm@0 52 m_pitchProb(0),
matthiasm@103 53 m_timestamp(0),
mail@133 54 m_level(0),
mail@133 55 m_pitchTrack(0)
matthiasm@0 56 {
matthiasm@0 57 }
matthiasm@0 58
matthiasm@36 59 PYinVamp::~PYinVamp()
matthiasm@0 60 {
matthiasm@0 61 }
matthiasm@0 62
matthiasm@0 63 string
matthiasm@36 64 PYinVamp::getIdentifier() const
matthiasm@0 65 {
matthiasm@1 66 return "pyin";
matthiasm@0 67 }
matthiasm@0 68
matthiasm@0 69 string
matthiasm@36 70 PYinVamp::getName() const
matthiasm@0 71 {
matthiasm@1 72 return "pYin";
matthiasm@0 73 }
matthiasm@0 74
matthiasm@0 75 string
matthiasm@36 76 PYinVamp::getDescription() const
matthiasm@0 77 {
matthiasm@0 78 return "Monophonic pitch and note tracking based on a probabilistic Yin extension.";
matthiasm@0 79 }
matthiasm@0 80
matthiasm@0 81 string
matthiasm@36 82 PYinVamp::getMaker() const
matthiasm@0 83 {
matthiasm@0 84 return "Matthias Mauch";
matthiasm@0 85 }
matthiasm@0 86
matthiasm@0 87 int
matthiasm@36 88 PYinVamp::getPluginVersion() const
matthiasm@0 89 {
matthiasm@0 90 // Increment this each time you release a version that behaves
matthiasm@0 91 // differently from the previous one
Chris@125 92 return 2;
matthiasm@0 93 }
matthiasm@0 94
matthiasm@0 95 string
matthiasm@36 96 PYinVamp::getCopyright() const
matthiasm@0 97 {
matthiasm@0 98 return "GPL";
matthiasm@0 99 }
matthiasm@0 100
matthiasm@36 101 PYinVamp::InputDomain
matthiasm@36 102 PYinVamp::getInputDomain() const
matthiasm@0 103 {
matthiasm@0 104 return TimeDomain;
matthiasm@0 105 }
matthiasm@0 106
matthiasm@0 107 size_t
matthiasm@36 108 PYinVamp::getPreferredBlockSize() const
matthiasm@0 109 {
matthiasm@0 110 return 2048;
matthiasm@0 111 }
matthiasm@0 112
matthiasm@0 113 size_t
matthiasm@36 114 PYinVamp::getPreferredStepSize() const
matthiasm@0 115 {
matthiasm@0 116 return 256;
matthiasm@0 117 }
matthiasm@0 118
matthiasm@0 119 size_t
matthiasm@36 120 PYinVamp::getMinChannelCount() const
matthiasm@0 121 {
matthiasm@0 122 return 1;
matthiasm@0 123 }
matthiasm@0 124
matthiasm@0 125 size_t
matthiasm@36 126 PYinVamp::getMaxChannelCount() const
matthiasm@0 127 {
matthiasm@0 128 return 1;
matthiasm@0 129 }
matthiasm@0 130
matthiasm@36 131 PYinVamp::ParameterList
matthiasm@36 132 PYinVamp::getParameterDescriptors() const
matthiasm@0 133 {
matthiasm@0 134 ParameterList list;
matthiasm@0 135
matthiasm@0 136 ParameterDescriptor d;
matthiasm@0 137
matthiasm@0 138 d.identifier = "threshdistr";
matthiasm@0 139 d.name = "Yin threshold distribution";
matthiasm@0 140 d.description = ".";
matthiasm@0 141 d.unit = "";
matthiasm@0 142 d.minValue = 0.0f;
matthiasm@0 143 d.maxValue = 7.0f;
matthiasm@0 144 d.defaultValue = 2.0f;
matthiasm@0 145 d.isQuantized = true;
matthiasm@0 146 d.quantizeStep = 1.0f;
matthiasm@0 147 d.valueNames.push_back("Uniform");
matthiasm@0 148 d.valueNames.push_back("Beta (mean 0.10)");
matthiasm@0 149 d.valueNames.push_back("Beta (mean 0.15)");
matthiasm@0 150 d.valueNames.push_back("Beta (mean 0.20)");
matthiasm@0 151 d.valueNames.push_back("Beta (mean 0.30)");
matthiasm@0 152 d.valueNames.push_back("Single Value 0.10");
matthiasm@0 153 d.valueNames.push_back("Single Value 0.15");
matthiasm@0 154 d.valueNames.push_back("Single Value 0.20");
matthiasm@0 155 list.push_back(d);
matthiasm@0 156
mail@130 157 d.valueNames.clear();
mail@130 158
mail@130 159 d.identifier = "fixedlag";
mail@130 160 d.name = "Fixed-lag smoothing";
mail@130 161 d.description = "Use fixed lag smoothing, not full Viterbi smoothing.";
mail@130 162 d.unit = "";
mail@130 163 d.minValue = 0.0f;
mail@130 164 d.maxValue = 1.0f;
mail@130 165 d.defaultValue = 0.0f;
mail@130 166 d.isQuantized = true;
mail@130 167 d.quantizeStep = 1.0f;
mail@130 168 list.push_back(d);
mail@130 169
matthiasm@0 170 d.identifier = "outputunvoiced";
matthiasm@0 171 d.valueNames.clear();
matthiasm@0 172 d.name = "Output estimates classified as unvoiced?";
matthiasm@0 173 d.description = ".";
matthiasm@0 174 d.unit = "";
matthiasm@0 175 d.minValue = 0.0f;
matthiasm@0 176 d.maxValue = 2.0f;
matthiasm@6 177 d.defaultValue = 0.0f;
matthiasm@0 178 d.isQuantized = true;
matthiasm@0 179 d.quantizeStep = 1.0f;
matthiasm@0 180 d.valueNames.push_back("No");
matthiasm@0 181 d.valueNames.push_back("Yes");
matthiasm@0 182 d.valueNames.push_back("Yes, as negative frequencies");
matthiasm@0 183 list.push_back(d);
matthiasm@0 184
matthiasm@70 185 d.identifier = "precisetime";
matthiasm@70 186 d.valueNames.clear();
matthiasm@70 187 d.name = "Use non-standard precise YIN timing (slow).";
matthiasm@70 188 d.description = ".";
matthiasm@70 189 d.unit = "";
matthiasm@70 190 d.minValue = 0.0f;
matthiasm@70 191 d.maxValue = 1.0f;
matthiasm@70 192 d.defaultValue = 0.0f;
matthiasm@70 193 d.isQuantized = true;
matthiasm@70 194 d.quantizeStep = 1.0f;
matthiasm@70 195 list.push_back(d);
matthiasm@70 196
matthiasm@72 197 d.identifier = "lowampsuppression";
matthiasm@72 198 d.valueNames.clear();
matthiasm@72 199 d.name = "Suppress low amplitude pitch estimates.";
matthiasm@72 200 d.description = ".";
matthiasm@72 201 d.unit = "";
matthiasm@72 202 d.minValue = 0.0f;
matthiasm@72 203 d.maxValue = 1.0f;
matthiasm@73 204 d.defaultValue = 0.1f;
matthiasm@72 205 d.isQuantized = false;
matthiasm@72 206 list.push_back(d);
matthiasm@70 207
matthiasm@107 208 d.identifier = "onsetsensitivity";
matthiasm@107 209 d.valueNames.clear();
matthiasm@107 210 d.name = "Onset sensitivity";
matthiasm@107 211 d.description = "Adds additional note onsets when RMS increases.";
matthiasm@107 212 d.unit = "";
matthiasm@107 213 d.minValue = 0.0f;
matthiasm@107 214 d.maxValue = 1.0f;
matthiasm@117 215 d.defaultValue = 0.7f;
matthiasm@108 216 d.isQuantized = false;
matthiasm@108 217 list.push_back(d);
matthiasm@108 218
matthiasm@108 219 d.identifier = "prunethresh";
matthiasm@108 220 d.valueNames.clear();
matthiasm@108 221 d.name = "Duration pruning threshold.";
matthiasm@108 222 d.description = "Prune notes that are shorter than this value.";
matthiasm@108 223 d.unit = "";
matthiasm@108 224 d.minValue = 0.0f;
matthiasm@108 225 d.maxValue = 0.2f;
matthiasm@117 226 d.defaultValue = 0.1f;
matthiasm@107 227 d.isQuantized = false;
matthiasm@107 228 list.push_back(d);
matthiasm@107 229
matthiasm@0 230 return list;
matthiasm@0 231 }
matthiasm@0 232
matthiasm@0 233 float
matthiasm@36 234 PYinVamp::getParameter(string identifier) const
matthiasm@0 235 {
matthiasm@0 236 if (identifier == "threshdistr") {
matthiasm@0 237 return m_threshDistr;
matthiasm@0 238 }
mail@130 239 if (identifier == "fixedlag") {
mail@130 240 return m_fixedLag;
mail@130 241 }
matthiasm@0 242 if (identifier == "outputunvoiced") {
matthiasm@0 243 return m_outputUnvoiced;
matthiasm@0 244 }
matthiasm@70 245 if (identifier == "precisetime") {
matthiasm@70 246 return m_preciseTime;
matthiasm@70 247 }
matthiasm@72 248 if (identifier == "lowampsuppression") {
matthiasm@72 249 return m_lowAmp;
matthiasm@72 250 }
matthiasm@107 251 if (identifier == "onsetsensitivity") {
matthiasm@107 252 return m_onsetSensitivity;
matthiasm@107 253 }
matthiasm@108 254 if (identifier == "prunethresh") {
matthiasm@108 255 return m_pruneThresh;
matthiasm@108 256 }
matthiasm@0 257 return 0.f;
matthiasm@0 258 }
matthiasm@0 259
matthiasm@0 260 void
matthiasm@36 261 PYinVamp::setParameter(string identifier, float value)
matthiasm@0 262 {
matthiasm@0 263 if (identifier == "threshdistr")
matthiasm@0 264 {
matthiasm@0 265 m_threshDistr = value;
matthiasm@0 266 }
mail@130 267 if (identifier == "fixedlag")
mail@130 268 {
mail@130 269 m_fixedLag = value;
mail@130 270 }
matthiasm@0 271 if (identifier == "outputunvoiced")
matthiasm@0 272 {
matthiasm@0 273 m_outputUnvoiced = value;
matthiasm@0 274 }
matthiasm@70 275 if (identifier == "precisetime")
matthiasm@70 276 {
matthiasm@70 277 m_preciseTime = value;
matthiasm@70 278 }
matthiasm@72 279 if (identifier == "lowampsuppression")
matthiasm@72 280 {
matthiasm@72 281 m_lowAmp = value;
matthiasm@72 282 }
matthiasm@107 283 if (identifier == "onsetsensitivity")
matthiasm@107 284 {
matthiasm@107 285 m_onsetSensitivity = value;
matthiasm@107 286 }
matthiasm@108 287 if (identifier == "prunethresh")
matthiasm@108 288 {
matthiasm@108 289 m_pruneThresh = value;
matthiasm@108 290 }
matthiasm@0 291 }
matthiasm@0 292
matthiasm@36 293 PYinVamp::ProgramList
matthiasm@36 294 PYinVamp::getPrograms() const
matthiasm@0 295 {
matthiasm@0 296 ProgramList list;
matthiasm@0 297 return list;
matthiasm@0 298 }
matthiasm@0 299
matthiasm@0 300 string
matthiasm@36 301 PYinVamp::getCurrentProgram() const
matthiasm@0 302 {
matthiasm@0 303 return ""; // no programs
matthiasm@0 304 }
matthiasm@0 305
matthiasm@0 306 void
Chris@138 307 PYinVamp::selectProgram(string)
matthiasm@0 308 {
matthiasm@0 309 }
matthiasm@0 310
matthiasm@36 311 PYinVamp::OutputList
matthiasm@36 312 PYinVamp::getOutputDescriptors() const
matthiasm@0 313 {
matthiasm@0 314 OutputList outputs;
matthiasm@0 315
matthiasm@0 316 OutputDescriptor d;
matthiasm@0 317
matthiasm@0 318 int outputNumber = 0;
matthiasm@0 319
matthiasm@0 320 d.identifier = "f0candidates";
matthiasm@0 321 d.name = "F0 Candidates";
matthiasm@0 322 d.description = "Estimated fundamental frequency candidates.";
matthiasm@0 323 d.unit = "Hz";
matthiasm@0 324 d.hasFixedBinCount = false;
matthiasm@0 325 // d.binCount = 1;
matthiasm@0 326 d.hasKnownExtents = true;
matthiasm@0 327 d.minValue = m_fmin;
matthiasm@0 328 d.maxValue = 500;
matthiasm@0 329 d.isQuantized = false;
matthiasm@0 330 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 331 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 332 d.hasDuration = false;
matthiasm@0 333 outputs.push_back(d);
matthiasm@0 334 m_oF0Candidates = outputNumber++;
matthiasm@0 335
matthiasm@0 336 d.identifier = "f0probs";
matthiasm@0 337 d.name = "Candidate Probabilities";
matthiasm@0 338 d.description = "Probabilities of estimated fundamental frequency candidates.";
matthiasm@0 339 d.unit = "";
matthiasm@0 340 d.hasFixedBinCount = false;
matthiasm@0 341 // d.binCount = 1;
matthiasm@0 342 d.hasKnownExtents = true;
matthiasm@0 343 d.minValue = 0;
matthiasm@0 344 d.maxValue = 1;
matthiasm@0 345 d.isQuantized = false;
matthiasm@0 346 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 347 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 348 d.hasDuration = false;
matthiasm@0 349 outputs.push_back(d);
matthiasm@0 350 m_oF0Probs = outputNumber++;
matthiasm@0 351
matthiasm@0 352 d.identifier = "voicedprob";
matthiasm@0 353 d.name = "Voiced Probability";
matthiasm@0 354 d.description = "Probability that the signal is voiced according to Probabilistic Yin.";
matthiasm@0 355 d.unit = "";
matthiasm@0 356 d.hasFixedBinCount = true;
matthiasm@0 357 d.binCount = 1;
matthiasm@0 358 d.hasKnownExtents = true;
matthiasm@0 359 d.minValue = 0;
matthiasm@0 360 d.maxValue = 1;
matthiasm@0 361 d.isQuantized = false;
matthiasm@0 362 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 363 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 364 d.hasDuration = false;
matthiasm@0 365 outputs.push_back(d);
matthiasm@0 366 m_oVoicedProb = outputNumber++;
matthiasm@0 367
matthiasm@0 368 d.identifier = "candidatesalience";
matthiasm@0 369 d.name = "Candidate Salience";
matthiasm@0 370 d.description = "Candidate Salience";
matthiasm@0 371 d.hasFixedBinCount = true;
matthiasm@0 372 d.binCount = m_blockSize / 2;
matthiasm@0 373 d.hasKnownExtents = true;
matthiasm@0 374 d.minValue = 0;
matthiasm@0 375 d.maxValue = 1;
matthiasm@0 376 d.isQuantized = false;
matthiasm@0 377 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 378 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 379 d.hasDuration = false;
matthiasm@0 380 outputs.push_back(d);
matthiasm@0 381 m_oCandidateSalience = outputNumber++;
matthiasm@0 382
matthiasm@0 383 d.identifier = "smoothedpitchtrack";
matthiasm@0 384 d.name = "Smoothed Pitch Track";
matthiasm@0 385 d.description = ".";
matthiasm@0 386 d.unit = "Hz";
matthiasm@0 387 d.hasFixedBinCount = true;
matthiasm@0 388 d.binCount = 1;
matthiasm@0 389 d.hasKnownExtents = false;
matthiasm@0 390 // d.minValue = 0;
matthiasm@0 391 // d.maxValue = 1;
matthiasm@0 392 d.isQuantized = false;
matthiasm@0 393 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 394 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 395 d.hasDuration = false;
matthiasm@0 396 outputs.push_back(d);
matthiasm@0 397 m_oSmoothedPitchTrack = outputNumber++;
matthiasm@0 398
matthiasm@0 399 d.identifier = "notes";
matthiasm@0 400 d.name = "Notes";
matthiasm@0 401 d.description = "Derived fixed-pitch note frequencies";
matthiasm@0 402 // d.unit = "MIDI unit";
matthiasm@0 403 d.unit = "Hz";
matthiasm@0 404 d.hasFixedBinCount = true;
matthiasm@0 405 d.binCount = 1;
matthiasm@0 406 d.hasKnownExtents = false;
matthiasm@0 407 d.isQuantized = false;
matthiasm@0 408 d.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0 409 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 410 d.hasDuration = true;
matthiasm@0 411 outputs.push_back(d);
matthiasm@0 412 m_oNotes = outputNumber++;
matthiasm@0 413
matthiasm@0 414 return outputs;
matthiasm@0 415 }
matthiasm@0 416
matthiasm@0 417 bool
matthiasm@36 418 PYinVamp::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 419 {
matthiasm@0 420 if (channels < getMinChannelCount() ||
matthiasm@0 421 channels > getMaxChannelCount()) return false;
matthiasm@0 422
Chris@9 423 /*
matthiasm@36 424 std::cerr << "PYinVamp::initialise: channels = " << channels
matthiasm@0 425 << ", stepSize = " << stepSize << ", blockSize = " << blockSize
matthiasm@0 426 << std::endl;
Chris@9 427 */
matthiasm@0 428 m_channels = channels;
matthiasm@0 429 m_stepSize = stepSize;
matthiasm@0 430 m_blockSize = blockSize;
matthiasm@0 431
matthiasm@0 432 reset();
matthiasm@0 433
matthiasm@0 434 return true;
matthiasm@0 435 }
matthiasm@0 436
matthiasm@0 437 void
matthiasm@36 438 PYinVamp::reset()
matthiasm@0 439 {
matthiasm@0 440 m_yin.setThresholdDistr(m_threshDistr);
matthiasm@0 441 m_yin.setFrameSize(m_blockSize);
matthiasm@117 442 m_yin.setFast(!m_preciseTime);
mail@132 443
mail@132 444 if (m_fixedLag == 1.f) m_pitchHmm = MonoPitchHMM(100);
mail@132 445 else m_pitchHmm = MonoPitchHMM(0);
matthiasm@0 446
matthiasm@0 447 m_pitchProb.clear();
matthiasm@0 448 m_timestamp.clear();
matthiasm@103 449 m_level.clear();
mail@133 450 m_pitchTrack.clear();
Chris@9 451 /*
matthiasm@36 452 std::cerr << "PYinVamp::reset"
matthiasm@0 453 << ", blockSize = " << m_blockSize
matthiasm@0 454 << std::endl;
Chris@9 455 */
matthiasm@0 456 }
matthiasm@0 457
matthiasm@36 458 PYinVamp::FeatureSet
matthiasm@36 459 PYinVamp::process(const float *const *inputBuffers, RealTime timestamp)
matthiasm@0 460 {
mail@133 461 std::cerr << timestamp << std::endl;
matthiasm@77 462 int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4;
mail@133 463 timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset,
mail@133 464 lrintf(m_inputSampleRate));
matthiasm@77 465
matthiasm@0 466 FeatureSet fs;
matthiasm@0 467
matthiasm@46 468 float rms = 0;
matthiasm@46 469
matthiasm@0 470 double *dInputBuffers = new double[m_blockSize];
matthiasm@46 471 for (size_t i = 0; i < m_blockSize; ++i) {
matthiasm@46 472 dInputBuffers[i] = inputBuffers[0][i];
matthiasm@46 473 rms += inputBuffers[0][i] * inputBuffers[0][i];
matthiasm@46 474 }
matthiasm@46 475 rms /= m_blockSize;
matthiasm@46 476 rms = sqrt(rms);
matthiasm@116 477
matthiasm@72 478 bool isLowAmplitude = (rms < m_lowAmp);
matthiasm@0 479
matthiasm@0 480 Yin::YinOutput yo = m_yin.processProbabilisticYin(dInputBuffers);
matthiasm@27 481 delete [] dInputBuffers;
matthiasm@27 482
matthiasm@103 483 m_level.push_back(yo.rms);
matthiasm@103 484
matthiasm@27 485 vector<pair<double, double> > tempPitchProb;
matthiasm@27 486 for (size_t iCandidate = 0; iCandidate < yo.freqProb.size(); ++iCandidate)
matthiasm@27 487 {
matthiasm@27 488 double tempPitch = 12 * std::log(yo.freqProb[iCandidate].first/440)/std::log(2.) + 69;
matthiasm@50 489 if (!isLowAmplitude)
matthiasm@116 490 {
matthiasm@46 491 tempPitchProb.push_back(pair<double, double>
matthiasm@46 492 (tempPitch, yo.freqProb[iCandidate].second));
matthiasm@116 493 } else {
matthiasm@116 494 float factor = ((rms+0.01*m_lowAmp)/(1.01*m_lowAmp));
matthiasm@46 495 tempPitchProb.push_back(pair<double, double>
matthiasm@65 496 (tempPitch, yo.freqProb[iCandidate].second*factor));
matthiasm@65 497 }
matthiasm@27 498 }
mail@130 499
mail@132 500 vector<double> tempObsProb = m_pitchHmm.calculateObsProb(tempPitchProb);
mail@132 501 if (m_timestamp.empty())
mail@130 502 {
mail@132 503 m_pitchHmm.initialise(tempObsProb);
mail@132 504 } else {
mail@132 505 m_pitchHmm.process(tempObsProb);
mail@132 506 }
mail@132 507
matthiasm@27 508 m_pitchProb.push_back(tempPitchProb);
matthiasm@27 509 m_timestamp.push_back(timestamp);
matthiasm@27 510
mail@132 511 int lag = m_pitchHmm.m_fixedLag;
mail@132 512
mail@133 513 if (m_fixedLag == 1.f) // do fixed-lag smoothing instead of full Viterbi
mail@132 514 {
Chris@141 515 if (int(m_timestamp.size()) == lag + 1)
mail@131 516 {
mail@132 517 m_timestamp.pop_front();
mail@132 518 m_pitchProb.pop_front();
mail@132 519
mail@132 520 Feature f;
mail@132 521 f.hasTimestamp = true;
mail@132 522 vector<int> rawPitchPath = m_pitchHmm.track();
mail@132 523 float freq = m_pitchHmm.nearestFreq(rawPitchPath[0],
mail@132 524 m_pitchProb[0]);
mail@133 525 m_pitchTrack.push_back(freq);
mail@132 526 f.timestamp = m_timestamp[0];
mail@132 527 f.values.clear();
mail@132 528
mail@132 529 // different output modes
mail@132 530 if (freq < 0 && (m_outputUnvoiced==0))
mail@132 531 {
mail@132 532
mail@132 533 } else {
mail@132 534 if (m_outputUnvoiced == 1)
mail@132 535 {
mail@132 536 f.values.push_back(fabs(freq));
mail@132 537 } else {
mail@132 538 f.values.push_back(freq);
mail@132 539 }
mail@132 540 fs[m_oSmoothedPitchTrack].push_back(f);
mail@132 541 }
mail@131 542 }
mail@130 543 }
mail@132 544
mail@128 545
matthiasm@27 546 // F0 CANDIDATES
matthiasm@0 547 Feature f;
matthiasm@0 548 f.hasTimestamp = true;
matthiasm@0 549 f.timestamp = timestamp;
matthiasm@0 550 for (size_t i = 0; i < yo.freqProb.size(); ++i)
matthiasm@0 551 {
matthiasm@0 552 f.values.push_back(yo.freqProb[i].first);
matthiasm@0 553 }
matthiasm@0 554 fs[m_oF0Candidates].push_back(f);
matthiasm@0 555
matthiasm@27 556 // VOICEDPROB
matthiasm@0 557 f.values.clear();
matthiasm@0 558 float voicedProb = 0;
matthiasm@0 559 for (size_t i = 0; i < yo.freqProb.size(); ++i)
matthiasm@0 560 {
matthiasm@0 561 f.values.push_back(yo.freqProb[i].second);
matthiasm@0 562 voicedProb += yo.freqProb[i].second;
matthiasm@0 563 }
matthiasm@0 564 fs[m_oF0Probs].push_back(f);
matthiasm@0 565
mail@128 566 f.values.clear();
matthiasm@0 567 f.values.push_back(voicedProb);
matthiasm@0 568 fs[m_oVoicedProb].push_back(f);
matthiasm@0 569
matthiasm@27 570 // SALIENCE -- maybe this should eventually disappear
matthiasm@0 571 f.values.clear();
matthiasm@0 572 float salienceSum = 0;
matthiasm@0 573 for (size_t iBin = 0; iBin < yo.salience.size(); ++iBin)
matthiasm@0 574 {
matthiasm@0 575 f.values.push_back(yo.salience[iBin]);
matthiasm@0 576 salienceSum += yo.salience[iBin];
matthiasm@0 577 }
matthiasm@0 578 fs[m_oCandidateSalience].push_back(f);
matthiasm@0 579
matthiasm@0 580 return fs;
matthiasm@0 581 }
matthiasm@0 582
matthiasm@36 583 PYinVamp::FeatureSet
matthiasm@36 584 PYinVamp::getRemainingFeatures()
matthiasm@0 585 {
matthiasm@0 586 FeatureSet fs;
matthiasm@0 587 Feature f;
matthiasm@0 588 f.hasTimestamp = true;
matthiasm@0 589 f.hasDuration = false;
matthiasm@0 590
Chris@4 591 if (m_pitchProb.empty()) {
Chris@4 592 return fs;
Chris@4 593 }
Chris@4 594
mail@131 595 // ================== P I T C H T R A C K =================================
mail@131 596
mail@132 597 vector<int> rawPitchPath = m_pitchHmm.track();
mail@131 598
mail@131 599 for (size_t iFrame = 0; iFrame < rawPitchPath.size(); ++iFrame)
matthiasm@0 600 {
mail@132 601 float freq = m_pitchHmm.nearestFreq(rawPitchPath[iFrame],
mail@132 602 m_pitchProb[iFrame]);
mail@133 603 m_pitchTrack.push_back(freq); // for note processing below
mail@133 604
matthiasm@0 605 f.timestamp = m_timestamp[iFrame];
matthiasm@0 606 f.values.clear();
mail@131 607
mail@131 608 // different output modes
mail@131 609 if (freq < 0 && (m_outputUnvoiced==0)) continue;
matthiasm@0 610 if (m_outputUnvoiced == 1)
matthiasm@0 611 {
mail@131 612 f.values.push_back(fabs(freq));
matthiasm@0 613 } else {
mail@131 614 f.values.push_back(freq);
matthiasm@0 615 }
matthiasm@0 616 fs[m_oSmoothedPitchTrack].push_back(f);
matthiasm@0 617 }
matthiasm@0 618
mail@131 619 // ======================== N O T E S ======================================
matthiasm@1 620 MonoNote mn;
matthiasm@1 621 std::vector<std::vector<std::pair<double, double> > > smoothedPitch;
mail@133 622 for (size_t iFrame = 0; iFrame < m_pitchTrack.size(); ++iFrame) {
matthiasm@1 623 std::vector<std::pair<double, double> > temp;
mail@133 624 if (m_pitchTrack[iFrame] > 0)
matthiasm@1 625 {
mail@133 626 double tempPitch = 12 *
mail@133 627 std::log(m_pitchTrack[iFrame]/440)/std::log(2.) + 69;
matthiasm@1 628 temp.push_back(std::pair<double,double>(tempPitch, .9));
mail@133 629 // std::cerr << "tempPitch: " << tempPitch << std::endl;
matthiasm@1 630 }
mail@133 631 // std::cerr << "temp size: " << temp.size() << std::endl;
matthiasm@1 632 smoothedPitch.push_back(temp);
matthiasm@1 633 }
mail@133 634
matthiasm@1 635 vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch);
mail@133 636 std::cerr << "mnOut size: " << mnOut.size() << std::endl;
mail@133 637 std::cerr << "m_pitchTrack size: " << m_pitchTrack.size() << std::endl;
matthiasm@1 638
matthiasm@6 639 // turning feature into a note feature
matthiasm@1 640 f.hasTimestamp = true;
matthiasm@1 641 f.hasDuration = true;
matthiasm@1 642 f.values.clear();
matthiasm@6 643
matthiasm@6 644 int onsetFrame = 0;
matthiasm@6 645 bool isVoiced = 0;
matthiasm@6 646 bool oldIsVoiced = 0;
mail@133 647 size_t nFrame = m_pitchTrack.size();
matthiasm@108 648
matthiasm@108 649 float minNoteFrames = (m_inputSampleRate*m_pruneThresh) / m_stepSize;
matthiasm@1 650
mail@133 651 // the body of the loop below should be in a function/method
mail@133 652 // but what does it actually do??
mail@133 653 // * takes the result of the note tracking HMM
mail@133 654 // * collects contiguously pitched pitches
mail@133 655 // * writes a note once it notices the voiced segment has ended
mail@133 656 // complications:
mail@133 657 // * it needs a lookahead of two frames for m_level (wtf was I thinking)
mail@133 658 // * it needs to know the timestamp (which can be guessed from the frame no)
mail@133 659 // *
mail@133 660 int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4;
mail@133 661 RealTime timestampOffset = Vamp::RealTime::frame2RealTime(offset,
mail@133 662 lrintf(m_inputSampleRate));
mail@133 663
mail@133 664 std::vector<float> notePitchTrack; // collects pitches for 1 note at a time
matthiasm@6 665 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
matthiasm@1 666 {
mail@133 667 isVoiced = mnOut[iFrame].noteState < 3
mail@133 668 && smoothedPitch[iFrame].size() > 0
mail@133 669 && (iFrame >= nFrame-2
mail@133 670 || ((m_level[iFrame]/m_level[iFrame+2]) > m_onsetSensitivity));
matthiasm@6 671 if (isVoiced && iFrame != nFrame-1)
matthiasm@1 672 {
matthiasm@6 673 if (oldIsVoiced == 0) // beginning of a note
matthiasm@1 674 {
matthiasm@6 675 onsetFrame = iFrame;
matthiasm@1 676 }
matthiasm@6 677 float pitch = smoothedPitch[iFrame][0].first;
matthiasm@6 678 notePitchTrack.push_back(pitch); // add to the note's pitch track
matthiasm@6 679 } else { // not currently voiced
matthiasm@108 680 if (oldIsVoiced == 1) // end of note
matthiasm@6 681 {
matthiasm@108 682 if (notePitchTrack.size() >= minNoteFrames)
matthiasm@108 683 {
matthiasm@108 684 std::sort(notePitchTrack.begin(), notePitchTrack.end());
matthiasm@108 685 float medianPitch = notePitchTrack[notePitchTrack.size()/2];
mail@133 686 float medianFreq =
mail@133 687 std::pow(2,(medianPitch - 69) / 12) * 440;
matthiasm@108 688 f.values.clear();
matthiasm@108 689 f.values.push_back(medianFreq);
mail@133 690 RealTime start = RealTime::frame2RealTime(
mail@133 691 onsetFrame * m_stepSize, lrintf(m_inputSampleRate)) +
mail@133 692 timestampOffset;
mail@133 693 RealTime end = RealTime::frame2RealTime(
mail@133 694 iFrame * m_stepSize, lrintf(m_inputSampleRate)) +
mail@133 695 timestampOffset;
mail@133 696 f.timestamp = start;
mail@133 697 f.duration = end - start;
matthiasm@108 698 fs[m_oNotes].push_back(f);
matthiasm@108 699 }
matthiasm@108 700 notePitchTrack.clear();
matthiasm@1 701 }
matthiasm@1 702 }
matthiasm@6 703 oldIsVoiced = isVoiced;
matthiasm@1 704 }
matthiasm@0 705 return fs;
matthiasm@0 706 }