annotate PYinVamp.cpp @ 132:926c292fa3ff fixedlag

fixed lag smoothing for pitch track working
author Matthias Mauch <mail@matthiasmauch.net>
date Fri, 03 Jul 2015 17:34:38 +0100
parents b877df85ad9e
children 83978b93aac1
rev   line source
matthiasm@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@9 2
matthiasm@0 3 /*
Chris@9 4 pYIN - A fundamental frequency estimator for monophonic audio
Chris@9 5 Centre for Digital Music, Queen Mary, University of London.
Chris@9 6
Chris@9 7 This program is free software; you can redistribute it and/or
Chris@9 8 modify it under the terms of the GNU General Public License as
Chris@9 9 published by the Free Software Foundation; either version 2 of the
Chris@9 10 License, or (at your option) any later version. See the file
Chris@9 11 COPYING included with this distribution for more information.
matthiasm@0 12 */
matthiasm@0 13
matthiasm@36 14 #include "PYinVamp.h"
matthiasm@0 15 #include "MonoNote.h"
mail@131 16 #include "MonoPitchHMM.h"
matthiasm@0 17
matthiasm@0 18 #include "vamp-sdk/FFT.h"
matthiasm@0 19
matthiasm@0 20 #include <vector>
matthiasm@0 21 #include <algorithm>
matthiasm@0 22
matthiasm@0 23 #include <cstdio>
matthiasm@0 24 #include <cmath>
matthiasm@0 25 #include <complex>
matthiasm@0 26
matthiasm@0 27 using std::string;
matthiasm@0 28 using std::vector;
matthiasm@0 29 using Vamp::RealTime;
matthiasm@0 30
matthiasm@0 31
matthiasm@36 32 PYinVamp::PYinVamp(float inputSampleRate) :
matthiasm@0 33 Plugin(inputSampleRate),
matthiasm@0 34 m_channels(0),
matthiasm@0 35 m_stepSize(256),
matthiasm@0 36 m_blockSize(2048),
matthiasm@0 37 m_fmin(40),
matthiasm@58 38 m_fmax(1600),
matthiasm@0 39 m_yin(2048, inputSampleRate, 0.0),
matthiasm@0 40 m_oF0Candidates(0),
matthiasm@0 41 m_oF0Probs(0),
matthiasm@0 42 m_oVoicedProb(0),
matthiasm@0 43 m_oCandidateSalience(0),
matthiasm@0 44 m_oSmoothedPitchTrack(0),
matthiasm@0 45 m_oNotes(0),
matthiasm@0 46 m_threshDistr(2.0f),
mail@132 47 m_fixedLag(1.0f),
matthiasm@6 48 m_outputUnvoiced(0.0f),
matthiasm@70 49 m_preciseTime(0.0f),
matthiasm@117 50 m_lowAmp(0.1f),
matthiasm@117 51 m_onsetSensitivity(0.7f),
matthiasm@117 52 m_pruneThresh(0.1f),
mail@132 53 m_pitchHmm(0),
matthiasm@0 54 m_pitchProb(0),
matthiasm@103 55 m_timestamp(0),
matthiasm@103 56 m_level(0)
matthiasm@0 57 {
matthiasm@0 58 }
matthiasm@0 59
matthiasm@36 60 PYinVamp::~PYinVamp()
matthiasm@0 61 {
matthiasm@0 62 }
matthiasm@0 63
matthiasm@0 64 string
matthiasm@36 65 PYinVamp::getIdentifier() const
matthiasm@0 66 {
matthiasm@1 67 return "pyin";
matthiasm@0 68 }
matthiasm@0 69
matthiasm@0 70 string
matthiasm@36 71 PYinVamp::getName() const
matthiasm@0 72 {
matthiasm@1 73 return "pYin";
matthiasm@0 74 }
matthiasm@0 75
matthiasm@0 76 string
matthiasm@36 77 PYinVamp::getDescription() const
matthiasm@0 78 {
matthiasm@0 79 return "Monophonic pitch and note tracking based on a probabilistic Yin extension.";
matthiasm@0 80 }
matthiasm@0 81
matthiasm@0 82 string
matthiasm@36 83 PYinVamp::getMaker() const
matthiasm@0 84 {
matthiasm@0 85 return "Matthias Mauch";
matthiasm@0 86 }
matthiasm@0 87
matthiasm@0 88 int
matthiasm@36 89 PYinVamp::getPluginVersion() const
matthiasm@0 90 {
matthiasm@0 91 // Increment this each time you release a version that behaves
matthiasm@0 92 // differently from the previous one
Chris@125 93 return 2;
matthiasm@0 94 }
matthiasm@0 95
matthiasm@0 96 string
matthiasm@36 97 PYinVamp::getCopyright() const
matthiasm@0 98 {
matthiasm@0 99 return "GPL";
matthiasm@0 100 }
matthiasm@0 101
matthiasm@36 102 PYinVamp::InputDomain
matthiasm@36 103 PYinVamp::getInputDomain() const
matthiasm@0 104 {
matthiasm@0 105 return TimeDomain;
matthiasm@0 106 }
matthiasm@0 107
matthiasm@0 108 size_t
matthiasm@36 109 PYinVamp::getPreferredBlockSize() const
matthiasm@0 110 {
matthiasm@0 111 return 2048;
matthiasm@0 112 }
matthiasm@0 113
matthiasm@0 114 size_t
matthiasm@36 115 PYinVamp::getPreferredStepSize() const
matthiasm@0 116 {
matthiasm@0 117 return 256;
matthiasm@0 118 }
matthiasm@0 119
matthiasm@0 120 size_t
matthiasm@36 121 PYinVamp::getMinChannelCount() const
matthiasm@0 122 {
matthiasm@0 123 return 1;
matthiasm@0 124 }
matthiasm@0 125
matthiasm@0 126 size_t
matthiasm@36 127 PYinVamp::getMaxChannelCount() const
matthiasm@0 128 {
matthiasm@0 129 return 1;
matthiasm@0 130 }
matthiasm@0 131
matthiasm@36 132 PYinVamp::ParameterList
matthiasm@36 133 PYinVamp::getParameterDescriptors() const
matthiasm@0 134 {
matthiasm@0 135 ParameterList list;
matthiasm@0 136
matthiasm@0 137 ParameterDescriptor d;
matthiasm@0 138
matthiasm@0 139 d.identifier = "threshdistr";
matthiasm@0 140 d.name = "Yin threshold distribution";
matthiasm@0 141 d.description = ".";
matthiasm@0 142 d.unit = "";
matthiasm@0 143 d.minValue = 0.0f;
matthiasm@0 144 d.maxValue = 7.0f;
matthiasm@0 145 d.defaultValue = 2.0f;
matthiasm@0 146 d.isQuantized = true;
matthiasm@0 147 d.quantizeStep = 1.0f;
matthiasm@0 148 d.valueNames.push_back("Uniform");
matthiasm@0 149 d.valueNames.push_back("Beta (mean 0.10)");
matthiasm@0 150 d.valueNames.push_back("Beta (mean 0.15)");
matthiasm@0 151 d.valueNames.push_back("Beta (mean 0.20)");
matthiasm@0 152 d.valueNames.push_back("Beta (mean 0.30)");
matthiasm@0 153 d.valueNames.push_back("Single Value 0.10");
matthiasm@0 154 d.valueNames.push_back("Single Value 0.15");
matthiasm@0 155 d.valueNames.push_back("Single Value 0.20");
matthiasm@0 156 list.push_back(d);
matthiasm@0 157
mail@130 158 d.valueNames.clear();
mail@130 159
mail@130 160 d.identifier = "fixedlag";
mail@130 161 d.name = "Fixed-lag smoothing";
mail@130 162 d.description = "Use fixed lag smoothing, not full Viterbi smoothing.";
mail@130 163 d.unit = "";
mail@130 164 d.minValue = 0.0f;
mail@130 165 d.maxValue = 1.0f;
mail@130 166 d.defaultValue = 0.0f;
mail@130 167 d.isQuantized = true;
mail@130 168 d.quantizeStep = 1.0f;
mail@130 169 list.push_back(d);
mail@130 170
matthiasm@0 171 d.identifier = "outputunvoiced";
matthiasm@0 172 d.valueNames.clear();
matthiasm@0 173 d.name = "Output estimates classified as unvoiced?";
matthiasm@0 174 d.description = ".";
matthiasm@0 175 d.unit = "";
matthiasm@0 176 d.minValue = 0.0f;
matthiasm@0 177 d.maxValue = 2.0f;
matthiasm@6 178 d.defaultValue = 0.0f;
matthiasm@0 179 d.isQuantized = true;
matthiasm@0 180 d.quantizeStep = 1.0f;
matthiasm@0 181 d.valueNames.push_back("No");
matthiasm@0 182 d.valueNames.push_back("Yes");
matthiasm@0 183 d.valueNames.push_back("Yes, as negative frequencies");
matthiasm@0 184 list.push_back(d);
matthiasm@0 185
matthiasm@70 186 d.identifier = "precisetime";
matthiasm@70 187 d.valueNames.clear();
matthiasm@70 188 d.name = "Use non-standard precise YIN timing (slow).";
matthiasm@70 189 d.description = ".";
matthiasm@70 190 d.unit = "";
matthiasm@70 191 d.minValue = 0.0f;
matthiasm@70 192 d.maxValue = 1.0f;
matthiasm@70 193 d.defaultValue = 0.0f;
matthiasm@70 194 d.isQuantized = true;
matthiasm@70 195 d.quantizeStep = 1.0f;
matthiasm@70 196 list.push_back(d);
matthiasm@70 197
matthiasm@72 198 d.identifier = "lowampsuppression";
matthiasm@72 199 d.valueNames.clear();
matthiasm@72 200 d.name = "Suppress low amplitude pitch estimates.";
matthiasm@72 201 d.description = ".";
matthiasm@72 202 d.unit = "";
matthiasm@72 203 d.minValue = 0.0f;
matthiasm@72 204 d.maxValue = 1.0f;
matthiasm@73 205 d.defaultValue = 0.1f;
matthiasm@72 206 d.isQuantized = false;
matthiasm@72 207 list.push_back(d);
matthiasm@70 208
matthiasm@107 209 d.identifier = "onsetsensitivity";
matthiasm@107 210 d.valueNames.clear();
matthiasm@107 211 d.name = "Onset sensitivity";
matthiasm@107 212 d.description = "Adds additional note onsets when RMS increases.";
matthiasm@107 213 d.unit = "";
matthiasm@107 214 d.minValue = 0.0f;
matthiasm@107 215 d.maxValue = 1.0f;
matthiasm@117 216 d.defaultValue = 0.7f;
matthiasm@108 217 d.isQuantized = false;
matthiasm@108 218 list.push_back(d);
matthiasm@108 219
matthiasm@108 220 d.identifier = "prunethresh";
matthiasm@108 221 d.valueNames.clear();
matthiasm@108 222 d.name = "Duration pruning threshold.";
matthiasm@108 223 d.description = "Prune notes that are shorter than this value.";
matthiasm@108 224 d.unit = "";
matthiasm@108 225 d.minValue = 0.0f;
matthiasm@108 226 d.maxValue = 0.2f;
matthiasm@117 227 d.defaultValue = 0.1f;
matthiasm@107 228 d.isQuantized = false;
matthiasm@107 229 list.push_back(d);
matthiasm@107 230
matthiasm@0 231 return list;
matthiasm@0 232 }
matthiasm@0 233
matthiasm@0 234 float
matthiasm@36 235 PYinVamp::getParameter(string identifier) const
matthiasm@0 236 {
matthiasm@0 237 if (identifier == "threshdistr") {
matthiasm@0 238 return m_threshDistr;
matthiasm@0 239 }
mail@130 240 if (identifier == "fixedlag") {
mail@130 241 return m_fixedLag;
mail@130 242 }
matthiasm@0 243 if (identifier == "outputunvoiced") {
matthiasm@0 244 return m_outputUnvoiced;
matthiasm@0 245 }
matthiasm@70 246 if (identifier == "precisetime") {
matthiasm@70 247 return m_preciseTime;
matthiasm@70 248 }
matthiasm@72 249 if (identifier == "lowampsuppression") {
matthiasm@72 250 return m_lowAmp;
matthiasm@72 251 }
matthiasm@107 252 if (identifier == "onsetsensitivity") {
matthiasm@107 253 return m_onsetSensitivity;
matthiasm@107 254 }
matthiasm@108 255 if (identifier == "prunethresh") {
matthiasm@108 256 return m_pruneThresh;
matthiasm@108 257 }
matthiasm@0 258 return 0.f;
matthiasm@0 259 }
matthiasm@0 260
matthiasm@0 261 void
matthiasm@36 262 PYinVamp::setParameter(string identifier, float value)
matthiasm@0 263 {
matthiasm@0 264 if (identifier == "threshdistr")
matthiasm@0 265 {
matthiasm@0 266 m_threshDistr = value;
matthiasm@0 267 }
mail@130 268 if (identifier == "fixedlag")
mail@130 269 {
mail@130 270 m_fixedLag = value;
mail@130 271 }
matthiasm@0 272 if (identifier == "outputunvoiced")
matthiasm@0 273 {
matthiasm@0 274 m_outputUnvoiced = value;
matthiasm@0 275 }
matthiasm@70 276 if (identifier == "precisetime")
matthiasm@70 277 {
matthiasm@70 278 m_preciseTime = value;
matthiasm@70 279 }
matthiasm@72 280 if (identifier == "lowampsuppression")
matthiasm@72 281 {
matthiasm@72 282 m_lowAmp = value;
matthiasm@72 283 }
matthiasm@107 284 if (identifier == "onsetsensitivity")
matthiasm@107 285 {
matthiasm@107 286 m_onsetSensitivity = value;
matthiasm@107 287 }
matthiasm@108 288 if (identifier == "prunethresh")
matthiasm@108 289 {
matthiasm@108 290 m_pruneThresh = value;
matthiasm@108 291 }
matthiasm@0 292 }
matthiasm@0 293
matthiasm@36 294 PYinVamp::ProgramList
matthiasm@36 295 PYinVamp::getPrograms() const
matthiasm@0 296 {
matthiasm@0 297 ProgramList list;
matthiasm@0 298 return list;
matthiasm@0 299 }
matthiasm@0 300
matthiasm@0 301 string
matthiasm@36 302 PYinVamp::getCurrentProgram() const
matthiasm@0 303 {
matthiasm@0 304 return ""; // no programs
matthiasm@0 305 }
matthiasm@0 306
matthiasm@0 307 void
matthiasm@36 308 PYinVamp::selectProgram(string name)
matthiasm@0 309 {
matthiasm@0 310 }
matthiasm@0 311
matthiasm@36 312 PYinVamp::OutputList
matthiasm@36 313 PYinVamp::getOutputDescriptors() const
matthiasm@0 314 {
matthiasm@0 315 OutputList outputs;
matthiasm@0 316
matthiasm@0 317 OutputDescriptor d;
matthiasm@0 318
matthiasm@0 319 int outputNumber = 0;
matthiasm@0 320
matthiasm@0 321 d.identifier = "f0candidates";
matthiasm@0 322 d.name = "F0 Candidates";
matthiasm@0 323 d.description = "Estimated fundamental frequency candidates.";
matthiasm@0 324 d.unit = "Hz";
matthiasm@0 325 d.hasFixedBinCount = false;
matthiasm@0 326 // d.binCount = 1;
matthiasm@0 327 d.hasKnownExtents = true;
matthiasm@0 328 d.minValue = m_fmin;
matthiasm@0 329 d.maxValue = 500;
matthiasm@0 330 d.isQuantized = false;
matthiasm@0 331 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 332 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 333 d.hasDuration = false;
matthiasm@0 334 outputs.push_back(d);
matthiasm@0 335 m_oF0Candidates = outputNumber++;
matthiasm@0 336
matthiasm@0 337 d.identifier = "f0probs";
matthiasm@0 338 d.name = "Candidate Probabilities";
matthiasm@0 339 d.description = "Probabilities of estimated fundamental frequency candidates.";
matthiasm@0 340 d.unit = "";
matthiasm@0 341 d.hasFixedBinCount = false;
matthiasm@0 342 // d.binCount = 1;
matthiasm@0 343 d.hasKnownExtents = true;
matthiasm@0 344 d.minValue = 0;
matthiasm@0 345 d.maxValue = 1;
matthiasm@0 346 d.isQuantized = false;
matthiasm@0 347 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 348 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 349 d.hasDuration = false;
matthiasm@0 350 outputs.push_back(d);
matthiasm@0 351 m_oF0Probs = outputNumber++;
matthiasm@0 352
matthiasm@0 353 d.identifier = "voicedprob";
matthiasm@0 354 d.name = "Voiced Probability";
matthiasm@0 355 d.description = "Probability that the signal is voiced according to Probabilistic Yin.";
matthiasm@0 356 d.unit = "";
matthiasm@0 357 d.hasFixedBinCount = true;
matthiasm@0 358 d.binCount = 1;
matthiasm@0 359 d.hasKnownExtents = true;
matthiasm@0 360 d.minValue = 0;
matthiasm@0 361 d.maxValue = 1;
matthiasm@0 362 d.isQuantized = false;
matthiasm@0 363 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 364 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 365 d.hasDuration = false;
matthiasm@0 366 outputs.push_back(d);
matthiasm@0 367 m_oVoicedProb = outputNumber++;
matthiasm@0 368
matthiasm@0 369 d.identifier = "candidatesalience";
matthiasm@0 370 d.name = "Candidate Salience";
matthiasm@0 371 d.description = "Candidate Salience";
matthiasm@0 372 d.hasFixedBinCount = true;
matthiasm@0 373 d.binCount = m_blockSize / 2;
matthiasm@0 374 d.hasKnownExtents = true;
matthiasm@0 375 d.minValue = 0;
matthiasm@0 376 d.maxValue = 1;
matthiasm@0 377 d.isQuantized = false;
matthiasm@0 378 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 379 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 380 d.hasDuration = false;
matthiasm@0 381 outputs.push_back(d);
matthiasm@0 382 m_oCandidateSalience = outputNumber++;
matthiasm@0 383
matthiasm@0 384 d.identifier = "smoothedpitchtrack";
matthiasm@0 385 d.name = "Smoothed Pitch Track";
matthiasm@0 386 d.description = ".";
matthiasm@0 387 d.unit = "Hz";
matthiasm@0 388 d.hasFixedBinCount = true;
matthiasm@0 389 d.binCount = 1;
matthiasm@0 390 d.hasKnownExtents = false;
matthiasm@0 391 // d.minValue = 0;
matthiasm@0 392 // d.maxValue = 1;
matthiasm@0 393 d.isQuantized = false;
matthiasm@0 394 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 395 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 396 d.hasDuration = false;
matthiasm@0 397 outputs.push_back(d);
matthiasm@0 398 m_oSmoothedPitchTrack = outputNumber++;
matthiasm@0 399
matthiasm@0 400 d.identifier = "notes";
matthiasm@0 401 d.name = "Notes";
matthiasm@0 402 d.description = "Derived fixed-pitch note frequencies";
matthiasm@0 403 // d.unit = "MIDI unit";
matthiasm@0 404 d.unit = "Hz";
matthiasm@0 405 d.hasFixedBinCount = true;
matthiasm@0 406 d.binCount = 1;
matthiasm@0 407 d.hasKnownExtents = false;
matthiasm@0 408 d.isQuantized = false;
matthiasm@0 409 d.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0 410 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 411 d.hasDuration = true;
matthiasm@0 412 outputs.push_back(d);
matthiasm@0 413 m_oNotes = outputNumber++;
matthiasm@0 414
matthiasm@0 415 return outputs;
matthiasm@0 416 }
matthiasm@0 417
matthiasm@0 418 bool
matthiasm@36 419 PYinVamp::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 420 {
matthiasm@0 421 if (channels < getMinChannelCount() ||
matthiasm@0 422 channels > getMaxChannelCount()) return false;
matthiasm@0 423
Chris@9 424 /*
matthiasm@36 425 std::cerr << "PYinVamp::initialise: channels = " << channels
matthiasm@0 426 << ", stepSize = " << stepSize << ", blockSize = " << blockSize
matthiasm@0 427 << std::endl;
Chris@9 428 */
matthiasm@0 429 m_channels = channels;
matthiasm@0 430 m_stepSize = stepSize;
matthiasm@0 431 m_blockSize = blockSize;
matthiasm@0 432
matthiasm@0 433 reset();
matthiasm@0 434
matthiasm@0 435 return true;
matthiasm@0 436 }
matthiasm@0 437
matthiasm@0 438 void
matthiasm@36 439 PYinVamp::reset()
matthiasm@0 440 {
matthiasm@0 441 m_yin.setThresholdDistr(m_threshDistr);
matthiasm@0 442 m_yin.setFrameSize(m_blockSize);
matthiasm@117 443 m_yin.setFast(!m_preciseTime);
mail@132 444
mail@132 445 if (m_fixedLag == 1.f) m_pitchHmm = MonoPitchHMM(100);
mail@132 446 else m_pitchHmm = MonoPitchHMM(0);
matthiasm@0 447
matthiasm@0 448 m_pitchProb.clear();
matthiasm@0 449 m_timestamp.clear();
matthiasm@103 450 m_level.clear();
Chris@9 451 /*
matthiasm@36 452 std::cerr << "PYinVamp::reset"
matthiasm@0 453 << ", blockSize = " << m_blockSize
matthiasm@0 454 << std::endl;
Chris@9 455 */
matthiasm@0 456 }
matthiasm@0 457
matthiasm@36 458 PYinVamp::FeatureSet
matthiasm@36 459 PYinVamp::process(const float *const *inputBuffers, RealTime timestamp)
matthiasm@0 460 {
matthiasm@77 461 int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4;
matthiasm@77 462 timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset, lrintf(m_inputSampleRate));
matthiasm@77 463
matthiasm@0 464 FeatureSet fs;
matthiasm@0 465
matthiasm@46 466 float rms = 0;
matthiasm@46 467
matthiasm@0 468 double *dInputBuffers = new double[m_blockSize];
matthiasm@46 469 for (size_t i = 0; i < m_blockSize; ++i) {
matthiasm@46 470 dInputBuffers[i] = inputBuffers[0][i];
matthiasm@46 471 rms += inputBuffers[0][i] * inputBuffers[0][i];
matthiasm@46 472 }
matthiasm@46 473 rms /= m_blockSize;
matthiasm@46 474 rms = sqrt(rms);
matthiasm@116 475
matthiasm@72 476 bool isLowAmplitude = (rms < m_lowAmp);
matthiasm@0 477
matthiasm@0 478 Yin::YinOutput yo = m_yin.processProbabilisticYin(dInputBuffers);
matthiasm@27 479 delete [] dInputBuffers;
matthiasm@27 480
matthiasm@103 481 m_level.push_back(yo.rms);
matthiasm@103 482
matthiasm@27 483 vector<pair<double, double> > tempPitchProb;
matthiasm@27 484 for (size_t iCandidate = 0; iCandidate < yo.freqProb.size(); ++iCandidate)
matthiasm@27 485 {
matthiasm@27 486 double tempPitch = 12 * std::log(yo.freqProb[iCandidate].first/440)/std::log(2.) + 69;
matthiasm@50 487 if (!isLowAmplitude)
matthiasm@116 488 {
matthiasm@46 489 tempPitchProb.push_back(pair<double, double>
matthiasm@46 490 (tempPitch, yo.freqProb[iCandidate].second));
matthiasm@116 491 } else {
matthiasm@116 492 float factor = ((rms+0.01*m_lowAmp)/(1.01*m_lowAmp));
matthiasm@46 493 tempPitchProb.push_back(pair<double, double>
matthiasm@65 494 (tempPitch, yo.freqProb[iCandidate].second*factor));
matthiasm@65 495 }
matthiasm@27 496 }
mail@130 497
mail@132 498 vector<double> tempObsProb = m_pitchHmm.calculateObsProb(tempPitchProb);
mail@132 499 if (m_timestamp.empty())
mail@130 500 {
mail@132 501 m_pitchHmm.initialise(tempObsProb);
mail@132 502 } else {
mail@132 503 m_pitchHmm.process(tempObsProb);
mail@132 504 }
mail@132 505
mail@132 506 m_pitchProb.push_back(tempPitchProb);
mail@132 507 m_timestamp.push_back(timestamp);
mail@132 508
mail@132 509 int lag = m_pitchHmm.m_fixedLag;
mail@132 510
mail@132 511 if (m_fixedLag == 1.f)
mail@132 512 {
mail@132 513 if (m_timestamp.size() == lag + 1)
mail@131 514 {
mail@132 515 m_timestamp.pop_front();
mail@132 516 m_pitchProb.pop_front();
mail@132 517
mail@132 518 Feature f;
mail@132 519 f.hasTimestamp = true;
mail@132 520 vector<int> rawPitchPath = m_pitchHmm.track();
mail@132 521 float freq = m_pitchHmm.nearestFreq(rawPitchPath[0],
mail@132 522 m_pitchProb[0]);
mail@132 523 f.timestamp = m_timestamp[0];
mail@132 524 f.values.clear();
mail@132 525
mail@132 526 // different output modes
mail@132 527 if (freq < 0 && (m_outputUnvoiced==0))
mail@132 528 {
mail@132 529
mail@132 530 } else {
mail@132 531 if (m_outputUnvoiced == 1)
mail@132 532 {
mail@132 533 f.values.push_back(fabs(freq));
mail@132 534 } else {
mail@132 535 f.values.push_back(freq);
mail@132 536 }
mail@132 537 fs[m_oSmoothedPitchTrack].push_back(f);
mail@132 538 }
mail@131 539 }
mail@130 540 }
mail@132 541
matthiasm@27 542
matthiasm@27 543 // F0 CANDIDATES
matthiasm@0 544 Feature f;
matthiasm@0 545 f.hasTimestamp = true;
matthiasm@0 546 f.timestamp = timestamp;
matthiasm@0 547 for (size_t i = 0; i < yo.freqProb.size(); ++i)
matthiasm@0 548 {
matthiasm@0 549 f.values.push_back(yo.freqProb[i].first);
matthiasm@0 550 }
matthiasm@0 551 fs[m_oF0Candidates].push_back(f);
matthiasm@0 552
matthiasm@27 553 // VOICEDPROB
matthiasm@0 554 f.values.clear();
matthiasm@0 555 float voicedProb = 0;
matthiasm@0 556 for (size_t i = 0; i < yo.freqProb.size(); ++i)
matthiasm@0 557 {
matthiasm@0 558 f.values.push_back(yo.freqProb[i].second);
matthiasm@0 559 voicedProb += yo.freqProb[i].second;
matthiasm@0 560 }
matthiasm@0 561 fs[m_oF0Probs].push_back(f);
matthiasm@0 562
mail@128 563 f.values.clear();
matthiasm@0 564 f.values.push_back(voicedProb);
matthiasm@0 565 fs[m_oVoicedProb].push_back(f);
matthiasm@0 566
matthiasm@27 567 // SALIENCE -- maybe this should eventually disappear
matthiasm@0 568 f.values.clear();
matthiasm@0 569 float salienceSum = 0;
matthiasm@0 570 for (size_t iBin = 0; iBin < yo.salience.size(); ++iBin)
matthiasm@0 571 {
matthiasm@0 572 f.values.push_back(yo.salience[iBin]);
matthiasm@0 573 salienceSum += yo.salience[iBin];
matthiasm@0 574 }
matthiasm@0 575 fs[m_oCandidateSalience].push_back(f);
matthiasm@0 576
matthiasm@0 577 return fs;
matthiasm@0 578 }
matthiasm@0 579
matthiasm@36 580 PYinVamp::FeatureSet
matthiasm@36 581 PYinVamp::getRemainingFeatures()
matthiasm@0 582 {
matthiasm@0 583 FeatureSet fs;
matthiasm@0 584 Feature f;
matthiasm@0 585 f.hasTimestamp = true;
matthiasm@0 586 f.hasDuration = false;
matthiasm@0 587
Chris@4 588 if (m_pitchProb.empty()) {
Chris@4 589 return fs;
Chris@4 590 }
Chris@4 591
mail@131 592 // ================== P I T C H T R A C K =================================
mail@131 593
mail@132 594 vector<int> rawPitchPath = m_pitchHmm.track();
mail@131 595 vector<float> mpOut;
mail@131 596
mail@131 597 for (size_t iFrame = 0; iFrame < rawPitchPath.size(); ++iFrame)
matthiasm@0 598 {
mail@132 599 float freq = m_pitchHmm.nearestFreq(rawPitchPath[iFrame],
mail@132 600 m_pitchProb[iFrame]);
mail@131 601 mpOut.push_back(freq); // for note processing below
mail@131 602
matthiasm@0 603 f.timestamp = m_timestamp[iFrame];
matthiasm@0 604 f.values.clear();
mail@131 605
mail@131 606 // different output modes
mail@131 607 if (freq < 0 && (m_outputUnvoiced==0)) continue;
matthiasm@0 608 if (m_outputUnvoiced == 1)
matthiasm@0 609 {
mail@131 610 f.values.push_back(fabs(freq));
matthiasm@0 611 } else {
mail@131 612 f.values.push_back(freq);
matthiasm@0 613 }
matthiasm@0 614 fs[m_oSmoothedPitchTrack].push_back(f);
matthiasm@0 615 }
matthiasm@0 616
mail@131 617 // ======================== N O T E S ======================================
mail@132 618 // MonoNote mn;
mail@132 619 // std::vector<std::vector<std::pair<double, double> > > smoothedPitch;
mail@132 620 // for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) {
mail@132 621 // std::vector<std::pair<double, double> > temp;
mail@132 622 // if (mpOut[iFrame] > 0)
mail@132 623 // {
mail@132 624 // double tempPitch = 12 *
mail@132 625 // std::log(mpOut[iFrame]/440)/std::log(2.) + 69;
mail@132 626 // temp.push_back(std::pair<double,double>(tempPitch, .9));
mail@132 627 // }
mail@132 628 // smoothedPitch.push_back(temp);
mail@132 629 // }
mail@132 630 // // vector<MonoNote::FrameOutput> mnOut = mn.process(m_pitchProb);
mail@132 631 // vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch);
matthiasm@1 632
mail@132 633 // // turning feature into a note feature
mail@132 634 // f.hasTimestamp = true;
mail@132 635 // f.hasDuration = true;
mail@132 636 // f.values.clear();
matthiasm@6 637
mail@132 638 // int onsetFrame = 0;
mail@132 639 // bool isVoiced = 0;
mail@132 640 // bool oldIsVoiced = 0;
mail@132 641 // size_t nFrame = m_pitchProb.size();
matthiasm@108 642
mail@132 643 // float minNoteFrames = (m_inputSampleRate*m_pruneThresh) / m_stepSize;
matthiasm@1 644
mail@132 645 // // the body of the loop below should be in a function/method
mail@132 646 // std::vector<float> notePitchTrack; // collects pitches for one note at a time
mail@132 647 // for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
mail@132 648 // {
mail@132 649 // isVoiced = mnOut[iFrame].noteState < 3
mail@132 650 // && smoothedPitch[iFrame].size() > 0
mail@132 651 // && (iFrame >= nFrame-2
mail@132 652 // || ((m_level[iFrame]/m_level[iFrame+2]) >
mail@132 653 // m_onsetSensitivity));
mail@132 654 // if (isVoiced && iFrame != nFrame-1)
mail@132 655 // {
mail@132 656 // if (oldIsVoiced == 0) // beginning of a note
mail@132 657 // {
mail@132 658 // onsetFrame = iFrame;
mail@132 659 // }
mail@132 660 // float pitch = smoothedPitch[iFrame][0].first;
mail@132 661 // notePitchTrack.push_back(pitch); // add to the note's pitch track
mail@132 662 // } else { // not currently voiced
mail@132 663 // if (oldIsVoiced == 1) // end of note
mail@132 664 // {
mail@132 665 // if (notePitchTrack.size() >= minNoteFrames)
mail@132 666 // {
mail@132 667 // std::sort(notePitchTrack.begin(), notePitchTrack.end());
mail@132 668 // float medianPitch = notePitchTrack[notePitchTrack.size()/2];
mail@132 669 // float medianFreq = std::pow(2,(medianPitch - 69) / 12) * 440;
mail@132 670 // f.values.clear();
mail@132 671 // f.values.push_back(medianFreq);
mail@132 672 // f.timestamp = m_timestamp[onsetFrame];
mail@132 673 // f.duration = m_timestamp[iFrame] - m_timestamp[onsetFrame];
mail@132 674 // fs[m_oNotes].push_back(f);
mail@132 675 // }
mail@132 676 // notePitchTrack.clear();
mail@132 677 // }
mail@132 678 // }
mail@132 679 // oldIsVoiced = isVoiced;
mail@132 680 // }
matthiasm@0 681 return fs;
matthiasm@0 682 }