annotate PYinVamp.cpp @ 131:b877df85ad9e fixedlag

mono pitch works now with the refactored HMM implementation
author Matthias Mauch <mail@matthiasmauch.net>
date Fri, 03 Jul 2015 14:09:05 +0100
parents 080fe18f5ebf
children 926c292fa3ff
rev   line source
matthiasm@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@9 2
matthiasm@0 3 /*
Chris@9 4 pYIN - A fundamental frequency estimator for monophonic audio
Chris@9 5 Centre for Digital Music, Queen Mary, University of London.
Chris@9 6
Chris@9 7 This program is free software; you can redistribute it and/or
Chris@9 8 modify it under the terms of the GNU General Public License as
Chris@9 9 published by the Free Software Foundation; either version 2 of the
Chris@9 10 License, or (at your option) any later version. See the file
Chris@9 11 COPYING included with this distribution for more information.
matthiasm@0 12 */
matthiasm@0 13
matthiasm@36 14 #include "PYinVamp.h"
matthiasm@0 15 #include "MonoNote.h"
matthiasm@0 16 #include "MonoPitch.h"
mail@131 17 #include "MonoPitchHMM.h"
matthiasm@0 18
matthiasm@0 19 #include "vamp-sdk/FFT.h"
matthiasm@0 20
matthiasm@0 21 #include <vector>
matthiasm@0 22 #include <algorithm>
matthiasm@0 23
matthiasm@0 24 #include <cstdio>
matthiasm@0 25 #include <cmath>
matthiasm@0 26 #include <complex>
matthiasm@0 27
matthiasm@0 28 using std::string;
matthiasm@0 29 using std::vector;
matthiasm@0 30 using Vamp::RealTime;
matthiasm@0 31
matthiasm@0 32
matthiasm@36 33 PYinVamp::PYinVamp(float inputSampleRate) :
matthiasm@0 34 Plugin(inputSampleRate),
matthiasm@0 35 m_channels(0),
matthiasm@0 36 m_stepSize(256),
matthiasm@0 37 m_blockSize(2048),
matthiasm@0 38 m_fmin(40),
matthiasm@58 39 m_fmax(1600),
matthiasm@0 40 m_yin(2048, inputSampleRate, 0.0),
matthiasm@0 41 m_oF0Candidates(0),
matthiasm@0 42 m_oF0Probs(0),
matthiasm@0 43 m_oVoicedProb(0),
matthiasm@0 44 m_oCandidateSalience(0),
matthiasm@0 45 m_oSmoothedPitchTrack(0),
matthiasm@0 46 m_oNotes(0),
matthiasm@0 47 m_threshDistr(2.0f),
mail@130 48 m_fixedLag(0.0f),
matthiasm@6 49 m_outputUnvoiced(0.0f),
matthiasm@70 50 m_preciseTime(0.0f),
matthiasm@117 51 m_lowAmp(0.1f),
matthiasm@117 52 m_onsetSensitivity(0.7f),
matthiasm@117 53 m_pruneThresh(0.1f),
mail@131 54 m_pitchHmm(),
matthiasm@0 55 m_pitchProb(0),
matthiasm@103 56 m_timestamp(0),
matthiasm@103 57 m_level(0)
matthiasm@0 58 {
matthiasm@0 59 }
matthiasm@0 60
matthiasm@36 61 PYinVamp::~PYinVamp()
matthiasm@0 62 {
matthiasm@0 63 }
matthiasm@0 64
matthiasm@0 65 string
matthiasm@36 66 PYinVamp::getIdentifier() const
matthiasm@0 67 {
matthiasm@1 68 return "pyin";
matthiasm@0 69 }
matthiasm@0 70
matthiasm@0 71 string
matthiasm@36 72 PYinVamp::getName() const
matthiasm@0 73 {
matthiasm@1 74 return "pYin";
matthiasm@0 75 }
matthiasm@0 76
matthiasm@0 77 string
matthiasm@36 78 PYinVamp::getDescription() const
matthiasm@0 79 {
matthiasm@0 80 return "Monophonic pitch and note tracking based on a probabilistic Yin extension.";
matthiasm@0 81 }
matthiasm@0 82
matthiasm@0 83 string
matthiasm@36 84 PYinVamp::getMaker() const
matthiasm@0 85 {
matthiasm@0 86 return "Matthias Mauch";
matthiasm@0 87 }
matthiasm@0 88
matthiasm@0 89 int
matthiasm@36 90 PYinVamp::getPluginVersion() const
matthiasm@0 91 {
matthiasm@0 92 // Increment this each time you release a version that behaves
matthiasm@0 93 // differently from the previous one
Chris@125 94 return 2;
matthiasm@0 95 }
matthiasm@0 96
matthiasm@0 97 string
matthiasm@36 98 PYinVamp::getCopyright() const
matthiasm@0 99 {
matthiasm@0 100 return "GPL";
matthiasm@0 101 }
matthiasm@0 102
matthiasm@36 103 PYinVamp::InputDomain
matthiasm@36 104 PYinVamp::getInputDomain() const
matthiasm@0 105 {
matthiasm@0 106 return TimeDomain;
matthiasm@0 107 }
matthiasm@0 108
matthiasm@0 109 size_t
matthiasm@36 110 PYinVamp::getPreferredBlockSize() const
matthiasm@0 111 {
matthiasm@0 112 return 2048;
matthiasm@0 113 }
matthiasm@0 114
matthiasm@0 115 size_t
matthiasm@36 116 PYinVamp::getPreferredStepSize() const
matthiasm@0 117 {
matthiasm@0 118 return 256;
matthiasm@0 119 }
matthiasm@0 120
matthiasm@0 121 size_t
matthiasm@36 122 PYinVamp::getMinChannelCount() const
matthiasm@0 123 {
matthiasm@0 124 return 1;
matthiasm@0 125 }
matthiasm@0 126
matthiasm@0 127 size_t
matthiasm@36 128 PYinVamp::getMaxChannelCount() const
matthiasm@0 129 {
matthiasm@0 130 return 1;
matthiasm@0 131 }
matthiasm@0 132
matthiasm@36 133 PYinVamp::ParameterList
matthiasm@36 134 PYinVamp::getParameterDescriptors() const
matthiasm@0 135 {
matthiasm@0 136 ParameterList list;
matthiasm@0 137
matthiasm@0 138 ParameterDescriptor d;
matthiasm@0 139
matthiasm@0 140 d.identifier = "threshdistr";
matthiasm@0 141 d.name = "Yin threshold distribution";
matthiasm@0 142 d.description = ".";
matthiasm@0 143 d.unit = "";
matthiasm@0 144 d.minValue = 0.0f;
matthiasm@0 145 d.maxValue = 7.0f;
matthiasm@0 146 d.defaultValue = 2.0f;
matthiasm@0 147 d.isQuantized = true;
matthiasm@0 148 d.quantizeStep = 1.0f;
matthiasm@0 149 d.valueNames.push_back("Uniform");
matthiasm@0 150 d.valueNames.push_back("Beta (mean 0.10)");
matthiasm@0 151 d.valueNames.push_back("Beta (mean 0.15)");
matthiasm@0 152 d.valueNames.push_back("Beta (mean 0.20)");
matthiasm@0 153 d.valueNames.push_back("Beta (mean 0.30)");
matthiasm@0 154 d.valueNames.push_back("Single Value 0.10");
matthiasm@0 155 d.valueNames.push_back("Single Value 0.15");
matthiasm@0 156 d.valueNames.push_back("Single Value 0.20");
matthiasm@0 157 list.push_back(d);
matthiasm@0 158
mail@130 159 d.valueNames.clear();
mail@130 160
mail@130 161 d.identifier = "fixedlag";
mail@130 162 d.name = "Fixed-lag smoothing";
mail@130 163 d.description = "Use fixed lag smoothing, not full Viterbi smoothing.";
mail@130 164 d.unit = "";
mail@130 165 d.minValue = 0.0f;
mail@130 166 d.maxValue = 1.0f;
mail@130 167 d.defaultValue = 0.0f;
mail@130 168 d.isQuantized = true;
mail@130 169 d.quantizeStep = 1.0f;
mail@130 170 list.push_back(d);
mail@130 171
matthiasm@0 172 d.identifier = "outputunvoiced";
matthiasm@0 173 d.valueNames.clear();
matthiasm@0 174 d.name = "Output estimates classified as unvoiced?";
matthiasm@0 175 d.description = ".";
matthiasm@0 176 d.unit = "";
matthiasm@0 177 d.minValue = 0.0f;
matthiasm@0 178 d.maxValue = 2.0f;
matthiasm@6 179 d.defaultValue = 0.0f;
matthiasm@0 180 d.isQuantized = true;
matthiasm@0 181 d.quantizeStep = 1.0f;
matthiasm@0 182 d.valueNames.push_back("No");
matthiasm@0 183 d.valueNames.push_back("Yes");
matthiasm@0 184 d.valueNames.push_back("Yes, as negative frequencies");
matthiasm@0 185 list.push_back(d);
matthiasm@0 186
matthiasm@70 187 d.identifier = "precisetime";
matthiasm@70 188 d.valueNames.clear();
matthiasm@70 189 d.name = "Use non-standard precise YIN timing (slow).";
matthiasm@70 190 d.description = ".";
matthiasm@70 191 d.unit = "";
matthiasm@70 192 d.minValue = 0.0f;
matthiasm@70 193 d.maxValue = 1.0f;
matthiasm@70 194 d.defaultValue = 0.0f;
matthiasm@70 195 d.isQuantized = true;
matthiasm@70 196 d.quantizeStep = 1.0f;
matthiasm@70 197 list.push_back(d);
matthiasm@70 198
matthiasm@72 199 d.identifier = "lowampsuppression";
matthiasm@72 200 d.valueNames.clear();
matthiasm@72 201 d.name = "Suppress low amplitude pitch estimates.";
matthiasm@72 202 d.description = ".";
matthiasm@72 203 d.unit = "";
matthiasm@72 204 d.minValue = 0.0f;
matthiasm@72 205 d.maxValue = 1.0f;
matthiasm@73 206 d.defaultValue = 0.1f;
matthiasm@72 207 d.isQuantized = false;
matthiasm@72 208 list.push_back(d);
matthiasm@70 209
matthiasm@107 210 d.identifier = "onsetsensitivity";
matthiasm@107 211 d.valueNames.clear();
matthiasm@107 212 d.name = "Onset sensitivity";
matthiasm@107 213 d.description = "Adds additional note onsets when RMS increases.";
matthiasm@107 214 d.unit = "";
matthiasm@107 215 d.minValue = 0.0f;
matthiasm@107 216 d.maxValue = 1.0f;
matthiasm@117 217 d.defaultValue = 0.7f;
matthiasm@108 218 d.isQuantized = false;
matthiasm@108 219 list.push_back(d);
matthiasm@108 220
matthiasm@108 221 d.identifier = "prunethresh";
matthiasm@108 222 d.valueNames.clear();
matthiasm@108 223 d.name = "Duration pruning threshold.";
matthiasm@108 224 d.description = "Prune notes that are shorter than this value.";
matthiasm@108 225 d.unit = "";
matthiasm@108 226 d.minValue = 0.0f;
matthiasm@108 227 d.maxValue = 0.2f;
matthiasm@117 228 d.defaultValue = 0.1f;
matthiasm@107 229 d.isQuantized = false;
matthiasm@107 230 list.push_back(d);
matthiasm@107 231
matthiasm@0 232 return list;
matthiasm@0 233 }
matthiasm@0 234
matthiasm@0 235 float
matthiasm@36 236 PYinVamp::getParameter(string identifier) const
matthiasm@0 237 {
matthiasm@0 238 if (identifier == "threshdistr") {
matthiasm@0 239 return m_threshDistr;
matthiasm@0 240 }
mail@130 241 if (identifier == "fixedlag") {
mail@130 242 return m_fixedLag;
mail@130 243 }
matthiasm@0 244 if (identifier == "outputunvoiced") {
matthiasm@0 245 return m_outputUnvoiced;
matthiasm@0 246 }
matthiasm@70 247 if (identifier == "precisetime") {
matthiasm@70 248 return m_preciseTime;
matthiasm@70 249 }
matthiasm@72 250 if (identifier == "lowampsuppression") {
matthiasm@72 251 return m_lowAmp;
matthiasm@72 252 }
matthiasm@107 253 if (identifier == "onsetsensitivity") {
matthiasm@107 254 return m_onsetSensitivity;
matthiasm@107 255 }
matthiasm@108 256 if (identifier == "prunethresh") {
matthiasm@108 257 return m_pruneThresh;
matthiasm@108 258 }
matthiasm@0 259 return 0.f;
matthiasm@0 260 }
matthiasm@0 261
matthiasm@0 262 void
matthiasm@36 263 PYinVamp::setParameter(string identifier, float value)
matthiasm@0 264 {
matthiasm@0 265 if (identifier == "threshdistr")
matthiasm@0 266 {
matthiasm@0 267 m_threshDistr = value;
matthiasm@0 268 }
mail@130 269 if (identifier == "fixedlag")
mail@130 270 {
mail@130 271 m_fixedLag = value;
mail@130 272 }
matthiasm@0 273 if (identifier == "outputunvoiced")
matthiasm@0 274 {
matthiasm@0 275 m_outputUnvoiced = value;
matthiasm@0 276 }
matthiasm@70 277 if (identifier == "precisetime")
matthiasm@70 278 {
matthiasm@70 279 m_preciseTime = value;
matthiasm@70 280 }
matthiasm@72 281 if (identifier == "lowampsuppression")
matthiasm@72 282 {
matthiasm@72 283 m_lowAmp = value;
matthiasm@72 284 }
matthiasm@107 285 if (identifier == "onsetsensitivity")
matthiasm@107 286 {
matthiasm@107 287 m_onsetSensitivity = value;
matthiasm@107 288 }
matthiasm@108 289 if (identifier == "prunethresh")
matthiasm@108 290 {
matthiasm@108 291 m_pruneThresh = value;
matthiasm@108 292 }
matthiasm@0 293 }
matthiasm@0 294
matthiasm@36 295 PYinVamp::ProgramList
matthiasm@36 296 PYinVamp::getPrograms() const
matthiasm@0 297 {
matthiasm@0 298 ProgramList list;
matthiasm@0 299 return list;
matthiasm@0 300 }
matthiasm@0 301
matthiasm@0 302 string
matthiasm@36 303 PYinVamp::getCurrentProgram() const
matthiasm@0 304 {
matthiasm@0 305 return ""; // no programs
matthiasm@0 306 }
matthiasm@0 307
matthiasm@0 308 void
matthiasm@36 309 PYinVamp::selectProgram(string name)
matthiasm@0 310 {
matthiasm@0 311 }
matthiasm@0 312
matthiasm@36 313 PYinVamp::OutputList
matthiasm@36 314 PYinVamp::getOutputDescriptors() const
matthiasm@0 315 {
matthiasm@0 316 OutputList outputs;
matthiasm@0 317
matthiasm@0 318 OutputDescriptor d;
matthiasm@0 319
matthiasm@0 320 int outputNumber = 0;
matthiasm@0 321
matthiasm@0 322 d.identifier = "f0candidates";
matthiasm@0 323 d.name = "F0 Candidates";
matthiasm@0 324 d.description = "Estimated fundamental frequency candidates.";
matthiasm@0 325 d.unit = "Hz";
matthiasm@0 326 d.hasFixedBinCount = false;
matthiasm@0 327 // d.binCount = 1;
matthiasm@0 328 d.hasKnownExtents = true;
matthiasm@0 329 d.minValue = m_fmin;
matthiasm@0 330 d.maxValue = 500;
matthiasm@0 331 d.isQuantized = false;
matthiasm@0 332 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 333 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 334 d.hasDuration = false;
matthiasm@0 335 outputs.push_back(d);
matthiasm@0 336 m_oF0Candidates = outputNumber++;
matthiasm@0 337
matthiasm@0 338 d.identifier = "f0probs";
matthiasm@0 339 d.name = "Candidate Probabilities";
matthiasm@0 340 d.description = "Probabilities of estimated fundamental frequency candidates.";
matthiasm@0 341 d.unit = "";
matthiasm@0 342 d.hasFixedBinCount = false;
matthiasm@0 343 // d.binCount = 1;
matthiasm@0 344 d.hasKnownExtents = true;
matthiasm@0 345 d.minValue = 0;
matthiasm@0 346 d.maxValue = 1;
matthiasm@0 347 d.isQuantized = false;
matthiasm@0 348 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 349 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 350 d.hasDuration = false;
matthiasm@0 351 outputs.push_back(d);
matthiasm@0 352 m_oF0Probs = outputNumber++;
matthiasm@0 353
matthiasm@0 354 d.identifier = "voicedprob";
matthiasm@0 355 d.name = "Voiced Probability";
matthiasm@0 356 d.description = "Probability that the signal is voiced according to Probabilistic Yin.";
matthiasm@0 357 d.unit = "";
matthiasm@0 358 d.hasFixedBinCount = true;
matthiasm@0 359 d.binCount = 1;
matthiasm@0 360 d.hasKnownExtents = true;
matthiasm@0 361 d.minValue = 0;
matthiasm@0 362 d.maxValue = 1;
matthiasm@0 363 d.isQuantized = false;
matthiasm@0 364 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 365 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 366 d.hasDuration = false;
matthiasm@0 367 outputs.push_back(d);
matthiasm@0 368 m_oVoicedProb = outputNumber++;
matthiasm@0 369
matthiasm@0 370 d.identifier = "candidatesalience";
matthiasm@0 371 d.name = "Candidate Salience";
matthiasm@0 372 d.description = "Candidate Salience";
matthiasm@0 373 d.hasFixedBinCount = true;
matthiasm@0 374 d.binCount = m_blockSize / 2;
matthiasm@0 375 d.hasKnownExtents = true;
matthiasm@0 376 d.minValue = 0;
matthiasm@0 377 d.maxValue = 1;
matthiasm@0 378 d.isQuantized = false;
matthiasm@0 379 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 380 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 381 d.hasDuration = false;
matthiasm@0 382 outputs.push_back(d);
matthiasm@0 383 m_oCandidateSalience = outputNumber++;
matthiasm@0 384
matthiasm@0 385 d.identifier = "smoothedpitchtrack";
matthiasm@0 386 d.name = "Smoothed Pitch Track";
matthiasm@0 387 d.description = ".";
matthiasm@0 388 d.unit = "Hz";
matthiasm@0 389 d.hasFixedBinCount = true;
matthiasm@0 390 d.binCount = 1;
matthiasm@0 391 d.hasKnownExtents = false;
matthiasm@0 392 // d.minValue = 0;
matthiasm@0 393 // d.maxValue = 1;
matthiasm@0 394 d.isQuantized = false;
matthiasm@0 395 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 396 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 397 d.hasDuration = false;
matthiasm@0 398 outputs.push_back(d);
matthiasm@0 399 m_oSmoothedPitchTrack = outputNumber++;
matthiasm@0 400
matthiasm@0 401 d.identifier = "notes";
matthiasm@0 402 d.name = "Notes";
matthiasm@0 403 d.description = "Derived fixed-pitch note frequencies";
matthiasm@0 404 // d.unit = "MIDI unit";
matthiasm@0 405 d.unit = "Hz";
matthiasm@0 406 d.hasFixedBinCount = true;
matthiasm@0 407 d.binCount = 1;
matthiasm@0 408 d.hasKnownExtents = false;
matthiasm@0 409 d.isQuantized = false;
matthiasm@0 410 d.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0 411 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 412 d.hasDuration = true;
matthiasm@0 413 outputs.push_back(d);
matthiasm@0 414 m_oNotes = outputNumber++;
matthiasm@0 415
matthiasm@0 416 return outputs;
matthiasm@0 417 }
matthiasm@0 418
matthiasm@0 419 bool
matthiasm@36 420 PYinVamp::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 421 {
matthiasm@0 422 if (channels < getMinChannelCount() ||
matthiasm@0 423 channels > getMaxChannelCount()) return false;
matthiasm@0 424
Chris@9 425 /*
matthiasm@36 426 std::cerr << "PYinVamp::initialise: channels = " << channels
matthiasm@0 427 << ", stepSize = " << stepSize << ", blockSize = " << blockSize
matthiasm@0 428 << std::endl;
Chris@9 429 */
matthiasm@0 430 m_channels = channels;
matthiasm@0 431 m_stepSize = stepSize;
matthiasm@0 432 m_blockSize = blockSize;
matthiasm@0 433
matthiasm@0 434 reset();
matthiasm@0 435
matthiasm@0 436 return true;
matthiasm@0 437 }
matthiasm@0 438
matthiasm@0 439 void
matthiasm@36 440 PYinVamp::reset()
matthiasm@0 441 {
matthiasm@0 442 m_yin.setThresholdDistr(m_threshDistr);
matthiasm@0 443 m_yin.setFrameSize(m_blockSize);
matthiasm@117 444 m_yin.setFast(!m_preciseTime);
matthiasm@0 445
matthiasm@0 446 m_pitchProb.clear();
matthiasm@0 447 m_timestamp.clear();
matthiasm@103 448 m_level.clear();
Chris@9 449 /*
matthiasm@36 450 std::cerr << "PYinVamp::reset"
matthiasm@0 451 << ", blockSize = " << m_blockSize
matthiasm@0 452 << std::endl;
Chris@9 453 */
matthiasm@0 454 }
matthiasm@0 455
matthiasm@36 456 PYinVamp::FeatureSet
matthiasm@36 457 PYinVamp::process(const float *const *inputBuffers, RealTime timestamp)
matthiasm@0 458 {
matthiasm@77 459 int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4;
matthiasm@77 460 timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset, lrintf(m_inputSampleRate));
matthiasm@77 461
matthiasm@0 462 FeatureSet fs;
matthiasm@0 463
matthiasm@46 464 float rms = 0;
matthiasm@46 465
matthiasm@0 466 double *dInputBuffers = new double[m_blockSize];
matthiasm@46 467 for (size_t i = 0; i < m_blockSize; ++i) {
matthiasm@46 468 dInputBuffers[i] = inputBuffers[0][i];
matthiasm@46 469 rms += inputBuffers[0][i] * inputBuffers[0][i];
matthiasm@46 470 }
matthiasm@46 471 rms /= m_blockSize;
matthiasm@46 472 rms = sqrt(rms);
matthiasm@116 473
matthiasm@72 474 bool isLowAmplitude = (rms < m_lowAmp);
matthiasm@0 475
matthiasm@0 476 Yin::YinOutput yo = m_yin.processProbabilisticYin(dInputBuffers);
matthiasm@27 477 delete [] dInputBuffers;
matthiasm@27 478
matthiasm@103 479 m_level.push_back(yo.rms);
matthiasm@103 480
matthiasm@27 481 vector<pair<double, double> > tempPitchProb;
matthiasm@27 482 for (size_t iCandidate = 0; iCandidate < yo.freqProb.size(); ++iCandidate)
matthiasm@27 483 {
matthiasm@27 484 double tempPitch = 12 * std::log(yo.freqProb[iCandidate].first/440)/std::log(2.) + 69;
matthiasm@50 485 if (!isLowAmplitude)
matthiasm@116 486 {
matthiasm@46 487 tempPitchProb.push_back(pair<double, double>
matthiasm@46 488 (tempPitch, yo.freqProb[iCandidate].second));
matthiasm@116 489 } else {
matthiasm@116 490 float factor = ((rms+0.01*m_lowAmp)/(1.01*m_lowAmp));
matthiasm@46 491 tempPitchProb.push_back(pair<double, double>
matthiasm@65 492 (tempPitch, yo.freqProb[iCandidate].second*factor));
matthiasm@65 493 }
matthiasm@27 494 }
mail@130 495
mail@130 496 if (m_fixedLag == 0.f)
mail@130 497 {
mail@131 498 vector<double> tempObsProb = m_pitchHmm.calculateObsProb(tempPitchProb);
mail@131 499 if (m_timestamp.empty())
mail@131 500 {
mail@131 501 m_pitchHmm.initialise(tempObsProb);
mail@131 502 } else {
mail@131 503 m_pitchHmm.process(tempObsProb);
mail@131 504 }
mail@130 505 m_pitchProb.push_back(tempPitchProb);
mail@130 506 } else {
mail@130 507 // Damn, so I need the hmm right here! Sadly it isn't defined here yet.
mail@130 508 // Perhaps I could re-design the whole shabang
mail@130 509 }
matthiasm@27 510 m_timestamp.push_back(timestamp);
matthiasm@27 511
matthiasm@27 512 // F0 CANDIDATES
matthiasm@0 513 Feature f;
matthiasm@0 514 f.hasTimestamp = true;
matthiasm@0 515 f.timestamp = timestamp;
matthiasm@0 516 for (size_t i = 0; i < yo.freqProb.size(); ++i)
matthiasm@0 517 {
matthiasm@0 518 f.values.push_back(yo.freqProb[i].first);
matthiasm@0 519 }
matthiasm@0 520 fs[m_oF0Candidates].push_back(f);
matthiasm@0 521
matthiasm@27 522 // VOICEDPROB
matthiasm@0 523 f.values.clear();
matthiasm@0 524 float voicedProb = 0;
matthiasm@0 525 for (size_t i = 0; i < yo.freqProb.size(); ++i)
matthiasm@0 526 {
matthiasm@0 527 f.values.push_back(yo.freqProb[i].second);
matthiasm@0 528 voicedProb += yo.freqProb[i].second;
matthiasm@0 529 }
matthiasm@0 530 fs[m_oF0Probs].push_back(f);
matthiasm@0 531
mail@128 532 f.values.clear();
matthiasm@0 533 f.values.push_back(voicedProb);
matthiasm@0 534 fs[m_oVoicedProb].push_back(f);
matthiasm@0 535
matthiasm@27 536 // SALIENCE -- maybe this should eventually disappear
matthiasm@0 537 f.values.clear();
matthiasm@0 538 float salienceSum = 0;
matthiasm@0 539 for (size_t iBin = 0; iBin < yo.salience.size(); ++iBin)
matthiasm@0 540 {
matthiasm@0 541 f.values.push_back(yo.salience[iBin]);
matthiasm@0 542 salienceSum += yo.salience[iBin];
matthiasm@0 543 }
matthiasm@0 544 fs[m_oCandidateSalience].push_back(f);
matthiasm@0 545
matthiasm@0 546 return fs;
matthiasm@0 547 }
matthiasm@0 548
matthiasm@36 549 PYinVamp::FeatureSet
matthiasm@36 550 PYinVamp::getRemainingFeatures()
matthiasm@0 551 {
matthiasm@0 552 FeatureSet fs;
matthiasm@0 553 Feature f;
matthiasm@0 554 f.hasTimestamp = true;
matthiasm@0 555 f.hasDuration = false;
matthiasm@0 556
Chris@4 557 if (m_pitchProb.empty()) {
Chris@4 558 return fs;
Chris@4 559 }
Chris@4 560
mail@131 561 // ================== P I T C H T R A C K =================================
mail@131 562
mail@131 563 vector<int> rawPitchPath = m_pitchHmm.finalise();
mail@131 564 vector<float> mpOut;
mail@131 565
mail@131 566 for (size_t iFrame = 0; iFrame < rawPitchPath.size(); ++iFrame)
matthiasm@0 567 {
mail@131 568 float freq = pitchState2Freq(rawPitchPath[iFrame], m_pitchProb[iFrame]);
mail@131 569 mpOut.push_back(freq); // for note processing below
mail@131 570
matthiasm@0 571 f.timestamp = m_timestamp[iFrame];
mail@131 572 // std::cerr << f.timestamp << std::endl;
matthiasm@0 573 f.values.clear();
mail@131 574
mail@131 575 // different output modes
mail@131 576 if (freq < 0 && (m_outputUnvoiced==0)) continue;
matthiasm@0 577 if (m_outputUnvoiced == 1)
matthiasm@0 578 {
mail@131 579 f.values.push_back(fabs(freq));
matthiasm@0 580 } else {
mail@131 581 f.values.push_back(freq);
matthiasm@0 582 }
matthiasm@0 583 fs[m_oSmoothedPitchTrack].push_back(f);
matthiasm@0 584 }
mail@131 585
mail@131 586 // for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame)
mail@131 587 // {
mail@131 588 // if (mpOut[iFrame] < 0 && (m_outputUnvoiced==0)) continue;
mail@131 589
mail@131 590 // if (m_outputUnvoiced == 1)
mail@131 591 // {
mail@131 592 // f.values.push_back(fabs(mpOut[iFrame]));
mail@131 593 // } else {
mail@131 594 // f.values.push_back(mpOut[iFrame]);
mail@131 595 // }
mail@131 596
mail@131 597 // fs[m_oSmoothedPitchTrack].push_back(f);
mail@131 598 // }
matthiasm@0 599
mail@131 600 // ======================== N O T E S ======================================
matthiasm@1 601 MonoNote mn;
matthiasm@1 602 std::vector<std::vector<std::pair<double, double> > > smoothedPitch;
matthiasm@1 603 for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) {
matthiasm@1 604 std::vector<std::pair<double, double> > temp;
matthiasm@1 605 if (mpOut[iFrame] > 0)
matthiasm@1 606 {
mail@130 607 double tempPitch = 12 *
mail@130 608 std::log(mpOut[iFrame]/440)/std::log(2.) + 69;
matthiasm@1 609 temp.push_back(std::pair<double,double>(tempPitch, .9));
matthiasm@1 610 }
matthiasm@1 611 smoothedPitch.push_back(temp);
matthiasm@1 612 }
matthiasm@0 613 // vector<MonoNote::FrameOutput> mnOut = mn.process(m_pitchProb);
matthiasm@1 614 vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch);
matthiasm@1 615
matthiasm@6 616 // turning feature into a note feature
matthiasm@1 617 f.hasTimestamp = true;
matthiasm@1 618 f.hasDuration = true;
matthiasm@1 619 f.values.clear();
matthiasm@6 620
matthiasm@6 621 int onsetFrame = 0;
matthiasm@6 622 bool isVoiced = 0;
matthiasm@6 623 bool oldIsVoiced = 0;
matthiasm@6 624 size_t nFrame = m_pitchProb.size();
matthiasm@108 625
matthiasm@108 626 float minNoteFrames = (m_inputSampleRate*m_pruneThresh) / m_stepSize;
matthiasm@1 627
mail@130 628 // the body of the loop below should be in a function/method
matthiasm@6 629 std::vector<float> notePitchTrack; // collects pitches for one note at a time
matthiasm@6 630 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
matthiasm@1 631 {
matthiasm@103 632 isVoiced = mnOut[iFrame].noteState < 3
matthiasm@103 633 && smoothedPitch[iFrame].size() > 0
matthiasm@106 634 && (iFrame >= nFrame-2
mail@130 635 || ((m_level[iFrame]/m_level[iFrame+2]) >
mail@130 636 m_onsetSensitivity));
matthiasm@6 637 if (isVoiced && iFrame != nFrame-1)
matthiasm@1 638 {
matthiasm@6 639 if (oldIsVoiced == 0) // beginning of a note
matthiasm@1 640 {
matthiasm@6 641 onsetFrame = iFrame;
matthiasm@1 642 }
matthiasm@6 643 float pitch = smoothedPitch[iFrame][0].first;
matthiasm@6 644 notePitchTrack.push_back(pitch); // add to the note's pitch track
matthiasm@6 645 } else { // not currently voiced
matthiasm@108 646 if (oldIsVoiced == 1) // end of note
matthiasm@6 647 {
matthiasm@108 648 if (notePitchTrack.size() >= minNoteFrames)
matthiasm@108 649 {
matthiasm@108 650 std::sort(notePitchTrack.begin(), notePitchTrack.end());
matthiasm@108 651 float medianPitch = notePitchTrack[notePitchTrack.size()/2];
matthiasm@108 652 float medianFreq = std::pow(2,(medianPitch - 69) / 12) * 440;
matthiasm@108 653 f.values.clear();
matthiasm@108 654 f.values.push_back(medianFreq);
matthiasm@108 655 f.timestamp = m_timestamp[onsetFrame];
matthiasm@108 656 f.duration = m_timestamp[iFrame] - m_timestamp[onsetFrame];
matthiasm@108 657 fs[m_oNotes].push_back(f);
matthiasm@108 658 }
matthiasm@108 659 notePitchTrack.clear();
matthiasm@1 660 }
matthiasm@1 661 }
matthiasm@6 662 oldIsVoiced = isVoiced;
matthiasm@1 663 }
matthiasm@0 664 return fs;
matthiasm@0 665 }
mail@131 666
mail@131 667 float
mail@131 668 PYinVamp::pitchState2Freq(int state, vector<pair<double, double> > pitchProb)
mail@131 669 {
mail@131 670 float hmmFreq = m_pitchHmm.m_freqs[state];
mail@131 671 float bestFreq = 0;
mail@131 672 float leastDist = 10000;
mail@131 673 if (hmmFreq > 0)
mail@131 674 {
mail@131 675 // This was a Yin estimate, so try to get original pitch estimate back
mail@131 676 // ... a bit hacky, since we could have direclty saved the frequency
mail@131 677 // that was assigned to the HMM bin in hmm.calculateObsProb -- but would
mail@131 678 // have had to rethink the interface of that method.
mail@131 679 for (size_t iPt = 0; iPt < pitchProb.size(); ++iPt)
mail@131 680 {
mail@131 681 float freq = 440. *
mail@131 682 std::pow(2,
mail@131 683 (pitchProb[iPt].first - 69)/12);
mail@131 684 float dist = std::abs(hmmFreq-freq);
mail@131 685 if (dist < leastDist)
mail@131 686 {
mail@131 687 leastDist = dist;
mail@131 688 bestFreq = freq;
mail@131 689 }
mail@131 690 }
mail@131 691 } else {
mail@131 692 bestFreq = hmmFreq;
mail@131 693 }
mail@131 694 return bestFreq;
mail@131 695 }