annotate PYIN.cpp @ 22:12cab92b7c26 interactive

added test track
author matthiasm
date Fri, 17 Jan 2014 12:10:33 +0000
parents 70dd2b4e776b
children a8c73e5d1f7e
rev   line source
matthiasm@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@9 2
matthiasm@0 3 /*
Chris@9 4 pYIN - A fundamental frequency estimator for monophonic audio
Chris@9 5 Centre for Digital Music, Queen Mary, University of London.
Chris@9 6
Chris@9 7 This program is free software; you can redistribute it and/or
Chris@9 8 modify it under the terms of the GNU General Public License as
Chris@9 9 published by the Free Software Foundation; either version 2 of the
Chris@9 10 License, or (at your option) any later version. See the file
Chris@9 11 COPYING included with this distribution for more information.
matthiasm@0 12 */
matthiasm@0 13
matthiasm@0 14 #include "PYIN.h"
matthiasm@0 15 #include "MonoNote.h"
matthiasm@0 16 #include "MonoPitch.h"
matthiasm@0 17
matthiasm@0 18 #include "vamp-sdk/FFT.h"
matthiasm@0 19
matthiasm@0 20 #include <vector>
matthiasm@0 21 #include <algorithm>
matthiasm@0 22
matthiasm@0 23 #include <cstdio>
matthiasm@0 24 #include <cmath>
matthiasm@0 25 #include <complex>
matthiasm@0 26
matthiasm@0 27 using std::string;
matthiasm@0 28 using std::vector;
matthiasm@0 29 using Vamp::RealTime;
matthiasm@0 30
matthiasm@0 31
matthiasm@0 32 PYIN::PYIN(float inputSampleRate) :
matthiasm@0 33 Plugin(inputSampleRate),
matthiasm@0 34 m_channels(0),
matthiasm@0 35 m_stepSize(256),
matthiasm@0 36 m_blockSize(2048),
matthiasm@0 37 m_fmin(40),
matthiasm@0 38 m_fmax(700),
matthiasm@0 39 m_yin(2048, inputSampleRate, 0.0),
matthiasm@0 40 m_oF0Candidates(0),
matthiasm@0 41 m_oF0Probs(0),
matthiasm@0 42 m_oVoicedProb(0),
matthiasm@0 43 m_oCandidateSalience(0),
matthiasm@0 44 m_oSmoothedPitchTrack(0),
matthiasm@0 45 m_oNotes(0),
matthiasm@0 46 m_threshDistr(2.0f),
matthiasm@6 47 m_outputUnvoiced(0.0f),
matthiasm@21 48 m_minLocalFreq(0.f),
matthiasm@21 49 m_maxLocalFreq(5000.f),
matthiasm@21 50 m_leftBoundary(0.f),
matthiasm@21 51 m_rightBoundary(5000.f), // hack
matthiasm@0 52 m_pitchProb(0),
matthiasm@21 53 m_timestamp(0),
matthiasm@21 54 m_currentProgram("")
matthiasm@0 55 {
matthiasm@0 56 }
matthiasm@0 57
matthiasm@0 58 PYIN::~PYIN()
matthiasm@0 59 {
matthiasm@0 60 }
matthiasm@0 61
matthiasm@0 62 string
matthiasm@0 63 PYIN::getIdentifier() const
matthiasm@0 64 {
matthiasm@1 65 return "pyin";
matthiasm@0 66 }
matthiasm@0 67
matthiasm@0 68 string
matthiasm@0 69 PYIN::getName() const
matthiasm@0 70 {
matthiasm@1 71 return "pYin";
matthiasm@0 72 }
matthiasm@0 73
matthiasm@0 74 string
matthiasm@0 75 PYIN::getDescription() const
matthiasm@0 76 {
matthiasm@0 77 return "Monophonic pitch and note tracking based on a probabilistic Yin extension.";
matthiasm@0 78 }
matthiasm@0 79
matthiasm@0 80 string
matthiasm@0 81 PYIN::getMaker() const
matthiasm@0 82 {
matthiasm@0 83 return "Matthias Mauch";
matthiasm@0 84 }
matthiasm@0 85
matthiasm@0 86 int
matthiasm@0 87 PYIN::getPluginVersion() const
matthiasm@0 88 {
matthiasm@0 89 // Increment this each time you release a version that behaves
matthiasm@0 90 // differently from the previous one
matthiasm@0 91 return 1;
matthiasm@0 92 }
matthiasm@0 93
matthiasm@0 94 string
matthiasm@0 95 PYIN::getCopyright() const
matthiasm@0 96 {
matthiasm@0 97 return "GPL";
matthiasm@0 98 }
matthiasm@0 99
matthiasm@0 100 PYIN::InputDomain
matthiasm@0 101 PYIN::getInputDomain() const
matthiasm@0 102 {
matthiasm@0 103 return TimeDomain;
matthiasm@0 104 }
matthiasm@0 105
matthiasm@0 106 size_t
matthiasm@0 107 PYIN::getPreferredBlockSize() const
matthiasm@0 108 {
matthiasm@0 109 return 2048;
matthiasm@0 110 }
matthiasm@0 111
matthiasm@0 112 size_t
matthiasm@0 113 PYIN::getPreferredStepSize() const
matthiasm@0 114 {
matthiasm@0 115 return 256;
matthiasm@0 116 }
matthiasm@0 117
matthiasm@0 118 size_t
matthiasm@0 119 PYIN::getMinChannelCount() const
matthiasm@0 120 {
matthiasm@0 121 return 1;
matthiasm@0 122 }
matthiasm@0 123
matthiasm@0 124 size_t
matthiasm@0 125 PYIN::getMaxChannelCount() const
matthiasm@0 126 {
matthiasm@0 127 return 1;
matthiasm@0 128 }
matthiasm@0 129
matthiasm@0 130 PYIN::ParameterList
matthiasm@0 131 PYIN::getParameterDescriptors() const
matthiasm@0 132 {
matthiasm@0 133 ParameterList list;
matthiasm@0 134
matthiasm@0 135 ParameterDescriptor d;
matthiasm@0 136
matthiasm@0 137 d.identifier = "threshdistr";
matthiasm@0 138 d.name = "Yin threshold distribution";
matthiasm@0 139 d.description = ".";
matthiasm@0 140 d.unit = "";
matthiasm@0 141 d.minValue = 0.0f;
matthiasm@0 142 d.maxValue = 7.0f;
matthiasm@0 143 d.defaultValue = 2.0f;
matthiasm@0 144 d.isQuantized = true;
matthiasm@0 145 d.quantizeStep = 1.0f;
matthiasm@0 146 d.valueNames.push_back("Uniform");
matthiasm@0 147 d.valueNames.push_back("Beta (mean 0.10)");
matthiasm@0 148 d.valueNames.push_back("Beta (mean 0.15)");
matthiasm@0 149 d.valueNames.push_back("Beta (mean 0.20)");
matthiasm@0 150 d.valueNames.push_back("Beta (mean 0.30)");
matthiasm@0 151 d.valueNames.push_back("Single Value 0.10");
matthiasm@0 152 d.valueNames.push_back("Single Value 0.15");
matthiasm@0 153 d.valueNames.push_back("Single Value 0.20");
matthiasm@0 154 list.push_back(d);
matthiasm@0 155
matthiasm@0 156 d.identifier = "outputunvoiced";
matthiasm@0 157 d.valueNames.clear();
matthiasm@0 158 d.name = "Output estimates classified as unvoiced?";
matthiasm@0 159 d.description = ".";
matthiasm@0 160 d.unit = "";
matthiasm@0 161 d.minValue = 0.0f;
matthiasm@0 162 d.maxValue = 2.0f;
matthiasm@6 163 d.defaultValue = 0.0f;
matthiasm@0 164 d.isQuantized = true;
matthiasm@0 165 d.quantizeStep = 1.0f;
matthiasm@0 166 d.valueNames.push_back("No");
matthiasm@0 167 d.valueNames.push_back("Yes");
matthiasm@0 168 d.valueNames.push_back("Yes, as negative frequencies");
matthiasm@0 169 list.push_back(d);
matthiasm@21 170
matthiasm@21 171 d.identifier = "minlocalfreq";
matthiasm@21 172 d.valueNames.clear();
matthiasm@21 173 d.name = "Minimum local frequency.";
matthiasm@21 174 d.description = "Minimum frequency in selection.";
matthiasm@21 175 d.unit = "";
matthiasm@21 176 d.minValue = 50.f;
matthiasm@21 177 d.maxValue = 5000.f;
matthiasm@21 178 d.defaultValue = 50.f;
matthiasm@21 179 d.isQuantized = false;
matthiasm@21 180 d.quantizeStep = 0;
matthiasm@21 181 d.valueNames.clear();
matthiasm@21 182 list.push_back(d);
matthiasm@21 183
matthiasm@21 184 d.identifier = "maxlocalfreq";
matthiasm@21 185 d.valueNames.clear();
matthiasm@21 186 d.name = "Maximum local frequency.";
matthiasm@21 187 d.description = "Maximum frequency in selection.";
matthiasm@21 188 d.unit = "";
matthiasm@21 189 d.minValue = 50.f;
matthiasm@21 190 d.maxValue = 5000.f;
matthiasm@21 191 d.defaultValue = 5000.f;
matthiasm@21 192 d.isQuantized = false;
matthiasm@21 193 d.quantizeStep = 0;
matthiasm@21 194 d.valueNames.clear();
matthiasm@21 195 list.push_back(d);
matthiasm@21 196
matthiasm@21 197 d.identifier = "leftboundary";
matthiasm@21 198 d.valueNames.clear();
matthiasm@21 199 d.name = "Left boundary.";
matthiasm@21 200 d.description = "Left boundary of time selection.";
matthiasm@21 201 d.unit = "";
matthiasm@21 202 d.minValue = 0.f;
matthiasm@21 203 d.maxValue = 1000.f;
matthiasm@21 204 d.defaultValue = 0.f;
matthiasm@21 205 d.isQuantized = false;
matthiasm@21 206 d.quantizeStep = 0;
matthiasm@21 207 d.valueNames.clear();
matthiasm@21 208 list.push_back(d);
matthiasm@21 209
matthiasm@21 210 d.identifier = "rightboundary";
matthiasm@21 211 d.valueNames.clear();
matthiasm@21 212 d.name = "Right boundary.";
matthiasm@21 213 d.description = "Right boundary of time selection.";
matthiasm@21 214 d.unit = "";
matthiasm@21 215 d.minValue = 0.f;
matthiasm@21 216 d.maxValue = 1000.f;
matthiasm@21 217 d.defaultValue = 0.f;
matthiasm@21 218 d.isQuantized = false;
matthiasm@21 219 d.quantizeStep = 0;
matthiasm@21 220 d.valueNames.clear();
matthiasm@21 221 list.push_back(d);
matthiasm@0 222
matthiasm@0 223 return list;
matthiasm@0 224 }
matthiasm@0 225
matthiasm@0 226 float
matthiasm@0 227 PYIN::getParameter(string identifier) const
matthiasm@0 228 {
matthiasm@0 229 if (identifier == "threshdistr") {
matthiasm@0 230 return m_threshDistr;
matthiasm@0 231 }
matthiasm@0 232 if (identifier == "outputunvoiced") {
matthiasm@0 233 return m_outputUnvoiced;
matthiasm@0 234 }
matthiasm@21 235 if (identifier == "minlocalfreq") {
matthiasm@21 236 return m_minLocalFreq;
matthiasm@21 237 }
matthiasm@21 238 if (identifier == "maxlocalfreq") {
matthiasm@21 239 return m_maxLocalFreq;
matthiasm@21 240 }
matthiasm@21 241 if (identifier == "leftboundary") {
matthiasm@21 242 return m_leftBoundary;
matthiasm@21 243 }
matthiasm@21 244 if (identifier == "rightboundary") {
matthiasm@21 245 return m_rightBoundary;
matthiasm@21 246 }
matthiasm@0 247 return 0.f;
matthiasm@0 248 }
matthiasm@0 249
matthiasm@0 250 void
matthiasm@0 251 PYIN::setParameter(string identifier, float value)
matthiasm@0 252 {
matthiasm@21 253 m_currentProgram = "custom";
matthiasm@0 254 if (identifier == "threshdistr")
matthiasm@0 255 {
matthiasm@0 256 m_threshDistr = value;
matthiasm@0 257 }
matthiasm@0 258 if (identifier == "outputunvoiced")
matthiasm@0 259 {
matthiasm@0 260 m_outputUnvoiced = value;
matthiasm@0 261 }
matthiasm@21 262 if (identifier == "minlocalfreq")
matthiasm@21 263 {
matthiasm@21 264 m_minLocalFreq = value;
matthiasm@21 265 }
matthiasm@21 266 if (identifier == "maxlocalfreq")
matthiasm@21 267 {
matthiasm@21 268 m_maxLocalFreq = value;
matthiasm@21 269 }
matthiasm@21 270 if (identifier == "leftboundary")
matthiasm@21 271 {
matthiasm@21 272 m_leftBoundary = value;
matthiasm@21 273 }
matthiasm@21 274 if (identifier == "rightboundary")
matthiasm@21 275 {
matthiasm@21 276 m_rightBoundary = value;
matthiasm@21 277 }
matthiasm@0 278 }
matthiasm@0 279
matthiasm@0 280 PYIN::ProgramList
matthiasm@0 281 PYIN::getPrograms() const
matthiasm@0 282 {
matthiasm@0 283 ProgramList list;
matthiasm@21 284 list.push_back("default");
matthiasm@21 285 list.push_back("custom");
matthiasm@21 286 list.push_back("donttellme");
matthiasm@0 287 return list;
matthiasm@0 288 }
matthiasm@0 289
matthiasm@0 290 string
matthiasm@0 291 PYIN::getCurrentProgram() const
matthiasm@0 292 {
matthiasm@21 293 return m_currentProgram;
matthiasm@0 294 }
matthiasm@0 295
matthiasm@0 296 void
matthiasm@0 297 PYIN::selectProgram(string name)
matthiasm@0 298 {
matthiasm@21 299 if (name == "default") {
matthiasm@21 300 m_minLocalFreq = 0;
matthiasm@21 301 m_maxLocalFreq = 10000;
matthiasm@21 302 m_leftBoundary = 0;
matthiasm@21 303 m_rightBoundary = 5000;
matthiasm@21 304 }
matthiasm@21 305 if (name == "custom") {
matthiasm@21 306 // do nothing
matthiasm@21 307 }
matthiasm@21 308 if (name == "donttellme")
matthiasm@21 309 {
matthiasm@21 310 m_currentProgram = "donttellme";
matthiasm@21 311 m_minLocalFreq = 0;
matthiasm@21 312 m_maxLocalFreq = 400;
matthiasm@21 313 m_leftBoundary = 1.9;
matthiasm@21 314 m_rightBoundary = 2.9;
matthiasm@21 315 }
matthiasm@0 316 }
matthiasm@0 317
matthiasm@0 318 PYIN::OutputList
matthiasm@0 319 PYIN::getOutputDescriptors() const
matthiasm@0 320 {
matthiasm@0 321 OutputList outputs;
matthiasm@0 322
matthiasm@0 323 OutputDescriptor d;
matthiasm@0 324
matthiasm@0 325 int outputNumber = 0;
matthiasm@0 326
matthiasm@0 327 d.identifier = "f0candidates";
matthiasm@0 328 d.name = "F0 Candidates";
matthiasm@0 329 d.description = "Estimated fundamental frequency candidates.";
matthiasm@0 330 d.unit = "Hz";
matthiasm@0 331 d.hasFixedBinCount = false;
matthiasm@0 332 // d.binCount = 1;
matthiasm@0 333 d.hasKnownExtents = true;
matthiasm@0 334 d.minValue = m_fmin;
matthiasm@0 335 d.maxValue = 500;
matthiasm@0 336 d.isQuantized = false;
matthiasm@0 337 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 338 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 339 d.hasDuration = false;
matthiasm@0 340 outputs.push_back(d);
matthiasm@0 341 m_oF0Candidates = outputNumber++;
matthiasm@0 342
matthiasm@0 343 d.identifier = "f0probs";
matthiasm@0 344 d.name = "Candidate Probabilities";
matthiasm@0 345 d.description = "Probabilities of estimated fundamental frequency candidates.";
matthiasm@0 346 d.unit = "";
matthiasm@0 347 d.hasFixedBinCount = false;
matthiasm@0 348 // d.binCount = 1;
matthiasm@0 349 d.hasKnownExtents = true;
matthiasm@0 350 d.minValue = 0;
matthiasm@0 351 d.maxValue = 1;
matthiasm@0 352 d.isQuantized = false;
matthiasm@0 353 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 354 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 355 d.hasDuration = false;
matthiasm@0 356 outputs.push_back(d);
matthiasm@0 357 m_oF0Probs = outputNumber++;
matthiasm@0 358
matthiasm@0 359 d.identifier = "voicedprob";
matthiasm@0 360 d.name = "Voiced Probability";
matthiasm@0 361 d.description = "Probability that the signal is voiced according to Probabilistic Yin.";
matthiasm@0 362 d.unit = "";
matthiasm@0 363 d.hasFixedBinCount = true;
matthiasm@0 364 d.binCount = 1;
matthiasm@0 365 d.hasKnownExtents = true;
matthiasm@0 366 d.minValue = 0;
matthiasm@0 367 d.maxValue = 1;
matthiasm@0 368 d.isQuantized = false;
matthiasm@0 369 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 370 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 371 d.hasDuration = false;
matthiasm@0 372 outputs.push_back(d);
matthiasm@0 373 m_oVoicedProb = outputNumber++;
matthiasm@0 374
matthiasm@0 375 d.identifier = "candidatesalience";
matthiasm@0 376 d.name = "Candidate Salience";
matthiasm@0 377 d.description = "Candidate Salience";
matthiasm@0 378 d.hasFixedBinCount = true;
matthiasm@0 379 d.binCount = m_blockSize / 2;
matthiasm@0 380 d.hasKnownExtents = true;
matthiasm@0 381 d.minValue = 0;
matthiasm@0 382 d.maxValue = 1;
matthiasm@0 383 d.isQuantized = false;
matthiasm@0 384 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 385 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 386 d.hasDuration = false;
matthiasm@0 387 outputs.push_back(d);
matthiasm@0 388 m_oCandidateSalience = outputNumber++;
matthiasm@0 389
matthiasm@0 390 d.identifier = "smoothedpitchtrack";
matthiasm@0 391 d.name = "Smoothed Pitch Track";
matthiasm@0 392 d.description = ".";
matthiasm@0 393 d.unit = "Hz";
matthiasm@0 394 d.hasFixedBinCount = true;
matthiasm@0 395 d.binCount = 1;
matthiasm@0 396 d.hasKnownExtents = false;
matthiasm@0 397 // d.minValue = 0;
matthiasm@0 398 // d.maxValue = 1;
matthiasm@0 399 d.isQuantized = false;
matthiasm@0 400 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 401 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 402 d.hasDuration = false;
matthiasm@0 403 outputs.push_back(d);
matthiasm@0 404 m_oSmoothedPitchTrack = outputNumber++;
matthiasm@0 405
matthiasm@0 406 d.identifier = "notes";
matthiasm@0 407 d.name = "Notes";
matthiasm@0 408 d.description = "Derived fixed-pitch note frequencies";
matthiasm@0 409 // d.unit = "MIDI unit";
matthiasm@0 410 d.unit = "Hz";
matthiasm@0 411 d.hasFixedBinCount = true;
matthiasm@0 412 d.binCount = 1;
matthiasm@0 413 d.hasKnownExtents = false;
matthiasm@0 414 d.isQuantized = false;
matthiasm@0 415 d.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0 416 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 417 d.hasDuration = true;
matthiasm@0 418 outputs.push_back(d);
matthiasm@0 419 m_oNotes = outputNumber++;
matthiasm@0 420
matthiasm@0 421 return outputs;
matthiasm@0 422 }
matthiasm@0 423
matthiasm@0 424 bool
matthiasm@0 425 PYIN::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 426 {
matthiasm@0 427 if (channels < getMinChannelCount() ||
matthiasm@0 428 channels > getMaxChannelCount()) return false;
matthiasm@0 429
Chris@9 430 /*
matthiasm@0 431 std::cerr << "PYIN::initialise: channels = " << channels
matthiasm@0 432 << ", stepSize = " << stepSize << ", blockSize = " << blockSize
matthiasm@0 433 << std::endl;
Chris@9 434 */
matthiasm@0 435 m_channels = channels;
matthiasm@0 436 m_stepSize = stepSize;
matthiasm@0 437 m_blockSize = blockSize;
matthiasm@0 438
matthiasm@0 439 reset();
matthiasm@0 440
matthiasm@0 441 return true;
matthiasm@0 442 }
matthiasm@0 443
matthiasm@0 444 void
matthiasm@0 445 PYIN::reset()
matthiasm@0 446 {
matthiasm@0 447 m_yin.setThresholdDistr(m_threshDistr);
matthiasm@0 448 m_yin.setFrameSize(m_blockSize);
matthiasm@0 449
matthiasm@0 450 m_pitchProb.clear();
matthiasm@0 451 m_timestamp.clear();
Chris@9 452 /*
matthiasm@0 453 std::cerr << "PYIN::reset"
matthiasm@0 454 << ", blockSize = " << m_blockSize
matthiasm@0 455 << std::endl;
Chris@9 456 */
matthiasm@0 457 }
matthiasm@0 458
matthiasm@0 459 PYIN::FeatureSet
matthiasm@0 460 PYIN::process(const float *const *inputBuffers, RealTime timestamp)
matthiasm@0 461 {
matthiasm@0 462 timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/4, lrintf(m_inputSampleRate));
matthiasm@0 463 FeatureSet fs;
matthiasm@0 464
matthiasm@0 465 double *dInputBuffers = new double[m_blockSize];
matthiasm@0 466 for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i];
matthiasm@0 467
matthiasm@21 468 Yin::YinOutput yo;
matthiasm@21 469 float floatTime = timestamp.sec + timestamp.nsec * 1.0 / 1000000000;
matthiasm@21 470 std::cerr << timestamp << " " << floatTime << std::endl;
matthiasm@21 471 if (floatTime > m_leftBoundary && floatTime < m_rightBoundary) {
matthiasm@21 472 // constrained
matthiasm@21 473 yo = m_yin.processProbabilisticYin(dInputBuffers, m_minLocalFreq, m_maxLocalFreq);
matthiasm@21 474 } else {
matthiasm@21 475 yo = m_yin.processProbabilisticYin(dInputBuffers);
matthiasm@21 476 }
matthiasm@21 477
matthiasm@0 478
matthiasm@0 479 Feature f;
matthiasm@0 480 f.hasTimestamp = true;
matthiasm@0 481 f.timestamp = timestamp;
matthiasm@0 482 for (size_t i = 0; i < yo.freqProb.size(); ++i)
matthiasm@0 483 {
matthiasm@0 484 f.values.push_back(yo.freqProb[i].first);
matthiasm@0 485 }
matthiasm@0 486 fs[m_oF0Candidates].push_back(f);
matthiasm@0 487
matthiasm@0 488 f.values.clear();
matthiasm@0 489 float voicedProb = 0;
matthiasm@0 490 for (size_t i = 0; i < yo.freqProb.size(); ++i)
matthiasm@0 491 {
matthiasm@0 492 f.values.push_back(yo.freqProb[i].second);
matthiasm@0 493 voicedProb += yo.freqProb[i].second;
matthiasm@0 494 }
matthiasm@0 495 fs[m_oF0Probs].push_back(f);
matthiasm@0 496
matthiasm@0 497 f.values.clear();
matthiasm@0 498 f.values.push_back(voicedProb);
matthiasm@0 499 fs[m_oVoicedProb].push_back(f);
matthiasm@0 500
matthiasm@0 501 f.values.clear();
matthiasm@0 502 float salienceSum = 0;
matthiasm@0 503 for (size_t iBin = 0; iBin < yo.salience.size(); ++iBin)
matthiasm@0 504 {
matthiasm@0 505 f.values.push_back(yo.salience[iBin]);
matthiasm@0 506 salienceSum += yo.salience[iBin];
matthiasm@0 507 }
matthiasm@0 508 fs[m_oCandidateSalience].push_back(f);
matthiasm@0 509
matthiasm@0 510 delete [] dInputBuffers;
matthiasm@0 511
matthiasm@0 512 vector<pair<double, double> > tempPitchProb;
matthiasm@0 513 for (size_t iCandidate = 0; iCandidate < yo.freqProb.size(); ++iCandidate)
matthiasm@0 514 {
matthiasm@0 515 double tempPitch = 12 * std::log(yo.freqProb[iCandidate].first/440)/std::log(2.) + 69;
matthiasm@0 516 tempPitchProb.push_back(pair<double, double>
matthiasm@0 517 (tempPitch, yo.freqProb[iCandidate].second));
matthiasm@0 518 }
matthiasm@0 519 m_pitchProb.push_back(tempPitchProb);
matthiasm@0 520
matthiasm@0 521 m_timestamp.push_back(timestamp);
matthiasm@0 522
matthiasm@0 523 return fs;
matthiasm@0 524 }
matthiasm@0 525
matthiasm@0 526 PYIN::FeatureSet
matthiasm@0 527 PYIN::getRemainingFeatures()
matthiasm@0 528 {
matthiasm@0 529 FeatureSet fs;
matthiasm@0 530 Feature f;
matthiasm@0 531 f.hasTimestamp = true;
matthiasm@0 532 f.hasDuration = false;
matthiasm@0 533
Chris@4 534 if (m_pitchProb.empty()) {
Chris@4 535 return fs;
Chris@4 536 }
Chris@4 537
matthiasm@0 538 // MONO-PITCH STUFF
matthiasm@0 539 MonoPitch mp;
Chris@9 540 // std::cerr << "before viterbi" << std::endl;
matthiasm@0 541 vector<float> mpOut = mp.process(m_pitchProb);
matthiasm@0 542 // std::cerr << "after viterbi " << mpOut.size() << " "<< m_timestamp.size() << std::endl;
matthiasm@0 543 for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame)
matthiasm@0 544 {
matthiasm@0 545 if (mpOut[iFrame] < 0 && (m_outputUnvoiced==0)) continue;
matthiasm@0 546 f.timestamp = m_timestamp[iFrame];
matthiasm@0 547 f.values.clear();
matthiasm@0 548 if (m_outputUnvoiced == 1)
matthiasm@0 549 {
matthiasm@0 550 f.values.push_back(abs(mpOut[iFrame]));
matthiasm@0 551 } else {
matthiasm@0 552 f.values.push_back(mpOut[iFrame]);
matthiasm@0 553 }
matthiasm@0 554
matthiasm@0 555 fs[m_oSmoothedPitchTrack].push_back(f);
matthiasm@0 556 }
matthiasm@0 557
matthiasm@1 558 // MONO-NOTE STUFF
matthiasm@1 559 MonoNote mn;
matthiasm@1 560 std::vector<std::vector<std::pair<double, double> > > smoothedPitch;
matthiasm@1 561 for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) {
matthiasm@1 562 std::vector<std::pair<double, double> > temp;
matthiasm@1 563 if (mpOut[iFrame] > 0)
matthiasm@1 564 {
matthiasm@1 565 double tempPitch = 12 * std::log(mpOut[iFrame]/440)/std::log(2.) + 69;
matthiasm@1 566 temp.push_back(std::pair<double,double>(tempPitch, .9));
matthiasm@1 567 }
matthiasm@1 568 smoothedPitch.push_back(temp);
matthiasm@1 569 }
matthiasm@0 570 // vector<MonoNote::FrameOutput> mnOut = mn.process(m_pitchProb);
matthiasm@1 571 vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch);
matthiasm@1 572
matthiasm@6 573 // turning feature into a note feature
matthiasm@1 574 f.hasTimestamp = true;
matthiasm@1 575 f.hasDuration = true;
matthiasm@1 576 f.values.clear();
matthiasm@6 577
matthiasm@6 578 int onsetFrame = 0;
matthiasm@6 579 bool isVoiced = 0;
matthiasm@6 580 bool oldIsVoiced = 0;
matthiasm@6 581 size_t nFrame = m_pitchProb.size();
matthiasm@1 582
matthiasm@6 583 std::vector<float> notePitchTrack; // collects pitches for one note at a time
matthiasm@6 584 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
matthiasm@1 585 {
matthiasm@6 586 isVoiced = mnOut[iFrame].noteState < 3 && smoothedPitch[iFrame].size() > 0;
matthiasm@6 587 if (isVoiced && iFrame != nFrame-1)
matthiasm@1 588 {
matthiasm@6 589 if (oldIsVoiced == 0) // beginning of a note
matthiasm@1 590 {
matthiasm@6 591 onsetFrame = iFrame;
matthiasm@6 592 notePitchTrack.clear();
matthiasm@1 593 }
matthiasm@6 594 float pitch = smoothedPitch[iFrame][0].first;
matthiasm@6 595 notePitchTrack.push_back(pitch); // add to the note's pitch track
matthiasm@6 596 } else { // not currently voiced
matthiasm@6 597 if (oldIsVoiced == 1 && notePitchTrack.size() > 4) // end of the note
matthiasm@6 598 {
matthiasm@1 599 std::sort(notePitchTrack.begin(), notePitchTrack.end());
matthiasm@6 600 float medianPitch = notePitchTrack[notePitchTrack.size()/2];
matthiasm@6 601 float medianFreq = std::pow(2,(medianPitch - 69) / 12) * 440;
matthiasm@6 602 f.values.clear();
matthiasm@6 603 f.values.push_back(medianFreq);
matthiasm@6 604 f.timestamp = m_timestamp[onsetFrame];
matthiasm@6 605 f.duration = m_timestamp[iFrame] - m_timestamp[onsetFrame];
matthiasm@5 606 fs[m_oNotes].push_back(f);
matthiasm@1 607 }
matthiasm@1 608 }
matthiasm@6 609 oldIsVoiced = isVoiced;
matthiasm@1 610 }
matthiasm@0 611 return fs;
matthiasm@0 612 }