annotate PYinVamp.cpp @ 126:292b75059949 v1.1

Update versions in n3 file as well
author Chris Cannam
date Tue, 21 Apr 2015 12:54:31 +0100
parents c3a4aa614e33
children 060ae29d3fdf
rev   line source
matthiasm@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@9 2
matthiasm@0 3 /*
Chris@9 4 pYIN - A fundamental frequency estimator for monophonic audio
Chris@9 5 Centre for Digital Music, Queen Mary, University of London.
Chris@9 6
Chris@9 7 This program is free software; you can redistribute it and/or
Chris@9 8 modify it under the terms of the GNU General Public License as
Chris@9 9 published by the Free Software Foundation; either version 2 of the
Chris@9 10 License, or (at your option) any later version. See the file
Chris@9 11 COPYING included with this distribution for more information.
matthiasm@0 12 */
matthiasm@0 13
matthiasm@36 14 #include "PYinVamp.h"
matthiasm@0 15 #include "MonoNote.h"
matthiasm@0 16 #include "MonoPitch.h"
matthiasm@0 17
matthiasm@0 18 #include "vamp-sdk/FFT.h"
matthiasm@0 19
matthiasm@0 20 #include <vector>
matthiasm@0 21 #include <algorithm>
matthiasm@0 22
matthiasm@0 23 #include <cstdio>
matthiasm@0 24 #include <cmath>
matthiasm@0 25 #include <complex>
matthiasm@0 26
matthiasm@0 27 using std::string;
matthiasm@0 28 using std::vector;
matthiasm@0 29 using Vamp::RealTime;
matthiasm@0 30
matthiasm@0 31
matthiasm@36 32 PYinVamp::PYinVamp(float inputSampleRate) :
matthiasm@0 33 Plugin(inputSampleRate),
matthiasm@0 34 m_channels(0),
matthiasm@0 35 m_stepSize(256),
matthiasm@0 36 m_blockSize(2048),
matthiasm@0 37 m_fmin(40),
matthiasm@58 38 m_fmax(1600),
matthiasm@0 39 m_yin(2048, inputSampleRate, 0.0),
matthiasm@0 40 m_oF0Candidates(0),
matthiasm@0 41 m_oF0Probs(0),
matthiasm@0 42 m_oVoicedProb(0),
matthiasm@0 43 m_oCandidateSalience(0),
matthiasm@0 44 m_oSmoothedPitchTrack(0),
matthiasm@0 45 m_oNotes(0),
matthiasm@0 46 m_threshDistr(2.0f),
matthiasm@6 47 m_outputUnvoiced(0.0f),
matthiasm@70 48 m_preciseTime(0.0f),
matthiasm@117 49 m_lowAmp(0.1f),
matthiasm@117 50 m_onsetSensitivity(0.7f),
matthiasm@117 51 m_pruneThresh(0.1f),
matthiasm@0 52 m_pitchProb(0),
matthiasm@103 53 m_timestamp(0),
matthiasm@103 54 m_level(0)
matthiasm@0 55 {
matthiasm@0 56 }
matthiasm@0 57
matthiasm@36 58 PYinVamp::~PYinVamp()
matthiasm@0 59 {
matthiasm@0 60 }
matthiasm@0 61
matthiasm@0 62 string
matthiasm@36 63 PYinVamp::getIdentifier() const
matthiasm@0 64 {
matthiasm@1 65 return "pyin";
matthiasm@0 66 }
matthiasm@0 67
matthiasm@0 68 string
matthiasm@36 69 PYinVamp::getName() const
matthiasm@0 70 {
matthiasm@1 71 return "pYin";
matthiasm@0 72 }
matthiasm@0 73
matthiasm@0 74 string
matthiasm@36 75 PYinVamp::getDescription() const
matthiasm@0 76 {
matthiasm@0 77 return "Monophonic pitch and note tracking based on a probabilistic Yin extension.";
matthiasm@0 78 }
matthiasm@0 79
matthiasm@0 80 string
matthiasm@36 81 PYinVamp::getMaker() const
matthiasm@0 82 {
matthiasm@0 83 return "Matthias Mauch";
matthiasm@0 84 }
matthiasm@0 85
matthiasm@0 86 int
matthiasm@36 87 PYinVamp::getPluginVersion() const
matthiasm@0 88 {
matthiasm@0 89 // Increment this each time you release a version that behaves
matthiasm@0 90 // differently from the previous one
Chris@125 91 return 2;
matthiasm@0 92 }
matthiasm@0 93
matthiasm@0 94 string
matthiasm@36 95 PYinVamp::getCopyright() const
matthiasm@0 96 {
matthiasm@0 97 return "GPL";
matthiasm@0 98 }
matthiasm@0 99
matthiasm@36 100 PYinVamp::InputDomain
matthiasm@36 101 PYinVamp::getInputDomain() const
matthiasm@0 102 {
matthiasm@0 103 return TimeDomain;
matthiasm@0 104 }
matthiasm@0 105
matthiasm@0 106 size_t
matthiasm@36 107 PYinVamp::getPreferredBlockSize() const
matthiasm@0 108 {
matthiasm@0 109 return 2048;
matthiasm@0 110 }
matthiasm@0 111
matthiasm@0 112 size_t
matthiasm@36 113 PYinVamp::getPreferredStepSize() const
matthiasm@0 114 {
matthiasm@0 115 return 256;
matthiasm@0 116 }
matthiasm@0 117
matthiasm@0 118 size_t
matthiasm@36 119 PYinVamp::getMinChannelCount() const
matthiasm@0 120 {
matthiasm@0 121 return 1;
matthiasm@0 122 }
matthiasm@0 123
matthiasm@0 124 size_t
matthiasm@36 125 PYinVamp::getMaxChannelCount() const
matthiasm@0 126 {
matthiasm@0 127 return 1;
matthiasm@0 128 }
matthiasm@0 129
matthiasm@36 130 PYinVamp::ParameterList
matthiasm@36 131 PYinVamp::getParameterDescriptors() const
matthiasm@0 132 {
matthiasm@0 133 ParameterList list;
matthiasm@0 134
matthiasm@0 135 ParameterDescriptor d;
matthiasm@0 136
matthiasm@0 137 d.identifier = "threshdistr";
matthiasm@0 138 d.name = "Yin threshold distribution";
matthiasm@0 139 d.description = ".";
matthiasm@0 140 d.unit = "";
matthiasm@0 141 d.minValue = 0.0f;
matthiasm@0 142 d.maxValue = 7.0f;
matthiasm@0 143 d.defaultValue = 2.0f;
matthiasm@0 144 d.isQuantized = true;
matthiasm@0 145 d.quantizeStep = 1.0f;
matthiasm@0 146 d.valueNames.push_back("Uniform");
matthiasm@0 147 d.valueNames.push_back("Beta (mean 0.10)");
matthiasm@0 148 d.valueNames.push_back("Beta (mean 0.15)");
matthiasm@0 149 d.valueNames.push_back("Beta (mean 0.20)");
matthiasm@0 150 d.valueNames.push_back("Beta (mean 0.30)");
matthiasm@0 151 d.valueNames.push_back("Single Value 0.10");
matthiasm@0 152 d.valueNames.push_back("Single Value 0.15");
matthiasm@0 153 d.valueNames.push_back("Single Value 0.20");
matthiasm@0 154 list.push_back(d);
matthiasm@0 155
matthiasm@0 156 d.identifier = "outputunvoiced";
matthiasm@0 157 d.valueNames.clear();
matthiasm@0 158 d.name = "Output estimates classified as unvoiced?";
matthiasm@0 159 d.description = ".";
matthiasm@0 160 d.unit = "";
matthiasm@0 161 d.minValue = 0.0f;
matthiasm@0 162 d.maxValue = 2.0f;
matthiasm@6 163 d.defaultValue = 0.0f;
matthiasm@0 164 d.isQuantized = true;
matthiasm@0 165 d.quantizeStep = 1.0f;
matthiasm@0 166 d.valueNames.push_back("No");
matthiasm@0 167 d.valueNames.push_back("Yes");
matthiasm@0 168 d.valueNames.push_back("Yes, as negative frequencies");
matthiasm@0 169 list.push_back(d);
matthiasm@0 170
matthiasm@70 171 d.identifier = "precisetime";
matthiasm@70 172 d.valueNames.clear();
matthiasm@70 173 d.name = "Use non-standard precise YIN timing (slow).";
matthiasm@70 174 d.description = ".";
matthiasm@70 175 d.unit = "";
matthiasm@70 176 d.minValue = 0.0f;
matthiasm@70 177 d.maxValue = 1.0f;
matthiasm@70 178 d.defaultValue = 0.0f;
matthiasm@70 179 d.isQuantized = true;
matthiasm@70 180 d.quantizeStep = 1.0f;
matthiasm@70 181 list.push_back(d);
matthiasm@70 182
matthiasm@72 183 d.identifier = "lowampsuppression";
matthiasm@72 184 d.valueNames.clear();
matthiasm@72 185 d.name = "Suppress low amplitude pitch estimates.";
matthiasm@72 186 d.description = ".";
matthiasm@72 187 d.unit = "";
matthiasm@72 188 d.minValue = 0.0f;
matthiasm@72 189 d.maxValue = 1.0f;
matthiasm@73 190 d.defaultValue = 0.1f;
matthiasm@72 191 d.isQuantized = false;
matthiasm@72 192 list.push_back(d);
matthiasm@70 193
matthiasm@107 194 d.identifier = "onsetsensitivity";
matthiasm@107 195 d.valueNames.clear();
matthiasm@107 196 d.name = "Onset sensitivity";
matthiasm@107 197 d.description = "Adds additional note onsets when RMS increases.";
matthiasm@107 198 d.unit = "";
matthiasm@107 199 d.minValue = 0.0f;
matthiasm@107 200 d.maxValue = 1.0f;
matthiasm@117 201 d.defaultValue = 0.7f;
matthiasm@108 202 d.isQuantized = false;
matthiasm@108 203 list.push_back(d);
matthiasm@108 204
matthiasm@108 205 d.identifier = "prunethresh";
matthiasm@108 206 d.valueNames.clear();
matthiasm@108 207 d.name = "Duration pruning threshold.";
matthiasm@108 208 d.description = "Prune notes that are shorter than this value.";
matthiasm@108 209 d.unit = "";
matthiasm@108 210 d.minValue = 0.0f;
matthiasm@108 211 d.maxValue = 0.2f;
matthiasm@117 212 d.defaultValue = 0.1f;
matthiasm@107 213 d.isQuantized = false;
matthiasm@107 214 list.push_back(d);
matthiasm@107 215
matthiasm@0 216 return list;
matthiasm@0 217 }
matthiasm@0 218
matthiasm@0 219 float
matthiasm@36 220 PYinVamp::getParameter(string identifier) const
matthiasm@0 221 {
matthiasm@0 222 if (identifier == "threshdistr") {
matthiasm@0 223 return m_threshDistr;
matthiasm@0 224 }
matthiasm@0 225 if (identifier == "outputunvoiced") {
matthiasm@0 226 return m_outputUnvoiced;
matthiasm@0 227 }
matthiasm@70 228 if (identifier == "precisetime") {
matthiasm@70 229 return m_preciseTime;
matthiasm@70 230 }
matthiasm@72 231 if (identifier == "lowampsuppression") {
matthiasm@72 232 return m_lowAmp;
matthiasm@72 233 }
matthiasm@107 234 if (identifier == "onsetsensitivity") {
matthiasm@107 235 return m_onsetSensitivity;
matthiasm@107 236 }
matthiasm@108 237 if (identifier == "prunethresh") {
matthiasm@108 238 return m_pruneThresh;
matthiasm@108 239 }
matthiasm@0 240 return 0.f;
matthiasm@0 241 }
matthiasm@0 242
matthiasm@0 243 void
matthiasm@36 244 PYinVamp::setParameter(string identifier, float value)
matthiasm@0 245 {
matthiasm@0 246 if (identifier == "threshdistr")
matthiasm@0 247 {
matthiasm@0 248 m_threshDistr = value;
matthiasm@0 249 }
matthiasm@0 250 if (identifier == "outputunvoiced")
matthiasm@0 251 {
matthiasm@0 252 m_outputUnvoiced = value;
matthiasm@0 253 }
matthiasm@70 254 if (identifier == "precisetime")
matthiasm@70 255 {
matthiasm@70 256 m_preciseTime = value;
matthiasm@70 257 }
matthiasm@72 258 if (identifier == "lowampsuppression")
matthiasm@72 259 {
matthiasm@72 260 m_lowAmp = value;
matthiasm@72 261 }
matthiasm@107 262 if (identifier == "onsetsensitivity")
matthiasm@107 263 {
matthiasm@107 264 m_onsetSensitivity = value;
matthiasm@107 265 }
matthiasm@108 266 if (identifier == "prunethresh")
matthiasm@108 267 {
matthiasm@108 268 m_pruneThresh = value;
matthiasm@108 269 }
matthiasm@0 270 }
matthiasm@0 271
matthiasm@36 272 PYinVamp::ProgramList
matthiasm@36 273 PYinVamp::getPrograms() const
matthiasm@0 274 {
matthiasm@0 275 ProgramList list;
matthiasm@0 276 return list;
matthiasm@0 277 }
matthiasm@0 278
matthiasm@0 279 string
matthiasm@36 280 PYinVamp::getCurrentProgram() const
matthiasm@0 281 {
matthiasm@0 282 return ""; // no programs
matthiasm@0 283 }
matthiasm@0 284
matthiasm@0 285 void
matthiasm@36 286 PYinVamp::selectProgram(string name)
matthiasm@0 287 {
matthiasm@0 288 }
matthiasm@0 289
matthiasm@36 290 PYinVamp::OutputList
matthiasm@36 291 PYinVamp::getOutputDescriptors() const
matthiasm@0 292 {
matthiasm@0 293 OutputList outputs;
matthiasm@0 294
matthiasm@0 295 OutputDescriptor d;
matthiasm@0 296
matthiasm@0 297 int outputNumber = 0;
matthiasm@0 298
matthiasm@0 299 d.identifier = "f0candidates";
matthiasm@0 300 d.name = "F0 Candidates";
matthiasm@0 301 d.description = "Estimated fundamental frequency candidates.";
matthiasm@0 302 d.unit = "Hz";
matthiasm@0 303 d.hasFixedBinCount = false;
matthiasm@0 304 // d.binCount = 1;
matthiasm@0 305 d.hasKnownExtents = true;
matthiasm@0 306 d.minValue = m_fmin;
matthiasm@0 307 d.maxValue = 500;
matthiasm@0 308 d.isQuantized = false;
matthiasm@0 309 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 310 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 311 d.hasDuration = false;
matthiasm@0 312 outputs.push_back(d);
matthiasm@0 313 m_oF0Candidates = outputNumber++;
matthiasm@0 314
matthiasm@0 315 d.identifier = "f0probs";
matthiasm@0 316 d.name = "Candidate Probabilities";
matthiasm@0 317 d.description = "Probabilities of estimated fundamental frequency candidates.";
matthiasm@0 318 d.unit = "";
matthiasm@0 319 d.hasFixedBinCount = false;
matthiasm@0 320 // d.binCount = 1;
matthiasm@0 321 d.hasKnownExtents = true;
matthiasm@0 322 d.minValue = 0;
matthiasm@0 323 d.maxValue = 1;
matthiasm@0 324 d.isQuantized = false;
matthiasm@0 325 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 326 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 327 d.hasDuration = false;
matthiasm@0 328 outputs.push_back(d);
matthiasm@0 329 m_oF0Probs = outputNumber++;
matthiasm@0 330
matthiasm@0 331 d.identifier = "voicedprob";
matthiasm@0 332 d.name = "Voiced Probability";
matthiasm@0 333 d.description = "Probability that the signal is voiced according to Probabilistic Yin.";
matthiasm@0 334 d.unit = "";
matthiasm@0 335 d.hasFixedBinCount = true;
matthiasm@0 336 d.binCount = 1;
matthiasm@0 337 d.hasKnownExtents = true;
matthiasm@0 338 d.minValue = 0;
matthiasm@0 339 d.maxValue = 1;
matthiasm@0 340 d.isQuantized = false;
matthiasm@0 341 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 342 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 343 d.hasDuration = false;
matthiasm@0 344 outputs.push_back(d);
matthiasm@0 345 m_oVoicedProb = outputNumber++;
matthiasm@0 346
matthiasm@0 347 d.identifier = "candidatesalience";
matthiasm@0 348 d.name = "Candidate Salience";
matthiasm@0 349 d.description = "Candidate Salience";
matthiasm@0 350 d.hasFixedBinCount = true;
matthiasm@0 351 d.binCount = m_blockSize / 2;
matthiasm@0 352 d.hasKnownExtents = true;
matthiasm@0 353 d.minValue = 0;
matthiasm@0 354 d.maxValue = 1;
matthiasm@0 355 d.isQuantized = false;
matthiasm@0 356 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 357 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 358 d.hasDuration = false;
matthiasm@0 359 outputs.push_back(d);
matthiasm@0 360 m_oCandidateSalience = outputNumber++;
matthiasm@0 361
matthiasm@0 362 d.identifier = "smoothedpitchtrack";
matthiasm@0 363 d.name = "Smoothed Pitch Track";
matthiasm@0 364 d.description = ".";
matthiasm@0 365 d.unit = "Hz";
matthiasm@0 366 d.hasFixedBinCount = true;
matthiasm@0 367 d.binCount = 1;
matthiasm@0 368 d.hasKnownExtents = false;
matthiasm@0 369 // d.minValue = 0;
matthiasm@0 370 // d.maxValue = 1;
matthiasm@0 371 d.isQuantized = false;
matthiasm@0 372 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 373 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 374 d.hasDuration = false;
matthiasm@0 375 outputs.push_back(d);
matthiasm@0 376 m_oSmoothedPitchTrack = outputNumber++;
matthiasm@0 377
matthiasm@0 378 d.identifier = "notes";
matthiasm@0 379 d.name = "Notes";
matthiasm@0 380 d.description = "Derived fixed-pitch note frequencies";
matthiasm@0 381 // d.unit = "MIDI unit";
matthiasm@0 382 d.unit = "Hz";
matthiasm@0 383 d.hasFixedBinCount = true;
matthiasm@0 384 d.binCount = 1;
matthiasm@0 385 d.hasKnownExtents = false;
matthiasm@0 386 d.isQuantized = false;
matthiasm@0 387 d.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0 388 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 389 d.hasDuration = true;
matthiasm@0 390 outputs.push_back(d);
matthiasm@0 391 m_oNotes = outputNumber++;
matthiasm@0 392
matthiasm@0 393 return outputs;
matthiasm@0 394 }
matthiasm@0 395
matthiasm@0 396 bool
matthiasm@36 397 PYinVamp::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 398 {
matthiasm@0 399 if (channels < getMinChannelCount() ||
matthiasm@0 400 channels > getMaxChannelCount()) return false;
matthiasm@0 401
Chris@9 402 /*
matthiasm@36 403 std::cerr << "PYinVamp::initialise: channels = " << channels
matthiasm@0 404 << ", stepSize = " << stepSize << ", blockSize = " << blockSize
matthiasm@0 405 << std::endl;
Chris@9 406 */
matthiasm@0 407 m_channels = channels;
matthiasm@0 408 m_stepSize = stepSize;
matthiasm@0 409 m_blockSize = blockSize;
matthiasm@0 410
matthiasm@0 411 reset();
matthiasm@0 412
matthiasm@0 413 return true;
matthiasm@0 414 }
matthiasm@0 415
matthiasm@0 416 void
matthiasm@36 417 PYinVamp::reset()
matthiasm@0 418 {
matthiasm@0 419 m_yin.setThresholdDistr(m_threshDistr);
matthiasm@0 420 m_yin.setFrameSize(m_blockSize);
matthiasm@117 421 m_yin.setFast(!m_preciseTime);
matthiasm@0 422
matthiasm@0 423 m_pitchProb.clear();
matthiasm@0 424 m_timestamp.clear();
matthiasm@103 425 m_level.clear();
Chris@9 426 /*
matthiasm@36 427 std::cerr << "PYinVamp::reset"
matthiasm@0 428 << ", blockSize = " << m_blockSize
matthiasm@0 429 << std::endl;
Chris@9 430 */
matthiasm@0 431 }
matthiasm@0 432
matthiasm@36 433 PYinVamp::FeatureSet
matthiasm@36 434 PYinVamp::process(const float *const *inputBuffers, RealTime timestamp)
matthiasm@0 435 {
matthiasm@77 436 int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4;
matthiasm@77 437 timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset, lrintf(m_inputSampleRate));
matthiasm@77 438
matthiasm@0 439 FeatureSet fs;
matthiasm@0 440
matthiasm@46 441 float rms = 0;
matthiasm@46 442
matthiasm@0 443 double *dInputBuffers = new double[m_blockSize];
matthiasm@46 444 for (size_t i = 0; i < m_blockSize; ++i) {
matthiasm@46 445 dInputBuffers[i] = inputBuffers[0][i];
matthiasm@46 446 rms += inputBuffers[0][i] * inputBuffers[0][i];
matthiasm@46 447 }
matthiasm@46 448 rms /= m_blockSize;
matthiasm@46 449 rms = sqrt(rms);
matthiasm@116 450
matthiasm@72 451 bool isLowAmplitude = (rms < m_lowAmp);
matthiasm@0 452
matthiasm@0 453 Yin::YinOutput yo = m_yin.processProbabilisticYin(dInputBuffers);
matthiasm@27 454 delete [] dInputBuffers;
matthiasm@27 455
matthiasm@103 456 m_level.push_back(yo.rms);
matthiasm@103 457
matthiasm@27 458 // First, get the things out of the way that we don't want to output
matthiasm@27 459 // immediately, but instead save for later.
matthiasm@27 460 vector<pair<double, double> > tempPitchProb;
matthiasm@27 461 for (size_t iCandidate = 0; iCandidate < yo.freqProb.size(); ++iCandidate)
matthiasm@27 462 {
matthiasm@27 463 double tempPitch = 12 * std::log(yo.freqProb[iCandidate].first/440)/std::log(2.) + 69;
matthiasm@50 464 if (!isLowAmplitude)
matthiasm@116 465 {
matthiasm@46 466 tempPitchProb.push_back(pair<double, double>
matthiasm@46 467 (tempPitch, yo.freqProb[iCandidate].second));
matthiasm@116 468 } else {
matthiasm@116 469 float factor = ((rms+0.01*m_lowAmp)/(1.01*m_lowAmp));
matthiasm@46 470 tempPitchProb.push_back(pair<double, double>
matthiasm@65 471 (tempPitch, yo.freqProb[iCandidate].second*factor));
matthiasm@65 472 }
matthiasm@27 473 }
matthiasm@27 474 m_pitchProb.push_back(tempPitchProb);
matthiasm@27 475 m_timestamp.push_back(timestamp);
matthiasm@27 476
matthiasm@27 477 // F0 CANDIDATES
matthiasm@0 478 Feature f;
matthiasm@0 479 f.hasTimestamp = true;
matthiasm@0 480 f.timestamp = timestamp;
matthiasm@0 481 for (size_t i = 0; i < yo.freqProb.size(); ++i)
matthiasm@0 482 {
matthiasm@0 483 f.values.push_back(yo.freqProb[i].first);
matthiasm@0 484 }
matthiasm@0 485 fs[m_oF0Candidates].push_back(f);
matthiasm@0 486
matthiasm@27 487 // VOICEDPROB
matthiasm@0 488 f.values.clear();
matthiasm@0 489 float voicedProb = 0;
matthiasm@0 490 for (size_t i = 0; i < yo.freqProb.size(); ++i)
matthiasm@0 491 {
matthiasm@0 492 f.values.push_back(yo.freqProb[i].second);
matthiasm@0 493 voicedProb += yo.freqProb[i].second;
matthiasm@0 494 }
matthiasm@0 495 fs[m_oF0Probs].push_back(f);
matthiasm@0 496
matthiasm@0 497 f.values.push_back(voicedProb);
matthiasm@0 498 fs[m_oVoicedProb].push_back(f);
matthiasm@0 499
matthiasm@27 500 // SALIENCE -- maybe this should eventually disappear
matthiasm@0 501 f.values.clear();
matthiasm@0 502 float salienceSum = 0;
matthiasm@0 503 for (size_t iBin = 0; iBin < yo.salience.size(); ++iBin)
matthiasm@0 504 {
matthiasm@0 505 f.values.push_back(yo.salience[iBin]);
matthiasm@0 506 salienceSum += yo.salience[iBin];
matthiasm@0 507 }
matthiasm@0 508 fs[m_oCandidateSalience].push_back(f);
matthiasm@0 509
matthiasm@0 510 return fs;
matthiasm@0 511 }
matthiasm@0 512
matthiasm@36 513 PYinVamp::FeatureSet
matthiasm@36 514 PYinVamp::getRemainingFeatures()
matthiasm@0 515 {
matthiasm@0 516 FeatureSet fs;
matthiasm@0 517 Feature f;
matthiasm@0 518 f.hasTimestamp = true;
matthiasm@0 519 f.hasDuration = false;
matthiasm@0 520
Chris@4 521 if (m_pitchProb.empty()) {
Chris@4 522 return fs;
Chris@4 523 }
Chris@4 524
matthiasm@0 525 // MONO-PITCH STUFF
matthiasm@0 526 MonoPitch mp;
matthiasm@0 527 vector<float> mpOut = mp.process(m_pitchProb);
matthiasm@0 528 for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame)
matthiasm@0 529 {
matthiasm@0 530 if (mpOut[iFrame] < 0 && (m_outputUnvoiced==0)) continue;
matthiasm@0 531 f.timestamp = m_timestamp[iFrame];
matthiasm@0 532 f.values.clear();
matthiasm@0 533 if (m_outputUnvoiced == 1)
matthiasm@0 534 {
matthiasm@26 535 f.values.push_back(fabs(mpOut[iFrame]));
matthiasm@0 536 } else {
matthiasm@0 537 f.values.push_back(mpOut[iFrame]);
matthiasm@0 538 }
matthiasm@0 539
matthiasm@0 540 fs[m_oSmoothedPitchTrack].push_back(f);
matthiasm@0 541 }
matthiasm@0 542
matthiasm@1 543 // MONO-NOTE STUFF
Chris@122 544 // std::cerr << "Mono Note Stuff" << std::endl;
matthiasm@1 545 MonoNote mn;
matthiasm@1 546 std::vector<std::vector<std::pair<double, double> > > smoothedPitch;
matthiasm@1 547 for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) {
matthiasm@1 548 std::vector<std::pair<double, double> > temp;
matthiasm@1 549 if (mpOut[iFrame] > 0)
matthiasm@1 550 {
matthiasm@1 551 double tempPitch = 12 * std::log(mpOut[iFrame]/440)/std::log(2.) + 69;
matthiasm@1 552 temp.push_back(std::pair<double,double>(tempPitch, .9));
matthiasm@1 553 }
matthiasm@1 554 smoothedPitch.push_back(temp);
matthiasm@1 555 }
matthiasm@0 556 // vector<MonoNote::FrameOutput> mnOut = mn.process(m_pitchProb);
matthiasm@1 557 vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch);
matthiasm@1 558
matthiasm@6 559 // turning feature into a note feature
matthiasm@1 560 f.hasTimestamp = true;
matthiasm@1 561 f.hasDuration = true;
matthiasm@1 562 f.values.clear();
matthiasm@6 563
matthiasm@6 564 int onsetFrame = 0;
matthiasm@6 565 bool isVoiced = 0;
matthiasm@6 566 bool oldIsVoiced = 0;
matthiasm@6 567 size_t nFrame = m_pitchProb.size();
matthiasm@108 568
matthiasm@108 569 float minNoteFrames = (m_inputSampleRate*m_pruneThresh) / m_stepSize;
matthiasm@1 570
matthiasm@6 571 std::vector<float> notePitchTrack; // collects pitches for one note at a time
matthiasm@6 572 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
matthiasm@1 573 {
matthiasm@103 574 isVoiced = mnOut[iFrame].noteState < 3
matthiasm@103 575 && smoothedPitch[iFrame].size() > 0
matthiasm@106 576 && (iFrame >= nFrame-2
matthiasm@107 577 || ((m_level[iFrame]/m_level[iFrame+2]) > m_onsetSensitivity));
matthiasm@108 578 // std::cerr << m_level[iFrame]/m_level[iFrame-1] << " " << isVoiced << std::endl;
matthiasm@6 579 if (isVoiced && iFrame != nFrame-1)
matthiasm@1 580 {
matthiasm@6 581 if (oldIsVoiced == 0) // beginning of a note
matthiasm@1 582 {
matthiasm@6 583 onsetFrame = iFrame;
matthiasm@1 584 }
matthiasm@6 585 float pitch = smoothedPitch[iFrame][0].first;
matthiasm@6 586 notePitchTrack.push_back(pitch); // add to the note's pitch track
matthiasm@6 587 } else { // not currently voiced
matthiasm@108 588 if (oldIsVoiced == 1) // end of note
matthiasm@6 589 {
matthiasm@118 590 // std::cerr << notePitchTrack.size() << " " << minNoteFrames << std::endl;
matthiasm@108 591 if (notePitchTrack.size() >= minNoteFrames)
matthiasm@108 592 {
matthiasm@108 593 std::sort(notePitchTrack.begin(), notePitchTrack.end());
matthiasm@108 594 float medianPitch = notePitchTrack[notePitchTrack.size()/2];
matthiasm@108 595 float medianFreq = std::pow(2,(medianPitch - 69) / 12) * 440;
matthiasm@108 596 f.values.clear();
matthiasm@108 597 f.values.push_back(medianFreq);
matthiasm@108 598 f.timestamp = m_timestamp[onsetFrame];
matthiasm@108 599 f.duration = m_timestamp[iFrame] - m_timestamp[onsetFrame];
matthiasm@108 600 fs[m_oNotes].push_back(f);
matthiasm@108 601 }
matthiasm@108 602 notePitchTrack.clear();
matthiasm@1 603 }
matthiasm@1 604 }
matthiasm@6 605 oldIsVoiced = isVoiced;
matthiasm@1 606 }
matthiasm@0 607 return fs;
matthiasm@0 608 }