annotate PYinVamp.cpp @ 81:e6b234575df1 tony

added spurious file VampYin.cpp
author matthiasm
date Wed, 03 Dec 2014 14:49:54 +0000
parents 4cbbd87a8c7f
children 7ef7f6e90966
rev   line source
matthiasm@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@9 2
matthiasm@0 3 /*
Chris@9 4 pYIN - A fundamental frequency estimator for monophonic audio
Chris@9 5 Centre for Digital Music, Queen Mary, University of London.
Chris@9 6
Chris@9 7 This program is free software; you can redistribute it and/or
Chris@9 8 modify it under the terms of the GNU General Public License as
Chris@9 9 published by the Free Software Foundation; either version 2 of the
Chris@9 10 License, or (at your option) any later version. See the file
Chris@9 11 COPYING included with this distribution for more information.
matthiasm@0 12 */
matthiasm@0 13
matthiasm@36 14 #include "PYinVamp.h"
matthiasm@0 15 #include "MonoNote.h"
matthiasm@0 16 #include "MonoPitch.h"
matthiasm@0 17
matthiasm@0 18 #include "vamp-sdk/FFT.h"
matthiasm@0 19
matthiasm@0 20 #include <vector>
matthiasm@0 21 #include <algorithm>
matthiasm@0 22
matthiasm@0 23 #include <cstdio>
matthiasm@0 24 #include <cmath>
matthiasm@0 25 #include <complex>
matthiasm@0 26
matthiasm@0 27 using std::string;
matthiasm@0 28 using std::vector;
matthiasm@0 29 using Vamp::RealTime;
matthiasm@0 30
matthiasm@0 31
matthiasm@36 32 PYinVamp::PYinVamp(float inputSampleRate) :
matthiasm@0 33 Plugin(inputSampleRate),
matthiasm@0 34 m_channels(0),
matthiasm@0 35 m_stepSize(256),
matthiasm@0 36 m_blockSize(2048),
matthiasm@0 37 m_fmin(40),
matthiasm@58 38 m_fmax(1600),
matthiasm@0 39 m_yin(2048, inputSampleRate, 0.0),
matthiasm@0 40 m_oF0Candidates(0),
matthiasm@0 41 m_oF0Probs(0),
matthiasm@0 42 m_oVoicedProb(0),
matthiasm@0 43 m_oCandidateSalience(0),
matthiasm@0 44 m_oSmoothedPitchTrack(0),
matthiasm@0 45 m_oNotes(0),
matthiasm@0 46 m_threshDistr(2.0f),
matthiasm@6 47 m_outputUnvoiced(0.0f),
matthiasm@70 48 m_preciseTime(0.0f),
matthiasm@73 49 m_lowAmp(0.1),
matthiasm@0 50 m_pitchProb(0),
matthiasm@0 51 m_timestamp(0)
matthiasm@0 52 {
matthiasm@0 53 }
matthiasm@0 54
matthiasm@36 55 PYinVamp::~PYinVamp()
matthiasm@0 56 {
matthiasm@0 57 }
matthiasm@0 58
matthiasm@0 59 string
matthiasm@36 60 PYinVamp::getIdentifier() const
matthiasm@0 61 {
matthiasm@1 62 return "pyin";
matthiasm@0 63 }
matthiasm@0 64
matthiasm@0 65 string
matthiasm@36 66 PYinVamp::getName() const
matthiasm@0 67 {
matthiasm@1 68 return "pYin";
matthiasm@0 69 }
matthiasm@0 70
matthiasm@0 71 string
matthiasm@36 72 PYinVamp::getDescription() const
matthiasm@0 73 {
matthiasm@0 74 return "Monophonic pitch and note tracking based on a probabilistic Yin extension.";
matthiasm@0 75 }
matthiasm@0 76
matthiasm@0 77 string
matthiasm@36 78 PYinVamp::getMaker() const
matthiasm@0 79 {
matthiasm@0 80 return "Matthias Mauch";
matthiasm@0 81 }
matthiasm@0 82
matthiasm@0 83 int
matthiasm@36 84 PYinVamp::getPluginVersion() const
matthiasm@0 85 {
matthiasm@0 86 // Increment this each time you release a version that behaves
matthiasm@0 87 // differently from the previous one
matthiasm@0 88 return 1;
matthiasm@0 89 }
matthiasm@0 90
matthiasm@0 91 string
matthiasm@36 92 PYinVamp::getCopyright() const
matthiasm@0 93 {
matthiasm@0 94 return "GPL";
matthiasm@0 95 }
matthiasm@0 96
matthiasm@36 97 PYinVamp::InputDomain
matthiasm@36 98 PYinVamp::getInputDomain() const
matthiasm@0 99 {
matthiasm@0 100 return TimeDomain;
matthiasm@0 101 }
matthiasm@0 102
matthiasm@0 103 size_t
matthiasm@36 104 PYinVamp::getPreferredBlockSize() const
matthiasm@0 105 {
matthiasm@0 106 return 2048;
matthiasm@0 107 }
matthiasm@0 108
matthiasm@0 109 size_t
matthiasm@36 110 PYinVamp::getPreferredStepSize() const
matthiasm@0 111 {
matthiasm@0 112 return 256;
matthiasm@0 113 }
matthiasm@0 114
matthiasm@0 115 size_t
matthiasm@36 116 PYinVamp::getMinChannelCount() const
matthiasm@0 117 {
matthiasm@0 118 return 1;
matthiasm@0 119 }
matthiasm@0 120
matthiasm@0 121 size_t
matthiasm@36 122 PYinVamp::getMaxChannelCount() const
matthiasm@0 123 {
matthiasm@0 124 return 1;
matthiasm@0 125 }
matthiasm@0 126
matthiasm@36 127 PYinVamp::ParameterList
matthiasm@36 128 PYinVamp::getParameterDescriptors() const
matthiasm@0 129 {
matthiasm@0 130 ParameterList list;
matthiasm@0 131
matthiasm@0 132 ParameterDescriptor d;
matthiasm@0 133
matthiasm@0 134 d.identifier = "threshdistr";
matthiasm@0 135 d.name = "Yin threshold distribution";
matthiasm@0 136 d.description = ".";
matthiasm@0 137 d.unit = "";
matthiasm@0 138 d.minValue = 0.0f;
matthiasm@0 139 d.maxValue = 7.0f;
matthiasm@0 140 d.defaultValue = 2.0f;
matthiasm@0 141 d.isQuantized = true;
matthiasm@0 142 d.quantizeStep = 1.0f;
matthiasm@0 143 d.valueNames.push_back("Uniform");
matthiasm@0 144 d.valueNames.push_back("Beta (mean 0.10)");
matthiasm@0 145 d.valueNames.push_back("Beta (mean 0.15)");
matthiasm@0 146 d.valueNames.push_back("Beta (mean 0.20)");
matthiasm@0 147 d.valueNames.push_back("Beta (mean 0.30)");
matthiasm@0 148 d.valueNames.push_back("Single Value 0.10");
matthiasm@0 149 d.valueNames.push_back("Single Value 0.15");
matthiasm@0 150 d.valueNames.push_back("Single Value 0.20");
matthiasm@0 151 list.push_back(d);
matthiasm@0 152
matthiasm@0 153 d.identifier = "outputunvoiced";
matthiasm@0 154 d.valueNames.clear();
matthiasm@0 155 d.name = "Output estimates classified as unvoiced?";
matthiasm@0 156 d.description = ".";
matthiasm@0 157 d.unit = "";
matthiasm@0 158 d.minValue = 0.0f;
matthiasm@0 159 d.maxValue = 2.0f;
matthiasm@6 160 d.defaultValue = 0.0f;
matthiasm@0 161 d.isQuantized = true;
matthiasm@0 162 d.quantizeStep = 1.0f;
matthiasm@0 163 d.valueNames.push_back("No");
matthiasm@0 164 d.valueNames.push_back("Yes");
matthiasm@0 165 d.valueNames.push_back("Yes, as negative frequencies");
matthiasm@0 166 list.push_back(d);
matthiasm@0 167
matthiasm@70 168 d.identifier = "precisetime";
matthiasm@70 169 d.valueNames.clear();
matthiasm@70 170 d.name = "Use non-standard precise YIN timing (slow).";
matthiasm@70 171 d.description = ".";
matthiasm@70 172 d.unit = "";
matthiasm@70 173 d.minValue = 0.0f;
matthiasm@70 174 d.maxValue = 1.0f;
matthiasm@70 175 d.defaultValue = 0.0f;
matthiasm@70 176 d.isQuantized = true;
matthiasm@70 177 d.quantizeStep = 1.0f;
matthiasm@70 178 list.push_back(d);
matthiasm@70 179
matthiasm@72 180 d.identifier = "lowampsuppression";
matthiasm@72 181 d.valueNames.clear();
matthiasm@72 182 d.name = "Suppress low amplitude pitch estimates.";
matthiasm@72 183 d.description = ".";
matthiasm@72 184 d.unit = "";
matthiasm@72 185 d.minValue = 0.0f;
matthiasm@72 186 d.maxValue = 1.0f;
matthiasm@73 187 d.defaultValue = 0.1f;
matthiasm@72 188 d.isQuantized = false;
matthiasm@72 189 list.push_back(d);
matthiasm@70 190
matthiasm@0 191 return list;
matthiasm@0 192 }
matthiasm@0 193
matthiasm@0 194 float
matthiasm@36 195 PYinVamp::getParameter(string identifier) const
matthiasm@0 196 {
matthiasm@0 197 if (identifier == "threshdistr") {
matthiasm@0 198 return m_threshDistr;
matthiasm@0 199 }
matthiasm@0 200 if (identifier == "outputunvoiced") {
matthiasm@0 201 return m_outputUnvoiced;
matthiasm@0 202 }
matthiasm@70 203 if (identifier == "precisetime") {
matthiasm@70 204 return m_preciseTime;
matthiasm@70 205 }
matthiasm@72 206 if (identifier == "lowampsuppression") {
matthiasm@72 207 return m_lowAmp;
matthiasm@72 208 }
matthiasm@0 209 return 0.f;
matthiasm@0 210 }
matthiasm@0 211
matthiasm@0 212 void
matthiasm@36 213 PYinVamp::setParameter(string identifier, float value)
matthiasm@0 214 {
matthiasm@0 215 if (identifier == "threshdistr")
matthiasm@0 216 {
matthiasm@0 217 m_threshDistr = value;
matthiasm@0 218 }
matthiasm@0 219 if (identifier == "outputunvoiced")
matthiasm@0 220 {
matthiasm@0 221 m_outputUnvoiced = value;
matthiasm@0 222 }
matthiasm@70 223 if (identifier == "precisetime")
matthiasm@70 224 {
matthiasm@70 225 m_preciseTime = value;
matthiasm@70 226 }
matthiasm@72 227 if (identifier == "lowampsuppression")
matthiasm@72 228 {
matthiasm@72 229 m_lowAmp = value;
matthiasm@72 230 }
matthiasm@0 231 }
matthiasm@0 232
matthiasm@36 233 PYinVamp::ProgramList
matthiasm@36 234 PYinVamp::getPrograms() const
matthiasm@0 235 {
matthiasm@0 236 ProgramList list;
matthiasm@0 237 return list;
matthiasm@0 238 }
matthiasm@0 239
matthiasm@0 240 string
matthiasm@36 241 PYinVamp::getCurrentProgram() const
matthiasm@0 242 {
matthiasm@0 243 return ""; // no programs
matthiasm@0 244 }
matthiasm@0 245
matthiasm@0 246 void
matthiasm@36 247 PYinVamp::selectProgram(string name)
matthiasm@0 248 {
matthiasm@0 249 }
matthiasm@0 250
matthiasm@36 251 PYinVamp::OutputList
matthiasm@36 252 PYinVamp::getOutputDescriptors() const
matthiasm@0 253 {
matthiasm@0 254 OutputList outputs;
matthiasm@0 255
matthiasm@0 256 OutputDescriptor d;
matthiasm@0 257
matthiasm@0 258 int outputNumber = 0;
matthiasm@0 259
matthiasm@0 260 d.identifier = "f0candidates";
matthiasm@0 261 d.name = "F0 Candidates";
matthiasm@0 262 d.description = "Estimated fundamental frequency candidates.";
matthiasm@0 263 d.unit = "Hz";
matthiasm@0 264 d.hasFixedBinCount = false;
matthiasm@0 265 // d.binCount = 1;
matthiasm@0 266 d.hasKnownExtents = true;
matthiasm@0 267 d.minValue = m_fmin;
matthiasm@0 268 d.maxValue = 500;
matthiasm@0 269 d.isQuantized = false;
matthiasm@0 270 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 271 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 272 d.hasDuration = false;
matthiasm@0 273 outputs.push_back(d);
matthiasm@0 274 m_oF0Candidates = outputNumber++;
matthiasm@0 275
matthiasm@0 276 d.identifier = "f0probs";
matthiasm@0 277 d.name = "Candidate Probabilities";
matthiasm@0 278 d.description = "Probabilities of estimated fundamental frequency candidates.";
matthiasm@0 279 d.unit = "";
matthiasm@0 280 d.hasFixedBinCount = false;
matthiasm@0 281 // d.binCount = 1;
matthiasm@0 282 d.hasKnownExtents = true;
matthiasm@0 283 d.minValue = 0;
matthiasm@0 284 d.maxValue = 1;
matthiasm@0 285 d.isQuantized = false;
matthiasm@0 286 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 287 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 288 d.hasDuration = false;
matthiasm@0 289 outputs.push_back(d);
matthiasm@0 290 m_oF0Probs = outputNumber++;
matthiasm@0 291
matthiasm@0 292 d.identifier = "voicedprob";
matthiasm@0 293 d.name = "Voiced Probability";
matthiasm@0 294 d.description = "Probability that the signal is voiced according to Probabilistic Yin.";
matthiasm@0 295 d.unit = "";
matthiasm@0 296 d.hasFixedBinCount = true;
matthiasm@0 297 d.binCount = 1;
matthiasm@0 298 d.hasKnownExtents = true;
matthiasm@0 299 d.minValue = 0;
matthiasm@0 300 d.maxValue = 1;
matthiasm@0 301 d.isQuantized = false;
matthiasm@0 302 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 303 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 304 d.hasDuration = false;
matthiasm@0 305 outputs.push_back(d);
matthiasm@0 306 m_oVoicedProb = outputNumber++;
matthiasm@0 307
matthiasm@0 308 d.identifier = "candidatesalience";
matthiasm@0 309 d.name = "Candidate Salience";
matthiasm@0 310 d.description = "Candidate Salience";
matthiasm@0 311 d.hasFixedBinCount = true;
matthiasm@0 312 d.binCount = m_blockSize / 2;
matthiasm@0 313 d.hasKnownExtents = true;
matthiasm@0 314 d.minValue = 0;
matthiasm@0 315 d.maxValue = 1;
matthiasm@0 316 d.isQuantized = false;
matthiasm@0 317 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 318 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 319 d.hasDuration = false;
matthiasm@0 320 outputs.push_back(d);
matthiasm@0 321 m_oCandidateSalience = outputNumber++;
matthiasm@0 322
matthiasm@0 323 d.identifier = "smoothedpitchtrack";
matthiasm@0 324 d.name = "Smoothed Pitch Track";
matthiasm@0 325 d.description = ".";
matthiasm@0 326 d.unit = "Hz";
matthiasm@0 327 d.hasFixedBinCount = true;
matthiasm@0 328 d.binCount = 1;
matthiasm@0 329 d.hasKnownExtents = false;
matthiasm@0 330 // d.minValue = 0;
matthiasm@0 331 // d.maxValue = 1;
matthiasm@0 332 d.isQuantized = false;
matthiasm@0 333 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 334 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 335 d.hasDuration = false;
matthiasm@0 336 outputs.push_back(d);
matthiasm@0 337 m_oSmoothedPitchTrack = outputNumber++;
matthiasm@0 338
matthiasm@0 339 d.identifier = "notes";
matthiasm@0 340 d.name = "Notes";
matthiasm@0 341 d.description = "Derived fixed-pitch note frequencies";
matthiasm@0 342 // d.unit = "MIDI unit";
matthiasm@0 343 d.unit = "Hz";
matthiasm@0 344 d.hasFixedBinCount = true;
matthiasm@0 345 d.binCount = 1;
matthiasm@0 346 d.hasKnownExtents = false;
matthiasm@0 347 d.isQuantized = false;
matthiasm@0 348 d.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0 349 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@0 350 d.hasDuration = true;
matthiasm@0 351 outputs.push_back(d);
matthiasm@0 352 m_oNotes = outputNumber++;
matthiasm@0 353
matthiasm@0 354 return outputs;
matthiasm@0 355 }
matthiasm@0 356
matthiasm@0 357 bool
matthiasm@36 358 PYinVamp::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 359 {
matthiasm@0 360 if (channels < getMinChannelCount() ||
matthiasm@0 361 channels > getMaxChannelCount()) return false;
matthiasm@0 362
Chris@9 363 /*
matthiasm@36 364 std::cerr << "PYinVamp::initialise: channels = " << channels
matthiasm@0 365 << ", stepSize = " << stepSize << ", blockSize = " << blockSize
matthiasm@0 366 << std::endl;
Chris@9 367 */
matthiasm@0 368 m_channels = channels;
matthiasm@0 369 m_stepSize = stepSize;
matthiasm@0 370 m_blockSize = blockSize;
matthiasm@0 371
matthiasm@0 372 reset();
matthiasm@0 373
matthiasm@0 374 return true;
matthiasm@0 375 }
matthiasm@0 376
matthiasm@0 377 void
matthiasm@36 378 PYinVamp::reset()
matthiasm@0 379 {
matthiasm@0 380 m_yin.setThresholdDistr(m_threshDistr);
matthiasm@0 381 m_yin.setFrameSize(m_blockSize);
matthiasm@70 382 m_yin.setFast(!m_preciseTime);
matthiasm@0 383
matthiasm@0 384 m_pitchProb.clear();
matthiasm@0 385 m_timestamp.clear();
Chris@9 386 /*
matthiasm@36 387 std::cerr << "PYinVamp::reset"
matthiasm@0 388 << ", blockSize = " << m_blockSize
matthiasm@0 389 << std::endl;
Chris@9 390 */
matthiasm@0 391 }
matthiasm@0 392
matthiasm@36 393 PYinVamp::FeatureSet
matthiasm@36 394 PYinVamp::process(const float *const *inputBuffers, RealTime timestamp)
matthiasm@0 395 {
matthiasm@77 396 int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4;
matthiasm@77 397 timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset, lrintf(m_inputSampleRate));
matthiasm@77 398
matthiasm@0 399 FeatureSet fs;
matthiasm@0 400
matthiasm@46 401 float rms = 0;
matthiasm@46 402
matthiasm@0 403 double *dInputBuffers = new double[m_blockSize];
matthiasm@46 404 for (size_t i = 0; i < m_blockSize; ++i) {
matthiasm@46 405 dInputBuffers[i] = inputBuffers[0][i];
matthiasm@46 406 rms += inputBuffers[0][i] * inputBuffers[0][i];
matthiasm@46 407 }
matthiasm@46 408 rms /= m_blockSize;
matthiasm@46 409 rms = sqrt(rms);
matthiasm@72 410 // float m_lowAmp = 0.5;
matthiasm@72 411 bool isLowAmplitude = (rms < m_lowAmp);
matthiasm@72 412 float factor = ((rms+0.01*m_lowAmp)/(1.01*m_lowAmp));
matthiasm@65 413 // std::cerr << rms << " " << factor << std::endl;
matthiasm@0 414
matthiasm@0 415 Yin::YinOutput yo = m_yin.processProbabilisticYin(dInputBuffers);
matthiasm@27 416 delete [] dInputBuffers;
matthiasm@27 417
matthiasm@27 418 // First, get the things out of the way that we don't want to output
matthiasm@27 419 // immediately, but instead save for later.
matthiasm@27 420 vector<pair<double, double> > tempPitchProb;
matthiasm@27 421 for (size_t iCandidate = 0; iCandidate < yo.freqProb.size(); ++iCandidate)
matthiasm@27 422 {
matthiasm@27 423 double tempPitch = 12 * std::log(yo.freqProb[iCandidate].first/440)/std::log(2.) + 69;
matthiasm@50 424 if (!isLowAmplitude)
matthiasm@46 425 tempPitchProb.push_back(pair<double, double>
matthiasm@46 426 (tempPitch, yo.freqProb[iCandidate].second));
matthiasm@65 427 else {
matthiasm@46 428 tempPitchProb.push_back(pair<double, double>
matthiasm@65 429 (tempPitch, yo.freqProb[iCandidate].second*factor));
matthiasm@65 430 }
matthiasm@27 431 }
matthiasm@27 432 m_pitchProb.push_back(tempPitchProb);
matthiasm@27 433 m_timestamp.push_back(timestamp);
matthiasm@27 434
matthiasm@27 435 // F0 CANDIDATES
matthiasm@0 436 Feature f;
matthiasm@0 437 f.hasTimestamp = true;
matthiasm@0 438 f.timestamp = timestamp;
matthiasm@0 439 for (size_t i = 0; i < yo.freqProb.size(); ++i)
matthiasm@0 440 {
matthiasm@0 441 f.values.push_back(yo.freqProb[i].first);
matthiasm@0 442 }
matthiasm@0 443 fs[m_oF0Candidates].push_back(f);
matthiasm@0 444
matthiasm@27 445 // VOICEDPROB
matthiasm@0 446 f.values.clear();
matthiasm@0 447 float voicedProb = 0;
matthiasm@0 448 for (size_t i = 0; i < yo.freqProb.size(); ++i)
matthiasm@0 449 {
matthiasm@0 450 f.values.push_back(yo.freqProb[i].second);
matthiasm@0 451 voicedProb += yo.freqProb[i].second;
matthiasm@0 452 }
matthiasm@0 453 fs[m_oF0Probs].push_back(f);
matthiasm@0 454
matthiasm@0 455 f.values.push_back(voicedProb);
matthiasm@0 456 fs[m_oVoicedProb].push_back(f);
matthiasm@0 457
matthiasm@27 458 // SALIENCE -- maybe this should eventually disappear
matthiasm@0 459 f.values.clear();
matthiasm@0 460 float salienceSum = 0;
matthiasm@0 461 for (size_t iBin = 0; iBin < yo.salience.size(); ++iBin)
matthiasm@0 462 {
matthiasm@0 463 f.values.push_back(yo.salience[iBin]);
matthiasm@0 464 salienceSum += yo.salience[iBin];
matthiasm@0 465 }
matthiasm@0 466 fs[m_oCandidateSalience].push_back(f);
matthiasm@0 467
matthiasm@0 468 return fs;
matthiasm@0 469 }
matthiasm@0 470
matthiasm@36 471 PYinVamp::FeatureSet
matthiasm@36 472 PYinVamp::getRemainingFeatures()
matthiasm@0 473 {
matthiasm@0 474 FeatureSet fs;
matthiasm@0 475 Feature f;
matthiasm@0 476 f.hasTimestamp = true;
matthiasm@0 477 f.hasDuration = false;
matthiasm@0 478
Chris@4 479 if (m_pitchProb.empty()) {
Chris@4 480 return fs;
Chris@4 481 }
Chris@4 482
matthiasm@0 483 // MONO-PITCH STUFF
matthiasm@0 484 MonoPitch mp;
matthiasm@0 485 vector<float> mpOut = mp.process(m_pitchProb);
matthiasm@0 486 for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame)
matthiasm@0 487 {
matthiasm@0 488 if (mpOut[iFrame] < 0 && (m_outputUnvoiced==0)) continue;
matthiasm@0 489 f.timestamp = m_timestamp[iFrame];
matthiasm@0 490 f.values.clear();
matthiasm@0 491 if (m_outputUnvoiced == 1)
matthiasm@0 492 {
matthiasm@26 493 f.values.push_back(fabs(mpOut[iFrame]));
matthiasm@0 494 } else {
matthiasm@0 495 f.values.push_back(mpOut[iFrame]);
matthiasm@0 496 }
matthiasm@0 497
matthiasm@0 498 fs[m_oSmoothedPitchTrack].push_back(f);
matthiasm@0 499 }
matthiasm@0 500
matthiasm@1 501 // MONO-NOTE STUFF
matthiasm@67 502 std::cerr << "Mono Note Stuff" << std::endl;
matthiasm@1 503 MonoNote mn;
matthiasm@1 504 std::vector<std::vector<std::pair<double, double> > > smoothedPitch;
matthiasm@1 505 for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) {
matthiasm@1 506 std::vector<std::pair<double, double> > temp;
matthiasm@1 507 if (mpOut[iFrame] > 0)
matthiasm@1 508 {
matthiasm@1 509 double tempPitch = 12 * std::log(mpOut[iFrame]/440)/std::log(2.) + 69;
matthiasm@1 510 temp.push_back(std::pair<double,double>(tempPitch, .9));
matthiasm@1 511 }
matthiasm@1 512 smoothedPitch.push_back(temp);
matthiasm@1 513 }
matthiasm@0 514 // vector<MonoNote::FrameOutput> mnOut = mn.process(m_pitchProb);
matthiasm@1 515 vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch);
matthiasm@1 516
matthiasm@6 517 // turning feature into a note feature
matthiasm@1 518 f.hasTimestamp = true;
matthiasm@1 519 f.hasDuration = true;
matthiasm@1 520 f.values.clear();
matthiasm@6 521
matthiasm@6 522 int onsetFrame = 0;
matthiasm@6 523 bool isVoiced = 0;
matthiasm@6 524 bool oldIsVoiced = 0;
matthiasm@6 525 size_t nFrame = m_pitchProb.size();
matthiasm@1 526
matthiasm@6 527 std::vector<float> notePitchTrack; // collects pitches for one note at a time
matthiasm@6 528 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
matthiasm@1 529 {
matthiasm@6 530 isVoiced = mnOut[iFrame].noteState < 3 && smoothedPitch[iFrame].size() > 0;
matthiasm@6 531 if (isVoiced && iFrame != nFrame-1)
matthiasm@1 532 {
matthiasm@6 533 if (oldIsVoiced == 0) // beginning of a note
matthiasm@1 534 {
matthiasm@6 535 onsetFrame = iFrame;
matthiasm@6 536 notePitchTrack.clear();
matthiasm@1 537 }
matthiasm@6 538 float pitch = smoothedPitch[iFrame][0].first;
matthiasm@6 539 notePitchTrack.push_back(pitch); // add to the note's pitch track
matthiasm@6 540 } else { // not currently voiced
matthiasm@6 541 if (oldIsVoiced == 1 && notePitchTrack.size() > 4) // end of the note
matthiasm@6 542 {
matthiasm@1 543 std::sort(notePitchTrack.begin(), notePitchTrack.end());
matthiasm@6 544 float medianPitch = notePitchTrack[notePitchTrack.size()/2];
matthiasm@6 545 float medianFreq = std::pow(2,(medianPitch - 69) / 12) * 440;
matthiasm@6 546 f.values.clear();
matthiasm@6 547 f.values.push_back(medianFreq);
matthiasm@6 548 f.timestamp = m_timestamp[onsetFrame];
matthiasm@6 549 f.duration = m_timestamp[iFrame] - m_timestamp[onsetFrame];
matthiasm@5 550 fs[m_oNotes].push_back(f);
matthiasm@1 551 }
matthiasm@1 552 }
matthiasm@6 553 oldIsVoiced = isVoiced;
matthiasm@1 554 }
matthiasm@0 555 return fs;
matthiasm@0 556 }