annotate LocalCandidatePYIN.cpp @ 44:e5ccda2c06d9 tony

fixed some issues (in a very slightly hacky way) -- need to revisit.
author matthiasm
date Tue, 04 Feb 2014 22:31:26 +0000
parents ae21806fe84b
children 68812db649e6
rev   line source
matthiasm@32 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
matthiasm@32 2
matthiasm@32 3 /*
matthiasm@32 4 pYIN - A fundamental frequency estimator for monophonic audio
matthiasm@32 5 Centre for Digital Music, Queen Mary, University of London.
matthiasm@32 6
matthiasm@32 7 This program is free software; you can redistribute it and/or
matthiasm@32 8 modify it under the terms of the GNU General Public License as
matthiasm@32 9 published by the Free Software Foundation; either version 2 of the
matthiasm@32 10 License, or (at your option) any later version. See the file
matthiasm@32 11 COPYING included with this distribution for more information.
matthiasm@32 12 */
matthiasm@32 13
matthiasm@32 14 #include "LocalCandidatePYIN.h"
matthiasm@32 15 #include "MonoPitch.h"
matthiasm@32 16 #include "YinUtil.h"
matthiasm@32 17
matthiasm@32 18 #include "vamp-sdk/FFT.h"
matthiasm@32 19
matthiasm@32 20 #include <vector>
matthiasm@32 21 #include <algorithm>
matthiasm@32 22
matthiasm@32 23 #include <cstdio>
matthiasm@32 24 #include <sstream>
matthiasm@32 25 // #include <iostream>
matthiasm@32 26 #include <cmath>
matthiasm@32 27 #include <complex>
Chris@39 28 #include <map>
matthiasm@32 29
matthiasm@32 30 using std::string;
matthiasm@32 31 using std::vector;
Chris@39 32 using std::map;
matthiasm@32 33 using Vamp::RealTime;
matthiasm@32 34
matthiasm@32 35
matthiasm@32 36 LocalCandidatePYIN::LocalCandidatePYIN(float inputSampleRate) :
matthiasm@32 37 Plugin(inputSampleRate),
matthiasm@32 38 m_channels(0),
matthiasm@32 39 m_stepSize(256),
matthiasm@32 40 m_blockSize(2048),
matthiasm@32 41 m_fmin(40),
matthiasm@32 42 m_fmax(700),
matthiasm@32 43 m_yin(2048, inputSampleRate, 0.0),
matthiasm@32 44 m_oPitchTrackCandidates(0),
matthiasm@32 45 m_threshDistr(2.0f),
matthiasm@32 46 m_outputUnvoiced(0.0f),
matthiasm@32 47 m_pitchProb(0),
matthiasm@32 48 m_timestamp(0),
matthiasm@32 49 m_nCandidate(20)
matthiasm@32 50 {
matthiasm@32 51 }
matthiasm@32 52
matthiasm@32 53 LocalCandidatePYIN::~LocalCandidatePYIN()
matthiasm@32 54 {
matthiasm@32 55 }
matthiasm@32 56
matthiasm@32 57 string
matthiasm@32 58 LocalCandidatePYIN::getIdentifier() const
matthiasm@32 59 {
matthiasm@32 60 return "localcandidatepyin";
matthiasm@32 61 }
matthiasm@32 62
matthiasm@32 63 string
matthiasm@32 64 LocalCandidatePYIN::getName() const
matthiasm@32 65 {
matthiasm@32 66 return "Local Candidate PYIN";
matthiasm@32 67 }
matthiasm@32 68
matthiasm@32 69 string
matthiasm@32 70 LocalCandidatePYIN::getDescription() const
matthiasm@32 71 {
matthiasm@32 72 return "Monophonic pitch and note tracking based on a probabilistic Yin extension.";
matthiasm@32 73 }
matthiasm@32 74
matthiasm@32 75 string
matthiasm@32 76 LocalCandidatePYIN::getMaker() const
matthiasm@32 77 {
matthiasm@32 78 return "Matthias Mauch";
matthiasm@32 79 }
matthiasm@32 80
matthiasm@32 81 int
matthiasm@32 82 LocalCandidatePYIN::getPluginVersion() const
matthiasm@32 83 {
matthiasm@32 84 // Increment this each time you release a version that behaves
matthiasm@32 85 // differently from the previous one
matthiasm@32 86 return 1;
matthiasm@32 87 }
matthiasm@32 88
matthiasm@32 89 string
matthiasm@32 90 LocalCandidatePYIN::getCopyright() const
matthiasm@32 91 {
matthiasm@32 92 return "GPL";
matthiasm@32 93 }
matthiasm@32 94
matthiasm@32 95 LocalCandidatePYIN::InputDomain
matthiasm@32 96 LocalCandidatePYIN::getInputDomain() const
matthiasm@32 97 {
matthiasm@32 98 return TimeDomain;
matthiasm@32 99 }
matthiasm@32 100
matthiasm@32 101 size_t
matthiasm@32 102 LocalCandidatePYIN::getPreferredBlockSize() const
matthiasm@32 103 {
matthiasm@32 104 return 2048;
matthiasm@32 105 }
matthiasm@32 106
matthiasm@32 107 size_t
matthiasm@32 108 LocalCandidatePYIN::getPreferredStepSize() const
matthiasm@32 109 {
matthiasm@32 110 return 256;
matthiasm@32 111 }
matthiasm@32 112
matthiasm@32 113 size_t
matthiasm@32 114 LocalCandidatePYIN::getMinChannelCount() const
matthiasm@32 115 {
matthiasm@32 116 return 1;
matthiasm@32 117 }
matthiasm@32 118
matthiasm@32 119 size_t
matthiasm@32 120 LocalCandidatePYIN::getMaxChannelCount() const
matthiasm@32 121 {
matthiasm@32 122 return 1;
matthiasm@32 123 }
matthiasm@32 124
matthiasm@32 125 LocalCandidatePYIN::ParameterList
matthiasm@32 126 LocalCandidatePYIN::getParameterDescriptors() const
matthiasm@32 127 {
matthiasm@32 128 ParameterList list;
matthiasm@32 129
matthiasm@32 130 ParameterDescriptor d;
matthiasm@32 131
matthiasm@32 132 d.identifier = "threshdistr";
matthiasm@32 133 d.name = "Yin threshold distribution";
matthiasm@32 134 d.description = ".";
matthiasm@32 135 d.unit = "";
matthiasm@32 136 d.minValue = 0.0f;
matthiasm@32 137 d.maxValue = 7.0f;
matthiasm@32 138 d.defaultValue = 2.0f;
matthiasm@32 139 d.isQuantized = true;
matthiasm@32 140 d.quantizeStep = 1.0f;
matthiasm@32 141 d.valueNames.push_back("Uniform");
matthiasm@32 142 d.valueNames.push_back("Beta (mean 0.10)");
matthiasm@32 143 d.valueNames.push_back("Beta (mean 0.15)");
matthiasm@32 144 d.valueNames.push_back("Beta (mean 0.20)");
matthiasm@32 145 d.valueNames.push_back("Beta (mean 0.30)");
matthiasm@32 146 d.valueNames.push_back("Single Value 0.10");
matthiasm@32 147 d.valueNames.push_back("Single Value 0.15");
matthiasm@32 148 d.valueNames.push_back("Single Value 0.20");
matthiasm@32 149 list.push_back(d);
matthiasm@32 150
matthiasm@32 151 d.identifier = "outputunvoiced";
matthiasm@32 152 d.valueNames.clear();
matthiasm@32 153 d.name = "Output estimates classified as unvoiced?";
matthiasm@32 154 d.description = ".";
matthiasm@32 155 d.unit = "";
matthiasm@32 156 d.minValue = 0.0f;
matthiasm@32 157 d.maxValue = 2.0f;
matthiasm@32 158 d.defaultValue = 0.0f;
matthiasm@32 159 d.isQuantized = true;
matthiasm@32 160 d.quantizeStep = 1.0f;
matthiasm@32 161 d.valueNames.push_back("No");
matthiasm@32 162 d.valueNames.push_back("Yes");
matthiasm@32 163 d.valueNames.push_back("Yes, as negative frequencies");
matthiasm@32 164 list.push_back(d);
matthiasm@32 165
matthiasm@32 166 return list;
matthiasm@32 167 }
matthiasm@32 168
matthiasm@32 169 float
matthiasm@32 170 LocalCandidatePYIN::getParameter(string identifier) const
matthiasm@32 171 {
matthiasm@32 172 if (identifier == "threshdistr") {
matthiasm@32 173 return m_threshDistr;
matthiasm@32 174 }
matthiasm@32 175 if (identifier == "outputunvoiced") {
matthiasm@32 176 return m_outputUnvoiced;
matthiasm@32 177 }
matthiasm@32 178 return 0.f;
matthiasm@32 179 }
matthiasm@32 180
matthiasm@32 181 void
matthiasm@32 182 LocalCandidatePYIN::setParameter(string identifier, float value)
matthiasm@32 183 {
matthiasm@32 184 if (identifier == "threshdistr")
matthiasm@32 185 {
matthiasm@32 186 m_threshDistr = value;
matthiasm@32 187 }
matthiasm@32 188 if (identifier == "outputunvoiced")
matthiasm@32 189 {
matthiasm@32 190 m_outputUnvoiced = value;
matthiasm@32 191 }
matthiasm@32 192
matthiasm@32 193 }
matthiasm@32 194
matthiasm@32 195 LocalCandidatePYIN::ProgramList
matthiasm@32 196 LocalCandidatePYIN::getPrograms() const
matthiasm@32 197 {
matthiasm@32 198 ProgramList list;
matthiasm@32 199 return list;
matthiasm@32 200 }
matthiasm@32 201
matthiasm@32 202 string
matthiasm@32 203 LocalCandidatePYIN::getCurrentProgram() const
matthiasm@32 204 {
matthiasm@32 205 return ""; // no programs
matthiasm@32 206 }
matthiasm@32 207
matthiasm@32 208 void
matthiasm@32 209 LocalCandidatePYIN::selectProgram(string name)
matthiasm@32 210 {
matthiasm@32 211 }
matthiasm@32 212
matthiasm@32 213 LocalCandidatePYIN::OutputList
matthiasm@32 214 LocalCandidatePYIN::getOutputDescriptors() const
matthiasm@32 215 {
matthiasm@32 216 OutputList outputs;
matthiasm@32 217
matthiasm@32 218 OutputDescriptor d;
matthiasm@32 219
matthiasm@32 220 int outputNumber = 0;
matthiasm@32 221
matthiasm@32 222 d.identifier = "pitchtrackcandidates";
matthiasm@32 223 d.name = "Pitch track candidates";
matthiasm@32 224 d.description = "Multiple candidate pitch tracks.";
matthiasm@32 225 d.unit = "Hz";
matthiasm@32 226 d.hasFixedBinCount = false;
matthiasm@32 227 d.hasKnownExtents = true;
matthiasm@32 228 d.minValue = m_fmin;
Chris@39 229 d.maxValue = 500; //!!!???
matthiasm@32 230 d.isQuantized = false;
matthiasm@32 231 d.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@32 232 d.sampleRate = (m_inputSampleRate / m_stepSize);
matthiasm@32 233 d.hasDuration = false;
matthiasm@32 234 outputs.push_back(d);
matthiasm@32 235
matthiasm@32 236 return outputs;
matthiasm@32 237 }
matthiasm@32 238
matthiasm@32 239 bool
matthiasm@32 240 LocalCandidatePYIN::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@32 241 {
matthiasm@32 242 if (channels < getMinChannelCount() ||
matthiasm@32 243 channels > getMaxChannelCount()) return false;
matthiasm@32 244
matthiasm@32 245 /*
matthiasm@32 246 std::cerr << "LocalCandidatePYIN::initialise: channels = " << channels
matthiasm@32 247 << ", stepSize = " << stepSize << ", blockSize = " << blockSize
matthiasm@32 248 << std::endl;
matthiasm@32 249 */
matthiasm@32 250 m_channels = channels;
matthiasm@32 251 m_stepSize = stepSize;
matthiasm@32 252 m_blockSize = blockSize;
matthiasm@32 253
matthiasm@32 254 reset();
matthiasm@32 255
matthiasm@32 256 return true;
matthiasm@32 257 }
matthiasm@32 258
matthiasm@32 259 void
matthiasm@32 260 LocalCandidatePYIN::reset()
matthiasm@32 261 {
matthiasm@32 262 m_yin.setThresholdDistr(m_threshDistr);
matthiasm@32 263 m_yin.setFrameSize(m_blockSize);
matthiasm@32 264
matthiasm@32 265 m_pitchProb.clear();
matthiasm@32 266 for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate)
matthiasm@32 267 {
matthiasm@32 268 m_pitchProb.push_back(vector<vector<pair<double, double> > >());
matthiasm@32 269 }
matthiasm@32 270 m_timestamp.clear();
matthiasm@32 271 /*
matthiasm@32 272 std::cerr << "LocalCandidatePYIN::reset"
matthiasm@32 273 << ", blockSize = " << m_blockSize
matthiasm@32 274 << std::endl;
matthiasm@32 275 */
matthiasm@32 276 }
matthiasm@32 277
matthiasm@32 278 LocalCandidatePYIN::FeatureSet
matthiasm@32 279 LocalCandidatePYIN::process(const float *const *inputBuffers, RealTime timestamp)
matthiasm@32 280 {
matthiasm@32 281 timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/4, lrintf(m_inputSampleRate));
matthiasm@32 282
matthiasm@32 283 double *dInputBuffers = new double[m_blockSize];
matthiasm@32 284 for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i];
matthiasm@32 285
matthiasm@32 286 size_t yinBufferSize = m_blockSize/2;
matthiasm@32 287 double* yinBuffer = new double[yinBufferSize];
matthiasm@32 288 YinUtil::fastDifference(dInputBuffers, yinBuffer, yinBufferSize);
matthiasm@32 289
matthiasm@32 290 delete [] dInputBuffers;
matthiasm@32 291
matthiasm@32 292 YinUtil::cumulativeDifference(yinBuffer, yinBufferSize);
matthiasm@32 293
matthiasm@32 294 for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate)
matthiasm@32 295 {
matthiasm@32 296 float minFrequency = m_fmin * std::pow(2,(3.0*iCandidate)/12);
matthiasm@32 297 float maxFrequency = m_fmin * std::pow(2,(3.0*iCandidate+9)/12);
matthiasm@34 298 vector<double> peakProbability = YinUtil::yinProb(yinBuffer,
matthiasm@34 299 m_threshDistr,
matthiasm@34 300 yinBufferSize,
matthiasm@34 301 m_inputSampleRate/maxFrequency,
matthiasm@34 302 m_inputSampleRate/minFrequency);
matthiasm@32 303
matthiasm@32 304 vector<pair<double, double> > tempPitchProb;
matthiasm@32 305 for (size_t iBuf = 0; iBuf < yinBufferSize; ++iBuf)
matthiasm@32 306 {
matthiasm@32 307 if (peakProbability[iBuf] > 0)
matthiasm@32 308 {
matthiasm@32 309 double currentF0 =
matthiasm@32 310 m_inputSampleRate * (1.0 /
matthiasm@32 311 YinUtil::parabolicInterpolation(yinBuffer, iBuf, yinBufferSize));
matthiasm@32 312 double tempPitch = 12 * std::log(currentF0/440)/std::log(2.) + 69;
matthiasm@32 313 tempPitchProb.push_back(pair<double, double>(tempPitch, peakProbability[iBuf]));
matthiasm@32 314 }
matthiasm@32 315 }
matthiasm@32 316 m_pitchProb[iCandidate].push_back(tempPitchProb);
matthiasm@32 317 }
matthiasm@32 318 m_timestamp.push_back(timestamp);
matthiasm@32 319
Chris@39 320 return FeatureSet();
matthiasm@32 321 }
matthiasm@32 322
matthiasm@32 323 LocalCandidatePYIN::FeatureSet
matthiasm@32 324 LocalCandidatePYIN::getRemainingFeatures()
matthiasm@32 325 {
Chris@39 326 // timestamp -> candidate number -> value
Chris@39 327 map<RealTime, map<int, float> > featureValues;
matthiasm@32 328
matthiasm@37 329 // std::cerr << "in remaining features" << std::endl;
matthiasm@32 330
matthiasm@32 331 if (m_pitchProb.empty()) {
Chris@39 332 return FeatureSet();
matthiasm@32 333 }
matthiasm@32 334
matthiasm@32 335 // MONO-PITCH STUFF
matthiasm@32 336 MonoPitch mp;
matthiasm@32 337 size_t nFrame = m_timestamp.size();
matthiasm@32 338 vector<vector<float> > pitchTracks;
matthiasm@32 339 vector<float> freqSum = vector<float>(m_nCandidate);
matthiasm@32 340 vector<float> freqNumber = vector<float>(m_nCandidate);
matthiasm@32 341 vector<float> freqMean = vector<float>(m_nCandidate);
matthiasm@44 342
matthiasm@32 343 for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate)
matthiasm@32 344 {
matthiasm@32 345 pitchTracks.push_back(vector<float>(nFrame));
matthiasm@32 346 vector<float> mpOut = mp.process(m_pitchProb[iCandidate]);
matthiasm@44 347 float prevFreq = 0;
matthiasm@32 348 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
matthiasm@32 349 {
matthiasm@32 350 if (mpOut[iFrame] > 0) {
matthiasm@44 351 if (prevFreq>0 && fabs(log2(mpOut[iFrame]/prevFreq)) > 0.1) {
matthiasm@44 352 for (size_t jFrame = iFrame; jFrame != -1; --jFrame) {
matthiasm@44 353 // hack: setting all freqs to 0 -- will be eliminated later
matthiasm@44 354 pitchTracks[iCandidate][jFrame] = 0;
matthiasm@44 355 }
matthiasm@44 356 break;
matthiasm@44 357 }
matthiasm@32 358 pitchTracks[iCandidate][iFrame] = mpOut[iFrame];
matthiasm@32 359 freqSum[iCandidate] += mpOut[iFrame];
matthiasm@32 360 freqNumber[iCandidate]++;
matthiasm@44 361 prevFreq = mpOut[iFrame];
matthiasm@32 362 }
matthiasm@32 363 }
matthiasm@32 364 freqMean[iCandidate] = freqSum[iCandidate]*1.0/freqNumber[iCandidate];
matthiasm@32 365 }
matthiasm@32 366
matthiasm@37 367 // find near duplicate pitch tracks
matthiasm@34 368 vector<size_t> duplicates;
matthiasm@34 369 for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) {
matthiasm@34 370 for (size_t jCandidate = iCandidate+1; jCandidate < m_nCandidate; ++jCandidate) {
matthiasm@34 371 size_t countEqual = 0;
matthiasm@34 372 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
matthiasm@34 373 {
matthiasm@34 374 if (fabs(pitchTracks[iCandidate][iFrame]/pitchTracks[jCandidate][iFrame]-1)<0.01)
matthiasm@34 375 countEqual++;
matthiasm@34 376 }
matthiasm@34 377 if (countEqual * 1.0 / nFrame > 0.8) {
matthiasm@34 378 if (freqNumber[iCandidate] > freqNumber[jCandidate]) {
matthiasm@34 379 duplicates.push_back(jCandidate);
matthiasm@34 380 } else {
matthiasm@34 381 duplicates.push_back(iCandidate);
matthiasm@34 382 }
matthiasm@34 383 }
matthiasm@34 384 }
matthiasm@34 385 }
matthiasm@34 386
matthiasm@37 387 // now find non-duplicate pitch tracks
Chris@39 388 map<int, int> candidateActuals;
Chris@39 389 map<int, std::string> candidateLabels;
Chris@39 390
matthiasm@32 391 int actualCandidateNumber = 0;
matthiasm@32 392 for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) {
matthiasm@34 393 bool isDuplicate = false;
matthiasm@34 394 for (size_t i = 0; i < duplicates.size(); ++i) {
matthiasm@37 395 // std::cerr << duplicates[i] << std::endl;
matthiasm@34 396 if (duplicates[i] == iCandidate) {
matthiasm@34 397 isDuplicate = true;
matthiasm@34 398 break;
matthiasm@34 399 }
matthiasm@34 400 }
matthiasm@34 401 if (!isDuplicate && freqNumber[iCandidate] > 0.8*nFrame)
matthiasm@32 402 {
matthiasm@32 403 std::ostringstream convert;
matthiasm@32 404 convert << actualCandidateNumber++;
Chris@39 405 candidateLabels[iCandidate] = convert.str();
Chris@39 406 candidateActuals[iCandidate] = actualCandidateNumber;
matthiasm@32 407 std::cerr << freqNumber[iCandidate] << " " << freqMean[iCandidate] << std::endl;
matthiasm@32 408 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
matthiasm@32 409 {
matthiasm@32 410 if (pitchTracks[iCandidate][iFrame] > 0)
matthiasm@32 411 {
Chris@39 412 featureValues[m_timestamp[iFrame]][iCandidate] =
Chris@39 413 pitchTracks[iCandidate][iFrame];
matthiasm@32 414 }
matthiasm@32 415 }
matthiasm@32 416 }
matthiasm@43 417 // fs[m_oPitchTrackCandidates].push_back(f);
matthiasm@32 418 }
matthiasm@32 419
Chris@39 420 // adapt our features so as to return a stack of candidate values
Chris@39 421 // per frame
Chris@39 422
Chris@39 423 FeatureSet fs;
Chris@39 424
Chris@39 425 for (map<RealTime, map<int, float> >::const_iterator i =
Chris@39 426 featureValues.begin(); i != featureValues.end(); ++i) {
Chris@39 427 Feature f;
Chris@39 428 f.hasTimestamp = true;
Chris@39 429 f.timestamp = i->first;
Chris@39 430 int nextCandidate = candidateActuals.begin()->second;
Chris@39 431 for (map<int, float>::const_iterator j =
Chris@39 432 i->second.begin(); j != i->second.end(); ++j) {
Chris@39 433 while (candidateActuals[j->first] > nextCandidate) {
Chris@39 434 f.values.push_back(0);
Chris@39 435 ++nextCandidate;
Chris@39 436 }
Chris@39 437 f.values.push_back(j->second);
Chris@39 438 nextCandidate = j->first + 1;
Chris@39 439 }
Chris@39 440 //!!! can't use labels?
Chris@39 441 fs[0].push_back(f);
Chris@39 442 }
matthiasm@32 443
matthiasm@32 444 return fs;
matthiasm@32 445 }