annotate CepstralPitchTracker.cpp @ 35:2f5b169e4a3b

Integrate NoteHypothesis class, build fixes
author Chris Cannam
date Thu, 19 Jul 2012 13:46:45 +0100
parents 2c175adf8736
children 822cf7b8e070
rev   line source
Chris@3 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@3 2 /*
Chris@31 3 This file is Copyright (c) 2012 Chris Cannam
Chris@31 4
Chris@3 5 Permission is hereby granted, free of charge, to any person
Chris@3 6 obtaining a copy of this software and associated documentation
Chris@3 7 files (the "Software"), to deal in the Software without
Chris@3 8 restriction, including without limitation the rights to use, copy,
Chris@3 9 modify, merge, publish, distribute, sublicense, and/or sell copies
Chris@3 10 of the Software, and to permit persons to whom the Software is
Chris@3 11 furnished to do so, subject to the following conditions:
Chris@3 12
Chris@3 13 The above copyright notice and this permission notice shall be
Chris@3 14 included in all copies or substantial portions of the Software.
Chris@3 15
Chris@3 16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
Chris@3 17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
Chris@3 18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
Chris@3 19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
Chris@3 20 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
Chris@3 21 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
Chris@3 22 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Chris@3 23 */
Chris@3 24
Chris@31 25 #include "CepstralPitchTracker.h"
Chris@3 26
Chris@26 27 #include "vamp-sdk/FFT.h"
Chris@26 28
Chris@3 29 #include <vector>
Chris@3 30 #include <algorithm>
Chris@3 31
Chris@3 32 #include <cstdio>
Chris@3 33 #include <cmath>
Chris@3 34 #include <complex>
Chris@3 35
Chris@3 36 using std::string;
Chris@7 37 using std::vector;
Chris@16 38 using Vamp::RealTime;
Chris@7 39
Chris@16 40
Chris@31 41 CepstralPitchTracker::CepstralPitchTracker(float inputSampleRate) :
Chris@3 42 Plugin(inputSampleRate),
Chris@3 43 m_channels(0),
Chris@3 44 m_stepSize(256),
Chris@3 45 m_blockSize(1024),
Chris@3 46 m_fmin(50),
Chris@25 47 m_fmax(900),
Chris@18 48 m_vflen(1),
Chris@3 49 m_binFrom(0),
Chris@3 50 m_binTo(0),
Chris@15 51 m_bins(0)
Chris@3 52 {
Chris@3 53 }
Chris@3 54
Chris@31 55 CepstralPitchTracker::~CepstralPitchTracker()
Chris@3 56 {
Chris@3 57 }
Chris@3 58
Chris@3 59 string
Chris@31 60 CepstralPitchTracker::getIdentifier() const
Chris@3 61 {
Chris@3 62 return "cepstrum-pitch";
Chris@3 63 }
Chris@3 64
Chris@3 65 string
Chris@31 66 CepstralPitchTracker::getName() const
Chris@3 67 {
Chris@3 68 return "Cepstrum Pitch Tracker";
Chris@3 69 }
Chris@3 70
Chris@3 71 string
Chris@31 72 CepstralPitchTracker::getDescription() const
Chris@3 73 {
Chris@3 74 return "Estimate f0 of monophonic material using a cepstrum method.";
Chris@3 75 }
Chris@3 76
Chris@3 77 string
Chris@31 78 CepstralPitchTracker::getMaker() const
Chris@3 79 {
Chris@3 80 return "Chris Cannam";
Chris@3 81 }
Chris@3 82
Chris@3 83 int
Chris@31 84 CepstralPitchTracker::getPluginVersion() const
Chris@3 85 {
Chris@3 86 // Increment this each time you release a version that behaves
Chris@3 87 // differently from the previous one
Chris@3 88 return 1;
Chris@3 89 }
Chris@3 90
Chris@3 91 string
Chris@31 92 CepstralPitchTracker::getCopyright() const
Chris@3 93 {
Chris@3 94 return "Freely redistributable (BSD license)";
Chris@3 95 }
Chris@3 96
Chris@31 97 CepstralPitchTracker::InputDomain
Chris@31 98 CepstralPitchTracker::getInputDomain() const
Chris@3 99 {
Chris@3 100 return FrequencyDomain;
Chris@3 101 }
Chris@3 102
Chris@3 103 size_t
Chris@31 104 CepstralPitchTracker::getPreferredBlockSize() const
Chris@3 105 {
Chris@3 106 return 1024;
Chris@3 107 }
Chris@3 108
Chris@3 109 size_t
Chris@31 110 CepstralPitchTracker::getPreferredStepSize() const
Chris@3 111 {
Chris@3 112 return 256;
Chris@3 113 }
Chris@3 114
Chris@3 115 size_t
Chris@31 116 CepstralPitchTracker::getMinChannelCount() const
Chris@3 117 {
Chris@3 118 return 1;
Chris@3 119 }
Chris@3 120
Chris@3 121 size_t
Chris@31 122 CepstralPitchTracker::getMaxChannelCount() const
Chris@3 123 {
Chris@3 124 return 1;
Chris@3 125 }
Chris@3 126
Chris@31 127 CepstralPitchTracker::ParameterList
Chris@31 128 CepstralPitchTracker::getParameterDescriptors() const
Chris@3 129 {
Chris@3 130 ParameterList list;
Chris@3 131 return list;
Chris@3 132 }
Chris@3 133
Chris@3 134 float
Chris@31 135 CepstralPitchTracker::getParameter(string identifier) const
Chris@3 136 {
Chris@3 137 return 0.f;
Chris@3 138 }
Chris@3 139
Chris@3 140 void
Chris@31 141 CepstralPitchTracker::setParameter(string identifier, float value)
Chris@3 142 {
Chris@3 143 }
Chris@3 144
Chris@31 145 CepstralPitchTracker::ProgramList
Chris@31 146 CepstralPitchTracker::getPrograms() const
Chris@3 147 {
Chris@3 148 ProgramList list;
Chris@3 149 return list;
Chris@3 150 }
Chris@3 151
Chris@3 152 string
Chris@31 153 CepstralPitchTracker::getCurrentProgram() const
Chris@3 154 {
Chris@3 155 return ""; // no programs
Chris@3 156 }
Chris@3 157
Chris@3 158 void
Chris@31 159 CepstralPitchTracker::selectProgram(string name)
Chris@3 160 {
Chris@3 161 }
Chris@3 162
Chris@31 163 CepstralPitchTracker::OutputList
Chris@31 164 CepstralPitchTracker::getOutputDescriptors() const
Chris@3 165 {
Chris@3 166 OutputList outputs;
Chris@3 167
Chris@3 168 OutputDescriptor d;
Chris@3 169
Chris@3 170 d.identifier = "f0";
Chris@3 171 d.name = "Estimated f0";
Chris@3 172 d.description = "Estimated fundamental frequency";
Chris@3 173 d.unit = "Hz";
Chris@3 174 d.hasFixedBinCount = true;
Chris@3 175 d.binCount = 1;
Chris@3 176 d.hasKnownExtents = true;
Chris@3 177 d.minValue = m_fmin;
Chris@3 178 d.maxValue = m_fmax;
Chris@3 179 d.isQuantized = false;
Chris@3 180 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@3 181 d.sampleRate = (m_inputSampleRate / m_stepSize);
Chris@3 182 d.hasDuration = false;
Chris@3 183 outputs.push_back(d);
Chris@3 184
Chris@16 185 d.identifier = "notes";
Chris@16 186 d.name = "Notes";
Chris@16 187 d.description = "Derived fixed-pitch note frequencies";
Chris@16 188 d.unit = "Hz";
Chris@16 189 d.hasFixedBinCount = true;
Chris@16 190 d.binCount = 1;
Chris@16 191 d.hasKnownExtents = true;
Chris@16 192 d.minValue = m_fmin;
Chris@16 193 d.maxValue = m_fmax;
Chris@16 194 d.isQuantized = false;
Chris@16 195 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@16 196 d.sampleRate = (m_inputSampleRate / m_stepSize);
Chris@16 197 d.hasDuration = true;
Chris@16 198 outputs.push_back(d);
Chris@16 199
Chris@3 200 return outputs;
Chris@3 201 }
Chris@3 202
Chris@3 203 bool
Chris@31 204 CepstralPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@3 205 {
Chris@3 206 if (channels < getMinChannelCount() ||
Chris@3 207 channels > getMaxChannelCount()) return false;
Chris@3 208
Chris@31 209 // std::cerr << "CepstralPitchTracker::initialise: channels = " << channels
Chris@3 210 // << ", stepSize = " << stepSize << ", blockSize = " << blockSize
Chris@3 211 // << std::endl;
Chris@3 212
Chris@3 213 m_channels = channels;
Chris@3 214 m_stepSize = stepSize;
Chris@3 215 m_blockSize = blockSize;
Chris@3 216
Chris@3 217 m_binFrom = int(m_inputSampleRate / m_fmax);
Chris@3 218 m_binTo = int(m_inputSampleRate / m_fmin);
Chris@3 219
Chris@3 220 if (m_binTo >= (int)m_blockSize / 2) {
Chris@3 221 m_binTo = m_blockSize / 2 - 1;
Chris@3 222 }
Chris@3 223
Chris@3 224 m_bins = (m_binTo - m_binFrom) + 1;
Chris@3 225
Chris@3 226 reset();
Chris@3 227
Chris@3 228 return true;
Chris@3 229 }
Chris@3 230
Chris@3 231 void
Chris@31 232 CepstralPitchTracker::reset()
Chris@3 233 {
Chris@3 234 }
Chris@3 235
Chris@3 236 void
Chris@35 237 CepstralPitchTracker::addFeaturesFrom(NoteHypothesis h, FeatureSet &fs)
Chris@30 238 {
Chris@35 239 NoteHypothesis::Estimates es = h.getAcceptedEstimates();
Chris@30 240
Chris@35 241 for (int i = 0; i < (int)es.size(); ++i) {
Chris@30 242 Feature f;
Chris@30 243 f.hasTimestamp = true;
Chris@30 244 f.timestamp = es[i].time;
Chris@30 245 f.values.push_back(es[i].freq);
Chris@30 246 fs[0].push_back(f);
Chris@30 247 }
Chris@30 248
Chris@30 249 Feature nf;
Chris@30 250 nf.hasTimestamp = true;
Chris@30 251 nf.hasDuration = true;
Chris@35 252 NoteHypothesis::Note n = h.getAveragedNote();
Chris@30 253 nf.timestamp = n.time;
Chris@30 254 nf.duration = n.duration;
Chris@30 255 nf.values.push_back(n.freq);
Chris@30 256 fs[1].push_back(nf);
Chris@30 257 }
Chris@30 258
Chris@30 259 void
Chris@31 260 CepstralPitchTracker::filter(const double *cep, double *data)
Chris@3 261 {
Chris@3 262 for (int i = 0; i < m_bins; ++i) {
Chris@5 263 double v = 0;
Chris@5 264 int n = 0;
Chris@5 265 // average according to the vertical filter length
Chris@5 266 for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
Chris@5 267 int ix = i + m_binFrom + j;
Chris@35 268 if (ix >= 0 && ix < (int)m_blockSize) {
Chris@5 269 v += cep[ix];
Chris@5 270 ++n;
Chris@5 271 }
Chris@5 272 }
Chris@15 273 data[i] = v / n;
Chris@3 274 }
Chris@6 275 }
Chris@6 276
Chris@18 277 double
Chris@31 278 CepstralPitchTracker::cubicInterpolate(const double y[4], double x)
Chris@18 279 {
Chris@18 280 double a0 = y[3] - y[2] - y[0] + y[1];
Chris@18 281 double a1 = y[0] - y[1] - a0;
Chris@18 282 double a2 = y[2] - y[0];
Chris@18 283 double a3 = y[1];
Chris@18 284 return
Chris@18 285 a0 * x * x * x +
Chris@18 286 a1 * x * x +
Chris@18 287 a2 * x +
Chris@18 288 a3;
Chris@18 289 }
Chris@18 290
Chris@18 291 double
Chris@31 292 CepstralPitchTracker::findInterpolatedPeak(const double *in, int maxbin)
Chris@18 293 {
Chris@18 294 if (maxbin < 2 || maxbin > m_bins - 3) {
Chris@18 295 return maxbin;
Chris@18 296 }
Chris@18 297
Chris@18 298 double maxval = 0.0;
Chris@18 299 double maxidx = maxbin;
Chris@18 300
Chris@18 301 const int divisions = 10;
Chris@18 302 double y[4];
Chris@18 303
Chris@18 304 y[0] = in[maxbin-1];
Chris@18 305 y[1] = in[maxbin];
Chris@18 306 y[2] = in[maxbin+1];
Chris@18 307 y[3] = in[maxbin+2];
Chris@18 308 for (int i = 0; i < divisions; ++i) {
Chris@18 309 double probe = double(i) / double(divisions);
Chris@18 310 double value = cubicInterpolate(y, probe);
Chris@18 311 if (value > maxval) {
Chris@18 312 maxval = value;
Chris@18 313 maxidx = maxbin + probe;
Chris@18 314 }
Chris@18 315 }
Chris@18 316
Chris@18 317 y[3] = y[2];
Chris@18 318 y[2] = y[1];
Chris@18 319 y[1] = y[0];
Chris@18 320 y[0] = in[maxbin-2];
Chris@18 321 for (int i = 0; i < divisions; ++i) {
Chris@18 322 double probe = double(i) / double(divisions);
Chris@18 323 double value = cubicInterpolate(y, probe);
Chris@18 324 if (value > maxval) {
Chris@18 325 maxval = value;
Chris@18 326 maxidx = maxbin - 1 + probe;
Chris@18 327 }
Chris@18 328 }
Chris@18 329
Chris@18 330 /*
Chris@18 331 std::cerr << "centre = " << maxbin << ": ["
Chris@18 332 << in[maxbin-2] << ","
Chris@18 333 << in[maxbin-1] << ","
Chris@18 334 << in[maxbin] << ","
Chris@18 335 << in[maxbin+1] << ","
Chris@18 336 << in[maxbin+2] << "] -> " << maxidx << std::endl;
Chris@18 337 */
Chris@18 338
Chris@18 339 return maxidx;
Chris@18 340 }
Chris@18 341
Chris@31 342 CepstralPitchTracker::FeatureSet
Chris@31 343 CepstralPitchTracker::process(const float *const *inputBuffers, RealTime timestamp)
Chris@3 344 {
Chris@3 345 FeatureSet fs;
Chris@3 346
Chris@3 347 int bs = m_blockSize;
Chris@3 348 int hs = m_blockSize/2 + 1;
Chris@3 349
Chris@3 350 double *rawcep = new double[bs];
Chris@3 351 double *io = new double[bs];
Chris@3 352 double *logmag = new double[bs];
Chris@3 353
Chris@4 354 // The "inverse symmetric" method. Seems to be the most reliable
Chris@3 355
Chris@25 356 double magmean = 0.0;
Chris@25 357
Chris@3 358 for (int i = 0; i < hs; ++i) {
Chris@3 359
Chris@3 360 double power =
Chris@3 361 inputBuffers[0][i*2 ] * inputBuffers[0][i*2 ] +
Chris@3 362 inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
Chris@3 363 double mag = sqrt(power);
Chris@25 364
Chris@25 365 magmean += mag;
Chris@25 366
Chris@3 367 double lm = log(mag + 0.00000001);
Chris@3 368
Chris@4 369 logmag[i] = lm;
Chris@4 370 if (i > 0) logmag[bs - i] = lm;
Chris@3 371 }
Chris@3 372
Chris@25 373 magmean /= hs;
Chris@25 374 double threshold = 0.1; // for magmean
Chris@25 375
Chris@26 376 Vamp::FFT::inverse(bs, logmag, 0, rawcep, io);
Chris@3 377
Chris@3 378 delete[] logmag;
Chris@3 379 delete[] io;
Chris@3 380
Chris@3 381 int n = m_bins;
Chris@3 382 double *data = new double[n];
Chris@3 383 filter(rawcep, data);
Chris@3 384 delete[] rawcep;
Chris@3 385
Chris@3 386 double maxval = 0.0;
Chris@6 387 int maxbin = -1;
Chris@3 388
Chris@3 389 for (int i = 0; i < n; ++i) {
Chris@3 390 if (data[i] > maxval) {
Chris@3 391 maxval = data[i];
Chris@3 392 maxbin = i;
Chris@3 393 }
Chris@3 394 }
Chris@3 395
Chris@15 396 if (maxbin < 0) {
Chris@15 397 delete[] data;
Chris@15 398 return fs;
Chris@15 399 }
Chris@15 400
Chris@15 401 double nextPeakVal = 0.0;
Chris@15 402 for (int i = 1; i+1 < n; ++i) {
Chris@15 403 if (data[i] > data[i-1] &&
Chris@15 404 data[i] > data[i+1] &&
Chris@15 405 i != maxbin &&
Chris@15 406 data[i] > nextPeakVal) {
Chris@15 407 nextPeakVal = data[i];
Chris@15 408 }
Chris@15 409 }
Chris@8 410
Chris@18 411 double cimax = findInterpolatedPeak(data, maxbin);
Chris@18 412 double peakfreq = m_inputSampleRate / (cimax + m_binFrom);
Chris@15 413
Chris@15 414 double confidence = 0.0;
Chris@15 415 if (nextPeakVal != 0.0) {
Chris@27 416 confidence = (maxval - nextPeakVal) * 10.0;
Chris@25 417 if (magmean < threshold) confidence = 0.0;
Chris@25 418 std::cerr << "magmean = " << magmean << ", confidence = " << confidence << std::endl;
Chris@15 419 }
Chris@15 420
Chris@35 421 NoteHypothesis::Estimate e;
Chris@8 422 e.freq = peakfreq;
Chris@8 423 e.time = timestamp;
Chris@15 424 e.confidence = confidence;
Chris@8 425
Chris@28 426 if (!m_good.accept(e)) {
Chris@13 427
Chris@11 428 int candidate = -1;
Chris@13 429 bool accepted = false;
Chris@13 430
Chris@35 431 for (int i = 0; i < (int)m_possible.size(); ++i) {
Chris@28 432 if (m_possible[i].accept(e)) {
Chris@35 433 if (m_possible[i].getState() == NoteHypothesis::Satisfied) {
Chris@28 434 accepted = true;
Chris@11 435 candidate = i;
Chris@11 436 }
Chris@11 437 break;
Chris@11 438 }
Chris@11 439 }
Chris@12 440
Chris@13 441 if (!accepted) {
Chris@35 442 NoteHypothesis h;
Chris@28 443 h.accept(e); //!!! must succeed as h is new, so perhaps there should be a ctor for this
Chris@13 444 m_possible.push_back(h);
Chris@13 445 }
Chris@13 446
Chris@35 447 if (m_good.getState() == NoteHypothesis::Expired) {
Chris@30 448 addFeaturesFrom(m_good, fs);
Chris@12 449 }
Chris@12 450
Chris@35 451 if (m_good.getState() == NoteHypothesis::Expired ||
Chris@35 452 m_good.getState() == NoteHypothesis::Rejected) {
Chris@11 453 if (candidate >= 0) {
Chris@28 454 m_good = m_possible[candidate];
Chris@11 455 } else {
Chris@35 456 m_good = NoteHypothesis();
Chris@11 457 }
Chris@11 458 }
Chris@8 459
Chris@14 460 // reap rejected/expired hypotheses from possible list
Chris@14 461 Hypotheses toReap = m_possible;
Chris@14 462 m_possible.clear();
Chris@35 463 for (int i = 0; i < (int)toReap.size(); ++i) {
Chris@35 464 NoteHypothesis h = toReap[i];
Chris@35 465 if (h.getState() != NoteHypothesis::Rejected &&
Chris@35 466 h.getState() != NoteHypothesis::Expired) {
Chris@14 467 m_possible.push_back(h);
Chris@14 468 }
Chris@14 469 }
Chris@14 470 }
Chris@14 471
Chris@3 472 delete[] data;
Chris@3 473 return fs;
Chris@3 474 }
Chris@3 475
Chris@31 476 CepstralPitchTracker::FeatureSet
Chris@31 477 CepstralPitchTracker::getRemainingFeatures()
Chris@3 478 {
Chris@3 479 FeatureSet fs;
Chris@35 480 if (m_good.getState() == NoteHypothesis::Satisfied) {
Chris@30 481 addFeaturesFrom(m_good, fs);
Chris@11 482 }
Chris@3 483 return fs;
Chris@3 484 }