annotate CepstralPitchTracker.cpp @ 47:f72a470fe4b5

Pull out mean filter, test it
author Chris Cannam
date Tue, 11 Sep 2012 16:37:47 +0100
parents 822cf7b8e070
children d84049e20c61
rev   line source
Chris@3 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@3 2 /*
Chris@31 3 This file is Copyright (c) 2012 Chris Cannam
Chris@31 4
Chris@3 5 Permission is hereby granted, free of charge, to any person
Chris@3 6 obtaining a copy of this software and associated documentation
Chris@3 7 files (the "Software"), to deal in the Software without
Chris@3 8 restriction, including without limitation the rights to use, copy,
Chris@3 9 modify, merge, publish, distribute, sublicense, and/or sell copies
Chris@3 10 of the Software, and to permit persons to whom the Software is
Chris@3 11 furnished to do so, subject to the following conditions:
Chris@3 12
Chris@3 13 The above copyright notice and this permission notice shall be
Chris@3 14 included in all copies or substantial portions of the Software.
Chris@3 15
Chris@3 16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
Chris@3 17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
Chris@3 18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
Chris@3 19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
Chris@3 20 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
Chris@3 21 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
Chris@3 22 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Chris@3 23 */
Chris@3 24
Chris@31 25 #include "CepstralPitchTracker.h"
Chris@47 26 #include "MeanFilter.h"
Chris@3 27
Chris@26 28 #include "vamp-sdk/FFT.h"
Chris@26 29
Chris@3 30 #include <vector>
Chris@3 31 #include <algorithm>
Chris@3 32
Chris@3 33 #include <cstdio>
Chris@3 34 #include <cmath>
Chris@3 35 #include <complex>
Chris@3 36
Chris@3 37 using std::string;
Chris@7 38 using std::vector;
Chris@16 39 using Vamp::RealTime;
Chris@7 40
Chris@16 41
Chris@31 42 CepstralPitchTracker::CepstralPitchTracker(float inputSampleRate) :
Chris@3 43 Plugin(inputSampleRate),
Chris@3 44 m_channels(0),
Chris@3 45 m_stepSize(256),
Chris@3 46 m_blockSize(1024),
Chris@3 47 m_fmin(50),
Chris@25 48 m_fmax(900),
Chris@18 49 m_vflen(1),
Chris@3 50 m_binFrom(0),
Chris@3 51 m_binTo(0),
Chris@15 52 m_bins(0)
Chris@3 53 {
Chris@3 54 }
Chris@3 55
Chris@31 56 CepstralPitchTracker::~CepstralPitchTracker()
Chris@3 57 {
Chris@3 58 }
Chris@3 59
Chris@3 60 string
Chris@31 61 CepstralPitchTracker::getIdentifier() const
Chris@3 62 {
Chris@39 63 return "cepstral-pitchtracker";
Chris@3 64 }
Chris@3 65
Chris@3 66 string
Chris@31 67 CepstralPitchTracker::getName() const
Chris@3 68 {
Chris@39 69 return "Cepstral Pitch Tracker";
Chris@3 70 }
Chris@3 71
Chris@3 72 string
Chris@31 73 CepstralPitchTracker::getDescription() const
Chris@3 74 {
Chris@3 75 return "Estimate f0 of monophonic material using a cepstrum method.";
Chris@3 76 }
Chris@3 77
Chris@3 78 string
Chris@31 79 CepstralPitchTracker::getMaker() const
Chris@3 80 {
Chris@3 81 return "Chris Cannam";
Chris@3 82 }
Chris@3 83
Chris@3 84 int
Chris@31 85 CepstralPitchTracker::getPluginVersion() const
Chris@3 86 {
Chris@3 87 // Increment this each time you release a version that behaves
Chris@3 88 // differently from the previous one
Chris@3 89 return 1;
Chris@3 90 }
Chris@3 91
Chris@3 92 string
Chris@31 93 CepstralPitchTracker::getCopyright() const
Chris@3 94 {
Chris@3 95 return "Freely redistributable (BSD license)";
Chris@3 96 }
Chris@3 97
Chris@31 98 CepstralPitchTracker::InputDomain
Chris@31 99 CepstralPitchTracker::getInputDomain() const
Chris@3 100 {
Chris@3 101 return FrequencyDomain;
Chris@3 102 }
Chris@3 103
Chris@3 104 size_t
Chris@31 105 CepstralPitchTracker::getPreferredBlockSize() const
Chris@3 106 {
Chris@3 107 return 1024;
Chris@3 108 }
Chris@3 109
Chris@3 110 size_t
Chris@31 111 CepstralPitchTracker::getPreferredStepSize() const
Chris@3 112 {
Chris@3 113 return 256;
Chris@3 114 }
Chris@3 115
Chris@3 116 size_t
Chris@31 117 CepstralPitchTracker::getMinChannelCount() const
Chris@3 118 {
Chris@3 119 return 1;
Chris@3 120 }
Chris@3 121
Chris@3 122 size_t
Chris@31 123 CepstralPitchTracker::getMaxChannelCount() const
Chris@3 124 {
Chris@3 125 return 1;
Chris@3 126 }
Chris@3 127
Chris@31 128 CepstralPitchTracker::ParameterList
Chris@31 129 CepstralPitchTracker::getParameterDescriptors() const
Chris@3 130 {
Chris@3 131 ParameterList list;
Chris@3 132 return list;
Chris@3 133 }
Chris@3 134
Chris@3 135 float
Chris@31 136 CepstralPitchTracker::getParameter(string identifier) const
Chris@3 137 {
Chris@3 138 return 0.f;
Chris@3 139 }
Chris@3 140
Chris@3 141 void
Chris@31 142 CepstralPitchTracker::setParameter(string identifier, float value)
Chris@3 143 {
Chris@3 144 }
Chris@3 145
Chris@31 146 CepstralPitchTracker::ProgramList
Chris@31 147 CepstralPitchTracker::getPrograms() const
Chris@3 148 {
Chris@3 149 ProgramList list;
Chris@3 150 return list;
Chris@3 151 }
Chris@3 152
Chris@3 153 string
Chris@31 154 CepstralPitchTracker::getCurrentProgram() const
Chris@3 155 {
Chris@3 156 return ""; // no programs
Chris@3 157 }
Chris@3 158
Chris@3 159 void
Chris@31 160 CepstralPitchTracker::selectProgram(string name)
Chris@3 161 {
Chris@3 162 }
Chris@3 163
Chris@31 164 CepstralPitchTracker::OutputList
Chris@31 165 CepstralPitchTracker::getOutputDescriptors() const
Chris@3 166 {
Chris@3 167 OutputList outputs;
Chris@3 168
Chris@3 169 OutputDescriptor d;
Chris@3 170
Chris@3 171 d.identifier = "f0";
Chris@3 172 d.name = "Estimated f0";
Chris@3 173 d.description = "Estimated fundamental frequency";
Chris@3 174 d.unit = "Hz";
Chris@3 175 d.hasFixedBinCount = true;
Chris@3 176 d.binCount = 1;
Chris@3 177 d.hasKnownExtents = true;
Chris@3 178 d.minValue = m_fmin;
Chris@3 179 d.maxValue = m_fmax;
Chris@3 180 d.isQuantized = false;
Chris@3 181 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@3 182 d.sampleRate = (m_inputSampleRate / m_stepSize);
Chris@3 183 d.hasDuration = false;
Chris@3 184 outputs.push_back(d);
Chris@3 185
Chris@16 186 d.identifier = "notes";
Chris@16 187 d.name = "Notes";
Chris@16 188 d.description = "Derived fixed-pitch note frequencies";
Chris@16 189 d.unit = "Hz";
Chris@16 190 d.hasFixedBinCount = true;
Chris@16 191 d.binCount = 1;
Chris@16 192 d.hasKnownExtents = true;
Chris@16 193 d.minValue = m_fmin;
Chris@16 194 d.maxValue = m_fmax;
Chris@16 195 d.isQuantized = false;
Chris@16 196 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@16 197 d.sampleRate = (m_inputSampleRate / m_stepSize);
Chris@16 198 d.hasDuration = true;
Chris@16 199 outputs.push_back(d);
Chris@16 200
Chris@3 201 return outputs;
Chris@3 202 }
Chris@3 203
Chris@3 204 bool
Chris@31 205 CepstralPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@3 206 {
Chris@3 207 if (channels < getMinChannelCount() ||
Chris@3 208 channels > getMaxChannelCount()) return false;
Chris@3 209
Chris@31 210 // std::cerr << "CepstralPitchTracker::initialise: channels = " << channels
Chris@3 211 // << ", stepSize = " << stepSize << ", blockSize = " << blockSize
Chris@3 212 // << std::endl;
Chris@3 213
Chris@3 214 m_channels = channels;
Chris@3 215 m_stepSize = stepSize;
Chris@3 216 m_blockSize = blockSize;
Chris@3 217
Chris@3 218 m_binFrom = int(m_inputSampleRate / m_fmax);
Chris@3 219 m_binTo = int(m_inputSampleRate / m_fmin);
Chris@3 220
Chris@3 221 if (m_binTo >= (int)m_blockSize / 2) {
Chris@3 222 m_binTo = m_blockSize / 2 - 1;
Chris@3 223 }
Chris@3 224
Chris@3 225 m_bins = (m_binTo - m_binFrom) + 1;
Chris@3 226
Chris@3 227 reset();
Chris@3 228
Chris@3 229 return true;
Chris@3 230 }
Chris@3 231
Chris@3 232 void
Chris@31 233 CepstralPitchTracker::reset()
Chris@3 234 {
Chris@3 235 }
Chris@3 236
Chris@3 237 void
Chris@35 238 CepstralPitchTracker::addFeaturesFrom(NoteHypothesis h, FeatureSet &fs)
Chris@30 239 {
Chris@35 240 NoteHypothesis::Estimates es = h.getAcceptedEstimates();
Chris@30 241
Chris@35 242 for (int i = 0; i < (int)es.size(); ++i) {
Chris@30 243 Feature f;
Chris@30 244 f.hasTimestamp = true;
Chris@30 245 f.timestamp = es[i].time;
Chris@30 246 f.values.push_back(es[i].freq);
Chris@30 247 fs[0].push_back(f);
Chris@30 248 }
Chris@30 249
Chris@30 250 Feature nf;
Chris@30 251 nf.hasTimestamp = true;
Chris@30 252 nf.hasDuration = true;
Chris@35 253 NoteHypothesis::Note n = h.getAveragedNote();
Chris@30 254 nf.timestamp = n.time;
Chris@30 255 nf.duration = n.duration;
Chris@30 256 nf.values.push_back(n.freq);
Chris@30 257 fs[1].push_back(nf);
Chris@30 258 }
Chris@30 259
Chris@18 260 double
Chris@31 261 CepstralPitchTracker::cubicInterpolate(const double y[4], double x)
Chris@18 262 {
Chris@18 263 double a0 = y[3] - y[2] - y[0] + y[1];
Chris@18 264 double a1 = y[0] - y[1] - a0;
Chris@18 265 double a2 = y[2] - y[0];
Chris@18 266 double a3 = y[1];
Chris@18 267 return
Chris@18 268 a0 * x * x * x +
Chris@18 269 a1 * x * x +
Chris@18 270 a2 * x +
Chris@18 271 a3;
Chris@18 272 }
Chris@18 273
Chris@18 274 double
Chris@31 275 CepstralPitchTracker::findInterpolatedPeak(const double *in, int maxbin)
Chris@18 276 {
Chris@18 277 if (maxbin < 2 || maxbin > m_bins - 3) {
Chris@18 278 return maxbin;
Chris@18 279 }
Chris@18 280
Chris@18 281 double maxval = 0.0;
Chris@18 282 double maxidx = maxbin;
Chris@18 283
Chris@18 284 const int divisions = 10;
Chris@18 285 double y[4];
Chris@18 286
Chris@18 287 y[0] = in[maxbin-1];
Chris@18 288 y[1] = in[maxbin];
Chris@18 289 y[2] = in[maxbin+1];
Chris@18 290 y[3] = in[maxbin+2];
Chris@18 291 for (int i = 0; i < divisions; ++i) {
Chris@18 292 double probe = double(i) / double(divisions);
Chris@18 293 double value = cubicInterpolate(y, probe);
Chris@18 294 if (value > maxval) {
Chris@18 295 maxval = value;
Chris@18 296 maxidx = maxbin + probe;
Chris@18 297 }
Chris@18 298 }
Chris@18 299
Chris@18 300 y[3] = y[2];
Chris@18 301 y[2] = y[1];
Chris@18 302 y[1] = y[0];
Chris@18 303 y[0] = in[maxbin-2];
Chris@18 304 for (int i = 0; i < divisions; ++i) {
Chris@18 305 double probe = double(i) / double(divisions);
Chris@18 306 double value = cubicInterpolate(y, probe);
Chris@18 307 if (value > maxval) {
Chris@18 308 maxval = value;
Chris@18 309 maxidx = maxbin - 1 + probe;
Chris@18 310 }
Chris@18 311 }
Chris@18 312
Chris@18 313 /*
Chris@18 314 std::cerr << "centre = " << maxbin << ": ["
Chris@18 315 << in[maxbin-2] << ","
Chris@18 316 << in[maxbin-1] << ","
Chris@18 317 << in[maxbin] << ","
Chris@18 318 << in[maxbin+1] << ","
Chris@18 319 << in[maxbin+2] << "] -> " << maxidx << std::endl;
Chris@18 320 */
Chris@18 321
Chris@18 322 return maxidx;
Chris@18 323 }
Chris@18 324
Chris@31 325 CepstralPitchTracker::FeatureSet
Chris@31 326 CepstralPitchTracker::process(const float *const *inputBuffers, RealTime timestamp)
Chris@3 327 {
Chris@3 328 FeatureSet fs;
Chris@3 329
Chris@3 330 int bs = m_blockSize;
Chris@3 331 int hs = m_blockSize/2 + 1;
Chris@3 332
Chris@3 333 double *rawcep = new double[bs];
Chris@3 334 double *io = new double[bs];
Chris@3 335 double *logmag = new double[bs];
Chris@3 336
Chris@4 337 // The "inverse symmetric" method. Seems to be the most reliable
Chris@3 338
Chris@25 339 double magmean = 0.0;
Chris@25 340
Chris@3 341 for (int i = 0; i < hs; ++i) {
Chris@3 342
Chris@3 343 double power =
Chris@3 344 inputBuffers[0][i*2 ] * inputBuffers[0][i*2 ] +
Chris@3 345 inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
Chris@3 346 double mag = sqrt(power);
Chris@25 347
Chris@25 348 magmean += mag;
Chris@25 349
Chris@3 350 double lm = log(mag + 0.00000001);
Chris@3 351
Chris@4 352 logmag[i] = lm;
Chris@4 353 if (i > 0) logmag[bs - i] = lm;
Chris@3 354 }
Chris@3 355
Chris@25 356 magmean /= hs;
Chris@25 357 double threshold = 0.1; // for magmean
Chris@25 358
Chris@26 359 Vamp::FFT::inverse(bs, logmag, 0, rawcep, io);
Chris@3 360
Chris@3 361 delete[] logmag;
Chris@3 362 delete[] io;
Chris@3 363
Chris@3 364 int n = m_bins;
Chris@3 365 double *data = new double[n];
Chris@47 366 MeanFilter(m_vflen).filterSubsequence(rawcep, data, m_blockSize, n, m_binFrom);
Chris@3 367 delete[] rawcep;
Chris@3 368
Chris@3 369 double maxval = 0.0;
Chris@6 370 int maxbin = -1;
Chris@3 371
Chris@3 372 for (int i = 0; i < n; ++i) {
Chris@3 373 if (data[i] > maxval) {
Chris@3 374 maxval = data[i];
Chris@3 375 maxbin = i;
Chris@3 376 }
Chris@3 377 }
Chris@3 378
Chris@15 379 if (maxbin < 0) {
Chris@15 380 delete[] data;
Chris@15 381 return fs;
Chris@15 382 }
Chris@15 383
Chris@15 384 double nextPeakVal = 0.0;
Chris@15 385 for (int i = 1; i+1 < n; ++i) {
Chris@15 386 if (data[i] > data[i-1] &&
Chris@15 387 data[i] > data[i+1] &&
Chris@15 388 i != maxbin &&
Chris@15 389 data[i] > nextPeakVal) {
Chris@15 390 nextPeakVal = data[i];
Chris@15 391 }
Chris@15 392 }
Chris@8 393
Chris@18 394 double cimax = findInterpolatedPeak(data, maxbin);
Chris@18 395 double peakfreq = m_inputSampleRate / (cimax + m_binFrom);
Chris@15 396
Chris@15 397 double confidence = 0.0;
Chris@15 398 if (nextPeakVal != 0.0) {
Chris@27 399 confidence = (maxval - nextPeakVal) * 10.0;
Chris@25 400 if (magmean < threshold) confidence = 0.0;
Chris@39 401 // std::cerr << "magmean = " << magmean << ", confidence = " << confidence << std::endl;
Chris@15 402 }
Chris@15 403
Chris@35 404 NoteHypothesis::Estimate e;
Chris@8 405 e.freq = peakfreq;
Chris@8 406 e.time = timestamp;
Chris@15 407 e.confidence = confidence;
Chris@8 408
Chris@28 409 if (!m_good.accept(e)) {
Chris@13 410
Chris@11 411 int candidate = -1;
Chris@13 412 bool accepted = false;
Chris@13 413
Chris@35 414 for (int i = 0; i < (int)m_possible.size(); ++i) {
Chris@28 415 if (m_possible[i].accept(e)) {
Chris@35 416 if (m_possible[i].getState() == NoteHypothesis::Satisfied) {
Chris@28 417 accepted = true;
Chris@11 418 candidate = i;
Chris@11 419 }
Chris@11 420 break;
Chris@11 421 }
Chris@11 422 }
Chris@12 423
Chris@13 424 if (!accepted) {
Chris@35 425 NoteHypothesis h;
Chris@28 426 h.accept(e); //!!! must succeed as h is new, so perhaps there should be a ctor for this
Chris@13 427 m_possible.push_back(h);
Chris@13 428 }
Chris@13 429
Chris@35 430 if (m_good.getState() == NoteHypothesis::Expired) {
Chris@30 431 addFeaturesFrom(m_good, fs);
Chris@12 432 }
Chris@12 433
Chris@35 434 if (m_good.getState() == NoteHypothesis::Expired ||
Chris@35 435 m_good.getState() == NoteHypothesis::Rejected) {
Chris@11 436 if (candidate >= 0) {
Chris@28 437 m_good = m_possible[candidate];
Chris@11 438 } else {
Chris@35 439 m_good = NoteHypothesis();
Chris@11 440 }
Chris@11 441 }
Chris@8 442
Chris@14 443 // reap rejected/expired hypotheses from possible list
Chris@14 444 Hypotheses toReap = m_possible;
Chris@14 445 m_possible.clear();
Chris@35 446 for (int i = 0; i < (int)toReap.size(); ++i) {
Chris@35 447 NoteHypothesis h = toReap[i];
Chris@35 448 if (h.getState() != NoteHypothesis::Rejected &&
Chris@35 449 h.getState() != NoteHypothesis::Expired) {
Chris@14 450 m_possible.push_back(h);
Chris@14 451 }
Chris@14 452 }
Chris@14 453 }
Chris@14 454
Chris@3 455 delete[] data;
Chris@3 456 return fs;
Chris@3 457 }
Chris@3 458
Chris@31 459 CepstralPitchTracker::FeatureSet
Chris@31 460 CepstralPitchTracker::getRemainingFeatures()
Chris@3 461 {
Chris@3 462 FeatureSet fs;
Chris@35 463 if (m_good.getState() == NoteHypothesis::Satisfied) {
Chris@30 464 addFeaturesFrom(m_good, fs);
Chris@11 465 }
Chris@3 466 return fs;
Chris@3 467 }