annotate CepstrumPitchTracker.cpp @ 6:291c75f6e837 track

Some work on accepting/rejecting peaks
author Chris Cannam
date Tue, 26 Jun 2012 16:06:00 +0100
parents 383c5b497f4a
children 32defdb2f9d9
rev   line source
Chris@3 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@3 2 /*
Chris@3 3 Permission is hereby granted, free of charge, to any person
Chris@3 4 obtaining a copy of this software and associated documentation
Chris@3 5 files (the "Software"), to deal in the Software without
Chris@3 6 restriction, including without limitation the rights to use, copy,
Chris@3 7 modify, merge, publish, distribute, sublicense, and/or sell copies
Chris@3 8 of the Software, and to permit persons to whom the Software is
Chris@3 9 furnished to do so, subject to the following conditions:
Chris@3 10
Chris@3 11 The above copyright notice and this permission notice shall be
Chris@3 12 included in all copies or substantial portions of the Software.
Chris@3 13
Chris@3 14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
Chris@3 15 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
Chris@3 16 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
Chris@3 17 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
Chris@3 18 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
Chris@3 19 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
Chris@3 20 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Chris@3 21 */
Chris@3 22
Chris@3 23 #include "CepstrumPitchTracker.h"
Chris@3 24
Chris@3 25 #include <vector>
Chris@3 26 #include <algorithm>
Chris@3 27
Chris@3 28 #include <cstdio>
Chris@3 29 #include <cmath>
Chris@3 30 #include <complex>
Chris@3 31
Chris@3 32 using std::string;
Chris@3 33
Chris@3 34 CepstrumPitchTracker::CepstrumPitchTracker(float inputSampleRate) :
Chris@3 35 Plugin(inputSampleRate),
Chris@3 36 m_channels(0),
Chris@3 37 m_stepSize(256),
Chris@3 38 m_blockSize(1024),
Chris@3 39 m_fmin(50),
Chris@3 40 m_fmax(1000),
Chris@5 41 m_histlen(1),
Chris@5 42 m_vflen(3),
Chris@3 43 m_binFrom(0),
Chris@3 44 m_binTo(0),
Chris@3 45 m_bins(0),
Chris@6 46 m_history(0),
Chris@6 47 m_prevpeak(0),
Chris@6 48 m_prevprop(0)
Chris@3 49 {
Chris@3 50 }
Chris@3 51
Chris@3 52 CepstrumPitchTracker::~CepstrumPitchTracker()
Chris@3 53 {
Chris@3 54 if (m_history) {
Chris@3 55 for (int i = 0; i < m_histlen; ++i) {
Chris@3 56 delete[] m_history[i];
Chris@3 57 }
Chris@3 58 delete[] m_history;
Chris@3 59 }
Chris@3 60 }
Chris@3 61
Chris@3 62 string
Chris@3 63 CepstrumPitchTracker::getIdentifier() const
Chris@3 64 {
Chris@3 65 return "cepstrum-pitch";
Chris@3 66 }
Chris@3 67
Chris@3 68 string
Chris@3 69 CepstrumPitchTracker::getName() const
Chris@3 70 {
Chris@3 71 return "Cepstrum Pitch Tracker";
Chris@3 72 }
Chris@3 73
Chris@3 74 string
Chris@3 75 CepstrumPitchTracker::getDescription() const
Chris@3 76 {
Chris@3 77 return "Estimate f0 of monophonic material using a cepstrum method.";
Chris@3 78 }
Chris@3 79
Chris@3 80 string
Chris@3 81 CepstrumPitchTracker::getMaker() const
Chris@3 82 {
Chris@3 83 return "Chris Cannam";
Chris@3 84 }
Chris@3 85
Chris@3 86 int
Chris@3 87 CepstrumPitchTracker::getPluginVersion() const
Chris@3 88 {
Chris@3 89 // Increment this each time you release a version that behaves
Chris@3 90 // differently from the previous one
Chris@3 91 return 1;
Chris@3 92 }
Chris@3 93
Chris@3 94 string
Chris@3 95 CepstrumPitchTracker::getCopyright() const
Chris@3 96 {
Chris@3 97 return "Freely redistributable (BSD license)";
Chris@3 98 }
Chris@3 99
Chris@3 100 CepstrumPitchTracker::InputDomain
Chris@3 101 CepstrumPitchTracker::getInputDomain() const
Chris@3 102 {
Chris@3 103 return FrequencyDomain;
Chris@3 104 }
Chris@3 105
Chris@3 106 size_t
Chris@3 107 CepstrumPitchTracker::getPreferredBlockSize() const
Chris@3 108 {
Chris@3 109 return 1024;
Chris@3 110 }
Chris@3 111
Chris@3 112 size_t
Chris@3 113 CepstrumPitchTracker::getPreferredStepSize() const
Chris@3 114 {
Chris@3 115 return 256;
Chris@3 116 }
Chris@3 117
Chris@3 118 size_t
Chris@3 119 CepstrumPitchTracker::getMinChannelCount() const
Chris@3 120 {
Chris@3 121 return 1;
Chris@3 122 }
Chris@3 123
Chris@3 124 size_t
Chris@3 125 CepstrumPitchTracker::getMaxChannelCount() const
Chris@3 126 {
Chris@3 127 return 1;
Chris@3 128 }
Chris@3 129
Chris@3 130 CepstrumPitchTracker::ParameterList
Chris@3 131 CepstrumPitchTracker::getParameterDescriptors() const
Chris@3 132 {
Chris@3 133 ParameterList list;
Chris@3 134 return list;
Chris@3 135 }
Chris@3 136
Chris@3 137 float
Chris@3 138 CepstrumPitchTracker::getParameter(string identifier) const
Chris@3 139 {
Chris@3 140 return 0.f;
Chris@3 141 }
Chris@3 142
Chris@3 143 void
Chris@3 144 CepstrumPitchTracker::setParameter(string identifier, float value)
Chris@3 145 {
Chris@3 146 }
Chris@3 147
Chris@3 148 CepstrumPitchTracker::ProgramList
Chris@3 149 CepstrumPitchTracker::getPrograms() const
Chris@3 150 {
Chris@3 151 ProgramList list;
Chris@3 152 return list;
Chris@3 153 }
Chris@3 154
Chris@3 155 string
Chris@3 156 CepstrumPitchTracker::getCurrentProgram() const
Chris@3 157 {
Chris@3 158 return ""; // no programs
Chris@3 159 }
Chris@3 160
Chris@3 161 void
Chris@3 162 CepstrumPitchTracker::selectProgram(string name)
Chris@3 163 {
Chris@3 164 }
Chris@3 165
Chris@3 166 CepstrumPitchTracker::OutputList
Chris@3 167 CepstrumPitchTracker::getOutputDescriptors() const
Chris@3 168 {
Chris@3 169 OutputList outputs;
Chris@3 170
Chris@3 171 int n = 0;
Chris@3 172
Chris@3 173 OutputDescriptor d;
Chris@3 174
Chris@3 175 d.identifier = "f0";
Chris@3 176 d.name = "Estimated f0";
Chris@3 177 d.description = "Estimated fundamental frequency";
Chris@3 178 d.unit = "Hz";
Chris@3 179 d.hasFixedBinCount = true;
Chris@3 180 d.binCount = 1;
Chris@3 181 d.hasKnownExtents = true;
Chris@3 182 d.minValue = m_fmin;
Chris@3 183 d.maxValue = m_fmax;
Chris@3 184 d.isQuantized = false;
Chris@3 185 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@3 186 d.sampleRate = (m_inputSampleRate / m_stepSize);
Chris@3 187 d.hasDuration = false;
Chris@3 188 outputs.push_back(d);
Chris@3 189
Chris@3 190 return outputs;
Chris@3 191 }
Chris@3 192
Chris@3 193 bool
Chris@3 194 CepstrumPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@3 195 {
Chris@3 196 if (channels < getMinChannelCount() ||
Chris@3 197 channels > getMaxChannelCount()) return false;
Chris@3 198
Chris@3 199 // std::cerr << "CepstrumPitchTracker::initialise: channels = " << channels
Chris@3 200 // << ", stepSize = " << stepSize << ", blockSize = " << blockSize
Chris@3 201 // << std::endl;
Chris@3 202
Chris@3 203 m_channels = channels;
Chris@3 204 m_stepSize = stepSize;
Chris@3 205 m_blockSize = blockSize;
Chris@3 206
Chris@3 207 m_binFrom = int(m_inputSampleRate / m_fmax);
Chris@3 208 m_binTo = int(m_inputSampleRate / m_fmin);
Chris@3 209
Chris@3 210 if (m_binTo >= (int)m_blockSize / 2) {
Chris@3 211 m_binTo = m_blockSize / 2 - 1;
Chris@3 212 }
Chris@3 213
Chris@3 214 m_bins = (m_binTo - m_binFrom) + 1;
Chris@3 215
Chris@3 216 m_history = new double *[m_histlen];
Chris@3 217 for (int i = 0; i < m_histlen; ++i) {
Chris@3 218 m_history[i] = new double[m_bins];
Chris@3 219 }
Chris@3 220
Chris@3 221 reset();
Chris@3 222
Chris@3 223 return true;
Chris@3 224 }
Chris@3 225
Chris@3 226 void
Chris@3 227 CepstrumPitchTracker::reset()
Chris@3 228 {
Chris@3 229 for (int i = 0; i < m_histlen; ++i) {
Chris@3 230 for (int j = 0; j < m_bins; ++j) {
Chris@3 231 m_history[i][j] = 0.0;
Chris@3 232 }
Chris@3 233 }
Chris@3 234 }
Chris@3 235
Chris@3 236 void
Chris@3 237 CepstrumPitchTracker::filter(const double *cep, double *result)
Chris@3 238 {
Chris@3 239 int hix = m_histlen - 1; // current history index
Chris@3 240
Chris@3 241 // roll back the history
Chris@3 242 if (m_histlen > 1) {
Chris@3 243 double *oldest = m_history[0];
Chris@3 244 for (int i = 1; i < m_histlen; ++i) {
Chris@3 245 m_history[i-1] = m_history[i];
Chris@3 246 }
Chris@3 247 // and stick this back in the newest spot, to recycle
Chris@3 248 m_history[hix] = oldest;
Chris@3 249 }
Chris@3 250
Chris@3 251 for (int i = 0; i < m_bins; ++i) {
Chris@5 252 double v = 0;
Chris@5 253 int n = 0;
Chris@5 254 // average according to the vertical filter length
Chris@5 255 for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
Chris@5 256 int ix = i + m_binFrom + j;
Chris@5 257 if (ix >= 0 && ix < m_blockSize) {
Chris@5 258 v += cep[ix];
Chris@5 259 ++n;
Chris@5 260 }
Chris@5 261 }
Chris@5 262 m_history[hix][i] = v / n;
Chris@3 263 }
Chris@3 264
Chris@3 265 for (int i = 0; i < m_bins; ++i) {
Chris@3 266 double mean = 0.0;
Chris@3 267 for (int j = 0; j < m_histlen; ++j) {
Chris@3 268 mean += m_history[j][i];
Chris@3 269 }
Chris@3 270 mean /= m_histlen;
Chris@3 271 result[i] = mean;
Chris@3 272 }
Chris@3 273 }
Chris@3 274
Chris@6 275 double
Chris@6 276 CepstrumPitchTracker::calculatePeakProportion(const double *data, double abstot, int n)
Chris@6 277 {
Chris@6 278 double aroundPeak = data[n];
Chris@6 279 double peakProportion = 0.0;
Chris@6 280
Chris@6 281 int i = n - 1;
Chris@6 282 while (i > 0 && data[i] <= data[i+1]) {
Chris@6 283 aroundPeak += fabs(data[i]);
Chris@6 284 --i;
Chris@6 285 }
Chris@6 286 i = n + 1;
Chris@6 287 while (i < m_bins && data[i] <= data[i-1]) {
Chris@6 288 aroundPeak += fabs(data[i]);
Chris@6 289 ++i;
Chris@6 290 }
Chris@6 291 peakProportion = aroundPeak / abstot;
Chris@6 292
Chris@6 293 return peakProportion;
Chris@6 294 }
Chris@6 295
Chris@6 296 bool
Chris@6 297 CepstrumPitchTracker::acceptPeak(int n, double peakProportion)
Chris@6 298 {
Chris@6 299 bool accept = false;
Chris@6 300
Chris@6 301 if (abs(n - m_prevpeak) < 10) { //!!! should depend on bin count
Chris@6 302 accept = true;
Chris@6 303 } else if (peakProportion > m_prevprop * 2) {
Chris@6 304 accept = true;
Chris@6 305 }
Chris@6 306
Chris@6 307 return accept;
Chris@6 308 }
Chris@6 309
Chris@3 310 CepstrumPitchTracker::FeatureSet
Chris@3 311 CepstrumPitchTracker::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
Chris@3 312 {
Chris@3 313 FeatureSet fs;
Chris@3 314
Chris@3 315 int bs = m_blockSize;
Chris@3 316 int hs = m_blockSize/2 + 1;
Chris@3 317
Chris@3 318 double *rawcep = new double[bs];
Chris@3 319 double *io = new double[bs];
Chris@3 320 double *logmag = new double[bs];
Chris@3 321
Chris@4 322 // The "inverse symmetric" method. Seems to be the most reliable
Chris@3 323
Chris@3 324 for (int i = 0; i < hs; ++i) {
Chris@3 325
Chris@3 326 double power =
Chris@3 327 inputBuffers[0][i*2 ] * inputBuffers[0][i*2 ] +
Chris@3 328 inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
Chris@3 329 double mag = sqrt(power);
Chris@3 330
Chris@3 331 double lm = log(mag + 0.00000001);
Chris@3 332
Chris@4 333 logmag[i] = lm;
Chris@4 334 if (i > 0) logmag[bs - i] = lm;
Chris@3 335 }
Chris@3 336
Chris@4 337 fft(bs, true, logmag, 0, rawcep, io);
Chris@3 338
Chris@3 339 delete[] logmag;
Chris@3 340 delete[] io;
Chris@3 341
Chris@3 342 int n = m_bins;
Chris@3 343 double *data = new double[n];
Chris@3 344 filter(rawcep, data);
Chris@3 345 delete[] rawcep;
Chris@3 346
Chris@6 347 double abstot = 0.0;
Chris@6 348
Chris@6 349 for (int i = 0; i < n; ++i) {
Chris@6 350 abstot += fabs(data[i]);
Chris@6 351 }
Chris@6 352
Chris@3 353 double maxval = 0.0;
Chris@6 354 int maxbin = -1;
Chris@3 355
Chris@3 356 for (int i = 0; i < n; ++i) {
Chris@3 357 if (data[i] > maxval) {
Chris@3 358 maxval = data[i];
Chris@3 359 maxbin = i;
Chris@3 360 }
Chris@3 361 }
Chris@3 362
Chris@6 363 bool accepted = false;
Chris@6 364
Chris@6 365 if (maxbin >= 0) {
Chris@6 366 double pp = calculatePeakProportion(data, abstot, maxbin);
Chris@6 367 if (acceptPeak(maxbin, pp)) {
Chris@6 368 accepted = true;
Chris@6 369 } else {
Chris@6 370 // try a secondary peak
Chris@6 371 maxval = 0.0;
Chris@6 372 int secondbin = 0;
Chris@6 373 for (int i = 1; i < n-1; ++i) {
Chris@6 374 if (i != maxbin &&
Chris@6 375 data[i] > data[i-1] &&
Chris@6 376 data[i] > data[i+1] &&
Chris@6 377 data[i] > maxval) {
Chris@6 378 maxval = data[i];
Chris@6 379 secondbin = i;
Chris@6 380 }
Chris@6 381 }
Chris@6 382 double spp = calculatePeakProportion(data, abstot, secondbin);
Chris@6 383 if (acceptPeak(secondbin, spp)) {
Chris@6 384 maxbin = secondbin;
Chris@6 385 pp = spp;
Chris@6 386 accepted = true;
Chris@6 387 }
Chris@3 388 }
Chris@6 389 if (accepted) {
Chris@6 390 m_prevpeak = maxbin;
Chris@6 391 m_prevprop = pp;
Chris@3 392 }
Chris@3 393 }
Chris@6 394
Chris@3 395 // std::cerr << "peakProportion = " << peakProportion << std::endl;
Chris@3 396 // std::cerr << "peak = " << m_inputSampleRate / (maxbin + m_binFrom) << std::endl;
Chris@4 397 // std::cerr << "bins = " << m_bins << std::endl;
Chris@3 398
Chris@6 399 // if (peakProportion >= (0.00006 * m_bins)) {
Chris@6 400 if (accepted) {
Chris@3 401 Feature f;
Chris@3 402 f.hasTimestamp = true;
Chris@3 403 f.timestamp = timestamp;
Chris@3 404 f.values.push_back(m_inputSampleRate / (maxbin + m_binFrom));
Chris@3 405 fs[0].push_back(f);
Chris@3 406 }
Chris@3 407
Chris@3 408 delete[] data;
Chris@3 409 return fs;
Chris@3 410 }
Chris@3 411
Chris@3 412 CepstrumPitchTracker::FeatureSet
Chris@3 413 CepstrumPitchTracker::getRemainingFeatures()
Chris@3 414 {
Chris@3 415 FeatureSet fs;
Chris@3 416 return fs;
Chris@3 417 }
Chris@3 418
Chris@3 419 void
Chris@3 420 CepstrumPitchTracker::fft(unsigned int n, bool inverse,
Chris@3 421 double *ri, double *ii, double *ro, double *io)
Chris@3 422 {
Chris@3 423 if (!ri || !ro || !io) return;
Chris@3 424
Chris@3 425 unsigned int bits;
Chris@3 426 unsigned int i, j, k, m;
Chris@3 427 unsigned int blockSize, blockEnd;
Chris@3 428
Chris@3 429 double tr, ti;
Chris@3 430
Chris@3 431 if (n < 2) return;
Chris@3 432 if (n & (n-1)) return;
Chris@3 433
Chris@3 434 double angle = 2.0 * M_PI;
Chris@3 435 if (inverse) angle = -angle;
Chris@3 436
Chris@3 437 for (i = 0; ; ++i) {
Chris@3 438 if (n & (1 << i)) {
Chris@3 439 bits = i;
Chris@3 440 break;
Chris@3 441 }
Chris@3 442 }
Chris@3 443
Chris@3 444 static unsigned int tableSize = 0;
Chris@3 445 static int *table = 0;
Chris@3 446
Chris@3 447 if (tableSize != n) {
Chris@3 448
Chris@3 449 delete[] table;
Chris@3 450
Chris@3 451 table = new int[n];
Chris@3 452
Chris@3 453 for (i = 0; i < n; ++i) {
Chris@3 454
Chris@3 455 m = i;
Chris@3 456
Chris@3 457 for (j = k = 0; j < bits; ++j) {
Chris@3 458 k = (k << 1) | (m & 1);
Chris@3 459 m >>= 1;
Chris@3 460 }
Chris@3 461
Chris@3 462 table[i] = k;
Chris@3 463 }
Chris@3 464
Chris@3 465 tableSize = n;
Chris@3 466 }
Chris@3 467
Chris@3 468 if (ii) {
Chris@3 469 for (i = 0; i < n; ++i) {
Chris@3 470 ro[table[i]] = ri[i];
Chris@3 471 io[table[i]] = ii[i];
Chris@3 472 }
Chris@3 473 } else {
Chris@3 474 for (i = 0; i < n; ++i) {
Chris@3 475 ro[table[i]] = ri[i];
Chris@3 476 io[table[i]] = 0.0;
Chris@3 477 }
Chris@3 478 }
Chris@3 479
Chris@3 480 blockEnd = 1;
Chris@3 481
Chris@3 482 for (blockSize = 2; blockSize <= n; blockSize <<= 1) {
Chris@3 483
Chris@3 484 double delta = angle / (double)blockSize;
Chris@3 485 double sm2 = -sin(-2 * delta);
Chris@3 486 double sm1 = -sin(-delta);
Chris@3 487 double cm2 = cos(-2 * delta);
Chris@3 488 double cm1 = cos(-delta);
Chris@3 489 double w = 2 * cm1;
Chris@3 490 double ar[3], ai[3];
Chris@3 491
Chris@3 492 for (i = 0; i < n; i += blockSize) {
Chris@3 493
Chris@3 494 ar[2] = cm2;
Chris@3 495 ar[1] = cm1;
Chris@3 496
Chris@3 497 ai[2] = sm2;
Chris@3 498 ai[1] = sm1;
Chris@3 499
Chris@3 500 for (j = i, m = 0; m < blockEnd; j++, m++) {
Chris@3 501
Chris@3 502 ar[0] = w * ar[1] - ar[2];
Chris@3 503 ar[2] = ar[1];
Chris@3 504 ar[1] = ar[0];
Chris@3 505
Chris@3 506 ai[0] = w * ai[1] - ai[2];
Chris@3 507 ai[2] = ai[1];
Chris@3 508 ai[1] = ai[0];
Chris@3 509
Chris@3 510 k = j + blockEnd;
Chris@3 511 tr = ar[0] * ro[k] - ai[0] * io[k];
Chris@3 512 ti = ar[0] * io[k] + ai[0] * ro[k];
Chris@3 513
Chris@3 514 ro[k] = ro[j] - tr;
Chris@3 515 io[k] = io[j] - ti;
Chris@3 516
Chris@3 517 ro[j] += tr;
Chris@3 518 io[j] += ti;
Chris@3 519 }
Chris@3 520 }
Chris@3 521
Chris@3 522 blockEnd = blockSize;
Chris@3 523 }
Chris@3 524 }
Chris@3 525
Chris@3 526