annotate CepstrumPitchTracker.cpp @ 10:960868d5f841 track

Vertical filtering
author Chris Cannam
date Tue, 26 Jun 2012 15:30:12 +0100
parents 0510372cb340
children 0c95dc49163a
rev   line source
Chris@8 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@8 2 /*
Chris@8 3 Permission is hereby granted, free of charge, to any person
Chris@8 4 obtaining a copy of this software and associated documentation
Chris@8 5 files (the "Software"), to deal in the Software without
Chris@8 6 restriction, including without limitation the rights to use, copy,
Chris@8 7 modify, merge, publish, distribute, sublicense, and/or sell copies
Chris@8 8 of the Software, and to permit persons to whom the Software is
Chris@8 9 furnished to do so, subject to the following conditions:
Chris@8 10
Chris@8 11 The above copyright notice and this permission notice shall be
Chris@8 12 included in all copies or substantial portions of the Software.
Chris@8 13
Chris@8 14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
Chris@8 15 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
Chris@8 16 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
Chris@8 17 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
Chris@8 18 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
Chris@8 19 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
Chris@8 20 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Chris@8 21 */
Chris@8 22
Chris@8 23 #include "CepstrumPitchTracker.h"
Chris@8 24
Chris@8 25 #include <vector>
Chris@8 26 #include <algorithm>
Chris@8 27
Chris@8 28 #include <cstdio>
Chris@8 29 #include <cmath>
Chris@8 30 #include <complex>
Chris@8 31
Chris@8 32 using std::string;
Chris@8 33
Chris@8 34 CepstrumPitchTracker::CepstrumPitchTracker(float inputSampleRate) :
Chris@8 35 Plugin(inputSampleRate),
Chris@8 36 m_channels(0),
Chris@8 37 m_stepSize(256),
Chris@8 38 m_blockSize(1024),
Chris@8 39 m_fmin(50),
Chris@8 40 m_fmax(1000),
Chris@10 41 m_histlen(1),
Chris@10 42 m_vflen(3),
Chris@8 43 m_binFrom(0),
Chris@8 44 m_binTo(0),
Chris@8 45 m_bins(0),
Chris@8 46 m_history(0)
Chris@8 47 {
Chris@8 48 }
Chris@8 49
Chris@8 50 CepstrumPitchTracker::~CepstrumPitchTracker()
Chris@8 51 {
Chris@8 52 if (m_history) {
Chris@8 53 for (int i = 0; i < m_histlen; ++i) {
Chris@8 54 delete[] m_history[i];
Chris@8 55 }
Chris@8 56 delete[] m_history;
Chris@8 57 }
Chris@8 58 }
Chris@8 59
Chris@8 60 string
Chris@8 61 CepstrumPitchTracker::getIdentifier() const
Chris@8 62 {
Chris@8 63 return "cepstrum-pitch";
Chris@8 64 }
Chris@8 65
Chris@8 66 string
Chris@8 67 CepstrumPitchTracker::getName() const
Chris@8 68 {
Chris@8 69 return "Cepstrum Pitch Tracker";
Chris@8 70 }
Chris@8 71
Chris@8 72 string
Chris@8 73 CepstrumPitchTracker::getDescription() const
Chris@8 74 {
Chris@8 75 return "Estimate f0 of monophonic material using a cepstrum method.";
Chris@8 76 }
Chris@8 77
Chris@8 78 string
Chris@8 79 CepstrumPitchTracker::getMaker() const
Chris@8 80 {
Chris@8 81 return "Chris Cannam";
Chris@8 82 }
Chris@8 83
Chris@8 84 int
Chris@8 85 CepstrumPitchTracker::getPluginVersion() const
Chris@8 86 {
Chris@8 87 // Increment this each time you release a version that behaves
Chris@8 88 // differently from the previous one
Chris@8 89 return 1;
Chris@8 90 }
Chris@8 91
Chris@8 92 string
Chris@8 93 CepstrumPitchTracker::getCopyright() const
Chris@8 94 {
Chris@8 95 return "Freely redistributable (BSD license)";
Chris@8 96 }
Chris@8 97
Chris@8 98 CepstrumPitchTracker::InputDomain
Chris@8 99 CepstrumPitchTracker::getInputDomain() const
Chris@8 100 {
Chris@8 101 return FrequencyDomain;
Chris@8 102 }
Chris@8 103
Chris@8 104 size_t
Chris@8 105 CepstrumPitchTracker::getPreferredBlockSize() const
Chris@8 106 {
Chris@8 107 return 1024;
Chris@8 108 }
Chris@8 109
Chris@8 110 size_t
Chris@8 111 CepstrumPitchTracker::getPreferredStepSize() const
Chris@8 112 {
Chris@8 113 return 256;
Chris@8 114 }
Chris@8 115
Chris@8 116 size_t
Chris@8 117 CepstrumPitchTracker::getMinChannelCount() const
Chris@8 118 {
Chris@8 119 return 1;
Chris@8 120 }
Chris@8 121
Chris@8 122 size_t
Chris@8 123 CepstrumPitchTracker::getMaxChannelCount() const
Chris@8 124 {
Chris@8 125 return 1;
Chris@8 126 }
Chris@8 127
Chris@8 128 CepstrumPitchTracker::ParameterList
Chris@8 129 CepstrumPitchTracker::getParameterDescriptors() const
Chris@8 130 {
Chris@8 131 ParameterList list;
Chris@8 132 return list;
Chris@8 133 }
Chris@8 134
Chris@8 135 float
Chris@8 136 CepstrumPitchTracker::getParameter(string identifier) const
Chris@8 137 {
Chris@8 138 return 0.f;
Chris@8 139 }
Chris@8 140
Chris@8 141 void
Chris@8 142 CepstrumPitchTracker::setParameter(string identifier, float value)
Chris@8 143 {
Chris@8 144 }
Chris@8 145
Chris@8 146 CepstrumPitchTracker::ProgramList
Chris@8 147 CepstrumPitchTracker::getPrograms() const
Chris@8 148 {
Chris@8 149 ProgramList list;
Chris@8 150 return list;
Chris@8 151 }
Chris@8 152
Chris@8 153 string
Chris@8 154 CepstrumPitchTracker::getCurrentProgram() const
Chris@8 155 {
Chris@8 156 return ""; // no programs
Chris@8 157 }
Chris@8 158
Chris@8 159 void
Chris@8 160 CepstrumPitchTracker::selectProgram(string name)
Chris@8 161 {
Chris@8 162 }
Chris@8 163
Chris@8 164 CepstrumPitchTracker::OutputList
Chris@8 165 CepstrumPitchTracker::getOutputDescriptors() const
Chris@8 166 {
Chris@8 167 OutputList outputs;
Chris@8 168
Chris@8 169 int n = 0;
Chris@8 170
Chris@8 171 OutputDescriptor d;
Chris@8 172
Chris@8 173 d.identifier = "f0";
Chris@8 174 d.name = "Estimated f0";
Chris@8 175 d.description = "Estimated fundamental frequency";
Chris@8 176 d.unit = "Hz";
Chris@8 177 d.hasFixedBinCount = true;
Chris@8 178 d.binCount = 1;
Chris@8 179 d.hasKnownExtents = true;
Chris@8 180 d.minValue = m_fmin;
Chris@8 181 d.maxValue = m_fmax;
Chris@8 182 d.isQuantized = false;
Chris@8 183 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@8 184 d.sampleRate = (m_inputSampleRate / m_stepSize);
Chris@8 185 d.hasDuration = false;
Chris@8 186 outputs.push_back(d);
Chris@8 187
Chris@8 188 return outputs;
Chris@8 189 }
Chris@8 190
Chris@8 191 bool
Chris@8 192 CepstrumPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@8 193 {
Chris@8 194 if (channels < getMinChannelCount() ||
Chris@8 195 channels > getMaxChannelCount()) return false;
Chris@8 196
Chris@8 197 // std::cerr << "CepstrumPitchTracker::initialise: channels = " << channels
Chris@8 198 // << ", stepSize = " << stepSize << ", blockSize = " << blockSize
Chris@8 199 // << std::endl;
Chris@8 200
Chris@8 201 m_channels = channels;
Chris@8 202 m_stepSize = stepSize;
Chris@8 203 m_blockSize = blockSize;
Chris@8 204
Chris@8 205 m_binFrom = int(m_inputSampleRate / m_fmax);
Chris@8 206 m_binTo = int(m_inputSampleRate / m_fmin);
Chris@8 207
Chris@8 208 if (m_binTo >= (int)m_blockSize / 2) {
Chris@8 209 m_binTo = m_blockSize / 2 - 1;
Chris@8 210 }
Chris@8 211
Chris@8 212 m_bins = (m_binTo - m_binFrom) + 1;
Chris@8 213
Chris@8 214 m_history = new double *[m_histlen];
Chris@8 215 for (int i = 0; i < m_histlen; ++i) {
Chris@8 216 m_history[i] = new double[m_bins];
Chris@8 217 }
Chris@8 218
Chris@8 219 reset();
Chris@8 220
Chris@8 221 return true;
Chris@8 222 }
Chris@8 223
Chris@8 224 void
Chris@8 225 CepstrumPitchTracker::reset()
Chris@8 226 {
Chris@8 227 for (int i = 0; i < m_histlen; ++i) {
Chris@8 228 for (int j = 0; j < m_bins; ++j) {
Chris@8 229 m_history[i][j] = 0.0;
Chris@8 230 }
Chris@8 231 }
Chris@8 232 }
Chris@8 233
Chris@8 234 void
Chris@8 235 CepstrumPitchTracker::filter(const double *cep, double *result)
Chris@8 236 {
Chris@8 237 int hix = m_histlen - 1; // current history index
Chris@8 238
Chris@8 239 // roll back the history
Chris@8 240 if (m_histlen > 1) {
Chris@8 241 double *oldest = m_history[0];
Chris@8 242 for (int i = 1; i < m_histlen; ++i) {
Chris@8 243 m_history[i-1] = m_history[i];
Chris@8 244 }
Chris@8 245 // and stick this back in the newest spot, to recycle
Chris@8 246 m_history[hix] = oldest;
Chris@8 247 }
Chris@8 248
Chris@8 249 for (int i = 0; i < m_bins; ++i) {
Chris@10 250 double v = 0;
Chris@10 251 int n = 0;
Chris@10 252 // average according to the vertical filter length
Chris@10 253 for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
Chris@10 254 int ix = i + m_binFrom + j;
Chris@10 255 if (ix >= 0 && ix < m_blockSize) {
Chris@10 256 v += cep[ix];
Chris@10 257 ++n;
Chris@10 258 }
Chris@10 259 }
Chris@10 260 m_history[hix][i] = v / n;
Chris@8 261 }
Chris@8 262
Chris@8 263 for (int i = 0; i < m_bins; ++i) {
Chris@8 264 double mean = 0.0;
Chris@8 265 for (int j = 0; j < m_histlen; ++j) {
Chris@8 266 mean += m_history[j][i];
Chris@8 267 }
Chris@8 268 mean /= m_histlen;
Chris@8 269 result[i] = mean;
Chris@8 270 }
Chris@8 271 }
Chris@8 272
Chris@8 273 CepstrumPitchTracker::FeatureSet
Chris@8 274 CepstrumPitchTracker::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
Chris@8 275 {
Chris@8 276 FeatureSet fs;
Chris@8 277
Chris@8 278 int bs = m_blockSize;
Chris@8 279 int hs = m_blockSize/2 + 1;
Chris@8 280
Chris@8 281 double *rawcep = new double[bs];
Chris@8 282 double *io = new double[bs];
Chris@8 283 double *logmag = new double[bs];
Chris@8 284
Chris@9 285 // The "inverse symmetric" method. Seems to be the most reliable
Chris@8 286
Chris@8 287 for (int i = 0; i < hs; ++i) {
Chris@8 288
Chris@8 289 double power =
Chris@8 290 inputBuffers[0][i*2 ] * inputBuffers[0][i*2 ] +
Chris@8 291 inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
Chris@8 292 double mag = sqrt(power);
Chris@8 293
Chris@8 294 double lm = log(mag + 0.00000001);
Chris@8 295
Chris@9 296 logmag[i] = lm;
Chris@9 297 if (i > 0) logmag[bs - i] = lm;
Chris@8 298 }
Chris@8 299
Chris@9 300 fft(bs, true, logmag, 0, rawcep, io);
Chris@8 301
Chris@8 302 delete[] logmag;
Chris@8 303 delete[] io;
Chris@8 304
Chris@8 305 int n = m_bins;
Chris@8 306 double *data = new double[n];
Chris@8 307 filter(rawcep, data);
Chris@8 308 delete[] rawcep;
Chris@8 309
Chris@8 310 double maxval = 0.0;
Chris@8 311 int maxbin = 0;
Chris@8 312 double abstot = 0.0;
Chris@8 313
Chris@8 314 for (int i = 0; i < n; ++i) {
Chris@8 315 if (data[i] > maxval) {
Chris@8 316 maxval = data[i];
Chris@8 317 maxbin = i;
Chris@8 318 }
Chris@8 319 abstot += fabs(data[i]);
Chris@8 320 }
Chris@8 321
Chris@8 322 double aroundPeak = 0.0;
Chris@8 323 double peakProportion = 0.0;
Chris@8 324 if (maxval > 0.0) {
Chris@8 325 aroundPeak += fabs(maxval);
Chris@8 326 int i = maxbin - 1;
Chris@8 327 while (i > 0 && data[i] <= data[i+1]) {
Chris@8 328 aroundPeak += fabs(data[i]);
Chris@8 329 --i;
Chris@8 330 }
Chris@8 331 i = maxbin + 1;
Chris@8 332 while (i < n && data[i] <= data[i-1]) {
Chris@8 333 aroundPeak += fabs(data[i]);
Chris@8 334 ++i;
Chris@8 335 }
Chris@8 336 }
Chris@8 337 peakProportion = aroundPeak / abstot;
Chris@8 338
Chris@8 339 // std::cerr << "peakProportion = " << peakProportion << std::endl;
Chris@8 340 // std::cerr << "peak = " << m_inputSampleRate / (maxbin + m_binFrom) << std::endl;
Chris@9 341 // std::cerr << "bins = " << m_bins << std::endl;
Chris@8 342
Chris@9 343 if (peakProportion >= (0.00006 * m_bins)) {
Chris@8 344 Feature f;
Chris@8 345 f.hasTimestamp = true;
Chris@8 346 f.timestamp = timestamp;
Chris@8 347 f.values.push_back(m_inputSampleRate / (maxbin + m_binFrom));
Chris@8 348 fs[0].push_back(f);
Chris@8 349 }
Chris@8 350
Chris@8 351 delete[] data;
Chris@8 352 return fs;
Chris@8 353 }
Chris@8 354
Chris@8 355 CepstrumPitchTracker::FeatureSet
Chris@8 356 CepstrumPitchTracker::getRemainingFeatures()
Chris@8 357 {
Chris@8 358 FeatureSet fs;
Chris@8 359 return fs;
Chris@8 360 }
Chris@8 361
Chris@8 362 void
Chris@8 363 CepstrumPitchTracker::fft(unsigned int n, bool inverse,
Chris@8 364 double *ri, double *ii, double *ro, double *io)
Chris@8 365 {
Chris@8 366 if (!ri || !ro || !io) return;
Chris@8 367
Chris@8 368 unsigned int bits;
Chris@8 369 unsigned int i, j, k, m;
Chris@8 370 unsigned int blockSize, blockEnd;
Chris@8 371
Chris@8 372 double tr, ti;
Chris@8 373
Chris@8 374 if (n < 2) return;
Chris@8 375 if (n & (n-1)) return;
Chris@8 376
Chris@8 377 double angle = 2.0 * M_PI;
Chris@8 378 if (inverse) angle = -angle;
Chris@8 379
Chris@8 380 for (i = 0; ; ++i) {
Chris@8 381 if (n & (1 << i)) {
Chris@8 382 bits = i;
Chris@8 383 break;
Chris@8 384 }
Chris@8 385 }
Chris@8 386
Chris@8 387 static unsigned int tableSize = 0;
Chris@8 388 static int *table = 0;
Chris@8 389
Chris@8 390 if (tableSize != n) {
Chris@8 391
Chris@8 392 delete[] table;
Chris@8 393
Chris@8 394 table = new int[n];
Chris@8 395
Chris@8 396 for (i = 0; i < n; ++i) {
Chris@8 397
Chris@8 398 m = i;
Chris@8 399
Chris@8 400 for (j = k = 0; j < bits; ++j) {
Chris@8 401 k = (k << 1) | (m & 1);
Chris@8 402 m >>= 1;
Chris@8 403 }
Chris@8 404
Chris@8 405 table[i] = k;
Chris@8 406 }
Chris@8 407
Chris@8 408 tableSize = n;
Chris@8 409 }
Chris@8 410
Chris@8 411 if (ii) {
Chris@8 412 for (i = 0; i < n; ++i) {
Chris@8 413 ro[table[i]] = ri[i];
Chris@8 414 io[table[i]] = ii[i];
Chris@8 415 }
Chris@8 416 } else {
Chris@8 417 for (i = 0; i < n; ++i) {
Chris@8 418 ro[table[i]] = ri[i];
Chris@8 419 io[table[i]] = 0.0;
Chris@8 420 }
Chris@8 421 }
Chris@8 422
Chris@8 423 blockEnd = 1;
Chris@8 424
Chris@8 425 for (blockSize = 2; blockSize <= n; blockSize <<= 1) {
Chris@8 426
Chris@8 427 double delta = angle / (double)blockSize;
Chris@8 428 double sm2 = -sin(-2 * delta);
Chris@8 429 double sm1 = -sin(-delta);
Chris@8 430 double cm2 = cos(-2 * delta);
Chris@8 431 double cm1 = cos(-delta);
Chris@8 432 double w = 2 * cm1;
Chris@8 433 double ar[3], ai[3];
Chris@8 434
Chris@8 435 for (i = 0; i < n; i += blockSize) {
Chris@8 436
Chris@8 437 ar[2] = cm2;
Chris@8 438 ar[1] = cm1;
Chris@8 439
Chris@8 440 ai[2] = sm2;
Chris@8 441 ai[1] = sm1;
Chris@8 442
Chris@8 443 for (j = i, m = 0; m < blockEnd; j++, m++) {
Chris@8 444
Chris@8 445 ar[0] = w * ar[1] - ar[2];
Chris@8 446 ar[2] = ar[1];
Chris@8 447 ar[1] = ar[0];
Chris@8 448
Chris@8 449 ai[0] = w * ai[1] - ai[2];
Chris@8 450 ai[2] = ai[1];
Chris@8 451 ai[1] = ai[0];
Chris@8 452
Chris@8 453 k = j + blockEnd;
Chris@8 454 tr = ar[0] * ro[k] - ai[0] * io[k];
Chris@8 455 ti = ar[0] * io[k] + ai[0] * ro[k];
Chris@8 456
Chris@8 457 ro[k] = ro[j] - tr;
Chris@8 458 io[k] = io[j] - ti;
Chris@8 459
Chris@8 460 ro[j] += tr;
Chris@8 461 io[j] += ti;
Chris@8 462 }
Chris@8 463 }
Chris@8 464
Chris@8 465 blockEnd = blockSize;
Chris@8 466 }
Chris@8 467 }
Chris@8 468
Chris@8 469