annotate CepstralPitchTracker.cpp @ 67:dd5ab48fd58a parameters mirex2013

Add "fill gaps" option, add new outputs to rdf
author Chris Cannam
date Fri, 30 Aug 2013 17:00:02 +0100
parents 7ad142c710c6
children
rev   line source
Chris@3 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@3 2 /*
Chris@31 3 This file is Copyright (c) 2012 Chris Cannam
Chris@31 4
Chris@3 5 Permission is hereby granted, free of charge, to any person
Chris@3 6 obtaining a copy of this software and associated documentation
Chris@3 7 files (the "Software"), to deal in the Software without
Chris@3 8 restriction, including without limitation the rights to use, copy,
Chris@3 9 modify, merge, publish, distribute, sublicense, and/or sell copies
Chris@3 10 of the Software, and to permit persons to whom the Software is
Chris@3 11 furnished to do so, subject to the following conditions:
Chris@3 12
Chris@3 13 The above copyright notice and this permission notice shall be
Chris@3 14 included in all copies or substantial portions of the Software.
Chris@3 15
Chris@3 16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
Chris@3 17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
Chris@3 18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
Chris@3 19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
Chris@3 20 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
Chris@3 21 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
Chris@3 22 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Chris@3 23 */
Chris@3 24
Chris@31 25 #include "CepstralPitchTracker.h"
Chris@51 26 #include "Cepstrum.h"
Chris@47 27 #include "MeanFilter.h"
Chris@50 28 #include "PeakInterpolator.h"
Chris@56 29 #include "AgentFeeder.h"
Chris@3 30
Chris@26 31 #include "vamp-sdk/FFT.h"
Chris@26 32
Chris@3 33 #include <vector>
Chris@3 34 #include <algorithm>
Chris@3 35
Chris@3 36 #include <cstdio>
Chris@3 37 #include <cmath>
Chris@3 38 #include <complex>
Chris@3 39
Chris@3 40 using std::string;
Chris@7 41 using std::vector;
Chris@16 42 using Vamp::RealTime;
Chris@7 43
Chris@16 44
Chris@31 45 CepstralPitchTracker::CepstralPitchTracker(float inputSampleRate) :
Chris@3 46 Plugin(inputSampleRate),
Chris@3 47 m_channels(0),
Chris@3 48 m_stepSize(256),
Chris@3 49 m_blockSize(1024),
Chris@3 50 m_fmin(50),
Chris@25 51 m_fmax(900),
Chris@18 52 m_vflen(1),
Chris@66 53 m_slack(40),
Chris@66 54 m_sensitivity(10),
Chris@66 55 m_threshold(0.1),
Chris@67 56 m_fillGaps(false),
Chris@3 57 m_binFrom(0),
Chris@3 58 m_binTo(0),
Chris@56 59 m_bins(0),
Chris@57 60 m_nAccepted(0),
Chris@56 61 m_feeder(0)
Chris@3 62 {
Chris@3 63 }
Chris@3 64
Chris@31 65 CepstralPitchTracker::~CepstralPitchTracker()
Chris@3 66 {
Chris@56 67 delete m_feeder;
Chris@3 68 }
Chris@3 69
Chris@3 70 string
Chris@31 71 CepstralPitchTracker::getIdentifier() const
Chris@3 72 {
Chris@39 73 return "cepstral-pitchtracker";
Chris@3 74 }
Chris@3 75
Chris@3 76 string
Chris@31 77 CepstralPitchTracker::getName() const
Chris@3 78 {
Chris@39 79 return "Cepstral Pitch Tracker";
Chris@3 80 }
Chris@3 81
Chris@3 82 string
Chris@31 83 CepstralPitchTracker::getDescription() const
Chris@3 84 {
Chris@3 85 return "Estimate f0 of monophonic material using a cepstrum method.";
Chris@3 86 }
Chris@3 87
Chris@3 88 string
Chris@31 89 CepstralPitchTracker::getMaker() const
Chris@3 90 {
Chris@3 91 return "Chris Cannam";
Chris@3 92 }
Chris@3 93
Chris@3 94 int
Chris@31 95 CepstralPitchTracker::getPluginVersion() const
Chris@3 96 {
Chris@3 97 // Increment this each time you release a version that behaves
Chris@3 98 // differently from the previous one
Chris@66 99 return 2;
Chris@3 100 }
Chris@3 101
Chris@3 102 string
Chris@31 103 CepstralPitchTracker::getCopyright() const
Chris@3 104 {
Chris@3 105 return "Freely redistributable (BSD license)";
Chris@3 106 }
Chris@3 107
Chris@31 108 CepstralPitchTracker::InputDomain
Chris@31 109 CepstralPitchTracker::getInputDomain() const
Chris@3 110 {
Chris@3 111 return FrequencyDomain;
Chris@3 112 }
Chris@3 113
Chris@3 114 size_t
Chris@31 115 CepstralPitchTracker::getPreferredBlockSize() const
Chris@3 116 {
Chris@3 117 return 1024;
Chris@3 118 }
Chris@3 119
Chris@3 120 size_t
Chris@31 121 CepstralPitchTracker::getPreferredStepSize() const
Chris@3 122 {
Chris@3 123 return 256;
Chris@3 124 }
Chris@3 125
Chris@3 126 size_t
Chris@31 127 CepstralPitchTracker::getMinChannelCount() const
Chris@3 128 {
Chris@3 129 return 1;
Chris@3 130 }
Chris@3 131
Chris@3 132 size_t
Chris@31 133 CepstralPitchTracker::getMaxChannelCount() const
Chris@3 134 {
Chris@3 135 return 1;
Chris@3 136 }
Chris@3 137
Chris@31 138 CepstralPitchTracker::ParameterList
Chris@31 139 CepstralPitchTracker::getParameterDescriptors() const
Chris@3 140 {
Chris@3 141 ParameterList list;
Chris@66 142
Chris@66 143 ParameterDescriptor d;
Chris@66 144 d.identifier = "sensitivity";
Chris@66 145 d.name = "Sensitivity";
Chris@66 146 d.description = "Sensitivity of the voicing detector";
Chris@66 147 d.unit = "";
Chris@66 148 d.minValue = 0;
Chris@66 149 d.maxValue = 100;
Chris@66 150 d.defaultValue = 10;
Chris@66 151 d.isQuantized = true;
Chris@66 152 d.quantizeStep = 1;
Chris@66 153 list.push_back(d);
Chris@66 154
Chris@66 155 d.identifier = "slack";
Chris@66 156 d.name = "Slack";
Chris@66 157 d.description = "Maximum permissible length of voicing gap for a continuous note";
Chris@66 158 d.unit = "ms";
Chris@66 159 d.minValue = 0;
Chris@66 160 d.maxValue = 200;
Chris@66 161 d.defaultValue = 40;
Chris@66 162 d.isQuantized = true;
Chris@66 163 d.quantizeStep = 1;
Chris@66 164 list.push_back(d);
Chris@66 165
Chris@66 166 d.identifier = "threshold";
Chris@66 167 d.name = "Silence threshold";
Chris@66 168 d.description = "Threshold for silence detection";
Chris@66 169 d.unit = ""; //!!! todo: convert this threshold to a meaningful unit!
Chris@66 170 d.minValue = 0;
Chris@66 171 d.maxValue = 0.5;
Chris@66 172 d.defaultValue = 0.1;
Chris@66 173 d.isQuantized = false;
Chris@66 174 list.push_back(d);
Chris@66 175
Chris@67 176 d.identifier = "fill";
Chris@67 177 d.name = "Fill f0 gaps within a note";
Chris@67 178 d.description = "Return an f0 value for every frame within each discovered note, interpolating results into any gaps in the measurement";
Chris@67 179 d.unit = ""; //!!! todo: convert this threshold to a meaningful unit!
Chris@67 180 d.minValue = 0;
Chris@67 181 d.maxValue = 1;
Chris@67 182 d.defaultValue = 0;
Chris@67 183 d.isQuantized = true;
Chris@67 184 d.quantizeStep = 1;
Chris@67 185 list.push_back(d);
Chris@67 186
Chris@3 187 return list;
Chris@3 188 }
Chris@3 189
Chris@3 190 float
Chris@31 191 CepstralPitchTracker::getParameter(string identifier) const
Chris@3 192 {
Chris@66 193 if (identifier == "sensitivity") return m_sensitivity;
Chris@66 194 else if (identifier == "slack") return m_slack;
Chris@66 195 else if (identifier == "threshold") return m_threshold;
Chris@67 196 else if (identifier == "fill") return (m_fillGaps ? 1 : 0);
Chris@3 197 return 0.f;
Chris@3 198 }
Chris@3 199
Chris@3 200 void
Chris@31 201 CepstralPitchTracker::setParameter(string identifier, float value)
Chris@3 202 {
Chris@66 203 if (identifier == "sensitivity") m_sensitivity = value;
Chris@66 204 else if (identifier == "slack") m_slack = value;
Chris@66 205 else if (identifier == "threshold") m_threshold = value;
Chris@67 206 else if (identifier == "fill") m_fillGaps = (value > 0.5);
Chris@3 207 }
Chris@3 208
Chris@31 209 CepstralPitchTracker::ProgramList
Chris@31 210 CepstralPitchTracker::getPrograms() const
Chris@3 211 {
Chris@3 212 ProgramList list;
Chris@3 213 return list;
Chris@3 214 }
Chris@3 215
Chris@3 216 string
Chris@31 217 CepstralPitchTracker::getCurrentProgram() const
Chris@3 218 {
Chris@3 219 return ""; // no programs
Chris@3 220 }
Chris@3 221
Chris@3 222 void
Chris@31 223 CepstralPitchTracker::selectProgram(string name)
Chris@3 224 {
Chris@3 225 }
Chris@3 226
Chris@31 227 CepstralPitchTracker::OutputList
Chris@31 228 CepstralPitchTracker::getOutputDescriptors() const
Chris@3 229 {
Chris@3 230 OutputList outputs;
Chris@3 231
Chris@3 232 OutputDescriptor d;
Chris@3 233
Chris@3 234 d.identifier = "f0";
Chris@3 235 d.name = "Estimated f0";
Chris@3 236 d.description = "Estimated fundamental frequency";
Chris@3 237 d.unit = "Hz";
Chris@3 238 d.hasFixedBinCount = true;
Chris@3 239 d.binCount = 1;
Chris@3 240 d.hasKnownExtents = true;
Chris@3 241 d.minValue = m_fmin;
Chris@3 242 d.maxValue = m_fmax;
Chris@3 243 d.isQuantized = false;
Chris@3 244 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@3 245 d.sampleRate = (m_inputSampleRate / m_stepSize);
Chris@3 246 d.hasDuration = false;
Chris@3 247 outputs.push_back(d);
Chris@3 248
Chris@16 249 d.identifier = "notes";
Chris@16 250 d.name = "Notes";
Chris@16 251 d.description = "Derived fixed-pitch note frequencies";
Chris@16 252 d.unit = "Hz";
Chris@16 253 d.hasFixedBinCount = true;
Chris@16 254 d.binCount = 1;
Chris@16 255 d.hasKnownExtents = true;
Chris@16 256 d.minValue = m_fmin;
Chris@16 257 d.maxValue = m_fmax;
Chris@16 258 d.isQuantized = false;
Chris@16 259 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@16 260 d.sampleRate = (m_inputSampleRate / m_stepSize);
Chris@16 261 d.hasDuration = true;
Chris@16 262 outputs.push_back(d);
Chris@16 263
Chris@66 264 d.identifier = "raw";
Chris@66 265 d.name = "Raw frequencies";
Chris@66 266 d.description = "Raw peak frequencies from cepstrum, including unvoiced segments";
Chris@66 267 d.unit = "Hz";
Chris@66 268 d.hasFixedBinCount = true;
Chris@66 269 d.binCount = 1;
Chris@66 270 d.hasKnownExtents = true;
Chris@66 271 d.minValue = m_fmin;
Chris@66 272 d.maxValue = m_fmax;
Chris@66 273 d.isQuantized = false;
Chris@66 274 d.sampleType = OutputDescriptor::OneSamplePerStep;
Chris@66 275 d.hasDuration = false;
Chris@66 276 outputs.push_back(d);
Chris@66 277
Chris@3 278 return outputs;
Chris@3 279 }
Chris@3 280
Chris@3 281 bool
Chris@31 282 CepstralPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@3 283 {
Chris@3 284 if (channels < getMinChannelCount() ||
Chris@3 285 channels > getMaxChannelCount()) return false;
Chris@3 286
Chris@31 287 // std::cerr << "CepstralPitchTracker::initialise: channels = " << channels
Chris@3 288 // << ", stepSize = " << stepSize << ", blockSize = " << blockSize
Chris@3 289 // << std::endl;
Chris@3 290
Chris@3 291 m_channels = channels;
Chris@3 292 m_stepSize = stepSize;
Chris@3 293 m_blockSize = blockSize;
Chris@3 294
Chris@3 295 m_binFrom = int(m_inputSampleRate / m_fmax);
Chris@3 296 m_binTo = int(m_inputSampleRate / m_fmin);
Chris@3 297
Chris@3 298 if (m_binTo >= (int)m_blockSize / 2) {
Chris@3 299 m_binTo = m_blockSize / 2 - 1;
Chris@3 300 }
Chris@56 301 if (m_binFrom >= m_binTo) {
Chris@56 302 // shouldn't happen except for degenerate samplerate / blocksize combos
Chris@56 303 m_binFrom = m_binTo - 1;
Chris@56 304 }
Chris@3 305
Chris@3 306 m_bins = (m_binTo - m_binFrom) + 1;
Chris@3 307
Chris@3 308 reset();
Chris@3 309
Chris@3 310 return true;
Chris@3 311 }
Chris@3 312
Chris@3 313 void
Chris@31 314 CepstralPitchTracker::reset()
Chris@3 315 {
Chris@56 316 delete m_feeder;
Chris@66 317 m_feeder = new AgentFeeder(m_slack);
Chris@57 318 m_nAccepted = 0;
Chris@3 319 }
Chris@3 320
Chris@3 321 void
Chris@35 322 CepstralPitchTracker::addFeaturesFrom(NoteHypothesis h, FeatureSet &fs)
Chris@30 323 {
Chris@35 324 NoteHypothesis::Estimates es = h.getAcceptedEstimates();
Chris@67 325 NoteHypothesis::Note n = h.getAveragedNote();
Chris@30 326
Chris@67 327 if (!m_fillGaps) {
Chris@67 328
Chris@67 329 for (int i = 0; i < (int)es.size(); ++i) {
Chris@67 330 Feature f;
Chris@67 331 f.hasTimestamp = true;
Chris@67 332 f.timestamp = es[i].time;
Chris@67 333 f.values.push_back(es[i].freq);
Chris@67 334 fs[0].push_back(f);
Chris@67 335 }
Chris@67 336
Chris@67 337 } else {
Chris@67 338
Chris@67 339 int ix = 0;
Chris@67 340 RealTime increment = RealTime::frame2RealTime
Chris@67 341 (m_stepSize, m_inputSampleRate);
Chris@67 342
Chris@67 343 float freq = 0;
Chris@67 344
Chris@67 345 for (RealTime t = n.time; t < n.time + n.duration; t = t + increment) {
Chris@67 346 if (ix < (int)es.size() && t >= es[ix].time) {
Chris@67 347 freq = es[ix].freq;
Chris@67 348 ++ix;
Chris@67 349 }
Chris@67 350 if (freq > 0) {
Chris@67 351 Feature f;
Chris@67 352 f.hasTimestamp = true;
Chris@67 353 f.timestamp = t;
Chris@67 354 f.values.push_back(freq);
Chris@67 355 fs[0].push_back(f);
Chris@67 356 }
Chris@67 357 }
Chris@30 358 }
Chris@30 359
Chris@30 360 Feature nf;
Chris@30 361 nf.hasTimestamp = true;
Chris@30 362 nf.hasDuration = true;
Chris@30 363 nf.timestamp = n.time;
Chris@30 364 nf.duration = n.duration;
Chris@30 365 nf.values.push_back(n.freq);
Chris@30 366 fs[1].push_back(nf);
Chris@30 367 }
Chris@30 368
Chris@57 369 void
Chris@57 370 CepstralPitchTracker::addNewFeatures(FeatureSet &fs)
Chris@57 371 {
Chris@57 372 int n = m_feeder->getAcceptedHypotheses().size();
Chris@57 373 if (n == m_nAccepted) return;
Chris@57 374
Chris@57 375 AgentFeeder::Hypotheses accepted = m_feeder->getAcceptedHypotheses();
Chris@57 376
Chris@57 377 for (int i = m_nAccepted; i < n; ++i) {
Chris@57 378 addFeaturesFrom(accepted[i], fs);
Chris@57 379 }
Chris@57 380
Chris@57 381 m_nAccepted = n;
Chris@57 382 }
Chris@57 383
Chris@31 384 CepstralPitchTracker::FeatureSet
Chris@31 385 CepstralPitchTracker::process(const float *const *inputBuffers, RealTime timestamp)
Chris@3 386 {
Chris@51 387 double *rawcep = new double[m_blockSize];
Chris@51 388 double magmean = Cepstrum(m_blockSize).process(inputBuffers[0], rawcep);
Chris@3 389
Chris@3 390 int n = m_bins;
Chris@3 391 double *data = new double[n];
Chris@51 392 MeanFilter(m_vflen).filterSubsequence
Chris@51 393 (rawcep, data, m_blockSize, n, m_binFrom);
Chris@51 394
Chris@3 395 delete[] rawcep;
Chris@3 396
Chris@3 397 double maxval = 0.0;
Chris@6 398 int maxbin = -1;
Chris@3 399
Chris@3 400 for (int i = 0; i < n; ++i) {
Chris@3 401 if (data[i] > maxval) {
Chris@3 402 maxval = data[i];
Chris@3 403 maxbin = i;
Chris@3 404 }
Chris@3 405 }
Chris@3 406
Chris@15 407 if (maxbin < 0) {
Chris@15 408 delete[] data;
Chris@57 409 return FeatureSet();
Chris@15 410 }
Chris@15 411
Chris@15 412 double nextPeakVal = 0.0;
Chris@15 413 for (int i = 1; i+1 < n; ++i) {
Chris@15 414 if (data[i] > data[i-1] &&
Chris@15 415 data[i] > data[i+1] &&
Chris@15 416 i != maxbin &&
Chris@15 417 data[i] > nextPeakVal) {
Chris@15 418 nextPeakVal = data[i];
Chris@15 419 }
Chris@15 420 }
Chris@8 421
Chris@50 422 PeakInterpolator pi;
Chris@50 423 double cimax = pi.findPeakLocation(data, m_bins, maxbin);
Chris@18 424 double peakfreq = m_inputSampleRate / (cimax + m_binFrom);
Chris@15 425
Chris@66 426 FeatureSet fs;
Chris@66 427 Feature rawf;
Chris@66 428 rawf.hasTimestamp = false;
Chris@66 429 rawf.hasDuration = false;
Chris@66 430 rawf.values.push_back(peakfreq);
Chris@66 431 fs[2].push_back(rawf);
Chris@66 432
Chris@15 433 double confidence = 0.0;
Chris@51 434
Chris@15 435 if (nextPeakVal != 0.0) {
Chris@66 436 confidence = (maxval - nextPeakVal) * m_sensitivity;
Chris@66 437 if (magmean < m_threshold) confidence = 0.0;
Chris@15 438 }
Chris@15 439
Chris@57 440 delete[] data;
Chris@57 441
Chris@35 442 NoteHypothesis::Estimate e;
Chris@8 443 e.freq = peakfreq;
Chris@8 444 e.time = timestamp;
Chris@15 445 e.confidence = confidence;
Chris@8 446
Chris@56 447 m_feeder->feed(e);
Chris@14 448
Chris@57 449 addNewFeatures(fs);
Chris@3 450 return fs;
Chris@3 451 }
Chris@3 452
Chris@31 453 CepstralPitchTracker::FeatureSet
Chris@31 454 CepstralPitchTracker::getRemainingFeatures()
Chris@3 455 {
Chris@56 456 m_feeder->finish();
Chris@56 457
Chris@3 458 FeatureSet fs;
Chris@57 459 addNewFeatures(fs);
Chris@3 460 return fs;
Chris@3 461 }