annotate CepstralPitchTracker.cpp @ 66:7ad142c710c6 parameters

Add some parameters
author Chris Cannam
date Fri, 30 Aug 2013 15:35:49 +0100
parents 9f50a5876dd3
children dd5ab48fd58a
rev   line source
Chris@3 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@3 2 /*
Chris@31 3 This file is Copyright (c) 2012 Chris Cannam
Chris@31 4
Chris@3 5 Permission is hereby granted, free of charge, to any person
Chris@3 6 obtaining a copy of this software and associated documentation
Chris@3 7 files (the "Software"), to deal in the Software without
Chris@3 8 restriction, including without limitation the rights to use, copy,
Chris@3 9 modify, merge, publish, distribute, sublicense, and/or sell copies
Chris@3 10 of the Software, and to permit persons to whom the Software is
Chris@3 11 furnished to do so, subject to the following conditions:
Chris@3 12
Chris@3 13 The above copyright notice and this permission notice shall be
Chris@3 14 included in all copies or substantial portions of the Software.
Chris@3 15
Chris@3 16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
Chris@3 17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
Chris@3 18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
Chris@3 19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
Chris@3 20 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
Chris@3 21 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
Chris@3 22 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Chris@3 23 */
Chris@3 24
Chris@31 25 #include "CepstralPitchTracker.h"
Chris@51 26 #include "Cepstrum.h"
Chris@47 27 #include "MeanFilter.h"
Chris@50 28 #include "PeakInterpolator.h"
Chris@56 29 #include "AgentFeeder.h"
Chris@3 30
Chris@26 31 #include "vamp-sdk/FFT.h"
Chris@26 32
Chris@3 33 #include <vector>
Chris@3 34 #include <algorithm>
Chris@3 35
Chris@3 36 #include <cstdio>
Chris@3 37 #include <cmath>
Chris@3 38 #include <complex>
Chris@3 39
Chris@3 40 using std::string;
Chris@7 41 using std::vector;
Chris@16 42 using Vamp::RealTime;
Chris@7 43
Chris@16 44
Chris@31 45 CepstralPitchTracker::CepstralPitchTracker(float inputSampleRate) :
Chris@3 46 Plugin(inputSampleRate),
Chris@3 47 m_channels(0),
Chris@3 48 m_stepSize(256),
Chris@3 49 m_blockSize(1024),
Chris@3 50 m_fmin(50),
Chris@25 51 m_fmax(900),
Chris@18 52 m_vflen(1),
Chris@66 53 m_slack(40),
Chris@66 54 m_sensitivity(10),
Chris@66 55 m_threshold(0.1),
Chris@3 56 m_binFrom(0),
Chris@3 57 m_binTo(0),
Chris@56 58 m_bins(0),
Chris@57 59 m_nAccepted(0),
Chris@56 60 m_feeder(0)
Chris@3 61 {
Chris@3 62 }
Chris@3 63
Chris@31 64 CepstralPitchTracker::~CepstralPitchTracker()
Chris@3 65 {
Chris@56 66 delete m_feeder;
Chris@3 67 }
Chris@3 68
Chris@3 69 string
Chris@31 70 CepstralPitchTracker::getIdentifier() const
Chris@3 71 {
Chris@39 72 return "cepstral-pitchtracker";
Chris@3 73 }
Chris@3 74
Chris@3 75 string
Chris@31 76 CepstralPitchTracker::getName() const
Chris@3 77 {
Chris@39 78 return "Cepstral Pitch Tracker";
Chris@3 79 }
Chris@3 80
Chris@3 81 string
Chris@31 82 CepstralPitchTracker::getDescription() const
Chris@3 83 {
Chris@3 84 return "Estimate f0 of monophonic material using a cepstrum method.";
Chris@3 85 }
Chris@3 86
Chris@3 87 string
Chris@31 88 CepstralPitchTracker::getMaker() const
Chris@3 89 {
Chris@3 90 return "Chris Cannam";
Chris@3 91 }
Chris@3 92
Chris@3 93 int
Chris@31 94 CepstralPitchTracker::getPluginVersion() const
Chris@3 95 {
Chris@3 96 // Increment this each time you release a version that behaves
Chris@3 97 // differently from the previous one
Chris@66 98 return 2;
Chris@3 99 }
Chris@3 100
Chris@3 101 string
Chris@31 102 CepstralPitchTracker::getCopyright() const
Chris@3 103 {
Chris@3 104 return "Freely redistributable (BSD license)";
Chris@3 105 }
Chris@3 106
Chris@31 107 CepstralPitchTracker::InputDomain
Chris@31 108 CepstralPitchTracker::getInputDomain() const
Chris@3 109 {
Chris@3 110 return FrequencyDomain;
Chris@3 111 }
Chris@3 112
Chris@3 113 size_t
Chris@31 114 CepstralPitchTracker::getPreferredBlockSize() const
Chris@3 115 {
Chris@3 116 return 1024;
Chris@3 117 }
Chris@3 118
Chris@3 119 size_t
Chris@31 120 CepstralPitchTracker::getPreferredStepSize() const
Chris@3 121 {
Chris@3 122 return 256;
Chris@3 123 }
Chris@3 124
Chris@3 125 size_t
Chris@31 126 CepstralPitchTracker::getMinChannelCount() const
Chris@3 127 {
Chris@3 128 return 1;
Chris@3 129 }
Chris@3 130
Chris@3 131 size_t
Chris@31 132 CepstralPitchTracker::getMaxChannelCount() const
Chris@3 133 {
Chris@3 134 return 1;
Chris@3 135 }
Chris@3 136
Chris@31 137 CepstralPitchTracker::ParameterList
Chris@31 138 CepstralPitchTracker::getParameterDescriptors() const
Chris@3 139 {
Chris@3 140 ParameterList list;
Chris@66 141
Chris@66 142 ParameterDescriptor d;
Chris@66 143 d.identifier = "sensitivity";
Chris@66 144 d.name = "Sensitivity";
Chris@66 145 d.description = "Sensitivity of the voicing detector";
Chris@66 146 d.unit = "";
Chris@66 147 d.minValue = 0;
Chris@66 148 d.maxValue = 100;
Chris@66 149 d.defaultValue = 10;
Chris@66 150 d.isQuantized = true;
Chris@66 151 d.quantizeStep = 1;
Chris@66 152 list.push_back(d);
Chris@66 153
Chris@66 154 d.identifier = "slack";
Chris@66 155 d.name = "Slack";
Chris@66 156 d.description = "Maximum permissible length of voicing gap for a continuous note";
Chris@66 157 d.unit = "ms";
Chris@66 158 d.minValue = 0;
Chris@66 159 d.maxValue = 200;
Chris@66 160 d.defaultValue = 40;
Chris@66 161 d.isQuantized = true;
Chris@66 162 d.quantizeStep = 1;
Chris@66 163 list.push_back(d);
Chris@66 164
Chris@66 165 d.identifier = "threshold";
Chris@66 166 d.name = "Silence threshold";
Chris@66 167 d.description = "Threshold for silence detection";
Chris@66 168 d.unit = ""; //!!! todo: convert this threshold to a meaningful unit!
Chris@66 169 d.minValue = 0;
Chris@66 170 d.maxValue = 0.5;
Chris@66 171 d.defaultValue = 0.1;
Chris@66 172 d.isQuantized = false;
Chris@66 173 list.push_back(d);
Chris@66 174
Chris@3 175 return list;
Chris@3 176 }
Chris@3 177
Chris@3 178 float
Chris@31 179 CepstralPitchTracker::getParameter(string identifier) const
Chris@3 180 {
Chris@66 181 if (identifier == "sensitivity") return m_sensitivity;
Chris@66 182 else if (identifier == "slack") return m_slack;
Chris@66 183 else if (identifier == "threshold") return m_threshold;
Chris@3 184 return 0.f;
Chris@3 185 }
Chris@3 186
Chris@3 187 void
Chris@31 188 CepstralPitchTracker::setParameter(string identifier, float value)
Chris@3 189 {
Chris@66 190 if (identifier == "sensitivity") m_sensitivity = value;
Chris@66 191 else if (identifier == "slack") m_slack = value;
Chris@66 192 else if (identifier == "threshold") m_threshold = value;
Chris@3 193 }
Chris@3 194
Chris@31 195 CepstralPitchTracker::ProgramList
Chris@31 196 CepstralPitchTracker::getPrograms() const
Chris@3 197 {
Chris@3 198 ProgramList list;
Chris@3 199 return list;
Chris@3 200 }
Chris@3 201
Chris@3 202 string
Chris@31 203 CepstralPitchTracker::getCurrentProgram() const
Chris@3 204 {
Chris@3 205 return ""; // no programs
Chris@3 206 }
Chris@3 207
Chris@3 208 void
Chris@31 209 CepstralPitchTracker::selectProgram(string name)
Chris@3 210 {
Chris@3 211 }
Chris@3 212
Chris@31 213 CepstralPitchTracker::OutputList
Chris@31 214 CepstralPitchTracker::getOutputDescriptors() const
Chris@3 215 {
Chris@3 216 OutputList outputs;
Chris@3 217
Chris@3 218 OutputDescriptor d;
Chris@3 219
Chris@3 220 d.identifier = "f0";
Chris@3 221 d.name = "Estimated f0";
Chris@3 222 d.description = "Estimated fundamental frequency";
Chris@3 223 d.unit = "Hz";
Chris@3 224 d.hasFixedBinCount = true;
Chris@3 225 d.binCount = 1;
Chris@3 226 d.hasKnownExtents = true;
Chris@3 227 d.minValue = m_fmin;
Chris@3 228 d.maxValue = m_fmax;
Chris@3 229 d.isQuantized = false;
Chris@3 230 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@3 231 d.sampleRate = (m_inputSampleRate / m_stepSize);
Chris@3 232 d.hasDuration = false;
Chris@3 233 outputs.push_back(d);
Chris@3 234
Chris@16 235 d.identifier = "notes";
Chris@16 236 d.name = "Notes";
Chris@16 237 d.description = "Derived fixed-pitch note frequencies";
Chris@16 238 d.unit = "Hz";
Chris@16 239 d.hasFixedBinCount = true;
Chris@16 240 d.binCount = 1;
Chris@16 241 d.hasKnownExtents = true;
Chris@16 242 d.minValue = m_fmin;
Chris@16 243 d.maxValue = m_fmax;
Chris@16 244 d.isQuantized = false;
Chris@16 245 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@16 246 d.sampleRate = (m_inputSampleRate / m_stepSize);
Chris@16 247 d.hasDuration = true;
Chris@16 248 outputs.push_back(d);
Chris@16 249
Chris@66 250 d.identifier = "raw";
Chris@66 251 d.name = "Raw frequencies";
Chris@66 252 d.description = "Raw peak frequencies from cepstrum, including unvoiced segments";
Chris@66 253 d.unit = "Hz";
Chris@66 254 d.hasFixedBinCount = true;
Chris@66 255 d.binCount = 1;
Chris@66 256 d.hasKnownExtents = true;
Chris@66 257 d.minValue = m_fmin;
Chris@66 258 d.maxValue = m_fmax;
Chris@66 259 d.isQuantized = false;
Chris@66 260 d.sampleType = OutputDescriptor::OneSamplePerStep;
Chris@66 261 d.hasDuration = false;
Chris@66 262 outputs.push_back(d);
Chris@66 263
Chris@3 264 return outputs;
Chris@3 265 }
Chris@3 266
Chris@3 267 bool
Chris@31 268 CepstralPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@3 269 {
Chris@3 270 if (channels < getMinChannelCount() ||
Chris@3 271 channels > getMaxChannelCount()) return false;
Chris@3 272
Chris@31 273 // std::cerr << "CepstralPitchTracker::initialise: channels = " << channels
Chris@3 274 // << ", stepSize = " << stepSize << ", blockSize = " << blockSize
Chris@3 275 // << std::endl;
Chris@3 276
Chris@3 277 m_channels = channels;
Chris@3 278 m_stepSize = stepSize;
Chris@3 279 m_blockSize = blockSize;
Chris@3 280
Chris@3 281 m_binFrom = int(m_inputSampleRate / m_fmax);
Chris@3 282 m_binTo = int(m_inputSampleRate / m_fmin);
Chris@3 283
Chris@3 284 if (m_binTo >= (int)m_blockSize / 2) {
Chris@3 285 m_binTo = m_blockSize / 2 - 1;
Chris@3 286 }
Chris@56 287 if (m_binFrom >= m_binTo) {
Chris@56 288 // shouldn't happen except for degenerate samplerate / blocksize combos
Chris@56 289 m_binFrom = m_binTo - 1;
Chris@56 290 }
Chris@3 291
Chris@3 292 m_bins = (m_binTo - m_binFrom) + 1;
Chris@3 293
Chris@3 294 reset();
Chris@3 295
Chris@3 296 return true;
Chris@3 297 }
Chris@3 298
Chris@3 299 void
Chris@31 300 CepstralPitchTracker::reset()
Chris@3 301 {
Chris@56 302 delete m_feeder;
Chris@66 303 m_feeder = new AgentFeeder(m_slack);
Chris@57 304 m_nAccepted = 0;
Chris@3 305 }
Chris@3 306
Chris@3 307 void
Chris@35 308 CepstralPitchTracker::addFeaturesFrom(NoteHypothesis h, FeatureSet &fs)
Chris@30 309 {
Chris@35 310 NoteHypothesis::Estimates es = h.getAcceptedEstimates();
Chris@30 311
Chris@35 312 for (int i = 0; i < (int)es.size(); ++i) {
Chris@30 313 Feature f;
Chris@30 314 f.hasTimestamp = true;
Chris@30 315 f.timestamp = es[i].time;
Chris@30 316 f.values.push_back(es[i].freq);
Chris@30 317 fs[0].push_back(f);
Chris@30 318 }
Chris@30 319
Chris@30 320 Feature nf;
Chris@30 321 nf.hasTimestamp = true;
Chris@30 322 nf.hasDuration = true;
Chris@35 323 NoteHypothesis::Note n = h.getAveragedNote();
Chris@30 324 nf.timestamp = n.time;
Chris@30 325 nf.duration = n.duration;
Chris@30 326 nf.values.push_back(n.freq);
Chris@30 327 fs[1].push_back(nf);
Chris@30 328 }
Chris@30 329
Chris@57 330 void
Chris@57 331 CepstralPitchTracker::addNewFeatures(FeatureSet &fs)
Chris@57 332 {
Chris@57 333 int n = m_feeder->getAcceptedHypotheses().size();
Chris@57 334 if (n == m_nAccepted) return;
Chris@57 335
Chris@57 336 AgentFeeder::Hypotheses accepted = m_feeder->getAcceptedHypotheses();
Chris@57 337
Chris@57 338 for (int i = m_nAccepted; i < n; ++i) {
Chris@57 339 addFeaturesFrom(accepted[i], fs);
Chris@57 340 }
Chris@57 341
Chris@57 342 m_nAccepted = n;
Chris@57 343 }
Chris@57 344
Chris@31 345 CepstralPitchTracker::FeatureSet
Chris@31 346 CepstralPitchTracker::process(const float *const *inputBuffers, RealTime timestamp)
Chris@3 347 {
Chris@51 348 double *rawcep = new double[m_blockSize];
Chris@51 349 double magmean = Cepstrum(m_blockSize).process(inputBuffers[0], rawcep);
Chris@3 350
Chris@3 351 int n = m_bins;
Chris@3 352 double *data = new double[n];
Chris@51 353 MeanFilter(m_vflen).filterSubsequence
Chris@51 354 (rawcep, data, m_blockSize, n, m_binFrom);
Chris@51 355
Chris@3 356 delete[] rawcep;
Chris@3 357
Chris@3 358 double maxval = 0.0;
Chris@6 359 int maxbin = -1;
Chris@3 360
Chris@3 361 for (int i = 0; i < n; ++i) {
Chris@3 362 if (data[i] > maxval) {
Chris@3 363 maxval = data[i];
Chris@3 364 maxbin = i;
Chris@3 365 }
Chris@3 366 }
Chris@3 367
Chris@15 368 if (maxbin < 0) {
Chris@15 369 delete[] data;
Chris@57 370 return FeatureSet();
Chris@15 371 }
Chris@15 372
Chris@15 373 double nextPeakVal = 0.0;
Chris@15 374 for (int i = 1; i+1 < n; ++i) {
Chris@15 375 if (data[i] > data[i-1] &&
Chris@15 376 data[i] > data[i+1] &&
Chris@15 377 i != maxbin &&
Chris@15 378 data[i] > nextPeakVal) {
Chris@15 379 nextPeakVal = data[i];
Chris@15 380 }
Chris@15 381 }
Chris@8 382
Chris@50 383 PeakInterpolator pi;
Chris@50 384 double cimax = pi.findPeakLocation(data, m_bins, maxbin);
Chris@18 385 double peakfreq = m_inputSampleRate / (cimax + m_binFrom);
Chris@15 386
Chris@66 387 FeatureSet fs;
Chris@66 388 Feature rawf;
Chris@66 389 rawf.hasTimestamp = false;
Chris@66 390 rawf.hasDuration = false;
Chris@66 391 rawf.values.push_back(peakfreq);
Chris@66 392 fs[2].push_back(rawf);
Chris@66 393
Chris@15 394 double confidence = 0.0;
Chris@51 395
Chris@15 396 if (nextPeakVal != 0.0) {
Chris@66 397 confidence = (maxval - nextPeakVal) * m_sensitivity;
Chris@66 398 if (magmean < m_threshold) confidence = 0.0;
Chris@15 399 }
Chris@15 400
Chris@57 401 delete[] data;
Chris@57 402
Chris@35 403 NoteHypothesis::Estimate e;
Chris@8 404 e.freq = peakfreq;
Chris@8 405 e.time = timestamp;
Chris@15 406 e.confidence = confidence;
Chris@8 407
Chris@56 408 m_feeder->feed(e);
Chris@14 409
Chris@57 410 addNewFeatures(fs);
Chris@3 411 return fs;
Chris@3 412 }
Chris@3 413
Chris@31 414 CepstralPitchTracker::FeatureSet
Chris@31 415 CepstralPitchTracker::getRemainingFeatures()
Chris@3 416 {
Chris@56 417 m_feeder->finish();
Chris@56 418
Chris@3 419 FeatureSet fs;
Chris@57 420 addNewFeatures(fs);
Chris@3 421 return fs;
Chris@3 422 }