annotate CepstralPitchTracker.cpp @ 56:d8eeba570d70

Switch to using feeder in plugin
author Chris Cannam
date Thu, 27 Sep 2012 17:14:11 +0100
parents 0997774f5fdc
children 82b3cdf6ca6b
rev   line source
Chris@3 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@3 2 /*
Chris@31 3 This file is Copyright (c) 2012 Chris Cannam
Chris@31 4
Chris@3 5 Permission is hereby granted, free of charge, to any person
Chris@3 6 obtaining a copy of this software and associated documentation
Chris@3 7 files (the "Software"), to deal in the Software without
Chris@3 8 restriction, including without limitation the rights to use, copy,
Chris@3 9 modify, merge, publish, distribute, sublicense, and/or sell copies
Chris@3 10 of the Software, and to permit persons to whom the Software is
Chris@3 11 furnished to do so, subject to the following conditions:
Chris@3 12
Chris@3 13 The above copyright notice and this permission notice shall be
Chris@3 14 included in all copies or substantial portions of the Software.
Chris@3 15
Chris@3 16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
Chris@3 17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
Chris@3 18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
Chris@3 19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
Chris@3 20 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
Chris@3 21 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
Chris@3 22 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Chris@3 23 */
Chris@3 24
Chris@31 25 #include "CepstralPitchTracker.h"
Chris@51 26 #include "Cepstrum.h"
Chris@47 27 #include "MeanFilter.h"
Chris@50 28 #include "PeakInterpolator.h"
Chris@56 29 #include "AgentFeeder.h"
Chris@3 30
Chris@26 31 #include "vamp-sdk/FFT.h"
Chris@26 32
Chris@3 33 #include <vector>
Chris@3 34 #include <algorithm>
Chris@3 35
Chris@3 36 #include <cstdio>
Chris@3 37 #include <cmath>
Chris@3 38 #include <complex>
Chris@3 39
Chris@3 40 using std::string;
Chris@7 41 using std::vector;
Chris@16 42 using Vamp::RealTime;
Chris@7 43
Chris@16 44
Chris@31 45 CepstralPitchTracker::CepstralPitchTracker(float inputSampleRate) :
Chris@3 46 Plugin(inputSampleRate),
Chris@3 47 m_channels(0),
Chris@3 48 m_stepSize(256),
Chris@3 49 m_blockSize(1024),
Chris@3 50 m_fmin(50),
Chris@25 51 m_fmax(900),
Chris@18 52 m_vflen(1),
Chris@3 53 m_binFrom(0),
Chris@3 54 m_binTo(0),
Chris@56 55 m_bins(0),
Chris@56 56 m_feeder(0)
Chris@3 57 {
Chris@3 58 }
Chris@3 59
Chris@31 60 CepstralPitchTracker::~CepstralPitchTracker()
Chris@3 61 {
Chris@56 62 delete m_feeder;
Chris@3 63 }
Chris@3 64
Chris@3 65 string
Chris@31 66 CepstralPitchTracker::getIdentifier() const
Chris@3 67 {
Chris@39 68 return "cepstral-pitchtracker";
Chris@3 69 }
Chris@3 70
Chris@3 71 string
Chris@31 72 CepstralPitchTracker::getName() const
Chris@3 73 {
Chris@39 74 return "Cepstral Pitch Tracker";
Chris@3 75 }
Chris@3 76
Chris@3 77 string
Chris@31 78 CepstralPitchTracker::getDescription() const
Chris@3 79 {
Chris@3 80 return "Estimate f0 of monophonic material using a cepstrum method.";
Chris@3 81 }
Chris@3 82
Chris@3 83 string
Chris@31 84 CepstralPitchTracker::getMaker() const
Chris@3 85 {
Chris@3 86 return "Chris Cannam";
Chris@3 87 }
Chris@3 88
Chris@3 89 int
Chris@31 90 CepstralPitchTracker::getPluginVersion() const
Chris@3 91 {
Chris@3 92 // Increment this each time you release a version that behaves
Chris@3 93 // differently from the previous one
Chris@3 94 return 1;
Chris@3 95 }
Chris@3 96
Chris@3 97 string
Chris@31 98 CepstralPitchTracker::getCopyright() const
Chris@3 99 {
Chris@3 100 return "Freely redistributable (BSD license)";
Chris@3 101 }
Chris@3 102
Chris@31 103 CepstralPitchTracker::InputDomain
Chris@31 104 CepstralPitchTracker::getInputDomain() const
Chris@3 105 {
Chris@3 106 return FrequencyDomain;
Chris@3 107 }
Chris@3 108
Chris@3 109 size_t
Chris@31 110 CepstralPitchTracker::getPreferredBlockSize() const
Chris@3 111 {
Chris@3 112 return 1024;
Chris@3 113 }
Chris@3 114
Chris@3 115 size_t
Chris@31 116 CepstralPitchTracker::getPreferredStepSize() const
Chris@3 117 {
Chris@3 118 return 256;
Chris@3 119 }
Chris@3 120
Chris@3 121 size_t
Chris@31 122 CepstralPitchTracker::getMinChannelCount() const
Chris@3 123 {
Chris@3 124 return 1;
Chris@3 125 }
Chris@3 126
Chris@3 127 size_t
Chris@31 128 CepstralPitchTracker::getMaxChannelCount() const
Chris@3 129 {
Chris@3 130 return 1;
Chris@3 131 }
Chris@3 132
Chris@31 133 CepstralPitchTracker::ParameterList
Chris@31 134 CepstralPitchTracker::getParameterDescriptors() const
Chris@3 135 {
Chris@3 136 ParameterList list;
Chris@3 137 return list;
Chris@3 138 }
Chris@3 139
Chris@3 140 float
Chris@31 141 CepstralPitchTracker::getParameter(string identifier) const
Chris@3 142 {
Chris@3 143 return 0.f;
Chris@3 144 }
Chris@3 145
Chris@3 146 void
Chris@31 147 CepstralPitchTracker::setParameter(string identifier, float value)
Chris@3 148 {
Chris@3 149 }
Chris@3 150
Chris@31 151 CepstralPitchTracker::ProgramList
Chris@31 152 CepstralPitchTracker::getPrograms() const
Chris@3 153 {
Chris@3 154 ProgramList list;
Chris@3 155 return list;
Chris@3 156 }
Chris@3 157
Chris@3 158 string
Chris@31 159 CepstralPitchTracker::getCurrentProgram() const
Chris@3 160 {
Chris@3 161 return ""; // no programs
Chris@3 162 }
Chris@3 163
Chris@3 164 void
Chris@31 165 CepstralPitchTracker::selectProgram(string name)
Chris@3 166 {
Chris@3 167 }
Chris@3 168
Chris@31 169 CepstralPitchTracker::OutputList
Chris@31 170 CepstralPitchTracker::getOutputDescriptors() const
Chris@3 171 {
Chris@3 172 OutputList outputs;
Chris@3 173
Chris@3 174 OutputDescriptor d;
Chris@3 175
Chris@3 176 d.identifier = "f0";
Chris@3 177 d.name = "Estimated f0";
Chris@3 178 d.description = "Estimated fundamental frequency";
Chris@3 179 d.unit = "Hz";
Chris@3 180 d.hasFixedBinCount = true;
Chris@3 181 d.binCount = 1;
Chris@3 182 d.hasKnownExtents = true;
Chris@3 183 d.minValue = m_fmin;
Chris@3 184 d.maxValue = m_fmax;
Chris@3 185 d.isQuantized = false;
Chris@3 186 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@3 187 d.sampleRate = (m_inputSampleRate / m_stepSize);
Chris@3 188 d.hasDuration = false;
Chris@3 189 outputs.push_back(d);
Chris@3 190
Chris@16 191 d.identifier = "notes";
Chris@16 192 d.name = "Notes";
Chris@16 193 d.description = "Derived fixed-pitch note frequencies";
Chris@16 194 d.unit = "Hz";
Chris@16 195 d.hasFixedBinCount = true;
Chris@16 196 d.binCount = 1;
Chris@16 197 d.hasKnownExtents = true;
Chris@16 198 d.minValue = m_fmin;
Chris@16 199 d.maxValue = m_fmax;
Chris@16 200 d.isQuantized = false;
Chris@16 201 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@16 202 d.sampleRate = (m_inputSampleRate / m_stepSize);
Chris@16 203 d.hasDuration = true;
Chris@16 204 outputs.push_back(d);
Chris@16 205
Chris@3 206 return outputs;
Chris@3 207 }
Chris@3 208
Chris@3 209 bool
Chris@31 210 CepstralPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@3 211 {
Chris@3 212 if (channels < getMinChannelCount() ||
Chris@3 213 channels > getMaxChannelCount()) return false;
Chris@3 214
Chris@31 215 // std::cerr << "CepstralPitchTracker::initialise: channels = " << channels
Chris@3 216 // << ", stepSize = " << stepSize << ", blockSize = " << blockSize
Chris@3 217 // << std::endl;
Chris@3 218
Chris@3 219 m_channels = channels;
Chris@3 220 m_stepSize = stepSize;
Chris@3 221 m_blockSize = blockSize;
Chris@3 222
Chris@3 223 m_binFrom = int(m_inputSampleRate / m_fmax);
Chris@3 224 m_binTo = int(m_inputSampleRate / m_fmin);
Chris@3 225
Chris@3 226 if (m_binTo >= (int)m_blockSize / 2) {
Chris@3 227 m_binTo = m_blockSize / 2 - 1;
Chris@3 228 }
Chris@56 229 if (m_binFrom >= m_binTo) {
Chris@56 230 // shouldn't happen except for degenerate samplerate / blocksize combos
Chris@56 231 m_binFrom = m_binTo - 1;
Chris@56 232 }
Chris@3 233
Chris@3 234 m_bins = (m_binTo - m_binFrom) + 1;
Chris@3 235
Chris@3 236 reset();
Chris@3 237
Chris@3 238 return true;
Chris@3 239 }
Chris@3 240
Chris@3 241 void
Chris@31 242 CepstralPitchTracker::reset()
Chris@3 243 {
Chris@56 244 delete m_feeder;
Chris@56 245 m_feeder = new AgentFeeder();
Chris@3 246 }
Chris@3 247
Chris@3 248 void
Chris@35 249 CepstralPitchTracker::addFeaturesFrom(NoteHypothesis h, FeatureSet &fs)
Chris@30 250 {
Chris@35 251 NoteHypothesis::Estimates es = h.getAcceptedEstimates();
Chris@30 252
Chris@35 253 for (int i = 0; i < (int)es.size(); ++i) {
Chris@30 254 Feature f;
Chris@30 255 f.hasTimestamp = true;
Chris@30 256 f.timestamp = es[i].time;
Chris@30 257 f.values.push_back(es[i].freq);
Chris@30 258 fs[0].push_back(f);
Chris@30 259 }
Chris@30 260
Chris@30 261 Feature nf;
Chris@30 262 nf.hasTimestamp = true;
Chris@30 263 nf.hasDuration = true;
Chris@35 264 NoteHypothesis::Note n = h.getAveragedNote();
Chris@30 265 nf.timestamp = n.time;
Chris@30 266 nf.duration = n.duration;
Chris@30 267 nf.values.push_back(n.freq);
Chris@30 268 fs[1].push_back(nf);
Chris@30 269 }
Chris@30 270
Chris@31 271 CepstralPitchTracker::FeatureSet
Chris@31 272 CepstralPitchTracker::process(const float *const *inputBuffers, RealTime timestamp)
Chris@3 273 {
Chris@3 274 FeatureSet fs;
Chris@3 275
Chris@51 276 double *rawcep = new double[m_blockSize];
Chris@51 277 double magmean = Cepstrum(m_blockSize).process(inputBuffers[0], rawcep);
Chris@3 278
Chris@3 279 int n = m_bins;
Chris@3 280 double *data = new double[n];
Chris@51 281 MeanFilter(m_vflen).filterSubsequence
Chris@51 282 (rawcep, data, m_blockSize, n, m_binFrom);
Chris@51 283
Chris@3 284 delete[] rawcep;
Chris@3 285
Chris@3 286 double maxval = 0.0;
Chris@6 287 int maxbin = -1;
Chris@3 288
Chris@3 289 for (int i = 0; i < n; ++i) {
Chris@3 290 if (data[i] > maxval) {
Chris@3 291 maxval = data[i];
Chris@3 292 maxbin = i;
Chris@3 293 }
Chris@3 294 }
Chris@3 295
Chris@15 296 if (maxbin < 0) {
Chris@15 297 delete[] data;
Chris@15 298 return fs;
Chris@15 299 }
Chris@15 300
Chris@15 301 double nextPeakVal = 0.0;
Chris@15 302 for (int i = 1; i+1 < n; ++i) {
Chris@15 303 if (data[i] > data[i-1] &&
Chris@15 304 data[i] > data[i+1] &&
Chris@15 305 i != maxbin &&
Chris@15 306 data[i] > nextPeakVal) {
Chris@15 307 nextPeakVal = data[i];
Chris@15 308 }
Chris@15 309 }
Chris@8 310
Chris@50 311 PeakInterpolator pi;
Chris@50 312 double cimax = pi.findPeakLocation(data, m_bins, maxbin);
Chris@18 313 double peakfreq = m_inputSampleRate / (cimax + m_binFrom);
Chris@15 314
Chris@15 315 double confidence = 0.0;
Chris@51 316 double threshold = 0.1; // for magmean
Chris@51 317
Chris@15 318 if (nextPeakVal != 0.0) {
Chris@27 319 confidence = (maxval - nextPeakVal) * 10.0;
Chris@25 320 if (magmean < threshold) confidence = 0.0;
Chris@39 321 // std::cerr << "magmean = " << magmean << ", confidence = " << confidence << std::endl;
Chris@15 322 }
Chris@15 323
Chris@35 324 NoteHypothesis::Estimate e;
Chris@8 325 e.freq = peakfreq;
Chris@8 326 e.time = timestamp;
Chris@15 327 e.confidence = confidence;
Chris@8 328
Chris@56 329 m_feeder->feed(e);
Chris@14 330
Chris@3 331 delete[] data;
Chris@3 332 return fs;
Chris@3 333 }
Chris@3 334
Chris@31 335 CepstralPitchTracker::FeatureSet
Chris@31 336 CepstralPitchTracker::getRemainingFeatures()
Chris@3 337 {
Chris@56 338 m_feeder->finish();
Chris@56 339
Chris@56 340 AgentFeeder::Hypotheses accepted = m_feeder->getAcceptedHypotheses();
Chris@56 341
Chris@3 342 FeatureSet fs;
Chris@56 343 for (int i = 0; i < accepted.size(); ++i) {
Chris@56 344 addFeaturesFrom(accepted[i], fs);
Chris@11 345 }
Chris@3 346 return fs;
Chris@3 347 }