annotate src/TuningDifference.cpp @ 25:85714824256a

Remove some (but not yet all) debug output
author Chris Cannam
date Fri, 06 Feb 2015 09:03:12 +0000
parents 6a75d371938f
children c21ce05afbe4
rev   line source
Chris@21 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@21 2
Chris@21 3 /*
Chris@21 4 Centre for Digital Music, Queen Mary University of London.
Chris@21 5
Chris@21 6 This program is free software; you can redistribute it and/or
Chris@21 7 modify it under the terms of the GNU General Public License as
Chris@21 8 published by the Free Software Foundation; either version 2 of the
Chris@21 9 License, or (at your option) any later version. See the file
Chris@21 10 COPYING included with this distribution for more information.
Chris@21 11 */
Chris@0 12
Chris@0 13 #include "TuningDifference.h"
Chris@0 14
Chris@1 15 #include <iostream>
Chris@1 16
Chris@4 17 #include <cmath>
Chris@4 18 #include <cstdio>
Chris@4 19
Chris@13 20 #include <algorithm>
Chris@1 21
Chris@13 22 using namespace std;
Chris@13 23
// Convert a MIDI-style pitch number, optionally offset by a number of
// cents, into a frequency in Hz. Pitch 69 with no offset maps to
// concertA; each step of 12 in pitch doubles the frequency.
static double pitchToFrequency(int pitch,
                               double centsOffset = 0.,
                               double concertA = 440.)
{
    const double fractionalPitch = double(pitch) + (centsOffset / 100.);
    const double octavesFromReference = (fractionalPitch - 69.0) / 12.0;
    return concertA * std::pow(2.0, octavesFromReference);
}
Chris@13 31
Chris@13 32 static double frequencyForCentsAbove440(double cents)
Chris@13 33 {
Chris@13 34 return pitchToFrequency(69, cents, 440.);
Chris@13 35 }
Chris@5 36
Chris@0 37 TuningDifference::TuningDifference(float inputSampleRate) :
Chris@13 38 Plugin(inputSampleRate),
Chris@13 39 m_bpo(60),
Chris@18 40 m_refChroma(new Chromagram(paramsForTuningFrequency(440.))),
Chris@18 41 m_blockSize(0),
Chris@18 42 m_frameCount(0)
Chris@0 43 {
Chris@0 44 }
Chris@0 45
Chris@0 46 TuningDifference::~TuningDifference()
Chris@0 47 {
Chris@0 48 }
Chris@0 49
Chris@0 50 string
Chris@0 51 TuningDifference::getIdentifier() const
Chris@0 52 {
Chris@1 53 return "tuning-difference";
Chris@0 54 }
Chris@0 55
Chris@0 56 string
Chris@0 57 TuningDifference::getName() const
Chris@0 58 {
Chris@1 59 return "Tuning Difference";
Chris@0 60 }
Chris@0 61
Chris@0 62 string
Chris@0 63 TuningDifference::getDescription() const
Chris@0 64 {
Chris@0 65 // Return something helpful here!
Chris@0 66 return "";
Chris@0 67 }
Chris@0 68
Chris@0 69 string
Chris@0 70 TuningDifference::getMaker() const
Chris@0 71 {
Chris@0 72 // Your name here
Chris@0 73 return "";
Chris@0 74 }
Chris@0 75
Chris@0 76 int
Chris@0 77 TuningDifference::getPluginVersion() const
Chris@0 78 {
Chris@0 79 // Increment this each time you release a version that behaves
Chris@0 80 // differently from the previous one
Chris@0 81 return 1;
Chris@0 82 }
Chris@0 83
Chris@0 84 string
Chris@0 85 TuningDifference::getCopyright() const
Chris@0 86 {
Chris@0 87 // This function is not ideally named. It does not necessarily
Chris@0 88 // need to say who made the plugin -- getMaker does that -- but it
Chris@0 89 // should indicate the terms under which it is distributed. For
Chris@0 90 // example, "Copyright (year). All Rights Reserved", or "GPL"
Chris@0 91 return "";
Chris@0 92 }
Chris@0 93
Chris@0 94 TuningDifference::InputDomain
Chris@0 95 TuningDifference::getInputDomain() const
Chris@0 96 {
Chris@13 97 return TimeDomain;
Chris@0 98 }
Chris@0 99
Chris@0 100 size_t
Chris@0 101 TuningDifference::getPreferredBlockSize() const
Chris@0 102 {
Chris@13 103 return 0;
Chris@0 104 }
Chris@0 105
Chris@0 106 size_t
Chris@0 107 TuningDifference::getPreferredStepSize() const
Chris@0 108 {
Chris@1 109 return 0;
Chris@0 110 }
Chris@0 111
Chris@0 112 size_t
Chris@0 113 TuningDifference::getMinChannelCount() const
Chris@0 114 {
Chris@1 115 return 2;
Chris@0 116 }
Chris@0 117
Chris@0 118 size_t
Chris@0 119 TuningDifference::getMaxChannelCount() const
Chris@0 120 {
Chris@1 121 return 2;
Chris@0 122 }
Chris@0 123
Chris@0 124 TuningDifference::ParameterList
Chris@0 125 TuningDifference::getParameterDescriptors() const
Chris@0 126 {
Chris@0 127 ParameterList list;
Chris@20 128 //!!! parameter: max search range
Chris@20 129 //!!! parameter: fine search precision
Chris@0 130 return list;
Chris@0 131 }
Chris@0 132
Chris@0 133 float
Chris@1 134 TuningDifference::getParameter(string) const
Chris@0 135 {
Chris@0 136 return 0;
Chris@0 137 }
Chris@0 138
Chris@0 139 void
Chris@1 140 TuningDifference::setParameter(string, float)
Chris@0 141 {
Chris@0 142 }
Chris@0 143
Chris@0 144 TuningDifference::ProgramList
Chris@0 145 TuningDifference::getPrograms() const
Chris@0 146 {
Chris@0 147 ProgramList list;
Chris@0 148 return list;
Chris@0 149 }
Chris@0 150
Chris@0 151 string
Chris@0 152 TuningDifference::getCurrentProgram() const
Chris@0 153 {
Chris@0 154 return ""; // no programs
Chris@0 155 }
Chris@0 156
Chris@0 157 void
Chris@1 158 TuningDifference::selectProgram(string)
Chris@0 159 {
Chris@0 160 }
Chris@0 161
Chris@0 162 TuningDifference::OutputList
Chris@0 163 TuningDifference::getOutputDescriptors() const
Chris@0 164 {
Chris@0 165 OutputList list;
Chris@0 166
Chris@1 167 OutputDescriptor d;
Chris@1 168 d.identifier = "cents";
Chris@1 169 d.name = "Tuning Difference";
Chris@1 170 d.description = "Difference in averaged frequency profile between channels 1 and 2, in cents. A positive value means channel 2 is higher.";
Chris@1 171 d.unit = "cents";
Chris@1 172 d.hasFixedBinCount = true;
Chris@1 173 d.binCount = 1;
Chris@1 174 d.hasKnownExtents = false;
Chris@1 175 d.isQuantized = false;
Chris@1 176 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@1 177 d.hasDuration = false;
Chris@13 178 m_outputs[d.identifier] = list.size();
Chris@1 179 list.push_back(d);
Chris@0 180
Chris@1 181 d.identifier = "tuningfreq";
Chris@1 182 d.name = "Relative Tuning Frequency";
Chris@1 183 d.description = "Tuning frequency of channel 2, if channel 1 is assumed to contain the same music as it at a tuning frequency of A=440Hz.";
Chris@4 184 d.unit = "hz";
Chris@1 185 d.hasFixedBinCount = true;
Chris@1 186 d.binCount = 1;
Chris@1 187 d.hasKnownExtents = false;
Chris@1 188 d.isQuantized = false;
Chris@1 189 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@1 190 d.hasDuration = false;
Chris@13 191 m_outputs[d.identifier] = list.size();
Chris@1 192 list.push_back(d);
Chris@1 193
Chris@13 194 d.identifier = "reffeature";
Chris@13 195 d.name = "Reference Feature";
Chris@13 196 d.description = "Chroma feature from reference audio.";
Chris@0 197 d.unit = "";
Chris@0 198 d.hasFixedBinCount = true;
Chris@13 199 d.binCount = m_bpo;
Chris@4 200 d.hasKnownExtents = false;
Chris@4 201 d.isQuantized = false;
Chris@4 202 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@5 203 d.sampleRate = 1;
Chris@4 204 d.hasDuration = false;
Chris@13 205 m_outputs[d.identifier] = list.size();
Chris@13 206 list.push_back(d);
Chris@13 207
Chris@13 208 d.identifier = "otherfeature";
Chris@13 209 d.name = "Other Feature";
Chris@13 210 d.description = "Chroma feature from other audio, before rotation.";
Chris@13 211 d.unit = "";
Chris@13 212 d.hasFixedBinCount = true;
Chris@13 213 d.binCount = m_bpo;
Chris@13 214 d.hasKnownExtents = false;
Chris@13 215 d.isQuantized = false;
Chris@13 216 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@13 217 d.sampleRate = 1;
Chris@13 218 d.hasDuration = false;
Chris@13 219 m_outputs[d.identifier] = list.size();
Chris@13 220 list.push_back(d);
Chris@13 221
Chris@13 222 d.identifier = "rotfeature";
Chris@13 223 d.name = "Other Feature at Rotated Frequency";
Chris@13 224 d.description = "Chroma feature from reference audio calculated with the tuning frequency obtained from rotation matching.";
Chris@13 225 d.unit = "";
Chris@13 226 d.hasFixedBinCount = true;
Chris@13 227 d.binCount = m_bpo;
Chris@13 228 d.hasKnownExtents = false;
Chris@13 229 d.isQuantized = false;
Chris@13 230 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@13 231 d.sampleRate = 1;
Chris@13 232 d.hasDuration = false;
Chris@13 233 m_outputs[d.identifier] = list.size();
Chris@4 234 list.push_back(d);
Chris@4 235
Chris@0 236 return list;
Chris@0 237 }
Chris@0 238
Chris@0 239 bool
Chris@0 240 TuningDifference::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@0 241 {
Chris@0 242 if (channels < getMinChannelCount() ||
Chris@0 243 channels > getMaxChannelCount()) return false;
Chris@0 244
Chris@13 245 if (stepSize != blockSize) return false;
Chris@0 246
Chris@1 247 m_blockSize = blockSize;
Chris@1 248
Chris@1 249 reset();
Chris@1 250
Chris@0 251 return true;
Chris@0 252 }
Chris@0 253
Chris@0 254 void
Chris@0 255 TuningDifference::reset()
Chris@0 256 {
Chris@13 257 if (m_frameCount > 0) {
Chris@15 258 m_refChroma.reset(new Chromagram(paramsForTuningFrequency(440.)));
Chris@13 259 m_frameCount = 0;
Chris@13 260 }
Chris@15 261 m_refTotals = TFeature(m_bpo, 0.0);
Chris@13 262 m_other.clear();
Chris@13 263 }
Chris@13 264
// Add the elements of b to the corresponding elements of a, in place.
//
// Only the elements the two vectors have in common are touched: if b
// is shorter than a, the tail of a is left unchanged (previously this
// read past the end of b); if b is longer, its excess is ignored.
template<typename T>
void addTo(std::vector<T> &a, const std::vector<T> &b)
{
    typedef typename std::vector<T>::size_type Index;
    const Index count = std::min(a.size(), b.size());
    for (Index i = 0; i < count; ++i) {
        a[i] = a[i] + b[i];
    }
}
Chris@13 270
// Return the sum of the absolute element-wise differences between a
// and b. Iterates over the whole of a, so b must be at least as long
// as a.
template<typename T>
T distance(const std::vector<T> &a, const std::vector<T> &b)
{
    T total = T();
    typename std::vector<T>::const_iterator rhs = b.begin();
    for (typename std::vector<T>::const_iterator lhs = a.begin();
         lhs != a.end(); ++lhs, ++rhs) {
        total = total + static_cast<T>(std::fabs(*lhs - *rhs));
    }
    return total;
}
Chris@13 277
Chris@13 278 TuningDifference::TFeature
Chris@15 279 TuningDifference::computeFeatureFromTotals(const TFeature &totals) const
Chris@13 280 {
Chris@16 281 if (m_frameCount == 0) return totals;
Chris@16 282
Chris@13 283 TFeature feature(m_bpo);
Chris@13 284 double sum = 0.0;
Chris@16 285
Chris@15 286 for (int i = 0; i < m_bpo; ++i) {
Chris@13 287 double value = totals[i] / m_frameCount;
Chris@15 288 feature[i] += value;
Chris@13 289 sum += value;
Chris@13 290 }
Chris@13 291
Chris@13 292 for (int i = 0; i < m_bpo; ++i) {
Chris@13 293 feature[i] /= sum;
Chris@13 294 }
Chris@13 295
Chris@25 296 // cerr << "computeFeatureFromTotals: feature values:" << endl;
Chris@25 297 // for (auto v: feature) cerr << v << " ";
Chris@25 298 // cerr << endl;
Chris@13 299
Chris@13 300 return feature;
Chris@13 301 }
Chris@13 302
Chris@15 303 Chromagram::Parameters
Chris@13 304 TuningDifference::paramsForTuningFrequency(double hz) const
Chris@13 305 {
Chris@15 306 Chromagram::Parameters params(m_inputSampleRate);
Chris@24 307 params.lowestOctave = 2;
Chris@24 308 params.octaveCount = 4;
Chris@20 309 params.binsPerOctave = m_bpo;
Chris@15 310 params.tuningFrequency = hz;
Chris@20 311 params.atomHopFactor = 0.5;
Chris@24 312 params.window = CQParameters::Hann;
Chris@15 313 return params;
Chris@13 314 }
Chris@13 315
Chris@13 316 TuningDifference::TFeature
Chris@13 317 TuningDifference::computeFeatureFromSignal(const Signal &signal, double hz) const
Chris@13 318 {
Chris@15 319 Chromagram chromagram(paramsForTuningFrequency(hz));
Chris@13 320
Chris@15 321 TFeature totals(m_bpo, 0.0);
Chris@16 322
Chris@16 323 cerr << "computeFeatureFromSignal: hz = " << hz << ", frame count = " << m_frameCount << endl;
Chris@13 324
Chris@13 325 for (int i = 0; i < m_frameCount; ++i) {
Chris@13 326 Signal::const_iterator first = signal.begin() + i * m_blockSize;
Chris@13 327 Signal::const_iterator last = first + m_blockSize;
Chris@13 328 if (last > signal.end()) last = signal.end();
Chris@15 329 CQBase::RealSequence input(first, last);
Chris@13 330 input.resize(m_blockSize);
Chris@15 331 CQBase::RealBlock block = chromagram.process(input);
Chris@13 332 for (const auto &v: block) addTo(totals, v);
Chris@13 333 }
Chris@13 334
Chris@13 335 return computeFeatureFromTotals(totals);
Chris@0 336 }
Chris@0 337
Chris@0 338 TuningDifference::FeatureSet
Chris@13 339 TuningDifference::process(const float *const *inputBuffers, Vamp::RealTime)
Chris@0 340 {
Chris@15 341 CQBase::RealBlock block;
Chris@15 342 CQBase::RealSequence input;
Chris@13 343
Chris@15 344 input = CQBase::RealSequence
Chris@13 345 (inputBuffers[0], inputBuffers[0] + m_blockSize);
Chris@15 346 block = m_refChroma->process(input);
Chris@13 347 for (const auto &v: block) addTo(m_refTotals, v);
Chris@13 348
Chris@13 349 m_other.insert(m_other.end(),
Chris@13 350 inputBuffers[1], inputBuffers[1] + m_blockSize);
Chris@1 351
Chris@1 352 ++m_frameCount;
Chris@0 353 return FeatureSet();
Chris@0 354 }
Chris@0 355
Chris@13 356 double
Chris@13 357 TuningDifference::featureDistance(const TFeature &other, int rotation) const
Chris@13 358 {
Chris@13 359 if (rotation == 0) {
Chris@13 360 return distance(m_refFeature, other);
Chris@13 361 } else {
Chris@15 362 // A positive rotation pushes the tuning frequency up for this
Chris@15 363 // chroma, negative one pulls it down. If a positive rotation
Chris@15 364 // makes this chroma match an un-rotated reference, then this
Chris@15 365 // chroma must have initially been lower than the reference.
Chris@13 366 TFeature r(other);
Chris@15 367 if (rotation < 0) {
Chris@15 368 rotate(r.begin(), r.begin() - rotation, r.end());
Chris@13 369 } else {
Chris@15 370 rotate(r.begin(), r.end() - rotation, r.end());
Chris@13 371 }
Chris@13 372 return distance(m_refFeature, r);
Chris@13 373 }
Chris@13 374 }
Chris@13 375
Chris@13 376 int
Chris@13 377 TuningDifference::findBestRotation(const TFeature &other) const
Chris@13 378 {
Chris@13 379 map<double, int> dists;
Chris@13 380
Chris@13 381 int maxSemis = 6;
Chris@13 382 int maxRotation = (m_bpo * maxSemis) / 12;
Chris@13 383
Chris@13 384 for (int r = -maxRotation; r <= maxRotation; ++r) {
Chris@13 385 double dist = featureDistance(other, r);
Chris@13 386 dists[dist] = r;
Chris@25 387 // cerr << "rotation " << r << ": score " << dist << endl;
Chris@13 388 }
Chris@13 389
Chris@13 390 int best = dists.begin()->second;
Chris@13 391
Chris@25 392 // cerr << "best is " << best << endl;
Chris@13 393 return best;
Chris@13 394 }
Chris@13 395
Chris@19 396 pair<int, double>
Chris@16 397 TuningDifference::findFineFrequency(int coarseCents, double coarseScore)
Chris@16 398 {
Chris@16 399 int coarseResolution = 1200 / m_bpo;
Chris@16 400 int searchDistance = coarseResolution/2 - 1;
Chris@16 401
Chris@16 402 double bestScore = coarseScore;
Chris@19 403 int bestCents = coarseCents;
Chris@16 404 double bestHz = frequencyForCentsAbove440(coarseCents);
Chris@16 405
Chris@16 406 cerr << "corresponding coarse Hz " << bestHz << " scores " << coarseScore << endl;
Chris@16 407 cerr << "searchDistance = " << searchDistance << endl;
Chris@16 408
Chris@16 409 for (int sign = -1; sign <= 1; sign += 2) {
Chris@16 410 for (int offset = 1; offset <= searchDistance; ++offset) {
Chris@16 411
Chris@16 412 int fineCents = coarseCents + sign * offset;
Chris@16 413
Chris@16 414 cerr << "trying with fineCents = " << fineCents << "..." << endl;
Chris@16 415
Chris@16 416 double fineHz = frequencyForCentsAbove440(fineCents);
Chris@16 417 TFeature fineFeature = computeFeatureFromSignal(m_other, fineHz);
Chris@16 418 double fineScore = featureDistance(fineFeature);
Chris@16 419
Chris@16 420 cerr << "fine offset = " << offset << ", cents = " << fineCents
Chris@16 421 << ", Hz = " << fineHz << ", score " << fineScore
Chris@16 422 << " (best score so far " << bestScore << ")" << endl;
Chris@16 423
Chris@16 424 if (fineScore < bestScore) {
Chris@16 425 cerr << "is good!" << endl;
Chris@16 426 bestScore = fineScore;
Chris@19 427 bestCents = fineCents;
Chris@17 428 bestHz = fineHz;
Chris@16 429 } else {
Chris@16 430 break;
Chris@16 431 }
Chris@16 432 }
Chris@16 433 }
Chris@16 434
Chris@20 435 //!!! could keep a vector of scores & then interpolate...
Chris@20 436
Chris@19 437 return pair<int, double>(bestCents, bestHz);
Chris@16 438 }
Chris@16 439
Chris@0 440 TuningDifference::FeatureSet
Chris@0 441 TuningDifference::getRemainingFeatures()
Chris@0 442 {
Chris@13 443 FeatureSet fs;
Chris@13 444 if (m_frameCount == 0) return fs;
Chris@13 445
Chris@13 446 m_refFeature = computeFeatureFromTotals(m_refTotals);
Chris@13 447 TFeature otherFeature = computeFeatureFromSignal(m_other, 440.);
Chris@1 448
Chris@1 449 Feature f;
Chris@1 450
Chris@4 451 f.values.clear();
Chris@13 452 for (auto v: m_refFeature) f.values.push_back(v);
Chris@13 453 fs[m_outputs["reffeature"]].push_back(f);
Chris@4 454
Chris@4 455 f.values.clear();
Chris@13 456 for (auto v: otherFeature) f.values.push_back(v);
Chris@13 457 fs[m_outputs["otherfeature"]].push_back(f);
Chris@13 458
Chris@13 459 int rotation = findBestRotation(otherFeature);
Chris@13 460
Chris@16 461 int coarseCents = -(rotation * 1200) / m_bpo;
Chris@13 462
Chris@13 463 cerr << "rotation " << rotation << " -> cents " << coarseCents << endl;
Chris@13 464
Chris@13 465 double coarseHz = frequencyForCentsAbove440(coarseCents);
Chris@13 466
Chris@24 467 TFeature coarseFeature;
Chris@24 468 if (rotation == 0) {
Chris@24 469 coarseFeature = otherFeature;
Chris@24 470 } else {
Chris@24 471 coarseFeature = computeFeatureFromSignal(m_other, coarseHz);
Chris@24 472 }
Chris@13 473 double coarseScore = featureDistance(coarseFeature);
Chris@13 474
Chris@13 475 cerr << "corresponding Hz " << coarseHz << " scores " << coarseScore << endl;
Chris@4 476
Chris@19 477 //!!! This should be returning the fine chroma, not the coarse
Chris@4 478 f.values.clear();
Chris@13 479 for (auto v: coarseFeature) f.values.push_back(v);
Chris@13 480 fs[m_outputs["rotfeature"]].push_back(f);
Chris@16 481
Chris@19 482 pair<int, double> fine = findFineFrequency(coarseCents, coarseScore);
Chris@19 483 int fineCents = fine.first;
Chris@19 484 double fineHz = fine.second;
Chris@16 485
Chris@19 486 f.values.clear();
Chris@19 487 f.values.push_back(fineHz);
Chris@19 488 fs[m_outputs["tuningfreq"]].push_back(f);
Chris@19 489
Chris@19 490 f.values.clear();
Chris@19 491 f.values.push_back(fineCents);
Chris@19 492 fs[m_outputs["cents"]].push_back(f);
Chris@19 493
Chris@16 494 cerr << "overall best Hz = " << fineHz << endl;
Chris@4 495
Chris@1 496 return fs;
Chris@0 497 }
Chris@0 498