annotate src/TuningDifference.cpp @ 29:409ff482cb30

Add max-duration parameter. The results are often going to be quite wrong if you use this, simply because the two performances may be at different speeds and so the same duration of each may represent a different (sub-)performance. But it's sometimes necessary just to avoid blowing the thing up.
author Chris Cannam
date Fri, 27 Mar 2015 17:20:40 +0000
parents c21ce05afbe4
children 86695c191896
rev   line source
Chris@21 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@21 2
Chris@21 3 /*
Chris@21 4 Centre for Digital Music, Queen Mary University of London.
Chris@21 5
Chris@21 6 This program is free software; you can redistribute it and/or
Chris@21 7 modify it under the terms of the GNU General Public License as
Chris@21 8 published by the Free Software Foundation; either version 2 of the
Chris@21 9 License, or (at your option) any later version. See the file
Chris@21 10 COPYING included with this distribution for more information.
Chris@21 11 */
Chris@0 12
Chris@0 13 #include "TuningDifference.h"
Chris@0 14
Chris@1 15 #include <iostream>
Chris@1 16
Chris@4 17 #include <cmath>
Chris@4 18 #include <cstdio>
Chris@4 19
Chris@13 20 #include <algorithm>
Chris@1 21
Chris@13 22 using namespace std;
Chris@13 23
// Convert a semitone pitch number (69 corresponds to concertA) plus an
// offset in cents into a frequency in Hz, using twelve equal steps per
// octave.
static double pitchToFrequency(int pitch,
                               double centsOffset = 0.,
                               double concertA = 440.)
{
    const double fractionalPitch = double(pitch) + (centsOffset / 100.);
    const double octavesFromA = (fractionalPitch - 69.0) / 12.0;
    return concertA * std::pow(2.0, octavesFromA);
}
Chris@13 31
Chris@13 32 static double frequencyForCentsAbove440(double cents)
Chris@13 33 {
Chris@13 34 return pitchToFrequency(69, cents, 440.);
Chris@13 35 }
Chris@5 36
// Default for the "maxduration" parameter, in seconds. Zero means that
// no limit is applied.
static float defaultMaxDuration = 0.0f;
Chris@29 38
Chris@0 39 TuningDifference::TuningDifference(float inputSampleRate) :
Chris@13 40 Plugin(inputSampleRate),
Chris@13 41 m_bpo(60),
Chris@18 42 m_refChroma(new Chromagram(paramsForTuningFrequency(440.))),
Chris@18 43 m_blockSize(0),
Chris@29 44 m_frameCount(0),
Chris@29 45 m_maxDuration(defaultMaxDuration)
Chris@0 46 {
Chris@0 47 }
Chris@0 48
Chris@0 49 TuningDifference::~TuningDifference()
Chris@0 50 {
Chris@0 51 }
Chris@0 52
Chris@0 53 string
Chris@0 54 TuningDifference::getIdentifier() const
Chris@0 55 {
Chris@1 56 return "tuning-difference";
Chris@0 57 }
Chris@0 58
Chris@0 59 string
Chris@0 60 TuningDifference::getName() const
Chris@0 61 {
Chris@1 62 return "Tuning Difference";
Chris@0 63 }
Chris@0 64
Chris@0 65 string
Chris@0 66 TuningDifference::getDescription() const
Chris@0 67 {
Chris@0 68 // Return something helpful here!
Chris@0 69 return "";
Chris@0 70 }
Chris@0 71
Chris@0 72 string
Chris@0 73 TuningDifference::getMaker() const
Chris@0 74 {
Chris@0 75 // Your name here
Chris@0 76 return "";
Chris@0 77 }
Chris@0 78
Chris@0 79 int
Chris@0 80 TuningDifference::getPluginVersion() const
Chris@0 81 {
Chris@0 82 // Increment this each time you release a version that behaves
Chris@0 83 // differently from the previous one
Chris@0 84 return 1;
Chris@0 85 }
Chris@0 86
Chris@0 87 string
Chris@0 88 TuningDifference::getCopyright() const
Chris@0 89 {
Chris@0 90 // This function is not ideally named. It does not necessarily
Chris@0 91 // need to say who made the plugin -- getMaker does that -- but it
Chris@0 92 // should indicate the terms under which it is distributed. For
Chris@0 93 // example, "Copyright (year). All Rights Reserved", or "GPL"
Chris@0 94 return "";
Chris@0 95 }
Chris@0 96
Chris@0 97 TuningDifference::InputDomain
Chris@0 98 TuningDifference::getInputDomain() const
Chris@0 99 {
Chris@13 100 return TimeDomain;
Chris@0 101 }
Chris@0 102
Chris@0 103 size_t
Chris@0 104 TuningDifference::getPreferredBlockSize() const
Chris@0 105 {
Chris@13 106 return 0;
Chris@0 107 }
Chris@0 108
Chris@0 109 size_t
Chris@0 110 TuningDifference::getPreferredStepSize() const
Chris@0 111 {
Chris@1 112 return 0;
Chris@0 113 }
Chris@0 114
Chris@0 115 size_t
Chris@0 116 TuningDifference::getMinChannelCount() const
Chris@0 117 {
Chris@1 118 return 2;
Chris@0 119 }
Chris@0 120
Chris@0 121 size_t
Chris@0 122 TuningDifference::getMaxChannelCount() const
Chris@0 123 {
Chris@1 124 return 2;
Chris@0 125 }
Chris@0 126
Chris@0 127 TuningDifference::ParameterList
Chris@0 128 TuningDifference::getParameterDescriptors() const
Chris@0 129 {
Chris@0 130 ParameterList list;
Chris@29 131
Chris@29 132 ParameterDescriptor desc;
Chris@29 133
Chris@29 134 desc.identifier = "maxduration";
Chris@29 135 desc.name = "Maximum duration to analyse";
Chris@29 136 desc.description = "The maximum duration (in seconds) to consider from either input file. Zero means there is no limit.";
Chris@29 137 desc.minValue = 0;
Chris@29 138 desc.maxValue = 3600;
Chris@29 139 desc.defaultValue = defaultMaxDuration;
Chris@29 140 desc.isQuantized = false;
Chris@29 141 desc.unit = "s";
Chris@29 142 list.push_back(desc);
Chris@29 143
Chris@20 144 //!!! parameter: max search range
Chris@20 145 //!!! parameter: fine search precision
Chris@29 146
Chris@0 147 return list;
Chris@0 148 }
Chris@0 149
Chris@0 150 float
Chris@29 151 TuningDifference::getParameter(string id) const
Chris@0 152 {
Chris@29 153 if (id == "maxduration") {
Chris@29 154 return m_maxDuration;
Chris@29 155 }
Chris@0 156 return 0;
Chris@0 157 }
Chris@0 158
Chris@0 159 void
Chris@29 160 TuningDifference::setParameter(string id, float value)
Chris@0 161 {
Chris@29 162 if (id == "maxduration") {
Chris@29 163 m_maxDuration = value;
Chris@29 164 }
Chris@0 165 }
Chris@0 166
Chris@0 167 TuningDifference::ProgramList
Chris@0 168 TuningDifference::getPrograms() const
Chris@0 169 {
Chris@0 170 ProgramList list;
Chris@0 171 return list;
Chris@0 172 }
Chris@0 173
Chris@0 174 string
Chris@0 175 TuningDifference::getCurrentProgram() const
Chris@0 176 {
Chris@0 177 return ""; // no programs
Chris@0 178 }
Chris@0 179
Chris@0 180 void
Chris@1 181 TuningDifference::selectProgram(string)
Chris@0 182 {
Chris@0 183 }
Chris@0 184
Chris@0 185 TuningDifference::OutputList
Chris@0 186 TuningDifference::getOutputDescriptors() const
Chris@0 187 {
Chris@0 188 OutputList list;
Chris@0 189
Chris@1 190 OutputDescriptor d;
Chris@1 191 d.identifier = "cents";
Chris@1 192 d.name = "Tuning Difference";
Chris@1 193 d.description = "Difference in averaged frequency profile between channels 1 and 2, in cents. A positive value means channel 2 is higher.";
Chris@1 194 d.unit = "cents";
Chris@1 195 d.hasFixedBinCount = true;
Chris@1 196 d.binCount = 1;
Chris@1 197 d.hasKnownExtents = false;
Chris@1 198 d.isQuantized = false;
Chris@1 199 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@1 200 d.hasDuration = false;
Chris@13 201 m_outputs[d.identifier] = list.size();
Chris@1 202 list.push_back(d);
Chris@0 203
Chris@1 204 d.identifier = "tuningfreq";
Chris@1 205 d.name = "Relative Tuning Frequency";
Chris@1 206 d.description = "Tuning frequency of channel 2, if channel 1 is assumed to contain the same music as it at a tuning frequency of A=440Hz.";
Chris@4 207 d.unit = "hz";
Chris@1 208 d.hasFixedBinCount = true;
Chris@1 209 d.binCount = 1;
Chris@1 210 d.hasKnownExtents = false;
Chris@1 211 d.isQuantized = false;
Chris@1 212 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@1 213 d.hasDuration = false;
Chris@13 214 m_outputs[d.identifier] = list.size();
Chris@1 215 list.push_back(d);
Chris@1 216
Chris@13 217 d.identifier = "reffeature";
Chris@13 218 d.name = "Reference Feature";
Chris@13 219 d.description = "Chroma feature from reference audio.";
Chris@0 220 d.unit = "";
Chris@0 221 d.hasFixedBinCount = true;
Chris@13 222 d.binCount = m_bpo;
Chris@4 223 d.hasKnownExtents = false;
Chris@4 224 d.isQuantized = false;
Chris@4 225 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@5 226 d.sampleRate = 1;
Chris@4 227 d.hasDuration = false;
Chris@13 228 m_outputs[d.identifier] = list.size();
Chris@13 229 list.push_back(d);
Chris@13 230
Chris@13 231 d.identifier = "otherfeature";
Chris@13 232 d.name = "Other Feature";
Chris@13 233 d.description = "Chroma feature from other audio, before rotation.";
Chris@13 234 d.unit = "";
Chris@13 235 d.hasFixedBinCount = true;
Chris@13 236 d.binCount = m_bpo;
Chris@13 237 d.hasKnownExtents = false;
Chris@13 238 d.isQuantized = false;
Chris@13 239 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@13 240 d.sampleRate = 1;
Chris@13 241 d.hasDuration = false;
Chris@13 242 m_outputs[d.identifier] = list.size();
Chris@13 243 list.push_back(d);
Chris@13 244
Chris@13 245 d.identifier = "rotfeature";
Chris@13 246 d.name = "Other Feature at Rotated Frequency";
Chris@13 247 d.description = "Chroma feature from reference audio calculated with the tuning frequency obtained from rotation matching.";
Chris@13 248 d.unit = "";
Chris@13 249 d.hasFixedBinCount = true;
Chris@13 250 d.binCount = m_bpo;
Chris@13 251 d.hasKnownExtents = false;
Chris@13 252 d.isQuantized = false;
Chris@13 253 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@13 254 d.sampleRate = 1;
Chris@13 255 d.hasDuration = false;
Chris@13 256 m_outputs[d.identifier] = list.size();
Chris@4 257 list.push_back(d);
Chris@4 258
Chris@0 259 return list;
Chris@0 260 }
Chris@0 261
Chris@0 262 bool
Chris@0 263 TuningDifference::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@0 264 {
Chris@0 265 if (channels < getMinChannelCount() ||
Chris@0 266 channels > getMaxChannelCount()) return false;
Chris@0 267
Chris@13 268 if (stepSize != blockSize) return false;
Chris@0 269
Chris@1 270 m_blockSize = blockSize;
Chris@1 271
Chris@1 272 reset();
Chris@1 273
Chris@0 274 return true;
Chris@0 275 }
Chris@0 276
Chris@0 277 void
Chris@0 278 TuningDifference::reset()
Chris@0 279 {
Chris@13 280 if (m_frameCount > 0) {
Chris@15 281 m_refChroma.reset(new Chromagram(paramsForTuningFrequency(440.)));
Chris@13 282 m_frameCount = 0;
Chris@13 283 }
Chris@15 284 m_refTotals = TFeature(m_bpo, 0.0);
Chris@13 285 m_other.clear();
Chris@13 286 }
Chris@13 287
// Add b to a element by element, in place. b must have at least as
// many elements as a; any extra elements of b are ignored.
template<typename T>
void addTo(vector<T> &a, const vector<T> &b)
{
    typename vector<T>::const_iterator source = b.begin();
    for (typename vector<T>::iterator target = a.begin();
         target != a.end(); ++target, ++source) {
        *target = *target + *source;
    }
}
Chris@13 293
// Sum of the absolute element-wise differences between a and b.
// b must have at least as many elements as a; any extra elements of b
// are ignored.
template<typename T>
T distance(const vector<T> &a, const vector<T> &b)
{
    T total = T();
    typename vector<T>::const_iterator other = b.begin();
    for (typename vector<T>::const_iterator it = a.begin();
         it != a.end(); ++it, ++other) {
        const T difference = fabs(*it - *other);
        total = total + difference;
    }
    return total;
}
Chris@13 300
Chris@13 301 TuningDifference::TFeature
Chris@15 302 TuningDifference::computeFeatureFromTotals(const TFeature &totals) const
Chris@13 303 {
Chris@16 304 if (m_frameCount == 0) return totals;
Chris@16 305
Chris@13 306 TFeature feature(m_bpo);
Chris@13 307 double sum = 0.0;
Chris@16 308
Chris@15 309 for (int i = 0; i < m_bpo; ++i) {
Chris@13 310 double value = totals[i] / m_frameCount;
Chris@15 311 feature[i] += value;
Chris@13 312 sum += value;
Chris@13 313 }
Chris@13 314
Chris@13 315 for (int i = 0; i < m_bpo; ++i) {
Chris@13 316 feature[i] /= sum;
Chris@13 317 }
Chris@13 318
Chris@25 319 // cerr << "computeFeatureFromTotals: feature values:" << endl;
Chris@25 320 // for (auto v: feature) cerr << v << " ";
Chris@25 321 // cerr << endl;
Chris@13 322
Chris@13 323 return feature;
Chris@13 324 }
Chris@13 325
Chris@15 326 Chromagram::Parameters
Chris@13 327 TuningDifference::paramsForTuningFrequency(double hz) const
Chris@13 328 {
Chris@15 329 Chromagram::Parameters params(m_inputSampleRate);
Chris@24 330 params.lowestOctave = 2;
Chris@24 331 params.octaveCount = 4;
Chris@20 332 params.binsPerOctave = m_bpo;
Chris@15 333 params.tuningFrequency = hz;
Chris@20 334 params.atomHopFactor = 0.5;
Chris@24 335 params.window = CQParameters::Hann;
Chris@15 336 return params;
Chris@13 337 }
Chris@13 338
Chris@13 339 TuningDifference::TFeature
Chris@13 340 TuningDifference::computeFeatureFromSignal(const Signal &signal, double hz) const
Chris@13 341 {
Chris@15 342 Chromagram chromagram(paramsForTuningFrequency(hz));
Chris@13 343
Chris@15 344 TFeature totals(m_bpo, 0.0);
Chris@16 345
Chris@16 346 cerr << "computeFeatureFromSignal: hz = " << hz << ", frame count = " << m_frameCount << endl;
Chris@13 347
Chris@13 348 for (int i = 0; i < m_frameCount; ++i) {
Chris@13 349 Signal::const_iterator first = signal.begin() + i * m_blockSize;
Chris@13 350 Signal::const_iterator last = first + m_blockSize;
Chris@13 351 if (last > signal.end()) last = signal.end();
Chris@15 352 CQBase::RealSequence input(first, last);
Chris@13 353 input.resize(m_blockSize);
Chris@15 354 CQBase::RealBlock block = chromagram.process(input);
Chris@13 355 for (const auto &v: block) addTo(totals, v);
Chris@13 356 }
Chris@13 357
Chris@13 358 return computeFeatureFromTotals(totals);
Chris@0 359 }
Chris@0 360
Chris@0 361 TuningDifference::FeatureSet
Chris@13 362 TuningDifference::process(const float *const *inputBuffers, Vamp::RealTime)
Chris@0 363 {
Chris@29 364 if (m_maxDuration > 0) {
Chris@29 365 int maxFrames = (m_maxDuration * m_inputSampleRate) / m_blockSize;
Chris@29 366 if (m_frameCount > maxFrames) return FeatureSet();
Chris@29 367 }
Chris@29 368
Chris@15 369 CQBase::RealBlock block;
Chris@15 370 CQBase::RealSequence input;
Chris@13 371
Chris@15 372 input = CQBase::RealSequence
Chris@13 373 (inputBuffers[0], inputBuffers[0] + m_blockSize);
Chris@15 374 block = m_refChroma->process(input);
Chris@13 375 for (const auto &v: block) addTo(m_refTotals, v);
Chris@13 376
Chris@13 377 m_other.insert(m_other.end(),
Chris@13 378 inputBuffers[1], inputBuffers[1] + m_blockSize);
Chris@1 379
Chris@1 380 ++m_frameCount;
Chris@0 381 return FeatureSet();
Chris@0 382 }
Chris@0 383
Chris@13 384 double
Chris@13 385 TuningDifference::featureDistance(const TFeature &other, int rotation) const
Chris@13 386 {
Chris@13 387 if (rotation == 0) {
Chris@13 388 return distance(m_refFeature, other);
Chris@13 389 } else {
Chris@15 390 // A positive rotation pushes the tuning frequency up for this
Chris@15 391 // chroma, negative one pulls it down. If a positive rotation
Chris@15 392 // makes this chroma match an un-rotated reference, then this
Chris@15 393 // chroma must have initially been lower than the reference.
Chris@13 394 TFeature r(other);
Chris@15 395 if (rotation < 0) {
Chris@15 396 rotate(r.begin(), r.begin() - rotation, r.end());
Chris@13 397 } else {
Chris@15 398 rotate(r.begin(), r.end() - rotation, r.end());
Chris@13 399 }
Chris@13 400 return distance(m_refFeature, r);
Chris@13 401 }
Chris@13 402 }
Chris@13 403
Chris@13 404 int
Chris@13 405 TuningDifference::findBestRotation(const TFeature &other) const
Chris@13 406 {
Chris@13 407 map<double, int> dists;
Chris@13 408
Chris@13 409 int maxSemis = 6;
Chris@13 410 int maxRotation = (m_bpo * maxSemis) / 12;
Chris@13 411
Chris@13 412 for (int r = -maxRotation; r <= maxRotation; ++r) {
Chris@13 413 double dist = featureDistance(other, r);
Chris@13 414 dists[dist] = r;
Chris@25 415 // cerr << "rotation " << r << ": score " << dist << endl;
Chris@13 416 }
Chris@13 417
Chris@13 418 int best = dists.begin()->second;
Chris@13 419
Chris@25 420 // cerr << "best is " << best << endl;
Chris@13 421 return best;
Chris@13 422 }
Chris@13 423
Chris@19 424 pair<int, double>
Chris@16 425 TuningDifference::findFineFrequency(int coarseCents, double coarseScore)
Chris@16 426 {
Chris@16 427 int coarseResolution = 1200 / m_bpo;
Chris@16 428 int searchDistance = coarseResolution/2 - 1;
Chris@16 429
Chris@16 430 double bestScore = coarseScore;
Chris@19 431 int bestCents = coarseCents;
Chris@16 432 double bestHz = frequencyForCentsAbove440(coarseCents);
Chris@16 433
Chris@16 434 cerr << "corresponding coarse Hz " << bestHz << " scores " << coarseScore << endl;
Chris@16 435 cerr << "searchDistance = " << searchDistance << endl;
Chris@16 436
Chris@16 437 for (int sign = -1; sign <= 1; sign += 2) {
Chris@16 438 for (int offset = 1; offset <= searchDistance; ++offset) {
Chris@16 439
Chris@16 440 int fineCents = coarseCents + sign * offset;
Chris@16 441
Chris@16 442 cerr << "trying with fineCents = " << fineCents << "..." << endl;
Chris@16 443
Chris@16 444 double fineHz = frequencyForCentsAbove440(fineCents);
Chris@16 445 TFeature fineFeature = computeFeatureFromSignal(m_other, fineHz);
Chris@16 446 double fineScore = featureDistance(fineFeature);
Chris@16 447
Chris@16 448 cerr << "fine offset = " << offset << ", cents = " << fineCents
Chris@16 449 << ", Hz = " << fineHz << ", score " << fineScore
Chris@16 450 << " (best score so far " << bestScore << ")" << endl;
Chris@16 451
Chris@16 452 if (fineScore < bestScore) {
Chris@16 453 cerr << "is good!" << endl;
Chris@16 454 bestScore = fineScore;
Chris@19 455 bestCents = fineCents;
Chris@17 456 bestHz = fineHz;
Chris@16 457 } else {
Chris@16 458 break;
Chris@16 459 }
Chris@16 460 }
Chris@16 461 }
Chris@16 462
Chris@20 463 //!!! could keep a vector of scores & then interpolate...
Chris@20 464
Chris@19 465 return pair<int, double>(bestCents, bestHz);
Chris@16 466 }
Chris@16 467
Chris@0 468 TuningDifference::FeatureSet
Chris@0 469 TuningDifference::getRemainingFeatures()
Chris@0 470 {
Chris@13 471 FeatureSet fs;
Chris@13 472 if (m_frameCount == 0) return fs;
Chris@13 473
Chris@13 474 m_refFeature = computeFeatureFromTotals(m_refTotals);
Chris@13 475 TFeature otherFeature = computeFeatureFromSignal(m_other, 440.);
Chris@1 476
Chris@1 477 Feature f;
Chris@1 478
Chris@4 479 f.values.clear();
Chris@13 480 for (auto v: m_refFeature) f.values.push_back(v);
Chris@13 481 fs[m_outputs["reffeature"]].push_back(f);
Chris@4 482
Chris@4 483 f.values.clear();
Chris@13 484 for (auto v: otherFeature) f.values.push_back(v);
Chris@13 485 fs[m_outputs["otherfeature"]].push_back(f);
Chris@13 486
Chris@13 487 int rotation = findBestRotation(otherFeature);
Chris@13 488
Chris@16 489 int coarseCents = -(rotation * 1200) / m_bpo;
Chris@13 490
Chris@13 491 cerr << "rotation " << rotation << " -> cents " << coarseCents << endl;
Chris@13 492
Chris@13 493 double coarseHz = frequencyForCentsAbove440(coarseCents);
Chris@13 494
Chris@24 495 TFeature coarseFeature;
Chris@24 496 if (rotation == 0) {
Chris@24 497 coarseFeature = otherFeature;
Chris@24 498 } else {
Chris@24 499 coarseFeature = computeFeatureFromSignal(m_other, coarseHz);
Chris@24 500 }
Chris@13 501 double coarseScore = featureDistance(coarseFeature);
Chris@13 502
Chris@13 503 cerr << "corresponding Hz " << coarseHz << " scores " << coarseScore << endl;
Chris@4 504
Chris@19 505 //!!! This should be returning the fine chroma, not the coarse
Chris@4 506 f.values.clear();
Chris@13 507 for (auto v: coarseFeature) f.values.push_back(v);
Chris@13 508 fs[m_outputs["rotfeature"]].push_back(f);
Chris@16 509
Chris@19 510 pair<int, double> fine = findFineFrequency(coarseCents, coarseScore);
Chris@19 511 int fineCents = fine.first;
Chris@19 512 double fineHz = fine.second;
Chris@16 513
Chris@19 514 f.values.clear();
Chris@19 515 f.values.push_back(fineHz);
Chris@19 516 fs[m_outputs["tuningfreq"]].push_back(f);
Chris@19 517
Chris@19 518 f.values.clear();
Chris@19 519 f.values.push_back(fineCents);
Chris@19 520 fs[m_outputs["cents"]].push_back(f);
Chris@19 521
Chris@16 522 cerr << "overall best Hz = " << fineHz << endl;
Chris@4 523
Chris@1 524 return fs;
Chris@0 525 }
Chris@0 526