annotate src/TuningDifference.cpp @ 31:52598a8fad5b

Subrepo update
author Chris Cannam
date Fri, 17 Jul 2015 15:50:02 +0100
parents 409ff482cb30
children 86695c191896
rev   line source
Chris@21 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@21 2
Chris@21 3 /*
Chris@21 4 Centre for Digital Music, Queen Mary University of London.
Chris@21 5
Chris@21 6 This program is free software; you can redistribute it and/or
Chris@21 7 modify it under the terms of the GNU General Public License as
Chris@21 8 published by the Free Software Foundation; either version 2 of the
Chris@21 9 License, or (at your option) any later version. See the file
Chris@21 10 COPYING included with this distribution for more information.
Chris@21 11 */
Chris@0 12
Chris@0 13 #include "TuningDifference.h"
Chris@0 14
Chris@1 15 #include <iostream>
Chris@1 16
Chris@4 17 #include <cmath>
Chris@4 18 #include <cstdio>
Chris@4 19
Chris@13 20 #include <algorithm>
Chris@1 21
Chris@13 22 using namespace std;
Chris@13 23
/**
 * Convert a MIDI pitch number, plus an optional offset in cents, to a
 * frequency in Hz. Pitch 69 with no offset maps to concertA.
 */
static double pitchToFrequency(int pitch,
                               double centsOffset = 0.,
                               double concertA = 440.)
{
    const double fractionalPitch = double(pitch) + (centsOffset / 100.);
    // Twelve semitones per octave, measured relative to pitch 69
    const double octavesFromReference = (fractionalPitch - 69.0) / 12.0;
    return concertA * std::pow(2.0, octavesFromReference);
}
Chris@13 31
Chris@13 32 static double frequencyForCentsAbove440(double cents)
Chris@13 33 {
Chris@13 34 return pitchToFrequency(69, cents, 440.);
Chris@13 35 }
Chris@5 36
// Default for the "maxduration" parameter, in seconds. The parameter
// description states that zero means there is no limit.
static float defaultMaxDuration = 0.f;
Chris@29 38
Chris@0 39 TuningDifference::TuningDifference(float inputSampleRate) :
Chris@13 40 Plugin(inputSampleRate),
Chris@13 41 m_bpo(60),
Chris@18 42 m_refChroma(new Chromagram(paramsForTuningFrequency(440.))),
Chris@18 43 m_blockSize(0),
Chris@29 44 m_frameCount(0),
Chris@29 45 m_maxDuration(defaultMaxDuration)
Chris@0 46 {
Chris@0 47 }
Chris@0 48
Chris@0 49 TuningDifference::~TuningDifference()
Chris@0 50 {
Chris@0 51 }
Chris@0 52
Chris@0 53 string
Chris@0 54 TuningDifference::getIdentifier() const
Chris@0 55 {
Chris@1 56 return "tuning-difference";
Chris@0 57 }
Chris@0 58
Chris@0 59 string
Chris@0 60 TuningDifference::getName() const
Chris@0 61 {
Chris@1 62 return "Tuning Difference";
Chris@0 63 }
Chris@0 64
Chris@0 65 string
Chris@0 66 TuningDifference::getDescription() const
Chris@0 67 {
Chris@0 68 // Return something helpful here!
Chris@0 69 return "";
Chris@0 70 }
Chris@0 71
Chris@0 72 string
Chris@0 73 TuningDifference::getMaker() const
Chris@0 74 {
Chris@0 75 // Your name here
Chris@0 76 return "";
Chris@0 77 }
Chris@0 78
Chris@0 79 int
Chris@0 80 TuningDifference::getPluginVersion() const
Chris@0 81 {
Chris@0 82 // Increment this each time you release a version that behaves
Chris@0 83 // differently from the previous one
Chris@0 84 return 1;
Chris@0 85 }
Chris@0 86
Chris@0 87 string
Chris@0 88 TuningDifference::getCopyright() const
Chris@0 89 {
Chris@0 90 // This function is not ideally named. It does not necessarily
Chris@0 91 // need to say who made the plugin -- getMaker does that -- but it
Chris@0 92 // should indicate the terms under which it is distributed. For
Chris@0 93 // example, "Copyright (year). All Rights Reserved", or "GPL"
Chris@0 94 return "";
Chris@0 95 }
Chris@0 96
Chris@0 97 TuningDifference::InputDomain
Chris@0 98 TuningDifference::getInputDomain() const
Chris@0 99 {
Chris@13 100 return TimeDomain;
Chris@0 101 }
Chris@0 102
Chris@0 103 size_t
Chris@0 104 TuningDifference::getPreferredBlockSize() const
Chris@0 105 {
Chris@13 106 return 0;
Chris@0 107 }
Chris@0 108
Chris@0 109 size_t
Chris@0 110 TuningDifference::getPreferredStepSize() const
Chris@0 111 {
Chris@1 112 return 0;
Chris@0 113 }
Chris@0 114
Chris@0 115 size_t
Chris@0 116 TuningDifference::getMinChannelCount() const
Chris@0 117 {
Chris@1 118 return 2;
Chris@0 119 }
Chris@0 120
Chris@0 121 size_t
Chris@0 122 TuningDifference::getMaxChannelCount() const
Chris@0 123 {
Chris@1 124 return 2;
Chris@0 125 }
Chris@0 126
Chris@0 127 TuningDifference::ParameterList
Chris@0 128 TuningDifference::getParameterDescriptors() const
Chris@0 129 {
Chris@0 130 ParameterList list;
Chris@29 131
Chris@29 132 ParameterDescriptor desc;
Chris@29 133
Chris@29 134 desc.identifier = "maxduration";
Chris@29 135 desc.name = "Maximum duration to analyse";
Chris@29 136 desc.description = "The maximum duration (in seconds) to consider from either input file. Zero means there is no limit.";
Chris@29 137 desc.minValue = 0;
Chris@29 138 desc.maxValue = 3600;
Chris@29 139 desc.defaultValue = defaultMaxDuration;
Chris@29 140 desc.isQuantized = false;
Chris@29 141 desc.unit = "s";
Chris@29 142 list.push_back(desc);
Chris@29 143
Chris@20 144 //!!! parameter: max search range
Chris@20 145 //!!! parameter: fine search precision
Chris@29 146
Chris@0 147 return list;
Chris@0 148 }
Chris@0 149
Chris@0 150 float
Chris@29 151 TuningDifference::getParameter(string id) const
Chris@0 152 {
Chris@29 153 if (id == "maxduration") {
Chris@29 154 return m_maxDuration;
Chris@29 155 }
Chris@0 156 return 0;
Chris@0 157 }
Chris@0 158
Chris@0 159 void
Chris@29 160 TuningDifference::setParameter(string id, float value)
Chris@0 161 {
Chris@29 162 if (id == "maxduration") {
Chris@29 163 m_maxDuration = value;
Chris@29 164 }
Chris@0 165 }
Chris@0 166
Chris@0 167 TuningDifference::ProgramList
Chris@0 168 TuningDifference::getPrograms() const
Chris@0 169 {
Chris@0 170 ProgramList list;
Chris@0 171 return list;
Chris@0 172 }
Chris@0 173
Chris@0 174 string
Chris@0 175 TuningDifference::getCurrentProgram() const
Chris@0 176 {
Chris@0 177 return ""; // no programs
Chris@0 178 }
Chris@0 179
Chris@0 180 void
Chris@1 181 TuningDifference::selectProgram(string)
Chris@0 182 {
Chris@0 183 }
Chris@0 184
Chris@0 185 TuningDifference::OutputList
Chris@0 186 TuningDifference::getOutputDescriptors() const
Chris@0 187 {
Chris@0 188 OutputList list;
Chris@0 189
Chris@1 190 OutputDescriptor d;
Chris@1 191 d.identifier = "cents";
Chris@1 192 d.name = "Tuning Difference";
Chris@1 193 d.description = "Difference in averaged frequency profile between channels 1 and 2, in cents. A positive value means channel 2 is higher.";
Chris@1 194 d.unit = "cents";
Chris@1 195 d.hasFixedBinCount = true;
Chris@1 196 d.binCount = 1;
Chris@1 197 d.hasKnownExtents = false;
Chris@1 198 d.isQuantized = false;
Chris@1 199 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@1 200 d.hasDuration = false;
Chris@13 201 m_outputs[d.identifier] = list.size();
Chris@1 202 list.push_back(d);
Chris@0 203
Chris@1 204 d.identifier = "tuningfreq";
Chris@1 205 d.name = "Relative Tuning Frequency";
Chris@1 206 d.description = "Tuning frequency of channel 2, if channel 1 is assumed to contain the same music as it at a tuning frequency of A=440Hz.";
Chris@4 207 d.unit = "hz";
Chris@1 208 d.hasFixedBinCount = true;
Chris@1 209 d.binCount = 1;
Chris@1 210 d.hasKnownExtents = false;
Chris@1 211 d.isQuantized = false;
Chris@1 212 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@1 213 d.hasDuration = false;
Chris@13 214 m_outputs[d.identifier] = list.size();
Chris@1 215 list.push_back(d);
Chris@1 216
Chris@13 217 d.identifier = "reffeature";
Chris@13 218 d.name = "Reference Feature";
Chris@13 219 d.description = "Chroma feature from reference audio.";
Chris@0 220 d.unit = "";
Chris@0 221 d.hasFixedBinCount = true;
Chris@13 222 d.binCount = m_bpo;
Chris@4 223 d.hasKnownExtents = false;
Chris@4 224 d.isQuantized = false;
Chris@4 225 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@5 226 d.sampleRate = 1;
Chris@4 227 d.hasDuration = false;
Chris@13 228 m_outputs[d.identifier] = list.size();
Chris@13 229 list.push_back(d);
Chris@13 230
Chris@13 231 d.identifier = "otherfeature";
Chris@13 232 d.name = "Other Feature";
Chris@13 233 d.description = "Chroma feature from other audio, before rotation.";
Chris@13 234 d.unit = "";
Chris@13 235 d.hasFixedBinCount = true;
Chris@13 236 d.binCount = m_bpo;
Chris@13 237 d.hasKnownExtents = false;
Chris@13 238 d.isQuantized = false;
Chris@13 239 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@13 240 d.sampleRate = 1;
Chris@13 241 d.hasDuration = false;
Chris@13 242 m_outputs[d.identifier] = list.size();
Chris@13 243 list.push_back(d);
Chris@13 244
Chris@13 245 d.identifier = "rotfeature";
Chris@13 246 d.name = "Other Feature at Rotated Frequency";
Chris@13 247 d.description = "Chroma feature from reference audio calculated with the tuning frequency obtained from rotation matching.";
Chris@13 248 d.unit = "";
Chris@13 249 d.hasFixedBinCount = true;
Chris@13 250 d.binCount = m_bpo;
Chris@13 251 d.hasKnownExtents = false;
Chris@13 252 d.isQuantized = false;
Chris@13 253 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@13 254 d.sampleRate = 1;
Chris@13 255 d.hasDuration = false;
Chris@13 256 m_outputs[d.identifier] = list.size();
Chris@4 257 list.push_back(d);
Chris@4 258
Chris@0 259 return list;
Chris@0 260 }
Chris@0 261
Chris@0 262 bool
Chris@0 263 TuningDifference::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@0 264 {
Chris@0 265 if (channels < getMinChannelCount() ||
Chris@0 266 channels > getMaxChannelCount()) return false;
Chris@0 267
Chris@13 268 if (stepSize != blockSize) return false;
Chris@0 269
Chris@1 270 m_blockSize = blockSize;
Chris@1 271
Chris@1 272 reset();
Chris@1 273
Chris@0 274 return true;
Chris@0 275 }
Chris@0 276
Chris@0 277 void
Chris@0 278 TuningDifference::reset()
Chris@0 279 {
Chris@13 280 if (m_frameCount > 0) {
Chris@15 281 m_refChroma.reset(new Chromagram(paramsForTuningFrequency(440.)));
Chris@13 282 m_frameCount = 0;
Chris@13 283 }
Chris@15 284 m_refTotals = TFeature(m_bpo, 0.0);
Chris@13 285 m_other.clear();
Chris@13 286 }
Chris@13 287
/**
 * Add b to a element by element, storing the result in a.
 *
 * Only the elements common to both vectors are summed: if b is
 * shorter than a, the trailing elements of a are left untouched
 * (rather than reading beyond the end of b), and if b is longer, its
 * extra elements are ignored.
 */
template<typename T>
void addTo(std::vector<T> &a, const std::vector<T> &b)
{
    const auto common = std::min(a.size(), b.size());
    std::transform(a.begin(), a.begin() + common, b.begin(), a.begin(),
                   std::plus<T>());
}
Chris@13 293
/**
 * Return the sum of absolute element-wise differences between a and
 * b. b is expected to hold at least as many elements as a.
 */
template<typename T>
T distance(const std::vector<T> &a, const std::vector<T> &b)
{
    T total = T();
    auto bi = b.begin();
    for (auto ai = a.begin(); ai != a.end(); ++ai, ++bi) {
        total = total + static_cast<T>(std::fabs(*ai - *bi));
    }
    return total;
}
Chris@13 300
Chris@13 301 TuningDifference::TFeature
Chris@15 302 TuningDifference::computeFeatureFromTotals(const TFeature &totals) const
Chris@13 303 {
Chris@16 304 if (m_frameCount == 0) return totals;
Chris@16 305
Chris@13 306 TFeature feature(m_bpo);
Chris@13 307 double sum = 0.0;
Chris@16 308
Chris@15 309 for (int i = 0; i < m_bpo; ++i) {
Chris@13 310 double value = totals[i] / m_frameCount;
Chris@15 311 feature[i] += value;
Chris@13 312 sum += value;
Chris@13 313 }
Chris@13 314
Chris@13 315 for (int i = 0; i < m_bpo; ++i) {
Chris@13 316 feature[i] /= sum;
Chris@13 317 }
Chris@13 318
Chris@25 319 // cerr << "computeFeatureFromTotals: feature values:" << endl;
Chris@25 320 // for (auto v: feature) cerr << v << " ";
Chris@25 321 // cerr << endl;
Chris@13 322
Chris@13 323 return feature;
Chris@13 324 }
Chris@13 325
Chris@15 326 Chromagram::Parameters
Chris@13 327 TuningDifference::paramsForTuningFrequency(double hz) const
Chris@13 328 {
Chris@15 329 Chromagram::Parameters params(m_inputSampleRate);
Chris@24 330 params.lowestOctave = 2;
Chris@24 331 params.octaveCount = 4;
Chris@20 332 params.binsPerOctave = m_bpo;
Chris@15 333 params.tuningFrequency = hz;
Chris@20 334 params.atomHopFactor = 0.5;
Chris@24 335 params.window = CQParameters::Hann;
Chris@15 336 return params;
Chris@13 337 }
Chris@13 338
Chris@13 339 TuningDifference::TFeature
Chris@13 340 TuningDifference::computeFeatureFromSignal(const Signal &signal, double hz) const
Chris@13 341 {
Chris@15 342 Chromagram chromagram(paramsForTuningFrequency(hz));
Chris@13 343
Chris@15 344 TFeature totals(m_bpo, 0.0);
Chris@16 345
Chris@16 346 cerr << "computeFeatureFromSignal: hz = " << hz << ", frame count = " << m_frameCount << endl;
Chris@13 347
Chris@13 348 for (int i = 0; i < m_frameCount; ++i) {
Chris@13 349 Signal::const_iterator first = signal.begin() + i * m_blockSize;
Chris@13 350 Signal::const_iterator last = first + m_blockSize;
Chris@13 351 if (last > signal.end()) last = signal.end();
Chris@15 352 CQBase::RealSequence input(first, last);
Chris@13 353 input.resize(m_blockSize);
Chris@15 354 CQBase::RealBlock block = chromagram.process(input);
Chris@13 355 for (const auto &v: block) addTo(totals, v);
Chris@13 356 }
Chris@13 357
Chris@13 358 return computeFeatureFromTotals(totals);
Chris@0 359 }
Chris@0 360
Chris@0 361 TuningDifference::FeatureSet
Chris@13 362 TuningDifference::process(const float *const *inputBuffers, Vamp::RealTime)
Chris@0 363 {
Chris@29 364 if (m_maxDuration > 0) {
Chris@29 365 int maxFrames = (m_maxDuration * m_inputSampleRate) / m_blockSize;
Chris@29 366 if (m_frameCount > maxFrames) return FeatureSet();
Chris@29 367 }
Chris@29 368
Chris@15 369 CQBase::RealBlock block;
Chris@15 370 CQBase::RealSequence input;
Chris@13 371
Chris@15 372 input = CQBase::RealSequence
Chris@13 373 (inputBuffers[0], inputBuffers[0] + m_blockSize);
Chris@15 374 block = m_refChroma->process(input);
Chris@13 375 for (const auto &v: block) addTo(m_refTotals, v);
Chris@13 376
Chris@13 377 m_other.insert(m_other.end(),
Chris@13 378 inputBuffers[1], inputBuffers[1] + m_blockSize);
Chris@1 379
Chris@1 380 ++m_frameCount;
Chris@0 381 return FeatureSet();
Chris@0 382 }
Chris@0 383
Chris@13 384 double
Chris@13 385 TuningDifference::featureDistance(const TFeature &other, int rotation) const
Chris@13 386 {
Chris@13 387 if (rotation == 0) {
Chris@13 388 return distance(m_refFeature, other);
Chris@13 389 } else {
Chris@15 390 // A positive rotation pushes the tuning frequency up for this
Chris@15 391 // chroma, negative one pulls it down. If a positive rotation
Chris@15 392 // makes this chroma match an un-rotated reference, then this
Chris@15 393 // chroma must have initially been lower than the reference.
Chris@13 394 TFeature r(other);
Chris@15 395 if (rotation < 0) {
Chris@15 396 rotate(r.begin(), r.begin() - rotation, r.end());
Chris@13 397 } else {
Chris@15 398 rotate(r.begin(), r.end() - rotation, r.end());
Chris@13 399 }
Chris@13 400 return distance(m_refFeature, r);
Chris@13 401 }
Chris@13 402 }
Chris@13 403
Chris@13 404 int
Chris@13 405 TuningDifference::findBestRotation(const TFeature &other) const
Chris@13 406 {
Chris@13 407 map<double, int> dists;
Chris@13 408
Chris@13 409 int maxSemis = 6;
Chris@13 410 int maxRotation = (m_bpo * maxSemis) / 12;
Chris@13 411
Chris@13 412 for (int r = -maxRotation; r <= maxRotation; ++r) {
Chris@13 413 double dist = featureDistance(other, r);
Chris@13 414 dists[dist] = r;
Chris@25 415 // cerr << "rotation " << r << ": score " << dist << endl;
Chris@13 416 }
Chris@13 417
Chris@13 418 int best = dists.begin()->second;
Chris@13 419
Chris@25 420 // cerr << "best is " << best << endl;
Chris@13 421 return best;
Chris@13 422 }
Chris@13 423
Chris@19 424 pair<int, double>
Chris@16 425 TuningDifference::findFineFrequency(int coarseCents, double coarseScore)
Chris@16 426 {
Chris@16 427 int coarseResolution = 1200 / m_bpo;
Chris@16 428 int searchDistance = coarseResolution/2 - 1;
Chris@16 429
Chris@16 430 double bestScore = coarseScore;
Chris@19 431 int bestCents = coarseCents;
Chris@16 432 double bestHz = frequencyForCentsAbove440(coarseCents);
Chris@16 433
Chris@16 434 cerr << "corresponding coarse Hz " << bestHz << " scores " << coarseScore << endl;
Chris@16 435 cerr << "searchDistance = " << searchDistance << endl;
Chris@16 436
Chris@16 437 for (int sign = -1; sign <= 1; sign += 2) {
Chris@16 438 for (int offset = 1; offset <= searchDistance; ++offset) {
Chris@16 439
Chris@16 440 int fineCents = coarseCents + sign * offset;
Chris@16 441
Chris@16 442 cerr << "trying with fineCents = " << fineCents << "..." << endl;
Chris@16 443
Chris@16 444 double fineHz = frequencyForCentsAbove440(fineCents);
Chris@16 445 TFeature fineFeature = computeFeatureFromSignal(m_other, fineHz);
Chris@16 446 double fineScore = featureDistance(fineFeature);
Chris@16 447
Chris@16 448 cerr << "fine offset = " << offset << ", cents = " << fineCents
Chris@16 449 << ", Hz = " << fineHz << ", score " << fineScore
Chris@16 450 << " (best score so far " << bestScore << ")" << endl;
Chris@16 451
Chris@16 452 if (fineScore < bestScore) {
Chris@16 453 cerr << "is good!" << endl;
Chris@16 454 bestScore = fineScore;
Chris@19 455 bestCents = fineCents;
Chris@17 456 bestHz = fineHz;
Chris@16 457 } else {
Chris@16 458 break;
Chris@16 459 }
Chris@16 460 }
Chris@16 461 }
Chris@16 462
Chris@20 463 //!!! could keep a vector of scores & then interpolate...
Chris@20 464
Chris@19 465 return pair<int, double>(bestCents, bestHz);
Chris@16 466 }
Chris@16 467
Chris@0 468 TuningDifference::FeatureSet
Chris@0 469 TuningDifference::getRemainingFeatures()
Chris@0 470 {
Chris@13 471 FeatureSet fs;
Chris@13 472 if (m_frameCount == 0) return fs;
Chris@13 473
Chris@13 474 m_refFeature = computeFeatureFromTotals(m_refTotals);
Chris@13 475 TFeature otherFeature = computeFeatureFromSignal(m_other, 440.);
Chris@1 476
Chris@1 477 Feature f;
Chris@1 478
Chris@4 479 f.values.clear();
Chris@13 480 for (auto v: m_refFeature) f.values.push_back(v);
Chris@13 481 fs[m_outputs["reffeature"]].push_back(f);
Chris@4 482
Chris@4 483 f.values.clear();
Chris@13 484 for (auto v: otherFeature) f.values.push_back(v);
Chris@13 485 fs[m_outputs["otherfeature"]].push_back(f);
Chris@13 486
Chris@13 487 int rotation = findBestRotation(otherFeature);
Chris@13 488
Chris@16 489 int coarseCents = -(rotation * 1200) / m_bpo;
Chris@13 490
Chris@13 491 cerr << "rotation " << rotation << " -> cents " << coarseCents << endl;
Chris@13 492
Chris@13 493 double coarseHz = frequencyForCentsAbove440(coarseCents);
Chris@13 494
Chris@24 495 TFeature coarseFeature;
Chris@24 496 if (rotation == 0) {
Chris@24 497 coarseFeature = otherFeature;
Chris@24 498 } else {
Chris@24 499 coarseFeature = computeFeatureFromSignal(m_other, coarseHz);
Chris@24 500 }
Chris@13 501 double coarseScore = featureDistance(coarseFeature);
Chris@13 502
Chris@13 503 cerr << "corresponding Hz " << coarseHz << " scores " << coarseScore << endl;
Chris@4 504
Chris@19 505 //!!! This should be returning the fine chroma, not the coarse
Chris@4 506 f.values.clear();
Chris@13 507 for (auto v: coarseFeature) f.values.push_back(v);
Chris@13 508 fs[m_outputs["rotfeature"]].push_back(f);
Chris@16 509
Chris@19 510 pair<int, double> fine = findFineFrequency(coarseCents, coarseScore);
Chris@19 511 int fineCents = fine.first;
Chris@19 512 double fineHz = fine.second;
Chris@16 513
Chris@19 514 f.values.clear();
Chris@19 515 f.values.push_back(fineHz);
Chris@19 516 fs[m_outputs["tuningfreq"]].push_back(f);
Chris@19 517
Chris@19 518 f.values.clear();
Chris@19 519 f.values.push_back(fineCents);
Chris@19 520 fs[m_outputs["cents"]].push_back(f);
Chris@19 521
Chris@16 522 cerr << "overall best Hz = " << fineHz << endl;
Chris@4 523
Chris@1 524 return fs;
Chris@0 525 }
Chris@0 526