annotate chroma-compare-plugin/TuningDifference.cpp @ 20:331a520cdadb

Update to reflect changes in Chromagram api
author Chris Cannam
date Thu, 05 Feb 2015 09:52:28 +0000
parents 9c5ec36c223e
children
rev   line source
Chris@0 1
Chris@0 2 #include "TuningDifference.h"
Chris@0 3
Chris@1 4 #include <iostream>
Chris@1 5
Chris@4 6 #include <cmath>
Chris@4 7 #include <cstdio>
Chris@4 8
Chris@13 9 #include <algorithm>
Chris@1 10
Chris@13 11 using namespace std;
Chris@13 12
/**
 * Convert a pitch number, plus an offset in cents, to a frequency in
 * Hz. Pitch 69 with no offset yields concertA; every 12 pitch units
 * (or 1200 cents) doubles the frequency.
 */
static double pitchToFrequency(int pitch,
                               double centsOffset = 0.,
                               double concertA = 440.)
{
    const double fractionalPitch = double(pitch) + (centsOffset / 100.);
    const double octavesFromReference = (fractionalPitch - 69.0) / 12.0;
    return concertA * std::pow(2.0, octavesFromReference);
}
Chris@13 20
Chris@13 21 static double frequencyForCentsAbove440(double cents)
Chris@13 22 {
Chris@13 23 return pitchToFrequency(69, cents, 440.);
Chris@13 24 }
Chris@5 25
Chris@0 26 TuningDifference::TuningDifference(float inputSampleRate) :
Chris@13 27 Plugin(inputSampleRate),
Chris@13 28 m_bpo(60),
Chris@18 29 m_refChroma(new Chromagram(paramsForTuningFrequency(440.))),
Chris@18 30 m_blockSize(0),
Chris@18 31 m_frameCount(0)
Chris@0 32 {
Chris@0 33 }
Chris@0 34
Chris@0 35 TuningDifference::~TuningDifference()
Chris@0 36 {
Chris@0 37 }
Chris@0 38
Chris@0 39 string
Chris@0 40 TuningDifference::getIdentifier() const
Chris@0 41 {
Chris@1 42 return "tuning-difference";
Chris@0 43 }
Chris@0 44
Chris@0 45 string
Chris@0 46 TuningDifference::getName() const
Chris@0 47 {
Chris@1 48 return "Tuning Difference";
Chris@0 49 }
Chris@0 50
Chris@0 51 string
Chris@0 52 TuningDifference::getDescription() const
Chris@0 53 {
Chris@0 54 // Return something helpful here!
Chris@0 55 return "";
Chris@0 56 }
Chris@0 57
Chris@0 58 string
Chris@0 59 TuningDifference::getMaker() const
Chris@0 60 {
Chris@0 61 // Your name here
Chris@0 62 return "";
Chris@0 63 }
Chris@0 64
Chris@0 65 int
Chris@0 66 TuningDifference::getPluginVersion() const
Chris@0 67 {
Chris@0 68 // Increment this each time you release a version that behaves
Chris@0 69 // differently from the previous one
Chris@0 70 return 1;
Chris@0 71 }
Chris@0 72
Chris@0 73 string
Chris@0 74 TuningDifference::getCopyright() const
Chris@0 75 {
Chris@0 76 // This function is not ideally named. It does not necessarily
Chris@0 77 // need to say who made the plugin -- getMaker does that -- but it
Chris@0 78 // should indicate the terms under which it is distributed. For
Chris@0 79 // example, "Copyright (year). All Rights Reserved", or "GPL"
Chris@0 80 return "";
Chris@0 81 }
Chris@0 82
Chris@0 83 TuningDifference::InputDomain
Chris@0 84 TuningDifference::getInputDomain() const
Chris@0 85 {
Chris@13 86 return TimeDomain;
Chris@0 87 }
Chris@0 88
Chris@0 89 size_t
Chris@0 90 TuningDifference::getPreferredBlockSize() const
Chris@0 91 {
Chris@13 92 return 0;
Chris@0 93 }
Chris@0 94
Chris@0 95 size_t
Chris@0 96 TuningDifference::getPreferredStepSize() const
Chris@0 97 {
Chris@1 98 return 0;
Chris@0 99 }
Chris@0 100
Chris@0 101 size_t
Chris@0 102 TuningDifference::getMinChannelCount() const
Chris@0 103 {
Chris@1 104 return 2;
Chris@0 105 }
Chris@0 106
Chris@0 107 size_t
Chris@0 108 TuningDifference::getMaxChannelCount() const
Chris@0 109 {
Chris@1 110 return 2;
Chris@0 111 }
Chris@0 112
Chris@0 113 TuningDifference::ParameterList
Chris@0 114 TuningDifference::getParameterDescriptors() const
Chris@0 115 {
Chris@0 116 ParameterList list;
Chris@20 117 //!!! parameter: max search range
Chris@20 118 //!!! parameter: fine search precision
Chris@0 119 return list;
Chris@0 120 }
Chris@0 121
Chris@0 122 float
Chris@1 123 TuningDifference::getParameter(string) const
Chris@0 124 {
Chris@0 125 return 0;
Chris@0 126 }
Chris@0 127
Chris@0 128 void
Chris@1 129 TuningDifference::setParameter(string, float)
Chris@0 130 {
Chris@0 131 }
Chris@0 132
Chris@0 133 TuningDifference::ProgramList
Chris@0 134 TuningDifference::getPrograms() const
Chris@0 135 {
Chris@0 136 ProgramList list;
Chris@0 137 return list;
Chris@0 138 }
Chris@0 139
Chris@0 140 string
Chris@0 141 TuningDifference::getCurrentProgram() const
Chris@0 142 {
Chris@0 143 return ""; // no programs
Chris@0 144 }
Chris@0 145
Chris@0 146 void
Chris@1 147 TuningDifference::selectProgram(string)
Chris@0 148 {
Chris@0 149 }
Chris@0 150
Chris@0 151 TuningDifference::OutputList
Chris@0 152 TuningDifference::getOutputDescriptors() const
Chris@0 153 {
Chris@0 154 OutputList list;
Chris@0 155
Chris@1 156 OutputDescriptor d;
Chris@1 157 d.identifier = "cents";
Chris@1 158 d.name = "Tuning Difference";
Chris@1 159 d.description = "Difference in averaged frequency profile between channels 1 and 2, in cents. A positive value means channel 2 is higher.";
Chris@1 160 d.unit = "cents";
Chris@1 161 d.hasFixedBinCount = true;
Chris@1 162 d.binCount = 1;
Chris@1 163 d.hasKnownExtents = false;
Chris@1 164 d.isQuantized = false;
Chris@1 165 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@1 166 d.hasDuration = false;
Chris@13 167 m_outputs[d.identifier] = list.size();
Chris@1 168 list.push_back(d);
Chris@0 169
Chris@1 170 d.identifier = "tuningfreq";
Chris@1 171 d.name = "Relative Tuning Frequency";
Chris@1 172 d.description = "Tuning frequency of channel 2, if channel 1 is assumed to contain the same music as it at a tuning frequency of A=440Hz.";
Chris@4 173 d.unit = "hz";
Chris@1 174 d.hasFixedBinCount = true;
Chris@1 175 d.binCount = 1;
Chris@1 176 d.hasKnownExtents = false;
Chris@1 177 d.isQuantized = false;
Chris@1 178 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@1 179 d.hasDuration = false;
Chris@13 180 m_outputs[d.identifier] = list.size();
Chris@1 181 list.push_back(d);
Chris@1 182
Chris@13 183 d.identifier = "reffeature";
Chris@13 184 d.name = "Reference Feature";
Chris@13 185 d.description = "Chroma feature from reference audio.";
Chris@0 186 d.unit = "";
Chris@0 187 d.hasFixedBinCount = true;
Chris@13 188 d.binCount = m_bpo;
Chris@4 189 d.hasKnownExtents = false;
Chris@4 190 d.isQuantized = false;
Chris@4 191 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@5 192 d.sampleRate = 1;
Chris@4 193 d.hasDuration = false;
Chris@13 194 m_outputs[d.identifier] = list.size();
Chris@13 195 list.push_back(d);
Chris@13 196
Chris@13 197 d.identifier = "otherfeature";
Chris@13 198 d.name = "Other Feature";
Chris@13 199 d.description = "Chroma feature from other audio, before rotation.";
Chris@13 200 d.unit = "";
Chris@13 201 d.hasFixedBinCount = true;
Chris@13 202 d.binCount = m_bpo;
Chris@13 203 d.hasKnownExtents = false;
Chris@13 204 d.isQuantized = false;
Chris@13 205 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@13 206 d.sampleRate = 1;
Chris@13 207 d.hasDuration = false;
Chris@13 208 m_outputs[d.identifier] = list.size();
Chris@13 209 list.push_back(d);
Chris@13 210
Chris@13 211 d.identifier = "rotfeature";
Chris@13 212 d.name = "Other Feature at Rotated Frequency";
Chris@13 213 d.description = "Chroma feature from reference audio calculated with the tuning frequency obtained from rotation matching.";
Chris@13 214 d.unit = "";
Chris@13 215 d.hasFixedBinCount = true;
Chris@13 216 d.binCount = m_bpo;
Chris@13 217 d.hasKnownExtents = false;
Chris@13 218 d.isQuantized = false;
Chris@13 219 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@13 220 d.sampleRate = 1;
Chris@13 221 d.hasDuration = false;
Chris@13 222 m_outputs[d.identifier] = list.size();
Chris@4 223 list.push_back(d);
Chris@4 224
Chris@0 225 return list;
Chris@0 226 }
Chris@0 227
Chris@0 228 bool
Chris@0 229 TuningDifference::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@0 230 {
Chris@0 231 if (channels < getMinChannelCount() ||
Chris@0 232 channels > getMaxChannelCount()) return false;
Chris@0 233
Chris@13 234 if (stepSize != blockSize) return false;
Chris@0 235
Chris@1 236 m_blockSize = blockSize;
Chris@1 237
Chris@1 238 reset();
Chris@1 239
Chris@0 240 return true;
Chris@0 241 }
Chris@0 242
Chris@0 243 void
Chris@0 244 TuningDifference::reset()
Chris@0 245 {
Chris@13 246 if (m_frameCount > 0) {
Chris@15 247 m_refChroma.reset(new Chromagram(paramsForTuningFrequency(440.)));
Chris@13 248 m_frameCount = 0;
Chris@13 249 }
Chris@15 250 m_refTotals = TFeature(m_bpo, 0.0);
Chris@13 251 m_other.clear();
Chris@13 252 }
Chris@13 253
/**
 * Add the elements of b to the corresponding elements of a, in place.
 *
 * If the two vectors differ in length, only the elements they have in
 * common are touched: any surplus elements of a are left unchanged
 * and any surplus elements of b are ignored, so that no read ever
 * runs past the end of b.
 */
template<typename T>
void addTo(std::vector<T> &a, const std::vector<T> &b)
{
    const size_t n = std::min(a.size(), b.size());
    std::transform(a.begin(), a.begin() + n, b.begin(), a.begin(),
                   [](const T &x, const T &y) { return x + y; });
}
Chris@13 259
/**
 * Return the sum of the absolute differences between corresponding
 * elements of a and b.
 *
 * If the two vectors differ in length, only the elements they have in
 * common contribute, so that no read ever runs past the end of
 * either. Two empty vectors are at distance T().
 */
template<typename T>
T distance(const std::vector<T> &a, const std::vector<T> &b)
{
    const size_t n = std::min(a.size(), b.size());
    T total = T();
    for (size_t i = 0; i < n; ++i) {
        total = total + T(std::fabs(a[i] - b[i]));
    }
    return total;
}
Chris@13 266
Chris@13 267 TuningDifference::TFeature
Chris@15 268 TuningDifference::computeFeatureFromTotals(const TFeature &totals) const
Chris@13 269 {
Chris@16 270 if (m_frameCount == 0) return totals;
Chris@16 271
Chris@13 272 TFeature feature(m_bpo);
Chris@13 273 double sum = 0.0;
Chris@16 274
Chris@15 275 for (int i = 0; i < m_bpo; ++i) {
Chris@13 276 double value = totals[i] / m_frameCount;
Chris@15 277 feature[i] += value;
Chris@13 278 sum += value;
Chris@13 279 }
Chris@13 280
Chris@13 281 for (int i = 0; i < m_bpo; ++i) {
Chris@13 282 feature[i] /= sum;
Chris@13 283 }
Chris@13 284
Chris@13 285 cerr << "computeFeatureFromTotals: feature values:" << endl;
Chris@13 286 for (auto v: feature) cerr << v << " ";
Chris@13 287 cerr << endl;
Chris@13 288
Chris@13 289 return feature;
Chris@13 290 }
Chris@13 291
Chris@15 292 Chromagram::Parameters
Chris@13 293 TuningDifference::paramsForTuningFrequency(double hz) const
Chris@13 294 {
Chris@15 295 Chromagram::Parameters params(m_inputSampleRate);
Chris@15 296 params.lowestOctave = 0;
Chris@20 297 params.octaveCount = 6;
Chris@20 298 params.binsPerOctave = m_bpo;
Chris@15 299 params.tuningFrequency = hz;
Chris@20 300 params.atomHopFactor = 0.5;
Chris@15 301 return params;
Chris@13 302 }
Chris@13 303
Chris@13 304 TuningDifference::TFeature
Chris@13 305 TuningDifference::computeFeatureFromSignal(const Signal &signal, double hz) const
Chris@13 306 {
Chris@15 307 Chromagram chromagram(paramsForTuningFrequency(hz));
Chris@13 308
Chris@15 309 TFeature totals(m_bpo, 0.0);
Chris@16 310
Chris@16 311 cerr << "computeFeatureFromSignal: hz = " << hz << ", frame count = " << m_frameCount << endl;
Chris@13 312
Chris@13 313 for (int i = 0; i < m_frameCount; ++i) {
Chris@13 314 Signal::const_iterator first = signal.begin() + i * m_blockSize;
Chris@13 315 Signal::const_iterator last = first + m_blockSize;
Chris@13 316 if (last > signal.end()) last = signal.end();
Chris@15 317 CQBase::RealSequence input(first, last);
Chris@13 318 input.resize(m_blockSize);
Chris@15 319 CQBase::RealBlock block = chromagram.process(input);
Chris@13 320 for (const auto &v: block) addTo(totals, v);
Chris@13 321 }
Chris@13 322
Chris@13 323 return computeFeatureFromTotals(totals);
Chris@0 324 }
Chris@0 325
Chris@0 326 TuningDifference::FeatureSet
Chris@13 327 TuningDifference::process(const float *const *inputBuffers, Vamp::RealTime)
Chris@0 328 {
Chris@15 329 CQBase::RealBlock block;
Chris@15 330 CQBase::RealSequence input;
Chris@13 331
Chris@15 332 input = CQBase::RealSequence
Chris@13 333 (inputBuffers[0], inputBuffers[0] + m_blockSize);
Chris@15 334 block = m_refChroma->process(input);
Chris@13 335 for (const auto &v: block) addTo(m_refTotals, v);
Chris@13 336
Chris@13 337 m_other.insert(m_other.end(),
Chris@13 338 inputBuffers[1], inputBuffers[1] + m_blockSize);
Chris@1 339
Chris@1 340 ++m_frameCount;
Chris@0 341 return FeatureSet();
Chris@0 342 }
Chris@0 343
Chris@13 344 double
Chris@13 345 TuningDifference::featureDistance(const TFeature &other, int rotation) const
Chris@13 346 {
Chris@13 347 if (rotation == 0) {
Chris@13 348 return distance(m_refFeature, other);
Chris@13 349 } else {
Chris@15 350 // A positive rotation pushes the tuning frequency up for this
Chris@15 351 // chroma, negative one pulls it down. If a positive rotation
Chris@15 352 // makes this chroma match an un-rotated reference, then this
Chris@15 353 // chroma must have initially been lower than the reference.
Chris@13 354 TFeature r(other);
Chris@15 355 if (rotation < 0) {
Chris@15 356 rotate(r.begin(), r.begin() - rotation, r.end());
Chris@13 357 } else {
Chris@15 358 rotate(r.begin(), r.end() - rotation, r.end());
Chris@13 359 }
Chris@13 360 return distance(m_refFeature, r);
Chris@13 361 }
Chris@13 362 }
Chris@13 363
Chris@13 364 int
Chris@13 365 TuningDifference::findBestRotation(const TFeature &other) const
Chris@13 366 {
Chris@13 367 map<double, int> dists;
Chris@13 368
Chris@13 369 int maxSemis = 6;
Chris@13 370 int maxRotation = (m_bpo * maxSemis) / 12;
Chris@13 371
Chris@13 372 for (int r = -maxRotation; r <= maxRotation; ++r) {
Chris@13 373 double dist = featureDistance(other, r);
Chris@13 374 dists[dist] = r;
Chris@13 375 cerr << "rotation " << r << ": score " << dist << endl;
Chris@13 376 }
Chris@13 377
Chris@13 378 int best = dists.begin()->second;
Chris@13 379
Chris@13 380 cerr << "best is " << best << endl;
Chris@13 381 return best;
Chris@13 382 }
Chris@13 383
Chris@19 384 pair<int, double>
Chris@16 385 TuningDifference::findFineFrequency(int coarseCents, double coarseScore)
Chris@16 386 {
Chris@16 387 int coarseResolution = 1200 / m_bpo;
Chris@16 388 int searchDistance = coarseResolution/2 - 1;
Chris@16 389
Chris@16 390 double bestScore = coarseScore;
Chris@19 391 int bestCents = coarseCents;
Chris@16 392 double bestHz = frequencyForCentsAbove440(coarseCents);
Chris@16 393
Chris@16 394 cerr << "corresponding coarse Hz " << bestHz << " scores " << coarseScore << endl;
Chris@16 395 cerr << "searchDistance = " << searchDistance << endl;
Chris@16 396
Chris@16 397 for (int sign = -1; sign <= 1; sign += 2) {
Chris@16 398 for (int offset = 1; offset <= searchDistance; ++offset) {
Chris@16 399
Chris@16 400 int fineCents = coarseCents + sign * offset;
Chris@16 401
Chris@16 402 cerr << "trying with fineCents = " << fineCents << "..." << endl;
Chris@16 403
Chris@16 404 double fineHz = frequencyForCentsAbove440(fineCents);
Chris@16 405 TFeature fineFeature = computeFeatureFromSignal(m_other, fineHz);
Chris@16 406 double fineScore = featureDistance(fineFeature);
Chris@16 407
Chris@16 408 cerr << "fine offset = " << offset << ", cents = " << fineCents
Chris@16 409 << ", Hz = " << fineHz << ", score " << fineScore
Chris@16 410 << " (best score so far " << bestScore << ")" << endl;
Chris@16 411
Chris@16 412 if (fineScore < bestScore) {
Chris@16 413 cerr << "is good!" << endl;
Chris@16 414 bestScore = fineScore;
Chris@19 415 bestCents = fineCents;
Chris@17 416 bestHz = fineHz;
Chris@16 417 } else {
Chris@16 418 break;
Chris@16 419 }
Chris@16 420 }
Chris@16 421 }
Chris@16 422
Chris@20 423 //!!! could keep a vector of scores & then interpolate...
Chris@20 424
Chris@19 425 return pair<int, double>(bestCents, bestHz);
Chris@16 426 }
Chris@16 427
Chris@0 428 TuningDifference::FeatureSet
Chris@0 429 TuningDifference::getRemainingFeatures()
Chris@0 430 {
Chris@13 431 FeatureSet fs;
Chris@13 432 if (m_frameCount == 0) return fs;
Chris@13 433
Chris@13 434 m_refFeature = computeFeatureFromTotals(m_refTotals);
Chris@13 435 TFeature otherFeature = computeFeatureFromSignal(m_other, 440.);
Chris@1 436
Chris@1 437 Feature f;
Chris@1 438
Chris@4 439 f.values.clear();
Chris@13 440 for (auto v: m_refFeature) f.values.push_back(v);
Chris@13 441 fs[m_outputs["reffeature"]].push_back(f);
Chris@4 442
Chris@4 443 f.values.clear();
Chris@13 444 for (auto v: otherFeature) f.values.push_back(v);
Chris@13 445 fs[m_outputs["otherfeature"]].push_back(f);
Chris@13 446
Chris@13 447 int rotation = findBestRotation(otherFeature);
Chris@13 448
Chris@16 449 int coarseCents = -(rotation * 1200) / m_bpo;
Chris@13 450
Chris@13 451 cerr << "rotation " << rotation << " -> cents " << coarseCents << endl;
Chris@13 452
Chris@13 453 double coarseHz = frequencyForCentsAbove440(coarseCents);
Chris@13 454
Chris@13 455 TFeature coarseFeature = computeFeatureFromSignal(m_other, coarseHz);
Chris@13 456 double coarseScore = featureDistance(coarseFeature);
Chris@13 457
Chris@13 458 cerr << "corresponding Hz " << coarseHz << " scores " << coarseScore << endl;
Chris@4 459
Chris@19 460 //!!! This should be returning the fine chroma, not the coarse
Chris@4 461 f.values.clear();
Chris@13 462 for (auto v: coarseFeature) f.values.push_back(v);
Chris@13 463 fs[m_outputs["rotfeature"]].push_back(f);
Chris@16 464
Chris@19 465 pair<int, double> fine = findFineFrequency(coarseCents, coarseScore);
Chris@19 466 int fineCents = fine.first;
Chris@19 467 double fineHz = fine.second;
Chris@16 468
Chris@19 469 f.values.clear();
Chris@19 470 f.values.push_back(fineHz);
Chris@19 471 fs[m_outputs["tuningfreq"]].push_back(f);
Chris@19 472
Chris@19 473 f.values.clear();
Chris@19 474 f.values.push_back(fineCents);
Chris@19 475 fs[m_outputs["cents"]].push_back(f);
Chris@19 476
Chris@16 477 cerr << "overall best Hz = " << fineHz << endl;
Chris@4 478
Chris@1 479 return fs;
Chris@0 480 }
Chris@0 481