annotate chroma-compare-plugin/TuningDifference.cpp @ 13:c74071731d74

The bulk of the chroma-compare implementation. Should be using the newly refactored Chromagram class from libcq though (chroma is currently upside-down!)
author Chris Cannam
date Wed, 04 Feb 2015 15:10:07 +0000
parents 23572f9d25d9
children 812e4d021443
rev   line source
Chris@0 1
#include "TuningDifference.h"

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <functional>
#include <iostream>
#include <numeric>
Chris@1 10
Chris@13 11 using namespace std;
Chris@13 12
// Convert a (possibly fractional) MIDI-style pitch number to a frequency
// in Hz.
//
//  pitch       - integer pitch number; 69 maps to concertA.
//  centsOffset - offset added to the pitch, in hundredths of a pitch step.
//  concertA    - frequency in Hz assigned to pitch 69.
//
// Each 12 pitch steps double the frequency.
static double pitchToFrequency(int pitch,
                               double centsOffset = 0.,
                               double concertA = 440.)
{
    const double fractionalPitch = double(pitch) + (centsOffset / 100.);
    const double octavesFromA = (fractionalPitch - 69.0) / 12.0;
    return concertA * std::pow(2.0, octavesFromA);
}
Chris@13 20
Chris@13 21 static double frequencyForCentsAbove440(double cents)
Chris@13 22 {
Chris@13 23 return pitchToFrequency(69, cents, 440.);
Chris@13 24 }
Chris@5 25
Chris@0 26 TuningDifference::TuningDifference(float inputSampleRate) :
Chris@13 27 Plugin(inputSampleRate),
Chris@13 28 m_bpo(60),
Chris@13 29 m_refCQ(new CQSpectrogram(paramsForTuningFrequency(440.),
Chris@13 30 CQSpectrogram::InterpolateHold))
Chris@0 31 {
Chris@0 32 }
Chris@0 33
Chris@0 34 TuningDifference::~TuningDifference()
Chris@0 35 {
Chris@0 36 }
Chris@0 37
Chris@0 38 string
Chris@0 39 TuningDifference::getIdentifier() const
Chris@0 40 {
Chris@1 41 return "tuning-difference";
Chris@0 42 }
Chris@0 43
Chris@0 44 string
Chris@0 45 TuningDifference::getName() const
Chris@0 46 {
Chris@1 47 return "Tuning Difference";
Chris@0 48 }
Chris@0 49
Chris@0 50 string
Chris@0 51 TuningDifference::getDescription() const
Chris@0 52 {
Chris@0 53 // Return something helpful here!
Chris@0 54 return "";
Chris@0 55 }
Chris@0 56
Chris@0 57 string
Chris@0 58 TuningDifference::getMaker() const
Chris@0 59 {
Chris@0 60 // Your name here
Chris@0 61 return "";
Chris@0 62 }
Chris@0 63
Chris@0 64 int
Chris@0 65 TuningDifference::getPluginVersion() const
Chris@0 66 {
Chris@0 67 // Increment this each time you release a version that behaves
Chris@0 68 // differently from the previous one
Chris@0 69 return 1;
Chris@0 70 }
Chris@0 71
Chris@0 72 string
Chris@0 73 TuningDifference::getCopyright() const
Chris@0 74 {
Chris@0 75 // This function is not ideally named. It does not necessarily
Chris@0 76 // need to say who made the plugin -- getMaker does that -- but it
Chris@0 77 // should indicate the terms under which it is distributed. For
Chris@0 78 // example, "Copyright (year). All Rights Reserved", or "GPL"
Chris@0 79 return "";
Chris@0 80 }
Chris@0 81
Chris@0 82 TuningDifference::InputDomain
Chris@0 83 TuningDifference::getInputDomain() const
Chris@0 84 {
Chris@13 85 return TimeDomain;
Chris@0 86 }
Chris@0 87
Chris@0 88 size_t
Chris@0 89 TuningDifference::getPreferredBlockSize() const
Chris@0 90 {
Chris@13 91 return 0;
Chris@0 92 }
Chris@0 93
Chris@0 94 size_t
Chris@0 95 TuningDifference::getPreferredStepSize() const
Chris@0 96 {
Chris@1 97 return 0;
Chris@0 98 }
Chris@0 99
Chris@0 100 size_t
Chris@0 101 TuningDifference::getMinChannelCount() const
Chris@0 102 {
Chris@1 103 return 2;
Chris@0 104 }
Chris@0 105
Chris@0 106 size_t
Chris@0 107 TuningDifference::getMaxChannelCount() const
Chris@0 108 {
Chris@1 109 return 2;
Chris@0 110 }
Chris@0 111
Chris@0 112 TuningDifference::ParameterList
Chris@0 113 TuningDifference::getParameterDescriptors() const
Chris@0 114 {
Chris@0 115 ParameterList list;
Chris@0 116 return list;
Chris@0 117 }
Chris@0 118
Chris@0 119 float
Chris@1 120 TuningDifference::getParameter(string) const
Chris@0 121 {
Chris@0 122 return 0;
Chris@0 123 }
Chris@0 124
Chris@0 125 void
Chris@1 126 TuningDifference::setParameter(string, float)
Chris@0 127 {
Chris@0 128 }
Chris@0 129
Chris@0 130 TuningDifference::ProgramList
Chris@0 131 TuningDifference::getPrograms() const
Chris@0 132 {
Chris@0 133 ProgramList list;
Chris@0 134 return list;
Chris@0 135 }
Chris@0 136
Chris@0 137 string
Chris@0 138 TuningDifference::getCurrentProgram() const
Chris@0 139 {
Chris@0 140 return ""; // no programs
Chris@0 141 }
Chris@0 142
Chris@0 143 void
Chris@1 144 TuningDifference::selectProgram(string)
Chris@0 145 {
Chris@0 146 }
Chris@0 147
Chris@0 148 TuningDifference::OutputList
Chris@0 149 TuningDifference::getOutputDescriptors() const
Chris@0 150 {
Chris@0 151 OutputList list;
Chris@0 152
Chris@1 153 OutputDescriptor d;
Chris@1 154 d.identifier = "cents";
Chris@1 155 d.name = "Tuning Difference";
Chris@1 156 d.description = "Difference in averaged frequency profile between channels 1 and 2, in cents. A positive value means channel 2 is higher.";
Chris@1 157 d.unit = "cents";
Chris@1 158 d.hasFixedBinCount = true;
Chris@1 159 d.binCount = 1;
Chris@1 160 d.hasKnownExtents = false;
Chris@1 161 d.isQuantized = false;
Chris@1 162 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@1 163 d.hasDuration = false;
Chris@13 164 m_outputs[d.identifier] = list.size();
Chris@1 165 list.push_back(d);
Chris@0 166
Chris@1 167 d.identifier = "tuningfreq";
Chris@1 168 d.name = "Relative Tuning Frequency";
Chris@1 169 d.description = "Tuning frequency of channel 2, if channel 1 is assumed to contain the same music as it at a tuning frequency of A=440Hz.";
Chris@4 170 d.unit = "hz";
Chris@1 171 d.hasFixedBinCount = true;
Chris@1 172 d.binCount = 1;
Chris@1 173 d.hasKnownExtents = false;
Chris@1 174 d.isQuantized = false;
Chris@1 175 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@1 176 d.hasDuration = false;
Chris@13 177 m_outputs[d.identifier] = list.size();
Chris@1 178 list.push_back(d);
Chris@1 179
Chris@13 180 d.identifier = "reffeature";
Chris@13 181 d.name = "Reference Feature";
Chris@13 182 d.description = "Chroma feature from reference audio.";
Chris@0 183 d.unit = "";
Chris@0 184 d.hasFixedBinCount = true;
Chris@13 185 d.binCount = m_bpo;
Chris@4 186 d.hasKnownExtents = false;
Chris@4 187 d.isQuantized = false;
Chris@4 188 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@5 189 d.sampleRate = 1;
Chris@4 190 d.hasDuration = false;
Chris@13 191 m_outputs[d.identifier] = list.size();
Chris@13 192 list.push_back(d);
Chris@13 193
Chris@13 194 d.identifier = "otherfeature";
Chris@13 195 d.name = "Other Feature";
Chris@13 196 d.description = "Chroma feature from other audio, before rotation.";
Chris@13 197 d.unit = "";
Chris@13 198 d.hasFixedBinCount = true;
Chris@13 199 d.binCount = m_bpo;
Chris@13 200 d.hasKnownExtents = false;
Chris@13 201 d.isQuantized = false;
Chris@13 202 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@13 203 d.sampleRate = 1;
Chris@13 204 d.hasDuration = false;
Chris@13 205 m_outputs[d.identifier] = list.size();
Chris@13 206 list.push_back(d);
Chris@13 207
Chris@13 208 d.identifier = "rotfeature";
Chris@13 209 d.name = "Other Feature at Rotated Frequency";
Chris@13 210 d.description = "Chroma feature from reference audio calculated with the tuning frequency obtained from rotation matching.";
Chris@13 211 d.unit = "";
Chris@13 212 d.hasFixedBinCount = true;
Chris@13 213 d.binCount = m_bpo;
Chris@13 214 d.hasKnownExtents = false;
Chris@13 215 d.isQuantized = false;
Chris@13 216 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@13 217 d.sampleRate = 1;
Chris@13 218 d.hasDuration = false;
Chris@13 219 m_outputs[d.identifier] = list.size();
Chris@4 220 list.push_back(d);
Chris@4 221
Chris@0 222 return list;
Chris@0 223 }
Chris@0 224
Chris@0 225 bool
Chris@0 226 TuningDifference::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@0 227 {
Chris@0 228 if (channels < getMinChannelCount() ||
Chris@0 229 channels > getMaxChannelCount()) return false;
Chris@0 230
Chris@13 231 if (stepSize != blockSize) return false;
Chris@0 232
Chris@1 233 m_blockSize = blockSize;
Chris@1 234
Chris@1 235 reset();
Chris@1 236
Chris@0 237 return true;
Chris@0 238 }
Chris@0 239
Chris@0 240 void
Chris@0 241 TuningDifference::reset()
Chris@0 242 {
Chris@13 243 if (m_frameCount > 0) {
Chris@13 244 m_refCQ.reset(new CQSpectrogram(paramsForTuningFrequency(440.),
Chris@13 245 CQSpectrogram::InterpolateHold));
Chris@13 246 m_frameCount = 0;
Chris@13 247 }
Chris@13 248 m_refTotals = Chroma(m_refCQ->getTotalBins(), 0.0);
Chris@13 249 m_other.clear();
Chris@13 250 }
Chris@13 251
// Add b to a element-wise, in place: a[i] += b[i].
//
// Only the elements present in both vectors are touched, so a shorter b
// leaves the tail of a unchanged rather than reading past the end of b.
template<typename T>
void addTo(std::vector<T> &a, const std::vector<T> &b)
{
    const typename std::vector<T>::size_type common =
        std::min(a.size(), b.size());
    std::transform(a.begin(), a.begin() + common, b.begin(), a.begin(),
                   std::plus<T>());
}
Chris@13 257
// Sum of absolute element-wise differences between a and b.
//
// Only the elements present in both vectors contribute, so a shorter b
// does not cause a read past its end. Returns T() when either is empty.
template<typename T>
T distance(const std::vector<T> &a, const std::vector<T> &b)
{
    const typename std::vector<T>::size_type common =
        std::min(a.size(), b.size());
    return std::inner_product(a.begin(), a.begin() + common, b.begin(), T(),
                              std::plus<T>(),
                              [](T x, T y) { return std::fabs(x - y); });
}
Chris@13 264
Chris@13 265 TuningDifference::TFeature
Chris@13 266 TuningDifference::computeFeatureFromTotals(const Chroma &totals) const
Chris@13 267 {
Chris@13 268 TFeature feature(m_bpo);
Chris@13 269 double sum = 0.0;
Chris@13 270
Chris@13 271 for (int i = 0; i < (int)totals.size(); ++i) {
Chris@13 272 double value = totals[i] / m_frameCount;
Chris@13 273 feature[i % m_bpo] += value;
Chris@13 274 sum += value;
Chris@13 275 }
Chris@13 276
Chris@13 277 for (int i = 0; i < m_bpo; ++i) {
Chris@13 278 feature[i] /= sum;
Chris@13 279 }
Chris@13 280
Chris@13 281 cerr << "computeFeatureFromTotals: feature values:" << endl;
Chris@13 282 for (auto v: feature) cerr << v << " ";
Chris@13 283 cerr << endl;
Chris@13 284
Chris@13 285 return feature;
Chris@13 286 }
Chris@13 287
Chris@13 288 CQParameters
Chris@13 289 TuningDifference::paramsForTuningFrequency(double hz) const
Chris@13 290 {
Chris@13 291 return CQParameters(m_inputSampleRate,
Chris@13 292 pitchToFrequency(36, hz),
Chris@13 293 pitchToFrequency(96, hz),
Chris@13 294 m_bpo);
Chris@13 295 }
Chris@13 296
Chris@13 297 TuningDifference::TFeature
Chris@13 298 TuningDifference::computeFeatureFromSignal(const Signal &signal, double hz) const
Chris@13 299 {
Chris@13 300 CQSpectrogram cq(paramsForTuningFrequency(hz),
Chris@13 301 CQSpectrogram::InterpolateHold);
Chris@13 302
Chris@13 303 Chroma totals(m_refCQ->getTotalBins(), 0.0);
Chris@13 304
Chris@13 305 for (int i = 0; i < m_frameCount; ++i) {
Chris@13 306 Signal::const_iterator first = signal.begin() + i * m_blockSize;
Chris@13 307 Signal::const_iterator last = first + m_blockSize;
Chris@13 308 if (last > signal.end()) last = signal.end();
Chris@13 309 CQSpectrogram::RealSequence input(first, last);
Chris@13 310 input.resize(m_blockSize);
Chris@13 311 CQSpectrogram::RealBlock block = cq.process(input);
Chris@13 312 for (const auto &v: block) addTo(totals, v);
Chris@13 313 }
Chris@13 314
Chris@13 315 return computeFeatureFromTotals(totals);
Chris@0 316 }
Chris@0 317
Chris@0 318 TuningDifference::FeatureSet
Chris@13 319 TuningDifference::process(const float *const *inputBuffers, Vamp::RealTime)
Chris@0 320 {
Chris@13 321 CQSpectrogram::RealBlock block;
Chris@13 322 CQSpectrogram::RealSequence input;
Chris@13 323
Chris@13 324 input = CQSpectrogram::RealSequence
Chris@13 325 (inputBuffers[0], inputBuffers[0] + m_blockSize);
Chris@13 326 block = m_refCQ->process(input);
Chris@13 327 for (const auto &v: block) addTo(m_refTotals, v);
Chris@13 328
Chris@13 329 m_other.insert(m_other.end(),
Chris@13 330 inputBuffers[1], inputBuffers[1] + m_blockSize);
Chris@1 331
Chris@1 332 ++m_frameCount;
Chris@0 333 return FeatureSet();
Chris@0 334 }
Chris@0 335
Chris@13 336 double
Chris@13 337 TuningDifference::featureDistance(const TFeature &other, int rotation) const
Chris@13 338 {
Chris@13 339 if (rotation == 0) {
Chris@13 340 return distance(m_refFeature, other);
Chris@13 341 } else {
Chris@13 342 TFeature r(other);
Chris@13 343 if (rotation > 0) {
Chris@13 344 rotate(r.begin(), r.begin() + rotation, r.end());
Chris@13 345 } else {
Chris@13 346 rotate(r.begin(), r.end() + rotation, r.end());
Chris@13 347 }
Chris@13 348 return distance(m_refFeature, r);
Chris@13 349 }
Chris@13 350 }
Chris@13 351
Chris@13 352 int
Chris@13 353 TuningDifference::findBestRotation(const TFeature &other) const
Chris@13 354 {
Chris@13 355 map<double, int> dists;
Chris@13 356
Chris@13 357 int maxSemis = 6;
Chris@13 358 int maxRotation = (m_bpo * maxSemis) / 12;
Chris@13 359
Chris@13 360 for (int r = -maxRotation; r <= maxRotation; ++r) {
Chris@13 361 double dist = featureDistance(other, r);
Chris@13 362 dists[dist] = r;
Chris@13 363 cerr << "rotation " << r << ": score " << dist << endl;
Chris@13 364 }
Chris@13 365
Chris@13 366 int best = dists.begin()->second;
Chris@13 367
Chris@13 368 cerr << "best is " << best << endl;
Chris@13 369 return best;
Chris@13 370 }
Chris@13 371
Chris@0 372 TuningDifference::FeatureSet
Chris@0 373 TuningDifference::getRemainingFeatures()
Chris@0 374 {
Chris@13 375 FeatureSet fs;
Chris@13 376 if (m_frameCount == 0) return fs;
Chris@13 377
Chris@13 378 m_refFeature = computeFeatureFromTotals(m_refTotals);
Chris@13 379 TFeature otherFeature = computeFeatureFromSignal(m_other, 440.);
Chris@1 380
Chris@1 381 Feature f;
Chris@1 382
Chris@4 383 f.values.clear();
Chris@13 384 for (auto v: m_refFeature) f.values.push_back(v);
Chris@13 385 fs[m_outputs["reffeature"]].push_back(f);
Chris@4 386
Chris@4 387 f.values.clear();
Chris@13 388 for (auto v: otherFeature) f.values.push_back(v);
Chris@13 389 fs[m_outputs["otherfeature"]].push_back(f);
Chris@13 390
Chris@13 391 int rotation = findBestRotation(otherFeature);
Chris@13 392
Chris@13 393 int coarseCents = -(rotation * 100) / (m_bpo / 12);
Chris@13 394
Chris@13 395 cerr << "rotation " << rotation << " -> cents " << coarseCents << endl;
Chris@13 396
Chris@13 397 double coarseHz = frequencyForCentsAbove440(coarseCents);
Chris@13 398
Chris@13 399 TFeature coarseFeature = computeFeatureFromSignal(m_other, coarseHz);
Chris@13 400 double coarseScore = featureDistance(coarseFeature);
Chris@13 401
Chris@13 402 cerr << "corresponding Hz " << coarseHz << " scores " << coarseScore << endl;
Chris@4 403
Chris@4 404 f.values.clear();
Chris@13 405 for (auto v: coarseFeature) f.values.push_back(v);
Chris@13 406 fs[m_outputs["rotfeature"]].push_back(f);
Chris@4 407
Chris@1 408 return fs;
Chris@0 409 }
Chris@0 410