annotate chroma-compare-plugin/TuningDifference.cpp @ 15:994f5294996d

Use Chromagram from libcq, fix incorrect rotation
author Chris Cannam
date Thu, 05 Feb 2015 08:38:59 +0000
parents 812e4d021443
children 2c6460fb1fcf
rev   line source
Chris@0 1
Chris@0 2 #include "TuningDifference.h"
Chris@0 3
Chris@1 4 #include <iostream>
Chris@1 5
Chris@4 6 #include <cmath>
Chris@4 7 #include <cstdio>
Chris@4 8
Chris@13 9 #include <algorithm>
Chris@1 10
Chris@13 11 using namespace std;
Chris@13 12
// Convert a MIDI pitch number, optionally offset by a number of
// cents, into a frequency in Hz. MIDI pitch 69 maps to concertA, and
// each step of 12 in pitch doubles the frequency.
static double pitchToFrequency(int pitch,
                               double centsOffset = 0.,
                               double concertA = 440.)
{
    // Fractional pitch: 100 cents make up one semitone
    const double fractionalPitch =
        static_cast<double>(pitch) + (centsOffset / 100.);
    const double octavesFromA = (fractionalPitch - 69.0) / 12.0;
    return concertA * std::pow(2.0, octavesFromA);
}
Chris@13 20
Chris@13 21 static double frequencyForCentsAbove440(double cents)
Chris@13 22 {
Chris@13 23 return pitchToFrequency(69, cents, 440.);
Chris@13 24 }
Chris@5 25
Chris@0 26 TuningDifference::TuningDifference(float inputSampleRate) :
Chris@13 27 Plugin(inputSampleRate),
Chris@13 28 m_bpo(60),
Chris@15 29 m_refChroma(new Chromagram(paramsForTuningFrequency(440.)))
Chris@0 30 {
Chris@0 31 }
Chris@0 32
Chris@0 33 TuningDifference::~TuningDifference()
Chris@0 34 {
Chris@0 35 }
Chris@0 36
Chris@0 37 string
Chris@0 38 TuningDifference::getIdentifier() const
Chris@0 39 {
Chris@1 40 return "tuning-difference";
Chris@0 41 }
Chris@0 42
Chris@0 43 string
Chris@0 44 TuningDifference::getName() const
Chris@0 45 {
Chris@1 46 return "Tuning Difference";
Chris@0 47 }
Chris@0 48
Chris@0 49 string
Chris@0 50 TuningDifference::getDescription() const
Chris@0 51 {
Chris@0 52 // Return something helpful here!
Chris@0 53 return "";
Chris@0 54 }
Chris@0 55
Chris@0 56 string
Chris@0 57 TuningDifference::getMaker() const
Chris@0 58 {
Chris@0 59 // Your name here
Chris@0 60 return "";
Chris@0 61 }
Chris@0 62
Chris@0 63 int
Chris@0 64 TuningDifference::getPluginVersion() const
Chris@0 65 {
Chris@0 66 // Increment this each time you release a version that behaves
Chris@0 67 // differently from the previous one
Chris@0 68 return 1;
Chris@0 69 }
Chris@0 70
Chris@0 71 string
Chris@0 72 TuningDifference::getCopyright() const
Chris@0 73 {
Chris@0 74 // This function is not ideally named. It does not necessarily
Chris@0 75 // need to say who made the plugin -- getMaker does that -- but it
Chris@0 76 // should indicate the terms under which it is distributed. For
Chris@0 77 // example, "Copyright (year). All Rights Reserved", or "GPL"
Chris@0 78 return "";
Chris@0 79 }
Chris@0 80
Chris@0 81 TuningDifference::InputDomain
Chris@0 82 TuningDifference::getInputDomain() const
Chris@0 83 {
Chris@13 84 return TimeDomain;
Chris@0 85 }
Chris@0 86
Chris@0 87 size_t
Chris@0 88 TuningDifference::getPreferredBlockSize() const
Chris@0 89 {
Chris@13 90 return 0;
Chris@0 91 }
Chris@0 92
Chris@0 93 size_t
Chris@0 94 TuningDifference::getPreferredStepSize() const
Chris@0 95 {
Chris@1 96 return 0;
Chris@0 97 }
Chris@0 98
Chris@0 99 size_t
Chris@0 100 TuningDifference::getMinChannelCount() const
Chris@0 101 {
Chris@1 102 return 2;
Chris@0 103 }
Chris@0 104
Chris@0 105 size_t
Chris@0 106 TuningDifference::getMaxChannelCount() const
Chris@0 107 {
Chris@1 108 return 2;
Chris@0 109 }
Chris@0 110
Chris@0 111 TuningDifference::ParameterList
Chris@0 112 TuningDifference::getParameterDescriptors() const
Chris@0 113 {
Chris@0 114 ParameterList list;
Chris@0 115 return list;
Chris@0 116 }
Chris@0 117
Chris@0 118 float
Chris@1 119 TuningDifference::getParameter(string) const
Chris@0 120 {
Chris@0 121 return 0;
Chris@0 122 }
Chris@0 123
Chris@0 124 void
Chris@1 125 TuningDifference::setParameter(string, float)
Chris@0 126 {
Chris@0 127 }
Chris@0 128
Chris@0 129 TuningDifference::ProgramList
Chris@0 130 TuningDifference::getPrograms() const
Chris@0 131 {
Chris@0 132 ProgramList list;
Chris@0 133 return list;
Chris@0 134 }
Chris@0 135
Chris@0 136 string
Chris@0 137 TuningDifference::getCurrentProgram() const
Chris@0 138 {
Chris@0 139 return ""; // no programs
Chris@0 140 }
Chris@0 141
Chris@0 142 void
Chris@1 143 TuningDifference::selectProgram(string)
Chris@0 144 {
Chris@0 145 }
Chris@0 146
Chris@0 147 TuningDifference::OutputList
Chris@0 148 TuningDifference::getOutputDescriptors() const
Chris@0 149 {
Chris@0 150 OutputList list;
Chris@0 151
Chris@1 152 OutputDescriptor d;
Chris@1 153 d.identifier = "cents";
Chris@1 154 d.name = "Tuning Difference";
Chris@1 155 d.description = "Difference in averaged frequency profile between channels 1 and 2, in cents. A positive value means channel 2 is higher.";
Chris@1 156 d.unit = "cents";
Chris@1 157 d.hasFixedBinCount = true;
Chris@1 158 d.binCount = 1;
Chris@1 159 d.hasKnownExtents = false;
Chris@1 160 d.isQuantized = false;
Chris@1 161 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@1 162 d.hasDuration = false;
Chris@13 163 m_outputs[d.identifier] = list.size();
Chris@1 164 list.push_back(d);
Chris@0 165
Chris@1 166 d.identifier = "tuningfreq";
Chris@1 167 d.name = "Relative Tuning Frequency";
Chris@1 168 d.description = "Tuning frequency of channel 2, if channel 1 is assumed to contain the same music as it at a tuning frequency of A=440Hz.";
Chris@4 169 d.unit = "hz";
Chris@1 170 d.hasFixedBinCount = true;
Chris@1 171 d.binCount = 1;
Chris@1 172 d.hasKnownExtents = false;
Chris@1 173 d.isQuantized = false;
Chris@1 174 d.sampleType = OutputDescriptor::VariableSampleRate;
Chris@1 175 d.hasDuration = false;
Chris@13 176 m_outputs[d.identifier] = list.size();
Chris@1 177 list.push_back(d);
Chris@1 178
Chris@13 179 d.identifier = "reffeature";
Chris@13 180 d.name = "Reference Feature";
Chris@13 181 d.description = "Chroma feature from reference audio.";
Chris@0 182 d.unit = "";
Chris@0 183 d.hasFixedBinCount = true;
Chris@13 184 d.binCount = m_bpo;
Chris@4 185 d.hasKnownExtents = false;
Chris@4 186 d.isQuantized = false;
Chris@4 187 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@5 188 d.sampleRate = 1;
Chris@4 189 d.hasDuration = false;
Chris@13 190 m_outputs[d.identifier] = list.size();
Chris@13 191 list.push_back(d);
Chris@13 192
Chris@13 193 d.identifier = "otherfeature";
Chris@13 194 d.name = "Other Feature";
Chris@13 195 d.description = "Chroma feature from other audio, before rotation.";
Chris@13 196 d.unit = "";
Chris@13 197 d.hasFixedBinCount = true;
Chris@13 198 d.binCount = m_bpo;
Chris@13 199 d.hasKnownExtents = false;
Chris@13 200 d.isQuantized = false;
Chris@13 201 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@13 202 d.sampleRate = 1;
Chris@13 203 d.hasDuration = false;
Chris@13 204 m_outputs[d.identifier] = list.size();
Chris@13 205 list.push_back(d);
Chris@13 206
Chris@13 207 d.identifier = "rotfeature";
Chris@13 208 d.name = "Other Feature at Rotated Frequency";
Chris@13 209 d.description = "Chroma feature from reference audio calculated with the tuning frequency obtained from rotation matching.";
Chris@13 210 d.unit = "";
Chris@13 211 d.hasFixedBinCount = true;
Chris@13 212 d.binCount = m_bpo;
Chris@13 213 d.hasKnownExtents = false;
Chris@13 214 d.isQuantized = false;
Chris@13 215 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@13 216 d.sampleRate = 1;
Chris@13 217 d.hasDuration = false;
Chris@13 218 m_outputs[d.identifier] = list.size();
Chris@4 219 list.push_back(d);
Chris@4 220
Chris@0 221 return list;
Chris@0 222 }
Chris@0 223
Chris@0 224 bool
Chris@0 225 TuningDifference::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@0 226 {
Chris@0 227 if (channels < getMinChannelCount() ||
Chris@0 228 channels > getMaxChannelCount()) return false;
Chris@0 229
Chris@13 230 if (stepSize != blockSize) return false;
Chris@0 231
Chris@1 232 m_blockSize = blockSize;
Chris@1 233
Chris@1 234 reset();
Chris@1 235
Chris@0 236 return true;
Chris@0 237 }
Chris@0 238
Chris@0 239 void
Chris@0 240 TuningDifference::reset()
Chris@0 241 {
Chris@13 242 if (m_frameCount > 0) {
Chris@15 243 m_refChroma.reset(new Chromagram(paramsForTuningFrequency(440.)));
Chris@13 244 m_frameCount = 0;
Chris@13 245 }
Chris@15 246 m_refTotals = TFeature(m_bpo, 0.0);
Chris@13 247 m_other.clear();
Chris@13 248 }
Chris@13 249
// Add each element of b to the corresponding element of a, in place.
// b must have at least as many elements as a.
template<typename T>
void addTo(std::vector<T> &a, const std::vector<T> &b)
{
    auto src = b.begin();
    for (auto dst = a.begin(); dst != a.end(); ++dst, ++src) {
        *dst = *dst + *src;
    }
}
Chris@13 255
// Return the sum of absolute element-wise differences between a and
// b (the "city block" distance). b must have at least as many
// elements as a.
template<typename T>
T distance(const std::vector<T> &a, const std::vector<T> &b)
{
    T total = T();
    auto rhs = b.begin();
    for (auto lhs = a.begin(); lhs != a.end(); ++lhs, ++rhs) {
        const T gap = std::fabs(*lhs - *rhs);
        total = total + gap;
    }
    return total;
}
Chris@13 262
Chris@13 263 TuningDifference::TFeature
Chris@15 264 TuningDifference::computeFeatureFromTotals(const TFeature &totals) const
Chris@13 265 {
Chris@13 266 TFeature feature(m_bpo);
Chris@13 267 double sum = 0.0;
Chris@13 268
Chris@15 269 for (int i = 0; i < m_bpo; ++i) {
Chris@13 270 double value = totals[i] / m_frameCount;
Chris@15 271 feature[i] += value;
Chris@13 272 sum += value;
Chris@13 273 }
Chris@13 274
Chris@13 275 for (int i = 0; i < m_bpo; ++i) {
Chris@13 276 feature[i] /= sum;
Chris@13 277 }
Chris@13 278
Chris@13 279 cerr << "computeFeatureFromTotals: feature values:" << endl;
Chris@13 280 for (auto v: feature) cerr << v << " ";
Chris@13 281 cerr << endl;
Chris@13 282
Chris@13 283 return feature;
Chris@13 284 }
Chris@13 285
Chris@15 286 Chromagram::Parameters
Chris@13 287 TuningDifference::paramsForTuningFrequency(double hz) const
Chris@13 288 {
Chris@15 289 Chromagram::Parameters params(m_inputSampleRate);
Chris@15 290 params.lowestOctave = 0;
Chris@15 291 params.octaves = 6;
Chris@15 292 params.bpo = m_bpo;
Chris@15 293 params.tuningFrequency = hz;
Chris@15 294 return params;
Chris@13 295 }
Chris@13 296
Chris@13 297 TuningDifference::TFeature
Chris@13 298 TuningDifference::computeFeatureFromSignal(const Signal &signal, double hz) const
Chris@13 299 {
Chris@15 300 Chromagram chromagram(paramsForTuningFrequency(hz));
Chris@13 301
Chris@15 302 TFeature totals(m_bpo, 0.0);
Chris@13 303
Chris@13 304 for (int i = 0; i < m_frameCount; ++i) {
Chris@13 305 Signal::const_iterator first = signal.begin() + i * m_blockSize;
Chris@13 306 Signal::const_iterator last = first + m_blockSize;
Chris@13 307 if (last > signal.end()) last = signal.end();
Chris@15 308 CQBase::RealSequence input(first, last);
Chris@13 309 input.resize(m_blockSize);
Chris@15 310 CQBase::RealBlock block = chromagram.process(input);
Chris@13 311 for (const auto &v: block) addTo(totals, v);
Chris@13 312 }
Chris@13 313
Chris@13 314 return computeFeatureFromTotals(totals);
Chris@0 315 }
Chris@0 316
Chris@0 317 TuningDifference::FeatureSet
Chris@13 318 TuningDifference::process(const float *const *inputBuffers, Vamp::RealTime)
Chris@0 319 {
Chris@15 320 CQBase::RealBlock block;
Chris@15 321 CQBase::RealSequence input;
Chris@13 322
Chris@15 323 input = CQBase::RealSequence
Chris@13 324 (inputBuffers[0], inputBuffers[0] + m_blockSize);
Chris@15 325 block = m_refChroma->process(input);
Chris@13 326 for (const auto &v: block) addTo(m_refTotals, v);
Chris@13 327
Chris@13 328 m_other.insert(m_other.end(),
Chris@13 329 inputBuffers[1], inputBuffers[1] + m_blockSize);
Chris@1 330
Chris@1 331 ++m_frameCount;
Chris@0 332 return FeatureSet();
Chris@0 333 }
Chris@0 334
Chris@13 335 double
Chris@13 336 TuningDifference::featureDistance(const TFeature &other, int rotation) const
Chris@13 337 {
Chris@13 338 if (rotation == 0) {
Chris@13 339 return distance(m_refFeature, other);
Chris@13 340 } else {
Chris@15 341 // A positive rotation pushes the tuning frequency up for this
Chris@15 342 // chroma, negative one pulls it down. If a positive rotation
Chris@15 343 // makes this chroma match an un-rotated reference, then this
Chris@15 344 // chroma must have initially been lower than the reference.
Chris@13 345 TFeature r(other);
Chris@15 346 if (rotation < 0) {
Chris@15 347 rotate(r.begin(), r.begin() - rotation, r.end());
Chris@13 348 } else {
Chris@15 349 rotate(r.begin(), r.end() - rotation, r.end());
Chris@13 350 }
Chris@13 351 return distance(m_refFeature, r);
Chris@13 352 }
Chris@13 353 }
Chris@13 354
Chris@13 355 int
Chris@13 356 TuningDifference::findBestRotation(const TFeature &other) const
Chris@13 357 {
Chris@13 358 map<double, int> dists;
Chris@13 359
Chris@13 360 int maxSemis = 6;
Chris@13 361 int maxRotation = (m_bpo * maxSemis) / 12;
Chris@13 362
Chris@13 363 for (int r = -maxRotation; r <= maxRotation; ++r) {
Chris@13 364 double dist = featureDistance(other, r);
Chris@13 365 dists[dist] = r;
Chris@13 366 cerr << "rotation " << r << ": score " << dist << endl;
Chris@13 367 }
Chris@13 368
Chris@13 369 int best = dists.begin()->second;
Chris@13 370
Chris@13 371 cerr << "best is " << best << endl;
Chris@13 372 return best;
Chris@13 373 }
Chris@13 374
Chris@0 375 TuningDifference::FeatureSet
Chris@0 376 TuningDifference::getRemainingFeatures()
Chris@0 377 {
Chris@13 378 FeatureSet fs;
Chris@13 379 if (m_frameCount == 0) return fs;
Chris@13 380
Chris@13 381 m_refFeature = computeFeatureFromTotals(m_refTotals);
Chris@13 382 TFeature otherFeature = computeFeatureFromSignal(m_other, 440.);
Chris@1 383
Chris@1 384 Feature f;
Chris@1 385
Chris@4 386 f.values.clear();
Chris@13 387 for (auto v: m_refFeature) f.values.push_back(v);
Chris@13 388 fs[m_outputs["reffeature"]].push_back(f);
Chris@4 389
Chris@4 390 f.values.clear();
Chris@13 391 for (auto v: otherFeature) f.values.push_back(v);
Chris@13 392 fs[m_outputs["otherfeature"]].push_back(f);
Chris@13 393
Chris@13 394 int rotation = findBestRotation(otherFeature);
Chris@13 395
Chris@13 396 int coarseCents = -(rotation * 100) / (m_bpo / 12);
Chris@13 397
Chris@13 398 cerr << "rotation " << rotation << " -> cents " << coarseCents << endl;
Chris@13 399
Chris@13 400 double coarseHz = frequencyForCentsAbove440(coarseCents);
Chris@13 401
Chris@13 402 TFeature coarseFeature = computeFeatureFromSignal(m_other, coarseHz);
Chris@13 403 double coarseScore = featureDistance(coarseFeature);
Chris@13 404
Chris@13 405 cerr << "corresponding Hz " << coarseHz << " scores " << coarseScore << endl;
Chris@4 406
Chris@4 407 f.values.clear();
Chris@13 408 for (auto v: coarseFeature) f.values.push_back(v);
Chris@13 409 fs[m_outputs["rotfeature"]].push_back(f);
Chris@4 410
Chris@1 411 return fs;
Chris@0 412 }
Chris@0 413