Mercurial > hg > tuning-difference
view src/TuningDifference.cpp @ 46:1623751c4549
Switch from 60bpo to 120 - seems to work better (?) with little performance downside. Also avoid recalculating second feature if not needed for fine tuning, and extend regression test
author | Chris Cannam |
---|---|
date | Tue, 09 Jul 2019 15:09:33 +0100 |
parents | dcfae9ef87de |
children | f28b34e7ce8d |
line wrap: on
line source
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ /* Centre for Digital Music, Queen Mary University of London. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. See the file COPYING included with this distribution for more information. */ #include "TuningDifference.h" #include <iostream> #include <cmath> #include <cstdio> #include <climits> #include <algorithm> #include <numeric> using namespace std; static double pitchToFrequency(int pitch, double centsOffset = 0., double concertA = 440.) { double p = double(pitch) + (centsOffset / 100.); return concertA * pow(2.0, (p - 69.0) / 12.0); } static double frequencyForCentsAbove440(double cents) { return pitchToFrequency(69, cents, 440.); } static float defaultMaxDuration = 0.f; static int defaultMaxSemis = 4; static bool defaultFineTuning = true; TuningDifference::TuningDifference(float inputSampleRate) : Plugin(inputSampleRate), m_bpo(120), m_refChroma(new Chromagram(paramsForTuningFrequency(440.))), m_blockSize(0), m_frameCount(0), m_maxDuration(defaultMaxDuration), m_maxSemis(defaultMaxSemis), m_fineTuning(defaultFineTuning) { } TuningDifference::~TuningDifference() { } string TuningDifference::getIdentifier() const { return "tuning-difference"; } string TuningDifference::getName() const { return "Tuning Difference"; } string TuningDifference::getDescription() const { return "Estimate the tuning frequency of a recording, by comparing it to another recording of the same music whose tuning frequency is known"; } string TuningDifference::getMaker() const { // Your name here return "Chris Cannam"; } int TuningDifference::getPluginVersion() const { // Increment this each time you release a version that behaves // differently from the previous one return 3; } string TuningDifference::getCopyright() const { // This function is not ideally named. It does not necessarily // need to say who made the plugin -- getMaker does that -- but it // should indicate the terms under which it is distributed. For // example, "Copyright (year). All Rights Reserved", or "GPL" return "GPL"; } TuningDifference::InputDomain TuningDifference::getInputDomain() const { return TimeDomain; } size_t TuningDifference::getPreferredBlockSize() const { return 0; } size_t TuningDifference::getPreferredStepSize() const { return 0; } size_t TuningDifference::getMinChannelCount() const { return 2; } size_t TuningDifference::getMaxChannelCount() const { return 2; } TuningDifference::ParameterList TuningDifference::getParameterDescriptors() const { ParameterList list; ParameterDescriptor desc; desc.identifier = "maxduration"; desc.name = "Maximum duration to analyse"; desc.description = "The maximum duration (in seconds) to consider from either input file, always taken from the start of the input. Zero means there is no limit."; desc.minValue = 0; desc.maxValue = 3600; desc.defaultValue = defaultMaxDuration; desc.isQuantized = false; desc.unit = "s"; list.push_back(desc); desc.identifier = "maxrange"; desc.name = "Maximum range in semitones"; desc.description = "The maximum difference in semitones that will be searched."; desc.minValue = 1; desc.maxValue = 11; desc.defaultValue = defaultMaxSemis; desc.isQuantized = true; desc.quantizeStep = 1; desc.unit = "semitones"; list.push_back(desc); desc.identifier = "finetuning"; desc.name = "Fine tuning"; desc.description = "Use a fine tuning stage to increase nominal resolution from 10 cents to 1 cent."; desc.minValue = 0; desc.maxValue = 1; desc.defaultValue = (defaultFineTuning ? 1.f : 0.f); desc.isQuantized = true; desc.quantizeStep = 1; desc.unit = ""; list.push_back(desc); return list; } float TuningDifference::getParameter(string id) const { if (id == "maxduration") { return m_maxDuration; } else if (id == "maxrange") { return float(m_maxSemis); } else if (id == "finetuning") { return m_fineTuning ? 1.f : 0.f; } return 0; } void TuningDifference::setParameter(string id, float value) { if (id == "maxduration") { m_maxDuration = value; } else if (id == "maxrange") { m_maxSemis = int(roundf(value)); } else if (id == "finetuning") { m_fineTuning = (value > 0.5f); } } TuningDifference::ProgramList TuningDifference::getPrograms() const { ProgramList list; return list; } string TuningDifference::getCurrentProgram() const { return ""; // no programs } void TuningDifference::selectProgram(string) { } TuningDifference::OutputList TuningDifference::getOutputDescriptors() const { OutputList list; OutputDescriptor d; d.identifier = "cents"; d.name = "Tuning Difference"; d.description = "Difference in averaged frequency profile between channels 1 and 2, in cents. A positive value means channel 2 is higher."; d.unit = "cents"; d.hasFixedBinCount = true; d.binCount = 1; d.hasKnownExtents = false; d.isQuantized = false; d.sampleType = OutputDescriptor::VariableSampleRate; d.hasDuration = false; m_outputs[d.identifier] = int(list.size()); list.push_back(d); d.identifier = "tuningfreq"; d.name = "Relative Tuning Frequency"; d.description = "Tuning frequency of channel 2, if channel 1 is assumed to contain the same music as it at a tuning frequency of A=440Hz."; d.unit = "hz"; d.hasFixedBinCount = true; d.binCount = 1; d.hasKnownExtents = false; d.isQuantized = false; d.sampleType = OutputDescriptor::VariableSampleRate; d.hasDuration = false; m_outputs[d.identifier] = int(list.size()); list.push_back(d); d.identifier = "reffeature"; d.name = "Reference Feature"; d.description = "Chroma feature from reference audio."; d.unit = ""; d.hasFixedBinCount = true; d.binCount = m_bpo; d.hasKnownExtents = false; d.isQuantized = false; d.sampleType = OutputDescriptor::FixedSampleRate; d.sampleRate = 1; d.hasDuration = false; m_outputs[d.identifier] = int(list.size()); list.push_back(d); d.identifier = "otherfeature"; d.name = "Other Feature"; d.description = "Chroma feature from other audio, before rotation."; d.unit = ""; d.hasFixedBinCount = true; d.binCount = m_bpo; d.hasKnownExtents = false; d.isQuantized = false; d.sampleType = OutputDescriptor::FixedSampleRate; d.sampleRate = 1; d.hasDuration = false; m_outputs[d.identifier] = int(list.size()); list.push_back(d); d.identifier = "rotfeature"; d.name = "Other Feature at Rotated Frequency"; d.description = "Chroma feature from reference audio calculated with the tuning frequency obtained from rotation matching."; d.unit = ""; d.hasFixedBinCount = true; d.binCount = m_bpo; d.hasKnownExtents = false; d.isQuantized = false; d.sampleType = OutputDescriptor::FixedSampleRate; d.sampleRate = 1; d.hasDuration = false; m_outputs[d.identifier] = int(list.size()); list.push_back(d); return list; } bool TuningDifference::initialise(size_t channels, size_t stepSize, size_t blockSize) { if (channels < getMinChannelCount() || channels > getMaxChannelCount()) return false; if (stepSize != blockSize) return false; if (m_blockSize > INT_MAX) return false; m_blockSize = int(blockSize); reset(); return true; } void TuningDifference::reset() { if (m_frameCount > 0) { m_refChroma.reset(new Chromagram(paramsForTuningFrequency(440.))); m_frameCount = 0; } m_refTotals = TFeature(m_bpo, 0.0); m_other.clear(); } template<typename T> void addTo(vector<T> &a, const vector<T> &b) { transform(a.begin(), a.end(), b.begin(), a.begin(), plus<T>()); } template<typename T> T distance(const vector<T> &a, const vector<T> &b) { return inner_product(a.begin(), a.end(), b.begin(), T(), plus<T>(), [](T x, T y) { return fabs(x - y); }); } TuningDifference::TFeature TuningDifference::computeFeatureFromTotals(const TFeature &totals) const { if (m_frameCount == 0) return totals; TFeature feature(m_bpo); double sum = 0.0; for (int i = 0; i < m_bpo; ++i) { double value = totals[i] / m_frameCount; feature[i] += value; sum += value; } for (int i = 0; i < m_bpo; ++i) { feature[i] /= sum; } // cerr << "computeFeatureFromTotals: feature values:" << endl; // for (auto v: feature) cerr << v << " "; // cerr << endl; return feature; } Chromagram::Parameters TuningDifference::paramsForTuningFrequency(double hz) const { Chromagram::Parameters params(m_inputSampleRate); params.lowestOctave = 2; params.octaveCount = 4; params.binsPerOctave = m_bpo; params.tuningFrequency = hz; params.atomHopFactor = 0.5; params.window = CQParameters::Hann; return params; } TuningDifference::TFeature TuningDifference::computeFeatureFromSignal(const Signal &signal, double hz) const { Chromagram chromagram(paramsForTuningFrequency(hz)); TFeature totals(m_bpo, 0.0); cerr << "computeFeatureFromSignal: hz = " << hz << ", frame count = " << m_frameCount << endl; for (int i = 0; i < m_frameCount; ++i) { Signal::const_iterator first = signal.begin() + i * m_blockSize; Signal::const_iterator last = first + m_blockSize; if (last > signal.end()) last = signal.end(); CQBase::RealSequence input(first, last); input.resize(m_blockSize); CQBase::RealBlock block = chromagram.process(input); for (const auto &v: block) addTo(totals, v); } return computeFeatureFromTotals(totals); } TuningDifference::FeatureSet TuningDifference::process(const float *const *inputBuffers, Vamp::RealTime) { if (m_maxDuration > 0) { int maxFrames = int((m_maxDuration * m_inputSampleRate) / float(m_blockSize)); if (m_frameCount > maxFrames) return FeatureSet(); } CQBase::RealBlock block; CQBase::RealSequence input; input = CQBase::RealSequence (inputBuffers[0], inputBuffers[0] + m_blockSize); block = m_refChroma->process(input); for (const auto &v: block) addTo(m_refTotals, v); m_other.insert(m_other.end(), inputBuffers[1], inputBuffers[1] + m_blockSize); ++m_frameCount; return FeatureSet(); } void TuningDifference::rotateFeature(TFeature &r, int rotation) const { if (rotation < 0) { rotate(r.begin(), r.begin() - rotation, r.end()); } else { rotate(r.begin(), r.end() - rotation, r.end()); } } double TuningDifference::featureDistance(const TFeature &other, int rotation) const { if (rotation == 0) { return distance(m_refFeature, other); } else { // A positive rotation pushes the tuning frequency up for this // chroma, negative one pulls it down. If a positive rotation // makes this chroma match an un-rotated reference, then this // chroma must have initially been lower than the reference. TFeature r(other); rotateFeature(r, rotation); return distance(m_refFeature, r); } } int TuningDifference::findBestRotation(const TFeature &other) const { map<double, int> dists; int maxRotation = (m_bpo * m_maxSemis) / 12; for (int r = -maxRotation; r <= maxRotation; ++r) { double dist = featureDistance(other, r); dists[dist] = r; // cerr << "rotation " << r << ": score " << dist << endl; } int best = dists.begin()->second; // cerr << "best is " << best << endl; return best; } pair<int, double> TuningDifference::findFineFrequency(int coarseCents) { int coarseResolution = 1200 / m_bpo; int searchDistance = coarseResolution/2 - 1; int bestCents = coarseCents; double bestHz = frequencyForCentsAbove440(coarseCents); if (!m_fineTuning) { cerr << "fine tuning disabled, returning coarse Hz " << bestHz << " and cents " << bestCents << " in lieu of fine ones" << endl; return pair<int, double>(bestCents, bestHz); } //!!! This is kind of absurd - all this brute force but all we're //!!! really doing is aligning two very short signals at //!!! sub-sample level - let's rewrite it someday cerr << "findFineFrequency: coarse frequency is " << bestHz << endl; cerr << "searchDistance = " << searchDistance << endl; double bestScore = 0; bool firstScore = true; for (int sign = -1; sign <= 1; sign += 2) { for (int offset = (sign < 0 ? 0 : 1); offset <= searchDistance; ++offset) { int fineCents = coarseCents + sign * offset; cerr << "trying with fineCents = " << fineCents << "..." << endl; double fineHz = frequencyForCentsAbove440(fineCents); TFeature fineFeature = computeFeatureFromSignal(m_other, fineHz); double fineScore = featureDistance(fineFeature); cerr << "fine offset = " << offset << ", cents = " << fineCents << ", Hz = " << fineHz << ", score " << fineScore << " (best score so far " << bestScore << ")" << endl; if ((fineScore < bestScore) || firstScore) { cerr << "is good!" << endl; bestScore = fineScore; bestCents = fineCents; bestHz = fineHz; firstScore = false; } else { break; } } } //!!! could keep a vector of scores & then interpolate... return pair<int, double>(bestCents, bestHz); } TuningDifference::FeatureSet TuningDifference::getRemainingFeatures() { FeatureSet fs; if (m_frameCount == 0) return fs; m_refFeature = computeFeatureFromTotals(m_refTotals); TFeature otherFeature = computeFeatureFromSignal(m_other, 440.); Feature f; f.hasTimestamp = true; f.timestamp = Vamp::RealTime::zeroTime; f.values.clear(); for (auto v: m_refFeature) f.values.push_back(float(v)); fs[m_outputs["reffeature"]].push_back(f); f.values.clear(); for (auto v: otherFeature) f.values.push_back(float(v)); fs[m_outputs["otherfeature"]].push_back(f); int rotation = findBestRotation(otherFeature); int coarseCents = -(rotation * 1200) / m_bpo; cerr << "rotation " << rotation << " -> cents " << coarseCents << endl; TFeature coarseFeature = otherFeature; if (rotation != 0) { rotateFeature(coarseFeature, rotation); } //!!! This should be returning the fine chroma, not the coarse f.values.clear(); for (auto v: coarseFeature) f.values.push_back(float(v)); fs[m_outputs["rotfeature"]].push_back(f); pair<int, double> fine = findFineFrequency(coarseCents); int fineCents = fine.first; double fineHz = fine.second; f.values.clear(); f.values.push_back(float(fineHz)); fs[m_outputs["tuningfreq"]].push_back(f); f.values.clear(); f.values.push_back(float(fineCents)); fs[m_outputs["cents"]].push_back(f); cerr << "overall best Hz = " << fineHz << endl; return fs; }