Mercurial > hg > tuning-difference
changeset 47:f28b34e7ce8d
Add BulkTuningDifference plugin
author | Chris Cannam |
---|---|
date | Tue, 09 Jul 2019 16:14:19 +0100 |
parents | 1623751c4549 |
children | 3b1b966d8f49 |
files | .travis.yml Makefile.inc src/BulkTuningDifference.cpp src/BulkTuningDifference.h src/TuningDifference.cpp src/TuningDifference.h src/plugins.cpp |
diffstat | 7 files changed, 777 insertions(+), 377 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.travis.yml Tue Jul 09 16:14:19 2019 +0100 @@ -0,0 +1,29 @@ +dist: + - xenial + +language: + - cpp + +sudo: + - false + +os: + - linux + +addons: + apt: + packages: + - wget + - rubberband + +before_install: + - ( cd ../ ; hg clone https://code.soundsoftware.ac.uk/hg/vamp-plugin-sdk ) + - ( cd ../ ; hg clone https://code.soundsoftware.ac.uk/hg/vamp-plugin-tester ) + - ( cd ../vamp-plugin-sdk ; ./configure && make && sudo make install ) + - ( cd ../vamp-plugin-tester ; ./repoint install && make ) + - wget https://code.soundsoftware.ac.uk/attachments/download/2250/sonic-annotator_1.5_amd64.deb + - sudo apt install -y ./sonic-annotator_1.5_amd64.deb + +script: + - make -f Makefile.linux test + - VAMP_PATH=. ../vamp-plugin-tester/vamp-plugin-tester -a
--- a/Makefile.inc Tue Jul 09 15:09:33 2019 +0100 +++ b/Makefile.inc Tue Jul 09 16:14:19 2019 +0100 @@ -5,11 +5,11 @@ # Edit this to list the .cpp or .c files in your plugin project # -PLUGIN_SOURCES := src/TuningDifference.cpp src/plugins.cpp +PLUGIN_SOURCES := src/TuningDifference.cpp src/BulkTuningDifference.cpp src/plugins.cpp # Edit this to list the .h files in your plugin project # -PLUGIN_HEADERS := src/TuningDifference.h +PLUGIN_HEADERS := src/TuningDifference.h src/BulkTuningDifference.h ## Normally you should not edit anything below this line @@ -44,6 +44,9 @@ $(PLUGIN_OBJECTS): $(PLUGIN_HEADERS) +test: $(PLUGIN) + bash test/regression.sh + clean: rm -f $(PLUGIN_OBJECTS) $(MAKE) -C constant-q-cpp -f Makefile$(MAKEFILE_EXT) clean
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/BulkTuningDifference.cpp Tue Jul 09 16:14:19 2019 +0100 @@ -0,0 +1,601 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Centre for Digital Music, Queen Mary University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "BulkTuningDifference.h" + +#include <iostream> + +#include <cmath> +#include <cstdio> +#include <climits> + +#include <algorithm> +#include <numeric> + +using namespace std; + +static double pitchToFrequency(int pitch, + double centsOffset = 0., + double concertA = 440.) +{ + double p = double(pitch) + (centsOffset / 100.); + return concertA * pow(2.0, (p - 69.0) / 12.0); +} + +static double frequencyForCentsAbove440(double cents) +{ + return pitchToFrequency(69, cents, 440.); +} + +static float defaultMaxDuration = 0.f; +static int defaultMaxSemis = 4; +static bool defaultFineTuning = true; + +BulkTuningDifference::BulkTuningDifference(float inputSampleRate) : + Plugin(inputSampleRate), + m_channelCount(0), + m_bpo(120), + m_refChroma(new Chromagram(paramsForTuningFrequency(440.))), + m_blockSize(0), + m_frameCount(0), + m_maxDuration(defaultMaxDuration), + m_maxSemis(defaultMaxSemis), + m_fineTuning(defaultFineTuning) +{ +} + +BulkTuningDifference::~BulkTuningDifference() +{ +} + +string +BulkTuningDifference::getIdentifier() const +{ + return "bulk-tuning-difference"; +} + +string +BulkTuningDifference::getName() const +{ + return "Bulk Tuning Difference"; +} + +string +BulkTuningDifference::getDescription() const +{ + return "Estimate the tuning frequencies of a set of recordings at once, by comparing them to a reference recording of the same music whose tuning frequency is known"; +} + +string +BulkTuningDifference::getMaker() const +{ + return "Chris Cannam"; +} + +int +BulkTuningDifference::getPluginVersion() const +{ + // Increment this each time you release a version that behaves + // differently from the previous one + return 3; +} + +string +BulkTuningDifference::getCopyright() const +{ + // This function is not ideally named. It does not necessarily + // need to say who made the plugin -- getMaker does that -- but it + // should indicate the terms under which it is distributed. For + // example, "Copyright (year). All Rights Reserved", or "GPL" + return "GPL"; +} + +BulkTuningDifference::InputDomain +BulkTuningDifference::getInputDomain() const +{ + return TimeDomain; +} + +size_t +BulkTuningDifference::getPreferredBlockSize() const +{ + return 0; +} + +size_t +BulkTuningDifference::getPreferredStepSize() const +{ + return 0; +} + +size_t +BulkTuningDifference::getMinChannelCount() const +{ + return 2; +} + +size_t +BulkTuningDifference::getMaxChannelCount() const +{ + return 256; +} + +BulkTuningDifference::ParameterList +BulkTuningDifference::getParameterDescriptors() const +{ + ParameterList list; + + ParameterDescriptor desc; + + desc.identifier = "maxduration"; + desc.name = "Maximum duration to analyse"; + desc.description = "The maximum duration (in seconds) to consider from either input file, always taken from the start of the input. Zero means there is no limit."; + desc.minValue = 0; + desc.maxValue = 3600; + desc.defaultValue = defaultMaxDuration; + desc.isQuantized = false; + desc.unit = "s"; + list.push_back(desc); + + desc.identifier = "maxrange"; + desc.name = "Maximum range in semitones"; + desc.description = "The maximum difference in semitones that will be searched."; + desc.minValue = 1; + desc.maxValue = 11; + desc.defaultValue = defaultMaxSemis; + desc.isQuantized = true; + desc.quantizeStep = 1; + desc.unit = "semitones"; + list.push_back(desc); + + desc.identifier = "finetuning"; + desc.name = "Fine tuning"; + desc.description = "Use a fine tuning stage to increase nominal resolution from 10 cents to 1 cent."; + desc.minValue = 0; + desc.maxValue = 1; + desc.defaultValue = (defaultFineTuning ? 1.f : 0.f); + desc.isQuantized = true; + desc.quantizeStep = 1; + desc.unit = ""; + list.push_back(desc); + + return list; +} + +float +BulkTuningDifference::getParameter(string id) const +{ + if (id == "maxduration") { + return m_maxDuration; + } else if (id == "maxrange") { + return float(m_maxSemis); + } else if (id == "finetuning") { + return m_fineTuning ? 1.f : 0.f; + } + return 0; +} + +void +BulkTuningDifference::setParameter(string id, float value) +{ + if (id == "maxduration") { + m_maxDuration = value; + } else if (id == "maxrange") { + m_maxSemis = int(roundf(value)); + } else if (id == "finetuning") { + m_fineTuning = (value > 0.5f); + } +} + +BulkTuningDifference::ProgramList +BulkTuningDifference::getPrograms() const +{ + ProgramList list; + return list; +} + +string +BulkTuningDifference::getCurrentProgram() const +{ + return ""; // no programs +} + +void +BulkTuningDifference::selectProgram(string) +{ +} + +BulkTuningDifference::OutputList +BulkTuningDifference::getOutputDescriptors() const +{ + OutputList list; + + OutputDescriptor d; + d.identifier = "cents"; + d.name = "Tuning Differences"; + d.description = "A single feature vector containing a value for each input channel after the first (reference) channel, containing the difference in averaged frequency profile between that channel and the reference channel, in cents. A positive value means the corresponding channel is higher than the reference."; + d.unit = "cents"; + if (m_channelCount > 1) { + d.hasFixedBinCount = true; + d.binCount = m_channelCount - 1; + } else { + d.hasFixedBinCount = false; + } + d.hasKnownExtents = false; + d.isQuantized = false; + d.sampleType = OutputDescriptor::VariableSampleRate; + d.hasDuration = false; + m_outputs[d.identifier] = int(list.size()); + list.push_back(d); + + d.identifier = "tuningfreq"; + d.name = "Relative Tuning Frequencies"; + d.description = "A single feature vector containing a value for each input channel after the first (reference) channel, containing the tuning frequency of that channel, if the reference channel is assumed to contain the same music as it at a tuning frequency of A=440Hz."; + d.unit = "hz"; + if (m_channelCount > 1) { + d.hasFixedBinCount = true; + d.binCount = m_channelCount - 1; + } else { + d.hasFixedBinCount = false; + } + d.hasKnownExtents = false; + d.isQuantized = false; + d.sampleType = OutputDescriptor::VariableSampleRate; + d.hasDuration = false; + m_outputs[d.identifier] = int(list.size()); + list.push_back(d); + + d.identifier = "reffeature"; + d.name = "Reference Feature"; + d.description = "Chroma feature from reference channel."; + d.unit = ""; + d.hasFixedBinCount = true; + d.binCount = m_bpo; + d.hasKnownExtents = false; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = 1; + d.hasDuration = false; + m_outputs[d.identifier] = int(list.size()); + list.push_back(d); + + d.identifier = "otherfeature"; + d.name = "Other Features"; + d.description = "Series of chroma feature vectors from the non-reference audio channels, before rotation."; + d.unit = ""; + d.hasFixedBinCount = true; + d.binCount = m_bpo; + d.hasKnownExtents = false; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = 1; + d.hasDuration = false; + m_outputs[d.identifier] = int(list.size()); + list.push_back(d); + + d.identifier = "rotfeature"; + d.name = "Other Features at Rotated Frequency"; + d.description = "Series of chroma feature vectors from the non-reference audio channels, calculated with the tuning frequency obtained from rotation matching."; + d.unit = ""; + d.hasFixedBinCount = true; + d.binCount = m_bpo; + d.hasKnownExtents = false; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = 1; + d.hasDuration = false; + m_outputs[d.identifier] = int(list.size()); + list.push_back(d); + + return list; +} + +bool +BulkTuningDifference::initialise(size_t channels, size_t stepSize, size_t blockSize) +{ + if (channels < getMinChannelCount()) return false; + + m_channelCount = channels; + + if (stepSize != blockSize) return false; + if (m_blockSize > INT_MAX) return false; + + m_blockSize = int(blockSize); + + reset(); + + return true; +} + +void +BulkTuningDifference::reset() +{ + if (m_frameCount > 0) { + m_refChroma.reset(new Chromagram(paramsForTuningFrequency(440.))); + m_frameCount = 0; + } + m_refTotals = TFeature(m_bpo, 0.0); + m_others = vector<Signal>(m_channelCount - 1); + +} + +template<typename T> +void addTo(vector<T> &a, const vector<T> &b) +{ + transform(a.begin(), a.end(), b.begin(), a.begin(), plus<T>()); +} + +template<typename T> +T distance(const vector<T> &a, const vector<T> &b) +{ + return inner_product(a.begin(), a.end(), b.begin(), T(), + plus<T>(), [](T x, T y) { return fabs(x - y); }); +} + +BulkTuningDifference::TFeature +BulkTuningDifference::computeFeatureFromTotals(const TFeature &totals) const +{ + if (m_frameCount == 0) return totals; + + TFeature feature(m_bpo); + double sum = 0.0; + + for (int i = 0; i < m_bpo; ++i) { + double value = totals[i] / m_frameCount; + feature[i] += value; + sum += value; + } + + for (int i = 0; i < m_bpo; ++i) { + feature[i] /= sum; + } + +// cerr << "computeFeatureFromTotals: feature values:" << endl; +// for (auto v: feature) cerr << v << " "; +// cerr << endl; + + return feature; +} + +Chromagram::Parameters +BulkTuningDifference::paramsForTuningFrequency(double hz) const +{ + Chromagram::Parameters params(m_inputSampleRate); + params.lowestOctave = 2; + params.octaveCount = 4; + params.binsPerOctave = m_bpo; + params.tuningFrequency = hz; + params.atomHopFactor = 0.5; + params.window = CQParameters::Hann; + return params; +} + +BulkTuningDifference::TFeature +BulkTuningDifference::computeFeatureFromSignal(const Signal &signal, double hz) const +{ + Chromagram chromagram(paramsForTuningFrequency(hz)); + + TFeature totals(m_bpo, 0.0); + + cerr << "computeFeatureFromSignal: hz = " << hz << ", frame count = " << m_frameCount << endl; + + for (int i = 0; i < m_frameCount; ++i) { + Signal::const_iterator first = signal.begin() + i * m_blockSize; + Signal::const_iterator last = first + m_blockSize; + if (last > signal.end()) last = signal.end(); + CQBase::RealSequence input(first, last); + input.resize(m_blockSize); + CQBase::RealBlock block = chromagram.process(input); + for (const auto &v: block) addTo(totals, v); + } + + return computeFeatureFromTotals(totals); +} + +BulkTuningDifference::FeatureSet +BulkTuningDifference::process(const float *const *inputBuffers, Vamp::RealTime) +{ + if (m_maxDuration > 0) { + int maxFrames = int((m_maxDuration * m_inputSampleRate) / + float(m_blockSize)); + if (m_frameCount > maxFrames) return FeatureSet(); + } + + CQBase::RealBlock block; + CQBase::RealSequence input; + + input = CQBase::RealSequence + (inputBuffers[0], inputBuffers[0] + m_blockSize); + block = m_refChroma->process(input); + for (const auto &v: block) addTo(m_refTotals, v); + + for (int c = 1; c < m_channelCount; ++c) { + m_others[c-1].insert(m_others[c-1].end(), + inputBuffers[c], + inputBuffers[c] + m_blockSize); + } + + ++m_frameCount; + return FeatureSet(); +} + +void +BulkTuningDifference::rotateFeature(TFeature &r, int rotation) const +{ + if (rotation < 0) { + rotate(r.begin(), r.begin() - rotation, r.end()); + } else { + rotate(r.begin(), r.end() - rotation, r.end()); + } +} + +double +BulkTuningDifference::featureDistance(const TFeature &other, int rotation) const +{ + if (rotation == 0) { + return distance(m_refFeature, other); + } else { + // A positive rotation pushes the tuning frequency up for this + // chroma, negative one pulls it down. If a positive rotation + // makes this chroma match an un-rotated reference, then this + // chroma must have initially been lower than the reference. + TFeature r(other); + rotateFeature(r, rotation); + return distance(m_refFeature, r); + } +} + +int +BulkTuningDifference::findBestRotation(const TFeature &other) const +{ + map<double, int> dists; + + int maxRotation = (m_bpo * m_maxSemis) / 12; + + for (int r = -maxRotation; r <= maxRotation; ++r) { + double dist = featureDistance(other, r); + dists[dist] = r; +// cerr << "rotation " << r << ": score " << dist << endl; + } + + int best = dists.begin()->second; + +// cerr << "best is " << best << endl; + return best; +} + +pair<int, double> +BulkTuningDifference::findFineFrequency(int channel, int coarseCents) +{ + int coarseResolution = 1200 / m_bpo; + int searchDistance = coarseResolution/2 - 1; + + int bestCents = coarseCents; + double bestHz = frequencyForCentsAbove440(coarseCents); + + if (!m_fineTuning) { + cerr << "fine tuning disabled, returning coarse Hz " << bestHz << " and cents " << bestCents << " in lieu of fine ones" << endl; + return pair<int, double>(bestCents, bestHz); + } + + //!!! This is kind of absurd - all this brute force but all we're + //!!! really doing is aligning two very short signals at + //!!! sub-sample level - let's rewrite it someday + + cerr << "findFineFrequency: coarse frequency is " << bestHz << endl; + cerr << "searchDistance = " << searchDistance << endl; + + double bestScore = 0; + bool firstScore = true; + + for (int sign = -1; sign <= 1; sign += 2) { + for (int offset = (sign < 0 ? 0 : 1); + offset <= searchDistance; + ++offset) { + + int fineCents = coarseCents + sign * offset; + + cerr << "trying with fineCents = " << fineCents << "..." << endl; + + double fineHz = frequencyForCentsAbove440(fineCents); + TFeature fineFeature = computeFeatureFromSignal + (m_others[channel-1], fineHz); + double fineScore = featureDistance(fineFeature); + + cerr << "fine offset = " << offset << ", cents = " << fineCents + << ", Hz = " << fineHz << ", score " << fineScore + << " (best score so far " << bestScore << ")" << endl; + + if ((fineScore < bestScore) || firstScore) { + cerr << "is good!" << endl; + bestScore = fineScore; + bestCents = fineCents; + bestHz = fineHz; + firstScore = false; + } else { + break; + } + } + } + + //!!! could keep a vector of scores & then interpolate... + + return pair<int, double>(bestCents, bestHz); +} + +BulkTuningDifference::FeatureSet +BulkTuningDifference::getRemainingFeatures() +{ + FeatureSet fs; + if (m_frameCount == 0) return fs; + + m_refFeature = computeFeatureFromTotals(m_refTotals); + + Feature f; + f.hasTimestamp = true; + f.timestamp = Vamp::RealTime::zeroTime; + f.values.clear(); + fs[m_outputs["cents"]].push_back(f); + fs[m_outputs["tuningfreq"]].push_back(f); + + for (int c = 1; c < m_channelCount; ++c) { + getRemainingFeaturesForChannel(c, fs); + } + + return fs; +} + +void +BulkTuningDifference::getRemainingFeaturesForChannel(int channel, + FeatureSet &fs) +{ + TFeature otherFeature = computeFeatureFromSignal + (m_others[channel-1], 440.); + + Feature f; + f.hasTimestamp = true; + f.timestamp = Vamp::RealTime::zeroTime; + + f.values.clear(); + for (auto v: m_refFeature) f.values.push_back(float(v)); + fs[m_outputs["reffeature"]].push_back(f); + + f.values.clear(); + for (auto v: otherFeature) f.values.push_back(float(v)); + fs[m_outputs["otherfeature"]].push_back(f); + + int rotation = findBestRotation(otherFeature); + + int coarseCents = -(rotation * 1200) / m_bpo; + + cerr << "channel " << channel << ": rotation " << rotation << " -> cents " << coarseCents << endl; + + TFeature coarseFeature = otherFeature; + if (rotation != 0) { + rotateFeature(coarseFeature, rotation); + } + + //!!! This should be returning the fine chroma, not the coarse + f.values.clear(); + for (auto v: coarseFeature) f.values.push_back(float(v)); + fs[m_outputs["rotfeature"]].push_back(f); + + pair<int, double> fine = findFineFrequency(channel, coarseCents); + int fineCents = fine.first; + double fineHz = fine.second; + + fs[m_outputs["cents"]][0].values.push_back(float(fineCents)); + fs[m_outputs["tuningfreq"]][0].values.push_back(float(fineHz)); + + cerr << "channel " << channel << ": overall best Hz = " << fineHz << endl; +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/BulkTuningDifference.h Tue Jul 09 16:14:19 2019 +0100 @@ -0,0 +1,91 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Centre for Digital Music, Queen Mary University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef BULK_TUNING_DIFFERENCE_H +#define BULK_TUNING_DIFFERENCE_H + +#include <vamp-sdk/Plugin.h> + +#include <cq/Chromagram.h> + +#include <memory> + +using std::string; +using std::vector; + +class BulkTuningDifference : public Vamp::Plugin +{ +public: + BulkTuningDifference(float inputSampleRate); + virtual ~BulkTuningDifference(); + + string getIdentifier() const; + string getName() const; + string getDescription() const; + string getMaker() const; + int getPluginVersion() const; + string getCopyright() const; + + InputDomain getInputDomain() const; + size_t getPreferredBlockSize() const; + size_t getPreferredStepSize() const; + size_t getMinChannelCount() const; + size_t getMaxChannelCount() const; + + ParameterList getParameterDescriptors() const; + float getParameter(string identifier) const; + void setParameter(string identifier, float value); + + ProgramList getPrograms() const; + string getCurrentProgram() const; + void selectProgram(string name); + + OutputList getOutputDescriptors() const; + + bool initialise(size_t channels, size_t stepSize, size_t blockSize); + void reset(); + + FeatureSet process(const float *const *inputBuffers, + Vamp::RealTime timestamp); + + FeatureSet getRemainingFeatures(); + +protected: + typedef vector<float> Signal; + typedef vector<double> TFeature; + + int m_channelCount; + int m_bpo; + std::unique_ptr<Chromagram> m_refChroma; + TFeature m_refTotals; + TFeature m_refFeature; + std::vector<Signal> m_others; + int m_blockSize; + int m_frameCount; + float m_maxDuration; + int m_maxSemis; + bool m_fineTuning; + + Chromagram::Parameters paramsForTuningFrequency(double hz) const; + TFeature computeFeatureFromTotals(const TFeature &totals) const; + TFeature computeFeatureFromSignal(const Signal &signal, double hz) const; + void rotateFeature(TFeature &feature, int rotation) const; + double featureDistance(const TFeature &other, int rotation = 0) const; + int findBestRotation(const TFeature &other) const; + std::pair<int, double> findFineFrequency(int channel, int coarseCents); + void getRemainingFeaturesForChannel(int channel, FeatureSet &fs); + + mutable std::map<string, int> m_outputs; +}; + + +#endif
--- a/src/TuningDifference.cpp Tue Jul 09 15:09:33 2019 +0100 +++ b/src/TuningDifference.cpp Tue Jul 09 16:14:19 2019 +0100 @@ -12,43 +12,11 @@ #include "TuningDifference.h" -#include <iostream> - -#include <cmath> -#include <cstdio> -#include <climits> - -#include <algorithm> -#include <numeric> - using namespace std; -static double pitchToFrequency(int pitch, - double centsOffset = 0., - double concertA = 440.) -{ - double p = double(pitch) + (centsOffset / 100.); - return concertA * pow(2.0, (p - 69.0) / 12.0); -} - -static double frequencyForCentsAbove440(double cents) -{ - return pitchToFrequency(69, cents, 440.); -} - -static float defaultMaxDuration = 0.f; -static int defaultMaxSemis = 4; -static bool defaultFineTuning = true; - TuningDifference::TuningDifference(float inputSampleRate) : Plugin(inputSampleRate), - m_bpo(120), - m_refChroma(new Chromagram(paramsForTuningFrequency(440.))), - m_blockSize(0), - m_frameCount(0), - m_maxDuration(defaultMaxDuration), - m_maxSemis(defaultMaxSemis), - m_fineTuning(defaultFineTuning) + m_bulk(inputSampleRate) { } @@ -77,7 +45,6 @@ string TuningDifference::getMaker() const { - // Your name here return "Chris Cannam"; } @@ -102,19 +69,19 @@ TuningDifference::InputDomain TuningDifference::getInputDomain() const { - return TimeDomain; + return m_bulk.getInputDomain(); } size_t TuningDifference::getPreferredBlockSize() const { - return 0; + return m_bulk.getInputDomain(); } size_t TuningDifference::getPreferredStepSize() const { - return 0; + return m_bulk.getInputDomain(); } size_t @@ -132,86 +99,37 @@ TuningDifference::ParameterList TuningDifference::getParameterDescriptors() const { - ParameterList list; - - ParameterDescriptor desc; - - desc.identifier = "maxduration"; - desc.name = "Maximum duration to analyse"; - desc.description = "The maximum duration (in seconds) to consider from either input file, always taken from the start of the input. Zero means there is no limit."; - desc.minValue = 0; - desc.maxValue = 3600; - desc.defaultValue = defaultMaxDuration; - desc.isQuantized = false; - desc.unit = "s"; - list.push_back(desc); - - desc.identifier = "maxrange"; - desc.name = "Maximum range in semitones"; - desc.description = "The maximum difference in semitones that will be searched."; - desc.minValue = 1; - desc.maxValue = 11; - desc.defaultValue = defaultMaxSemis; - desc.isQuantized = true; - desc.quantizeStep = 1; - desc.unit = "semitones"; - list.push_back(desc); - - desc.identifier = "finetuning"; - desc.name = "Fine tuning"; - desc.description = "Use a fine tuning stage to increase nominal resolution from 10 cents to 1 cent."; - desc.minValue = 0; - desc.maxValue = 1; - desc.defaultValue = (defaultFineTuning ? 1.f : 0.f); - desc.isQuantized = true; - desc.quantizeStep = 1; - desc.unit = ""; - list.push_back(desc); - - return list; + return m_bulk.getParameterDescriptors(); } float TuningDifference::getParameter(string id) const { - if (id == "maxduration") { - return m_maxDuration; - } else if (id == "maxrange") { - return float(m_maxSemis); - } else if (id == "finetuning") { - return m_fineTuning ? 1.f : 0.f; - } - return 0; + return m_bulk.getParameter(id); } void TuningDifference::setParameter(string id, float value) { - if (id == "maxduration") { - m_maxDuration = value; - } else if (id == "maxrange") { - m_maxSemis = int(roundf(value)); - } else if (id == "finetuning") { - m_fineTuning = (value > 0.5f); - } + m_bulk.setParameter(id, value); } TuningDifference::ProgramList TuningDifference::getPrograms() const { - ProgramList list; - return list; + return m_bulk.getPrograms(); } string TuningDifference::getCurrentProgram() const { - return ""; // no programs + return m_bulk.getCurrentProgram(); } void -TuningDifference::selectProgram(string) +TuningDifference::selectProgram(string program) { + m_bulk.selectProgram(program); } TuningDifference::OutputList @@ -219,10 +137,12 @@ { OutputList list; + OutputList bulkOutputs = m_bulk.getOutputDescriptors(); + OutputDescriptor d; d.identifier = "cents"; d.name = "Tuning Difference"; - d.description = "Difference in averaged frequency profile between channels 1 and 2, in cents. A positive value means channel 2 is higher."; + d.description = "Difference in averaged frequency profile between the first (reference) channel and the other channel, in cents. A positive value means the other channel is higher than the reference."; d.unit = "cents"; d.hasFixedBinCount = true; d.binCount = 1; @@ -235,7 +155,7 @@ d.identifier = "tuningfreq"; d.name = "Relative Tuning Frequency"; - d.description = "Tuning frequency of channel 2, if channel 1 is assumed to contain the same music as it at a tuning frequency of A=440Hz."; + d.description = "Tuning frequency of the second (other) channel, if the first (reference) channel is assumed to contain the same music as it at a tuning frequency of A=440Hz."; d.unit = "hz"; d.hasFixedBinCount = true; d.binCount = 1; @@ -248,10 +168,11 @@ d.identifier = "reffeature"; d.name = "Reference Feature"; - d.description = "Chroma feature from reference audio."; + d.description = "Chroma feature from reference channel."; d.unit = ""; d.hasFixedBinCount = true; - d.binCount = m_bpo; + // caution: implementation dependency on BulkTuningDifference + d.binCount = bulkOutputs[list.size()].binCount; d.hasKnownExtents = false; d.isQuantized = false; d.sampleType = OutputDescriptor::FixedSampleRate; @@ -262,10 +183,11 @@ d.identifier = "otherfeature"; d.name = "Other Feature"; - d.description = "Chroma feature from other audio, before rotation."; + d.description = "Chroma feature from non-reference channel, before rotation."; d.unit = ""; d.hasFixedBinCount = true; - d.binCount = m_bpo; + // caution: implementation dependency on BulkTuningDifference + d.binCount = bulkOutputs[list.size()].binCount; d.hasKnownExtents = false; d.isQuantized = false; d.sampleType = OutputDescriptor::FixedSampleRate; @@ -276,10 +198,11 @@ d.identifier = "rotfeature"; d.name = "Other Feature at Rotated Frequency"; - d.description = "Chroma feature from reference audio calculated with the tuning frequency obtained from rotation matching."; + d.description = "Chroma feature from non-reference channel calculated with the tuning frequency obtained from rotation matching."; d.unit = ""; d.hasFixedBinCount = true; - d.binCount = m_bpo; + // caution: implementation dependency on BulkTuningDifference + d.binCount = bulkOutputs[list.size()].binCount; d.hasKnownExtents = false; d.isQuantized = false; d.sampleType = OutputDescriptor::FixedSampleRate; @@ -295,279 +218,52 @@ TuningDifference::initialise(size_t channels, size_t stepSize, size_t blockSize) { if (channels < getMinChannelCount() || - channels > getMaxChannelCount()) return false; + channels > getMaxChannelCount()) { + return false; + } - if (stepSize != blockSize) return false; - if (m_blockSize > INT_MAX) return false; - - m_blockSize = int(blockSize); - - reset(); - - return true; + return m_bulk.initialise(channels, stepSize, blockSize); } void TuningDifference::reset() { - if (m_frameCount > 0) { - m_refChroma.reset(new Chromagram(paramsForTuningFrequency(440.))); - m_frameCount = 0; - } - m_refTotals = TFeature(m_bpo, 0.0); - m_other.clear(); -} - -template<typename T> -void addTo(vector<T> &a, const vector<T> &b) -{ - transform(a.begin(), a.end(), b.begin(), a.begin(), plus<T>()); -} - -template<typename T> -T distance(const vector<T> &a, const vector<T> &b) -{ - return inner_product(a.begin(), a.end(), b.begin(), T(), - plus<T>(), [](T x, T y) { return fabs(x - y); }); -} - -TuningDifference::TFeature -TuningDifference::computeFeatureFromTotals(const TFeature &totals) const -{ - if (m_frameCount == 0) return totals; - - TFeature feature(m_bpo); - double sum = 0.0; - - for (int i = 0; i < m_bpo; ++i) { - double value = totals[i] / m_frameCount; - feature[i] += value; - sum += value; - } - - for (int i = 0; i < m_bpo; ++i) { - feature[i] /= sum; - } - -// cerr << "computeFeatureFromTotals: feature values:" << endl; -// for (auto v: feature) cerr << v << " "; -// cerr << endl; - - return feature; -} - -Chromagram::Parameters -TuningDifference::paramsForTuningFrequency(double hz) const -{ - Chromagram::Parameters params(m_inputSampleRate); - params.lowestOctave = 2; - params.octaveCount = 4; - params.binsPerOctave = m_bpo; - params.tuningFrequency = hz; - params.atomHopFactor = 0.5; - params.window = CQParameters::Hann; - return params; -} - -TuningDifference::TFeature -TuningDifference::computeFeatureFromSignal(const Signal &signal, double hz) const -{ - Chromagram chromagram(paramsForTuningFrequency(hz)); - - TFeature totals(m_bpo, 0.0); - - cerr << "computeFeatureFromSignal: hz = " << hz << ", frame count = " << m_frameCount << endl; - - for (int i = 0; i < m_frameCount; ++i) { - Signal::const_iterator first = signal.begin() + i * m_blockSize; - Signal::const_iterator last = first + m_blockSize; - if (last > signal.end()) last = signal.end(); - CQBase::RealSequence input(first, last); - input.resize(m_blockSize); - CQBase::RealBlock block = chromagram.process(input); - for (const auto &v: block) addTo(totals, v); - } - - return computeFeatureFromTotals(totals); + m_bulk.reset(); } TuningDifference::FeatureSet -TuningDifference::process(const float *const *inputBuffers, Vamp::RealTime) +TuningDifference::process(const float *const *inputBuffers, + Vamp::RealTime timestamp) { - if (m_maxDuration > 0) { - int maxFrames = int((m_maxDuration * m_inputSampleRate) / - float(m_blockSize)); - if (m_frameCount > maxFrames) return FeatureSet(); - } - - CQBase::RealBlock block; - CQBase::RealSequence input; - - input = CQBase::RealSequence - (inputBuffers[0], inputBuffers[0] + m_blockSize); - block = m_refChroma->process(input); - for (const auto &v: block) addTo(m_refTotals, v); - - m_other.insert(m_other.end(), - inputBuffers[1], inputBuffers[1] + m_blockSize); - - ++m_frameCount; + (void)m_bulk.process(inputBuffers, timestamp); return FeatureSet(); } -void -TuningDifference::rotateFeature(TFeature &r, int rotation) const -{ - if (rotation < 0) { - rotate(r.begin(), r.begin() - rotation, r.end()); - } else { - rotate(r.begin(), r.end() - rotation, r.end()); - } -} - -double -TuningDifference::featureDistance(const TFeature &other, int rotation) const -{ - if (rotation == 0) { - return distance(m_refFeature, other); - } else { - // A positive rotation pushes the tuning frequency up for this - // chroma, negative one pulls it down. If a positive rotation - // makes this chroma match an un-rotated reference, then this - // chroma must have initially been lower than the reference. - TFeature r(other); - rotateFeature(r, rotation); - return distance(m_refFeature, r); - } -} - -int -TuningDifference::findBestRotation(const TFeature &other) const -{ - map<double, int> dists; - - int maxRotation = (m_bpo * m_maxSemis) / 12; - - for (int r = -maxRotation; r <= maxRotation; ++r) { - double dist = featureDistance(other, r); - dists[dist] = r; -// cerr << "rotation " << r << ": score " << dist << endl; - } - - int best = dists.begin()->second; - -// cerr << "best is " << best << endl; - return best; -} - -pair<int, double> -TuningDifference::findFineFrequency(int coarseCents) -{ - int coarseResolution = 1200 / m_bpo; - int searchDistance = coarseResolution/2 - 1; - - int bestCents = coarseCents; - double bestHz = frequencyForCentsAbove440(coarseCents); - - if (!m_fineTuning) { - cerr << "fine tuning disabled, returning coarse Hz " << bestHz << " and cents " << bestCents << " in lieu of fine ones" << endl; - return pair<int, double>(bestCents, bestHz); - } - - //!!! This is kind of absurd - all this brute force but all we're - //!!! really doing is aligning two very short signals at - //!!! sub-sample level - let's rewrite it someday - - cerr << "findFineFrequency: coarse frequency is " << bestHz << endl; - cerr << "searchDistance = " << searchDistance << endl; - - double bestScore = 0; - bool firstScore = true; - - for (int sign = -1; sign <= 1; sign += 2) { - for (int offset = (sign < 0 ? 0 : 1); - offset <= searchDistance; - ++offset) { - - int fineCents = coarseCents + sign * offset; - - cerr << "trying with fineCents = " << fineCents << "..." << endl; - - double fineHz = frequencyForCentsAbove440(fineCents); - TFeature fineFeature = computeFeatureFromSignal(m_other, fineHz); - double fineScore = featureDistance(fineFeature); - - cerr << "fine offset = " << offset << ", cents = " << fineCents - << ", Hz = " << fineHz << ", score " << fineScore - << " (best score so far " << bestScore << ")" << endl; - - if ((fineScore < bestScore) || firstScore) { - cerr << "is good!" << endl; - bestScore = fineScore; - bestCents = fineCents; - bestHz = fineHz; - firstScore = false; - } else { - break; - } - } - } - - //!!! could keep a vector of scores & then interpolate... - - return pair<int, double>(bestCents, bestHz); -} - TuningDifference::FeatureSet TuningDifference::getRemainingFeatures() { + FeatureSet bulkFs = m_bulk.getRemainingFeatures(); + FeatureSet fs; - if (m_frameCount == 0) return fs; - m_refFeature = computeFeatureFromTotals(m_refTotals); - TFeature otherFeature = computeFeatureFromSignal(m_other, 440.); + // caution: implementation dependency on BulkTuningDifference + + fs[m_outputs["reffeature"]] = bulkFs[m_outputs["reffeature"]]; + + fs[m_outputs["otherfeature"]] + .push_back(bulkFs[m_outputs["otherfeature"]][0]); Feature f; f.hasTimestamp = true; f.timestamp = Vamp::RealTime::zeroTime; f.values.clear(); - for (auto v: m_refFeature) f.values.push_back(float(v)); - fs[m_outputs["reffeature"]].push_back(f); - - f.values.clear(); - for (auto v: otherFeature) f.values.push_back(float(v)); - fs[m_outputs["otherfeature"]].push_back(f); - - int rotation = findBestRotation(otherFeature); - - int coarseCents = -(rotation * 1200) / m_bpo; - - cerr << "rotation " << rotation << " -> cents " << coarseCents << endl; - - TFeature coarseFeature = otherFeature; - if (rotation != 0) { - rotateFeature(coarseFeature, rotation); - } - - //!!! This should be returning the fine chroma, not the coarse - f.values.clear(); - for (auto v: coarseFeature) f.values.push_back(float(v)); - fs[m_outputs["rotfeature"]].push_back(f); - - pair<int, double> fine = findFineFrequency(coarseCents); - int fineCents = fine.first; - double fineHz = fine.second; - - f.values.clear(); - f.values.push_back(float(fineHz)); - fs[m_outputs["tuningfreq"]].push_back(f); - - f.values.clear(); - f.values.push_back(float(fineCents)); + f.values.push_back(bulkFs[m_outputs["cents"]][0].values[0]); fs[m_outputs["cents"]].push_back(f); - cerr << "overall best Hz = " << fineHz << endl; + f.values.clear(); + f.values.push_back(bulkFs[m_outputs["tuningfreq"]][0].values[0]); + fs[m_outputs["tuningfreq"]].push_back(f); return fs; }
--- a/src/TuningDifference.h Tue Jul 09 15:09:33 2019 +0100 +++ b/src/TuningDifference.h Tue Jul 09 16:14:19 2019 +0100 @@ -13,11 +13,9 @@ #ifndef TUNING_DIFFERENCE_H #define TUNING_DIFFERENCE_H -#include <vamp-sdk/Plugin.h> - -#include <cq/Chromagram.h> - -#include <memory> +// This plugin is a thin wrapper around a BulkTuningDifference with +// only two channels +#include "BulkTuningDifference.h" using std::string; using std::vector; @@ -60,30 +58,9 @@ FeatureSet getRemainingFeatures(); protected: - typedef vector<float> Signal; - typedef vector<double> TFeature; - - int m_bpo; - std::unique_ptr<Chromagram> m_refChroma; - TFeature m_refTotals; - TFeature m_refFeature; - Signal m_other; - int m_blockSize; - int m_frameCount; - float m_maxDuration; - int m_maxSemis; - bool m_fineTuning; - - Chromagram::Parameters paramsForTuningFrequency(double hz) const; - TFeature computeFeatureFromTotals(const TFeature &totals) const; - TFeature computeFeatureFromSignal(const Signal &signal, double hz) const; - void rotateFeature(TFeature &feature, int rotation) const; - double featureDistance(const TFeature &other, int rotation = 0) const; - int findBestRotation(const TFeature &other) const; - std::pair<int, double> findFineFrequency(int coarseCents); + BulkTuningDifference m_bulk; mutable std::map<string, int> m_outputs; }; - #endif
--- a/src/plugins.cpp Tue Jul 09 15:09:33 2019 +0100 +++ b/src/plugins.cpp Tue Jul 09 16:14:19 2019 +0100 @@ -3,8 +3,10 @@ #include <vamp-sdk/PluginAdapter.h> #include "TuningDifference.h" +#include "BulkTuningDifference.h" static Vamp::PluginAdapter<TuningDifference> tdAdapter; +static Vamp::PluginAdapter<BulkTuningDifference> bulkTdAdapter; const VampPluginDescriptor * @@ -14,6 +16,7 @@ switch (index) { case 0: return tdAdapter.getDescriptor(); + case 1: return bulkTdAdapter.getDescriptor(); default: return 0; } }