Mercurial > hg > tuning-difference
changeset 50:69ab9a6e7aee
Make BulkTuningDifference much faster when working with more than two files in fine-tuning mode. Instead of retaining every other signal, store only the reference signal and calculate the fine-tuned features from it (shifted in the opposite direction), while leaving the other files' features alone at their coarse rotations. This slightly changes the results, but not by a significant amount, I think.
author | Chris Cannam |
---|---|
date | Wed, 10 Jul 2019 18:52:36 +0100 |
parents | b8f65462d793 |
children | af117132ccaf |
files | .travis.yml Makefile.osx src/BulkTuningDifference.cpp src/BulkTuningDifference.h test/expected/input_vamp_tuning-difference_bulk-tuning-difference_cents.csv test/expected/input_vamp_tuning-difference_bulk-tuning-difference_tuningfreq.csv test/expected/input_vamp_tuning-difference_tuning-difference_cents.csv test/expected/input_vamp_tuning-difference_tuning-difference_tuningfreq.csv test/regression.sh |
diffstat | 9 files changed, 174 insertions(+), 149 deletions(-) [+] |
line wrap: on
line diff
--- a/.travis.yml Wed Jul 10 14:00:51 2019 +0100 +++ b/.travis.yml Wed Jul 10 18:52:36 2019 +0100 @@ -17,12 +17,12 @@ - rubberband-cli before_install: + - wget https://code.soundsoftware.ac.uk/attachments/download/2250/sonic-annotator_1.5_amd64.deb + - sudo apt install -y ./sonic-annotator_1.5_amd64.deb - ( cd ../ ; hg clone https://code.soundsoftware.ac.uk/hg/vamp-plugin-sdk ) - ( cd ../ ; hg clone https://code.soundsoftware.ac.uk/hg/vamp-plugin-tester ) - - ( cd ../vamp-plugin-sdk ; ./configure && make && sudo make install ) + - ( cd ../vamp-plugin-sdk ; ./configure --disable-programs && make && sudo make install ) - ( cd ../vamp-plugin-tester ; ./repoint install && make ) - - wget https://code.soundsoftware.ac.uk/attachments/download/2250/sonic-annotator_1.5_amd64.deb - - sudo apt install -y ./sonic-annotator_1.5_amd64.deb script: - make -f Makefile.linux test
--- a/Makefile.osx Wed Jul 10 14:00:51 2019 +0100 +++ b/Makefile.osx Wed Jul 10 18:52:36 2019 +0100 @@ -3,7 +3,7 @@ CFLAGS := -Wall -Wextra -O3 -ftree-vectorize -fPIC -ARCHFLAGS ?= -mmacosx-version-min=10.6 -arch x86_64 -arch i386 +ARCHFLAGS ?= -arch x86_64 -mmacosx-version-min=10.7 -stdlib=libc++ VAMPSDK_DIR := ../vamp-plugin-sdk
--- a/src/BulkTuningDifference.cpp Wed Jul 10 14:00:51 2019 +0100 +++ b/src/BulkTuningDifference.cpp Wed Jul 10 18:52:36 2019 +0100 @@ -37,14 +37,13 @@ } static float defaultMaxDuration = 0.f; -static int defaultMaxSemis = 4; +static int defaultMaxSemis = 5; static bool defaultFineTuning = true; BulkTuningDifference::BulkTuningDifference(float inputSampleRate) : Plugin(inputSampleRate), m_channelCount(0), m_bpo(120), - m_refChroma(new Chromagram(paramsForTuningFrequency(440.))), m_blockSize(0), m_frameCount(0), m_maxDuration(defaultMaxDuration), @@ -303,12 +302,10 @@ BulkTuningDifference::initialise(size_t channels, size_t stepSize, size_t blockSize) { if (channels < getMinChannelCount()) return false; - - m_channelCount = channels; - if (stepSize != blockSize) return false; if (m_blockSize > INT_MAX) return false; + m_channelCount = int(channels); m_blockSize = int(blockSize); reset(); @@ -319,13 +316,17 @@ void BulkTuningDifference::reset() { - if (m_frameCount > 0) { - m_refChroma.reset(new Chromagram(paramsForTuningFrequency(440.))); - m_frameCount = 0; + Chromagram::Parameters params(paramsForTuningFrequency(440.)); + m_reference.clear(); + m_refChroma.reset(new Chromagram(params)); + m_refTotals = TFeature(m_bpo, 0.0); + m_refFeatures.clear(); + m_otherChroma.clear(); + for (int i = 1; i < m_channelCount; ++i) { + m_otherChroma.push_back(std::make_shared<Chromagram>(params)); } - m_refTotals = TFeature(m_bpo, 0.0); - m_others = vector<Signal>(m_channelCount - 1); - + m_otherTotals = vector<TFeature>(m_channelCount-1, TFeature(m_bpo, 0.0)); + m_frameCount = 0; } template<typename T> @@ -355,13 +356,11 @@ sum += value; } - for (int i = 0; i < m_bpo; ++i) { - feature[i] /= sum; + if (sum != 0.0) { + for (int i = 0; i < m_bpo; ++i) { + feature[i] /= sum; + } } - -// cerr << "computeFeatureFromTotals: feature values:" << endl; -// for (auto v: feature) cerr << v << " "; -// cerr << endl; return feature; } @@ -409,7 +408,7 @@ float(m_blockSize)); if (m_frameCount > 
maxFrames) return FeatureSet(); } - + CQBase::RealBlock block; CQBase::RealSequence input; @@ -418,127 +417,30 @@ block = m_refChroma->process(input); for (const auto &v: block) addTo(m_refTotals, v); + if (m_fineTuning) { + m_reference.insert(m_reference.end(), + inputBuffers[0], + inputBuffers[0] + m_blockSize); + } + for (int c = 1; c < m_channelCount; ++c) { - m_others[c-1].insert(m_others[c-1].end(), - inputBuffers[c], - inputBuffers[c] + m_blockSize); + input = CQBase::RealSequence + (inputBuffers[c], inputBuffers[c] + m_blockSize); + block = m_otherChroma[c-1]->process(input); + for (const auto &v: block) addTo(m_otherTotals[c-1], v); } ++m_frameCount; return FeatureSet(); } -void -BulkTuningDifference::rotateFeature(TFeature &r, int rotation) const -{ - if (rotation < 0) { - rotate(r.begin(), r.begin() - rotation, r.end()); - } else { - rotate(r.begin(), r.end() - rotation, r.end()); - } -} - -double -BulkTuningDifference::featureDistance(const TFeature &other, int rotation) const -{ - if (rotation == 0) { - return distance(m_refFeature, other); - } else { - // A positive rotation pushes the tuning frequency up for this - // chroma, negative one pulls it down. If a positive rotation - // makes this chroma match an un-rotated reference, then this - // chroma must have initially been lower than the reference. 
- TFeature r(other); - rotateFeature(r, rotation); - return distance(m_refFeature, r); - } -} - -int -BulkTuningDifference::findBestRotation(const TFeature &other) const -{ - map<double, int> dists; - - int maxRotation = (m_bpo * m_maxSemis) / 12; - - for (int r = -maxRotation; r <= maxRotation; ++r) { - double dist = featureDistance(other, r); - dists[dist] = r; -// cerr << "rotation " << r << ": score " << dist << endl; - } - - int best = dists.begin()->second; - -// cerr << "best is " << best << endl; - return best; -} - -pair<int, double> -BulkTuningDifference::findFineFrequency(int channel, int coarseCents) -{ - int coarseResolution = 1200 / m_bpo; - int searchDistance = coarseResolution/2 - 1; - - int bestCents = coarseCents; - double bestHz = frequencyForCentsAbove440(coarseCents); - - if (!m_fineTuning) { - cerr << "fine tuning disabled, returning coarse Hz " << bestHz << " and cents " << bestCents << " in lieu of fine ones" << endl; - return pair<int, double>(bestCents, bestHz); - } - - //!!! This is kind of absurd - all this brute force but all we're - //!!! really doing is aligning two very short signals at - //!!! sub-sample level - let's rewrite it someday - - cerr << "findFineFrequency: coarse frequency is " << bestHz << endl; - cerr << "searchDistance = " << searchDistance << endl; - - double bestScore = 0; - bool firstScore = true; - - for (int sign = -1; sign <= 1; sign += 2) { - for (int offset = (sign < 0 ? 0 : 1); - offset <= searchDistance; - ++offset) { - - int fineCents = coarseCents + sign * offset; - - cerr << "trying with fineCents = " << fineCents << "..." 
<< endl; - - double fineHz = frequencyForCentsAbove440(fineCents); - TFeature fineFeature = computeFeatureFromSignal - (m_others[channel-1], fineHz); - double fineScore = featureDistance(fineFeature); - - cerr << "fine offset = " << offset << ", cents = " << fineCents - << ", Hz = " << fineHz << ", score " << fineScore - << " (best score so far " << bestScore << ")" << endl; - - if ((fineScore < bestScore) || firstScore) { - cerr << "is good!" << endl; - bestScore = fineScore; - bestCents = fineCents; - bestHz = fineHz; - firstScore = false; - } else { - break; - } - } - } - - //!!! could keep a vector of scores & then interpolate... - - return pair<int, double>(bestCents, bestHz); -} - BulkTuningDifference::FeatureSet BulkTuningDifference::getRemainingFeatures() { FeatureSet fs; if (m_frameCount == 0) return fs; - m_refFeature = computeFeatureFromTotals(m_refTotals); + m_refFeatures[0] = computeFeatureFromTotals(m_refTotals); Feature f; f.hasTimestamp = true; @@ -558,39 +460,40 @@ BulkTuningDifference::getRemainingFeaturesForChannel(int channel, FeatureSet &fs) { - TFeature otherFeature = computeFeatureFromSignal - (m_others[channel-1], 440.); + TFeature otherFeature = + computeFeatureFromTotals(m_otherTotals[channel-1]); Feature f; f.hasTimestamp = true; f.timestamp = Vamp::RealTime::zeroTime; f.values.clear(); - for (auto v: m_refFeature) f.values.push_back(float(v)); + for (auto v: m_refFeatures[0]) f.values.push_back(float(v)); fs[m_outputs["reffeature"]].push_back(f); f.values.clear(); for (auto v: otherFeature) f.values.push_back(float(v)); fs[m_outputs["otherfeature"]].push_back(f); - int rotation = findBestRotation(otherFeature); + int rotation = findBestRotation(m_refFeatures[0], otherFeature); int coarseCents = -(rotation * 1200) / m_bpo; cerr << "channel " << channel << ": rotation " << rotation << " -> cents " << coarseCents << endl; - TFeature coarseFeature = otherFeature; + TFeature rotatedFeature = otherFeature; if (rotation != 0) { - 
rotateFeature(coarseFeature, rotation); + rotateFeature(rotatedFeature, rotation); } f.values.clear(); - for (auto v: coarseFeature) f.values.push_back(float(v)); + for (auto v: rotatedFeature) f.values.push_back(float(v)); fs[m_outputs["rotfeature"]].push_back(f); if (m_fineTuning) { - pair<int, double> fine = findFineFrequency(channel, coarseCents); + pair<int, double> fine = findFineFrequency(rotatedFeature, coarseCents); + int fineCents = fine.first; double fineHz = fine.second; @@ -607,3 +510,118 @@ } } +void +BulkTuningDifference::rotateFeature(TFeature &r, int rotation) const +{ + if (rotation < 0) { + rotate(r.begin(), r.begin() - rotation, r.end()); + } else { + rotate(r.begin(), r.end() - rotation, r.end()); + } +} + +double +BulkTuningDifference::featureDistance(const TFeature &ref, + const TFeature &other, + int rotation) const +{ + if (rotation == 0) { + return distance(ref, other); + } else { + // A positive rotation pushes the tuning frequency up for this + // chroma, negative one pulls it down. If a positive rotation + // makes this chroma match an un-rotated reference, then this + // chroma must have initially been lower than the reference. 
+ TFeature r(other); + rotateFeature(r, rotation); + return distance(ref, r); + } +} + +int +BulkTuningDifference::findBestRotation(const TFeature &ref, + const TFeature &other) const +{ + map<double, int> dists; + + int maxRotation = (m_bpo * m_maxSemis) / 12; + + for (int r = -maxRotation; r <= maxRotation; ++r) { + double dist = featureDistance(ref, other, r); + dists[dist] = r; + } + + int best = dists.begin()->second; + + return best; +} + +pair<int, double> +BulkTuningDifference::findFineFrequency(const TFeature &rotatedOtherFeature, + int coarseCents) +{ + int coarseResolution = 1200 / m_bpo; + int searchDistance = coarseResolution/2 - 1; + + int bestCents = coarseCents; + double bestHz = frequencyForCentsAbove440(coarseCents); + + cerr << "findFineFrequency: coarse frequency is " << bestHz << endl; + cerr << "searchDistance = " << searchDistance << endl; + + double bestScore = 0; + bool firstScore = true; + + for (int sign = -1; sign <= 1; sign += 2) { + for (int offset = (sign < 0 ? 0 : 1); + offset <= searchDistance; + ++offset) { + + int fineCents = coarseCents + sign * offset; + double fineHz = frequencyForCentsAbove440(fineCents); + + cerr << "trying with fineCents = " << fineCents << "..." 
<< endl; + + // compare the rotated "other" chroma with a reference + // chroma shifted by the offset in the opposite direction + + int compensatingCents = -sign * offset; + TFeature compensatedReference; + + if (m_refFeatures.find(compensatingCents) == m_refFeatures.end()) { + double compensatingHz = frequencyForCentsAbove440 + (compensatingCents); + + compensatedReference = computeFeatureFromSignal + (m_reference, compensatingHz); + + m_refFeatures[compensatingCents] = compensatedReference; + + } else { + + compensatedReference = m_refFeatures[compensatingCents]; + } + + double fineScore = featureDistance(compensatedReference, + rotatedOtherFeature, + 0); // we are rotated already + + cerr << "fine offset = " << offset << ", cents = " << fineCents + << ", Hz = " << fineHz << ", score " << fineScore + << " (best score so far " << bestScore << ")" << endl; + + if ((fineScore < bestScore) || firstScore) { + cerr << "is good!" << endl; + bestScore = fineScore; + bestCents = fineCents; + bestHz = fineHz; + firstScore = false; + } else { + break; + } + } + } + + return pair<int, double>(bestCents, bestHz); +} +
--- a/src/BulkTuningDifference.h Wed Jul 10 14:00:51 2019 +0100 +++ b/src/BulkTuningDifference.h Wed Jul 10 18:52:36 2019 +0100 @@ -65,23 +65,28 @@ int m_channelCount; int m_bpo; - std::unique_ptr<Chromagram> m_refChroma; - TFeature m_refTotals; - TFeature m_refFeature; - std::vector<Signal> m_others; int m_blockSize; int m_frameCount; float m_maxDuration; int m_maxSemis; bool m_fineTuning; + std::unique_ptr<Chromagram> m_refChroma; + TFeature m_refTotals; + std::map<int, TFeature> m_refFeatures; // map from cents-offset to feature + Signal m_reference; // we have to retain this when fine-tuning is enabled + std::vector<std::shared_ptr<Chromagram>> m_otherChroma; + std::vector<TFeature> m_otherTotals; + Chromagram::Parameters paramsForTuningFrequency(double hz) const; TFeature computeFeatureFromTotals(const TFeature &totals) const; TFeature computeFeatureFromSignal(const Signal &signal, double hz) const; void rotateFeature(TFeature &feature, int rotation) const; - double featureDistance(const TFeature &other, int rotation = 0) const; - int findBestRotation(const TFeature &other) const; - std::pair<int, double> findFineFrequency(int channel, int coarseCents); + double featureDistance(const TFeature &ref, const TFeature &other, + int rotation) const; + int findBestRotation(const TFeature &ref, const TFeature &other) const; + std::pair<int, double> findFineFrequency(const TFeature &rotated, + int coarseCents); void getRemainingFeaturesForChannel(int channel, FeatureSet &fs); mutable std::map<string, int> m_outputs;
--- a/test/expected/input_vamp_tuning-difference_bulk-tuning-difference_cents.csv Wed Jul 10 14:00:51 2019 +0100 +++ b/test/expected/input_vamp_tuning-difference_bulk-tuning-difference_cents.csv Wed Jul 10 18:52:36 2019 +0100 @@ -1,2 +1,2 @@ 0.000000000,-230 -0.000000000,-234 +0.000000000,-233
--- a/test/expected/input_vamp_tuning-difference_bulk-tuning-difference_tuningfreq.csv Wed Jul 10 14:00:51 2019 +0100 +++ b/test/expected/input_vamp_tuning-difference_bulk-tuning-difference_tuningfreq.csv Wed Jul 10 18:52:36 2019 +0100 @@ -1,2 +1,2 @@ 0.000000000,385.261 -0.000000000,384.372 +0.000000000,384.594
--- a/test/expected/input_vamp_tuning-difference_tuning-difference_cents.csv Wed Jul 10 14:00:51 2019 +0100 +++ b/test/expected/input_vamp_tuning-difference_tuning-difference_cents.csv Wed Jul 10 18:52:36 2019 +0100 @@ -1,2 +1,2 @@ 0.000000000,-230 -0.000000000,-234 +0.000000000,-233
--- a/test/expected/input_vamp_tuning-difference_tuning-difference_tuningfreq.csv Wed Jul 10 14:00:51 2019 +0100 +++ b/test/expected/input_vamp_tuning-difference_tuning-difference_tuningfreq.csv Wed Jul 10 18:52:36 2019 +0100 @@ -1,2 +1,2 @@ 0.000000000,385.261 -0.000000000,384.372 +0.000000000,384.594