changeset 50:69ab9a6e7aee

Make BulkTuningDifference much faster when working with more than two files in fine-tuning mode. It now stores the reference signal and calculates the fine-tuned features from that (shifted the opposite way), while leaving the other files' features alone at their coarse rotations. This slightly changes the results, but not by a significant amount, I think.
author Chris Cannam
date Wed, 10 Jul 2019 18:52:36 +0100
parents b8f65462d793
children af117132ccaf
files .travis.yml Makefile.osx src/BulkTuningDifference.cpp src/BulkTuningDifference.h test/expected/input_vamp_tuning-difference_bulk-tuning-difference_cents.csv test/expected/input_vamp_tuning-difference_bulk-tuning-difference_tuningfreq.csv test/expected/input_vamp_tuning-difference_tuning-difference_cents.csv test/expected/input_vamp_tuning-difference_tuning-difference_tuningfreq.csv test/regression.sh
diffstat 9 files changed, 174 insertions(+), 149 deletions(-) [+]
line wrap: on
line diff
--- a/.travis.yml	Wed Jul 10 14:00:51 2019 +0100
+++ b/.travis.yml	Wed Jul 10 18:52:36 2019 +0100
@@ -17,12 +17,12 @@
       - rubberband-cli
       
 before_install:
+  - wget https://code.soundsoftware.ac.uk/attachments/download/2250/sonic-annotator_1.5_amd64.deb
+  - sudo apt install -y ./sonic-annotator_1.5_amd64.deb
   - ( cd ../ ; hg clone https://code.soundsoftware.ac.uk/hg/vamp-plugin-sdk )
   - ( cd ../ ; hg clone https://code.soundsoftware.ac.uk/hg/vamp-plugin-tester )
-  - ( cd ../vamp-plugin-sdk ; ./configure && make && sudo make install )
+  - ( cd ../vamp-plugin-sdk ; ./configure --disable-programs && make && sudo make install )
   - ( cd ../vamp-plugin-tester ; ./repoint install && make )
-  - wget https://code.soundsoftware.ac.uk/attachments/download/2250/sonic-annotator_1.5_amd64.deb
-  - sudo apt install -y ./sonic-annotator_1.5_amd64.deb
 
 script:
   - make -f Makefile.linux test
--- a/Makefile.osx	Wed Jul 10 14:00:51 2019 +0100
+++ b/Makefile.osx	Wed Jul 10 18:52:36 2019 +0100
@@ -3,7 +3,7 @@
 
 CFLAGS		:= -Wall -Wextra -O3 -ftree-vectorize -fPIC
 
-ARCHFLAGS	?= -mmacosx-version-min=10.6 -arch x86_64 -arch i386
+ARCHFLAGS	?= -arch x86_64 -mmacosx-version-min=10.7 -stdlib=libc++
 
 VAMPSDK_DIR	:= ../vamp-plugin-sdk
 
--- a/src/BulkTuningDifference.cpp	Wed Jul 10 14:00:51 2019 +0100
+++ b/src/BulkTuningDifference.cpp	Wed Jul 10 18:52:36 2019 +0100
@@ -37,14 +37,13 @@
 }
 
 static float defaultMaxDuration = 0.f;
-static int defaultMaxSemis = 4;
+static int defaultMaxSemis = 5;
 static bool defaultFineTuning = true;
 
 BulkTuningDifference::BulkTuningDifference(float inputSampleRate) :
     Plugin(inputSampleRate),
     m_channelCount(0),
     m_bpo(120),
-    m_refChroma(new Chromagram(paramsForTuningFrequency(440.))),
     m_blockSize(0),
     m_frameCount(0),
     m_maxDuration(defaultMaxDuration),
@@ -303,12 +302,10 @@
 BulkTuningDifference::initialise(size_t channels, size_t stepSize, size_t blockSize)
 {
     if (channels < getMinChannelCount()) return false;
-
-    m_channelCount = channels;
-    
     if (stepSize != blockSize) return false;
     if (m_blockSize > INT_MAX) return false;
 
+    m_channelCount = int(channels);
     m_blockSize = int(blockSize);
 
     reset();
@@ -319,13 +316,17 @@
 void
 BulkTuningDifference::reset()
 {
-    if (m_frameCount > 0) {
-	m_refChroma.reset(new Chromagram(paramsForTuningFrequency(440.)));
-	m_frameCount = 0;
+    Chromagram::Parameters params(paramsForTuningFrequency(440.));
+    m_reference.clear();
+    m_refChroma.reset(new Chromagram(params));
+    m_refTotals = TFeature(m_bpo, 0.0);
+    m_refFeatures.clear();
+    m_otherChroma.clear();
+    for (int i = 1; i < m_channelCount; ++i) {
+        m_otherChroma.push_back(std::make_shared<Chromagram>(params));
     }
-    m_refTotals = TFeature(m_bpo, 0.0);
-    m_others = vector<Signal>(m_channelCount - 1);
-    
+    m_otherTotals = vector<TFeature>(m_channelCount-1, TFeature(m_bpo, 0.0));
+    m_frameCount = 0;
 }
 
 template<typename T>
@@ -355,13 +356,11 @@
 	sum += value;
     }
 
-    for (int i = 0; i < m_bpo; ++i) {
-	feature[i] /= sum;
+    if (sum != 0.0) {
+        for (int i = 0; i < m_bpo; ++i) {
+            feature[i] /= sum;
+        }
     }
-
-//    cerr << "computeFeatureFromTotals: feature values:" << endl;
-//    for (auto v: feature) cerr << v << " ";
-//    cerr << endl;
     
     return feature;
 }
@@ -409,7 +408,7 @@
                             float(m_blockSize));
         if (m_frameCount > maxFrames) return FeatureSet();
     }
-    
+
     CQBase::RealBlock block;
     CQBase::RealSequence input;
 
@@ -418,127 +417,30 @@
     block = m_refChroma->process(input);
     for (const auto &v: block) addTo(m_refTotals, v);
 
+    if (m_fineTuning) {
+        m_reference.insert(m_reference.end(),
+                           inputBuffers[0],
+                           inputBuffers[0] + m_blockSize);
+    }
+
     for (int c = 1; c < m_channelCount; ++c) {
-        m_others[c-1].insert(m_others[c-1].end(),
-                             inputBuffers[c],
-                             inputBuffers[c] + m_blockSize);
+        input = CQBase::RealSequence
+            (inputBuffers[c], inputBuffers[c] + m_blockSize);
+        block = m_otherChroma[c-1]->process(input);
+        for (const auto &v: block) addTo(m_otherTotals[c-1], v);
     }
     
     ++m_frameCount;
     return FeatureSet();
 }
 
-void
-BulkTuningDifference::rotateFeature(TFeature &r, int rotation) const
-{
-    if (rotation < 0) {
-        rotate(r.begin(), r.begin() - rotation, r.end());
-    } else {
-        rotate(r.begin(), r.end() - rotation, r.end());
-    }
-}
-
-double
-BulkTuningDifference::featureDistance(const TFeature &other, int rotation) const
-{
-    if (rotation == 0) {
-	return distance(m_refFeature, other);
-    } else {
-	// A positive rotation pushes the tuning frequency up for this
-	// chroma, negative one pulls it down. If a positive rotation
-	// makes this chroma match an un-rotated reference, then this
-	// chroma must have initially been lower than the reference.
-	TFeature r(other);
-        rotateFeature(r, rotation);
-	return distance(m_refFeature, r);
-    }
-}
-
-int
-BulkTuningDifference::findBestRotation(const TFeature &other) const
-{
-    map<double, int> dists;
-
-    int maxRotation = (m_bpo * m_maxSemis) / 12;
-
-    for (int r = -maxRotation; r <= maxRotation; ++r) {
-	double dist = featureDistance(other, r);
-	dists[dist] = r;
-//	cerr << "rotation " << r << ": score " << dist << endl;
-    }
-
-    int best = dists.begin()->second;
-
-//    cerr << "best is " << best << endl;
-    return best;
-}
-
-pair<int, double>
-BulkTuningDifference::findFineFrequency(int channel, int coarseCents)
-{
-    int coarseResolution = 1200 / m_bpo;
-    int searchDistance = coarseResolution/2 - 1;
-
-    int bestCents = coarseCents;
-    double bestHz = frequencyForCentsAbove440(coarseCents);
-
-    if (!m_fineTuning) {
-        cerr << "fine tuning disabled, returning coarse Hz " << bestHz << " and cents " << bestCents << " in lieu of fine ones" << endl;
-        return pair<int, double>(bestCents, bestHz);
-    }
-    
-    //!!! This is kind of absurd - all this brute force but all we're
-    //!!! really doing is aligning two very short signals at
-    //!!! sub-sample level - let's rewrite it someday
-    
-    cerr << "findFineFrequency: coarse frequency is " << bestHz << endl;
-    cerr << "searchDistance = " << searchDistance << endl;
-
-    double bestScore = 0;
-    bool firstScore = true;
-    
-    for (int sign = -1; sign <= 1; sign += 2) {
-	for (int offset = (sign < 0 ? 0 : 1);
-             offset <= searchDistance;
-             ++offset) {
-
-	    int fineCents = coarseCents + sign * offset;
-
-	    cerr << "trying with fineCents = " << fineCents << "..." << endl;
-	    
-	    double fineHz = frequencyForCentsAbove440(fineCents);
-	    TFeature fineFeature = computeFeatureFromSignal
-                (m_others[channel-1], fineHz);
-	    double fineScore = featureDistance(fineFeature);
-
-	    cerr << "fine offset = " << offset << ", cents = " << fineCents
-		 << ", Hz = " << fineHz << ", score " << fineScore
-		 << " (best score so far " << bestScore << ")" << endl;
-	    
-	    if ((fineScore < bestScore) || firstScore) {
-		cerr << "is good!" << endl;
-		bestScore = fineScore;
-		bestCents = fineCents;
-		bestHz = fineHz;
-                firstScore = false;
-	    } else {
-		break;
-	    }
-	}
-    }
-
-    //!!! could keep a vector of scores & then interpolate...
-    
-    return pair<int, double>(bestCents, bestHz);
-}
-
 BulkTuningDifference::FeatureSet
 BulkTuningDifference::getRemainingFeatures()
 {
     FeatureSet fs;
     if (m_frameCount == 0) return fs;
 
-    m_refFeature = computeFeatureFromTotals(m_refTotals);
+    m_refFeatures[0] = computeFeatureFromTotals(m_refTotals);
 
     Feature f;
     f.hasTimestamp = true;
@@ -558,39 +460,40 @@
 BulkTuningDifference::getRemainingFeaturesForChannel(int channel,
                                                      FeatureSet &fs)
 {
-    TFeature otherFeature = computeFeatureFromSignal
-        (m_others[channel-1], 440.);
+    TFeature otherFeature =
+        computeFeatureFromTotals(m_otherTotals[channel-1]);
 
     Feature f;
     f.hasTimestamp = true;
     f.timestamp = Vamp::RealTime::zeroTime;
 
     f.values.clear();
-    for (auto v: m_refFeature) f.values.push_back(float(v));
+    for (auto v: m_refFeatures[0]) f.values.push_back(float(v));
     fs[m_outputs["reffeature"]].push_back(f);
 
     f.values.clear();
     for (auto v: otherFeature) f.values.push_back(float(v));
     fs[m_outputs["otherfeature"]].push_back(f); 
    
-    int rotation = findBestRotation(otherFeature);
+    int rotation = findBestRotation(m_refFeatures[0], otherFeature);
 
     int coarseCents = -(rotation * 1200) / m_bpo;
 
     cerr << "channel " << channel << ": rotation " << rotation << " -> cents " << coarseCents << endl;
 
-    TFeature coarseFeature = otherFeature;
+    TFeature rotatedFeature = otherFeature;
     if (rotation != 0) {
-        rotateFeature(coarseFeature, rotation);
+        rotateFeature(rotatedFeature, rotation);
     }
 
     f.values.clear();
-    for (auto v: coarseFeature) f.values.push_back(float(v));
+    for (auto v: rotatedFeature) f.values.push_back(float(v));
     fs[m_outputs["rotfeature"]].push_back(f);
 
     if (m_fineTuning) {
     
-        pair<int, double> fine = findFineFrequency(channel, coarseCents);
+        pair<int, double> fine = findFineFrequency(rotatedFeature, coarseCents);
+
         int fineCents = fine.first;
         double fineHz = fine.second;
 
@@ -607,3 +510,118 @@
     }        
 }
 
+void
+BulkTuningDifference::rotateFeature(TFeature &r, int rotation) const
+{
+    if (rotation < 0) {
+        rotate(r.begin(), r.begin() - rotation, r.end());
+    } else {
+        rotate(r.begin(), r.end() - rotation, r.end());
+    }
+}
+
+double
+BulkTuningDifference::featureDistance(const TFeature &ref,
+                                      const TFeature &other,
+                                      int rotation) const
+{
+    if (rotation == 0) {
+	return distance(ref, other);
+    } else {
+	// A positive rotation pushes the tuning frequency up for this
+	// chroma, negative one pulls it down. If a positive rotation
+	// makes this chroma match an un-rotated reference, then this
+	// chroma must have initially been lower than the reference.
+	TFeature r(other);
+        rotateFeature(r, rotation);
+	return distance(ref, r);
+    }
+}
+
+int
+BulkTuningDifference::findBestRotation(const TFeature &ref,
+                                       const TFeature &other) const
+{
+    map<double, int> dists;
+
+    int maxRotation = (m_bpo * m_maxSemis) / 12;
+
+    for (int r = -maxRotation; r <= maxRotation; ++r) {
+	double dist = featureDistance(ref, other, r);
+	dists[dist] = r;
+    }
+
+    int best = dists.begin()->second;
+
+    return best;
+}
+
+pair<int, double>
+BulkTuningDifference::findFineFrequency(const TFeature &rotatedOtherFeature,
+                                        int coarseCents)
+{
+    int coarseResolution = 1200 / m_bpo;
+    int searchDistance = coarseResolution/2 - 1;
+
+    int bestCents = coarseCents;
+    double bestHz = frequencyForCentsAbove440(coarseCents);
+
+    cerr << "findFineFrequency: coarse frequency is " << bestHz << endl;
+    cerr << "searchDistance = " << searchDistance << endl;
+
+    double bestScore = 0;
+    bool firstScore = true;
+    
+    for (int sign = -1; sign <= 1; sign += 2) {
+	for (int offset = (sign < 0 ? 0 : 1);
+             offset <= searchDistance;
+             ++offset) {
+
+	    int fineCents = coarseCents + sign * offset;
+	    double fineHz = frequencyForCentsAbove440(fineCents);
+
+	    cerr << "trying with fineCents = " << fineCents << "..." << endl;
+
+            // compare the rotated "other" chroma with a reference
+            // chroma shifted by the offset in the opposite direction
+
+            int compensatingCents = -sign * offset;
+            TFeature compensatedReference;
+
+            if (m_refFeatures.find(compensatingCents) == m_refFeatures.end()) {
+                double compensatingHz = frequencyForCentsAbove440
+                    (compensatingCents);
+            
+                compensatedReference = computeFeatureFromSignal
+                    (m_reference, compensatingHz);
+
+                m_refFeatures[compensatingCents] = compensatedReference;
+
+            } else {
+
+                compensatedReference = m_refFeatures[compensatingCents];
+            }
+
+	    double fineScore = featureDistance(compensatedReference,
+                                               rotatedOtherFeature,
+                                               0); // we are rotated already
+
+	    cerr << "fine offset = " << offset << ", cents = " << fineCents
+		 << ", Hz = " << fineHz << ", score " << fineScore
+		 << " (best score so far " << bestScore << ")" << endl;
+	    
+	    if ((fineScore < bestScore) || firstScore) {
+		cerr << "is good!" << endl;
+		bestScore = fineScore;
+		bestCents = fineCents;
+		bestHz = fineHz;
+                firstScore = false;
+	    } else {
+		break;
+	    }
+	}
+    }
+    
+    return pair<int, double>(bestCents, bestHz);
+}
+
--- a/src/BulkTuningDifference.h	Wed Jul 10 14:00:51 2019 +0100
+++ b/src/BulkTuningDifference.h	Wed Jul 10 18:52:36 2019 +0100
@@ -65,23 +65,28 @@
 
     int m_channelCount;
     int m_bpo;
-    std::unique_ptr<Chromagram> m_refChroma;
-    TFeature m_refTotals;
-    TFeature m_refFeature;
-    std::vector<Signal> m_others;
     int m_blockSize;
     int m_frameCount;
     float m_maxDuration;
     int m_maxSemis;
     bool m_fineTuning;
 
+    std::unique_ptr<Chromagram> m_refChroma;
+    TFeature m_refTotals;
+    std::map<int, TFeature> m_refFeatures; // map from cents-offset to feature
+    Signal m_reference; // we have to retain this when fine-tuning is enabled
+    std::vector<std::shared_ptr<Chromagram>> m_otherChroma;
+    std::vector<TFeature> m_otherTotals;
+
     Chromagram::Parameters paramsForTuningFrequency(double hz) const;
     TFeature computeFeatureFromTotals(const TFeature &totals) const;
     TFeature computeFeatureFromSignal(const Signal &signal, double hz) const;
     void rotateFeature(TFeature &feature, int rotation) const;
-    double featureDistance(const TFeature &other, int rotation = 0) const;
-    int findBestRotation(const TFeature &other) const;
-    std::pair<int, double> findFineFrequency(int channel, int coarseCents);
+    double featureDistance(const TFeature &ref, const TFeature &other,
+                           int rotation) const;
+    int findBestRotation(const TFeature &ref, const TFeature &other) const;
+    std::pair<int, double> findFineFrequency(const TFeature &rotated,
+                                             int coarseCents);
     void getRemainingFeaturesForChannel(int channel, FeatureSet &fs);
 
     mutable std::map<string, int> m_outputs;
--- a/test/expected/input_vamp_tuning-difference_bulk-tuning-difference_cents.csv	Wed Jul 10 14:00:51 2019 +0100
+++ b/test/expected/input_vamp_tuning-difference_bulk-tuning-difference_cents.csv	Wed Jul 10 18:52:36 2019 +0100
@@ -1,2 +1,2 @@
 0.000000000,-230
-0.000000000,-234
+0.000000000,-233
--- a/test/expected/input_vamp_tuning-difference_bulk-tuning-difference_tuningfreq.csv	Wed Jul 10 14:00:51 2019 +0100
+++ b/test/expected/input_vamp_tuning-difference_bulk-tuning-difference_tuningfreq.csv	Wed Jul 10 18:52:36 2019 +0100
@@ -1,2 +1,2 @@
 0.000000000,385.261
-0.000000000,384.372
+0.000000000,384.594
--- a/test/expected/input_vamp_tuning-difference_tuning-difference_cents.csv	Wed Jul 10 14:00:51 2019 +0100
+++ b/test/expected/input_vamp_tuning-difference_tuning-difference_cents.csv	Wed Jul 10 18:52:36 2019 +0100
@@ -1,2 +1,2 @@
 0.000000000,-230
-0.000000000,-234
+0.000000000,-233
--- a/test/expected/input_vamp_tuning-difference_tuning-difference_tuningfreq.csv	Wed Jul 10 14:00:51 2019 +0100
+++ b/test/expected/input_vamp_tuning-difference_tuning-difference_tuningfreq.csv	Wed Jul 10 18:52:36 2019 +0100
@@ -1,2 +1,2 @@
 0.000000000,385.261
-0.000000000,384.372
+0.000000000,384.594
--- a/test/regression.sh	Wed Jul 10 14:00:51 2019 +0100
+++ b/test/regression.sh	Wed Jul 10 18:52:36 2019 +0100
@@ -42,6 +42,8 @@
 
 rubberband -p -2.34 "$wavfile" "$lowfile"
 
+mkdir -p "$mydir/output"
+
 VAMP_PATH="$mydir/.." \
          time \
 	 sonic-annotator \