Mercurial > hg > tuning-difference

--- a/chroma-compare-plugin/Makefile.inc	Wed Feb 04 10:15:57 2015 +0000
+++ b/chroma-compare-plugin/Makefile.inc	Wed Feb 04 15:10:07 2015 +0000
@@ -1,27 +1,3 @@
-
-##  Makefile for Vamp plugin builds using command-line tools.
-##
-##  This file defines all of the system-independent information about
-##  your project: the list of source files, plugin library name, etc.
-##  Edit this file to make sure it has all the right information.
-##
-##  This file does not define the system-specific stuff such as which
-##  compiler to use -- that goes into Makefile.osx, Makefile.mingw32,
-##  Makefile.linux etc.  Those files all include this file, so that
-##  they all have a consistent set of project data.
-##
-##  To build the plugin project, type
-##
-##    $ gmake -f Makefile.osx
-##
-##  or whatever the equivalent filename suffix is for your platform.
-##
-##  This requires GNU make, which is what you get with OS/X, Linux, or
-##  MinGW/Cygwin on Windows.
-##
-##  (For Windows builds using MS Visual Studio, start instead with the
-##  VampExamplePlugins project found in the build directory of the SDK.)
-

 # Edit this to the base name of your plugin library
 #
@@ -41,9 +17,9 @@
 SRC_DIR		:= .

 CFLAGS		:= $(ARCHFLAGS) $(CFLAGS)
-CXXFLAGS	:= $(CFLAGS) -I. -I$(VAMPSDK_DIR) $(CXXFLAGS)
+CXXFLAGS	:= $(CFLAGS) -I. -I$(VAMPSDK_DIR) -I../../constant-q-cpp $(CXXFLAGS)

-LDFLAGS		:= $(ARCHFLAGS) $(LDFLAGS)
+LDFLAGS		:= $(ARCHFLAGS) $(LDFLAGS) -L../../constant-q-cpp -lcq
 PLUGIN_LDFLAGS	:= $(LDFLAGS) $(PLUGIN_LDFLAGS)

 # Defaults, overridden from the platform-specific Makefile
--- a/chroma-compare-plugin/Makefile.linux	Wed Feb 04 10:15:57 2015 +0000
+++ b/chroma-compare-plugin/Makefile.linux	Wed Feb 04 15:10:07 2015 +0000
@@ -1,25 +1,16 @@
-
-##  Makefile for Vamp plugin using GNU tools on Linux.
-##
-##  Edit this to adjust compiler and library settings when
-##  building for Linux.
-##
-##  Note that the list of source files, etc, goes in Makefile.inc
-##  instead so that it can be included by all platform Makefiles.
-

 # For a debug build...

-CFLAGS		:= -Wall -Wextra -g -fPIC
+CXXFLAGS	:= -Wall -Wextra -Werror -g -fPIC -std=c++11

 # ... or for a release build

-#CFLAGS		:= -Wall -Wextra -O3 -msse -msse2 -mfpmath=sse -ftree-vectorize -fPIC
+#CXXFLAGS	:= -Wall -Wextra -Werror -O3 -msse -msse2 -mfpmath=sse -ftree-vectorize -fPIC -std=c++11


 # Location of Vamp plugin SDK relative to the project directory

-VAMPSDK_DIR	:= ../vamp-plugin-sdk
+VAMPSDK_DIR	:= ../../vamp-plugin-sdk


 # Libraries and linker flags required by plugin: add any -l<library>
--- a/chroma-compare-plugin/TuningDifference.cpp	Wed Feb 04 10:15:57 2015 +0000
+++ b/chroma-compare-plugin/TuningDifference.cpp	Wed Feb 04 15:10:07 2015 +0000
@@ -6,14 +6,28 @@
 #include <cmath>
 #include <cstdio>

-using std::cerr;
-using std::endl;
+#include <algorithm>

-static double targetFmin = 60.0;
-static double targetFmax = 1500.0;
+using namespace std;
+
+static double pitchToFrequency(int pitch, // semitone number; pitch 69 maps onto concertA
+			       double centsOffset = 0., // added to pitch in hundredths of a semitone
+			       double concertA = 440.) // value returned for pitch 69 with zero offset
+{
+    double p = double(pitch) + (centsOffset / 100.); // fractional pitch
+    return concertA * pow(2.0, (p - 69.0) / 12.0); // result doubles for every 12 steps of p
+}
+
+static double frequencyForCentsAbove440(double cents) // callers pass negative cents too
+{
+    return pitchToFrequency(69, cents, 440.); // pitch 69 at zero offset gives exactly 440
+}

 TuningDifference::TuningDifference(float inputSampleRate) :
-    Plugin(inputSampleRate)
+    Plugin(inputSampleRate), m_bpo(60), // m_bpo precedes m_refCQ, whose parameters read it
+    m_refCQ(new CQSpectrogram(paramsForTuningFrequency(440.), // reference transform at A = 440
+			      CQSpectrogram::InterpolateHold)),
+    m_blockSize(0), m_frameCount(0) // zeroed: reset() and process() read m_frameCount before any write
 {
 }

@@ -68,13 +82,13 @@
 TuningDifference::InputDomain
 TuningDifference::getInputDomain() const
 {
-    return FrequencyDomain;
+    return TimeDomain; // process() hands inputBuffers to CQSpectrogram as plain sample sequences
 }

 size_t
 TuningDifference::getPreferredBlockSize() const
 {
-    return 16384;
+    return 0; // NOTE(review): presumably "no preference" to the host -- confirm in the Vamp SDK docs
 }

 size_t
@@ -147,6 +161,7 @@
     d.isQuantized = false;
     d.sampleType = OutputDescriptor::VariableSampleRate;
     d.hasDuration = false;
+    m_outputs[d.identifier] = list.size();
     list.push_back(d);

     d.identifier = "tuningfreq";
@@ -159,36 +174,49 @@
     d.isQuantized = false;
     d.sampleType = OutputDescriptor::VariableSampleRate;
     d.hasDuration = false;
+    m_outputs[d.identifier] = list.size();
     list.push_back(d);

-    d.identifier = "curve";
-    d.name = "Shift Correlation Curve";
-    d.description = "Correlation between shifted and unshifted sources, for each probed shift in cents.";
+    d.identifier = "reffeature";
+    d.name = "Reference Feature";
+    d.description = "Chroma feature from reference audio.";
     d.unit = "";
     d.hasFixedBinCount = true;
-    d.binCount = 1;
-    d.hasKnownExtents = false;
-    d.isQuantized = false;
-    d.sampleType = OutputDescriptor::FixedSampleRate;
-    d.sampleRate = 100;
-    d.hasDuration = false;
-    list.push_back(d);
-
-    int targetBinMin = int(floor(targetFmin * m_blockSize / m_inputSampleRate));
-    int targetBinMax = int(ceil(targetFmax * m_blockSize / m_inputSampleRate));
-    cerr << "target bin range: " << targetBinMin << " -> " << targetBinMax << endl;
-
-    d.identifier = "averages";
-    d.name = "Spectrum averages";
-    d.description = "Average magnitude spectrum for each channel.";
-    d.unit = "";
-    d.hasFixedBinCount = true;
-    d.binCount = (targetBinMax > targetBinMin ? targetBinMax - targetBinMin + 1 : 100);
+    d.binCount = m_bpo;
     d.hasKnownExtents = false;
     d.isQuantized = false;
     d.sampleType = OutputDescriptor::FixedSampleRate;
     d.sampleRate = 1;
     d.hasDuration = false;
+    m_outputs[d.identifier] = list.size();
+    list.push_back(d);
+
+    d.identifier = "otherfeature";
+    d.name = "Other Feature";
+    d.description = "Chroma feature from other audio, before rotation.";
+    d.unit = "";
+    d.hasFixedBinCount = true;
+    d.binCount = m_bpo;
+    d.hasKnownExtents = false;
+    d.isQuantized = false;
+    d.sampleType = OutputDescriptor::FixedSampleRate;
+    d.sampleRate = 1;
+    d.hasDuration = false;
+    m_outputs[d.identifier] = list.size();
+    list.push_back(d);
+
+    d.identifier = "rotfeature";
+    d.name = "Other Feature at Rotated Frequency";
+    d.description = "Chroma feature from reference audio calculated with the tuning frequency obtained from rotation matching.";
+    d.unit = "";
+    d.hasFixedBinCount = true;
+    d.binCount = m_bpo;
+    d.hasKnownExtents = false;
+    d.isQuantized = false;
+    d.sampleType = OutputDescriptor::FixedSampleRate;
+    d.sampleRate = 1;
+    d.hasDuration = false;
+    m_outputs[d.identifier] = list.size();
     list.push_back(d);

     return list;
@@ -200,8 +228,7 @@
     if (channels < getMinChannelCount() ||
 	channels > getMaxChannelCount()) return false;

-    if (blockSize != getPreferredBlockSize() ||
-	stepSize != blockSize/2) return false;
+    if (stepSize != blockSize) return false;

     m_blockSize = blockSize;

@@ -213,141 +240,170 @@
 void
 TuningDifference::reset()
 {
-    m_sum[0].clear();
-    m_sum[1].clear();
-    m_frameCount = 0;
+    if (m_frameCount > 0) { // only replace the CQ once process() has fed it
+	m_refCQ.reset(new CQSpectrogram(paramsForTuningFrequency(440.),
+					CQSpectrogram::InterpolateHold));
+	m_frameCount = 0;
+    }
+    m_refTotals = Chroma(m_refCQ->getTotalBins(), 0.0); // zeroed accumulator, one slot per CQ bin
+    m_other.clear(); // discard the buffered channel-1 samples
+}
+
+template<typename T>
+void addTo(vector<T> &a, const vector<T> &b) // in place: a[i] += b[i]
+{
+    transform(a.begin(), a.end(), b.begin(), a.begin(), plus<T>()); // length comes from a; b must be at least as long
+}
+
+template<typename T>
+T distance(const vector<T> &a, const vector<T> &b) // sum of |a[i] - b[i]| over the length of a
+{
+    return inner_product(a.begin(), a.end(), b.begin(), T(), // T() is the starting total
+			 plus<T>(), [](T x, T y) { return fabs(x - y); }); // add up absolute differences
+}
+
+TuningDifference::TFeature // m_bpo values that sum to 1, or all zeros for silent input
+TuningDifference::computeFeatureFromTotals(const Chroma &totals) const
+{
+    TFeature feature(m_bpo); // value-initialised, so every slot starts at zero
+    double sum = 0.0;
+
+    for (int i = 0; i < (int)totals.size(); ++i) {
+	double value = totals[i] / m_frameCount; // per-frame mean; getRemainingFeatures() rejects a zero count first
+	feature[i % m_bpo] += value; // bins that are m_bpo apart share one slot
+	sum += value;
+    }
+
+    if (sum != 0.0) { // all-zero totals would otherwise turn every slot into 0/0 = NaN
+	for (auto &v: feature) v /= sum;
+    }
+
+    cerr << "computeFeatureFromTotals: feature values:" << endl;
+    for (auto v: feature) cerr << v << " ";
+    cerr << endl;
+
+    return feature;
+}
+
+CQParameters // CQ settings spanning pitches 36 to 96 with pitch 69 tuned to hz
+TuningDifference::paramsForTuningFrequency(double hz) const
+{
+    return CQParameters(m_inputSampleRate,
+			pitchToFrequency(36, 0., hz), // hz is concertA (3rd parameter), not centsOffset (2nd)
+			pitchToFrequency(96, 0., hz),
+			m_bpo);
+}
+
+TuningDifference::TFeature // feature of signal as seen by a fresh CQ tuned to hz
+TuningDifference::computeFeatureFromSignal(const Signal &signal, double hz) const
+{
+    CQSpectrogram cq(paramsForTuningFrequency(hz),
+		     CQSpectrogram::InterpolateHold);
+
+    Chroma totals(cq.getTotalBins(), 0.0); // sized by the transform that fills it
+
+    for (int i = 0; i < m_frameCount; ++i) {
+	// Clamp offsets rather than iterators: an iterator past end() is undefined.
+	const size_t start = min(signal.size(), size_t(i) * m_blockSize);
+	const size_t stop = min(signal.size(), start + m_blockSize);
+	CQSpectrogram::RealSequence input(signal.begin() + start, signal.begin() + stop);
+	input.resize(m_blockSize); // zero-pads a short final chunk
+	CQSpectrogram::RealBlock block = cq.process(input);
+	for (const auto &v: block) addTo(totals, v);
+    }
+
+    return computeFeatureFromTotals(totals);
 }

 TuningDifference::FeatureSet
-TuningDifference::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
+TuningDifference::process(const float *const *inputBuffers, Vamp::RealTime) // the timestamp is not used
 {
-    for (int c = 0; c < 2; ++c) {
-	if (m_sum[c].size() == 0) {
-	    m_sum[c].resize(m_blockSize/2 + 1, 0.0);
-	}
-	for (int i = 0; i <= m_blockSize/2; ++i) {
-	    double energy =
-		inputBuffers[c][i*2  ] * inputBuffers[c][i*2  ] +
-		inputBuffers[c][i*2+1] * inputBuffers[c][i*2+1];
-	    double mag = sqrt(energy);
-	    m_sum[c][i] += mag;
-	    m_sum[c][i/2] += mag;
-	}
-    }
+    CQSpectrogram::RealBlock block;
+    CQSpectrogram::RealSequence input;
+
+    input = CQSpectrogram::RealSequence // copy of m_blockSize samples from channel 0
+	(inputBuffers[0], inputBuffers[0] + m_blockSize);
+    block = m_refCQ->process(input); // channel 0 goes through the reference CQ immediately
+    for (const auto &v: block) addTo(m_refTotals, v); // accumulate every column returned
+
+    m_other.insert(m_other.end(), // channel 1 is only buffered here; getRemainingFeatures() analyses it
+		   inputBuffers[1], inputBuffers[1] + m_blockSize);

     ++m_frameCount;
     return FeatureSet();
 }

+double // distance between m_refFeature and a rotated copy of other
+TuningDifference::featureDistance(const TFeature &other, int rotation) const
+{
+    if (rotation == 0) {
+	return distance(m_refFeature, other); // no copy needed when unrotated
+    } else {
+	TFeature r(other); // rotate a copy; other is left untouched
+	if (rotation > 0) {
+	    rotate(r.begin(), r.begin() + rotation, r.end()); // element at index rotation becomes first
+	} else {
+	    rotate(r.begin(), r.end() + rotation, r.end()); // the last |rotation| elements move to the front
+	}
+	return distance(m_refFeature, r);
+    }
+}
+
+int // rotation, in bins, whose featureDistance() against m_refFeature is smallest
+TuningDifference::findBestRotation(const TFeature &other) const
+{
+    map<double, int> dists; // keyed by distance, so begin() holds the smallest
+
+    int maxSemis = 6;
+    int maxRotation = (m_bpo * maxSemis) / 12; // 30 bins with m_bpo = 60
+
+    for (int r = -maxRotation; r <= maxRotation; ++r) {
+	double dist = featureDistance(other, r);
+	dists[dist] = r; // an equal distance overwrites, so the later r wins a tie
+	cerr << "rotation " << r << ": score " << dist << endl;
+    }
+
+    int best = dists.begin()->second;
+
+    cerr << "best is " << best << endl;
+    return best;
+}
+
 TuningDifference::FeatureSet
 TuningDifference::getRemainingFeatures()
 {
-    int n = m_sum[0].size();
-    if (n == 0) return FeatureSet();
+    FeatureSet fs;
+    if (m_frameCount == 0) return fs; // nothing was processed; also keeps later divisions by m_frameCount safe
+
+    m_refFeature = computeFeatureFromTotals(m_refTotals); // channel 0, accumulated during process()
+    TFeature otherFeature = computeFeatureFromSignal(m_other, 440.); // channel 1, analysed now at A = 440

     Feature f;
-    FeatureSet fs;
-
-    int maxshift = 2400; // cents
-
-    int bestshift = -1;
-    double bestdist = 0;
-
-    for (int i = 0; i < n; ++i) {
-	m_sum[0][i] /= m_frameCount;
-	m_sum[1][i] /= m_frameCount;
-    }
-
-    for (int c = 0; c < 2; ++c) {
-	double tot = 0.0;
-	for (int i = 0; i < n; ++i) {
-	    tot += m_sum[c][i];
-	}
-	if (tot != 0.0) {
-	    for (int i = 0; i < n; ++i) {
-		m_sum[c][i] /= tot;
-	    }
-	}
-    }
-
-    int targetBinMin = int(floor(targetFmin * m_blockSize / m_inputSampleRate));
-    int targetBinMax = int(ceil(targetFmax * m_blockSize / m_inputSampleRate));
-    cerr << "target bin range: " << targetBinMin << " -> " << targetBinMax << endl;
-
-    f.values.clear();
-    for (int i = targetBinMin; i < targetBinMax; ++i) {
-	f.values.push_back(m_sum[0][i]);
-    }
-    fs[3].push_back(f);
-    f.values.clear();
-    for (int i = targetBinMin; i < targetBinMax; ++i) {
-	f.values.push_back(m_sum[1][i]);
-    }
-    fs[3].push_back(f);

     f.values.clear();
-
-    for (int shift = -maxshift; shift <= maxshift; ++shift) {
-
-	double multiplier = pow(2.0, double(shift) / 1200.0);
-	double dist = 0.0;
-
-//	cerr << "shift = " << shift << ", multiplier = " << multiplier << endl;
-
-	int contributing = 0;
-
-	for (int i = targetBinMin; i < targetBinMax; ++i) {
-
-	    double source = i / multiplier;
-	    int s0 = int(source), s1 = s0 + 1;
-	    double p1 = source - s0;
-	    double p0 = 1.0 - p1;
-
-	    double value = 0.0;
-	    if (s0 >= 0 && s0 < n) {
-		value += p0 * m_sum[1][s0];
-		++contributing;
-	    }
-	    if (s1 >= 0 && s1 < n) {
-		value += p1 * m_sum[1][s1];
-		++contributing;
-	    }
-
-//	    if (shift == -1) {
-//		cerr << "for multiplier " << multiplier << ", target " << i << ", source " << source << ", value " << p0 << " * " << m_sum[1][s0] << " + " << p1 << " * " << m_sum[1][s1] << " = " << value << ", other " << m_sum[0][i] << endl;
-//	    }
-
-	    double diff = fabs(m_sum[0][i] - value);
-	    dist += diff;
-	}
-
-	dist /= contributing;
-
-	f.values.clear();
-	f.values.push_back(dist);
-	char label[100];
-	sprintf(label, "%f at shift %d freq mult %f", dist, shift, multiplier);
-	f.label = label;
-	fs[2].push_back(f);
-
-	if (bestshift == -1 || dist < bestdist) {
-	    bestshift = shift;
-	    bestdist = dist;
-	}
-    }
-
-    f.timestamp = Vamp::RealTime::zeroTime;
-    f.hasTimestamp = true;
-    f.label = "";
+    for (auto v: m_refFeature) f.values.push_back(v);
+    fs[m_outputs["reffeature"]].push_back(f); // index recorded by getOutputDescriptors(); an unknown key would yield 0

     f.values.clear();
-//    cerr << "best dist = " << bestdist << " at shift " << bestshift << endl;
-    f.values.push_back(-bestshift);
-    fs[0].push_back(f);
+    for (auto v: otherFeature) f.values.push_back(v);
+    fs[m_outputs["otherfeature"]].push_back(f);
+
+    int rotation = findBestRotation(otherFeature); // bin shift that best matches m_refFeature
+
+    int coarseCents = -(rotation * 100) / (m_bpo / 12); // sign-flipped; 20 cents per bin when m_bpo is 60
+
+    cerr << "rotation " << rotation << " -> cents " << coarseCents << endl;
+
+    double coarseHz = frequencyForCentsAbove440(coarseCents);
+
+    TFeature coarseFeature = computeFeatureFromSignal(m_other, coarseHz); // channel 1 re-analysed at the estimated tuning
+    double coarseScore = featureDistance(coarseFeature); // rotation defaults to 0; the score is only logged
+
+    cerr << "corresponding Hz " << coarseHz << " scores " << coarseScore << endl;

     f.values.clear();
-    f.values.push_back(440.0 / pow(2.0, double(bestshift) / 1200.0));
-    fs[1].push_back(f);
+    for (auto v: coarseFeature) f.values.push_back(v);
+    fs[m_outputs["rotfeature"]].push_back(f);

     return fs;
 }
--- a/chroma-compare-plugin/TuningDifference.h	Wed Feb 04 10:15:57 2015 +0000
+++ b/chroma-compare-plugin/TuningDifference.h	Wed Feb 04 15:10:07 2015 +0000
@@ -3,6 +3,10 @@

 #include <vamp-sdk/Plugin.h>

+#include <cq/CQSpectrogram.h>
+
+#include <memory>
+
 using std::string;
 using std::vector;

@@ -44,9 +48,25 @@
     FeatureSet getRemainingFeatures();

 protected:
+    typedef vector<float> Signal;
+    typedef vector<double> Chroma;
+    typedef vector<double> TFeature;
+
+    int m_bpo;
+    std::unique_ptr<CQSpectrogram> m_refCQ;
+    Chroma m_refTotals;
+    TFeature m_refFeature;
+    Signal m_other;
     int m_blockSize;
-    vector<double> m_sum[2];
     int m_frameCount;
+
+    CQParameters paramsForTuningFrequency(double hz) const;
+    TFeature computeFeatureFromTotals(const Chroma &totals) const;
+    TFeature computeFeatureFromSignal(const Signal &signal, double hz) const;
+    double featureDistance(const TFeature &other, int rotation = 0) const;
+    int findBestRotation(const TFeature &other) const;
+
+    mutable std::map<string, int> m_outputs;
 };