changeset 147:1060a19e2334

Report validity of CQKernel construction, and avoid NaN values for invalid parameters. Also documentation.
author Chris Cannam <c.cannam@qmul.ac.uk>
date Thu, 10 Jul 2014 12:19:39 +0100
parents 0cbd9c904dbb
children b1db858e0541
files .hgignore Makefile.inc Makefile.linux cq/CQBase.h cq/CQInverse.h cq/CQKernel.h cq/CQSpectrogram.h cq/ConstantQ.h src/CQKernel.cpp src/CQSpectrogram.cpp src/ConstantQ.cpp vamp/CQChromaVamp.cpp vamp/CQVamp.cpp
diffstat 13 files changed, 213 insertions(+), 48 deletions(-) [+]
line wrap: on
line diff
--- a/.hgignore	Fri Jun 20 10:33:11 2014 +0100
+++ b/.hgignore	Thu Jul 10 12:19:39 2014 +0100
@@ -6,6 +6,7 @@
 *.dylib
 *.dll
 *.class
+doc/
 
 syntax: re
 ^test/Test[^.]*$
--- a/Makefile.inc	Fri Jun 20 10:33:11 2014 +0100
+++ b/Makefile.inc	Thu Jul 10 12:19:39 2014 +0100
@@ -11,7 +11,7 @@
 
 CXX	?= g++
 CC	?= gcc
-VALGRIND	?= valgrind -q
+#VALGRIND	?= valgrind -q
 
 GENERAL_FLAGS	:= -I. -I$(VAMPSDK_DIR) -I$(INC_DIR) -I$(LIB_DIR) -I$(KFFT_DIR) -I$(KFFT_DIR)/tools -Dkiss_fft_scalar=double
 
--- a/Makefile.linux	Fri Jun 20 10:33:11 2014 +0100
+++ b/Makefile.linux	Thu Jul 10 12:19:39 2014 +0100
@@ -1,5 +1,5 @@
 
-CFLAGS := -Wall -O3 -fPIC -I../vamp-plugin-sdk/
+CFLAGS := -Wall -O3 -ffast-math -msse -msse2 -mfpmath=sse -fPIC -I../vamp-plugin-sdk/
 #CFLAGS := -g -fPIC -I../vamp-plugin-sdk
 
 CXXFLAGS := $(CFLAGS)
--- a/cq/CQBase.h	Fri Jun 20 10:33:11 2014 +0100
+++ b/cq/CQBase.h	Thu Jul 10 12:19:39 2014 +0100
@@ -35,25 +35,98 @@
 #include <vector>
 #include <complex>
 
-class CQBase // interface class
+/**
+ * Interface class for Constant-Q implementations, containing common
+ * type declarations and means to query configuration parameters.
+ */
+class CQBase
 {
 public:
+    /// A single complex-valued sample.
     typedef std::complex<double> Complex;
+
+    /// A series of real-valued samples ordered in time.
     typedef std::vector<double> RealSequence;
+
+    /// A series of real-valued samples ordered by bin (frequency or similar).
     typedef std::vector<double> RealColumn;
+
+    /// A series of complex-valued samples ordered in time.
     typedef std::vector<Complex> ComplexSequence;
+
+    /// A series of complex-valued samples ordered by bin (frequency or similar).
     typedef std::vector<Complex> ComplexColumn;
+
+    /// A matrix of real-valued samples, indexed by time then bin number.
     typedef std::vector<RealColumn> RealBlock;
+
+    /// A matrix of complex-valued samples, indexed by time then bin number.
     typedef std::vector<ComplexColumn> ComplexBlock;
 
+    /**
+     * Return true if the Constant-Q implementation was successfully
+     * constructed, with a valid set of initialisation parameters.
+     */
+    virtual bool isValid() const = 0;
+    
+    /**
+     * Return the sample rate used when constructing the specific
+     * Constant-Q implementation.
+     */
     virtual double getSampleRate() const = 0;
+
+    /**
+     * Return the number of bins per octave specified when
+     * constructing the Constant-Q implementation.
+     */
     virtual int getBinsPerOctave() const = 0;
+
+    /**
+     * Return the number of octaves spanned by the Constant-Q
+     * transform.
+     */
     virtual int getOctaves() const = 0; 
+
+    /**
+     * Return the total number of bins in each Constant-Q column
+     * (i.e. bins-per-octave times number of octaves).
+     */
     virtual int getTotalBins() const = 0;
+
+    /**
+     * Return the spacing, in samples at the sample rate returned from
+     * getSampleRate(), between one column and the next.
+     */
     virtual int getColumnHop() const = 0;
+
+    /**
+     * Return the latency of Constant-Q calculation, in samples at the
+     * sample rate returned from getSampleRate().
+     */
     virtual int getLatency() const = 0;
+
+    /**
+     * Return the maximum frequency of the Constant-Q output, i.e. the
+     * frequency of the highest bin in the output. This will normally
+     * be the same as the maximum frequency passed to the constructor
+     * of the specific Constant-Q implementation.
+     */
     virtual double getMaxFrequency() const = 0;
-    virtual double getMinFrequency() const = 0; // actual min, not that passed to ctor
+
+    /**
+     * Return the minimum frequency of the Constant-Q output, i.e. the
+     * frequency of the lowest bin in the output. This is derived from
+     * the maximum frequency and octave count, and is not necessarily
+     * the same as any minimum frequency requested when constructing
+     * the Constant-Q implementation.
+     */
+    virtual double getMinFrequency() const = 0;
+
+    /**
+     * Return the frequency of a given bin in the Constant-Q
+     * output. This actually maps a continuous "bin scale" value to 
+     * frequency: the bin parameter does not have to be an integer.
+     */
     virtual double getBinFrequency(double bin) const = 0;
 };
 
--- a/cq/CQInverse.h	Fri Jun 20 10:33:11 2014 +0100
+++ b/cq/CQInverse.h	Thu Jul 10 12:19:39 2014 +0100
@@ -38,12 +38,32 @@
 class Resampler;
 class FFTReal;
 
+/**
+ * Calculate an inverse constant-Q transform. The input must be the
+ * same representation as returned as output of a \ref ConstantQ
+ * object with the same parameters. The output is a time-domain
+ * signal.
+ *
+ * Note that you cannot perform an inverse transform from the
+ * magnitude-only output of \ref CQSpectrogram; you need the complex
+ * valued data from \ref ConstantQ.
+ *
+ * Our implementation of the Constant-Q transform is not exactly
+ * invertible, and this produces only an approximation of the original
+ * signal (see publications for details).
+ */
 class CQInverse : public CQBase
 {
 public:
+    /**
+     * Construct an inverse Constant-Q transform object using the
+     * given transform parameters.
+     */
     CQInverse(CQParameters params);
     virtual ~CQInverse();
 
+    // CQBase methods, see CQBase.h for documentation
+    virtual bool isValid() const { return m_kernel && m_kernel->isValid(); }
     virtual double getSampleRate() const { return m_sampleRate; }
     virtual int getBinsPerOctave() const { return m_binsPerOctave; }
     virtual int getOctaves() const { return m_octaves; }
@@ -54,11 +74,18 @@
     virtual double getMinFrequency() const; // actual min, not that passed to ctor
     virtual double getBinFrequency(double bin) const;
 
-    // Input is the format produced by ConstantQ class,
-    // i.e. uninterpolated complex, not the real-valued stuff produced
-    // by CQSpectrogram
+    /**
+     * Given a series of constant-Q columns in the form produced by
+     * the \ref ConstantQ class, return a series of time-domain
+     * samples resulting from approximately inverting the constant-Q
+     * transform.
+     */
+    RealSequence process(const ComplexBlock &);
 
-    RealSequence process(const ComplexBlock &);
+    /**
+     * Return the remaining time-domain samples following the end of
+     * processing.
+     */
     RealSequence getRemainingOutput();
 
 private:
--- a/cq/CQKernel.h	Fri Jun 20 10:33:11 2014 +0100
+++ b/cq/CQKernel.h	Thu Jul 10 12:19:39 2014 +0100
@@ -44,6 +44,8 @@
 public:
     CQKernel(CQParameters params);
     ~CQKernel();
+
+    bool isValid() const { return m_valid; }
     
     struct Properties {
         double sampleRate;
@@ -70,6 +72,7 @@
 private:
     const CQParameters m_inparams;
     Properties m_p;
+    bool m_valid;
     FFT *m_fft;
 
     struct KernelMatrix {
@@ -79,7 +82,7 @@
     KernelMatrix m_kernel;
 
     std::vector<double> makeWindow(int len) const;
-    void generateKernel();
+    bool generateKernel();
     void finaliseKernel();
 };
 
--- a/cq/CQSpectrogram.h	Fri Jun 20 10:33:11 2014 +0100
+++ b/cq/CQSpectrogram.h	Thu Jul 10 12:19:39 2014 +0100
@@ -34,18 +34,36 @@
 
 #include "ConstantQ.h"
 
+/**
+ * Calculate a dense constant-Q magnitude spectrogram from time-domain
+ * input. The input of each \ref process call is a single frame of
+ * time-domain samples; the output is a series of fixed-height
+ * columns. See \ref process for details.
+ *
+ * If you need the full complex-valued constant-Q output, you must use
+ * the \ref ConstantQ class instead.
+ */
 class CQSpectrogram : public CQBase
 {
 public:
     enum Interpolation {
-	InterpolateZeros, // leave empty cells as zero
-	InterpolateHold, // repeat prior cell
-	InterpolateLinear, // linear interpolation between consecutive time cells
+        /// leave empty cells as zero
+	InterpolateZeros,
+        /// replace empty cells with a repeat of the previous column
+	InterpolateHold,
+        /// perform linear interpolation between consecutive time cells
+	InterpolateLinear,
     };
 
+    /**
+     * Construct a Constant-Q magnitude spectrogram object using the
+     * given transform parameters.
+     */
     CQSpectrogram(CQParameters params, Interpolation interpolation);
     virtual ~CQSpectrogram();
 
+    // CQBase methods, see CQBase.h for documentation
+    virtual bool isValid() const { return m_cq.isValid(); }
     virtual double getSampleRate() const { return m_cq.getSampleRate(); }
     virtual int getBinsPerOctave() const { return m_cq.getBinsPerOctave(); }
     virtual int getOctaves() const { return m_cq.getOctaves(); }
@@ -62,7 +80,12 @@
      * not fit into a constant-Q processing block) are saved for the
      * next call to process or getRemainingBlocks.
      *
-     * Each column contains a series of constant-Q bin value
+     * The input is assumed to be a single frame of time-domain sample
+     * values, such that consecutive calls to \ref process receive
+     * contiguous frames from the source signal. Each frame may be of
+     * any length in samples.
+     *
+     * Each output column contains a series of constant-Q bin value
      * magnitudes, ordered from highest to lowest frequency.
      *  
      * The columns are all of the same height, but they might not all
--- a/cq/ConstantQ.h	Fri Jun 20 10:33:11 2014 +0100
+++ b/cq/ConstantQ.h	Thu Jul 10 12:19:39 2014 +0100
@@ -41,17 +41,25 @@
 
 /**
  * Calculate a complex sparse constant-Q representation from
- * time-domain input.
+ * time-domain input. The input of each \ref process call is a single
+ * frame of time-domain samples; the output is a series of columns of
+ * varying height. See \ref process for details.
  *
- * For a real (magnitude-only) or interpolated representation, see
+ * For a real (magnitude-only) interpolated dense representation, see
  * CQSpectrogram.
  */
 class ConstantQ : public CQBase
 {
 public:
+    /**
+     * Construct a complex Constant-Q transform object using the given
+     * transform parameters.
+     */
     ConstantQ(CQParameters params);
     virtual ~ConstantQ();
 
+    // CQBase methods, see CQBase.h for documentation
+    virtual bool isValid() const { return m_kernel && m_kernel->isValid(); }
     virtual double getSampleRate() const { return m_sampleRate; }
     virtual int getBinsPerOctave() const { return m_binsPerOctave; }
     virtual int getOctaves() const { return m_octaves; }
@@ -66,20 +74,25 @@
      * Given a series of time-domain samples, return a series of
      * constant-Q columns. Any samples left over (that did not fit
      * into a constant-Q processing block) are saved for the next call
-     * to process or getRemainingBlocks.
+     * to process or getRemainingBlocks. 
      *
-     * Each column contains a series of constant-Q bin values ordered
-     * from highest to lowest frequency.
+     * The input is assumed to be a single frame of time-domain sample
+     * values, such that consecutive calls to \ref process receive
+     * contiguous frames from the source signal. Each frame may be of
+     * any length in samples.
      *
-     * Columns are of variable height: each will contain at least
-     * getBinsPerOctave() values, because the highest-frequency octave
-     * is always present, but a second octave (if requested) will
-     * appear only in alternate columns, a third octave only in every
-     * fourth column, and so on.
+     * Each output column contains a series of constant-Q bin values
+     * ordered from highest to lowest frequency.
+     *
+     * Output columns are of varying height: each will contain at
+     * least getBinsPerOctave() values, because the highest-frequency
+     * octave is always present, but a second octave (if requested)
+     * will appear only in alternate columns, a third octave only in
+     * every fourth column, and so on.
      *
      * If you need a format in which all columns are of equal height
-     * and every bin contains a value, use CQInterpolated instead of
-     * ConstantQ.
+     * and every bin contains a value, use \ref CQSpectrogram instead
+     * of ConstantQ.
      */
     ComplexBlock process(const RealSequence &);
 
--- a/src/CQKernel.cpp	Fri Jun 20 10:33:11 2014 +0100
+++ b/src/CQKernel.cpp	Thu Jul 10 12:19:39 2014 +0100
@@ -52,12 +52,13 @@
 
 CQKernel::CQKernel(CQParameters params) :
     m_inparams(params),
+    m_valid(false),
     m_fft(0)
 {
     m_p.sampleRate = params.sampleRate;
     m_p.maxFrequency = params.maxFrequency;
     m_p.binsPerOctave = params.binsPerOctave;
-    generateKernel();
+    m_valid = generateKernel();
 }
 
 CQKernel::~CQKernel()
@@ -113,7 +114,7 @@
     return win;
 }
 
-void
+bool
 CQKernel::generateKernel()
 {
     double q = m_inparams.q;
@@ -139,7 +140,7 @@
         m_p.atomsPerFrame = 0;
         m_p.lastCentre = 0;
         m_p.fftHop = 0;
-        return;
+        return false;
     }
 
     m_p.atomSpacing = round(minNK * atomHopFactor);
@@ -246,6 +247,7 @@
 #endif
 
     finaliseKernel();
+    return true;
 }
 
 static bool ccomparator(C &c1, C &c2)
@@ -298,11 +300,13 @@
     }
 
     double weight = double(m_p.fftHop) / m_p.fftSize;
-    weight /= MathUtilities::mean(wK.data(), wK.size());
+    if (!wK.empty()) {
+        weight /= MathUtilities::mean(wK.data(), wK.size());
+    }
     weight = sqrt(weight);
 
 #ifdef DEBUG_CQ_KERNEL    
-    cerr << "weight = " << weight << endl;
+    cerr << "weight = " << weight << " (from " << wK.size() << " elements in wK, ncols = " << ncols << ", q = " << q << ")" << endl;
 #endif
 
     // apply normalisation weight, make sparse, and store conjugate
--- a/src/CQSpectrogram.cpp	Fri Jun 20 10:33:11 2014 +0100
+++ b/src/CQSpectrogram.cpp	Thu Jul 10 12:19:39 2014 +0100
@@ -71,6 +71,14 @@
         int height = cq[i].size();
         RealColumn col(height, 0);
         for (int j = 0; j < height; ++j) {
+
+            if (isnan(cq[i][j].real())) {
+                cerr << "WARNING: NaN in real at (" << i << "," << j << ")" << endl;
+            }
+            if (isnan(cq[i][j].imag())) {
+                cerr << "WARNING: NaN in imag at (" << i << "," << j << ")" << endl;
+            }
+
             col[j] = abs(cq[i][j]);
         }
         spec.push_back(col);
--- a/src/ConstantQ.cpp	Fri Jun 20 10:33:11 2014 +0100
+++ b/src/ConstantQ.cpp	Thu Jul 10 12:19:39 2014 +0100
@@ -92,6 +92,10 @@
     m_kernel = new CQKernel(m_inparams);
     m_p = m_kernel->getProperties();
     
+    if (!m_kernel->isValid()) {
+        return;
+    }
+
     // Use exact powers of two for resampling rates. They don't have
     // to be related to our actual samplerate: the resampler only
     // cares about the ratio, but it only accepts integer source and
--- a/vamp/CQChromaVamp.cpp	Fri Jun 20 10:33:11 2014 +0100
+++ b/vamp/CQChromaVamp.cpp	Thu Jul 10 12:19:39 2014 +0100
@@ -223,13 +223,17 @@
     // appropriate number of octaves.
     m_minFrequency = midiPitchLimitFreq / pow(2, m_octaveCount + 1);
 
-    cerr << "lowest octave: " << m_lowestOctave << ", highest octave: "
-         << highestOctave << ", limit midi pitch: " << midiPitchLimit
-         << ", min freq " << m_minFrequency << ", max freq " << m_maxFrequency
-         << endl;
+//    cerr << "lowest octave: " << m_lowestOctave << ", highest octave: "
+//         << highestOctave << ", limit midi pitch: " << midiPitchLimit
+//         << ", min freq " << m_minFrequency << ", max freq " << m_maxFrequency
+//         << endl;
 
-    CQParameters p(m_inputSampleRate, m_minFrequency, m_maxFrequency, m_bpo);
-    m_cq = new CQSpectrogram(p, CQSpectrogram::InterpolateLinear);
+    reset();
+
+    if (!m_cq || !m_cq->isValid()) {
+        cerr << "CQVamp::initialise: Constant-Q parameters not valid! Not initialising" << endl;
+        return false;
+    }
 
     return true;
 }
@@ -237,12 +241,14 @@
 void
 CQChromaVamp::reset()
 {
-    if (m_cq) {
-	delete m_cq;
-        CQParameters p(m_inputSampleRate, m_minFrequency, m_maxFrequency, m_bpo);
-        m_cq = new CQSpectrogram(p, CQSpectrogram::InterpolateLinear);
-    }
+    cerr << "reset: rate " << m_inputSampleRate << ", minf " << m_minFrequency << ", maxf " << m_maxFrequency << ", bpo " << m_bpo << endl;
+
+    delete m_cq;
+    CQParameters p(m_inputSampleRate, m_minFrequency, m_maxFrequency, m_bpo);
+    m_cq = new CQSpectrogram(p, CQSpectrogram::InterpolateLinear);
+
     m_haveStartTime = false;
+    m_startTime = Vamp::RealTime::zeroTime;
     m_columnCount = 0;
 }
 
@@ -344,6 +350,7 @@
         // fold and invert to put low frequencies at the start
 
         int thisHeight = cqout[i].size();
+
 	for (int j = 0; j < thisHeight; ++j) {
 	    column[m_bpo - (j % m_bpo) - 1] += cqout[i][j];
 	}
--- a/vamp/CQVamp.cpp	Fri Jun 20 10:33:11 2014 +0100
+++ b/vamp/CQVamp.cpp	Thu Jul 10 12:19:39 2014 +0100
@@ -286,8 +286,12 @@
             (m_maxMIDIPitch, 0, m_tuningFrequency);
     }
 
-    CQParameters p(m_inputSampleRate, m_minFrequency, m_maxFrequency, m_bpo);
-    m_cq = new CQSpectrogram(p, m_interpolation);
+    reset();
+
+    if (!m_cq || !m_cq->isValid()) {
+        cerr << "CQVamp::initialise: Constant-Q parameters not valid! Not initialising" << endl;
+        return false;
+    }
 
     return true;
 }
@@ -295,11 +299,9 @@
 void
 CQVamp::reset()
 {
-    if (m_cq) {
-	delete m_cq;
-        CQParameters p(m_inputSampleRate, m_minFrequency, m_maxFrequency, m_bpo);
-        m_cq = new CQSpectrogram(p, m_interpolation);
-    }
+    delete m_cq;
+    CQParameters p(m_inputSampleRate, m_minFrequency, m_maxFrequency, m_bpo);
+    m_cq = new CQSpectrogram(p, m_interpolation);
     m_haveStartTime = false;
     m_columnCount = 0;
 }