changeset 280:9c403afdd9e9

* Various fixes related to the bar estimator code
author Chris Cannam <c.cannam@qmul.ac.uk>
date Tue, 10 Feb 2009 16:37:11 +0000
parents c8908cdc8c32
children 33e03341d541
files base/Window.h dsp/onsets/DetectionFunction.cpp dsp/onsets/DetectionFunction.h dsp/phasevocoder/PhaseVocoder.cpp dsp/phasevocoder/PhaseVocoder.h dsp/rateconversion/Decimator.cpp dsp/rateconversion/Decimator.h dsp/tempotracking/DownBeat.cpp dsp/tempotracking/DownBeat.h dsp/tempotracking/TempoTrackV2.cpp dsp/transforms/FFT.cpp maths/MathUtilities.cpp maths/MathUtilities.h qm-dsp.pro
diffstat 14 files changed, 191 insertions(+), 56 deletions(-) [+]
line wrap: on
line diff
--- a/base/Window.h	Tue Feb 10 12:52:43 2009 +0000
+++ b/base/Window.h	Tue Feb 10 16:37:11 2009 +0000
@@ -43,7 +43,7 @@
     virtual ~Window() { delete[] m_cache; }
     
     void cut(T *src) const { cut(src, src); }
-    void cut(T *src, T *dst) const {
+    void cut(const T *src, T *dst) const {
 	for (size_t i = 0; i < m_size; ++i) dst[i] = src[i] * m_cache[i];
     }
 
--- a/dsp/onsets/DetectionFunction.cpp	Tue Feb 10 12:52:43 2009 +0000
+++ b/dsp/onsets/DetectionFunction.cpp	Tue Feb 10 16:37:11 2009 +0000
@@ -83,18 +83,35 @@
     delete m_window;
 }
 
-double DetectionFunction::process( double *TDomain )
+double DetectionFunction::process( const double *TDomain )
 {
     m_window->cut( TDomain, m_DFWindowedFrame );
-	
-    m_phaseVoc->process( m_dataLength, m_DFWindowedFrame, m_magnitude, m_thetaAngle );
+
+    // Our own FFT implementation supports power-of-two sizes only.
+    // If we have to use this implementation (as opposed to the
+    // version of process() below that operates on frequency domain
+    // data directly), we will have to use the next smallest power of
+    // two from the block size.  Results may vary accordingly!
+
+    int actualLength = MathUtilities::previousPowerOfTwo(m_dataLength);
+
+    if (actualLength != m_dataLength) {
+        // Pre-fill mag and phase vectors with zero, as the FFT output
+        // will not fill the arrays
+        for (int i = actualLength/2; i < m_dataLength/2; ++i) {
+            m_magnitude[i] = 0;
+            m_thetaAngle[0] = 0;
+        }
+    }
+
+    m_phaseVoc->process(actualLength, m_DFWindowedFrame, m_magnitude, m_thetaAngle);
 
     if (m_whiten) whiten();
 
     return runDF();
 }
 
-double DetectionFunction::process( double *magnitudes, double *phases )
+double DetectionFunction::process( const double *magnitudes, const double *phases )
 {
     for (size_t i = 0; i < m_halfLength; ++i) {
         m_magnitude[i] = magnitudes[i];
--- a/dsp/onsets/DetectionFunction.h	Tue Feb 10 12:52:43 2009 +0000
+++ b/dsp/onsets/DetectionFunction.h	Tue Feb 10 16:37:11 2009 +0000
@@ -38,8 +38,8 @@
     double* getSpectrumMagnitude();
     DetectionFunction( DFConfig Config );
     virtual ~DetectionFunction();
-    double process( double* TDomain );
-    double process( double* magnitudes, double* phases );
+    double process( const double* TDomain );
+    double process( const double* magnitudes, const double* phases );
 
 private:
     void whiten();
--- a/dsp/phasevocoder/PhaseVocoder.cpp	Tue Feb 10 12:52:43 2009 +0000
+++ b/dsp/phasevocoder/PhaseVocoder.cpp	Tue Feb 10 16:37:11 2009 +0000
@@ -28,24 +28,12 @@
 
 void PhaseVocoder::FFTShift(unsigned int size, double *src)
 {
-    // IN-place Rotation of FFT arrays
-    unsigned int i;
-
-    shiftBuffer = new double[size/2];
-
-    for( i = 0; i < size/2; i++)
-    {
-	shiftBuffer[ i ] = src[ i ];
-	src[ i ] = src[ i + size/2];
+    const int hs = size/2;
+    for (int i = 0; i < hs; ++i) {
+        double tmp = src[i];
+        src[i] = src[i + hs];
+        src[i + hs] = tmp;
     }
-	
-    for( i =size/2; i < size;  i++)
-    {
-	src[ i ] = shiftBuffer[ i -(size/2)];
-    }
-
-    delete [] shiftBuffer;
-
 }
 
 void PhaseVocoder::process(unsigned int size, double *src, double *mag, double *theta)
--- a/dsp/phasevocoder/PhaseVocoder.h	Tue Feb 10 12:52:43 2009 +0000
+++ b/dsp/phasevocoder/PhaseVocoder.h	Tue Feb 10 16:37:11 2009 +0000
@@ -19,14 +19,13 @@
     virtual ~PhaseVocoder();
 
     void process( unsigned int size, double* src, double* mag, double* theta);
-    void FFTShift( unsigned int size, double* src);
 
 protected:
     void getPhase(unsigned int size, double *theta, double *real, double *imag);
     void coreFFT( unsigned int NumSamples, double *RealIn, double* ImagIn, double *RealOut, double *ImagOut);
     void getMagnitude( unsigned int size, double* mag, double* real, double* imag);
+    void FFTShift( unsigned int size, double* src);
 
-    double* shiftBuffer;
     double* imagOut;
     double* realOut;
 
--- a/dsp/rateconversion/Decimator.cpp	Tue Feb 10 12:52:43 2009 +0000
+++ b/dsp/rateconversion/Decimator.cpp	Tue Feb 10 16:37:11 2009 +0000
@@ -170,6 +170,28 @@
 
 }
 
+void Decimator::doAntiAlias(const float *src, double *dst, unsigned int length)
+{
+
+    for( unsigned int i = 0; i < length; i++ )
+    {
+	Input = (double)src[ i ];
+
+	Output = Input * b[ 0 ] + o1;
+
+	o1 = Input * b[ 1 ] - Output * a[ 1 ] + o2;
+	o2 = Input * b[ 2 ] - Output * a[ 2 ] + o3;
+	o3 = Input * b[ 3 ] - Output * a[ 3 ] + o4;
+	o4 = Input * b[ 4 ] - Output * a[ 4 ] + o5;
+	o5 = Input * b[ 5 ] - Output * a[ 5 ] + o6;
+	o6 = Input * b[ 6 ] - Output * a[ 6 ] + o7;
+	o7 = Input * b[ 7 ] - Output * a[ 7 ] ;
+
+	dst[ i ] = Output;
+    }
+
+}
+
 void Decimator::process(const double *src, double *dst)
 {
     if( m_decFactor != 1 )
@@ -183,3 +205,17 @@
 	dst[ idx++ ] = decBuffer[ m_decFactor * i ];
     }
 }
+
+void Decimator::process(const float *src, float *dst)
+{
+    if( m_decFactor != 1 )
+    {
+	doAntiAlias( src, decBuffer, m_inputLength );
+    }
+    unsigned idx = 0;
+
+    for( unsigned int i = 0; i < m_outputLength; i++ )
+    {
+	dst[ idx++ ] = decBuffer[ m_decFactor * i ];
+    }
+}
--- a/dsp/rateconversion/Decimator.h	Tue Feb 10 12:52:43 2009 +0000
+++ b/dsp/rateconversion/Decimator.h	Tue Feb 10 16:37:11 2009 +0000
@@ -14,7 +14,7 @@
 {
 public:
     void process( const double* src, double* dst );
-    void doAntiAlias( const double* src, double* dst, unsigned int length );
+    void process( const float* src, float* dst );
 
     /**
      * Construct a Decimator to operate on input blocks of length
@@ -36,6 +36,8 @@
     void resetFilter();
     void deInitialise();
     void initialise( unsigned int inLength, unsigned int decFactor );
+    void doAntiAlias( const double* src, double* dst, unsigned int length );
+    void doAntiAlias( const float* src, double* dst, unsigned int length );
 
     unsigned int m_inputLength;
     unsigned int m_outputLength;
--- a/dsp/tempotracking/DownBeat.cpp	Tue Feb 10 12:52:43 2009 +0000
+++ b/dsp/tempotracking/DownBeat.cpp	Tue Feb 10 16:37:11 2009 +0000
@@ -12,6 +12,7 @@
 
 #include "maths/MathAliases.h"
 #include "maths/MathUtilities.h"
+#include "maths/KLDivergence.h"
 #include "dsp/transforms/FFT.h"
 
 #include <iostream>
@@ -20,6 +21,7 @@
 DownBeat::DownBeat(float originalSampleRate,
                    size_t decimationFactor,
                    size_t dfIncrement) :
+    m_bpb(0),
     m_rate(originalSampleRate),
     m_factor(decimationFactor),
     m_increment(dfIncrement),
@@ -34,9 +36,8 @@
     // beat frame size is next power of two up from 1.3 seconds at the
     // downsampled rate (happens to produce 4096 for 44100 or 48000 at
     // 16x decimation, which is our expected normal situation)
-    int bfs = int((m_rate / decimationFactor) * 1.3);
-    m_beatframesize = 1;
-    while (bfs) { bfs >>= 1; m_beatframesize <<= 1; }
+    m_beatframesize = MathUtilities::nextPowerOfTwo
+        (int((m_rate / decimationFactor) * 1.3));
     std::cerr << "rate = " << m_rate << ", bfs = " << m_beatframesize << std::endl;
     m_beatframe = new double[m_beatframesize];
     m_fftRealOut = new double[m_beatframesize];
@@ -55,43 +56,60 @@
 }
 
 void
+DownBeat::setBeatsPerBar(int bpb)
+{
+    m_bpb = bpb;
+}
+
+void
 DownBeat::makeDecimators()
 {
     if (m_factor < 2) return;
     int highest = Decimator::getHighestSupportedFactor();
     if (m_factor <= highest) {
         m_decimator1 = new Decimator(m_increment, m_factor);
+        std::cerr << "DownBeat: decimator 1 factor " << m_factor << ", size " << m_increment << std::endl;
         return;
     }
     m_decimator1 = new Decimator(m_increment, highest);
+    std::cerr << "DownBeat: decimator 1 factor " << highest << ", size " << m_increment << std::endl;
     m_decimator2 = new Decimator(m_increment / highest, m_factor / highest);
-    m_decbuf = new double[m_factor / highest];
+    std::cerr << "DownBeat: decimator 2 factor " << m_factor / highest << ", size " << m_increment / highest << std::endl;
+    m_decbuf = new float[m_increment / highest];
 }
 
 void
-DownBeat::pushAudioBlock(const double *audio)
+DownBeat::pushAudioBlock(const float *audio)
 {
     if (m_buffill + (m_increment / m_factor) > m_bufsiz) {
         if (m_bufsiz == 0) m_bufsiz = m_increment * 16;
         else m_bufsiz = m_bufsiz * 2;
         if (!m_buffer) {
-            m_buffer = (double *)malloc(m_bufsiz * sizeof(double));
+            m_buffer = (float *)malloc(m_bufsiz * sizeof(float));
         } else {
             std::cerr << "DownBeat::pushAudioBlock: realloc m_buffer to " << m_bufsiz << std::endl;
-            m_buffer = (double *)realloc(m_buffer, m_bufsiz * sizeof(double));
+            m_buffer = (float *)realloc(m_buffer, m_bufsiz * sizeof(float));
         }
     }
     if (!m_decimator1) makeDecimators();
+    float rmsin = 0, rmsout = 0;
+    for (int i = 0; i < m_increment; ++i) {
+        rmsin += audio[i] * audio[i];
+    }
     if (m_decimator2) {
         m_decimator1->process(audio, m_decbuf);
         m_decimator2->process(m_decbuf, m_buffer + m_buffill);
     } else {
         m_decimator1->process(audio, m_buffer + m_buffill);
     }
+    for (int i = 0; i < m_increment / m_factor; ++i) {
+        rmsout += m_buffer[m_buffill + i] * m_buffer[m_buffill + i];
+    }
+    std::cerr << "pushAudioBlock: rms in " << sqrt(rmsin) << ", out " << sqrt(rmsout) << std::endl;
     m_buffill += m_increment / m_factor;
 }
     
-const double *
+const float *
 DownBeat::getBufferedAudio(size_t &length) const
 {
     length = m_buffill;
@@ -99,7 +117,15 @@
 }
 
 void
-DownBeat::findDownBeats(const double *audio,
+DownBeat::resetAudioBuffer()
+{
+    if (m_buffer) free(m_buffer);
+    m_buffill = 0;
+    m_bufsiz = 0;
+}
+
+void
+DownBeat::findDownBeats(const float *audio,
                         size_t audioLength,
                         const d_vec_t &beats,
                         i_vec_t &downbeats)
@@ -124,7 +150,7 @@
         // into beat frame buffer
 
         size_t beatstart = (beats[i] * m_increment) / m_factor;
-        size_t beatend = (beats[i] * m_increment) / m_factor;
+        size_t beatend = (beats[i+1] * m_increment) / m_factor;
         if (beatend >= audioLength) beatend = audioLength - 1;
         if (beatend < beatstart) beatend = beatstart;
         size_t beatlen = beatend - beatstart;
@@ -134,10 +160,14 @@
         // the size varies, it's easier to do this by hand than use
         // our Window abstraction.)
 
+        float rms = 0;
         for (size_t j = 0; j < beatlen; ++j) {
             double mul = 0.5 * (1.0 - cos(TWO_PI * (double(j) / double(beatlen))));
             m_beatframe[j] = audio[beatstart + j] * mul;
+            rms += m_beatframe[j] * m_beatframe[j];
         }
+        rms = sqrt(rms);
+        std::cerr << "beat " << i << ": audio rms " << rms << std::endl;
 
         for (size_t j = beatlen; j < m_beatframesize; ++j) {
             m_beatframe[j] = 0.0;
@@ -162,6 +192,9 @@
         // Calculate JS divergence between new and old spectral frames
 
         specdiff.push_back(measureSpecDiff(oldspec, newspec));
+//        specdiff.push_back(KLDivergence().distanceDistribution(oldspec, newspec, false));
+
+        std::cerr << "specdiff: " << specdiff[specdiff.size()-1] << std::endl;
 
         // Copy newspec across to old
 
@@ -172,16 +205,24 @@
 
     // We now have all spectral difference measures in specdiff
 
-    uint timesig = 4;   // SHOULD REPLACE THIS WITH A FIND_METER FUNCTION - OR USER PARAMETER
+    uint timesig = m_bpb;
+    if (timesig == 0) timesig = 4;
+
     d_vec_t dbcand(timesig); // downbeat candidates
 
+    for (int beat = 0; beat < timesig; ++beat) {
+        dbcand[beat] = 0;
+    }
+
     // look for beat transition which leads to greatest spectral change
     for (int beat = 0; beat < timesig; ++beat) {
-        for (int example = beat; example < specdiff.size(); ++example) {
+        for (int example = beat; example < specdiff.size(); example += timesig) {
             dbcand[beat] += (specdiff[example]) / timesig;
         }
+        std::cerr << "dbcand[" << beat << "] = " << dbcand[beat] << std::endl;
     }
 
+
     // first downbeat is beat at index of maximum value of dbcand
     int dbind = MathUtilities::getMax(dbcand);
 
--- a/dsp/tempotracking/DownBeat.h	Tue Feb 10 12:52:43 2009 +0000
+++ b/dsp/tempotracking/DownBeat.h	Tue Feb 10 16:37:11 2009 +0000
@@ -45,6 +45,8 @@
              size_t dfIncrement);
     ~DownBeat();
 
+    void setBeatsPerBar(int bpb);
+
     /**
      * Estimate which beats are down-beats.
      * 
@@ -59,7 +61,7 @@
      * The returned downbeat array contains a series of indices to the
      * beats array.
      */
-    void findDownBeats(const double *audio, // downsampled
+    void findDownBeats(const float *audio, // downsampled
                        size_t audioLength, // after downsampling
                        const vector<double> &beats,
                        vector<int> &downbeats);
@@ -73,12 +75,17 @@
      * Call getBufferedAudio() to retrieve the results after all
      * blocks have been processed.
      */
-    void pushAudioBlock(const double *audio);
+    void pushAudioBlock(const float *audio);
     
     /**
      * Retrieve the accumulated audio produced by pushAudioBlock calls.
      */
-    const double *getBufferedAudio(size_t &length) const;
+    const float *getBufferedAudio(size_t &length) const;
+
+    /**
+     * Clear any buffered downsampled audio data.
+     */
+    void resetAudioBuffer();
 
 private:
     typedef vector<int> i_vec_t;
@@ -89,13 +96,14 @@
     void makeDecimators();
     double measureSpecDiff(d_vec_t oldspec, d_vec_t newspec);
 
+    int m_bpb;
     float m_rate;
     size_t m_factor;
     size_t m_increment;
     Decimator *m_decimator1;
     Decimator *m_decimator2;
-    double *m_buffer;
-    double *m_decbuf;
+    float *m_buffer;
+    float *m_decbuf;
     size_t m_bufsiz;
     size_t m_buffill;
     size_t m_beatframesize;
--- a/dsp/tempotracking/TempoTrackV2.cpp	Tue Feb 10 12:52:43 2009 +0000
+++ b/dsp/tempotracking/TempoTrackV2.cpp	Tue Feb 10 16:37:11 2009 +0000
@@ -326,6 +326,7 @@
             lastind = i*step+j;
             beat_period[lastind] = bestpath[i];
         }
+        std::cerr << "bestpath[" << i << "] = " << bestpath[i] << " (used for beat_periods " << i*step << " to " << i*step+step-1 << ")" << std::endl;
     }
 
     //fill in the last values...
@@ -435,6 +436,8 @@
 
         cumscore[i] = alpha*vv + (1.-alpha)*localscore[i];
         backlink[i] = i+prange_min+xx;
+
+        std::cerr << "backlink[" << i << "] <= " << backlink[i] << std::endl;
     }
 
     // STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR
@@ -450,8 +453,10 @@
     //  BACKTRACKING FROM THE END TO THE BEGINNING.. MAKING SURE NOT TO GO BEFORE SAMPLE 0
     i_vec_t ibeats;
     ibeats.push_back(startpoint);
+    std::cerr << "startpoint = " << startpoint << std::endl;
     while (backlink[ibeats.back()] > 0)
     {
+        std::cerr << "backlink[" << ibeats.back() << "] = " << backlink[ibeats.back()] << std::endl;
         ibeats.push_back(backlink[ibeats.back()]);
     }
   
--- a/dsp/transforms/FFT.cpp	Tue Feb 10 12:52:43 2009 +0000
+++ b/dsp/transforms/FFT.cpp	Tue Feb 10 16:37:11 2009 +0000
@@ -8,8 +8,13 @@
 */
 
 #include "FFT.h"
+
+#include "maths/MathUtilities.h"
+
 #include <cmath>
 
+#include <iostream>
+
 //////////////////////////////////////////////////////////////////////
 // Construction/Destruction
 //////////////////////////////////////////////////////////////////////
@@ -39,8 +44,11 @@
     double angle_numerator = 2.0 * M_PI;
     double tr, ti;
 
-    if( !isPowerOfTwo(p_nSamples) )
+    if( !MathUtilities::isPowerOfTwo(p_nSamples) )
     {
+        std::cerr << "ERROR: FFT::process: Non-power-of-two FFT size "
+                  << p_nSamples << " not supported in this implementation"
+                  << std::endl;
 	return;
     }
 
@@ -118,15 +126,6 @@
     }
 }
 
-bool FFT::isPowerOfTwo(unsigned int p_nX)
-{
-    if( p_nX < 2 ) return false;
-
-    if( p_nX & (p_nX-1) ) return false;
-
-    return true;
-}
-
 unsigned int FFT::numberOfBitsNeeded(unsigned int p_nSamples)
 {	
     int i;
--- a/maths/MathUtilities.cpp	Tue Feb 10 12:52:43 2009 +0000
+++ b/maths/MathUtilities.cpp	Tue Feb 10 16:37:11 2009 +0000
@@ -353,4 +353,39 @@
     }
 }
 
-        
+bool
+MathUtilities::isPowerOfTwo(int x)
+{
+    if (x < 2) return false;
+    if (x & (x-1)) return false;
+    return true;
+}
+
+int
+MathUtilities::nextPowerOfTwo(int x)
+{
+    if (isPowerOfTwo(x)) return x;
+    int n = 1;
+    while (x) { x >>= 1; n <<= 1; }
+    return n;
+}
+
+int
+MathUtilities::previousPowerOfTwo(int x)
+{
+    if (isPowerOfTwo(x)) return x;
+    int n = 1;
+    x >>= 1;
+    while (x) { x >>= 1; n <<= 1; }
+    return n;
+}
+
+int
+MathUtilities::nearestPowerOfTwo(int x)
+{
+    if (isPowerOfTwo(x)) return x;
+    int n0 = previousPowerOfTwo(x), n1 = nearestPowerOfTwo(x);
+    if (x - n0 < n1 - x) return n0;
+    else return n1;
+}
+
--- a/maths/MathUtilities.h	Tue Feb 10 12:52:43 2009 +0000
+++ b/maths/MathUtilities.h	Tue Feb 10 16:37:11 2009 +0000
@@ -52,6 +52,11 @@
 
     // moving mean threshholding:
     static void adaptiveThreshold(std::vector<double> &data);
+
+    static bool isPowerOfTwo(int x);
+    static int nextPowerOfTwo(int x); // e.g. 1300 -> 2048, 2048 -> 2048
+    static int previousPowerOfTwo(int x); // e.g. 1300 -> 1024, 2048 -> 2048
+    static int nearestPowerOfTwo(int x); // e.g. 1300 -> 1024, 1700 -> 2048
 };
 
 #endif
--- a/qm-dsp.pro	Tue Feb 10 12:52:43 2009 +0000
+++ b/qm-dsp.pro	Tue Feb 10 16:37:11 2009 +0000
@@ -1,5 +1,5 @@
 TEMPLATE = lib
-CONFIG += warn_on staticlib release
+CONFIG += warn_on staticlib debug
 CONFIG -= qt
 OBJECTS_DIR = tmp_obj
 MOC_DIR = tmp_moc