changeset 119:2020c73dc997 pvoc

Phase vocoder: provide time-domain and freq-domain inputs separately; update tests etc
author Chris Cannam
date Thu, 03 Oct 2013 12:58:36 +0100
parents 4920d100b290
children b0e98fcfacd7
files dsp/onsets/DetectionFunction.cpp dsp/onsets/DetectionFunction.h dsp/phasevocoder/PhaseVocoder.cpp dsp/phasevocoder/PhaseVocoder.h qm-dsp.pro tests/Makefile tests/TestPhaseVocoder.cpp
diffstat 7 files changed, 114 insertions(+), 46 deletions(-) [+]
line wrap: on
line diff
--- a/dsp/onsets/DetectionFunction.cpp	Wed Oct 02 18:22:06 2013 +0100
+++ b/dsp/onsets/DetectionFunction.cpp	Thu Oct 03 12:58:36 2013 +0100
@@ -63,16 +63,16 @@
     m_magPeaks = new double[ m_halfLength ];
     memset(m_magPeaks,0, m_halfLength*sizeof(double));
 
-    // See note in process(const double *) below
+    // See note in processTimeDomain below
     int actualLength = MathUtilities::previousPowerOfTwo(m_dataLength);
     m_phaseVoc = new PhaseVocoder(actualLength, m_stepSize);
 
-    m_DFWindowedFrame = new double[ m_dataLength ];
     m_magnitude = new double[ m_halfLength ];
     m_thetaAngle = new double[ m_halfLength ];
     m_unwrapped = new double[ m_halfLength ];
 
     m_window = new Window<double>(HanningWindow, m_dataLength);
+    m_windowed = new double[ m_dataLength ];
 }
 
 void DetectionFunction::deInitialise()
@@ -84,16 +84,17 @@
 
     delete m_phaseVoc;
 
-    delete [] m_DFWindowedFrame;
     delete [] m_magnitude;
     delete [] m_thetaAngle;
+    delete [] m_windowed;
+    delete [] m_unwrapped;
 
     delete m_window;
 }
 
-double DetectionFunction::process( const double *TDomain )
+double DetectionFunction::processTimeDomain(const double *samples)
 {
-    m_window->cut( TDomain, m_DFWindowedFrame );
+    m_window->cut(samples, m_windowed);
 
     // Our own FFT implementation supports power-of-two sizes only.
     // If we have to use this implementation (as opposed to the
@@ -112,20 +113,19 @@
         }
     }
 
-    m_phaseVoc->process(m_DFWindowedFrame, m_magnitude,
-                        m_thetaAngle, m_unwrapped);
+    m_phaseVoc->processTimeDomain(m_windowed, 
+                                  m_magnitude, m_thetaAngle, m_unwrapped);
 
     if (m_whiten) whiten();
 
     return runDF();
 }
 
-double DetectionFunction::process( const double *magnitudes, const double *phases )
+double DetectionFunction::processFrequencyDomain(const double *reals,
+                                                 const double *imags)
 {
-    for (size_t i = 0; i < m_halfLength; ++i) {
-        m_magnitude[i] = magnitudes[i];
-        m_thetaAngle[i] = phases[i];
-    }
+    m_phaseVoc->processFrequencyDomain(reals, imags,
+                                       m_magnitude, m_thetaAngle, m_unwrapped);
 
     if (m_whiten) whiten();
 
@@ -240,7 +240,6 @@
 	m_phaseHistory[ i ] = srcPhase[ i ];
     }
 	
-	
     return val;
 }
 
--- a/dsp/onsets/DetectionFunction.h	Wed Oct 02 18:22:06 2013 +0100
+++ b/dsp/onsets/DetectionFunction.h	Thu Oct 03 12:58:36 2013 +0100
@@ -43,8 +43,18 @@
     double* getSpectrumMagnitude();
     DetectionFunction( DFConfig Config );
     virtual ~DetectionFunction();
-    double process( const double* TDomain );
-    double process( const double* magnitudes, const double* phases );
+
+    /**
+     * Process a single time-domain frame of audio, provided as
+     * frameLength samples.
+     */
+    double processTimeDomain(const double* samples);
+
+    /**
+     * Process a single frequency-domain frame, provided as
+     * frameLength/2+1 real and imaginary component values.
+     */
+    double processFrequencyDomain(const double* reals, const double* imags);
 
 private:
     void whiten();
@@ -74,7 +84,7 @@
     double* m_phaseHistoryOld;
     double* m_magPeaks;
 
-    double* m_DFWindowedFrame; // Array for windowed analysis frame
+    double* m_windowed; // Array for windowed analysis frame
     double* m_magnitude; // Magnitude of analysis frame ( frequency domain )
     double* m_thetaAngle;// Phase of analysis frame ( frequency domain )
     double* m_unwrapped; // Unwrapped phase of analysis frame
--- a/dsp/phasevocoder/PhaseVocoder.cpp	Wed Oct 02 18:22:06 2013 +0100
+++ b/dsp/phasevocoder/PhaseVocoder.cpp	Thu Oct 03 12:58:36 2013 +0100
@@ -18,6 +18,8 @@
 #include "maths/MathUtilities.h"
 #include <math.h>
 
+#include <cassert>
+
 #include <iostream>
 using std::cerr;
 using std::endl;
@@ -27,6 +29,7 @@
     m_hop(hop)
 {
     m_fft = new FFTReal(m_n);
+    m_time = new double[m_n];
     m_real = new double[m_n];
     m_imag = new double[m_n];
     m_phase = new double[m_n/2 + 1];
@@ -42,10 +45,11 @@
 
 PhaseVocoder::~PhaseVocoder()
 {
-    delete [] m_unwrapped;
-    delete [] m_phase;
-    delete [] m_real;
-    delete [] m_imag;
+    delete[] m_unwrapped;
+    delete[] m_phase;
+    delete[] m_real;
+    delete[] m_imag;
+    delete[] m_time;
     delete m_fft;
 }
 
@@ -59,13 +63,29 @@
     }
 }
 
-void PhaseVocoder::process(double *src, double *mag, double *theta,
-                           double *unwrapped)
+void PhaseVocoder::processTimeDomain(const double *src,
+                                     double *mag, double *theta,
+                                     double *unwrapped)
 {
-    FFTShift(src);
-	
-    m_fft->forward(src, m_real, m_imag);
+    for (int i = 0; i < m_n; ++i) {
+        m_time[i] = src[i];
+    }
+    FFTShift(m_time);
+    m_fft->forward(m_time, m_real, m_imag);
+    getMagnitudes(mag);
+    getPhases(theta);
+    unwrapPhases(theta, unwrapped);
+}
 
+void PhaseVocoder::processFrequencyDomain(const double *reals, 
+                                          const double *imags,
+                                          double *mag, double *theta,
+                                          double *unwrapped)
+{
+    for (int i = 0; i < m_n/2 + 1; ++i) {
+        m_real[i] = reals[i];
+        m_imag[i] = imags[i];
+    }
     getMagnitudes(mag);
     getPhases(theta);
     unwrapPhases(theta, unwrapped);
@@ -102,6 +122,8 @@
 {
     cerr << "PhaseVocoder::unwrapPhases" << endl;
 
+//!!! if magnitude in a bin below a threshold, reset stored unwrapped phase angle for that bin
+
     for (int i = 0; i < m_n/2 + 1; ++i) {
 
         double omega = (2 * M_PI * m_hop * i) / m_n;
@@ -110,7 +132,7 @@
 
         unwrapped[i] = m_unwrapped[i] + omega + error;
 
-        cerr << "i = " << i << ", instantaneous phase = " << theta[i] << ", prev phase = " << m_phase[i] << ", omega = " << omega << ", expected = " << expected << ", error = " << error << ", unwrapped = " << unwrapped[i] << endl;
+        cerr << "i = " << i << ", (" << m_real[i] << "," << m_imag[i] << "), instantaneous phase = " << theta[i] << ", prev phase = " << m_phase[i] << ", omega = " << omega << ", expected = " << expected << ", error = " << error << ", unwrapped = " << unwrapped[i] << endl;
 
         m_phase[i] = theta[i];
         m_unwrapped[i] = unwrapped[i];
--- a/dsp/phasevocoder/PhaseVocoder.h	Wed Oct 02 18:22:06 2013 +0100
+++ b/dsp/phasevocoder/PhaseVocoder.h	Thu Oct 03 12:58:36 2013 +0100
@@ -21,6 +21,7 @@
 class PhaseVocoder  
 {
 public:
+    //!!! review: size must be a power of two, or not?
     PhaseVocoder(int size, int hop);
     virtual ~PhaseVocoder();
 
@@ -36,7 +37,22 @@
      * enough space for size/2 + 1 values. The redundant conjugate
      * half of the output is not returned.
      */
-    void process(double *src, double *mag, double *phase, double *unwrapped);
+    void processTimeDomain(const double *src,
+                           double *mag, double *phase, double *unwrapped);
+
+    /**
+     * Given one frame of frequency-domain samples, return the
+     * magnitudes, instantaneous phases, and unwrapped phases.
+     *
+     * reals and imags must each contain size/2+1 values (where size
+     * is the frame size value as passed to the PhaseVocoder
+     * constructor).
+     *
+     * mag, phase, and unwrapped must each be non-NULL and point to
+     * enough space for size/2+1 values.
+     */
+    void processFrequencyDomain(const double *reals, const double *imags,
+                                double *mag, double *phase, double *unwrapped);
 
     /**
      * Reset the stored phases to zero. Note that this may be
@@ -55,6 +71,7 @@
     int m_n;
     int m_hop;
     FFTReal *m_fft;
+    double *m_time;
     double *m_imag;
     double *m_real;
     double *m_phase;
--- a/qm-dsp.pro	Wed Oct 02 18:22:06 2013 +0100
+++ b/qm-dsp.pro	Thu Oct 03 12:58:36 2013 +0100
@@ -1,6 +1,6 @@
 
 TEMPLATE = lib
-CONFIG += staticlib warn_on release
+CONFIG += staticlib warn_on debug
 CONFIG -= qt
 OBJECTS_DIR = tmp_obj
 MOC_DIR = tmp_moc
--- a/tests/Makefile	Wed Oct 02 18:22:06 2013 +0100
+++ b/tests/Makefile	Thu Oct 03 12:58:36 2013 +0100
@@ -8,7 +8,7 @@
 TESTS	:= test-window test-fft test-pvoc
 
 all: $(TESTS)
-	for t in $(TESTS); do echo "Running $$t"; ./"$$t" || exit 1; done
+	for t in $(TESTS); do echo "Running $$t"; valgrind ./"$$t" || exit 1; done
 
 test-window: TestWindow.o $(LIBS)
 	$(CXX) -o $@ $^ $(LDFLAGS)
--- a/tests/TestPhaseVocoder.cpp	Wed Oct 02 18:22:06 2013 +0100
+++ b/tests/TestPhaseVocoder.cpp	Thu Oct 03 12:58:36 2013 +0100
@@ -53,18 +53,18 @@
     double phase[] = { 999, 999, 999, 999, 999, 999, 999 };
     double unw[]   = { 999, 999, 999, 999, 999, 999, 999 };
 
-    pvoc.process(frame, mag + 1, phase + 1, unw + 1);
+    pvoc.processTimeDomain(frame, mag + 1, phase + 1, unw + 1);
 
     double magExpected0[] = { 999, 0, 0, 4, 0, 0, 999 };
     COMPARE_ARRAY_EXACT(mag, magExpected0);
 
     double phaseExpected0[] = { 999, 0, 0, 0, 0, 0, 999 };
-    COMPARE_ARRAY_EXACT(phase, phaseExpected0);
+    COMPARE_ARRAY(phase, phaseExpected0);
 
     double unwExpected0[] = { 999, 0, 0, 0, 0, 0, 999 };
     COMPARE_ARRAY(unw, unwExpected0);
 
-    pvoc.process(frame, mag + 1, phase + 1, unw + 1);
+    pvoc.processTimeDomain(frame, mag + 1, phase + 1, unw + 1);
 
     double magExpected1[] = { 999, 0, 0, 4, 0, 0, 999 };
     COMPARE_ARRAY_EXACT(mag, magExpected1);
@@ -82,19 +82,19 @@
     //   mapped into (-pi,pi] range as pi, giving an unwrapped phase
     //   of 2*pi.
     //  
-    // * Bin 2 has expected unwrapped phase 2*pi, measured phase 0,
-    //   hence error 0 and unwrapped phase 2*pi.
+    // * Bin 2 has expected phase 2*pi, measured phase 0, hence error
+    //   0 and unwrapped phase 2*pi.
     //
     // * Bin 3 is like bin 1: it has expected phase 3*pi, measured
     //   phase 0, so phase error -pi and unwrapped phase 4*pi.
     //
-    // * Bin 4 (Nyquist) is like bin 2: expected phase 4*pi, measured
-    //   phase 0, hence error 0 and unwrapped phase 4*pi.
+    // * Bin 4 (Nyquist) has expected phase 4*pi, measured phase 0,
+    //   hence error 0 and unwrapped phase 4*pi.
 
     double unwExpected1[] = { 999, 0, 2*M_PI, 2*M_PI, 4*M_PI, 4*M_PI, 999 };
     COMPARE_ARRAY(unw, unwExpected1);
 
-    pvoc.process(frame, mag + 1, phase + 1, unw + 1);
+    pvoc.processTimeDomain(frame, mag + 1, phase + 1, unw + 1);
 
     double magExpected2[] = { 999, 0, 0, 4, 0, 0, 999 };
     COMPARE_ARRAY_EXACT(mag, magExpected2);
@@ -130,9 +130,7 @@
     double phase[] = { 999, 999, 999, 999, 999, 999, 999 };
     double unw[]   = { 999, 999, 999, 999, 999, 999, 999 };
 
-cerr << "process 0" << endl;
-
-    pvoc.process(data, mag + 1, phase + 1, unw + 1);
+    pvoc.processTimeDomain(data, mag + 1, phase + 1, unw + 1);
 
     double magExpected0[] = { 999, 0, 0, 4, 0, 0, 999 };
     COMPARE_ARRAY(mag, magExpected0);
@@ -143,23 +141,45 @@
     double unwExpected0[] = { 999, 0, 0, -M_PI/2, 0, 0, 999 };
     COMPARE_ARRAY(unw, unwExpected0);
 
-cerr << "process 1" << endl;
-
-    pvoc.process(data + 8, mag + 1, phase + 1, unw + 1);
+    pvoc.processTimeDomain(data + 8, mag + 1, phase + 1, unw + 1);
 
     double magExpected1[] = { 999, 0, 4, 4, 0, 0, 999 };
     COMPARE_ARRAY(mag, magExpected1);
 
     //!!! I don't know why [2] here is -M_PI and not M_PI; and I definitely don't know why [4] here is M_PI. Check these with care
+
+    // Derivation of unwrapped values:
+    // 
+    // * Bin 0 (DC) always has phase 0 and expected phase 0
+    //
+    // * Bin 1 has a new signal, a cosine starting with phase 0. But
+    //   because of the "FFT shift" which the phase vocoder carries
+    //   out to place zero phase in the centre of the (usually
+    //   windowed) frame, and because a single cycle at this frequency
+    //   spans the whole frame, this bin actually has measured phase
+    //   of either pi or -pi. (The shift doesn't affect those
+    //   higher-frequency bins whose signals fit exact multiples of a
+    //   cycle into a frame.) This maps into (-pi,pi] as pi, which
+    //   matches the expected phase, hence unwrapped phase is also pi.
+    //
+    // * Bin 2 has expected phase 3pi/2 (being the previous measured
+    //   phase of -pi/2 plus advance of 2pi). It has the same measured
+    //   phase as last time around, -pi/2, which is consistent with
+    //   the expected phase, so the unwrapped phase is 3pi/2.
+    //!!!
+    // * Bin 3 is a bit of a puzzle -- it has an effectively zero
+    //   magnitude but a non-zero measured phase. Spectral leakage?
+    //
+    // * Bin 4 (Nyquist) has expected phase 4*pi, measured phase 0,
+    //   hence error 0 and unwrapped phase 4*pi.
+
     double phaseExpected1[] = { 999, 0, -M_PI, -M_PI/2, M_PI, 0, 999 };
     COMPARE_ARRAY(phase, phaseExpected1);
 
     double unwExpected1[] = { 999, 0, M_PI, 3*M_PI/2, 3*M_PI, 4*M_PI, 999 };
     COMPARE_ARRAY(unw, unwExpected1);
 
-cerr << "process 2" << endl;
-
-    pvoc.process(data + 16, mag + 1, phase + 1, unw + 1);
+    pvoc.processTimeDomain(data + 16, mag + 1, phase + 1, unw + 1);
 
     double magExpected2[] = { 999, 0, 4, 4, 0, 0, 999 };
     COMPARE_ARRAY(mag, magExpected2);