Mercurial > hg > qm-dsp
changeset 344:5eb9c2387108
Phase vocoder: provide time-domain and freq-domain inputs separately; update tests etc
author | Chris Cannam <c.cannam@qmul.ac.uk> |
---|---|
date | Thu, 03 Oct 2013 12:58:36 +0100 |
parents | 24d8ea972643 |
children | 04d134031a15 |
files | dsp/onsets/DetectionFunction.cpp dsp/onsets/DetectionFunction.h dsp/phasevocoder/PhaseVocoder.cpp dsp/phasevocoder/PhaseVocoder.h qm-dsp.pro tests/Makefile tests/TestPhaseVocoder.cpp |
diffstat | 7 files changed, 114 insertions(+), 46 deletions(-) [+] |
line wrap: on
line diff
--- a/dsp/onsets/DetectionFunction.cpp Wed Oct 02 18:22:06 2013 +0100 +++ b/dsp/onsets/DetectionFunction.cpp Thu Oct 03 12:58:36 2013 +0100 @@ -63,16 +63,16 @@ m_magPeaks = new double[ m_halfLength ]; memset(m_magPeaks,0, m_halfLength*sizeof(double)); - // See note in process(const double *) below + // See note in processTimeDomain below int actualLength = MathUtilities::previousPowerOfTwo(m_dataLength); m_phaseVoc = new PhaseVocoder(actualLength, m_stepSize); - m_DFWindowedFrame = new double[ m_dataLength ]; m_magnitude = new double[ m_halfLength ]; m_thetaAngle = new double[ m_halfLength ]; m_unwrapped = new double[ m_halfLength ]; m_window = new Window<double>(HanningWindow, m_dataLength); + m_windowed = new double[ m_dataLength ]; } void DetectionFunction::deInitialise() @@ -84,16 +84,17 @@ delete m_phaseVoc; - delete [] m_DFWindowedFrame; delete [] m_magnitude; delete [] m_thetaAngle; + delete [] m_windowed; + delete [] m_unwrapped; delete m_window; } -double DetectionFunction::process( const double *TDomain ) +double DetectionFunction::processTimeDomain(const double *samples) { - m_window->cut( TDomain, m_DFWindowedFrame ); + m_window->cut(samples, m_windowed); // Our own FFT implementation supports power-of-two sizes only. // If we have to use this implementation (as opposed to the @@ -112,20 +113,19 @@ } } - m_phaseVoc->process(m_DFWindowedFrame, m_magnitude, - m_thetaAngle, m_unwrapped); + m_phaseVoc->processTimeDomain(m_windowed, + m_magnitude, m_thetaAngle, m_unwrapped); if (m_whiten) whiten(); return runDF(); } -double DetectionFunction::process( const double *magnitudes, const double *phases ) +double DetectionFunction::processFrequencyDomain(const double *reals, + const double *imags) { - for (size_t i = 0; i < m_halfLength; ++i) { - m_magnitude[i] = magnitudes[i]; - m_thetaAngle[i] = phases[i]; - } + m_phaseVoc->processFrequencyDomain(reals, imags, + m_magnitude, m_thetaAngle, m_unwrapped); if (m_whiten) whiten(); @@ -240,7 +240,6 @@ m_phaseHistory[ i ] = srcPhase[ i ]; } - return val; }
--- a/dsp/onsets/DetectionFunction.h Wed Oct 02 18:22:06 2013 +0100 +++ b/dsp/onsets/DetectionFunction.h Thu Oct 03 12:58:36 2013 +0100 @@ -43,8 +43,18 @@ double* getSpectrumMagnitude(); DetectionFunction( DFConfig Config ); virtual ~DetectionFunction(); - double process( const double* TDomain ); - double process( const double* magnitudes, const double* phases ); + + /** + * Process a single time-domain frame of audio, provided as + * frameLength samples. + */ + double processTimeDomain(const double* samples); + + /** + * Process a single frequency-domain frame, provided as + * frameLength/2+1 real and imaginary component values. + */ + double processFrequencyDomain(const double* reals, const double* imags); private: void whiten(); @@ -74,7 +84,7 @@ double* m_phaseHistoryOld; double* m_magPeaks; - double* m_DFWindowedFrame; // Array for windowed analysis frame + double* m_windowed; // Array for windowed analysis frame double* m_magnitude; // Magnitude of analysis frame ( frequency domain ) double* m_thetaAngle;// Phase of analysis frame ( frequency domain ) double* m_unwrapped; // Unwrapped phase of analysis frame
--- a/dsp/phasevocoder/PhaseVocoder.cpp Wed Oct 02 18:22:06 2013 +0100 +++ b/dsp/phasevocoder/PhaseVocoder.cpp Thu Oct 03 12:58:36 2013 +0100 @@ -18,6 +18,8 @@ #include "maths/MathUtilities.h" #include <math.h> +#include <cassert> + #include <iostream> using std::cerr; using std::endl; @@ -27,6 +29,7 @@ m_hop(hop) { m_fft = new FFTReal(m_n); + m_time = new double[m_n]; m_real = new double[m_n]; m_imag = new double[m_n]; m_phase = new double[m_n/2 + 1]; @@ -42,10 +45,11 @@ PhaseVocoder::~PhaseVocoder() { - delete [] m_unwrapped; - delete [] m_phase; - delete [] m_real; - delete [] m_imag; + delete[] m_unwrapped; + delete[] m_phase; + delete[] m_real; + delete[] m_imag; + delete[] m_time; delete m_fft; } @@ -59,13 +63,29 @@ } } -void PhaseVocoder::process(double *src, double *mag, double *theta, - double *unwrapped) +void PhaseVocoder::processTimeDomain(const double *src, + double *mag, double *theta, + double *unwrapped) { - FFTShift(src); - - m_fft->forward(src, m_real, m_imag); + for (int i = 0; i < m_n; ++i) { + m_time[i] = src[i]; + } + FFTShift(m_time); + m_fft->forward(m_time, m_real, m_imag); + getMagnitudes(mag); + getPhases(theta); + unwrapPhases(theta, unwrapped); +} +void PhaseVocoder::processFrequencyDomain(const double *reals, + const double *imags, + double *mag, double *theta, + double *unwrapped) +{ + for (int i = 0; i < m_n/2 + 1; ++i) { + m_real[i] = reals[i]; + m_imag[i] = imags[i]; + } getMagnitudes(mag); getPhases(theta); unwrapPhases(theta, unwrapped); @@ -102,6 +122,8 @@ { cerr << "PhaseVocoder::unwrapPhases" << endl; +//!!! if magnitude in a bin below a threshold, reset stored unwrapped phase angle for that bin + for (int i = 0; i < m_n/2 + 1; ++i) { double omega = (2 * M_PI * m_hop * i) / m_n; @@ -110,7 +132,7 @@ unwrapped[i] = m_unwrapped[i] + omega + error; - cerr << "i = " << i << ", instantaneous phase = " << theta[i] << ", prev phase = " << m_phase[i] << ", omega = " << omega << ", expected = " << expected << ", error = " << error << ", unwrapped = " << unwrapped[i] << endl; + cerr << "i = " << i << ", (" << m_real[i] << "," << m_imag[i] << "), instantaneous phase = " << theta[i] << ", prev phase = " << m_phase[i] << ", omega = " << omega << ", expected = " << expected << ", error = " << error << ", unwrapped = " << unwrapped[i] << endl; m_phase[i] = theta[i]; m_unwrapped[i] = unwrapped[i];
--- a/dsp/phasevocoder/PhaseVocoder.h Wed Oct 02 18:22:06 2013 +0100 +++ b/dsp/phasevocoder/PhaseVocoder.h Thu Oct 03 12:58:36 2013 +0100 @@ -21,6 +21,7 @@ class PhaseVocoder { public: + //!!! review: size must be a power of two, or not? PhaseVocoder(int size, int hop); virtual ~PhaseVocoder(); @@ -36,7 +37,22 @@ * enough space for size/2 + 1 values. The redundant conjugate * half of the output is not returned. */ - void process(double *src, double *mag, double *phase, double *unwrapped); + void processTimeDomain(const double *src, + double *mag, double *phase, double *unwrapped); + + /** + * Given one frame of frequency-domain samples, return the + * magnitudes, instantaneous phases, and unwrapped phases. + * + * reals and imags must each contain size/2+1 values (where size + * is the frame size value as passed to the PhaseVocoder + * constructor). + * + * mag, phase, and unwrapped must each be non-NULL and point to + * enough space for size/2+1 values. + */ + void processFrequencyDomain(const double *reals, const double *imags, + double *mag, double *phase, double *unwrapped); /** * Reset the stored phases to zero. Note that this may be @@ -55,6 +71,7 @@ int m_n; int m_hop; FFTReal *m_fft; + double *m_time; double *m_imag; double *m_real; double *m_phase;
--- a/qm-dsp.pro Wed Oct 02 18:22:06 2013 +0100 +++ b/qm-dsp.pro Thu Oct 03 12:58:36 2013 +0100 @@ -1,6 +1,6 @@ TEMPLATE = lib -CONFIG += staticlib warn_on release +CONFIG += staticlib warn_on debug CONFIG -= qt OBJECTS_DIR = tmp_obj MOC_DIR = tmp_moc
--- a/tests/Makefile Wed Oct 02 18:22:06 2013 +0100 +++ b/tests/Makefile Thu Oct 03 12:58:36 2013 +0100 @@ -8,7 +8,7 @@ TESTS := test-window test-fft test-pvoc all: $(TESTS) - for t in $(TESTS); do echo "Running $$t"; ./"$$t" || exit 1; done + for t in $(TESTS); do echo "Running $$t"; valgrind ./"$$t" || exit 1; done test-window: TestWindow.o $(LIBS) $(CXX) -o $@ $^ $(LDFLAGS)
--- a/tests/TestPhaseVocoder.cpp Wed Oct 02 18:22:06 2013 +0100 +++ b/tests/TestPhaseVocoder.cpp Thu Oct 03 12:58:36 2013 +0100 @@ -53,18 +53,18 @@ double phase[] = { 999, 999, 999, 999, 999, 999, 999 }; double unw[] = { 999, 999, 999, 999, 999, 999, 999 }; - pvoc.process(frame, mag + 1, phase + 1, unw + 1); + pvoc.processTimeDomain(frame, mag + 1, phase + 1, unw + 1); double magExpected0[] = { 999, 0, 0, 4, 0, 0, 999 }; COMPARE_ARRAY_EXACT(mag, magExpected0); double phaseExpected0[] = { 999, 0, 0, 0, 0, 0, 999 }; - COMPARE_ARRAY_EXACT(phase, phaseExpected0); + COMPARE_ARRAY(phase, phaseExpected0); double unwExpected0[] = { 999, 0, 0, 0, 0, 0, 999 }; COMPARE_ARRAY(unw, unwExpected0); - pvoc.process(frame, mag + 1, phase + 1, unw + 1); + pvoc.processTimeDomain(frame, mag + 1, phase + 1, unw + 1); double magExpected1[] = { 999, 0, 0, 4, 0, 0, 999 }; COMPARE_ARRAY_EXACT(mag, magExpected1); @@ -82,19 +82,19 @@ // mapped into (-pi,pi] range as pi, giving an unwrapped phase // of 2*pi. // - // * Bin 2 has expected unwrapped phase 2*pi, measured phase 0, - // hence error 0 and unwrapped phase 2*pi. + // * Bin 2 has expected phase 2*pi, measured phase 0, hence error + // 0 and unwrapped phase 2*pi. // // * Bin 3 is like bin 1: it has expected phase 3*pi, measured // phase 0, so phase error -pi and unwrapped phase 4*pi. // - // * Bin 4 (Nyquist) is like bin 2: expected phase 4*pi, measured - // phase 0, hence error 0 and unwrapped phase 4*pi. + // * Bin 4 (Nyquist) has expected phase 4*pi, measured phase 0, + // hence error 0 and unwrapped phase 4*pi. double unwExpected1[] = { 999, 0, 2*M_PI, 2*M_PI, 4*M_PI, 4*M_PI, 999 }; COMPARE_ARRAY(unw, unwExpected1); - pvoc.process(frame, mag + 1, phase + 1, unw + 1); + pvoc.processTimeDomain(frame, mag + 1, phase + 1, unw + 1); double magExpected2[] = { 999, 0, 0, 4, 0, 0, 999 }; COMPARE_ARRAY_EXACT(mag, magExpected2); @@ -130,9 +130,7 @@ double phase[] = { 999, 999, 999, 999, 999, 999, 999 }; double unw[] = { 999, 999, 999, 999, 999, 999, 999 }; -cerr << "process 0" << endl; - - pvoc.process(data, mag + 1, phase + 1, unw + 1); + pvoc.processTimeDomain(data, mag + 1, phase + 1, unw + 1); double magExpected0[] = { 999, 0, 0, 4, 0, 0, 999 }; COMPARE_ARRAY(mag, magExpected0); @@ -143,23 +141,45 @@ double unwExpected0[] = { 999, 0, 0, -M_PI/2, 0, 0, 999 }; COMPARE_ARRAY(unw, unwExpected0); -cerr << "process 1" << endl; - - pvoc.process(data + 8, mag + 1, phase + 1, unw + 1); + pvoc.processTimeDomain(data + 8, mag + 1, phase + 1, unw + 1); double magExpected1[] = { 999, 0, 4, 4, 0, 0, 999 }; COMPARE_ARRAY(mag, magExpected1); //!!! I don't know why [2] here is -M_PI and not M_PI; and I definitely don't know why [4] here is M_PI. Check these with care + + // Derivation of unwrapped values: + // + // * Bin 0 (DC) always has phase 0 and expected phase 0 + // + // * Bin 1 has a new signal, a cosine starting with phase 0. But + // because of the "FFT shift" which the phase vocoder carries + // out to place zero phase in the centre of the (usually + // windowed) frame, and because a single cycle at this frequency + // spans the whole frame, this bin actually has measured phase + // of either pi or -pi. (The shift doesn't affect those + // higher-frequency bins whose signals fit exact multiples of a + // cycle into a frame.) This maps into (-pi,pi] as pi, which + // matches the expected phase, hence unwrapped phase is also pi. + // + // * Bin 2 has expected phase 3pi/2 (being the previous measured + // phase of -pi/2 plus advance of 2pi). It has the same measured + // phase as last time around, -pi/2, which is consistent with + // the expected phase, so the unwrapped phase is 3pi/2. + //!!! + // * Bin 3 is a bit of a puzzle -- it has an effectively zero + // magnitude but a non-zero measured phase. Spectral leakage? + // + // * Bin 4 (Nyquist) has expected phase 4*pi, measured phase 0, + // hence error 0 and unwrapped phase 4*pi. + double phaseExpected1[] = { 999, 0, -M_PI, -M_PI/2, M_PI, 0, 999 }; COMPARE_ARRAY(phase, phaseExpected1); double unwExpected1[] = { 999, 0, M_PI, 3*M_PI/2, 3*M_PI, 4*M_PI, 999 }; COMPARE_ARRAY(unw, unwExpected1); -cerr << "process 2" << endl; - - pvoc.process(data + 16, mag + 1, phase + 1, unw + 1); + pvoc.processTimeDomain(data + 16, mag + 1, phase + 1, unw + 1); double magExpected2[] = { 999, 0, 4, 4, 0, 0, 999 }; COMPARE_ARRAY(mag, magExpected2);