# HG changeset patch # User Chris Cannam # Date 1158241316 0 # Node ID e125f0dde7a39af7185720c8750eb9d309bf9ec2 # Parent f17798a555df1b32ce3a813121764722be406002 * restructure time stretcher somewhat so as to do transient detection on mixed stereo signal instead of just one channel diff -r f17798a555df -r e125f0dde7a3 audioio/PhaseVocoderTimeStretcher.cpp --- a/audioio/PhaseVocoderTimeStretcher.cpp Thu Sep 14 11:20:09 2006 +0000 +++ b/audioio/PhaseVocoderTimeStretcher.cpp Thu Sep 14 13:41:56 2006 +0000 @@ -61,21 +61,22 @@ m_n1 = m_n2 / ratio; } - m_window = new Window(HanningWindow, m_wlen); + m_analysisWindow = new Window(HanningWindow, m_wlen); + m_synthesisWindow = new Window(HanningWindow, m_wlen); m_prevPhase = new float *[m_channels]; m_prevAdjustedPhase = new float *[m_channels]; - if (m_sharpen) m_prevMag = new float *[m_channels]; - else m_prevMag = 0; - m_prevPercussiveCount = new int[m_channels]; - m_prevPercussive = false; - m_dbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen); - m_time = (float *)fftwf_malloc(sizeof(float) * m_wlen); - m_freq = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * m_wlen); - - m_plan = fftwf_plan_dft_r2c_1d(m_wlen, m_time, m_freq, FFTW_ESTIMATE); - m_iplan = fftwf_plan_dft_c2r_1d(m_wlen, m_freq, m_time, FFTW_ESTIMATE); + m_prevTransientMag = (float *)fftwf_malloc(sizeof(float) * (m_wlen / 2 + 1)); + m_prevTransientCount = 0; + m_prevTransient = false; + + m_tempbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen); + + m_time = new float *[m_channels]; + m_freq = new fftwf_complex *[m_channels]; + m_plan = new fftwf_plan[m_channels]; + m_iplan = new fftwf_plan[m_channels]; m_inbuf = new RingBuffer *[m_channels]; m_outbuf = new RingBuffer *[m_channels]; @@ -85,12 +86,15 @@ for (size_t c = 0; c < m_channels; ++c) { - m_prevPhase[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen); - m_prevAdjustedPhase[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen); + m_prevPhase[c] = (float *)fftwf_malloc(sizeof(float) * (m_wlen / 2 + 1)); + m_prevAdjustedPhase[c] = (float *)fftwf_malloc(sizeof(float) * (m_wlen / 2 + 1)); - if (m_sharpen) { - m_prevMag[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen); - } + m_time[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen); + m_freq[c] = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * + (m_wlen / 2 + 1)); + + m_plan[c] = fftwf_plan_dft_r2c_1d(m_wlen, m_time[c], m_freq[c], FFTW_ESTIMATE); + m_iplan[c] = fftwf_plan_dft_c2r_1d(m_wlen, m_freq[c], m_time[c], FFTW_ESTIMATE); m_inbuf[c] = new RingBuffer(m_wlen); m_outbuf[c] = new RingBuffer @@ -100,18 +104,22 @@ for (int i = 0; i < m_wlen; ++i) { m_mashbuf[c][i] = 0.0; + } + + for (int i = 0; i <= m_wlen/2; ++i) { m_prevPhase[c][i] = 0.0; m_prevAdjustedPhase[c][i] = 0.0; - if (m_sharpen) m_prevMag[c][i] = 0.0; } - - m_prevPercussiveCount[c] = 0; } for (int i = 0; i < m_wlen; ++i) { m_modulationbuf[i] = 0.0; } + for (int i = 0; i <= m_wlen/2; ++i) { + m_prevTransientMag[i] = 0.0; + } + std::cerr << "PhaseVocoderTimeStretcher: channels = " << channels << ", ratio = " << ratio << ", n1 = " << m_n1 << ", n2 = " << m_n2 << ", wlen = " @@ -123,35 +131,38 @@ { std::cerr << "PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher" << std::endl; - fftwf_destroy_plan(m_plan); - fftwf_destroy_plan(m_iplan); + for (size_t c = 0; c < m_channels; ++c) { - fftwf_free(m_time); - fftwf_free(m_freq); - fftwf_free(m_dbuf); + fftwf_destroy_plan(m_plan[c]); + fftwf_destroy_plan(m_iplan[c]); - for (size_t c = 0; c < m_channels; ++c) { + fftwf_free(m_time[c]); + fftwf_free(m_freq[c]); fftwf_free(m_mashbuf[c]); fftwf_free(m_prevPhase[c]); fftwf_free(m_prevAdjustedPhase[c]); - if (m_sharpen) fftwf_free(m_prevMag[c]); delete m_inbuf[c]; delete m_outbuf[c]; } + fftwf_free(m_tempbuf); fftwf_free(m_modulationbuf); + fftwf_free(m_prevTransientMag); delete[] m_prevPhase; delete[] m_prevAdjustedPhase; - if (m_sharpen) delete[] m_prevMag; - delete[] m_prevPercussiveCount; delete[] m_inbuf; delete[] m_outbuf; delete[] m_mashbuf; + delete[] m_time; + delete[] m_freq; + delete[] m_plan; + delete[] m_iplan; - delete m_window; + delete m_analysisWindow; + delete m_synthesisWindow; } size_t @@ -221,26 +232,29 @@ // processing, and then read m_n1 to advance the read // pointer. + for (size_t c = 0; c < m_channels; ++c) { + + size_t got = m_inbuf[c]->peek(m_tempbuf, m_wlen); + assert(got == m_wlen); + + analyseBlock(c, m_tempbuf); + } + + bool transient = false; + if (m_sharpen) transient = isTransient(); + size_t n2 = m_n2; - bool isPercussive = false; + + if (transient) { + n2 = m_n1; + } for (size_t c = 0; c < m_channels; ++c) { - size_t got = m_inbuf[c]->peek(m_dbuf, m_wlen); - assert(got == m_wlen); - - bool thisChannelPercussive = - processBlock(c, m_dbuf, m_mashbuf[c], - c == 0 ? m_modulationbuf : 0, - m_prevPercussive ? m_n1 : m_n2); + synthesiseBlock(c, m_mashbuf[c], + c == 0 ? m_modulationbuf : 0, + m_prevTransient ? m_n1 : m_n2); - if (thisChannelPercussive && c == 0) { - isPercussive = true; - } - - if (isPercussive) { - n2 = m_n1; - } #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER std::cerr << "writing first " << m_n2 << " from mashbuf, skipping " << m_n1 << " on inbuf " << std::endl; @@ -264,7 +278,7 @@ } } - m_prevPercussive = isPercussive; + m_prevTransient = transient; for (size_t i = 0; i < m_wlen - n2; ++i) { m_modulationbuf[i] = m_modulationbuf[i + n2]; @@ -318,23 +332,18 @@ #endif } -bool -PhaseVocoderTimeStretcher::processBlock(size_t c, - float *buf, float *out, - float *modulation, - size_t lastStep) +void +PhaseVocoderTimeStretcher::analyseBlock(size_t c, float *buf) { size_t i; - bool isPercussive = false; - // buf contains m_wlen samples; out contains enough space for - // m_wlen * ratio samples (we mix into out, rather than replacing) + // buf contains m_wlen samples #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER - std::cerr << "PhaseVocoderTimeStretcher::processBlock (channel " << c << ")" << std::endl; + std::cerr << "PhaseVocoderTimeStretcher::analyseBlock (channel " << c << ")" << std::endl; #endif - m_window->cut(buf); + m_analysisWindow->cut(buf); for (i = 0; i < m_wlen/2; ++i) { float temp = buf[i]; @@ -343,107 +352,117 @@ } for (i = 0; i < m_wlen; ++i) { - m_time[i] = buf[i]; + m_time[c][i] = buf[i]; } - fftwf_execute(m_plan); // m_time -> m_freq + fftwf_execute(m_plan[c]); // m_time -> m_freq +} - if (m_sharpen && c == 0) { //!!! - - int count = 0; +bool +PhaseVocoderTimeStretcher::isTransient() +{ + int count = 0; - for (i = 0; i < m_wlen; ++i) { - - float mag = sqrtf(m_freq[i][0] * m_freq[i][0] + - m_freq[i][1] * m_freq[i][1]); + for (int i = 0; i <= m_wlen/2; ++i) { - if (m_prevMag[c][i] > 0) { - float magdiff = 20.f * log10f(mag / m_prevMag[c][i]); - if (magdiff > 3.f) ++count; - } - - m_prevMag[c][i] = mag; - } - - if (count > m_wlen / 4 && //!!! - count > m_prevPercussiveCount[c] * 1.2) { - isPercussive = true; - std::cerr << "isPercussive (count = " << count << ", prev = " << m_prevPercussiveCount[c] << ")" << std::endl; + float real = 0.f, imag = 0.f; + + for (size_t c = 0; c < m_channels; ++c) { + real += m_freq[c][i][0]; + imag += m_freq[c][i][1]; } - m_prevPercussiveCount[c] = count; + float sqrmag = (real * real + imag * imag); + + if (m_prevTransientMag[i] > 0.f) { + float diff = 10.f * log10f(sqrmag / m_prevTransientMag[i]); + if (diff > 3.f) ++count; + } + + m_prevTransientMag[i] = sqrmag; } - for (i = 0; i < m_wlen; ++i) { //!!! /2 + bool isTransient = false; - float mag; + if (count > m_wlen / 4.5 && //!!! + count > m_prevTransientCount * 1.2) { + isTransient = true; + std::cerr << "isTransient (count = " << count << ", prev = " << m_prevTransientCount << ")" << std::endl; + } - if (m_sharpen && c == 0) { - mag = m_prevMag[c][i]; // can reuse this - } else { - mag = sqrtf(m_freq[i][0] * m_freq[i][0] + - m_freq[i][1] * m_freq[i][1]); - } + m_prevTransientCount = count; + + return isTransient; +} + +void +PhaseVocoderTimeStretcher::synthesiseBlock(size_t c, + float *out, + float *modulation, + size_t lastStep) +{ + int i; + + bool unchanged = (lastStep == m_n1); + + for (i = 0; i <= m_wlen/2; ++i) { - float phase = princargf(atan2f(m_freq[i][1], m_freq[i][0])); - - float omega = (2 * M_PI * m_n1 * i) / m_wlen; - - float expectedPhase = m_prevPhase[c][i] + omega; - - float phaseError = princargf(phase - expectedPhase); - + float phase = princargf(atan2f(m_freq[c][i][1], m_freq[c][i][0])); float adjustedPhase = phase; - if (!isPercussive) { -// if (fabsf(phaseError) < (1.1f * (lastStep * M_PI) / m_wlen)) { + if (!unchanged) { - float phaseIncrement = (omega + phaseError) / m_n1; + float mag = sqrtf(m_freq[c][i][0] * m_freq[c][i][0] + + m_freq[c][i][1] * m_freq[c][i][1]); - adjustedPhase = m_prevAdjustedPhase[c][i] + - lastStep * phaseIncrement; -// } + float omega = (2 * M_PI * m_n1 * i) / m_wlen; + + float expectedPhase = m_prevPhase[c][i] + omega; + + float phaseError = princargf(phase - expectedPhase); + + float phaseIncrement = (omega + phaseError) / m_n1; + + adjustedPhase = m_prevAdjustedPhase[c][i] + + lastStep * phaseIncrement; + + float real = mag * cosf(adjustedPhase); + float imag = mag * sinf(adjustedPhase); + m_freq[c][i][0] = real; + m_freq[c][i][1] = imag; } -// if (isPercussive) adjustedPhase = phase; - - float real = mag * cosf(adjustedPhase); - float imag = mag * sinf(adjustedPhase); - m_freq[i][0] = real; - m_freq[i][1] = imag; - m_prevPhase[c][i] = phase; m_prevAdjustedPhase[c][i] = adjustedPhase; } - - fftwf_execute(m_iplan); // m_freq -> m_time, inverse fft + + fftwf_execute(m_iplan[c]); // m_freq -> m_time, inverse fft for (i = 0; i < m_wlen/2; ++i) { - float temp = m_time[i]; - m_time[i] = m_time[i + m_wlen/2]; - m_time[i + m_wlen/2] = temp; + float temp = m_time[c][i]; + m_time[c][i] = m_time[c][i + m_wlen/2]; + m_time[c][i + m_wlen/2] = temp; + } + + for (i = 0; i < m_wlen; ++i) { + m_time[c][i] = m_time[c][i] / m_wlen; } - for (i = 0; i < m_wlen; ++i) { - m_time[i] = m_time[i] / m_wlen; - } - - m_window->cut(m_time); + m_synthesisWindow->cut(m_time[c]); for (i = 0; i < m_wlen; ++i) { - out[i] += m_time[i]; + out[i] += m_time[c][i]; } if (modulation) { - float area = m_window->getArea(); + float area = m_analysisWindow->getArea(); for (i = 0; i < m_wlen; ++i) { - float val = m_window->getValue(i); + float val = m_synthesisWindow->getValue(i); modulation[i] += val * area; } } - - return isPercussive; } + diff -r f17798a555df -r e125f0dde7a3 audioio/PhaseVocoderTimeStretcher.h --- a/audioio/PhaseVocoderTimeStretcher.h Thu Sep 14 11:20:09 2006 +0000 +++ b/audioio/PhaseVocoderTimeStretcher.h Thu Sep 14 13:41:56 2006 +0000 @@ -91,7 +91,7 @@ /** * Get the window type. */ - WindowType getWindowType() const { return m_window->getType(); } +// WindowType getWindowType() const { return m_window->getType(); } /** * Get the stretch ratio. @@ -122,10 +122,14 @@ * the window overlap varies or otherwise results in something * other than a flat sum. */ - bool processBlock(size_t channel, - float *in, float *out, - float *modulation, - size_t lastStep); + + + void analyseBlock(size_t channel, float *in); // into m_freq[channel] + + bool isTransient(); // operates on m_freq[0..m_channels-1] + + void synthesiseBlock(size_t channel, float *out, float *modulation, + size_t lastStep); size_t m_channels; float m_ratio; @@ -133,19 +137,21 @@ size_t m_n1; size_t m_n2; size_t m_wlen; - Window *m_window; + Window *m_analysisWindow; + Window *m_synthesisWindow; float **m_prevPhase; float **m_prevAdjustedPhase; - float **m_prevMag; - int *m_prevPercussiveCount; - bool m_prevPercussive; - float *m_dbuf; - float *m_time; - fftwf_complex *m_freq; - fftwf_plan m_plan; - fftwf_plan m_iplan; + float *m_prevTransientMag; + int m_prevTransientCount; + bool m_prevTransient; + + float *m_tempbuf; + float **m_time; + fftwf_complex **m_freq; + fftwf_plan *m_plan; + fftwf_plan *m_iplan; RingBuffer **m_inbuf; RingBuffer **m_outbuf;