Mercurial > hg > sonic-visualiser
changeset 14:085f34c73939
* IntegerTimeStretcher -> PhaseVocoderTimeStretcher (no longer confined to
integer multiples)
author | Chris Cannam |
---|---|
date | Wed, 13 Sep 2006 11:06:28 +0000 (2006-09-13) |
parents | 00ed645f4175 |
children | cc566264c935 |
files | audioio/AudioCallbackPlaySource.cpp audioio/AudioCallbackPlaySource.h audioio/IntegerTimeStretcher.cpp audioio/IntegerTimeStretcher.h audioio/PhaseVocoderTimeStretcher.cpp audioio/PhaseVocoderTimeStretcher.h main/MainWindow.cpp sv.pro |
diffstat | 8 files changed, 380 insertions(+), 381 deletions(-) [+] |
line wrap: on
line diff
--- a/audioio/AudioCallbackPlaySource.cpp Tue Sep 12 19:13:12 2006 +0000 +++ b/audioio/AudioCallbackPlaySource.cpp Wed Sep 13 11:06:28 2006 +0000 @@ -22,13 +22,13 @@ #include "base/PlayParameterRepository.h" #include "data/model/DenseTimeValueModel.h" #include "data/model/SparseOneDimensionalModel.h" -#include "IntegerTimeStretcher.h" +#include "PhaseVocoderTimeStretcher.h" #include <iostream> #include <cassert> //#define DEBUG_AUDIO_PLAY_SOURCE 1 -#define DEBUG_AUDIO_PLAY_SOURCE_PLAYING 1 +//#define DEBUG_AUDIO_PLAY_SOURCE_PLAYING 1 //const size_t AudioCallbackPlaySource::m_ringBufferSize = 102400; const size_t AudioCallbackPlaySource::m_ringBufferSize = 131071; @@ -600,7 +600,7 @@ m_stretcher[ch] = StretcherBuffer //!!! - (new IntegerTimeStretcher(factor, + (new PhaseVocoderTimeStretcher(factor, blockSize, // 128), (blockSize/2) / factor), @@ -621,7 +621,7 @@ delete m_stretchInputBuffer; } -IntegerTimeStretcher * +PhaseVocoderTimeStretcher * AudioCallbackPlaySource::TimeStretcherData::getStretcher(size_t channel) { return m_stretcher[channel].first;
--- a/audioio/AudioCallbackPlaySource.h Tue Sep 12 19:13:12 2006 +0000 +++ b/audioio/AudioCallbackPlaySource.h Wed Sep 13 11:06:28 2006 +0000 @@ -35,7 +35,7 @@ class ViewManager; class AudioGenerator; class PlayParameters; -class IntegerTimeStretcher; +class PhaseVocoderTimeStretcher; /** * AudioCallbackPlaySource manages audio data supply to callback-based @@ -251,7 +251,7 @@ ~TimeStretcherData(); float getFactor() const { return m_factor; } - IntegerTimeStretcher *getStretcher(size_t channel); + PhaseVocoderTimeStretcher *getStretcher(size_t channel); float *getOutputBuffer(size_t channel); float *getInputBuffer(); @@ -261,7 +261,7 @@ TimeStretcherData(const TimeStretcherData &); // not provided TimeStretcherData &operator=(const TimeStretcherData &); // not provided - typedef std::pair<IntegerTimeStretcher *, float *> StretcherBuffer; + typedef std::pair<PhaseVocoderTimeStretcher *, float *> StretcherBuffer; std::map<size_t, StretcherBuffer> m_stretcher; float *m_stretchInputBuffer; float m_factor;
--- a/audioio/IntegerTimeStretcher.cpp Tue Sep 12 19:13:12 2006 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,259 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ - -/* - Sonic Visualiser - An audio file viewer and annotation editor. - Centre for Digital Music, Queen Mary, University of London. - This file copyright 2006 Chris Cannam. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. -*/ - -#include "IntegerTimeStretcher.h" - -#include <iostream> -#include <cassert> - -//#define DEBUG_INTEGER_TIME_STRETCHER 1 - -IntegerTimeStretcher::IntegerTimeStretcher(float ratio, - size_t maxProcessInputBlockSize, - size_t inputIncrement, - size_t windowSize, - WindowType windowType) : - m_ratio(ratio), - m_n1(inputIncrement), - m_n2(lrintf(m_n1 * ratio)), - m_wlen(std::max(windowSize, m_n2 * 2)), - m_inbuf(m_wlen), - m_outbuf(maxProcessInputBlockSize * ratio + 1024) //!!! -{ - m_window = new Window<float>(windowType, m_wlen), - - m_time = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * m_wlen); - m_freq = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * m_wlen); - m_dbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen); - m_mashbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen); - m_modulationbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen); - m_prevPhase = (float *)fftwf_malloc(sizeof(float) * m_wlen); - m_prevAdjustedPhase = (float *)fftwf_malloc(sizeof(float) * m_wlen); - - m_plan = fftwf_plan_dft_1d(m_wlen, m_time, m_freq, FFTW_FORWARD, FFTW_ESTIMATE); - m_iplan = fftwf_plan_dft_c2r_1d(m_wlen, m_freq, m_dbuf, FFTW_ESTIMATE); - - for (int i = 0; i < m_wlen; ++i) { - m_mashbuf[i] = 0.0; - m_modulationbuf[i] = 0.0; - m_prevPhase[i] = 0.0; - m_prevAdjustedPhase[i] = 0.0; - } -} - -IntegerTimeStretcher::~IntegerTimeStretcher() -{ - std::cerr << "IntegerTimeStretcher::~IntegerTimeStretcher" << std::endl; - - fftwf_destroy_plan(m_plan); - fftwf_destroy_plan(m_iplan); - - fftwf_free(m_time); - fftwf_free(m_freq); - fftwf_free(m_dbuf); - fftwf_free(m_mashbuf); - fftwf_free(m_modulationbuf); - fftwf_free(m_prevPhase); - fftwf_free(m_prevAdjustedPhase); - - delete m_window; -} - -size_t -IntegerTimeStretcher::getProcessingLatency() const -{ - return getWindowSize() - getInputIncrement(); -} - -void -IntegerTimeStretcher::process(float *input, float *output, size_t samples) -{ - // We need to add samples from input to our internal buffer. When - // we have m_windowSize samples in the buffer, we can process it, - // move the samples back by m_n1 and write the output onto our - // internal output buffer. If we have (samples * ratio) samples - // in that, we can write m_n2 of them back to output and return - // (otherwise we have to write zeroes). - - // When we process, we write m_wlen to our fixed output buffer - // (m_mashbuf). We then pull out the first m_n2 samples from that - // buffer, push them into the output ring buffer, and shift - // m_mashbuf left by that amount. - - // The processing latency is then m_wlen - m_n2. - - size_t consumed = 0; - -#ifdef DEBUG_INTEGER_TIME_STRETCHER - std::cerr << "IntegerTimeStretcher::process(" << samples << ", consumed = " << consumed << "), writable " << m_inbuf.getWriteSpace() <<", readable "<< m_outbuf.getReadSpace() << std::endl; -#endif - - while (consumed < samples) { - - size_t writable = m_inbuf.getWriteSpace(); - writable = std::min(writable, samples - consumed); - - if (writable == 0) { - //!!! then what? I don't think this should happen, but - std::cerr << "WARNING: IntegerTimeStretcher::process: writable == 0" << std::endl; - break; - } - -#ifdef DEBUG_INTEGER_TIME_STRETCHER - std::cerr << "writing " << writable << " from index " << consumed << " to inbuf, consumed will be " << consumed + writable << std::endl; -#endif - m_inbuf.write(input + consumed, writable); - consumed += writable; - - while (m_inbuf.getReadSpace() >= m_wlen && - m_outbuf.getWriteSpace() >= m_n2) { - - // We know we have at least m_wlen samples available - // in m_inbuf. We need to peek m_wlen of them for - // processing, and then read m_n1 to advance the read - // pointer. - - size_t got = m_inbuf.peek(m_dbuf, m_wlen); - assert(got == m_wlen); - - processBlock(m_dbuf, m_mashbuf, m_modulationbuf); - -#ifdef DEBUG_INTEGER_TIME_STRETCHER - std::cerr << "writing first " << m_n2 << " from mashbuf, skipping " << m_n1 << " on inbuf " << std::endl; -#endif - m_inbuf.skip(m_n1); - - for (size_t i = 0; i < m_n2; ++i) { - if (m_modulationbuf[i] > 0.f) { - m_mashbuf[i] /= m_modulationbuf[i]; - } - } - - m_outbuf.write(m_mashbuf, m_n2); - - for (size_t i = 0; i < m_wlen - m_n2; ++i) { - m_mashbuf[i] = m_mashbuf[i + m_n2]; - m_modulationbuf[i] = m_modulationbuf[i + m_n2]; - } - - for (size_t i = m_wlen - m_n2; i < m_wlen; ++i) { - m_mashbuf[i] = 0.0f; - m_modulationbuf[i] = 0.0f; - } - } - -// std::cerr << "WARNING: IntegerTimeStretcher::process: writespace not enough for output increment (" << m_outbuf.getWriteSpace() << " < " << m_n2 << ")" << std::endl; -// } - -#ifdef DEBUG_INTEGER_TIME_STRETCHER - std::cerr << "loop ended: inbuf read space " << m_inbuf.getReadSpace() << ", outbuf write space " << m_outbuf.getWriteSpace() << std::endl; -#endif - } - - size_t toRead = lrintf(samples * m_ratio); - - if (m_outbuf.getReadSpace() < toRead) { - std::cerr << "WARNING: IntegerTimeStretcher::process: not enough data (yet?) (" << m_outbuf.getReadSpace() << " < " << toRead << ")" << std::endl; - size_t fill = toRead - m_outbuf.getReadSpace(); - for (size_t i = 0; i < fill; ++i) { - output[i] = 0.0; - } - m_outbuf.read(output + fill, m_outbuf.getReadSpace()); - } else { -#ifdef DEBUG_INTEGER_TIME_STRETCHER - std::cerr << "enough data - writing " << toRead << " from outbuf" << std::endl; -#endif - m_outbuf.read(output, toRead); - } - -#ifdef DEBUG_INTEGER_TIME_STRETCHER - std::cerr << "IntegerTimeStretcher::process returning" << std::endl; -#endif -} - -void -IntegerTimeStretcher::processBlock(float *buf, float *out, float *modulation) -{ - size_t i; - - // buf contains m_wlen samples; out contains enough space for - // m_wlen * ratio samples (we mix into out, rather than replacing) - -#ifdef DEBUG_INTEGER_TIME_STRETCHER - std::cerr << "IntegerTimeStretcher::processBlock" << std::endl; -#endif - - m_window->cut(buf); - - for (i = 0; i < m_wlen/2; ++i) { - float temp = buf[i]; - buf[i] = buf[i + m_wlen/2]; - buf[i + m_wlen/2] = temp; - } - - for (i = 0; i < m_wlen; ++i) { - m_time[i][0] = buf[i]; - m_time[i][1] = 0.0; - } - - fftwf_execute(m_plan); // m_time -> m_freq - - for (i = 0; i < m_wlen; ++i) { - - float mag = sqrtf(m_freq[i][0] * m_freq[i][0] + - m_freq[i][1] * m_freq[i][1]); - - float phase = princargf(atan2f(m_freq[i][1], m_freq[i][0])); - - float omega = (2 * M_PI * m_n1 * i) / m_wlen; - - float expectedPhase = m_prevPhase[i] + omega; - - float phaseError = princargf(phase - expectedPhase); - - float phaseIncrement = (omega + phaseError) / m_n1; - - float adjustedPhase = m_prevAdjustedPhase[i] + m_n2 * phaseIncrement; - - float real = mag * cosf(adjustedPhase); - float imag = mag * sinf(adjustedPhase); - m_freq[i][0] = real; - m_freq[i][1] = imag; - - m_prevPhase[i] = phase; - m_prevAdjustedPhase[i] = adjustedPhase; - } - - fftwf_execute(m_iplan); // m_freq -> in, inverse fft - - for (i = 0; i < m_wlen/2; ++i) { - float temp = buf[i] / m_wlen; - buf[i] = buf[i + m_wlen/2] / m_wlen; - buf[i + m_wlen/2] = temp; - } - - m_window->cut(buf); -/* - int div = m_wlen / m_n2; - if (div > 1) div /= 2; - for (i = 0; i < m_wlen; ++i) { - buf[i] /= div; - } -*/ - for (i = 0; i < m_wlen; ++i) { - out[i] += buf[i]; - modulation[i] += m_window->getValue(i); - } -}
--- a/audioio/IntegerTimeStretcher.h Tue Sep 12 19:13:12 2006 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,111 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ - -/* - Sonic Visualiser - An audio file viewer and annotation editor. - Centre for Digital Music, Queen Mary, University of London. - This file copyright 2006 Chris Cannam. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. -*/ - -#ifndef _INTEGER_TIME_STRETCHER_H_ -#define _INTEGER_TIME_STRETCHER_H_ - -#include "base/Window.h" -#include "base/RingBuffer.h" - -#include <fftw3.h> - -/** - * A time stretcher that slows down audio by an integer multiple of - * its original duration, preserving pitch. This uses the simple - * phase vocoder technique from DAFX pp275-276, adding a block-based - * stream oriented API. - * - * Causes significant transient smearing, but sounds good for steady - * notes and is generally predictable. - */ - -class IntegerTimeStretcher -{ -public: - IntegerTimeStretcher(float ratio, - size_t maxProcessInputBlockSize, - size_t inputIncrement = 64, - size_t windowSize = 2048, - WindowType windowType = HanningWindow); - virtual ~IntegerTimeStretcher(); - - /** - * Process a block. The input array contains the given number of - * samples; the output has enough space for samples * m_ratio. - */ - void process(float *input, float *output, size_t samples); - - /** - * Get the hop size for input. Smaller values may produce better - * results, at a cost in processing time. Larger values are - * faster but increase the likelihood of echo-like effects. The - * default is 64, which is usually pretty good, though heavy on - * processor power. - */ - size_t getInputIncrement() const { return m_n1; } - - /** - * Get the window size for FFT processing. Must be larger than - * the input and output increments. The default is 2048. - */ - size_t getWindowSize() const { return m_wlen; } - - /** - * Get the window type. The default is a Hanning window. - */ - WindowType getWindowType() const { return m_window->getType(); } - - float getRatio() const { return m_ratio; } - size_t getOutputIncrement() const { return getInputIncrement() * getRatio(); } - size_t getProcessingLatency() const; - -protected: - /** - * Process a single phase vocoder frame. - * - * Take m_wlen time-domain source samples from in, perform an FFT, - * phase shift, and IFFT, and add the results to out (presumably - * overlapping parts of existing data from prior frames). - * - * Also add to the modulation output the results of windowing a - * set of 1s with the resynthesis window -- this can then be used - * to ensure the output has the correct magnitude in cases where - * the window overlap varies or otherwise results in something - * other than a flat sum. - */ - void processBlock(float *in, float *out, float *modulation); - - float m_ratio; - size_t m_n1; - size_t m_n2; - size_t m_wlen; - Window<float> *m_window; - - fftwf_complex *m_time; - fftwf_complex *m_freq; - float *m_dbuf; - float *m_prevPhase; - float *m_prevAdjustedPhase; - - fftwf_plan m_plan; - fftwf_plan m_iplan; - - RingBuffer<float> m_inbuf; - RingBuffer<float> m_outbuf; - float *m_mashbuf; - float *m_modulationbuf; -}; - -#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/audioio/PhaseVocoderTimeStretcher.cpp Wed Sep 13 11:06:28 2006 +0000 @@ -0,0 +1,259 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Sonic Visualiser + An audio file viewer and annotation editor. + Centre for Digital Music, Queen Mary, University of London. + This file copyright 2006 Chris Cannam. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "PhaseVocoderTimeStretcher.h" + +#include <iostream> +#include <cassert> + +//#define DEBUG_PHASE_VOCODER_TIME_STRETCHER 1 + +PhaseVocoderTimeStretcher::PhaseVocoderTimeStretcher(float ratio, + size_t maxProcessInputBlockSize, + size_t inputIncrement, + size_t windowSize, + WindowType windowType) : + m_ratio(ratio), + m_n1(inputIncrement), + m_n2(lrintf(m_n1 * ratio)), + m_wlen(std::max(windowSize, m_n2 * 2)), + m_inbuf(m_wlen), + m_outbuf(maxProcessInputBlockSize * ratio + 1024) //!!! +{ + m_window = new Window<float>(windowType, m_wlen), + + m_time = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * m_wlen); + m_freq = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * m_wlen); + m_dbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen); + m_mashbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen); + m_modulationbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen); + m_prevPhase = (float *)fftwf_malloc(sizeof(float) * m_wlen); + m_prevAdjustedPhase = (float *)fftwf_malloc(sizeof(float) * m_wlen); + + m_plan = fftwf_plan_dft_1d(m_wlen, m_time, m_freq, FFTW_FORWARD, FFTW_ESTIMATE); + m_iplan = fftwf_plan_dft_c2r_1d(m_wlen, m_freq, m_dbuf, FFTW_ESTIMATE); + + for (int i = 0; i < m_wlen; ++i) { + m_mashbuf[i] = 0.0; + m_modulationbuf[i] = 0.0; + m_prevPhase[i] = 0.0; + m_prevAdjustedPhase[i] = 0.0; + } +} + +PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher() +{ + std::cerr << "PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher" << std::endl; + + fftwf_destroy_plan(m_plan); + fftwf_destroy_plan(m_iplan); + + fftwf_free(m_time); + fftwf_free(m_freq); + fftwf_free(m_dbuf); + fftwf_free(m_mashbuf); + fftwf_free(m_modulationbuf); + fftwf_free(m_prevPhase); + fftwf_free(m_prevAdjustedPhase); + + delete m_window; +} + +size_t +PhaseVocoderTimeStretcher::getProcessingLatency() const +{ + return getWindowSize() - getInputIncrement(); +} + +void +PhaseVocoderTimeStretcher::process(float *input, float *output, size_t samples) +{ + // We need to add samples from input to our internal buffer. When + // we have m_windowSize samples in the buffer, we can process it, + // move the samples back by m_n1 and write the output onto our + // internal output buffer. If we have (samples * ratio) samples + // in that, we can write m_n2 of them back to output and return + // (otherwise we have to write zeroes). + + // When we process, we write m_wlen to our fixed output buffer + // (m_mashbuf). We then pull out the first m_n2 samples from that + // buffer, push them into the output ring buffer, and shift + // m_mashbuf left by that amount. + + // The processing latency is then m_wlen - m_n2. + + size_t consumed = 0; + +#ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER + std::cerr << "PhaseVocoderTimeStretcher::process(" << samples << ", consumed = " << consumed << "), writable " << m_inbuf.getWriteSpace() <<", readable "<< m_outbuf.getReadSpace() << std::endl; +#endif + + while (consumed < samples) { + + size_t writable = m_inbuf.getWriteSpace(); + writable = std::min(writable, samples - consumed); + + if (writable == 0) { + //!!! then what? I don't think this should happen, but + std::cerr << "WARNING: PhaseVocoderTimeStretcher::process: writable == 0" << std::endl; + break; + } + +#ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER + std::cerr << "writing " << writable << " from index " << consumed << " to inbuf, consumed will be " << consumed + writable << std::endl; +#endif + m_inbuf.write(input + consumed, writable); + consumed += writable; + + while (m_inbuf.getReadSpace() >= m_wlen && + m_outbuf.getWriteSpace() >= m_n2) { + + // We know we have at least m_wlen samples available + // in m_inbuf. We need to peek m_wlen of them for + // processing, and then read m_n1 to advance the read + // pointer. + + size_t got = m_inbuf.peek(m_dbuf, m_wlen); + assert(got == m_wlen); + + processBlock(m_dbuf, m_mashbuf, m_modulationbuf); + +#ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER + std::cerr << "writing first " << m_n2 << " from mashbuf, skipping " << m_n1 << " on inbuf " << std::endl; +#endif + m_inbuf.skip(m_n1); + + for (size_t i = 0; i < m_n2; ++i) { + if (m_modulationbuf[i] > 0.f) { + m_mashbuf[i] /= m_modulationbuf[i]; + } + } + + m_outbuf.write(m_mashbuf, m_n2); + + for (size_t i = 0; i < m_wlen - m_n2; ++i) { + m_mashbuf[i] = m_mashbuf[i + m_n2]; + m_modulationbuf[i] = m_modulationbuf[i + m_n2]; + } + + for (size_t i = m_wlen - m_n2; i < m_wlen; ++i) { + m_mashbuf[i] = 0.0f; + m_modulationbuf[i] = 0.0f; + } + } + +// std::cerr << "WARNING: PhaseVocoderTimeStretcher::process: writespace not enough for output increment (" << m_outbuf.getWriteSpace() << " < " << m_n2 << ")" << std::endl; +// } + +#ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER + std::cerr << "loop ended: inbuf read space " << m_inbuf.getReadSpace() << ", outbuf write space " << m_outbuf.getWriteSpace() << std::endl; +#endif + } + + size_t toRead = lrintf(samples * m_ratio); + + if (m_outbuf.getReadSpace() < toRead) { + std::cerr << "WARNING: PhaseVocoderTimeStretcher::process: not enough data (yet?) (" << m_outbuf.getReadSpace() << " < " << toRead << ")" << std::endl; + size_t fill = toRead - m_outbuf.getReadSpace(); + for (size_t i = 0; i < fill; ++i) { + output[i] = 0.0; + } + m_outbuf.read(output + fill, m_outbuf.getReadSpace()); + } else { +#ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER + std::cerr << "enough data - writing " << toRead << " from outbuf" << std::endl; +#endif + m_outbuf.read(output, toRead); + } + +#ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER + std::cerr << "PhaseVocoderTimeStretcher::process returning" << std::endl; +#endif +} + +void +PhaseVocoderTimeStretcher::processBlock(float *buf, float *out, float *modulation) +{ + size_t i; + + // buf contains m_wlen samples; out contains enough space for + // m_wlen * ratio samples (we mix into out, rather than replacing) + +#ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER + std::cerr << "PhaseVocoderTimeStretcher::processBlock" << std::endl; +#endif + + m_window->cut(buf); + + for (i = 0; i < m_wlen/2; ++i) { + float temp = buf[i]; + buf[i] = buf[i + m_wlen/2]; + buf[i + m_wlen/2] = temp; + } + + for (i = 0; i < m_wlen; ++i) { + m_time[i][0] = buf[i]; + m_time[i][1] = 0.0; + } + + fftwf_execute(m_plan); // m_time -> m_freq + + for (i = 0; i < m_wlen; ++i) { + + float mag = sqrtf(m_freq[i][0] * m_freq[i][0] + + m_freq[i][1] * m_freq[i][1]); + + float phase = princargf(atan2f(m_freq[i][1], m_freq[i][0])); + + float omega = (2 * M_PI * m_n1 * i) / m_wlen; + + float expectedPhase = m_prevPhase[i] + omega; + + float phaseError = princargf(phase - expectedPhase); + + float phaseIncrement = (omega + phaseError) / m_n1; + + float adjustedPhase = m_prevAdjustedPhase[i] + m_n2 * phaseIncrement; + + float real = mag * cosf(adjustedPhase); + float imag = mag * sinf(adjustedPhase); + m_freq[i][0] = real; + m_freq[i][1] = imag; + + m_prevPhase[i] = phase; + m_prevAdjustedPhase[i] = adjustedPhase; + } + + fftwf_execute(m_iplan); // m_freq -> in, inverse fft + + for (i = 0; i < m_wlen/2; ++i) { + float temp = buf[i] / m_wlen; + buf[i] = buf[i + m_wlen/2] / m_wlen; + buf[i + m_wlen/2] = temp; + } + + m_window->cut(buf); +/* + int div = m_wlen / m_n2; + if (div > 1) div /= 2; + for (i = 0; i < m_wlen; ++i) { + buf[i] /= div; + } +*/ + for (i = 0; i < m_wlen; ++i) { + out[i] += buf[i]; + modulation[i] += m_window->getValue(i); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/audioio/PhaseVocoderTimeStretcher.h Wed Sep 13 11:06:28 2006 +0000 @@ -0,0 +1,110 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Sonic Visualiser + An audio file viewer and annotation editor. + Centre for Digital Music, Queen Mary, University of London. + This file copyright 2006 Chris Cannam. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef _PHASE_VOCODER_TIME_STRETCHER_H_ +#define _PHASE_VOCODER_TIME_STRETCHER_H_ + +#include "base/Window.h" +#include "base/RingBuffer.h" + +#include <fftw3.h> + +/** + * A time stretcher that alters the performance speed of audio, + * preserving pitch. This uses the simple phase vocoder technique + * from DAFX pp275-276, adding a block-based stream oriented API. + * + * Causes significant transient smearing, but sounds good for steady + * notes and is generally predictable. + */ + +class PhaseVocoderTimeStretcher +{ +public: + PhaseVocoderTimeStretcher(float ratio, + size_t maxProcessInputBlockSize, + size_t inputIncrement = 64, + size_t windowSize = 2048, + WindowType windowType = HanningWindow); + virtual ~PhaseVocoderTimeStretcher(); + + /** + * Process a block. The input array contains the given number of + * samples; the output has enough space for samples * m_ratio. + */ + void process(float *input, float *output, size_t samples); + + /** + * Get the hop size for input. Smaller values may produce better + * results, at a cost in processing time. Larger values are + * faster but increase the likelihood of echo-like effects. The + * default is 64, which is usually pretty good, though heavy on + * processor power. + */ + size_t getInputIncrement() const { return m_n1; } + + /** + * Get the window size for FFT processing. Must be larger than + * the input and output increments. The default is 2048. + */ + size_t getWindowSize() const { return m_wlen; } + + /** + * Get the window type. The default is a Hanning window. + */ + WindowType getWindowType() const { return m_window->getType(); } + + float getRatio() const { return m_ratio; } + size_t getOutputIncrement() const { return getInputIncrement() * getRatio(); } + size_t getProcessingLatency() const; + +protected: + /** + * Process a single phase vocoder frame. + * + * Take m_wlen time-domain source samples from in, perform an FFT, + * phase shift, and IFFT, and add the results to out (presumably + * overlapping parts of existing data from prior frames). + * + * Also add to the modulation output the results of windowing a + * set of 1s with the resynthesis window -- this can then be used + * to ensure the output has the correct magnitude in cases where + * the window overlap varies or otherwise results in something + * other than a flat sum. + */ + void processBlock(float *in, float *out, float *modulation); + + float m_ratio; + size_t m_n1; + size_t m_n2; + size_t m_wlen; + Window<float> *m_window; + + fftwf_complex *m_time; + fftwf_complex *m_freq; + float *m_dbuf; + float *m_prevPhase; + float *m_prevAdjustedPhase; + + fftwf_plan m_plan; + fftwf_plan m_iplan; + + RingBuffer<float> m_inbuf; + RingBuffer<float> m_outbuf; + float *m_mashbuf; + float *m_modulationbuf; +}; + +#endif
--- a/main/MainWindow.cpp Tue Sep 12 19:13:12 2006 +0000 +++ b/main/MainWindow.cpp Wed Sep 13 11:06:28 2006 +0000 @@ -2860,12 +2860,12 @@ // int factor = 11 - speed; if (speed > 10) factor = 1.0 / factor; std::cerr << "factor = " << factor << std::endl; - +/* int iinc = 128; int oinc = lrintf(iinc * factor); factor = (float(oinc) + 0.01) / iinc; std::cerr << "corrected factor = " << factor << std::endl; - +*/ m_playSpeed->setToolTip(tr("Playback speed: %1") .arg(factor != 1 ? QString("1/%1").arg(factor) :
--- a/sv.pro Tue Sep 12 19:13:12 2006 +0000 +++ b/sv.pro Wed Sep 13 11:06:28 2006 +0000 @@ -36,7 +36,7 @@ audioio/AudioJACKTarget.h \ audioio/AudioPortAudioTarget.h \ audioio/AudioTargetFactory.h \ - audioio/IntegerTimeStretcher.h \ + audioio/PhaseVocoderTimeStretcher.h \ document/Document.h \ document/SVFileReader.h \ main/MainWindow.h \ @@ -52,7 +52,7 @@ audioio/AudioJACKTarget.cpp \ audioio/AudioPortAudioTarget.cpp \ audioio/AudioTargetFactory.cpp \ - audioio/IntegerTimeStretcher.cpp \ + audioio/PhaseVocoderTimeStretcher.cpp \ document/Document.cpp \ document/SVFileReader.cpp \ main/main.cpp \