Mercurial > hg > sonic-visualiser
changeset 16:3715efc38f95
* substantial enhancements to time stretcher:
-- use putInput/getOutput methods to ensure the audio source always feeds
it enough input, avoiding underruns due to rounding error
-- add a percussion detector and an optional "Sharpen" toggle to the main
window, which invokes a very basic variable-speed time stretcher
author | Chris Cannam |
---|---|
date | Wed, 13 Sep 2006 17:17:42 +0000 |
parents | cc566264c935 |
children | 67d54627efd3 |
files | audioio/AudioCallbackPlaySource.cpp audioio/AudioCallbackPlaySource.h audioio/PhaseVocoderTimeStretcher.cpp audioio/PhaseVocoderTimeStretcher.h main/MainWindow.cpp main/MainWindow.h |
diffstat | 6 files changed, 418 insertions(+), 266 deletions(-) [+] |
line wrap: on
line diff
--- a/audioio/AudioCallbackPlaySource.cpp Wed Sep 13 11:56:44 2006 +0000 +++ b/audioio/AudioCallbackPlaySource.cpp Wed Sep 13 17:17:42 2006 +0000 @@ -51,7 +51,6 @@ m_lastModelEndFrame(0), m_outputLeft(0.0), m_outputRight(0.0), - m_slowdownCounter(0), m_timeStretcher(0), m_fillThread(0), m_converter(0) @@ -427,10 +426,10 @@ size_t latency = m_playLatency; if (resample) latency = size_t(m_playLatency * ratio + 0.1); - - TimeStretcherData *timeStretcher = m_timeStretcher; + + PhaseVocoderTimeStretcher *timeStretcher = m_timeStretcher; if (timeStretcher) { - latency += timeStretcher->getStretcher(0)->getProcessingLatency(); + latency += timeStretcher->getProcessingLatency(); } latency += readSpace; @@ -588,82 +587,26 @@ return m_sourceSampleRate; } -AudioCallbackPlaySource::TimeStretcherData::TimeStretcherData(size_t channels, - float factor, - size_t blockSize) : - m_factor(factor), - m_blockSize(blockSize) -{ -// std::cerr << "TimeStretcherData::TimeStretcherData(" << channels << ", " << factor << ", " << blockSize << ")" << std::endl; - - for (size_t ch = 0; ch < channels; ++ch) { - - m_stretcher[ch] = new PhaseVocoderTimeStretcher(factor, blockSize); -// 128), -// (blockSize/2) / factor), -// new float[lrintf(blockSize * factor)]); - } -} - -AudioCallbackPlaySource::TimeStretcherData::~TimeStretcherData() -{ -// std::cerr << "TimeStretcherData::~TimeStretcherData" << std::endl; - - while (!m_stretcher.empty()) { - delete m_stretcher.begin()->second; -// delete[] m_stretcher.begin()->second.second; - m_stretcher.erase(m_stretcher.begin()); - } -// delete m_stretchInputBuffer; -} - -PhaseVocoderTimeStretcher * -AudioCallbackPlaySource::TimeStretcherData::getStretcher(size_t channel) -{ - return m_stretcher[channel]; -} -/* -float * -AudioCallbackPlaySource::TimeStretcherData::getOutputBuffer(size_t channel) -{ - return m_stretcher[channel].second; -} - -float * -AudioCallbackPlaySource::TimeStretcherData::getInputBuffer() -{ - return m_stretchInputBuffer; -} - void 
-AudioCallbackPlaySource::TimeStretcherData::run(size_t channel) -{ - getStretcher(channel)->process(getInputBuffer(), - getOutputBuffer(channel), - m_blockSize); -} -*/ -void -AudioCallbackPlaySource::setSlowdownFactor(float factor) +AudioCallbackPlaySource::setSlowdownFactor(float factor, bool sharpen) { // Avoid locks -- create, assign, mark old one for scavenging // later (as a call to getSourceSamples may still be using it) - TimeStretcherData *existingStretcher = m_timeStretcher; + PhaseVocoderTimeStretcher *existingStretcher = m_timeStretcher; - if (existingStretcher && existingStretcher->getFactor() == factor) { + if (existingStretcher && + existingStretcher->getRatio() == factor && + existingStretcher->getSharpening() == sharpen) { return; } if (factor != 1) { - TimeStretcherData *newStretcher = new TimeStretcherData - (getTargetChannelCount(), factor, -// factor > 1 ? getTargetBlockSize() : getTargetBlockSize() / factor); - //!!! doesn't work if the block size > getTargetBlockSize(), but it - // should be made to -// getTargetBlockSize()); + PhaseVocoderTimeStretcher *newStretcher = new PhaseVocoderTimeStretcher + (getTargetChannelCount(), + factor, + sharpen, lrintf(getTargetBlockSize() / factor)); - m_slowdownCounter = 0; m_timeStretcher = newStretcher; } else { m_timeStretcher = 0; @@ -686,9 +629,9 @@ return 0; } - TimeStretcherData *timeStretcher = m_timeStretcher; + PhaseVocoderTimeStretcher *ts = m_timeStretcher; - if (!timeStretcher || timeStretcher->getFactor() == 1) { + if (!ts || ts->getRatio() == 1) { size_t got = 0; @@ -721,91 +664,102 @@ return got; } -/*!!! 
- if (m_slowdownCounter == 0) { + float ratio = ts->getRatio(); - size_t got = 0; - float *ib = timeStretcher->getInputBuffer(); +// std::cout << "ratio = " << ratio << std::endl; - for (size_t ch = 0; ch < getTargetChannelCount(); ++ch) { + size_t available; - RingBuffer<float> *rb = getReadRingBuffer(ch); + while ((available = ts->getAvailableOutputSamples()) < count) { - if (rb) { + size_t reqd = lrintf((count - available) / ratio); + reqd = std::max(reqd, ts->getRequiredInputSamples()); + if (reqd == 0) reqd = 1; + + size_t channels = getTargetChannelCount(); - size_t request = count; - if (ch > 0) request = got; // see above - got = rb->read(buffer[ch], request); + float *ib[channels]; -#ifdef DEBUG_AUDIO_PLAY_SOURCE_PLAYING - std::cout << "AudioCallbackPlaySource::getSamples: got " << got << " samples on channel " << ch << ", running time stretcher" << std::endl; -#endif + size_t got = reqd; - for (size_t i = 0; i < count; ++i) { - ib[i] = buffer[ch][i]; - } - - timeStretcher->run(ch); - } - } + for (size_t c = 0; c < channels; ++c) { + ib[c] = new float[reqd]; //!!! 
fix -- this is a rt function + RingBuffer<float> *rb = getReadRingBuffer(c); + if (rb) { + size_t gotHere = rb->read(ib[c], got); + if (gotHere < got) got = gotHere; + } + } - } else if (m_slowdownCounter >= timeStretcher->getFactor()) { - // reset this in case the factor has changed leaving the - // counter out of range - m_slowdownCounter = 0; + if (got < reqd) { + std::cerr << "WARNING: Read underrun in playback (" + << got << " < " << reqd << ")" << std::endl; + } + + ts->putInput(ib, got); + + for (size_t c = 0; c < channels; ++c) { + delete[] ib[c]; + } + + if (got == 0) break; + + if (ts->getAvailableOutputSamples() == available) { + std::cerr << "WARNING: AudioCallbackPlaySource::getSamples: Added " << got << " samples to time stretcher, created no new available output samples" << std::endl; + break; + } } - for (size_t ch = 0; ch < getTargetChannelCount(); ++ch) { + ts->getOutput(buffer, count); - float *ob = timeStretcher->getOutputBuffer(ch); -#ifdef DEBUG_AUDIO_PLAY_SOURCE_PLAYING - std::cerr << "AudioCallbackPlaySource::getSamples: Copying from (" << (m_slowdownCounter * count) << "," << count << ") to buffer" << std::endl; -#endif - - for (size_t i = 0; i < count; ++i) { - buffer[ch][i] = ob[m_slowdownCounter * count + i]; - } - } -*/ - +/*!!! for (size_t ch = 0; ch < getTargetChannelCount(); ++ch) { RingBuffer<float> *rb = getReadRingBuffer(ch); if (rb) { - float ratio = timeStretcher->getStretcher(ch)->getRatio(); - size_t request = lrintf(count / ratio); -// if (ch > 0) request = got; // see above + float ratio = ts->getRatio(); - float *ib = new float[request]; //!!! 
+// std::cout << "ratio = " << ratio << std::endl; - size_t got = rb->read(ib, request); + size_t available; + + while ((available = ts->getAvailableOutputSamples()) < count) { + + size_t reqd = lrintf((count - available) / ratio); + reqd = std::max(reqd, ts->getRequiredInputSamples()); + if (reqd == 0) reqd = 1; + + float ib[reqd]; + size_t got = rb->read(ib, reqd); #ifdef DEBUG_AUDIO_PLAY_SOURCE_PLAYING - std::cout << "AudioCallbackPlaySource::getSamples: got " << got << " samples on channel " << ch << " (count=" << count << ", ratio=" << timeStretcher->getStretcher(ch)->getRatio() << ", got*ratio=" << got * ratio << "), running time stretcher" << std::endl; + std::cout << "AudioCallbackPlaySource::getSamples: got " << got << " samples on channel " << ch << " (reqd=" << reqd << ", count=" << count << ", ratio=" << ratio << ", got*ratio=" << got * ratio << "), running time stretcher" << std::endl; #endif - timeStretcher->getStretcher(ch)->process(ib, buffer[ch], request); - - delete[] ib; + if (got < reqd) { + std::cerr << "WARNING: Read underrun in playback (" + << got << " < " << reqd << ")" << std::endl; + } + + ts->putInput(ib, got); -// for (size_t i = 0; i < count; ++i) { -// ib[i] = buffer[ch][i]; -// } - -// timeStretcher->run(ch); + if (got == 0) break; - + if (ts->getAvailableOutputSamples() == available) { + std::cerr << "WARNING: AudioCallbackPlaySource::getSamples: Added " << got << " samples to time stretcher, created no new available output samples" << std::endl; + break; + } + } + ts->getOutput(buffer[ch], count); } } +*/ + m_condition.wakeAll(); - - -//!!! if (m_slowdownCounter == 0) m_condition.wakeAll(); -// m_slowdownCounter = (m_slowdownCounter + 1) % timeStretcher->getFactor(); return count; }
--- a/audioio/AudioCallbackPlaySource.h Wed Sep 13 11:56:44 2006 +0000 +++ b/audioio/AudioCallbackPlaySource.h Wed Sep 13 17:17:42 2006 +0000 @@ -177,7 +177,7 @@ */ size_t getSourceSamples(size_t count, float **buffer); - void setSlowdownFactor(float factor); + void setSlowdownFactor(float factor, bool sharpen); signals: void modelReplaced(); @@ -244,31 +244,8 @@ void clearRingBuffers(bool haveLock = false, size_t count = 0); void unifyRingBuffers(); - class TimeStretcherData - { - public: - TimeStretcherData(size_t channels, float factor, size_t blockSize); - ~TimeStretcherData(); - - float getFactor() const { return m_factor; } - PhaseVocoderTimeStretcher *getStretcher(size_t channel); -// float *getOutputBuffer(size_t channel); -// float *getInputBuffer(); - -// void run(size_t channel); - - protected: - TimeStretcherData(const TimeStretcherData &); // not provided - TimeStretcherData &operator=(const TimeStretcherData &); // not provided - - std::map<size_t, PhaseVocoderTimeStretcher *> m_stretcher; - float m_factor; - size_t m_blockSize; - }; - - size_t m_slowdownCounter; - TimeStretcherData *m_timeStretcher; - Scavenger<TimeStretcherData> m_timeStretcherScavenger; + PhaseVocoderTimeStretcher *m_timeStretcher; + Scavenger<PhaseVocoderTimeStretcher> m_timeStretcherScavenger; // Called from fill thread, m_playing true, mutex held // Return true if work done
--- a/audioio/PhaseVocoderTimeStretcher.cpp Wed Sep 13 11:56:44 2006 +0000 +++ b/audioio/PhaseVocoderTimeStretcher.cpp Wed Sep 13 17:17:42 2006 +0000 @@ -20,53 +20,102 @@ //#define DEBUG_PHASE_VOCODER_TIME_STRETCHER 1 -PhaseVocoderTimeStretcher::PhaseVocoderTimeStretcher(float ratio, +PhaseVocoderTimeStretcher::PhaseVocoderTimeStretcher(size_t channels, + float ratio, + bool sharpen, size_t maxProcessInputBlockSize) : - m_ratio(ratio) - //, - // m_n1(inputIncrement), - // m_n2(lrintf(m_n1 * ratio)), - // m_wlen(std::max(windowSize, m_n2 * 2)), - // m_inbuf(m_wlen), - // m_outbuf(maxProcessInputBlockSize * ratio + 1024) //!!! + m_channels(channels), + m_ratio(ratio), + m_sharpen(sharpen) { + m_wlen = 1024; + if (ratio < 1) { - m_n1 = 512; + if (ratio < 0.4) { + m_n1 = 1024; + m_wlen = 2048; + } else if (ratio < 0.8) { + m_n1 = 512; + } else { + m_n1 = 256; + } + if (m_sharpen) { + m_n1 /= 2; + m_wlen = 2048; + } m_n2 = m_n1 * ratio; - m_wlen = 1024; } else { - m_n2 = 512; + if (ratio > 2) { + m_n2 = 512; + m_wlen = 4096; + } else if (ratio > 1.6) { + m_n2 = 384; + m_wlen = 2048; + } else { + m_n2 = 256; + } + if (m_sharpen) { + m_n2 /= 2; + if (m_wlen < 2048) m_wlen = 2048; + } m_n1 = m_n2 / ratio; - m_wlen = 1024; } - - m_inbuf = new RingBuffer<float>(m_wlen); - m_outbuf = new RingBuffer<float> - (lrintf((maxProcessInputBlockSize + m_wlen) * ratio)); + + m_window = new Window<float>(HanningWindow, m_wlen); - std::cerr << "PhaseVocoderTimeStretcher: ratio = " << ratio - << ", n1 = " << m_n1 << ", n2 = " << m_n2 << ", wlen = " - << m_wlen << ", max = " << maxProcessInputBlockSize << ", outbuflen = " << m_outbuf->getSize() << std::endl; + m_prevPhase = new float *[m_channels]; + m_prevAdjustedPhase = new float *[m_channels]; + if (m_sharpen) m_prevMag = new float *[m_channels]; + else m_prevMag = 0; + m_prevPercussiveCount = new int[m_channels]; - m_window = new Window<float>(HanningWindow, m_wlen), - + m_dbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen); m_time = 
(fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * m_wlen); m_freq = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * m_wlen); - m_dbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen); - m_mashbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen); - m_modulationbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen); - m_prevPhase = (float *)fftwf_malloc(sizeof(float) * m_wlen); - m_prevAdjustedPhase = (float *)fftwf_malloc(sizeof(float) * m_wlen); - + m_plan = fftwf_plan_dft_1d(m_wlen, m_time, m_freq, FFTW_FORWARD, FFTW_ESTIMATE); m_iplan = fftwf_plan_dft_c2r_1d(m_wlen, m_freq, m_dbuf, FFTW_ESTIMATE); + m_inbuf = new RingBuffer<float> *[m_channels]; + m_outbuf = new RingBuffer<float> *[m_channels]; + m_mashbuf = new float *[m_channels]; + + m_modulationbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen); + + for (size_t c = 0; c < m_channels; ++c) { + + m_prevPhase[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen); + m_prevAdjustedPhase[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen); + + if (m_sharpen) { + m_prevMag[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen); + } + + m_inbuf[c] = new RingBuffer<float>(m_wlen); + m_outbuf[c] = new RingBuffer<float> + (lrintf((maxProcessInputBlockSize + m_wlen) * ratio)); + + m_mashbuf[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen); + + for (int i = 0; i < m_wlen; ++i) { + m_mashbuf[c][i] = 0.0; + m_prevPhase[c][i] = 0.0; + m_prevAdjustedPhase[c][i] = 0.0; + if (m_sharpen) m_prevMag[c][i] = 0.0; + } + + m_prevPercussiveCount[c] = 0; + } + for (int i = 0; i < m_wlen; ++i) { - m_mashbuf[i] = 0.0; - m_modulationbuf[i] = 0.0; - m_prevPhase[i] = 0.0; - m_prevAdjustedPhase[i] = 0.0; + m_modulationbuf[i] = 0.0; } + + std::cerr << "PhaseVocoderTimeStretcher: channels = " << channels + << ", ratio = " << ratio + << ", n1 = " << m_n1 << ", n2 = " << m_n2 << ", wlen = " + << m_wlen << ", max = " << maxProcessInputBlockSize + << ", outbuflen = " << m_outbuf[0]->getSize() << std::endl; } 
PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher() @@ -79,13 +128,27 @@ fftwf_free(m_time); fftwf_free(m_freq); fftwf_free(m_dbuf); - fftwf_free(m_mashbuf); + + for (size_t c = 0; c < m_channels; ++c) { + + fftwf_free(m_mashbuf[c]); + fftwf_free(m_prevPhase[c]); + fftwf_free(m_prevAdjustedPhase[c]); + if (m_sharpen) fftwf_free(m_prevMag[c]); + + delete m_inbuf[c]; + delete m_outbuf[c]; + } + fftwf_free(m_modulationbuf); - fftwf_free(m_prevPhase); - fftwf_free(m_prevAdjustedPhase); - delete m_inbuf; - delete m_outbuf; + delete[] m_prevPhase; + delete[] m_prevAdjustedPhase; + if (m_sharpen) delete[] m_prevMag; + delete[] m_prevPercussiveCount; + delete[] m_inbuf; + delete[] m_outbuf; + delete[] m_mashbuf; delete m_window; } @@ -97,7 +160,21 @@ } void -PhaseVocoderTimeStretcher::process(float *input, float *output, size_t samples) +PhaseVocoderTimeStretcher::process(float **input, float **output, size_t samples) +{ + putInput(input, samples); + getOutput(output, lrintf(samples * m_ratio)); +} + +size_t +PhaseVocoderTimeStretcher::getRequiredInputSamples() const +{ + if (m_inbuf[0]->getReadSpace() >= m_wlen) return 0; + return m_wlen - m_inbuf[0]->getReadSpace(); +} + +void +PhaseVocoderTimeStretcher::putInput(float **input, size_t samples) { // We need to add samples from input to our internal buffer. When // we have m_windowSize samples in the buffer, we can process it, @@ -115,103 +192,143 @@ size_t consumed = 0; -#ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER - std::cerr << "PhaseVocoderTimeStretcher::process(" << samples << ", consumed = " << consumed << "), writable " << m_inbuf->getWriteSpace() <<", readable "<< m_outbuf->getReadSpace() << std::endl; -#endif - while (consumed < samples) { - size_t writable = m_inbuf->getWriteSpace(); + size_t writable = m_inbuf[0]->getWriteSpace(); writable = std::min(writable, samples - consumed); if (writable == 0) { //!!! then what? 
I don't think this should happen, but - std::cerr << "WARNING: PhaseVocoderTimeStretcher::process: writable == 0" << std::endl; + std::cerr << "WARNING: PhaseVocoderTimeStretcher::putInput: writable == 0" << std::endl; break; } #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER std::cerr << "writing " << writable << " from index " << consumed << " to inbuf, consumed will be " << consumed + writable << std::endl; #endif - m_inbuf->write(input + consumed, writable); + + for (size_t c = 0; c < m_channels; ++c) { + m_inbuf[c]->write(input[c] + consumed, writable); + } consumed += writable; - while (m_inbuf->getReadSpace() >= m_wlen && - m_outbuf->getWriteSpace() >= m_n2) { + while (m_inbuf[0]->getReadSpace() >= m_wlen && + m_outbuf[0]->getWriteSpace() >= m_n2) { // We know we have at least m_wlen samples available - // in m_inbuf-> We need to peek m_wlen of them for + // in m_inbuf. We need to peek m_wlen of them for // processing, and then read m_n1 to advance the read // pointer. + + size_t n2 = m_n2; + bool isPercussive = false; - size_t got = m_inbuf->peek(m_dbuf, m_wlen); - assert(got == m_wlen); + for (size_t c = 0; c < m_channels; ++c) { + + size_t got = m_inbuf[c]->peek(m_dbuf, m_wlen); + assert(got == m_wlen); - processBlock(m_dbuf, m_mashbuf, m_modulationbuf); + bool thisChannelPercussive = + processBlock(c, m_dbuf, m_mashbuf[c], + c == 0 ? 
m_modulationbuf : 0, + isPercussive); + + if (thisChannelPercussive && c == 0) { + isPercussive = true; + } + + if (isPercussive) { + n2 = m_n1; + } #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER - std::cerr << "writing first " << m_n2 << " from mashbuf, skipping " << m_n1 << " on inbuf " << std::endl; + std::cerr << "writing first " << m_n2 << " from mashbuf, skipping " << m_n1 << " on inbuf " << std::endl; #endif - m_inbuf->skip(m_n1); + m_inbuf[c]->skip(m_n1); - for (size_t i = 0; i < m_n2; ++i) { - if (m_modulationbuf[i] > 0.f) { - m_mashbuf[i] /= m_modulationbuf[i]; + for (size_t i = 0; i < n2; ++i) { + if (m_modulationbuf[i] > 0.f) { + m_mashbuf[c][i] /= m_modulationbuf[i]; + } + } + + m_outbuf[c]->write(m_mashbuf[c], n2); + + for (size_t i = 0; i < m_wlen - n2; ++i) { + m_mashbuf[c][i] = m_mashbuf[c][i + n2]; + } + + for (size_t i = m_wlen - n2; i < m_wlen; ++i) { + m_mashbuf[c][i] = 0.0f; } } - m_outbuf->write(m_mashbuf, m_n2); - - for (size_t i = 0; i < m_wlen - m_n2; ++i) { - m_mashbuf[i] = m_mashbuf[i + m_n2]; - m_modulationbuf[i] = m_modulationbuf[i + m_n2]; + for (size_t i = 0; i < m_wlen - n2; ++i) { + m_modulationbuf[i] = m_modulationbuf[i + n2]; } - for (size_t i = m_wlen - m_n2; i < m_wlen; ++i) { - m_mashbuf[i] = 0.0f; + for (size_t i = m_wlen - n2; i < m_wlen; ++i) { m_modulationbuf[i] = 0.0f; } } -// std::cerr << "WARNING: PhaseVocoderTimeStretcher::process: writespace not enough for output increment (" << m_outbuf->getWriteSpace() << " < " << m_n2 << ")" << std::endl; -// } #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER - std::cerr << "loop ended: inbuf read space " << m_inbuf->getReadSpace() << ", outbuf write space " << m_outbuf->getWriteSpace() << std::endl; + std::cerr << "loop ended: inbuf read space " << m_inbuf[0]->getReadSpace() << ", outbuf write space " << m_outbuf[0]->getWriteSpace() << std::endl; #endif } - size_t toRead = lrintf(samples * m_ratio); +#ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER + std::cerr << "PhaseVocoderTimeStretcher::putInput 
returning" << std::endl; +#endif +} - if (m_outbuf->getReadSpace() < toRead) { - std::cerr << "WARNING: PhaseVocoderTimeStretcher::process: not enough data (yet?) (" << m_outbuf->getReadSpace() << " < " << toRead << ")" << std::endl; - size_t fill = toRead - m_outbuf->getReadSpace(); - for (size_t i = 0; i < fill; ++i) { - output[i] = 0.0; - } - m_outbuf->read(output + fill, m_outbuf->getReadSpace()); +size_t +PhaseVocoderTimeStretcher::getAvailableOutputSamples() const +{ + return m_outbuf[0]->getReadSpace(); +} + +void +PhaseVocoderTimeStretcher::getOutput(float **output, size_t samples) +{ + if (m_outbuf[0]->getReadSpace() < samples) { + std::cerr << "WARNING: PhaseVocoderTimeStretcher::getOutput: not enough data (yet?) (" << m_outbuf[0]->getReadSpace() << " < " << samples << ")" << std::endl; + size_t fill = samples - m_outbuf[0]->getReadSpace(); + for (size_t c = 0; c < m_channels; ++c) { + for (size_t i = 0; i < fill; ++i) { + output[c][i] = 0.0; + } + m_outbuf[c]->read(output[c] + fill, m_outbuf[c]->getReadSpace()); + } } else { #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER - std::cerr << "enough data - writing " << toRead << " from outbuf" << std::endl; + std::cerr << "enough data - writing " << samples << " from outbuf" << std::endl; #endif - m_outbuf->read(output, toRead); + for (size_t c = 0; c < m_channels; ++c) { + m_outbuf[c]->read(output[c], samples); + } } #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER - std::cerr << "PhaseVocoderTimeStretcher::process returning" << std::endl; + std::cerr << "PhaseVocoderTimeStretcher::getOutput returning" << std::endl; #endif } -void -PhaseVocoderTimeStretcher::processBlock(float *buf, float *out, float *modulation) +bool +PhaseVocoderTimeStretcher::processBlock(size_t c, + float *buf, float *out, + float *modulation, + bool knownPercussive) { size_t i; + bool isPercussive = knownPercussive; // buf contains m_wlen samples; out contains enough space for // m_wlen * ratio samples (we mix into out, rather than replacing) 
#ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER - std::cerr << "PhaseVocoderTimeStretcher::processBlock" << std::endl; + std::cerr << "PhaseVocoderTimeStretcher::processBlock (channel " << c << ")" << std::endl; #endif m_window->cut(buf); @@ -229,30 +346,67 @@ fftwf_execute(m_plan); // m_time -> m_freq + if (m_sharpen && c == 0) { //!!! + + int count = 0; + + for (i = 0; i < m_wlen; ++i) { + + float mag = sqrtf(m_freq[i][0] * m_freq[i][0] + + m_freq[i][1] * m_freq[i][1]); + + if (m_prevMag[c][i] > 0) { + float magdiff = 20.f * log10f(mag / m_prevMag[c][i]); + if (magdiff > 3.f) ++count; + } + + m_prevMag[c][i] = mag; + } + + if (count > m_wlen / 6 && + count > m_prevPercussiveCount[c] * 1.2) { + isPercussive = true; + std::cerr << "isPercussive (count = " << count << ", prev = " << m_prevPercussiveCount[c] << ")" << std::endl; + } + + m_prevPercussiveCount[c] = count; + } + + size_t n2 = m_n2; + if (isPercussive) n2 = m_n1; + for (i = 0; i < m_wlen; ++i) { - - float mag = sqrtf(m_freq[i][0] * m_freq[i][0] + - m_freq[i][1] * m_freq[i][1]); + + float mag; + + if (m_sharpen && c == 0) { + mag = m_prevMag[c][i]; // can reuse this + } else { + mag = sqrtf(m_freq[i][0] * m_freq[i][0] + + m_freq[i][1] * m_freq[i][1]); + } float phase = princargf(atan2f(m_freq[i][1], m_freq[i][0])); float omega = (2 * M_PI * m_n1 * i) / m_wlen; - float expectedPhase = m_prevPhase[i] + omega; + float expectedPhase = m_prevPhase[c][i] + omega; float phaseError = princargf(phase - expectedPhase); float phaseIncrement = (omega + phaseError) / m_n1; - float adjustedPhase = m_prevAdjustedPhase[i] + m_n2 * phaseIncrement; + float adjustedPhase = m_prevAdjustedPhase[c][i] + n2 * phaseIncrement; + + if (isPercussive) adjustedPhase = phase; float real = mag * cosf(adjustedPhase); float imag = mag * sinf(adjustedPhase); m_freq[i][0] = real; m_freq[i][1] = imag; - m_prevPhase[i] = phase; - m_prevAdjustedPhase[i] = adjustedPhase; + m_prevPhase[c][i] = phase; + m_prevAdjustedPhase[c][i] = adjustedPhase; } 
fftwf_execute(m_iplan); // m_freq -> in, inverse fft @@ -264,20 +418,21 @@ } m_window->cut(buf); -/* - int div = m_wlen / m_n2; - if (div > 1) div /= 2; - for (i = 0; i < m_wlen; ++i) { - buf[i] /= div; - } -*/ - - float area = m_window->getArea(); for (i = 0; i < m_wlen; ++i) { - out[i] += buf[i]; - float val = m_window->getValue(i); - modulation[i] += val * area; + out[i] += buf[i]; } + + if (modulation) { + + float area = m_window->getArea(); + + for (i = 0; i < m_wlen; ++i) { + float val = m_window->getValue(i); + modulation[i] += val * area; + } + } + + return isPercussive; }
--- a/audioio/PhaseVocoderTimeStretcher.h Wed Sep 13 11:56:44 2006 +0000 +++ b/audioio/PhaseVocoderTimeStretcher.h Wed Sep 13 17:17:42 2006 +0000 @@ -33,14 +33,45 @@ class PhaseVocoderTimeStretcher { public: - PhaseVocoderTimeStretcher(float ratio, size_t maxProcessInputBlockSize); + PhaseVocoderTimeStretcher(size_t channels, + float ratio, + bool sharpen, + size_t maxProcessInputBlockSize); virtual ~PhaseVocoderTimeStretcher(); /** * Process a block. The input array contains the given number of - * samples; the output must have space for lrintf(samples * m_ratio). + * samples (on each channel); the output must have space for + * lrintf(samples * m_ratio). + * + * This should work correctly for some ratios, e.g. small powers + * of two. For other ratios it may drop samples -- use putInput + * in a loop followed by getOutput (when getAvailableOutputSamples + * reports enough) instead. + * + * Do not mix process calls with putInput/getOutput calls. */ - void process(float *input, float *output, size_t samples); + void process(float **input, float **output, size_t samples); + + /** + * Return the number of samples that would need to be added via + * putInput in order to provoke the time stretcher into doing some + * time stretching and making more output samples available. + */ + size_t getRequiredInputSamples() const; + + /** + * Put (and possibly process) a given number of input samples. + * Number must not exceed the maxProcessInputBlockSize passed to + * constructor. + */ + void putInput(float **input, size_t samples); + + size_t getAvailableOutputSamples() const; + + void getOutput(float **output, size_t samples); + + //!!! and reset? /** * Get the hop size for input. @@ -50,7 +81,7 @@ /** * Get the hop size for output. */ - size_t getOutputIncrement() const { return getInputIncrement() * getRatio(); } + size_t getOutputIncrement() const { return m_n2; } /** * Get the window size for FFT processing. 
@@ -63,9 +94,14 @@ WindowType getWindowType() const { return m_window->getType(); } /** - * Get the stretch ratio set in the constructor. + * Get the stretch ratio. */ - float getRatio() const { return m_ratio; } + float getRatio() const { return float(m_n2) / float(m_n1); } + + /** + * Return whether this time stretcher will attempt to sharpen transients. + */ + bool getSharpening() const { return m_sharpen; } /** * Get the latency added by the time stretcher, in sample frames. @@ -86,26 +122,33 @@ * the window overlap varies or otherwise results in something * other than a flat sum. */ - void processBlock(float *in, float *out, float *modulation); + bool processBlock(size_t channel, + float *in, float *out, + float *modulation, + bool knownPercussive); + size_t m_channels; float m_ratio; + bool m_sharpen; size_t m_n1; size_t m_n2; size_t m_wlen; Window<float> *m_window; + float **m_prevPhase; + float **m_prevAdjustedPhase; + float **m_prevMag; + int *m_prevPercussiveCount; + + float *m_dbuf; fftwf_complex *m_time; fftwf_complex *m_freq; - float *m_dbuf; - float *m_prevPhase; - float *m_prevAdjustedPhase; - fftwf_plan m_plan; fftwf_plan m_iplan; - RingBuffer<float> *m_inbuf; - RingBuffer<float> *m_outbuf; - float *m_mashbuf; + RingBuffer<float> **m_inbuf; + RingBuffer<float> **m_outbuf; + float **m_mashbuf; float *m_modulationbuf; };
--- a/main/MainWindow.cpp Wed Sep 13 11:56:44 2006 +0000 +++ b/main/MainWindow.cpp Wed Sep 13 17:17:42 2006 +0000 @@ -78,6 +78,7 @@ #include <QSettings> #include <QDateTime> #include <QProcess> +#include <QCheckBox> #include <iostream> #include <cstdio> @@ -165,10 +166,21 @@ connect(m_playSpeed, SIGNAL(valueChanged(int)), this, SLOT(playSpeedChanged(int))); - layout->addWidget(m_paneStack, 0, 0, 1, 3); + m_playSharpen = new QCheckBox(frame); + m_playSharpen->setToolTip(tr("Sharpen")); + m_playSharpen->setEnabled(false); + m_playSharpen->setChecked(false); + connect(m_playSharpen, SIGNAL(clicked()), + this, SLOT(playSharpenToggled())); + + layout->addWidget(m_paneStack, 0, 0, 1, 4); layout->addWidget(m_panner, 1, 0); layout->addWidget(m_fader, 1, 1); layout->addWidget(m_playSpeed, 1, 2); + layout->addWidget(m_playSharpen, 1, 3); + + layout->setColumnStretch(0, 10); + frame->setLayout(layout); connect(m_viewManager, SIGNAL(outputLevelsChanged(float, float)), @@ -2870,7 +2882,15 @@ .arg(factor != 1 ? QString("1/%1").arg(factor) : tr("Full"))); - m_playSource->setSlowdownFactor(factor); + m_playSharpen->setEnabled(speed != 10); + bool sharpen = (speed != 10 && m_playSharpen->isChecked()); + m_playSource->setSlowdownFactor(factor, sharpen); +} + +void +MainWindow::playSharpenToggled() +{ + playSpeedChanged(m_playSpeed->value()); } void
--- a/main/MainWindow.h Wed Sep 13 11:56:44 2006 +0000 +++ b/main/MainWindow.h Wed Sep 13 17:17:42 2006 +0000 @@ -44,6 +44,7 @@ class QMenu; class AudioDial; class QLabel; +class QCheckBox; class PreferencesDialog; @@ -139,6 +140,7 @@ void playLoopToggled(); void playSelectionToggled(); void playSpeedChanged(int); + void playSharpenToggled(); void sampleRateMismatch(size_t, size_t, bool); void outputLevelsChanged(float, float); @@ -203,6 +205,7 @@ Panner *m_panner; Fader *m_fader; AudioDial *m_playSpeed; + QCheckBox *m_playSharpen; WaveformLayer *m_panLayer; Layer *m_timeRulerLayer;