Chris@0: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@0: Chris@0: /* Chris@0: Sonic Visualiser Chris@0: An audio file viewer and annotation editor. Chris@0: Centre for Digital Music, Queen Mary, University of London. Chris@0: This file copyright 2006 Chris Cannam. Chris@0: Chris@0: This program is free software; you can redistribute it and/or Chris@0: modify it under the terms of the GNU General Public License as Chris@0: published by the Free Software Foundation; either version 2 of the Chris@0: License, or (at your option) any later version. See the file Chris@0: COPYING included with this distribution for more information. Chris@0: */ Chris@0: Chris@14: #include "PhaseVocoderTimeStretcher.h" Chris@0: Chris@0: #include Chris@0: #include Chris@0: Chris@14: //#define DEBUG_PHASE_VOCODER_TIME_STRETCHER 1 Chris@0: Chris@16: PhaseVocoderTimeStretcher::PhaseVocoderTimeStretcher(size_t channels, Chris@16: float ratio, Chris@16: bool sharpen, Chris@15: size_t maxProcessInputBlockSize) : Chris@16: m_channels(channels), Chris@16: m_ratio(ratio), Chris@16: m_sharpen(sharpen) Chris@0: { Chris@16: m_wlen = 1024; Chris@16: Chris@15: if (ratio < 1) { Chris@16: if (ratio < 0.4) { Chris@16: m_n1 = 1024; Chris@16: m_wlen = 2048; Chris@16: } else if (ratio < 0.8) { Chris@16: m_n1 = 512; Chris@16: } else { Chris@16: m_n1 = 256; Chris@16: } Chris@16: if (m_sharpen) { Chris@17: // m_n1 /= 2; Chris@16: m_wlen = 2048; Chris@16: } Chris@15: m_n2 = m_n1 * ratio; Chris@15: } else { Chris@16: if (ratio > 2) { Chris@16: m_n2 = 512; Chris@16: m_wlen = 4096; Chris@16: } else if (ratio > 1.6) { Chris@16: m_n2 = 384; Chris@16: m_wlen = 2048; Chris@16: } else { Chris@16: m_n2 = 256; Chris@16: } Chris@16: if (m_sharpen) { Chris@17: // m_n2 /= 2; Chris@16: if (m_wlen < 2048) m_wlen = 2048; Chris@16: } Chris@15: m_n1 = m_n2 / ratio; Chris@15: } Chris@16: Chris@20: m_analysisWindow = new Window(HanningWindow, m_wlen); Chris@20: m_synthesisWindow = new Window(HanningWindow, m_wlen); Chris@15: Chris@16: m_prevPhase = new float *[m_channels]; Chris@16: m_prevAdjustedPhase = new float *[m_channels]; Chris@15: Chris@20: m_prevTransientMag = (float *)fftwf_malloc(sizeof(float) * (m_wlen / 2 + 1)); Chris@20: m_prevTransientCount = 0; Chris@20: m_prevTransient = false; Chris@20: Chris@20: m_tempbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen); Chris@20: Chris@20: m_time = new float *[m_channels]; Chris@20: m_freq = new fftwf_complex *[m_channels]; Chris@20: m_plan = new fftwf_plan[m_channels]; Chris@20: m_iplan = new fftwf_plan[m_channels]; Chris@0: Chris@16: m_inbuf = new RingBuffer *[m_channels]; Chris@16: m_outbuf = new RingBuffer *[m_channels]; Chris@16: m_mashbuf = new float *[m_channels]; Chris@16: Chris@16: m_modulationbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen); Chris@16: Chris@16: for (size_t c = 0; c < m_channels; ++c) { Chris@16: Chris@20: m_prevPhase[c] = (float *)fftwf_malloc(sizeof(float) * (m_wlen / 2 + 1)); Chris@20: m_prevAdjustedPhase[c] = (float *)fftwf_malloc(sizeof(float) * (m_wlen / 2 + 1)); Chris@16: Chris@20: m_time[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen); Chris@20: m_freq[c] = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * Chris@20: (m_wlen / 2 + 1)); Chris@20: Chris@20: m_plan[c] = fftwf_plan_dft_r2c_1d(m_wlen, m_time[c], m_freq[c], FFTW_ESTIMATE); Chris@20: m_iplan[c] = fftwf_plan_dft_c2r_1d(m_wlen, m_freq[c], m_time[c], FFTW_ESTIMATE); Chris@16: Chris@16: m_inbuf[c] = new RingBuffer(m_wlen); Chris@16: m_outbuf[c] = new RingBuffer Chris@16: (lrintf((maxProcessInputBlockSize + m_wlen) * ratio)); Chris@16: Chris@16: m_mashbuf[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen); Chris@16: Chris@16: for (int i = 0; i < m_wlen; ++i) { Chris@16: m_mashbuf[c][i] = 0.0; Chris@20: } Chris@20: Chris@20: for (int i = 0; i <= m_wlen/2; ++i) { Chris@16: m_prevPhase[c][i] = 0.0; Chris@16: m_prevAdjustedPhase[c][i] = 0.0; Chris@16: } Chris@16: } Chris@16: Chris@0: for (int i = 0; i < m_wlen; ++i) { Chris@16: m_modulationbuf[i] = 0.0; Chris@0: } Chris@16: Chris@20: for (int i = 0; i <= m_wlen/2; ++i) { Chris@20: m_prevTransientMag[i] = 0.0; Chris@20: } Chris@20: Chris@16: std::cerr << "PhaseVocoderTimeStretcher: channels = " << channels Chris@16: << ", ratio = " << ratio Chris@16: << ", n1 = " << m_n1 << ", n2 = " << m_n2 << ", wlen = " Chris@16: << m_wlen << ", max = " << maxProcessInputBlockSize Chris@16: << ", outbuflen = " << m_outbuf[0]->getSize() << std::endl; Chris@0: } Chris@0: Chris@14: PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher() Chris@0: { Chris@14: std::cerr << "PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher" << std::endl; Chris@0: Chris@20: for (size_t c = 0; c < m_channels; ++c) { Chris@0: Chris@20: fftwf_destroy_plan(m_plan[c]); Chris@20: fftwf_destroy_plan(m_iplan[c]); Chris@16: Chris@20: fftwf_free(m_time[c]); Chris@20: fftwf_free(m_freq[c]); Chris@16: Chris@16: fftwf_free(m_mashbuf[c]); Chris@16: fftwf_free(m_prevPhase[c]); Chris@16: fftwf_free(m_prevAdjustedPhase[c]); Chris@16: Chris@16: delete m_inbuf[c]; Chris@16: delete m_outbuf[c]; Chris@16: } Chris@16: Chris@20: fftwf_free(m_tempbuf); Chris@13: fftwf_free(m_modulationbuf); Chris@20: fftwf_free(m_prevTransientMag); Chris@0: Chris@16: delete[] m_prevPhase; Chris@16: delete[] m_prevAdjustedPhase; Chris@16: delete[] m_inbuf; Chris@16: delete[] m_outbuf; Chris@16: delete[] m_mashbuf; Chris@20: delete[] m_time; Chris@20: delete[] m_freq; Chris@20: delete[] m_plan; Chris@20: delete[] m_iplan; Chris@15: Chris@20: delete m_analysisWindow; Chris@20: delete m_synthesisWindow; Chris@0: } Chris@0: Chris@0: size_t Chris@14: PhaseVocoderTimeStretcher::getProcessingLatency() const Chris@0: { Chris@0: return getWindowSize() - getInputIncrement(); Chris@0: } Chris@0: Chris@0: void Chris@16: PhaseVocoderTimeStretcher::process(float **input, float **output, size_t samples) Chris@16: { Chris@16: putInput(input, samples); Chris@16: getOutput(output, lrintf(samples * m_ratio)); Chris@16: } Chris@16: Chris@16: size_t Chris@16: PhaseVocoderTimeStretcher::getRequiredInputSamples() const Chris@16: { Chris@16: if (m_inbuf[0]->getReadSpace() >= m_wlen) return 0; Chris@16: return m_wlen - m_inbuf[0]->getReadSpace(); Chris@16: } Chris@16: Chris@16: void Chris@16: PhaseVocoderTimeStretcher::putInput(float **input, size_t samples) Chris@0: { Chris@0: // We need to add samples from input to our internal buffer. When Chris@0: // we have m_windowSize samples in the buffer, we can process it, Chris@0: // move the samples back by m_n1 and write the output onto our Chris@0: // internal output buffer. If we have (samples * ratio) samples Chris@0: // in that, we can write m_n2 of them back to output and return Chris@0: // (otherwise we have to write zeroes). Chris@0: Chris@0: // When we process, we write m_wlen to our fixed output buffer Chris@0: // (m_mashbuf). We then pull out the first m_n2 samples from that Chris@0: // buffer, push them into the output ring buffer, and shift Chris@0: // m_mashbuf left by that amount. Chris@0: Chris@0: // The processing latency is then m_wlen - m_n2. Chris@0: Chris@0: size_t consumed = 0; Chris@0: Chris@0: while (consumed < samples) { Chris@0: Chris@16: size_t writable = m_inbuf[0]->getWriteSpace(); Chris@0: writable = std::min(writable, samples - consumed); Chris@0: Chris@0: if (writable == 0) { Chris@0: //!!! then what? I don't think this should happen, but Chris@16: std::cerr << "WARNING: PhaseVocoderTimeStretcher::putInput: writable == 0" << std::endl; Chris@0: break; Chris@0: } Chris@0: Chris@14: #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER Chris@0: std::cerr << "writing " << writable << " from index " << consumed << " to inbuf, consumed will be " << consumed + writable << std::endl; Chris@0: #endif Chris@16: Chris@16: for (size_t c = 0; c < m_channels; ++c) { Chris@16: m_inbuf[c]->write(input[c] + consumed, writable); Chris@16: } Chris@0: consumed += writable; Chris@0: Chris@16: while (m_inbuf[0]->getReadSpace() >= m_wlen && Chris@16: m_outbuf[0]->getWriteSpace() >= m_n2) { Chris@0: Chris@0: // We know we have at least m_wlen samples available Chris@16: // in m_inbuf. We need to peek m_wlen of them for Chris@0: // processing, and then read m_n1 to advance the read Chris@0: // pointer. Chris@16: Chris@20: for (size_t c = 0; c < m_channels; ++c) { Chris@20: Chris@20: size_t got = m_inbuf[c]->peek(m_tempbuf, m_wlen); Chris@20: assert(got == m_wlen); Chris@20: Chris@20: analyseBlock(c, m_tempbuf); Chris@20: } Chris@20: Chris@20: bool transient = false; Chris@20: if (m_sharpen) transient = isTransient(); Chris@20: Chris@16: size_t n2 = m_n2; Chris@20: Chris@20: if (transient) { Chris@20: n2 = m_n1; Chris@20: } Chris@0: Chris@16: for (size_t c = 0; c < m_channels; ++c) { Chris@16: Chris@20: synthesiseBlock(c, m_mashbuf[c], Chris@20: c == 0 ? m_modulationbuf : 0, Chris@20: m_prevTransient ? m_n1 : m_n2); Chris@16: Chris@0: Chris@14: #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER Chris@16: std::cerr << "writing first " << m_n2 << " from mashbuf, skipping " << m_n1 << " on inbuf " << std::endl; Chris@0: #endif Chris@16: m_inbuf[c]->skip(m_n1); Chris@13: Chris@16: for (size_t i = 0; i < n2; ++i) { Chris@16: if (m_modulationbuf[i] > 0.f) { Chris@16: m_mashbuf[c][i] /= m_modulationbuf[i]; Chris@16: } Chris@16: } Chris@16: Chris@16: m_outbuf[c]->write(m_mashbuf[c], n2); Chris@16: Chris@16: for (size_t i = 0; i < m_wlen - n2; ++i) { Chris@16: m_mashbuf[c][i] = m_mashbuf[c][i + n2]; Chris@16: } Chris@16: Chris@16: for (size_t i = m_wlen - n2; i < m_wlen; ++i) { Chris@16: m_mashbuf[c][i] = 0.0f; Chris@13: } Chris@13: } Chris@13: Chris@20: m_prevTransient = transient; Chris@17: Chris@16: for (size_t i = 0; i < m_wlen - n2; ++i) { Chris@16: m_modulationbuf[i] = m_modulationbuf[i + n2]; Chris@0: } Chris@13: Chris@16: for (size_t i = m_wlen - n2; i < m_wlen; ++i) { Chris@13: m_modulationbuf[i] = 0.0f; Chris@0: } Chris@0: } Chris@0: Chris@0: Chris@14: #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER Chris@16: std::cerr << "loop ended: inbuf read space " << m_inbuf[0]->getReadSpace() << ", outbuf write space " << m_outbuf[0]->getWriteSpace() << std::endl; Chris@0: #endif Chris@0: } Chris@0: Chris@16: #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER Chris@16: std::cerr << "PhaseVocoderTimeStretcher::putInput returning" << std::endl; Chris@16: #endif Chris@16: } Chris@12: Chris@16: size_t Chris@16: PhaseVocoderTimeStretcher::getAvailableOutputSamples() const Chris@16: { Chris@16: return m_outbuf[0]->getReadSpace(); Chris@16: } Chris@16: Chris@16: void Chris@16: PhaseVocoderTimeStretcher::getOutput(float **output, size_t samples) Chris@16: { Chris@16: if (m_outbuf[0]->getReadSpace() < samples) { Chris@16: std::cerr << "WARNING: PhaseVocoderTimeStretcher::getOutput: not enough data (yet?) (" << m_outbuf[0]->getReadSpace() << " < " << samples << ")" << std::endl; Chris@16: size_t fill = samples - m_outbuf[0]->getReadSpace(); Chris@16: for (size_t c = 0; c < m_channels; ++c) { Chris@16: for (size_t i = 0; i < fill; ++i) { Chris@16: output[c][i] = 0.0; Chris@16: } Chris@16: m_outbuf[c]->read(output[c] + fill, m_outbuf[c]->getReadSpace()); Chris@16: } Chris@0: } else { Chris@14: #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER Chris@16: std::cerr << "enough data - writing " << samples << " from outbuf" << std::endl; Chris@0: #endif Chris@16: for (size_t c = 0; c < m_channels; ++c) { Chris@16: m_outbuf[c]->read(output[c], samples); Chris@16: } Chris@0: } Chris@0: Chris@14: #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER Chris@16: std::cerr << "PhaseVocoderTimeStretcher::getOutput returning" << std::endl; Chris@0: #endif Chris@0: } Chris@0: Chris@20: void Chris@20: PhaseVocoderTimeStretcher::analyseBlock(size_t c, float *buf) Chris@0: { Chris@0: size_t i; Chris@0: Chris@20: // buf contains m_wlen samples Chris@0: Chris@14: #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER Chris@20: std::cerr << "PhaseVocoderTimeStretcher::analyseBlock (channel " << c << ")" << std::endl; Chris@0: #endif Chris@0: Chris@20: m_analysisWindow->cut(buf); Chris@0: Chris@0: for (i = 0; i < m_wlen/2; ++i) { Chris@0: float temp = buf[i]; Chris@0: buf[i] = buf[i + m_wlen/2]; Chris@0: buf[i + m_wlen/2] = temp; Chris@0: } Chris@19: Chris@0: for (i = 0; i < m_wlen; ++i) { Chris@20: m_time[c][i] = buf[i]; Chris@0: } Chris@0: Chris@20: fftwf_execute(m_plan[c]); // m_time -> m_freq Chris@20: } Chris@0: Chris@20: bool Chris@20: PhaseVocoderTimeStretcher::isTransient() Chris@20: { Chris@20: int count = 0; Chris@16: Chris@20: for (int i = 0; i <= m_wlen/2; ++i) { Chris@16: Chris@20: float real = 0.f, imag = 0.f; Chris@20: Chris@20: for (size_t c = 0; c < m_channels; ++c) { Chris@20: real += m_freq[c][i][0]; Chris@20: imag += m_freq[c][i][1]; Chris@16: } Chris@16: Chris@20: float sqrmag = (real * real + imag * imag); Chris@20: Chris@20: if (m_prevTransientMag[i] > 0.f) { Chris@20: float diff = 10.f * log10f(sqrmag / m_prevTransientMag[i]); Chris@20: if (diff > 3.f) ++count; Chris@20: } Chris@20: Chris@20: m_prevTransientMag[i] = sqrmag; Chris@16: } Chris@16: Chris@20: bool isTransient = false; Chris@16: Chris@20: if (count > m_wlen / 4.5 && //!!! Chris@20: count > m_prevTransientCount * 1.2) { Chris@20: isTransient = true; Chris@20: std::cerr << "isTransient (count = " << count << ", prev = " << m_prevTransientCount << ")" << std::endl; Chris@20: } Chris@16: Chris@20: m_prevTransientCount = count; Chris@20: Chris@20: return isTransient; Chris@20: } Chris@20: Chris@20: void Chris@20: PhaseVocoderTimeStretcher::synthesiseBlock(size_t c, Chris@20: float *out, Chris@20: float *modulation, Chris@20: size_t lastStep) Chris@20: { Chris@20: int i; Chris@20: Chris@20: bool unchanged = (lastStep == m_n1); Chris@20: Chris@20: for (i = 0; i <= m_wlen/2; ++i) { Chris@0: Chris@20: float phase = princargf(atan2f(m_freq[c][i][1], m_freq[c][i][0])); Chris@19: float adjustedPhase = phase; Chris@12: Chris@20: if (!unchanged) { Chris@16: Chris@20: float mag = sqrtf(m_freq[c][i][0] * m_freq[c][i][0] + Chris@20: m_freq[c][i][1] * m_freq[c][i][1]); Chris@19: Chris@20: float omega = (2 * M_PI * m_n1 * i) / m_wlen; Chris@20: Chris@20: float expectedPhase = m_prevPhase[c][i] + omega; Chris@20: Chris@20: float phaseError = princargf(phase - expectedPhase); Chris@20: Chris@20: float phaseIncrement = (omega + phaseError) / m_n1; Chris@20: Chris@20: adjustedPhase = m_prevAdjustedPhase[c][i] + Chris@20: lastStep * phaseIncrement; Chris@20: Chris@20: float real = mag * cosf(adjustedPhase); Chris@20: float imag = mag * sinf(adjustedPhase); Chris@20: m_freq[c][i][0] = real; Chris@20: m_freq[c][i][1] = imag; Chris@19: } Chris@19: Chris@16: m_prevPhase[c][i] = phase; Chris@16: m_prevAdjustedPhase[c][i] = adjustedPhase; Chris@0: } Chris@20: Chris@20: fftwf_execute(m_iplan[c]); // m_freq -> m_time, inverse fft Chris@19: Chris@0: for (i = 0; i < m_wlen/2; ++i) { Chris@20: float temp = m_time[c][i]; Chris@20: m_time[c][i] = m_time[c][i + m_wlen/2]; Chris@20: m_time[c][i + m_wlen/2] = temp; Chris@20: } Chris@20: Chris@20: for (i = 0; i < m_wlen; ++i) { Chris@20: m_time[c][i] = m_time[c][i] / m_wlen; Chris@0: } Chris@15: Chris@20: m_synthesisWindow->cut(m_time[c]); Chris@19: Chris@19: for (i = 0; i < m_wlen; ++i) { Chris@20: out[i] += m_time[c][i]; Chris@0: } Chris@16: Chris@16: if (modulation) { Chris@16: Chris@20: float area = m_analysisWindow->getArea(); Chris@16: Chris@16: for (i = 0; i < m_wlen; ++i) { Chris@20: float val = m_synthesisWindow->getValue(i); Chris@16: modulation[i] += val * area; Chris@16: } Chris@16: } Chris@0: } Chris@15: Chris@20: