svapp: audioio/PhaseVocoderTimeStretcher.cpp comparison

comparison audioio/PhaseVocoderTimeStretcher.cpp @ 43:3c5756fb6a68

* Move some things around to facilitate plundering libraries for other applications without needing to duplicate so much code. sv/osc -> data/osc sv/audioio -> audioio sv/transform -> plugin/transform sv/document -> document (will rename to framework in next commit)

author	Chris Cannam
date	Wed, 24 Oct 2007 16:34:31 +0000
parents
children	0ffab5d7e3e1

comparison

equal deleted inserted replaced

-:0619006a1ee3
+:3c5756fb6a68
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+/*
+Sonic Visualiser
+An audio file viewer and annotation editor.
+Centre for Digital Music, Queen Mary, University of London.
+This file copyright 2006 Chris Cannam and QMUL.
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.  See the file
+COPYING included with this distribution for more information.
+*/
+#include "PhaseVocoderTimeStretcher.h"
+#include <iostream>
+#include <cassert>
+#include <QMutexLocker>
+//#define DEBUG_PHASE_VOCODER_TIME_STRETCHER 1
+PhaseVocoderTimeStretcher::PhaseVocoderTimeStretcher(size_t sampleRate,
+size_t channels,
+float ratio,
+bool sharpen,
+size_t maxOutputBlockSize) :
+m_sampleRate(sampleRate),
+m_channels(channels),
+m_maxOutputBlockSize(maxOutputBlockSize),
+m_ratio(ratio),
+m_sharpen(sharpen),
+m_totalCount(0),
+m_transientCount(0),
+m_n2sum(0),
+m_mutex(new QMutex())
+{
+initialise();
+}
+PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher()
+{
+std::cerr << "PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher" << std::endl;
+cleanup();
+delete m_mutex;
+}
+void
+PhaseVocoderTimeStretcher::initialise()
+{
+std::cerr << "PhaseVocoderTimeStretcher::initialise" << std::endl;
+calculateParameters();
+m_analysisWindow = new Window<float>(HanningWindow, m_wlen);
+m_synthesisWindow = new Window<float>(HanningWindow, m_wlen);
+m_prevPhase = new float *[m_channels];
+m_prevAdjustedPhase = new float *[m_channels];
+m_prevTransientMag = (float *)fftf_malloc(sizeof(float) * (m_wlen / 2 + 1));
+m_prevTransientScore = 0;
+m_prevTransient = false;
+m_tempbuf = (float *)fftf_malloc(sizeof(float) * m_wlen);
+m_time = new float *[m_channels];
+m_freq = new fftf_complex *[m_channels];
+m_plan = new fftf_plan[m_channels];
+m_iplan = new fftf_plan[m_channels];
+m_inbuf = new RingBuffer<float> *[m_channels];
+m_outbuf = new RingBuffer<float> *[m_channels];
+m_mashbuf = new float *[m_channels];
+m_modulationbuf = (float *)fftf_malloc(sizeof(float) * m_wlen);
+for (size_t c = 0; c < m_channels; ++c) {
+m_prevPhase[c] = (float *)fftf_malloc(sizeof(float) * (m_wlen / 2 + 1));
+m_prevAdjustedPhase[c] = (float *)fftf_malloc(sizeof(float) * (m_wlen / 2 + 1));
+m_time[c] = (float *)fftf_malloc(sizeof(float) * m_wlen);
+m_freq[c] = (fftf_complex *)fftf_malloc(sizeof(fftf_complex) *
+(m_wlen / 2 + 1));
+m_plan[c] = fftf_plan_dft_r2c_1d(m_wlen, m_time[c], m_freq[c], FFTW_MEASURE);
+m_iplan[c] = fftf_plan_dft_c2r_1d(m_wlen, m_freq[c], m_time[c], FFTW_MEASURE);
+m_outbuf[c] = new RingBuffer<float>
+((m_maxOutputBlockSize + m_wlen) * 2);
+m_inbuf[c] = new RingBuffer<float>
+(lrintf(m_outbuf[c]->getSize() / m_ratio) + m_wlen);
+std::cerr << "making inbuf size " << m_inbuf[c]->getSize() << " (outbuf size is " << m_outbuf[c]->getSize() << ", ratio " << m_ratio << ")" << std::endl;
+m_mashbuf[c] = (float *)fftf_malloc(sizeof(float) * m_wlen);
+for (size_t i = 0; i < m_wlen; ++i) {
+m_mashbuf[c][i] = 0.0;
+}
+for (size_t i = 0; i <= m_wlen/2; ++i) {
+m_prevPhase[c][i] = 0.0;
+m_prevAdjustedPhase[c][i] = 0.0;
+}
+}
+for (size_t i = 0; i < m_wlen; ++i) {
+m_modulationbuf[i] = 0.0;
+}
+for (size_t i = 0; i <= m_wlen/2; ++i) {
+m_prevTransientMag[i] = 0.0;
+}
+}
+void
+PhaseVocoderTimeStretcher::calculateParameters()
+{
+std::cerr << "PhaseVocoderTimeStretcher::calculateParameters" << std::endl;
+m_wlen = 1024;
+//!!! In transient sharpening mode, we need to pick the window
+//length so as to be more or less fixed in audio duration (i.e. we
+//need to exploit the sample rate)
+//!!! have to work out the relationship between wlen and transient
+//threshold
+if (m_ratio < 1) {
+if (m_ratio < 0.4) {
+m_n1 = 1024;
+m_wlen = 2048;
+} else if (m_ratio < 0.8) {
+m_n1 = 512;
+} else {
+m_n1 = 256;
+}
+if (shouldSharpen()) {
+m_wlen = 2048;
+}
+m_n2 = lrintf(m_n1 * m_ratio);
+} else {
+if (m_ratio > 2) {
+m_n2 = 512;
+m_wlen = 4096;
+} else if (m_ratio > 1.6) {
+m_n2 = 384;
+m_wlen = 2048;
+} else {
+m_n2 = 256;
+}
+if (shouldSharpen()) {
+if (m_wlen < 2048) m_wlen = 2048;
+}
+m_n1 = lrintf(m_n2 / m_ratio);
+if (m_n1 == 0) {
+m_n1 = 1;
+m_n2 = lrintf(m_ratio);
+}
+}
+m_transientThreshold = lrintf(m_wlen / 4.5);
+m_totalCount = 0;
+m_transientCount = 0;
+m_n2sum = 0;
+std::cerr << "PhaseVocoderTimeStretcher: channels = " << m_channels
+<< ", ratio = " << m_ratio
+<< ", n1 = " << m_n1 << ", n2 = " << m_n2 << ", wlen = "
+<< m_wlen << ", max = " << m_maxOutputBlockSize << std::endl;
+//              << ", outbuflen = " << m_outbuf[0]->getSize() << std::endl;
+}
+void
+PhaseVocoderTimeStretcher::cleanup()
+{
+std::cerr << "PhaseVocoderTimeStretcher::cleanup" << std::endl;
+for (size_t c = 0; c < m_channels; ++c) {
+fftf_destroy_plan(m_plan[c]);
+fftf_destroy_plan(m_iplan[c]);
+fftf_free(m_time[c]);
+fftf_free(m_freq[c]);
+fftf_free(m_mashbuf[c]);
+fftf_free(m_prevPhase[c]);
+fftf_free(m_prevAdjustedPhase[c]);
+delete m_inbuf[c];
+delete m_outbuf[c];
+}
+fftf_free(m_tempbuf);
+fftf_free(m_modulationbuf);
+fftf_free(m_prevTransientMag);
+delete[] m_prevPhase;
+delete[] m_prevAdjustedPhase;
+delete[] m_inbuf;
+delete[] m_outbuf;
+delete[] m_mashbuf;
+delete[] m_time;
+delete[] m_freq;
+delete[] m_plan;
+delete[] m_iplan;
+delete m_analysisWindow;
+delete m_synthesisWindow;
+}
+void
+PhaseVocoderTimeStretcher::setRatio(float ratio)
+{
+QMutexLocker locker(m_mutex);
+size_t formerWlen = m_wlen;
+m_ratio = ratio;
+std::cerr << "PhaseVocoderTimeStretcher::setRatio: new ratio " << ratio
+<< std::endl;
+calculateParameters();
+if (m_wlen == formerWlen) {
+// This is the only container whose size depends on m_ratio
+RingBuffer<float> **newin = new RingBuffer<float> *[m_channels];
+size_t formerSize = m_inbuf[0]->getSize();
+size_t newSize = lrintf(m_outbuf[0]->getSize() / m_ratio) + m_wlen;
+std::cerr << "resizing inbuf from " << formerSize << " to "
+<< newSize << " (outbuf size is " << m_outbuf[0]->getSize() << ", ratio " << m_ratio << ")" << std::endl;
+if (formerSize != newSize) {
+size_t ready = m_inbuf[0]->getReadSpace();
+for (size_t c = 0; c < m_channels; ++c) {
+newin[c] = new RingBuffer<float>(newSize);
+}
+if (ready > 0) {
+size_t copy = std::min(ready, newSize);
+float *tmp = new float[ready];
+for (size_t c = 0; c < m_channels; ++c) {
+m_inbuf[c]->read(tmp, ready);
+newin[c]->write(tmp + ready - copy, copy);
+}
+delete[] tmp;
+}
+for (size_t c = 0; c < m_channels; ++c) {
+delete m_inbuf[c];
+}
+delete[] m_inbuf;
+m_inbuf = newin;
+}
+} else {
+std::cerr << "wlen changed" << std::endl;
+cleanup();
+initialise();
+}
+}
+size_t
+PhaseVocoderTimeStretcher::getProcessingLatency() const
+{
+return getWindowSize() - getInputIncrement();
+}
+size_t
+PhaseVocoderTimeStretcher::getRequiredInputSamples() const
+{
+QMutexLocker locker(m_mutex);
+if (m_inbuf[0]->getReadSpace() >= m_wlen) return 0;
+return m_wlen - m_inbuf[0]->getReadSpace();
+}
+void
+PhaseVocoderTimeStretcher::putInput(float **input, size_t samples)
+{
+QMutexLocker locker(m_mutex);
+// We need to add samples from input to our internal buffer.  When
+// we have m_windowSize samples in the buffer, we can process it,
+// move the samples back by m_n1 and write the output onto our
+// internal output buffer.  If we have (samples * ratio) samples
+// in that, we can write m_n2 of them back to output and return
+// (otherwise we have to write zeroes).
+// When we process, we write m_wlen to our fixed output buffer
+// (m_mashbuf).  We then pull out the first m_n2 samples from that
+// buffer, push them into the output ring buffer, and shift
+// m_mashbuf left by that amount.
+// The processing latency is then m_wlen - m_n2.
+size_t consumed = 0;
+while (consumed < samples) {
+	size_t writable = m_inbuf[0]->getWriteSpace();
+	writable = std::min(writable, samples - consumed);
+	if (writable == 0) {
+#ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
+	    std::cerr << "WARNING: PhaseVocoderTimeStretcher::putInput: writable == 0 (inbuf has " << m_inbuf[0]->getReadSpace() << " samples available for reading, space for " << m_inbuf[0]->getWriteSpace() << " more)" << std::endl;
+#endif
+if (m_inbuf[0]->getReadSpace() < m_wlen ||
+m_outbuf[0]->getWriteSpace() < m_n2) {
+std::cerr << "WARNING: PhaseVocoderTimeStretcher::putInput: Inbuf has " << m_inbuf[0]->getReadSpace() << ", outbuf has space for " << m_outbuf[0]->getWriteSpace() << " (n2 = " << m_n2 << ", wlen = " << m_wlen << "), won't be able to process" << std::endl;
+break;
+}
+	} else {
+#ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
+std::cerr << "writing " << writable << " from index " << consumed << " to inbuf, consumed will be " << consumed + writable << std::endl;
+#endif
+for (size_t c = 0; c < m_channels; ++c) {
+m_inbuf[c]->write(input[c] + consumed, writable);
+}
+consumed += writable;
+}
+	while (m_inbuf[0]->getReadSpace() >= m_wlen &&
+	       m_outbuf[0]->getWriteSpace() >= m_n2) {
+	    // We know we have at least m_wlen samples available
+	    // in m_inbuf.  We need to peek m_wlen of them for
+	    // processing, and then read m_n1 to advance the read
+	    // pointer.
+for (size_t c = 0; c < m_channels; ++c) {
+size_t got = m_inbuf[c]->peek(m_tempbuf, m_wlen);
+assert(got == m_wlen);
+analyseBlock(c, m_tempbuf);
+}
+bool transient = false;
+if (shouldSharpen()) transient = isTransient();
+size_t n2 = m_n2;
+if (transient) {
+n2 = m_n1;
+}
+++m_totalCount;
+if (transient) ++m_transientCount;
+m_n2sum += n2;
+//            std::cerr << "ratio for last 10: " <<last10num << "/" << (10 * m_n1) << " = " << float(last10num) / float(10 * m_n1) << " (should be " << m_ratio << ")" << std::endl;
+if (m_totalCount > 50 && m_transientCount < m_totalCount) {
+int fixed = lrintf(m_transientCount * m_n1);
+int idealTotal = lrintf(m_totalCount * m_n1 * m_ratio);
+int idealSquashy = idealTotal - fixed;
+int squashyCount = m_totalCount - m_transientCount;
+n2 = lrintf(idealSquashy / squashyCount);
+#ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
+if (n2 != m_n2) {
+std::cerr << m_n2 << " -> " << n2 << std::endl;
+}
+#endif
+}
+for (size_t c = 0; c < m_channels; ++c) {
+synthesiseBlock(c, m_mashbuf[c],
+c == 0 ? m_modulationbuf : 0,
+m_prevTransient ? m_n1 : m_n2);
+#ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
+std::cerr << "writing first " << m_n2 << " from mashbuf, skipping " << m_n1 << " on inbuf " << std::endl;
+#endif
+m_inbuf[c]->skip(m_n1);
+for (size_t i = 0; i < n2; ++i) {
+if (m_modulationbuf[i] > 0.f) {
+m_mashbuf[c][i] /= m_modulationbuf[i];
+}
+}
+m_outbuf[c]->write(m_mashbuf[c], n2);
+for (size_t i = 0; i < m_wlen - n2; ++i) {
+m_mashbuf[c][i] = m_mashbuf[c][i + n2];
+}
+for (size_t i = m_wlen - n2; i < m_wlen; ++i) {
+m_mashbuf[c][i] = 0.0f;
+}
+}
+m_prevTransient = transient;
+for (size_t i = 0; i < m_wlen - n2; ++i) {
+m_modulationbuf[i] = m_modulationbuf[i + n2];
+	    }
+	    for (size_t i = m_wlen - n2; i < m_wlen; ++i) {
+m_modulationbuf[i] = 0.0f;
+	    }
+if (!transient) m_n2 = n2;
+	}
+#ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
+	std::cerr << "loop ended: inbuf read space " << m_inbuf[0]->getReadSpace() << ", outbuf write space " << m_outbuf[0]->getWriteSpace() << std::endl;
+#endif
+}
+#ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
+std::cerr << "PhaseVocoderTimeStretcher::putInput returning" << std::endl;
+#endif
+//    std::cerr << "ratio: nominal: " << getRatio() << " actual: "
+//              << m_total2 << "/" << m_total1 << " = " << float(m_total2) / float(m_total1) << " ideal: " << m_ratio << std::endl;
+}
+size_t
+PhaseVocoderTimeStretcher::getAvailableOutputSamples() const
+{
+QMutexLocker locker(m_mutex);
+return m_outbuf[0]->getReadSpace();
+}
+void
+PhaseVocoderTimeStretcher::getOutput(float **output, size_t samples)
+{
+QMutexLocker locker(m_mutex);
+if (m_outbuf[0]->getReadSpace() < samples) {
+	std::cerr << "WARNING: PhaseVocoderTimeStretcher::getOutput: not enough data (yet?) (" << m_outbuf[0]->getReadSpace() << " < " << samples << ")" << std::endl;
+	size_t fill = samples - m_outbuf[0]->getReadSpace();
+for (size_t c = 0; c < m_channels; ++c) {
+for (size_t i = 0; i < fill; ++i) {
+output[c][i] = 0.0;
+}
+m_outbuf[c]->read(output[c] + fill, m_outbuf[c]->getReadSpace());
+}
+} else {
+#ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
+	std::cerr << "enough data - writing " << samples << " from outbuf" << std::endl;
+#endif
+for (size_t c = 0; c < m_channels; ++c) {
+m_outbuf[c]->read(output[c], samples);
+}
+}
+#ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
+std::cerr << "PhaseVocoderTimeStretcher::getOutput returning" << std::endl;
+#endif
+}
+void
+PhaseVocoderTimeStretcher::analyseBlock(size_t c, float *buf)
+{
+size_t i;
+// buf contains m_wlen samples
+#ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
+std::cerr << "PhaseVocoderTimeStretcher::analyseBlock (channel " << c << ")" << std::endl;
+#endif
+m_analysisWindow->cut(buf);
+for (i = 0; i < m_wlen/2; ++i) {
+	float temp = buf[i];
+	buf[i] = buf[i + m_wlen/2];
+	buf[i + m_wlen/2] = temp;
+}
+for (i = 0; i < m_wlen; ++i) {
+	m_time[c][i] = buf[i];
+}
+fftf_execute(m_plan[c]); // m_time -> m_freq
+}
+bool
+PhaseVocoderTimeStretcher::isTransient()
+{
+int count = 0;
+for (size_t i = 0; i <= m_wlen/2; ++i) {
+float real = 0.f, imag = 0.f;
+for (size_t c = 0; c < m_channels; ++c) {
+real += m_freq[c][i][0];
+imag += m_freq[c][i][1];
+}
+float sqrmag = (real * real + imag * imag);
+if (m_prevTransientMag[i] > 0.f) {
+float diff = 10.f * log10f(sqrmag / m_prevTransientMag[i]);
+if (diff > 3.f) ++count;
+}
+m_prevTransientMag[i] = sqrmag;
+}
+bool isTransient = false;
+//    if (count > m_transientThreshold &&
+//        count > m_prevTransientScore * 1.2) {
+if (count > m_prevTransientScore &&
+count > m_transientThreshold &&
+count - m_prevTransientScore > int(m_wlen) / 20) {
+isTransient = true;
+//        std::cerr << "isTransient (count = " << count << ", prev = " << m_prevTransientScore << ", diff = " << count - m_prevTransientScore << ", ratio = " << (m_totalCount > 0 ? (float (m_n2sum) / float(m_totalCount * m_n1)) : 1.f) << ", ideal = " << m_ratio << ")" << std::endl;
+//    } else {
+//        std::cerr << " !transient (count = " << count << ", prev = " << m_prevTransientScore << ", diff = " << count - m_prevTransientScore << ")" << std::endl;
+}
+m_prevTransientScore = count;
+return isTransient;
+}
+void
+PhaseVocoderTimeStretcher::synthesiseBlock(size_t c,
+float *out,
+float *modulation,
+size_t lastStep)
+{
+bool unchanged = (lastStep == m_n1);
+for (size_t i = 0; i <= m_wlen/2; ++i) {
+float phase = princargf(atan2f(m_freq[c][i][1], m_freq[c][i][0]));
+float adjustedPhase = phase;
+if (!unchanged) {
+float omega = (2 * M_PI * m_n1 * i) / m_wlen;
+float expectedPhase = m_prevPhase[c][i] + omega;
+float phaseError = princargf(phase - expectedPhase);
+float phaseIncrement = (omega + phaseError) / m_n1;
+adjustedPhase = m_prevAdjustedPhase[c][i] +
+lastStep * phaseIncrement;
+float mag = sqrtf(m_freq[c][i][0] * m_freq[c][i][0] +
+m_freq[c][i][1] * m_freq[c][i][1]);
+float real = mag * cosf(adjustedPhase);
+float imag = mag * sinf(adjustedPhase);
+m_freq[c][i][0] = real;
+m_freq[c][i][1] = imag;
+}
+m_prevPhase[c][i] = phase;
+m_prevAdjustedPhase[c][i] = adjustedPhase;
+}
+fftf_execute(m_iplan[c]); // m_freq -> m_time, inverse fft
+for (size_t i = 0; i < m_wlen/2; ++i) {
+float temp = m_time[c][i];
+m_time[c][i] = m_time[c][i + m_wlen/2];
+m_time[c][i + m_wlen/2] = temp;
+}
+for (size_t i = 0; i < m_wlen; ++i) {
+m_time[c][i] = m_time[c][i] / m_wlen;
+}
+m_synthesisWindow->cut(m_time[c]);
+for (size_t i = 0; i < m_wlen; ++i) {
+out[i] += m_time[c][i];
+}
+if (modulation) {
+float area = m_analysisWindow->getArea();
+for (size_t i = 0; i < m_wlen; ++i) {
+float val = m_synthesisWindow->getValue(i);
+modulation[i] += val * area;
+}
+}
+}

Mercurial > hg > svapp

comparison audioio/PhaseVocoderTimeStretcher.cpp @ 43:3c5756fb6a68