lbajardsilogic@0: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ lbajardsilogic@0: lbajardsilogic@0: /* lbajardsilogic@0: Sonic Visualiser lbajardsilogic@0: An audio file viewer and annotation editor. lbajardsilogic@0: Centre for Digital Music, Queen Mary, University of London. lbajardsilogic@0: This file copyright 2006 Chris Cannam and QMUL. lbajardsilogic@0: lbajardsilogic@0: This program is free software; you can redistribute it and/or lbajardsilogic@0: modify it under the terms of the GNU General Public License as lbajardsilogic@0: published by the Free Software Foundation; either version 2 of the lbajardsilogic@0: License, or (at your option) any later version. See the file lbajardsilogic@0: COPYING included with this distribution for more information. lbajardsilogic@0: */ lbajardsilogic@0: lbajardsilogic@0: #ifndef _PHASE_VOCODER_TIME_STRETCHER_H_ lbajardsilogic@0: #define _PHASE_VOCODER_TIME_STRETCHER_H_ lbajardsilogic@0: lbajardsilogic@0: #include "base/Window.h" lbajardsilogic@0: #include "base/RingBuffer.h" lbajardsilogic@0: lbajardsilogic@0: #include "data/fft/FFTapi.h" lbajardsilogic@0: lbajardsilogic@0: #include lbajardsilogic@0: lbajardsilogic@0: /** lbajardsilogic@0: * A time stretcher that alters the performance speed of audio, lbajardsilogic@0: * preserving pitch. lbajardsilogic@0: * lbajardsilogic@0: * This is based on the straightforward phase vocoder with phase lbajardsilogic@0: * unwrapping (as in e.g. the DAFX book pp275-), with optional lbajardsilogic@0: * percussive transient detection to avoid smearing percussive notes lbajardsilogic@0: * and resynchronise phases, and adding a stream API for real-time lbajardsilogic@0: * use. Principles and methods from Chris Duxbury, AES 2002 and 2004 lbajardsilogic@0: * thesis; Emmanuel Ravelli, DAFX 2005; Dan Barry, ISSC 2005 on lbajardsilogic@0: * percussion detection; code by Chris Cannam. lbajardsilogic@0: */ lbajardsilogic@0: lbajardsilogic@0: class PhaseVocoderTimeStretcher lbajardsilogic@0: { lbajardsilogic@0: public: lbajardsilogic@0: PhaseVocoderTimeStretcher(size_t sampleRate, lbajardsilogic@0: size_t channels, lbajardsilogic@0: float ratio, lbajardsilogic@0: bool sharpen, lbajardsilogic@0: size_t maxOutputBlockSize); lbajardsilogic@0: virtual ~PhaseVocoderTimeStretcher(); lbajardsilogic@0: lbajardsilogic@0: /** lbajardsilogic@0: * Return the number of samples that would need to be added via lbajardsilogic@0: * putInput in order to provoke the time stretcher into doing some lbajardsilogic@0: * time stretching and making more output samples available. lbajardsilogic@0: * This will be an estimate, if transient sharpening is on; the lbajardsilogic@0: * caller may need to do the put/get/test cycle more than once. lbajardsilogic@0: */ lbajardsilogic@0: size_t getRequiredInputSamples() const; lbajardsilogic@0: lbajardsilogic@0: /** lbajardsilogic@0: * Put (and possibly process) a given number of input samples. lbajardsilogic@0: * Number should usually equal the value returned from lbajardsilogic@0: * getRequiredInputSamples(). lbajardsilogic@0: */ lbajardsilogic@0: void putInput(float **input, size_t samples); lbajardsilogic@0: lbajardsilogic@0: /** lbajardsilogic@0: * Get the number of processed samples ready for reading. lbajardsilogic@0: */ lbajardsilogic@0: size_t getAvailableOutputSamples() const; lbajardsilogic@0: lbajardsilogic@0: /** lbajardsilogic@0: * Get some processed samples. lbajardsilogic@0: */ lbajardsilogic@0: void getOutput(float **output, size_t samples); lbajardsilogic@0: lbajardsilogic@0: //!!! and reset? lbajardsilogic@0: lbajardsilogic@0: /** lbajardsilogic@0: * Change the time stretch ratio. lbajardsilogic@0: */ lbajardsilogic@0: void setRatio(float ratio); lbajardsilogic@0: lbajardsilogic@0: /** lbajardsilogic@0: * Get the hop size for input. lbajardsilogic@0: */ lbajardsilogic@0: size_t getInputIncrement() const { return m_n1; } lbajardsilogic@0: lbajardsilogic@0: /** lbajardsilogic@0: * Get the hop size for output. lbajardsilogic@0: */ lbajardsilogic@0: size_t getOutputIncrement() const { return m_n2; } lbajardsilogic@0: lbajardsilogic@0: /** lbajardsilogic@0: * Get the window size for FFT processing. lbajardsilogic@0: */ lbajardsilogic@0: size_t getWindowSize() const { return m_wlen; } lbajardsilogic@0: lbajardsilogic@0: /** lbajardsilogic@0: * Get the stretch ratio. lbajardsilogic@0: */ lbajardsilogic@0: float getRatio() const { return float(m_n2) / float(m_n1); } lbajardsilogic@0: lbajardsilogic@0: /** lbajardsilogic@0: * Return whether this time stretcher will attempt to sharpen transients. lbajardsilogic@0: */ lbajardsilogic@0: bool getSharpening() const { return m_sharpen; } lbajardsilogic@0: lbajardsilogic@0: /** lbajardsilogic@0: * Return the number of channels for this time stretcher. lbajardsilogic@0: */ lbajardsilogic@0: size_t getChannelCount() const { return m_channels; } lbajardsilogic@0: lbajardsilogic@0: /** lbajardsilogic@0: * Get the latency added by the time stretcher, in sample frames. lbajardsilogic@0: * This will be exact if transient sharpening is off, or approximate lbajardsilogic@0: * if it is on. lbajardsilogic@0: */ lbajardsilogic@0: size_t getProcessingLatency() const; lbajardsilogic@0: lbajardsilogic@0: protected: lbajardsilogic@0: /** lbajardsilogic@0: * Process a single phase vocoder frame from "in" into lbajardsilogic@0: * m_freq[channel]. lbajardsilogic@0: */ lbajardsilogic@0: void analyseBlock(size_t channel, float *in); // into m_freq[channel] lbajardsilogic@0: lbajardsilogic@0: /** lbajardsilogic@0: * Examine m_freq[0..m_channels-1] and return whether a percussive lbajardsilogic@0: * transient is found. lbajardsilogic@0: */ lbajardsilogic@0: bool isTransient(); lbajardsilogic@0: lbajardsilogic@0: /** lbajardsilogic@0: * Resynthesise from m_freq[channel] adding in to "out", lbajardsilogic@0: * adjusting phases on the basis of a prior step size of lastStep. lbajardsilogic@0: * Also add the window shape in to the modulation array (if lbajardsilogic@0: * present) -- for use in ensuring the output has the correct lbajardsilogic@0: * magnitude afterwards. lbajardsilogic@0: */ lbajardsilogic@0: void synthesiseBlock(size_t channel, float *out, float *modulation, lbajardsilogic@0: size_t lastStep); lbajardsilogic@0: lbajardsilogic@0: void initialise(); lbajardsilogic@0: void calculateParameters(); lbajardsilogic@0: void cleanup(); lbajardsilogic@0: lbajardsilogic@0: bool shouldSharpen() { lbajardsilogic@0: return m_sharpen && (m_ratio > 0.25); lbajardsilogic@0: } lbajardsilogic@0: lbajardsilogic@0: size_t m_sampleRate; lbajardsilogic@0: size_t m_channels; lbajardsilogic@0: size_t m_maxOutputBlockSize; lbajardsilogic@0: float m_ratio; lbajardsilogic@0: bool m_sharpen; lbajardsilogic@0: size_t m_n1; lbajardsilogic@0: size_t m_n2; lbajardsilogic@0: size_t m_wlen; lbajardsilogic@0: Window *m_analysisWindow; lbajardsilogic@0: Window *m_synthesisWindow; lbajardsilogic@0: lbajardsilogic@0: int m_totalCount; lbajardsilogic@0: int m_transientCount; lbajardsilogic@0: int m_n2sum; lbajardsilogic@0: lbajardsilogic@0: float **m_prevPhase; lbajardsilogic@0: float **m_prevAdjustedPhase; lbajardsilogic@0: lbajardsilogic@0: float *m_prevTransientMag; lbajardsilogic@0: int m_prevTransientScore; lbajardsilogic@0: int m_transientThreshold; lbajardsilogic@0: bool m_prevTransient; lbajardsilogic@0: lbajardsilogic@0: float *m_tempbuf; lbajardsilogic@0: float **m_time; lbajardsilogic@0: fftf_complex **m_freq; lbajardsilogic@0: fftf_plan *m_plan; lbajardsilogic@0: fftf_plan *m_iplan; lbajardsilogic@0: lbajardsilogic@0: RingBuffer **m_inbuf; lbajardsilogic@0: RingBuffer **m_outbuf; lbajardsilogic@0: float **m_mashbuf; lbajardsilogic@0: float *m_modulationbuf; lbajardsilogic@0: lbajardsilogic@0: QMutex *m_mutex; lbajardsilogic@0: }; lbajardsilogic@0: lbajardsilogic@0: #endif