Chris@0: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
Chris@0: 
Chris@0: /*
Chris@0:     Sonic Visualiser
Chris@0:     An audio file viewer and annotation editor.
Chris@0:     Centre for Digital Music, Queen Mary, University of London.
Chris@0:     This file copyright 2006 Chris Cannam.
Chris@0:     
Chris@0:     This program is free software; you can redistribute it and/or
Chris@0:     modify it under the terms of the GNU General Public License as
Chris@0:     published by the Free Software Foundation; either version 2 of the
Chris@0:     License, or (at your option) any later version.  See the file
Chris@0:     COPYING included with this distribution for more information.
Chris@0: */
Chris@0: 
Chris@14: #ifndef _PHASE_VOCODER_TIME_STRETCHER_H_
Chris@14: #define _PHASE_VOCODER_TIME_STRETCHER_H_
Chris@0: 
Chris@0: #include "base/Window.h"
Chris@0: #include "base/RingBuffer.h"
Chris@0: 
Chris@0: #include <fftw3.h>
Chris@0: 
Chris@0: /**
Chris@14:  * A time stretcher that alters the performance speed of audio,
Chris@22:  * preserving pitch.
Chris@0:  *
Chris@22:  * This is based on the straightforward phase vocoder with phase
Chris@22:  * unwrapping (as in e.g. the DAFX book pp275-), with optional
Chris@22:  * percussive transient detection to avoid smearing percussive notes
Chris@22:  * and resynchronise phases, and with a stream API for real-time
Chris@22:  * use.  Principles and methods from Chris Duxbury, AES 2002 and 2004
Chris@22:  * thesis; Emmanuel Ravelli, DAFX 2005; Dan Barry, ISSC 2005 on
Chris@22:  * percussion detection; code by Chris Cannam.
Chris@0:  */
Chris@0: 
Chris@14: class PhaseVocoderTimeStretcher
Chris@0: {
Chris@0: public:
Chris@22:     PhaseVocoderTimeStretcher(size_t sampleRate, // audio sample rate -- presumably in Hz; TODO confirm
Chris@22:                               size_t channels, // number of audio channels
Chris@16:                               float ratio, // stretch ratio: output length / input length (see process())
Chris@16:                               bool sharpen, // whether to attempt to sharpen transients (see getSharpening())
Chris@16:                               size_t maxProcessInputBlockSize); // largest sample count allowed per putInput() call
Chris@14:     virtual ~PhaseVocoderTimeStretcher(); // NOTE(review): raw pointer members and no copy ctor/assignment declared -- copying is presumably unsafe; confirm in the .cpp
Chris@0: 
Chris@12:     /**
Chris@12:      * Process a block.  The input array contains the given number of
Chris@16:      * samples (on each channel); the output must have space for
Chris@16:      * lrintf(samples * m_ratio).
Chris@16:      * 
Chris@16:      * This should work correctly for some ratios, e.g. small powers
Chris@16:      * of two.  For other ratios it may drop samples -- use putInput
Chris@16:      * in a loop followed by getOutput (when getAvailableOutputSamples
Chris@16:      * reports enough) instead.
Chris@16:      *
Chris@16:      * Do not mix process calls with putInput/getOutput calls.
Chris@12:      */
Chris@16:     void process(float **input, float **output, size_t samples);
Chris@16: 
Chris@16:     /**
Chris@16:      * Return the number of samples that would need to be added via
Chris@16:      * putInput in order to provoke the time stretcher into doing some
Chris@16:      * time stretching and making more output samples available.
Chris@16:      */
Chris@16:     size_t getRequiredInputSamples() const;
Chris@16: 
Chris@16:     /**
Chris@16:      * Put (and possibly process) a given number of input samples.
Chris@16:      * Number must not exceed the maxProcessInputBlockSize passed to
Chris@16:      * constructor.
Chris@16:      */
Chris@16:     void putInput(float **input, size_t samples);
Chris@16: 
Chris@16:     size_t getAvailableOutputSamples() const; // output samples ready for getOutput() -- presumably counted per channel; TODO confirm
Chris@16: 
Chris@16:     void getOutput(float **output, size_t samples); // fetch stretched output; call once getAvailableOutputSamples() reports enough
Chris@16: 
Chris@16:     //!!! and reset?  (no reset method is declared in this class)
Chris@0: 
Chris@0:     /**
Chris@15:      * Get the hop size for input.
Chris@0:      */
Chris@0:     size_t getInputIncrement() const { return m_n1; }
Chris@0: 
Chris@0:     /**
Chris@15:      * Get the hop size for output.
Chris@15:      */
Chris@16:     size_t getOutputIncrement() const { return m_n2; }
Chris@15: 
Chris@15:     /**
Chris@15:      * Get the window size for FFT processing.
Chris@0:      */
Chris@0:     size_t getWindowSize() const { return m_wlen; }
Chris@0: 
Chris@0:     /**
Chris@15:      * Get the window type.  (Accessor currently disabled: it refers to m_window, but only m_analysisWindow and m_synthesisWindow are declared.)
Chris@0:      */
Chris@20: //    WindowType getWindowType() const { return m_window->getType(); }
Chris@0: 
Chris@15:     /**
Chris@16:      * Get the stretch ratio in effect, computed as output hop / input hop (m_n2 / m_n1) rather than read from m_ratio.
Chris@15:      */
Chris@16:     float getRatio() const { return float(m_n2) / float(m_n1); }
Chris@16: 
Chris@16:     /**
Chris@16:      * Return whether this time stretcher will attempt to sharpen transients.
Chris@16:      */
Chris@16:     bool getSharpening() const { return m_sharpen; }
Chris@15: 
Chris@15:     /**
Chris@15:      * Get the latency added by the time stretcher, in sample frames.
Chris@15:      */
Chris@0:     size_t getProcessingLatency() const;
Chris@0: 
Chris@0: protected:
Chris@13:     /**
Chris@13:      * Process a single phase vocoder frame.
Chris@13:      * NOTE(review): this text appears to describe the three methods declared below taken together -- confirm against the .cpp.
Chris@13:      * Take m_wlen time-domain source samples from in, perform an FFT,
Chris@13:      * phase shift, and IFFT, and add the results to out (presumably
Chris@13:      * overlapping parts of existing data from prior frames).
Chris@13:      *
Chris@13:      * Also add to the modulation output the results of windowing a
Chris@13:      * set of 1s with the resynthesis window -- this can then be used
Chris@13:      * to ensure the output has the correct magnitude in cases where
Chris@13:      * the window overlap varies or otherwise results in something
Chris@13:      * other than a flat sum.
Chris@13:      */
Chris@20:     
Chris@20: 
Chris@20:     void analyseBlock(size_t channel, float *in); // analyse the samples at in, writing the result into m_freq[channel]
Chris@20:     
Chris@20:     bool isTransient(); // operates on m_freq[0..m_channels-1]; presumably true when the current frame is judged a percussive transient
Chris@20: 
Chris@20:     void synthesiseBlock(size_t channel, float *out, float *modulation,
Chris@20:                          size_t lastStep); // adds to out and modulation per the frame description above; lastStep semantics are not visible in this header
Chris@0: 
Chris@22:     size_t m_sampleRate; // presumably the constructor's sampleRate
Chris@16:     size_t m_channels; // channel count; upper bound of the m_freq index range used by isTransient()
Chris@12:     float m_ratio; // stretch ratio used to size process() output; getRatio() reports m_n2 / m_n1 instead
Chris@16:     bool m_sharpen; // value returned by getSharpening()
Chris@0:     size_t m_n1; // input hop size (getInputIncrement())
Chris@0:     size_t m_n2; // output hop size (getOutputIncrement())
Chris@0:     size_t m_wlen; // FFT window size (getWindowSize())
Chris@20:     Window<float> *m_analysisWindow; // presumably the window applied before the forward FFT; TODO confirm
Chris@20:     Window<float> *m_synthesisWindow; // presumably the resynthesis window mentioned in the frame description; TODO confirm
Chris@0: 
Chris@21:     int m_totalCount; // NOTE(review): looks like a running count of frames processed -- confirm
Chris@21:     int m_transientCount; // NOTE(review): looks like a running count of frames flagged as transient -- confirm
Chris@21:     int m_n2sum; // NOTE(review): looks like an accumulated sum of output hop sizes -- confirm
Chris@21: 
Chris@16:     float **m_prevPhase; // presumably per-channel phases retained from the previous frame; TODO confirm
Chris@16:     float **m_prevAdjustedPhase; // presumably per-channel adjusted (output) phases from the previous frame; TODO confirm
Chris@16: 
Chris@20:     float *m_prevTransientMag; // transient-detection state; presumably magnitudes from the previous frame
Chris@21:     int  m_prevTransientScore; // transient-detection state; presumably the previous frame's score
Chris@22:     int  m_transientThreshold; // transient-detection state; presumably the score threshold for flagging a transient
Chris@20:     bool m_prevTransient; // transient-detection state; presumably whether the previous frame was flagged
Chris@20: 
Chris@20:     float *m_tempbuf; // scratch buffer; exact use not visible in this header
Chris@20:     float **m_time; // presumably per-channel time-domain FFT buffers; TODO confirm
Chris@20:     fftwf_complex **m_freq; // per-channel frequency-domain data, written by analyseBlock()
Chris@20:     fftwf_plan *m_plan; // presumably forward FFTW plans, one per channel; TODO confirm
Chris@20:     fftwf_plan *m_iplan; // presumably inverse FFTW plans, one per channel; TODO confirm
Chris@0:     
Chris@16:     RingBuffer<float> **m_inbuf; // presumably per-channel input queues fed by putInput(); TODO confirm
Chris@16:     RingBuffer<float> **m_outbuf; // presumably per-channel output queues drained by getOutput(); TODO confirm
Chris@16:     float **m_mashbuf; // presumably per-channel overlap-add accumulators for synthesised output; TODO confirm
Chris@13:     float *m_modulationbuf; // presumably the accumulated window modulation described in the frame comment; TODO confirm
Chris@0: };
Chris@0: 
Chris@0: #endif