annotate audioio/PhaseVocoderTimeStretcher.h @ 16:3715efc38f95

* substantial enhancements to time stretcher: -- use putInput/getOutput methods to ensure the audio source always feeds it enough input, avoiding underruns due to rounding error -- add a percussion detector and an optional "Sharpen" toggle to the main window, which invokes a very basic variable speed timestretcher
author Chris Cannam
date Wed, 13 Sep 2006 17:17:42 +0000
parents cc566264c935
children 67d54627efd3
rev   line source
Chris@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@0 2
Chris@0 3 /*
Chris@0 4 Sonic Visualiser
Chris@0 5 An audio file viewer and annotation editor.
Chris@0 6 Centre for Digital Music, Queen Mary, University of London.
Chris@0 7 This file copyright 2006 Chris Cannam.
Chris@0 8
Chris@0 9 This program is free software; you can redistribute it and/or
Chris@0 10 modify it under the terms of the GNU General Public License as
Chris@0 11 published by the Free Software Foundation; either version 2 of the
Chris@0 12 License, or (at your option) any later version. See the file
Chris@0 13 COPYING included with this distribution for more information.
Chris@0 14 */
Chris@0 15
Chris@14 16 #ifndef _PHASE_VOCODER_TIME_STRETCHER_H_
Chris@14 17 #define _PHASE_VOCODER_TIME_STRETCHER_H_
Chris@0 18
Chris@0 19 #include "base/Window.h"
Chris@0 20 #include "base/RingBuffer.h"
Chris@0 21
Chris@0 22 #include <fftw3.h>
Chris@0 23
Chris@0 24 /**
Chris@14 25 * A time stretcher that alters the performance speed of audio,
Chris@14 26 * preserving pitch. This uses the simple phase vocoder technique
Chris@14 27 * from DAFX pp275-276, adding a block-based stream oriented API.
Chris@0 28 *
Chris@0 29 * Causes significant transient smearing, but sounds good for steady
Chris@0 30 * notes and is generally predictable.
Chris@0 31 */
Chris@0 32
Chris@14 33 class PhaseVocoderTimeStretcher
Chris@0 34 {
Chris@0 35 public:
Chris@16 36 PhaseVocoderTimeStretcher(size_t channels,
Chris@16 37 float ratio,
Chris@16 38 bool sharpen,
Chris@16 39 size_t maxProcessInputBlockSize);
Chris@14 40 virtual ~PhaseVocoderTimeStretcher();
Chris@0 41
Chris@12 42 /**
Chris@12 43 * Process a block. The input array contains the given number of
Chris@16 44 * samples (on each channel); the output must have space for
Chris@16 45 * lrintf(samples * m_ratio) samples.
Chris@16 46 *
Chris@16 47 * This should work correctly for some ratios, e.g. small powers
Chris@16 48 * of two. For other ratios it may drop samples -- use putInput
Chris@16 49 * in a loop followed by getOutput (when getAvailableOutputSamples
Chris@16 50 * reports enough) instead.
Chris@16 51 *
Chris@16 52 * Do not mix process calls with putInput/getOutput calls.
Chris@12 53 */
Chris@16 54 void process(float **input, float **output, size_t samples);
Chris@16 55
Chris@16 56 /**
Chris@16 57 * Return the number of samples that would need to be added via
Chris@16 58 * putInput in order to provoke the time stretcher into doing some
Chris@16 59 * time stretching and making more output samples available.
Chris@16 60 */
Chris@16 61 size_t getRequiredInputSamples() const;
Chris@16 62
Chris@16 63 /**
Chris@16 64 * Put (and possibly process) a given number of input samples.
Chris@16 65 * The number of samples must not exceed the
Chris@16 66 * maxProcessInputBlockSize passed to the constructor.
Chris@16 67 */
Chris@16 68 void putInput(float **input, size_t samples);
Chris@16 69
Chris@16 70 size_t getAvailableOutputSamples() const;
Chris@16 71
Chris@16 72 void getOutput(float **output, size_t samples);
Chris@16 73
Chris@16 74 //!!! TODO: should there also be a reset method?
Chris@0 75
Chris@0 76 /**
Chris@15 77 * Get the hop size for input.
Chris@0 78 */
Chris@0 79 size_t getInputIncrement() const { return m_n1; }
Chris@0 80
Chris@0 81 /**
Chris@15 82 * Get the hop size for output.
Chris@15 83 */
Chris@16 84 size_t getOutputIncrement() const { return m_n2; }
Chris@15 85
Chris@15 86 /**
Chris@15 87 * Get the window size for FFT processing.
Chris@0 88 */
Chris@0 89 size_t getWindowSize() const { return m_wlen; }
Chris@0 90
Chris@0 91 /**
Chris@15 92 * Get the window type.
Chris@0 93 */
Chris@0 94 WindowType getWindowType() const { return m_window->getType(); }
Chris@0 95
Chris@15 96 /**
Chris@16 97 * Get the effective stretch ratio (output hop size / input hop size).
Chris@15 98 */
Chris@16 99 float getRatio() const { return float(m_n2) / float(m_n1); }
Chris@16 100
Chris@16 101 /**
Chris@16 102 * Return whether this time stretcher will attempt to sharpen transients.
Chris@16 103 */
Chris@16 104 bool getSharpening() const { return m_sharpen; }
Chris@15 105
Chris@15 106 /**
Chris@15 107 * Get the latency added by the time stretcher, in sample frames.
Chris@15 108 */
Chris@0 109 size_t getProcessingLatency() const;
Chris@0 110
Chris@0 111 protected:
Chris@13 112 /**
Chris@13 113 * Process a single phase vocoder frame.
Chris@13 114 *
Chris@13 115 * Take m_wlen time-domain source samples from in, perform an FFT,
Chris@13 116 * phase shift, and IFFT, and add the results to out (presumably
Chris@13 117 * overlapping parts of existing data from prior frames).
Chris@13 118 *
Chris@13 119 * Also add to the modulation output the results of windowing a
Chris@13 120 * set of 1s with the resynthesis window -- this can then be used
Chris@13 121 * to ensure the output has the correct magnitude in cases where
Chris@13 122 * the window overlap varies or otherwise results in something
Chris@13 123 * other than a flat sum.
Chris@13 124 */
Chris@16 125 bool processBlock(size_t channel,
Chris@16 126 float *in, float *out,
Chris@16 127 float *modulation,
Chris@16 128 bool knownPercussive);
Chris@0 129
Chris@16 130 size_t m_channels;
Chris@12 131 float m_ratio;
Chris@16 132 bool m_sharpen;
Chris@0 133 size_t m_n1;
Chris@0 134 size_t m_n2;
Chris@0 135 size_t m_wlen;
Chris@0 136 Window<float> *m_window;
Chris@0 137
Chris@16 138 float **m_prevPhase;
Chris@16 139 float **m_prevAdjustedPhase;
Chris@16 140 float **m_prevMag;
Chris@16 141 int *m_prevPercussiveCount;
Chris@16 142
Chris@16 143 float *m_dbuf;
Chris@0 144 fftwf_complex *m_time;
Chris@0 145 fftwf_complex *m_freq;
Chris@0 146 fftwf_plan m_plan;
Chris@0 147 fftwf_plan m_iplan;
Chris@0 148
Chris@16 149 RingBuffer<float> **m_inbuf;
Chris@16 150 RingBuffer<float> **m_outbuf;
Chris@16 151 float **m_mashbuf;
Chris@13 152 float *m_modulationbuf;
Chris@0 153 };
Chris@0 154
Chris@0 155 #endif