annotate audioio/PhaseVocoderTimeStretcher.h @ 20:e125f0dde7a3

* restructure time stretcher somewhat so as to do transient detection on mixed stereo signal instead of just one channel
author Chris Cannam
date Thu, 14 Sep 2006 13:41:56 +0000
parents f17798a555df
children 7da85e0b85e9
rev   line source
Chris@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@0 2
Chris@0 3 /*
Chris@0 4 Sonic Visualiser
Chris@0 5 An audio file viewer and annotation editor.
Chris@0 6 Centre for Digital Music, Queen Mary, University of London.
Chris@0 7 This file copyright 2006 Chris Cannam.
Chris@0 8
Chris@0 9 This program is free software; you can redistribute it and/or
Chris@0 10 modify it under the terms of the GNU General Public License as
Chris@0 11 published by the Free Software Foundation; either version 2 of the
Chris@0 12 License, or (at your option) any later version. See the file
Chris@0 13 COPYING included with this distribution for more information.
Chris@0 14 */
Chris@0 15
Chris@14 16 #ifndef _PHASE_VOCODER_TIME_STRETCHER_H_
Chris@14 17 #define _PHASE_VOCODER_TIME_STRETCHER_H_
Chris@0 18
Chris@0 19 #include "base/Window.h"
Chris@0 20 #include "base/RingBuffer.h"
Chris@0 21
Chris@0 22 #include <fftw3.h>
Chris@0 23
Chris@0 24 /**
Chris@14 25 * A time stretcher that alters the performance speed of audio,
Chris@14 26 * preserving pitch. This uses the simple phase vocoder technique
Chris@14 27 * from DAFX, pp. 275-276, adding a block-based, stream-oriented API.
Chris@0 28 *
Chris@0 29 * Causes significant transient smearing, but sounds good for steady
Chris@0 30 * notes and is generally predictable.
Chris@0 31 */
Chris@0 32
Chris@14 33 class PhaseVocoderTimeStretcher
Chris@0 34 {
Chris@0 35 public:
Chris@16 36 PhaseVocoderTimeStretcher(size_t channels, // channel count (samples are handled "on each channel")
Chris@16 37 float ratio, // requested stretch ratio
Chris@16 38 bool sharpen, // whether to attempt to sharpen transients
Chris@16 39 size_t maxProcessInputBlockSize); // upper bound on samples per putInput call
Chris@14 40 virtual ~PhaseVocoderTimeStretcher(); // NOTE(review): no copy ctor/assignment is declared despite the raw-pointer members -- confirm instances are never copied
Chris@0 41
Chris@12 42 /**
Chris@12 43 * Process a block. The input array contains the given number of
Chris@16 44 * samples (on each channel); the output must have space for
Chris@16 45 * lrintf(samples * m_ratio).
Chris@16 46 *
Chris@16 47 * This should work correctly for some ratios, e.g. small powers
Chris@16 48 * of two. For other ratios it may drop samples -- use putInput
Chris@16 49 * in a loop followed by getOutput (when getAvailableOutputSamples
Chris@16 50 * reports enough) instead.
Chris@16 51 *
Chris@16 52 * Do not mix process calls with putInput/getOutput calls.
Chris@12 53 */
Chris@16 54 void process(float **input, float **output, size_t samples);
Chris@16 55
Chris@16 56 /**
Chris@16 57 * Return the number of samples that would need to be added via
Chris@16 58 * putInput in order to provoke the time stretcher into doing some
Chris@16 59 * time stretching and making more output samples available.
Chris@16 60 */
Chris@16 61 size_t getRequiredInputSamples() const;
Chris@16 62
Chris@16 63 /**
Chris@16 64 * Put (and possibly process) a given number of input samples.
Chris@16 65 * Number must not exceed the maxProcessInputBlockSize passed to
Chris@16 66 * constructor.
Chris@16 67 */
Chris@16 68 void putInput(float **input, size_t samples);
Chris@16 69
Chris@16 70 size_t getAvailableOutputSamples() const; // output sample count to check before calling getOutput (see process() notes)
Chris@16 71
Chris@16 72 void getOutput(float **output, size_t samples); // fetch output; call once getAvailableOutputSamples reports enough
Chris@16 73
Chris@16 74 //!!! and reset? (TODO: no reset method is declared in this class)
Chris@0 75
Chris@0 76 /**
Chris@15 77 * Get the hop size for input.
Chris@0 78 */
Chris@0 79 size_t getInputIncrement() const { return m_n1; }
Chris@0 80
Chris@0 81 /**
Chris@15 82 * Get the hop size for output.
Chris@15 83 */
Chris@16 84 size_t getOutputIncrement() const { return m_n2; }
Chris@15 85
Chris@15 86 /**
Chris@15 87 * Get the window size for FFT processing.
Chris@0 88 */
Chris@0 89 size_t getWindowSize() const { return m_wlen; }
Chris@0 90
Chris@0 91 /**
Chris@15 92 * Get the window type. NOTE(review): the accessor below is commented out; it names m_window, which is not among the members declared here.
Chris@0 93 */
Chris@20 94 // WindowType getWindowType() const { return m_window->getType(); }
Chris@0 95
Chris@15 96 /**
Chris@16 97 * Get the stretch ratio, computed as output hop size over input hop size (m_n2 / m_n1).
Chris@15 98 */
Chris@16 99 float getRatio() const { return float(m_n2) / float(m_n1); }
Chris@16 100
Chris@16 101 /**
Chris@16 102 * Return whether this time stretcher will attempt to sharpen transients.
Chris@16 103 */
Chris@16 104 bool getSharpening() const { return m_sharpen; }
Chris@15 105
Chris@15 106 /**
Chris@15 107 * Get the latency added by the time stretcher, in sample frames.
Chris@15 108 */
Chris@0 109 size_t getProcessingLatency() const;
Chris@0 110
Chris@0 111 protected:
Chris@13 112 /**
Chris@13 113 * Process a single phase vocoder frame.
Chris@13 114 *
Chris@13 115 * NOTE(review): this comment predates the analyseBlock / isTransient /
Chris@13 116 * synthesiseBlock declarations below and presumably describes their
Chris@13 117 * combined effect -- confirm against the implementation:
Chris@13 118 * take m_wlen time-domain source samples from in, perform an FFT,
Chris@13 119 * phase shift, and IFFT, and add the results to out (presumably
Chris@13 120 * overlapping parts of existing data from prior frames). Also add to
Chris@13 121 * the modulation output the results of windowing a set of 1s with the
Chris@13 122 * resynthesis window, so the output magnitude can be corrected where
Chris@13 123 * the window overlap does not give a flat sum.
Chris@13 124 */
Chris@20 125
Chris@20 126
Chris@20 127 void analyseBlock(size_t channel, float *in); // into m_freq[channel]
Chris@20 128
Chris@20 129 bool isTransient(); // operates on m_freq[0..m_channels-1]
Chris@20 130
Chris@20 131 void synthesiseBlock(size_t channel, float *out, float *modulation,
Chris@20 132 size_t lastStep); // NOTE(review): presumably resynthesises from m_freq[channel] -- confirm
Chris@0 133
Chris@16 134 size_t m_channels; // upper bound of the m_freq index range used by isTransient
Chris@12 135 float m_ratio; // ratio named in the process() doc; note getRatio() reports m_n2 / m_n1 instead
Chris@16 136 bool m_sharpen; // reported by getSharpening()
Chris@0 137 size_t m_n1; // input hop size (getInputIncrement)
Chris@0 138 size_t m_n2; // output hop size (getOutputIncrement)
Chris@0 139 size_t m_wlen; // FFT window size (getWindowSize)
Chris@20 140 Window<float> *m_analysisWindow;
Chris@20 141 Window<float> *m_synthesisWindow;
Chris@0 142
Chris@16 143 float **m_prevPhase;
Chris@16 144 float **m_prevAdjustedPhase;
Chris@16 145
Chris@20 146 float *m_prevTransientMag;
Chris@20 147 int m_prevTransientCount;
Chris@20 148 bool m_prevTransient;
Chris@20 149
Chris@20 150 float *m_tempbuf;
Chris@20 151 float **m_time;
Chris@20 152 fftwf_complex **m_freq; // indexed by channel; written by analyseBlock, read by isTransient
Chris@20 153 fftwf_plan *m_plan;
Chris@20 154 fftwf_plan *m_iplan;
Chris@0 155
Chris@16 156 RingBuffer<float> **m_inbuf;
Chris@16 157 RingBuffer<float> **m_outbuf;
Chris@16 158 float **m_mashbuf;
Chris@13 159 float *m_modulationbuf;
Chris@0 160 };
Chris@0 161
Chris@0 162 #endif