annotate audioio/PhaseVocoderTimeStretcher.h @ 22:80126455d169

* add samplerate parameter to timestretcher (not properly used yet), and update credits
author Chris Cannam
date Fri, 15 Sep 2006 13:35:37 +0000
parents 7da85e0b85e9
children a2ad974b0c8c
rev   line source
Chris@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@0 2
Chris@0 3 /*
Chris@0 4 Sonic Visualiser
Chris@0 5 An audio file viewer and annotation editor.
Chris@0 6 Centre for Digital Music, Queen Mary, University of London.
Chris@0 7 This file copyright 2006 Chris Cannam.
Chris@0 8
Chris@0 9 This program is free software; you can redistribute it and/or
Chris@0 10 modify it under the terms of the GNU General Public License as
Chris@0 11 published by the Free Software Foundation; either version 2 of the
Chris@0 12 License, or (at your option) any later version. See the file
Chris@0 13 COPYING included with this distribution for more information.
Chris@0 14 */
Chris@0 15
Chris@14 16 #ifndef _PHASE_VOCODER_TIME_STRETCHER_H_
Chris@14 17 #define _PHASE_VOCODER_TIME_STRETCHER_H_
Chris@0 18
Chris@0 19 #include "base/Window.h"
Chris@0 20 #include "base/RingBuffer.h"
Chris@0 21
Chris@0 22 #include <fftw3.h>
Chris@0 23
Chris@0 24 /**
Chris@14 25 * A time stretcher that alters the performance speed of audio,
Chris@22 26 * preserving pitch.
Chris@0 27 *
Chris@22 28 * This is based on the straightforward phase vocoder with phase
Chris@22 29 * unwrapping (as in e.g. the DAFX book pp275-), with optional
Chris@22 30 * percussive transient detection to avoid smearing percussive notes
Chris@22 31 * and to resynchronise phases, plus a stream API for real-time
Chris@22 32 * use. Principles and methods from Chris Duxbury (AES 2002 and 2004
Chris@22 33 * thesis), Emmanuel Ravelli (DAFX 2005) and Dan Barry (ISSC 2005, on
Chris@22 34 * percussion detection); code by Chris Cannam.
Chris@0 35 */
Chris@0 36
Chris@14 37 class PhaseVocoderTimeStretcher
Chris@0 38 {
Chris@0 39 public:
Chris@22 40 PhaseVocoderTimeStretcher(size_t sampleRate, // per the changeset note, not properly used yet
Chris@22 41 size_t channels,
Chris@16 42 float ratio,
Chris@16 43 bool sharpen,
Chris@16 44 size_t maxProcessInputBlockSize); // upper bound on the sample count passed to putInput
Chris@14 45 virtual ~PhaseVocoderTimeStretcher();
Chris@0 46
Chris@12 47 /**
Chris@12 48 * Process a block. The input array contains the given number of
Chris@16 49 * samples (on each channel); the output must have space for
Chris@16 50 * lrintf(samples * m_ratio) samples.
Chris@16 51 *
Chris@16 52 * This should work correctly for some ratios, e.g. small powers
Chris@16 53 * of two. For other ratios it may drop samples -- use putInput
Chris@16 54 * in a loop followed by getOutput (when getAvailableOutputSamples
Chris@16 55 * reports enough) instead.
Chris@16 56 *
Chris@16 57 * Do not mix process calls with putInput/getOutput calls.
Chris@12 58 */
Chris@16 59 void process(float **input, float **output, size_t samples);
Chris@16 60
Chris@16 61 /**
Chris@16 62 * Return the number of samples that would need to be added via
Chris@16 63 * putInput in order to provoke the time stretcher into doing some
Chris@16 64 * time stretching and making more output samples available.
Chris@16 65 */
Chris@16 66 size_t getRequiredInputSamples() const;
Chris@16 67
Chris@16 68 /**
Chris@16 69 * Put (and possibly process) a given number of input samples.
Chris@16 70 * The number of samples must not exceed the maxProcessInputBlockSize
Chris@16 71 * value that was passed to the constructor.
Chris@16 72 */
Chris@16 73 void putInput(float **input, size_t samples);
Chris@16 74
Chris@16 75 size_t getAvailableOutputSamples() const; // presumably the number of samples ready for getOutput -- confirm in the implementation
Chris@16 76
Chris@16 77 void getOutput(float **output, size_t samples); // presumably requires getAvailableOutputSamples() >= samples -- confirm
Chris@16 78
Chris@16 79 //!!! TODO: should there also be a reset method?
Chris@0 80
Chris@0 81 /**
Chris@15 82 * Get the hop size for input (m_n1).
Chris@0 83 */
Chris@0 84 size_t getInputIncrement() const { return m_n1; }
Chris@0 85
Chris@0 86 /**
Chris@15 87 * Get the hop size for output (m_n2).
Chris@15 88 */
Chris@16 89 size_t getOutputIncrement() const { return m_n2; }
Chris@15 90
Chris@15 91 /**
Chris@15 92 * Get the window size for FFT processing (m_wlen).
Chris@0 93 */
Chris@0 94 size_t getWindowSize() const { return m_wlen; }
Chris@0 95
Chris@0 96 /**
Chris@15 97 * Get the window type. NOTE(review): the accessor below is commented out.
Chris@0 98 */
Chris@20 99 // WindowType getWindowType() const { return m_window->getType(); } -- disabled; no m_window member is declared in this class
Chris@0 100
Chris@15 101 /**
Chris@16 102 * Get the stretch ratio, computed from the hop sizes as m_n2 / m_n1 (m_ratio is not consulted here).
Chris@15 103 */
Chris@16 104 float getRatio() const { return float(m_n2) / float(m_n1); }
Chris@16 105
Chris@16 106 /**
Chris@16 107 * Return whether this time stretcher will attempt to sharpen transients.
Chris@16 108 */
Chris@16 109 bool getSharpening() const { return m_sharpen; }
Chris@15 110
Chris@15 111 /**
Chris@15 112 * Get the latency added by the time stretcher, in sample frames.
Chris@15 113 */
Chris@0 114 size_t getProcessingLatency() const;
Chris@0 115
Chris@0 116 protected:
Chris@13 117 /**
Chris@13 118 * Process a single phase vocoder frame.
Chris@13 119 * NOTE(review): no single function below matches this text; it appears to cover analyseBlock plus synthesiseBlock -- confirm.
Chris@13 120 * Take m_wlen time-domain source samples from in, perform an FFT,
Chris@13 121 * phase shift, and IFFT, and add the results to out (presumably
Chris@13 122 * overlapping parts of existing data from prior frames).
Chris@13 123 *
Chris@13 124 * Also add to the modulation output the results of windowing a
Chris@13 125 * set of 1s with the resynthesis window -- this can then be used
Chris@13 126 * to ensure the output has the correct magnitude in cases where
Chris@13 127 * the window overlap varies or otherwise results in something
Chris@13 128 * other than a flat sum.
Chris@13 129 */
Chris@20 130
Chris@20 131
Chris@20 132 void analyseBlock(size_t channel, float *in); // analyses `in` for one channel; result goes into m_freq[channel]
Chris@20 133
Chris@20 134 bool isTransient(); // operates on m_freq[0..m_channels-1], i.e. across all channels
Chris@20 135
Chris@20 136 void synthesiseBlock(size_t channel, float *out, float *modulation,
Chris@20 137 size_t lastStep); // presumably resynthesises one channel into out and modulation -- confirm in the implementation
Chris@0 138
Chris@22 139 size_t m_sampleRate; // per the changeset note, not properly used yet
Chris@16 140 size_t m_channels;
Chris@12 141 float m_ratio; // used by the process() doc to size the output; getRatio() does not read it
Chris@16 142 bool m_sharpen; // reported by getSharpening()
Chris@0 143 size_t m_n1; // input hop size, see getInputIncrement()
Chris@0 144 size_t m_n2; // output hop size, see getOutputIncrement()
Chris@0 145 size_t m_wlen; // FFT window size, see getWindowSize()
Chris@20 146 Window<float> *m_analysisWindow;
Chris@20 147 Window<float> *m_synthesisWindow;
Chris@0 148
Chris@21 149 int m_totalCount;
Chris@21 150 int m_transientCount;
Chris@21 151 int m_n2sum;
Chris@21 152
Chris@16 153 float **m_prevPhase;
Chris@16 154 float **m_prevAdjustedPhase;
Chris@16 155
Chris@20 156 float *m_prevTransientMag;
Chris@21 157 int m_prevTransientScore;
Chris@22 158 int m_transientThreshold;
Chris@20 159 bool m_prevTransient;
Chris@20 160
Chris@20 161 float *m_tempbuf;
Chris@20 162 float **m_time;
Chris@20 163 fftwf_complex **m_freq; // indexed by channel; written by analyseBlock, read by isTransient
Chris@20 164 fftwf_plan *m_plan;
Chris@20 165 fftwf_plan *m_iplan;
Chris@0 166
Chris@16 167 RingBuffer<float> **m_inbuf;
Chris@16 168 RingBuffer<float> **m_outbuf;
Chris@16 169 float **m_mashbuf;
Chris@13 170 float *m_modulationbuf;
Chris@0 171 };
Chris@0 172
Chris@0 173 #endif