annotate audioio/PhaseVocoderTimeStretcher.h @ 43:3c5756fb6a68

* Move some things around to facilitate plundering libraries for other applications without needing to duplicate so much code. sv/osc -> data/osc sv/audioio -> audioio sv/transform -> plugin/transform sv/document -> document (will rename to framework in next commit)
author Chris Cannam
date Wed, 24 Oct 2007 16:34:31 +0000
parents
children 0ffab5d7e3e1
rev   line source
Chris@43 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@43 2
Chris@43 3 /*
Chris@43 4 Sonic Visualiser
Chris@43 5 An audio file viewer and annotation editor.
Chris@43 6 Centre for Digital Music, Queen Mary, University of London.
Chris@43 7 This file copyright 2006 Chris Cannam and QMUL.
Chris@43 8
Chris@43 9 This program is free software; you can redistribute it and/or
Chris@43 10 modify it under the terms of the GNU General Public License as
Chris@43 11 published by the Free Software Foundation; either version 2 of the
Chris@43 12 License, or (at your option) any later version. See the file
Chris@43 13 COPYING included with this distribution for more information.
Chris@43 14 */
Chris@43 15
Chris@43 16 #ifndef _PHASE_VOCODER_TIME_STRETCHER_H_
Chris@43 17 #define _PHASE_VOCODER_TIME_STRETCHER_H_
Chris@43 18
Chris@43 19 #include "base/Window.h"
Chris@43 20 #include "base/RingBuffer.h"
Chris@43 21
Chris@43 22 #include "data/fft/FFTapi.h"
Chris@43 23
Chris@43 24 #include <QMutex>
Chris@43 25
Chris@43 26 /**
Chris@43 27 * A time stretcher that alters the performance speed of audio,
Chris@43 28 * preserving pitch.
Chris@43 29 *
Chris@43 30 * This is based on the straightforward phase vocoder with phase
Chris@43 31 * unwrapping (as in e.g. the DAFX book pp275-), with optional
Chris@43 32 * percussive transient detection to avoid smearing percussive notes
Chris@43 33 * and resynchronise phases, and adding a stream API for real-time
Chris@43 34 * use. Principles and methods from Chris Duxbury, AES 2002 and 2004
Chris@43 35 * thesis; Emmanuel Ravelli, DAFX 2005; Dan Barry, ISSC 2005 on
Chris@43 36 * percussion detection; code by Chris Cannam.
Chris@43 37 */
Chris@43 38
Chris@43 39 class PhaseVocoderTimeStretcher
Chris@43 40 {
Chris@43 41 public:
Chris@43 42 PhaseVocoderTimeStretcher(size_t sampleRate,
Chris@43 43 size_t channels,
Chris@43 44 float ratio, // requested stretch ratio; see also setRatio() / getRatio()
Chris@43 45 bool sharpen, // request transient sharpening; see getSharpening()
Chris@43 46 size_t maxOutputBlockSize);
Chris@43 47 virtual ~PhaseVocoderTimeStretcher();
Chris@43 48
Chris@43 49 /**
Chris@43 50 * Return the number of samples that would need to be added via
Chris@43 51 * putInput in order to provoke the time stretcher into doing some
Chris@43 52 * time stretching and making more output samples available.
Chris@43 53 * This will be only an estimate if transient sharpening is on; the
Chris@43 54 * caller may need to repeat the put/get/test cycle more than once.
Chris@43 55 */
Chris@43 56 size_t getRequiredInputSamples() const;
Chris@43 57
Chris@43 58 /**
Chris@43 59 * Put (and possibly process) a given number of input samples.
Chris@43 60 * The number should usually equal the value returned from
Chris@43 61 * getRequiredInputSamples().
Chris@43 62 */
Chris@43 63 void putInput(float **input, size_t samples); // NOTE(review): presumably input[channel][sample] -- confirm in the .cpp
Chris@43 64
Chris@43 65 /**
Chris@43 66 * Get the number of processed samples ready for reading.
Chris@43 67 */
Chris@43 68 size_t getAvailableOutputSamples() const;
Chris@43 69
Chris@43 70 /**
Chris@43 71 * Get some processed samples.
Chris@43 72 */
Chris@43 73 void getOutput(float **output, size_t samples); // NOTE(review): presumably output[channel][sample] -- confirm in the .cpp
Chris@43 74
Chris@43 75 //!!! TODO: should there also be a reset() method?
Chris@43 76
Chris@43 77 /**
Chris@43 78 * Change the time stretch ratio.
Chris@43 79 */
Chris@43 80 void setRatio(float ratio);
Chris@43 81
Chris@43 82 /**
Chris@43 83 * Get the hop size for input (m_n1), in samples.
Chris@43 84 */
Chris@43 85 size_t getInputIncrement() const { return m_n1; }
Chris@43 86
Chris@43 87 /**
Chris@43 88 * Get the hop size for output (m_n2), in samples.
Chris@43 89 */
Chris@43 90 size_t getOutputIncrement() const { return m_n2; }
Chris@43 91
Chris@43 92 /**
Chris@43 93 * Get the window size for FFT processing (m_wlen).
Chris@43 94 */
Chris@43 95 size_t getWindowSize() const { return m_wlen; }
Chris@43 96
Chris@43 97 /**
Chris@43 98 * Get the stretch ratio, computed as output hop / input hop (m_n2 / m_n1) rather than read from m_ratio.
Chris@43 99 */
Chris@43 100 float getRatio() const { return float(m_n2) / float(m_n1); }
Chris@43 101
Chris@43 102 /**
Chris@43 103 * Return the sharpening flag (m_sharpen); shouldSharpen() additionally requires m_ratio > 0.25.
Chris@43 104 */
Chris@43 105 bool getSharpening() const { return m_sharpen; }
Chris@43 106
Chris@43 107 /**
Chris@43 108 * Return the number of channels for this time stretcher.
Chris@43 109 */
Chris@43 110 size_t getChannelCount() const { return m_channels; }
Chris@43 111
Chris@43 112 /**
Chris@43 113 * Get the latency added by the time stretcher, in sample frames.
Chris@43 114 * This will be exact if transient sharpening is off, or approximate
Chris@43 115 * if it is on.
Chris@43 116 */
Chris@43 117 size_t getProcessingLatency() const;
Chris@43 118
Chris@43 119 protected:
Chris@43 120 /**
Chris@43 121 * Process a single phase vocoder frame from "in" into
Chris@43 122 * m_freq[channel].
Chris@43 123 */
Chris@43 124 void analyseBlock(size_t channel, float *in); // into m_freq[channel]
Chris@43 125
Chris@43 126 /**
Chris@43 127 * Examine m_freq[0..m_channels-1] and return whether a percussive
Chris@43 128 * transient is found.
Chris@43 129 */
Chris@43 130 bool isTransient();
Chris@43 131
Chris@43 132 /**
Chris@43 133 * Resynthesise from m_freq[channel], adding the result in to "out" and
Chris@43 134 * adjusting phases on the basis of a prior step size of lastStep.
Chris@43 135 * Also add the window shape in to the modulation array (if
Chris@43 136 * present) -- for use in ensuring the output has the correct
Chris@43 137 * magnitude afterwards.
Chris@43 138 */
Chris@43 139 void synthesiseBlock(size_t channel, float *out, float *modulation,
Chris@43 140 size_t lastStep);
Chris@43 141
Chris@43 142 void initialise();
Chris@43 143 void calculateParameters();
Chris@43 144 void cleanup();
Chris@43 145
Chris@43 146 bool shouldSharpen() const { // const: only reads members, so const query methods may call it
Chris@43 147 return m_sharpen && (m_ratio > 0.25); // true only when the flag is set and the ratio exceeds 0.25
Chris@43 148 }
Chris@43 149
Chris@43 150 size_t m_sampleRate;
Chris@43 151 size_t m_channels; // channel count, as reported by getChannelCount()
Chris@43 152 size_t m_maxOutputBlockSize;
Chris@43 153 float m_ratio; // ratio tested by shouldSharpen(); getRatio() reports m_n2 / m_n1 instead
Chris@43 154 bool m_sharpen; // sharpening flag, as reported by getSharpening()
Chris@43 155 size_t m_n1; // input hop size, see getInputIncrement()
Chris@43 156 size_t m_n2; // output hop size, see getOutputIncrement()
Chris@43 157 size_t m_wlen; // FFT window size, see getWindowSize()
Chris@43 158 Window<float> *m_analysisWindow;
Chris@43 159 Window<float> *m_synthesisWindow;
Chris@43 160
Chris@43 161 int m_totalCount;
Chris@43 162 int m_transientCount;
Chris@43 163 int m_n2sum;
Chris@43 164
Chris@43 165 float **m_prevPhase;
Chris@43 166 float **m_prevAdjustedPhase;
Chris@43 167
Chris@43 168 float *m_prevTransientMag; // NOTE(review): presumably state kept between isTransient() calls -- confirm
Chris@43 169 int m_prevTransientScore;
Chris@43 170 int m_transientThreshold;
Chris@43 171 bool m_prevTransient;
Chris@43 172
Chris@43 173 float *m_tempbuf;
Chris@43 174 float **m_time;
Chris@43 175 fftf_complex **m_freq; // per-channel frame: written by analyseBlock(), read by isTransient() and synthesiseBlock()
Chris@43 176 fftf_plan *m_plan;
Chris@43 177 fftf_plan *m_iplan;
Chris@43 178
Chris@43 179 RingBuffer<float> **m_inbuf;
Chris@43 180 RingBuffer<float> **m_outbuf;
Chris@43 181 float **m_mashbuf;
Chris@43 182 float *m_modulationbuf;
Chris@43 183
Chris@43 184 QMutex *m_mutex; // NOTE(review): raw pointers here look owned (presumably freed in cleanup()); no copy ctor/assignment is declared -- confirm instances are never copied
Chris@43 185 };
Chris@43 186
Chris@43 187 #endif