comparison audioio/PhaseVocoderTimeStretcher.cpp @ 25:e74f508db18c

* Add setRatio method to the time stretcher, and make it possible to change the ratio without having to construct and replace the time stretcher. This means we can do it seamlessly. Add a lot more ratios to the time stretch control in the main window.
author Chris Cannam
date Fri, 15 Sep 2006 15:35:06 +0000
parents 80126455d169
children d88d117e0c34
comparison
equal deleted inserted replaced
24:ae0731ba8e67 25:e74f508db18c
16 #include "PhaseVocoderTimeStretcher.h" 16 #include "PhaseVocoderTimeStretcher.h"
17 17
18 #include <iostream> 18 #include <iostream>
19 #include <cassert> 19 #include <cassert>
20 20
21 #include <QMutexLocker>
22
21 //#define DEBUG_PHASE_VOCODER_TIME_STRETCHER 1 23 //#define DEBUG_PHASE_VOCODER_TIME_STRETCHER 1
22 24
23 PhaseVocoderTimeStretcher::PhaseVocoderTimeStretcher(size_t sampleRate, 25 PhaseVocoderTimeStretcher::PhaseVocoderTimeStretcher(size_t sampleRate,
24 size_t channels, 26 size_t channels,
25 float ratio, 27 float ratio,
26 bool sharpen, 28 bool sharpen,
27 size_t maxProcessInputBlockSize) : 29 size_t maxProcessInputBlockSize) :
28 m_sampleRate(sampleRate), 30 m_sampleRate(sampleRate),
29 m_channels(channels), 31 m_channels(channels),
32 m_maxProcessInputBlockSize(maxProcessInputBlockSize),
30 m_ratio(ratio), 33 m_ratio(ratio),
31 m_sharpen(sharpen), 34 m_sharpen(sharpen),
32 m_totalCount(0), 35 m_totalCount(0),
33 m_transientCount(0), 36 m_transientCount(0),
34 m_n2sum(0) 37 m_n2sum(0),
35 { 38 m_mutex(new QMutex())
36 m_wlen = 1024; 39 {
37 40 initialise();
38 //!!! In transient sharpening mode, we need to pick the window 41
39 //length so as to be more or less fixed in audio duration (i.e. we 42 std::cerr << "PhaseVocoderTimeStretcher: channels = " << m_channels
40 //need to exploit the sample rate) 43 << ", ratio = " << m_ratio
41 44 << ", n1 = " << m_n1 << ", n2 = " << m_n2 << ", wlen = "
42 //!!! have to work out the relationship between wlen and transient 45 << m_wlen << ", max = " << maxProcessInputBlockSize
43 //threshold 46 << ", outbuflen = " << m_outbuf[0]->getSize() << std::endl;
44 47 }
45 if (ratio < 1) { 48
46 if (ratio < 0.4) { 49 PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher()
47 m_n1 = 1024; 50 {
48 m_wlen = 2048; 51 std::cerr << "PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher" << std::endl;
49 } else if (ratio < 0.8) { 52
50 m_n1 = 512; 53 cleanup();
51 } else { 54
52 m_n1 = 256; 55 delete m_mutex;
53 } 56 }
54 if (m_sharpen) { 57
55 m_wlen = 2048; 58 void
56 } 59 PhaseVocoderTimeStretcher::initialise()
57 m_n2 = m_n1 * ratio; 60 {
58 } else { 61 std::cerr << "PhaseVocoderTimeStretcher::initialise" << std::endl;
59 if (ratio > 2) { 62
60 m_n2 = 512; 63 calculateParameters();
61 m_wlen = 4096;
62 } else if (ratio > 1.6) {
63 m_n2 = 384;
64 m_wlen = 2048;
65 } else {
66 m_n2 = 256;
67 }
68 if (m_sharpen) {
69 if (m_wlen < 2048) m_wlen = 2048;
70 }
71 m_n1 = m_n2 / ratio;
72 }
73
74 m_transientThreshold = m_wlen / 4.5;
75 64
76 m_analysisWindow = new Window<float>(HanningWindow, m_wlen); 65 m_analysisWindow = new Window<float>(HanningWindow, m_wlen);
77 m_synthesisWindow = new Window<float>(HanningWindow, m_wlen); 66 m_synthesisWindow = new Window<float>(HanningWindow, m_wlen);
78 67
79 m_prevPhase = new float *[m_channels]; 68 m_prevPhase = new float *[m_channels];
108 m_plan[c] = fftwf_plan_dft_r2c_1d(m_wlen, m_time[c], m_freq[c], FFTW_ESTIMATE); 97 m_plan[c] = fftwf_plan_dft_r2c_1d(m_wlen, m_time[c], m_freq[c], FFTW_ESTIMATE);
109 m_iplan[c] = fftwf_plan_dft_c2r_1d(m_wlen, m_freq[c], m_time[c], FFTW_ESTIMATE); 98 m_iplan[c] = fftwf_plan_dft_c2r_1d(m_wlen, m_freq[c], m_time[c], FFTW_ESTIMATE);
110 99
111 m_inbuf[c] = new RingBuffer<float>(m_wlen); 100 m_inbuf[c] = new RingBuffer<float>(m_wlen);
112 m_outbuf[c] = new RingBuffer<float> 101 m_outbuf[c] = new RingBuffer<float>
113 (lrintf((maxProcessInputBlockSize + m_wlen) * ratio)); 102 (lrintf((m_maxProcessInputBlockSize + m_wlen) * m_ratio));
114 103
115 m_mashbuf[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen); 104 m_mashbuf[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen);
116 105
117 for (int i = 0; i < m_wlen; ++i) { 106 for (int i = 0; i < m_wlen; ++i) {
118 m_mashbuf[c][i] = 0.0; 107 m_mashbuf[c][i] = 0.0;
129 } 118 }
130 119
131 for (int i = 0; i <= m_wlen/2; ++i) { 120 for (int i = 0; i <= m_wlen/2; ++i) {
132 m_prevTransientMag[i] = 0.0; 121 m_prevTransientMag[i] = 0.0;
133 } 122 }
134 123 }
135 std::cerr << "PhaseVocoderTimeStretcher: channels = " << channels 124
136 << ", ratio = " << ratio 125 void
137 << ", n1 = " << m_n1 << ", n2 = " << m_n2 << ", wlen = " 126 PhaseVocoderTimeStretcher::calculateParameters()
138 << m_wlen << ", max = " << maxProcessInputBlockSize 127 {
139 << ", outbuflen = " << m_outbuf[0]->getSize() << std::endl; 128 std::cerr << "PhaseVocoderTimeStretcher::calculateParameters" << std::endl;
140 } 129
141 130 m_wlen = 1024;
142 PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher() 131
143 { 132 //!!! In transient sharpening mode, we need to pick the window
144 std::cerr << "PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher" << std::endl; 133 //length so as to be more or less fixed in audio duration (i.e. we
134 //need to exploit the sample rate)
135
136 //!!! have to work out the relationship between wlen and transient
137 //threshold
138
139 if (m_ratio < 1) {
140 if (m_ratio < 0.4) {
141 m_n1 = 1024;
142 m_wlen = 2048;
143 } else if (m_ratio < 0.8) {
144 m_n1 = 512;
145 } else {
146 m_n1 = 256;
147 }
148 if (m_sharpen) {
149 m_wlen = 2048;
150 }
151 m_n2 = m_n1 * m_ratio;
152 } else {
153 if (m_ratio > 2) {
154 m_n2 = 512;
155 m_wlen = 4096;
156 } else if (m_ratio > 1.6) {
157 m_n2 = 384;
158 m_wlen = 2048;
159 } else {
160 m_n2 = 256;
161 }
162 if (m_sharpen) {
163 if (m_wlen < 2048) m_wlen = 2048;
164 }
165 m_n1 = m_n2 / m_ratio;
166 }
167
168 m_transientThreshold = m_wlen / 4.5;
169 }
170
171 void
172 PhaseVocoderTimeStretcher::cleanup()
173 {
174 std::cerr << "PhaseVocoderTimeStretcher::cleanup" << std::endl;
145 175
146 for (size_t c = 0; c < m_channels; ++c) { 176 for (size_t c = 0; c < m_channels; ++c) {
147 177
148 fftwf_destroy_plan(m_plan[c]); 178 fftwf_destroy_plan(m_plan[c]);
149 fftwf_destroy_plan(m_iplan[c]); 179 fftwf_destroy_plan(m_iplan[c]);
175 205
176 delete m_analysisWindow; 206 delete m_analysisWindow;
177 delete m_synthesisWindow; 207 delete m_synthesisWindow;
178 } 208 }
179 209
210 void
211 PhaseVocoderTimeStretcher::setRatio(float ratio)
212 {
213 QMutexLocker locker(m_mutex);
214
215 float formerRatio = m_ratio;
216 size_t formerWlen = m_wlen;
217
218 m_ratio = ratio;
219
220 calculateParameters();
221
222 if (m_wlen == formerWlen) {
223
224 // This is the only container whose size depends on m_ratio
225
226 RingBuffer<float> **newout = new RingBuffer<float> *[m_channels];
227
228 size_t formerSize = m_outbuf[0]->getSize();
229 size_t newSize = lrintf((m_maxProcessInputBlockSize + m_wlen) * m_ratio);
230 size_t ready = m_outbuf[0]->getReadSpace();
231
232 for (size_t c = 0; c < m_channels; ++c) {
233 newout[c] = new RingBuffer<float>(newSize);
234 }
235
236 if (ready > 0) {
237
238 size_t copy = std::min(ready, newSize);
239 float *tmp = new float[ready];
240
241 for (size_t c = 0; c < m_channels; ++c) {
242 m_outbuf[c]->read(tmp, ready);
243 newout[c]->write(tmp + ready - copy, copy);
244 }
245
246 delete[] tmp;
247 }
248
249 for (size_t c = 0; c < m_channels; ++c) {
250 delete m_outbuf[c];
251 }
252
253 delete[] m_outbuf;
254 m_outbuf = newout;
255
256 } else {
257
258 std::cerr << "wlen changed" << std::endl;
259 cleanup();
260 initialise();
261 }
262 }
263
180 size_t 264 size_t
181 PhaseVocoderTimeStretcher::getProcessingLatency() const 265 PhaseVocoderTimeStretcher::getProcessingLatency() const
182 { 266 {
183 return getWindowSize() - getInputIncrement(); 267 return getWindowSize() - getInputIncrement();
184 } 268 }
191 } 275 }
192 276
193 size_t 277 size_t
194 PhaseVocoderTimeStretcher::getRequiredInputSamples() const 278 PhaseVocoderTimeStretcher::getRequiredInputSamples() const
195 { 279 {
280 QMutexLocker locker(m_mutex);
281
196 if (m_inbuf[0]->getReadSpace() >= m_wlen) return 0; 282 if (m_inbuf[0]->getReadSpace() >= m_wlen) return 0;
197 return m_wlen - m_inbuf[0]->getReadSpace(); 283 return m_wlen - m_inbuf[0]->getReadSpace();
198 } 284 }
199 285
200 void 286 void
201 PhaseVocoderTimeStretcher::putInput(float **input, size_t samples) 287 PhaseVocoderTimeStretcher::putInput(float **input, size_t samples)
202 { 288 {
289 QMutexLocker locker(m_mutex);
290
203 // We need to add samples from input to our internal buffer. When 291 // We need to add samples from input to our internal buffer. When
204 // we have m_windowSize samples in the buffer, we can process it, 292 // we have m_windowSize samples in the buffer, we can process it,
205 // move the samples back by m_n1 and write the output onto our 293 // move the samples back by m_n1 and write the output onto our
206 // internal output buffer. If we have (samples * ratio) samples 294 // internal output buffer. If we have (samples * ratio) samples
207 // in that, we can write m_n2 of them back to output and return 295 // in that, we can write m_n2 of them back to output and return
341 } 429 }
342 430
343 size_t 431 size_t
344 PhaseVocoderTimeStretcher::getAvailableOutputSamples() const 432 PhaseVocoderTimeStretcher::getAvailableOutputSamples() const
345 { 433 {
434 QMutexLocker locker(m_mutex);
435
346 return m_outbuf[0]->getReadSpace(); 436 return m_outbuf[0]->getReadSpace();
347 } 437 }
348 438
349 void 439 void
350 PhaseVocoderTimeStretcher::getOutput(float **output, size_t samples) 440 PhaseVocoderTimeStretcher::getOutput(float **output, size_t samples)
351 { 441 {
442 QMutexLocker locker(m_mutex);
443
352 if (m_outbuf[0]->getReadSpace() < samples) { 444 if (m_outbuf[0]->getReadSpace() < samples) {
353 std::cerr << "WARNING: PhaseVocoderTimeStretcher::getOutput: not enough data (yet?) (" << m_outbuf[0]->getReadSpace() << " < " << samples << ")" << std::endl; 445 std::cerr << "WARNING: PhaseVocoderTimeStretcher::getOutput: not enough data (yet?) (" << m_outbuf[0]->getReadSpace() << " < " << samples << ")" << std::endl;
354 size_t fill = samples - m_outbuf[0]->getReadSpace(); 446 size_t fill = samples - m_outbuf[0]->getReadSpace();
355 for (size_t c = 0; c < m_channels; ++c) { 447 for (size_t c = 0; c < m_channels; ++c) {
356 for (size_t i = 0; i < fill; ++i) { 448 for (size_t i = 0; i < fill; ++i) {