comparison audioio/PhaseVocoderTimeStretcher.cpp @ 31:37af203dbd15

* Buffer size fixes in the time stretcher, to avoid running out of input data for large or small ratios
author Chris Cannam
date Thu, 21 Sep 2006 09:43:41 +0000
parents d88d117e0c34
children e3b32dc5180b
comparison
equal deleted inserted replaced
30:56e1d4242bb4 31:37af203dbd15
24 24
25 PhaseVocoderTimeStretcher::PhaseVocoderTimeStretcher(size_t sampleRate, 25 PhaseVocoderTimeStretcher::PhaseVocoderTimeStretcher(size_t sampleRate,
26 size_t channels, 26 size_t channels,
27 float ratio, 27 float ratio,
28 bool sharpen, 28 bool sharpen,
29 size_t maxProcessInputBlockSize) : 29 size_t maxOutputBlockSize) :
30 m_sampleRate(sampleRate), 30 m_sampleRate(sampleRate),
31 m_channels(channels), 31 m_channels(channels),
32 m_maxProcessInputBlockSize(maxProcessInputBlockSize), 32 m_maxOutputBlockSize(maxOutputBlockSize),
33 m_ratio(ratio), 33 m_ratio(ratio),
34 m_sharpen(sharpen), 34 m_sharpen(sharpen),
35 m_totalCount(0), 35 m_totalCount(0),
36 m_transientCount(0), 36 m_transientCount(0),
37 m_n2sum(0), 37 m_n2sum(0),
90 (m_wlen / 2 + 1)); 90 (m_wlen / 2 + 1));
91 91
92 m_plan[c] = fftwf_plan_dft_r2c_1d(m_wlen, m_time[c], m_freq[c], FFTW_ESTIMATE); 92 m_plan[c] = fftwf_plan_dft_r2c_1d(m_wlen, m_time[c], m_freq[c], FFTW_ESTIMATE);
93 m_iplan[c] = fftwf_plan_dft_c2r_1d(m_wlen, m_freq[c], m_time[c], FFTW_ESTIMATE); 93 m_iplan[c] = fftwf_plan_dft_c2r_1d(m_wlen, m_freq[c], m_time[c], FFTW_ESTIMATE);
94 94
95 m_inbuf[c] = new RingBuffer<float>(m_wlen);
96 m_outbuf[c] = new RingBuffer<float> 95 m_outbuf[c] = new RingBuffer<float>
97 (lrintf((m_maxProcessInputBlockSize + m_wlen) * m_ratio)); 96 ((m_maxOutputBlockSize + m_wlen) * 2);
98 97 m_inbuf[c] = new RingBuffer<float>
98 (lrintf(m_outbuf[c]->getSize() / m_ratio) + m_wlen);
99
100 std::cerr << "making inbuf size " << m_inbuf[c]->getSize() << " (outbuf size is " << m_outbuf[c]->getSize() << ", ratio " << m_ratio << ")" << std::endl;
101
102
99 m_mashbuf[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen); 103 m_mashbuf[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen);
100 104
101 for (int i = 0; i < m_wlen; ++i) { 105 for (size_t i = 0; i < m_wlen; ++i) {
102 m_mashbuf[c][i] = 0.0; 106 m_mashbuf[c][i] = 0.0;
103 } 107 }
104 108
105 for (int i = 0; i <= m_wlen/2; ++i) { 109 for (size_t i = 0; i <= m_wlen/2; ++i) {
106 m_prevPhase[c][i] = 0.0; 110 m_prevPhase[c][i] = 0.0;
107 m_prevAdjustedPhase[c][i] = 0.0; 111 m_prevAdjustedPhase[c][i] = 0.0;
108 } 112 }
109 } 113 }
110 114
111 for (int i = 0; i < m_wlen; ++i) { 115 for (size_t i = 0; i < m_wlen; ++i) {
112 m_modulationbuf[i] = 0.0; 116 m_modulationbuf[i] = 0.0;
113 } 117 }
114 118
115 for (int i = 0; i <= m_wlen/2; ++i) { 119 for (size_t i = 0; i <= m_wlen/2; ++i) {
116 m_prevTransientMag[i] = 0.0; 120 m_prevTransientMag[i] = 0.0;
117 } 121 }
118 } 122 }
119 123
120 void 124 void
141 m_n1 = 256; 145 m_n1 = 256;
142 } 146 }
143 if (m_sharpen) { 147 if (m_sharpen) {
144 m_wlen = 2048; 148 m_wlen = 2048;
145 } 149 }
146 m_n2 = m_n1 * m_ratio; 150 m_n2 = lrintf(m_n1 * m_ratio);
147 } else { 151 } else {
148 if (m_ratio > 2) { 152 if (m_ratio > 2) {
149 m_n2 = 512; 153 m_n2 = 512;
150 m_wlen = 4096; 154 m_wlen = 4096;
151 } else if (m_ratio > 1.6) { 155 } else if (m_ratio > 1.6) {
155 m_n2 = 256; 159 m_n2 = 256;
156 } 160 }
157 if (m_sharpen) { 161 if (m_sharpen) {
158 if (m_wlen < 2048) m_wlen = 2048; 162 if (m_wlen < 2048) m_wlen = 2048;
159 } 163 }
160 m_n1 = m_n2 / m_ratio; 164 m_n1 = lrintf(m_n2 / m_ratio);
161 } 165 }
162 166
163 m_transientThreshold = m_wlen / 4.5; 167 m_transientThreshold = lrintf(m_wlen / 4.5);
164 168
165 m_totalCount = 0; 169 m_totalCount = 0;
166 m_transientCount = 0; 170 m_transientCount = 0;
167 m_n2sum = 0; 171 m_n2sum = 0;
168 172
169 173
170 std::cerr << "PhaseVocoderTimeStretcher: channels = " << m_channels 174 std::cerr << "PhaseVocoderTimeStretcher: channels = " << m_channels
171 << ", ratio = " << m_ratio 175 << ", ratio = " << m_ratio
172 << ", n1 = " << m_n1 << ", n2 = " << m_n2 << ", wlen = " 176 << ", n1 = " << m_n1 << ", n2 = " << m_n2 << ", wlen = "
173 << m_wlen << ", max = " << m_maxProcessInputBlockSize << std::endl; 177 << m_wlen << ", max = " << m_maxOutputBlockSize << std::endl;
174 // << ", outbuflen = " << m_outbuf[0]->getSize() << std::endl; 178 // << ", outbuflen = " << m_outbuf[0]->getSize() << std::endl;
175 } 179 }
176 180
177 void 181 void
178 PhaseVocoderTimeStretcher::cleanup() 182 PhaseVocoderTimeStretcher::cleanup()
216 void 220 void
217 PhaseVocoderTimeStretcher::setRatio(float ratio) 221 PhaseVocoderTimeStretcher::setRatio(float ratio)
218 { 222 {
219 QMutexLocker locker(m_mutex); 223 QMutexLocker locker(m_mutex);
220 224
221 float formerRatio = m_ratio;
222 size_t formerWlen = m_wlen; 225 size_t formerWlen = m_wlen;
223
224 m_ratio = ratio; 226 m_ratio = ratio;
225 227
226 calculateParameters(); 228 calculateParameters();
227 229
228 if (m_wlen == formerWlen) { 230 if (m_wlen == formerWlen) {
229 231
230 // This is the only container whose size depends on m_ratio 232 // This is the only container whose size depends on m_ratio
231 233
232 RingBuffer<float> **newout = new RingBuffer<float> *[m_channels]; 234 RingBuffer<float> **newin = new RingBuffer<float> *[m_channels];
233 235
234 size_t formerSize = m_outbuf[0]->getSize(); 236 size_t formerSize = m_inbuf[0]->getSize();
235 size_t newSize = lrintf((m_maxProcessInputBlockSize + m_wlen) * m_ratio); 237 size_t newSize = lrintf(m_outbuf[0]->getSize() / m_ratio) + m_wlen;
236 size_t ready = m_outbuf[0]->getReadSpace(); 238
237 239 std::cerr << "resizing inbuf from " << formerSize << " to "
238 for (size_t c = 0; c < m_channels; ++c) { 240 << newSize << " (outbuf size is " << m_outbuf[0]->getSize() << ", ratio " << m_ratio << ")" << std::endl;
239 newout[c] = new RingBuffer<float>(newSize); 241
240 } 242 if (formerSize != newSize) {
241 243
242 if (ready > 0) { 244 size_t ready = m_inbuf[0]->getReadSpace();
243
244 size_t copy = std::min(ready, newSize);
245 float *tmp = new float[ready];
246 245
247 for (size_t c = 0; c < m_channels; ++c) { 246 for (size_t c = 0; c < m_channels; ++c) {
248 m_outbuf[c]->read(tmp, ready); 247 newin[c] = new RingBuffer<float>(newSize);
249 newout[c]->write(tmp + ready - copy, copy); 248 }
250 } 249
251 250 if (ready > 0) {
252 delete[] tmp; 251
253 } 252 size_t copy = std::min(ready, newSize);
254 253 float *tmp = new float[ready];
255 for (size_t c = 0; c < m_channels; ++c) { 254
256 delete m_outbuf[c]; 255 for (size_t c = 0; c < m_channels; ++c) {
257 } 256 m_inbuf[c]->read(tmp, ready);
258 257 newin[c]->write(tmp + ready - copy, copy);
259 delete[] m_outbuf; 258 }
260 m_outbuf = newout; 259
260 delete[] tmp;
261 }
262
263 for (size_t c = 0; c < m_channels; ++c) {
264 delete m_inbuf[c];
265 }
266
267 delete[] m_inbuf;
268 m_inbuf = newin;
269 }
261 270
262 } else { 271 } else {
263 272
264 std::cerr << "wlen changed" << std::endl; 273 std::cerr << "wlen changed" << std::endl;
265 cleanup(); 274 cleanup();
269 278
270 size_t 279 size_t
271 PhaseVocoderTimeStretcher::getProcessingLatency() const 280 PhaseVocoderTimeStretcher::getProcessingLatency() const
272 { 281 {
273 return getWindowSize() - getInputIncrement(); 282 return getWindowSize() - getInputIncrement();
274 }
275
276 void
277 PhaseVocoderTimeStretcher::process(float **input, float **output, size_t samples)
278 {
279 putInput(input, samples);
280 getOutput(output, lrintf(samples * m_ratio));
281 } 283 }
282 284
283 size_t 285 size_t
284 PhaseVocoderTimeStretcher::getRequiredInputSamples() const 286 PhaseVocoderTimeStretcher::getRequiredInputSamples() const
285 { 287 {
315 size_t writable = m_inbuf[0]->getWriteSpace(); 317 size_t writable = m_inbuf[0]->getWriteSpace();
316 writable = std::min(writable, samples - consumed); 318 writable = std::min(writable, samples - consumed);
317 319
318 if (writable == 0) { 320 if (writable == 0) {
319 //!!! then what? I don't think this should happen, but 321 //!!! then what? I don't think this should happen, but
320 std::cerr << "WARNING: PhaseVocoderTimeStretcher::putInput: writable == 0" << std::endl; 322 std::cerr << "WARNING: PhaseVocoderTimeStretcher::putInput: writable == 0 (inbuf has " << m_inbuf[0]->getReadSpace() << " samples available for reading, space for " << m_inbuf[0]->getWriteSpace() << " more)" << std::endl;
321 break; 323 if (m_inbuf[0]->getReadSpace() < m_wlen ||
322 } 324 m_outbuf[0]->getWriteSpace() < m_n2) {
325 std::cerr << "Outbuf has space for " << m_outbuf[0]->getWriteSpace() << " (n2 = " << m_n2 << "), won't be able to process" << std::endl;
326 break;
327 }
328 } else {
323 329
324 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER 330 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
325 std::cerr << "writing " << writable << " from index " << consumed << " to inbuf, consumed will be " << consumed + writable << std::endl; 331 std::cerr << "writing " << writable << " from index " << consumed << " to inbuf, consumed will be " << consumed + writable << std::endl;
326 #endif 332 #endif
327 333
328 for (size_t c = 0; c < m_channels; ++c) { 334 for (size_t c = 0; c < m_channels; ++c) {
329 m_inbuf[c]->write(input[c] + consumed, writable); 335 m_inbuf[c]->write(input[c] + consumed, writable);
330 } 336 }
331 consumed += writable; 337 consumed += writable;
338 }
332 339
333 while (m_inbuf[0]->getReadSpace() >= m_wlen && 340 while (m_inbuf[0]->getReadSpace() >= m_wlen &&
334 m_outbuf[0]->getWriteSpace() >= m_n2) { 341 m_outbuf[0]->getWriteSpace() >= m_n2) {
335 342
336 // We know we have at least m_wlen samples available 343 // We know we have at least m_wlen samples available
499 bool 506 bool
500 PhaseVocoderTimeStretcher::isTransient() 507 PhaseVocoderTimeStretcher::isTransient()
501 { 508 {
502 int count = 0; 509 int count = 0;
503 510
504 for (int i = 0; i <= m_wlen/2; ++i) { 511 for (size_t i = 0; i <= m_wlen/2; ++i) {
505 512
506 float real = 0.f, imag = 0.f; 513 float real = 0.f, imag = 0.f;
507 514
508 for (size_t c = 0; c < m_channels; ++c) { 515 for (size_t c = 0; c < m_channels; ++c) {
509 real += m_freq[c][i][0]; 516 real += m_freq[c][i][0];
544 PhaseVocoderTimeStretcher::synthesiseBlock(size_t c, 551 PhaseVocoderTimeStretcher::synthesiseBlock(size_t c,
545 float *out, 552 float *out,
546 float *modulation, 553 float *modulation,
547 size_t lastStep) 554 size_t lastStep)
548 { 555 {
549 int i;
550
551 bool unchanged = (lastStep == m_n1); 556 bool unchanged = (lastStep == m_n1);
552 557
553 for (i = 0; i <= m_wlen/2; ++i) { 558 for (size_t i = 0; i <= m_wlen/2; ++i) {
554 559
555 float phase = princargf(atan2f(m_freq[c][i][1], m_freq[c][i][0])); 560 float phase = princargf(atan2f(m_freq[c][i][1], m_freq[c][i][0]));
556 float adjustedPhase = phase; 561 float adjustedPhase = phase;
557 562
558 if (!unchanged) { 563 if (!unchanged) {
581 m_prevAdjustedPhase[c][i] = adjustedPhase; 586 m_prevAdjustedPhase[c][i] = adjustedPhase;
582 } 587 }
583 588
584 fftwf_execute(m_iplan[c]); // m_freq -> m_time, inverse fft 589 fftwf_execute(m_iplan[c]); // m_freq -> m_time, inverse fft
585 590
586 for (i = 0; i < m_wlen/2; ++i) { 591 for (size_t i = 0; i < m_wlen/2; ++i) {
587 float temp = m_time[c][i]; 592 float temp = m_time[c][i];
588 m_time[c][i] = m_time[c][i + m_wlen/2]; 593 m_time[c][i] = m_time[c][i + m_wlen/2];
589 m_time[c][i + m_wlen/2] = temp; 594 m_time[c][i + m_wlen/2] = temp;
590 } 595 }
591 596
592 for (i = 0; i < m_wlen; ++i) { 597 for (size_t i = 0; i < m_wlen; ++i) {
593 m_time[c][i] = m_time[c][i] / m_wlen; 598 m_time[c][i] = m_time[c][i] / m_wlen;
594 } 599 }
595 600
596 m_synthesisWindow->cut(m_time[c]); 601 m_synthesisWindow->cut(m_time[c]);
597 602
598 for (i = 0; i < m_wlen; ++i) { 603 for (size_t i = 0; i < m_wlen; ++i) {
599 out[i] += m_time[c][i]; 604 out[i] += m_time[c][i];
600 } 605 }
601 606
602 if (modulation) { 607 if (modulation) {
603 608
604 float area = m_analysisWindow->getArea(); 609 float area = m_analysisWindow->getArea();
605 610
606 for (i = 0; i < m_wlen; ++i) { 611 for (size_t i = 0; i < m_wlen; ++i) {
607 float val = m_synthesisWindow->getValue(i); 612 float val = m_synthesisWindow->getValue(i);
608 modulation[i] += val * area; 613 modulation[i] += val * area;
609 } 614 }
610 } 615 }
611 } 616 }