Mercurial > hg > sonic-visualiser
comparison audioio/PhaseVocoderTimeStretcher.cpp @ 31:37af203dbd15
* Buffer size fixes in the time stretcher, to avoid running out of input data
for large or small ratios
author | Chris Cannam |
---|---|
date | Thu, 21 Sep 2006 09:43:41 +0000 |
parents | d88d117e0c34 |
children | e3b32dc5180b |
comparison
equal
deleted
inserted
replaced
30:56e1d4242bb4 | 31:37af203dbd15 |
---|---|
24 | 24 |
25 PhaseVocoderTimeStretcher::PhaseVocoderTimeStretcher(size_t sampleRate, | 25 PhaseVocoderTimeStretcher::PhaseVocoderTimeStretcher(size_t sampleRate, |
26 size_t channels, | 26 size_t channels, |
27 float ratio, | 27 float ratio, |
28 bool sharpen, | 28 bool sharpen, |
29 size_t maxProcessInputBlockSize) : | 29 size_t maxOutputBlockSize) : |
30 m_sampleRate(sampleRate), | 30 m_sampleRate(sampleRate), |
31 m_channels(channels), | 31 m_channels(channels), |
32 m_maxProcessInputBlockSize(maxProcessInputBlockSize), | 32 m_maxOutputBlockSize(maxOutputBlockSize), |
33 m_ratio(ratio), | 33 m_ratio(ratio), |
34 m_sharpen(sharpen), | 34 m_sharpen(sharpen), |
35 m_totalCount(0), | 35 m_totalCount(0), |
36 m_transientCount(0), | 36 m_transientCount(0), |
37 m_n2sum(0), | 37 m_n2sum(0), |
90 (m_wlen / 2 + 1)); | 90 (m_wlen / 2 + 1)); |
91 | 91 |
92 m_plan[c] = fftwf_plan_dft_r2c_1d(m_wlen, m_time[c], m_freq[c], FFTW_ESTIMATE); | 92 m_plan[c] = fftwf_plan_dft_r2c_1d(m_wlen, m_time[c], m_freq[c], FFTW_ESTIMATE); |
93 m_iplan[c] = fftwf_plan_dft_c2r_1d(m_wlen, m_freq[c], m_time[c], FFTW_ESTIMATE); | 93 m_iplan[c] = fftwf_plan_dft_c2r_1d(m_wlen, m_freq[c], m_time[c], FFTW_ESTIMATE); |
94 | 94 |
95 m_inbuf[c] = new RingBuffer<float>(m_wlen); | |
96 m_outbuf[c] = new RingBuffer<float> | 95 m_outbuf[c] = new RingBuffer<float> |
97 (lrintf((m_maxProcessInputBlockSize + m_wlen) * m_ratio)); | 96 ((m_maxOutputBlockSize + m_wlen) * 2); |
98 | 97 m_inbuf[c] = new RingBuffer<float> |
98 (lrintf(m_outbuf[c]->getSize() / m_ratio) + m_wlen); | |
99 | |
100 std::cerr << "making inbuf size " << m_inbuf[c]->getSize() << " (outbuf size is " << m_outbuf[c]->getSize() << ", ratio " << m_ratio << ")" << std::endl; | |
101 | |
102 | |
99 m_mashbuf[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen); | 103 m_mashbuf[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen); |
100 | 104 |
101 for (int i = 0; i < m_wlen; ++i) { | 105 for (size_t i = 0; i < m_wlen; ++i) { |
102 m_mashbuf[c][i] = 0.0; | 106 m_mashbuf[c][i] = 0.0; |
103 } | 107 } |
104 | 108 |
105 for (int i = 0; i <= m_wlen/2; ++i) { | 109 for (size_t i = 0; i <= m_wlen/2; ++i) { |
106 m_prevPhase[c][i] = 0.0; | 110 m_prevPhase[c][i] = 0.0; |
107 m_prevAdjustedPhase[c][i] = 0.0; | 111 m_prevAdjustedPhase[c][i] = 0.0; |
108 } | 112 } |
109 } | 113 } |
110 | 114 |
111 for (int i = 0; i < m_wlen; ++i) { | 115 for (size_t i = 0; i < m_wlen; ++i) { |
112 m_modulationbuf[i] = 0.0; | 116 m_modulationbuf[i] = 0.0; |
113 } | 117 } |
114 | 118 |
115 for (int i = 0; i <= m_wlen/2; ++i) { | 119 for (size_t i = 0; i <= m_wlen/2; ++i) { |
116 m_prevTransientMag[i] = 0.0; | 120 m_prevTransientMag[i] = 0.0; |
117 } | 121 } |
118 } | 122 } |
119 | 123 |
120 void | 124 void |
141 m_n1 = 256; | 145 m_n1 = 256; |
142 } | 146 } |
143 if (m_sharpen) { | 147 if (m_sharpen) { |
144 m_wlen = 2048; | 148 m_wlen = 2048; |
145 } | 149 } |
146 m_n2 = m_n1 * m_ratio; | 150 m_n2 = lrintf(m_n1 * m_ratio); |
147 } else { | 151 } else { |
148 if (m_ratio > 2) { | 152 if (m_ratio > 2) { |
149 m_n2 = 512; | 153 m_n2 = 512; |
150 m_wlen = 4096; | 154 m_wlen = 4096; |
151 } else if (m_ratio > 1.6) { | 155 } else if (m_ratio > 1.6) { |
155 m_n2 = 256; | 159 m_n2 = 256; |
156 } | 160 } |
157 if (m_sharpen) { | 161 if (m_sharpen) { |
158 if (m_wlen < 2048) m_wlen = 2048; | 162 if (m_wlen < 2048) m_wlen = 2048; |
159 } | 163 } |
160 m_n1 = m_n2 / m_ratio; | 164 m_n1 = lrintf(m_n2 / m_ratio); |
161 } | 165 } |
162 | 166 |
163 m_transientThreshold = m_wlen / 4.5; | 167 m_transientThreshold = lrintf(m_wlen / 4.5); |
164 | 168 |
165 m_totalCount = 0; | 169 m_totalCount = 0; |
166 m_transientCount = 0; | 170 m_transientCount = 0; |
167 m_n2sum = 0; | 171 m_n2sum = 0; |
168 | 172 |
169 | 173 |
170 std::cerr << "PhaseVocoderTimeStretcher: channels = " << m_channels | 174 std::cerr << "PhaseVocoderTimeStretcher: channels = " << m_channels |
171 << ", ratio = " << m_ratio | 175 << ", ratio = " << m_ratio |
172 << ", n1 = " << m_n1 << ", n2 = " << m_n2 << ", wlen = " | 176 << ", n1 = " << m_n1 << ", n2 = " << m_n2 << ", wlen = " |
173 << m_wlen << ", max = " << m_maxProcessInputBlockSize << std::endl; | 177 << m_wlen << ", max = " << m_maxOutputBlockSize << std::endl; |
174 // << ", outbuflen = " << m_outbuf[0]->getSize() << std::endl; | 178 // << ", outbuflen = " << m_outbuf[0]->getSize() << std::endl; |
175 } | 179 } |
176 | 180 |
177 void | 181 void |
178 PhaseVocoderTimeStretcher::cleanup() | 182 PhaseVocoderTimeStretcher::cleanup() |
216 void | 220 void |
217 PhaseVocoderTimeStretcher::setRatio(float ratio) | 221 PhaseVocoderTimeStretcher::setRatio(float ratio) |
218 { | 222 { |
219 QMutexLocker locker(m_mutex); | 223 QMutexLocker locker(m_mutex); |
220 | 224 |
221 float formerRatio = m_ratio; | |
222 size_t formerWlen = m_wlen; | 225 size_t formerWlen = m_wlen; |
223 | |
224 m_ratio = ratio; | 226 m_ratio = ratio; |
225 | 227 |
226 calculateParameters(); | 228 calculateParameters(); |
227 | 229 |
228 if (m_wlen == formerWlen) { | 230 if (m_wlen == formerWlen) { |
229 | 231 |
230 // This is the only container whose size depends on m_ratio | 232 // This is the only container whose size depends on m_ratio |
231 | 233 |
232 RingBuffer<float> **newout = new RingBuffer<float> *[m_channels]; | 234 RingBuffer<float> **newin = new RingBuffer<float> *[m_channels]; |
233 | 235 |
234 size_t formerSize = m_outbuf[0]->getSize(); | 236 size_t formerSize = m_inbuf[0]->getSize(); |
235 size_t newSize = lrintf((m_maxProcessInputBlockSize + m_wlen) * m_ratio); | 237 size_t newSize = lrintf(m_outbuf[0]->getSize() / m_ratio) + m_wlen; |
236 size_t ready = m_outbuf[0]->getReadSpace(); | 238 |
237 | 239 std::cerr << "resizing inbuf from " << formerSize << " to " |
238 for (size_t c = 0; c < m_channels; ++c) { | 240 << newSize << " (outbuf size is " << m_outbuf[0]->getSize() << ", ratio " << m_ratio << ")" << std::endl; |
239 newout[c] = new RingBuffer<float>(newSize); | 241 |
240 } | 242 if (formerSize != newSize) { |
241 | 243 |
242 if (ready > 0) { | 244 size_t ready = m_inbuf[0]->getReadSpace(); |
243 | |
244 size_t copy = std::min(ready, newSize); | |
245 float *tmp = new float[ready]; | |
246 | 245 |
247 for (size_t c = 0; c < m_channels; ++c) { | 246 for (size_t c = 0; c < m_channels; ++c) { |
248 m_outbuf[c]->read(tmp, ready); | 247 newin[c] = new RingBuffer<float>(newSize); |
249 newout[c]->write(tmp + ready - copy, copy); | 248 } |
250 } | 249 |
251 | 250 if (ready > 0) { |
252 delete[] tmp; | 251 |
253 } | 252 size_t copy = std::min(ready, newSize); |
254 | 253 float *tmp = new float[ready]; |
255 for (size_t c = 0; c < m_channels; ++c) { | 254 |
256 delete m_outbuf[c]; | 255 for (size_t c = 0; c < m_channels; ++c) { |
257 } | 256 m_inbuf[c]->read(tmp, ready); |
258 | 257 newin[c]->write(tmp + ready - copy, copy); |
259 delete[] m_outbuf; | 258 } |
260 m_outbuf = newout; | 259 |
260 delete[] tmp; | |
261 } | |
262 | |
263 for (size_t c = 0; c < m_channels; ++c) { | |
264 delete m_inbuf[c]; | |
265 } | |
266 | |
267 delete[] m_inbuf; | |
268 m_inbuf = newin; | |
269 } | |
261 | 270 |
262 } else { | 271 } else { |
263 | 272 |
264 std::cerr << "wlen changed" << std::endl; | 273 std::cerr << "wlen changed" << std::endl; |
265 cleanup(); | 274 cleanup(); |
269 | 278 |
270 size_t | 279 size_t |
271 PhaseVocoderTimeStretcher::getProcessingLatency() const | 280 PhaseVocoderTimeStretcher::getProcessingLatency() const |
272 { | 281 { |
273 return getWindowSize() - getInputIncrement(); | 282 return getWindowSize() - getInputIncrement(); |
274 } | |
275 | |
276 void | |
277 PhaseVocoderTimeStretcher::process(float **input, float **output, size_t samples) | |
278 { | |
279 putInput(input, samples); | |
280 getOutput(output, lrintf(samples * m_ratio)); | |
281 } | 283 } |
282 | 284 |
283 size_t | 285 size_t |
284 PhaseVocoderTimeStretcher::getRequiredInputSamples() const | 286 PhaseVocoderTimeStretcher::getRequiredInputSamples() const |
285 { | 287 { |
315 size_t writable = m_inbuf[0]->getWriteSpace(); | 317 size_t writable = m_inbuf[0]->getWriteSpace(); |
316 writable = std::min(writable, samples - consumed); | 318 writable = std::min(writable, samples - consumed); |
317 | 319 |
318 if (writable == 0) { | 320 if (writable == 0) { |
319 //!!! then what? I don't think this should happen, but | 321 //!!! then what? I don't think this should happen, but |
320 std::cerr << "WARNING: PhaseVocoderTimeStretcher::putInput: writable == 0" << std::endl; | 322 std::cerr << "WARNING: PhaseVocoderTimeStretcher::putInput: writable == 0 (inbuf has " << m_inbuf[0]->getReadSpace() << " samples available for reading, space for " << m_inbuf[0]->getWriteSpace() << " more)" << std::endl; |
321 break; | 323 if (m_inbuf[0]->getReadSpace() < m_wlen || |
322 } | 324 m_outbuf[0]->getWriteSpace() < m_n2) { |
325 std::cerr << "Outbuf has space for " << m_outbuf[0]->getWriteSpace() << " (n2 = " << m_n2 << "), won't be able to process" << std::endl; | |
326 break; | |
327 } | |
328 } else { | |
323 | 329 |
324 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER | 330 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER |
325 std::cerr << "writing " << writable << " from index " << consumed << " to inbuf, consumed will be " << consumed + writable << std::endl; | 331 std::cerr << "writing " << writable << " from index " << consumed << " to inbuf, consumed will be " << consumed + writable << std::endl; |
326 #endif | 332 #endif |
327 | 333 |
328 for (size_t c = 0; c < m_channels; ++c) { | 334 for (size_t c = 0; c < m_channels; ++c) { |
329 m_inbuf[c]->write(input[c] + consumed, writable); | 335 m_inbuf[c]->write(input[c] + consumed, writable); |
330 } | 336 } |
331 consumed += writable; | 337 consumed += writable; |
338 } | |
332 | 339 |
333 while (m_inbuf[0]->getReadSpace() >= m_wlen && | 340 while (m_inbuf[0]->getReadSpace() >= m_wlen && |
334 m_outbuf[0]->getWriteSpace() >= m_n2) { | 341 m_outbuf[0]->getWriteSpace() >= m_n2) { |
335 | 342 |
336 // We know we have at least m_wlen samples available | 343 // We know we have at least m_wlen samples available |
499 bool | 506 bool |
500 PhaseVocoderTimeStretcher::isTransient() | 507 PhaseVocoderTimeStretcher::isTransient() |
501 { | 508 { |
502 int count = 0; | 509 int count = 0; |
503 | 510 |
504 for (int i = 0; i <= m_wlen/2; ++i) { | 511 for (size_t i = 0; i <= m_wlen/2; ++i) { |
505 | 512 |
506 float real = 0.f, imag = 0.f; | 513 float real = 0.f, imag = 0.f; |
507 | 514 |
508 for (size_t c = 0; c < m_channels; ++c) { | 515 for (size_t c = 0; c < m_channels; ++c) { |
509 real += m_freq[c][i][0]; | 516 real += m_freq[c][i][0]; |
544 PhaseVocoderTimeStretcher::synthesiseBlock(size_t c, | 551 PhaseVocoderTimeStretcher::synthesiseBlock(size_t c, |
545 float *out, | 552 float *out, |
546 float *modulation, | 553 float *modulation, |
547 size_t lastStep) | 554 size_t lastStep) |
548 { | 555 { |
549 int i; | |
550 | |
551 bool unchanged = (lastStep == m_n1); | 556 bool unchanged = (lastStep == m_n1); |
552 | 557 |
553 for (i = 0; i <= m_wlen/2; ++i) { | 558 for (size_t i = 0; i <= m_wlen/2; ++i) { |
554 | 559 |
555 float phase = princargf(atan2f(m_freq[c][i][1], m_freq[c][i][0])); | 560 float phase = princargf(atan2f(m_freq[c][i][1], m_freq[c][i][0])); |
556 float adjustedPhase = phase; | 561 float adjustedPhase = phase; |
557 | 562 |
558 if (!unchanged) { | 563 if (!unchanged) { |
581 m_prevAdjustedPhase[c][i] = adjustedPhase; | 586 m_prevAdjustedPhase[c][i] = adjustedPhase; |
582 } | 587 } |
583 | 588 |
584 fftwf_execute(m_iplan[c]); // m_freq -> m_time, inverse fft | 589 fftwf_execute(m_iplan[c]); // m_freq -> m_time, inverse fft |
585 | 590 |
586 for (i = 0; i < m_wlen/2; ++i) { | 591 for (size_t i = 0; i < m_wlen/2; ++i) { |
587 float temp = m_time[c][i]; | 592 float temp = m_time[c][i]; |
588 m_time[c][i] = m_time[c][i + m_wlen/2]; | 593 m_time[c][i] = m_time[c][i + m_wlen/2]; |
589 m_time[c][i + m_wlen/2] = temp; | 594 m_time[c][i + m_wlen/2] = temp; |
590 } | 595 } |
591 | 596 |
592 for (i = 0; i < m_wlen; ++i) { | 597 for (size_t i = 0; i < m_wlen; ++i) { |
593 m_time[c][i] = m_time[c][i] / m_wlen; | 598 m_time[c][i] = m_time[c][i] / m_wlen; |
594 } | 599 } |
595 | 600 |
596 m_synthesisWindow->cut(m_time[c]); | 601 m_synthesisWindow->cut(m_time[c]); |
597 | 602 |
598 for (i = 0; i < m_wlen; ++i) { | 603 for (size_t i = 0; i < m_wlen; ++i) { |
599 out[i] += m_time[c][i]; | 604 out[i] += m_time[c][i]; |
600 } | 605 } |
601 | 606 |
602 if (modulation) { | 607 if (modulation) { |
603 | 608 |
604 float area = m_analysisWindow->getArea(); | 609 float area = m_analysisWindow->getArea(); |
605 | 610 |
606 for (i = 0; i < m_wlen; ++i) { | 611 for (size_t i = 0; i < m_wlen; ++i) { |
607 float val = m_synthesisWindow->getValue(i); | 612 float val = m_synthesisWindow->getValue(i); |
608 modulation[i] += val * area; | 613 modulation[i] += val * area; |
609 } | 614 } |
610 } | 615 } |
611 } | 616 } |