Mercurial > hg > sonic-visualiser
comparison audioio/PhaseVocoderTimeStretcher.cpp @ 20:e125f0dde7a3
* restructure time stretcher somewhat so as to do transient detection on
mixed stereo signal instead of just one channel
author | Chris Cannam |
---|---|
date | Thu, 14 Sep 2006 13:41:56 +0000 |
parents | f17798a555df |
children | 7da85e0b85e9 |
comparison
equal
deleted
inserted
replaced
19:f17798a555df | 20:e125f0dde7a3 |
---|---|
59 if (m_wlen < 2048) m_wlen = 2048; | 59 if (m_wlen < 2048) m_wlen = 2048; |
60 } | 60 } |
61 m_n1 = m_n2 / ratio; | 61 m_n1 = m_n2 / ratio; |
62 } | 62 } |
63 | 63 |
64 m_window = new Window<float>(HanningWindow, m_wlen); | 64 m_analysisWindow = new Window<float>(HanningWindow, m_wlen); |
65 m_synthesisWindow = new Window<float>(HanningWindow, m_wlen); | |
65 | 66 |
66 m_prevPhase = new float *[m_channels]; | 67 m_prevPhase = new float *[m_channels]; |
67 m_prevAdjustedPhase = new float *[m_channels]; | 68 m_prevAdjustedPhase = new float *[m_channels]; |
68 if (m_sharpen) m_prevMag = new float *[m_channels]; | 69 |
69 else m_prevMag = 0; | 70 m_prevTransientMag = (float *)fftwf_malloc(sizeof(float) * (m_wlen / 2 + 1)); |
70 m_prevPercussiveCount = new int[m_channels]; | 71 m_prevTransientCount = 0; |
71 m_prevPercussive = false; | 72 m_prevTransient = false; |
72 | 73 |
73 m_dbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen); | 74 m_tempbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen); |
74 m_time = (float *)fftwf_malloc(sizeof(float) * m_wlen); | 75 |
75 m_freq = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * m_wlen); | 76 m_time = new float *[m_channels]; |
76 | 77 m_freq = new fftwf_complex *[m_channels]; |
77 m_plan = fftwf_plan_dft_r2c_1d(m_wlen, m_time, m_freq, FFTW_ESTIMATE); | 78 m_plan = new fftwf_plan[m_channels]; |
78 m_iplan = fftwf_plan_dft_c2r_1d(m_wlen, m_freq, m_time, FFTW_ESTIMATE); | 79 m_iplan = new fftwf_plan[m_channels]; |
79 | 80 |
80 m_inbuf = new RingBuffer<float> *[m_channels]; | 81 m_inbuf = new RingBuffer<float> *[m_channels]; |
81 m_outbuf = new RingBuffer<float> *[m_channels]; | 82 m_outbuf = new RingBuffer<float> *[m_channels]; |
82 m_mashbuf = new float *[m_channels]; | 83 m_mashbuf = new float *[m_channels]; |
83 | 84 |
84 m_modulationbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen); | 85 m_modulationbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen); |
85 | 86 |
86 for (size_t c = 0; c < m_channels; ++c) { | 87 for (size_t c = 0; c < m_channels; ++c) { |
87 | 88 |
88 m_prevPhase[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen); | 89 m_prevPhase[c] = (float *)fftwf_malloc(sizeof(float) * (m_wlen / 2 + 1)); |
89 m_prevAdjustedPhase[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen); | 90 m_prevAdjustedPhase[c] = (float *)fftwf_malloc(sizeof(float) * (m_wlen / 2 + 1)); |
90 | 91 |
91 if (m_sharpen) { | 92 m_time[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen); |
92 m_prevMag[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen); | 93 m_freq[c] = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * |
93 } | 94 (m_wlen / 2 + 1)); |
95 | |
96 m_plan[c] = fftwf_plan_dft_r2c_1d(m_wlen, m_time[c], m_freq[c], FFTW_ESTIMATE); | |
97 m_iplan[c] = fftwf_plan_dft_c2r_1d(m_wlen, m_freq[c], m_time[c], FFTW_ESTIMATE); | |
94 | 98 |
95 m_inbuf[c] = new RingBuffer<float>(m_wlen); | 99 m_inbuf[c] = new RingBuffer<float>(m_wlen); |
96 m_outbuf[c] = new RingBuffer<float> | 100 m_outbuf[c] = new RingBuffer<float> |
97 (lrintf((maxProcessInputBlockSize + m_wlen) * ratio)); | 101 (lrintf((maxProcessInputBlockSize + m_wlen) * ratio)); |
98 | 102 |
99 m_mashbuf[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen); | 103 m_mashbuf[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen); |
100 | 104 |
101 for (int i = 0; i < m_wlen; ++i) { | 105 for (int i = 0; i < m_wlen; ++i) { |
102 m_mashbuf[c][i] = 0.0; | 106 m_mashbuf[c][i] = 0.0; |
107 } | |
108 | |
109 for (int i = 0; i <= m_wlen/2; ++i) { | |
103 m_prevPhase[c][i] = 0.0; | 110 m_prevPhase[c][i] = 0.0; |
104 m_prevAdjustedPhase[c][i] = 0.0; | 111 m_prevAdjustedPhase[c][i] = 0.0; |
105 if (m_sharpen) m_prevMag[c][i] = 0.0; | 112 } |
106 } | |
107 | |
108 m_prevPercussiveCount[c] = 0; | |
109 } | 113 } |
110 | 114 |
111 for (int i = 0; i < m_wlen; ++i) { | 115 for (int i = 0; i < m_wlen; ++i) { |
112 m_modulationbuf[i] = 0.0; | 116 m_modulationbuf[i] = 0.0; |
117 } | |
118 | |
119 for (int i = 0; i <= m_wlen/2; ++i) { | |
120 m_prevTransientMag[i] = 0.0; | |
113 } | 121 } |
114 | 122 |
115 std::cerr << "PhaseVocoderTimeStretcher: channels = " << channels | 123 std::cerr << "PhaseVocoderTimeStretcher: channels = " << channels |
116 << ", ratio = " << ratio | 124 << ", ratio = " << ratio |
117 << ", n1 = " << m_n1 << ", n2 = " << m_n2 << ", wlen = " | 125 << ", n1 = " << m_n1 << ", n2 = " << m_n2 << ", wlen = " |
121 | 129 |
122 PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher() | 130 PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher() |
123 { | 131 { |
124 std::cerr << "PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher" << std::endl; | 132 std::cerr << "PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher" << std::endl; |
125 | 133 |
126 fftwf_destroy_plan(m_plan); | |
127 fftwf_destroy_plan(m_iplan); | |
128 | |
129 fftwf_free(m_time); | |
130 fftwf_free(m_freq); | |
131 fftwf_free(m_dbuf); | |
132 | |
133 for (size_t c = 0; c < m_channels; ++c) { | 134 for (size_t c = 0; c < m_channels; ++c) { |
135 | |
136 fftwf_destroy_plan(m_plan[c]); | |
137 fftwf_destroy_plan(m_iplan[c]); | |
138 | |
139 fftwf_free(m_time[c]); | |
140 fftwf_free(m_freq[c]); | |
134 | 141 |
135 fftwf_free(m_mashbuf[c]); | 142 fftwf_free(m_mashbuf[c]); |
136 fftwf_free(m_prevPhase[c]); | 143 fftwf_free(m_prevPhase[c]); |
137 fftwf_free(m_prevAdjustedPhase[c]); | 144 fftwf_free(m_prevAdjustedPhase[c]); |
138 if (m_sharpen) fftwf_free(m_prevMag[c]); | |
139 | 145 |
140 delete m_inbuf[c]; | 146 delete m_inbuf[c]; |
141 delete m_outbuf[c]; | 147 delete m_outbuf[c]; |
142 } | 148 } |
143 | 149 |
150 fftwf_free(m_tempbuf); | |
144 fftwf_free(m_modulationbuf); | 151 fftwf_free(m_modulationbuf); |
152 fftwf_free(m_prevTransientMag); | |
145 | 153 |
146 delete[] m_prevPhase; | 154 delete[] m_prevPhase; |
147 delete[] m_prevAdjustedPhase; | 155 delete[] m_prevAdjustedPhase; |
148 if (m_sharpen) delete[] m_prevMag; | |
149 delete[] m_prevPercussiveCount; | |
150 delete[] m_inbuf; | 156 delete[] m_inbuf; |
151 delete[] m_outbuf; | 157 delete[] m_outbuf; |
152 delete[] m_mashbuf; | 158 delete[] m_mashbuf; |
153 | 159 delete[] m_time; |
154 delete m_window; | 160 delete[] m_freq; |
161 delete[] m_plan; | |
162 delete[] m_iplan; | |
163 | |
164 delete m_analysisWindow; | |
165 delete m_synthesisWindow; | |
155 } | 166 } |
156 | 167 |
157 size_t | 168 size_t |
158 PhaseVocoderTimeStretcher::getProcessingLatency() const | 169 PhaseVocoderTimeStretcher::getProcessingLatency() const |
159 { | 170 { |
219 // We know we have at least m_wlen samples available | 230 // We know we have at least m_wlen samples available |
220 // in m_inbuf. We need to peek m_wlen of them for | 231 // in m_inbuf. We need to peek m_wlen of them for |
221 // processing, and then read m_n1 to advance the read | 232 // processing, and then read m_n1 to advance the read |
222 // pointer. | 233 // pointer. |
223 | 234 |
235 for (size_t c = 0; c < m_channels; ++c) { | |
236 | |
237 size_t got = m_inbuf[c]->peek(m_tempbuf, m_wlen); | |
238 assert(got == m_wlen); | |
239 | |
240 analyseBlock(c, m_tempbuf); | |
241 } | |
242 | |
243 bool transient = false; | |
244 if (m_sharpen) transient = isTransient(); | |
245 | |
224 size_t n2 = m_n2; | 246 size_t n2 = m_n2; |
225 bool isPercussive = false; | 247 |
248 if (transient) { | |
249 n2 = m_n1; | |
250 } | |
226 | 251 |
227 for (size_t c = 0; c < m_channels; ++c) { | 252 for (size_t c = 0; c < m_channels; ++c) { |
228 | 253 |
229 size_t got = m_inbuf[c]->peek(m_dbuf, m_wlen); | 254 synthesiseBlock(c, m_mashbuf[c], |
230 assert(got == m_wlen); | 255 c == 0 ? m_modulationbuf : 0, |
231 | 256 m_prevTransient ? m_n1 : m_n2); |
232 bool thisChannelPercussive = | 257 |
233 processBlock(c, m_dbuf, m_mashbuf[c], | |
234 c == 0 ? m_modulationbuf : 0, | |
235 m_prevPercussive ? m_n1 : m_n2); | |
236 | |
237 if (thisChannelPercussive && c == 0) { | |
238 isPercussive = true; | |
239 } | |
240 | |
241 if (isPercussive) { | |
242 n2 = m_n1; | |
243 } | |
244 | 258 |
245 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER | 259 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER |
246 std::cerr << "writing first " << m_n2 << " from mashbuf, skipping " << m_n1 << " on inbuf " << std::endl; | 260 std::cerr << "writing first " << m_n2 << " from mashbuf, skipping " << m_n1 << " on inbuf " << std::endl; |
247 #endif | 261 #endif |
248 m_inbuf[c]->skip(m_n1); | 262 m_inbuf[c]->skip(m_n1); |
262 for (size_t i = m_wlen - n2; i < m_wlen; ++i) { | 276 for (size_t i = m_wlen - n2; i < m_wlen; ++i) { |
263 m_mashbuf[c][i] = 0.0f; | 277 m_mashbuf[c][i] = 0.0f; |
264 } | 278 } |
265 } | 279 } |
266 | 280 |
267 m_prevPercussive = isPercussive; | 281 m_prevTransient = transient; |
268 | 282 |
269 for (size_t i = 0; i < m_wlen - n2; ++i) { | 283 for (size_t i = 0; i < m_wlen - n2; ++i) { |
270 m_modulationbuf[i] = m_modulationbuf[i + n2]; | 284 m_modulationbuf[i] = m_modulationbuf[i + n2]; |
271 } | 285 } |
272 | 286 |
316 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER | 330 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER |
317 std::cerr << "PhaseVocoderTimeStretcher::getOutput returning" << std::endl; | 331 std::cerr << "PhaseVocoderTimeStretcher::getOutput returning" << std::endl; |
318 #endif | 332 #endif |
319 } | 333 } |
320 | 334 |
321 bool | 335 void |
322 PhaseVocoderTimeStretcher::processBlock(size_t c, | 336 PhaseVocoderTimeStretcher::analyseBlock(size_t c, float *buf) |
323 float *buf, float *out, | |
324 float *modulation, | |
325 size_t lastStep) | |
326 { | 337 { |
327 size_t i; | 338 size_t i; |
328 bool isPercussive = false; | 339 |
329 | 340 // buf contains m_wlen samples |
330 // buf contains m_wlen samples; out contains enough space for | 341 |
331 // m_wlen * ratio samples (we mix into out, rather than replacing) | 342 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER |
332 | 343 std::cerr << "PhaseVocoderTimeStretcher::analyseBlock (channel " << c << ")" << std::endl; |
333 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER | 344 #endif |
334 std::cerr << "PhaseVocoderTimeStretcher::processBlock (channel " << c << ")" << std::endl; | 345 |
335 #endif | 346 m_analysisWindow->cut(buf); |
336 | |
337 m_window->cut(buf); | |
338 | 347 |
339 for (i = 0; i < m_wlen/2; ++i) { | 348 for (i = 0; i < m_wlen/2; ++i) { |
340 float temp = buf[i]; | 349 float temp = buf[i]; |
341 buf[i] = buf[i + m_wlen/2]; | 350 buf[i] = buf[i + m_wlen/2]; |
342 buf[i + m_wlen/2] = temp; | 351 buf[i + m_wlen/2] = temp; |
343 } | 352 } |
344 | 353 |
345 for (i = 0; i < m_wlen; ++i) { | 354 for (i = 0; i < m_wlen; ++i) { |
346 m_time[i] = buf[i]; | 355 m_time[c][i] = buf[i]; |
347 } | 356 } |
348 | 357 |
349 fftwf_execute(m_plan); // m_time -> m_freq | 358 fftwf_execute(m_plan[c]); // m_time -> m_freq |
350 | 359 } |
351 if (m_sharpen && c == 0) { //!!! | 360 |
352 | 361 bool |
353 int count = 0; | 362 PhaseVocoderTimeStretcher::isTransient() |
354 | 363 { |
355 for (i = 0; i < m_wlen; ++i) { | 364 int count = 0; |
365 | |
366 for (int i = 0; i <= m_wlen/2; ++i) { | |
367 | |
368 float real = 0.f, imag = 0.f; | |
369 | |
370 for (size_t c = 0; c < m_channels; ++c) { | |
371 real += m_freq[c][i][0]; | |
372 imag += m_freq[c][i][1]; | |
373 } | |
374 | |
375 float sqrmag = (real * real + imag * imag); | |
376 | |
377 if (m_prevTransientMag[i] > 0.f) { | |
378 float diff = 10.f * log10f(sqrmag / m_prevTransientMag[i]); | |
379 if (diff > 3.f) ++count; | |
380 } | |
381 | |
382 m_prevTransientMag[i] = sqrmag; | |
383 } | |
384 | |
385 bool isTransient = false; | |
386 | |
387 if (count > m_wlen / 4.5 && //!!! | |
388 count > m_prevTransientCount * 1.2) { | |
389 isTransient = true; | |
390 std::cerr << "isTransient (count = " << count << ", prev = " << m_prevTransientCount << ")" << std::endl; | |
391 } | |
392 | |
393 m_prevTransientCount = count; | |
394 | |
395 return isTransient; | |
396 } | |
397 | |
398 void | |
399 PhaseVocoderTimeStretcher::synthesiseBlock(size_t c, | |
400 float *out, | |
401 float *modulation, | |
402 size_t lastStep) | |
403 { | |
404 int i; | |
405 | |
406 bool unchanged = (lastStep == m_n1); | |
407 | |
408 for (i = 0; i <= m_wlen/2; ++i) { | |
409 | |
410 float phase = princargf(atan2f(m_freq[c][i][1], m_freq[c][i][0])); | |
411 float adjustedPhase = phase; | |
412 | |
413 if (!unchanged) { | |
414 | |
415 float mag = sqrtf(m_freq[c][i][0] * m_freq[c][i][0] + | |
416 m_freq[c][i][1] * m_freq[c][i][1]); | |
417 | |
418 float omega = (2 * M_PI * m_n1 * i) / m_wlen; | |
419 | |
420 float expectedPhase = m_prevPhase[c][i] + omega; | |
421 | |
422 float phaseError = princargf(phase - expectedPhase); | |
423 | |
424 float phaseIncrement = (omega + phaseError) / m_n1; | |
356 | 425 |
357 float mag = sqrtf(m_freq[i][0] * m_freq[i][0] + | 426 adjustedPhase = m_prevAdjustedPhase[c][i] + |
358 m_freq[i][1] * m_freq[i][1]); | 427 lastStep * phaseIncrement; |
359 | |
360 if (m_prevMag[c][i] > 0) { | |
361 float magdiff = 20.f * log10f(mag / m_prevMag[c][i]); | |
362 if (magdiff > 3.f) ++count; | |
363 } | |
364 | 428 |
365 m_prevMag[c][i] = mag; | 429 float real = mag * cosf(adjustedPhase); |
366 } | 430 float imag = mag * sinf(adjustedPhase); |
367 | 431 m_freq[c][i][0] = real; |
368 if (count > m_wlen / 4 && //!!! | 432 m_freq[c][i][1] = imag; |
369 count > m_prevPercussiveCount[c] * 1.2) { | 433 } |
370 isPercussive = true; | |
371 std::cerr << "isPercussive (count = " << count << ", prev = " << m_prevPercussiveCount[c] << ")" << std::endl; | |
372 } | |
373 | |
374 m_prevPercussiveCount[c] = count; | |
375 } | |
376 | |
377 for (i = 0; i < m_wlen; ++i) { //!!! /2 | |
378 | |
379 float mag; | |
380 | |
381 if (m_sharpen && c == 0) { | |
382 mag = m_prevMag[c][i]; // can reuse this | |
383 } else { | |
384 mag = sqrtf(m_freq[i][0] * m_freq[i][0] + | |
385 m_freq[i][1] * m_freq[i][1]); | |
386 } | |
387 | |
388 float phase = princargf(atan2f(m_freq[i][1], m_freq[i][0])); | |
389 | |
390 float omega = (2 * M_PI * m_n1 * i) / m_wlen; | |
391 | |
392 float expectedPhase = m_prevPhase[c][i] + omega; | |
393 | |
394 float phaseError = princargf(phase - expectedPhase); | |
395 | |
396 float adjustedPhase = phase; | |
397 | |
398 if (!isPercussive) { | |
399 // if (fabsf(phaseError) < (1.1f * (lastStep * M_PI) / m_wlen)) { | |
400 | |
401 float phaseIncrement = (omega + phaseError) / m_n1; | |
402 | |
403 adjustedPhase = m_prevAdjustedPhase[c][i] + | |
404 lastStep * phaseIncrement; | |
405 // } | |
406 } | |
407 | |
408 // if (isPercussive) adjustedPhase = phase; | |
409 | |
410 float real = mag * cosf(adjustedPhase); | |
411 float imag = mag * sinf(adjustedPhase); | |
412 m_freq[i][0] = real; | |
413 m_freq[i][1] = imag; | |
414 | 434 |
415 m_prevPhase[c][i] = phase; | 435 m_prevPhase[c][i] = phase; |
416 m_prevAdjustedPhase[c][i] = adjustedPhase; | 436 m_prevAdjustedPhase[c][i] = adjustedPhase; |
417 } | 437 } |
438 | |
439 fftwf_execute(m_iplan[c]); // m_freq -> m_time, inverse fft | |
440 | |
441 for (i = 0; i < m_wlen/2; ++i) { | |
442 float temp = m_time[c][i]; | |
443 m_time[c][i] = m_time[c][i + m_wlen/2]; | |
444 m_time[c][i + m_wlen/2] = temp; | |
445 } | |
418 | 446 |
419 fftwf_execute(m_iplan); // m_freq -> m_time, inverse fft | |
420 | |
421 for (i = 0; i < m_wlen/2; ++i) { | |
422 float temp = m_time[i]; | |
423 m_time[i] = m_time[i + m_wlen/2]; | |
424 m_time[i + m_wlen/2] = temp; | |
425 } | |
426 | |
427 for (i = 0; i < m_wlen; ++i) { | 447 for (i = 0; i < m_wlen; ++i) { |
428 m_time[i] = m_time[i] / m_wlen; | 448 m_time[c][i] = m_time[c][i] / m_wlen; |
429 } | 449 } |
430 | 450 |
431 m_window->cut(m_time); | 451 m_synthesisWindow->cut(m_time[c]); |
432 | 452 |
433 for (i = 0; i < m_wlen; ++i) { | 453 for (i = 0; i < m_wlen; ++i) { |
434 out[i] += m_time[i]; | 454 out[i] += m_time[c][i]; |
435 } | 455 } |
436 | 456 |
437 if (modulation) { | 457 if (modulation) { |
438 | 458 |
439 float area = m_window->getArea(); | 459 float area = m_analysisWindow->getArea(); |
440 | 460 |
441 for (i = 0; i < m_wlen; ++i) { | 461 for (i = 0; i < m_wlen; ++i) { |
442 float val = m_window->getValue(i); | 462 float val = m_synthesisWindow->getValue(i); |
443 modulation[i] += val * area; | 463 modulation[i] += val * area; |
444 } | 464 } |
445 } | 465 } |
446 | 466 } |
447 return isPercussive; | 467 |
448 } | 468 |
449 |