comparison audioio/PhaseVocoderTimeStretcher.cpp @ 20:e125f0dde7a3

* restructure time stretcher somewhat so as to do transient detection on mixed stereo signal instead of just one channel
author Chris Cannam
date Thu, 14 Sep 2006 13:41:56 +0000
parents f17798a555df
children 7da85e0b85e9
comparison
equal deleted inserted replaced
19:f17798a555df 20:e125f0dde7a3
59 if (m_wlen < 2048) m_wlen = 2048; 59 if (m_wlen < 2048) m_wlen = 2048;
60 } 60 }
61 m_n1 = m_n2 / ratio; 61 m_n1 = m_n2 / ratio;
62 } 62 }
63 63
64 m_window = new Window<float>(HanningWindow, m_wlen); 64 m_analysisWindow = new Window<float>(HanningWindow, m_wlen);
65 m_synthesisWindow = new Window<float>(HanningWindow, m_wlen);
65 66
66 m_prevPhase = new float *[m_channels]; 67 m_prevPhase = new float *[m_channels];
67 m_prevAdjustedPhase = new float *[m_channels]; 68 m_prevAdjustedPhase = new float *[m_channels];
68 if (m_sharpen) m_prevMag = new float *[m_channels]; 69
69 else m_prevMag = 0; 70 m_prevTransientMag = (float *)fftwf_malloc(sizeof(float) * (m_wlen / 2 + 1));
70 m_prevPercussiveCount = new int[m_channels]; 71 m_prevTransientCount = 0;
71 m_prevPercussive = false; 72 m_prevTransient = false;
72 73
73 m_dbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen); 74 m_tempbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen);
74 m_time = (float *)fftwf_malloc(sizeof(float) * m_wlen); 75
75 m_freq = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * m_wlen); 76 m_time = new float *[m_channels];
76 77 m_freq = new fftwf_complex *[m_channels];
77 m_plan = fftwf_plan_dft_r2c_1d(m_wlen, m_time, m_freq, FFTW_ESTIMATE); 78 m_plan = new fftwf_plan[m_channels];
78 m_iplan = fftwf_plan_dft_c2r_1d(m_wlen, m_freq, m_time, FFTW_ESTIMATE); 79 m_iplan = new fftwf_plan[m_channels];
79 80
80 m_inbuf = new RingBuffer<float> *[m_channels]; 81 m_inbuf = new RingBuffer<float> *[m_channels];
81 m_outbuf = new RingBuffer<float> *[m_channels]; 82 m_outbuf = new RingBuffer<float> *[m_channels];
82 m_mashbuf = new float *[m_channels]; 83 m_mashbuf = new float *[m_channels];
83 84
84 m_modulationbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen); 85 m_modulationbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen);
85 86
86 for (size_t c = 0; c < m_channels; ++c) { 87 for (size_t c = 0; c < m_channels; ++c) {
87 88
88 m_prevPhase[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen); 89 m_prevPhase[c] = (float *)fftwf_malloc(sizeof(float) * (m_wlen / 2 + 1));
89 m_prevAdjustedPhase[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen); 90 m_prevAdjustedPhase[c] = (float *)fftwf_malloc(sizeof(float) * (m_wlen / 2 + 1));
90 91
91 if (m_sharpen) { 92 m_time[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen);
92 m_prevMag[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen); 93 m_freq[c] = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) *
93 } 94 (m_wlen / 2 + 1));
95
96 m_plan[c] = fftwf_plan_dft_r2c_1d(m_wlen, m_time[c], m_freq[c], FFTW_ESTIMATE);
97 m_iplan[c] = fftwf_plan_dft_c2r_1d(m_wlen, m_freq[c], m_time[c], FFTW_ESTIMATE);
94 98
95 m_inbuf[c] = new RingBuffer<float>(m_wlen); 99 m_inbuf[c] = new RingBuffer<float>(m_wlen);
96 m_outbuf[c] = new RingBuffer<float> 100 m_outbuf[c] = new RingBuffer<float>
97 (lrintf((maxProcessInputBlockSize + m_wlen) * ratio)); 101 (lrintf((maxProcessInputBlockSize + m_wlen) * ratio));
98 102
99 m_mashbuf[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen); 103 m_mashbuf[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen);
100 104
101 for (int i = 0; i < m_wlen; ++i) { 105 for (int i = 0; i < m_wlen; ++i) {
102 m_mashbuf[c][i] = 0.0; 106 m_mashbuf[c][i] = 0.0;
107 }
108
109 for (int i = 0; i <= m_wlen/2; ++i) {
103 m_prevPhase[c][i] = 0.0; 110 m_prevPhase[c][i] = 0.0;
104 m_prevAdjustedPhase[c][i] = 0.0; 111 m_prevAdjustedPhase[c][i] = 0.0;
105 if (m_sharpen) m_prevMag[c][i] = 0.0; 112 }
106 }
107
108 m_prevPercussiveCount[c] = 0;
109 } 113 }
110 114
111 for (int i = 0; i < m_wlen; ++i) { 115 for (int i = 0; i < m_wlen; ++i) {
112 m_modulationbuf[i] = 0.0; 116 m_modulationbuf[i] = 0.0;
117 }
118
119 for (int i = 0; i <= m_wlen/2; ++i) {
120 m_prevTransientMag[i] = 0.0;
113 } 121 }
114 122
115 std::cerr << "PhaseVocoderTimeStretcher: channels = " << channels 123 std::cerr << "PhaseVocoderTimeStretcher: channels = " << channels
116 << ", ratio = " << ratio 124 << ", ratio = " << ratio
117 << ", n1 = " << m_n1 << ", n2 = " << m_n2 << ", wlen = " 125 << ", n1 = " << m_n1 << ", n2 = " << m_n2 << ", wlen = "
121 129
122 PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher() 130 PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher()
123 { 131 {
124 std::cerr << "PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher" << std::endl; 132 std::cerr << "PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher" << std::endl;
125 133
126 fftwf_destroy_plan(m_plan);
127 fftwf_destroy_plan(m_iplan);
128
129 fftwf_free(m_time);
130 fftwf_free(m_freq);
131 fftwf_free(m_dbuf);
132
133 for (size_t c = 0; c < m_channels; ++c) { 134 for (size_t c = 0; c < m_channels; ++c) {
135
136 fftwf_destroy_plan(m_plan[c]);
137 fftwf_destroy_plan(m_iplan[c]);
138
139 fftwf_free(m_time[c]);
140 fftwf_free(m_freq[c]);
134 141
135 fftwf_free(m_mashbuf[c]); 142 fftwf_free(m_mashbuf[c]);
136 fftwf_free(m_prevPhase[c]); 143 fftwf_free(m_prevPhase[c]);
137 fftwf_free(m_prevAdjustedPhase[c]); 144 fftwf_free(m_prevAdjustedPhase[c]);
138 if (m_sharpen) fftwf_free(m_prevMag[c]);
139 145
140 delete m_inbuf[c]; 146 delete m_inbuf[c];
141 delete m_outbuf[c]; 147 delete m_outbuf[c];
142 } 148 }
143 149
150 fftwf_free(m_tempbuf);
144 fftwf_free(m_modulationbuf); 151 fftwf_free(m_modulationbuf);
152 fftwf_free(m_prevTransientMag);
145 153
146 delete[] m_prevPhase; 154 delete[] m_prevPhase;
147 delete[] m_prevAdjustedPhase; 155 delete[] m_prevAdjustedPhase;
148 if (m_sharpen) delete[] m_prevMag;
149 delete[] m_prevPercussiveCount;
150 delete[] m_inbuf; 156 delete[] m_inbuf;
151 delete[] m_outbuf; 157 delete[] m_outbuf;
152 delete[] m_mashbuf; 158 delete[] m_mashbuf;
153 159 delete[] m_time;
154 delete m_window; 160 delete[] m_freq;
161 delete[] m_plan;
162 delete[] m_iplan;
163
164 delete m_analysisWindow;
165 delete m_synthesisWindow;
155 } 166 }
156 167
157 size_t 168 size_t
158 PhaseVocoderTimeStretcher::getProcessingLatency() const 169 PhaseVocoderTimeStretcher::getProcessingLatency() const
159 { 170 {
219 // We know we have at least m_wlen samples available 230 // We know we have at least m_wlen samples available
220 // in m_inbuf. We need to peek m_wlen of them for 231 // in m_inbuf. We need to peek m_wlen of them for
221 // processing, and then read m_n1 to advance the read 232 // processing, and then read m_n1 to advance the read
222 // pointer. 233 // pointer.
223 234
235 for (size_t c = 0; c < m_channels; ++c) {
236
237 size_t got = m_inbuf[c]->peek(m_tempbuf, m_wlen);
238 assert(got == m_wlen);
239
240 analyseBlock(c, m_tempbuf);
241 }
242
243 bool transient = false;
244 if (m_sharpen) transient = isTransient();
245
224 size_t n2 = m_n2; 246 size_t n2 = m_n2;
225 bool isPercussive = false; 247
248 if (transient) {
249 n2 = m_n1;
250 }
226 251
227 for (size_t c = 0; c < m_channels; ++c) { 252 for (size_t c = 0; c < m_channels; ++c) {
228 253
229 size_t got = m_inbuf[c]->peek(m_dbuf, m_wlen); 254 synthesiseBlock(c, m_mashbuf[c],
230 assert(got == m_wlen); 255 c == 0 ? m_modulationbuf : 0,
231 256 m_prevTransient ? m_n1 : m_n2);
232 bool thisChannelPercussive = 257
233 processBlock(c, m_dbuf, m_mashbuf[c],
234 c == 0 ? m_modulationbuf : 0,
235 m_prevPercussive ? m_n1 : m_n2);
236
237 if (thisChannelPercussive && c == 0) {
238 isPercussive = true;
239 }
240
241 if (isPercussive) {
242 n2 = m_n1;
243 }
244 258
245 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER 259 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
246 std::cerr << "writing first " << m_n2 << " from mashbuf, skipping " << m_n1 << " on inbuf " << std::endl; 260 std::cerr << "writing first " << m_n2 << " from mashbuf, skipping " << m_n1 << " on inbuf " << std::endl;
247 #endif 261 #endif
248 m_inbuf[c]->skip(m_n1); 262 m_inbuf[c]->skip(m_n1);
262 for (size_t i = m_wlen - n2; i < m_wlen; ++i) { 276 for (size_t i = m_wlen - n2; i < m_wlen; ++i) {
263 m_mashbuf[c][i] = 0.0f; 277 m_mashbuf[c][i] = 0.0f;
264 } 278 }
265 } 279 }
266 280
267 m_prevPercussive = isPercussive; 281 m_prevTransient = transient;
268 282
269 for (size_t i = 0; i < m_wlen - n2; ++i) { 283 for (size_t i = 0; i < m_wlen - n2; ++i) {
270 m_modulationbuf[i] = m_modulationbuf[i + n2]; 284 m_modulationbuf[i] = m_modulationbuf[i + n2];
271 } 285 }
272 286
316 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER 330 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
317 std::cerr << "PhaseVocoderTimeStretcher::getOutput returning" << std::endl; 331 std::cerr << "PhaseVocoderTimeStretcher::getOutput returning" << std::endl;
318 #endif 332 #endif
319 } 333 }
320 334
321 bool 335 void
322 PhaseVocoderTimeStretcher::processBlock(size_t c, 336 PhaseVocoderTimeStretcher::analyseBlock(size_t c, float *buf)
323 float *buf, float *out,
324 float *modulation,
325 size_t lastStep)
326 { 337 {
327 size_t i; 338 size_t i;
328 bool isPercussive = false; 339
329 340 // buf contains m_wlen samples
330 // buf contains m_wlen samples; out contains enough space for 341
331 // m_wlen * ratio samples (we mix into out, rather than replacing) 342 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
332 343 std::cerr << "PhaseVocoderTimeStretcher::analyseBlock (channel " << c << ")" << std::endl;
333 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER 344 #endif
334 std::cerr << "PhaseVocoderTimeStretcher::processBlock (channel " << c << ")" << std::endl; 345
335 #endif 346 m_analysisWindow->cut(buf);
336
337 m_window->cut(buf);
338 347
339 for (i = 0; i < m_wlen/2; ++i) { 348 for (i = 0; i < m_wlen/2; ++i) {
340 float temp = buf[i]; 349 float temp = buf[i];
341 buf[i] = buf[i + m_wlen/2]; 350 buf[i] = buf[i + m_wlen/2];
342 buf[i + m_wlen/2] = temp; 351 buf[i + m_wlen/2] = temp;
343 } 352 }
344 353
345 for (i = 0; i < m_wlen; ++i) { 354 for (i = 0; i < m_wlen; ++i) {
346 m_time[i] = buf[i]; 355 m_time[c][i] = buf[i];
347 } 356 }
348 357
349 fftwf_execute(m_plan); // m_time -> m_freq 358 fftwf_execute(m_plan[c]); // m_time -> m_freq
350 359 }
351 if (m_sharpen && c == 0) { //!!! 360
352 361 bool
353 int count = 0; 362 PhaseVocoderTimeStretcher::isTransient()
354 363 {
355 for (i = 0; i < m_wlen; ++i) { 364 int count = 0;
365
366 for (int i = 0; i <= m_wlen/2; ++i) {
367
368 float real = 0.f, imag = 0.f;
369
370 for (size_t c = 0; c < m_channels; ++c) {
371 real += m_freq[c][i][0];
372 imag += m_freq[c][i][1];
373 }
374
375 float sqrmag = (real * real + imag * imag);
376
377 if (m_prevTransientMag[i] > 0.f) {
378 float diff = 10.f * log10f(sqrmag / m_prevTransientMag[i]);
379 if (diff > 3.f) ++count;
380 }
381
382 m_prevTransientMag[i] = sqrmag;
383 }
384
385 bool isTransient = false;
386
387 if (count > m_wlen / 4.5 && //!!!
388 count > m_prevTransientCount * 1.2) {
389 isTransient = true;
390 std::cerr << "isTransient (count = " << count << ", prev = " << m_prevTransientCount << ")" << std::endl;
391 }
392
393 m_prevTransientCount = count;
394
395 return isTransient;
396 }
397
398 void
399 PhaseVocoderTimeStretcher::synthesiseBlock(size_t c,
400 float *out,
401 float *modulation,
402 size_t lastStep)
403 {
404 int i;
405
406 bool unchanged = (lastStep == m_n1);
407
408 for (i = 0; i <= m_wlen/2; ++i) {
409
410 float phase = princargf(atan2f(m_freq[c][i][1], m_freq[c][i][0]));
411 float adjustedPhase = phase;
412
413 if (!unchanged) {
414
415 float mag = sqrtf(m_freq[c][i][0] * m_freq[c][i][0] +
416 m_freq[c][i][1] * m_freq[c][i][1]);
417
418 float omega = (2 * M_PI * m_n1 * i) / m_wlen;
419
420 float expectedPhase = m_prevPhase[c][i] + omega;
421
422 float phaseError = princargf(phase - expectedPhase);
423
424 float phaseIncrement = (omega + phaseError) / m_n1;
356 425
357 float mag = sqrtf(m_freq[i][0] * m_freq[i][0] + 426 adjustedPhase = m_prevAdjustedPhase[c][i] +
358 m_freq[i][1] * m_freq[i][1]); 427 lastStep * phaseIncrement;
359
360 if (m_prevMag[c][i] > 0) {
361 float magdiff = 20.f * log10f(mag / m_prevMag[c][i]);
362 if (magdiff > 3.f) ++count;
363 }
364 428
365 m_prevMag[c][i] = mag; 429 float real = mag * cosf(adjustedPhase);
366 } 430 float imag = mag * sinf(adjustedPhase);
367 431 m_freq[c][i][0] = real;
368 if (count > m_wlen / 4 && //!!! 432 m_freq[c][i][1] = imag;
369 count > m_prevPercussiveCount[c] * 1.2) { 433 }
370 isPercussive = true;
371 std::cerr << "isPercussive (count = " << count << ", prev = " << m_prevPercussiveCount[c] << ")" << std::endl;
372 }
373
374 m_prevPercussiveCount[c] = count;
375 }
376
377 for (i = 0; i < m_wlen; ++i) { //!!! /2
378
379 float mag;
380
381 if (m_sharpen && c == 0) {
382 mag = m_prevMag[c][i]; // can reuse this
383 } else {
384 mag = sqrtf(m_freq[i][0] * m_freq[i][0] +
385 m_freq[i][1] * m_freq[i][1]);
386 }
387
388 float phase = princargf(atan2f(m_freq[i][1], m_freq[i][0]));
389
390 float omega = (2 * M_PI * m_n1 * i) / m_wlen;
391
392 float expectedPhase = m_prevPhase[c][i] + omega;
393
394 float phaseError = princargf(phase - expectedPhase);
395
396 float adjustedPhase = phase;
397
398 if (!isPercussive) {
399 // if (fabsf(phaseError) < (1.1f * (lastStep * M_PI) / m_wlen)) {
400
401 float phaseIncrement = (omega + phaseError) / m_n1;
402
403 adjustedPhase = m_prevAdjustedPhase[c][i] +
404 lastStep * phaseIncrement;
405 // }
406 }
407
408 // if (isPercussive) adjustedPhase = phase;
409
410 float real = mag * cosf(adjustedPhase);
411 float imag = mag * sinf(adjustedPhase);
412 m_freq[i][0] = real;
413 m_freq[i][1] = imag;
414 434
415 m_prevPhase[c][i] = phase; 435 m_prevPhase[c][i] = phase;
416 m_prevAdjustedPhase[c][i] = adjustedPhase; 436 m_prevAdjustedPhase[c][i] = adjustedPhase;
417 } 437 }
438
439 fftwf_execute(m_iplan[c]); // m_freq -> m_time, inverse fft
440
441 for (i = 0; i < m_wlen/2; ++i) {
442 float temp = m_time[c][i];
443 m_time[c][i] = m_time[c][i + m_wlen/2];
444 m_time[c][i + m_wlen/2] = temp;
445 }
418 446
419 fftwf_execute(m_iplan); // m_freq -> m_time, inverse fft
420
421 for (i = 0; i < m_wlen/2; ++i) {
422 float temp = m_time[i];
423 m_time[i] = m_time[i + m_wlen/2];
424 m_time[i + m_wlen/2] = temp;
425 }
426
427 for (i = 0; i < m_wlen; ++i) { 447 for (i = 0; i < m_wlen; ++i) {
428 m_time[i] = m_time[i] / m_wlen; 448 m_time[c][i] = m_time[c][i] / m_wlen;
429 } 449 }
430 450
431 m_window->cut(m_time); 451 m_synthesisWindow->cut(m_time[c]);
432 452
433 for (i = 0; i < m_wlen; ++i) { 453 for (i = 0; i < m_wlen; ++i) {
434 out[i] += m_time[i]; 454 out[i] += m_time[c][i];
435 } 455 }
436 456
437 if (modulation) { 457 if (modulation) {
438 458
439 float area = m_window->getArea(); 459 float area = m_analysisWindow->getArea();
440 460
441 for (i = 0; i < m_wlen; ++i) { 461 for (i = 0; i < m_wlen; ++i) {
442 float val = m_window->getValue(i); 462 float val = m_synthesisWindow->getValue(i);
443 modulation[i] += val * area; 463 modulation[i] += val * area;
444 } 464 }
445 } 465 }
446 466 }
447 return isPercussive; 467
448 } 468
449