comparison audioio/PhaseVocoderTimeStretcher.cpp @ 43:3c5756fb6a68

* Move some things around to facilitate plundering libraries for other applications without needing to duplicate so much code. sv/osc -> data/osc sv/audioio -> audioio sv/transform -> plugin/transform sv/document -> document (will rename to framework in next commit)
author Chris Cannam
date Wed, 24 Oct 2007 16:34:31 +0000
parents
children 0ffab5d7e3e1
comparison
equal deleted inserted replaced
42:0619006a1ee3 43:3c5756fb6a68
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3 /*
4 Sonic Visualiser
5 An audio file viewer and annotation editor.
6 Centre for Digital Music, Queen Mary, University of London.
7 This file copyright 2006 Chris Cannam and QMUL.
8
9 This program is free software; you can redistribute it and/or
10 modify it under the terms of the GNU General Public License as
11 published by the Free Software Foundation; either version 2 of the
12 License, or (at your option) any later version. See the file
13 COPYING included with this distribution for more information.
14 */
15
16 #include "PhaseVocoderTimeStretcher.h"
17
18 #include <iostream>
19 #include <cassert>
20
21 #include <QMutexLocker>
22
23 //#define DEBUG_PHASE_VOCODER_TIME_STRETCHER 1
24
25 PhaseVocoderTimeStretcher::PhaseVocoderTimeStretcher(size_t sampleRate,
26 size_t channels,
27 float ratio,
28 bool sharpen,
29 size_t maxOutputBlockSize) :
30 m_sampleRate(sampleRate),
31 m_channels(channels),
32 m_maxOutputBlockSize(maxOutputBlockSize),
33 m_ratio(ratio),
34 m_sharpen(sharpen),
35 m_totalCount(0),
36 m_transientCount(0),
37 m_n2sum(0),
38 m_mutex(new QMutex())
39 {
40 initialise();
41 }
42
43 PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher()
44 {
45 std::cerr << "PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher" << std::endl;
46
47 cleanup();
48
49 delete m_mutex;
50 }
51
52 void
53 PhaseVocoderTimeStretcher::initialise()
54 {
55 std::cerr << "PhaseVocoderTimeStretcher::initialise" << std::endl;
56
57 calculateParameters();
58
59 m_analysisWindow = new Window<float>(HanningWindow, m_wlen);
60 m_synthesisWindow = new Window<float>(HanningWindow, m_wlen);
61
62 m_prevPhase = new float *[m_channels];
63 m_prevAdjustedPhase = new float *[m_channels];
64
65 m_prevTransientMag = (float *)fftf_malloc(sizeof(float) * (m_wlen / 2 + 1));
66 m_prevTransientScore = 0;
67 m_prevTransient = false;
68
69 m_tempbuf = (float *)fftf_malloc(sizeof(float) * m_wlen);
70
71 m_time = new float *[m_channels];
72 m_freq = new fftf_complex *[m_channels];
73 m_plan = new fftf_plan[m_channels];
74 m_iplan = new fftf_plan[m_channels];
75
76 m_inbuf = new RingBuffer<float> *[m_channels];
77 m_outbuf = new RingBuffer<float> *[m_channels];
78 m_mashbuf = new float *[m_channels];
79
80 m_modulationbuf = (float *)fftf_malloc(sizeof(float) * m_wlen);
81
82 for (size_t c = 0; c < m_channels; ++c) {
83
84 m_prevPhase[c] = (float *)fftf_malloc(sizeof(float) * (m_wlen / 2 + 1));
85 m_prevAdjustedPhase[c] = (float *)fftf_malloc(sizeof(float) * (m_wlen / 2 + 1));
86
87 m_time[c] = (float *)fftf_malloc(sizeof(float) * m_wlen);
88 m_freq[c] = (fftf_complex *)fftf_malloc(sizeof(fftf_complex) *
89 (m_wlen / 2 + 1));
90
91 m_plan[c] = fftf_plan_dft_r2c_1d(m_wlen, m_time[c], m_freq[c], FFTW_MEASURE);
92 m_iplan[c] = fftf_plan_dft_c2r_1d(m_wlen, m_freq[c], m_time[c], FFTW_MEASURE);
93
94 m_outbuf[c] = new RingBuffer<float>
95 ((m_maxOutputBlockSize + m_wlen) * 2);
96 m_inbuf[c] = new RingBuffer<float>
97 (lrintf(m_outbuf[c]->getSize() / m_ratio) + m_wlen);
98
99 std::cerr << "making inbuf size " << m_inbuf[c]->getSize() << " (outbuf size is " << m_outbuf[c]->getSize() << ", ratio " << m_ratio << ")" << std::endl;
100
101
102 m_mashbuf[c] = (float *)fftf_malloc(sizeof(float) * m_wlen);
103
104 for (size_t i = 0; i < m_wlen; ++i) {
105 m_mashbuf[c][i] = 0.0;
106 }
107
108 for (size_t i = 0; i <= m_wlen/2; ++i) {
109 m_prevPhase[c][i] = 0.0;
110 m_prevAdjustedPhase[c][i] = 0.0;
111 }
112 }
113
114 for (size_t i = 0; i < m_wlen; ++i) {
115 m_modulationbuf[i] = 0.0;
116 }
117
118 for (size_t i = 0; i <= m_wlen/2; ++i) {
119 m_prevTransientMag[i] = 0.0;
120 }
121 }
122
123 void
124 PhaseVocoderTimeStretcher::calculateParameters()
125 {
126 std::cerr << "PhaseVocoderTimeStretcher::calculateParameters" << std::endl;
127
128 m_wlen = 1024;
129
130 //!!! In transient sharpening mode, we need to pick the window
131 //length so as to be more or less fixed in audio duration (i.e. we
132 //need to exploit the sample rate)
133
134 //!!! have to work out the relationship between wlen and transient
135 //threshold
136
137 if (m_ratio < 1) {
138 if (m_ratio < 0.4) {
139 m_n1 = 1024;
140 m_wlen = 2048;
141 } else if (m_ratio < 0.8) {
142 m_n1 = 512;
143 } else {
144 m_n1 = 256;
145 }
146 if (shouldSharpen()) {
147 m_wlen = 2048;
148 }
149 m_n2 = lrintf(m_n1 * m_ratio);
150 } else {
151 if (m_ratio > 2) {
152 m_n2 = 512;
153 m_wlen = 4096;
154 } else if (m_ratio > 1.6) {
155 m_n2 = 384;
156 m_wlen = 2048;
157 } else {
158 m_n2 = 256;
159 }
160 if (shouldSharpen()) {
161 if (m_wlen < 2048) m_wlen = 2048;
162 }
163 m_n1 = lrintf(m_n2 / m_ratio);
164 if (m_n1 == 0) {
165 m_n1 = 1;
166 m_n2 = lrintf(m_ratio);
167 }
168 }
169
170 m_transientThreshold = lrintf(m_wlen / 4.5);
171
172 m_totalCount = 0;
173 m_transientCount = 0;
174 m_n2sum = 0;
175
176
177 std::cerr << "PhaseVocoderTimeStretcher: channels = " << m_channels
178 << ", ratio = " << m_ratio
179 << ", n1 = " << m_n1 << ", n2 = " << m_n2 << ", wlen = "
180 << m_wlen << ", max = " << m_maxOutputBlockSize << std::endl;
181 // << ", outbuflen = " << m_outbuf[0]->getSize() << std::endl;
182 }
183
184 void
185 PhaseVocoderTimeStretcher::cleanup()
186 {
187 std::cerr << "PhaseVocoderTimeStretcher::cleanup" << std::endl;
188
189 for (size_t c = 0; c < m_channels; ++c) {
190
191 fftf_destroy_plan(m_plan[c]);
192 fftf_destroy_plan(m_iplan[c]);
193
194 fftf_free(m_time[c]);
195 fftf_free(m_freq[c]);
196
197 fftf_free(m_mashbuf[c]);
198 fftf_free(m_prevPhase[c]);
199 fftf_free(m_prevAdjustedPhase[c]);
200
201 delete m_inbuf[c];
202 delete m_outbuf[c];
203 }
204
205 fftf_free(m_tempbuf);
206 fftf_free(m_modulationbuf);
207 fftf_free(m_prevTransientMag);
208
209 delete[] m_prevPhase;
210 delete[] m_prevAdjustedPhase;
211 delete[] m_inbuf;
212 delete[] m_outbuf;
213 delete[] m_mashbuf;
214 delete[] m_time;
215 delete[] m_freq;
216 delete[] m_plan;
217 delete[] m_iplan;
218
219 delete m_analysisWindow;
220 delete m_synthesisWindow;
221 }
222
223 void
224 PhaseVocoderTimeStretcher::setRatio(float ratio)
225 {
226 QMutexLocker locker(m_mutex);
227
228 size_t formerWlen = m_wlen;
229 m_ratio = ratio;
230
231 std::cerr << "PhaseVocoderTimeStretcher::setRatio: new ratio " << ratio
232 << std::endl;
233
234 calculateParameters();
235
236 if (m_wlen == formerWlen) {
237
238 // This is the only container whose size depends on m_ratio
239
240 RingBuffer<float> **newin = new RingBuffer<float> *[m_channels];
241
242 size_t formerSize = m_inbuf[0]->getSize();
243 size_t newSize = lrintf(m_outbuf[0]->getSize() / m_ratio) + m_wlen;
244
245 std::cerr << "resizing inbuf from " << formerSize << " to "
246 << newSize << " (outbuf size is " << m_outbuf[0]->getSize() << ", ratio " << m_ratio << ")" << std::endl;
247
248 if (formerSize != newSize) {
249
250 size_t ready = m_inbuf[0]->getReadSpace();
251
252 for (size_t c = 0; c < m_channels; ++c) {
253 newin[c] = new RingBuffer<float>(newSize);
254 }
255
256 if (ready > 0) {
257
258 size_t copy = std::min(ready, newSize);
259 float *tmp = new float[ready];
260
261 for (size_t c = 0; c < m_channels; ++c) {
262 m_inbuf[c]->read(tmp, ready);
263 newin[c]->write(tmp + ready - copy, copy);
264 }
265
266 delete[] tmp;
267 }
268
269 for (size_t c = 0; c < m_channels; ++c) {
270 delete m_inbuf[c];
271 }
272
273 delete[] m_inbuf;
274 m_inbuf = newin;
275 }
276
277 } else {
278
279 std::cerr << "wlen changed" << std::endl;
280 cleanup();
281 initialise();
282 }
283 }
284
285 size_t
286 PhaseVocoderTimeStretcher::getProcessingLatency() const
287 {
288 return getWindowSize() - getInputIncrement();
289 }
290
291 size_t
292 PhaseVocoderTimeStretcher::getRequiredInputSamples() const
293 {
294 QMutexLocker locker(m_mutex);
295
296 if (m_inbuf[0]->getReadSpace() >= m_wlen) return 0;
297 return m_wlen - m_inbuf[0]->getReadSpace();
298 }
299
300 void
301 PhaseVocoderTimeStretcher::putInput(float **input, size_t samples)
302 {
303 QMutexLocker locker(m_mutex);
304
305 // We need to add samples from input to our internal buffer. When
306 // we have m_windowSize samples in the buffer, we can process it,
307 // move the samples back by m_n1 and write the output onto our
308 // internal output buffer. If we have (samples * ratio) samples
309 // in that, we can write m_n2 of them back to output and return
310 // (otherwise we have to write zeroes).
311
312 // When we process, we write m_wlen to our fixed output buffer
313 // (m_mashbuf). We then pull out the first m_n2 samples from that
314 // buffer, push them into the output ring buffer, and shift
315 // m_mashbuf left by that amount.
316
317 // The processing latency is then m_wlen - m_n2.
318
319 size_t consumed = 0;
320
321 while (consumed < samples) {
322
323 size_t writable = m_inbuf[0]->getWriteSpace();
324 writable = std::min(writable, samples - consumed);
325
326 if (writable == 0) {
327 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
328 std::cerr << "WARNING: PhaseVocoderTimeStretcher::putInput: writable == 0 (inbuf has " << m_inbuf[0]->getReadSpace() << " samples available for reading, space for " << m_inbuf[0]->getWriteSpace() << " more)" << std::endl;
329 #endif
330 if (m_inbuf[0]->getReadSpace() < m_wlen ||
331 m_outbuf[0]->getWriteSpace() < m_n2) {
332 std::cerr << "WARNING: PhaseVocoderTimeStretcher::putInput: Inbuf has " << m_inbuf[0]->getReadSpace() << ", outbuf has space for " << m_outbuf[0]->getWriteSpace() << " (n2 = " << m_n2 << ", wlen = " << m_wlen << "), won't be able to process" << std::endl;
333 break;
334 }
335 } else {
336
337 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
338 std::cerr << "writing " << writable << " from index " << consumed << " to inbuf, consumed will be " << consumed + writable << std::endl;
339 #endif
340
341 for (size_t c = 0; c < m_channels; ++c) {
342 m_inbuf[c]->write(input[c] + consumed, writable);
343 }
344 consumed += writable;
345 }
346
347 while (m_inbuf[0]->getReadSpace() >= m_wlen &&
348 m_outbuf[0]->getWriteSpace() >= m_n2) {
349
350 // We know we have at least m_wlen samples available
351 // in m_inbuf. We need to peek m_wlen of them for
352 // processing, and then read m_n1 to advance the read
353 // pointer.
354
355 for (size_t c = 0; c < m_channels; ++c) {
356
357 size_t got = m_inbuf[c]->peek(m_tempbuf, m_wlen);
358 assert(got == m_wlen);
359
360 analyseBlock(c, m_tempbuf);
361 }
362
363 bool transient = false;
364 if (shouldSharpen()) transient = isTransient();
365
366 size_t n2 = m_n2;
367
368 if (transient) {
369 n2 = m_n1;
370 }
371
372 ++m_totalCount;
373 if (transient) ++m_transientCount;
374 m_n2sum += n2;
375
376 // std::cerr << "ratio for last 10: " <<last10num << "/" << (10 * m_n1) << " = " << float(last10num) / float(10 * m_n1) << " (should be " << m_ratio << ")" << std::endl;
377
378 if (m_totalCount > 50 && m_transientCount < m_totalCount) {
379
380 int fixed = lrintf(m_transientCount * m_n1);
381
382 int idealTotal = lrintf(m_totalCount * m_n1 * m_ratio);
383 int idealSquashy = idealTotal - fixed;
384
385 int squashyCount = m_totalCount - m_transientCount;
386
387 n2 = lrintf(idealSquashy / squashyCount);
388
389 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
390 if (n2 != m_n2) {
391 std::cerr << m_n2 << " -> " << n2 << std::endl;
392 }
393 #endif
394 }
395
396 for (size_t c = 0; c < m_channels; ++c) {
397
398 synthesiseBlock(c, m_mashbuf[c],
399 c == 0 ? m_modulationbuf : 0,
400 m_prevTransient ? m_n1 : m_n2);
401
402
403 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
404 std::cerr << "writing first " << m_n2 << " from mashbuf, skipping " << m_n1 << " on inbuf " << std::endl;
405 #endif
406 m_inbuf[c]->skip(m_n1);
407
408 for (size_t i = 0; i < n2; ++i) {
409 if (m_modulationbuf[i] > 0.f) {
410 m_mashbuf[c][i] /= m_modulationbuf[i];
411 }
412 }
413
414 m_outbuf[c]->write(m_mashbuf[c], n2);
415
416 for (size_t i = 0; i < m_wlen - n2; ++i) {
417 m_mashbuf[c][i] = m_mashbuf[c][i + n2];
418 }
419
420 for (size_t i = m_wlen - n2; i < m_wlen; ++i) {
421 m_mashbuf[c][i] = 0.0f;
422 }
423 }
424
425 m_prevTransient = transient;
426
427 for (size_t i = 0; i < m_wlen - n2; ++i) {
428 m_modulationbuf[i] = m_modulationbuf[i + n2];
429 }
430
431 for (size_t i = m_wlen - n2; i < m_wlen; ++i) {
432 m_modulationbuf[i] = 0.0f;
433 }
434
435 if (!transient) m_n2 = n2;
436 }
437
438
439 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
440 std::cerr << "loop ended: inbuf read space " << m_inbuf[0]->getReadSpace() << ", outbuf write space " << m_outbuf[0]->getWriteSpace() << std::endl;
441 #endif
442 }
443
444 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
445 std::cerr << "PhaseVocoderTimeStretcher::putInput returning" << std::endl;
446 #endif
447
448 // std::cerr << "ratio: nominal: " << getRatio() << " actual: "
449 // << m_total2 << "/" << m_total1 << " = " << float(m_total2) / float(m_total1) << " ideal: " << m_ratio << std::endl;
450 }
451
452 size_t
453 PhaseVocoderTimeStretcher::getAvailableOutputSamples() const
454 {
455 QMutexLocker locker(m_mutex);
456
457 return m_outbuf[0]->getReadSpace();
458 }
459
460 void
461 PhaseVocoderTimeStretcher::getOutput(float **output, size_t samples)
462 {
463 QMutexLocker locker(m_mutex);
464
465 if (m_outbuf[0]->getReadSpace() < samples) {
466 std::cerr << "WARNING: PhaseVocoderTimeStretcher::getOutput: not enough data (yet?) (" << m_outbuf[0]->getReadSpace() << " < " << samples << ")" << std::endl;
467 size_t fill = samples - m_outbuf[0]->getReadSpace();
468 for (size_t c = 0; c < m_channels; ++c) {
469 for (size_t i = 0; i < fill; ++i) {
470 output[c][i] = 0.0;
471 }
472 m_outbuf[c]->read(output[c] + fill, m_outbuf[c]->getReadSpace());
473 }
474 } else {
475 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
476 std::cerr << "enough data - writing " << samples << " from outbuf" << std::endl;
477 #endif
478 for (size_t c = 0; c < m_channels; ++c) {
479 m_outbuf[c]->read(output[c], samples);
480 }
481 }
482
483 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
484 std::cerr << "PhaseVocoderTimeStretcher::getOutput returning" << std::endl;
485 #endif
486 }
487
488 void
489 PhaseVocoderTimeStretcher::analyseBlock(size_t c, float *buf)
490 {
491 size_t i;
492
493 // buf contains m_wlen samples
494
495 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
496 std::cerr << "PhaseVocoderTimeStretcher::analyseBlock (channel " << c << ")" << std::endl;
497 #endif
498
499 m_analysisWindow->cut(buf);
500
501 for (i = 0; i < m_wlen/2; ++i) {
502 float temp = buf[i];
503 buf[i] = buf[i + m_wlen/2];
504 buf[i + m_wlen/2] = temp;
505 }
506
507 for (i = 0; i < m_wlen; ++i) {
508 m_time[c][i] = buf[i];
509 }
510
511 fftf_execute(m_plan[c]); // m_time -> m_freq
512 }
513
514 bool
515 PhaseVocoderTimeStretcher::isTransient()
516 {
517 int count = 0;
518
519 for (size_t i = 0; i <= m_wlen/2; ++i) {
520
521 float real = 0.f, imag = 0.f;
522
523 for (size_t c = 0; c < m_channels; ++c) {
524 real += m_freq[c][i][0];
525 imag += m_freq[c][i][1];
526 }
527
528 float sqrmag = (real * real + imag * imag);
529
530 if (m_prevTransientMag[i] > 0.f) {
531 float diff = 10.f * log10f(sqrmag / m_prevTransientMag[i]);
532 if (diff > 3.f) ++count;
533 }
534
535 m_prevTransientMag[i] = sqrmag;
536 }
537
538 bool isTransient = false;
539
540 // if (count > m_transientThreshold &&
541 // count > m_prevTransientScore * 1.2) {
542 if (count > m_prevTransientScore &&
543 count > m_transientThreshold &&
544 count - m_prevTransientScore > int(m_wlen) / 20) {
545 isTransient = true;
546
547
548 // std::cerr << "isTransient (count = " << count << ", prev = " << m_prevTransientScore << ", diff = " << count - m_prevTransientScore << ", ratio = " << (m_totalCount > 0 ? (float (m_n2sum) / float(m_totalCount * m_n1)) : 1.f) << ", ideal = " << m_ratio << ")" << std::endl;
549 // } else {
550 // std::cerr << " !transient (count = " << count << ", prev = " << m_prevTransientScore << ", diff = " << count - m_prevTransientScore << ")" << std::endl;
551 }
552
553 m_prevTransientScore = count;
554
555 return isTransient;
556 }
557
558 void
559 PhaseVocoderTimeStretcher::synthesiseBlock(size_t c,
560 float *out,
561 float *modulation,
562 size_t lastStep)
563 {
564 bool unchanged = (lastStep == m_n1);
565
566 for (size_t i = 0; i <= m_wlen/2; ++i) {
567
568 float phase = princargf(atan2f(m_freq[c][i][1], m_freq[c][i][0]));
569 float adjustedPhase = phase;
570
571 if (!unchanged) {
572
573 float omega = (2 * M_PI * m_n1 * i) / m_wlen;
574
575 float expectedPhase = m_prevPhase[c][i] + omega;
576
577 float phaseError = princargf(phase - expectedPhase);
578
579 float phaseIncrement = (omega + phaseError) / m_n1;
580
581 adjustedPhase = m_prevAdjustedPhase[c][i] +
582 lastStep * phaseIncrement;
583
584 float mag = sqrtf(m_freq[c][i][0] * m_freq[c][i][0] +
585 m_freq[c][i][1] * m_freq[c][i][1]);
586
587 float real = mag * cosf(adjustedPhase);
588 float imag = mag * sinf(adjustedPhase);
589 m_freq[c][i][0] = real;
590 m_freq[c][i][1] = imag;
591 }
592
593 m_prevPhase[c][i] = phase;
594 m_prevAdjustedPhase[c][i] = adjustedPhase;
595 }
596
597 fftf_execute(m_iplan[c]); // m_freq -> m_time, inverse fft
598
599 for (size_t i = 0; i < m_wlen/2; ++i) {
600 float temp = m_time[c][i];
601 m_time[c][i] = m_time[c][i + m_wlen/2];
602 m_time[c][i + m_wlen/2] = temp;
603 }
604
605 for (size_t i = 0; i < m_wlen; ++i) {
606 m_time[c][i] = m_time[c][i] / m_wlen;
607 }
608
609 m_synthesisWindow->cut(m_time[c]);
610
611 for (size_t i = 0; i < m_wlen; ++i) {
612 out[i] += m_time[c][i];
613 }
614
615 if (modulation) {
616
617 float area = m_analysisWindow->getArea();
618
619 for (size_t i = 0; i < m_wlen; ++i) {
620 float val = m_synthesisWindow->getValue(i);
621 modulation[i] += val * area;
622 }
623 }
624 }
625
626