annotate audioio/PhaseVocoderTimeStretcher.cpp @ 19:f17798a555df

...
author Chris Cannam
date Thu, 14 Sep 2006 11:20:09 +0000
parents c1aee08c60b1
children e125f0dde7a3
rev   line source
Chris@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@0 2
Chris@0 3 /*
Chris@0 4 Sonic Visualiser
Chris@0 5 An audio file viewer and annotation editor.
Chris@0 6 Centre for Digital Music, Queen Mary, University of London.
Chris@0 7 This file copyright 2006 Chris Cannam.
Chris@0 8
Chris@0 9 This program is free software; you can redistribute it and/or
Chris@0 10 modify it under the terms of the GNU General Public License as
Chris@0 11 published by the Free Software Foundation; either version 2 of the
Chris@0 12 License, or (at your option) any later version. See the file
Chris@0 13 COPYING included with this distribution for more information.
Chris@0 14 */
Chris@0 15
Chris@14 16 #include "PhaseVocoderTimeStretcher.h"
Chris@0 17
Chris@0 18 #include <iostream>
Chris@0 19 #include <cassert>
Chris@0 20
Chris@14 21 //#define DEBUG_PHASE_VOCODER_TIME_STRETCHER 1
Chris@0 22
Chris@16 23 PhaseVocoderTimeStretcher::PhaseVocoderTimeStretcher(size_t channels,
Chris@16 24 float ratio,
Chris@16 25 bool sharpen,
Chris@15 26 size_t maxProcessInputBlockSize) :
Chris@16 27 m_channels(channels),
Chris@16 28 m_ratio(ratio),
Chris@16 29 m_sharpen(sharpen)
Chris@0 30 {
Chris@16 31 m_wlen = 1024;
Chris@16 32
Chris@15 33 if (ratio < 1) {
Chris@16 34 if (ratio < 0.4) {
Chris@16 35 m_n1 = 1024;
Chris@16 36 m_wlen = 2048;
Chris@16 37 } else if (ratio < 0.8) {
Chris@16 38 m_n1 = 512;
Chris@16 39 } else {
Chris@16 40 m_n1 = 256;
Chris@16 41 }
Chris@16 42 if (m_sharpen) {
Chris@17 43 // m_n1 /= 2;
Chris@16 44 m_wlen = 2048;
Chris@16 45 }
Chris@15 46 m_n2 = m_n1 * ratio;
Chris@15 47 } else {
Chris@16 48 if (ratio > 2) {
Chris@16 49 m_n2 = 512;
Chris@16 50 m_wlen = 4096;
Chris@16 51 } else if (ratio > 1.6) {
Chris@16 52 m_n2 = 384;
Chris@16 53 m_wlen = 2048;
Chris@16 54 } else {
Chris@16 55 m_n2 = 256;
Chris@16 56 }
Chris@16 57 if (m_sharpen) {
Chris@17 58 // m_n2 /= 2;
Chris@16 59 if (m_wlen < 2048) m_wlen = 2048;
Chris@16 60 }
Chris@15 61 m_n1 = m_n2 / ratio;
Chris@15 62 }
Chris@16 63
Chris@16 64 m_window = new Window<float>(HanningWindow, m_wlen);
Chris@15 65
Chris@16 66 m_prevPhase = new float *[m_channels];
Chris@16 67 m_prevAdjustedPhase = new float *[m_channels];
Chris@16 68 if (m_sharpen) m_prevMag = new float *[m_channels];
Chris@16 69 else m_prevMag = 0;
Chris@16 70 m_prevPercussiveCount = new int[m_channels];
Chris@17 71 m_prevPercussive = false;
Chris@15 72
Chris@16 73 m_dbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen);
Chris@19 74 m_time = (float *)fftwf_malloc(sizeof(float) * m_wlen);
Chris@0 75 m_freq = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * m_wlen);
Chris@16 76
Chris@19 77 m_plan = fftwf_plan_dft_r2c_1d(m_wlen, m_time, m_freq, FFTW_ESTIMATE);
Chris@19 78 m_iplan = fftwf_plan_dft_c2r_1d(m_wlen, m_freq, m_time, FFTW_ESTIMATE);
Chris@0 79
Chris@16 80 m_inbuf = new RingBuffer<float> *[m_channels];
Chris@16 81 m_outbuf = new RingBuffer<float> *[m_channels];
Chris@16 82 m_mashbuf = new float *[m_channels];
Chris@16 83
Chris@16 84 m_modulationbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen);
Chris@16 85
Chris@16 86 for (size_t c = 0; c < m_channels; ++c) {
Chris@16 87
Chris@16 88 m_prevPhase[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen);
Chris@16 89 m_prevAdjustedPhase[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen);
Chris@16 90
Chris@16 91 if (m_sharpen) {
Chris@16 92 m_prevMag[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen);
Chris@16 93 }
Chris@16 94
Chris@16 95 m_inbuf[c] = new RingBuffer<float>(m_wlen);
Chris@16 96 m_outbuf[c] = new RingBuffer<float>
Chris@16 97 (lrintf((maxProcessInputBlockSize + m_wlen) * ratio));
Chris@16 98
Chris@16 99 m_mashbuf[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen);
Chris@16 100
Chris@16 101 for (int i = 0; i < m_wlen; ++i) {
Chris@16 102 m_mashbuf[c][i] = 0.0;
Chris@16 103 m_prevPhase[c][i] = 0.0;
Chris@16 104 m_prevAdjustedPhase[c][i] = 0.0;
Chris@16 105 if (m_sharpen) m_prevMag[c][i] = 0.0;
Chris@16 106 }
Chris@16 107
Chris@16 108 m_prevPercussiveCount[c] = 0;
Chris@16 109 }
Chris@16 110
Chris@0 111 for (int i = 0; i < m_wlen; ++i) {
Chris@16 112 m_modulationbuf[i] = 0.0;
Chris@0 113 }
Chris@16 114
Chris@16 115 std::cerr << "PhaseVocoderTimeStretcher: channels = " << channels
Chris@16 116 << ", ratio = " << ratio
Chris@16 117 << ", n1 = " << m_n1 << ", n2 = " << m_n2 << ", wlen = "
Chris@16 118 << m_wlen << ", max = " << maxProcessInputBlockSize
Chris@16 119 << ", outbuflen = " << m_outbuf[0]->getSize() << std::endl;
Chris@0 120 }
Chris@0 121
Chris@14 122 PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher()
Chris@0 123 {
Chris@14 124 std::cerr << "PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher" << std::endl;
Chris@0 125
Chris@0 126 fftwf_destroy_plan(m_plan);
Chris@0 127 fftwf_destroy_plan(m_iplan);
Chris@0 128
Chris@0 129 fftwf_free(m_time);
Chris@0 130 fftwf_free(m_freq);
Chris@0 131 fftwf_free(m_dbuf);
Chris@16 132
Chris@16 133 for (size_t c = 0; c < m_channels; ++c) {
Chris@16 134
Chris@16 135 fftwf_free(m_mashbuf[c]);
Chris@16 136 fftwf_free(m_prevPhase[c]);
Chris@16 137 fftwf_free(m_prevAdjustedPhase[c]);
Chris@16 138 if (m_sharpen) fftwf_free(m_prevMag[c]);
Chris@16 139
Chris@16 140 delete m_inbuf[c];
Chris@16 141 delete m_outbuf[c];
Chris@16 142 }
Chris@16 143
Chris@13 144 fftwf_free(m_modulationbuf);
Chris@0 145
Chris@16 146 delete[] m_prevPhase;
Chris@16 147 delete[] m_prevAdjustedPhase;
Chris@16 148 if (m_sharpen) delete[] m_prevMag;
Chris@16 149 delete[] m_prevPercussiveCount;
Chris@16 150 delete[] m_inbuf;
Chris@16 151 delete[] m_outbuf;
Chris@16 152 delete[] m_mashbuf;
Chris@15 153
Chris@0 154 delete m_window;
Chris@0 155 }
Chris@0 156
Chris@0 157 size_t
Chris@14 158 PhaseVocoderTimeStretcher::getProcessingLatency() const
Chris@0 159 {
Chris@0 160 return getWindowSize() - getInputIncrement();
Chris@0 161 }
Chris@0 162
Chris@0 163 void
Chris@16 164 PhaseVocoderTimeStretcher::process(float **input, float **output, size_t samples)
Chris@16 165 {
Chris@16 166 putInput(input, samples);
Chris@16 167 getOutput(output, lrintf(samples * m_ratio));
Chris@16 168 }
Chris@16 169
Chris@16 170 size_t
Chris@16 171 PhaseVocoderTimeStretcher::getRequiredInputSamples() const
Chris@16 172 {
Chris@16 173 if (m_inbuf[0]->getReadSpace() >= m_wlen) return 0;
Chris@16 174 return m_wlen - m_inbuf[0]->getReadSpace();
Chris@16 175 }
Chris@16 176
Chris@16 177 void
Chris@16 178 PhaseVocoderTimeStretcher::putInput(float **input, size_t samples)
Chris@0 179 {
Chris@0 180 // We need to add samples from input to our internal buffer. When
Chris@0 181 // we have m_windowSize samples in the buffer, we can process it,
Chris@0 182 // move the samples back by m_n1 and write the output onto our
Chris@0 183 // internal output buffer. If we have (samples * ratio) samples
Chris@0 184 // in that, we can write m_n2 of them back to output and return
Chris@0 185 // (otherwise we have to write zeroes).
Chris@0 186
Chris@0 187 // When we process, we write m_wlen to our fixed output buffer
Chris@0 188 // (m_mashbuf). We then pull out the first m_n2 samples from that
Chris@0 189 // buffer, push them into the output ring buffer, and shift
Chris@0 190 // m_mashbuf left by that amount.
Chris@0 191
Chris@0 192 // The processing latency is then m_wlen - m_n2.
Chris@0 193
Chris@0 194 size_t consumed = 0;
Chris@0 195
Chris@0 196 while (consumed < samples) {
Chris@0 197
Chris@16 198 size_t writable = m_inbuf[0]->getWriteSpace();
Chris@0 199 writable = std::min(writable, samples - consumed);
Chris@0 200
Chris@0 201 if (writable == 0) {
Chris@0 202 //!!! then what? I don't think this should happen, but
Chris@16 203 std::cerr << "WARNING: PhaseVocoderTimeStretcher::putInput: writable == 0" << std::endl;
Chris@0 204 break;
Chris@0 205 }
Chris@0 206
Chris@14 207 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
Chris@0 208 std::cerr << "writing " << writable << " from index " << consumed << " to inbuf, consumed will be " << consumed + writable << std::endl;
Chris@0 209 #endif
Chris@16 210
Chris@16 211 for (size_t c = 0; c < m_channels; ++c) {
Chris@16 212 m_inbuf[c]->write(input[c] + consumed, writable);
Chris@16 213 }
Chris@0 214 consumed += writable;
Chris@0 215
Chris@16 216 while (m_inbuf[0]->getReadSpace() >= m_wlen &&
Chris@16 217 m_outbuf[0]->getWriteSpace() >= m_n2) {
Chris@0 218
Chris@0 219 // We know we have at least m_wlen samples available
Chris@16 220 // in m_inbuf. We need to peek m_wlen of them for
Chris@0 221 // processing, and then read m_n1 to advance the read
Chris@0 222 // pointer.
Chris@16 223
Chris@16 224 size_t n2 = m_n2;
Chris@16 225 bool isPercussive = false;
Chris@0 226
Chris@16 227 for (size_t c = 0; c < m_channels; ++c) {
Chris@16 228
Chris@16 229 size_t got = m_inbuf[c]->peek(m_dbuf, m_wlen);
Chris@16 230 assert(got == m_wlen);
Chris@0 231
Chris@16 232 bool thisChannelPercussive =
Chris@16 233 processBlock(c, m_dbuf, m_mashbuf[c],
Chris@16 234 c == 0 ? m_modulationbuf : 0,
Chris@18 235 m_prevPercussive ? m_n1 : m_n2);
Chris@16 236
Chris@16 237 if (thisChannelPercussive && c == 0) {
Chris@16 238 isPercussive = true;
Chris@16 239 }
Chris@16 240
Chris@16 241 if (isPercussive) {
Chris@16 242 n2 = m_n1;
Chris@16 243 }
Chris@0 244
Chris@14 245 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
Chris@16 246 std::cerr << "writing first " << m_n2 << " from mashbuf, skipping " << m_n1 << " on inbuf " << std::endl;
Chris@0 247 #endif
Chris@16 248 m_inbuf[c]->skip(m_n1);
Chris@13 249
Chris@16 250 for (size_t i = 0; i < n2; ++i) {
Chris@16 251 if (m_modulationbuf[i] > 0.f) {
Chris@16 252 m_mashbuf[c][i] /= m_modulationbuf[i];
Chris@16 253 }
Chris@16 254 }
Chris@16 255
Chris@16 256 m_outbuf[c]->write(m_mashbuf[c], n2);
Chris@16 257
Chris@16 258 for (size_t i = 0; i < m_wlen - n2; ++i) {
Chris@16 259 m_mashbuf[c][i] = m_mashbuf[c][i + n2];
Chris@16 260 }
Chris@16 261
Chris@16 262 for (size_t i = m_wlen - n2; i < m_wlen; ++i) {
Chris@16 263 m_mashbuf[c][i] = 0.0f;
Chris@13 264 }
Chris@13 265 }
Chris@13 266
Chris@17 267 m_prevPercussive = isPercussive;
Chris@17 268
Chris@16 269 for (size_t i = 0; i < m_wlen - n2; ++i) {
Chris@16 270 m_modulationbuf[i] = m_modulationbuf[i + n2];
Chris@0 271 }
Chris@13 272
Chris@16 273 for (size_t i = m_wlen - n2; i < m_wlen; ++i) {
Chris@13 274 m_modulationbuf[i] = 0.0f;
Chris@0 275 }
Chris@0 276 }
Chris@0 277
Chris@0 278
Chris@14 279 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
Chris@16 280 std::cerr << "loop ended: inbuf read space " << m_inbuf[0]->getReadSpace() << ", outbuf write space " << m_outbuf[0]->getWriteSpace() << std::endl;
Chris@0 281 #endif
Chris@0 282 }
Chris@0 283
Chris@16 284 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
Chris@16 285 std::cerr << "PhaseVocoderTimeStretcher::putInput returning" << std::endl;
Chris@16 286 #endif
Chris@16 287 }
Chris@12 288
Chris@16 289 size_t
Chris@16 290 PhaseVocoderTimeStretcher::getAvailableOutputSamples() const
Chris@16 291 {
Chris@16 292 return m_outbuf[0]->getReadSpace();
Chris@16 293 }
Chris@16 294
Chris@16 295 void
Chris@16 296 PhaseVocoderTimeStretcher::getOutput(float **output, size_t samples)
Chris@16 297 {
Chris@16 298 if (m_outbuf[0]->getReadSpace() < samples) {
Chris@16 299 std::cerr << "WARNING: PhaseVocoderTimeStretcher::getOutput: not enough data (yet?) (" << m_outbuf[0]->getReadSpace() << " < " << samples << ")" << std::endl;
Chris@16 300 size_t fill = samples - m_outbuf[0]->getReadSpace();
Chris@16 301 for (size_t c = 0; c < m_channels; ++c) {
Chris@16 302 for (size_t i = 0; i < fill; ++i) {
Chris@16 303 output[c][i] = 0.0;
Chris@16 304 }
Chris@16 305 m_outbuf[c]->read(output[c] + fill, m_outbuf[c]->getReadSpace());
Chris@16 306 }
Chris@0 307 } else {
Chris@14 308 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
Chris@16 309 std::cerr << "enough data - writing " << samples << " from outbuf" << std::endl;
Chris@0 310 #endif
Chris@16 311 for (size_t c = 0; c < m_channels; ++c) {
Chris@16 312 m_outbuf[c]->read(output[c], samples);
Chris@16 313 }
Chris@0 314 }
Chris@0 315
Chris@14 316 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
Chris@16 317 std::cerr << "PhaseVocoderTimeStretcher::getOutput returning" << std::endl;
Chris@0 318 #endif
Chris@0 319 }
Chris@0 320
Chris@16 321 bool
Chris@16 322 PhaseVocoderTimeStretcher::processBlock(size_t c,
Chris@16 323 float *buf, float *out,
Chris@16 324 float *modulation,
Chris@18 325 size_t lastStep)
Chris@0 326 {
Chris@0 327 size_t i;
Chris@17 328 bool isPercussive = false;
Chris@0 329
Chris@0 330 // buf contains m_wlen samples; out contains enough space for
Chris@0 331 // m_wlen * ratio samples (we mix into out, rather than replacing)
Chris@0 332
Chris@14 333 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
Chris@16 334 std::cerr << "PhaseVocoderTimeStretcher::processBlock (channel " << c << ")" << std::endl;
Chris@0 335 #endif
Chris@0 336
Chris@0 337 m_window->cut(buf);
Chris@0 338
Chris@0 339 for (i = 0; i < m_wlen/2; ++i) {
Chris@0 340 float temp = buf[i];
Chris@0 341 buf[i] = buf[i + m_wlen/2];
Chris@0 342 buf[i + m_wlen/2] = temp;
Chris@0 343 }
Chris@19 344
Chris@0 345 for (i = 0; i < m_wlen; ++i) {
Chris@19 346 m_time[i] = buf[i];
Chris@0 347 }
Chris@0 348
Chris@0 349 fftwf_execute(m_plan); // m_time -> m_freq
Chris@0 350
Chris@16 351 if (m_sharpen && c == 0) { //!!!
Chris@16 352
Chris@16 353 int count = 0;
Chris@16 354
Chris@16 355 for (i = 0; i < m_wlen; ++i) {
Chris@16 356
Chris@16 357 float mag = sqrtf(m_freq[i][0] * m_freq[i][0] +
Chris@16 358 m_freq[i][1] * m_freq[i][1]);
Chris@16 359
Chris@16 360 if (m_prevMag[c][i] > 0) {
Chris@16 361 float magdiff = 20.f * log10f(mag / m_prevMag[c][i]);
Chris@16 362 if (magdiff > 3.f) ++count;
Chris@16 363 }
Chris@16 364
Chris@16 365 m_prevMag[c][i] = mag;
Chris@16 366 }
Chris@16 367
Chris@17 368 if (count > m_wlen / 4 && //!!!
Chris@16 369 count > m_prevPercussiveCount[c] * 1.2) {
Chris@16 370 isPercussive = true;
Chris@16 371 std::cerr << "isPercussive (count = " << count << ", prev = " << m_prevPercussiveCount[c] << ")" << std::endl;
Chris@16 372 }
Chris@16 373
Chris@16 374 m_prevPercussiveCount[c] = count;
Chris@16 375 }
Chris@16 376
Chris@19 377 for (i = 0; i < m_wlen; ++i) { //!!! /2
Chris@16 378
Chris@16 379 float mag;
Chris@16 380
Chris@16 381 if (m_sharpen && c == 0) {
Chris@16 382 mag = m_prevMag[c][i]; // can reuse this
Chris@16 383 } else {
Chris@16 384 mag = sqrtf(m_freq[i][0] * m_freq[i][0] +
Chris@16 385 m_freq[i][1] * m_freq[i][1]);
Chris@16 386 }
Chris@0 387
Chris@12 388 float phase = princargf(atan2f(m_freq[i][1], m_freq[i][0]));
Chris@12 389
Chris@12 390 float omega = (2 * M_PI * m_n1 * i) / m_wlen;
Chris@0 391
Chris@16 392 float expectedPhase = m_prevPhase[c][i] + omega;
Chris@12 393
Chris@12 394 float phaseError = princargf(phase - expectedPhase);
Chris@12 395
Chris@19 396 float adjustedPhase = phase;
Chris@12 397
Chris@19 398 if (!isPercussive) {
Chris@19 399 // if (fabsf(phaseError) < (1.1f * (lastStep * M_PI) / m_wlen)) {
Chris@16 400
Chris@19 401 float phaseIncrement = (omega + phaseError) / m_n1;
Chris@19 402
Chris@19 403 adjustedPhase = m_prevAdjustedPhase[c][i] +
Chris@19 404 lastStep * phaseIncrement;
Chris@19 405 // }
Chris@19 406 }
Chris@19 407
Chris@19 408 // if (isPercussive) adjustedPhase = phase;
Chris@0 409
Chris@12 410 float real = mag * cosf(adjustedPhase);
Chris@12 411 float imag = mag * sinf(adjustedPhase);
Chris@0 412 m_freq[i][0] = real;
Chris@0 413 m_freq[i][1] = imag;
Chris@12 414
Chris@16 415 m_prevPhase[c][i] = phase;
Chris@16 416 m_prevAdjustedPhase[c][i] = adjustedPhase;
Chris@0 417 }
Chris@0 418
Chris@19 419 fftwf_execute(m_iplan); // m_freq -> m_time, inverse fft
Chris@19 420
Chris@0 421 for (i = 0; i < m_wlen/2; ++i) {
Chris@19 422 float temp = m_time[i];
Chris@19 423 m_time[i] = m_time[i + m_wlen/2];
Chris@19 424 m_time[i + m_wlen/2] = temp;
Chris@0 425 }
Chris@15 426
Chris@0 427 for (i = 0; i < m_wlen; ++i) {
Chris@19 428 m_time[i] = m_time[i] / m_wlen;
Chris@19 429 }
Chris@19 430
Chris@19 431 m_window->cut(m_time);
Chris@19 432
Chris@19 433 for (i = 0; i < m_wlen; ++i) {
Chris@19 434 out[i] += m_time[i];
Chris@0 435 }
Chris@16 436
Chris@16 437 if (modulation) {
Chris@16 438
Chris@16 439 float area = m_window->getArea();
Chris@16 440
Chris@16 441 for (i = 0; i < m_wlen; ++i) {
Chris@16 442 float val = m_window->getValue(i);
Chris@16 443 modulation[i] += val * area;
Chris@16 444 }
Chris@16 445 }
Chris@16 446
Chris@16 447 return isPercussive;
Chris@0 448 }
Chris@15 449