annotate audioio/PhaseVocoderTimeStretcher.cpp @ 20:e125f0dde7a3

* restructure time stretcher somewhat so as to do transient detection on mixed stereo signal instead of just one channel
author Chris Cannam
date Thu, 14 Sep 2006 13:41:56 +0000
parents f17798a555df
children 7da85e0b85e9
rev   line source
Chris@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@0 2
Chris@0 3 /*
Chris@0 4 Sonic Visualiser
Chris@0 5 An audio file viewer and annotation editor.
Chris@0 6 Centre for Digital Music, Queen Mary, University of London.
Chris@0 7 This file copyright 2006 Chris Cannam.
Chris@0 8
Chris@0 9 This program is free software; you can redistribute it and/or
Chris@0 10 modify it under the terms of the GNU General Public License as
Chris@0 11 published by the Free Software Foundation; either version 2 of the
Chris@0 12 License, or (at your option) any later version. See the file
Chris@0 13 COPYING included with this distribution for more information.
Chris@0 14 */
Chris@0 15
Chris@14 16 #include "PhaseVocoderTimeStretcher.h"
Chris@0 17
Chris@0 18 #include <iostream>
Chris@0 19 #include <cassert>
Chris@0 20
Chris@14 21 //#define DEBUG_PHASE_VOCODER_TIME_STRETCHER 1
Chris@0 22
Chris@16 23 PhaseVocoderTimeStretcher::PhaseVocoderTimeStretcher(size_t channels,
Chris@16 24 float ratio,
Chris@16 25 bool sharpen,
Chris@15 26 size_t maxProcessInputBlockSize) :
Chris@16 27 m_channels(channels),
Chris@16 28 m_ratio(ratio),
Chris@16 29 m_sharpen(sharpen)
Chris@0 30 {
Chris@16 31 m_wlen = 1024;
Chris@16 32
Chris@15 33 if (ratio < 1) {
Chris@16 34 if (ratio < 0.4) {
Chris@16 35 m_n1 = 1024;
Chris@16 36 m_wlen = 2048;
Chris@16 37 } else if (ratio < 0.8) {
Chris@16 38 m_n1 = 512;
Chris@16 39 } else {
Chris@16 40 m_n1 = 256;
Chris@16 41 }
Chris@16 42 if (m_sharpen) {
Chris@17 43 // m_n1 /= 2;
Chris@16 44 m_wlen = 2048;
Chris@16 45 }
Chris@15 46 m_n2 = m_n1 * ratio;
Chris@15 47 } else {
Chris@16 48 if (ratio > 2) {
Chris@16 49 m_n2 = 512;
Chris@16 50 m_wlen = 4096;
Chris@16 51 } else if (ratio > 1.6) {
Chris@16 52 m_n2 = 384;
Chris@16 53 m_wlen = 2048;
Chris@16 54 } else {
Chris@16 55 m_n2 = 256;
Chris@16 56 }
Chris@16 57 if (m_sharpen) {
Chris@17 58 // m_n2 /= 2;
Chris@16 59 if (m_wlen < 2048) m_wlen = 2048;
Chris@16 60 }
Chris@15 61 m_n1 = m_n2 / ratio;
Chris@15 62 }
Chris@16 63
Chris@20 64 m_analysisWindow = new Window<float>(HanningWindow, m_wlen);
Chris@20 65 m_synthesisWindow = new Window<float>(HanningWindow, m_wlen);
Chris@15 66
Chris@16 67 m_prevPhase = new float *[m_channels];
Chris@16 68 m_prevAdjustedPhase = new float *[m_channels];
Chris@15 69
Chris@20 70 m_prevTransientMag = (float *)fftwf_malloc(sizeof(float) * (m_wlen / 2 + 1));
Chris@20 71 m_prevTransientCount = 0;
Chris@20 72 m_prevTransient = false;
Chris@20 73
Chris@20 74 m_tempbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen);
Chris@20 75
Chris@20 76 m_time = new float *[m_channels];
Chris@20 77 m_freq = new fftwf_complex *[m_channels];
Chris@20 78 m_plan = new fftwf_plan[m_channels];
Chris@20 79 m_iplan = new fftwf_plan[m_channels];
Chris@0 80
Chris@16 81 m_inbuf = new RingBuffer<float> *[m_channels];
Chris@16 82 m_outbuf = new RingBuffer<float> *[m_channels];
Chris@16 83 m_mashbuf = new float *[m_channels];
Chris@16 84
Chris@16 85 m_modulationbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen);
Chris@16 86
Chris@16 87 for (size_t c = 0; c < m_channels; ++c) {
Chris@16 88
Chris@20 89 m_prevPhase[c] = (float *)fftwf_malloc(sizeof(float) * (m_wlen / 2 + 1));
Chris@20 90 m_prevAdjustedPhase[c] = (float *)fftwf_malloc(sizeof(float) * (m_wlen / 2 + 1));
Chris@16 91
Chris@20 92 m_time[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen);
Chris@20 93 m_freq[c] = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) *
Chris@20 94 (m_wlen / 2 + 1));
Chris@20 95
Chris@20 96 m_plan[c] = fftwf_plan_dft_r2c_1d(m_wlen, m_time[c], m_freq[c], FFTW_ESTIMATE);
Chris@20 97 m_iplan[c] = fftwf_plan_dft_c2r_1d(m_wlen, m_freq[c], m_time[c], FFTW_ESTIMATE);
Chris@16 98
Chris@16 99 m_inbuf[c] = new RingBuffer<float>(m_wlen);
Chris@16 100 m_outbuf[c] = new RingBuffer<float>
Chris@16 101 (lrintf((maxProcessInputBlockSize + m_wlen) * ratio));
Chris@16 102
Chris@16 103 m_mashbuf[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen);
Chris@16 104
Chris@16 105 for (int i = 0; i < m_wlen; ++i) {
Chris@16 106 m_mashbuf[c][i] = 0.0;
Chris@20 107 }
Chris@20 108
Chris@20 109 for (int i = 0; i <= m_wlen/2; ++i) {
Chris@16 110 m_prevPhase[c][i] = 0.0;
Chris@16 111 m_prevAdjustedPhase[c][i] = 0.0;
Chris@16 112 }
Chris@16 113 }
Chris@16 114
Chris@0 115 for (int i = 0; i < m_wlen; ++i) {
Chris@16 116 m_modulationbuf[i] = 0.0;
Chris@0 117 }
Chris@16 118
Chris@20 119 for (int i = 0; i <= m_wlen/2; ++i) {
Chris@20 120 m_prevTransientMag[i] = 0.0;
Chris@20 121 }
Chris@20 122
Chris@16 123 std::cerr << "PhaseVocoderTimeStretcher: channels = " << channels
Chris@16 124 << ", ratio = " << ratio
Chris@16 125 << ", n1 = " << m_n1 << ", n2 = " << m_n2 << ", wlen = "
Chris@16 126 << m_wlen << ", max = " << maxProcessInputBlockSize
Chris@16 127 << ", outbuflen = " << m_outbuf[0]->getSize() << std::endl;
Chris@0 128 }
Chris@0 129
Chris@14 130 PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher()
Chris@0 131 {
Chris@14 132 std::cerr << "PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher" << std::endl;
Chris@0 133
Chris@20 134 for (size_t c = 0; c < m_channels; ++c) {
Chris@0 135
Chris@20 136 fftwf_destroy_plan(m_plan[c]);
Chris@20 137 fftwf_destroy_plan(m_iplan[c]);
Chris@16 138
Chris@20 139 fftwf_free(m_time[c]);
Chris@20 140 fftwf_free(m_freq[c]);
Chris@16 141
Chris@16 142 fftwf_free(m_mashbuf[c]);
Chris@16 143 fftwf_free(m_prevPhase[c]);
Chris@16 144 fftwf_free(m_prevAdjustedPhase[c]);
Chris@16 145
Chris@16 146 delete m_inbuf[c];
Chris@16 147 delete m_outbuf[c];
Chris@16 148 }
Chris@16 149
Chris@20 150 fftwf_free(m_tempbuf);
Chris@13 151 fftwf_free(m_modulationbuf);
Chris@20 152 fftwf_free(m_prevTransientMag);
Chris@0 153
Chris@16 154 delete[] m_prevPhase;
Chris@16 155 delete[] m_prevAdjustedPhase;
Chris@16 156 delete[] m_inbuf;
Chris@16 157 delete[] m_outbuf;
Chris@16 158 delete[] m_mashbuf;
Chris@20 159 delete[] m_time;
Chris@20 160 delete[] m_freq;
Chris@20 161 delete[] m_plan;
Chris@20 162 delete[] m_iplan;
Chris@15 163
Chris@20 164 delete m_analysisWindow;
Chris@20 165 delete m_synthesisWindow;
Chris@0 166 }
Chris@0 167
Chris@0 168 size_t
Chris@14 169 PhaseVocoderTimeStretcher::getProcessingLatency() const
Chris@0 170 {
Chris@0 171 return getWindowSize() - getInputIncrement();
Chris@0 172 }
Chris@0 173
Chris@0 174 void
Chris@16 175 PhaseVocoderTimeStretcher::process(float **input, float **output, size_t samples)
Chris@16 176 {
Chris@16 177 putInput(input, samples);
Chris@16 178 getOutput(output, lrintf(samples * m_ratio));
Chris@16 179 }
Chris@16 180
Chris@16 181 size_t
Chris@16 182 PhaseVocoderTimeStretcher::getRequiredInputSamples() const
Chris@16 183 {
Chris@16 184 if (m_inbuf[0]->getReadSpace() >= m_wlen) return 0;
Chris@16 185 return m_wlen - m_inbuf[0]->getReadSpace();
Chris@16 186 }
Chris@16 187
Chris@16 188 void
Chris@16 189 PhaseVocoderTimeStretcher::putInput(float **input, size_t samples)
Chris@0 190 {
Chris@0 191 // We need to add samples from input to our internal buffer. When
Chris@0 192 // we have m_windowSize samples in the buffer, we can process it,
Chris@0 193 // move the samples back by m_n1 and write the output onto our
Chris@0 194 // internal output buffer. If we have (samples * ratio) samples
Chris@0 195 // in that, we can write m_n2 of them back to output and return
Chris@0 196 // (otherwise we have to write zeroes).
Chris@0 197
Chris@0 198 // When we process, we write m_wlen to our fixed output buffer
Chris@0 199 // (m_mashbuf). We then pull out the first m_n2 samples from that
Chris@0 200 // buffer, push them into the output ring buffer, and shift
Chris@0 201 // m_mashbuf left by that amount.
Chris@0 202
Chris@0 203 // The processing latency is then m_wlen - m_n2.
Chris@0 204
Chris@0 205 size_t consumed = 0;
Chris@0 206
Chris@0 207 while (consumed < samples) {
Chris@0 208
Chris@16 209 size_t writable = m_inbuf[0]->getWriteSpace();
Chris@0 210 writable = std::min(writable, samples - consumed);
Chris@0 211
Chris@0 212 if (writable == 0) {
Chris@0 213 //!!! then what? I don't think this should happen, but
Chris@16 214 std::cerr << "WARNING: PhaseVocoderTimeStretcher::putInput: writable == 0" << std::endl;
Chris@0 215 break;
Chris@0 216 }
Chris@0 217
Chris@14 218 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
Chris@0 219 std::cerr << "writing " << writable << " from index " << consumed << " to inbuf, consumed will be " << consumed + writable << std::endl;
Chris@0 220 #endif
Chris@16 221
Chris@16 222 for (size_t c = 0; c < m_channels; ++c) {
Chris@16 223 m_inbuf[c]->write(input[c] + consumed, writable);
Chris@16 224 }
Chris@0 225 consumed += writable;
Chris@0 226
Chris@16 227 while (m_inbuf[0]->getReadSpace() >= m_wlen &&
Chris@16 228 m_outbuf[0]->getWriteSpace() >= m_n2) {
Chris@0 229
Chris@0 230 // We know we have at least m_wlen samples available
Chris@16 231 // in m_inbuf. We need to peek m_wlen of them for
Chris@0 232 // processing, and then read m_n1 to advance the read
Chris@0 233 // pointer.
Chris@16 234
Chris@20 235 for (size_t c = 0; c < m_channels; ++c) {
Chris@20 236
Chris@20 237 size_t got = m_inbuf[c]->peek(m_tempbuf, m_wlen);
Chris@20 238 assert(got == m_wlen);
Chris@20 239
Chris@20 240 analyseBlock(c, m_tempbuf);
Chris@20 241 }
Chris@20 242
Chris@20 243 bool transient = false;
Chris@20 244 if (m_sharpen) transient = isTransient();
Chris@20 245
Chris@16 246 size_t n2 = m_n2;
Chris@20 247
Chris@20 248 if (transient) {
Chris@20 249 n2 = m_n1;
Chris@20 250 }
Chris@0 251
Chris@16 252 for (size_t c = 0; c < m_channels; ++c) {
Chris@16 253
Chris@20 254 synthesiseBlock(c, m_mashbuf[c],
Chris@20 255 c == 0 ? m_modulationbuf : 0,
Chris@20 256 m_prevTransient ? m_n1 : m_n2);
Chris@16 257
Chris@0 258
Chris@14 259 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
Chris@16 260 std::cerr << "writing first " << m_n2 << " from mashbuf, skipping " << m_n1 << " on inbuf " << std::endl;
Chris@0 261 #endif
Chris@16 262 m_inbuf[c]->skip(m_n1);
Chris@13 263
Chris@16 264 for (size_t i = 0; i < n2; ++i) {
Chris@16 265 if (m_modulationbuf[i] > 0.f) {
Chris@16 266 m_mashbuf[c][i] /= m_modulationbuf[i];
Chris@16 267 }
Chris@16 268 }
Chris@16 269
Chris@16 270 m_outbuf[c]->write(m_mashbuf[c], n2);
Chris@16 271
Chris@16 272 for (size_t i = 0; i < m_wlen - n2; ++i) {
Chris@16 273 m_mashbuf[c][i] = m_mashbuf[c][i + n2];
Chris@16 274 }
Chris@16 275
Chris@16 276 for (size_t i = m_wlen - n2; i < m_wlen; ++i) {
Chris@16 277 m_mashbuf[c][i] = 0.0f;
Chris@13 278 }
Chris@13 279 }
Chris@13 280
Chris@20 281 m_prevTransient = transient;
Chris@17 282
Chris@16 283 for (size_t i = 0; i < m_wlen - n2; ++i) {
Chris@16 284 m_modulationbuf[i] = m_modulationbuf[i + n2];
Chris@0 285 }
Chris@13 286
Chris@16 287 for (size_t i = m_wlen - n2; i < m_wlen; ++i) {
Chris@13 288 m_modulationbuf[i] = 0.0f;
Chris@0 289 }
Chris@0 290 }
Chris@0 291
Chris@0 292
Chris@14 293 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
Chris@16 294 std::cerr << "loop ended: inbuf read space " << m_inbuf[0]->getReadSpace() << ", outbuf write space " << m_outbuf[0]->getWriteSpace() << std::endl;
Chris@0 295 #endif
Chris@0 296 }
Chris@0 297
Chris@16 298 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
Chris@16 299 std::cerr << "PhaseVocoderTimeStretcher::putInput returning" << std::endl;
Chris@16 300 #endif
Chris@16 301 }
Chris@12 302
Chris@16 303 size_t
Chris@16 304 PhaseVocoderTimeStretcher::getAvailableOutputSamples() const
Chris@16 305 {
Chris@16 306 return m_outbuf[0]->getReadSpace();
Chris@16 307 }
Chris@16 308
Chris@16 309 void
Chris@16 310 PhaseVocoderTimeStretcher::getOutput(float **output, size_t samples)
Chris@16 311 {
Chris@16 312 if (m_outbuf[0]->getReadSpace() < samples) {
Chris@16 313 std::cerr << "WARNING: PhaseVocoderTimeStretcher::getOutput: not enough data (yet?) (" << m_outbuf[0]->getReadSpace() << " < " << samples << ")" << std::endl;
Chris@16 314 size_t fill = samples - m_outbuf[0]->getReadSpace();
Chris@16 315 for (size_t c = 0; c < m_channels; ++c) {
Chris@16 316 for (size_t i = 0; i < fill; ++i) {
Chris@16 317 output[c][i] = 0.0;
Chris@16 318 }
Chris@16 319 m_outbuf[c]->read(output[c] + fill, m_outbuf[c]->getReadSpace());
Chris@16 320 }
Chris@0 321 } else {
Chris@14 322 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
Chris@16 323 std::cerr << "enough data - writing " << samples << " from outbuf" << std::endl;
Chris@0 324 #endif
Chris@16 325 for (size_t c = 0; c < m_channels; ++c) {
Chris@16 326 m_outbuf[c]->read(output[c], samples);
Chris@16 327 }
Chris@0 328 }
Chris@0 329
Chris@14 330 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
Chris@16 331 std::cerr << "PhaseVocoderTimeStretcher::getOutput returning" << std::endl;
Chris@0 332 #endif
Chris@0 333 }
Chris@0 334
Chris@20 335 void
Chris@20 336 PhaseVocoderTimeStretcher::analyseBlock(size_t c, float *buf)
Chris@0 337 {
Chris@0 338 size_t i;
Chris@0 339
Chris@20 340 // buf contains m_wlen samples
Chris@0 341
Chris@14 342 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
Chris@20 343 std::cerr << "PhaseVocoderTimeStretcher::analyseBlock (channel " << c << ")" << std::endl;
Chris@0 344 #endif
Chris@0 345
Chris@20 346 m_analysisWindow->cut(buf);
Chris@0 347
Chris@0 348 for (i = 0; i < m_wlen/2; ++i) {
Chris@0 349 float temp = buf[i];
Chris@0 350 buf[i] = buf[i + m_wlen/2];
Chris@0 351 buf[i + m_wlen/2] = temp;
Chris@0 352 }
Chris@19 353
Chris@0 354 for (i = 0; i < m_wlen; ++i) {
Chris@20 355 m_time[c][i] = buf[i];
Chris@0 356 }
Chris@0 357
Chris@20 358 fftwf_execute(m_plan[c]); // m_time -> m_freq
Chris@20 359 }
Chris@0 360
Chris@20 361 bool
Chris@20 362 PhaseVocoderTimeStretcher::isTransient()
Chris@20 363 {
Chris@20 364 int count = 0;
Chris@16 365
Chris@20 366 for (int i = 0; i <= m_wlen/2; ++i) {
Chris@16 367
Chris@20 368 float real = 0.f, imag = 0.f;
Chris@20 369
Chris@20 370 for (size_t c = 0; c < m_channels; ++c) {
Chris@20 371 real += m_freq[c][i][0];
Chris@20 372 imag += m_freq[c][i][1];
Chris@16 373 }
Chris@16 374
Chris@20 375 float sqrmag = (real * real + imag * imag);
Chris@20 376
Chris@20 377 if (m_prevTransientMag[i] > 0.f) {
Chris@20 378 float diff = 10.f * log10f(sqrmag / m_prevTransientMag[i]);
Chris@20 379 if (diff > 3.f) ++count;
Chris@20 380 }
Chris@20 381
Chris@20 382 m_prevTransientMag[i] = sqrmag;
Chris@16 383 }
Chris@16 384
Chris@20 385 bool isTransient = false;
Chris@16 386
Chris@20 387 if (count > m_wlen / 4.5 && //!!!
Chris@20 388 count > m_prevTransientCount * 1.2) {
Chris@20 389 isTransient = true;
Chris@20 390 std::cerr << "isTransient (count = " << count << ", prev = " << m_prevTransientCount << ")" << std::endl;
Chris@20 391 }
Chris@16 392
Chris@20 393 m_prevTransientCount = count;
Chris@20 394
Chris@20 395 return isTransient;
Chris@20 396 }
Chris@20 397
Chris@20 398 void
Chris@20 399 PhaseVocoderTimeStretcher::synthesiseBlock(size_t c,
Chris@20 400 float *out,
Chris@20 401 float *modulation,
Chris@20 402 size_t lastStep)
Chris@20 403 {
Chris@20 404 int i;
Chris@20 405
Chris@20 406 bool unchanged = (lastStep == m_n1);
Chris@20 407
Chris@20 408 for (i = 0; i <= m_wlen/2; ++i) {
Chris@0 409
Chris@20 410 float phase = princargf(atan2f(m_freq[c][i][1], m_freq[c][i][0]));
Chris@19 411 float adjustedPhase = phase;
Chris@12 412
Chris@20 413 if (!unchanged) {
Chris@16 414
Chris@20 415 float mag = sqrtf(m_freq[c][i][0] * m_freq[c][i][0] +
Chris@20 416 m_freq[c][i][1] * m_freq[c][i][1]);
Chris@19 417
Chris@20 418 float omega = (2 * M_PI * m_n1 * i) / m_wlen;
Chris@20 419
Chris@20 420 float expectedPhase = m_prevPhase[c][i] + omega;
Chris@20 421
Chris@20 422 float phaseError = princargf(phase - expectedPhase);
Chris@20 423
Chris@20 424 float phaseIncrement = (omega + phaseError) / m_n1;
Chris@20 425
Chris@20 426 adjustedPhase = m_prevAdjustedPhase[c][i] +
Chris@20 427 lastStep * phaseIncrement;
Chris@20 428
Chris@20 429 float real = mag * cosf(adjustedPhase);
Chris@20 430 float imag = mag * sinf(adjustedPhase);
Chris@20 431 m_freq[c][i][0] = real;
Chris@20 432 m_freq[c][i][1] = imag;
Chris@19 433 }
Chris@19 434
Chris@16 435 m_prevPhase[c][i] = phase;
Chris@16 436 m_prevAdjustedPhase[c][i] = adjustedPhase;
Chris@0 437 }
Chris@20 438
Chris@20 439 fftwf_execute(m_iplan[c]); // m_freq -> m_time, inverse fft
Chris@19 440
Chris@0 441 for (i = 0; i < m_wlen/2; ++i) {
Chris@20 442 float temp = m_time[c][i];
Chris@20 443 m_time[c][i] = m_time[c][i + m_wlen/2];
Chris@20 444 m_time[c][i + m_wlen/2] = temp;
Chris@20 445 }
Chris@20 446
Chris@20 447 for (i = 0; i < m_wlen; ++i) {
Chris@20 448 m_time[c][i] = m_time[c][i] / m_wlen;
Chris@0 449 }
Chris@15 450
Chris@20 451 m_synthesisWindow->cut(m_time[c]);
Chris@19 452
Chris@19 453 for (i = 0; i < m_wlen; ++i) {
Chris@20 454 out[i] += m_time[c][i];
Chris@0 455 }
Chris@16 456
Chris@16 457 if (modulation) {
Chris@16 458
Chris@20 459 float area = m_analysisWindow->getArea();
Chris@16 460
Chris@16 461 for (i = 0; i < m_wlen; ++i) {
Chris@20 462 float val = m_synthesisWindow->getValue(i);
Chris@16 463 modulation[i] += val * area;
Chris@16 464 }
Chris@16 465 }
Chris@0 466 }
Chris@15 467
Chris@20 468