annotate audioio/PhaseVocoderTimeStretcher.cpp @ 15:cc566264c935

* timestretcher improvements -- simplify API (it can calculate its own processing block sizes etc)
author Chris Cannam
date Wed, 13 Sep 2006 11:56:44 +0000
parents 085f34c73939
children 3715efc38f95
rev   line source
Chris@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@0 2
Chris@0 3 /*
Chris@0 4 Sonic Visualiser
Chris@0 5 An audio file viewer and annotation editor.
Chris@0 6 Centre for Digital Music, Queen Mary, University of London.
Chris@0 7 This file copyright 2006 Chris Cannam.
Chris@0 8
Chris@0 9 This program is free software; you can redistribute it and/or
Chris@0 10 modify it under the terms of the GNU General Public License as
Chris@0 11 published by the Free Software Foundation; either version 2 of the
Chris@0 12 License, or (at your option) any later version. See the file
Chris@0 13 COPYING included with this distribution for more information.
Chris@0 14 */
Chris@0 15
Chris@14 16 #include "PhaseVocoderTimeStretcher.h"
Chris@0 17
Chris@0 18 #include <iostream>
Chris@0 19 #include <cassert>
Chris@0 20
Chris@14 21 //#define DEBUG_PHASE_VOCODER_TIME_STRETCHER 1
Chris@0 22
Chris@14 23 PhaseVocoderTimeStretcher::PhaseVocoderTimeStretcher(float ratio,
Chris@15 24 size_t maxProcessInputBlockSize) :
Chris@15 25 m_ratio(ratio)
Chris@15 26 //,
Chris@15 27 // m_n1(inputIncrement),
Chris@15 28 // m_n2(lrintf(m_n1 * ratio)),
Chris@15 29 // m_wlen(std::max(windowSize, m_n2 * 2)),
Chris@15 30 // m_inbuf(m_wlen),
Chris@15 31 // m_outbuf(maxProcessInputBlockSize * ratio + 1024) //!!!
Chris@0 32 {
Chris@15 33 if (ratio < 1) {
Chris@15 34 m_n1 = 512;
Chris@15 35 m_n2 = m_n1 * ratio;
Chris@15 36 m_wlen = 1024;
Chris@15 37 } else {
Chris@15 38 m_n2 = 512;
Chris@15 39 m_n1 = m_n2 / ratio;
Chris@15 40 m_wlen = 1024;
Chris@15 41 }
Chris@15 42
Chris@15 43 m_inbuf = new RingBuffer<float>(m_wlen);
Chris@15 44 m_outbuf = new RingBuffer<float>
Chris@15 45 (lrintf((maxProcessInputBlockSize + m_wlen) * ratio));
Chris@15 46
Chris@15 47 std::cerr << "PhaseVocoderTimeStretcher: ratio = " << ratio
Chris@15 48 << ", n1 = " << m_n1 << ", n2 = " << m_n2 << ", wlen = "
Chris@15 49 << m_wlen << ", max = " << maxProcessInputBlockSize << ", outbuflen = " << m_outbuf->getSize() << std::endl;
Chris@15 50
Chris@15 51 m_window = new Window<float>(HanningWindow, m_wlen),
Chris@0 52
Chris@0 53 m_time = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * m_wlen);
Chris@0 54 m_freq = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * m_wlen);
Chris@0 55 m_dbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen);
Chris@12 56 m_mashbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen);
Chris@13 57 m_modulationbuf = (float *)fftwf_malloc(sizeof(float) * m_wlen);
Chris@12 58 m_prevPhase = (float *)fftwf_malloc(sizeof(float) * m_wlen);
Chris@12 59 m_prevAdjustedPhase = (float *)fftwf_malloc(sizeof(float) * m_wlen);
Chris@0 60
Chris@0 61 m_plan = fftwf_plan_dft_1d(m_wlen, m_time, m_freq, FFTW_FORWARD, FFTW_ESTIMATE);
Chris@0 62 m_iplan = fftwf_plan_dft_c2r_1d(m_wlen, m_freq, m_dbuf, FFTW_ESTIMATE);
Chris@0 63
Chris@0 64 for (int i = 0; i < m_wlen; ++i) {
Chris@0 65 m_mashbuf[i] = 0.0;
Chris@13 66 m_modulationbuf[i] = 0.0;
Chris@12 67 m_prevPhase[i] = 0.0;
Chris@12 68 m_prevAdjustedPhase[i] = 0.0;
Chris@0 69 }
Chris@0 70 }
Chris@0 71
Chris@14 72 PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher()
Chris@0 73 {
Chris@14 74 std::cerr << "PhaseVocoderTimeStretcher::~PhaseVocoderTimeStretcher" << std::endl;
Chris@0 75
Chris@0 76 fftwf_destroy_plan(m_plan);
Chris@0 77 fftwf_destroy_plan(m_iplan);
Chris@0 78
Chris@0 79 fftwf_free(m_time);
Chris@0 80 fftwf_free(m_freq);
Chris@0 81 fftwf_free(m_dbuf);
Chris@12 82 fftwf_free(m_mashbuf);
Chris@13 83 fftwf_free(m_modulationbuf);
Chris@12 84 fftwf_free(m_prevPhase);
Chris@12 85 fftwf_free(m_prevAdjustedPhase);
Chris@0 86
Chris@15 87 delete m_inbuf;
Chris@15 88 delete m_outbuf;
Chris@15 89
Chris@0 90 delete m_window;
Chris@0 91 }
Chris@0 92
Chris@0 93 size_t
Chris@14 94 PhaseVocoderTimeStretcher::getProcessingLatency() const
Chris@0 95 {
Chris@0 96 return getWindowSize() - getInputIncrement();
Chris@0 97 }
Chris@0 98
Chris@0 99 void
Chris@14 100 PhaseVocoderTimeStretcher::process(float *input, float *output, size_t samples)
Chris@0 101 {
Chris@0 102 // We need to add samples from input to our internal buffer. When
Chris@0 103 // we have m_windowSize samples in the buffer, we can process it,
Chris@0 104 // move the samples back by m_n1 and write the output onto our
Chris@0 105 // internal output buffer. If we have (samples * ratio) samples
Chris@0 106 // in that, we can write m_n2 of them back to output and return
Chris@0 107 // (otherwise we have to write zeroes).
Chris@0 108
Chris@0 109 // When we process, we write m_wlen to our fixed output buffer
Chris@0 110 // (m_mashbuf). We then pull out the first m_n2 samples from that
Chris@0 111 // buffer, push them into the output ring buffer, and shift
Chris@0 112 // m_mashbuf left by that amount.
Chris@0 113
Chris@0 114 // The processing latency is then m_wlen - m_n2.
Chris@0 115
Chris@0 116 size_t consumed = 0;
Chris@0 117
Chris@14 118 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
Chris@15 119 std::cerr << "PhaseVocoderTimeStretcher::process(" << samples << ", consumed = " << consumed << "), writable " << m_inbuf->getWriteSpace() <<", readable "<< m_outbuf->getReadSpace() << std::endl;
Chris@0 120 #endif
Chris@0 121
Chris@0 122 while (consumed < samples) {
Chris@0 123
Chris@15 124 size_t writable = m_inbuf->getWriteSpace();
Chris@0 125 writable = std::min(writable, samples - consumed);
Chris@0 126
Chris@0 127 if (writable == 0) {
Chris@0 128 //!!! then what? I don't think this should happen, but
Chris@14 129 std::cerr << "WARNING: PhaseVocoderTimeStretcher::process: writable == 0" << std::endl;
Chris@0 130 break;
Chris@0 131 }
Chris@0 132
Chris@14 133 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
Chris@0 134 std::cerr << "writing " << writable << " from index " << consumed << " to inbuf, consumed will be " << consumed + writable << std::endl;
Chris@0 135 #endif
Chris@15 136 m_inbuf->write(input + consumed, writable);
Chris@0 137 consumed += writable;
Chris@0 138
Chris@15 139 while (m_inbuf->getReadSpace() >= m_wlen &&
Chris@15 140 m_outbuf->getWriteSpace() >= m_n2) {
Chris@0 141
Chris@0 142 // We know we have at least m_wlen samples available
Chris@15 143 // in m_inbuf-> We need to peek m_wlen of them for
Chris@0 144 // processing, and then read m_n1 to advance the read
Chris@0 145 // pointer.
Chris@0 146
Chris@15 147 size_t got = m_inbuf->peek(m_dbuf, m_wlen);
Chris@0 148 assert(got == m_wlen);
Chris@0 149
Chris@13 150 processBlock(m_dbuf, m_mashbuf, m_modulationbuf);
Chris@0 151
Chris@14 152 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
Chris@0 153 std::cerr << "writing first " << m_n2 << " from mashbuf, skipping " << m_n1 << " on inbuf " << std::endl;
Chris@0 154 #endif
Chris@15 155 m_inbuf->skip(m_n1);
Chris@13 156
Chris@13 157 for (size_t i = 0; i < m_n2; ++i) {
Chris@13 158 if (m_modulationbuf[i] > 0.f) {
Chris@13 159 m_mashbuf[i] /= m_modulationbuf[i];
Chris@13 160 }
Chris@13 161 }
Chris@13 162
Chris@15 163 m_outbuf->write(m_mashbuf, m_n2);
Chris@0 164
Chris@0 165 for (size_t i = 0; i < m_wlen - m_n2; ++i) {
Chris@0 166 m_mashbuf[i] = m_mashbuf[i + m_n2];
Chris@13 167 m_modulationbuf[i] = m_modulationbuf[i + m_n2];
Chris@0 168 }
Chris@13 169
Chris@0 170 for (size_t i = m_wlen - m_n2; i < m_wlen; ++i) {
Chris@0 171 m_mashbuf[i] = 0.0f;
Chris@13 172 m_modulationbuf[i] = 0.0f;
Chris@0 173 }
Chris@0 174 }
Chris@0 175
Chris@15 176 // std::cerr << "WARNING: PhaseVocoderTimeStretcher::process: writespace not enough for output increment (" << m_outbuf->getWriteSpace() << " < " << m_n2 << ")" << std::endl;
Chris@0 177 // }
Chris@0 178
Chris@14 179 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
Chris@15 180 std::cerr << "loop ended: inbuf read space " << m_inbuf->getReadSpace() << ", outbuf write space " << m_outbuf->getWriteSpace() << std::endl;
Chris@0 181 #endif
Chris@0 182 }
Chris@0 183
Chris@12 184 size_t toRead = lrintf(samples * m_ratio);
Chris@12 185
Chris@15 186 if (m_outbuf->getReadSpace() < toRead) {
Chris@15 187 std::cerr << "WARNING: PhaseVocoderTimeStretcher::process: not enough data (yet?) (" << m_outbuf->getReadSpace() << " < " << toRead << ")" << std::endl;
Chris@15 188 size_t fill = toRead - m_outbuf->getReadSpace();
Chris@0 189 for (size_t i = 0; i < fill; ++i) {
Chris@0 190 output[i] = 0.0;
Chris@0 191 }
Chris@15 192 m_outbuf->read(output + fill, m_outbuf->getReadSpace());
Chris@0 193 } else {
Chris@14 194 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
Chris@12 195 std::cerr << "enough data - writing " << toRead << " from outbuf" << std::endl;
Chris@0 196 #endif
Chris@15 197 m_outbuf->read(output, toRead);
Chris@0 198 }
Chris@0 199
Chris@14 200 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
Chris@14 201 std::cerr << "PhaseVocoderTimeStretcher::process returning" << std::endl;
Chris@0 202 #endif
Chris@0 203 }
Chris@0 204
Chris@0 205 void
Chris@14 206 PhaseVocoderTimeStretcher::processBlock(float *buf, float *out, float *modulation)
Chris@0 207 {
Chris@0 208 size_t i;
Chris@0 209
Chris@0 210 // buf contains m_wlen samples; out contains enough space for
Chris@0 211 // m_wlen * ratio samples (we mix into out, rather than replacing)
Chris@0 212
Chris@14 213 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
Chris@14 214 std::cerr << "PhaseVocoderTimeStretcher::processBlock" << std::endl;
Chris@0 215 #endif
Chris@0 216
Chris@0 217 m_window->cut(buf);
Chris@0 218
Chris@0 219 for (i = 0; i < m_wlen/2; ++i) {
Chris@0 220 float temp = buf[i];
Chris@0 221 buf[i] = buf[i + m_wlen/2];
Chris@0 222 buf[i + m_wlen/2] = temp;
Chris@0 223 }
Chris@0 224
Chris@0 225 for (i = 0; i < m_wlen; ++i) {
Chris@0 226 m_time[i][0] = buf[i];
Chris@0 227 m_time[i][1] = 0.0;
Chris@0 228 }
Chris@0 229
Chris@0 230 fftwf_execute(m_plan); // m_time -> m_freq
Chris@0 231
Chris@0 232 for (i = 0; i < m_wlen; ++i) {
Chris@0 233
Chris@0 234 float mag = sqrtf(m_freq[i][0] * m_freq[i][0] +
Chris@0 235 m_freq[i][1] * m_freq[i][1]);
Chris@0 236
Chris@12 237 float phase = princargf(atan2f(m_freq[i][1], m_freq[i][0]));
Chris@12 238
Chris@12 239 float omega = (2 * M_PI * m_n1 * i) / m_wlen;
Chris@0 240
Chris@12 241 float expectedPhase = m_prevPhase[i] + omega;
Chris@12 242
Chris@12 243 float phaseError = princargf(phase - expectedPhase);
Chris@12 244
Chris@12 245 float phaseIncrement = (omega + phaseError) / m_n1;
Chris@12 246
Chris@12 247 float adjustedPhase = m_prevAdjustedPhase[i] + m_n2 * phaseIncrement;
Chris@0 248
Chris@12 249 float real = mag * cosf(adjustedPhase);
Chris@12 250 float imag = mag * sinf(adjustedPhase);
Chris@0 251 m_freq[i][0] = real;
Chris@0 252 m_freq[i][1] = imag;
Chris@12 253
Chris@12 254 m_prevPhase[i] = phase;
Chris@12 255 m_prevAdjustedPhase[i] = adjustedPhase;
Chris@0 256 }
Chris@0 257
Chris@0 258 fftwf_execute(m_iplan); // m_freq -> in, inverse fft
Chris@0 259
Chris@0 260 for (i = 0; i < m_wlen/2; ++i) {
Chris@0 261 float temp = buf[i] / m_wlen;
Chris@0 262 buf[i] = buf[i + m_wlen/2] / m_wlen;
Chris@0 263 buf[i + m_wlen/2] = temp;
Chris@0 264 }
Chris@0 265
Chris@0 266 m_window->cut(buf);
Chris@13 267 /*
Chris@0 268 int div = m_wlen / m_n2;
Chris@0 269 if (div > 1) div /= 2;
Chris@0 270 for (i = 0; i < m_wlen; ++i) {
Chris@0 271 buf[i] /= div;
Chris@0 272 }
Chris@13 273 */
Chris@15 274
Chris@15 275 float area = m_window->getArea();
Chris@15 276
Chris@0 277 for (i = 0; i < m_wlen; ++i) {
Chris@0 278 out[i] += buf[i];
Chris@15 279 float val = m_window->getValue(i);
Chris@15 280 modulation[i] += val * area;
Chris@0 281 }
Chris@0 282 }
Chris@15 283