changeset 31:37af203dbd15

* Buffer size fixes in the time stretcher, to avoid running out of input data for large or small ratios
author Chris Cannam
date Thu, 21 Sep 2006 09:43:41 +0000
parents 56e1d4242bb4
children e3b32dc5180b
files audioio/AudioCallbackPlaySource.cpp audioio/PhaseVocoderTimeStretcher.cpp audioio/PhaseVocoderTimeStretcher.h
diffstat 3 files changed, 86 insertions(+), 80 deletions(-) [+]
line wrap: on
line diff
--- a/audioio/AudioCallbackPlaySource.cpp	Wed Sep 20 16:02:42 2006 +0000
+++ b/audioio/AudioCallbackPlaySource.cpp	Thu Sep 21 09:43:41 2006 +0000
@@ -619,7 +619,7 @@
              channels,
              factor,
              sharpen,
-             lrintf(getTargetBlockSize() / factor));
+             getTargetBlockSize());
 
 	m_timeStretcher = newStretcher;
 
@@ -688,6 +688,21 @@
 
     size_t available;
 
+    int warned = 0;
+
+    
+
+    //!!!
+    // We want output blocks of e.g. 1024 (probably fixed, certainly
+    // bounded).  We can provide input blocks of any size (unbounded)
+    // at the timestretcher's request.  The input block for a given
+    // output is approx output / ratio, but we can't predict it
+    // exactly, for an adaptive timestretcher.  The stretcher will
+    // need some additional buffer space.
+
+
+
+
     while ((available = ts->getAvailableOutputSamples()) < count) {
 
         size_t reqd = lrintf((count - available) / ratio);
@@ -735,8 +750,8 @@
         if (got == 0) break;
 
         if (ts->getAvailableOutputSamples() == available) {
-            std::cerr << "WARNING: AudioCallbackPlaySource::getSamples: Added " << got << " samples to time stretcher, created no new available output samples" << std::endl;
-            break;
+            std::cerr << "WARNING: AudioCallbackPlaySource::getSamples: Added " << got << " samples to time stretcher, created no new available output samples (warned = " << warned << ")" << std::endl;
+            if (++warned == 5) break;
         }
     }
 
--- a/audioio/PhaseVocoderTimeStretcher.cpp	Wed Sep 20 16:02:42 2006 +0000
+++ b/audioio/PhaseVocoderTimeStretcher.cpp	Thu Sep 21 09:43:41 2006 +0000
@@ -26,10 +26,10 @@
                                                      size_t channels,
                                                      float ratio,
                                                      bool sharpen,
-                                                     size_t maxProcessInputBlockSize) :
+                                                     size_t maxOutputBlockSize) :
     m_sampleRate(sampleRate),
     m_channels(channels),
-    m_maxProcessInputBlockSize(maxProcessInputBlockSize),
+    m_maxOutputBlockSize(maxOutputBlockSize),
     m_ratio(ratio),
     m_sharpen(sharpen),
     m_totalCount(0),
@@ -92,27 +92,31 @@
         m_plan[c] = fftwf_plan_dft_r2c_1d(m_wlen, m_time[c], m_freq[c], FFTW_ESTIMATE);
         m_iplan[c] = fftwf_plan_dft_c2r_1d(m_wlen, m_freq[c], m_time[c], FFTW_ESTIMATE);
 
-        m_inbuf[c] = new RingBuffer<float>(m_wlen);
         m_outbuf[c] = new RingBuffer<float>
-            (lrintf((m_maxProcessInputBlockSize + m_wlen) * m_ratio));
-            
+            ((m_maxOutputBlockSize + m_wlen) * 2);
+        m_inbuf[c] = new RingBuffer<float>
+            (lrintf(m_outbuf[c]->getSize() / m_ratio) + m_wlen);
+
+        std::cerr << "making inbuf size " << m_inbuf[c]->getSize() << " (outbuf size is " << m_outbuf[c]->getSize() << ", ratio " << m_ratio << ")" << std::endl;
+
+           
         m_mashbuf[c] = (float *)fftwf_malloc(sizeof(float) * m_wlen);
         
-        for (int i = 0; i < m_wlen; ++i) {
+        for (size_t i = 0; i < m_wlen; ++i) {
             m_mashbuf[c][i] = 0.0;
         }
 
-        for (int i = 0; i <= m_wlen/2; ++i) {
+        for (size_t i = 0; i <= m_wlen/2; ++i) {
             m_prevPhase[c][i] = 0.0;
             m_prevAdjustedPhase[c][i] = 0.0;
         }
     }
 
-    for (int i = 0; i < m_wlen; ++i) {
+    for (size_t i = 0; i < m_wlen; ++i) {
         m_modulationbuf[i] = 0.0;
     }
 
-    for (int i = 0; i <= m_wlen/2; ++i) {
+    for (size_t i = 0; i <= m_wlen/2; ++i) {
         m_prevTransientMag[i] = 0.0;
     }
 }
@@ -143,7 +147,7 @@
         if (m_sharpen) {
             m_wlen = 2048;
         }
-        m_n2 = m_n1 * m_ratio;
+        m_n2 = lrintf(m_n1 * m_ratio);
     } else {
         if (m_ratio > 2) {
             m_n2 = 512;
@@ -157,10 +161,10 @@
         if (m_sharpen) {
             if (m_wlen < 2048) m_wlen = 2048;
         }
-        m_n1 = m_n2 / m_ratio;
+        m_n1 = lrintf(m_n2 / m_ratio);
     }
 
-    m_transientThreshold = m_wlen / 4.5;
+    m_transientThreshold = lrintf(m_wlen / 4.5);
 
     m_totalCount = 0;
     m_transientCount = 0;
@@ -170,7 +174,7 @@
     std::cerr << "PhaseVocoderTimeStretcher: channels = " << m_channels
               << ", ratio = " << m_ratio
               << ", n1 = " << m_n1 << ", n2 = " << m_n2 << ", wlen = "
-              << m_wlen << ", max = " << m_maxProcessInputBlockSize << std::endl;
+              << m_wlen << ", max = " << m_maxOutputBlockSize << std::endl;
 //              << ", outbuflen = " << m_outbuf[0]->getSize() << std::endl;
 }
 
@@ -218,9 +222,7 @@
 {
     QMutexLocker locker(m_mutex);
 
-    float formerRatio = m_ratio;
     size_t formerWlen = m_wlen;
-
     m_ratio = ratio;
 
     calculateParameters();
@@ -229,36 +231,43 @@
 
         // This is the only container whose size depends on m_ratio
 
-        RingBuffer<float> **newout = new RingBuffer<float> *[m_channels];
+        RingBuffer<float> **newin = new RingBuffer<float> *[m_channels];
 
-        size_t formerSize = m_outbuf[0]->getSize();
-        size_t newSize = lrintf((m_maxProcessInputBlockSize + m_wlen) * m_ratio);
-        size_t ready = m_outbuf[0]->getReadSpace();
+        size_t formerSize = m_inbuf[0]->getSize();
+        size_t newSize = lrintf(m_outbuf[0]->getSize() / m_ratio) + m_wlen;
 
-        for (size_t c = 0; c < m_channels; ++c) {
-            newout[c] = new RingBuffer<float>(newSize);
-        }
+        std::cerr << "resizing inbuf from " << formerSize << " to "
+                  << newSize << " (outbuf size is " << m_outbuf[0]->getSize() << ", ratio " << m_ratio << ")" << std::endl;
 
-        if (ready > 0) {
+        if (formerSize != newSize) {
 
-            size_t copy = std::min(ready, newSize);
-            float *tmp = new float[ready];
+            size_t ready = m_inbuf[0]->getReadSpace();
 
             for (size_t c = 0; c < m_channels; ++c) {
-                m_outbuf[c]->read(tmp, ready);
-                newout[c]->write(tmp + ready - copy, copy);
+                newin[c] = new RingBuffer<float>(newSize);
             }
 
-            delete[] tmp;
+            if (ready > 0) {
+
+                size_t copy = std::min(ready, newSize);
+                float *tmp = new float[ready];
+
+                for (size_t c = 0; c < m_channels; ++c) {
+                    m_inbuf[c]->read(tmp, ready);
+                    newin[c]->write(tmp + ready - copy, copy);
+                }
+                
+                delete[] tmp;
+            }
+            
+            for (size_t c = 0; c < m_channels; ++c) {
+                delete m_inbuf[c];
+            }
+            
+            delete[] m_inbuf;
+            m_inbuf = newin;
         }
 
-        for (size_t c = 0; c < m_channels; ++c) {
-            delete m_outbuf[c];
-        }
-
-        delete[] m_outbuf;
-        m_outbuf = newout;
-
     } else {
         
         std::cerr << "wlen changed" << std::endl;
@@ -273,13 +282,6 @@
     return getWindowSize() - getInputIncrement();
 }
 
-void
-PhaseVocoderTimeStretcher::process(float **input, float **output, size_t samples)
-{
-    putInput(input, samples);
-    getOutput(output, lrintf(samples * m_ratio));
-}
-
 size_t
 PhaseVocoderTimeStretcher::getRequiredInputSamples() const
 {
@@ -317,18 +319,23 @@
 
 	if (writable == 0) {
 	    //!!! then what? I don't think this should happen, but
-	    std::cerr << "WARNING: PhaseVocoderTimeStretcher::putInput: writable == 0" << std::endl;
-	    break;
-	}
+	    std::cerr << "WARNING: PhaseVocoderTimeStretcher::putInput: writable == 0 (inbuf has " << m_inbuf[0]->getReadSpace() << " samples available for reading, space for " << m_inbuf[0]->getWriteSpace() << " more)" << std::endl;
+            if (m_inbuf[0]->getReadSpace() < m_wlen ||
+                m_outbuf[0]->getWriteSpace() < m_n2) {
+                std::cerr << "Outbuf has space for " << m_outbuf[0]->getWriteSpace() << " (n2 = " << m_n2 << "), won't be able to process" << std::endl;
+                break;
+            }
+	} else {
 
 #ifdef DEBUG_PHASE_VOCODER_TIME_STRETCHER
-	std::cerr << "writing " << writable << " from index " << consumed << " to inbuf, consumed will be " << consumed + writable << std::endl;
+            std::cerr << "writing " << writable << " from index " << consumed << " to inbuf, consumed will be " << consumed + writable << std::endl;
 #endif
 
-        for (size_t c = 0; c < m_channels; ++c) {
-            m_inbuf[c]->write(input[c] + consumed, writable);
+            for (size_t c = 0; c < m_channels; ++c) {
+                m_inbuf[c]->write(input[c] + consumed, writable);
+            }
+            consumed += writable;
         }
-	consumed += writable;
 
 	while (m_inbuf[0]->getReadSpace() >= m_wlen &&
 	       m_outbuf[0]->getWriteSpace() >= m_n2) {
@@ -501,7 +508,7 @@
 {
     int count = 0;
 
-    for (int i = 0; i <= m_wlen/2; ++i) {
+    for (size_t i = 0; i <= m_wlen/2; ++i) {
 
         float real = 0.f, imag = 0.f;
 
@@ -546,11 +553,9 @@
                                            float *modulation,
                                            size_t lastStep)
 {
-    int i;
-
     bool unchanged = (lastStep == m_n1);
 
-    for (i = 0; i <= m_wlen/2; ++i) {
+    for (size_t i = 0; i <= m_wlen/2; ++i) {
 		
         float phase = princargf(atan2f(m_freq[c][i][1], m_freq[c][i][0]));
         float adjustedPhase = phase;
@@ -583,19 +588,19 @@
 
     fftwf_execute(m_iplan[c]); // m_freq -> m_time, inverse fft
 
-    for (i = 0; i < m_wlen/2; ++i) {
+    for (size_t i = 0; i < m_wlen/2; ++i) {
         float temp = m_time[c][i];
         m_time[c][i] = m_time[c][i + m_wlen/2];
         m_time[c][i + m_wlen/2] = temp;
     }
     
-    for (i = 0; i < m_wlen; ++i) {
+    for (size_t i = 0; i < m_wlen; ++i) {
         m_time[c][i] = m_time[c][i] / m_wlen;
     }
 
     m_synthesisWindow->cut(m_time[c]);
 
-    for (i = 0; i < m_wlen; ++i) {
+    for (size_t i = 0; i < m_wlen; ++i) {
         out[i] += m_time[c][i];
     }
 
@@ -603,7 +608,7 @@
 
         float area = m_analysisWindow->getArea();
 
-        for (i = 0; i < m_wlen; ++i) {
+        for (size_t i = 0; i < m_wlen; ++i) {
             float val = m_synthesisWindow->getValue(i);
             modulation[i] += val * area;
         }
--- a/audioio/PhaseVocoderTimeStretcher.h	Wed Sep 20 16:02:42 2006 +0000
+++ b/audioio/PhaseVocoderTimeStretcher.h	Thu Sep 21 09:43:41 2006 +0000
@@ -43,36 +43,22 @@
                               size_t channels,
                               float ratio,
                               bool sharpen,
-                              size_t maxProcessInputBlockSize);
+                              size_t maxOutputBlockSize);
     virtual ~PhaseVocoderTimeStretcher();
 
     /**
-     * Process a block.  The input array contains the given number of
-     * samples (on each channel); the output must have space for
-     * lrintf(samples * m_ratio).
-     * 
-     * This function isn't really recommended, and I may yet remove it.
-     * It should work correctly for some ratios, e.g. small powers of
-     * two, if transient sharpening is off.  For other ratios it may
-     * drop samples -- use putInput in a loop followed by getOutput
-     * (when getAvailableOutputSamples reports enough) instead.
-     *
-     * Do not mix process calls with putInput/getOutput calls.
-     */
-    void process(float **input, float **output, size_t samples);
-
-    /**
      * Return the number of samples that would need to be added via
      * putInput in order to provoke the time stretcher into doing some
      * time stretching and making more output samples available.
-     * This will be an estimate, if transient sharpening is on.
+     * This will be an estimate, if transient sharpening is on; the 
+     * caller may need to do the put/get/test cycle more than once.
      */
     size_t getRequiredInputSamples() const;
 
     /**
      * Put (and possibly process) a given number of input samples.
-     * Number must not exceed the maxProcessInputBlockSize passed to
-     * constructor.
+     * Number should usually equal the value returned from
+     * getRequiredInputSamples().
      */
     void putInput(float **input, size_t samples);
 
@@ -159,7 +145,7 @@
 
     size_t m_sampleRate;
     size_t m_channels;
-    size_t m_maxProcessInputBlockSize;
+    size_t m_maxOutputBlockSize;
     float m_ratio;
     bool m_sharpen;
     size_t m_n1;