changeset 44:337d3b324c75

Some work on alignment
author Chris Cannam <c.cannam@qmul.ac.uk>
date Thu, 21 Nov 2013 17:04:19 +0000
parents f5bd00c97de3
children 73152bc3bb26
files cpp-qm-dsp/ConstantQ.cpp cpp-qm-dsp/ConstantQ.h yeti/cqt.yeti yeti/test.yeti
diffstat 4 files changed, 122 insertions(+), 57 deletions(-) [+]
line wrap: on
line diff
--- a/cpp-qm-dsp/ConstantQ.cpp	Wed Nov 20 21:39:42 2013 +0000
+++ b/cpp-qm-dsp/ConstantQ.cpp	Thu Nov 21 17:04:19 2013 +0000
@@ -20,9 +20,9 @@
 typedef std::complex<double> C;
 
 ConstantQ::ConstantQ(double sampleRate,
-		     double minFreq,
-		     double maxFreq,
-		     int binsPerOctave) :
+                     double minFreq,
+                     double maxFreq,
+                     int binsPerOctave) :
     m_sampleRate(sampleRate),
     m_maxFrequency(maxFreq),
     m_minFrequency(minFreq),
@@ -30,7 +30,7 @@
     m_fft(0)
 {
     if (minFreq <= 0.0 || maxFreq <= 0.0) {
-	throw std::invalid_argument("Frequency extents must be positive");
+        throw std::invalid_argument("Frequency extents must be positive");
     }
 
     initialise();
@@ -40,7 +40,7 @@
 {
     delete m_fft;
     for (int i = 0; i < (int)m_decimators.size(); ++i) {
-	delete m_decimators[i];
+        delete m_decimators[i];
     }
     delete m_kernel;
 }
@@ -50,16 +50,18 @@
 {
     m_octaves = int(ceil(log2(m_maxFrequency / m_minFrequency)));
     double actualMinFreq =
-	(m_maxFrequency / pow(2.0, m_octaves)) * pow(2.0, 1.0/m_binsPerOctave);
+        (m_maxFrequency / pow(2.0, m_octaves)) * pow(2.0, 1.0/m_binsPerOctave);
 
     cerr << "actual min freq = " << actualMinFreq << endl;
 
     m_kernel = new CQKernel(m_sampleRate, m_maxFrequency, m_binsPerOctave);
     m_p = m_kernel->getProperties();
     
-    // use exact powers of two for resampling rates. They don't have
-    // to be related to our actual samplerate, the resampler only
-    // cares about the ratio
+    // Use exact powers of two for resampling rates. They don't have
+    // to be related to our actual samplerate: the resampler only
+    // cares about the ratio, but it only accepts integer source and
+    // target rates, and if we start from the actual samplerate we
+    // risk getting non-integer rates for lower octaves
 
     int sourceRate = pow(2, m_octaves);
     vector<int> latencies;
@@ -68,32 +70,88 @@
     latencies.push_back(0);
     m_decimators.push_back(0);
 
-    for (int oct = 1; oct < m_octaves; ++oct) {
-	Resampler *r = new Resampler
-	    (sourceRate, sourceRate / pow(2, oct), 60, 0.02);
-	latencies.push_back(r->getLatency());
-	m_decimators.push_back(r);
+    for (int i = 1; i < m_octaves; ++i) {
+
+        int factor = pow(2, i);
+
+        Resampler *r = new Resampler
+            (sourceRate, sourceRate / factor, 60, 0.02);
+
+        // We need to adapt the latencies so as to get the first input
+        // sample to be aligned, in time, at the decimator output
+        // across all octaves.
+        // 
+        // Our decimator uses a linear phase filter, but being causal
+        // it is not zero phase: it has a latency that depends on the
+        // decimation factor. Those latencies have been calculated
+        // per-octave and are available to us in the latencies
+        // array. Left to its own devices, the first input sample will
+        // appear at output sample 0 in the highest octave (where no
+        // decimation is needed), sample number latencies[1] in the
+        // next octave down, latencies[2] in the next one, etc. We get
+        // to apply some artificial per-octave latency after the
+        // decimator in the processing chain, in order to compensate
+        // for the differing latencies associated with different
+        // decimation factors. How much should we insert?
+        //
+        // The outputs of the decimators are at different rates (in
+        // terms of the relation between clock time and samples) and
+        // we want them aligned in terms of time. So, for example, a
+        // latency of 10 samples with a decimation factor of 2 is
+        // equivalent to a latency of 20 with no decimation -- they
+        // both result in the first output sample happening at the
+        // same equivalent time in milliseconds.
+	// 
+	// So here we record the latency added by the decimator, in
+	// terms of the sample rate of the undecimated signal. Then we
+	// use that to compensate in a moment, when we've discovered
+	// what the longest latency across all octaves is.
+
+        latencies.push_back(r->getLatency() * factor);
+        m_decimators.push_back(r);
     }
 
     //!!! should be multiple of the kernel fft size?
     int maxLatency = *std::max_element(latencies.begin(), latencies.end());
     m_totalLatency = MathUtilities::nextPowerOfTwo(maxLatency);
     cerr << "total latency = " << m_totalLatency << endl;
+        //!!! should also round up so that total latency is a multiple of the big block size
+
+    int emptyHops = m_p.firstCentre / m_p.atomSpacing; //!!! round?
 
     for (int i = 0; i < m_octaves; ++i) {
 
-	int extraLatency = m_totalLatency - latencies[i];
+	double factor = pow(2, i);
 
-	int pad = 0;
- //!!! quite wrong
+	// And here (see comment above) we calculate the difference
+	// between the total latency applied across all octaves, and
+	// the existing latency due to the decimator for this octave,
+	// and then convert it back into the sample rate appropriate
+	// for the output latency of this decimator.
+
+	double extraLatency = double(m_totalLatency - latencies[i]) / factor;
+
+        int pad = m_p.fftSize * pow(2, m_octaves-i-1);
+
+//!!! This appears to be about right, by visual inspection. But why?
+	int pad2 = 0;
 	for (int j = 0; j < i; ++j) {
-	    pad += m_p.fftSize/2;
+	    pad2 += 9;
 	}
-	cerr << "for octave " << i << ", pad = " << pad << endl;
-	pad = 0;
 
-	m_buffers.push_back
-	    (vector<double>(extraLatency + pad, 0.0));
+
+        int drop = emptyHops * pow(2, m_octaves-i-1) - emptyHops;
+
+
+
+	cerr << "for octave " << i << ", latency for decimator = " << extraLatency << ", fixed padding = " << pad << ", visual inspection pad = " << pad2 << ", hops to drop would be " << drop << ", 2^i = " << pow(2, i) << ", 2^o-i = " << pow(2,m_octaves-i-1) << endl;
+
+	extraLatency += pad + pad2;
+
+	cerr << "then extraLatency -> " << extraLatency << endl;
+
+        m_buffers.push_back
+            (vector<double>(int(round(extraLatency)), 0.0));
     }
 
     m_fft = new FFTReal(m_p.fftSize);
@@ -108,8 +166,8 @@
     m_buffers[0].insert(m_buffers[0].end(), td.begin(), td.end());
 
     for (int i = 1; i < m_octaves; ++i) {
-	vector<double> dec = m_decimators[i]->process(td.data(), td.size());
-	m_buffers[i].insert(m_buffers[i].end(), dec.begin(), dec.end());
+        vector<double> dec = m_decimators[i]->process(td.data(), td.size());
+        m_buffers[i].insert(m_buffers[i].end(), dec.begin(), dec.end());
     }
 
     vector<vector<double> > out;
@@ -118,32 +176,38 @@
 
     while ((int)m_buffers[0].size() >= m_bigBlockSize) {
 
-	int base = out.size();
-	int totalColumns = pow(2, m_octaves - 1) * m_p.atomsPerFrame;
-	cerr << "totalColumns = " << totalColumns << endl;
-	for (int i = 0; i < totalColumns; ++i) {
-	    out.push_back(vector<double>());
-	}
+        int base = out.size();
+        int totalColumns = pow(2, m_octaves - 1) * m_p.atomsPerFrame;
+        cerr << "totalColumns = " << totalColumns << endl;
+        for (int i = 0; i < totalColumns; ++i) {
+            out.push_back(vector<double>(m_p.binsPerOctave * m_octaves, 0.0));
+        }
 
-	for (int octave = 0; octave < m_octaves; ++octave) {
+        for (int octave = 0; octave < m_octaves; ++octave) {
 
-	    int blocksThisOctave = pow(2, (m_octaves - octave - 1));
-//	    cerr << "octave " << octave+1 << " of " << m_octaves << ", n = " << blocksThisOctave << endl;
+            int blocksThisOctave = pow(2, (m_octaves - octave - 1));
+//          cerr << "octave " << octave+1 << " of " << m_octaves << ", n = " << blocksThisOctave << endl;
 
-	    for (int b = 0; b < blocksThisOctave; ++b) {
-		vector<vector<double> > block = processOctaveBlock(octave);
-		
-		for (int j = 0; j < m_p.atomsPerFrame; ++j) {
-		    int target = base +
-			(b * (totalColumns / blocksThisOctave) + 
-			 (j * ((totalColumns / blocksThisOctave) / m_p.atomsPerFrame)));
-//		    cerr << "j " << j << " -> target " << target << endl;
-		    for (int i = 0; i < m_p.binsPerOctave; ++i) {
-			out[target].push_back(block[j][m_p.binsPerOctave-i-1]);
+            for (int b = 0; b < blocksThisOctave; ++b) {
+                vector<vector<double> > block = processOctaveBlock(octave);
+                
+                for (int j = 0; j < m_p.atomsPerFrame; ++j) {
+
+		    for (int k = 0; k < pow(2, octave); ++k) {
+
+			int target = base + k +
+			    (b * (totalColumns / blocksThisOctave) + 
+			     (j * ((totalColumns / blocksThisOctave) /
+				   m_p.atomsPerFrame)));
+
+			for (int i = 0; i < m_p.binsPerOctave; ++i) {
+			    out[target][m_p.binsPerOctave * octave + i] = 
+				block[j][m_p.binsPerOctave - i - 1];
+			}
 		    }
-		}
-	    }
-	}
+                }
+            }
+        }
     }
     
     return out;
@@ -167,13 +231,13 @@
 
     vector<double> shifted;
     shifted.insert(shifted.end(), 
-		   m_buffers[octave].begin() + m_p.fftHop,
-		   m_buffers[octave].end());
+                   m_buffers[octave].begin() + m_p.fftHop,
+                   m_buffers[octave].end());
     m_buffers[octave] = shifted;
 
     vector<C> cv;
     for (int i = 0; i < m_p.fftSize; ++i) {
-	cv.push_back(C(ro[i], io[i]));
+        cv.push_back(C(ro[i], io[i]));
     }
 
     vector<C> cqrowvec = m_kernel->process(cv);
@@ -181,10 +245,10 @@
     // Reform into a column matrix 
     vector<vector<double> > cqblock;
     for (int j = 0; j < m_p.atomsPerFrame; ++j) {
-	cqblock.push_back(vector<double>());
-	for (int i = 0; i < m_p.binsPerOctave; ++i) {
-	    cqblock[j].push_back(abs(cqrowvec[i * m_p.atomsPerFrame + j]));
-	}
+        cqblock.push_back(vector<double>());
+        for (int i = 0; i < m_p.binsPerOctave; ++i) {
+            cqblock[j].push_back(abs(cqrowvec[i * m_p.atomsPerFrame + j]));
+        }
     }
 
     return cqblock;
--- a/cpp-qm-dsp/ConstantQ.h	Wed Nov 20 21:39:42 2013 +0000
+++ b/cpp-qm-dsp/ConstantQ.h	Thu Nov 21 17:04:19 2013 +0000
@@ -42,6 +42,7 @@
 
     std::vector<Resampler *> m_decimators;
     std::vector<std::vector<double> > m_buffers;
+    std::vector<int> m_dropHops;
 
     int m_totalLatency;
 
--- a/yeti/cqt.yeti	Wed Nov 20 21:39:42 2013 +0000
+++ b/yeti/cqt.yeti	Thu Nov 21 17:04:19 2013 +0000
@@ -87,7 +87,7 @@
     // Reshape each row vector into the appropriate rectangular matrix
     // and split into single-atom columns
 
-    emptyHops = kdata.firstCentre / kdata.atomSpacing;
+    emptyHops = kdata.firstCentre / kdata.atomSpacing; //!!! int? round?
     maxDrop = emptyHops * (pow 2 (octaves-1)) - emptyHops;
     eprintln "maxDrop = \(maxDrop)";
 
--- a/yeti/test.yeti	Wed Nov 20 21:39:42 2013 +0000
+++ b/yeti/test.yeti	Thu Nov 21 17:04:19 2013 +0000
@@ -13,17 +13,17 @@
 
 //testStream = manipulate.withDuration 96000 (syn.sinusoid 48000 500);
 //testStream = manipulate.withDuration 96000 (syn.pulseTrain 48000 4);
-//testStream = af.open "sweep-48000.wav";
+testStream = af.open "sweep-48000.wav";
 //testStream = af.open "sweep.wav";
 
 // So the stream is [ 0, 1, 0, -1, 0, 1, 0, -1, ... ] :
-testStream = manipulate.withDuration 64 (syn.sinusoid 8 2);
+//testStream = manipulate.withDuration 64 (syn.sinusoid 8 2);
 
 //testStream = manipulate.withDuration 32 (syn.pulseTrain 8 0.001);
 
 eprintln "have test stream";
 
-cq = cqt { maxFreq = testStream.sampleRate/2, minFreq = 1, binsPerOctave = 4 } testStream;
+cq = cqt { maxFreq = testStream.sampleRate/2, minFreq = 50, binsPerOctave = 24 } testStream;
 
 eprintln "bin frequencies: \(cq.kernel.binFrequencies)";