comparison dsp/rateconversion/Resampler.cpp @ 147:c1e98c18628a

Fixes to latency and initial phase calculations (+ explanation)
author Chris Cannam
date Thu, 17 Oct 2013 22:12:36 +0100
parents 235b99c7d4ce
children 9db2712b3ce4
comparison
equal deleted inserted replaced
146:235b99c7d4ce 147:c1e98c18628a
8 #include "qm-dsp/thread/Thread.h" 8 #include "qm-dsp/thread/Thread.h"
9 9
10 #include <iostream> 10 #include <iostream>
11 #include <vector> 11 #include <vector>
12 #include <map> 12 #include <map>
13 #include <cassert>
13 14
14 using std::vector; 15 using std::vector;
15 using std::map; 16 using std::map;
16 17
17 //#define DEBUG_RESAMPLER 1 18 //#define DEBUG_RESAMPLER 1
49 KaiserWindow::parametersForBandwidth(100, 0.02, peakToPole); 50 KaiserWindow::parametersForBandwidth(100, 0.02, peakToPole);
50 51
51 params.length = 52 params.length =
52 (params.length % 2 == 0 ? params.length + 1 : params.length); 53 (params.length % 2 == 0 ? params.length + 1 : params.length);
53 54
55 params.length =
56 (params.length > 200001 ? 200001 : params.length);
57
54 m_filterLength = params.length; 58 m_filterLength = params.length;
55 59
56 vector<double> filter; 60 vector<double> filter;
57 knownFilterMutex.lock(); 61 knownFilterMutex.lock();
58 62
64 68
65 filter = vector<double>(m_filterLength, 0.0); 69 filter = vector<double>(m_filterLength, 0.0);
66 for (int i = 0; i < m_filterLength; ++i) filter[i] = 1.0; 70 for (int i = 0; i < m_filterLength; ++i) filter[i] = 1.0;
67 sw.cut(filter.data()); 71 sw.cut(filter.data());
68 kw.cut(filter.data()); 72 kw.cut(filter.data());
69 73 /*
74 std::cerr << "sinc for " << params.length << ", " << params.beta
75 << ": ";
76 for (int i = 0; i < 10; ++i) {
77 std::cerr << sw.getWindow()[i] << " ";
78 }
79 std::cerr << std::endl;
80
81 std::cerr << "kaiser for " << params.length << ", " << params.beta
82 << ": ";
83 for (int i = 0; i < 10; ++i) {
84 std::cerr << kw.getWindow()[i] << " ";
85 }
86 std::cerr << std::endl;
87
88 std::cerr << "filter for " << params.length << ", " << params.beta
89 << ": ";
90 for (int i = 0; i < 10; ++i) {
91 std::cerr << filter[i] << " ";
92 }
93 std::cerr << std::endl;
94 */
70 knownFilters[peakToPole][m_filterLength][params.beta] = filter; 95 knownFilters[peakToPole][m_filterLength][params.beta] = filter;
71 } 96 }
72 97
73 filter = knownFilters[peakToPole][m_filterLength][params.beta]; 98 filter = knownFilters[peakToPole][m_filterLength][params.beta];
74 knownFilterMutex.unlock(); 99 knownFilterMutex.unlock();
80 std::cerr << "resample " << m_sourceRate << " -> " << m_targetRate 105 std::cerr << "resample " << m_sourceRate << " -> " << m_targetRate
81 << ": inputSpacing " << inputSpacing << ", outputSpacing " 106 << ": inputSpacing " << inputSpacing << ", outputSpacing "
82 << outputSpacing << ": filter length " << m_filterLength 107 << outputSpacing << ": filter length " << m_filterLength
83 << std::endl; 108 << std::endl;
84 #endif 109 #endif
110
111 // Now we have a filter of (odd) length flen in which the lower
112 // sample rate corresponds to every n'th point and the higher rate
113 // to every m'th where n and m are higher and lower rates divided
114 // by their gcd respectively. So if x coordinates are on the same
115 // scale as our filter resolution, then source sample i is at i *
116 // (targetRate / gcd) and target sample j is at j * (sourceRate /
117 // gcd).
118
119 // To reconstruct a single target sample, we want a buffer (real
120 // or virtual) of flen values formed of source samples spaced at
121 // intervals of (targetRate / gcd), in our example case 3. This
122 // is initially formed with the first sample at the filter peak.
123 //
124 // 0 0 0 0 a 0 0 b 0
125 //
126 // and of course we have our filter
127 //
128 // f1 f2 f3 f4 f5 f6 f7 f8 f9
129 //
130 // We take the sum of products of non-zero values from this buffer
131 // with corresponding values in the filter
132 //
133 // a * f5 + b * f8
134 //
135 // Then we drop (sourceRate / gcd) values, in our example case 4,
136 // from the start of the buffer and fill until it has flen values
137 // again
138 //
139 // a 0 0 b 0 0 c 0 0
140 //
141 // repeat to reconstruct the next target sample
142 //
143 // a * f1 + b * f4 + c * f7
144 //
145 // and so on.
146 //
147 // Above I said the buffer could be "real or virtual" -- ours is
148 // virtual. We don't actually store all the zero spacing values,
149 // except for padding at the start; normally we store only the
150 // values that actually came from the source stream, along with a
151 // phase value that tells us how many virtual zeroes there are at
152 // the start of the virtual buffer. So the two examples above are
153 //
154 // 0 a b [ with phase 1 ]
155 // a b c [ with phase 0 ]
156 //
157 // Having thus broken down the buffer so that only the elements we
158 // need to multiply are present, we can also unzip the filter into
159 // every-nth-element subsets at each phase, allowing us to do the
160 // filter multiplication as a simply vector multiply. That is, rather
161 // than store
162 //
163 // f1 f2 f3 f4 f5 f6 f7 f8 f9
164 //
165 // we store separately
166 //
167 // f1 f4 f7
168 // f2 f5 f8
169 // f3 f6 f9
170 //
171 // Each time we complete a multiply-and-sum, we need to work out
172 // how many (real) samples to drop from the start of our buffer,
173 // and how many to add at the end of it for the next multiply. We
174 // know we want to drop enough real samples to move along by one
175 // computed output sample, which is our outputSpacing number of
176 // virtual buffer samples. Depending on the relationship between
177 // input and output spacings, this may mean dropping several real
178 // samples, one real sample, or none at all (and simply moving to
179 // a different "phase").
85 180
86 m_phaseData = new Phase[inputSpacing]; 181 m_phaseData = new Phase[inputSpacing];
87 182
88 for (int phase = 0; phase < inputSpacing; ++phase) { 183 for (int phase = 0; phase < inputSpacing; ++phase) {
89 184
96 p.drop = int(ceil(std::max(0.0, double(outputSpacing - phase)) 191 p.drop = int(ceil(std::max(0.0, double(outputSpacing - phase))
97 / inputSpacing)); 192 / inputSpacing));
98 193
99 int filtZipLength = int(ceil(double(m_filterLength - phase) 194 int filtZipLength = int(ceil(double(m_filterLength - phase)
100 / inputSpacing)); 195 / inputSpacing));
196
101 for (int i = 0; i < filtZipLength; ++i) { 197 for (int i = 0; i < filtZipLength; ++i) {
102 p.filter.push_back(filter[i * inputSpacing + phase]); 198 p.filter.push_back(filter[i * inputSpacing + phase]);
103 } 199 }
104 200
105 m_phaseData[phase] = p; 201 m_phaseData[phase] = p;
120 // either return a very variable number of samples (none at all 216 // either return a very variable number of samples (none at all
121 // until the filter fills, then half the filter length at once) or 217 // until the filter fills, then half the filter length at once) or
122 // else have a lengthy declared latency on the output. We do the 218 // else have a lengthy declared latency on the output. We do the
123 // latter. (What do other implementations do?) 219 // latter. (What do other implementations do?)
124 220
125 m_phase = (m_filterLength/2) % inputSpacing; 221 int centreToEnd = (m_filterLength/2) + 1; // from centre of filter
126 222 // to first sample after
127 m_buffer = vector<double>(m_phaseData[0].filter.size(), 0); 223 // filter end
224
225 // We want to make sure the first "real" sample will eventually be
226 // aligned with the centre sample in the filter (it's tidier, and
227 // easier to do diagnostic calculations that way). So we need to
228 // pick the initial phase and buffer fill accordingly.
229 //
230 // Example: if the inputSpacing is 2, outputSpacing is 3, and
231 // filter length is 7,
232 //
233 // x x x x a b c ... input samples
234 // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 ...
235 // i j k l ... output samples
236 // [--------|--------] <- filter with centre mark
237 //
238 // Let h be the index of the centre mark, here 3 (generally
239 // int(filterLength/2) for odd-length filters).
240 //
241 // The smallest n such that h + n * outputSpacing > filterLength
242 // is 2 (that is, ceil((filterLength - h) / outputSpacing)), and
243 // (h + 2 * outputSpacing) % inputSpacing == 1, so the initial
244 // phase is 1.
245 //
246 // To achieve our n, we need to pre-fill the "virtual" buffer with
247 // 4 zero samples: the x's above. This is int((h + n *
248 // outputSpacing) / inputSpacing). It's the phase that makes this
249 // buffer get dealt with in such a way as to give us an effective
250 // index for sample a of 9 rather than 8 or 10 or whatever.
251 //
252 // This gives us output latency of 2 (== n), i.e. output samples i
253 // and j will appear before the one in which input sample a is at
254 // the centre of the filter.
255
256 int h = int(m_filterLength / 2);
257 int n = ceil(double(m_filterLength - h) / outputSpacing);
258
259 m_phase = (h + n * outputSpacing) % inputSpacing;
260
261 int fill = (h + n * outputSpacing) / inputSpacing;
262
263 m_latency = n;
264
265 m_buffer = vector<double>(fill, 0);
128 m_bufferOrigin = 0; 266 m_bufferOrigin = 0;
129
130 m_latency =
131 ((m_buffer.size() * inputSpacing) - (m_filterLength/2)) / outputSpacing
132 + m_phase;
133 267
134 #ifdef DEBUG_RESAMPLER 268 #ifdef DEBUG_RESAMPLER
135 std::cerr << "initial phase " << m_phase << " (as " << (m_filterLength/2) << " % " << inputSpacing << ")" 269 std::cerr << "initial phase " << m_phase << " (as " << (m_filterLength/2) << " % " << inputSpacing << ")"
136 << ", latency " << m_latency << std::endl; 270 << ", latency " << m_latency << std::endl;
137 #endif 271 #endif
141 Resampler::reconstructOne() 275 Resampler::reconstructOne()
142 { 276 {
143 Phase &pd = m_phaseData[m_phase]; 277 Phase &pd = m_phaseData[m_phase];
144 double v = 0.0; 278 double v = 0.0;
145 int n = pd.filter.size(); 279 int n = pd.filter.size();
280
281 assert(n + m_bufferOrigin <= m_buffer.size());
282
146 const double *const __restrict__ buf = m_buffer.data() + m_bufferOrigin; 283 const double *const __restrict__ buf = m_buffer.data() + m_bufferOrigin;
147 const double *const __restrict__ filt = pd.filter.data(); 284 const double *const __restrict__ filt = pd.filter.data();
285
286 // std::cerr << "phase = " << m_phase << ", drop = " << pd.drop << ", buffer for reconstruction starts...";
287 // for (int i = 0; i < 20; ++i) {
288 // if (i % 5 == 0) std::cerr << "\n" << i << " ";
289 // std::cerr << buf[i] << " ";
290 // }
291 // std::cerr << std::endl;
292
148 for (int i = 0; i < n; ++i) { 293 for (int i = 0; i < n; ++i) {
149 // NB gcc can only vectorize this with -ffast-math 294 // NB gcc can only vectorize this with -ffast-math
150 v += buf[i] * filt[i]; 295 v += buf[i] * filt[i];
151 } 296 }
152 m_bufferOrigin += pd.drop; 297 m_bufferOrigin += pd.drop;
218 int got = r.process(data, out.data(), n); 363 int got = r.process(data, out.data(), n);
219 got += r.process(pad.data(), out.data() + got, pad.size()); 364 got += r.process(pad.data(), out.data() + got, pad.size());
220 365
221 #ifdef DEBUG_RESAMPLER 366 #ifdef DEBUG_RESAMPLER
222 std::cerr << "resample: " << n << " in, " << got << " out" << std::endl; 367 std::cerr << "resample: " << n << " in, " << got << " out" << std::endl;
223 for (int i = 0; i < got; ++i) { 368 std::cerr << "first 10 in:" << std::endl;
224 if (i % 5 == 0) std::cout << std::endl << i << "... "; 369 for (int i = 0; i < 10; ++i) {
225 std::cout << (float) out[i] << " "; 370 std::cerr << data[i] << " ";
226 } 371 if (i == 5) std::cerr << std::endl;
227 std::cout << std::endl; 372 }
373 std::cerr << std::endl;
228 #endif 374 #endif
229 375
230 int toReturn = got - latency; 376 int toReturn = got - latency;
231 if (toReturn > m) toReturn = m; 377 if (toReturn > m) toReturn = m;
232 378
233 return vector<double>(out.begin() + latency, 379 vector<double> sliced(out.begin() + latency,
234 out.begin() + latency + toReturn); 380 out.begin() + latency + toReturn);
235 } 381
236 382 #ifdef DEBUG_RESAMPLER
383 std::cerr << "all out (after latency compensation), length " << sliced.size() << ":";
384 for (int i = 0; i < sliced.size(); ++i) {
385 if (i % 5 == 0) std::cerr << std::endl << i << "... ";
386 std::cerr << sliced[i] << " ";
387 }
388 std::cerr << std::endl;
389 #endif
390
391 return sliced;
392 }
393