Mercurial > hg > qm-dsp
comparison dsp/rateconversion/Resampler.cpp @ 372:d464286c007b
Fixes to latency and initial phase calculations (+ explanation)
author | Chris Cannam <c.cannam@qmul.ac.uk> |
---|---|
date | Thu, 17 Oct 2013 22:12:36 +0100 |
parents | 33e9e964443c |
children | 9db2712b3ce4 |
comparison
equal
deleted
inserted
replaced
371:33e9e964443c | 372:d464286c007b |
---|---|
8 #include "qm-dsp/thread/Thread.h" | 8 #include "qm-dsp/thread/Thread.h" |
9 | 9 |
10 #include <iostream> | 10 #include <iostream> |
11 #include <vector> | 11 #include <vector> |
12 #include <map> | 12 #include <map> |
13 #include <cassert> | |
13 | 14 |
14 using std::vector; | 15 using std::vector; |
15 using std::map; | 16 using std::map; |
16 | 17 |
17 //#define DEBUG_RESAMPLER 1 | 18 //#define DEBUG_RESAMPLER 1 |
49 KaiserWindow::parametersForBandwidth(100, 0.02, peakToPole); | 50 KaiserWindow::parametersForBandwidth(100, 0.02, peakToPole); |
50 | 51 |
51 params.length = | 52 params.length = |
52 (params.length % 2 == 0 ? params.length + 1 : params.length); | 53 (params.length % 2 == 0 ? params.length + 1 : params.length); |
53 | 54 |
55 params.length = | |
56 (params.length > 200001 ? 200001 : params.length); | |
57 | |
54 m_filterLength = params.length; | 58 m_filterLength = params.length; |
55 | 59 |
56 vector<double> filter; | 60 vector<double> filter; |
57 knownFilterMutex.lock(); | 61 knownFilterMutex.lock(); |
58 | 62 |
64 | 68 |
65 filter = vector<double>(m_filterLength, 0.0); | 69 filter = vector<double>(m_filterLength, 0.0); |
66 for (int i = 0; i < m_filterLength; ++i) filter[i] = 1.0; | 70 for (int i = 0; i < m_filterLength; ++i) filter[i] = 1.0; |
67 sw.cut(filter.data()); | 71 sw.cut(filter.data()); |
68 kw.cut(filter.data()); | 72 kw.cut(filter.data()); |
69 | 73 /* |
74 std::cerr << "sinc for " << params.length << ", " << params.beta | |
75 << ": "; | |
76 for (int i = 0; i < 10; ++i) { | |
77 std::cerr << sw.getWindow()[i] << " "; | |
78 } | |
79 std::cerr << std::endl; | |
80 | |
81 std::cerr << "kaiser for " << params.length << ", " << params.beta | |
82 << ": "; | |
83 for (int i = 0; i < 10; ++i) { | |
84 std::cerr << kw.getWindow()[i] << " "; | |
85 } | |
86 std::cerr << std::endl; | |
87 | |
88 std::cerr << "filter for " << params.length << ", " << params.beta | |
89 << ": "; | |
90 for (int i = 0; i < 10; ++i) { | |
91 std::cerr << filter[i] << " "; | |
92 } | |
93 std::cerr << std::endl; | |
94 */ | |
70 knownFilters[peakToPole][m_filterLength][params.beta] = filter; | 95 knownFilters[peakToPole][m_filterLength][params.beta] = filter; |
71 } | 96 } |
72 | 97 |
73 filter = knownFilters[peakToPole][m_filterLength][params.beta]; | 98 filter = knownFilters[peakToPole][m_filterLength][params.beta]; |
74 knownFilterMutex.unlock(); | 99 knownFilterMutex.unlock(); |
80 std::cerr << "resample " << m_sourceRate << " -> " << m_targetRate | 105 std::cerr << "resample " << m_sourceRate << " -> " << m_targetRate |
81 << ": inputSpacing " << inputSpacing << ", outputSpacing " | 106 << ": inputSpacing " << inputSpacing << ", outputSpacing " |
82 << outputSpacing << ": filter length " << m_filterLength | 107 << outputSpacing << ": filter length " << m_filterLength |
83 << std::endl; | 108 << std::endl; |
84 #endif | 109 #endif |
110 | |
111 // Now we have a filter of (odd) length flen in which the lower | |
112 // sample rate corresponds to every n'th point and the higher rate | |
113 // to every m'th where n and m are higher and lower rates divided | |
114 // by their gcd respectively. So if x coordinates are on the same | |
115 // scale as our filter resolution, then source sample i is at i * | |
116 // (targetRate / gcd) and target sample j is at j * (sourceRate / | |
117 // gcd). | |
118 | |
119 // To reconstruct a single target sample, we want a buffer (real | |
120 // or virtual) of flen values formed of source samples spaced at | |
121 // intervals of (targetRate / gcd), in our example case 3. This | |
122 // is initially formed with the first sample at the filter peak. | |
123 // | |
124 // 0 0 0 0 a 0 0 b 0 | |
125 // | |
126 // and of course we have our filter | |
127 // | |
128 // f1 f2 f3 f4 f5 f6 f7 f8 f9 | |
129 // | |
130 // We take the sum of products of non-zero values from this buffer | |
131 // with corresponding values in the filter | |
132 // | |
133 // a * f5 + b * f8 | |
134 // | |
135 // Then we drop (sourceRate / gcd) values, in our example case 4, | |
136 // from the start of the buffer and fill until it has flen values | |
137 // again | |
138 // | |
139 // a 0 0 b 0 0 c 0 0 | |
140 // | |
141 // repeat to reconstruct the next target sample | |
142 // | |
143 // a * f1 + b * f4 + c * f7 | |
144 // | |
145 // and so on. | |
146 // | |
147 // Above I said the buffer could be "real or virtual" -- ours is | |
148 // virtual. We don't actually store all the zero spacing values, | |
149 // except for padding at the start; normally we store only the | |
150 // values that actually came from the source stream, along with a | |
151 // phase value that tells us how many virtual zeroes there are at | |
152 // the start of the virtual buffer. So the two examples above are | |
153 // | |
154 // 0 a b [ with phase 1 ] | |
155 // a b c [ with phase 0 ] | |
156 // | |
157 // Having thus broken down the buffer so that only the elements we | |
158 // need to multiply are present, we can also unzip the filter into | |
159 // every-nth-element subsets at each phase, allowing us to do the | |
160 // filter multiplication as a simply vector multiply. That is, rather | |
161 // than store | |
162 // | |
163 // f1 f2 f3 f4 f5 f6 f7 f8 f9 | |
164 // | |
165 // we store separately | |
166 // | |
167 // f1 f4 f7 | |
168 // f2 f5 f8 | |
169 // f3 f6 f9 | |
170 // | |
171 // Each time we complete a multiply-and-sum, we need to work out | |
172 // how many (real) samples to drop from the start of our buffer, | |
173 // and how many to add at the end of it for the next multiply. We | |
174 // know we want to drop enough real samples to move along by one | |
175 // computed output sample, which is our outputSpacing number of | |
176 // virtual buffer samples. Depending on the relationship between | |
177 // input and output spacings, this may mean dropping several real | |
178 // samples, one real sample, or none at all (and simply moving to | |
179 // a different "phase"). | |
85 | 180 |
86 m_phaseData = new Phase[inputSpacing]; | 181 m_phaseData = new Phase[inputSpacing]; |
87 | 182 |
88 for (int phase = 0; phase < inputSpacing; ++phase) { | 183 for (int phase = 0; phase < inputSpacing; ++phase) { |
89 | 184 |
96 p.drop = int(ceil(std::max(0.0, double(outputSpacing - phase)) | 191 p.drop = int(ceil(std::max(0.0, double(outputSpacing - phase)) |
97 / inputSpacing)); | 192 / inputSpacing)); |
98 | 193 |
99 int filtZipLength = int(ceil(double(m_filterLength - phase) | 194 int filtZipLength = int(ceil(double(m_filterLength - phase) |
100 / inputSpacing)); | 195 / inputSpacing)); |
196 | |
101 for (int i = 0; i < filtZipLength; ++i) { | 197 for (int i = 0; i < filtZipLength; ++i) { |
102 p.filter.push_back(filter[i * inputSpacing + phase]); | 198 p.filter.push_back(filter[i * inputSpacing + phase]); |
103 } | 199 } |
104 | 200 |
105 m_phaseData[phase] = p; | 201 m_phaseData[phase] = p; |
120 // either return a very variable number of samples (none at all | 216 // either return a very variable number of samples (none at all |
121 // until the filter fills, then half the filter length at once) or | 217 // until the filter fills, then half the filter length at once) or |
122 // else have a lengthy declared latency on the output. We do the | 218 // else have a lengthy declared latency on the output. We do the |
123 // latter. (What do other implementations do?) | 219 // latter. (What do other implementations do?) |
124 | 220 |
125 m_phase = (m_filterLength/2) % inputSpacing; | 221 int centreToEnd = (m_filterLength/2) + 1; // from centre of filter |
126 | 222 // to first sample after |
127 m_buffer = vector<double>(m_phaseData[0].filter.size(), 0); | 223 // filter end |
224 | |
225 // We want to make sure the first "real" sample will eventually be | |
226 // aligned with the centre sample in the filter (it's tidier, and | |
227 // easier to do diagnostic calculations that way). So we need to | |
228 // pick the initial phase and buffer fill accordingly. | |
229 // | |
230 // Example: if the inputSpacing is 2, outputSpacing is 3, and | |
231 // filter length is 7, | |
232 // | |
233 // x x x x a b c ... input samples | |
234 // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 ... | |
235 // i j k l ... output samples | |
236 // [--------|--------] <- filter with centre mark | |
237 // | |
238 // Let h be the index of the centre mark, here 3 (generally | |
239 // int(filterLength/2) for odd-length filters). | |
240 // | |
241 // The smallest n such that h + n * outputSpacing > filterLength | |
242 // is 2 (that is, ceil((filterLength - h) / outputSpacing)), and | |
243 // (h + 2 * outputSpacing) % inputSpacing == 1, so the initial | |
244 // phase is 1. | |
245 // | |
246 // To achieve our n, we need to pre-fill the "virtual" buffer with | |
247 // 4 zero samples: the x's above. This is int((h + n * | |
248 // outputSpacing) / inputSpacing). It's the phase that makes this | |
249 // buffer get dealt with in such a way as to give us an effective | |
250 // index for sample a of 9 rather than 8 or 10 or whatever. | |
251 // | |
252 // This gives us output latency of 2 (== n), i.e. output samples i | |
253 // and j will appear before the one in which input sample a is at | |
254 // the centre of the filter. | |
255 | |
256 int h = int(m_filterLength / 2); | |
257 int n = ceil(double(m_filterLength - h) / outputSpacing); | |
258 | |
259 m_phase = (h + n * outputSpacing) % inputSpacing; | |
260 | |
261 int fill = (h + n * outputSpacing) / inputSpacing; | |
262 | |
263 m_latency = n; | |
264 | |
265 m_buffer = vector<double>(fill, 0); | |
128 m_bufferOrigin = 0; | 266 m_bufferOrigin = 0; |
129 | |
130 m_latency = | |
131 ((m_buffer.size() * inputSpacing) - (m_filterLength/2)) / outputSpacing | |
132 + m_phase; | |
133 | 267 |
134 #ifdef DEBUG_RESAMPLER | 268 #ifdef DEBUG_RESAMPLER |
135 std::cerr << "initial phase " << m_phase << " (as " << (m_filterLength/2) << " % " << inputSpacing << ")" | 269 std::cerr << "initial phase " << m_phase << " (as " << (m_filterLength/2) << " % " << inputSpacing << ")" |
136 << ", latency " << m_latency << std::endl; | 270 << ", latency " << m_latency << std::endl; |
137 #endif | 271 #endif |
141 Resampler::reconstructOne() | 275 Resampler::reconstructOne() |
142 { | 276 { |
143 Phase &pd = m_phaseData[m_phase]; | 277 Phase &pd = m_phaseData[m_phase]; |
144 double v = 0.0; | 278 double v = 0.0; |
145 int n = pd.filter.size(); | 279 int n = pd.filter.size(); |
280 | |
281 assert(n + m_bufferOrigin <= m_buffer.size()); | |
282 | |
146 const double *const __restrict__ buf = m_buffer.data() + m_bufferOrigin; | 283 const double *const __restrict__ buf = m_buffer.data() + m_bufferOrigin; |
147 const double *const __restrict__ filt = pd.filter.data(); | 284 const double *const __restrict__ filt = pd.filter.data(); |
285 | |
286 // std::cerr << "phase = " << m_phase << ", drop = " << pd.drop << ", buffer for reconstruction starts..."; | |
287 // for (int i = 0; i < 20; ++i) { | |
288 // if (i % 5 == 0) std::cerr << "\n" << i << " "; | |
289 // std::cerr << buf[i] << " "; | |
290 // } | |
291 // std::cerr << std::endl; | |
292 | |
148 for (int i = 0; i < n; ++i) { | 293 for (int i = 0; i < n; ++i) { |
149 // NB gcc can only vectorize this with -ffast-math | 294 // NB gcc can only vectorize this with -ffast-math |
150 v += buf[i] * filt[i]; | 295 v += buf[i] * filt[i]; |
151 } | 296 } |
152 m_bufferOrigin += pd.drop; | 297 m_bufferOrigin += pd.drop; |
218 int got = r.process(data, out.data(), n); | 363 int got = r.process(data, out.data(), n); |
219 got += r.process(pad.data(), out.data() + got, pad.size()); | 364 got += r.process(pad.data(), out.data() + got, pad.size()); |
220 | 365 |
221 #ifdef DEBUG_RESAMPLER | 366 #ifdef DEBUG_RESAMPLER |
222 std::cerr << "resample: " << n << " in, " << got << " out" << std::endl; | 367 std::cerr << "resample: " << n << " in, " << got << " out" << std::endl; |
223 for (int i = 0; i < got; ++i) { | 368 std::cerr << "first 10 in:" << std::endl; |
224 if (i % 5 == 0) std::cout << std::endl << i << "... "; | 369 for (int i = 0; i < 10; ++i) { |
225 std::cout << (float) out[i] << " "; | 370 std::cerr << data[i] << " "; |
226 } | 371 if (i == 5) std::cerr << std::endl; |
227 std::cout << std::endl; | 372 } |
373 std::cerr << std::endl; | |
228 #endif | 374 #endif |
229 | 375 |
230 int toReturn = got - latency; | 376 int toReturn = got - latency; |
231 if (toReturn > m) toReturn = m; | 377 if (toReturn > m) toReturn = m; |
232 | 378 |
233 return vector<double>(out.begin() + latency, | 379 vector<double> sliced(out.begin() + latency, |
234 out.begin() + latency + toReturn); | 380 out.begin() + latency + toReturn); |
235 } | 381 |
236 | 382 #ifdef DEBUG_RESAMPLER |
383 std::cerr << "all out (after latency compensation), length " << sliced.size() << ":"; | |
384 for (int i = 0; i < sliced.size(); ++i) { | |
385 if (i % 5 == 0) std::cerr << std::endl << i << "... "; | |
386 std::cerr << sliced[i] << " "; | |
387 } | |
388 std::cerr << std::endl; | |
389 #endif | |
390 | |
391 return sliced; | |
392 } | |
393 |