yading@10
|
1 /*
|
yading@10
|
2 * Copyright (c) 2012 Pavel Koshevoy <pkoshevoy at gmail dot com>
|
yading@10
|
3 *
|
yading@10
|
4 * This file is part of FFmpeg.
|
yading@10
|
5 *
|
yading@10
|
6 * FFmpeg is free software; you can redistribute it and/or
|
yading@10
|
7 * modify it under the terms of the GNU Lesser General Public
|
yading@10
|
8 * License as published by the Free Software Foundation; either
|
yading@10
|
9 * version 2.1 of the License, or (at your option) any later version.
|
yading@10
|
10 *
|
yading@10
|
11 * FFmpeg is distributed in the hope that it will be useful,
|
yading@10
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
yading@10
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
yading@10
|
14 * Lesser General Public License for more details.
|
yading@10
|
15 *
|
yading@10
|
16 * You should have received a copy of the GNU Lesser General Public
|
yading@10
|
17 * License along with FFmpeg; if not, write to the Free Software
|
yading@10
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
yading@10
|
19 */
|
yading@10
|
20
|
yading@10
|
21 /**
|
yading@10
|
22 * @file
|
yading@10
|
23 * tempo scaling audio filter -- an implementation of WSOLA algorithm
|
yading@10
|
24 *
|
yading@10
|
25 * Based on MIT licensed yaeAudioTempoFilter.h and yaeAudioFragment.h
|
yading@10
|
26 * from Apprentice Video player by Pavel Koshevoy.
|
yading@10
|
27 * https://sourceforge.net/projects/apprenticevideo/
|
yading@10
|
28 *
|
yading@10
|
29 * An explanation of SOLA algorithm is available at
|
yading@10
|
30 * http://www.surina.net/article/time-and-pitch-scaling.html
|
yading@10
|
31 *
|
yading@10
|
32 * WSOLA is very similar to SOLA, only one major difference exists between
|
yading@10
|
33 * these algorithms. SOLA shifts audio fragments along the output stream,
|
yading@10
|
34 * whereas WSOLA shifts audio fragments along the input stream.
|
yading@10
|
35 *
|
yading@10
|
36 * The advantage of WSOLA algorithm is that the overlap region size is
|
yading@10
|
37 * always the same, therefore the blending function is constant and
|
yading@10
|
38 * can be precomputed.
|
yading@10
|
39 */
|
yading@10
|
40
|
yading@10
|
41 #include <float.h>
|
yading@10
|
42 #include "libavcodec/avfft.h"
|
yading@10
|
43 #include "libavutil/avassert.h"
|
yading@10
|
44 #include "libavutil/avstring.h"
|
yading@10
|
45 #include "libavutil/channel_layout.h"
|
yading@10
|
46 #include "libavutil/eval.h"
|
yading@10
|
47 #include "libavutil/opt.h"
|
yading@10
|
48 #include "libavutil/samplefmt.h"
|
yading@10
|
49 #include "avfilter.h"
|
yading@10
|
50 #include "audio.h"
|
yading@10
|
51 #include "internal.h"
|
yading@10
|
52
|
yading@10
|
53 /**
|
yading@10
|
54 * A fragment of audio waveform
|
yading@10
|
55 */
|
yading@10
|
56 typedef struct {
|
yading@10
|
57 // index of the first sample of this fragment in the overall waveform;
|
yading@10
|
58 // 0: input sample position
|
yading@10
|
59 // 1: output sample position
|
yading@10
|
60 int64_t position[2];
|
yading@10
|
61
|
yading@10
|
62 // original packed multi-channel samples:
|
yading@10
|
63 uint8_t *data;
|
yading@10
|
64
|
yading@10
|
65 // number of samples in this fragment:
|
yading@10
|
66 int nsamples;
|
yading@10
|
67
|
yading@10
|
68 // rDFT transform of the down-mixed mono fragment, used for
|
yading@10
|
69 // fast waveform alignment via correlation in frequency domain:
|
yading@10
|
70 FFTSample *xdat;
|
yading@10
|
71 } AudioFragment;
|
yading@10
|
72
|
yading@10
|
/**
 * Filter state machine states
 *
 * The values are spelled out explicitly; they match the implicit
 * numbering (0, 1, 2, ...) an enum without initializers would get.
 */
typedef enum {
    YAE_LOAD_FRAGMENT      = 0,
    YAE_ADJUST_POSITION    = 1,
    YAE_RELOAD_FRAGMENT    = 2,
    YAE_OUTPUT_OVERLAP_ADD = 3,
    YAE_FLUSH_OUTPUT       = 4,
} FilterState;
|
yading@10
|
83
|
yading@10
|
84 /**
|
yading@10
|
85 * Filter state machine
|
yading@10
|
86 */
|
yading@10
|
87 typedef struct {
|
yading@10
|
88 const AVClass *class;
|
yading@10
|
89
|
yading@10
|
90 // ring-buffer of input samples, necessary because some times
|
yading@10
|
91 // input fragment position may be adjusted backwards:
|
yading@10
|
92 uint8_t *buffer;
|
yading@10
|
93
|
yading@10
|
94 // ring-buffer maximum capacity, expressed in sample rate time base:
|
yading@10
|
95 int ring;
|
yading@10
|
96
|
yading@10
|
97 // ring-buffer house keeping:
|
yading@10
|
98 int size;
|
yading@10
|
99 int head;
|
yading@10
|
100 int tail;
|
yading@10
|
101
|
yading@10
|
102 // 0: input sample position corresponding to the ring buffer tail
|
yading@10
|
103 // 1: output sample position
|
yading@10
|
104 int64_t position[2];
|
yading@10
|
105
|
yading@10
|
106 // sample format:
|
yading@10
|
107 enum AVSampleFormat format;
|
yading@10
|
108
|
yading@10
|
109 // number of channels:
|
yading@10
|
110 int channels;
|
yading@10
|
111
|
yading@10
|
112 // row of bytes to skip from one sample to next, across multple channels;
|
yading@10
|
113 // stride = (number-of-channels * bits-per-sample-per-channel) / 8
|
yading@10
|
114 int stride;
|
yading@10
|
115
|
yading@10
|
116 // fragment window size, power-of-two integer:
|
yading@10
|
117 int window;
|
yading@10
|
118
|
yading@10
|
119 // Hann window coefficients, for feathering
|
yading@10
|
120 // (blending) the overlapping fragment region:
|
yading@10
|
121 float *hann;
|
yading@10
|
122
|
yading@10
|
123 // tempo scaling factor:
|
yading@10
|
124 double tempo;
|
yading@10
|
125
|
yading@10
|
126 // cumulative alignment drift:
|
yading@10
|
127 int drift;
|
yading@10
|
128
|
yading@10
|
129 // current/previous fragment ring-buffer:
|
yading@10
|
130 AudioFragment frag[2];
|
yading@10
|
131
|
yading@10
|
132 // current fragment index:
|
yading@10
|
133 uint64_t nfrag;
|
yading@10
|
134
|
yading@10
|
135 // current state:
|
yading@10
|
136 FilterState state;
|
yading@10
|
137
|
yading@10
|
138 // for fast correlation calculation in frequency domain:
|
yading@10
|
139 RDFTContext *real_to_complex;
|
yading@10
|
140 RDFTContext *complex_to_real;
|
yading@10
|
141 FFTSample *correlation;
|
yading@10
|
142
|
yading@10
|
143 // for managing AVFilterPad.request_frame and AVFilterPad.filter_frame
|
yading@10
|
144 AVFrame *dst_buffer;
|
yading@10
|
145 uint8_t *dst;
|
yading@10
|
146 uint8_t *dst_end;
|
yading@10
|
147 uint64_t nsamples_in;
|
yading@10
|
148 uint64_t nsamples_out;
|
yading@10
|
149 } ATempoContext;
|
yading@10
|
150
|
yading@10
|
151 #define OFFSET(x) offsetof(ATempoContext, x)
|
yading@10
|
152
|
yading@10
|
153 static const AVOption atempo_options[] = {
|
yading@10
|
154 { "tempo", "set tempo scale factor",
|
yading@10
|
155 OFFSET(tempo), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 0.5, 2.0,
|
yading@10
|
156 AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM },
|
yading@10
|
157 { NULL }
|
yading@10
|
158 };
|
yading@10
|
159
|
yading@10
|
160 AVFILTER_DEFINE_CLASS(atempo);
|
yading@10
|
161
|
yading@10
|
162 /**
|
yading@10
|
163 * Reset filter to initial state, do not deallocate existing local buffers.
|
yading@10
|
164 */
|
yading@10
|
165 static void yae_clear(ATempoContext *atempo)
|
yading@10
|
166 {
|
yading@10
|
167 atempo->size = 0;
|
yading@10
|
168 atempo->head = 0;
|
yading@10
|
169 atempo->tail = 0;
|
yading@10
|
170
|
yading@10
|
171 atempo->drift = 0;
|
yading@10
|
172 atempo->nfrag = 0;
|
yading@10
|
173 atempo->state = YAE_LOAD_FRAGMENT;
|
yading@10
|
174
|
yading@10
|
175 atempo->position[0] = 0;
|
yading@10
|
176 atempo->position[1] = 0;
|
yading@10
|
177
|
yading@10
|
178 atempo->frag[0].position[0] = 0;
|
yading@10
|
179 atempo->frag[0].position[1] = 0;
|
yading@10
|
180 atempo->frag[0].nsamples = 0;
|
yading@10
|
181
|
yading@10
|
182 atempo->frag[1].position[0] = 0;
|
yading@10
|
183 atempo->frag[1].position[1] = 0;
|
yading@10
|
184 atempo->frag[1].nsamples = 0;
|
yading@10
|
185
|
yading@10
|
186 // shift left position of 1st fragment by half a window
|
yading@10
|
187 // so that no re-normalization would be required for
|
yading@10
|
188 // the left half of the 1st fragment:
|
yading@10
|
189 atempo->frag[0].position[0] = -(int64_t)(atempo->window / 2);
|
yading@10
|
190 atempo->frag[0].position[1] = -(int64_t)(atempo->window / 2);
|
yading@10
|
191
|
yading@10
|
192 av_frame_free(&atempo->dst_buffer);
|
yading@10
|
193 atempo->dst = NULL;
|
yading@10
|
194 atempo->dst_end = NULL;
|
yading@10
|
195
|
yading@10
|
196 atempo->nsamples_in = 0;
|
yading@10
|
197 atempo->nsamples_out = 0;
|
yading@10
|
198 }
|
yading@10
|
199
|
yading@10
|
200 /**
|
yading@10
|
201 * Reset filter to initial state and deallocate all buffers.
|
yading@10
|
202 */
|
yading@10
|
203 static void yae_release_buffers(ATempoContext *atempo)
|
yading@10
|
204 {
|
yading@10
|
205 yae_clear(atempo);
|
yading@10
|
206
|
yading@10
|
207 av_freep(&atempo->frag[0].data);
|
yading@10
|
208 av_freep(&atempo->frag[1].data);
|
yading@10
|
209 av_freep(&atempo->frag[0].xdat);
|
yading@10
|
210 av_freep(&atempo->frag[1].xdat);
|
yading@10
|
211
|
yading@10
|
212 av_freep(&atempo->buffer);
|
yading@10
|
213 av_freep(&atempo->hann);
|
yading@10
|
214 av_freep(&atempo->correlation);
|
yading@10
|
215
|
yading@10
|
216 av_rdft_end(atempo->real_to_complex);
|
yading@10
|
217 atempo->real_to_complex = NULL;
|
yading@10
|
218
|
yading@10
|
219 av_rdft_end(atempo->complex_to_real);
|
yading@10
|
220 atempo->complex_to_real = NULL;
|
yading@10
|
221 }
|
yading@10
|
222
|
yading@10
|
/**
 * Discard the buffer held in field and allocate a new one of field_size
 * bytes in its place.
 *
 * av_realloc is not aligned enough; fortunately, the data does not need to
 * be preserved, so the old buffer is simply freed first.
 *
 * On allocation failure all filter buffers are released and the ENCLOSING
 * function returns AVERROR(ENOMEM); the macro therefore may only be used
 * inside a function returning int that has an ATempoContext *atempo in scope.
 *
 * Both arguments are parenthesized so that arbitrary expressions can be
 * passed safely.
 */
#define RE_MALLOC_OR_FAIL(field, field_size)                    \
    do {                                                        \
        av_freep(&(field));                                     \
        (field) = av_malloc((field_size));                      \
        if (!(field)) {                                         \
            yae_release_buffers(atempo);                        \
            return AVERROR(ENOMEM);                             \
        }                                                       \
    } while (0)
|
yading@10
|
234
|
yading@10
|
235 /**
|
yading@10
|
236 * Prepare filter for processing audio data of given format,
|
yading@10
|
237 * sample rate and number of channels.
|
yading@10
|
238 */
|
yading@10
|
239 static int yae_reset(ATempoContext *atempo,
|
yading@10
|
240 enum AVSampleFormat format,
|
yading@10
|
241 int sample_rate,
|
yading@10
|
242 int channels)
|
yading@10
|
243 {
|
yading@10
|
244 const int sample_size = av_get_bytes_per_sample(format);
|
yading@10
|
245 uint32_t nlevels = 0;
|
yading@10
|
246 uint32_t pot;
|
yading@10
|
247 int i;
|
yading@10
|
248
|
yading@10
|
249 atempo->format = format;
|
yading@10
|
250 atempo->channels = channels;
|
yading@10
|
251 atempo->stride = sample_size * channels;
|
yading@10
|
252
|
yading@10
|
253 // pick a segment window size:
|
yading@10
|
254 atempo->window = sample_rate / 24;
|
yading@10
|
255
|
yading@10
|
256 // adjust window size to be a power-of-two integer:
|
yading@10
|
257 nlevels = av_log2(atempo->window);
|
yading@10
|
258 pot = 1 << nlevels;
|
yading@10
|
259 av_assert0(pot <= atempo->window);
|
yading@10
|
260
|
yading@10
|
261 if (pot < atempo->window) {
|
yading@10
|
262 atempo->window = pot * 2;
|
yading@10
|
263 nlevels++;
|
yading@10
|
264 }
|
yading@10
|
265
|
yading@10
|
266 // initialize audio fragment buffers:
|
yading@10
|
267 RE_MALLOC_OR_FAIL(atempo->frag[0].data, atempo->window * atempo->stride);
|
yading@10
|
268 RE_MALLOC_OR_FAIL(atempo->frag[1].data, atempo->window * atempo->stride);
|
yading@10
|
269 RE_MALLOC_OR_FAIL(atempo->frag[0].xdat, atempo->window * sizeof(FFTComplex));
|
yading@10
|
270 RE_MALLOC_OR_FAIL(atempo->frag[1].xdat, atempo->window * sizeof(FFTComplex));
|
yading@10
|
271
|
yading@10
|
272 // initialize rDFT contexts:
|
yading@10
|
273 av_rdft_end(atempo->real_to_complex);
|
yading@10
|
274 atempo->real_to_complex = NULL;
|
yading@10
|
275
|
yading@10
|
276 av_rdft_end(atempo->complex_to_real);
|
yading@10
|
277 atempo->complex_to_real = NULL;
|
yading@10
|
278
|
yading@10
|
279 atempo->real_to_complex = av_rdft_init(nlevels + 1, DFT_R2C);
|
yading@10
|
280 if (!atempo->real_to_complex) {
|
yading@10
|
281 yae_release_buffers(atempo);
|
yading@10
|
282 return AVERROR(ENOMEM);
|
yading@10
|
283 }
|
yading@10
|
284
|
yading@10
|
285 atempo->complex_to_real = av_rdft_init(nlevels + 1, IDFT_C2R);
|
yading@10
|
286 if (!atempo->complex_to_real) {
|
yading@10
|
287 yae_release_buffers(atempo);
|
yading@10
|
288 return AVERROR(ENOMEM);
|
yading@10
|
289 }
|
yading@10
|
290
|
yading@10
|
291 RE_MALLOC_OR_FAIL(atempo->correlation, atempo->window * sizeof(FFTComplex));
|
yading@10
|
292
|
yading@10
|
293 atempo->ring = atempo->window * 3;
|
yading@10
|
294 RE_MALLOC_OR_FAIL(atempo->buffer, atempo->ring * atempo->stride);
|
yading@10
|
295
|
yading@10
|
296 // initialize the Hann window function:
|
yading@10
|
297 RE_MALLOC_OR_FAIL(atempo->hann, atempo->window * sizeof(float));
|
yading@10
|
298
|
yading@10
|
299 for (i = 0; i < atempo->window; i++) {
|
yading@10
|
300 double t = (double)i / (double)(atempo->window - 1);
|
yading@10
|
301 double h = 0.5 * (1.0 - cos(2.0 * M_PI * t));
|
yading@10
|
302 atempo->hann[i] = (float)h;
|
yading@10
|
303 }
|
yading@10
|
304
|
yading@10
|
305 yae_clear(atempo);
|
yading@10
|
306 return 0;
|
yading@10
|
307 }
|
yading@10
|
308
|
yading@10
|
309 static int yae_set_tempo(AVFilterContext *ctx, const char *arg_tempo)
|
yading@10
|
310 {
|
yading@10
|
311 ATempoContext *atempo = ctx->priv;
|
yading@10
|
312 char *tail = NULL;
|
yading@10
|
313 double tempo = av_strtod(arg_tempo, &tail);
|
yading@10
|
314
|
yading@10
|
315 if (tail && *tail) {
|
yading@10
|
316 av_log(ctx, AV_LOG_ERROR, "Invalid tempo value '%s'\n", arg_tempo);
|
yading@10
|
317 return AVERROR(EINVAL);
|
yading@10
|
318 }
|
yading@10
|
319
|
yading@10
|
320 if (tempo < 0.5 || tempo > 2.0) {
|
yading@10
|
321 av_log(ctx, AV_LOG_ERROR, "Tempo value %f exceeds [0.5, 2.0] range\n",
|
yading@10
|
322 tempo);
|
yading@10
|
323 return AVERROR(EINVAL);
|
yading@10
|
324 }
|
yading@10
|
325
|
yading@10
|
326 atempo->tempo = tempo;
|
yading@10
|
327 return 0;
|
yading@10
|
328 }
|
yading@10
|
329
|
yading@10
|
330 inline static AudioFragment *yae_curr_frag(ATempoContext *atempo)
|
yading@10
|
331 {
|
yading@10
|
332 return &atempo->frag[atempo->nfrag % 2];
|
yading@10
|
333 }
|
yading@10
|
334
|
yading@10
|
335 inline static AudioFragment *yae_prev_frag(ATempoContext *atempo)
|
yading@10
|
336 {
|
yading@10
|
337 return &atempo->frag[(atempo->nfrag + 1) % 2];
|
yading@10
|
338 }
|
yading@10
|
339
|
yading@10
|
/**
 * A helper macro for filling the complex data buffer of a fragment with
 * scalar data of a given type.
 *
 * Expects `atempo`, `frag` and a modifiable `const uint8_t *src` to be in
 * scope at the point of expansion; `src` is advanced past the samples read.
 *
 * Mono input is copied as is. For multi-channel input each output value is
 * the sample of the channel with the largest magnitude (magnitudes are
 * capped at scalar_max for the comparison; the first channel wins ties).
 */
#define yae_init_xdat(scalar_type, scalar_max)                          \
    do {                                                                \
        const uint8_t *const stop = src +                               \
            frag->nsamples * atempo->channels * sizeof(scalar_type);    \
                                                                        \
        FFTSample *mono = frag->xdat;                                   \
        scalar_type sample;                                             \
                                                                        \
        if (atempo->channels == 1) {                                    \
            while (src < stop) {                                        \
                sample = *(const scalar_type *)src;                     \
                src += sizeof(scalar_type);                             \
                                                                        \
                *mono = (FFTSample)sample;                              \
                mono++;                                                 \
            }                                                           \
        } else {                                                        \
            FFTSample peak_mag, peak, cand, cand_mag;                   \
            int ch;                                                     \
                                                                        \
            while (src < stop) {                                        \
                /* start with the first channel as the peak: */         \
                sample = *(const scalar_type *)src;                     \
                src += sizeof(scalar_type);                             \
                                                                        \
                peak = (FFTSample)sample;                               \
                peak_mag = FFMIN((FFTSample)scalar_max,                 \
                                 (FFTSample)fabsf(peak));               \
                                                                        \
                /* let louder channels take over: */                    \
                for (ch = 1; ch < atempo->channels; ch++) {             \
                    sample = *(const scalar_type *)src;                 \
                    src += sizeof(scalar_type);                         \
                                                                        \
                    cand = (FFTSample)sample;                           \
                    cand_mag = FFMIN((FFTSample)scalar_max,             \
                                     (FFTSample)fabsf(cand));           \
                                                                        \
                    if (peak_mag < cand_mag) {                          \
                        peak_mag = cand_mag;                            \
                        peak     = cand;                                \
                    }                                                   \
                }                                                       \
                                                                        \
                *mono = peak;                                           \
                mono++;                                                 \
            }                                                           \
        }                                                               \
    } while (0)
|
yading@10
|
389
|
yading@10
|
390 /**
|
yading@10
|
391 * Initialize complex data buffer of a given audio fragment
|
yading@10
|
392 * with down-mixed mono data of appropriate scalar type.
|
yading@10
|
393 */
|
yading@10
|
394 static void yae_downmix(ATempoContext *atempo, AudioFragment *frag)
|
yading@10
|
395 {
|
yading@10
|
396 // shortcuts:
|
yading@10
|
397 const uint8_t *src = frag->data;
|
yading@10
|
398
|
yading@10
|
399 // init complex data buffer used for FFT and Correlation:
|
yading@10
|
400 memset(frag->xdat, 0, sizeof(FFTComplex) * atempo->window);
|
yading@10
|
401
|
yading@10
|
402 if (atempo->format == AV_SAMPLE_FMT_U8) {
|
yading@10
|
403 yae_init_xdat(uint8_t, 127);
|
yading@10
|
404 } else if (atempo->format == AV_SAMPLE_FMT_S16) {
|
yading@10
|
405 yae_init_xdat(int16_t, 32767);
|
yading@10
|
406 } else if (atempo->format == AV_SAMPLE_FMT_S32) {
|
yading@10
|
407 yae_init_xdat(int, 2147483647);
|
yading@10
|
408 } else if (atempo->format == AV_SAMPLE_FMT_FLT) {
|
yading@10
|
409 yae_init_xdat(float, 1);
|
yading@10
|
410 } else if (atempo->format == AV_SAMPLE_FMT_DBL) {
|
yading@10
|
411 yae_init_xdat(double, 1);
|
yading@10
|
412 }
|
yading@10
|
413 }
|
yading@10
|
414
|
yading@10
|
415 /**
|
yading@10
|
416 * Populate the internal data buffer on as-needed basis.
|
yading@10
|
417 *
|
yading@10
|
418 * @return
|
yading@10
|
419 * 0 if requested data was already available or was successfully loaded,
|
yading@10
|
420 * AVERROR(EAGAIN) if more input data is required.
|
yading@10
|
421 */
|
yading@10
|
422 static int yae_load_data(ATempoContext *atempo,
|
yading@10
|
423 const uint8_t **src_ref,
|
yading@10
|
424 const uint8_t *src_end,
|
yading@10
|
425 int64_t stop_here)
|
yading@10
|
426 {
|
yading@10
|
427 // shortcut:
|
yading@10
|
428 const uint8_t *src = *src_ref;
|
yading@10
|
429 const int read_size = stop_here - atempo->position[0];
|
yading@10
|
430
|
yading@10
|
431 if (stop_here <= atempo->position[0]) {
|
yading@10
|
432 return 0;
|
yading@10
|
433 }
|
yading@10
|
434
|
yading@10
|
435 // samples are not expected to be skipped:
|
yading@10
|
436 av_assert0(read_size <= atempo->ring);
|
yading@10
|
437
|
yading@10
|
438 while (atempo->position[0] < stop_here && src < src_end) {
|
yading@10
|
439 int src_samples = (src_end - src) / atempo->stride;
|
yading@10
|
440
|
yading@10
|
441 // load data piece-wise, in order to avoid complicating the logic:
|
yading@10
|
442 int nsamples = FFMIN(read_size, src_samples);
|
yading@10
|
443 int na;
|
yading@10
|
444 int nb;
|
yading@10
|
445
|
yading@10
|
446 nsamples = FFMIN(nsamples, atempo->ring);
|
yading@10
|
447 na = FFMIN(nsamples, atempo->ring - atempo->tail);
|
yading@10
|
448 nb = FFMIN(nsamples - na, atempo->ring);
|
yading@10
|
449
|
yading@10
|
450 if (na) {
|
yading@10
|
451 uint8_t *a = atempo->buffer + atempo->tail * atempo->stride;
|
yading@10
|
452 memcpy(a, src, na * atempo->stride);
|
yading@10
|
453
|
yading@10
|
454 src += na * atempo->stride;
|
yading@10
|
455 atempo->position[0] += na;
|
yading@10
|
456
|
yading@10
|
457 atempo->size = FFMIN(atempo->size + na, atempo->ring);
|
yading@10
|
458 atempo->tail = (atempo->tail + na) % atempo->ring;
|
yading@10
|
459 atempo->head =
|
yading@10
|
460 atempo->size < atempo->ring ?
|
yading@10
|
461 atempo->tail - atempo->size :
|
yading@10
|
462 atempo->tail;
|
yading@10
|
463 }
|
yading@10
|
464
|
yading@10
|
465 if (nb) {
|
yading@10
|
466 uint8_t *b = atempo->buffer;
|
yading@10
|
467 memcpy(b, src, nb * atempo->stride);
|
yading@10
|
468
|
yading@10
|
469 src += nb * atempo->stride;
|
yading@10
|
470 atempo->position[0] += nb;
|
yading@10
|
471
|
yading@10
|
472 atempo->size = FFMIN(atempo->size + nb, atempo->ring);
|
yading@10
|
473 atempo->tail = (atempo->tail + nb) % atempo->ring;
|
yading@10
|
474 atempo->head =
|
yading@10
|
475 atempo->size < atempo->ring ?
|
yading@10
|
476 atempo->tail - atempo->size :
|
yading@10
|
477 atempo->tail;
|
yading@10
|
478 }
|
yading@10
|
479 }
|
yading@10
|
480
|
yading@10
|
481 // pass back the updated source buffer pointer:
|
yading@10
|
482 *src_ref = src;
|
yading@10
|
483
|
yading@10
|
484 // sanity check:
|
yading@10
|
485 av_assert0(atempo->position[0] <= stop_here);
|
yading@10
|
486
|
yading@10
|
487 return atempo->position[0] == stop_here ? 0 : AVERROR(EAGAIN);
|
yading@10
|
488 }
|
yading@10
|
489
|
yading@10
|
490 /**
|
yading@10
|
491 * Populate current audio fragment data buffer.
|
yading@10
|
492 *
|
yading@10
|
493 * @return
|
yading@10
|
494 * 0 when the fragment is ready,
|
yading@10
|
495 * AVERROR(EAGAIN) if more input data is required.
|
yading@10
|
496 */
|
yading@10
|
497 static int yae_load_frag(ATempoContext *atempo,
|
yading@10
|
498 const uint8_t **src_ref,
|
yading@10
|
499 const uint8_t *src_end)
|
yading@10
|
500 {
|
yading@10
|
501 // shortcuts:
|
yading@10
|
502 AudioFragment *frag = yae_curr_frag(atempo);
|
yading@10
|
503 uint8_t *dst;
|
yading@10
|
504 int64_t missing, start, zeros;
|
yading@10
|
505 uint32_t nsamples;
|
yading@10
|
506 const uint8_t *a, *b;
|
yading@10
|
507 int i0, i1, n0, n1, na, nb;
|
yading@10
|
508
|
yading@10
|
509 int64_t stop_here = frag->position[0] + atempo->window;
|
yading@10
|
510 if (src_ref && yae_load_data(atempo, src_ref, src_end, stop_here) != 0) {
|
yading@10
|
511 return AVERROR(EAGAIN);
|
yading@10
|
512 }
|
yading@10
|
513
|
yading@10
|
514 // calculate the number of samples we don't have:
|
yading@10
|
515 missing =
|
yading@10
|
516 stop_here > atempo->position[0] ?
|
yading@10
|
517 stop_here - atempo->position[0] : 0;
|
yading@10
|
518
|
yading@10
|
519 nsamples =
|
yading@10
|
520 missing < (int64_t)atempo->window ?
|
yading@10
|
521 (uint32_t)(atempo->window - missing) : 0;
|
yading@10
|
522
|
yading@10
|
523 // setup the output buffer:
|
yading@10
|
524 frag->nsamples = nsamples;
|
yading@10
|
525 dst = frag->data;
|
yading@10
|
526
|
yading@10
|
527 start = atempo->position[0] - atempo->size;
|
yading@10
|
528 zeros = 0;
|
yading@10
|
529
|
yading@10
|
530 if (frag->position[0] < start) {
|
yading@10
|
531 // what we don't have we substitute with zeros:
|
yading@10
|
532 zeros = FFMIN(start - frag->position[0], (int64_t)nsamples);
|
yading@10
|
533 av_assert0(zeros != nsamples);
|
yading@10
|
534
|
yading@10
|
535 memset(dst, 0, zeros * atempo->stride);
|
yading@10
|
536 dst += zeros * atempo->stride;
|
yading@10
|
537 }
|
yading@10
|
538
|
yading@10
|
539 if (zeros == nsamples) {
|
yading@10
|
540 return 0;
|
yading@10
|
541 }
|
yading@10
|
542
|
yading@10
|
543 // get the remaining data from the ring buffer:
|
yading@10
|
544 na = (atempo->head < atempo->tail ?
|
yading@10
|
545 atempo->tail - atempo->head :
|
yading@10
|
546 atempo->ring - atempo->head);
|
yading@10
|
547
|
yading@10
|
548 nb = atempo->head < atempo->tail ? 0 : atempo->tail;
|
yading@10
|
549
|
yading@10
|
550 // sanity check:
|
yading@10
|
551 av_assert0(nsamples <= zeros + na + nb);
|
yading@10
|
552
|
yading@10
|
553 a = atempo->buffer + atempo->head * atempo->stride;
|
yading@10
|
554 b = atempo->buffer;
|
yading@10
|
555
|
yading@10
|
556 i0 = frag->position[0] + zeros - start;
|
yading@10
|
557 i1 = i0 < na ? 0 : i0 - na;
|
yading@10
|
558
|
yading@10
|
559 n0 = i0 < na ? FFMIN(na - i0, (int)(nsamples - zeros)) : 0;
|
yading@10
|
560 n1 = nsamples - zeros - n0;
|
yading@10
|
561
|
yading@10
|
562 if (n0) {
|
yading@10
|
563 memcpy(dst, a + i0 * atempo->stride, n0 * atempo->stride);
|
yading@10
|
564 dst += n0 * atempo->stride;
|
yading@10
|
565 }
|
yading@10
|
566
|
yading@10
|
567 if (n1) {
|
yading@10
|
568 memcpy(dst, b + i1 * atempo->stride, n1 * atempo->stride);
|
yading@10
|
569 }
|
yading@10
|
570
|
yading@10
|
571 return 0;
|
yading@10
|
572 }
|
yading@10
|
573
|
yading@10
|
574 /**
|
yading@10
|
575 * Prepare for loading next audio fragment.
|
yading@10
|
576 */
|
yading@10
|
577 static void yae_advance_to_next_frag(ATempoContext *atempo)
|
yading@10
|
578 {
|
yading@10
|
579 const double fragment_step = atempo->tempo * (double)(atempo->window / 2);
|
yading@10
|
580
|
yading@10
|
581 const AudioFragment *prev;
|
yading@10
|
582 AudioFragment *frag;
|
yading@10
|
583
|
yading@10
|
584 atempo->nfrag++;
|
yading@10
|
585 prev = yae_prev_frag(atempo);
|
yading@10
|
586 frag = yae_curr_frag(atempo);
|
yading@10
|
587
|
yading@10
|
588 frag->position[0] = prev->position[0] + (int64_t)fragment_step;
|
yading@10
|
589 frag->position[1] = prev->position[1] + atempo->window / 2;
|
yading@10
|
590 frag->nsamples = 0;
|
yading@10
|
591 }
|
yading@10
|
592
|
yading@10
|
593 /**
|
yading@10
|
594 * Calculate cross-correlation via rDFT.
|
yading@10
|
595 *
|
yading@10
|
596 * Multiply two vectors of complex numbers (result of real_to_complex rDFT)
|
yading@10
|
597 * and transform back via complex_to_real rDFT.
|
yading@10
|
598 */
|
yading@10
|
599 static void yae_xcorr_via_rdft(FFTSample *xcorr,
|
yading@10
|
600 RDFTContext *complex_to_real,
|
yading@10
|
601 const FFTComplex *xa,
|
yading@10
|
602 const FFTComplex *xb,
|
yading@10
|
603 const int window)
|
yading@10
|
604 {
|
yading@10
|
605 FFTComplex *xc = (FFTComplex *)xcorr;
|
yading@10
|
606 int i;
|
yading@10
|
607
|
yading@10
|
608 // NOTE: first element requires special care -- Given Y = rDFT(X),
|
yading@10
|
609 // Im(Y[0]) and Im(Y[N/2]) are always zero, therefore av_rdft_calc
|
yading@10
|
610 // stores Re(Y[N/2]) in place of Im(Y[0]).
|
yading@10
|
611
|
yading@10
|
612 xc->re = xa->re * xb->re;
|
yading@10
|
613 xc->im = xa->im * xb->im;
|
yading@10
|
614 xa++;
|
yading@10
|
615 xb++;
|
yading@10
|
616 xc++;
|
yading@10
|
617
|
yading@10
|
618 for (i = 1; i < window; i++, xa++, xb++, xc++) {
|
yading@10
|
619 xc->re = (xa->re * xb->re + xa->im * xb->im);
|
yading@10
|
620 xc->im = (xa->im * xb->re - xa->re * xb->im);
|
yading@10
|
621 }
|
yading@10
|
622
|
yading@10
|
623 // apply inverse rDFT:
|
yading@10
|
624 av_rdft_calc(complex_to_real, xcorr);
|
yading@10
|
625 }
|
yading@10
|
626
|
yading@10
|
627 /**
|
yading@10
|
628 * Calculate alignment offset for given fragment
|
yading@10
|
629 * relative to the previous fragment.
|
yading@10
|
630 *
|
yading@10
|
631 * @return alignment offset of current fragment relative to previous.
|
yading@10
|
632 */
|
yading@10
|
633 static int yae_align(AudioFragment *frag,
|
yading@10
|
634 const AudioFragment *prev,
|
yading@10
|
635 const int window,
|
yading@10
|
636 const int delta_max,
|
yading@10
|
637 const int drift,
|
yading@10
|
638 FFTSample *correlation,
|
yading@10
|
639 RDFTContext *complex_to_real)
|
yading@10
|
640 {
|
yading@10
|
641 int best_offset = -drift;
|
yading@10
|
642 FFTSample best_metric = -FLT_MAX;
|
yading@10
|
643 FFTSample *xcorr;
|
yading@10
|
644
|
yading@10
|
645 int i0;
|
yading@10
|
646 int i1;
|
yading@10
|
647 int i;
|
yading@10
|
648
|
yading@10
|
649 yae_xcorr_via_rdft(correlation,
|
yading@10
|
650 complex_to_real,
|
yading@10
|
651 (const FFTComplex *)prev->xdat,
|
yading@10
|
652 (const FFTComplex *)frag->xdat,
|
yading@10
|
653 window);
|
yading@10
|
654
|
yading@10
|
655 // identify search window boundaries:
|
yading@10
|
656 i0 = FFMAX(window / 2 - delta_max - drift, 0);
|
yading@10
|
657 i0 = FFMIN(i0, window);
|
yading@10
|
658
|
yading@10
|
659 i1 = FFMIN(window / 2 + delta_max - drift, window - window / 16);
|
yading@10
|
660 i1 = FFMAX(i1, 0);
|
yading@10
|
661
|
yading@10
|
662 // identify cross-correlation peaks within search window:
|
yading@10
|
663 xcorr = correlation + i0;
|
yading@10
|
664
|
yading@10
|
665 for (i = i0; i < i1; i++, xcorr++) {
|
yading@10
|
666 FFTSample metric = *xcorr;
|
yading@10
|
667
|
yading@10
|
668 // normalize:
|
yading@10
|
669 FFTSample drifti = (FFTSample)(drift + i);
|
yading@10
|
670 metric *= drifti * (FFTSample)(i - i0) * (FFTSample)(i1 - i);
|
yading@10
|
671
|
yading@10
|
672 if (metric > best_metric) {
|
yading@10
|
673 best_metric = metric;
|
yading@10
|
674 best_offset = i - window / 2;
|
yading@10
|
675 }
|
yading@10
|
676 }
|
yading@10
|
677
|
yading@10
|
678 return best_offset;
|
yading@10
|
679 }
|
yading@10
|
680
|
yading@10
|
681 /**
|
yading@10
|
682 * Adjust current fragment position for better alignment
|
yading@10
|
683 * with previous fragment.
|
yading@10
|
684 *
|
yading@10
|
685 * @return alignment correction.
|
yading@10
|
686 */
|
yading@10
|
687 static int yae_adjust_position(ATempoContext *atempo)
|
yading@10
|
688 {
|
yading@10
|
689 const AudioFragment *prev = yae_prev_frag(atempo);
|
yading@10
|
690 AudioFragment *frag = yae_curr_frag(atempo);
|
yading@10
|
691
|
yading@10
|
692 const int delta_max = atempo->window / 2;
|
yading@10
|
693 const int correction = yae_align(frag,
|
yading@10
|
694 prev,
|
yading@10
|
695 atempo->window,
|
yading@10
|
696 delta_max,
|
yading@10
|
697 atempo->drift,
|
yading@10
|
698 atempo->correlation,
|
yading@10
|
699 atempo->complex_to_real);
|
yading@10
|
700
|
yading@10
|
701 if (correction) {
|
yading@10
|
702 // adjust fragment position:
|
yading@10
|
703 frag->position[0] -= correction;
|
yading@10
|
704
|
yading@10
|
705 // clear so that the fragment can be reloaded:
|
yading@10
|
706 frag->nsamples = 0;
|
yading@10
|
707
|
yading@10
|
708 // update cumulative correction drift counter:
|
yading@10
|
709 atempo->drift += correction;
|
yading@10
|
710 }
|
yading@10
|
711
|
yading@10
|
712 return correction;
|
yading@10
|
713 }
|
yading@10
|
714
|
yading@10
|
/**
 * A helper macro for blending the overlap region of previous
 * and current audio fragment.
 *
 * Expects these names in scope at the point of expansion: `atempo`,
 * `frag`, `overlap`, sample cursors `a` (previous) and `b` (current),
 * weight cursors `wa` and `wb`, and the output range `dst` / `dst_end`.
 * `dst`, `wa`, `wb` and atempo->position[1] are advanced by the number of
 * samples written.
 *
 * Samples located before input position 0 of the current fragment are
 * taken from the previous fragment unblended.
 */
#define yae_blend(scalar_type)                                          \
    do {                                                                \
        const scalar_type *prev_in = (const scalar_type *)a;            \
        const scalar_type *curr_in = (const scalar_type *)b;            \
                                                                        \
        scalar_type *mixed = (scalar_type *)dst;                        \
        scalar_type *const limit = (scalar_type *)dst_end;              \
        int64_t n = 0;                                                  \
                                                                        \
        while (n < overlap && mixed < limit) {                          \
            const float gain_prev = *wa;                                \
            const float gain_curr = *wb;                                \
            int ch;                                                     \
                                                                        \
            for (ch = 0; ch < atempo->channels; ch++) {                 \
                const float s_prev = (float)*prev_in;                   \
                const float s_curr = (float)*curr_in;                   \
                                                                        \
                if (frag->position[0] + n < 0) {                        \
                    *mixed = *prev_in;                                  \
                } else {                                                \
                    *mixed = (scalar_type)(s_prev * gain_prev +         \
                                           s_curr * gain_curr);         \
                }                                                       \
                                                                        \
                prev_in++;                                              \
                curr_in++;                                              \
                mixed++;                                                \
            }                                                           \
                                                                        \
            n++;                                                        \
            atempo->position[1]++;                                      \
            wa++;                                                       \
            wb++;                                                       \
        }                                                               \
        dst = (uint8_t *)mixed;                                         \
    } while (0)
|
yading@10
|
747
|
yading@10
|
748 /**
|
yading@10
|
749 * Blend the overlap region of previous and current audio fragment
|
yading@10
|
750 * and output the results to the given destination buffer.
|
yading@10
|
751 *
|
yading@10
|
752 * @return
|
yading@10
|
753 * 0 if the overlap region was completely stored in the dst buffer,
|
yading@10
|
754 * AVERROR(EAGAIN) if more destination buffer space is required.
|
yading@10
|
755 */
|
yading@10
|
756 static int yae_overlap_add(ATempoContext *atempo,
|
yading@10
|
757 uint8_t **dst_ref,
|
yading@10
|
758 uint8_t *dst_end)
|
yading@10
|
759 {
|
yading@10
|
760 // shortcuts:
|
yading@10
|
761 const AudioFragment *prev = yae_prev_frag(atempo);
|
yading@10
|
762 const AudioFragment *frag = yae_curr_frag(atempo);
|
yading@10
|
763
|
yading@10
|
764 const int64_t start_here = FFMAX(atempo->position[1],
|
yading@10
|
765 frag->position[1]);
|
yading@10
|
766
|
yading@10
|
767 const int64_t stop_here = FFMIN(prev->position[1] + prev->nsamples,
|
yading@10
|
768 frag->position[1] + frag->nsamples);
|
yading@10
|
769
|
yading@10
|
770 const int64_t overlap = stop_here - start_here;
|
yading@10
|
771
|
yading@10
|
772 const int64_t ia = start_here - prev->position[1];
|
yading@10
|
773 const int64_t ib = start_here - frag->position[1];
|
yading@10
|
774
|
yading@10
|
775 const float *wa = atempo->hann + ia;
|
yading@10
|
776 const float *wb = atempo->hann + ib;
|
yading@10
|
777
|
yading@10
|
778 const uint8_t *a = prev->data + ia * atempo->stride;
|
yading@10
|
779 const uint8_t *b = frag->data + ib * atempo->stride;
|
yading@10
|
780
|
yading@10
|
781 uint8_t *dst = *dst_ref;
|
yading@10
|
782
|
yading@10
|
783 av_assert0(start_here <= stop_here &&
|
yading@10
|
784 frag->position[1] <= start_here &&
|
yading@10
|
785 overlap <= frag->nsamples);
|
yading@10
|
786
|
yading@10
|
787 if (atempo->format == AV_SAMPLE_FMT_U8) {
|
yading@10
|
788 yae_blend(uint8_t);
|
yading@10
|
789 } else if (atempo->format == AV_SAMPLE_FMT_S16) {
|
yading@10
|
790 yae_blend(int16_t);
|
yading@10
|
791 } else if (atempo->format == AV_SAMPLE_FMT_S32) {
|
yading@10
|
792 yae_blend(int);
|
yading@10
|
793 } else if (atempo->format == AV_SAMPLE_FMT_FLT) {
|
yading@10
|
794 yae_blend(float);
|
yading@10
|
795 } else if (atempo->format == AV_SAMPLE_FMT_DBL) {
|
yading@10
|
796 yae_blend(double);
|
yading@10
|
797 }
|
yading@10
|
798
|
yading@10
|
799 // pass-back the updated destination buffer pointer:
|
yading@10
|
800 *dst_ref = dst;
|
yading@10
|
801
|
yading@10
|
802 return atempo->position[1] == stop_here ? 0 : AVERROR(EAGAIN);
|
yading@10
|
803 }
|
yading@10
|
804
|
yading@10
|
805 /**
|
yading@10
|
806 * Feed as much data to the filter as it is able to consume
|
yading@10
|
807 * and receive as much processed data in the destination buffer
|
yading@10
|
808 * as it is able to produce or store.
|
yading@10
|
809 */
|
yading@10
|
810 static void
|
yading@10
|
811 yae_apply(ATempoContext *atempo,
|
yading@10
|
812 const uint8_t **src_ref,
|
yading@10
|
813 const uint8_t *src_end,
|
yading@10
|
814 uint8_t **dst_ref,
|
yading@10
|
815 uint8_t *dst_end)
|
yading@10
|
816 {
|
yading@10
|
817 while (1) {
|
yading@10
|
818 if (atempo->state == YAE_LOAD_FRAGMENT) {
|
yading@10
|
819 // load additional data for the current fragment:
|
yading@10
|
820 if (yae_load_frag(atempo, src_ref, src_end) != 0) {
|
yading@10
|
821 break;
|
yading@10
|
822 }
|
yading@10
|
823
|
yading@10
|
824 // down-mix to mono:
|
yading@10
|
825 yae_downmix(atempo, yae_curr_frag(atempo));
|
yading@10
|
826
|
yading@10
|
827 // apply rDFT:
|
yading@10
|
828 av_rdft_calc(atempo->real_to_complex, yae_curr_frag(atempo)->xdat);
|
yading@10
|
829
|
yading@10
|
830 // must load the second fragment before alignment can start:
|
yading@10
|
831 if (!atempo->nfrag) {
|
yading@10
|
832 yae_advance_to_next_frag(atempo);
|
yading@10
|
833 continue;
|
yading@10
|
834 }
|
yading@10
|
835
|
yading@10
|
836 atempo->state = YAE_ADJUST_POSITION;
|
yading@10
|
837 }
|
yading@10
|
838
|
yading@10
|
839 if (atempo->state == YAE_ADJUST_POSITION) {
|
yading@10
|
840 // adjust position for better alignment:
|
yading@10
|
841 if (yae_adjust_position(atempo)) {
|
yading@10
|
842 // reload the fragment at the corrected position, so that the
|
yading@10
|
843 // Hann window blending would not require normalization:
|
yading@10
|
844 atempo->state = YAE_RELOAD_FRAGMENT;
|
yading@10
|
845 } else {
|
yading@10
|
846 atempo->state = YAE_OUTPUT_OVERLAP_ADD;
|
yading@10
|
847 }
|
yading@10
|
848 }
|
yading@10
|
849
|
yading@10
|
850 if (atempo->state == YAE_RELOAD_FRAGMENT) {
|
yading@10
|
851 // load additional data if necessary due to position adjustment:
|
yading@10
|
852 if (yae_load_frag(atempo, src_ref, src_end) != 0) {
|
yading@10
|
853 break;
|
yading@10
|
854 }
|
yading@10
|
855
|
yading@10
|
856 // down-mix to mono:
|
yading@10
|
857 yae_downmix(atempo, yae_curr_frag(atempo));
|
yading@10
|
858
|
yading@10
|
859 // apply rDFT:
|
yading@10
|
860 av_rdft_calc(atempo->real_to_complex, yae_curr_frag(atempo)->xdat);
|
yading@10
|
861
|
yading@10
|
862 atempo->state = YAE_OUTPUT_OVERLAP_ADD;
|
yading@10
|
863 }
|
yading@10
|
864
|
yading@10
|
865 if (atempo->state == YAE_OUTPUT_OVERLAP_ADD) {
|
yading@10
|
866 // overlap-add and output the result:
|
yading@10
|
867 if (yae_overlap_add(atempo, dst_ref, dst_end) != 0) {
|
yading@10
|
868 break;
|
yading@10
|
869 }
|
yading@10
|
870
|
yading@10
|
871 // advance to the next fragment, repeat:
|
yading@10
|
872 yae_advance_to_next_frag(atempo);
|
yading@10
|
873 atempo->state = YAE_LOAD_FRAGMENT;
|
yading@10
|
874 }
|
yading@10
|
875 }
|
yading@10
|
876 }
|
yading@10
|
877
|
yading@10
|
878 /**
|
yading@10
|
879 * Flush any buffered data from the filter.
|
yading@10
|
880 *
|
yading@10
|
881 * @return
|
yading@10
|
882 * 0 if all data was completely stored in the dst buffer,
|
yading@10
|
883 * AVERROR(EAGAIN) if more destination buffer space is required.
|
yading@10
|
884 */
|
yading@10
|
885 static int yae_flush(ATempoContext *atempo,
|
yading@10
|
886 uint8_t **dst_ref,
|
yading@10
|
887 uint8_t *dst_end)
|
yading@10
|
888 {
|
yading@10
|
889 AudioFragment *frag = yae_curr_frag(atempo);
|
yading@10
|
890 int64_t overlap_end;
|
yading@10
|
891 int64_t start_here;
|
yading@10
|
892 int64_t stop_here;
|
yading@10
|
893 int64_t offset;
|
yading@10
|
894
|
yading@10
|
895 const uint8_t *src;
|
yading@10
|
896 uint8_t *dst;
|
yading@10
|
897
|
yading@10
|
898 int src_size;
|
yading@10
|
899 int dst_size;
|
yading@10
|
900 int nbytes;
|
yading@10
|
901
|
yading@10
|
902 atempo->state = YAE_FLUSH_OUTPUT;
|
yading@10
|
903
|
yading@10
|
904 if (atempo->position[0] == frag->position[0] + frag->nsamples &&
|
yading@10
|
905 atempo->position[1] == frag->position[1] + frag->nsamples) {
|
yading@10
|
906 // the current fragment is already flushed:
|
yading@10
|
907 return 0;
|
yading@10
|
908 }
|
yading@10
|
909
|
yading@10
|
910 if (frag->position[0] + frag->nsamples < atempo->position[0]) {
|
yading@10
|
911 // finish loading the current (possibly partial) fragment:
|
yading@10
|
912 yae_load_frag(atempo, NULL, NULL);
|
yading@10
|
913
|
yading@10
|
914 if (atempo->nfrag) {
|
yading@10
|
915 // down-mix to mono:
|
yading@10
|
916 yae_downmix(atempo, frag);
|
yading@10
|
917
|
yading@10
|
918 // apply rDFT:
|
yading@10
|
919 av_rdft_calc(atempo->real_to_complex, frag->xdat);
|
yading@10
|
920
|
yading@10
|
921 // align current fragment to previous fragment:
|
yading@10
|
922 if (yae_adjust_position(atempo)) {
|
yading@10
|
923 // reload the current fragment due to adjusted position:
|
yading@10
|
924 yae_load_frag(atempo, NULL, NULL);
|
yading@10
|
925 }
|
yading@10
|
926 }
|
yading@10
|
927 }
|
yading@10
|
928
|
yading@10
|
929 // flush the overlap region:
|
yading@10
|
930 overlap_end = frag->position[1] + FFMIN(atempo->window / 2,
|
yading@10
|
931 frag->nsamples);
|
yading@10
|
932
|
yading@10
|
933 while (atempo->position[1] < overlap_end) {
|
yading@10
|
934 if (yae_overlap_add(atempo, dst_ref, dst_end) != 0) {
|
yading@10
|
935 return AVERROR(EAGAIN);
|
yading@10
|
936 }
|
yading@10
|
937 }
|
yading@10
|
938
|
yading@10
|
939 // flush the remaininder of the current fragment:
|
yading@10
|
940 start_here = FFMAX(atempo->position[1], overlap_end);
|
yading@10
|
941 stop_here = frag->position[1] + frag->nsamples;
|
yading@10
|
942 offset = start_here - frag->position[1];
|
yading@10
|
943 av_assert0(start_here <= stop_here && frag->position[1] <= start_here);
|
yading@10
|
944
|
yading@10
|
945 src = frag->data + offset * atempo->stride;
|
yading@10
|
946 dst = (uint8_t *)*dst_ref;
|
yading@10
|
947
|
yading@10
|
948 src_size = (int)(stop_here - start_here) * atempo->stride;
|
yading@10
|
949 dst_size = dst_end - dst;
|
yading@10
|
950 nbytes = FFMIN(src_size, dst_size);
|
yading@10
|
951
|
yading@10
|
952 memcpy(dst, src, nbytes);
|
yading@10
|
953 dst += nbytes;
|
yading@10
|
954
|
yading@10
|
955 atempo->position[1] += (nbytes / atempo->stride);
|
yading@10
|
956
|
yading@10
|
957 // pass-back the updated destination buffer pointer:
|
yading@10
|
958 *dst_ref = (uint8_t *)dst;
|
yading@10
|
959
|
yading@10
|
960 return atempo->position[1] == stop_here ? 0 : AVERROR(EAGAIN);
|
yading@10
|
961 }
|
yading@10
|
962
|
yading@10
|
963 static av_cold int init(AVFilterContext *ctx)
|
yading@10
|
964 {
|
yading@10
|
965 ATempoContext *atempo = ctx->priv;
|
yading@10
|
966 atempo->format = AV_SAMPLE_FMT_NONE;
|
yading@10
|
967 atempo->state = YAE_LOAD_FRAGMENT;
|
yading@10
|
968 return 0;
|
yading@10
|
969 }
|
yading@10
|
970
|
yading@10
|
971 static av_cold void uninit(AVFilterContext *ctx)
|
yading@10
|
972 {
|
yading@10
|
973 ATempoContext *atempo = ctx->priv;
|
yading@10
|
974 yae_release_buffers(atempo);
|
yading@10
|
975 }
|
yading@10
|
976
|
yading@10
|
977 static int query_formats(AVFilterContext *ctx)
|
yading@10
|
978 {
|
yading@10
|
979 AVFilterChannelLayouts *layouts = NULL;
|
yading@10
|
980 AVFilterFormats *formats = NULL;
|
yading@10
|
981
|
yading@10
|
982 // WSOLA necessitates an internal sliding window ring buffer
|
yading@10
|
983 // for incoming audio stream.
|
yading@10
|
984 //
|
yading@10
|
985 // Planar sample formats are too cumbersome to store in a ring buffer,
|
yading@10
|
986 // therefore planar sample formats are not supported.
|
yading@10
|
987 //
|
yading@10
|
988 static const enum AVSampleFormat sample_fmts[] = {
|
yading@10
|
989 AV_SAMPLE_FMT_U8,
|
yading@10
|
990 AV_SAMPLE_FMT_S16,
|
yading@10
|
991 AV_SAMPLE_FMT_S32,
|
yading@10
|
992 AV_SAMPLE_FMT_FLT,
|
yading@10
|
993 AV_SAMPLE_FMT_DBL,
|
yading@10
|
994 AV_SAMPLE_FMT_NONE
|
yading@10
|
995 };
|
yading@10
|
996
|
yading@10
|
997 layouts = ff_all_channel_layouts();
|
yading@10
|
998 if (!layouts) {
|
yading@10
|
999 return AVERROR(ENOMEM);
|
yading@10
|
1000 }
|
yading@10
|
1001 ff_set_common_channel_layouts(ctx, layouts);
|
yading@10
|
1002
|
yading@10
|
1003 formats = ff_make_format_list(sample_fmts);
|
yading@10
|
1004 if (!formats) {
|
yading@10
|
1005 return AVERROR(ENOMEM);
|
yading@10
|
1006 }
|
yading@10
|
1007 ff_set_common_formats(ctx, formats);
|
yading@10
|
1008
|
yading@10
|
1009 formats = ff_all_samplerates();
|
yading@10
|
1010 if (!formats) {
|
yading@10
|
1011 return AVERROR(ENOMEM);
|
yading@10
|
1012 }
|
yading@10
|
1013 ff_set_common_samplerates(ctx, formats);
|
yading@10
|
1014
|
yading@10
|
1015 return 0;
|
yading@10
|
1016 }
|
yading@10
|
1017
|
yading@10
|
1018 static int config_props(AVFilterLink *inlink)
|
yading@10
|
1019 {
|
yading@10
|
1020 AVFilterContext *ctx = inlink->dst;
|
yading@10
|
1021 ATempoContext *atempo = ctx->priv;
|
yading@10
|
1022
|
yading@10
|
1023 enum AVSampleFormat format = inlink->format;
|
yading@10
|
1024 int sample_rate = (int)inlink->sample_rate;
|
yading@10
|
1025 int channels = av_get_channel_layout_nb_channels(inlink->channel_layout);
|
yading@10
|
1026
|
yading@10
|
1027 ctx->outputs[0]->flags |= FF_LINK_FLAG_REQUEST_LOOP;
|
yading@10
|
1028
|
yading@10
|
1029 return yae_reset(atempo, format, sample_rate, channels);
|
yading@10
|
1030 }
|
yading@10
|
1031
|
yading@10
|
1032 static int push_samples(ATempoContext *atempo,
|
yading@10
|
1033 AVFilterLink *outlink,
|
yading@10
|
1034 int n_out)
|
yading@10
|
1035 {
|
yading@10
|
1036 int ret;
|
yading@10
|
1037
|
yading@10
|
1038 atempo->dst_buffer->sample_rate = outlink->sample_rate;
|
yading@10
|
1039 atempo->dst_buffer->nb_samples = n_out;
|
yading@10
|
1040
|
yading@10
|
1041 // adjust the PTS:
|
yading@10
|
1042 atempo->dst_buffer->pts =
|
yading@10
|
1043 av_rescale_q(atempo->nsamples_out,
|
yading@10
|
1044 (AVRational){ 1, outlink->sample_rate },
|
yading@10
|
1045 outlink->time_base);
|
yading@10
|
1046
|
yading@10
|
1047 ret = ff_filter_frame(outlink, atempo->dst_buffer);
|
yading@10
|
1048 if (ret < 0)
|
yading@10
|
1049 return ret;
|
yading@10
|
1050 atempo->dst_buffer = NULL;
|
yading@10
|
1051 atempo->dst = NULL;
|
yading@10
|
1052 atempo->dst_end = NULL;
|
yading@10
|
1053
|
yading@10
|
1054 atempo->nsamples_out += n_out;
|
yading@10
|
1055 return 0;
|
yading@10
|
1056 }
|
yading@10
|
1057
|
yading@10
|
1058 static int filter_frame(AVFilterLink *inlink, AVFrame *src_buffer)
|
yading@10
|
1059 {
|
yading@10
|
1060 AVFilterContext *ctx = inlink->dst;
|
yading@10
|
1061 ATempoContext *atempo = ctx->priv;
|
yading@10
|
1062 AVFilterLink *outlink = ctx->outputs[0];
|
yading@10
|
1063
|
yading@10
|
1064 int ret = 0;
|
yading@10
|
1065 int n_in = src_buffer->nb_samples;
|
yading@10
|
1066 int n_out = (int)(0.5 + ((double)n_in) / atempo->tempo);
|
yading@10
|
1067
|
yading@10
|
1068 const uint8_t *src = src_buffer->data[0];
|
yading@10
|
1069 const uint8_t *src_end = src + n_in * atempo->stride;
|
yading@10
|
1070
|
yading@10
|
1071 while (src < src_end) {
|
yading@10
|
1072 if (!atempo->dst_buffer) {
|
yading@10
|
1073 atempo->dst_buffer = ff_get_audio_buffer(outlink, n_out);
|
yading@10
|
1074 if (!atempo->dst_buffer)
|
yading@10
|
1075 return AVERROR(ENOMEM);
|
yading@10
|
1076 av_frame_copy_props(atempo->dst_buffer, src_buffer);
|
yading@10
|
1077
|
yading@10
|
1078 atempo->dst = atempo->dst_buffer->data[0];
|
yading@10
|
1079 atempo->dst_end = atempo->dst + n_out * atempo->stride;
|
yading@10
|
1080 }
|
yading@10
|
1081
|
yading@10
|
1082 yae_apply(atempo, &src, src_end, &atempo->dst, atempo->dst_end);
|
yading@10
|
1083
|
yading@10
|
1084 if (atempo->dst == atempo->dst_end) {
|
yading@10
|
1085 ret = push_samples(atempo, outlink, n_out);
|
yading@10
|
1086 if (ret < 0)
|
yading@10
|
1087 goto end;
|
yading@10
|
1088 }
|
yading@10
|
1089 }
|
yading@10
|
1090
|
yading@10
|
1091 atempo->nsamples_in += n_in;
|
yading@10
|
1092 end:
|
yading@10
|
1093 av_frame_free(&src_buffer);
|
yading@10
|
1094 return ret;
|
yading@10
|
1095 }
|
yading@10
|
1096
|
yading@10
|
1097 static int request_frame(AVFilterLink *outlink)
|
yading@10
|
1098 {
|
yading@10
|
1099 AVFilterContext *ctx = outlink->src;
|
yading@10
|
1100 ATempoContext *atempo = ctx->priv;
|
yading@10
|
1101 int ret;
|
yading@10
|
1102
|
yading@10
|
1103 ret = ff_request_frame(ctx->inputs[0]);
|
yading@10
|
1104
|
yading@10
|
1105 if (ret == AVERROR_EOF) {
|
yading@10
|
1106 // flush the filter:
|
yading@10
|
1107 int n_max = atempo->ring;
|
yading@10
|
1108 int n_out;
|
yading@10
|
1109 int err = AVERROR(EAGAIN);
|
yading@10
|
1110
|
yading@10
|
1111 while (err == AVERROR(EAGAIN)) {
|
yading@10
|
1112 if (!atempo->dst_buffer) {
|
yading@10
|
1113 atempo->dst_buffer = ff_get_audio_buffer(outlink, n_max);
|
yading@10
|
1114 if (!atempo->dst_buffer)
|
yading@10
|
1115 return AVERROR(ENOMEM);
|
yading@10
|
1116
|
yading@10
|
1117 atempo->dst = atempo->dst_buffer->data[0];
|
yading@10
|
1118 atempo->dst_end = atempo->dst + n_max * atempo->stride;
|
yading@10
|
1119 }
|
yading@10
|
1120
|
yading@10
|
1121 err = yae_flush(atempo, &atempo->dst, atempo->dst_end);
|
yading@10
|
1122
|
yading@10
|
1123 n_out = ((atempo->dst - atempo->dst_buffer->data[0]) /
|
yading@10
|
1124 atempo->stride);
|
yading@10
|
1125
|
yading@10
|
1126 if (n_out) {
|
yading@10
|
1127 ret = push_samples(atempo, outlink, n_out);
|
yading@10
|
1128 }
|
yading@10
|
1129 }
|
yading@10
|
1130
|
yading@10
|
1131 av_frame_free(&atempo->dst_buffer);
|
yading@10
|
1132 atempo->dst = NULL;
|
yading@10
|
1133 atempo->dst_end = NULL;
|
yading@10
|
1134
|
yading@10
|
1135 return AVERROR_EOF;
|
yading@10
|
1136 }
|
yading@10
|
1137
|
yading@10
|
1138 return ret;
|
yading@10
|
1139 }
|
yading@10
|
1140
|
yading@10
|
1141 static int process_command(AVFilterContext *ctx,
|
yading@10
|
1142 const char *cmd,
|
yading@10
|
1143 const char *arg,
|
yading@10
|
1144 char *res,
|
yading@10
|
1145 int res_len,
|
yading@10
|
1146 int flags)
|
yading@10
|
1147 {
|
yading@10
|
1148 return !strcmp(cmd, "tempo") ? yae_set_tempo(ctx, arg) : AVERROR(ENOSYS);
|
yading@10
|
1149 }
|
yading@10
|
1150
|
yading@10
|
1151 static const AVFilterPad atempo_inputs[] = {
|
yading@10
|
1152 {
|
yading@10
|
1153 .name = "default",
|
yading@10
|
1154 .type = AVMEDIA_TYPE_AUDIO,
|
yading@10
|
1155 .filter_frame = filter_frame,
|
yading@10
|
1156 .config_props = config_props,
|
yading@10
|
1157 },
|
yading@10
|
1158 { NULL }
|
yading@10
|
1159 };
|
yading@10
|
1160
|
yading@10
|
1161 static const AVFilterPad atempo_outputs[] = {
|
yading@10
|
1162 {
|
yading@10
|
1163 .name = "default",
|
yading@10
|
1164 .request_frame = request_frame,
|
yading@10
|
1165 .type = AVMEDIA_TYPE_AUDIO,
|
yading@10
|
1166 },
|
yading@10
|
1167 { NULL }
|
yading@10
|
1168 };
|
yading@10
|
1169
|
yading@10
|
1170 AVFilter avfilter_af_atempo = {
|
yading@10
|
1171 .name = "atempo",
|
yading@10
|
1172 .description = NULL_IF_CONFIG_SMALL("Adjust audio tempo."),
|
yading@10
|
1173 .init = init,
|
yading@10
|
1174 .uninit = uninit,
|
yading@10
|
1175 .query_formats = query_formats,
|
yading@10
|
1176 .process_command = process_command,
|
yading@10
|
1177 .priv_size = sizeof(ATempoContext),
|
yading@10
|
1178 .priv_class = &atempo_class,
|
yading@10
|
1179 .inputs = atempo_inputs,
|
yading@10
|
1180 .outputs = atempo_outputs,
|
yading@10
|
1181 };
|