af_atempo.c File Reference

tempo scaling audio filter – an implementation of the WSOLA algorithm More...

#include <float.h>
#include "libavcodec/avfft.h"
#include "libavutil/avassert.h"
#include "libavutil/avstring.h"
#include "libavutil/channel_layout.h"
#include "libavutil/eval.h"
#include "libavutil/opt.h"
#include "libavutil/samplefmt.h"
#include "avfilter.h"
#include "audio.h"
#include "internal.h"
Include dependency graph for af_atempo.c:


Data Structures

struct  AudioFragment
 A fragment of audio waveform. More...
 
struct  ATempoContext
 Filter state machine. More...
 

Macros

#define OFFSET(x)   offsetof(ATempoContext, x)
 
#define RE_MALLOC_OR_FAIL(field, field_size)
 
#define yae_init_xdat(scalar_type, scalar_max)
 A helper macro for initializing complex data buffer with scalar data of a given type. More...
 
#define yae_blend(scalar_type)
 A helper macro for blending the overlap region of previous and current audio fragment. More...
 

Enumerations

enum  FilterState {
  YAE_LOAD_FRAGMENT, YAE_ADJUST_POSITION, YAE_RELOAD_FRAGMENT, YAE_OUTPUT_OVERLAP_ADD,
  YAE_FLUSH_OUTPUT
}
 Filter state machine states. More...
 

Functions

 AVFILTER_DEFINE_CLASS (atempo)
 
static void yae_clear (ATempoContext *atempo)
 Reset filter to initial state, do not deallocate existing local buffers. More...
 
static void yae_release_buffers (ATempoContext *atempo)
 Reset filter to initial state and deallocate all buffers. More...
 
static int yae_reset (ATempoContext *atempo, enum AVSampleFormat format, int sample_rate, int channels)
 Prepare filter for processing audio data of given format, sample rate and number of channels. More...
 
static int yae_set_tempo (AVFilterContext *ctx, const char *arg_tempo)
 
static AudioFragment * yae_curr_frag (ATempoContext *atempo)
 
static AudioFragment * yae_prev_frag (ATempoContext *atempo)
 
static void yae_downmix (ATempoContext *atempo, AudioFragment *frag)
 Initialize complex data buffer of a given audio fragment with down-mixed mono data of appropriate scalar type. More...
 
static int yae_load_data (ATempoContext *atempo, const uint8_t **src_ref, const uint8_t *src_end, int64_t stop_here)
 Populate the internal data buffer on as-needed basis. More...
 
static int yae_load_frag (ATempoContext *atempo, const uint8_t **src_ref, const uint8_t *src_end)
 Populate current audio fragment data buffer. More...
 
static void yae_advance_to_next_frag (ATempoContext *atempo)
 Prepare for loading next audio fragment. More...
 
static void yae_xcorr_via_rdft (FFTSample *xcorr, RDFTContext *complex_to_real, const FFTComplex *xa, const FFTComplex *xb, const int window)
 Calculate cross-correlation via rDFT. More...
 
static int yae_align (AudioFragment *frag, const AudioFragment *prev, const int window, const int delta_max, const int drift, FFTSample *correlation, RDFTContext *complex_to_real)
 Calculate alignment offset for given fragment relative to the previous fragment. More...
 
static int yae_adjust_position (ATempoContext *atempo)
 Adjust current fragment position for better alignment with previous fragment. More...
 
static int yae_overlap_add (ATempoContext *atempo, uint8_t **dst_ref, uint8_t *dst_end)
 Blend the overlap region of previous and current audio fragment and output the results to the given destination buffer. More...
 
static void yae_apply (ATempoContext *atempo, const uint8_t **src_ref, const uint8_t *src_end, uint8_t **dst_ref, uint8_t *dst_end)
 Feed as much data to the filter as it is able to consume and receive as much processed data in the destination buffer as it is able to produce or store. More...
 
static int yae_flush (ATempoContext *atempo, uint8_t **dst_ref, uint8_t *dst_end)
 Flush any buffered data from the filter. More...
 
static av_cold int init (AVFilterContext *ctx)
 
static av_cold void uninit (AVFilterContext *ctx)
 
static int query_formats (AVFilterContext *ctx)
 
static int config_props (AVFilterLink *inlink)
 
static int push_samples (ATempoContext *atempo, AVFilterLink *outlink, int n_out)
 
static int filter_frame (AVFilterLink *inlink, AVFrame *src_buffer)
 
static int request_frame (AVFilterLink *outlink)
 
static int process_command (AVFilterContext *ctx, const char *cmd, const char *arg, char *res, int res_len, int flags)
 

Variables

static const AVOption atempo_options []
 
static const AVFilterPad atempo_inputs []
 
static const AVFilterPad atempo_outputs []
 
AVFilter avfilter_af_atempo
 

Detailed Description

tempo scaling audio filter – an implementation of the WSOLA algorithm

Based on the MIT-licensed yaeAudioTempoFilter.h and yaeAudioFragment.h from the Apprentice Video player by Pavel Koshevoy. https://sourceforge.net/projects/apprenticevideo/

An explanation of the SOLA algorithm is available at http://www.surina.net/article/time-and-pitch-scaling.html

WSOLA is very similar to SOLA; the one major difference is that SOLA shifts audio fragments along the output stream, whereas WSOLA shifts them along the input stream.

The advantage of the WSOLA algorithm is that the overlap region size is always the same, so the blending function is constant and can be precomputed.
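
Because the overlap length is fixed, the blend weights only need to be computed once when the filter is configured (the filter prepares its own tables during yae_reset()). The following is a minimal illustrative sketch of precomputing a raised-cosine crossfade; the helper name and allocation strategy are assumptions for illustration, not code from af_atempo.c:

#include <math.h>
#include <stdlib.h>

/* Illustrative helper (not part of af_atempo.c): precompute complementary
 * raised-cosine fade-out/fade-in weights for a fixed overlap length. */
static int make_blend_windows(float **fade_out, float **fade_in, int overlap)
{
    int i;

    if (overlap < 2)
        return -1;

    *fade_out = malloc(overlap * sizeof(**fade_out));
    *fade_in  = malloc(overlap * sizeof(**fade_in));
    if (!*fade_out || !*fade_in)
        return -1;

    for (i = 0; i < overlap; i++) {
        float w = 0.5f * (1.0f - cosf(M_PI * i / (float)(overlap - 1)));
        (*fade_in)[i]  = w;          /* weight of the incoming fragment */
        (*fade_out)[i] = 1.0f - w;   /* weight of the outgoing fragment */
    }
    return 0;
}

Because the two weights always sum to one, the overlap-add performed by yae_blend() preserves the signal level across fragment boundaries.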

Definition in file af_atempo.c.

Macro Definition Documentation

#define OFFSET (   x)    offsetof(ATempoContext, x)

Definition at line 151 of file af_atempo.c.

#define RE_MALLOC_OR_FAIL ( field, field_size )
Value:
do {                                 \
    av_freep(&field);                \
    field = av_malloc(field_size);   \
    if (!field) {                    \
        return AVERROR(ENOMEM);      \
    }                                \
} while (0)

Definition at line 225 of file af_atempo.c.

Referenced by yae_reset().
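
Because the macro returns AVERROR(ENOMEM) directly, it can only be used inside functions that return an int error code, such as yae_reset(). A hypothetical usage sketch (the function, field, and size names below are placeholders, not the actual ATempoContext members):

static int resize_ring_buffer(ATempoContext *atempo, size_t new_size)
{
    /* frees any previous allocation, then reallocates or fails with ENOMEM */
    RE_MALLOC_OR_FAIL(atempo->buffer, new_size);
    return 0;
}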

#define yae_blend (   scalar_type)
Value:
do {                                                        \
    const scalar_type *aaa = (const scalar_type *)a;        \
    const scalar_type *bbb = (const scalar_type *)b;        \
                                                            \
    scalar_type *out = (scalar_type *)dst;                  \
    scalar_type *out_end = (scalar_type *)dst_end;          \
    int64_t i;                                              \
    for (i = 0; i < overlap && out < out_end;               \
         i++, atempo->position[1]++, wa++, wb++) {          \
        float w0 = *wa;                                     \
        float w1 = *wb;                                     \
        int j;                                              \
        for (j = 0; j < atempo->channels;                   \
             j++, aaa++, bbb++, out++) {                    \
            float t0 = (float)*aaa;                         \
            float t1 = (float)*bbb;                         \
                                                            \
            *out =                                          \
                frag->position[0] + i < 0 ?                 \
                *aaa :                                      \
                (scalar_type)(t0 * w0 + t1 * w1);           \
        }                                                   \
    }                                                       \
    dst = (uint8_t *)out;                                   \
} while (0)

A helper macro for blending the overlap region of previous and current audio fragment.

Definition at line 719 of file af_atempo.c.

Referenced by yae_overlap_add().

#define yae_init_xdat ( scalar_type, scalar_max )

A helper macro for initializing complex data buffer with scalar data of a given type.

Definition at line 344 of file af_atempo.c.

Referenced by yae_downmix().
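
The macro body is not expanded above. Conceptually, each (down-mixed) sample is converted to an FFTSample, normalized by scalar_max, and written into the fragment's transform buffer, which is later passed to the forward rDFT. A rough stand-alone sketch of that idea for signed 16-bit input (illustrative only, not the macro's actual expansion; the helper name is made up):

#include <stdint.h>
#include <string.h>

/* Illustrative only: convert mono int16 samples to normalized FFTSample
 * values and zero-pad the remainder of the transform buffer. */
static void fill_xdat_s16(FFTSample *xdat, const int16_t *src,
                          int nsamples, int window)
{
    int i;

    memset(xdat, 0, window * sizeof(*xdat));
    for (i = 0; i < nsamples && i < window; i++)
        xdat[i] = src[i] / 32768.0f;   /* scalar_max for int16 samples */
}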

Enumeration Type Documentation

Filter state machine states.

Enumerator
YAE_LOAD_FRAGMENT 
YAE_ADJUST_POSITION 
YAE_RELOAD_FRAGMENT 
YAE_OUTPUT_OVERLAP_ADD 
YAE_FLUSH_OUTPUT 

Definition at line 76 of file af_atempo.c.

Function Documentation

AVFILTER_DEFINE_CLASS ( atempo )

static int config_props ( AVFilterLink * inlink)
static

Definition at line 1018 of file af_atempo.c.

static int filter_frame ( AVFilterLink * inlink,
AVFrame * src_buffer 
)
static

Definition at line 1058 of file af_atempo.c.

static av_cold int init ( AVFilterContext * ctx)
static

Definition at line 963 of file af_atempo.c.

static int process_command ( AVFilterContext * ctx,
const char *  cmd,
const char *  arg,
char *  res,
int  res_len,
int  flags 
)
static

Definition at line 1141 of file af_atempo.c.

static int push_samples ( ATempoContext * atempo,
AVFilterLink * outlink,
int  n_out 
)
static

Definition at line 1032 of file af_atempo.c.

Referenced by filter_frame(), and request_frame().

static int query_formats ( AVFilterContext * ctx)
static

Definition at line 977 of file af_atempo.c.

static int request_frame ( AVFilterLink * outlink)
static

Definition at line 1097 of file af_atempo.c.

static av_cold void uninit ( AVFilterContext * ctx)
static

Definition at line 971 of file af_atempo.c.

static int yae_adjust_position ( ATempoContext * atempo)
static

Adjust current fragment position for better alignment with previous fragment.

Returns
alignment correction.

Definition at line 687 of file af_atempo.c.

Referenced by yae_apply(), and yae_flush().

static void yae_advance_to_next_frag ( ATempoContext * atempo)
static

Prepare for loading next audio fragment.

Definition at line 577 of file af_atempo.c.

Referenced by yae_apply().

static int yae_align ( AudioFragment * frag,
const AudioFragment * prev,
const int  window,
const int  delta_max,
const int  drift,
FFTSample * correlation,
RDFTContext * complex_to_real 
)
static

Calculate alignment offset for given fragment relative to the previous fragment.

Returns
alignment offset of current fragment relative to previous.

Definition at line 633 of file af_atempo.c.

Referenced by yae_adjust_position().
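
Given the time-domain cross-correlation produced by yae_xcorr_via_rdft(), the alignment step reduces to locating the lag with the strongest correlation inside the allowed search range. A simplified sketch of such a peak search (a hypothetical helper using FFMAX/FFMIN from libavutil/common.h; the real yae_align() additionally accounts for the accumulated drift):

/* Hypothetical peak search: return the offset, relative to the window
 * centre, that maximizes the cross-correlation within +/- delta_max. */
static int find_best_offset(const FFTSample *correlation,
                            int window, int delta_max)
{
    int i;
    int i0 = FFMAX(window / 2 - delta_max, 0);
    int i1 = FFMIN(window / 2 + delta_max, window - 1);
    int best = 0;
    FFTSample best_metric = -FLT_MAX;

    for (i = i0; i <= i1; i++) {
        if (correlation[i] > best_metric) {
            best_metric = correlation[i];
            best = i - window / 2;
        }
    }
    return best;
}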

static void yae_apply ( ATempoContext * atempo,
const uint8_t **  src_ref,
const uint8_t * src_end,
uint8_t **  dst_ref,
uint8_t * dst_end 
)
static

Feed as much data to the filter as it is able to consume and receive as much processed data in the destination buffer as it is able to produce or store.

Definition at line 811 of file af_atempo.c.

Referenced by filter_frame().
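
filter_frame() drives the state machine by handing yae_apply() a source range and a destination range; the call advances both pointers as far as it can, so the caller only needs to check how much was consumed and produced. A hedged sketch of that calling convention (a hypothetical caller, simplified; the real filter_frame() also manages timestamps and output frame allocation):

/* Illustrative caller: run the WSOLA state machine over one chunk of
 * interleaved input samples. */
static void apply_chunk(ATempoContext *atempo,
                        const uint8_t *src, int src_size,
                        uint8_t *dst, int dst_size)
{
    const uint8_t *src_ref = src;
    const uint8_t *src_end = src + src_size;
    uint8_t *dst_ref = dst;
    uint8_t *dst_end = dst + dst_size;

    /* consumes input and emits output until one of the buffers runs out */
    yae_apply(atempo, &src_ref, src_end, &dst_ref, dst_end);

    /* src_end - src_ref bytes of input remain unconsumed;
     * dst_ref - dst   bytes of output were produced */
}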

static void yae_clear ( ATempoContext * atempo)
static

Reset filter to initial state, do not deallocate existing local buffers.

Definition at line 165 of file af_atempo.c.

Referenced by yae_release_buffers(), and yae_reset().

static AudioFragment* yae_curr_frag ( ATempoContext * atempo)
inline static

static void yae_downmix ( ATempoContext * atempo,
AudioFragment * frag 
)
static

Initialize complex data buffer of a given audio fragment with down-mixed mono data of appropriate scalar type.

Definition at line 394 of file af_atempo.c.

Referenced by yae_apply(), and yae_flush().

static int yae_flush ( ATempoContext * atempo,
uint8_t **  dst_ref,
uint8_t * dst_end 
)
static

Flush any buffered data from the filter.

Returns
0 if all data was completely stored in the dst buffer, AVERROR(EAGAIN) if more destination buffer space is required.

Definition at line 885 of file af_atempo.c.

Referenced by request_frame().

static int yae_load_data ( ATempoContext * atempo,
const uint8_t **  src_ref,
const uint8_t * src_end,
int64_t  stop_here 
)
static

Populate the internal data buffer on as-needed basis.

Returns
0 if requested data was already available or was successfully loaded, AVERROR(EAGAIN) if more input data is required.

Definition at line 422 of file af_atempo.c.

Referenced by yae_load_frag().

static int yae_load_frag ( ATempoContext * atempo,
const uint8_t **  src_ref,
const uint8_t * src_end 
)
static

Populate current audio fragment data buffer.

Returns
0 when the fragment is ready, AVERROR(EAGAIN) if more input data is required.

Definition at line 497 of file af_atempo.c.

Referenced by yae_apply(), and yae_flush().

static int yae_overlap_add ( ATempoContext * atempo,
uint8_t **  dst_ref,
uint8_t * dst_end 
)
static

Blend the overlap region of previous and current audio fragment and output the results to the given destination buffer.

Returns
0 if the overlap region was completely stored in the dst buffer, AVERROR(EAGAIN) if more destination buffer space is required.

Definition at line 756 of file af_atempo.c.

Referenced by yae_apply(), and yae_flush().

static AudioFragment* yae_prev_frag ( ATempoContext * atempo)
inline static

Definition at line 335 of file af_atempo.c.

Referenced by yae_adjust_position(), yae_advance_to_next_frag(), and yae_overlap_add().

static void yae_release_buffers ( ATempoContext * atempo)
static

Reset filter to initial state and deallocate all buffers.

Definition at line 203 of file af_atempo.c.

Referenced by uninit(), and yae_reset().

static int yae_reset ( ATempoContext * atempo,
enum AVSampleFormat  format,
int  sample_rate,
int  channels 
)
static

Prepare filter for processing audio data of given format, sample rate and number of channels.

Definition at line 239 of file af_atempo.c.

Referenced by config_props().

static int yae_set_tempo ( AVFilterContext * ctx,
const char *  arg_tempo 
)
static

Definition at line 309 of file af_atempo.c.

Referenced by process_command().

static void yae_xcorr_via_rdft ( FFTSample * xcorr,
RDFTContext * complex_to_real,
const FFTComplex * xa,
const FFTComplex * xb,
const int  window 
)
static

Calculate cross-correlation via rDFT.

Multiply two vectors of complex numbers (result of real_to_complex rDFT) and transform back via complex_to_real rDFT.

Definition at line 599 of file af_atempo.c.

Referenced by yae_align().
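
In frequency-domain terms this is the classic correlation theorem: multiply the rDFT of one fragment by the complex conjugate of the other and transform the product back to the time domain. A simplified sketch of that idea using the avfft API (the per-bin loop below ignores the packed DC/Nyquist handling that av_rdft_calc()'s output layout requires, so it is a stand-in rather than the exact body of yae_xcorr_via_rdft()):

/* Simplified frequency-domain cross-correlation: xa and xb hold forward
 * rDFT spectra of two fragments; the conjugate product is transformed
 * back so the peak of xcorr marks the best alignment lag. */
static void xcorr_sketch(FFTSample *xcorr, RDFTContext *complex_to_real,
                         const FFTComplex *xa, const FFTComplex *xb,
                         const int window)
{
    FFTComplex *xc = (FFTComplex *)xcorr;
    int i;

    for (i = 0; i < window; i++) {
        /* xc[i] = xa[i] * conj(xb[i]) */
        xc[i].re = xa[i].re * xb[i].re + xa[i].im * xb[i].im;
        xc[i].im = xa[i].im * xb[i].re - xa[i].re * xb[i].im;
    }

    av_rdft_calc(complex_to_real, xcorr);  /* inverse rDFT back to time domain */
}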

Variable Documentation

const AVFilterPad atempo_inputs[]
static
Initial value:
= {
{
.name = "default",
.filter_frame = filter_frame,
.config_props = config_props,
},
{ NULL }
}

Definition at line 1151 of file af_atempo.c.

const AVOption atempo_options[]
static
Initial value:
= {
    { "tempo", "set tempo scale factor",
      OFFSET(tempo), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 0.5, 2.0,
      AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM },
    { NULL }
}

Definition at line 153 of file af_atempo.c.

const AVFilterPad atempo_outputs[]
static
Initial value:
= {
{
.name = "default",
.request_frame = request_frame,
},
{ NULL }
}

Definition at line 1161 of file af_atempo.c.

AVFilter avfilter_af_atempo
Initial value:
= {
    .name            = "atempo",
    .description     = NULL_IF_CONFIG_SMALL("Adjust audio tempo."),
    .init            = init,
    .uninit          = uninit,
    .query_formats   = query_formats,
    .process_command = process_command,
    .priv_size       = sizeof(ATempoContext),
    .priv_class      = &atempo_class,
    .inputs          = atempo_inputs,
    .outputs         = atempo_outputs,
}

Definition at line 1170 of file af_atempo.c.