yading@10: /* yading@10: * Audio Mix Filter yading@10: * Copyright (c) 2012 Justin Ruggles yading@10: * yading@10: * This file is part of Libav. yading@10: * yading@10: * Libav is free software; you can redistribute it and/or yading@10: * modify it under the terms of the GNU Lesser General Public yading@10: * License as published by the Free Software Foundation; either yading@10: * version 2.1 of the License, or (at your option) any later version. yading@10: * yading@10: * Libav is distributed in the hope that it will be useful, yading@10: * but WITHOUT ANY WARRANTY; without even the implied warranty of yading@10: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU yading@10: * Lesser General Public License for more details. yading@10: * yading@10: * You should have received a copy of the GNU Lesser General Public yading@10: * License along with Libav; if not, write to the Free Software yading@10: * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA yading@10: */ yading@10: yading@10: /** yading@10: * @file yading@10: * Audio Mix Filter yading@10: * yading@10: * Mixes audio from multiple sources into a single output. The channel layout, yading@10: * sample rate, and sample format will be the same for all inputs and the yading@10: * output. yading@10: */ yading@10: yading@10: #include "libavutil/audio_fifo.h" yading@10: #include "libavutil/avassert.h" yading@10: #include "libavutil/avstring.h" yading@10: #include "libavutil/channel_layout.h" yading@10: #include "libavutil/common.h" yading@10: #include "libavutil/float_dsp.h" yading@10: #include "libavutil/mathematics.h" yading@10: #include "libavutil/opt.h" yading@10: #include "libavutil/samplefmt.h" yading@10: yading@10: #include "audio.h" yading@10: #include "avfilter.h" yading@10: #include "formats.h" yading@10: #include "internal.h" yading@10: yading@10: #define INPUT_OFF 0 /**< input has reached EOF */ yading@10: #define INPUT_ON 1 /**< input is active */ yading@10: #define INPUT_INACTIVE 2 /**< input is on, but is currently inactive */ yading@10: yading@10: #define DURATION_LONGEST 0 yading@10: #define DURATION_SHORTEST 1 yading@10: #define DURATION_FIRST 2 yading@10: yading@10: yading@10: typedef struct FrameInfo { yading@10: int nb_samples; yading@10: int64_t pts; yading@10: struct FrameInfo *next; yading@10: } FrameInfo; yading@10: yading@10: /** yading@10: * Linked list used to store timestamps and frame sizes of all frames in the yading@10: * FIFO for the first input. yading@10: * yading@10: * This is needed to keep timestamps synchronized for the case where multiple yading@10: * input frames are pushed to the filter for processing before a frame is yading@10: * requested by the output link. yading@10: */ yading@10: typedef struct FrameList { yading@10: int nb_frames; yading@10: int nb_samples; yading@10: FrameInfo *list; yading@10: FrameInfo *end; yading@10: } FrameList; yading@10: yading@10: static void frame_list_clear(FrameList *frame_list) yading@10: { yading@10: if (frame_list) { yading@10: while (frame_list->list) { yading@10: FrameInfo *info = frame_list->list; yading@10: frame_list->list = info->next; yading@10: av_free(info); yading@10: } yading@10: frame_list->nb_frames = 0; yading@10: frame_list->nb_samples = 0; yading@10: frame_list->end = NULL; yading@10: } yading@10: } yading@10: yading@10: static int frame_list_next_frame_size(FrameList *frame_list) yading@10: { yading@10: if (!frame_list->list) yading@10: return 0; yading@10: return frame_list->list->nb_samples; yading@10: } yading@10: yading@10: static int64_t frame_list_next_pts(FrameList *frame_list) yading@10: { yading@10: if (!frame_list->list) yading@10: return AV_NOPTS_VALUE; yading@10: return frame_list->list->pts; yading@10: } yading@10: yading@10: static void frame_list_remove_samples(FrameList *frame_list, int nb_samples) yading@10: { yading@10: if (nb_samples >= frame_list->nb_samples) { yading@10: frame_list_clear(frame_list); yading@10: } else { yading@10: int samples = nb_samples; yading@10: while (samples > 0) { yading@10: FrameInfo *info = frame_list->list; yading@10: av_assert0(info != NULL); yading@10: if (info->nb_samples <= samples) { yading@10: samples -= info->nb_samples; yading@10: frame_list->list = info->next; yading@10: if (!frame_list->list) yading@10: frame_list->end = NULL; yading@10: frame_list->nb_frames--; yading@10: frame_list->nb_samples -= info->nb_samples; yading@10: av_free(info); yading@10: } else { yading@10: info->nb_samples -= samples; yading@10: info->pts += samples; yading@10: frame_list->nb_samples -= samples; yading@10: samples = 0; yading@10: } yading@10: } yading@10: } yading@10: } yading@10: yading@10: static int frame_list_add_frame(FrameList *frame_list, int nb_samples, int64_t pts) yading@10: { yading@10: FrameInfo *info = av_malloc(sizeof(*info)); yading@10: if (!info) yading@10: return AVERROR(ENOMEM); yading@10: info->nb_samples = nb_samples; yading@10: info->pts = pts; yading@10: info->next = NULL; yading@10: yading@10: if (!frame_list->list) { yading@10: frame_list->list = info; yading@10: frame_list->end = info; yading@10: } else { yading@10: av_assert0(frame_list->end != NULL); yading@10: frame_list->end->next = info; yading@10: frame_list->end = info; yading@10: } yading@10: frame_list->nb_frames++; yading@10: frame_list->nb_samples += nb_samples; yading@10: yading@10: return 0; yading@10: } yading@10: yading@10: yading@10: typedef struct MixContext { yading@10: const AVClass *class; /**< class for AVOptions */ yading@10: AVFloatDSPContext fdsp; yading@10: yading@10: int nb_inputs; /**< number of inputs */ yading@10: int active_inputs; /**< number of input currently active */ yading@10: int duration_mode; /**< mode for determining duration */ yading@10: float dropout_transition; /**< transition time when an input drops out */ yading@10: yading@10: int nb_channels; /**< number of channels */ yading@10: int sample_rate; /**< sample rate */ yading@10: int planar; yading@10: AVAudioFifo **fifos; /**< audio fifo for each input */ yading@10: uint8_t *input_state; /**< current state of each input */ yading@10: float *input_scale; /**< mixing scale factor for each input */ yading@10: float scale_norm; /**< normalization factor for all inputs */ yading@10: int64_t next_pts; /**< calculated pts for next output frame */ yading@10: FrameList *frame_list; /**< list of frame info for the first input */ yading@10: } MixContext; yading@10: yading@10: #define OFFSET(x) offsetof(MixContext, x) yading@10: #define A AV_OPT_FLAG_AUDIO_PARAM yading@10: #define F AV_OPT_FLAG_FILTERING_PARAM yading@10: static const AVOption amix_options[] = { yading@10: { "inputs", "Number of inputs.", yading@10: OFFSET(nb_inputs), AV_OPT_TYPE_INT, { .i64 = 2 }, 1, 32, A|F }, yading@10: { "duration", "How to determine the end-of-stream.", yading@10: OFFSET(duration_mode), AV_OPT_TYPE_INT, { .i64 = DURATION_LONGEST }, 0, 2, A|F, "duration" }, yading@10: { "longest", "Duration of longest input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_LONGEST }, INT_MIN, INT_MAX, A|F, "duration" }, yading@10: { "shortest", "Duration of shortest input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_SHORTEST }, INT_MIN, INT_MAX, A|F, "duration" }, yading@10: { "first", "Duration of first input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_FIRST }, INT_MIN, INT_MAX, A|F, "duration" }, yading@10: { "dropout_transition", "Transition time, in seconds, for volume " yading@10: "renormalization when an input stream ends.", yading@10: OFFSET(dropout_transition), AV_OPT_TYPE_FLOAT, { .dbl = 2.0 }, 0, INT_MAX, A|F }, yading@10: { NULL }, yading@10: }; yading@10: yading@10: AVFILTER_DEFINE_CLASS(amix); yading@10: yading@10: /** yading@10: * Update the scaling factors to apply to each input during mixing. yading@10: * yading@10: * This balances the full volume range between active inputs and handles yading@10: * volume transitions when EOF is encountered on an input but mixing continues yading@10: * with the remaining inputs. yading@10: */ yading@10: static void calculate_scales(MixContext *s, int nb_samples) yading@10: { yading@10: int i; yading@10: yading@10: if (s->scale_norm > s->active_inputs) { yading@10: s->scale_norm -= nb_samples / (s->dropout_transition * s->sample_rate); yading@10: s->scale_norm = FFMAX(s->scale_norm, s->active_inputs); yading@10: } yading@10: yading@10: for (i = 0; i < s->nb_inputs; i++) { yading@10: if (s->input_state[i] == INPUT_ON) yading@10: s->input_scale[i] = 1.0f / s->scale_norm; yading@10: else yading@10: s->input_scale[i] = 0.0f; yading@10: } yading@10: } yading@10: yading@10: static int config_output(AVFilterLink *outlink) yading@10: { yading@10: AVFilterContext *ctx = outlink->src; yading@10: MixContext *s = ctx->priv; yading@10: int i; yading@10: char buf[64]; yading@10: yading@10: s->planar = av_sample_fmt_is_planar(outlink->format); yading@10: s->sample_rate = outlink->sample_rate; yading@10: outlink->time_base = (AVRational){ 1, outlink->sample_rate }; yading@10: s->next_pts = AV_NOPTS_VALUE; yading@10: yading@10: s->frame_list = av_mallocz(sizeof(*s->frame_list)); yading@10: if (!s->frame_list) yading@10: return AVERROR(ENOMEM); yading@10: yading@10: s->fifos = av_mallocz(s->nb_inputs * sizeof(*s->fifos)); yading@10: if (!s->fifos) yading@10: return AVERROR(ENOMEM); yading@10: yading@10: s->nb_channels = av_get_channel_layout_nb_channels(outlink->channel_layout); yading@10: for (i = 0; i < s->nb_inputs; i++) { yading@10: s->fifos[i] = av_audio_fifo_alloc(outlink->format, s->nb_channels, 1024); yading@10: if (!s->fifos[i]) yading@10: return AVERROR(ENOMEM); yading@10: } yading@10: yading@10: s->input_state = av_malloc(s->nb_inputs); yading@10: if (!s->input_state) yading@10: return AVERROR(ENOMEM); yading@10: memset(s->input_state, INPUT_ON, s->nb_inputs); yading@10: s->active_inputs = s->nb_inputs; yading@10: yading@10: s->input_scale = av_mallocz(s->nb_inputs * sizeof(*s->input_scale)); yading@10: if (!s->input_scale) yading@10: return AVERROR(ENOMEM); yading@10: s->scale_norm = s->active_inputs; yading@10: calculate_scales(s, 0); yading@10: yading@10: av_get_channel_layout_string(buf, sizeof(buf), -1, outlink->channel_layout); yading@10: yading@10: av_log(ctx, AV_LOG_VERBOSE, yading@10: "inputs:%d fmt:%s srate:%d cl:%s\n", s->nb_inputs, yading@10: av_get_sample_fmt_name(outlink->format), outlink->sample_rate, buf); yading@10: yading@10: return 0; yading@10: } yading@10: yading@10: /** yading@10: * Read samples from the input FIFOs, mix, and write to the output link. yading@10: */ yading@10: static int output_frame(AVFilterLink *outlink, int nb_samples) yading@10: { yading@10: AVFilterContext *ctx = outlink->src; yading@10: MixContext *s = ctx->priv; yading@10: AVFrame *out_buf, *in_buf; yading@10: int i; yading@10: yading@10: calculate_scales(s, nb_samples); yading@10: yading@10: out_buf = ff_get_audio_buffer(outlink, nb_samples); yading@10: if (!out_buf) yading@10: return AVERROR(ENOMEM); yading@10: yading@10: in_buf = ff_get_audio_buffer(outlink, nb_samples); yading@10: if (!in_buf) { yading@10: av_frame_free(&out_buf); yading@10: return AVERROR(ENOMEM); yading@10: } yading@10: yading@10: for (i = 0; i < s->nb_inputs; i++) { yading@10: if (s->input_state[i] == INPUT_ON) { yading@10: int planes, plane_size, p; yading@10: yading@10: av_audio_fifo_read(s->fifos[i], (void **)in_buf->extended_data, yading@10: nb_samples); yading@10: yading@10: planes = s->planar ? s->nb_channels : 1; yading@10: plane_size = nb_samples * (s->planar ? 1 : s->nb_channels); yading@10: plane_size = FFALIGN(plane_size, 16); yading@10: yading@10: for (p = 0; p < planes; p++) { yading@10: s->fdsp.vector_fmac_scalar((float *)out_buf->extended_data[p], yading@10: (float *) in_buf->extended_data[p], yading@10: s->input_scale[i], plane_size); yading@10: } yading@10: } yading@10: } yading@10: av_frame_free(&in_buf); yading@10: yading@10: out_buf->pts = s->next_pts; yading@10: if (s->next_pts != AV_NOPTS_VALUE) yading@10: s->next_pts += nb_samples; yading@10: yading@10: return ff_filter_frame(outlink, out_buf); yading@10: } yading@10: yading@10: /** yading@10: * Returns the smallest number of samples available in the input FIFOs other yading@10: * than that of the first input. yading@10: */ yading@10: static int get_available_samples(MixContext *s) yading@10: { yading@10: int i; yading@10: int available_samples = INT_MAX; yading@10: yading@10: av_assert0(s->nb_inputs > 1); yading@10: yading@10: for (i = 1; i < s->nb_inputs; i++) { yading@10: int nb_samples; yading@10: if (s->input_state[i] == INPUT_OFF) yading@10: continue; yading@10: nb_samples = av_audio_fifo_size(s->fifos[i]); yading@10: available_samples = FFMIN(available_samples, nb_samples); yading@10: } yading@10: if (available_samples == INT_MAX) yading@10: return 0; yading@10: return available_samples; yading@10: } yading@10: yading@10: /** yading@10: * Requests a frame, if needed, from each input link other than the first. yading@10: */ yading@10: static int request_samples(AVFilterContext *ctx, int min_samples) yading@10: { yading@10: MixContext *s = ctx->priv; yading@10: int i, ret; yading@10: yading@10: av_assert0(s->nb_inputs > 1); yading@10: yading@10: for (i = 1; i < s->nb_inputs; i++) { yading@10: ret = 0; yading@10: if (s->input_state[i] == INPUT_OFF) yading@10: continue; yading@10: while (!ret && av_audio_fifo_size(s->fifos[i]) < min_samples) yading@10: ret = ff_request_frame(ctx->inputs[i]); yading@10: if (ret == AVERROR_EOF) { yading@10: if (av_audio_fifo_size(s->fifos[i]) == 0) { yading@10: s->input_state[i] = INPUT_OFF; yading@10: continue; yading@10: } yading@10: } else if (ret < 0) yading@10: return ret; yading@10: } yading@10: return 0; yading@10: } yading@10: yading@10: /** yading@10: * Calculates the number of active inputs and determines EOF based on the yading@10: * duration option. yading@10: * yading@10: * @return 0 if mixing should continue, or AVERROR_EOF if mixing should stop. yading@10: */ yading@10: static int calc_active_inputs(MixContext *s) yading@10: { yading@10: int i; yading@10: int active_inputs = 0; yading@10: for (i = 0; i < s->nb_inputs; i++) yading@10: active_inputs += !!(s->input_state[i] != INPUT_OFF); yading@10: s->active_inputs = active_inputs; yading@10: yading@10: if (!active_inputs || yading@10: (s->duration_mode == DURATION_FIRST && s->input_state[0] == INPUT_OFF) || yading@10: (s->duration_mode == DURATION_SHORTEST && active_inputs != s->nb_inputs)) yading@10: return AVERROR_EOF; yading@10: return 0; yading@10: } yading@10: yading@10: static int request_frame(AVFilterLink *outlink) yading@10: { yading@10: AVFilterContext *ctx = outlink->src; yading@10: MixContext *s = ctx->priv; yading@10: int ret; yading@10: int wanted_samples, available_samples; yading@10: yading@10: ret = calc_active_inputs(s); yading@10: if (ret < 0) yading@10: return ret; yading@10: yading@10: if (s->input_state[0] == INPUT_OFF) { yading@10: ret = request_samples(ctx, 1); yading@10: if (ret < 0) yading@10: return ret; yading@10: yading@10: ret = calc_active_inputs(s); yading@10: if (ret < 0) yading@10: return ret; yading@10: yading@10: available_samples = get_available_samples(s); yading@10: if (!available_samples) yading@10: return AVERROR(EAGAIN); yading@10: yading@10: return output_frame(outlink, available_samples); yading@10: } yading@10: yading@10: if (s->frame_list->nb_frames == 0) { yading@10: ret = ff_request_frame(ctx->inputs[0]); yading@10: if (ret == AVERROR_EOF) { yading@10: s->input_state[0] = INPUT_OFF; yading@10: if (s->nb_inputs == 1) yading@10: return AVERROR_EOF; yading@10: else yading@10: return AVERROR(EAGAIN); yading@10: } else if (ret < 0) yading@10: return ret; yading@10: } yading@10: av_assert0(s->frame_list->nb_frames > 0); yading@10: yading@10: wanted_samples = frame_list_next_frame_size(s->frame_list); yading@10: yading@10: if (s->active_inputs > 1) { yading@10: ret = request_samples(ctx, wanted_samples); yading@10: if (ret < 0) yading@10: return ret; yading@10: yading@10: ret = calc_active_inputs(s); yading@10: if (ret < 0) yading@10: return ret; yading@10: } yading@10: yading@10: if (s->active_inputs > 1) { yading@10: available_samples = get_available_samples(s); yading@10: if (!available_samples) yading@10: return AVERROR(EAGAIN); yading@10: available_samples = FFMIN(available_samples, wanted_samples); yading@10: } else { yading@10: available_samples = wanted_samples; yading@10: } yading@10: yading@10: s->next_pts = frame_list_next_pts(s->frame_list); yading@10: frame_list_remove_samples(s->frame_list, available_samples); yading@10: yading@10: return output_frame(outlink, available_samples); yading@10: } yading@10: yading@10: static int filter_frame(AVFilterLink *inlink, AVFrame *buf) yading@10: { yading@10: AVFilterContext *ctx = inlink->dst; yading@10: MixContext *s = ctx->priv; yading@10: AVFilterLink *outlink = ctx->outputs[0]; yading@10: int i, ret = 0; yading@10: yading@10: for (i = 0; i < ctx->nb_inputs; i++) yading@10: if (ctx->inputs[i] == inlink) yading@10: break; yading@10: if (i >= ctx->nb_inputs) { yading@10: av_log(ctx, AV_LOG_ERROR, "unknown input link\n"); yading@10: ret = AVERROR(EINVAL); yading@10: goto fail; yading@10: } yading@10: yading@10: if (i == 0) { yading@10: int64_t pts = av_rescale_q(buf->pts, inlink->time_base, yading@10: outlink->time_base); yading@10: ret = frame_list_add_frame(s->frame_list, buf->nb_samples, pts); yading@10: if (ret < 0) yading@10: goto fail; yading@10: } yading@10: yading@10: ret = av_audio_fifo_write(s->fifos[i], (void **)buf->extended_data, yading@10: buf->nb_samples); yading@10: yading@10: fail: yading@10: av_frame_free(&buf); yading@10: yading@10: return ret; yading@10: } yading@10: yading@10: static int init(AVFilterContext *ctx) yading@10: { yading@10: MixContext *s = ctx->priv; yading@10: int i; yading@10: yading@10: for (i = 0; i < s->nb_inputs; i++) { yading@10: char name[32]; yading@10: AVFilterPad pad = { 0 }; yading@10: yading@10: snprintf(name, sizeof(name), "input%d", i); yading@10: pad.type = AVMEDIA_TYPE_AUDIO; yading@10: pad.name = av_strdup(name); yading@10: pad.filter_frame = filter_frame; yading@10: yading@10: ff_insert_inpad(ctx, i, &pad); yading@10: } yading@10: yading@10: avpriv_float_dsp_init(&s->fdsp, 0); yading@10: yading@10: return 0; yading@10: } yading@10: yading@10: static void uninit(AVFilterContext *ctx) yading@10: { yading@10: int i; yading@10: MixContext *s = ctx->priv; yading@10: yading@10: if (s->fifos) { yading@10: for (i = 0; i < s->nb_inputs; i++) yading@10: av_audio_fifo_free(s->fifos[i]); yading@10: av_freep(&s->fifos); yading@10: } yading@10: frame_list_clear(s->frame_list); yading@10: av_freep(&s->frame_list); yading@10: av_freep(&s->input_state); yading@10: av_freep(&s->input_scale); yading@10: yading@10: for (i = 0; i < ctx->nb_inputs; i++) yading@10: av_freep(&ctx->input_pads[i].name); yading@10: } yading@10: yading@10: static int query_formats(AVFilterContext *ctx) yading@10: { yading@10: AVFilterFormats *formats = NULL; yading@10: ff_add_format(&formats, AV_SAMPLE_FMT_FLT); yading@10: ff_add_format(&formats, AV_SAMPLE_FMT_FLTP); yading@10: ff_set_common_formats(ctx, formats); yading@10: ff_set_common_channel_layouts(ctx, ff_all_channel_layouts()); yading@10: ff_set_common_samplerates(ctx, ff_all_samplerates()); yading@10: return 0; yading@10: } yading@10: yading@10: static const AVFilterPad avfilter_af_amix_outputs[] = { yading@10: { yading@10: .name = "default", yading@10: .type = AVMEDIA_TYPE_AUDIO, yading@10: .config_props = config_output, yading@10: .request_frame = request_frame yading@10: }, yading@10: { NULL } yading@10: }; yading@10: yading@10: AVFilter avfilter_af_amix = { yading@10: .name = "amix", yading@10: .description = NULL_IF_CONFIG_SMALL("Audio mixing."), yading@10: .priv_size = sizeof(MixContext), yading@10: .priv_class = &amix_class, yading@10: yading@10: .init = init, yading@10: .uninit = uninit, yading@10: .query_formats = query_formats, yading@10: yading@10: .inputs = NULL, yading@10: .outputs = avfilter_af_amix_outputs, yading@10: yading@10: .flags = AVFILTER_FLAG_DYNAMIC_INPUTS, yading@10: };