af_silencedetect.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2012 Clément Bœsch <ubitux@gmail.com>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * Audio silence detector
24  */
25 
26 #include <float.h> /* DBL_MAX */
27 
29 #include "libavutil/opt.h"
30 #include "libavutil/timestamp.h"
31 #include "audio.h"
32 #include "formats.h"
33 #include "avfilter.h"
34 #include "internal.h"
35 
36 typedef struct {
37  const AVClass *class;
38  double noise; ///< noise amplitude ratio
39  double duration; ///< minimum duration of silence until notification
40  int64_t nb_null_samples; ///< current number of continuous zero samples
41  int64_t start; ///< if silence is detected, this value contains the time of the first zero sample
42  int last_sample_rate; ///< last sample rate to check for sample rate changes
44 
45 #define OFFSET(x) offsetof(SilenceDetectContext, x)
46 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_AUDIO_PARAM
47 static const AVOption silencedetect_options[] = {
48  { "n", "set noise tolerance", OFFSET(noise), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0, DBL_MAX, FLAGS },
49  { "noise", "set noise tolerance", OFFSET(noise), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0, DBL_MAX, FLAGS },
50  { "d", "set minimum duration in seconds", OFFSET(duration), AV_OPT_TYPE_DOUBLE, {.dbl=2.}, 0, 24*60*60, FLAGS },
51  { "duration", "set minimum duration in seconds", OFFSET(duration), AV_OPT_TYPE_DOUBLE, {.dbl=2.}, 0, 24*60*60, FLAGS },
52  { NULL },
53 };
54 
55 AVFILTER_DEFINE_CLASS(silencedetect);
56 
57 static char *get_metadata_val(AVFrame *insamples, const char *key)
58 {
59  AVDictionaryEntry *e = av_dict_get(insamples->metadata, key, NULL, 0);
60  return e && e->value ? e->value : NULL;
61 }
62 
63 static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
64 {
65  int i;
66  SilenceDetectContext *silence = inlink->dst->priv;
68  const int srate = inlink->sample_rate;
69  const int nb_samples = insamples->nb_samples * nb_channels;
70  const int64_t nb_samples_notify = srate * silence->duration * nb_channels;
71 
72  // scale number of null samples to the new sample rate
73  if (silence->last_sample_rate && silence->last_sample_rate != srate)
74  silence->nb_null_samples =
75  srate * silence->nb_null_samples / silence->last_sample_rate;
76  silence->last_sample_rate = srate;
77 
78  // TODO: support more sample formats
79  // TODO: document metadata
80  if (insamples->format == AV_SAMPLE_FMT_DBL) {
81  double *p = (double *)insamples->data[0];
82 
83  for (i = 0; i < nb_samples; i++, p++) {
84  if (*p < silence->noise && *p > -silence->noise) {
85  if (!silence->start) {
86  silence->nb_null_samples++;
87  if (silence->nb_null_samples >= nb_samples_notify) {
88  silence->start = insamples->pts - (int64_t)(silence->duration / av_q2d(inlink->time_base) + .5);
89  av_dict_set(&insamples->metadata, "lavfi.silence_start",
90  av_ts2timestr(silence->start, &inlink->time_base), 0);
91  av_log(silence, AV_LOG_INFO, "silence_start: %s\n",
92  get_metadata_val(insamples, "lavfi.silence_start"));
93  }
94  }
95  } else {
96  if (silence->start) {
97  av_dict_set(&insamples->metadata, "lavfi.silence_end",
98  av_ts2timestr(insamples->pts, &inlink->time_base), 0);
99  av_dict_set(&insamples->metadata, "lavfi.silence_duration",
100  av_ts2timestr(insamples->pts - silence->start, &inlink->time_base), 0);
101  av_log(silence, AV_LOG_INFO,
102  "silence_end: %s | silence_duration: %s\n",
103  get_metadata_val(insamples, "lavfi.silence_end"),
104  get_metadata_val(insamples, "lavfi.silence_duration"));
105  }
106  silence->nb_null_samples = silence->start = 0;
107  }
108  }
109  }
110 
111  return ff_filter_frame(inlink->dst->outputs[0], insamples);
112 }
113 
115 {
118  static const enum AVSampleFormat sample_fmts[] = {
121  };
122 
123  layouts = ff_all_channel_layouts();
124  if (!layouts)
125  return AVERROR(ENOMEM);
126  ff_set_common_channel_layouts(ctx, layouts);
127 
128  formats = ff_make_format_list(sample_fmts);
129  if (!formats)
130  return AVERROR(ENOMEM);
131  ff_set_common_formats(ctx, formats);
132 
133  formats = ff_all_samplerates();
134  if (!formats)
135  return AVERROR(ENOMEM);
136  ff_set_common_samplerates(ctx, formats);
137 
138  return 0;
139 }
140 
142  {
143  .name = "default",
144  .type = AVMEDIA_TYPE_AUDIO,
145  .get_audio_buffer = ff_null_get_audio_buffer,
146  .filter_frame = filter_frame,
147  },
148  { NULL }
149 };
150 
152  {
153  .name = "default",
154  .type = AVMEDIA_TYPE_AUDIO,
155  },
156  { NULL }
157 };
158 
160  .name = "silencedetect",
161  .description = NULL_IF_CONFIG_SMALL("Detect silence."),
162  .priv_size = sizeof(SilenceDetectContext),
164  .inputs = silencedetect_inputs,
165  .outputs = silencedetect_outputs,
166  .priv_class = &silencedetect_class,
167 };
This structure describes decoded (raw) audio or video data.
Definition: frame.h:76
AVOption.
Definition: opt.h:251
AVFilter avfilter_af_silencedetect
static const AVFilterPad outputs[]
Definition: af_ashowinfo.c:117
external API header
AVDictionaryEntry * av_dict_get(AVDictionary *m, const char *key, const AVDictionaryEntry *prev, int flags)
Get a dictionary entry with matching key.
Definition: dict.c:39
AVFilterFormats * ff_make_format_list(const int *fmts)
Create a list of supported formats.
Definition: formats.c:308
const char * name
Pad name.
static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
it can be given away to ff_start_frame * A reference passed to ff_filter_frame (or the deprecated ff_start_frame) is given away and must no longer be used. * A reference created with avfilter_ref_buffer belongs to the code that created it. * A reference obtained with ff_get_video_buffer or ff_get_audio_buffer belongs to the code that requested it. * A reference given as return value by the get_video_buffer or get_audio_buffer method is given away and must no longer be used. Link reference fields: The AVFilterLink structure has a few AVFilterBufferRef fields. The cur_buf and out_buf were used with the deprecated start_frame/draw_slice/end_frame API and should no longer be used. src_buf
AVOptions.
timestamp utils, mostly useful for debugging/logging purposes
static const AVFilterPad silencedetect_inputs[]
double duration
minimum duration of silence until notification
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:159
#define OFFSET(x)
static double av_q2d(AVRational a)
Convert rational to double.
Definition: rational.h:69
AVDictionary * metadata
metadata.
Definition: frame.h:401
void ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
A helper for query_formats() which sets all links to the same list of formats.
Definition: formats.c:545
static int64_t duration
Definition: ffplay.c:294
A filter pad used for either input or output.
static char * get_metadata_val(AVFrame *insamples, const char *key)
static const AVFilterPad silencedetect_outputs[]
#define FLAGS
#define av_ts2timestr(ts, tb)
Convenience macro, the return value should be used only directly in function arguments but never stand-alone.
Definition: timestamp.h:72
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
void * priv
private data for use by the filter
Definition: avfilter.h:545
int av_get_channel_layout_nb_channels(uint64_t channel_layout)
Return the number of channels in the channel layout.
void av_log(void *avcl, int level, const char *fmt,...)
Definition: log.c:246
int64_t start
if silence is detected, this value contains the time of the first zero sample
audio channel layout utility functions
double noise
noise amplitude ratio
AVFrame * ff_null_get_audio_buffer(AVFilterLink *link, int nb_samples)
get_audio_buffer() handler for filters which simply pass audio along
Definition: audio.c:36
AVFilterChannelLayouts * ff_all_channel_layouts(void)
Construct an empty AVFilterChannelLayouts/AVFilterFormats struct – representing any channel layout (...
Definition: formats.c:402
A list of supported channel layouts.
Definition: formats.h:85
int format
format of the frame, -1 if unknown or unset Values correspond to enum AVPixelFormat for video frames...
Definition: frame.h:134
NULL
Definition: eval.c:55
static int noise(AVBitStreamFilterContext *bsfc, AVCodecContext *avctx, const char *args, uint8_t **poutbuf, int *poutbuf_size, const uint8_t *buf, int buf_size, int keyframe)
Definition: noise_bsf.c:28
int av_dict_set(AVDictionary **pm, const char *key, const char *value, int flags)
Set the given entry in *pm, overwriting an existing entry.
Definition: dict.c:62
AVFILTER_DEFINE_CLASS(silencedetect)
Describe the class of an AVClass context structure.
Definition: log.h:50
Filter definition.
Definition: avfilter.h:436
synthesis window for stochastic i
static int query_formats(AVFilterContext *ctx)
const char * name
filter name
Definition: avfilter.h:437
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFilterBuffer structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later.That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another.Buffer references ownership and permissions
AVFilterLink ** outputs
array of pointers to output links
Definition: avfilter.h:539
enum MovChannelLayoutTag * layouts
Definition: mov_chan.c:434
AVFilterFormats * ff_all_samplerates(void)
Definition: formats.c:396
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:87
void ff_set_common_samplerates(AVFilterContext *ctx, AVFilterFormats *samplerates)
Definition: formats.c:533
The official guide to swscale for confused that consecutive non overlapping rectangles of slice_bottom special converter These generally are unscaled converters of common formats
Definition: swscale.txt:33
AVSampleFormat
Audio Sample Formats.
Definition: samplefmt.h:49
int64_t nb_null_samples
current number of continuous zero samples
static const AVOption silencedetect_options[]
char * value
Definition: dict.h:82
A list of supported formats for one end of a filter link.
Definition: formats.h:64
An instance of a filter.
Definition: avfilter.h:524
static enum AVSampleFormat sample_fmts[]
Definition: adpcmenc.c:700
#define AV_LOG_INFO
Definition: log.h:156
void ff_set_common_channel_layouts(AVFilterContext *ctx, AVFilterChannelLayouts *layouts)
A helper for query_formats() which sets all links to the same list of channel layouts/sample rates...
Definition: formats.c:526
int nb_channels
int last_sample_rate
last sample rate to check for sample rate changes
internal API functions
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several inputs
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:127
for(j=16;j >0;--j)