libavresample/dither.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
3  *
4  * Triangular with Noise Shaping is based on opusfile.
5  * Copyright (c) 1994-2012 by the Xiph.Org Foundation and contributors
6  *
7  * This file is part of Libav.
8  *
9  * Libav is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * Libav is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with Libav; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 /**
25  * @file
26  * Dithered Audio Sample Quantization
27  *
28  * Converts from dbl, flt, or s32 to s16 using dithering.
29  */
30 
31 #include <math.h>
32 #include <stdint.h>
33 
34 #include "libavutil/common.h"
35 #include "libavutil/lfg.h"
36 #include "libavutil/mem.h"
37 #include "libavutil/samplefmt.h"
38 #include "audio_convert.h"
39 #include "dither.h"
40 #include "internal.h"
41 
/* Per-channel dither state.
 * NOTE(review): the embedded listing numbers skip 45, 47 and 48, so some
 * members are not visible in this view; other code in this file also reads
 * and writes state->lfg, state->noise_buf_size and state->noise_buf_ptr. */
42 typedef struct DitherState {
43  int mute; /* run length of zero-valued input samples; reset to 0 on any non-zero sample */
44  unsigned int seed; /* seed handed to av_lfg_init() when this channel's noise is generated */
46  float *noise_buf; /* pre-generated dither noise, allocated by generate_dither_noise() */
49  float dither_a[4]; /* noise-shaping history, newest first; [0] holds the latest error term */
50  float dither_b[4]; /* noise-shaping history, newest first; [0] holds the latest clipped quantization error (0 while muted) */
51 } DitherState;
52 
/* Shared state for one dithering converter instance.
 * NOTE(review): the embedded listing numbers skip 54, 55, 57 and 73, so some
 * members are not visible in this view; other code in this file also uses
 * c->ddsp, c->method, c->ch_map_info and c->samples_align. */
53 struct DitherContext {
56  int apply_map; /* non-zero: input is routed through c->ch_map_info (see ff_convert_dither) */
58 
59  int mute_dither_threshold; // threshold for disabling dither
60  int mute_reset_threshold; // threshold for resetting noise shaping
61  const float *ns_coef_b; // noise shaping coeffs
62  const float *ns_coef_a; // noise shaping coeffs
63 
64  int channels; /* number of entries in state[] */
65  DitherState *state; // dither states for each channel
66 
67  AudioData *flt_data; // input data in fltp
68  AudioData *s16_data; // dithered output in s16p
69  AudioConvert *ac_in; // converter for input to fltp
70  AudioConvert *ac_out; // converter for s16p to s16 (if needed)
71 
    /* quantizer chosen per call in ff_convert_dither(): either the DSP
     * context's routine or the plain C fallback quantize_c() */
72  void (*quantize)(int16_t *dst, const float *src, float *dither, int len);
74 };
75 
76 /* mute threshold, in seconds */
77 #define MUTE_THRESHOLD_SEC 0.000333
78 
79 /* scale factor for 16-bit output.
80  The signal is attenuated slightly to avoid clipping */
81 #define S16_SCALE 32753.0f
82 
83 /* scale to convert lfg from INT_MIN/INT_MAX to -0.5/0.5 */
84 #define LFG_SCALE (1.0f / (2.0f * INT32_MAX))
85 
86 /* noise shaping coefficients */
87 
/* Feed-forward ("b") taps selected by ff_dither_alloc() when sample_rate is
 * 48000; multiplied with DitherState.dither_b[] in quantize_triangular_ns(). */
88 static const float ns_48_coef_b[4] = {
89  2.2374f, -0.7339f, -0.1251f, -0.6033f
90 };
91 
/* Feedback ("a") taps paired with ns_48_coef_b; multiplied with
 * DitherState.dither_a[] and subtracted from the error sum. */
92 static const float ns_48_coef_a[4] = {
93  0.9030f, 0.0116f, -0.5853f, -0.2571f
94 };
95 
/* Feed-forward taps used by ff_dither_alloc() for any noise-shaping rate
 * other than 48000 (the visible warning text names 44100 Hz). */
96 static const float ns_44_coef_b[4] = {
97  2.2061f, -0.4707f, -0.2534f, -0.6213f
98 };
99 
/* Feedback taps paired with ns_44_coef_b. */
100 static const float ns_44_coef_a[4] = {
101  1.0587f, 0.0676f, -0.6054f, -0.2738f
102 };
103 
104 static void dither_int_to_float_rectangular_c(float *dst, int *src, int len)
105 {
106  int i;
107  for (i = 0; i < len; i++)
108  dst[i] = src[i] * LFG_SCALE;
109 }
110 
111 static void dither_int_to_float_triangular_c(float *dst, int *src0, int len)
112 {
113  int i;
114  int *src1 = src0 + len;
115 
116  for (i = 0; i < len; i++) {
117  float r = src0[i] * LFG_SCALE;
118  r += src1[i] * LFG_SCALE;
119  dst[i] = r;
120  }
121 }
122 
123 static void quantize_c(int16_t *dst, const float *src, float *dither, int len)
124 {
125  int i;
126  for (i = 0; i < len; i++)
127  dst[i] = av_clip_int16(lrintf(src[i] * S16_SCALE + dither[i]));
128 }
129 
#define SQRT_1_6 0.40824829046386301723f

/**
 * High-pass filter a noise buffer in place.
 *
 * For every index k in [0, len - 2) the value becomes
 * (-src[k] + 2 * src[k + 1] - src[k + 2]) * SQRT_1_6. The walk goes forward,
 * so both look-ahead inputs are still unfiltered when they are read. The
 * last two entries are left untouched, and buffers shorter than three
 * entries are not modified. The filter is from libswresample in FFmpeg.
 *
 * @param src  noise buffer, filtered in place
 * @param len  number of entries in src
 */
static void dither_highpass_filter(float *src, int len)
{
    float *p = src;
    int    remaining;

    for (remaining = len - 2; remaining > 0; remaining--) {
        p[0] = (-p[0] + 2 * p[1] - p[2]) * SQRT_1_6;
        p++;
    }
}
140 
/* (Re)generate the dither noise buffer held in `state`.
 * NOTE(review): the line carrying the function name and leading parameters
 * (listing line 141) is missing from this view; the body uses `c` and
 * `state`.
 * Returns 0 on success, or AVERROR(ENOMEM) when the buffer cannot be
 * allocated; in that case noise_buf is NULL and noise_buf_size is 0. */
142  int min_samples)
143 {
144  int i;
    /* min_samples aligned to 16 via FFALIGN, plus 16 extra samples */
145  int nb_samples = FFALIGN(min_samples, 16) + 16;
    /* rectangular dither consumes one random value per sample; every other
     * method is given two values per sample */
146  int buf_samples = nb_samples *
147  (c->method == AV_RESAMPLE_DITHER_RECTANGULAR ? 1 : 2);
148  unsigned int *noise_buf_ui;
149 
    /* drop any previous buffer and reset the bookkeeping before allocating */
150  av_freep(&state->noise_buf);
151  state->noise_buf_size = state->noise_buf_ptr = 0;
152 
153  state->noise_buf = av_malloc(buf_samples * sizeof(*state->noise_buf));
154  if (!state->noise_buf)
155  return AVERROR(ENOMEM);
    /* the advertised size excludes the 16 extra samples allocated above */
156  state->noise_buf_size = FFALIGN(min_samples, 16);
    /* the raw random integers are written into the same storage that is
     * then converted to float in place */
157  noise_buf_ui = (unsigned int *)state->noise_buf;
158 
    /* re-seed from the per-channel seed and fill the buffer with random values */
159  av_lfg_init(&state->lfg, state->seed);
160  for (i = 0; i < buf_samples; i++)
161  noise_buf_ui[i] = av_lfg_get(&state->lfg);
162 
163  c->ddsp.dither_int_to_float(state->noise_buf, noise_buf_ui, nb_samples);
164 
    /* NOTE(review): listing line 165 is missing here; the call below may be
     * guarded by a condition that is not visible in this view. */
166  dither_highpass_filter(state->noise_buf, nb_samples);
167 
168  return 0;
169 }
170 
/* Quantize one channel to 16-bit using dither plus a 4-tap noise-shaping
 * filter whose history lives in state->dither_a / state->dither_b.
 * NOTE(review): the line carrying the function name and leading parameters
 * (listing line 171) is missing from this view; the body uses `c` and
 * `state`. */
172  int16_t *dst, const float *src,
173  int nb_samples)
174 {
175  int i, j;
    /* dither is read starting at the state's current buffer position */
176  float *dither = &state->noise_buf[state->noise_buf_ptr];
177 
    /* after a long enough run of zero input, clear the error-term history */
178  if (state->mute > c->mute_reset_threshold)
179  memset(state->dither_a, 0, sizeof(state->dither_a));
180 
181  for (i = 0; i < nb_samples; i++) {
182  float err = 0;
183  float sample = src[i] * S16_SCALE;
184 
    /* shaped error: b taps applied to dither_b minus a taps applied to dither_a */
185  for (j = 0; j < 4; j++) {
186  err += c->ns_coef_b[j] * state->dither_b[j] -
187  c->ns_coef_a[j] * state->dither_a[j];
188  }
    /* shift both histories by one slot to make room at index 0 */
189  for (j = 3; j > 0; j--) {
190  state->dither_a[j] = state->dither_a[j - 1];
191  state->dither_b[j] = state->dither_b[j - 1];
192  }
193  state->dither_a[0] = err;
194  sample -= err;
195 
    /* while muted, round without dither and record a zero quantization error */
196  if (state->mute > c->mute_dither_threshold) {
197  dst[i] = av_clip_int16(lrintf(sample));
198  state->dither_b[0] = 0;
199  } else {
200  dst[i] = av_clip_int16(lrintf(sample + dither[i]));
    /* the stored quantization error is limited to [-1.5, 1.5] */
201  state->dither_b[0] = av_clipf(dst[i] - sample, -1.5f, 1.5f);
202  }
203 
    /* mute counts consecutive zero input samples; any non-zero sample resets it */
204  state->mute++;
205  if (src[i])
206  state->mute = 0;
207  }
208 }
209 
/* Quantize every channel of planar float input into planar 16-bit output,
 * refreshing or rewinding each channel's noise buffer as needed.
 * Returns 0 on success or the negative error from generate_dither_noise(). */
210 static int convert_samples(DitherContext *c, int16_t **dst, float * const *src,
211  int channels, int nb_samples)
212 {
213  int ch, ret;
214  int aligned_samples = FFALIGN(nb_samples, 16);
215 
216  for (ch = 0; ch < channels; ch++) {
217  DitherState *state = &c->state[ch];
218 
    /* buffer too small for this call: regenerate it; otherwise rewind to the
     * start when the unread remainder cannot cover this call */
219  if (state->noise_buf_size < aligned_samples) {
220  ret = generate_dither_noise(c, state, nb_samples);
221  if (ret < 0)
222  return ret;
223  } else if (state->noise_buf_size - state->noise_buf_ptr < aligned_samples) {
224  state->noise_buf_ptr = 0;
225  }
226 
    /* NOTE(review): listing line 227 is missing here; it presumably opens the
     * `if` that pairs with the `} else {` below and selects the noise-shaping
     * path -- confirm against the full source. */
228  quantize_triangular_ns(c, state, dst[ch], src[ch], nb_samples);
229  } else {
230  c->quantize(dst[ch], src[ch],
231  &state->noise_buf[state->noise_buf_ptr],
232  FFALIGN(nb_samples, c->samples_align));
233  }
234 
    /* consume the 16-aligned sample count from the noise buffer */
235  state->noise_buf_ptr += aligned_samples;
236  }
237 
238  return 0;
239 }
240 
/* Body of the dithering conversion entry point: brings the input into a
 * planar float buffer, quantizes it to planar s16 and interleaves into dst
 * when dst is AV_SAMPLE_FMT_S16.
 * NOTE(review): the function header (listing line 241) is missing from this
 * view; the body uses `c`, `dst` and `src`.
 * Returns 0 on success or the first negative error from a helper. */
242 {
243  int ret;
244  AudioData *flt_data;
245 
246  /* output directly to dst if it is planar */
247  if (dst->sample_fmt == AV_SAMPLE_FMT_S16P)
248  c->s16_data = dst;
249  else {
250  /* make sure s16_data is large enough for the output */
251  ret = ff_audio_data_realloc(c->s16_data, src->nb_samples);
252  if (ret < 0)
253  return ret;
254  }
255 
    /* a private float buffer is needed whenever the input must be converted
     * or remapped */
256  if (src->sample_fmt != AV_SAMPLE_FMT_FLTP || c->apply_map) {
257  /* make sure flt_data is large enough for the input */
258  ret = ff_audio_data_realloc(c->flt_data, src->nb_samples);
259  if (ret < 0)
260  return ret;
261  flt_data = c->flt_data;
262  }
263 
264  if (src->sample_fmt != AV_SAMPLE_FMT_FLTP) {
265  /* convert input samples to fltp and scale to s16 range */
266  ret = ff_audio_convert(c->ac_in, flt_data, src);
267  if (ret < 0)
268  return ret;
269  } else if (c->apply_map) {
270  ret = ff_audio_data_copy(flt_data, src, c->ch_map_info);
271  if (ret < 0)
272  return ret;
273  } else {
    /* input is already planar float and needs no remapping: use it directly */
274  flt_data = src;
275  }
276 
277  /* check alignment and padding constraints */
    /* NOTE(review): listing line 278 is missing here; it presumably opens the
     * block closed at listing line 290 -- confirm against the full source. */
279  int ptr_align = FFMIN(flt_data->ptr_align, c->s16_data->ptr_align);
280  int samples_align = FFMIN(flt_data->samples_align, c->s16_data->samples_align);
281  int aligned_len = FFALIGN(src->nb_samples, c->ddsp.samples_align);
282 
    /* use the DSP quantizer only when both buffers meet its pointer alignment
     * and have room for the padded length; otherwise fall back to quantize_c */
283  if (!(ptr_align % c->ddsp.ptr_align) && samples_align >= aligned_len) {
284  c->quantize = c->ddsp.quantize;
    /* NOTE(review): listing line 285 is missing here. */
286  } else {
287  c->quantize = quantize_c;
288  c->samples_align = 1;
289  }
290  }
291 
292  ret = convert_samples(c, (int16_t **)c->s16_data->data,
293  (float * const *)flt_data->data, src->channels,
294  src->nb_samples);
    /* NOTE(review): on this and the earlier error returns, c->s16_data is
     * left pointing at the caller's dst when dst is planar; it is only reset
     * to NULL on the success path below. */
295  if (ret < 0)
296  return ret;
297 
298  c->s16_data->nb_samples = src->nb_samples;
299 
300  /* interleave output to dst if needed */
301  if (dst->sample_fmt == AV_SAMPLE_FMT_S16) {
302  ret = ff_audio_convert(c->ac_out, dst, c->s16_data);
303  if (ret < 0)
304  return ret;
305  } else
    /* planar output was written straight into dst; drop the borrowed pointer */
306  c->s16_data = NULL;
307 
308  return 0;
309 }
310 
/* Body of the DitherContext destructor: releases the per-channel noise
 * buffers, the state array and the context, and clears the caller's pointer.
 * Safe to call when *cp is NULL.
 * NOTE(review): the function header (listing line 311) and listing lines
 * 318-321 are missing from this view, so any releases performed there are
 * not visible here. */
312 {
313  DitherContext *c = *cp;
314  int ch;
315 
316  if (!c)
317  return;
    /* c->channels is only set after c->state has been allocated (see
     * ff_dither_alloc), so this loop does not run on a missing state array */
322  for (ch = 0; ch < c->channels; ch++)
323  av_free(c->state[ch].noise_buf);
324  av_free(c->state);
    /* frees the context itself and sets *cp to NULL */
325  av_freep(cp);
326 }
327 
/* Fill a DitherDSPContext with the C defaults (quantize_c, no alignment
 * requirements), then let the x86 init override them when ARCH_X86 is set. */
328 static void dither_init(DitherDSPContext *ddsp,
329  enum AVResampleDitherMethod method)
330 {
331  ddsp->quantize = quantize_c;
332  ddsp->ptr_align = 1;
333  ddsp->samples_align = 1;
334 
    /* NOTE(review): listing lines 336 and 338 (the bodies of this if/else)
     * are missing from this view; presumably they select the rectangular or
     * triangular int-to-float converter -- confirm against the full source. */
335  if (method == AV_RESAMPLE_DITHER_RECTANGULAR)
337  else
339 
340  if (ARCH_X86)
341  ff_dither_init_x86(ddsp, method);
342 }
343 
/* Allocate and initialize a DitherContext for the given formats, channel
 * count and sample rate. Returns the new context, or NULL on unsupported
 * formats or allocation failure (partial allocations are released through
 * ff_dither_free()).
 * NOTE(review): several lines are missing from this view (listing lines 344,
 * 353, 356, 368, 372, 377, 396, 409 and 423), including the line with the
 * function name and the `avr` parameter; comments below only describe what
 * the visible lines establish. */
345  enum AVSampleFormat out_fmt,
346  enum AVSampleFormat in_fmt,
347  int channels, int sample_rate, int apply_map)
348 {
349  AVLFG seed_gen;
350  DitherContext *c;
351  int ch;
352 
    /* NOTE(review): the first half of this condition (listing line 353) is
     * missing; the visible half rejects inputs of 2 bytes per sample or less. */
354  av_get_bytes_per_sample(in_fmt) <= 2) {
355  av_log(avr, AV_LOG_ERROR, "dithering %s to %s is not supported\n",
357  return NULL;
358  }
359 
360  c = av_mallocz(sizeof(*c));
361  if (!c)
362  return NULL;
363 
364  c->apply_map = apply_map;
365  if (apply_map)
366  c->ch_map_info = &avr->ch_map_info;
367 
    /* NOTE(review): the start of this condition (listing line 368) and the
     * statement after the warning (listing line 372) are missing; per the
     * message, unsupported rates fall back from triangular_ns to
     * triangular_hp. */
369  sample_rate != 48000 && sample_rate != 44100) {
370  av_log(avr, AV_LOG_WARNING, "sample rate must be 48000 or 44100 Hz "
371  "for triangular_ns dither. using triangular_hp instead.\n");
373  }
374  c->method = avr->dither_method;
375  dither_init(&c->ddsp, c->method);
376 
    /* NOTE(review): the condition guarding this coefficient selection
     * (listing line 377) is missing from this view. */
378  if (sample_rate == 48000) {
379  c->ns_coef_b = ns_48_coef_b;
380  c->ns_coef_a = ns_48_coef_a;
381  } else {
382  c->ns_coef_b = ns_44_coef_b;
383  c->ns_coef_a = ns_44_coef_a;
384  }
385  }
386 
387  /* Either s16 or s16p output format is allowed, but s16p is used
388  internally, so we need to use a temp buffer and interleave if the output
389  format is s16 */
390  if (out_fmt != AV_SAMPLE_FMT_S16P) {
391  c->s16_data = ff_audio_data_alloc(channels, 1024, AV_SAMPLE_FMT_S16P,
392  "dither s16 buffer");
393  if (!c->s16_data)
394  goto fail;
395 
    /* NOTE(review): the start of the call that assigns c->ac_out (listing
     * line 396) is missing from this view. */
397  channels, sample_rate, 0);
398  if (!c->ac_out)
399  goto fail;
400  }
401 
    /* a private float buffer is only needed when the input must be converted
     * or remapped (mirrors the check in ff_convert_dither) */
402  if (in_fmt != AV_SAMPLE_FMT_FLTP || c->apply_map) {
403  c->flt_data = ff_audio_data_alloc(channels, 1024, AV_SAMPLE_FMT_FLTP,
404  "dither flt buffer");
405  if (!c->flt_data)
406  goto fail;
407  }
408  if (in_fmt != AV_SAMPLE_FMT_FLTP) {
    /* NOTE(review): the start of the call that assigns c->ac_in (listing
     * line 409) is missing from this view. */
410  channels, sample_rate, c->apply_map);
411  if (!c->ac_in)
412  goto fail;
413  }
414 
415  c->state = av_mallocz(channels * sizeof(*c->state));
416  if (!c->state)
417  goto fail;
    /* set only after state exists so ff_dither_free() never indexes a NULL array */
418  c->channels = channels;
419 
420  /* calculate thresholds for turning off dithering during periods of
421  silence to avoid replacing digital silence with quiet dither noise */
422  c->mute_dither_threshold = lrintf(sample_rate * MUTE_THRESHOLD_SEC);
    /* NOTE(review): listing line 423 is missing here; c->mute_reset_threshold
     * is read below but its assignment is not visible in this view. */
424 
425  /* initialize dither states */
    /* a fixed seed for the seed generator makes the per-channel seeds the
     * same on every run */
426  av_lfg_init(&seed_gen, 0xC0FFEE);
427  for (ch = 0; ch < channels; ch++) {
428  DitherState *state = &c->state[ch];
    /* start each channel in the muted state */
429  state->mute = c->mute_reset_threshold + 1;
430  state->seed = av_lfg_get(&seed_gen);
    /* NOTE(review): the return value is ignored. On allocation failure
     * generate_dither_noise() leaves noise_buf_size at 0, so convert_samples()
     * will attempt generation again and report the error then; consider
     * checking the result here and jumping to fail. */
431  generate_dither_noise(c, state, FFMAX(32768, sample_rate / 2));
432  }
433 
434  return c;
435 
436 fail:
437  ff_dither_free(&c);
438  return NULL;
439 }
Definition: lfg.h:25
void * av_mallocz(size_t size)
Allocate a block of size bytes with alignment suitable for all memory accesses (including vectors if ...
Definition: mem.c:205
void ff_dither_free(DitherContext **cp)
Free a DitherContext.
int ff_audio_data_realloc(AudioData *a, int nb_samples)
Reallocate AudioData.
Definition: audio_data.c:153
static const float ns_48_coef_b[4]
struct DitherState DitherState
Audio buffer used for intermediate storage between conversion phases.
Definition: oss_audio.c:46
static int generate_dither_noise(DitherContext *c, DitherState *state, int min_samples)
AudioData * flt_data
enum AVResampleDitherMethod method
memory handling functions
AudioData * ff_audio_data_alloc(int channels, int nb_samples, enum AVSampleFormat sample_fmt, const char *name)
Allocate AudioData.
Definition: audio_data.c:110
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:154
#define LFG_SCALE
Sinusoidal phase f
static const float ns_44_coef_a[4]
int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in)
Convert audio data from one sample format to another.
signed 16 bits
Definition: samplefmt.h:52
#define sample
unsigned int seed
AudioConvert * ac_out
void(* quantize)(int16_t *dst, const float *src, float *dither, int len)
#define FFALIGN(x, a)
Definition: common.h:63
int nb_samples
current number of samples
Definition: audio_data.h:41
void av_freep(void *arg)
Free a memory block which has been allocated with av_malloc(z)() or av_realloc() and set the pointer ...
Definition: mem.c:198
Triangular Dither with Noise Shaping.
Definition: avresample.h:127
Rectangular Dither.
Definition: avresample.h:124
Triangular Dither with High Pass.
Definition: avresample.h:126
#define SQRT_1_6
#define S16_SCALE
DitherState * state
static void dither_int_to_float_rectangular_c(float *dst, int *src, int len)
void(* quantize)(int16_t *dst, const float *src, float *dither, int len)
Convert samples from flt to s16 with added dither noise.
Definition: dither.h:38
const float * ns_coef_a
#define lrintf(x)
Definition: libm_mips.h:70
AudioConvert * ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map)
Allocate and initialize AudioConvert context for sample format conversion.
float, planar
Definition: samplefmt.h:60
static int convert_samples(DitherContext *c, int16_t **dst, float *const *src, int channels, int nb_samples)
static void quantize_c(int16_t *dst, const float *src, float *dither, int len)
enum AVResampleDitherMethod dither_method
dither method
void av_free(void *ptr)
Free a memory block which has been allocated with av_malloc(z)() or av_realloc(). ...
Definition: mem.c:183
#define ARCH_X86
Definition: config.h:35
int ptr_align
src and dst constraints for quantize()
Definition: dither.h:40
int channels
channel count
Definition: oss_audio.c:50
const char * r
Definition: vf_curves.c:94
void av_log(void *avcl, int level, const char *fmt,...)
Definition: log.c:246
void(* dither_int_to_float)(float *dst, int *src0, int len)
Convert dither noise from int to float with triangular distribution.
Definition: dither.h:54
void ff_audio_convert_free(AudioConvert **ac)
Free AudioConvert.
#define FFMAX(a, b)
Definition: common.h:56
DitherContext * ff_dither_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map)
Allocate and initialize a DitherContext.
AudioConvert * ac_in
DitherDSPContext ddsp
#define FFMIN(a, b)
Definition: common.h:58
int ff_convert_dither(DitherContext *c, AudioData *dst, AudioData *src)
Convert audio sample format with dithering.
ret
Definition: avfilter.c:821
enum AVSampleFormat av_get_packed_sample_fmt(enum AVSampleFormat sample_fmt)
Get the packed alternative form of the given sample format.
Definition: samplefmt.c:73
static uint32_t quantize(int32_t sample, int bits)
8-23 bits quantization
Definition: dcaenc.c:365
const float * ns_coef_b
const char * av_get_sample_fmt_name(enum AVSampleFormat sample_fmt)
Return the name of sample_fmt, or NULL if sample_fmt is not recognized.
Definition: samplefmt.c:47
static const float ns_48_coef_a[4]
static const float ns_44_coef_b[4]
int samples_align
len constraints for quantize()
Definition: dither.h:41
int av_get_bytes_per_sample(enum AVSampleFormat sample_fmt)
Return number of bytes per sample.
Definition: samplefmt.c:104
NULL
Definition: eval.c:55
sample_rate
AVS_Value src
Definition: avisynth_c.h:523
typedef void(RENAME(mix_any_func_type))
uint8_t * data[AVRESAMPLE_MAX_CHANNELS]
data plane pointers
Definition: audio_data.h:37
static void dither_highpass_filter(float *src, int len)
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:148
static unsigned int av_lfg_get(AVLFG *c)
Get the next random unsigned 32-bit number using an ALFG.
Definition: lfg.h:38
static void quantize_triangular_ns(DitherContext *c, DitherState *state, int16_t *dst, const float *src, int nb_samples)
void * av_malloc(size_t size)
Allocate a block of size bytes with alignment suitable for all memory accesses (including vectors if ...
Definition: mem.c:73
synthesis window for stochastic i
av_cold void av_lfg_init(AVLFG *c, unsigned int seed)
Definition: lfg.c:30
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFilterBuffer structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later.That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another.Buffer references ownership and permissions
static void dither_int_to_float_triangular_c(float *dst, int *src0, int len)
static void dither_init(DitherDSPContext *ddsp, enum AVResampleDitherMethod method)
static uint32_t state
Definition: trasher.c:27
int samples_align
allocated samples alignment
Definition: audio_data.h:52
int ff_audio_data_copy(AudioData *dst, AudioData *src, ChannelMapInfo *map)
Copy data from one AudioData to another.
Definition: audio_data.c:216
common internal and external API header
AudioData * s16_data
static double c[64]
AVSampleFormat
Audio Sample Formats.
Definition: samplefmt.h:49
int len
else dst[i][x+y *dst_stride[i]]
Definition: vf_mcdeint.c:160
signed 16 bits, planar
Definition: samplefmt.h:58
enum AVSampleFormat sample_fmt
sample format
Definition: audio_data.h:42
void ff_audio_data_free(AudioData **a)
Free AudioData.
Definition: audio_data.c:208
void ff_dither_init_x86(DitherDSPContext *ddsp, enum AVResampleDitherMethod method)
Definition: dither_init.c:34
int ptr_align
minimum data pointer alignment
Definition: audio_data.h:51
AVResampleDitherMethod
Definition: avresample.h:122
ChannelMapInfo * ch_map_info
#define MUTE_THRESHOLD_SEC