g722enc.c
/*
 * Copyright (c) CMU 1993 Computer Science, Speech Group
 *                        Chengxiang Lu and Alex Hauptmann
 * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
 * Copyright (c) 2009 Kenan Gillet
 * Copyright (c) 2010 Martin Storsjo
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * G.722 ADPCM audio encoder
 */

#include "libavutil/avassert.h"
#include "avcodec.h"
#include "internal.h"
#include "g722.h"
#include "libavutil/common.h"

#define FREEZE_INTERVAL 128

/* This is an arbitrary value. Allowing insanely large values leads to strange
   problems, so we limit it to a reasonable value */
#define MAX_FRAME_SIZE 32768

/* We clip the value of avctx->trellis to prevent data type overflows and
   undefined behavior. Using larger values is insanely slow anyway. */
#define MIN_TRELLIS 0
#define MAX_TRELLIS 16

static av_cold int g722_encode_close(AVCodecContext *avctx)
{
    G722Context *c = avctx->priv_data;
    int i;
    for (i = 0; i < 2; i++) {
        av_freep(&c->paths[i]);
        av_freep(&c->node_buf[i]);
        av_freep(&c->nodep_buf[i]);
    }
    return 0;
}

static av_cold int g722_encode_init(AVCodecContext *avctx)
{
    G722Context *c = avctx->priv_data;
    int ret;

    if (avctx->channels != 1) {
        av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
        return AVERROR_INVALIDDATA;
    }

    c->band[0].scale_factor = 8;
    c->band[1].scale_factor = 2;
    c->prev_samples_pos = 22;

    if (avctx->trellis) {
        int frontier = 1 << avctx->trellis;
        int max_paths = frontier * FREEZE_INTERVAL;
        int i;
        for (i = 0; i < 2; i++) {
            c->paths[i] = av_mallocz(max_paths * sizeof(**c->paths));
            c->node_buf[i] = av_mallocz(2 * frontier * sizeof(**c->node_buf));
            c->nodep_buf[i] = av_mallocz(2 * frontier * sizeof(**c->nodep_buf));
            if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i]) {
                ret = AVERROR(ENOMEM);
                goto error;
            }
        }
    }

    if (avctx->frame_size) {
        /* validate frame size */
        if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
            int new_frame_size;

            if (avctx->frame_size == 1)
                new_frame_size = 2;
            else if (avctx->frame_size > MAX_FRAME_SIZE)
                new_frame_size = MAX_FRAME_SIZE;
            else
                new_frame_size = avctx->frame_size - 1;

            av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
                   "allowed. Using %d instead of %d\n", new_frame_size,
                   avctx->frame_size);
            avctx->frame_size = new_frame_size;
        }
    } else {
        /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
           a common packet size for VoIP applications */
        avctx->frame_size = 320;
    }
    avctx->delay = 22;

    if (avctx->trellis) {
        /* validate trellis */
        if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
            int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
            av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
                   "allowed. Using %d instead of %d\n", new_trellis,
                   avctx->trellis);
            avctx->trellis = new_trellis;
        }
    }

    return 0;
error:
    g722_encode_close(avctx);
    return ret;
}

static const int16_t low_quant[33] = {
      35,   72,  110,  150,  190,  233,  276,  323,
     370,  422,  473,  530,  587,  650,  714,  786,
     858,  940, 1023, 1121, 1219, 1339, 1458, 1612,
    1765, 1980, 2195, 2557, 2919
};

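/* Feed the next two input samples into the history buffer and run the QMF
 * analysis filter over the most recent 24 samples, producing one low-band
 * sample (*xlow) and one high-band sample (*xhigh). When the history buffer
 * is full, the last 22 samples are moved back to its start. */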
static inline void filter_samples(G722Context *c, const int16_t *samples,
                                  int *xlow, int *xhigh)
{
    int xout1, xout2;
    c->prev_samples[c->prev_samples_pos++] = samples[0];
    c->prev_samples[c->prev_samples_pos++] = samples[1];
    ff_g722_apply_qmf(c->prev_samples + c->prev_samples_pos - 24, &xout1, &xout2);
    *xlow  = xout1 + xout2 >> 14;
    *xhigh = xout1 - xout2 >> 14;
    if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
        memmove(c->prev_samples,
                c->prev_samples + c->prev_samples_pos - 22,
                22 * sizeof(c->prev_samples[0]));
        c->prev_samples_pos = 22;
    }
}

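/* Quantize a high-band prediction error to a 2-bit code: one bit for the sign
 * of the error and one for whether its magnitude stays below the single
 * decision level derived from the current scale factor. */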
static inline int encode_high(const struct G722Band *state, int xhigh)
{
    int diff = av_clip_int16(xhigh - state->s_predictor);
    int pred = 141 * state->scale_factor >> 8;
    /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
    return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
}

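/* Quantize a low-band prediction error to a 6-bit code by searching the
 * low_quant decision levels scaled by the current scale factor. If the
 * magnitude already exceeds level 8, the lower part of the table is skipped
 * in one step; the final index is mapped to separate code ranges for
 * positive and negative errors. */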
static inline int encode_low(const struct G722Band* state, int xlow)
{
    int diff  = av_clip_int16(xlow - state->s_predictor);
    /* = diff >= 0 ? diff : -(diff + 1) */
    int limit = diff ^ (diff >> (sizeof(diff)*8-1));
    int i = 0;
    limit = limit + 1 << 10;
    if (limit > low_quant[8] * state->scale_factor)
        i = 9;
    while (i < 29 && limit > low_quant[i] * state->scale_factor)
        i++;
    return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
}

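/* Trellis (Viterbi-style) search. The low and the high band are searched
 * independently: for each pair of input samples, up to 2^trellis candidate
 * encoder states per band survive in a small heap ordered by accumulated
 * squared decoding error (ssd). Every FREEZE_INTERVAL sample pairs the best
 * path is traced back through the prev links and written to the output. */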
static void g722_encode_trellis(G722Context *c, int trellis,
                                uint8_t *dst, int nb_samples,
                                const int16_t *samples)
{
    int i, j, k;
    int frontier = 1 << trellis;
    struct TrellisNode **nodes[2];
    struct TrellisNode **nodes_next[2];
    int pathn[2] = {0, 0}, froze = -1;
    struct TrellisPath *p[2];

    for (i = 0; i < 2; i++) {
        nodes[i] = c->nodep_buf[i];
        nodes_next[i] = c->nodep_buf[i] + frontier;
        memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
        nodes[i][0] = c->node_buf[i] + frontier;
        nodes[i][0]->ssd = 0;
        nodes[i][0]->path = 0;
        nodes[i][0]->state = c->band[i];
    }

    for (i = 0; i < nb_samples >> 1; i++) {
        int xlow, xhigh;
        struct TrellisNode *next[2];
        int heap_pos[2] = {0, 0};

        for (j = 0; j < 2; j++) {
            next[j] = c->node_buf[j] + frontier*(i & 1);
            memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
        }

        filter_samples(c, &samples[2*i], &xlow, &xhigh);

        for (j = 0; j < frontier && nodes[0][j]; j++) {
            /* Only k >> 2 affects the future adaptive state, therefore testing
             * small steps that don't change k >> 2 is useless, the original
             * value from encode_low is better than them. Since we step k
             * in steps of 4, make sure range is a multiple of 4, so that
             * we don't miss the original value from encode_low. */
            int range = j < frontier/2 ? 4 : 0;
            struct TrellisNode *cur_node = nodes[0][j];

            int ilow = encode_low(&cur_node->state, xlow);

            for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
                int decoded, dec_diff, pos;
                uint32_t ssd;
                struct TrellisNode* node;

                if (k < 0)
                    continue;

                decoded = av_clip((cur_node->state.scale_factor *
                                   ff_g722_low_inv_quant6[k] >> 10)
                                  + cur_node->state.s_predictor, -16384, 16383);
                dec_diff = xlow - decoded;

#define STORE_NODE(index, UPDATE, VALUE)\
                ssd = cur_node->ssd + dec_diff*dec_diff;\
                /* Check for wraparound. Using 64 bit ssd counters would \
                 * be simpler, but is slower on x86 32 bit. */\
                if (ssd < cur_node->ssd)\
                    continue;\
                if (heap_pos[index] < frontier) {\
                    pos = heap_pos[index]++;\
                    av_assert2(pathn[index] < FREEZE_INTERVAL * frontier);\
                    node = nodes_next[index][pos] = next[index]++;\
                    node->path = pathn[index]++;\
                } else {\
                    /* Try to replace one of the leaf nodes with the new \
                     * one, but not always testing the same leaf position */\
                    pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
                    if (ssd >= nodes_next[index][pos]->ssd)\
                        continue;\
                    heap_pos[index]++;\
                    node = nodes_next[index][pos];\
                }\
                node->ssd = ssd;\
                node->state = cur_node->state;\
                UPDATE;\
                c->paths[index][node->path].value = VALUE;\
                c->paths[index][node->path].prev = cur_node->path;\
                /* Sift the newly inserted node up in the heap to restore \
                 * the heap property */\
                while (pos > 0) {\
                    int parent = (pos - 1) >> 1;\
                    if (nodes_next[index][parent]->ssd <= ssd)\
                        break;\
                    FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
                                                nodes_next[index][pos]);\
                    pos = parent;\
                }
                STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
            }
        }

        for (j = 0; j < frontier && nodes[1][j]; j++) {
            int ihigh;
            struct TrellisNode *cur_node = nodes[1][j];

            /* We don't try to get any initial guess for ihigh via
             * encode_high - since there's only 4 possible values, test
             * them all. Testing all of these gives a much, much larger
             * gain than testing a larger range around ilow. */
            for (ihigh = 0; ihigh < 4; ihigh++) {
                int dhigh, decoded, dec_diff, pos;
                uint32_t ssd;
                struct TrellisNode* node;

                dhigh = cur_node->state.scale_factor *
                        ff_g722_high_inv_quant[ihigh] >> 10;
                decoded = av_clip(dhigh + cur_node->state.s_predictor,
                                  -16384, 16383);
                dec_diff = xhigh - decoded;

                STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
            }
        }

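        /* Advance to the next generation of nodes and renormalize: subtracting
         * the best node's ssd from every survivor keeps the 32-bit error
         * counters from overflowing without changing their relative order. */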
        for (j = 0; j < 2; j++) {
            FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);

            if (nodes[j][0]->ssd > (1 << 16)) {
                for (k = 1; k < frontier && nodes[j][k]; k++)
                    nodes[j][k]->ssd -= nodes[j][0]->ssd;
                nodes[j][0]->ssd = 0;
            }
        }

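        /* Periodically commit the winning path: walk its prev links backwards,
         * emit one byte per sample pair (high-band code in the top two bits,
         * low-band code in the low six), then reset the path buffers and keep
         * only the best node in each band. */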
        if (i == froze + FREEZE_INTERVAL) {
            p[0] = &c->paths[0][nodes[0][0]->path];
            p[1] = &c->paths[1][nodes[1][0]->path];
            for (j = i; j > froze; j--) {
                dst[j] = p[1]->value << 6 | p[0]->value;
                p[0] = &c->paths[0][p[0]->prev];
                p[1] = &c->paths[1][p[1]->prev];
            }
            froze = i;
            pathn[0] = pathn[1] = 0;
            memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
            memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
        }
    }

    p[0] = &c->paths[0][nodes[0][0]->path];
    p[1] = &c->paths[1][nodes[1][0]->path];
    for (j = i; j > froze; j--) {
        dst[j] = p[1]->value << 6 | p[0]->value;
        p[0] = &c->paths[0][p[0]->prev];
        p[1] = &c->paths[1][p[1]->prev];
    }
    c->band[0] = nodes[0][0]->state;
    c->band[1] = nodes[1][0]->state;
}

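/* Encode one pair of input samples into a single output byte: the 2-bit
 * high-band code goes in the upper bits and the 6-bit low-band code in the
 * lower bits, and both band predictors are updated accordingly. */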
static av_always_inline void encode_byte(G722Context *c, uint8_t *dst,
                                         const int16_t *samples)
{
    int xlow, xhigh, ilow, ihigh;
    filter_samples(c, samples, &xlow, &xhigh);
    ihigh = encode_high(&c->band[1], xhigh);
    ilow  = encode_low (&c->band[0], xlow);
    ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor *
                                  ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
    ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
    *dst = ihigh << 6 | ilow;
}

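/* Plain (non-trellis) encoding: every two input samples produce exactly one
 * output byte. */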
static void g722_encode_no_trellis(G722Context *c,
                                   uint8_t *dst, int nb_samples,
                                   const int16_t *samples)
{
    int i;
    for (i = 0; i < nb_samples; i += 2)
        encode_byte(c, dst++, &samples[i]);
}

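/* Encode one frame into a packet of (nb_samples + 1) / 2 bytes. A trailing odd
 * sample is duplicated to form the final pair, and the 22-sample QMF delay is
 * subtracted from the packet timestamp. */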
static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                             const AVFrame *frame, int *got_packet_ptr)
{
    G722Context *c = avctx->priv_data;
    const int16_t *samples = (const int16_t *)frame->data[0];
    int nb_samples, out_size, ret;

    out_size = (frame->nb_samples + 1) / 2;
    if ((ret = ff_alloc_packet2(avctx, avpkt, out_size)) < 0)
        return ret;

    nb_samples = frame->nb_samples - (frame->nb_samples & 1);

    if (avctx->trellis)
        g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
    else
        g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);

    /* handle last frame with odd frame_size */
    if (nb_samples < frame->nb_samples) {
        int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
        encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
    }

    if (frame->pts != AV_NOPTS_VALUE)
        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
    *got_packet_ptr = 1;
    return 0;
}

AVCodec ff_adpcm_g722_encoder = {
    .name           = "g722",
    .type           = AVMEDIA_TYPE_AUDIO,
    .id             = AV_CODEC_ID_ADPCM_G722,
    .priv_data_size = sizeof(G722Context),
    .init           = g722_encode_init,
    .close          = g722_encode_close,
    .encode2        = g722_encode_frame,
    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME,
    .long_name      = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
    .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                     AV_SAMPLE_FMT_NONE },
};
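
/*
 * Minimal usage sketch (not part of the original file): one way an application
 * might open this encoder through the public libavcodec API. Error handling is
 * omitted and the exact frame/packet handling depends on the library version
 * in use.
 *
 *     AVCodec *codec = avcodec_find_encoder(AV_CODEC_ID_ADPCM_G722);
 *     AVCodecContext *enc = avcodec_alloc_context3(codec);
 *     enc->sample_rate = 16000;
 *     enc->channels    = 1;
 *     enc->sample_fmt  = AV_SAMPLE_FMT_S16;
 *     avcodec_open2(enc, codec, NULL);
 *     // feed AVFrames of enc->frame_size s16 samples, collecting one packet each
 */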