libtheoraenc.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2006 Paul Richards <paul.richards@gmail.com>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * @brief Theora encoder using libtheora.
24  * @author Paul Richards <paul.richards@gmail.com>
25  *
26  * A lot of this is copy / paste from other output codecs in
27  * libavcodec or pure guesswork (or both).
28  *
29  * I have used t_ prefixes on variables which are libtheora types
30  * and o_ prefixes on variables which are libogg types.
31  */
32 
33 /* FFmpeg includes */
34 #include "libavutil/common.h"
35 #include "libavutil/intreadwrite.h"
36 #include "libavutil/pixdesc.h"
37 #include "libavutil/log.h"
38 #include "libavutil/base64.h"
39 #include "avcodec.h"
40 #include "internal.h"
41 
42 /* libtheora includes */
43 #include <theora/theoraenc.h>
44 
45 typedef struct TheoraContext {
46  th_enc_ctx *t_state;
50  int uv_hshift;
51  int uv_vshift;
54 
55 /** Concatenate an ogg_packet into the extradata. */
56 static int concatenate_packet(unsigned int* offset,
57  AVCodecContext* avc_context,
58  const ogg_packet* packet)
59 {
60  const char* message = NULL;
61  uint8_t* newdata = NULL;
62  int newsize = avc_context->extradata_size + 2 + packet->bytes;
63  int ret;
64 
65  if (packet->bytes < 0) {
66  message = "ogg_packet has negative size";
67  ret = AVERROR_INVALIDDATA;
68  } else if (packet->bytes > 0xffff) {
69  message = "ogg_packet is larger than 65535 bytes";
70  ret = AVERROR_INVALIDDATA;
71  } else if (newsize < avc_context->extradata_size) {
72  message = "extradata_size would overflow";
73  ret = AVERROR_INVALIDDATA;
74  } else {
75  newdata = av_realloc(avc_context->extradata, newsize);
76  if (!newdata)
77  message = "av_realloc failed";
78  ret = AVERROR(ENOMEM);
79  }
80  if (message) {
81  av_log(avc_context, AV_LOG_ERROR, "concatenate_packet failed: %s\n", message);
82  return ret;
83  }
84 
85  avc_context->extradata = newdata;
86  avc_context->extradata_size = newsize;
87  AV_WB16(avc_context->extradata + (*offset), packet->bytes);
88  *offset += 2;
89  memcpy(avc_context->extradata + (*offset), packet->packet, packet->bytes);
90  (*offset) += packet->bytes;
91  return 0;
92 }
93 
94 static int get_stats(AVCodecContext *avctx, int eos)
95 {
96 #ifdef TH_ENCCTL_2PASS_OUT
97  TheoraContext *h = avctx->priv_data;
98  uint8_t *buf;
99  int bytes;
100 
101  bytes = th_encode_ctl(h->t_state, TH_ENCCTL_2PASS_OUT, &buf, sizeof(buf));
102  if (bytes < 0) {
103  av_log(avctx, AV_LOG_ERROR, "Error getting first pass stats\n");
104  return AVERROR_EXTERNAL;
105  }
106  if (!eos) {
107  h->stats = av_fast_realloc(h->stats, &h->stats_size,
108  h->stats_offset + bytes);
109  memcpy(h->stats + h->stats_offset, buf, bytes);
110  h->stats_offset += bytes;
111  } else {
112  int b64_size = AV_BASE64_SIZE(h->stats_offset);
113  // libtheora generates a summary header at the end
114  memcpy(h->stats, buf, bytes);
115  avctx->stats_out = av_malloc(b64_size);
116  av_base64_encode(avctx->stats_out, b64_size, h->stats, h->stats_offset);
117  }
118  return 0;
119 #else
120  av_log(avctx, AV_LOG_ERROR, "libtheora too old to support 2pass\n");
121  return AVERROR(ENOSUP);
122 #endif
123 }
124 
125 // libtheora won't read the entire buffer we give it at once, so we have to
126 // repeatedly submit it...
127 static int submit_stats(AVCodecContext *avctx)
128 {
129 #ifdef TH_ENCCTL_2PASS_IN
130  TheoraContext *h = avctx->priv_data;
131  int bytes;
132  if (!h->stats) {
133  if (!avctx->stats_in) {
134  av_log(avctx, AV_LOG_ERROR, "No statsfile for second pass\n");
135  return AVERROR(EINVAL);
136  }
137  h->stats_size = strlen(avctx->stats_in) * 3/4;
138  h->stats = av_malloc(h->stats_size);
139  h->stats_size = av_base64_decode(h->stats, avctx->stats_in, h->stats_size);
140  }
141  while (h->stats_size - h->stats_offset > 0) {
142  bytes = th_encode_ctl(h->t_state, TH_ENCCTL_2PASS_IN,
143  h->stats + h->stats_offset,
144  h->stats_size - h->stats_offset);
145  if (bytes < 0) {
146  av_log(avctx, AV_LOG_ERROR, "Error submitting stats\n");
147  return AVERROR_EXTERNAL;
148  }
149  if (!bytes)
150  return 0;
151  h->stats_offset += bytes;
152  }
153  return 0;
154 #else
155  av_log(avctx, AV_LOG_ERROR, "libtheora too old to support 2pass\n");
156  return AVERROR(ENOSUP);
157 #endif
158 }
159 
160 static av_cold int encode_init(AVCodecContext* avc_context)
161 {
162  th_info t_info;
163  th_comment t_comment;
164  ogg_packet o_packet;
165  unsigned int offset;
166  TheoraContext *h = avc_context->priv_data;
167  uint32_t gop_size = avc_context->gop_size;
168  int ret;
169 
170  /* Set up the theora_info struct */
171  th_info_init(&t_info);
172  t_info.frame_width = FFALIGN(avc_context->width, 16);
173  t_info.frame_height = FFALIGN(avc_context->height, 16);
174  t_info.pic_width = avc_context->width;
175  t_info.pic_height = avc_context->height;
176  t_info.pic_x = 0;
177  t_info.pic_y = 0;
178  /* Swap numerator and denominator as time_base in AVCodecContext gives the
179  * time period between frames, but theora_info needs the framerate. */
180  t_info.fps_numerator = avc_context->time_base.den;
181  t_info.fps_denominator = avc_context->time_base.num;
182  if (avc_context->sample_aspect_ratio.num) {
183  t_info.aspect_numerator = avc_context->sample_aspect_ratio.num;
184  t_info.aspect_denominator = avc_context->sample_aspect_ratio.den;
185  } else {
186  t_info.aspect_numerator = 1;
187  t_info.aspect_denominator = 1;
188  }
189 
190  if (avc_context->color_primaries == AVCOL_PRI_BT470M)
191  t_info.colorspace = TH_CS_ITU_REC_470M;
192  else if (avc_context->color_primaries == AVCOL_PRI_BT470BG)
193  t_info.colorspace = TH_CS_ITU_REC_470BG;
194  else
195  t_info.colorspace = TH_CS_UNSPECIFIED;
196 
197  if (avc_context->pix_fmt == AV_PIX_FMT_YUV420P)
198  t_info.pixel_fmt = TH_PF_420;
199  else if (avc_context->pix_fmt == AV_PIX_FMT_YUV422P)
200  t_info.pixel_fmt = TH_PF_422;
201  else if (avc_context->pix_fmt == AV_PIX_FMT_YUV444P)
202  t_info.pixel_fmt = TH_PF_444;
203  else {
204  av_log(avc_context, AV_LOG_ERROR, "Unsupported pix_fmt\n");
205  return AVERROR(EINVAL);
206  }
208 
209  if (avc_context->flags & CODEC_FLAG_QSCALE) {
210  /* Clip global_quality in QP units to the [0 - 10] range
211  to be consistent with the libvorbis implementation.
212  Theora accepts a quality parameter which is an int value in
213  the [0 - 63] range.
214  */
215  t_info.quality = av_clipf(avc_context->global_quality / (float)FF_QP2LAMBDA, 0, 10) * 6.3;
216  t_info.target_bitrate = 0;
217  } else {
218  t_info.target_bitrate = avc_context->bit_rate;
219  t_info.quality = 0;
220  }
221 
222  /* Now initialise libtheora */
223  h->t_state = th_encode_alloc(&t_info);
224  if (!h->t_state) {
225  av_log(avc_context, AV_LOG_ERROR, "theora_encode_init failed\n");
226  return AVERROR_EXTERNAL;
227  }
228 
229  h->keyframe_mask = (1 << t_info.keyframe_granule_shift) - 1;
230  /* Clear up theora_info struct */
231  th_info_clear(&t_info);
232 
233  if (th_encode_ctl(h->t_state, TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE,
234  &gop_size, sizeof(gop_size))) {
235  av_log(avc_context, AV_LOG_ERROR, "Error setting GOP size\n");
236  return AVERROR_EXTERNAL;
237  }
238 
239  // need to enable 2 pass (via TH_ENCCTL_2PASS_) before encoding headers
240  if (avc_context->flags & CODEC_FLAG_PASS1) {
241  if ((ret = get_stats(avc_context, 0)) < 0)
242  return ret;
243  } else if (avc_context->flags & CODEC_FLAG_PASS2) {
244  if ((ret = submit_stats(avc_context)) < 0)
245  return ret;
246  }
247 
248  /*
249  Output first header packet consisting of theora
250  header, comment, and tables.
251 
252  Each one is prefixed with a 16bit size, then they
253  are concatenated together into libavcodec's extradata.
254  */
255  offset = 0;
256 
257  /* Headers */
258  th_comment_init(&t_comment);
259 
260  while (th_encode_flushheader(h->t_state, &t_comment, &o_packet))
261  if ((ret = concatenate_packet(&offset, avc_context, &o_packet)) < 0)
262  return ret;
263 
264  th_comment_clear(&t_comment);
265 
266  /* Set up the output AVFrame */
267  avc_context->coded_frame= avcodec_alloc_frame();
268 
269  return 0;
270 }
271 
272 static int encode_frame(AVCodecContext* avc_context, AVPacket *pkt,
273  const AVFrame *frame, int *got_packet)
274 {
275  th_ycbcr_buffer t_yuv_buffer;
276  TheoraContext *h = avc_context->priv_data;
277  ogg_packet o_packet;
278  int result, i, ret;
279 
280  // EOS, finish and get 1st pass stats if applicable
281  if (!frame) {
282  th_encode_packetout(h->t_state, 1, &o_packet);
283  if (avc_context->flags & CODEC_FLAG_PASS1)
284  if ((ret = get_stats(avc_context, 1)) < 0)
285  return ret;
286  return 0;
287  }
288 
289  /* Copy planes to the theora yuv_buffer */
290  for (i = 0; i < 3; i++) {
291  t_yuv_buffer[i].width = FFALIGN(avc_context->width, 16) >> (i && h->uv_hshift);
292  t_yuv_buffer[i].height = FFALIGN(avc_context->height, 16) >> (i && h->uv_vshift);
293  t_yuv_buffer[i].stride = frame->linesize[i];
294  t_yuv_buffer[i].data = frame->data[i];
295  }
296 
297  if (avc_context->flags & CODEC_FLAG_PASS2)
298  if ((ret = submit_stats(avc_context)) < 0)
299  return ret;
300 
301  /* Now call into theora_encode_YUVin */
302  result = th_encode_ycbcr_in(h->t_state, t_yuv_buffer);
303  if (result) {
304  const char* message;
305  switch (result) {
306  case -1:
307  message = "differing frame sizes";
308  break;
309  case TH_EINVAL:
310  message = "encoder is not ready or is finished";
311  break;
312  default:
313  message = "unknown reason";
314  break;
315  }
316  av_log(avc_context, AV_LOG_ERROR, "theora_encode_YUVin failed (%s) [%d]\n", message, result);
317  return AVERROR_EXTERNAL;
318  }
319 
320  if (avc_context->flags & CODEC_FLAG_PASS1)
321  if ((ret = get_stats(avc_context, 0)) < 0)
322  return ret;
323 
324  /* Pick up returned ogg_packet */
325  result = th_encode_packetout(h->t_state, 0, &o_packet);
326  switch (result) {
327  case 0:
328  /* No packet is ready */
329  return 0;
330  case 1:
331  /* Success, we have a packet */
332  break;
333  default:
334  av_log(avc_context, AV_LOG_ERROR, "theora_encode_packetout failed [%d]\n", result);
335  return AVERROR_EXTERNAL;
336  }
337 
338  /* Copy ogg_packet content out to buffer */
339  if ((ret = ff_alloc_packet2(avc_context, pkt, o_packet.bytes)) < 0)
340  return ret;
341  memcpy(pkt->data, o_packet.packet, o_packet.bytes);
342 
343  // HACK: assumes no encoder delay, this is true until libtheora becomes
344  // multithreaded (which will be disabled unless explicitly requested)
345  pkt->pts = pkt->dts = frame->pts;
346  avc_context->coded_frame->key_frame = !(o_packet.granulepos & h->keyframe_mask);
347  if (avc_context->coded_frame->key_frame)
348  pkt->flags |= AV_PKT_FLAG_KEY;
349  *got_packet = 1;
350 
351  return 0;
352 }
353 
354 static av_cold int encode_close(AVCodecContext* avc_context)
355 {
356  TheoraContext *h = avc_context->priv_data;
357 
358  th_encode_free(h->t_state);
359  av_freep(&h->stats);
360  av_freep(&avc_context->coded_frame);
361  av_freep(&avc_context->stats_out);
362  av_freep(&avc_context->extradata);
363  avc_context->extradata_size = 0;
364 
365  return 0;
366 }
367 
368 /** AVCodec struct exposed to libavcodec */
370  .name = "libtheora",
371  .type = AVMEDIA_TYPE_VIDEO,
372  .id = AV_CODEC_ID_THEORA,
373  .priv_data_size = sizeof(TheoraContext),
374  .init = encode_init,
375  .close = encode_close,
376  .encode2 = encode_frame,
377  .capabilities = CODEC_CAP_DELAY, // needed to get the statsfile summary
378  .pix_fmts = (const enum AVPixelFormat[]){
380  },
381  .long_name = NULL_IF_CONFIG_SMALL("libtheora Theora"),
382 };
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
Definition: error.h:59
uint8_t * stats
Definition: libtheoraenc.c:47
This structure describes decoded (raw) audio or video data.
Definition: frame.h:76
#define CODEC_FLAG_PASS2
Use internal 2pass ratecontrol in second pass mode.
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:73
#define CODEC_FLAG_PASS1
Use internal 2pass ratecontrol in first pass mode.
AVFrame * coded_frame
the picture in the bitstream
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:35
static int concatenate_packet(unsigned int *offset, AVCodecContext *avc_context, const ogg_packet *packet)
Concatenate an ogg_packet into the extradata.
Definition: libtheoraenc.c:56
int num
numerator
Definition: rational.h:44
AVRational sample_aspect_ratio
sample aspect ratio (0 if unknown) That is the width of a pixel divided by the height of the pixel...
enum AVPixelFormat pix_fmt
Pixel format, see AV_PIX_FMT_xxx.
char * stats_in
pass2 encoding statistics input buffer Concatenated stuff from stats_out of pass1 should be placed he...
void * av_realloc(void *ptr, size_t size)
Allocate or reallocate a block of memory.
Definition: mem.c:141
#define FFALIGN(x, a)
Definition: common.h:63
static int submit_stats(AVCodecContext *avctx)
Definition: libtheoraenc.c:127
AVRational time_base
This is the fundamental unit of time (in seconds) in terms of which frame timestamps are represented...
void av_freep(void *arg)
Free a memory block which has been allocated with av_malloc(z)() or av_realloc() and set the pointer ...
Definition: mem.c:198
uint8_t
#define av_cold
Definition: attributes.h:78
static AVPacket pkt
Definition: demuxing.c:56
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:159
uint8_t * extradata
some codecs need / can use extradata like Huffman tables.
uint8_t * data
char * stats_out
pass1 encoding statistics output buffer
#define AV_PKT_FLAG_KEY
The packet contains a keyframe.
frame
Definition: stft.m:14
void * av_fast_realloc(void *ptr, unsigned int *size, size_t min_size)
Reallocate the given block if it is not large enough, otherwise do nothing.
#define CODEC_CAP_DELAY
Encoder or decoder requires flushing with NULL input at the end in order to give the complete and cor...
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
int flags
CODEC_FLAG_*.
#define CODEC_FLAG_QSCALE
Use fixed qscale.
void av_log(void *avcl, int level, const char *fmt,...)
Definition: log.c:246
const char * name
Name of the codec implementation.
static const uint8_t offset[127][2]
Definition: vf_spp.c:70
char * av_base64_encode(char *out, int out_size, const uint8_t *in, int in_size)
Encode data to base64 and null-terminate.
external API header
int flags
A combination of AV_PKT_FLAG values.
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:72
AVFrame * avcodec_alloc_frame(void)
Allocate an AVFrame and set its fields to default values.
static int get_stats(AVCodecContext *avctx, int eos)
Definition: libtheoraenc.c:94
int bit_rate
the average bitrate
#define AV_BASE64_SIZE(x)
Calculate the output size needed to base64-encode x bytes to a null-terminated string.
Definition: base64.h:61
struct TheoraContext TheoraContext
ret
Definition: avfilter.c:821
int width
picture width / height.
static av_cold int encode_init(AVCodecContext *avc_context)
Definition: libtheoraenc.c:160
enum AVColorPrimaries color_primaries
Chromaticity coordinates of the source primaries.
void avcodec_get_chroma_sub_sample(enum AVPixelFormat pix_fmt, int *h_shift, int *v_shift)
Utility function to access log2_chroma_w log2_chroma_h from the pixel format AVPixFmtDescriptor.
Definition: imgconvert.c:65
int ff_alloc_packet2(AVCodecContext *avctx, AVPacket *avpkt, int size)
Check AVPacket size and/or allocate data.
NULL
Definition: eval.c:55
also ITU-R BT601-6 625 / ITU-R BT1358 625 / ITU-R BT1700 625 PAL & SECAM
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Definition: frame.h:101
static int ogg_packet(AVFormatContext *s, int *sid, int *dstart, int *dsize, int64_t *fpos)
find the next Ogg packet
Definition: oggdec.c:436
main external API structure.
static void close(AVCodecParserContext *s)
Definition: h264_parser.c:375
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:148
static av_cold int encode_close(AVCodecContext *avc_context)
Definition: libtheoraenc.c:354
th_enc_ctx * t_state
Definition: libtheoraenc.c:46
void * buf
Definition: avisynth_c.h:594
void * av_malloc(size_t size)
Allocate a block of size bytes with alignment suitable for all memory accesses (including vectors if ...
Definition: mem.c:73
synthesis window for stochastic i
#define AV_WB16(p, darg)
Definition: intreadwrite.h:237
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFilterBuffer structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later.That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another.Buffer references ownership and permissions
static int encode_frame(AVCodecContext *avc_context, AVPacket *pkt, const AVFrame *frame, int *got_packet)
Definition: libtheoraenc.c:272
int global_quality
Global quality for codecs which cannot change it per frame.
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:87
int gop_size
the number of pictures in a group of pictures, or 0 for intra_only
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:68
common internal api header.
common internal and external API header
int den
denominator
Definition: rational.h:45
AVCodec ff_libtheora_encoder
AVCodec struct exposed to libavcodec.
Definition: libtheoraenc.c:369
int key_frame
1 -> keyframe, 0-> not
Definition: frame.h:139
#define FF_QP2LAMBDA
factor to convert from H.263 QP to lambda
Definition: avutil.h:169
int av_base64_decode(uint8_t *out, const char *in_str, int out_size)
Decode a base64-encoded string.
int64_t dts
Decompression timestamp in AVStream->time_base units; the time at which the packet is decompressed...
#define AVERROR_EXTERNAL
Generic error in an external library.
Definition: error.h:57
AVPixelFormat
Pixel format.
Definition: pixfmt.h:66
This structure stores compressed data.
int64_t pts
Presentation timestamp in AVStream->time_base units; the time at which the decompressed packet will b...