yading@11
|
1 /*
|
yading@11
|
2 * TED Talks captions format decoder
|
yading@11
|
3 * Copyright (c) 2012 Nicolas George
|
yading@11
|
4 *
|
yading@11
|
5 * This file is part of FFmpeg.
|
yading@11
|
6 *
|
yading@11
|
7 * FFmpeg is free software; you can redistribute it and/or
|
yading@11
|
8 * modify it under the terms of the GNU Lesser General Public
|
yading@11
|
9 * License as published by the Free Software Foundation; either
|
yading@11
|
10 * version 2.1 of the License, or (at your option) any later version.
|
yading@11
|
11 *
|
yading@11
|
12 * FFmpeg is distributed in the hope that it will be useful,
|
yading@11
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
yading@11
|
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
yading@11
|
15 * Lesser General Public License for more details.
|
yading@11
|
16 *
|
yading@11
|
17 * You should have received a copy of the GNU Lesser General Public
|
yading@11
|
18 * License along with FFmpeg; if not, write to the Free Software
|
yading@11
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
yading@11
|
20 */
|
yading@11
|
21
|
yading@11
|
22 #include "libavutil/bprint.h"
|
yading@11
|
23 #include "libavutil/log.h"
|
yading@11
|
24 #include "libavutil/opt.h"
|
yading@11
|
25 #include "avformat.h"
|
yading@11
|
26 #include "internal.h"
|
yading@11
|
27 #include "subtitles.h"
|
yading@11
|
28
|
yading@11
|
29 typedef struct {
|
yading@11
|
30 AVClass *class;
|
yading@11
|
31 int64_t start_time;
|
yading@11
|
32 FFDemuxSubtitlesQueue subs;
|
yading@11
|
33 } TEDCaptionsDemuxer;
|
yading@11
|
34
|
yading@11
|
35 static const AVOption tedcaptions_options[] = {
|
yading@11
|
36 { "start_time", "set the start time (offset) of the subtitles, in ms",
|
yading@11
|
37 offsetof(TEDCaptionsDemuxer, start_time), FF_OPT_TYPE_INT64,
|
yading@11
|
38 { .i64 = 15000 }, INT64_MIN, INT64_MAX,
|
yading@11
|
39 AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_DECODING_PARAM },
|
yading@11
|
40 { NULL },
|
yading@11
|
41 };
|
yading@11
|
42
|
yading@11
|
43 static const AVClass tedcaptions_demuxer_class = {
|
yading@11
|
44 .class_name = "tedcaptions_demuxer",
|
yading@11
|
45 .item_name = av_default_item_name,
|
yading@11
|
46 .option = tedcaptions_options,
|
yading@11
|
47 .version = LIBAVUTIL_VERSION_INT,
|
yading@11
|
48 };
|
yading@11
|
49
|
yading@11
|
/* True when amin <= a <= amax. The unsigned cast folds both comparisons
 * into one and makes negative values (error codes) fail the test. */
#define BETWEEN(a, amin, amax) ((unsigned)((a) - (amin)) <= (amax) - (amin))

/* True for '0'-'9', 'a'-'f' and 'A'-'F'; "| 32" folds ASCII letters to
 * lower case. */
#define HEX_DIGIT_TEST(c) (BETWEEN(c, '0', '9') || BETWEEN((c) | 32, 'a', 'f'))
/* Numeric value of a character already accepted by HEX_DIGIT_TEST. */
#define HEX_DIGIT_VAL(c) ((c) <= '9' ? (c) - '0' : ((c) | 32) - 'a' + 10)
/* Keep a negative value (an error code already stored in the current byte),
 * otherwise report invalid data. */
#define ERR_CODE(c) ((c) < 0 ? (c) : AVERROR_INVALIDDATA)
|
yading@11
|
55
|
yading@11
|
56 static void av_bprint_utf8(AVBPrint *bp, unsigned c)
|
yading@11
|
57 {
|
yading@11
|
58 int bytes, i;
|
yading@11
|
59
|
yading@11
|
60 if (c <= 0x7F) {
|
yading@11
|
61 av_bprint_chars(bp, c, 1);
|
yading@11
|
62 return;
|
yading@11
|
63 }
|
yading@11
|
64 bytes = (av_log2(c) - 2) / 5;
|
yading@11
|
65 av_bprint_chars(bp, (c >> (bytes * 6)) | ((0xFF80 >> bytes) & 0xFF), 1);
|
yading@11
|
66 for (i = bytes - 1; i >= 0; i--)
|
yading@11
|
67 av_bprint_chars(bp, ((c >> (i * 6)) & 0x3F) | 0x80, 1);
|
yading@11
|
68 }
|
yading@11
|
69
|
yading@11
|
70 static void next_byte(AVIOContext *pb, int *cur_byte)
|
yading@11
|
71 {
|
yading@11
|
72 uint8_t b;
|
yading@11
|
73 int ret = avio_read(pb, &b, 1);
|
yading@11
|
74 *cur_byte = ret > 0 ? b : ret == 0 ? AVERROR_EOF : ret;
|
yading@11
|
75 }
|
yading@11
|
76
|
yading@11
|
77 static void skip_spaces(AVIOContext *pb, int *cur_byte)
|
yading@11
|
78 {
|
yading@11
|
79 while (*cur_byte == ' ' || *cur_byte == '\t' ||
|
yading@11
|
80 *cur_byte == '\n' || *cur_byte == '\r')
|
yading@11
|
81 next_byte(pb, cur_byte);
|
yading@11
|
82 }
|
yading@11
|
83
|
yading@11
|
84 static int expect_byte(AVIOContext *pb, int *cur_byte, uint8_t c)
|
yading@11
|
85 {
|
yading@11
|
86 skip_spaces(pb, cur_byte);
|
yading@11
|
87 if (*cur_byte != c)
|
yading@11
|
88 return ERR_CODE(*cur_byte);
|
yading@11
|
89 next_byte(pb, cur_byte);
|
yading@11
|
90 return 0;
|
yading@11
|
91 }
|
yading@11
|
92
|
yading@11
|
93 static int parse_string(AVIOContext *pb, int *cur_byte, AVBPrint *bp, int full)
|
yading@11
|
94 {
|
yading@11
|
95 int ret;
|
yading@11
|
96
|
yading@11
|
97 av_bprint_init(bp, 0, full ? -1 : 1);
|
yading@11
|
98 ret = expect_byte(pb, cur_byte, '"');
|
yading@11
|
99 if (ret < 0)
|
yading@11
|
100 goto fail;
|
yading@11
|
101 while (*cur_byte > 0 && *cur_byte != '"') {
|
yading@11
|
102 if (*cur_byte == '\\') {
|
yading@11
|
103 next_byte(pb, cur_byte);
|
yading@11
|
104 if (*cur_byte < 0) {
|
yading@11
|
105 ret = AVERROR_INVALIDDATA;
|
yading@11
|
106 goto fail;
|
yading@11
|
107 }
|
yading@11
|
108 if ((*cur_byte | 32) == 'u') {
|
yading@11
|
109 unsigned chr = 0, i;
|
yading@11
|
110 for (i = 0; i < 4; i++) {
|
yading@11
|
111 next_byte(pb, cur_byte);
|
yading@11
|
112 if (!HEX_DIGIT_TEST(*cur_byte)) {
|
yading@11
|
113 ret = ERR_CODE(*cur_byte);
|
yading@11
|
114 goto fail;
|
yading@11
|
115 }
|
yading@11
|
116 chr = chr * 16 + HEX_DIGIT_VAL(*cur_byte);
|
yading@11
|
117 }
|
yading@11
|
118 av_bprint_utf8(bp, chr);
|
yading@11
|
119 } else {
|
yading@11
|
120 av_bprint_chars(bp, *cur_byte, 1);
|
yading@11
|
121 }
|
yading@11
|
122 } else {
|
yading@11
|
123 av_bprint_chars(bp, *cur_byte, 1);
|
yading@11
|
124 }
|
yading@11
|
125 next_byte(pb, cur_byte);
|
yading@11
|
126 }
|
yading@11
|
127 ret = expect_byte(pb, cur_byte, '"');
|
yading@11
|
128 if (ret < 0)
|
yading@11
|
129 goto fail;
|
yading@11
|
130 if (full && !av_bprint_is_complete(bp)) {
|
yading@11
|
131 ret = AVERROR(ENOMEM);
|
yading@11
|
132 goto fail;
|
yading@11
|
133 }
|
yading@11
|
134 return 0;
|
yading@11
|
135
|
yading@11
|
136 fail:
|
yading@11
|
137 av_bprint_finalize(bp, NULL);
|
yading@11
|
138 return ret;
|
yading@11
|
139 }
|
yading@11
|
140
|
yading@11
|
141 static int parse_label(AVIOContext *pb, int *cur_byte, AVBPrint *bp)
|
yading@11
|
142 {
|
yading@11
|
143 int ret;
|
yading@11
|
144
|
yading@11
|
145 ret = parse_string(pb, cur_byte, bp, 0);
|
yading@11
|
146 if (ret < 0)
|
yading@11
|
147 return ret;
|
yading@11
|
148 ret = expect_byte(pb, cur_byte, ':');
|
yading@11
|
149 if (ret < 0)
|
yading@11
|
150 return ret;
|
yading@11
|
151 return 0;
|
yading@11
|
152 }
|
yading@11
|
153
|
yading@11
|
154 static int parse_boolean(AVIOContext *pb, int *cur_byte, int *result)
|
yading@11
|
155 {
|
yading@11
|
156 const char *text[] = { "false", "true" }, *p;
|
yading@11
|
157 int i;
|
yading@11
|
158
|
yading@11
|
159 skip_spaces(pb, cur_byte);
|
yading@11
|
160 for (i = 0; i < 2; i++) {
|
yading@11
|
161 p = text[i];
|
yading@11
|
162 if (*cur_byte != *p)
|
yading@11
|
163 continue;
|
yading@11
|
164 for (; *p; p++, next_byte(pb, cur_byte))
|
yading@11
|
165 if (*cur_byte != *p)
|
yading@11
|
166 return AVERROR_INVALIDDATA;
|
yading@11
|
167 if (BETWEEN(*cur_byte | 32, 'a', 'z'))
|
yading@11
|
168 return AVERROR_INVALIDDATA;
|
yading@11
|
169 *result = i;
|
yading@11
|
170 return 0;
|
yading@11
|
171 }
|
yading@11
|
172 return AVERROR_INVALIDDATA;
|
yading@11
|
173 }
|
yading@11
|
174
|
yading@11
|
175 static int parse_int(AVIOContext *pb, int *cur_byte, int64_t *result)
|
yading@11
|
176 {
|
yading@11
|
177 int64_t val = 0;
|
yading@11
|
178
|
yading@11
|
179 skip_spaces(pb, cur_byte);
|
yading@11
|
180 if ((unsigned)*cur_byte - '0' > 9)
|
yading@11
|
181 return AVERROR_INVALIDDATA;
|
yading@11
|
182 while (BETWEEN(*cur_byte, '0', '9')) {
|
yading@11
|
183 val = val * 10 + (*cur_byte - '0');
|
yading@11
|
184 next_byte(pb, cur_byte);
|
yading@11
|
185 }
|
yading@11
|
186 *result = val;
|
yading@11
|
187 return 0;
|
yading@11
|
188 }
|
yading@11
|
189
|
yading@11
|
190 static int parse_file(AVIOContext *pb, FFDemuxSubtitlesQueue *subs)
|
yading@11
|
191 {
|
yading@11
|
192 int ret, cur_byte, start_of_par;
|
yading@11
|
193 AVBPrint label, content;
|
yading@11
|
194 int64_t pos, start, duration;
|
yading@11
|
195 AVPacket *pkt;
|
yading@11
|
196
|
yading@11
|
197 next_byte(pb, &cur_byte);
|
yading@11
|
198 ret = expect_byte(pb, &cur_byte, '{');
|
yading@11
|
199 if (ret < 0)
|
yading@11
|
200 return AVERROR_INVALIDDATA;
|
yading@11
|
201 ret = parse_label(pb, &cur_byte, &label);
|
yading@11
|
202 if (ret < 0 || strcmp(label.str, "captions"))
|
yading@11
|
203 return AVERROR_INVALIDDATA;
|
yading@11
|
204 ret = expect_byte(pb, &cur_byte, '[');
|
yading@11
|
205 if (ret < 0)
|
yading@11
|
206 return AVERROR_INVALIDDATA;
|
yading@11
|
207 while (1) {
|
yading@11
|
208 content.size = 0;
|
yading@11
|
209 start = duration = AV_NOPTS_VALUE;
|
yading@11
|
210 ret = expect_byte(pb, &cur_byte, '{');
|
yading@11
|
211 if (ret < 0)
|
yading@11
|
212 return ret;
|
yading@11
|
213 pos = avio_tell(pb) - 1;
|
yading@11
|
214 while (1) {
|
yading@11
|
215 ret = parse_label(pb, &cur_byte, &label);
|
yading@11
|
216 if (ret < 0)
|
yading@11
|
217 return ret;
|
yading@11
|
218 if (!strcmp(label.str, "startOfParagraph")) {
|
yading@11
|
219 ret = parse_boolean(pb, &cur_byte, &start_of_par);
|
yading@11
|
220 if (ret < 0)
|
yading@11
|
221 return ret;
|
yading@11
|
222 } else if (!strcmp(label.str, "content")) {
|
yading@11
|
223 ret = parse_string(pb, &cur_byte, &content, 1);
|
yading@11
|
224 if (ret < 0)
|
yading@11
|
225 return ret;
|
yading@11
|
226 } else if (!strcmp(label.str, "startTime")) {
|
yading@11
|
227 ret = parse_int(pb, &cur_byte, &start);
|
yading@11
|
228 if (ret < 0)
|
yading@11
|
229 return ret;
|
yading@11
|
230 } else if (!strcmp(label.str, "duration")) {
|
yading@11
|
231 ret = parse_int(pb, &cur_byte, &duration);
|
yading@11
|
232 if (ret < 0)
|
yading@11
|
233 return ret;
|
yading@11
|
234 } else {
|
yading@11
|
235 return AVERROR_INVALIDDATA;
|
yading@11
|
236 }
|
yading@11
|
237 skip_spaces(pb, &cur_byte);
|
yading@11
|
238 if (cur_byte != ',')
|
yading@11
|
239 break;
|
yading@11
|
240 next_byte(pb, &cur_byte);
|
yading@11
|
241 }
|
yading@11
|
242 ret = expect_byte(pb, &cur_byte, '}');
|
yading@11
|
243 if (ret < 0)
|
yading@11
|
244 return ret;
|
yading@11
|
245
|
yading@11
|
246 if (!content.size || start == AV_NOPTS_VALUE ||
|
yading@11
|
247 duration == AV_NOPTS_VALUE)
|
yading@11
|
248 return AVERROR_INVALIDDATA;
|
yading@11
|
249 pkt = ff_subtitles_queue_insert(subs, content.str, content.len, 0);
|
yading@11
|
250 if (!pkt)
|
yading@11
|
251 return AVERROR(ENOMEM);
|
yading@11
|
252 pkt->pos = pos;
|
yading@11
|
253 pkt->pts = start;
|
yading@11
|
254 pkt->duration = duration;
|
yading@11
|
255 av_bprint_finalize(&content, NULL);
|
yading@11
|
256
|
yading@11
|
257 skip_spaces(pb, &cur_byte);
|
yading@11
|
258 if (cur_byte != ',')
|
yading@11
|
259 break;
|
yading@11
|
260 next_byte(pb, &cur_byte);
|
yading@11
|
261 }
|
yading@11
|
262 ret = expect_byte(pb, &cur_byte, ']');
|
yading@11
|
263 if (ret < 0)
|
yading@11
|
264 return ret;
|
yading@11
|
265 ret = expect_byte(pb, &cur_byte, '}');
|
yading@11
|
266 if (ret < 0)
|
yading@11
|
267 return ret;
|
yading@11
|
268 skip_spaces(pb, &cur_byte);
|
yading@11
|
269 if (cur_byte != AVERROR_EOF)
|
yading@11
|
270 return ERR_CODE(cur_byte);
|
yading@11
|
271 return 0;
|
yading@11
|
272 }
|
yading@11
|
273
|
yading@11
|
274 static av_cold int tedcaptions_read_header(AVFormatContext *avf)
|
yading@11
|
275 {
|
yading@11
|
276 TEDCaptionsDemuxer *tc = avf->priv_data;
|
yading@11
|
277 AVStream *st;
|
yading@11
|
278 int ret, i;
|
yading@11
|
279 AVPacket *last;
|
yading@11
|
280
|
yading@11
|
281 ret = parse_file(avf->pb, &tc->subs);
|
yading@11
|
282 if (ret < 0) {
|
yading@11
|
283 if (ret == AVERROR_INVALIDDATA)
|
yading@11
|
284 av_log(avf, AV_LOG_ERROR, "Syntax error near offset %"PRId64".\n",
|
yading@11
|
285 avio_tell(avf->pb));
|
yading@11
|
286 ff_subtitles_queue_clean(&tc->subs);
|
yading@11
|
287 return ret;
|
yading@11
|
288 }
|
yading@11
|
289 ff_subtitles_queue_finalize(&tc->subs);
|
yading@11
|
290 for (i = 0; i < tc->subs.nb_subs; i++)
|
yading@11
|
291 tc->subs.subs[i].pts += tc->start_time;
|
yading@11
|
292
|
yading@11
|
293 last = &tc->subs.subs[tc->subs.nb_subs - 1];
|
yading@11
|
294 st = avformat_new_stream(avf, NULL);
|
yading@11
|
295 if (!st)
|
yading@11
|
296 return AVERROR(ENOMEM);
|
yading@11
|
297 st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE;
|
yading@11
|
298 st->codec->codec_id = AV_CODEC_ID_TEXT;
|
yading@11
|
299 avpriv_set_pts_info(st, 64, 1, 1000);
|
yading@11
|
300 st->probe_packets = 0;
|
yading@11
|
301 st->start_time = 0;
|
yading@11
|
302 st->duration = last->pts + last->duration;
|
yading@11
|
303 st->cur_dts = 0;
|
yading@11
|
304
|
yading@11
|
305 return 0;
|
yading@11
|
306 }
|
yading@11
|
307
|
yading@11
|
308 static int tedcaptions_read_packet(AVFormatContext *avf, AVPacket *packet)
|
yading@11
|
309 {
|
yading@11
|
310 TEDCaptionsDemuxer *tc = avf->priv_data;
|
yading@11
|
311
|
yading@11
|
312 return ff_subtitles_queue_read_packet(&tc->subs, packet);
|
yading@11
|
313 }
|
yading@11
|
314
|
yading@11
|
315 static int tedcaptions_read_close(AVFormatContext *avf)
|
yading@11
|
316 {
|
yading@11
|
317 TEDCaptionsDemuxer *tc = avf->priv_data;
|
yading@11
|
318
|
yading@11
|
319 ff_subtitles_queue_clean(&tc->subs);
|
yading@11
|
320 return 0;
|
yading@11
|
321 }
|
yading@11
|
322
|
yading@11
|
323 static av_cold int tedcaptions_read_probe(AVProbeData *p)
|
yading@11
|
324 {
|
yading@11
|
325 static const char *const tags[] = {
|
yading@11
|
326 "\"captions\"", "\"duration\"", "\"content\"",
|
yading@11
|
327 "\"startOfParagraph\"", "\"startTime\"",
|
yading@11
|
328 };
|
yading@11
|
329 unsigned i, count = 0;
|
yading@11
|
330 const char *t;
|
yading@11
|
331
|
yading@11
|
332 if (p->buf[strspn(p->buf, " \t\r\n")] != '{')
|
yading@11
|
333 return 0;
|
yading@11
|
334 for (i = 0; i < FF_ARRAY_ELEMS(tags); i++) {
|
yading@11
|
335 if (!(t = strstr(p->buf, tags[i])))
|
yading@11
|
336 continue;
|
yading@11
|
337 t += strlen(tags[i]);
|
yading@11
|
338 t += strspn(t, " \t\r\n");
|
yading@11
|
339 if (*t == ':')
|
yading@11
|
340 count++;
|
yading@11
|
341 }
|
yading@11
|
342 return count == FF_ARRAY_ELEMS(tags) ? AVPROBE_SCORE_MAX :
|
yading@11
|
343 count ? AVPROBE_SCORE_MAX / 2 : 0;
|
yading@11
|
344 }
|
yading@11
|
345
|
yading@11
|
346 static int tedcaptions_read_seek(AVFormatContext *avf, int stream_index,
|
yading@11
|
347 int64_t min_ts, int64_t ts, int64_t max_ts,
|
yading@11
|
348 int flags)
|
yading@11
|
349 {
|
yading@11
|
350 TEDCaptionsDemuxer *tc = avf->priv_data;
|
yading@11
|
351 return ff_subtitles_queue_seek(&tc->subs, avf, stream_index,
|
yading@11
|
352 min_ts, ts, max_ts, flags);
|
yading@11
|
353 }
|
yading@11
|
354
|
yading@11
|
355 AVInputFormat ff_tedcaptions_demuxer = {
|
yading@11
|
356 .name = "tedcaptions",
|
yading@11
|
357 .long_name = NULL_IF_CONFIG_SMALL("TED Talks captions"),
|
yading@11
|
358 .priv_data_size = sizeof(TEDCaptionsDemuxer),
|
yading@11
|
359 .priv_class = &tedcaptions_demuxer_class,
|
yading@11
|
360 .read_header = tedcaptions_read_header,
|
yading@11
|
361 .read_packet = tedcaptions_read_packet,
|
yading@11
|
362 .read_close = tedcaptions_read_close,
|
yading@11
|
363 .read_probe = tedcaptions_read_probe,
|
yading@11
|
364 .read_seek2 = tedcaptions_read_seek,
|
yading@11
|
365 };
|