yading@11: /* yading@11: * TED Talks captions format decoder yading@11: * Copyright (c) 2012 Nicolas George yading@11: * yading@11: * This file is part of FFmpeg. yading@11: * yading@11: * FFmpeg is free software; you can redistribute it and/or yading@11: * modify it under the terms of the GNU Lesser General Public yading@11: * License as published by the Free Software Foundation; either yading@11: * version 2.1 of the License, or (at your option) any later version. yading@11: * yading@11: * FFmpeg is distributed in the hope that it will be useful, yading@11: * but WITHOUT ANY WARRANTY; without even the implied warranty of yading@11: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU yading@11: * Lesser General Public License for more details. yading@11: * yading@11: * You should have received a copy of the GNU Lesser General Public yading@11: * License along with FFmpeg; if not, write to the Free Software yading@11: * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA yading@11: */ yading@11: yading@11: #include "libavutil/bprint.h" yading@11: #include "libavutil/log.h" yading@11: #include "libavutil/opt.h" yading@11: #include "avformat.h" yading@11: #include "internal.h" yading@11: #include "subtitles.h" yading@11: yading@11: typedef struct { yading@11: AVClass *class; yading@11: int64_t start_time; yading@11: FFDemuxSubtitlesQueue subs; yading@11: } TEDCaptionsDemuxer; yading@11: yading@11: static const AVOption tedcaptions_options[] = { yading@11: { "start_time", "set the start time (offset) of the subtitles, in ms", yading@11: offsetof(TEDCaptionsDemuxer, start_time), FF_OPT_TYPE_INT64, yading@11: { .i64 = 15000 }, INT64_MIN, INT64_MAX, yading@11: AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_DECODING_PARAM }, yading@11: { NULL }, yading@11: }; yading@11: yading@11: static const AVClass tedcaptions_demuxer_class = { yading@11: .class_name = "tedcaptions_demuxer", yading@11: .item_name = av_default_item_name, yading@11: .option = tedcaptions_options, yading@11: .version = LIBAVUTIL_VERSION_INT, yading@11: }; yading@11: yading@11: #define BETWEEN(a, amin, amax) ((unsigned)((a) - (amin)) <= (amax) - (amin)) yading@11: yading@11: #define HEX_DIGIT_TEST(c) (BETWEEN(c, '0', '9') || BETWEEN((c) | 32, 'a', 'z')) yading@11: #define HEX_DIGIT_VAL(c) ((c) <= '9' ? (c) - '0' : ((c) | 32) - 'a' + 10) yading@11: #define ERR_CODE(c) (c < 0 ? c : AVERROR_INVALIDDATA) yading@11: yading@11: static void av_bprint_utf8(AVBPrint *bp, unsigned c) yading@11: { yading@11: int bytes, i; yading@11: yading@11: if (c <= 0x7F) { yading@11: av_bprint_chars(bp, c, 1); yading@11: return; yading@11: } yading@11: bytes = (av_log2(c) - 2) / 5; yading@11: av_bprint_chars(bp, (c >> (bytes * 6)) | ((0xFF80 >> bytes) & 0xFF), 1); yading@11: for (i = bytes - 1; i >= 0; i--) yading@11: av_bprint_chars(bp, ((c >> (i * 6)) & 0x3F) | 0x80, 1); yading@11: } yading@11: yading@11: static void next_byte(AVIOContext *pb, int *cur_byte) yading@11: { yading@11: uint8_t b; yading@11: int ret = avio_read(pb, &b, 1); yading@11: *cur_byte = ret > 0 ? b : ret == 0 ? AVERROR_EOF : ret; yading@11: } yading@11: yading@11: static void skip_spaces(AVIOContext *pb, int *cur_byte) yading@11: { yading@11: while (*cur_byte == ' ' || *cur_byte == '\t' || yading@11: *cur_byte == '\n' || *cur_byte == '\r') yading@11: next_byte(pb, cur_byte); yading@11: } yading@11: yading@11: static int expect_byte(AVIOContext *pb, int *cur_byte, uint8_t c) yading@11: { yading@11: skip_spaces(pb, cur_byte); yading@11: if (*cur_byte != c) yading@11: return ERR_CODE(*cur_byte); yading@11: next_byte(pb, cur_byte); yading@11: return 0; yading@11: } yading@11: yading@11: static int parse_string(AVIOContext *pb, int *cur_byte, AVBPrint *bp, int full) yading@11: { yading@11: int ret; yading@11: yading@11: av_bprint_init(bp, 0, full ? -1 : 1); yading@11: ret = expect_byte(pb, cur_byte, '"'); yading@11: if (ret < 0) yading@11: goto fail; yading@11: while (*cur_byte > 0 && *cur_byte != '"') { yading@11: if (*cur_byte == '\\') { yading@11: next_byte(pb, cur_byte); yading@11: if (*cur_byte < 0) { yading@11: ret = AVERROR_INVALIDDATA; yading@11: goto fail; yading@11: } yading@11: if ((*cur_byte | 32) == 'u') { yading@11: unsigned chr = 0, i; yading@11: for (i = 0; i < 4; i++) { yading@11: next_byte(pb, cur_byte); yading@11: if (!HEX_DIGIT_TEST(*cur_byte)) { yading@11: ret = ERR_CODE(*cur_byte); yading@11: goto fail; yading@11: } yading@11: chr = chr * 16 + HEX_DIGIT_VAL(*cur_byte); yading@11: } yading@11: av_bprint_utf8(bp, chr); yading@11: } else { yading@11: av_bprint_chars(bp, *cur_byte, 1); yading@11: } yading@11: } else { yading@11: av_bprint_chars(bp, *cur_byte, 1); yading@11: } yading@11: next_byte(pb, cur_byte); yading@11: } yading@11: ret = expect_byte(pb, cur_byte, '"'); yading@11: if (ret < 0) yading@11: goto fail; yading@11: if (full && !av_bprint_is_complete(bp)) { yading@11: ret = AVERROR(ENOMEM); yading@11: goto fail; yading@11: } yading@11: return 0; yading@11: yading@11: fail: yading@11: av_bprint_finalize(bp, NULL); yading@11: return ret; yading@11: } yading@11: yading@11: static int parse_label(AVIOContext *pb, int *cur_byte, AVBPrint *bp) yading@11: { yading@11: int ret; yading@11: yading@11: ret = parse_string(pb, cur_byte, bp, 0); yading@11: if (ret < 0) yading@11: return ret; yading@11: ret = expect_byte(pb, cur_byte, ':'); yading@11: if (ret < 0) yading@11: return ret; yading@11: return 0; yading@11: } yading@11: yading@11: static int parse_boolean(AVIOContext *pb, int *cur_byte, int *result) yading@11: { yading@11: const char *text[] = { "false", "true" }, *p; yading@11: int i; yading@11: yading@11: skip_spaces(pb, cur_byte); yading@11: for (i = 0; i < 2; i++) { yading@11: p = text[i]; yading@11: if (*cur_byte != *p) yading@11: continue; yading@11: for (; *p; p++, next_byte(pb, cur_byte)) yading@11: if (*cur_byte != *p) yading@11: return AVERROR_INVALIDDATA; yading@11: if (BETWEEN(*cur_byte | 32, 'a', 'z')) yading@11: return AVERROR_INVALIDDATA; yading@11: *result = i; yading@11: return 0; yading@11: } yading@11: return AVERROR_INVALIDDATA; yading@11: } yading@11: yading@11: static int parse_int(AVIOContext *pb, int *cur_byte, int64_t *result) yading@11: { yading@11: int64_t val = 0; yading@11: yading@11: skip_spaces(pb, cur_byte); yading@11: if ((unsigned)*cur_byte - '0' > 9) yading@11: return AVERROR_INVALIDDATA; yading@11: while (BETWEEN(*cur_byte, '0', '9')) { yading@11: val = val * 10 + (*cur_byte - '0'); yading@11: next_byte(pb, cur_byte); yading@11: } yading@11: *result = val; yading@11: return 0; yading@11: } yading@11: yading@11: static int parse_file(AVIOContext *pb, FFDemuxSubtitlesQueue *subs) yading@11: { yading@11: int ret, cur_byte, start_of_par; yading@11: AVBPrint label, content; yading@11: int64_t pos, start, duration; yading@11: AVPacket *pkt; yading@11: yading@11: next_byte(pb, &cur_byte); yading@11: ret = expect_byte(pb, &cur_byte, '{'); yading@11: if (ret < 0) yading@11: return AVERROR_INVALIDDATA; yading@11: ret = parse_label(pb, &cur_byte, &label); yading@11: if (ret < 0 || strcmp(label.str, "captions")) yading@11: return AVERROR_INVALIDDATA; yading@11: ret = expect_byte(pb, &cur_byte, '['); yading@11: if (ret < 0) yading@11: return AVERROR_INVALIDDATA; yading@11: while (1) { yading@11: content.size = 0; yading@11: start = duration = AV_NOPTS_VALUE; yading@11: ret = expect_byte(pb, &cur_byte, '{'); yading@11: if (ret < 0) yading@11: return ret; yading@11: pos = avio_tell(pb) - 1; yading@11: while (1) { yading@11: ret = parse_label(pb, &cur_byte, &label); yading@11: if (ret < 0) yading@11: return ret; yading@11: if (!strcmp(label.str, "startOfParagraph")) { yading@11: ret = parse_boolean(pb, &cur_byte, &start_of_par); yading@11: if (ret < 0) yading@11: return ret; yading@11: } else if (!strcmp(label.str, "content")) { yading@11: ret = parse_string(pb, &cur_byte, &content, 1); yading@11: if (ret < 0) yading@11: return ret; yading@11: } else if (!strcmp(label.str, "startTime")) { yading@11: ret = parse_int(pb, &cur_byte, &start); yading@11: if (ret < 0) yading@11: return ret; yading@11: } else if (!strcmp(label.str, "duration")) { yading@11: ret = parse_int(pb, &cur_byte, &duration); yading@11: if (ret < 0) yading@11: return ret; yading@11: } else { yading@11: return AVERROR_INVALIDDATA; yading@11: } yading@11: skip_spaces(pb, &cur_byte); yading@11: if (cur_byte != ',') yading@11: break; yading@11: next_byte(pb, &cur_byte); yading@11: } yading@11: ret = expect_byte(pb, &cur_byte, '}'); yading@11: if (ret < 0) yading@11: return ret; yading@11: yading@11: if (!content.size || start == AV_NOPTS_VALUE || yading@11: duration == AV_NOPTS_VALUE) yading@11: return AVERROR_INVALIDDATA; yading@11: pkt = ff_subtitles_queue_insert(subs, content.str, content.len, 0); yading@11: if (!pkt) yading@11: return AVERROR(ENOMEM); yading@11: pkt->pos = pos; yading@11: pkt->pts = start; yading@11: pkt->duration = duration; yading@11: av_bprint_finalize(&content, NULL); yading@11: yading@11: skip_spaces(pb, &cur_byte); yading@11: if (cur_byte != ',') yading@11: break; yading@11: next_byte(pb, &cur_byte); yading@11: } yading@11: ret = expect_byte(pb, &cur_byte, ']'); yading@11: if (ret < 0) yading@11: return ret; yading@11: ret = expect_byte(pb, &cur_byte, '}'); yading@11: if (ret < 0) yading@11: return ret; yading@11: skip_spaces(pb, &cur_byte); yading@11: if (cur_byte != AVERROR_EOF) yading@11: return ERR_CODE(cur_byte); yading@11: return 0; yading@11: } yading@11: yading@11: static av_cold int tedcaptions_read_header(AVFormatContext *avf) yading@11: { yading@11: TEDCaptionsDemuxer *tc = avf->priv_data; yading@11: AVStream *st; yading@11: int ret, i; yading@11: AVPacket *last; yading@11: yading@11: ret = parse_file(avf->pb, &tc->subs); yading@11: if (ret < 0) { yading@11: if (ret == AVERROR_INVALIDDATA) yading@11: av_log(avf, AV_LOG_ERROR, "Syntax error near offset %"PRId64".\n", yading@11: avio_tell(avf->pb)); yading@11: ff_subtitles_queue_clean(&tc->subs); yading@11: return ret; yading@11: } yading@11: ff_subtitles_queue_finalize(&tc->subs); yading@11: for (i = 0; i < tc->subs.nb_subs; i++) yading@11: tc->subs.subs[i].pts += tc->start_time; yading@11: yading@11: last = &tc->subs.subs[tc->subs.nb_subs - 1]; yading@11: st = avformat_new_stream(avf, NULL); yading@11: if (!st) yading@11: return AVERROR(ENOMEM); yading@11: st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE; yading@11: st->codec->codec_id = AV_CODEC_ID_TEXT; yading@11: avpriv_set_pts_info(st, 64, 1, 1000); yading@11: st->probe_packets = 0; yading@11: st->start_time = 0; yading@11: st->duration = last->pts + last->duration; yading@11: st->cur_dts = 0; yading@11: yading@11: return 0; yading@11: } yading@11: yading@11: static int tedcaptions_read_packet(AVFormatContext *avf, AVPacket *packet) yading@11: { yading@11: TEDCaptionsDemuxer *tc = avf->priv_data; yading@11: yading@11: return ff_subtitles_queue_read_packet(&tc->subs, packet); yading@11: } yading@11: yading@11: static int tedcaptions_read_close(AVFormatContext *avf) yading@11: { yading@11: TEDCaptionsDemuxer *tc = avf->priv_data; yading@11: yading@11: ff_subtitles_queue_clean(&tc->subs); yading@11: return 0; yading@11: } yading@11: yading@11: static av_cold int tedcaptions_read_probe(AVProbeData *p) yading@11: { yading@11: static const char *const tags[] = { yading@11: "\"captions\"", "\"duration\"", "\"content\"", yading@11: "\"startOfParagraph\"", "\"startTime\"", yading@11: }; yading@11: unsigned i, count = 0; yading@11: const char *t; yading@11: yading@11: if (p->buf[strspn(p->buf, " \t\r\n")] != '{') yading@11: return 0; yading@11: for (i = 0; i < FF_ARRAY_ELEMS(tags); i++) { yading@11: if (!(t = strstr(p->buf, tags[i]))) yading@11: continue; yading@11: t += strlen(tags[i]); yading@11: t += strspn(t, " \t\r\n"); yading@11: if (*t == ':') yading@11: count++; yading@11: } yading@11: return count == FF_ARRAY_ELEMS(tags) ? AVPROBE_SCORE_MAX : yading@11: count ? AVPROBE_SCORE_MAX / 2 : 0; yading@11: } yading@11: yading@11: static int tedcaptions_read_seek(AVFormatContext *avf, int stream_index, yading@11: int64_t min_ts, int64_t ts, int64_t max_ts, yading@11: int flags) yading@11: { yading@11: TEDCaptionsDemuxer *tc = avf->priv_data; yading@11: return ff_subtitles_queue_seek(&tc->subs, avf, stream_index, yading@11: min_ts, ts, max_ts, flags); yading@11: } yading@11: yading@11: AVInputFormat ff_tedcaptions_demuxer = { yading@11: .name = "tedcaptions", yading@11: .long_name = NULL_IF_CONFIG_SMALL("TED Talks captions"), yading@11: .priv_data_size = sizeof(TEDCaptionsDemuxer), yading@11: .priv_class = &tedcaptions_demuxer_class, yading@11: .read_header = tedcaptions_read_header, yading@11: .read_packet = tedcaptions_read_packet, yading@11: .read_close = tedcaptions_read_close, yading@11: .read_probe = tedcaptions_read_probe, yading@11: .read_seek2 = tedcaptions_read_seek, yading@11: };