#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue May 1 17:00:26 2018

@author: Emmanouil Theofanis Chourdakis

Takes an .ann annotation file and a .json character-line file
and creates a _script.txt script file.
"""

import argparse
import json
import logging
import os

import pandas as pd
import pypeg2 as pg

import ner
from rel import *

logging.basicConfig(level=logging.INFO)


def annotation2script(annot, quotesdict):
    logging.info('Parsing annotation')
    parsed = pg.parse(annot, ner.AnnotationFile)

    characters = {}
    places = {}
    character_lines = {}
    scenes = []

    # Entity dictionary; relations and attributes point to these entities.
    dictionary = {}

    # Visit all the parsed lines in three passes: first entities, then
    # attributes, then relations. The reason is that a relation or attribute
    # sometimes refers to an entity that has not been defined yet at that
    # point in the file.

    # Pass 1: entities.
    for line in parsed:
        # Every annotation line has a single object.
        obj = line[0]

        if isinstance(obj, ner.AnnotationTuple):

            annotation = obj.annotation.lower()

            # Store the annotated string under its variable name, dropping a
            # leading article ('a'/'the') so later references resolve to the
            # same key.
            if annotation.split()[0].lower() in ['a', 'the']:
                annotation = annotation.split(maxsplit=1)[1]

            dictionary[obj.variable] = annotation

            if obj.type == 'Character':
                characters[annotation] = {}
            elif obj.type == 'Character_Line':
                character_lines[annotation] = {}
            elif obj.type == 'Place':
                places[annotation] = {}

    # Pass 2: attributes.
    for line in parsed:
        obj = line[0]
        if isinstance(obj, ner.AttributeTuple):
            # For attribute tuples, find the character they refer to and
            # attach the attribute (gender or age) to it.
            target = dictionary[obj.target]
            value = obj.annotation

            if obj.type == 'Gender':
                characters[target]['gender'] = value
            elif obj.type == 'Age':
                characters[target]['age'] = value
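
    # For orientation, a hypothetical example (not taken from a real
    # annotation) of what the intermediate structures could hold at this
    # point, assuming brat-style variable names such as 'T1', 'T2':
    #
    #   dictionary      == {'T1': 'alice', 'T2': 'house', 'T3': '<cline0>'}
    #   characters      == {'alice': {'gender': 'Female'}}
    #   places          == {'house': {}}
    #   character_lines == {'<cline0>': {}}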

    # Pass 3: relations.
    for line in parsed:
        # Every annotation line has a single object.
        obj = line[0]

        if isinstance(obj, ner.RelationTuple):

            # Relations have a trigger, a first argument `arg1' and a second
            # argument `arg2'. |arg1| * |arg2| relations are constructed for
            # each trigger, where |arg1| is the number of candidates for
            # argument 1 and |arg2| the number of candidates for argument 2.
            arg1_candidates = []
            arg2_candidates = []
            trigger = None
            label = None

            # Check the relation's arguments:
            for arg in obj.args:
                if arg.label == 'Says':
                    trigger = dictionary[arg.target]
                    label = 'Quote'
                elif arg.label == 'Spatial_Signal':
                    trigger = dictionary[arg.target]
                    label = 'Spatial_Relation'
                if arg.label in ['Trajector', 'WHO']:
                    arg1_candidates.append(dictionary[arg.target])
                if arg.label in ['Landmark', 'WHAT']:
                    arg2_candidates.append(dictionary[arg.target])

            # Skip relations without a recognised trigger.
            if label is None:
                continue

            for arg1 in arg1_candidates:
                for arg2 in arg2_candidates:
                    relation = (trigger, arg1, arg2, label)
                    if label == 'Quote':
                        character_lines[arg2]['who'] = arg1
                    if label == 'Spatial_Relation':
                        scenes.append(arg2)

    # Generate the cast list.
    cast_list_section = r"""Cast List:
Narrator - male or female - panned center
"""

    # Ping-pong the characters' panning between right and left.
    panned = 'right'
    for c in characters:
        if 'gender' not in characters[c]:
            gender = 'male or female'
        else:
            gender = characters[c]['gender'].lower()

        cast_list_section += '{} - {} - panned {}\n'.format(c.capitalize(), gender, panned)
        if panned == 'right':
            panned = 'left'
        else:
            panned = 'right'

    scenes_definition = r"""Scenes:
"""

    for n, scene in enumerate(scenes):
        scenes_definition += "{} - {} - fxive:{} - none\n".format(n + 1, scene, scene)

    # Scene introduction
    # TODO: Do it so that scenes follow the text.

    # Keep the correct order of lines.
    lines_order = list(quotesdict)

    # The line keys are of the form <*line0>, <*line1>, etc.; sort them by
    # the number just before the closing '>' (collecting all digits so that
    # indices >= 10 also sort correctly).
    lines_order = sorted(lines_order,
                         key=lambda x: int(''.join(ch for ch in x if ch.isdigit())))

    lines_section = r"""Script:
--- Scene 1 ---
"""

    for l in lines_order:
        if l[1] == 'n':
            lines_section += "[Narrator] {}\n".format(quotesdict[l])
        elif l[1] == 'c':
            lines_section += "[{}] {}\n".format(
                character_lines[l[:-1]]['who'].capitalize(), quotesdict[l])

    script = cast_list_section + '\n' + scenes_definition + '\n' + lines_section

    # Create the transcript.
    lines = []

    for key in quotesdict:
        ldict = {}

        cline = key[:-1]  # Remove the trailing dot.

        if cline[1] == 'c':
            ldict['cast'] = character_lines[cline]['who'].capitalize()
        else:
            ldict['cast'] = 'Narrator'

        lineno = cline.replace('<', '').replace('>', '')
        ldict['filename'] = '{}.wav'.format(lineno)
        ldict['line'] = quotesdict[key]
        lines.append(ldict)

    # Create the sfx dataframe.
    sfx = []
    for scene in scenes:
        sfx_dict = {'sfx': scene, 'url': 'http://edit_me'}
        sfx.append(sfx_dict)

    sfx_df = pd.DataFrame.from_records(sfx)

    transcript_df = pd.DataFrame.from_records(lines)
    return script, transcript_df, sfx_df


if __name__ == "__main__":
    argparser = argparse.ArgumentParser()
    argparser.add_argument('input_annotation_path',
                           help='.ann file with annotation')
    argparser.add_argument('input_json_path',
                           help='.json file containing the character quotes')

    args = argparser.parse_args()
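
    # The quotes .json is assumed, from how its keys are used above, to map
    # placeholder keys to spoken text, e.g. (hypothetical content):
    #
    #   {
    #       "<nline0>.": "It was a quiet evening in the house.",
    #       "<cline1>.": "Is anyone there?"
    #   }
    #
    # where 'n' marks a narrator line, 'c' a character line, and the number
    # gives the line order.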

    # Load the annotation and the quotes dictionary.
    with open(args.input_annotation_path) as f:
        annot = f.read()

    with open(args.input_json_path) as f:
        quotesdict = json.load(f)

    script, transcript_df, sfx_df = annotation2script(annot, quotesdict)

    # Write the script next to the input annotation, replacing its extension
    # with '_script.txt'.
    output_path = os.path.splitext(args.input_annotation_path)[0] + '_script.txt'

    with open(output_path, 'w') as f:
        f.write(script)

    output_dir = os.path.dirname(args.input_annotation_path)
    transcript_df.to_excel(os.path.join(output_dir, 'transcript.xls'))
    sfx_df.to_excel(os.path.join(output_dir, 'sfx.xls'))
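
# Example usage (the script name below is illustrative; the inputs are a
# brat-style story.ann and the accompanying story.json of character quotes):
#
#   python3 ann2script.py story.ann story.json
#
# This writes story_script.txt next to the annotation, plus transcript.xls
# and sfx.xls in the same directory.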