#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue May 1 17:00:26 2018

@author: Emmanouil Theofanis Chourdakis

Takes an .ann annotation file and a .json character-line file
and creates a _script.txt script file.
"""

import argparse
import json
import logging
import os

import pandas as pd
import pypeg2 as pg

import ner
from rel import *

logging.basicConfig(level=logging.INFO)


def annotation2script(annot, quotesdict):
    logging.info('Parsing annotation')
    parsed = pg.parse(annot, ner.AnnotationFile)

    characters = {}
    places = {}
    character_lines = {}
    scenes = []

    # Entity dictionary; relations and attributes point to these entities.
    dictionary = {}

    # Visit all the parsed lines in three passes: first entities, then
    # attributes, then relations. The reason is that a relation or attribute
    # sometimes refers to an entity that has not been defined yet at that
    # point in the file.

    # Pass 1: entities.
    for line in parsed:
        # Every annotation line has a single object.
        obj = line[0]

        if isinstance(obj, ner.AnnotationTuple):

            annotation = obj.annotation.lower()

            # Store the annotated string under its variable name, dropping a
            # leading article ('a'/'the') so later references resolve to the
            # same key.
            if annotation.split()[0].lower() in ['a', 'the']:
                annotation = annotation.split(maxsplit=1)[1]

            dictionary[obj.variable] = annotation

            if obj.type == 'Character':
                characters[annotation] = {}
            elif obj.type == 'Character_Line':
                character_lines[annotation] = {}
            elif obj.type == 'Place':
                places[annotation] = {}

    # Pass 2: attributes.
    for line in parsed:
        obj = line[0]
        if isinstance(obj, ner.AttributeTuple):
            # For attribute tuples, find the character they refer to and
            # attach the attribute (gender or age) to it.
            target = dictionary[obj.target]
            value = obj.annotation

            if obj.type == 'Gender':
                characters[target]['gender'] = value
            elif obj.type == 'Age':
                characters[target]['age'] = value
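
    # For orientation, a hypothetical example (not taken from a real
    # annotation) of what the intermediate structures could hold at this
    # point, assuming brat-style variable names such as 'T1', 'T2':
    #
    #   dictionary      == {'T1': 'alice', 'T2': 'house', 'T3': '<cline0>'}
    #   characters      == {'alice': {'gender': 'Female'}}
    #   places          == {'house': {}}
    #   character_lines == {'<cline0>': {}}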

    # Pass 3: relations.
    for line in parsed:
        # Every annotation line has a single object.
        obj = line[0]

        if isinstance(obj, ner.RelationTuple):

            # Relations have a trigger, a first argument `arg1' and a second
            # argument `arg2'. |arg1| * |arg2| relations are constructed for
            # each trigger, where |arg1| is the number of candidates for
            # argument 1 and |arg2| the number of candidates for argument 2.
            arg1_candidates = []
            arg2_candidates = []
            trigger = None
            label = None

            # Check the relation's arguments:
            for arg in obj.args:
                if arg.label == 'Says':
                    trigger = dictionary[arg.target]
                    label = 'Quote'
                elif arg.label == 'Spatial_Signal':
                    trigger = dictionary[arg.target]
                    label = 'Spatial_Relation'
                if arg.label in ['Trajector', 'WHO']:
                    arg1_candidates.append(dictionary[arg.target])
                if arg.label in ['Landmark', 'WHAT']:
                    arg2_candidates.append(dictionary[arg.target])

            # Skip relations without a recognised trigger.
            if label is None:
                continue

            for arg1 in arg1_candidates:
                for arg2 in arg2_candidates:
                    relation = (trigger, arg1, arg2, label)
                    if label == 'Quote':
                        character_lines[arg2]['who'] = arg1
                    if label == 'Spatial_Relation':
                        scenes.append(arg2)

    # Generate the cast list.
    cast_list_section = r"""Cast List:
Narrator - male or female - panned center
"""

    # Ping-pong the characters' panning between right and left.
    panned = 'right'
    for c in characters:
        if 'gender' not in characters[c]:
            gender = 'male or female'
        else:
            gender = characters[c]['gender'].lower()

        cast_list_section += '{} - {} - panned {}\n'.format(c.capitalize(), gender, panned)
        if panned == 'right':
            panned = 'left'
        else:
            panned = 'right'

    scenes_definition = r"""Scenes:
"""

    for n, scene in enumerate(scenes):
        scenes_definition += "{} - {} - fxive:{} - none\n".format(n + 1, scene, scene)

    # Scene introduction
    # TODO: Do it so that scenes follow the text.

    # Keep the correct order of lines.
    lines_order = list(quotesdict)

    # The line keys are of the form <*line0>, <*line1>, etc.; sort them by
    # the number just before the closing '>' (collecting all digits so that
    # indices >= 10 also sort correctly).
    lines_order = sorted(lines_order,
                         key=lambda x: int(''.join(ch for ch in x if ch.isdigit())))

    lines_section = r"""Script:
--- Scene 1 ---
"""

    for l in lines_order:
        if l[1] == 'n':
            lines_section += "[Narrator] {}\n".format(quotesdict[l])
        elif l[1] == 'c':
            lines_section += "[{}] {}\n".format(
                character_lines[l[:-1]]['who'].capitalize(), quotesdict[l])

    script = cast_list_section + '\n' + scenes_definition + '\n' + lines_section

    # Create the transcript.
    lines = []

    for key in quotesdict:
        ldict = {}

        cline = key[:-1]  # Remove the trailing dot.

        if cline[1] == 'c':
            ldict['cast'] = character_lines[cline]['who'].capitalize()
        else:
            ldict['cast'] = 'Narrator'

        lineno = cline.replace('<', '').replace('>', '')
        ldict['filename'] = '{}.wav'.format(lineno)
        ldict['line'] = quotesdict[key]
        lines.append(ldict)

    # Create the sfx dataframe.
    sfx = []
    for scene in scenes:
        sfx_dict = {'sfx': scene, 'url': 'http://edit_me'}
        sfx.append(sfx_dict)

    sfx_df = pd.DataFrame.from_records(sfx)

    transcript_df = pd.DataFrame.from_records(lines)
    return script, transcript_df, sfx_df


if __name__ == "__main__":
    argparser = argparse.ArgumentParser()
    argparser.add_argument('input_annotation_path',
                           help='.ann file with annotation')
    argparser.add_argument('input_json_path',
                           help='.json file containing the character quotes')

    args = argparser.parse_args()
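
    # The quotes .json is assumed, from how its keys are used above, to map
    # placeholder keys to spoken text, e.g. (hypothetical content):
    #
    #   {
    #       "<nline0>.": "It was a quiet evening in the house.",
    #       "<cline1>.": "Is anyone there?"
    #   }
    #
    # where 'n' marks a narrator line, 'c' a character line, and the number
    # gives the line order.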

    # Load the annotation and the quotes dictionary.
    with open(args.input_annotation_path) as f:
        annot = f.read()

    with open(args.input_json_path) as f:
        quotesdict = json.load(f)

    script, transcript_df, sfx_df = annotation2script(annot, quotesdict)

    # Write the script next to the input annotation, replacing its extension
    # with '_script.txt'.
    output_path = os.path.splitext(args.input_annotation_path)[0] + '_script.txt'

    with open(output_path, 'w') as f:
        f.write(script)

    output_dir = os.path.dirname(args.input_annotation_path)
    transcript_df.to_excel(os.path.join(output_dir, 'transcript.xls'))
    sfx_df.to_excel(os.path.join(output_dir, 'sfx.xls'))
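
# Example usage (the script name below is illustrative; the inputs are a
# brat-style story.ann and the accompanying story.json of character quotes):
#
#   python3 ann2script.py story.ann story.json
#
# This writes story_script.txt next to the annotation, plus transcript.xls
# and sfx.xls in the same directory.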