annotate demo/annotation2script.py.orig @ 1:eb3b846ae0ef tip

second commit
author Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date Wed, 16 May 2018 18:13:41 +0100
parents 4dad87badb0c
children
rev   line source
e@0 1 #!/usr/bin/env python3
e@0 2 # -*- coding: utf-8 -*-
e@0 3 """
e@0 4 Created on Tue May 1 17:00:26 2018
e@0 5
e@0 6 @author: Emmanouil Theofanis Chourdakis
e@0 7
e@0 8 Takes an .ann annotation and a .json character line
e@0 9 file and creates a _script.txt script file.
e@0 10
e@0 11 """
e@0 12
e@0 13 import argparse
e@0 14 import logging
e@0 15 import ner
e@0 16 from rel import *
e@0 17 import pypeg2 as pg
e@0 18 import json
e@0 19
e@0 20 logging.basicConfig(level=logging.INFO)
e@0 21
e@0 22
def _strip_leading_article(text):
    """Drop a leading 'a'/'the' from `text`, keeping the word that follows.

    Only the single word right after the article is kept, which matches
    how entity names have always been keyed in this module.  Text that
    consists of nothing but an article (or is empty) is returned unchanged
    instead of raising an IndexError.
    """
    words = text.split()
    if len(words) > 1 and words[0] in ('a', 'the'):
        return words[1]
    return text


def _line_number(key):
    """Return the integer formed by the last run of ASCII digits in `key`.

    Works for keys such as 'cline12' as well as '<cline12>'.

    Raises:
        ValueError: if `key` contains no digit at all.
    """
    digits = ''
    for ch in reversed(key):
        if '0' <= ch <= '9':
            digits = ch + digits
        elif digits:
            # The trailing run of digits has ended.
            break
    if not digits:
        raise ValueError('No line number found in key {!r}'.format(key))
    return int(digits)


def annotation2script(annot, quotesdict):
    """Create the text of a script from an annotation and its quotes.

    Args:
        annot: Contents of the annotation file as a string.  It is parsed
            with pypeg2 using the ``ner.AnnotationFile`` grammar.
        quotesdict: Mapping from line identifier to the text of that line.
            Identifiers starting with 'n' are emitted as narrator lines,
            identifiers starting with 'c' as character lines; any other
            identifier is ignored.  Lines are ordered by the number that
            ends the identifier.

    Returns:
        The script as a single string made of a cast list, a scene list
        and the ordered lines.

    Raises:
        KeyError: if an attribute or relation refers to an entity that was
            not defined, if a Gender/Age attribute targets something that
            is not a character, if the WHAT of a quote is not a character
            line, or if a character line in `quotesdict` has no speaker.
        ValueError: if an identifier in `quotesdict` contains no number.
    """
    logging.info('Parsing annotation')
    parsed = pg.parse(annot, ner.AnnotationFile)

    characters = {}
    character_lines = {}
    scenes = []

    # Maps every entity variable to its normalised text, since attributes
    # and relations point to entities through their variable.
    dictionary = {}

    # The parsed lines are visited in three passes (entities, attributes,
    # relations) because a relation or attribute may refer to an entity
    # that is only defined further down in the file.

    # Pass 1: entities.  Every annotation line holds a single object.
    for line in parsed:
        obj = line[0]
        if not isinstance(obj, ner.AnnotationTuple):
            continue

        annotation = _strip_leading_article(obj.annotation.lower())

        # Normalise *before* storing in `dictionary`, so the relation pass
        # looks character lines up under the same key they are stored with.
        if obj.type == 'Character_Line' and annotation.endswith('.'):
            annotation = annotation[:-1]

        dictionary[obj.variable] = annotation

        if obj.type == 'Character':
            characters[annotation] = {}
        elif obj.type == 'Character_Line':
            character_lines[annotation] = {}

    # Pass 2: attributes.  Gender and age are attached to the character
    # the attribute targets.
    for line in parsed:
        obj = line[0]
        if not isinstance(obj, ner.AttributeTuple):
            continue

        target = dictionary[obj.target]
        if obj.type == 'Gender':
            characters[target]['gender'] = obj.annotation
        elif obj.type == 'Age':
            characters[target]['age'] = obj.annotation

    # Pass 3: relations.  A relation has a trigger, first arguments
    # (Trajector / WHO) and second arguments (Landmark / WHAT); one
    # relation is built for every (first, second) pair.
    for line in parsed:
        obj = line[0]
        if not isinstance(obj, ner.RelationTuple):
            continue

        # Reset per relation so a relation without a trigger can never
        # inherit the label of the previous one.
        label = None
        arg1_candidates = []
        arg2_candidates = []

        for arg in obj.args:
            if arg.label == 'Says':
                label = 'Quote'
            elif arg.label == 'Spatial_Signal':
                label = 'Spatial_Relation'

            if arg.label in ('Trajector', 'WHO'):
                arg1_candidates.append(dictionary[arg.target])
            if arg.label in ('Landmark', 'WHAT'):
                arg2_candidates.append(dictionary[arg.target])

        for arg1 in arg1_candidates:
            for arg2 in arg2_candidates:
                if label == 'Quote':
                    character_lines[arg2]['who'] = arg1
                elif label == 'Spatial_Relation':
                    scenes.append(arg2)

    # Cast list: the narrator is centred, the characters alternate
    # between the right and the left channel.
    cast_list_section = 'Cast List:\nNarrator - male or female - panned center\n'
    panned = 'right'
    for name, attributes in characters.items():
        if 'gender' in attributes:
            gender = attributes['gender'].lower()
        else:
            gender = 'male or female'

        cast_list_section += '{} - {} - panned {}\n'.format(
            name.capitalize(), gender, panned)
        panned = 'left' if panned == 'right' else 'right'

    # Scene list, one scene per line.
    scenes_definition = 'Scenes:\n'
    for n, scene in enumerate(scenes):
        scenes_definition += '{} - {} - fxive:{} - none\n'.format(
            n + 1, scene, scene)

    # TODO: Introduce the scenes so that they follow the text.

    # Order the lines by their full line number, not just the last digit,
    # so that e.g. line 10 comes after line 9.
    lines_order = sorted(quotesdict, key=_line_number)

    logging.debug('Character lines: %s', character_lines)
    logging.debug('Lines order: %s', lines_order)

    lines_section = 'Script:\n--- Scene 1 ---\n'
    for key in lines_order:
        if key.startswith('n'):
            lines_section += '[Narrator] {}\n'.format(quotesdict[key])
        elif key.startswith('c'):
            speaker = character_lines[key]['who'].capitalize()
            lines_section += '[{}] {}\n'.format(speaker, quotesdict[key])

    return cast_list_section + '\n' + scenes_definition + '\n' + lines_section
e@0 168
e@0 169
if __name__ == "__main__":
    # Command line entry point: read an annotation file and a quotes file
    # and write the generated script next to the annotation file.
    import os

    argparser = argparse.ArgumentParser()
    argparser.add_argument('input_annotation_path',
                           help='.ann file with annotation')

    argparser.add_argument('input_json_path',
                           help='.json file containing the character quotes')

    args = argparser.parse_args()

    # Load annotation and quotes dictionary
    with open(args.input_annotation_path) as f:
        annot = f.read()

    with open(args.input_json_path) as f:
        quotesdict = json.load(f)

    script = annotation2script(annot, quotesdict)

    # Strip the real extension, whatever its length, rather than blindly
    # cutting the last four characters off the path.
    output_path = os.path.splitext(args.input_annotation_path)[0] + '_script.txt'

    with open(output_path, 'w') as f:
        f.write(script)
e@0 193