comparison demo/annotation2script.py.orig @ 0:4dad87badb0c

initial commit
author Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date Wed, 16 May 2018 17:56:10 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4dad87badb0c
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 """
4 Created on Tue May 1 17:00:26 2018
5
6 @author: Emmanouil Theofanis Chourdakis
7
8 Takes an .ann annotation and a .json character line
9 file and creates a _script.txt script file.
10
11 """
12
13 import argparse
14 import logging
15 import ner
16 from rel import *
17 import pypeg2 as pg
18 import json
19
# Configure the root logger at INFO level for the progress messages below.
20 logging.basicConfig(level=logging.INFO)
21
22
def _line_number(line_id):
    """Return the integer formed by the trailing digits of *line_id*.

    Using the whole trailing number (not only the last character) keeps
    ids such as ``cline2`` and ``cline10`` in numeric order.

    Raises:
        ValueError: if *line_id* does not end with a digit.
    """
    digits = line_id[len(line_id.rstrip('0123456789')):]
    return int(digits)


def annotation2script(annot, quotesdict):
    """Build the text of a script from an annotation and its quoted lines.

    Args:
        annot: Contents of the annotation file, parsed with
            ``pg.parse(annot, ner.AnnotationFile)``.
        quotesdict: Mapping from a line id to the text of that line. Ids
            starting with ``n`` are emitted as narrator lines, ids starting
            with ``c`` as character lines; other ids are ignored. Lines are
            ordered by the number at the end of the id.

    Returns:
        A string made of a "Cast List:" section, a "Scenes:" section and a
        "Script:" section, separated by blank lines.

    Raises:
        KeyError: if an attribute or relation refers to an entity id that
            was never defined, or a character line has no speaker.
        ValueError: if a key of *quotesdict* does not end with a number.
    """
    logging.info('Parsing annotation')
    parsed = pg.parse(annot, ner.AnnotationFile)

    characters = {}
    character_lines = {}
    scenes = []

    # Maps an entity id to its normalized text; attributes and relations
    # refer to entities through these ids.
    dictionary = {}

    # The parsed lines are visited in separate passes (entities, then
    # attributes, then relations) because a relation may refer to an
    # entity that is only defined further down the file.

    # Pass 1: entities.
    for line in parsed:
        # Every annotation line has a single object
        obj = line[0]
        if not isinstance(obj, ner.AnnotationTuple):
            continue

        annotation = obj.annotation.lower()

        # Drop a leading article. The length guard avoids an IndexError on
        # an empty annotation or one consisting only of the article.
        # NOTE(review): only the first word after the article is kept, as in
        # the original code ("the old man" -> "old") -- confirm intent.
        words = annotation.split()
        if len(words) > 1 and words[0] in ('a', 'the'):
            annotation = words[1]

        # Strip the trailing full stop *before* storing the name, so that
        # relations resolve to the same key used in `character_lines`.
        if obj.type == 'Character_Line' and annotation.endswith('.'):
            annotation = annotation[:-1]

        dictionary[obj.variable] = annotation

        if obj.type == 'Character':
            characters[annotation] = {}
        elif obj.type == 'Character_Line':
            character_lines[annotation] = {}

    # Pass 2: attributes (gender / age) attached to characters.
    for line in parsed:
        obj = line[0]
        if not isinstance(obj, ner.AttributeTuple):
            continue

        target = dictionary[obj.target]
        if obj.type not in ('Gender', 'Age'):
            continue

        if target not in characters:
            # Attributes on non-character entities do not affect the script.
            logging.warning('Ignoring %s attribute on non-character %r',
                            obj.type, target)
            continue

        characters[target][obj.type.lower()] = obj.annotation

    # Pass 3: relations.
    for line in parsed:
        obj = line[0]
        if not isinstance(obj, ner.RelationTuple):
            continue

        # Relations have a trigger, a first argument `arg1' and a second
        # argument `arg2'; every (arg1, arg2) combination is a relation.
        # `label` is reset for each relation so that a relation without a
        # known trigger is skipped instead of reusing the previous label.
        label = None
        arg1_candidates = []
        arg2_candidates = []

        for arg in obj.args:
            if arg.label == 'Says':
                label = 'Quote'
            elif arg.label == 'Spatial_Signal':
                label = 'Spatial_Relation'
            elif arg.label in ('Trajector', 'WHO'):
                arg1_candidates.append(dictionary[arg.target])
            elif arg.label in ('Landmark', 'WHAT'):
                arg2_candidates.append(dictionary[arg.target])

        if label == 'Quote':
            for speaker in arg1_candidates:
                for quoted in arg2_candidates:
                    character_lines.setdefault(quoted, {})['who'] = speaker
        elif label == 'Spatial_Relation':
            # NOTE(review): as in the original, each landmark is appended
            # once per trajector, so a place may be listed more than once.
            for _ in arg1_candidates:
                scenes.extend(arg2_candidates)

    # Generate cast list, ping-ponging the characters between right and left
    cast_lines = ['Cast List:',
                  'Narrator - male or female - panned center']
    panned = 'right'
    for name, attributes in characters.items():
        gender = attributes.get('gender', 'male or female').lower()
        cast_lines.append(
            '{} - {} - panned {}'.format(name.capitalize(), gender, panned))
        panned = 'left' if panned == 'right' else 'right'
    cast_list_section = '\n'.join(cast_lines) + '\n'

    # One scene per line (the newline was missing originally, which ran all
    # scene definitions together).
    scenes_definition = 'Scenes:\n' + ''.join(
        '{} - {} - fxive:{} - none\n'.format(number, scene, scene)
        for number, scene in enumerate(scenes, 1))

    # Scene introduction
    # TODO: Do it so that scenes follow the text

    # Keep the correct order in lines: sort the ids by their trailing number.
    lines_order = sorted(quotesdict, key=_line_number)

    logging.debug('Character lines: %s', character_lines)
    logging.debug('Lines order: %s', lines_order)

    script_lines = ['Script:', '--- Scene 1 ---']
    for line_id in lines_order:
        if line_id.startswith('n'):
            script_lines.append('[Narrator] {}'.format(quotesdict[line_id]))
        elif line_id.startswith('c'):
            speaker = character_lines.get(line_id, {}).get('who')
            if speaker is None:
                raise KeyError(
                    'No speaker found for character line {!r}'.format(line_id))
            script_lines.append(
                '[{}] {}'.format(speaker.capitalize(), quotesdict[line_id]))
    lines_section = '\n'.join(script_lines) + '\n'

    return cast_list_section + '\n' + scenes_definition + '\n' + lines_section
168
169
if __name__ == "__main__":
    # Command-line entry point: read the annotation and the quotes JSON,
    # build the script and write it next to the annotation file.
    import os

    argparser = argparse.ArgumentParser()
    argparser.add_argument('input_annotation_path',
                           help='.ann file with annotation')

    argparser.add_argument('input_json_path',
                           help='.json file containing the character quotes')

    args = argparser.parse_args()

    # Load annotation and quotes dictionary. The encoding is given
    # explicitly so the result does not depend on the platform default.
    with open(args.input_annotation_path, encoding='utf-8') as f:
        annot = f.read()

    with open(args.input_json_path, encoding='utf-8') as f:
        quotesdict = json.load(f)

    script = annotation2script(annot, quotesdict)

    # Replace whatever extension the input has (not just a 4-character
    # one such as ".ann") with the "_script.txt" suffix.
    output_path = (os.path.splitext(args.input_annotation_path)[0]
                   + '_script.txt')

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(script)
193