e@0
|
1 #!/usr/bin/env python3
|
e@0
|
2 # -*- coding: utf-8 -*-
|
e@0
|
3 """
|
e@0
|
4 Created on Tue May 1 17:00:26 2018
|
e@0
|
5
|
e@0
|
6 @author: Emmanouil Theofanis Chourdakis
|
e@0
|
7
|
e@0
|
8 Takes an .ann annotation and a .json character line
|
e@0
|
9 file and creates a _script.txt script file.
|
e@0
|
10
|
e@0
|
11 """
|
e@0
|
12
|
e@0
|
13 import argparse
|
e@0
|
14 import logging
|
e@0
|
15 import ner
|
e@0
|
16 from rel import *
|
e@0
|
17 import pypeg2 as pg
|
e@0
|
18 import pandas as pd
|
e@0
|
19 import json
|
e@0
|
20 import os
|
e@0
|
21
|
e@0
|
22 logging.basicConfig(level=logging.INFO)
|
e@0
|
23
|
e@0
|
24
|
e@0
|
def _line_index(key):
    """Return the integer index embedded in a quote key such as ``<cline12>.``.

    The trailing ``.`` and ``>`` are stripped and the run of digits that ends
    the remaining text is converted to ``int``. Raises ``ValueError`` when the
    key carries no index.
    """
    stem = key.rstrip('.').rstrip('>')
    digits = stem[len(stem.rstrip('0123456789')):]
    return int(digits)


def annotation2script(annot, quotesdict):
    """Turn an annotation and its quotes dictionary into a script.

    Parameters
    ----------
    annot:
        Text of the ``.ann`` annotation; it is parsed with
        ``pg.parse(annot, ner.AnnotationFile)``.
    quotesdict:
        Mapping from line keys to line text. Keys follow the pattern
        ``<nlineN>.`` (narrator) or ``<clineN>.`` (character line): the
        second character selects the speaker kind and the last character
        (the trailing dot) is dropped to obtain the annotation name.

    Returns
    -------
    tuple
        ``(script, transcript_df, sfx_df)`` where ``script`` is the text with
        the "Cast List", "Scenes" and "Script" sections, ``transcript_df``
        has one row per entry of ``quotesdict`` (``cast``, ``filename``,
        ``line``) and ``sfx_df`` has one row per scene (``sfx``, ``url``).

    Raises
    ------
    KeyError
        If an attribute or relation points to an entity that was never
        defined, or a character line has no speaker assigned.
    ValueError
        If a key of ``quotesdict`` carries no numeric index.
    """
    logging.info('Parsing annotation')
    parsed = pg.parse(annot, ner.AnnotationFile)

    characters = {}
    places = {}  # collected while parsing; not used in the generated output
    character_lines = {}
    scenes = []

    # Maps every annotation variable to its (normalised) text, because
    # attributes and relations refer to entities through those variables.
    dictionary = {}

    # The parsed lines are visited in several passes: entities first, then
    # attributes, then relations, since a relation may refer to an entity
    # that is only defined further down the file.

    # Pass 1: entities. Every annotation line holds a single object.
    for line in parsed:
        obj = line[0]
        if not isinstance(obj, ner.AnnotationTuple):
            continue

        annotation = obj.annotation.lower()

        # Drop a leading article and keep the word that follows it. The
        # length guard avoids an IndexError on an empty annotation or on an
        # annotation that consists of the article alone.
        words = annotation.split()
        if len(words) > 1 and words[0] in ('a', 'the'):
            annotation = words[1]

        dictionary[obj.variable] = annotation

        if obj.type == 'Character':
            characters[annotation] = {}
        elif obj.type == 'Character_Line':
            character_lines[annotation] = {}
        elif obj.type == 'Place':
            places[annotation] = {}

    # Pass 2: attributes (gender / age) attached to characters.
    for line in parsed:
        obj = line[0]
        if not isinstance(obj, ner.AttributeTuple):
            continue

        target = dictionary[obj.target]
        value = obj.annotation

        if obj.type == 'Gender':
            characters[target]['gender'] = value
        elif obj.type == 'Age':
            characters[target]['age'] = value

    # Pass 3: relations. A relation has a trigger plus candidates for a first
    # and a second argument; every (arg1, arg2) combination is processed.
    for line in parsed:
        obj = line[0]
        if not isinstance(obj, ner.RelationTuple):
            continue

        # Reset per relation so that a relation without a recognised trigger
        # neither fails on an unbound name nor inherits the previous label.
        label = None
        arg1_candidates = []
        arg2_candidates = []

        for arg in obj.args:
            if arg.label == 'Says':
                label = 'Quote'
            elif arg.label == 'Spatial_Signal':
                label = 'Spatial_Relation'

            if arg.label in ('Trajector', 'WHO'):
                arg1_candidates.append(dictionary[arg.target])
            if arg.label in ('Landmark', 'WHAT'):
                arg2_candidates.append(dictionary[arg.target])

        for arg1 in arg1_candidates:
            for arg2 in arg2_candidates:
                if label == 'Quote':
                    character_lines[arg2]['who'] = arg1
                elif label == 'Spatial_Relation':
                    scenes.append(arg2)

    # Cast list: the narrator sits in the centre, the characters alternate
    # between the right and the left channel.
    cast_list_section = "Cast List:\nNarrator - male or female - panned center\n"
    panned = 'right'
    for name, attributes in characters.items():
        if 'gender' in attributes:
            gender = attributes['gender'].lower()
        else:
            gender = 'male or female'

        cast_list_section += '{} - {} - panned {}\n'.format(
            name.capitalize(), gender, panned)
        panned = 'left' if panned == 'right' else 'right'

    # One scene per line; joining keeps the entries apart without adding a
    # trailing newline after the last one.
    scenes_definition = "Scenes:\n" + "\n".join(
        "{} - {} - fxive:{} - none".format(number, scene, scene)
        for number, scene in enumerate(scenes, start=1))

    # Scene introduction
    # TODO: Do it so that scenes follow the text

    # Order the lines by the full number in their key, so that line 10 comes
    # after line 9 rather than next to line 0.
    lines_order = sorted(quotesdict, key=_line_index)

    lines_section = "Script:\n--- Scene 1 ---\n"
    for key in lines_order:
        if key[1] == 'n':
            lines_section += "[Narrator] {}\n".format(quotesdict[key])
        elif key[1] == 'c':
            # key[:-1] removes the trailing dot to get the annotation name.
            speaker = character_lines[key[:-1]]['who'].capitalize()
            lines_section += "[{}] {}\n".format(speaker, quotesdict[key])

    script = cast_list_section + '\n' + scenes_definition + '\n' + lines_section

    # Transcript: one record per quote, in the order of `quotesdict`.
    lines = []
    for key in quotesdict:
        cline = key[:-1]  # Remove the trailing dot

        if cline[1] == 'c':
            cast = character_lines[cline]['who'].capitalize()
        else:
            cast = 'Narrator'

        lineno = cline.replace('<', '').replace('>', '')
        lines.append({
            'cast': cast,
            'filename': '{}.wav'.format(lineno),
            'line': quotesdict[key],
        })

    # Sound effects: one placeholder URL per scene, to be edited by hand.
    sfx = [{'sfx': scene, 'url': 'http://edit_me'} for scene in scenes]

    sfx_df = pd.DataFrame.from_records(sfx)
    transcript_df = pd.DataFrame.from_records(lines)
    return script, transcript_df, sfx_df
|
e@0
|
198
|
e@0
|
199
|
e@0
|
if __name__ == "__main__":
    # Command-line entry point: read the annotation and the quotes file,
    # build the script and write the three output files next to the input.
    argparser = argparse.ArgumentParser()
    argparser.add_argument('input_annotation_path',
                           help='.ann file with annotation')

    argparser.add_argument('input_json_path',
                           help='.json file containing the character quotes')

    args = argparser.parse_args()

    # Load annotation and quotes dictionary. The encoding is given explicitly
    # so the result does not depend on the platform default.
    with open(args.input_annotation_path, encoding='utf-8') as f:
        annot = f.read()

    with open(args.input_json_path, encoding='utf-8') as f:
        quotesdict = json.load(f)

    script, transcript_df, sfx_df = annotation2script(annot, quotesdict)

    # splitext removes whatever extension the input has, instead of blindly
    # cutting the last four characters of the path.
    output_path = os.path.splitext(args.input_annotation_path)[0] + '_script.txt'

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(script)

    # The spreadsheets go to the directory that holds the annotation file.
    # NOTE(review): recent pandas releases may not ship a writer for the
    # legacy .xls format - confirm against the installed version.
    output_dir = os.path.dirname(args.input_annotation_path)
    transcript_df.to_excel(os.path.join(output_dir, 'transcript.xls'))
    sfx_df.to_excel(os.path.join(output_dir, 'sfx.xls'))
|
e@0
|
226
|