e@0
|
1 import difflib
|
e@0
|
2 from sklearn.externals import joblib
|
e@0
|
3 from collections import defaultdict
|
e@0
|
4 import nltk
|
e@0
|
5 import numpy as np
|
e@0
|
6 import re
|
e@0
|
7 import librosa
|
e@0
|
8 import glob
|
e@0
|
9 import pandas as pd
|
e@0
|
10 from nltk.stem import porter
|
e@0
|
11 import sox
|
e@0
|
12 from scipy.io.wavfile import read as wavread
|
e@0
|
13 from scipy.io.wavfile import write as wavwrite
|
e@0
|
14 from numpy.core._internal import _gcd as gcd
|
e@0
|
15 from rtsfx import *
|
e@0
|
16
|
e@0
|
17 import subprocess
|
e@0
|
18 import os
|
e@0
|
19 import pypeg2 as pg
|
e@0
|
20 import random
|
e@0
|
21
|
e@0
|
22 VOICE_PROPERTIES = ['slow', 'deep', 'fast', 'stuttering']
|
e@0
|
23 PAUSE_PROPERTIES = ['short', 'long']
|
e@0
|
24 SEPARATORS = [ "[", "]", "(", ")", ":", "-"]
|
e@0
|
25 SFX_MOD_PROPERTIES = ['quiet', 'loud', 'silent']
|
e@0
|
26
|
e@0
|
27
|
e@0
|
28 FIXED_VOICES = False
|
e@0
|
29
|
e@0
|
30 FMV = 0
|
e@0
|
31 FFV = 0
|
e@0
|
32
|
e@0
|
33
|
e@0
|
34 male_voices = r"""
|
e@0
|
35 cmu_us_ahw_cg
|
e@0
|
36 cmu_us_awb_cg
|
e@0
|
37 cmu_us_bdl_cg
|
e@0
|
38 cmu_us_fem_cg
|
e@0
|
39 cmu_us_jmk_cg
|
e@0
|
40 cmu_us_ksp_cg
|
e@0
|
41 cmu_us_rms_cg
|
e@0
|
42 """.split()
|
e@0
|
43
|
e@0
|
44
|
e@0
|
45 female_voices = r"""
|
e@0
|
46 cmu_us_aup_cg
|
e@0
|
47 cmu_us_axb_cg
|
e@0
|
48 cmu_us_clb_cg
|
e@0
|
49 cmu_us_gka_cg
|
e@0
|
50 cmu_us_rxr_cg
|
e@0
|
51 cmu_us_slt_cg
|
e@0
|
52 """.split()
|
e@0
|
53
|
e@0
|
54
|
e@0
|
55
|
e@0
|
56 # male_voices = r"""
|
e@0
|
57 # cmu_us_ahw_cg
|
e@0
|
58 # cmu_us_fem_cg
|
e@0
|
59 # cmu_us_rms_cg
|
e@0
|
60 # """.split()
|
e@0
|
61 #
|
e@0
|
62 #
|
e@0
|
63 # female_voices = r"""
|
e@0
|
64 # cmu_us_aup_cg
|
e@0
|
65 # cmu_us_axb_cg
|
e@0
|
66 # cmu_us_rxr_cg
|
e@0
|
67 # cmu_us_slt_cg
|
e@0
|
68 # """.split()
|
e@0
|
69
|
e@0
|
70 fixed_male_voice = male_voices
|
e@0
|
71 fixed_female_voice = female_voices
|
e@0
|
72
|
e@0
|
73
|
e@0
|
74 # male_voices = r"""
|
e@0
|
75 # cmu_us_ahw_cg
|
e@0
|
76 # cmu_us_awb_cg
|
e@0
|
77 # cmu_us_bdl_cg
|
e@0
|
78 # cmu_us_fem_cg
|
e@0
|
79 # cmu_us_jmk_cg
|
e@0
|
80 # cmu_us_ksp_cg
|
e@0
|
81 # cmu_us_rms_cg
|
e@0
|
82 # """.split()
|
e@0
|
83
|
e@0
|
84
|
e@0
|
85 # male_voices = r"""
|
e@0
|
86 # cmu_us_ahw_cg
|
e@0
|
87 # """.split()
|
e@0
|
88 #
|
e@0
|
89 #
|
e@0
|
90 # female_voices = r"""
|
e@0
|
91 # cmu_us_ahw_cg
|
e@0
|
92 # """.split()
|
e@0
|
93
|
e@0
|
94 import matplotlib.pyplot as plt
|
e@0
|
95
|
e@0
|
def generate_speech_with_festival(voice,
                                  panning,
                                  line,
                                  sr=None
                                  ):
    """
    Synthesize one spoken line with festival and pan it to stereo.

    Builds a SABLE markup document around *line*, renders it through the
    ``text2wave`` command-line tool, loads the result back and pans it.

    :param voice: festival voice name (e.g. 'cmu_us_ahw_cg')
    :param panning: left-channel gain in [0, 1]; right channel gets 1 - panning
    :param line: the text (or SABLE fragment) to speak
    :param sr: target sampling rate; None keeps librosa's default
    :return: ((2, N) stereo audio array, sampling rate) tuple
    :raises RuntimeError: if festival exits with a non-zero status
    """
    header = r"""
<?xml version="1.0"?>
<!DOCTYPE SABLE PUBLIC "-//SABLE//DTD SABLE speech mark up//EN"
"Sable.v0_2.dtd"
[]>
<SABLE>
<SPEAKER NAME="{}">
""".format(voice)

    footer = r"""
</SPEAKER>
</SABLE>
"""

    # 0./1. Construct the sable document and store it in a temporary file.
    with open('/tmp/character_line.sable', 'w') as f:
        f.write(header + line + footer)

    # 2. Render the document with festival's text2wave.
    cmd = 'text2wave /tmp/character_line.sable -o /tmp/character_line.wav'
    print("Generating speech for line: '{}' with voice '{}' and panning '{}' ".format(line, voice, panning))
    if subprocess.call(cmd, shell=True) != 0:
        raise RuntimeError("Festival failed to execute.")

    # 3. Load the rendered wave back as mono, optionally resampling.
    if sr is None:
        wav, sr = librosa.load('/tmp/character_line.wav', mono=True)
    else:
        wav, sr = librosa.load('/tmp/character_line.wav', sr=sr, mono=True)

    # Pan to stereo: left channel scaled by panning, right by its complement.
    audio = np.vstack([panning * wav, (1. - panning) * wav])
    return audio, sr
|
e@0
|
152
|
e@0
|
153
|
e@0
|
def substr_features(sent,
                    lower=True, substr=None):
    """Return a binary presence dict for the substrings found in *sent*.

    :param sent: the string to search
    :param lower: if True, lowercase *sent* before matching
    :param substr: iterable of substrings to look for (default: none)
    :return: dict mapping each substring present in *sent* to 1
    """
    # Fix: the original used a mutable default argument (substr=[]),
    # which is shared across calls; default to None and create fresh.
    if substr is None:
        substr = []
    if lower:
        sent = sent.lower()
    freqs = defaultdict(int)
    for ss in substr:
        if ss in sent:
            freqs[ss] = 1
    return dict(freqs)
|
e@0
|
163
|
e@0
|
164
|
e@0
|
def features_dict_to_matrix(features, feature_labels):
    """Convert a list of per-sample feature dicts into a dense (N, M) matrix.

    :param features: sequence of {feature_name: value} dicts, one per sample
    :param feature_labels: iterable of feature names defining the columns
    :return: (matrix, column_label_list) tuple
    """
    idx_to_feat = list(feature_labels)
    feat_to_idx = {feat: idx for idx, feat in enumerate(idx_to_feat)}

    arr = np.zeros((len(features), len(idx_to_feat)))
    for row, feat_dict in enumerate(features):
        for col, feat in enumerate(idx_to_feat):
            if feat in feat_dict:
                arr[row, col] = feat_dict[feat]

    return arr, list(feat_to_idx.keys())
|
e@0
|
179
|
e@0
|
180
|
e@0
|
def similar(text1, text2,
            threshold=0.7  # threshold for similarity
            ):
    """Return True when the two strings are at least *threshold* similar.

    Similarity is difflib's SequenceMatcher ratio of the lowercased strings.
    """
    matcher = difflib.SequenceMatcher(None, text1.lower(), text2.lower())
    return matcher.ratio() >= threshold
|
e@0
|
188
|
e@0
|
189
|
e@0
|
class Master():
    """Applies a simple mastering chain (EQ + loudness) to a stereo downmix."""

    def __init__(self, downmix):
        # Stereo audio array to be mastered.
        self.downmix = downmix

    def get_mastered(self):
        """Run the downmix through a sox processing chain and return the result."""
        chain = sox.Transformer()

        # Remove everything below 80 Hz.
        chain.highpass(80)
        # Notch filter at 200 Hz to improve clarity.
        chain.bandreject(200)
        # Loudness control for under -9 dB.
        chain.loudness(gain_db=-9)

        # sox operates on files, so round-trip through /tmp.
        librosa.output.write_wav('/tmp/downmix_unnormalized.wav', self.downmix, sr=44100, norm=False)
        chain.build('/tmp/downmix_unnormalized.wav', '/tmp/downmix_normalized.wav')

        # Load the processed downmix back and return it.
        return librosa.core.load('/tmp/downmix_normalized.wav', sr=44100, mono=False)[0]
|
e@0
|
215
|
e@0
|
216
|
e@0
|
class Mixer():
    """Mixes a dict of equal-length stereo tracks down to one stereo array."""

    def __init__(self, multitrack):
        """
        :param multitrack: dict mapping track name -> (2, D) audio array
        """
        self.multitrack = multitrack

    def get_downmix(self):
        """Peak-normalize each track, attenuate the background, and sum.

        :return: (2, D) stereo downmix
        :raises ValueError: if there are no tracks to mix (the original
            raised NameError in that case)
        """
        if not self.multitrack:
            raise ValueError("Cannot downmix an empty multitrack.")

        # Determine mix length from the background track if present,
        # otherwise from any track (tracks share the same length).
        if 'background' in self.multitrack:
            D = self.multitrack['background'].shape[1]
        else:
            D = next(iter(self.multitrack.values())).shape[1]

        downmix = np.zeros((2, D))
        for name, track in self.multitrack.items():
            # 1. Normalize a copy — the original divided in place and
            # silently mutated the caller's arrays.
            max_val = np.max(np.abs(track))
            if max_val > 0:
                track = track / max_val

            # Background ambience is mixed in quietly.
            if name == 'background':
                track = track * 0.05

            downmix += track

        return downmix
|
e@0
|
250
|
e@0
|
251
|
e@0
|
def zafar(lx, rx, d1, g1, m, fc, G, da=0.007, fs=44100.):
    """ Rafii & Pardo Reverberator (2009) controlled by High Level parameters
    Inputs:
        lx : left channel input
        rx : right channel input
        d1 : delay of first comb filter in seconds
        g1 : gain of first comb filters
        da : delay of allpass filter in seconds
        G : dry/wet mix gain
        fc : lowpass filter cuttoff Hz
        m : difference between left and right channel phases
        fs : sampling rate

    Outputs:
        ly: left channel output
        ry: right channel output
    """

    # Convert the time-valued parameters from seconds to whole samples.
    d1 = int(d1 * fs)
    m = int(m * fs)
    da = int(da * fs)

    def calculate_parameters(d1, g1):
        # Derive comb delays d2..d6 by scaling d1 down by powers of 1.5,
        # nudging each until it is coprime with every earlier delay so the
        # echo periods of the comb bank do not coincide.

        d2 = int(round((1.5) ** (-1) * d1))

        while gcd(d2, d1) != 1:
            d2 += 1

        d3 = int(round((1.5) ** (-2) * d1))

        while gcd(d3, d2) != 1 or gcd(d3, d1) != 1:
            d3 += 1

        d4 = int(round((1.5) ** (-3) * d1))

        while gcd(d4, d3) != 1 or gcd(d4, d2) != 1 or gcd(d4, d1) != 1:
            d4 += 1

        d5 = int(round((1.5) ** (-4) * d1))

        while gcd(d5, d4) != 1 or gcd(d5, d3) != 1 or gcd(d5, d2) != 1 or gcd(d5, d1) != 1:
            d5 += 1

        d6 = int(round((1.5) ** (-5) * d1))
        while gcd(d6, d5) != 1 or gcd(d6, d4) != 1 or gcd(d6, d3) != 1 or gcd(d6, d2) != 1 or gcd(d6, d1) != 1:
            d6 += 1
        # Matching gain for each scaled comb delay.
        g2 = g1 ** (1.5) ** (-1) * g1
        g3 = g1 ** (1.5) ** (-2) * g1
        g4 = g1 ** (1.5) ** (-3) * g1
        g5 = g1 ** (1.5) ** (-4) * g1
        g6 = g1 ** (1.5) ** (-5) * g1

        return (d1, d2, d3, d4, d5, d6, g1, g2, g3, g4, g5, g6)

    def comb_array(x, g1, d1):
        # Bank of six parallel comb filters whose outputs are summed.

        (d1, d2, d3, d4, d5, d6, g1, g2, g3, g4, g5, g6) = calculate_parameters(d1, g1)

        c1out = comb(x, g1, d1)
        c2out = comb(x, g2, d2)
        c3out = comb(x, g3, d3)
        c4out = comb(x, g4, d4)
        c5out = comb(x, g5, d5)
        c6out = comb(x, g6, d6)

        Lc1 = len(c1out)
        Lc2 = len(c2out)
        Lc3 = len(c3out)
        Lc4 = len(c4out)
        Lc5 = len(c5out)
        Lc6 = len(c6out)

        # Each comb output has a different length; sum into the longest.
        Lc = max(Lc1, Lc2, Lc3, Lc4, Lc5, Lc6)

        y = np.zeros((Lc,))

        y[0:Lc1] = c1out
        y[0:Lc2] += c2out
        y[0:Lc3] += c3out
        y[0:Lc4] += c4out
        y[0:Lc5] += c5out
        y[0:Lc6] += c6out

        return y

    def comb(x, g, d):
        # Feedback comb filter: y[n] = x[n-d] + g*y[n-d], with a d-sample tail.
        LEN = len(x) + d
        # print d
        y = np.zeros((LEN,))
        for n in range(0, LEN):
            if n - d < 0:
                y[n] = 0
            else:
                y[n] = x[n - d] + g * y[n - d]

        return y

    def allpass(x, g, d):
        # Allpass filter with a d-sample tail beyond the input length.
        LENx = len(x)
        LENy = LENx + d
        y = np.zeros((LENy,))
        for n in range(0, LENy):
            if n - d < 0:
                y[n] = -g * x[n]
            elif n >= LENx:
                # Past the end of the input only the delayed paths remain.
                y[n] = x[n - d] + g * y[n - d]
            else:
                y[n] = x[n - d] - g * x[n] + g * y[n - d]

        return y

    def lowpass(x, g):
        # One-pole lowpass: y[n] = (1-g)*x[n] + g*y[n-1].
        LEN = len(x)
        y = np.zeros((LEN,))

        for n in range(0, LEN):
            if n - 1 < 0:
                y[n] = (1 - g) * x[n]
            else:
                y[n] = (1 - g) * x[n] + g * y[n - 1]

        return y

    # Allpass gain constant.
    ga = 1. / np.sqrt(2.)

    # Mono sum of the two input channels feeds the comb bank.
    cin = 0.5 * lx + 0.5 * rx
    cout = comb_array(cin, g1, d1)

    # Decorrelate the channels by offsetting the allpass delay by +/- m/2.
    ra = allpass(cout, ga, da + m // 2)
    la = allpass(cout, ga, da - m // 2)

    # One-pole lowpass coefficient realizing the cutoff frequency fc.
    gc = 2 - np.cos(2 * np.pi * fc / fs) - np.sqrt((np.cos(2 * np.pi * fc / fs) - 2) ** 2 - 1)

    ral = lowpass(ra, gc)
    lal = lowpass(la, gc)

    # Scale the wet signal by the mix gain.
    ralg = G * ral
    lalg = G * lal

    # Dry/wet mix, trimming the wet tail back to the input length.
    ry = ralg[0:len(rx)] + (1 - G) * rx
    ly = lalg[0:len(lx)] + (1 - G) * lx

    return np.vstack([ry, ly])
|
e@0
|
396
|
e@0
|
def get_reverb_from_tags(xl, xr, tags, fs=44100):
    """Apply the crowd-sourced reverb preset matching *tags* to a stereo pair.

    Looks up 'contributions.csv' for rows whose 'agreed' tag list contains
    every requested tag, takes the first match's comma-separated parameters
    and feeds them to zafar().

    :param xl: left channel input
    :param xr: right channel input
    :param tags: tags that must all appear in a row's agreed column
    :param fs: sampling rate
    :return: stereo reverberated audio from zafar()
    """
    reverb_csv = 'contributions.csv'
    table = pd.read_csv(reverb_csv).fillna("")

    # Collect parameter strings from every row matching all tags.
    params = []
    for n in range(len(table)):
        agreed = table['agreed'].iloc[n].split(',')
        if all(tag in agreed for tag in tags):
            params.append(table['param'].iloc[n])

    # Use the first match; parameters are stored comma-separated.
    d1, g1, m, fc, G = [float(p) for p in params[0].split(',')]
    return zafar(xl, xr, d1, g1, m, fc, G, fs=fs)
|
e@0
|
408
|
e@0
|
409
|
e@0
|
def fade(x, fade_in, fade_out, sr=44100):
    """
    Apply a linear fade-in / fade-out envelope to audio array *x*.

    :param x: 1-D audio array
    :param fade_in: fade-in duration in seconds
    :param fade_out: fade-out duration in seconds
    :param sr: sampling rate used to convert seconds into samples
    :return: x multiplied elementwise by the envelope
    """
    if len(x) == 0:
        return x

    n_in = int(fade_in * sr)
    n_out = int(fade_out * sr)

    env = np.ones_like(x)
    # Ramp up from 0 over the fade-in region.
    for k in range(n_in):
        env[k] = k * 1. / n_in

    # Ramp down towards 0 over the fade-out region.
    for k in range(n_out):
        env[len(env) - n_out + k] = 1 - 1. / n_out * k
    return env * x
|
e@0
|
429
|
e@0
|
430
|
e@0
|
def slope(x, slope_in, slope_out, delay=1.0, v=0.1, sr=44100):
    """
    Creates a slope in slope out envelope

    The envelope (after the final +v offset) holds at 1.0 for *delay*
    seconds at each end, ramps down over *slope_in* seconds to the floor
    value *v*, stays at *v* through the middle, and ramps back up over
    *slope_out* seconds before the trailing hold.
    """

    if len(x) == 0:
        return x

    # Convert second-valued parameters to sample counts.
    delay_samples = int(delay * sr)
    slope_in_samples = int(slope_in * sr)
    slope_out_samples = int(slope_out * sr)

    outp = np.zeros_like(x)

    for n in range(len(outp)):
        if n >= 0 and n < delay_samples:
            # Leading hold region (becomes 1.0 once v is added back).
            outp[n] = 1.0 - v
        elif n >= delay_samples and n < delay_samples + slope_in_samples:
            # Linear ramp down towards the floor.
            outp[n] = (1. - v) - (1. - v) / slope_in_samples * (n - delay_samples)
        elif n >= delay_samples + slope_in_samples and n < len(outp) - delay_samples - slope_out_samples:
            # Middle region sits at the floor (0 here, v after the offset).
            outp[n] = 0
        elif n >= len(outp) - delay_samples - slope_out_samples and n < len(outp) - delay_samples:
            # Linear ramp back up towards the trailing hold.
            outp[n] = (1. - v) / slope_out_samples * (n - len(outp) + delay_samples + slope_out_samples)
            if outp[n] < 0:
                # Debug guard: the ramp should never go negative; bail out
                # of the loop if it does.
                print(n)
                break
        elif n >= len(outp) - delay_samples:
            # Trailing hold region.
            outp[n] = 1.0 - v

    # Lift the whole envelope by the floor value.
    outp += v

    return outp * x
|
e@0
|
463
|
e@0
|
464
|
e@0
|
def get_background(
        fname,
        duration,
        ft=0.5,
):
    """
    Load a background sound and fit it to *duration* samples.

    Files shorter than the target are tiled with overlapping repeats
    (overlap of *ft* seconds); files longer than the target are truncated
    and faded at both ends.

    :param fname: audio file to load
    :param duration: target length in samples
    :param ft: fade/overlap time in seconds
    """
    print(fname)
    bg, sr = librosa.load(fname)
    f_s = int(ft * sr)  # overlap length in samples
    y = bg
    z = np.zeros((duration,))
    if len(y) < len(z):
        # Tile the faded clip with f_s samples of overlap between repeats.
        y = fade(y, ft, ft, sr)
        for n in range(0, len(z) - len(y), len(y) - f_s):
            z[n:n + len(y)] += y
        # NOTE(review): relies on the loop variable n leaking out of the
        # for loop; if the loop body never executes this raises NameError.
        n += len(y) - f_s
        if len(y) > len(z[n:]):
            # Final partial repeat, truncated to fit the remaining space.
            z[n:] += y[:len(z[n:])]
        else:
            z[n:n + len(y)] += y

        z = fade(z, ft, ft, sr=sr)

    elif len(y) > len(z):
        z += fade(y[0:len(z)], ft, ft, sr=sr)
    # NOTE(review): when len(y) == len(z) exactly, z stays all zeros —
    # presumably unintended; confirm whether y should be passed through.
    return z
|
e@0
|
490
|
e@0
|
491
|
e@0
|
def compose_bg_scene(bgs, background_changes, D, delay=3*44100):
    """Build a stereo background track of length *D* from scene changes.

    :param bgs: dict mapping scene number -> background filename
    :param background_changes: list of (start_sample, scene_number) tuples
    :param D: total length of the mix in samples
    :param delay: kept for interface compatibility
    :return: (2, D) background array
    """
    out = np.zeros((2, D))
    for idx in range(len(background_changes)):
        start, scene = background_changes[idx]
        fname = bgs[scene]

        # A segment lasts until the next scene change, or to the end.
        if idx < len(background_changes) - 1:
            duration = background_changes[idx + 1][0] - start
        else:
            duration = D - start

        segment = get_background(fname, duration)
        # Same mono background on both channels.
        out[0, start:start + len(segment)] = segment
        out[1, start:start + len(segment)] = segment
    #z = fade(z, 1., 1.)
    return out
|
e@0
|
508
|
e@0
|
509
|
e@0
|
class Director():
    def __init__(self, script, sound_dir, speech_dir):
        """
        Gets a list of script

        Walks the script's definitions, assigning a voice and stereo panning
        to every cast member and a background/reverb preset to every scene,
        then creates the manager objects used to render each event.

        :param sound_dir: directory of sound files
        :param speech_dir: directory of speech files
        :param script: the script
        """

        # Gets character definitions

        ## TODO: Change this to also have accents

        self.voice_params = {}   # cast name -> (festival voice, panning)
        self.scene_params = {}   # scene number -> reverb tag list
        self.bg_params = {}      # scene number -> background filename

        # This holds the fxive sound engine if available
        self.fxive = None

        # FFV/FMV index into the fixed voice lists when FIXED_VOICES is set.
        global FFV, FMV
        for d in script['definitions']:
            if d['type'] == 'scene_definition':
                number = int(d['number'])
                tags = d['tags']
                filename = d['filename']

                # If it starts with fxive: then get the preset from fxive
                if 'fxive:' == filename[:6]:
                    print("Fetching sample from fxive...")
                    if self.fxive is not None:
                        self.bg_params[number] = self.fxive.get_sfx(filename[6:])
                    else:
                        # Lazily create the FXive engine on first use.
                        self.fxive = FXive(sfx_path=os.path.join(sound_dir, 'sfx.xls'))
                        self.bg_params[number] = self.fxive.get_sfx(filename[6:])
                else:
                    self.bg_params[number] = filename

                # 'none' means the scene gets no reverb tags.
                if 'none' in tags:
                    self.scene_params[number] = []
                else:
                    self.scene_params[number] = tags

            if d['type'] == 'cast_definition':
                # print("-----------------")
                name = d['name']
                # A definition may list several options; pick one at random.
                gender = random.sample(d['gender'], 1)[0]
                panning = random.sample(d['panning'], 1)[0]

                # Map the symbolic panning position to a left-channel gain.
                if panning == 'left':
                    panning = 0.01
                elif panning == 'right':
                    panning = 0.99
                elif panning in ['center', 'centre']:
                    panning = 0.5
                #print(gender, panning)
                if gender == 'female':
                    # Choose a random female voice
                    voice = random.sample(female_voices, 1)[0]

                    if FIXED_VOICES:
                        # Deterministic assignment: take the next fixed voice.
                        voice = fixed_female_voice[FFV]
                        FFV += 1
                else:
                    # Choose a random male voice
                    voice = random.sample(male_voices, 1)[0]

                    if FIXED_VOICES:
                        voice = fixed_male_voice[FMV]
                        FMV += 1

                self.voice_params[name] = (voice, panning)

        # if character_panning == 0.5:
        #     character_panning = 0.1
        # elif character_panning == 0.1:
        #     character_panning = 0.9
        # elif character_panning == 0.9:
        #     character_panning = 0.1

        if self.fxive is not None:
            self.fxive.close()

        self.script = script
        self.sound_dir = sound_dir
        self.speech_dir = speech_dir

        # Delegate audio retrieval/generation to the specialized managers.
        self.musicmanager = MusicManager(sound_dir)
        self.pausemanager = PauseManager()
        self.speechmanager = SpeechManager(speech_dir, self.voice_params)
        self.sfxmanager = SoundManager(sound_dir)

    def get_voice_params(self, name):
        # Return the (voice, panning) tuple assigned to a cast member.
        return self.voice_params[name]

    def generate_multitrack(self):
        """Render every script event to audio and lay the clips end-to-end.

        :return: dict mapping track name -> (2, D) audio array, including a
            'background' track composed from the scene changes
        """
        # Shift by 4 seconds
        D = 0             # running total length of the programme in samples
        P = []            # ordered (track name, audio) programme entries
        track_names = []

        # print(self.script['script'])

        current_scene = 1
        current_reverb_tags = ""

        scene_changes = []   # (start sample, scene number) pairs

        # Create a program of scripts
        for s in self.script['script']:
            if s['type'] == 'music':
                name = 'music'
                audio = self.musicmanager.retrieve_music(s)
            elif s['type'] == 'sfx':
                name = s['name'].lower()
                audio = self.sfxmanager.retrieve_sfx(s)
            elif s['type'] == 'scene_change':
                # Scene changes emit no audio; they switch the reverb tags
                # and record where the new background should start.
                current_scene = int(s['number'])
                #print(current_scene)
                #print(self.scene_params)
                current_reverb_tags = self.scene_params[current_scene]

                print("Changed to scene {} with reverb tags: {}".format(current_scene, current_reverb_tags))
                scene_changes.append((D, current_scene))
                continue
            elif s['type'] == 'pause':
                name = 'pause'
                audio = self.pausemanager.retrieve_pause(s)
            elif s['type'] == 'cast_line':
                print(s)
                name = s['name'].lower()
                audio = self.speechmanager.retrieve_speech(s)
                if len(current_reverb_tags) > 0:
                    print("Applying reverberation with tags: {}".format(current_reverb_tags))
                    print(audio.shape)
                    # The narrator stays dry; everyone else gets scene reverb.
                    if s['name'] != 'Narrator':
                        audio = get_reverb_from_tags(audio[0,:], audio[1,:], current_reverb_tags)

            if name not in track_names:
                track_names.append(name)
            D += audio.shape[1]
            P.append((name,audio))

        multitrack = {t: np.zeros((2, D)) for t in track_names}

        print("Composing bg scene")
        multitrack['background'] = compose_bg_scene(self.bg_params, scene_changes, D)

        # Place each clip at its cumulative offset on its own track.
        idx = 0
        for p in P:
            multitrack[p[0]][:, idx:idx+p[1].shape[1]] = p[1]
            idx += p[1].shape[1]

        return multitrack
|
e@0
|
666
|
e@0
|
667
|
e@0
|
668
|
e@0
|
class Generator():
    """Loads the bundled demo script from disk."""

    def __init__(self):
        pass

    def generate(self):
        """Return the raw text of the example script file."""
        script_path = '../data/scripts/The Mystery Of Spooky Hill.txt'
        with open(script_path) as f:
            return f.read()
|
e@0
|
676
|
e@0
|
677
|
e@0
|
class PauseManager():
    """Generates silent stereo audio for script pauses."""

    def __init__(self):
        """
        Manages pauses
        """

    def retrieve_pause(self, input_):
        """Return stereo silence for a pause directive.

        :param input_: dict with a 'duration' key ('short' or 'long')
        :return: (2, N) array of zeros at 44.1 kHz
        """
        duration_str = input_['duration']
        if duration_str == 'long':
            duration = 3.0
        elif duration_str == 'short':
            duration = 1.0
        else:
            # Fix: unknown duration strings used to leave `duration`
            # unbound and raise NameError; fall back to a short pause.
            duration = 1.0

        audio = np.zeros((2, int(duration*44100)))
        return audio
|
e@0
|
693
|
e@0
|
class SpeechManager():
    """Retrieves recorded cast lines, falling back to festival synthesis."""

    def __init__(self, speech_folder, voice_params):
        """
        :param speech_folder: the folder the speech .mp3s are in
        :param voice_params: dict mapping cast name -> (voice, panning)
        """
        self.voice_params = voice_params
        self.speech_folder = speech_folder
        transcript_path = os.path.join(speech_folder, 'transcript.xls')
        try:
            self.transcriptions = pd.read_excel(transcript_path)
        except Exception:
            # Fix: was a bare `except:` (also caught KeyboardInterrupt).
            # If the transcript file does not exist or cannot be parsed,
            # every line will be synthesized instead of retrieved.
            self.transcriptions = None

        print('Transcription file:' + str(transcript_path))
        print('Transcriptions:' + str(self.transcriptions))

    def retrieve_speech(self, input_):
        """Return stereo audio for a cast line.

        Looks for the most similar transcribed line of this cast member
        and loads its recording; otherwise synthesizes the line.

        :param input_: dict with 'name' (cast member) and 'line' (text)
        :return: (2, N) stereo audio array
        """
        cast_member = input_['name']
        cast_voice = self.voice_params[cast_member][0]    # 0th element is voice
        cast_panning = self.voice_params[cast_member][1]  # 1st element is panning
        cast_line = input_['line']

        can_find_entry = False

        # Fix: the original indexed self.transcriptions unconditionally and
        # crashed when the transcript file was missing (transcriptions None).
        if self.transcriptions is not None:
            cast_lines_df = self.transcriptions[
                self.transcriptions['cast'].map(lambda x: x.lower()) == cast_member.lower()]

            # Score every transcribed line of this cast member.
            similarities = {}
            for n in cast_lines_df.index:
                similarities[n] = difflib.SequenceMatcher(
                    None, cast_line, cast_lines_df['line'].loc[n]).ratio()

            # Fix: guard against an empty candidate set (max() would raise).
            if similarities:
                chosen_entry = max(similarities, key=lambda x: similarities[x])
                chosen_file = cast_lines_df['filename'].loc[chosen_entry]
                chosen_line = cast_lines_df['line'].loc[chosen_entry]

                # Only load the recording when it is actually similar enough
                # (the original loaded it even when it was then discarded).
                if similar(cast_line, chosen_line):
                    chosen_file_path = os.path.join(self.speech_folder, chosen_file)
                    print("Retrieving: " + chosen_file_path)

                    if os.path.exists(chosen_file_path):
                        can_find_entry = True
                        audio, sr = librosa.core.load(chosen_file_path, sr=44100, mono=False)
                        #print("panning: {}".format(cast_panning))
                        audio[0,:] *= cast_panning
                        audio[1,:] *= (1-cast_panning)

        if not can_find_entry:
            # 1. Generate the line with festival (it applies panning itself).
            audio, sr = generate_speech_with_festival(cast_voice, cast_panning, cast_line, sr=44100)

        return audio
|
e@0
|
760
|
e@0
|
761
|
e@0
|
class SoundManager():
    """Indexes .mp3 sound effects by filename keywords and retrieves them."""

    def __init__(self, sound_folder):
        """
        :param sound_folder: the folder the sfx .mp3s are in
        """
        self.sound_folder = sound_folder
        # Fix: use os.path.join so the folder works with or without a
        # trailing slash (the original required 'folder/' to glob at all).
        self.sound_file_names = [os.path.basename(f)
                                 for f in glob.glob(os.path.join(sound_folder, '*.mp3'))]

        # Fix: always define lookup so retrieve_sfx fails with an empty
        # search rather than AttributeError when the directory is empty.
        self.lookup = []

        # If the directory is empty, return.
        if len(self.sound_file_names) == 0:
            return

        # Lookup strings: letters-only words from each filename.
        strings = []
        for f in self.sound_file_names:
            strings.append(" ".join(re.findall('[A-Za-z]+', f)).lower())

        # Find the substring common to all names (e.g. a library prefix).
        string1 = strings[0]
        for n in range(1, len(strings)):
            string2 = strings[n]
            match = difflib.SequenceMatcher(None, string1, string2).find_longest_match(
                0, len(string1), 0, len(string2))
            string1 = string2[match.b:match.b + match.size]

        # Remove most common substring
        ## TODO: Check here please, should we remove it?
        # strings = [s.replace(string1, '') for s in strings]
        self.lookup = strings

    def retrieve_sfx(self, input_):
        """
        Find the sound file best matching the query and load it.

        :param input_: dictionary object from parser (uses 'name')
        :return: audio matrix containing audio file
        """
        query = input_['name'].lower()
        # Stem words before checking for similarity.
        stemmer = porter.PorterStemmer()

        qwords = [stemmer.stem(q).lower() for q in query.split()]
        similarities = []

        # If the words in the query are available in the words in the filename,
        # then increase the score; finally divide by the total number of words
        # (Jaccard-like similarity).
        for s in self.lookup:
            words = [stemmer.stem(w).lower() for w in s.split()]
            similarities.append(0.)

            for qw in qwords:
                for w in words:
                    similarities[-1] += difflib.SequenceMatcher(None, qw, w).ratio()

            similarities[-1] /= float(len(words))

        # argmax: index of the first maximal similarity
        chosen = max(range(len(similarities)), key=lambda n: similarities[n])
        # Fix: join with os.path so the path is valid without a trailing slash.
        chosen_fname = os.path.join(self.sound_folder, self.sound_file_names[chosen])
        audio = librosa.core.load(chosen_fname, sr=44100, mono=False)
        return audio[0]
|
e@0
|
829
|
e@0
|
830
|
e@0
|
class MusicManager():
    """Indexes .mp3 music files by filename keywords and retrieves them."""

    def __init__(self, sound_folder):
        """
        :param sound_folder: the folder the music .mp3s are in
        """
        self.sound_folder = sound_folder
        self.sound_file_names = [os.path.basename(f)
                                 for f in glob.glob(os.path.join(sound_folder, '*.mp3'))]

        # Always define lookup so retrieve_music fails with an empty search
        # rather than AttributeError when the directory is empty.
        self.lookup = []

        # If the directory is empty, return.
        if len(self.sound_file_names) == 0:
            return

        # Lookup strings: letters-only words from each filename.
        strings = []
        for f in self.sound_file_names:
            strings.append(" ".join(re.findall('[A-Za-z]+', f)).lower())

        # Find the substring common to all names (e.g. a library prefix) …
        string1 = strings[0]
        for n in range(1, len(strings)):
            string2 = strings[n]
            match = difflib.SequenceMatcher(None, string1, string2).find_longest_match(
                0, len(string1), 0, len(string2))
            string1 = string2[match.b:match.b + match.size]

        # … and strip it, so lookups match on the distinctive words only.
        strings = [s.replace(string1, '') for s in strings]
        self.lookup = strings

    def retrieve_music(self, input_):
        """
        Find the music file best matching the query and load it.

        :param input_: dictionary object from parser (uses 'name')
        :return: audio matrix containing audio file
        """
        query = input_['name'].lower() + ' music'

        similarities = []

        # Count exact word hits, normalized by the number of words in the
        # candidate (Jaccard-like similarity).
        for s in self.lookup:
            qwords = query.split()
            words = s.split()
            similarities.append(0.)

            for qw in qwords:
                if qw in words:
                    similarities[-1] += 1.

            similarities[-1] /= float(len(words))

        # argmax: index of the first maximal similarity
        chosen = max(range(len(similarities)), key=lambda n: similarities[n])
        # Fix: the original concatenated folder + filename with no '/'
        # separator (the glob used '/*.mp3'), producing a broken path.
        chosen_fname = os.path.join(self.sound_folder, self.sound_file_names[chosen])
        audio = librosa.core.load(chosen_fname, sr=44100, mono=False)
        return audio[0]
|
e@0
|
893
|
e@0
|
894
|
e@0
|
895 # Classes for aiding parsing
|
e@0
|
class Environment:
    """Parsed environment (scene ambience) definition from the script."""

    def __init__(self, varname, name):
        self.varname = varname
        self.name = name

    def to_json(self):
        """Serialize as an environment_definition dict."""
        return {"type": "environment_definition", "name": self.name}
|
e@0
|
903
|
e@0
|
904
|
e@0
|
class Sound_Effect:
    """Parsed sound-effect cue from the script."""

    def __init__(self, varname, name, pos):
        self.name = name
        self.varname = varname
        self.keywords = list(name.split())

        # Set the end to pos - 1 so the first character of the
        # next line won't be omitted.
        self.pos = (pos, pos - 1)

    def to_json(self):
        """Serialize as paired sfx definition/playback dicts."""
        joined = ' '.join(self.keywords)
        return {
            'definition': {
                'type': 'sfx_definition',
                'name': joined,
                'optional': False
            },
            'playback': {
                'type': 'sfx',
                'name': joined
            }
        }

    def add_keywords(self, keywords):
        """Prepend extra keywords (each inserted at the front in turn)."""
        for kw in keywords:
            self.keywords.insert(0, kw)

    def __str__(self):
        return "({} FX)".format(' '.join(self.keywords))

    def definition(self):
        """Keywords joined into a single definition string."""
        return ' '.join(self.keywords)

    def script(self):
        """Script-text representation of this cue."""
        return str(self)
|
e@0
|
941
|
e@0
|
942
|
e@0
|
class Character_Line:
    """A spoken line in the script, attributed to a character."""

    def __init__(self, varname, txt, pos_start, pos_end):
        self.varname = varname
        # Join wrapped source lines into sentences, capitalizing each.
        self.txt = '. '.join([sent.capitalize() for sent in txt.split('\n')])
        # Ensure the line ends with a period. Fix: `self.txt[-1]` raised
        # IndexError on empty text; endswith handles that case.
        if not self.txt.endswith('.'):
            self.txt += '.'

        self.character = None   # set later via set_character()
        self.pos = (pos_start, pos_end)

    def set_character(self, character):
        """Attach the Character object that speaks this line."""
        self.character = character

    def __str__(self):
        return "{}: {}".format(self.character.name, self.txt)

    def script(self):
        """Script-text representation: '[Name] line.'."""
        return "[{}] {}".format(self.character.name, self.txt)

    def set_pos(self, start, end):
        """Update the (start, end) character positions in the source text."""
        self.pos = (start, end)

    def to_json(self):
        """Serialize as a cast_line playback dict."""
        return {'playback': {"type": "cast_line", "name": self.character.name, "line": self.txt}}
|
e@0
|
967
|
e@0
|
968
|
e@0
|
class Character:
    """A cast member (speaker) extracted from the annotations."""

    def __init__(self, varname, name):
        """
        varname -- annotation variable identifying this entity
        name    -- character name; each word is title-cased
        """
        self.name = ' '.join([n.capitalize() for n in name.split()])
        self.varname = varname
        # Empty string means "not specified"; see definition()/to_json().
        self.gender = ''
        self.age = ''

    def set_gender(self, gender):
        self.gender = gender

    def set_age(self, age):
        self.age = age

    def definition(self):
        """Return the cast-list line, e.g. 'John - male'."""
        str_ = self.name + ' - '
        if self.gender == '':
            str_ += 'male or female'
        else:
            str_ += self.gender

        return str_

    def __str__(self):
        # Bug fix: the original returned __repr__(self), which raised
        # NameError because __repr__ is not a module-level name.
        return repr(self)

    def __repr__(self):
        return "[{}:{}/{}/{}]".format(self.varname, self.name, self.gender, self.age)

    def to_json(self):
        """Serialize; gender/age keys are included only when set."""
        json_dict = {"type": "cast_definition", "name": self.name}
        if self.gender != '':
            json_dict['gender'] = self.gender
        if self.age != '':
            json_dict['age'] = self.age

        return json_dict
|
e@0
|
1005
|
e@0
|
1006
|
e@0
|
class KDuration(pg.Keyword):
    """Grammar keyword for a pause duration: 'long' or 'short'."""
    grammar = pg.Enum(pg.K('long'), pg.K('short'))
|
e@0
|
1009
|
e@0
|
1010
|
e@0
|
class Pause(pg.Plain):
    """Parses '(pause)' or '(long pause)'; optional duration stored in .duration."""
    grammar = '(', pg.optional(pg.attr('duration', KDuration)), 'pause', ')'
|
e@0
|
1013
|
e@0
|
1014
|
e@0
|
class CastHeader(pg.Plain):
    """Matches the 'Cast:' or 'Cast List:' header line."""
    grammar = 'Cast', pg.optional('List'), ':', pg.endl
|
e@0
|
1017
|
e@0
|
1018
|
e@0
|
class KGender(pg.Keyword):
    """Grammar keyword for a cast member's gender: 'male' or 'female'."""
    grammar = pg.Enum(pg.K('male'), pg.K('female'))
|
e@0
|
1021
|
e@0
|
1022
|
e@0
|
class EGender(pg.List):
    """Gender expression: one gender, optionally 'or' a second (e.g. 'male or female')."""
    grammar = KGender, pg.optional('or', KGender)
|
e@0
|
1025
|
e@0
|
1026
|
e@0
|
class KPan(pg.Keyword):
    """Grammar keyword for stereo panning: 'left', 'right', 'center' or 'centre'."""
    grammar = pg.Enum(pg.K('left'), pg.K('right'), pg.K('center'), pg.K('centre'))
|
e@0
|
1029
|
e@0
|
1030
|
e@0
|
class EPan(pg.List):
    """Panning expression: one position, optionally 'or' a second."""
    grammar = KPan, pg.optional('or', KPan)
|
e@0
|
1033
|
e@0
|
1034
|
e@0
|
class CastDefinition(pg.Plain):
    """One cast-list row: '<name> - <gender> - panned <side>'.

    Attributes set by the grammar: cast_name, gender, panning.
    """
    # Raw strings for the dash separators: '\-+' in a non-raw literal is an
    # invalid escape sequence (SyntaxWarning in modern Python). The regexes
    # themselves are unchanged.
    grammar = pg.attr('cast_name', re.compile('[A-Za-z0-9 ]+')), \
              re.compile(r'\-+'), \
              pg.attr('gender', EGender), \
              re.compile(r'\-+'), \
              'panned', \
              pg.attr('panning', EPan), pg.endl
|
e@0
|
1042
|
e@0
|
1043
|
e@0
|
class Tag(pg.Plain):
    """A single scene tag (letters, digits, '_' or '-'); stored in .tag."""
    grammar = pg.attr('tag', re.compile(r'[A-Za-z0-9_\-]+'))
|
e@0
|
1046
|
e@0
|
1047
|
e@0
|
class LTag(pg.List):
    """Comma-separated list of Tag."""
    grammar = pg.csl(Tag)
|
e@0
|
1050
|
e@0
|
1051
|
e@0
|
class ScenesHeader(pg.Plain):
    """Matches the 'Scene:'/'Scenes:' header line, with optional 'List'."""
    grammar = re.compile('Scenes?'), pg.optional('List'), ':', pg.endl
|
e@0
|
1054
|
e@0
|
1055
|
e@0
|
class ScenesDefinition(pg.Plain):
    """One scene-list row: '<number> - <name> - <filename> - <tags>'.

    Attributes set by the grammar: number, name, filename, tags.
    """
    # Raw strings throughout: '\-+', '\:' and '\.' in non-raw literals are
    # invalid escape sequences (SyntaxWarning in modern Python). The regexes
    # themselves are unchanged.
    grammar = pg.attr('number', re.compile('[A-Za-z0-9]+')), \
              re.compile(r'\-+'), pg.attr('name', re.compile('[A-Za-z0-9]+')), \
              re.compile(r'\-+'), pg.attr('filename', re.compile(r'[A-Za-z0-9_\:]+(\.(mp3|wav))?')), \
              re.compile(r'\-+'), pg.attr('tags', LTag), pg.endl
|
e@0
|
1061
|
e@0
|
1062
|
e@0
|
class ScriptHeader(pg.Plain):
    """Matches the 'Script:' header line."""
    grammar = 'Script', ':', pg.endl
|
e@0
|
1065
|
e@0
|
1066
|
e@0
|
class SceneCommence(pg.Plain):
    """Scene-change marker: '--- Scene <id>' with an optional trailing dash run.

    The scene identifier is stored in .scene.
    """
    # Raw strings for the dash runs ('\-+' in a non-raw literal is an invalid
    # escape sequence); the stray trailing semicolon was also removed.
    grammar = re.compile(r'\-+'), 'Scene', pg.attr('scene', re.compile('[A-Za-z0-9]+')), pg.optional(
        re.compile(r'\-+')), pg.endl
|
e@0
|
1070
|
e@0
|
1071
|
e@0
|
class CastLine(pg.Plain):
    """A dialogue line: '[<cast name>] <spoken text>'.

    Attributes set by the grammar: cast_name, line.
    """
    grammar = '[', pg.attr('cast_name', re.compile('[A-Za-z0-9 ]+')), ']', pg.attr('line',
              re.compile(r'[A-Za-z0-9\-_.\ \" \'\,\?\:\!]+')),
|
e@0
|
1075
|
e@0
|
1076
|
e@0
|
class Headers(pg.Plain):
    """Document header section: cast list, optional scene list, optional 'Script:' header.

    Attributes set by the grammar: cast_list, and scene_list when the
    scenes section is present.
    """
    grammar = CastHeader, \
              pg.attr('cast_list', pg.maybe_some(CastDefinition)), \
              pg.optional(ScenesHeader, pg.attr('scene_list', pg.maybe_some(ScenesDefinition))), pg.optional(
        ScriptHeader)
|
e@0
|
1082
|
e@0
|
1083
|
e@0
|
class Script(pg.List):
    """Script body: one or more of Pause, SceneCommence or CastLine."""
    grammar = pg.some([Pause, SceneCommence, CastLine])
|
e@0
|
1086
|
e@0
|
1087
|
e@0
|
class ScriptDocument(pg.Plain):
    """Top-level document: Headers followed by the Script body."""
    grammar = pg.attr('headers', Headers), pg.attr('script', Script)
|
e@0
|
1090
|
e@0
|
1091
|
e@0
|
class Parser:
    """Turns a structured script document into definition/playback dicts."""

    def __init__(self):
        pass

    def parse_str(self, str_):
        """Parse *str_* with the pypeg2 ScriptDocument grammar.

        Returns {'definitions': [...], 'script': [...]} where definitions
        describe the cast and scenes and script is the ordered timeline.
        """
        document = pg.parse(str_, ScriptDocument)

        definitions = []

        # Cast entries from the header section.
        for cast_def in document.headers.cast_list:
            definitions.append({
                'type': 'cast_definition',
                'name': cast_def.cast_name.strip(),
                'gender': [str(g) for g in cast_def.gender],
                'voice': [],
                'panning': [str(p) for p in cast_def.panning],
            })

        # Scene entries from the header section.
        for scene_def in document.headers.scene_list:
            definitions.append({
                'type': 'scene_definition',
                'scene': scene_def.name.strip(),
                'number': scene_def.number,
                'filename': scene_def.filename,
                'tags': [str(t.tag) for t in scene_def.tags],
            })

        # Timeline actions, in document order.
        script = []
        for action in document.script:
            if isinstance(action, Pause):
                script.append({'type': 'pause',
                               'duration': str(action.duration)})
            if isinstance(action, SceneCommence):
                script.append({'type': 'scene_change',
                               'number': str(action.scene).strip()})
            if isinstance(action, CastLine):
                script.append({'type': 'cast_line',
                               'name': str(action.cast_name).strip(),
                               'line': str(action.line)})

        return {'definitions': definitions, 'script': script}
|
e@0
|
1139
|
e@0
|
1140
|
e@0
|
class Preprocessor():
    """Converts a raw story text plus an annotation dump into a script string.

    The annotation string is scanned line-by-line with the regexps below.
    Entities (Character, Character_Line, Sound_Effect) are collected in a
    first pass; attributes (Gender/Age) and relations (Says / Sound_Effect
    causes) are attached in later passes; finally, the gaps between
    annotated spans are filled with synthesized Narrator lines and the
    whole timeline is rendered as text.
    """
    def __init__(self):
        # Cached result; not written by parse_str() in this version.
        self.parsed_dict = None
        # Entity tuple: "<Var> <Type> <start> <end> <name words...>"
        self.regexp_entity = re.compile('(?P<variable>[A-Z][0-9]+)\s+(?P<type>[A-Z][A-Za-z_]*)\s+([0-9]+)\s+([0-9]+)(?P<name>(\s+[A-Za-z\'\"]+)+$)')
        # Attribute tuple: "<Var> <Type> <target Var> <value>"
        self.regexp_attribute = re.compile('(?P<variable>[A-Z][0-9]+)\s+(?P<type>[A-Z][A-Za-z_]*)\s+(?P<target_variable>[A-Z][0-9]+)\s+(?P<value>[A-Za-z]+)')
        # Relation tuple: "<Var> <Role:Var> <Role:Var> ..."
        self.regexp_relation = re.compile('(?P<variable>[A-Z][0-9]+)\s+(([A-Za-z0-9_]+:[A-Z0-9]+\s*)+)')
        # Transitive relation: "* <Type> <Var1> <Var2>" — compiled but not
        # used by parse_str() below.
        self.regexp_transitive = re.compile('(?P<arity>\*)\s+(?P<type>[A-Z][A-Za-z_]*)\s+(?P<refersTo1>[A-Z][0-9]+)\s+(?P<refersTo2>[A-Z][0-9]+)\s*')

    def parse_str(self, text, annot):
        """Takes as input a text and an annotation string.

        text  -- the original story text; entity offsets index into it
        annot -- newline-separated annotation tuples
        Returns the generated script (cast list, sound effects, lines) as
        a single string.
        """

        tups = [tup for tup in annot.split('\n') if tup.strip() != '']

        # Add the narrator
        narrator = Character('_', 'Narrator')
        characters = [narrator]

        # NOTE(review): 'objects' is never populated in this version;
        # find_character_or_object() therefore only finds characters.
        objects = []
        character_lines = []
        sound_effects = []

        # Lookup helpers over the lists above, keyed by annotation variable.
        def find_character(var):
            for c in characters:
                if c.varname == var:
                    return c

        def find_character_or_object(var):
            c = find_character(var)
            if c is not None:
                return c

        def find_character_lines(var):
            for c in character_lines:
                if c.varname == var:
                    return c

        def find_sound_effect(var):
            for c in sound_effects:
                if c.varname == var:
                    return c

        # True when at least one Character_Line is attributed to c.
        def character_speaks(c):
            for cl in character_lines:
                if isinstance(cl, Character_Line) and cl.character == c:
                    return True
            return False

        # Pass 1: collect entities.
        for tup in tups:
            # print(tup)

            groups = self.regexp_entity.findall(tup)
            if len(groups) > 0:
                if groups[0][1] == 'Character':
                    # If the entity is a character
                    characters.append(
                        Character(groups[0][0].strip(), groups[0][4].strip())
                    )
                elif groups[0][1] == 'Character_Line':
                    character_lines.append(
                        Character_Line(groups[0][0].strip(), groups[0][4].strip(), int(groups[0][2]), int(groups[0][3]))
                    )
                elif groups[0][1] == 'Sound_Effect':
                    sound_effects.append(
                        Sound_Effect(groups[0][0].strip(), groups[0][4].strip(), int(groups[0][2]))
                    )
                continue

        # Pass 2: attach Gender/Age attributes to their target characters.
        for tup in tups:
            # Attributes and relations
            groups = self.regexp_attribute.findall(tup)
            if len(groups) > 0:
                if groups[0][1] == 'Gender':
                    # Target variable is expected to name a Character.
                    c = find_character(groups[0][2].strip())
                    c.set_gender(groups[0][3].strip().lower())
                elif groups[0][1] == 'Age':
                    c = find_character(groups[0][2].strip())
                    c.set_age(groups[0][3].strip().lower())

        # Pass 3: relations — who says which lines, and what causes each SFX.
        for tup in tups:
            # Attributes and relations
            groups = self.regexp_relation.findall(tup)
            if len(groups) > 0 and groups[0][1][:4] == 'Says':
                # print(groups)
                refs = groups[0][1].split()[1:]

                # Store who and whats
                whats = []
                who = None

                for ref in refs:
                    type_, var = ref.split(':')
                    if type_[:4] == 'WHAT':
                        whats.append(var)
                    elif type_[:3] == 'WHO':
                        who = find_character(var)

                # find character lines:
                clines = [find_character_lines(w) for w in whats]

                # Assign characters
                for cl in clines:
                    cl.set_character(who)
            elif len(groups) > 0 and groups[0][1][:12] == 'Sound_Effect':
                # [13:] skips "Sound_Effect:" to reach the SFX variable.
                sfx = find_sound_effect(groups[0][1][13:].split()[0])
                #print(groups)

                # Store extra keywords
                keywords = []

                refs = groups[0][1].split()[1:]
                for ref in refs:
                    #print(ref)
                    type_, var = ref.split(':')
                    if type_[:8] == 'CausedBy':
                        cause = find_character_or_object(var)
                        if cause != None:
                            keywords.append(cause.name)

                sfx.add_keywords(keywords)

        # %% Calculate line segments for character lines and narration

        # Add sound effects to the timeline
        clpos = [cl.pos for cl in character_lines]
        clpos += [sfx.pos for sfx in sound_effects]
        clpos = sorted(clpos, key=lambda x: x[0])

        # Add first narrator line
        cl = Character_Line('_', text[0:clpos[0][0]], 0, clpos[0][0] - 1)
        cl.set_character(narrator)
        character_lines.append(cl)

        # Fill every gap between consecutive annotated spans with a
        # synthesized narrator line covering the unannotated text.
        for n in range(len(clpos) - 1):
            if clpos[n][1] != clpos[n + 1][0] - 1:
                cl = Character_Line('_', text[clpos[n][1] + 1:clpos[n + 1][0]].rstrip(), clpos[n][1] + 1,
                                    clpos[n + 1][0] - 1)
                cl.set_character(narrator)
                character_lines.append(cl)

        # Merge SFX into the timeline and order everything by end offset.
        character_lines += sound_effects
        character_lines = sorted(character_lines, key=lambda x: x.pos[1])

        # parsed_dict = {'definitions': [c.to_json() for c in characters],
        #                'script': [cl.to_json() for cl in character_lines]}

        # parsed_dict = {'definitions': [], 'script':[]}
        #
        # print("Cast List:")
        # for c in characters:
        #     if character_speaks(c):
        #         print(c.definition())
        #
        # print("")
        # print("Sound Effects:")
        #
        # for c in sound_effects:
        #     print(c.definition())
        #
        # for cl in character_lines:
        #     print(cl.script())

        # Add definitions for characters
        # for c in characters:
        #     if character_speaks(c):
        #         parsed_dict['definitions'].append(c.to_json())
        #
        # # Add definitions for sound effects
        # for c in sound_effects:
        #     parsed_dict['definitions'].append(c.to_json()['definition'])
        #
        #
        # # Add timeline information
        # for cl in character_lines:
        #     parsed_dict['script'].append(cl.to_json()['playback'])

        # Render the final script: cast list, SFX list, then the timeline.
        str_ = "Cast List:\n"
        for c in characters:
            if character_speaks(c):
                str_ += "{}\n".format(c.definition())

        str_ += "Sound Effects:\n"
        for c in sound_effects:
            str_ += "{}\n".format(c.definition())


        for cl in character_lines:
            str_ += "{}\n".format(cl.script())


        return str_
|