amine@274
|
1 import os
|
amine@274
|
2 from tempfile import TemporaryDirectory
|
amine@403
|
3 from unittest.mock import Mock, call, patch
|
amine@403
|
4
|
amine@400
|
5 import pytest
|
amine@403
|
6
|
amine@418
|
7 import auditok.workers
|
amine@419
|
8 from auditok import AudioReader, AudioRegion, split, split_and_join_with_silence
|
amine@403
|
9 from auditok.cmdline_util import make_logger
|
amine@274
|
10 from auditok.workers import (
|
amine@419
|
11 AudioEventsJoinerWorker,
|
amine@403
|
12 CommandLineWorker,
|
amine@403
|
13 PlayerWorker,
|
amine@403
|
14 PrintWorker,
|
amine@403
|
15 RegionSaverWorker,
|
amine@403
|
16 StreamSaverWorker,
|
amine@274
|
17 TokenizerWorker,
|
amine@274
|
18 )
|
amine@274
|
19
|
amine@274
|
20
|
@pytest.fixture
def audio_data_source():
    """Yield an ``AudioReader`` opened on the raw test recording.

    The file is read as 10 Hz, 2 bytes per sample, single channel audio in
    blocks of 0.1 second. The reader is closed after the ``yield``.
    """
    audio_parameters = {"sr": 10, "sw": 2, "ch": 1}
    source = AudioReader(
        input="tests/data/test_split_10HZ_mono.raw",
        block_dur=0.1,
        **audio_parameters,
    )
    yield source
    source.close()
amine@275
|
32
|
amine@400
|
33
|
@pytest.fixture
def expected_detections():
    """Return the ``(start, end)`` pairs the tests compare detections against."""
    starts = (0.2, 1.7, 3.4, 5.4, 7.4)
    ends = (1.6, 3.1, 5.4, 7.4, 7.6)
    return list(zip(starts, ends))
amine@400
|
43
|
amine@400
|
44
|
def test_TokenizerWorker(audio_data_source, expected_detections):
    """Check a ``TokenizerWorker``'s detections and the lines it logs.

    The worker logs to a file in a temporary directory; each logged line is
    compared, after its first 28 characters, with the expected ``[DET]``
    message for the detection at the same position.
    """
    with TemporaryDirectory() as workdir:
        log_path = os.path.join(workdir, "file.log")
        worker = TokenizerWorker(
            audio_data_source,
            logger=make_logger(file=log_path, name="test_TokenizerWorker"),
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        worker.start_all()
        worker.join()
        # Read the log while the temporary directory still exists.
        with open(log_path) as log_file:
            logged = log_file.readlines()

    assert len(worker.detections) == len(expected_detections)

    template = (
        "[DET]: Detection {} (start: {:.3f}, end: {:.3f}, duration: {:.3f})"
    )
    triples = zip(worker.detections, expected_detections, logged)
    # Detection ids in the expected messages are counted from 1.
    for index, (detection, (start, end), line) in enumerate(triples, start=1):
        expected_message = template.format(index, start, end, end - start)
        assert pytest.approx(detection.start) == start
        assert pytest.approx(detection.end) == end
        # Only the text after the first 28 characters of the line is compared.
        assert line[28:].strip() == expected_message
amine@275
|
81
|
amine@274
|
82
|
def test_PlayerWorker(audio_data_source, expected_detections):
    """Check a ``PlayerWorker`` observer attached to a ``TokenizerWorker``.

    The player is a ``Mock``. The test asserts that its ``play`` method was
    called and that the tokenizer produced as many detections as expected.
    """
    with TemporaryDirectory() as tmpdir:
        file = os.path.join(tmpdir, "file.log")
        # A name specific to this test, so the logger name is not shared
        # with test_RegionSaverWorker.
        logger = make_logger(file=file, name="test_PlayerWorker")
        player_mock = Mock()
        observers = [PlayerWorker(player_mock, logger=logger)]
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=logger,
            observers=observers,
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        tokenizer.start_all()
        tokenizer.join()
        # Wait for the observer to finish before reading the log.
        tokenizer._observers[0].join()
        with open(file) as fp:
            # NOTE(review): lines are kept when they *start* with "[PLAY]",
            # but the comparison below reads each line from offset 28. A
            # line cannot satisfy both, so this list is presumably empty and
            # the loop below is skipped (zip stops at its shortest input) --
            # confirm against the logger's line format.
            log_lines = [
                line for line in fp.readlines() if line.startswith("[PLAY]")
            ]

    assert player_mock.play.called
    assert len(tokenizer.detections) == len(expected_detections)
    log_fmt = "[PLAY]: Detection {id} played"
    for i, (det, exp, log_line) in enumerate(
        zip(
            tokenizer.detections,
            expected_detections,
            log_lines,
        ),
        1,
    ):
        start, end = exp
        exp_log_line = log_fmt.format(id=i)
        assert pytest.approx(det.start) == start
        assert pytest.approx(det.end) == end
        assert log_line[28:].strip() == exp_log_line
amine@400
|
124
|
amine@400
|
125
|
def test_RegionSaverWorker(audio_data_source, expected_detections):
    """Check the ``save`` calls triggered by a ``RegionSaverWorker`` observer.

    ``auditok.core.AudioRegion.save`` is patched, and every other call
    recorded on the patch is compared with the file name expected for each
    detection.
    """
    filename_format = "Region_{id}_{start:.6f}-{end:.3f}_{duration:.3f}.wav"

    def expected_name(index, start, end):
        # File name expected for the detection numbered ``index``.
        return filename_format.format(
            id=index, start=start, end=end, duration=end - start
        )

    with TemporaryDirectory() as workdir:
        log_path = os.path.join(workdir, "file.log")
        logger = make_logger(file=log_path, name="test_RegionSaverWorker")
        saver = RegionSaverWorker(filename_format, logger=logger)
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=logger,
            observers=[saver],
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        with patch("auditok.core.AudioRegion.save") as patched_save:
            tokenizer.start_all()
            tokenizer.join()
            tokenizer._observers[0].join()
        with open(log_path) as log_file:
            # NOTE(review): lines are kept when they *start* with "[SAVE]",
            # but the comparison at the end reads each line from offset 28.
            # A line cannot satisfy both, so this list is presumably empty
            # and the final loop is skipped -- confirm the log line format.
            save_lines = []
            for line in log_file.readlines():
                if line.startswith("[SAVE]"):
                    save_lines.append(line)

    expected_save_calls = [
        call(expected_name(index, start, end), None)
        for index, (start, end) in enumerate(expected_detections, 1)
    ]

    # Keep the calls recorded at even positions only; presumably the odd
    # positions are calls made on the patched method's return value -- TODO
    # confirm.
    recorded_save_calls = patched_save.mock_calls[::2]
    assert recorded_save_calls == expected_save_calls
    assert len(tokenizer.detections) == len(expected_detections)

    message_format = "[SAVE]: Detection {id} saved as '{filename}'"
    triples = zip(tokenizer.detections, expected_detections, save_lines)
    for index, (detection, (start, end), line) in enumerate(triples, start=1):
        expected_message = message_format.format(
            id=index, filename=expected_name(index, start, end)
        )
        assert pytest.approx(detection.start) == start
        assert pytest.approx(detection.end) == end
        assert line[28:].strip() == expected_message
amine@400
|
185
|
amine@400
|
186
|
def test_CommandLineWorker(audio_data_source, expected_detections):
    """Check the ``os.system`` calls made by a ``CommandLineWorker`` observer.

    ``auditok.workers.os.system`` is patched; the test expects one call with
    the command string per expected detection.
    """
    command_format = "do nothing with"
    with TemporaryDirectory() as workdir:
        log_path = os.path.join(workdir, "file.log")
        logger = make_logger(file=log_path, name="test_CommandLineWorker")
        command_worker = CommandLineWorker(command_format, logger=logger)
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=logger,
            observers=[command_worker],
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        with patch("auditok.workers.os.system") as patched_os_system:
            tokenizer.start_all()
            tokenizer.join()
            tokenizer._observers[0].join()
        with open(log_path) as log_file:
            # NOTE(review): lines are kept when they *start* with
            # "[COMMAND]", but the comparison below reads each line from
            # offset 28. A line cannot satisfy both, so this list is
            # presumably empty and the loop below is skipped -- confirm the
            # log line format.
            command_lines = []
            for line in log_file.readlines():
                if line.startswith("[COMMAND]"):
                    command_lines.append(line)

    # The command has no placeholders, so every call is expected to be equal.
    expected_system_calls = [call(command_format)] * len(expected_detections)
    assert patched_os_system.mock_calls == expected_system_calls
    assert len(tokenizer.detections) == len(expected_detections)

    message_format = "[COMMAND]: Detection {id} command '{command}'"
    triples = zip(tokenizer.detections, expected_detections, command_lines)
    for index, (detection, (start, end), line) in enumerate(triples, start=1):
        expected_message = message_format.format(
            id=index, command=command_format
        )
        assert pytest.approx(detection.start) == start
        assert pytest.approx(detection.end) == end
        assert line[28:].strip() == expected_message
amine@400
|
230
|
amine@400
|
231
|
def test_PrintWorker(audio_data_source, expected_detections):
    """Check what a ``PrintWorker`` observer prints for each detection.

    ``builtins.print`` is patched and its recorded calls are compared with
    one formatted line per expected detection, numbered from 1.
    """
    printer = PrintWorker(print_format="[{id}] {start} {end}, dur: {duration}")
    tokenizer = TokenizerWorker(
        audio_data_source,
        observers=[printer],
        min_dur=0.3,
        max_dur=2,
        max_silence=0.2,
        drop_trailing_silence=False,
        strict_min_dur=False,
        eth=50,
    )
    with patch("builtins.print") as patched_print:
        tokenizer.start_all()
        tokenizer.join()
        tokenizer._observers[0].join()

    line_template = "[{}] {:.3f} {:.3f}, dur: {:.3f}"
    expected_print_calls = []
    for index, (start, end) in enumerate(expected_detections, start=1):
        expected_print_calls.append(
            call(line_template.format(index, start, end, end - start))
        )

    assert patched_print.mock_calls == expected_print_calls
    assert len(tokenizer.detections) == len(expected_detections)
    for detection, (start, end) in zip(
        tokenizer.detections, expected_detections
    ):
        assert pytest.approx(detection.start) == start
        assert pytest.approx(detection.end) == end
amine@287
|
268
|
amine@287
|
269
|
def test_StreamSaverWorker_wav(audio_data_source):
    """Check the file exported by a ``StreamSaverWorker`` given a ``.wav`` name.

    The exported file is loaded back and compared with the raw test
    recording and with the saver's ``data``.
    """
    with TemporaryDirectory() as workdir:
        wav_path = os.path.join(workdir, "output.wav")
        saver = StreamSaverWorker(audio_data_source, wav_path)
        saver.start()

        # The tokenizer reads its audio through the saver.
        tokenizer = TokenizerWorker(saver)
        tokenizer.start_all()
        tokenizer.join()
        saver.join()

        exported_path = saver.export_audio()
        source_region = AudioRegion.load(
            "tests/data/test_split_10HZ_mono.raw", sr=10, sw=2, ch=1
        )
        exported_region = AudioRegion.load(exported_path)

        assert exported_path == wav_path
        assert source_region == exported_region
        assert saver.data == bytes(exported_region)
amine@400
|
290
|
amine@400
|
291
|
@pytest.mark.parametrize(
    "export_format",
    [
        "raw",  # raw
        "wav",  # wav
    ],
    # One id per parameter; the second id previously duplicated "raw".
    ids=[
        "raw",
        "wav",
    ],
)
def test_StreamSaverWorker(audio_data_source, export_format):
    """Check the file exported by a ``StreamSaverWorker`` for each format.

    The exported file is loaded back with the same audio parameters as the
    raw test recording and compared with it and with the saver's ``data``.
    """
    with TemporaryDirectory() as tmpdir:
        expected_filename = os.path.join(tmpdir, f"output.{export_format}")
        saver = StreamSaverWorker(
            audio_data_source, expected_filename, export_format=export_format
        )
        saver.start()
        # The tokenizer reads its audio through the saver.
        tokenizer = TokenizerWorker(saver)
        tokenizer.start_all()
        tokenizer.join()
        saver.join()
        output_filename = saver.export_audio()
        region = AudioRegion.load(
            "tests/data/test_split_10HZ_mono.raw", sr=10, sw=2, ch=1
        )
        expected_region = AudioRegion.load(
            output_filename, sr=10, sw=2, ch=1, audio_format=export_format
        )
        assert output_filename == expected_filename
        assert region == expected_region
        assert saver.data == bytes(expected_region)
amine@400
|
324
|
amine@400
|
325
|
def test_StreamSaverWorker_encode_audio(audio_data_source):
    """Check ``_encode_export_audio`` when the patched subprocess runner fails.

    ``auditok.workers._run_subprocess`` is patched to return
    ``(1, None, None)``. The test expects an ``AudioEncodingError`` with a
    specific message, and exactly three runner calls: ffmpeg, avconv, then
    sox.
    """
    with TemporaryDirectory() as workdir:
        with patch("auditok.workers._run_subprocess") as run_subprocess:
            run_subprocess.return_value = (1, None, None)
            ogg_path = os.path.join(workdir, "output.ogg")
            wav_path = ogg_path + ".wav"
            saver = StreamSaverWorker(audio_data_source, ogg_path)
            saver.start()
            tokenizer = TokenizerWorker(saver)
            tokenizer.start_all()
            tokenizer.join()
            saver.join()

            with pytest.raises(auditok.workers.AudioEncodingError) as ae_error:
                saver._encode_export_audio()

            expected_message = (
                "Couldn't save audio data in the desired format "
                "'ogg'.\nEither none of 'ffmpeg', 'avconv' or 'sox' "
                "is installed or this format is not recognized.\n"
                "Audio file was saved as '{}'"
            ).format(wav_path)
            assert expected_message == str(ae_error.value)

            # ffmpeg and avconv are expected to get the same argument list.
            converter_args = [
                "-y",
                "-f",
                "wav",
                "-i",
                wav_path,
                "-f",
                "ogg",
                ogg_path,
            ]
            expected_calls = [
                call([tool] + converter_args) for tool in ("ffmpeg", "avconv")
            ]
            expected_calls.append(
                call(["sox", "-t", "wav", wav_path, ogg_path])
            )
            assert run_subprocess.mock_calls == expected_calls

            source_region = AudioRegion.load(
                "tests/data/test_split_10HZ_mono.raw", sr=10, sw=2, ch=1
            )
            assert not saver._exported
            assert saver.data == bytes(source_region)
amine@419
|
376
|
amine@419
|
377
|
@pytest.mark.parametrize(
    "export_format",
    [
        "raw",  # raw
        "wav",  # wav
    ],
    # One id per parameter; the second id previously duplicated "raw".
    ids=[
        "raw",
        "wav",
    ],
)
def test_AudioEventsJoinerWorker(audio_data_source, export_format):
    """Check the audio produced by an ``AudioEventsJoinerWorker`` observer.

    The joiner's ``data`` is compared with the bytes of the region returned
    by ``split_and_join_with_silence`` for the same recording and the same
    silence duration.
    """
    with TemporaryDirectory() as tmpdir:
        expected_filename = os.path.join(tmpdir, f"output.{export_format}")
        joiner = AudioEventsJoinerWorker(
            silence_duration=1.0,
            filename=expected_filename,
            export_format=export_format,
            sampling_rate=audio_data_source.sampling_rate,
            sample_width=audio_data_source.sample_width,
            channels=audio_data_source.channels,
        )

        tokenizer = TokenizerWorker(audio_data_source, observers=[joiner])
        tokenizer.start_all()
        tokenizer.join()
        joiner.join()

        output_filename = joiner.export_audio()
        expected_region = split_and_join_with_silence(
            "tests/data/test_split_10HZ_mono.raw",
            silence_duration=1.0,
            sr=10,
            sw=2,
            ch=1,
            aw=0.1,
        )
        assert output_filename == expected_filename
        assert joiner.data == bytes(expected_region)