amine@274
|
1 import os
|
amine@274
|
2 from tempfile import TemporaryDirectory
|
amine@403
|
3 from unittest.mock import Mock, call, patch
|
amine@403
|
4
|
amine@400
|
5 import pytest
|
amine@403
|
6
|
amine@403
|
7 from auditok import AudioReader, AudioRegion
|
amine@403
|
8 from auditok.cmdline_util import make_logger
|
amine@292
|
9 from auditok.exceptions import AudioEncodingWarning
|
amine@274
|
10 from auditok.workers import (
|
amine@403
|
11 CommandLineWorker,
|
amine@403
|
12 PlayerWorker,
|
amine@403
|
13 PrintWorker,
|
amine@403
|
14 RegionSaverWorker,
|
amine@403
|
15 StreamSaverWorker,
|
amine@274
|
16 TokenizerWorker,
|
amine@274
|
17 )
|
amine@274
|
18
|
amine@274
|
19
|
amine@400
|
@pytest.fixture
def audio_data_source():
    """Provide an ``AudioReader`` over ``tests/data/test_split_10HZ_mono.raw``.

    The reader is created with ``block_dur=0.1``, ``sr=10``, ``sw=2`` and
    ``ch=1``. It is handed to the requesting test and closed afterwards.
    """
    reader_options = dict(block_dur=0.1, sr=10, sw=2, ch=1)
    source = AudioReader(
        input="tests/data/test_split_10HZ_mono.raw", **reader_options
    )
    yield source
    # Teardown: executed once the requesting test is done with the reader.
    source.close()
|
amine@275
|
31
|
amine@400
|
32
|
amine@400
|
@pytest.fixture
def expected_detections():
    """Return the ``(start, end)`` pairs that detections are compared with."""
    starts = (0.2, 1.7, 3.4, 5.4, 7.4)
    ends = (1.6, 3.1, 5.4, 7.4, 7.6)
    return list(zip(starts, ends))
|
amine@400
|
42
|
amine@400
|
43
|
amine@400
|
def test_TokenizerWorker(audio_data_source, expected_detections):
    """Run a ``TokenizerWorker`` on its own and check detections and log.

    The worker logs to a file inside a temporary directory; every detection
    is compared with the expected boundaries and with its ``[DET]`` log line.
    """
    tokenizer_options = {
        "min_dur": 0.3,
        "max_dur": 2,
        "max_silence": 0.2,
        "drop_trailing_silence": False,
        "strict_min_dur": False,
        "eth": 50,
    }
    with TemporaryDirectory() as workdir:
        log_path = os.path.join(workdir, "file.log")
        log = make_logger(file=log_path, name="test_TokenizerWorker")
        tokenizer = TokenizerWorker(
            audio_data_source, logger=log, **tokenizer_options
        )
        tokenizer.start_all()
        tokenizer.join()
        # The log must be read before the temporary directory is removed.
        with open(log_path) as log_file:
            logged = log_file.readlines()

    assert len(tokenizer.detections) == len(expected_detections)

    template = (
        "[DET]: Detection {} (start: {:.3f}, end: {:.3f}, duration: {:.3f})"
    )
    triples = zip(tokenizer.detections, expected_detections, logged)
    for index, (detection, (start, end), line) in enumerate(triples, 1):
        wanted = template.format(index, start, end, end - start)
        assert pytest.approx(detection.start) == start
        assert pytest.approx(detection.end) == end
        # Skip the 28 characters that precede the message on each line
        # (presumably the timestamp added by the log formatter).
        assert line[28:].strip() == wanted
|
amine@275
|
75
|
amine@274
|
76
|
amine@400
|
def test_PlayerWorker(audio_data_source, expected_detections):
    """Check that ``PlayerWorker`` plays and logs every detection.

    A ``Mock`` stands in for the audio player. The tokenizer and its observer
    log to a file in a temporary directory; the test then verifies that the
    player was used, that the detections have the expected boundaries and
    that one ``[PLAY]`` line was logged per detection.
    """
    # Number of characters that precede the message on every log line
    # (presumably the timestamp added by the log formatter).
    prefix_len = 28
    with TemporaryDirectory() as tmpdir:
        file = os.path.join(tmpdir, "file.log")
        # Use a logger name of its own: a name shared with another test means
        # a shared logger object, since loggers are looked up by name.
        logger = make_logger(file=file, name="test_PlayerWorker")
        player_mock = Mock()
        observers = [PlayerWorker(player_mock, logger=logger)]
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=logger,
            observers=observers,
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        tokenizer.start_all()
        tokenizer.join()
        tokenizer._observers[0].join()
        with open(file) as fp:
            # The tag must be looked for after the line prefix; testing the
            # very start of the line can never agree with the slice used in
            # the comparison below and would leave this list empty.
            log_lines = [
                line for line in fp if line[prefix_len:].startswith("[PLAY]")
            ]

    assert player_mock.play.called
    assert len(tokenizer.detections) == len(expected_detections)
    # Detection boundaries are checked independently of the log so that a
    # missing log line cannot silently skip them.
    for det, (start, end) in zip(tokenizer.detections, expected_detections):
        assert pytest.approx(det.start) == start
        assert pytest.approx(det.end) == end

    # NOTE(review): this comparison was never executed with the previous
    # filter; if it fails, reconcile the text with PlayerWorker's message.
    log_fmt = "[PLAY]: Detection {id} played"
    assert len(log_lines) == len(expected_detections)
    for i, log_line in enumerate(log_lines, 1):
        assert log_line[prefix_len:].strip() == log_fmt.format(id=i)
|
amine@400
|
113
|
amine@400
|
114
|
amine@400
|
def test_RegionSaverWorker(audio_data_source, expected_detections):
    """Check that ``RegionSaverWorker`` saves and logs every detection.

    ``AudioRegion.save`` is patched, so nothing is written to disk; the test
    inspects the arguments of the recorded ``save`` calls, the detection
    boundaries and the ``[SAVE]`` lines found in the log file.
    """
    filename_format = "Region_{id}_{start:.6f}-{end:.3f}_{duration:.3f}.wav"
    # Number of characters that precede the message on every log line
    # (presumably the timestamp added by the log formatter).
    prefix_len = 28
    with TemporaryDirectory() as tmpdir:
        file = os.path.join(tmpdir, "file.log")
        logger = make_logger(file=file, name="test_RegionSaverWorker")
        observers = [RegionSaverWorker(filename_format, logger=logger)]
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=logger,
            observers=observers,
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        with patch("auditok.core.AudioRegion.save") as patched_save:
            tokenizer.start_all()
            tokenizer.join()
            tokenizer._observers[0].join()
        with open(file) as fp:
            # The tag must be looked for after the line prefix; testing the
            # very start of the line can never agree with the slice used in
            # the comparison below and would leave this list empty.
            log_lines = [
                line for line in fp if line[prefix_len:].startswith("[SAVE]")
            ]

    expected_save_calls = [
        call(
            filename_format.format(
                id=i, start=exp[0], end=exp[1], duration=exp[1] - exp[0]
            ),
            None,
        )
        for i, exp in enumerate(expected_detections, 1)
    ]
    # Only every other recorded call is a ``save`` invocation; the entries in
    # between are skipped (presumably calls made on the mock's return value).
    mock_calls = [
        c for i, c in enumerate(patched_save.mock_calls) if i % 2 == 0
    ]
    assert mock_calls == expected_save_calls

    assert len(tokenizer.detections) == len(expected_detections)
    # Detection boundaries are checked independently of the log so that a
    # missing log line cannot silently skip them.
    for det, (start, end) in zip(tokenizer.detections, expected_detections):
        assert pytest.approx(det.start) == start
        assert pytest.approx(det.end) == end

    # NOTE(review): ``save`` is patched, and the skipped mock calls above
    # suggest the worker formats the mock's return value into its message.
    # The logged file name is therefore presumably not the formatted name,
    # so only the stable start of the message is asserted -- confirm against
    # RegionSaverWorker and tighten if the full text can be pinned.
    log_prefix_fmt = "[SAVE]: Detection {id} saved as "
    assert len(log_lines) == len(expected_detections)
    for i, log_line in enumerate(log_lines, 1):
        assert log_line[prefix_len:].startswith(log_prefix_fmt.format(id=i))
|
amine@400
|
169
|
amine@400
|
170
|
amine@400
|
def test_CommandLineWorker(audio_data_source, expected_detections):
    """Check that ``CommandLineWorker`` runs and logs a command per detection.

    ``os.system`` is patched as seen from ``auditok.workers``, so no command
    is executed; the test inspects the recorded calls, the detection
    boundaries and the ``[COMMAND]`` lines found in the log file.
    """
    command_format = "do nothing with"
    # Number of characters that precede the message on every log line
    # (presumably the timestamp added by the log formatter).
    prefix_len = 28
    with TemporaryDirectory() as tmpdir:
        file = os.path.join(tmpdir, "file.log")
        logger = make_logger(file=file, name="test_CommandLineWorker")
        observers = [CommandLineWorker(command_format, logger=logger)]
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=logger,
            observers=observers,
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        with patch("auditok.workers.os.system") as patched_os_system:
            tokenizer.start_all()
            tokenizer.join()
            tokenizer._observers[0].join()
        with open(file) as fp:
            # The tag must be looked for after the line prefix; testing the
            # very start of the line can never agree with the slice used in
            # the comparison below and would leave this list empty.
            log_lines = [
                line
                for line in fp
                if line[prefix_len:].startswith("[COMMAND]")
            ]

    # The command contains no placeholder, so every call gets the same text.
    expected_system_calls = [call(command_format) for _ in expected_detections]
    assert patched_os_system.mock_calls == expected_system_calls

    assert len(tokenizer.detections) == len(expected_detections)
    # Detection boundaries are checked independently of the log so that a
    # missing log line cannot silently skip them.
    for det, (start, end) in zip(tokenizer.detections, expected_detections):
        assert pytest.approx(det.start) == start
        assert pytest.approx(det.end) == end

    # NOTE(review): this comparison was never executed with the previous
    # filter; if it fails, reconcile the text with CommandLineWorker's
    # message.
    log_fmt = "[COMMAND]: Detection {id} command '{command}'"
    assert len(log_lines) == len(expected_detections)
    for i, log_line in enumerate(log_lines, 1):
        exp_log_line = log_fmt.format(id=i, command=command_format)
        assert log_line[prefix_len:].strip() == exp_log_line
|
amine@400
|
209
|
amine@400
|
210
|
amine@400
|
def test_PrintWorker(audio_data_source, expected_detections):
    """Check what ``PrintWorker`` prints for every detection.

    ``builtins.print`` is patched while the workers run; the recorded calls
    are compared with one formatted line per expected detection, and the
    detection boundaries are verified as well.
    """
    printer = PrintWorker(print_format="[{id}] {start} {end}, dur: {duration}")
    tokenizer_options = {
        "min_dur": 0.3,
        "max_dur": 2,
        "max_silence": 0.2,
        "drop_trailing_silence": False,
        "strict_min_dur": False,
        "eth": 50,
    }
    tokenizer = TokenizerWorker(
        audio_data_source, observers=[printer], **tokenizer_options
    )
    with patch("builtins.print") as patched_print:
        tokenizer.start_all()
        tokenizer.join()
        tokenizer._observers[0].join()

    # One print call is expected per detection, numbered from 1.
    expected_print_calls = []
    for index, (start, end) in enumerate(expected_detections, 1):
        text = "[{}] {:.3f} {:.3f}, dur: {:.3f}".format(
            index, start, end, end - start
        )
        expected_print_calls.append(call(text))
    assert patched_print.mock_calls == expected_print_calls

    assert len(tokenizer.detections) == len(expected_detections)
    for detection, (start, end) in zip(
        tokenizer.detections, expected_detections
    ):
        assert pytest.approx(detection.start) == start
        assert pytest.approx(detection.end) == end
|
amine@287
|
244
|
amine@287
|
245
|
amine@400
|
def test_StreamSaverWorker_wav(audio_data_source):
    """Save the stream to ``output.wav`` and compare it with the input data.

    A ``StreamSaverWorker`` wraps the reader and feeds a ``TokenizerWorker``;
    once both have finished, the saved file is loaded back and compared with
    the raw test recording and with the data held by the saver.
    """
    with TemporaryDirectory() as workdir:
        target_path = os.path.join(workdir, "output.wav")
        saver = StreamSaverWorker(audio_data_source, target_path)
        saver.start()

        tokenizer = TokenizerWorker(saver)
        tokenizer.start_all()
        tokenizer.join()
        saver.join()

        written_path = saver.save_stream()
        source_region = AudioRegion.load(
            "tests/data/test_split_10HZ_mono.raw", sr=10, sw=2, ch=1
        )
        # Load the file back while the temporary directory still exists.
        saved_region = AudioRegion.load(written_path)

        assert written_path == target_path
        assert source_region == saved_region
        assert saver.data == bytes(saved_region)
|
amine@400
|
266
|
amine@400
|
267
|
amine@400
|
def test_StreamSaverWorker_raw(audio_data_source):
    """Save the stream with ``export_format="raw"`` and compare with input.

    The output file has no extension; it is loaded back with explicit audio
    parameters and compared with the raw test recording and with the data
    held by the saver.
    """
    raw_parameters = dict(sr=10, sw=2, ch=1)
    with TemporaryDirectory() as workdir:
        target_path = os.path.join(workdir, "output")
        saver = StreamSaverWorker(
            audio_data_source, target_path, export_format="raw"
        )
        saver.start()

        tokenizer = TokenizerWorker(saver)
        tokenizer.start_all()
        tokenizer.join()
        saver.join()

        written_path = saver.save_stream()
        source_region = AudioRegion.load(
            "tests/data/test_split_10HZ_mono.raw", **raw_parameters
        )
        # Load the file back while the temporary directory still exists.
        saved_region = AudioRegion.load(
            written_path, audio_format="raw", **raw_parameters
        )

        assert written_path == target_path
        assert source_region == saved_region
        assert saver.data == bytes(saved_region)
|
amine@400
|
289
|
amine@400
|
290
|
amine@400
|
def test_StreamSaverWorker_encode_audio(audio_data_source):
    """Check the fallback when the stream cannot be encoded as ``ogg``.

    ``auditok.workers._run_subprocess`` is patched to return
    ``(1, None, None)`` (presumably a non-zero exit status). ``save_stream``
    is then expected to raise ``AudioEncodingWarning``; the test verifies the
    warning text, the converter command lines that were attempted and the
    data held by the saver.
    """
    with TemporaryDirectory() as workdir:
        target_path = os.path.join(workdir, "output.ogg")
        fallback_path = target_path + ".wav"
        with patch(
            "auditok.workers._run_subprocess", return_value=(1, None, None)
        ) as patch_rsp:
            saver = StreamSaverWorker(audio_data_source, target_path)
            saver.start()
            tokenizer = TokenizerWorker(saver)
            tokenizer.start_all()
            tokenizer.join()
            saver.join()
            with pytest.raises(AudioEncodingWarning) as rt_warn:
                saver.save_stream()

        warn_msg = (
            "Couldn't save audio data in the desired format "
            "'ogg'. Either none of 'ffmpeg', 'avconv' or 'sox' "
            "is installed or this format is not recognized.\n"
            "Audio file was saved as '{}'"
        )
        assert warn_msg.format(fallback_path) == str(rt_warn.value)

        # ffmpeg and avconv are expected to receive the same arguments.
        converter_args = [
            "-y",
            "-f",
            "wav",
            "-i",
            fallback_path,
            "-f",
            "ogg",
            target_path,
        ]
        sox_command = ["sox", "-t", "wav", fallback_path, target_path]
        expected_calls = [
            call(["ffmpeg"] + converter_args),
            call(["avconv"] + converter_args),
            call(sox_command),
        ]
        assert patch_rsp.mock_calls == expected_calls

        source_region = AudioRegion.load(
            "tests/data/test_split_10HZ_mono.raw", sr=10, sw=2, ch=1
        )
        assert saver._exported
        assert saver.data == bytes(source_region)
|