annotate tests/test_workers.py @ 455:7dae98b84cdd tip master

Merge branch 'master' of https://github.com/amsehili/auditok
author www-data <www-data@c4dm-xenserv-virt2.eecs.qmul.ac.uk>
date Tue, 03 Dec 2024 09:18:01 +0000
parents c5b4178aa80f
children
rev   line source
amine@274 1 import os
amine@274 2 from tempfile import TemporaryDirectory
amine@403 3 from unittest.mock import Mock, call, patch
amine@403 4
amine@400 5 import pytest
amine@403 6
amine@418 7 import auditok.workers
amine@419 8 from auditok import AudioReader, AudioRegion, split, split_and_join_with_silence
amine@403 9 from auditok.cmdline_util import make_logger
amine@274 10 from auditok.workers import (
amine@419 11 AudioEventsJoinerWorker,
amine@403 12 CommandLineWorker,
amine@403 13 PlayerWorker,
amine@403 14 PrintWorker,
amine@403 15 RegionSaverWorker,
amine@403 16 StreamSaverWorker,
amine@274 17 TokenizerWorker,
amine@274 18 )
amine@274 19
amine@274 20
@pytest.fixture
def audio_data_source():
    """Yield an `AudioReader` opened on the raw mono test recording.

    The reader is created with a sampling rate of 10, a sample width of 2,
    one channel and a block duration of 0.1. It is closed after the
    requesting test has finished with it.
    """
    reader_options = {
        "input": "tests/data/test_split_10HZ_mono.raw",
        "block_dur": 0.1,
        "sr": 10,
        "sw": 2,
        "ch": 1,
    }
    source = AudioReader(**reader_options)
    yield source
    # Teardown part of the fixture: executed once the test is done.
    source.close()
amine@275 32
amine@400 33
@pytest.fixture
def expected_detections():
    """Return the ``(start, end)`` pairs the worker tests compare against.

    Tests in this module check each tokenizer detection's ``start`` and
    ``end`` attributes against the pair at the same position in this list.
    """
    starts = (0.2, 1.7, 3.4, 5.4, 7.4)
    ends = (1.6, 3.1, 5.4, 7.4, 7.6)
    return list(zip(starts, ends))
amine@400 43
amine@400 44
def test_TokenizerWorker(audio_data_source, expected_detections):
    """Run a `TokenizerWorker` and check its detections and its log file.

    Every detection must approximately match the expected ``(start, end)``
    pair, and every log line, once its first 28 characters are dropped,
    must equal the ``[DET]`` message formatted for that detection.
    """
    detection_template = (
        "[DET]: Detection {} (start: {:.3f}, end: {:.3f}, duration: {:.3f})"
    )

    with TemporaryDirectory() as workdir:
        log_path = os.path.join(workdir, "file.log")
        worker = TokenizerWorker(
            audio_data_source,
            logger=make_logger(file=log_path, name="test_TokenizerWorker"),
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        worker.start_all()
        worker.join()
        # Read the log before the temporary directory is removed.
        with open(log_path) as log_file:
            logged = log_file.readlines()

    assert len(worker.detections) == len(expected_detections)

    triples = zip(worker.detections, expected_detections, logged)
    for det_id, (detection, bounds, line) in enumerate(triples, 1):
        start, end = bounds
        wanted_line = detection_template.format(
            det_id, start, end, end - start
        )
        assert pytest.approx(detection.start) == start
        assert pytest.approx(detection.end) == end
        # The first 28 characters of a log line precede the message itself.
        assert line[28:].strip() == wanted_line
amine@275 81
amine@274 82
def test_PlayerWorker(audio_data_source, expected_detections):
    """Check that `PlayerWorker` plays detections sent by the tokenizer.

    A `Mock` stands in for the audio player. The test asserts that the
    mock's ``play`` was called, that the tokenizer produced the expected
    number of detections with approximately the expected boundaries, and
    that each collected ``[PLAY]`` log line matches the expected message.
    """
    with TemporaryDirectory() as tmpdir:
        file = os.path.join(tmpdir, "file.log")
        # Name the logger after this test. It used to be created with the
        # copy-pasted name "test_RegionSaverWorker", which is also the name
        # test_RegionSaverWorker passes to make_logger.
        logger = make_logger(file=file, name="test_PlayerWorker")
        player_mock = Mock()
        observers = [PlayerWorker(player_mock, logger=logger)]
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=logger,
            observers=observers,
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        tokenizer.start_all()
        tokenizer.join()
        # Wait for the player worker to finish handling its messages.
        tokenizer._observers[0].join()
        # NOTE(review): the assertions below compare `log_line[28:]`, i.e.
        # they expect 28 characters in front of the "[PLAY]" tag, while this
        # filter keeps only lines that *start* with the tag. If every log
        # line carries such a prefix, nothing is kept and the zip loop below
        # never runs -- confirm against the logger's line format.
        with open(file) as fp:
            log_lines = [
                line for line in fp.readlines() if line.startswith("[PLAY]")
            ]

    assert player_mock.play.called
    assert len(tokenizer.detections) == len(expected_detections)
    log_fmt = "[PLAY]: Detection {id} played"
    for i, (det, exp, log_line) in enumerate(
        zip(
            tokenizer.detections,
            expected_detections,
            log_lines,
        ),
        1,
    ):
        start, end = exp
        exp_log_line = log_fmt.format(id=i)
        assert pytest.approx(det.start) == start
        assert pytest.approx(det.end) == end
        assert log_line[28:].strip() == exp_log_line
amine@400 124
amine@400 125
def test_RegionSaverWorker(audio_data_source, expected_detections):
    """Check that `RegionSaverWorker` saves each detection under its name.

    `AudioRegion.save` is patched for the duration of the run; the test
    inspects the calls recorded on the patch and the ``[SAVE]`` lines
    collected from the log file.
    """
    filename_format = "Region_{id}_{start:.6f}-{end:.3f}_{duration:.3f}.wav"
    # One expected file name per detection, numbered from 1.
    expected_filenames = [
        filename_format.format(
            id=det_id, start=start, end=end, duration=end - start
        )
        for det_id, (start, end) in enumerate(expected_detections, 1)
    ]

    with TemporaryDirectory() as workdir:
        log_path = os.path.join(workdir, "file.log")
        logger = make_logger(file=log_path, name="test_RegionSaverWorker")
        region_saver = RegionSaverWorker(filename_format, logger=logger)
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=logger,
            observers=[region_saver],
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        with patch("auditok.core.AudioRegion.save") as patched_save:
            tokenizer.start_all()
            tokenizer.join()
            tokenizer._observers[0].join()
        # NOTE(review): the assertions below compare `line[28:]`, i.e. they
        # expect 28 characters in front of the "[SAVE]" tag, while this
        # filter keeps only lines that *start* with the tag. If every log
        # line carries such a prefix, nothing is kept and the zip loop below
        # never runs -- confirm against the logger's line format.
        with open(log_path) as log_file:
            save_lines = [
                line for line in log_file if line.startswith("[SAVE]")
            ]

    # Only every other recorded call is compared; presumably the entries in
    # between are calls recorded on save()'s return value -- TODO confirm.
    recorded_saves = patched_save.mock_calls[::2]
    assert recorded_saves == [call(name, None) for name in expected_filenames]
    assert len(tokenizer.detections) == len(expected_detections)

    message_template = "[SAVE]: Detection {id} saved as '{filename}'"
    rows = zip(
        tokenizer.detections,
        expected_detections,
        expected_filenames,
        save_lines,
    )
    for det_id, (detection, bounds, name, line) in enumerate(rows, 1):
        start, end = bounds
        wanted_line = message_template.format(id=det_id, filename=name)
        assert pytest.approx(detection.start) == start
        assert pytest.approx(detection.end) == end
        assert line[28:].strip() == wanted_line
amine@400 185
amine@400 186
def test_CommandLineWorker(audio_data_source, expected_detections):
    """Check that `CommandLineWorker` runs its command once per detection.

    `auditok.workers.os.system` is patched for the duration of the run, so
    the test only inspects the calls recorded on the patch and the
    ``[COMMAND]`` lines collected from the log file.
    """
    command_format = "do nothing with"

    with TemporaryDirectory() as workdir:
        log_path = os.path.join(workdir, "file.log")
        logger = make_logger(file=log_path, name="test_CommandLineWorker")
        command_worker = CommandLineWorker(command_format, logger=logger)
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=logger,
            observers=[command_worker],
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        with patch("auditok.workers.os.system") as patched_os_system:
            tokenizer.start_all()
            tokenizer.join()
            tokenizer._observers[0].join()
        # NOTE(review): the assertions below compare `line[28:]`, i.e. they
        # expect 28 characters in front of the "[COMMAND]" tag, while this
        # filter keeps only lines that *start* with the tag. If every log
        # line carries such a prefix, nothing is kept and the zip loop below
        # never runs -- confirm against the logger's line format.
        with open(log_path) as log_file:
            command_lines = [
                line for line in log_file if line.startswith("[COMMAND]")
            ]

    # The command contains no placeholder, so every call is identical.
    expected_system_calls = [call(command_format)] * len(expected_detections)
    assert patched_os_system.mock_calls == expected_system_calls
    assert len(tokenizer.detections) == len(expected_detections)

    message_template = "[COMMAND]: Detection {id} command '{command}'"
    rows = zip(tokenizer.detections, expected_detections, command_lines)
    for det_id, (detection, bounds, line) in enumerate(rows, 1):
        start, end = bounds
        wanted_line = message_template.format(
            id=det_id, command=command_format
        )
        assert pytest.approx(detection.start) == start
        assert pytest.approx(detection.end) == end
        assert line[28:].strip() == wanted_line
amine@400 230
amine@400 231
def test_PrintWorker(audio_data_source, expected_detections):
    """Check that `PrintWorker` prints one formatted line per detection.

    `builtins.print` is patched while the workers run; the recorded calls
    must equal, in order, one line per expected detection with the values
    rendered to three decimals.
    """
    printer = PrintWorker(print_format="[{id}] {start} {end}, dur: {duration}")
    tokenizer = TokenizerWorker(
        audio_data_source,
        observers=[printer],
        min_dur=0.3,
        max_dur=2,
        max_silence=0.2,
        drop_trailing_silence=False,
        strict_min_dur=False,
        eth=50,
    )
    with patch("builtins.print") as patched_print:
        tokenizer.start_all()
        tokenizer.join()
        tokenizer._observers[0].join()

    line_template = "[{}] {:.3f} {:.3f}, dur: {:.3f}"
    expected_print_calls = []
    for det_id, (start, end) in enumerate(expected_detections, 1):
        printed = line_template.format(det_id, start, end, end - start)
        expected_print_calls.append(call(printed))

    assert patched_print.mock_calls == expected_print_calls
    assert len(tokenizer.detections) == len(expected_detections)

    for detection, bounds in zip(tokenizer.detections, expected_detections):
        start, end = bounds
        assert pytest.approx(detection.start) == start
        assert pytest.approx(detection.end) == end
amine@287 268
amine@287 269
def test_StreamSaverWorker_wav(audio_data_source):
    """Check that `StreamSaverWorker` exports the stream it relays as wav.

    The saver sits between the audio source and a `TokenizerWorker`. After
    both have finished, the exported file must be written to the requested
    path and must load as a region equal to the raw input file.
    """
    with TemporaryDirectory() as workdir:
        wav_path = os.path.join(workdir, "output.wav")
        saver = StreamSaverWorker(audio_data_source, wav_path)
        saver.start()

        # The tokenizer reads from the saver rather than from the source.
        tokenizer = TokenizerWorker(saver)
        tokenizer.start_all()
        tokenizer.join()
        saver.join()

        exported_path = saver.export_audio()
        source_region = AudioRegion.load(
            "tests/data/test_split_10HZ_mono.raw", sr=10, sw=2, ch=1
        )
        exported_region = AudioRegion.load(exported_path)

        assert exported_path == wav_path
        assert source_region == exported_region
        assert saver.data == bytes(exported_region)
amine@400 290
amine@400 291
@pytest.mark.parametrize(
    "export_format",
    [
        "raw",  # raw
        "wav",  # wav
    ],
    ids=[
        "raw",
        "wav",  # was a duplicated "raw", which mislabelled the wav case
    ],
)
def test_StreamSaverWorker(audio_data_source, export_format):
    """Check that `StreamSaverWorker` exports the stream in `export_format`.

    The saver sits between the audio source and a `TokenizerWorker`. After
    both have finished, the exported file must be written to the requested
    path and, reloaded with the source's audio parameters, must equal the
    raw input file.
    """
    with TemporaryDirectory() as tmpdir:
        expected_filename = os.path.join(tmpdir, f"output.{export_format}")
        saver = StreamSaverWorker(
            audio_data_source, expected_filename, export_format=export_format
        )
        saver.start()
        # The tokenizer reads from the saver rather than from the source.
        tokenizer = TokenizerWorker(saver)
        tokenizer.start_all()
        tokenizer.join()
        saver.join()
        output_filename = saver.export_audio()
        region = AudioRegion.load(
            "tests/data/test_split_10HZ_mono.raw", sr=10, sw=2, ch=1
        )
        # Audio parameters are passed explicitly when reloading the export.
        expected_region = AudioRegion.load(
            output_filename, sr=10, sw=2, ch=1, audio_format=export_format
        )
        assert output_filename == expected_filename
        assert region == expected_region
        assert saver.data == bytes(expected_region)
amine@400 324
amine@400 325
def test_StreamSaverWorker_encode_audio(audio_data_source):
    """Check the failure path of `StreamSaverWorker._encode_export_audio`.

    `auditok.workers._run_subprocess` is patched to return ``(1, None,
    None)`` on every call. Encoding to ogg must then raise
    `AudioEncodingError` with the expected message, after ffmpeg, avconv
    and sox have each been invoked once, in that order.
    """
    with TemporaryDirectory() as workdir:
        ogg_path = os.path.join(workdir, "output.ogg")
        # Name of the intermediate wav file quoted in the error message.
        wav_fallback_path = ogg_path + ".wav"

        with patch("auditok.workers._run_subprocess") as patch_rsp:
            patch_rsp.return_value = (1, None, None)
            saver = StreamSaverWorker(audio_data_source, ogg_path)
            saver.start()
            tokenizer = TokenizerWorker(saver)
            tokenizer.start_all()
            tokenizer.join()
            saver.join()

            with pytest.raises(auditok.workers.AudioEncodingError) as ae_error:
                saver._encode_export_audio()

        message_template = (
            "Couldn't save audio data in the desired format "
            "'ogg'.\nEither none of 'ffmpeg', 'avconv' or 'sox' "
            "is installed or this format is not recognized.\n"
            "Audio file was saved as '{}'"
        )
        assert message_template.format(wav_fallback_path) == str(
            ae_error.value
        )

        # ffmpeg and avconv are called with the same argument list.
        converter_args = [
            "-y",
            "-f",
            "wav",
            "-i",
            wav_fallback_path,
            "-f",
            "ogg",
            ogg_path,
        ]
        sox_command = ["sox", "-t", "wav", wav_fallback_path, ogg_path]
        expected_calls = [
            call([tool] + converter_args) for tool in ("ffmpeg", "avconv")
        ]
        expected_calls.append(call(sox_command))
        assert patch_rsp.mock_calls == expected_calls

        source_region = AudioRegion.load(
            "tests/data/test_split_10HZ_mono.raw", sr=10, sw=2, ch=1
        )
        assert not saver._exported
        assert saver.data == bytes(source_region)
amine@419 376
amine@419 377
@pytest.mark.parametrize(
    "export_format",
    [
        "raw",  # raw
        "wav",  # wav
    ],
    ids=[
        "raw",
        "wav",  # was a duplicated "raw", which mislabelled the wav case
    ],
)
def test_AudioEventsJoinerWorker(audio_data_source, export_format):
    """Check that `AudioEventsJoinerWorker` joins detections with silence.

    The joiner observes a `TokenizerWorker`. After both have finished, the
    export must be written to the requested path and the joiner's data
    must equal the bytes of the region returned by
    `split_and_join_with_silence` for the same input file and silence
    duration.
    """
    with TemporaryDirectory() as tmpdir:
        expected_filename = os.path.join(tmpdir, f"output.{export_format}")
        # The joiner takes its audio parameters from the source reader.
        joiner = AudioEventsJoinerWorker(
            silence_duration=1.0,
            filename=expected_filename,
            export_format=export_format,
            sampling_rate=audio_data_source.sampling_rate,
            sample_width=audio_data_source.sample_width,
            channels=audio_data_source.channels,
        )

        tokenizer = TokenizerWorker(audio_data_source, observers=[joiner])
        tokenizer.start_all()
        tokenizer.join()
        joiner.join()

        output_filename = joiner.export_audio()
        # Reference result computed directly from the input file.
        expected_region = split_and_join_with_silence(
            "tests/data/test_split_10HZ_mono.raw",
            silence_duration=1.0,
            sr=10,
            sw=2,
            ch=1,
            aw=0.1,
        )
        assert output_filename == expected_filename
        assert joiner.data == bytes(expected_region)
amine@419 409 silence_duration=1.0,
amine@419 410 sr=10,
amine@419 411 sw=2,
amine@419 412 ch=1,
amine@419 413 aw=0.1,
amine@419 414 )
amine@419 415 assert output_filename == expected_filename
amine@419 416 assert joiner.data == bytes(expected_region)