annotate tests/test_workers.py @ 419:c2ac3fc1bfbc

Add tests for AudioEventsJoinerWorker
author Amine Sehili <amine.sehili@gmail.com>
date Fri, 18 Oct 2024 23:16:08 +0200
parents 70abdb92149a
children c5b4178aa80f
rev   line source
amine@274 1 import os
amine@274 2 from tempfile import TemporaryDirectory
amine@403 3 from unittest.mock import Mock, call, patch
amine@403 4
amine@400 5 import pytest
amine@403 6
amine@418 7 import auditok.workers
amine@419 8 from auditok import AudioReader, AudioRegion, split, split_and_join_with_silence
amine@403 9 from auditok.cmdline_util import make_logger
amine@274 10 from auditok.workers import (
amine@419 11 AudioEventsJoinerWorker,
amine@403 12 CommandLineWorker,
amine@403 13 PlayerWorker,
amine@403 14 PrintWorker,
amine@403 15 RegionSaverWorker,
amine@403 16 StreamSaverWorker,
amine@274 17 TokenizerWorker,
amine@274 18 )
amine@274 19
amine@274 20
@pytest.fixture
def audio_data_source():
    """Yield an ``AudioReader`` opened on the 10 Hz mono raw test file.

    The reader is created with ``block_dur=0.1``, ``sr=10``, ``sw=2`` and
    ``ch=1``. It is closed after the ``yield``, i.e. once the requesting
    test has finished with it.
    """
    reader_options = {"block_dur": 0.1, "sr": 10, "sw": 2, "ch": 1}
    source = AudioReader(
        input="tests/data/test_split_10HZ_mono.raw", **reader_options
    )
    yield source
    source.close()
amine@275 32
amine@400 33
@pytest.fixture
def expected_detections():
    """Return the ``(start, end)`` pairs the worker tests compare against.

    The tests in this module check each pair against the ``start`` and
    ``end`` attributes of the tokenizer's detections.
    """
    starts = (0.2, 1.7, 3.4, 5.4, 7.4)
    ends = (1.6, 3.1, 5.4, 7.4, 7.6)
    # Pair every start with its end; the result is a list of 2-tuples.
    return list(zip(starts, ends))
amine@400 43
amine@400 44
def test_TokenizerWorker(audio_data_source, expected_detections):
    """Run a ``TokenizerWorker`` and check its detections and its log file.

    The worker reads from ``audio_data_source`` and logs to a file inside a
    temporary directory. Every detection must match the corresponding
    ``(start, end)`` pair of ``expected_detections`` and every log line must
    carry the matching ``[DET]`` message.
    """
    with TemporaryDirectory() as tmpdir:
        log_path = os.path.join(tmpdir, "file.log")
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=make_logger(file=log_path, name="test_TokenizerWorker"),
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        tokenizer.start_all()
        tokenizer.join()
        # Read the log before the temporary directory is removed.
        with open(log_path) as log_file:
            logged_lines = log_file.readlines()

    assert len(tokenizer.detections) == len(expected_detections)

    template = (
        "[DET]: Detection {} (start: {:.3f}, end: {:.3f}, duration: {:.3f})"
    )
    # strict=True: detections, expectations and log lines must all have the
    # same length, otherwise zip raises.
    triples = zip(
        tokenizer.detections, expected_detections, logged_lines, strict=True
    )
    for number, (detection, (start, end), logged) in enumerate(triples, 1):
        assert pytest.approx(detection.start) == start
        assert pytest.approx(detection.end) == end
        # The first 28 characters of a log line are skipped; presumably a
        # timestamp prefix written by the logger's formatter -- confirm
        # against make_logger.
        message = logged[28:].strip()
        assert message == template.format(number, start, end, end - start)
amine@275 77
amine@274 78
def test_PlayerWorker(audio_data_source, expected_detections):
    """Check that ``PlayerWorker`` plays the detections it is handed.

    A ``Mock`` stands in for the audio player. A ``TokenizerWorker`` feeds a
    ``PlayerWorker`` observer; after both have finished the test checks that
    the player's ``play`` was called and that the detections match
    ``expected_detections``.
    """
    with TemporaryDirectory() as tmpdir:
        file = os.path.join(tmpdir, "file.log")
        # Use a logger name specific to this test so that it does not share
        # a name with the logger created in test_RegionSaverWorker.
        logger = make_logger(file=file, name="test_PlayerWorker")
        player_mock = Mock()
        observers = [PlayerWorker(player_mock, logger=logger)]
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=logger,
            observers=observers,
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        tokenizer.start_all()
        tokenizer.join()
        # Wait for the observer too, so every detection has been handled
        # before the log file is read.
        tokenizer._observers[0].join()
        # NOTE(review): lines are kept only if they *start* with "[PLAY]",
        # while the loop below compares ``log_line[28:]`` with a string that
        # itself starts with "[PLAY]". Both are unlikely to hold for the same
        # line, so this list is presumably empty and, with ``strict=False``,
        # the loop body never runs -- confirm against the line format used
        # by make_logger.
        with open(file) as fp:
            log_lines = [
                line for line in fp.readlines() if line.startswith("[PLAY]")
            ]

    assert player_mock.play.called
    assert len(tokenizer.detections) == len(expected_detections)
    log_fmt = "[PLAY]: Detection {id} played"
    for i, (det, exp, log_line) in enumerate(
        zip(tokenizer.detections, expected_detections, log_lines, strict=False),
        1,
    ):
        start, end = exp
        exp_log_line = log_fmt.format(id=i)
        assert pytest.approx(det.start) == start
        assert pytest.approx(det.end) == end
        assert log_line[28:].strip() == exp_log_line
amine@400 116
amine@400 117
def test_RegionSaverWorker(audio_data_source, expected_detections):
    """Check the ``save`` calls issued by ``RegionSaverWorker``.

    ``AudioRegion.save`` is patched, so no audio file is written. The test
    compares the recorded calls with the file names obtained by formatting
    ``filename_format`` with each expected detection.
    """
    filename_format = "Region_{id}_{start:.6f}-{end:.3f}_{duration:.3f}.wav"

    def expected_name(number, start, end):
        # File name the saver should request for one detection.
        return filename_format.format(
            id=number, start=start, end=end, duration=end - start
        )

    with TemporaryDirectory() as tmpdir:
        log_path = os.path.join(tmpdir, "file.log")
        logger = make_logger(file=log_path, name="test_RegionSaverWorker")
        saver = RegionSaverWorker(filename_format, logger=logger)
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=logger,
            observers=[saver],
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        with patch("auditok.core.AudioRegion.save") as patched_save:
            tokenizer.start_all()
            tokenizer.join()
            tokenizer._observers[0].join()
        # NOTE(review): lines are kept only if they *start* with "[SAVE]",
        # while the loop at the end compares ``line[28:]`` with a string that
        # itself starts with "[SAVE]". Both are unlikely to hold for the same
        # line, so this list is presumably empty and, with ``strict=False``,
        # that loop never runs -- confirm against make_logger.
        with open(log_path) as log_file:
            save_lines = [
                line
                for line in log_file.readlines()
                if line.startswith("[SAVE]")
            ]

    wanted_calls = [
        call(expected_name(number, start, end), None)
        for number, (start, end) in enumerate(expected_detections, 1)
    ]
    # Only the even-indexed recorded calls are compared; presumably the
    # odd-indexed ones are calls made on the mock's return value -- confirm.
    recorded_calls = list(patched_save.mock_calls)[::2]
    assert recorded_calls == wanted_calls
    assert len(tokenizer.detections) == len(expected_detections)

    template = "[SAVE]: Detection {id} saved as '{filename}'"
    triples = zip(
        tokenizer.detections, expected_detections, save_lines, strict=False
    )
    for number, (detection, (start, end), line) in enumerate(triples, 1):
        wanted_line = template.format(
            id=number, filename=expected_name(number, start, end)
        )
        assert pytest.approx(detection.start) == start
        assert pytest.approx(detection.end) == end
        assert line[28:].strip() == wanted_line
amine@400 173
amine@400 174
def test_CommandLineWorker(audio_data_source, expected_detections):
    """Check that ``CommandLineWorker`` runs one command per detection.

    ``os.system`` as seen from ``auditok.workers`` is patched, so nothing is
    executed. The test expects one recorded call with ``command_format`` for
    each expected detection.
    """
    command_format = "do nothing with"
    with TemporaryDirectory() as tmpdir:
        log_path = os.path.join(tmpdir, "file.log")
        logger = make_logger(file=log_path, name="test_CommandLineWorker")
        runner = CommandLineWorker(command_format, logger=logger)
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=logger,
            observers=[runner],
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        with patch("auditok.workers.os.system") as patched_os_system:
            tokenizer.start_all()
            tokenizer.join()
            tokenizer._observers[0].join()
        # NOTE(review): lines are kept only if they *start* with "[COMMAND]",
        # while the loop below compares ``line[28:]`` with a string that
        # itself starts with "[COMMAND]". Both are unlikely to hold for the
        # same line, so this list is presumably empty and, with
        # ``strict=False``, the loop never runs -- confirm against
        # make_logger.
        with open(log_path) as log_file:
            command_lines = [
                line
                for line in log_file.readlines()
                if line.startswith("[COMMAND]")
            ]

    wanted_calls = [call(command_format)] * len(expected_detections)
    assert patched_os_system.mock_calls == wanted_calls
    assert len(tokenizer.detections) == len(expected_detections)

    template = "[COMMAND]: Detection {id} command '{command}'"
    triples = zip(
        tokenizer.detections, expected_detections, command_lines, strict=False
    )
    for number, (detection, (start, end), line) in enumerate(triples, 1):
        wanted_line = template.format(id=number, command=command_format)
        assert pytest.approx(detection.start) == start
        assert pytest.approx(detection.end) == end
        assert line[28:].strip() == wanted_line
amine@400 214
amine@400 215
def test_PrintWorker(audio_data_source, expected_detections):
    """Check what ``PrintWorker`` prints for each detection.

    ``builtins.print`` is patched while the workers run. The recorded calls
    must equal one formatted line per expected detection, and the tokenizer's
    detections must match ``expected_detections``.
    """
    printer = PrintWorker(print_format="[{id}] {start} {end}, dur: {duration}")
    tokenizer = TokenizerWorker(
        audio_data_source,
        observers=[printer],
        min_dur=0.3,
        max_dur=2,
        max_silence=0.2,
        drop_trailing_silence=False,
        strict_min_dur=False,
        eth=50,
    )
    with patch("builtins.print") as patched_print:
        tokenizer.start_all()
        tokenizer.join()
        tokenizer._observers[0].join()

    wanted_calls = []
    for number, (start, end) in enumerate(expected_detections, 1):
        text = "[{}] {:.3f} {:.3f}, dur: {:.3f}".format(
            number, start, end, end - start
        )
        wanted_calls.append(call(text))
    assert patched_print.mock_calls == wanted_calls

    assert len(tokenizer.detections) == len(expected_detections)
    pairs = zip(tokenizer.detections, expected_detections, strict=True)
    for detection, (start, end) in pairs:
        assert pytest.approx(detection.start) == start
        assert pytest.approx(detection.end) == end
amine@287 249
amine@287 250
def test_StreamSaverWorker_wav(audio_data_source):
    """Export the stream seen by ``StreamSaverWorker`` to a ``.wav`` file.

    The saver wraps ``audio_data_source`` and is itself read by a
    ``TokenizerWorker``. After export, the file name returned by
    ``export_audio`` must be the requested one, and the exported audio must
    equal both the raw test file and the saver's ``data``.
    """
    with TemporaryDirectory() as tmpdir:
        wanted_path = os.path.join(tmpdir, "output.wav")
        saver = StreamSaverWorker(audio_data_source, wanted_path)
        saver.start()

        tokenizer = TokenizerWorker(saver)
        tokenizer.start_all()
        tokenizer.join()
        saver.join()

        exported_path = saver.export_audio()
        # Reference audio: the raw file the fixture reads from.
        source_region = AudioRegion.load(
            "tests/data/test_split_10HZ_mono.raw", sr=10, sw=2, ch=1
        )
        # Audio actually written by the saver.
        exported_region = AudioRegion.load(exported_path)

        assert exported_path == wanted_path
        assert source_region == exported_region
        assert saver.data == bytes(exported_region)
amine@400 271
amine@400 272
@pytest.mark.parametrize(
    "export_format",
    [
        "raw",  # raw
        "wav",  # wav
    ],
    # One id per parameter value, so each case is labelled with its format.
    ids=[
        "raw",
        "wav",
    ],
)
def test_StreamSaverWorker(audio_data_source, export_format):
    """Export the stream seen by ``StreamSaverWorker`` in ``export_format``.

    The saver wraps ``audio_data_source`` and is itself read by a
    ``TokenizerWorker``. After export, the file name returned by
    ``export_audio`` must be the requested one, and the exported audio must
    equal both the raw test file and the saver's ``data``.
    """
    with TemporaryDirectory() as tmpdir:
        expected_filename = os.path.join(tmpdir, f"output.{export_format}")
        saver = StreamSaverWorker(
            audio_data_source, expected_filename, export_format=export_format
        )
        saver.start()
        tokenizer = TokenizerWorker(saver)
        tokenizer.start_all()
        tokenizer.join()
        saver.join()
        output_filename = saver.export_audio()
        # Reference audio: the raw file the fixture reads from.
        region = AudioRegion.load(
            "tests/data/test_split_10HZ_mono.raw", sr=10, sw=2, ch=1
        )
        # Audio actually written by the saver, loaded back from disk.
        expected_region = AudioRegion.load(
            output_filename, sr=10, sw=2, ch=1, audio_format=export_format
        )
        assert output_filename == expected_filename
        assert region == expected_region
        assert saver.data == bytes(expected_region)
amine@400 305
amine@400 306
def test_StreamSaverWorker_encode_audio(audio_data_source):
    """Check the error raised when no converter can encode the output.

    ``auditok.workers._run_subprocess`` is patched to return
    ``(1, None, None)`` (presumably exit code, stdout, stderr -- confirm).
    ``_encode_export_audio`` must then raise ``AudioEncodingError`` with the
    expected message, after trying ``ffmpeg``, ``avconv`` and ``sox`` in that
    order, and the saver must not be marked as exported.
    """
    with TemporaryDirectory() as tmpdir, patch(
        "auditok.workers._run_subprocess"
    ) as patch_rsp:
        patch_rsp.return_value = (1, None, None)
        target = os.path.join(tmpdir, "output.ogg")
        # Name of the intermediate wav file mentioned in the error message
        # and passed to the converters.
        fallback_wav = target + ".wav"
        saver = StreamSaverWorker(audio_data_source, target)
        saver.start()
        tokenizer = TokenizerWorker(saver)
        tokenizer.start_all()
        tokenizer.join()
        saver.join()

        with pytest.raises(auditok.workers.AudioEncodingError) as ae_error:
            saver._encode_export_audio()

        expected_message = (
            "Couldn't save audio data in the desired format "
            "'ogg'.\nEither none of 'ffmpeg', 'avconv' or 'sox' "
            "is installed or this format is not recognized.\n"
            "Audio file was saved as '{}'"
        ).format(fallback_wav)
        assert expected_message == str(ae_error.value)

        # Arguments shared by the ffmpeg and avconv invocations.
        converter_args = [
            "-y",
            "-f",
            "wav",
            "-i",
            fallback_wav,
            "-f",
            "ogg",
            target,
        ]
        sox_command = ["sox", "-t", "wav", fallback_wav, target]
        assert patch_rsp.mock_calls == [
            call(["ffmpeg"] + converter_args),
            call(["avconv"] + converter_args),
            call(sox_command),
        ]

        source_region = AudioRegion.load(
            "tests/data/test_split_10HZ_mono.raw", sr=10, sw=2, ch=1
        )
        assert not saver._exported
        assert saver.data == bytes(source_region)
amine@419 357
amine@419 358
@pytest.mark.parametrize(
    "export_format",
    [
        "raw",  # raw
        "wav",  # wav
    ],
    # One id per parameter value, so each case is labelled with its format.
    ids=[
        "raw",
        "wav",
    ],
)
def test_AudioEventsJoinerWorker(audio_data_source, export_format):
    """Check the audio assembled by ``AudioEventsJoinerWorker``.

    The joiner observes a ``TokenizerWorker`` reading ``audio_data_source``.
    After export, the returned file name must be the requested one and the
    joiner's ``data`` must equal the bytes of the region returned by
    ``split_and_join_with_silence`` for the same file and silence duration.
    """
    with TemporaryDirectory() as tmpdir:
        expected_filename = os.path.join(tmpdir, f"output.{export_format}")
        joiner = AudioEventsJoinerWorker(
            silence_duration=1.0,
            filename=expected_filename,
            export_format=export_format,
            sampling_rate=audio_data_source.sampling_rate,
            sample_width=audio_data_source.sample_width,
            channels=audio_data_source.channels,
        )

        tokenizer = TokenizerWorker(audio_data_source, observers=[joiner])
        tokenizer.start_all()
        tokenizer.join()
        # Wait for the joiner as well before exporting its audio.
        joiner.join()

        output_filename = joiner.export_audio()
        # Reference result computed directly from the raw test file.
        expected_region = split_and_join_with_silence(
            "tests/data/test_split_10HZ_mono.raw",
            silence_duration=1.0,
            sr=10,
            sw=2,
            ch=1,
            aw=0.1,
        )
        assert output_filename == expected_filename
        assert joiner.data == bytes(expected_region)