annotate tests/test_workers.py @ 418:70abdb92149a

Add AudioEventsJoinerWorker
author Amine Sehili <amine.sehili@gmail.com>
date Fri, 18 Oct 2024 22:47:58 +0200
parents 996948ada980
children c2ac3fc1bfbc
rev   line source
amine@274 1 import os
amine@274 2 from tempfile import TemporaryDirectory
amine@403 3 from unittest.mock import Mock, call, patch
amine@403 4
amine@400 5 import pytest
amine@403 6
amine@418 7 import auditok.workers
amine@403 8 from auditok import AudioReader, AudioRegion
amine@403 9 from auditok.cmdline_util import make_logger
amine@292 10 from auditok.exceptions import AudioEncodingWarning
amine@274 11 from auditok.workers import (
amine@403 12 CommandLineWorker,
amine@403 13 PlayerWorker,
amine@403 14 PrintWorker,
amine@403 15 RegionSaverWorker,
amine@403 16 StreamSaverWorker,
amine@274 17 TokenizerWorker,
amine@274 18 )
amine@274 19
amine@274 20
@pytest.fixture
def audio_data_source():
    """Yield an ``AudioReader`` opened on the 10 Hz mono raw test file.

    The reader is closed after the requesting test has finished.
    """
    reader_kwargs = {"block_dur": 0.1, "sr": 10, "sw": 2, "ch": 1}
    source = AudioReader(
        input="tests/data/test_split_10HZ_mono.raw", **reader_kwargs
    )
    yield source
    # Teardown: release the underlying audio source.
    source.close()
amine@275 32
amine@400 33
@pytest.fixture
def expected_detections():
    """Return the ``(start, end)`` pairs the tests expect as detections."""
    detections = [(0.2, 1.6), (1.7, 3.1), (3.4, 5.4), (5.4, 7.4), (7.4, 7.6)]
    return detections
amine@400 43
amine@400 44
def test_TokenizerWorker(audio_data_source, expected_detections):
    """Check TokenizerWorker's detections and the matching "[DET]" log lines."""
    with TemporaryDirectory() as tmpdir:
        log_path = os.path.join(tmpdir, "file.log")
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=make_logger(file=log_path, name="test_TokenizerWorker"),
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        tokenizer.start_all()
        tokenizer.join()
        # Read the log while the temporary directory still exists.
        with open(log_path) as log_file:
            log_lines = list(log_file)

    template = (
        "[DET]: Detection {} (start: {:.3f}, end: {:.3f}, duration: {:.3f})"
    )
    assert len(tokenizer.detections) == len(expected_detections)

    # strict=True: detections, expectations and log lines must pair up exactly.
    triples = zip(
        tokenizer.detections, expected_detections, log_lines, strict=True
    )
    for number, (detection, (start, end), line) in enumerate(triples, 1):
        wanted_line = template.format(number, start, end, end - start)
        assert pytest.approx(detection.start) == start
        assert pytest.approx(detection.end) == end
        # The first 28 characters are skipped before comparing; presumably a
        # prefix added by the logger's formatter -- TODO confirm.
        assert line[28:].strip() == wanted_line
amine@275 77
amine@274 78
def test_PlayerWorker(audio_data_source, expected_detections):
    """Check that PlayerWorker plays the detections sent by the tokenizer.

    A ``Mock`` stands in for the audio player, so nothing is actually played;
    the test asserts that ``play`` was called on it and that the tokenizer
    produced the expected detections.
    """
    with TemporaryDirectory() as tmpdir:
        file = os.path.join(tmpdir, "file.log")
        # Name the logger after this test, like the other tests in this file
        # (it previously reused the name "test_RegionSaverWorker").
        logger = make_logger(file=file, name="test_PlayerWorker")
        player_mock = Mock()
        observers = [PlayerWorker(player_mock, logger=logger)]
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=logger,
            observers=observers,
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        tokenizer.start_all()
        tokenizer.join()
        # Wait for the observer as well before reading the log.
        tokenizer._observers[0].join()
        with open(file) as fp:
            # NOTE(review): only lines that *start* with "[PLAY]" are kept,
            # yet the comparison below skips the first 28 characters of each
            # line. If log lines carry a 28-character prefix (as the slice
            # suggests), this filter selects nothing and the loop below never
            # runs because of strict=False -- confirm against make_logger.
            log_lines = [
                line for line in fp.readlines() if line.startswith("[PLAY]")
            ]

    assert player_mock.play.called
    assert len(tokenizer.detections) == len(expected_detections)
    log_fmt = "[PLAY]: Detection {id} played"
    for i, (det, exp, log_line) in enumerate(
        zip(tokenizer.detections, expected_detections, log_lines, strict=False),
        1,
    ):
        start, end = exp
        exp_log_line = log_fmt.format(id=i)
        assert pytest.approx(det.start) == start
        assert pytest.approx(det.end) == end
        assert log_line[28:].strip() == exp_log_line
amine@400 116
amine@400 117
def test_RegionSaverWorker(audio_data_source, expected_detections):
    """Check the ``AudioRegion.save`` calls triggered by RegionSaverWorker.

    ``AudioRegion.save`` is patched, so no audio file is written.
    """
    filename_format = "Region_{id}_{start:.6f}-{end:.3f}_{duration:.3f}.wav"

    def region_filename(number, start, end):
        # File name expected for detection ``number`` spanning start..end.
        return filename_format.format(
            id=number, start=start, end=end, duration=end - start
        )

    with TemporaryDirectory() as tmpdir:
        log_path = os.path.join(tmpdir, "file.log")
        logger = make_logger(file=log_path, name="test_RegionSaverWorker")
        saver = RegionSaverWorker(filename_format, logger=logger)
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=logger,
            observers=[saver],
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        with patch("auditok.core.AudioRegion.save") as patched_save:
            tokenizer.start_all()
            tokenizer.join()
            tokenizer._observers[0].join()
        with open(log_path) as log_file:
            # NOTE(review): the filter looks at the very start of each line,
            # while the comparison at the end of this test skips 28 characters
            # first; if lines carry a prefix, nothing is selected here and the
            # final loop is a no-op (strict=False) -- confirm.
            log_lines = [
                line for line in log_file.readlines() if line.startswith("[SAVE]")
            ]

    expected_save_calls = [
        call(region_filename(number, bounds[0], bounds[1]), None)
        for number, bounds in enumerate(expected_detections, 1)
    ]

    # Keep every other recorded call (even indices); presumably the entries
    # in between are calls made on the mock's return value -- TODO confirm.
    recorded_calls = patched_save.mock_calls[::2]
    assert recorded_calls == expected_save_calls
    assert len(tokenizer.detections) == len(expected_detections)

    log_template = "[SAVE]: Detection {id} saved as '{filename}'"
    triples = zip(
        tokenizer.detections, expected_detections, log_lines, strict=False
    )
    for number, (detection, (start, end), line) in enumerate(triples, 1):
        wanted_line = log_template.format(
            id=number, filename=region_filename(number, start, end)
        )
        assert pytest.approx(detection.start) == start
        assert pytest.approx(detection.end) == end
        assert line[28:].strip() == wanted_line
amine@400 173
amine@400 174
def test_CommandLineWorker(audio_data_source, expected_detections):
    """Check that CommandLineWorker runs its command once per detection.

    ``os.system`` (as seen from ``auditok.workers``) is patched, so no
    command is actually executed.
    """
    command_format = "do nothing with"
    with TemporaryDirectory() as tmpdir:
        log_path = os.path.join(tmpdir, "file.log")
        logger = make_logger(file=log_path, name="test_CommandLineWorker")
        runner = CommandLineWorker(command_format, logger=logger)
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=logger,
            observers=[runner],
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        with patch("auditok.workers.os.system") as patched_os_system:
            tokenizer.start_all()
            tokenizer.join()
            tokenizer._observers[0].join()
        with open(log_path) as log_file:
            # NOTE(review): lines are filtered on their very first characters,
            # but compared below after dropping 28 characters; if lines carry
            # a prefix, this list is empty and the final loop never runs
            # (strict=False) -- confirm.
            log_lines = [
                line
                for line in log_file.readlines()
                if line.startswith("[COMMAND]")
            ]

    # One identical os.system call is expected for each detection.
    expected_system_calls = [call(command_format)] * len(expected_detections)
    assert patched_os_system.mock_calls == expected_system_calls
    assert len(tokenizer.detections) == len(expected_detections)

    log_template = "[COMMAND]: Detection {id} command '{command}'"
    triples = zip(
        tokenizer.detections, expected_detections, log_lines, strict=False
    )
    for number, (detection, (start, end), line) in enumerate(triples, 1):
        wanted_line = log_template.format(id=number, command=command_format)
        assert pytest.approx(detection.start) == start
        assert pytest.approx(detection.end) == end
        assert line[28:].strip() == wanted_line
amine@400 214
amine@400 215
def test_PrintWorker(audio_data_source, expected_detections):
    """Check that PrintWorker prints one formatted line per detection.

    ``builtins.print`` is patched so the calls can be inspected.
    """
    printer = PrintWorker(print_format="[{id}] {start} {end}, dur: {duration}")
    tokenizer = TokenizerWorker(
        audio_data_source,
        observers=[printer],
        min_dur=0.3,
        max_dur=2,
        max_silence=0.2,
        drop_trailing_silence=False,
        strict_min_dur=False,
        eth=50,
    )
    with patch("builtins.print") as patched_print:
        tokenizer.start_all()
        tokenizer.join()
        tokenizer._observers[0].join()

    # Expected output uses three decimals for start, end and duration.
    printed_template = "[{}] {:.3f} {:.3f}, dur: {:.3f}"
    expected_print_calls = []
    for number, (start, end) in enumerate(expected_detections, 1):
        text = printed_template.format(number, start, end, end - start)
        expected_print_calls.append(call(text))

    assert patched_print.mock_calls == expected_print_calls
    assert len(tokenizer.detections) == len(expected_detections)

    pairs = zip(tokenizer.detections, expected_detections, strict=True)
    for detection, (start, end) in pairs:
        assert pytest.approx(detection.start) == start
        assert pytest.approx(detection.end) == end
amine@287 249
amine@287 250
def test_StreamSaverWorker_wav(audio_data_source):
    """Check that StreamSaverWorker exports the streamed audio as a wav file."""
    with TemporaryDirectory() as tmpdir:
        wav_path = os.path.join(tmpdir, "output.wav")
        saver = StreamSaverWorker(audio_data_source, wav_path)
        saver.start()

        # The tokenizer reads its audio through the saver.
        tokenizer = TokenizerWorker(saver)
        tokenizer.start_all()
        tokenizer.join()
        saver.join()

        exported_path = saver.export_audio()
        source_region = AudioRegion.load(
            "tests/data/test_split_10HZ_mono.raw", sr=10, sw=2, ch=1
        )
        exported_region = AudioRegion.load(exported_path)

        assert exported_path == wav_path
        assert source_region == exported_region
        assert saver.data == bytes(exported_region)
amine@400 271
amine@400 272
def test_StreamSaverWorker_raw(audio_data_source):
    """Check that StreamSaverWorker exports the streamed audio as raw data."""
    raw_params = {"sr": 10, "sw": 2, "ch": 1}
    with TemporaryDirectory() as tmpdir:
        raw_path = os.path.join(tmpdir, "output")
        saver = StreamSaverWorker(
            audio_data_source, raw_path, export_format="raw"
        )
        saver.start()

        # The tokenizer reads its audio through the saver.
        tokenizer = TokenizerWorker(saver)
        tokenizer.start_all()
        tokenizer.join()
        saver.join()

        exported_path = saver.export_audio()
        source_region = AudioRegion.load(
            "tests/data/test_split_10HZ_mono.raw", **raw_params
        )
        # The exported file is loaded with explicit audio parameters and an
        # explicit "raw" format.
        exported_region = AudioRegion.load(
            exported_path, audio_format="raw", **raw_params
        )

        assert exported_path == raw_path
        assert source_region == exported_region
        assert saver.data == bytes(exported_region)
amine@400 294
amine@400 295
def test_StreamSaverWorker_encode_audio(audio_data_source):
    """Check StreamSaverWorker's behavior when no encoder can produce 'ogg'.

    ``auditok.workers._run_subprocess`` is patched to return ``(1, None,
    None)`` for every call. The test then asserts that
    ``_encode_export_audio`` raises ``AudioEncodingError`` with the expected
    message, that ffmpeg, avconv and sox were tried in that order, and that
    the saver still holds the full audio data without being marked as
    exported.
    """
    with TemporaryDirectory() as tmpdir:
        with patch("auditok.workers._run_subprocess") as patch_rsp:
            # Presumably a non-zero first element signals that the external
            # tool failed -- TODO confirm against _run_subprocess.
            patch_rsp.return_value = (1, None, None)
            expected_filename = os.path.join(tmpdir, "output.ogg")
            # Intermediate wav file handed to the external encoders.
            tmp_expected_filename = expected_filename + ".wav"
            saver = StreamSaverWorker(audio_data_source, expected_filename)
            saver.start()
            tokenizer = TokenizerWorker(saver)
            tokenizer.start_all()
            tokenizer.join()
            saver.join()

            with pytest.raises(auditok.workers.AudioEncodingError) as ae_error:
                saver._encode_export_audio()

        warn_msg = (
            "Couldn't save audio data in the desired format "
            "'ogg'.\nEither none of 'ffmpeg', 'avconv' or 'sox' "
            "is installed or this format is not recognized.\n"
            "Audio file was saved as '{}'"
        )
        assert warn_msg.format(tmp_expected_filename) == str(ae_error.value)

        # ffmpeg and avconv are expected to receive the same arguments.
        ffmpeg_avconv_args = [
            "-y",
            "-f",
            "wav",
            "-i",
            tmp_expected_filename,
            "-f",
            "ogg",
            expected_filename,
        ]
        expected_calls = [
            call(["ffmpeg"] + ffmpeg_avconv_args),
            call(["avconv"] + ffmpeg_avconv_args),
            call(
                [
                    "sox",
                    "-t",
                    "wav",
                    tmp_expected_filename,
                    expected_filename,
                ]
            ),
        ]
        assert patch_rsp.mock_calls == expected_calls
        region = AudioRegion.load(
            "tests/data/test_split_10HZ_mono.raw", sr=10, sw=2, ch=1
        )
        assert not saver._exported
        assert saver.data == bytes(region)