annotate tests/test_workers.py @ 403:996948ada980

Update tests
author Amine Sehili <amine.sehili@gmail.com>
date Sun, 26 May 2024 22:43:08 +0200
parents 323d59b404a2
children 70abdb92149a
rev   line source
amine@274 1 import os
amine@274 2 from tempfile import TemporaryDirectory
amine@403 3 from unittest.mock import Mock, call, patch
amine@403 4
amine@400 5 import pytest
amine@403 6
amine@403 7 from auditok import AudioReader, AudioRegion
amine@403 8 from auditok.cmdline_util import make_logger
amine@292 9 from auditok.exceptions import AudioEncodingWarning
amine@274 10 from auditok.workers import (
amine@403 11 CommandLineWorker,
amine@403 12 PlayerWorker,
amine@403 13 PrintWorker,
amine@403 14 RegionSaverWorker,
amine@403 15 StreamSaverWorker,
amine@274 16 TokenizerWorker,
amine@274 17 )
amine@274 18
amine@274 19
@pytest.fixture
def audio_data_source():
    """Provide an ``AudioReader`` opened on the 10 Hz mono raw test file.

    The reader is handed to the requesting test and closed once the
    fixture resumes after the ``yield``.
    """
    reader_options = {
        "input": "tests/data/test_split_10HZ_mono.raw",
        "block_dur": 0.1,
        "sr": 10,
        "sw": 2,
        "ch": 1,
    }
    source = AudioReader(**reader_options)
    yield source
    source.close()
amine@275 31
amine@400 32
@pytest.fixture
def expected_detections():
    """Return the ``(start, end)`` pairs the worker tests compare against."""
    starts = (0.2, 1.7, 3.4, 5.4, 7.4)
    ends = (1.6, 3.1, 5.4, 7.4, 7.6)
    return list(zip(starts, ends))
amine@400 42
amine@400 43
def test_TokenizerWorker(audio_data_source, expected_detections):
    """Run a ``TokenizerWorker`` alone and check detections and log lines.

    The worker logs to a file in a temporary directory; every logged line
    is then compared, in order, with the expected "[DET]" message.
    """
    with TemporaryDirectory() as tmpdir:
        log_path = os.path.join(tmpdir, "file.log")
        worker = TokenizerWorker(
            audio_data_source,
            logger=make_logger(file=log_path, name="test_TokenizerWorker"),
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        worker.start_all()
        worker.join()
        with open(log_path) as log_file:
            logged = log_file.readlines()

    template = (
        "[DET]: Detection {} (start: {:.3f}, end: {:.3f}, duration: {:.3f})"
    )
    assert len(worker.detections) == len(expected_detections)
    triples = zip(worker.detections, expected_detections, logged)
    for index, (detection, (start, end), line) in enumerate(triples, 1):
        assert pytest.approx(detection.start) == start
        assert pytest.approx(detection.end) == end
        # Only the text after the first 28 characters of the line is compared.
        wanted = template.format(index, start, end, end - start)
        assert line[28:].strip() == wanted
amine@275 75
amine@274 76
def test_PlayerWorker(audio_data_source, expected_detections):
    """Check that a ``PlayerWorker`` observer plays the detections.

    A ``Mock`` stands in for the audio player, so nothing is actually
    played; the test asserts that its ``play`` method was called and that
    the tokenizer produced the expected detections.
    """
    with TemporaryDirectory() as tmpdir:
        file = os.path.join(tmpdir, "file.log")
        # Use a logger name specific to this test (it used to be the
        # copy-pasted "test_RegionSaverWorker") so that this test and
        # test_RegionSaverWorker do not request a logger of the same name.
        logger = make_logger(file=file, name="test_PlayerWorker")
        player_mock = Mock()
        observers = [PlayerWorker(player_mock, logger=logger)]
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=logger,
            observers=observers,
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        tokenizer.start_all()
        tokenizer.join()
        # Also wait for the observer before reading the log file.
        tokenizer._observers[0].join()
        with open(file) as fp:
            # NOTE(review): this keeps lines that *begin* with "[PLAY]",
            # while the comparison below skips the first 28 characters of
            # each line before matching "[PLAY]: ...". If log lines carry
            # a prefix, this list is empty and the loop below never runs
            # -- confirm against the logger's format.
            log_lines = [
                line for line in fp.readlines() if line.startswith("[PLAY]")
            ]

    assert player_mock.play.called
    assert len(tokenizer.detections) == len(expected_detections)
    log_fmt = "[PLAY]: Detection {id} played"
    for i, (det, exp, log_line) in enumerate(
        zip(tokenizer.detections, expected_detections, log_lines), 1
    ):
        start, end = exp
        exp_log_line = log_fmt.format(id=i)
        assert pytest.approx(det.start) == start
        assert pytest.approx(det.end) == end
        assert log_line[28:].strip() == exp_log_line
amine@400 113
amine@400 114
def test_RegionSaverWorker(audio_data_source, expected_detections):
    """Check that a ``RegionSaverWorker`` saves each detection to a file.

    ``AudioRegion.save`` is patched, so no audio file is written; the test
    inspects the calls recorded on the patch instead.
    """
    filename_format = "Region_{id}_{start:.6f}-{end:.3f}_{duration:.3f}.wav"
    with TemporaryDirectory() as tmpdir:
        log_path = os.path.join(tmpdir, "file.log")
        log = make_logger(file=log_path, name="test_RegionSaverWorker")
        saver = RegionSaverWorker(filename_format, logger=log)
        worker = TokenizerWorker(
            audio_data_source,
            logger=log,
            observers=[saver],
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        with patch("auditok.core.AudioRegion.save") as patched_save:
            worker.start_all()
            worker.join()
            worker._observers[0].join()
        with open(log_path) as log_file:
            # NOTE(review): this keeps lines that *begin* with "[SAVE]",
            # while the comparison below skips the first 28 characters of
            # each line before matching "[SAVE]: ...". If log lines carry
            # a prefix, this list is empty and the loop below never runs
            # -- confirm against the logger's format.
            logged = []
            for line in log_file.readlines():
                if line.startswith("[SAVE]"):
                    logged.append(line)

    # One expected file name per detection, numbered from 1.
    filenames = []
    for index, (start, end) in enumerate(expected_detections, 1):
        filenames.append(
            filename_format.format(
                id=index, start=start, end=end, duration=end - start
            )
        )

    # Only every other recorded call (positions 0, 2, 4, ...) is compared.
    # NOTE(review): presumably the skipped entries are calls recorded on
    # the mocked return value -- confirm.
    recorded = patched_save.mock_calls[::2]
    assert recorded == [call(name, None) for name in filenames]
    assert len(worker.detections) == len(expected_detections)

    template = "[SAVE]: Detection {id} saved as '{filename}'"
    rows = zip(worker.detections, expected_detections, logged)
    for index, (detection, (start, end), line) in enumerate(rows, 1):
        assert pytest.approx(detection.start) == start
        assert pytest.approx(detection.end) == end
        wanted = template.format(id=index, filename=filenames[index - 1])
        assert line[28:].strip() == wanted
amine@400 169
amine@400 170
def test_CommandLineWorker(audio_data_source, expected_detections):
    """Check that a ``CommandLineWorker`` issues one command per detection.

    ``os.system`` as seen from ``auditok.workers`` is patched, so no
    command is executed; the recorded calls are inspected instead.
    """
    command_format = "do nothing with"
    with TemporaryDirectory() as tmpdir:
        log_path = os.path.join(tmpdir, "file.log")
        log = make_logger(file=log_path, name="test_CommandLineWorker")
        runner = CommandLineWorker(command_format, logger=log)
        worker = TokenizerWorker(
            audio_data_source,
            logger=log,
            observers=[runner],
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        with patch("auditok.workers.os.system") as patched_os_system:
            worker.start_all()
            worker.join()
            worker._observers[0].join()
        with open(log_path) as log_file:
            # NOTE(review): this keeps lines that *begin* with "[COMMAND]",
            # while the comparison below skips the first 28 characters of
            # each line before matching "[COMMAND]: ...". If log lines
            # carry a prefix, this list is empty and the loop below never
            # runs -- confirm against the logger's format.
            logged = []
            for line in log_file.readlines():
                if line.startswith("[COMMAND]"):
                    logged.append(line)

    # The command has no placeholders, so every call is expected to be equal.
    expected_system_calls = [call(command_format)] * len(expected_detections)
    assert patched_os_system.mock_calls == expected_system_calls
    assert len(worker.detections) == len(expected_detections)

    template = "[COMMAND]: Detection {id} command '{command}'"
    rows = zip(worker.detections, expected_detections, logged)
    for index, (detection, (start, end), line) in enumerate(rows, 1):
        assert pytest.approx(detection.start) == start
        assert pytest.approx(detection.end) == end
        wanted = template.format(id=index, command=command_format)
        assert line[28:].strip() == wanted
amine@400 209
amine@400 210
def test_PrintWorker(audio_data_source, expected_detections):
    """Check that a ``PrintWorker`` prints one formatted line per detection.

    ``builtins.print`` is patched while the workers run, and the calls
    recorded on the patch are compared with the expected output lines.
    """
    printer = PrintWorker(
        print_format="[{id}] {start} {end}, dur: {duration}"
    )
    worker = TokenizerWorker(
        audio_data_source,
        observers=[printer],
        min_dur=0.3,
        max_dur=2,
        max_silence=0.2,
        drop_trailing_silence=False,
        strict_min_dur=False,
        eth=50,
    )
    with patch("builtins.print") as patched_print:
        worker.start_all()
        worker.join()
        worker._observers[0].join()

    # Expected output: detection number, then start, end and duration,
    # each rendered with three decimals.
    line_template = "[{}] {:.3f} {:.3f}, dur: {:.3f}"
    expected_print_calls = []
    for index, (start, end) in enumerate(expected_detections, 1):
        text = line_template.format(index, start, end, end - start)
        expected_print_calls.append(call(text))

    assert patched_print.mock_calls == expected_print_calls
    assert len(worker.detections) == len(expected_detections)
    for detection, (start, end) in zip(worker.detections, expected_detections):
        assert pytest.approx(detection.start) == start
        assert pytest.approx(detection.end) == end
amine@287 244
amine@287 245
def test_StreamSaverWorker_wav(audio_data_source):
    """Save the stream as a wav file and compare it with the input audio.

    The file written by ``save_stream`` is loaded back and must equal the
    region loaded from the raw test file; the saver's ``data`` must equal
    the bytes of the region that was loaded back.
    """
    with TemporaryDirectory() as tmpdir:
        target_path = os.path.join(tmpdir, "output.wav")
        saver = StreamSaverWorker(audio_data_source, target_path)
        saver.start()

        # The tokenizer reads from the saver, which wraps the audio source.
        worker = TokenizerWorker(saver)
        worker.start_all()
        worker.join()
        saver.join()

        written_path = saver.save_stream()
        reference = AudioRegion.load(
            "tests/data/test_split_10HZ_mono.raw", sr=10, sw=2, ch=1
        )
        saved = AudioRegion.load(written_path)

        assert written_path == target_path
        assert reference == saved
        assert saver.data == bytes(saved)
amine@400 266
amine@400 267
def test_StreamSaverWorker_raw(audio_data_source):
    """Save the stream in raw format and compare it with the input audio.

    The output file name has no extension, so the export format is passed
    explicitly and the file is loaded back with explicit audio parameters.
    """
    raw_params = {"sr": 10, "sw": 2, "ch": 1}
    with TemporaryDirectory() as tmpdir:
        target_path = os.path.join(tmpdir, "output")
        saver = StreamSaverWorker(
            audio_data_source, target_path, export_format="raw"
        )
        saver.start()

        # The tokenizer reads from the saver, which wraps the audio source.
        worker = TokenizerWorker(saver)
        worker.start_all()
        worker.join()
        saver.join()

        written_path = saver.save_stream()
        reference = AudioRegion.load(
            "tests/data/test_split_10HZ_mono.raw", **raw_params
        )
        saved = AudioRegion.load(
            written_path, audio_format="raw", **raw_params
        )

        assert written_path == target_path
        assert reference == saved
        assert saver.data == bytes(saved)
amine@400 289
amine@400 290
def test_StreamSaverWorker_encode_audio(audio_data_source):
    """Check the fallback when no external encoder converts the stream.

    ``auditok.workers._run_subprocess`` is patched to return
    ``(1, None, None)`` for every call. The test expects ``save_stream``
    to raise ``AudioEncodingWarning`` naming the temporary wav file, after
    having tried ffmpeg, avconv and sox, in that order.
    """
    with TemporaryDirectory() as tmpdir:
        with patch("auditok.workers._run_subprocess") as patch_rsp:
            patch_rsp.return_value = (1, None, None)
            target_path = os.path.join(tmpdir, "output.ogg")
            wav_path = target_path + ".wav"
            saver = StreamSaverWorker(audio_data_source, target_path)
            saver.start()
            worker = TokenizerWorker(saver)
            worker.start_all()
            worker.join()
            saver.join()
            with pytest.raises(AudioEncodingWarning) as rt_warn:
                saver.save_stream()

        expected_message = (
            "Couldn't save audio data in the desired format "
            "'ogg'. Either none of 'ffmpeg', 'avconv' or 'sox' "
            "is installed or this format is not recognized.\n"
            "Audio file was saved as '{}'"
        ).format(wav_path)
        assert expected_message == str(rt_warn.value)

        # ffmpeg and avconv are expected to receive the same arguments.
        converter_args = [
            "-y",
            "-f",
            "wav",
            "-i",
            wav_path,
            "-f",
            "ogg",
            target_path,
        ]
        expected_calls = [
            call([tool] + converter_args) for tool in ("ffmpeg", "avconv")
        ]
        expected_calls.append(
            call(["sox", "-t", "wav", wav_path, target_path])
        )
        assert patch_rsp.mock_calls == expected_calls

        reference = AudioRegion.load(
            "tests/data/test_split_10HZ_mono.raw", sr=10, sw=2, ch=1
        )
        assert saver._exported
        assert saver.data == bytes(reference)