auditok: tests/test_workers.py annotate

annotate tests/test_workers.py @ 418:70abdb92149a

Add AudioEventsJoinerWorker

author	Amine Sehili <amine.sehili@gmail.com>
date	Fri, 18 Oct 2024 22:47:58 +0200
parents	996948ada980
children	c2ac3fc1bfbc

rev	line source
amine@274	1 import os
amine@274	2 from tempfile import TemporaryDirectory
amine@403	3 from unittest.mock import Mock, call, patch
amine@403	4
amine@400	5 import pytest
amine@403	6
amine@418	7 import auditok.workers
amine@403	8 from auditok import AudioReader, AudioRegion
amine@403	9 from auditok.cmdline_util import make_logger
amine@292	10 from auditok.exceptions import AudioEncodingWarning
amine@274	11 from auditok.workers import (
amine@403	12 CommandLineWorker,
amine@403	13 PlayerWorker,
amine@403	14 PrintWorker,
amine@403	15 RegionSaverWorker,
amine@403	16 StreamSaverWorker,
amine@274	17 TokenizerWorker,
amine@274	18 )
amine@274	19
amine@274	20
@pytest.fixture
def audio_data_source():
    """Yield an ``AudioReader`` opened on the raw 10 Hz mono test recording.

    The reader is closed after the requesting test has used it.
    """
    reader_options = {"block_dur": 0.1, "sr": 10, "sw": 2, "ch": 1}
    source = AudioReader(
        input="tests/data/test_split_10HZ_mono.raw", **reader_options
    )
    yield source
    source.close()
amine@275	32
amine@400	33
@pytest.fixture
def expected_detections():
    """Return the ``(start, end)`` pairs the tests expect the tokenizer to find."""
    starts = (0.2, 1.7, 3.4, 5.4, 7.4)
    ends = (1.6, 3.1, 5.4, 7.4, 7.6)
    # Pair each start with the end at the same position.
    return list(zip(starts, ends))
amine@400	43
amine@400	44
def test_TokenizerWorker(audio_data_source, expected_detections):
    """Check the detections made by ``TokenizerWorker`` and the lines it logs.

    Every line of the log file is expected to hold, after its first 28
    characters, the ``[DET]`` message of the detection at the same position.
    """
    tokenizer_options = {
        "min_dur": 0.3,
        "max_dur": 2,
        "max_silence": 0.2,
        "drop_trailing_silence": False,
        "strict_min_dur": False,
        "eth": 50,
    }
    with TemporaryDirectory() as workdir:
        log_path = os.path.join(workdir, "file.log")
        log = make_logger(file=log_path, name="test_TokenizerWorker")
        worker = TokenizerWorker(
            audio_data_source, logger=log, **tokenizer_options
        )
        worker.start_all()
        worker.join()
        with open(log_path) as log_file:
            logged_lines = log_file.readlines()

    message_template = (
        "[DET]: Detection {} (start: {:.3f}, end: {:.3f}, duration: {:.3f})"
    )
    assert len(worker.detections) == len(expected_detections)
    # strict=True: a missing or extra log line fails the test instead of
    # being silently ignored.
    triples = zip(
        worker.detections, expected_detections, logged_lines, strict=True
    )
    for number, (detection, (start, end), line) in enumerate(triples, 1):
        wanted_line = message_template.format(number, start, end, end - start)
        assert pytest.approx(detection.start) == start
        assert pytest.approx(detection.end) == end
        assert line[28:].strip() == wanted_line
amine@275	77
amine@274	78
def test_PlayerWorker(audio_data_source, expected_detections):
    """Check that ``PlayerWorker`` plays the regions found by the tokenizer.

    A ``Mock`` stands in for the audio player, so the test only verifies that
    ``play`` was called, that the detections match ``expected_detections``
    and that the ``[PLAY]`` log lines have the expected content.
    """
    with TemporaryDirectory() as tmpdir:
        file = os.path.join(tmpdir, "file.log")
        # Use a logger name of its own: the name used to be copy-pasted from
        # test_RegionSaverWorker, so both tests asked for the same named
        # logger.
        logger = make_logger(file=file, name="test_PlayerWorker")
        player_mock = Mock()
        observers = [PlayerWorker(player_mock, logger=logger)]
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=logger,
            observers=observers,
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        tokenizer.start_all()
        tokenizer.join()
        tokenizer._observers[0].join()
        with open(file) as fp:
            # NOTE(review): this filter tests the very start of each line,
            # whereas the assertion below reads the message from offset 28.
            # If every log line carries a 28-character prefix, nothing
            # matches, `log_lines` stays empty and the non-strict zip below
            # skips the loop entirely -- confirm against the logger's format.
            log_lines = [
                line for line in fp.readlines() if line.startswith("[PLAY]")
            ]

    assert player_mock.play.called
    assert len(tokenizer.detections) == len(expected_detections)
    log_fmt = "[PLAY]: Detection {id} played"
    for i, (det, exp, log_line) in enumerate(
        zip(tokenizer.detections, expected_detections, log_lines, strict=False),
        1,
    ):
        start, end = exp
        exp_log_line = log_fmt.format(id=i)
        assert pytest.approx(det.start) == start
        assert pytest.approx(det.end) == end
        assert log_line[28:].strip() == exp_log_line
amine@400	116
amine@400	117
def test_RegionSaverWorker(audio_data_source, expected_detections):
    """Check the file names ``RegionSaverWorker`` passes to ``AudioRegion.save``.

    ``AudioRegion.save`` is patched, so the test inspects the calls recorded
    by the mock rather than files on disk.
    """
    filename_format = "Region_{id}_{start:.6f}-{end:.3f}_{duration:.3f}.wav"

    def region_filename(number, bounds):
        # File name expected for detection `number` spanning `bounds`.
        begin, stop = bounds
        return filename_format.format(
            id=number, start=begin, end=stop, duration=stop - begin
        )

    with TemporaryDirectory() as workdir:
        log_path = os.path.join(workdir, "file.log")
        log = make_logger(file=log_path, name="test_RegionSaverWorker")
        worker = TokenizerWorker(
            audio_data_source,
            logger=log,
            observers=[RegionSaverWorker(filename_format, logger=log)],
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        with patch("auditok.core.AudioRegion.save") as save_mock:
            worker.start_all()
            worker.join()
            worker._observers[0].join()
        with open(log_path) as log_file:
            # NOTE(review): the filter looks at column 0 but the assertion
            # below reads the message from offset 28; if each line has a
            # 28-character prefix this list is empty and the non-strict zip
            # loop never executes -- confirm.
            save_lines = [
                line
                for line in log_file.readlines()
                if line.startswith("[SAVE]")
            ]

    wanted_calls = [
        call(region_filename(number, bounds), None)
        for number, bounds in enumerate(expected_detections, 1)
    ]
    # Only the calls recorded at even positions are compared.
    recorded_calls = list(save_mock.mock_calls)[::2]
    assert recorded_calls == wanted_calls
    assert len(worker.detections) == len(expected_detections)

    message_template = "[SAVE]: Detection {id} saved as '{filename}'"
    triples = zip(
        worker.detections, expected_detections, save_lines, strict=False
    )
    for number, (detection, bounds, line) in enumerate(triples, 1):
        start, end = bounds
        wanted_line = message_template.format(
            id=number, filename=region_filename(number, bounds)
        )
        assert pytest.approx(detection.start) == start
        assert pytest.approx(detection.end) == end
        assert line[28:].strip() == wanted_line
amine@400	173
amine@400	174
def test_CommandLineWorker(audio_data_source, expected_detections):
    """Check that ``CommandLineWorker`` runs its command once per detection.

    ``os.system`` as seen from ``auditok.workers`` is patched, so no command
    is actually executed.
    """
    command_format = "do nothing with"
    with TemporaryDirectory() as workdir:
        log_path = os.path.join(workdir, "file.log")
        log = make_logger(file=log_path, name="test_CommandLineWorker")
        worker = TokenizerWorker(
            audio_data_source,
            logger=log,
            observers=[CommandLineWorker(command_format, logger=log)],
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        with patch("auditok.workers.os.system") as system_mock:
            worker.start_all()
            worker.join()
            worker._observers[0].join()
        with open(log_path) as log_file:
            # NOTE(review): the filter looks at column 0 but the assertion
            # below reads the message from offset 28; if each line has a
            # 28-character prefix this list is empty and the non-strict zip
            # loop never executes -- confirm.
            command_lines = [
                line
                for line in log_file.readlines()
                if line.startswith("[COMMAND]")
            ]

    # One identical call is expected for every detection.
    wanted_calls = [call(command_format)] * len(expected_detections)
    assert system_mock.mock_calls == wanted_calls
    assert len(worker.detections) == len(expected_detections)

    message_template = "[COMMAND]: Detection {id} command '{command}'"
    triples = zip(
        worker.detections, expected_detections, command_lines, strict=False
    )
    for number, (detection, (start, end), line) in enumerate(triples, 1):
        wanted_line = message_template.format(
            id=number, command=command_format
        )
        assert pytest.approx(detection.start) == start
        assert pytest.approx(detection.end) == end
        assert line[28:].strip() == wanted_line
amine@400	214
amine@400	215
def test_PrintWorker(audio_data_source, expected_detections):
    """Check what ``PrintWorker`` passes to ``print`` for each detection.

    The built-in ``print`` is patched while the workers run, and the calls it
    recorded are compared with one formatted line per expected detection.
    """
    printer = PrintWorker(print_format="[{id}] {start} {end}, dur: {duration}")
    worker = TokenizerWorker(
        audio_data_source,
        observers=[printer],
        min_dur=0.3,
        max_dur=2,
        max_silence=0.2,
        drop_trailing_silence=False,
        strict_min_dur=False,
        eth=50,
    )
    with patch("builtins.print") as print_mock:
        worker.start_all()
        worker.join()
        worker._observers[0].join()

    wanted_calls = [
        call(
            "[{}] {:.3f} {:.3f}, dur: {:.3f}".format(
                number, begin, stop, stop - begin
            )
        )
        for number, (begin, stop) in enumerate(expected_detections, 1)
    ]
    assert print_mock.mock_calls == wanted_calls
    assert len(worker.detections) == len(expected_detections)

    pairs = zip(worker.detections, expected_detections, strict=True)
    for detection, (start, end) in pairs:
        assert pytest.approx(detection.start) == start
        assert pytest.approx(detection.end) == end
amine@287	249
amine@287	250
def test_StreamSaverWorker_wav(audio_data_source):
    """Check that ``StreamSaverWorker`` exports the stream it read as wav.

    The exported file is loaded back and compared with the original raw
    recording and with the data kept by the saver.
    """
    with TemporaryDirectory() as workdir:
        wav_path = os.path.join(workdir, "output.wav")
        saver = StreamSaverWorker(audio_data_source, wav_path)
        saver.start()

        # The tokenizer reads from the saver, not from the source directly.
        worker = TokenizerWorker(saver)
        worker.start_all()
        worker.join()
        saver.join()

        exported_path = saver.export_audio()
        source_region = AudioRegion.load(
            "tests/data/test_split_10HZ_mono.raw", sr=10, sw=2, ch=1
        )
        exported_region = AudioRegion.load(exported_path)

        assert exported_path == wav_path
        assert source_region == exported_region
        assert saver.data == bytes(exported_region)
amine@400	271
amine@400	272
def test_StreamSaverWorker_raw(audio_data_source):
    """Check that ``StreamSaverWorker`` exports the stream it read as raw data.

    The exported file is loaded back with the recording's audio parameters
    and compared with the original recording and with the saver's data.
    """
    raw_params = {"sr": 10, "sw": 2, "ch": 1}
    with TemporaryDirectory() as workdir:
        raw_path = os.path.join(workdir, "output")
        saver = StreamSaverWorker(
            audio_data_source, raw_path, export_format="raw"
        )
        saver.start()

        # The tokenizer reads from the saver, not from the source directly.
        worker = TokenizerWorker(saver)
        worker.start_all()
        worker.join()
        saver.join()

        exported_path = saver.export_audio()
        source_region = AudioRegion.load(
            "tests/data/test_split_10HZ_mono.raw", **raw_params
        )
        exported_region = AudioRegion.load(
            exported_path, audio_format="raw", **raw_params
        )

        assert exported_path == raw_path
        assert source_region == exported_region
        assert saver.data == bytes(exported_region)
amine@400	294
amine@400	295
def test_StreamSaverWorker_encode_audio(audio_data_source):
    """Check ``StreamSaverWorker`` when no external encoder produces the file.

    ``auditok.workers._run_subprocess`` is patched to return
    ``(1, None, None)`` (presumably a non-zero exit status -- confirm against
    the helper). The test then expects ``_encode_export_audio`` to raise
    ``AudioEncodingError`` with a message naming the intermediate ``.wav``
    file, after trying ``ffmpeg``, ``avconv`` and ``sox`` in that order.
    """
    with TemporaryDirectory() as tmpdir, patch(
        "auditok.workers._run_subprocess"
    ) as patch_rsp:
        patch_rsp.return_value = (1, None, None)
        expected_filename = os.path.join(tmpdir, "output.ogg")
        tmp_expected_filename = expected_filename + ".wav"
        saver = StreamSaverWorker(audio_data_source, expected_filename)
        saver.start()
        tokenizer = TokenizerWorker(saver)
        tokenizer.start_all()
        tokenizer.join()
        saver.join()

        with pytest.raises(auditok.workers.AudioEncodingError) as ae_error:
            saver._encode_export_audio()

        warn_msg = (
            "Couldn't save audio data in the desired format "
            "'ogg'.\nEither none of 'ffmpeg', 'avconv' or 'sox' "
            "is installed or this format is not recognized.\n"
            "Audio file was saved as '{}'"
        )
        assert warn_msg.format(tmp_expected_filename) == str(ae_error.value)

        # Arguments shared by the ffmpeg and avconv command lines.
        ffmpeg_avconv_args = [
            "-y",
            "-f",
            "wav",
            "-i",
            tmp_expected_filename,
            "-f",
            "ogg",
            expected_filename,
        ]
        expected_calls = [
            call(["ffmpeg"] + ffmpeg_avconv_args),
            call(["avconv"] + ffmpeg_avconv_args),
            call(
                [
                    "sox",
                    "-t",
                    "wav",
                    tmp_expected_filename,
                    expected_filename,
                ]
            ),
        ]
        assert patch_rsp.mock_calls == expected_calls

        region = AudioRegion.load(
            "tests/data/test_split_10HZ_mono.raw", sr=10, sw=2, ch=1
        )
        # The failed encoding must not mark the saver as exported, and the
        # audio data it collected must still match the source recording.
        assert not saver._exported
        assert saver.data == bytes(region)

Mercurial > hg > auditok

annotate tests/test_workers.py @ 418:70abdb92149a