amine@274: import os
amine@274: from tempfile import TemporaryDirectory
amine@403: from unittest.mock import Mock, call, patch
amine@403: 
amine@400: import pytest
amine@403: 
amine@418: import auditok.workers
amine@419: from auditok import AudioReader, AudioRegion, split, split_and_join_with_silence
amine@403: from auditok.cmdline_util import make_logger
amine@274: from auditok.workers import (
amine@419:     AudioEventsJoinerWorker,
amine@403:     CommandLineWorker,
amine@403:     PlayerWorker,
amine@403:     PrintWorker,
amine@403:     RegionSaverWorker,
amine@403:     StreamSaverWorker,
amine@274:     TokenizerWorker,
amine@274: )
amine@274: 
amine@274: 
@pytest.fixture
def audio_data_source():
    """Yield an ``AudioReader`` opened on the raw test recording.

    The reader is closed after the requesting test has resumed the fixture.
    """
    reader_kwargs = {
        "input": "tests/data/test_split_10HZ_mono.raw",
        "block_dur": 0.1,
        "sr": 10,
        "sw": 2,
        "ch": 1,
    }
    source = AudioReader(**reader_kwargs)
    yield source
    source.close()
amine@275: 
amine@400: 
@pytest.fixture
def expected_detections():
    """Return the ``(start, end)`` pair expected for each detection.

    The tests compare these values with the ``start`` and ``end`` attributes
    of the detections produced by ``TokenizerWorker``.
    """
    bounds = [(0.2, 1.6), (1.7, 3.1), (3.4, 5.4), (5.4, 7.4), (7.4, 7.6)]
    return bounds
amine@400: 
amine@400: 
def test_TokenizerWorker(audio_data_source, expected_detections):
    """Check the detections made by ``TokenizerWorker`` and what it logs."""
    tokenizer_kwargs = {
        "min_dur": 0.3,
        "max_dur": 2,
        "max_silence": 0.2,
        "drop_trailing_silence": False,
        "strict_min_dur": False,
        "eth": 50,
    }
    with TemporaryDirectory() as tmpdir:
        log_path = os.path.join(tmpdir, "file.log")
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=make_logger(file=log_path, name="test_TokenizerWorker"),
            **tokenizer_kwargs,
        )
        tokenizer.start_all()
        tokenizer.join()
        # Read the log before the temporary directory is removed.
        with open(log_path) as fp:
            log_lines = fp.readlines()

    assert len(tokenizer.detections) == len(expected_detections)

    log_fmt = (
        "[DET]: Detection {} (start: {:.3f}, end: {:.3f}, duration: {:.3f})"
    )
    triples = zip(tokenizer.detections, expected_detections, log_lines)
    for det_id, (det, (start, end), log_line) in enumerate(triples, 1):
        assert pytest.approx(det.start) == start
        assert pytest.approx(det.end) == end
        # The first 28 characters of a line are skipped before comparing;
        # presumably a prefix added by the logger's formatter -- confirm
        # against make_logger.
        logged = log_line[28:].strip()
        assert logged == log_fmt.format(det_id, start, end, end - start)
amine@275: 
amine@274: 
def test_PlayerWorker(audio_data_source, expected_detections):
    """Check that ``PlayerWorker`` plays the detections it is notified of.

    A ``Mock`` stands in for the audio player, so the test only verifies that
    its ``play`` method was called and that the tokenizer produced the
    expected detections.
    """
    with TemporaryDirectory() as tmpdir:
        file = os.path.join(tmpdir, "file.log")
        # Give this test its own logger name. It previously reused
        # "test_RegionSaverWorker"; if make_logger resolves names through the
        # logging registry (presumably -- confirm), both tests would then
        # share one logger and its handlers.
        logger = make_logger(file=file, name="test_PlayerWorker")
        player_mock = Mock()
        observers = [PlayerWorker(player_mock, logger=logger)]
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=logger,
            observers=observers,
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        tokenizer.start_all()
        tokenizer.join()
        # Wait for the observer to finish before reading the log.
        tokenizer._observers[0].join()
        # NOTE(review): the loop below compares ``log_line[28:]``, which
        # suggests every line starts with a 28-character prefix. If so, this
        # ``startswith`` filter keeps no line and the loop never runs --
        # confirm, and filter on ``line[28:]`` if that is the case.
        with open(file) as fp:
            log_lines = [
                line for line in fp.readlines() if line.startswith("[PLAY]")
            ]

    assert player_mock.play.called
    assert len(tokenizer.detections) == len(expected_detections)
    log_fmt = "[PLAY]: Detection {id} played"
    for i, (det, exp, log_line) in enumerate(
        zip(
            tokenizer.detections,
            expected_detections,
            log_lines,
        ),
        1,
    ):
        start, end = exp
        exp_log_line = log_fmt.format(id=i)
        assert pytest.approx(det.start) == start
        assert pytest.approx(det.end) == end
        assert log_line[28:].strip() == exp_log_line
amine@400: 
amine@400: 
def test_RegionSaverWorker(audio_data_source, expected_detections):
    """Check that ``RegionSaverWorker`` saves each detection under the
    expected file name.

    ``AudioRegion.save`` is patched, so no audio file is written.
    """
    filename_format = "Region_{id}_{start:.6f}-{end:.3f}_{duration:.3f}.wav"

    def expected_name(det_id, start, end):
        # File name the worker is expected to build for one detection.
        return filename_format.format(
            id=det_id, start=start, end=end, duration=end - start
        )

    with TemporaryDirectory() as tmpdir:
        log_path = os.path.join(tmpdir, "file.log")
        logger = make_logger(file=log_path, name="test_RegionSaverWorker")
        saver = RegionSaverWorker(filename_format, logger=logger)
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=logger,
            observers=[saver],
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        with patch("auditok.core.AudioRegion.save") as patched_save:
            tokenizer.start_all()
            tokenizer.join()
            tokenizer._observers[0].join()
        # NOTE(review): the loop at the end compares ``log_line[28:]``, which
        # suggests every line starts with a 28-character prefix. If so, this
        # ``startswith`` filter keeps no line and that loop never runs --
        # confirm before relying on the log assertions.
        with open(log_path) as fp:
            log_lines = [
                line for line in fp.readlines() if line.startswith("[SAVE]")
            ]

    expected_save_calls = [
        call(expected_name(det_id, start, end), None)
        for det_id, (start, end) in enumerate(expected_detections, 1)
    ]

    # Only every other recorded call is compared; presumably the entries in
    # between are calls recorded on the mock's return value -- TODO confirm.
    save_calls = patched_save.mock_calls[::2]
    assert save_calls == expected_save_calls
    assert len(tokenizer.detections) == len(expected_detections)

    log_fmt = "[SAVE]: Detection {id} saved as '{filename}'"
    triples = zip(tokenizer.detections, expected_detections, log_lines)
    for det_id, (det, (start, end), log_line) in enumerate(triples, 1):
        exp_log_line = log_fmt.format(
            id=det_id, filename=expected_name(det_id, start, end)
        )
        assert pytest.approx(det.start) == start
        assert pytest.approx(det.end) == end
        assert log_line[28:].strip() == exp_log_line
amine@400: 
amine@400: 
def test_CommandLineWorker(audio_data_source, expected_detections):
    """Check that ``CommandLineWorker`` runs its command once per detection.

    ``os.system`` is patched inside ``auditok.workers``, so no command is
    actually executed.
    """
    command_format = "do nothing with"
    with TemporaryDirectory() as tmpdir:
        log_path = os.path.join(tmpdir, "file.log")
        logger = make_logger(file=log_path, name="test_CommandLineWorker")
        runner = CommandLineWorker(command_format, logger=logger)
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=logger,
            observers=[runner],
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        with patch("auditok.workers.os.system") as patched_os_system:
            tokenizer.start_all()
            tokenizer.join()
            tokenizer._observers[0].join()
        # NOTE(review): the loop at the end compares ``log_line[28:]``, which
        # suggests every line starts with a 28-character prefix. If so, this
        # ``startswith`` filter keeps no line and that loop never runs --
        # confirm before relying on the log assertions.
        with open(log_path) as fp:
            log_lines = [
                line for line in fp.readlines() if line.startswith("[COMMAND]")
            ]

    # The command has no placeholder, so every detection yields the same call.
    expected_system_calls = [call(command_format)] * len(expected_detections)
    assert patched_os_system.mock_calls == expected_system_calls
    assert len(tokenizer.detections) == len(expected_detections)

    log_fmt = "[COMMAND]: Detection {id} command '{command}'"
    triples = zip(tokenizer.detections, expected_detections, log_lines)
    for det_id, (det, (start, end), log_line) in enumerate(triples, 1):
        exp_log_line = log_fmt.format(id=det_id, command=command_format)
        assert pytest.approx(det.start) == start
        assert pytest.approx(det.end) == end
        assert log_line[28:].strip() == exp_log_line
amine@400: 
amine@400: 
def test_PrintWorker(audio_data_source, expected_detections):
    """Check that ``PrintWorker`` prints one formatted line per detection.

    ``builtins.print`` is patched so the calls can be inspected.
    """
    printer = PrintWorker(print_format="[{id}] {start} {end}, dur: {duration}")
    tokenizer = TokenizerWorker(
        audio_data_source,
        observers=[printer],
        min_dur=0.3,
        max_dur=2,
        max_silence=0.2,
        drop_trailing_silence=False,
        strict_min_dur=False,
        eth=50,
    )
    with patch("builtins.print") as patched_print:
        tokenizer.start_all()
        tokenizer.join()
        tokenizer._observers[0].join()

    line_fmt = "[{}] {:.3f} {:.3f}, dur: {:.3f}"
    expected_print_calls = [
        call(line_fmt.format(det_id, start, end, end - start))
        for det_id, (start, end) in enumerate(expected_detections, 1)
    ]
    assert patched_print.mock_calls == expected_print_calls
    assert len(tokenizer.detections) == len(expected_detections)

    pairs = zip(tokenizer.detections, expected_detections)
    for det, (start, end) in pairs:
        assert pytest.approx(det.start) == start
        assert pytest.approx(det.end) == end
amine@287: 
amine@287: 
def test_StreamSaverWorker_wav(audio_data_source):
    """Check that ``StreamSaverWorker`` exports the stream to a ``.wav`` file.

    No export format is passed to the worker; only the ``.wav`` file name is
    given.
    """
    with TemporaryDirectory() as tmpdir:
        target_path = os.path.join(tmpdir, "output.wav")
        saver = StreamSaverWorker(audio_data_source, target_path)
        saver.start()

        # The tokenizer reads from the saver, which wraps the audio source.
        tokenizer = TokenizerWorker(saver)
        tokenizer.start_all()
        tokenizer.join()
        saver.join()

        exported_path = saver.export_audio()
        source_region = AudioRegion.load(
            "tests/data/test_split_10HZ_mono.raw", sr=10, sw=2, ch=1
        )
        exported_region = AudioRegion.load(exported_path)

        assert exported_path == target_path
        assert source_region == exported_region
        assert saver.data == bytes(exported_region)
amine@400: 
amine@400: 
@pytest.mark.parametrize(
    "export_format",
    [
        "raw",
        "wav",
    ],
    # One distinct id per case; both cases were previously labelled "raw".
    ids=[
        "raw",
        "wav",
    ],
)
def test_StreamSaverWorker(audio_data_source, export_format):
    """Check that ``StreamSaverWorker`` exports the stream in ``export_format``.

    The exported file is loaded back and compared with the original test
    recording and with the data held by the saver.
    """
    with TemporaryDirectory() as tmpdir:
        expected_filename = os.path.join(tmpdir, f"output.{export_format}")
        saver = StreamSaverWorker(
            audio_data_source, expected_filename, export_format=export_format
        )
        saver.start()
        # The tokenizer reads from the saver, which wraps the audio source.
        tokenizer = TokenizerWorker(saver)
        tokenizer.start_all()
        tokenizer.join()
        saver.join()
        output_filename = saver.export_audio()
        region = AudioRegion.load(
            "tests/data/test_split_10HZ_mono.raw", sr=10, sw=2, ch=1
        )
        # Audio parameters are passed for both formats when loading the
        # exported file.
        exported_region = AudioRegion.load(
            output_filename, sr=10, sw=2, ch=1, audio_format=export_format
        )
        assert output_filename == expected_filename
        assert region == exported_region
        assert saver.data == bytes(exported_region)
amine@400: 
amine@400: 
def test_StreamSaverWorker_encode_audio(audio_data_source):
    """Check the error raised when no external encoder succeeds.

    ``auditok.workers._run_subprocess`` is patched to return ``(1, None,
    None)`` for every call. The test then expects ``AudioEncodingError`` from
    ``_encode_export_audio`` after ffmpeg, avconv and sox were each tried
    once, in that order.
    """
    with TemporaryDirectory() as tmpdir:
        with patch("auditok.workers._run_subprocess") as patch_rsp:
            patch_rsp.return_value = (1, None, None)
            target_path = os.path.join(tmpdir, "output.ogg")
            wav_fallback_path = target_path + ".wav"
            saver = StreamSaverWorker(audio_data_source, target_path)
            saver.start()
            tokenizer = TokenizerWorker(saver)
            tokenizer.start_all()
            tokenizer.join()
            saver.join()

            with pytest.raises(auditok.workers.AudioEncodingError) as ae_error:
                saver._encode_export_audio()

        warn_msg = (
            "Couldn't save audio data in the desired format "
            "'ogg'.\nEither none of 'ffmpeg', 'avconv' or 'sox' "
            "is installed or this format is not recognized.\n"
            "Audio file was saved as '{}'"
        )
        assert warn_msg.format(wav_fallback_path) == str(ae_error.value)

        # ffmpeg and avconv are expected to receive the same arguments.
        converter_args = [
            "-y",
            "-f",
            "wav",
            "-i",
            wav_fallback_path,
            "-f",
            "ogg",
            target_path,
        ]
        sox_command = ["sox", "-t", "wav", wav_fallback_path, target_path]
        expected_calls = [
            call(["ffmpeg", *converter_args]),
            call(["avconv", *converter_args]),
            call(sox_command),
        ]
        assert patch_rsp.mock_calls == expected_calls

        source_region = AudioRegion.load(
            "tests/data/test_split_10HZ_mono.raw", sr=10, sw=2, ch=1
        )
        assert not saver._exported
        assert saver.data == bytes(source_region)
amine@419: 
amine@419: 
@pytest.mark.parametrize(
    "export_format",
    [
        "raw",
        "wav",
    ],
    # One distinct id per case; both cases were previously labelled "raw".
    ids=[
        "raw",
        "wav",
    ],
)
def test_AudioEventsJoinerWorker(audio_data_source, export_format):
    """Check that ``AudioEventsJoinerWorker`` joins detections with silence.

    The data held by the joiner is compared with the result of
    ``split_and_join_with_silence`` applied to the same recording with the
    same silence duration.
    """
    with TemporaryDirectory() as tmpdir:
        expected_filename = os.path.join(tmpdir, f"output.{export_format}")
        joiner = AudioEventsJoinerWorker(
            silence_duration=1.0,
            filename=expected_filename,
            export_format=export_format,
            sampling_rate=audio_data_source.sampling_rate,
            sample_width=audio_data_source.sample_width,
            channels=audio_data_source.channels,
        )

        tokenizer = TokenizerWorker(audio_data_source, observers=[joiner])
        tokenizer.start_all()
        tokenizer.join()
        joiner.join()

        output_filename = joiner.export_audio()
        expected_region = split_and_join_with_silence(
            "tests/data/test_split_10HZ_mono.raw",
            silence_duration=1.0,
            sr=10,
            sw=2,
            ch=1,
            aw=0.1,
        )
        assert output_filename == expected_filename
        assert joiner.data == bytes(expected_region)