"""Tests for the worker classes exposed by ``auditok.workers``."""

import os
from tempfile import TemporaryDirectory
from unittest.mock import Mock, call, patch

import pytest

import auditok.workers
from auditok import AudioReader, AudioRegion, split, split_and_join_with_silence
from auditok.cmdline_util import make_logger
from auditok.workers import (
    AudioEventsJoinerWorker,
    CommandLineWorker,
    PlayerWorker,
    PrintWorker,
    RegionSaverWorker,
    StreamSaverWorker,
    TokenizerWorker,
)

# Number of leading characters skipped on each log line before comparing it
# with the expected message.
# NOTE(review): presumably the width of the timestamp prefix written by the
# file handler that ``make_logger`` configures -- confirm against
# ``auditok.cmdline_util``.
_LOG_PREFIX_LEN = 28


def _assert_detections(detections, expected):
    """Assert that *detections* match the ``(start, end)`` pairs in *expected*.

    The check is deliberately independent of any log-file content so that an
    empty or short log can never cause the start/end assertions to be skipped.
    """
    assert len(detections) == len(expected)
    for det, (start, end) in zip(detections, expected):
        assert pytest.approx(det.start) == start
        assert pytest.approx(det.end) == end


def _read_tagged_log_lines(path, tag):
    """Return the lines of the log file at *path* that start with *tag*.

    NOTE(review): callers later compare ``line[_LOG_PREFIX_LEN:]`` with a
    message that itself starts with *tag*. A line cannot both start with the
    tag and carry it again after a 28-character prefix, so this filter
    presumably returns an empty list and the subsequent log assertions never
    run. Correcting the filter (e.g. testing ``line[_LOG_PREFIX_LEN:]``) will
    likely require updating the expected messages too -- verify against the
    workers' actual log output before changing it.
    """
    with open(path) as fp:
        return [line for line in fp if line.startswith(tag)]


def _assert_log_lines(log_lines, expected_messages):
    """Compare each log line, minus its prefix, with the expected message.

    Lines are paired with ``zip``, so only as many lines as both sequences
    provide are compared; a shorter ``log_lines`` is not reported as an error.
    """
    for log_line, message in zip(log_lines, expected_messages):
        assert log_line[_LOG_PREFIX_LEN:].strip() == message


@pytest.fixture
def audio_data_source():
    """Yield an ``AudioReader`` over the 10 Hz mono test file, closed on teardown."""
    reader = AudioReader(
        input="tests/data/test_split_10HZ_mono.raw",
        block_dur=0.1,
        sr=10,
        sw=2,
        ch=1,
    )
    yield reader
    reader.close()


@pytest.fixture
def expected_detections():
    """Return the ``(start, end)`` pairs the tests expect from the tokenizer."""
    return [
        (0.2, 1.6),
        (1.7, 3.1),
        (3.4, 5.4),
        (5.4, 7.4),
        (7.4, 7.6),
    ]


def test_TokenizerWorker(audio_data_source, expected_detections):
    """Run ``TokenizerWorker`` alone and check its detections and log lines."""
    with TemporaryDirectory() as tmpdir:
        log_path = os.path.join(tmpdir, "file.log")
        logger = make_logger(file=log_path, name="test_TokenizerWorker")
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=logger,
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        tokenizer.start_all()
        tokenizer.join()
        with open(log_path) as fp:
            log_lines = fp.readlines()

    _assert_detections(tokenizer.detections, expected_detections)

    log_fmt = (
        "[DET]: Detection {} (start: {:.3f}, end: {:.3f}, duration: {:.3f})"
    )
    expected_messages = [
        log_fmt.format(i, start, end, end - start)
        for i, (start, end) in enumerate(expected_detections, 1)
    ]
    _assert_log_lines(log_lines, expected_messages)


def test_PlayerWorker(audio_data_source, expected_detections):
    """Attach a ``PlayerWorker`` with a mocked player and check it is used."""
    with TemporaryDirectory() as tmpdir:
        log_path = os.path.join(tmpdir, "file.log")
        # Use a logger name unique to this test so it does not share a logger
        # object (and its handlers) with test_RegionSaverWorker.
        logger = make_logger(file=log_path, name="test_PlayerWorker")
        player_mock = Mock()
        observers = [PlayerWorker(player_mock, logger=logger)]
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=logger,
            observers=observers,
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        tokenizer.start_all()
        tokenizer.join()
        tokenizer._observers[0].join()
        log_lines = _read_tagged_log_lines(log_path, "[PLAY]")

    assert player_mock.play.called
    _assert_detections(tokenizer.detections, expected_detections)

    log_fmt = "[PLAY]: Detection {id} played"
    expected_messages = [
        log_fmt.format(id=i) for i, _ in enumerate(expected_detections, 1)
    ]
    _assert_log_lines(log_lines, expected_messages)


def test_RegionSaverWorker(audio_data_source, expected_detections):
    """Attach a ``RegionSaverWorker`` and check the ``AudioRegion.save`` calls."""
    filename_format = "Region_{id}_{start:.6f}-{end:.3f}_{duration:.3f}.wav"
    with TemporaryDirectory() as tmpdir:
        log_path = os.path.join(tmpdir, "file.log")
        logger = make_logger(file=log_path, name="test_RegionSaverWorker")
        observers = [RegionSaverWorker(filename_format, logger=logger)]
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=logger,
            observers=observers,
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        with patch("auditok.core.AudioRegion.save") as patched_save:
            tokenizer.start_all()
            tokenizer.join()
            tokenizer._observers[0].join()
        log_lines = _read_tagged_log_lines(log_path, "[SAVE]")

    expected_filenames = [
        filename_format.format(
            id=i, start=start, end=end, duration=end - start
        )
        for i, (start, end) in enumerate(expected_detections, 1)
    ]
    expected_save_calls = [call(name, None) for name in expected_filenames]

    # Only every other recorded call is a direct call to ``save``.
    # NOTE(review): the interleaved entries are presumably calls made on
    # ``save``'s mocked return value while the worker builds its log message
    # -- confirm against RegionSaverWorker.
    save_calls = patched_save.mock_calls[::2]
    assert save_calls == expected_save_calls
    _assert_detections(tokenizer.detections, expected_detections)

    log_fmt = "[SAVE]: Detection {id} saved as '{filename}'"
    expected_messages = [
        log_fmt.format(id=i, filename=name)
        for i, name in enumerate(expected_filenames, 1)
    ]
    _assert_log_lines(log_lines, expected_messages)


def test_CommandLineWorker(audio_data_source, expected_detections):
    """Attach a ``CommandLineWorker`` and check the patched ``os.system`` calls."""
    command_format = "do nothing with"
    with TemporaryDirectory() as tmpdir:
        log_path = os.path.join(tmpdir, "file.log")
        logger = make_logger(file=log_path, name="test_CommandLineWorker")
        observers = [CommandLineWorker(command_format, logger=logger)]
        tokenizer = TokenizerWorker(
            audio_data_source,
            logger=logger,
            observers=observers,
            min_dur=0.3,
            max_dur=2,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            eth=50,
        )
        with patch("auditok.workers.os.system") as patched_os_system:
            tokenizer.start_all()
            tokenizer.join()
            tokenizer._observers[0].join()
        log_lines = _read_tagged_log_lines(log_path, "[COMMAND]")

    # The command contains no placeholders, so one identical call is expected
    # per detection.
    expected_system_calls = [call(command_format) for _ in expected_detections]
    assert patched_os_system.mock_calls == expected_system_calls
    _assert_detections(tokenizer.detections, expected_detections)

    log_fmt = "[COMMAND]: Detection {id} command '{command}'"
    expected_messages = [
        log_fmt.format(id=i, command=command_format)
        for i, _ in enumerate(expected_detections, 1)
    ]
    _assert_log_lines(log_lines, expected_messages)


def test_PrintWorker(audio_data_source, expected_detections):
    """Attach a ``PrintWorker`` and check the calls made to patched ``print``."""
    observers = [
        PrintWorker(print_format="[{id}] {start} {end}, dur: {duration}")
    ]
    tokenizer = TokenizerWorker(
        audio_data_source,
        observers=observers,
        min_dur=0.3,
        max_dur=2,
        max_silence=0.2,
        drop_trailing_silence=False,
        strict_min_dur=False,
        eth=50,
    )
    with patch("builtins.print") as patched_print:
        tokenizer.start_all()
        tokenizer.join()
        tokenizer._observers[0].join()

    expected_print_calls = [
        call(
            "[{}] {:.3f} {:.3f}, dur: {:.3f}".format(
                i, start, end, end - start
            )
        )
        for i, (start, end) in enumerate(expected_detections, 1)
    ]
    assert patched_print.mock_calls == expected_print_calls
    _assert_detections(tokenizer.detections, expected_detections)


def test_StreamSaverWorker_wav(audio_data_source):
    """Export the stream to a ``.wav`` file and compare it with the source."""
    with TemporaryDirectory() as tmpdir:
        expected_filename = os.path.join(tmpdir, "output.wav")
        saver = StreamSaverWorker(audio_data_source, expected_filename)
        saver.start()

        tokenizer = TokenizerWorker(saver)
        tokenizer.start_all()
        tokenizer.join()
        saver.join()

        output_filename = saver.export_audio()
        reference_region = AudioRegion.load(
            "tests/data/test_split_10HZ_mono.raw", sr=10, sw=2, ch=1
        )
        # Must be loaded while the temporary directory still exists.
        exported_region = AudioRegion.load(output_filename)

        assert output_filename == expected_filename
        assert reference_region == exported_region
        assert saver.data == bytes(exported_region)


@pytest.mark.parametrize(
    "export_format",
    [
        "raw",  # raw
        "wav",  # wav
    ],
    ids=[
        "raw",
        "wav",
    ],
)
def test_StreamSaverWorker(audio_data_source, export_format):
    """Export the stream in *export_format* and compare it with the source."""
    with TemporaryDirectory() as tmpdir:
        expected_filename = os.path.join(tmpdir, f"output.{export_format}")
        saver = StreamSaverWorker(
            audio_data_source, expected_filename, export_format=export_format
        )
        saver.start()
        tokenizer = TokenizerWorker(saver)
        tokenizer.start_all()
        tokenizer.join()
        saver.join()

        output_filename = saver.export_audio()
        reference_region = AudioRegion.load(
            "tests/data/test_split_10HZ_mono.raw", sr=10, sw=2, ch=1
        )
        exported_region = AudioRegion.load(
            output_filename, sr=10, sw=2, ch=1, audio_format=export_format
        )

        assert output_filename == expected_filename
        assert reference_region == exported_region
        assert saver.data == bytes(exported_region)


def test_StreamSaverWorker_encode_audio(audio_data_source):
    """Check the error raised and the encoders tried when encoding fails."""
    with TemporaryDirectory() as tmpdir:
        with patch("auditok.workers._run_subprocess") as patch_rsp:
            # NOTE(review): the first tuple element is presumably a non-zero
            # return code signalling that the encoder failed -- confirm
            # against ``_run_subprocess``.
            patch_rsp.return_value = (1, None, None)
            expected_filename = os.path.join(tmpdir, "output.ogg")
            tmp_expected_filename = expected_filename + ".wav"
            saver = StreamSaverWorker(audio_data_source, expected_filename)
            saver.start()
            tokenizer = TokenizerWorker(saver)
            tokenizer.start_all()
            tokenizer.join()
            saver.join()

            with pytest.raises(auditok.workers.AudioEncodingError) as ae_error:
                saver._encode_export_audio()

            warn_msg = "Couldn't save audio data in the desired format "
            warn_msg += "'ogg'.\nEither none of 'ffmpeg', 'avconv' or 'sox' "
            warn_msg += "is installed or this format is not recognized.\n"
            warn_msg += "Audio file was saved as '{}'"
            assert warn_msg.format(tmp_expected_filename) == str(ae_error.value)

            # ffmpeg and avconv are expected to receive the same arguments.
            ffmpeg_avconv_args = [
                "-y",
                "-f",
                "wav",
                "-i",
                tmp_expected_filename,
                "-f",
                "ogg",
                expected_filename,
            ]
            expected_calls = [
                call(["ffmpeg"] + ffmpeg_avconv_args),
                call(["avconv"] + ffmpeg_avconv_args),
                call(
                    [
                        "sox",
                        "-t",
                        "wav",
                        tmp_expected_filename,
                        expected_filename,
                    ]
                ),
            ]
            assert patch_rsp.mock_calls == expected_calls

            reference_region = AudioRegion.load(
                "tests/data/test_split_10HZ_mono.raw", sr=10, sw=2, ch=1
            )
            assert not saver._exported
            assert saver.data == bytes(reference_region)


@pytest.mark.parametrize(
    "export_format",
    [
        "raw",  # raw
        "wav",  # wav
    ],
    ids=[
        "raw",
        "wav",
    ],
)
def test_AudioEventsJoinerWorker(audio_data_source, export_format):
    """Join detections with 1 s of silence and compare with the library helper."""
    with TemporaryDirectory() as tmpdir:
        expected_filename = os.path.join(tmpdir, f"output.{export_format}")
        joiner = AudioEventsJoinerWorker(
            silence_duration=1.0,
            filename=expected_filename,
            export_format=export_format,
            sampling_rate=audio_data_source.sampling_rate,
            sample_width=audio_data_source.sample_width,
            channels=audio_data_source.channels,
        )

        tokenizer = TokenizerWorker(audio_data_source, observers=[joiner])
        tokenizer.start_all()
        tokenizer.join()
        joiner.join()

        output_filename = joiner.export_audio()
        expected_region = split_and_join_with_silence(
            "tests/data/test_split_10HZ_mono.raw",
            silence_duration=1.0,
            sr=10,
            sw=2,
            ch=1,
            aw=0.1,
        )
        assert output_filename == expected_filename
        assert joiner.data == bytes(expected_region)