import filecmp
import os
import wave
from pathlib import Path
from tempfile import NamedTemporaryFile, TemporaryDirectory
from unittest.mock import Mock, patch

import numpy as np
import pytest
from test_AudioSource import PURE_TONE_DICT, _sample_generator

import auditok
from auditok.io import (
    AudioIOError,
    AudioParameterError,
    BufferAudioSource,
    RawAudioSource,
    StdinAudioSource,
    WaveAudioSource,
    _get_audio_parameters,
    _guess_audio_format,
    _load_raw,
    _load_wave,
    _load_with_pydub,
    _save_raw,
    _save_wave,
    _save_with_pydub,
    check_audio_data,
    from_file,
    get_audio_source,
    to_file,
)
from auditok.signal import SAMPLE_WIDTH_TO_DTYPE

# Reference audio parameters, with long and short (alias) key names.
AUDIO_PARAMS = {"sampling_rate": 16000, "sample_width": 2, "channels": 1}
AUDIO_PARAMS_SHORT = {"sr": 16000, "sw": 2, "ch": 1}


@pytest.mark.parametrize(
    "data, sample_width, channels, valid",
    [
        (b"\0" * 113, 1, 1, True),  # valid_mono
        (b"\0" * 160, 1, 2, True),  # valid_stereo
        (b"\0" * 113, 2, 1, False),  # invalid_mono_sw_2
        (b"\0" * 113, 1, 2, False),  # invalid_stereo_sw_1
        (b"\0" * 158, 2, 2, False),  # invalid_stereo_sw_2
    ],
    ids=[
        "valid_mono",
        "valid_stereo",
        "invalid_mono_sw_2",
        "invalid_stereo_sw_1",
        "invalid_stereo_sw_2",
    ],
)
def test_check_audio_data(data, sample_width, channels, valid):
    """`check_audio_data` returns None for valid data, raises otherwise."""
    if not valid:
        with pytest.raises(AudioParameterError):
            check_audio_data(data, sample_width, channels)
    else:
        assert check_audio_data(data, sample_width, channels) is None


@pytest.mark.parametrize(
    "filename, audio_format, expected",
    [
        ("filename.wav", "wav", "wav"),  # extension_and_format_same
        ("filename.mp3", "wav", "wav"),  # extension_and_format_different
        ("filename.wav", None, "wav"),  # extension_no_format
        ("filename", "wav", "wav"),  # format_no_extension
        ("filename", None, None),  # no_format_no_extension
        ("filename", "wave", "wav"),  # wave_as_wav
        ("filename.wave", None, "wav"),  # wave_as_wav_extension
    ],
    ids=[
        "extension_and_format_same",
        "extension_and_format_different",
        "extension_no_format",
        "format_no_extension",
        "no_format_no_extension",
        "wave_as_wav",
        "wave_as_wav_extension",
    ],
)
def test_guess_audio_format(filename, audio_format, expected):
    """`_guess_audio_format` gives the same answer for `str` and `Path`."""
    result = _guess_audio_format(filename, audio_format)
    assert result == expected

    result = _guess_audio_format(Path(filename), audio_format)
    assert result == expected


def test_get_audio_parameters_short_params():
    """Short parameter names (sr, sw, ch) are accepted."""
    expected = (8000, 2, 1)
    params = dict(zip(("sr", "sw", "ch"), expected))
    result = _get_audio_parameters(params)
    assert result == expected


def test_get_audio_parameters_long_params():
    """Long parameter names are accepted."""
    expected = (8000, 2, 1)
    params = dict(zip(("sampling_rate", "sample_width", "channels"), expected))
    result = _get_audio_parameters(params)
    assert result == expected


def test_get_audio_parameters_long_params_shadow_short_ones():
    """Long names take precedence when both long and short names are given."""
    expected = (8000, 2, 1)
    params = dict(
        zip(
            ("sampling_rate", "sample_width", "channels"),
            expected,
        )
    )
    # The short aliases carry invalid values ("x"); they must be ignored.
    params.update(
        dict(
            zip(
                ("sr", "sw", "ch"),
                "xxx",
            )
        )
    )
    result = _get_audio_parameters(params)
    assert result == expected


@pytest.mark.parametrize(
    "missing_param",
    [
        "sampling_rate",  # missing_sampling_rate
        "sample_width",  # missing_sample_width
        "channels",  # missing_channels
    ],
    ids=["missing_sampling_rate", "missing_sample_width", "missing_channels"],
)
def test_get_audio_parameters_missing_parameter(missing_param):
    """A missing long-named parameter raises `AudioParameterError`."""
    params = AUDIO_PARAMS.copy()
    del params[missing_param]
    with pytest.raises(AudioParameterError):
        _get_audio_parameters(params)


@pytest.mark.parametrize(
    "missing_param",
    [
        "sr",  # missing_sampling_rate
        "sw",  # missing_sample_width
        "ch",  # missing_channels
    ],
    ids=["missing_sampling_rate", "missing_sample_width", "missing_channels"],
)
def test_get_audio_parameters_missing_parameter_short(missing_param):
    """A missing short-named parameter raises `AudioParameterError`."""
    params = AUDIO_PARAMS_SHORT.copy()
    del params[missing_param]
    with pytest.raises(AudioParameterError):
        _get_audio_parameters(params)


@pytest.mark.parametrize(
    "values",
    [
        ("x", 2, 1),  # str_sampling_rate
        (-8000, 2, 1),  # negative_sampling_rate
        (8000, "x", 1),  # str_sample_width
        (8000, -2, 1),  # negative_sample_width
        (8000, 2, "x"),  # str_channels
        (8000, 2, -1),  # negative_channels
    ],
    ids=[
        "str_sampling_rate",
        "negative_sampling_rate",
        "str_sample_width",
        "negative_sample_width",
        "str_channels",
        "negative_channels",
    ],
)
def test_get_audio_parameters_invalid(values):
    """Non-integer or negative parameter values raise an error."""
    params = dict(
        zip(
            ("sampling_rate", "sample_width", "channels"),
            values,
        )
    )
    with pytest.raises(AudioParameterError):
        _get_audio_parameters(params)


@pytest.mark.parametrize(
    "filename, audio_format, function_name, kwargs",
    [
        (
            "audio",
            "raw",
            "_load_raw",
            AUDIO_PARAMS_SHORT,
        ),  # raw_with_audio_format
        (
            "audio.raw",
            None,
            "_load_raw",
            AUDIO_PARAMS_SHORT,
        ),  # raw_with_extension
        ("audio", "wave", "_load_wave", None),  # wave_with_audio_format
        # Fixed: this case used "wave" too, so "wav" was never exercised.
        ("audio", "wav", "_load_wave", None),  # wav_with_audio_format
        ("audio.wav", None, "_load_wave", None),  # wav_with_extension
        (
            "audio.dat",
            "wav",
            "_load_wave",
            None,
        ),  # format_and_extension_both_given_a
        (
            "audio.raw",
            "wave",
            "_load_wave",
            None,
        ),  # format_and_extension_both_given_b
        ("audio", None, "_load_with_pydub", None),  # no_format_nor_extension
        ("audio.ogg", None, "_load_with_pydub", None),  # other_formats_ogg
        ("audio", "webm", "_load_with_pydub", None),  # other_formats_webm
    ],
    ids=[
        "raw_with_audio_format",
        "raw_with_extension",
        "wave_with_audio_format",
        "wav_with_audio_format",
        "wav_with_extension",
        "format_and_extension_both_given_a",
        "format_and_extension_both_given_b",
        "no_format_nor_extension",
        "other_formats_ogg",
        "other_formats_webm",
    ],
)
def test_from_file(filename, audio_format, function_name, kwargs):
    """`from_file` dispatches to the loader matching the format/extension."""
    function_name = "auditok.io." + function_name
    if kwargs is None:
        kwargs = {}
    with patch(function_name) as patch_function:
        from_file(filename, audio_format, **kwargs)
    assert patch_function.called


@pytest.mark.parametrize(
    "large_file, cls, size, use_pathlib",
    [
        (False, BufferAudioSource, -1, False),  # large_file_false_negative_size
        (False, BufferAudioSource, None, False),  # large_file_false_None_size
        (
            False,
            BufferAudioSource,
            None,
            True,
        ),  # large_file_false_None_size_Path
        (True, RawAudioSource, -1, False),  # large_file_true_negative_size
        (True, RawAudioSource, None, False),  # large_file_true_None_size
        (True, RawAudioSource, -1, True),  # large_file_true_negative_size_Path
    ],
    ids=[
        "large_file_false_negative_size",
        "large_file_false_None_size",
        "large_file_false_None_size_Path",
        "large_file_true_negative_size",
        "large_file_true_None_size",
        "large_file_true_negative_size_Path",
    ],
)
def test_from_file_raw_read_all(large_file, cls, size, use_pathlib):
    """Reading a raw file with a negative or None size returns all data."""
    # Start from a plain string so that the non-pathlib cases really test
    # `str` file names (it used to be a `Path` in every case).
    filename = "tests/data/test_16KHZ_mono_400Hz.raw"
    if use_pathlib:
        filename = Path(filename)
    audio_source = from_file(
        filename,
        large_file=large_file,
        sampling_rate=16000,
        sample_width=2,
        channels=1,
    )
    assert isinstance(audio_source, cls)

    with open(filename, "rb") as fp:
        expected = fp.read()
    audio_source.open()
    data = audio_source.read(size)
    audio_source.close()
    assert data == expected


@pytest.mark.parametrize(
    "large_file, cls, size, use_pathlib",
    [
        (False, BufferAudioSource, -1, False),  # large_file_false_negative_size
        (False, BufferAudioSource, None, False),  # large_file_false_None_size
        (
            False,
            BufferAudioSource,
            None,
            True,
        ),  # large_file_false_None_size_Path
        (True, WaveAudioSource, -1, False),  # large_file_true_negative_size
        (True, WaveAudioSource, None, False),  # large_file_true_None_size
        (True, WaveAudioSource, -1, True),  # large_file_true_negative_size_Path
    ],
    ids=[
        "large_file_false_negative_size",
        "large_file_false_None_size",
        "large_file_false_None_size_Path",
        "large_file_true_negative_size",
        "large_file_true_None_size",
        "large_file_true_negative_size_Path",
    ],
)
def test_from_file_wave_read_all(large_file, cls, size, use_pathlib):
    """Reading a wave file with a negative or None size returns all frames."""
    filename = "tests/data/test_16KHZ_mono_400Hz.wav"
    if use_pathlib:
        filename = Path(filename)
    audio_source = from_file(
        filename,
        large_file=large_file,
        sampling_rate=16000,
        sample_width=2,
        channels=1,
    )
    assert isinstance(audio_source, cls)

    # `str()` because the file name may be a `Path` at this point.
    with wave.open(str(filename)) as fp:
        expected = fp.readframes(-1)
    audio_source.open()
    data = audio_source.read(size)
    audio_source.close()
    assert data == expected


def test_from_file_large_file_compressed():
    """`large_file=True` is rejected for a compressed (ogg) file."""
    filename = "tests/data/test_16KHZ_mono_400Hz.ogg"
    with pytest.raises(AudioIOError):
        from_file(filename, large_file=True)


@pytest.mark.parametrize(
    "missing_param",
    [
        pytest.param("sr", id="missing_sampling_rate"),
        pytest.param("sw", id="missing_sample_width"),
        pytest.param("ch", id="missing_channels"),
    ],
)
def test_from_file_missing_audio_param(missing_param):
    """Loading raw audio without one audio parameter must fail."""
    incomplete = {
        name: value
        for name, value in AUDIO_PARAMS_SHORT.items()
        if name != missing_param
    }
    with pytest.raises(AudioParameterError):
        from_file("audio", audio_format="raw", **incomplete)


def test_from_file_no_pydub():
    """A compressed format cannot be loaded when pydub is flagged absent."""
    with patch("auditok.io._WITH_PYDUB", False), pytest.raises(AudioIOError):
        from_file("audio", "mp3")


@pytest.mark.parametrize(
    "audio_format, function",
    [
        pytest.param("ogg", "from_ogg", id="ogg_first_channel"),
        pytest.param("ogg", "from_ogg", id="ogg_second_channel"),
        pytest.param("ogg", "from_ogg", id="ogg_mix"),
        pytest.param("ogg", "from_ogg", id="ogg_default"),
        pytest.param("mp3", "from_mp3", id="mp3_left_channel"),
        pytest.param("mp3", "from_mp3", id="mp3_right_channel"),
        pytest.param("flac", "from_file", id="flac_first_channel"),
        pytest.param("flac", "from_file", id="flac_second_channel"),
        pytest.param("flv", "from_flv", id="flv_left_channel"),
        pytest.param("webm", "from_file", id="webm_right_channel"),
    ],
)
@patch("auditok.io._WITH_PYDUB", True)
@patch("auditok.io.BufferAudioSource")
def test_from_file_multichannel_audio_compressed(
    mock_buffer_audio_source, audio_format, function
):
    """`from_file` opens compressed files via the matching pydub loader."""
    # Fake 2-channel, 16-bit segment standing in for pydub's AudioSegment.
    fake_segment = Mock(sample_width=2, channels=2, _data=b"abcd")
    target = f"auditok.io.AudioSegment.{function}"
    with patch(target) as open_func:
        open_func.return_value = fake_segment
        from_file(f"audio.{audio_format}")
    assert open_func.called


@pytest.mark.parametrize(
    "file_id, frequencies, large_file",
    [
        pytest.param("mono_400", (400,), False, id="mono"),
        pytest.param(
            "3channel_400-800-1600",
            (400, 800, 1600),
            False,
            id="three_channel",
        ),
        pytest.param("mono_400", (400,), True, id="mono_large_file"),
        pytest.param(
            "3channel_400-800-1600",
            (400, 800, 1600),
            True,
            id="three_channel_large_file",
        ),
    ],
)
def test_load_raw(file_id, frequencies, large_file):
    """`_load_raw` yields the right source type, parameters and data."""
    n_channels = len(frequencies)
    path = f"tests/data/test_16KHZ_{file_id}Hz.raw"
    source = _load_raw(path, 16000, 2, n_channels, large_file=large_file)
    source.open()
    raw_bytes = source.read(-1)
    source.close()

    if large_file:
        expected_class = RawAudioSource
    else:
        expected_class = BufferAudioSource
    assert isinstance(source, expected_class)
    assert source.sampling_rate == 16000
    assert source.sample_width == 2
    assert source.channels == n_channels

    # Rebuild the expected bytes from the reference pure tones.
    tones = [PURE_TONE_DICT[frequency] for frequency in frequencies]
    samples = np.fromiter(
        _sample_generator(*tones),
        dtype=SAMPLE_WIDTH_TO_DTYPE[source.sample_width],
    )
    assert raw_bytes == samples.tobytes()


def test_load_raw_missing_audio_param():
    """`_load_raw` rejects a None value for any audio parameter."""
    for missing in ("sampling_rate", "sample_width", "channels"):
        params = {"sampling_rate": 16000, "sample_width": 1, "channels": 1}
        params[missing] = None
        with pytest.raises(AudioParameterError):
            _load_raw("audio", **params)


@pytest.mark.parametrize(
    "file_id, frequencies, large_file",
    [
        pytest.param("mono_400", (400,), False, id="mono"),
        pytest.param(
            "3channel_400-800-1600",
            (400, 800, 1600),
            False,
            id="three_channel",
        ),
        pytest.param("mono_400", (400,), True, id="mono_large_file"),
        pytest.param(
            "3channel_400-800-1600",
            (400, 800, 1600),
            True,
            id="three_channel_large_file",
        ),
    ],
)
def test_load_wave(file_id, frequencies, large_file):
    """`_load_wave` yields the right source type, parameters and data."""
    path = f"tests/data/test_16KHZ_{file_id}Hz.wav"
    source = _load_wave(path, large_file=large_file)
    source.open()
    wave_bytes = source.read(-1)
    source.close()

    if large_file:
        expected_class = WaveAudioSource
    else:
        expected_class = BufferAudioSource
    assert isinstance(source, expected_class)
    assert source.sampling_rate == 16000
    assert source.sample_width == 2
    assert source.channels == len(frequencies)

    # Rebuild the expected bytes from the reference pure tones.
    tones = [PURE_TONE_DICT[frequency] for frequency in frequencies]
    samples = np.fromiter(
        _sample_generator(*tones),
        dtype=SAMPLE_WIDTH_TO_DTYPE[source.sample_width],
    )
    assert wave_bytes == samples.tobytes()


@pytest.mark.parametrize(
    "audio_format, channels, function",
    [
        ("ogg", 2, "from_ogg"),  # ogg_default_first_channel
        ("ogg", 1, "from_ogg"),  # ogg_first_channel
        ("ogg", 2, "from_ogg"),  # ogg_second_channel
        ("ogg", 3, "from_ogg"),  # ogg_mix_channels
        ("mp3", 1, "from_mp3"),  # mp3_left_channel
        ("mp3", 2, "from_mp3"),  # mp3_right_channel
        ("mp3", 3, "from_mp3"),  # mp3_mix_channels
        ("flac", 2, "from_file"),  # flac_first_channel
        ("flac", 2, "from_file"),  # flac_second_channel
        ("flv", 1, "from_flv"),  # flv_left_channel
        ("webm", 2, "from_file"),  # webm_right_channel
        ("webm", 4, "from_file"),  # webm_mix_channels
    ],
    ids=[
        "ogg_default_first_channel",
        "ogg_first_channel",
        "ogg_second_channel",
        "ogg_mix_channels",
        "mp3_left_channel",
        "mp3_right_channel",
        "mp3_mix_channels",
        "flac_first_channel",
        "flac_second_channel",
        "flv_left_channel",
        "webm_right_channel",
        "webm_mix_channels",
    ],
)
@patch("auditok.io._WITH_PYDUB", True)
@patch("auditok.io.BufferAudioSource")
def test_load_with_pydub(
    mock_buffer_audio_source, audio_format, channels, function
):
    """`_load_with_pydub` calls the pydub loader matching the format."""
    filename = "audio.{}".format(audio_format)
    # Fake 16-bit segment standing in for pydub's AudioSegment.
    segment_mock = Mock()
    segment_mock.sample_width = 2
    segment_mock.channels = channels
    segment_mock._data = b"abcdefgh"
    with patch("auditok.io.AudioSegment.{}".format(function)) as open_func:
        open_func.return_value = segment_mock
        _load_with_pydub(filename, audio_format)
    assert open_func.called


@pytest.mark.parametrize(
    "filename, frequencies, use_pathlib",
    [
        ("mono_400Hz.raw", (400,), False),  # mono
        ("mono_400Hz.raw", (400,), True),  # mono_pathlib
        (
            "3channel_400-800-1600Hz.raw",
            (400, 800, 1600),
            False,
        ),  # three_channel
    ],
    # Fixed: ids are now listed in the same order as the cases above.
    ids=["mono", "mono_pathlib", "three_channel"],
)
def test_save_raw(filename, frequencies, use_pathlib):
    """`_save_raw` writes data identical to the reference raw file."""
    filename = "tests/data/test_16KHZ_{}".format(filename)
    if use_pathlib:
        filename = Path(filename)
    sample_width = 2
    dtype = SAMPLE_WIDTH_TO_DTYPE[sample_width]
    mono_channels = [PURE_TONE_DICT[freq] for freq in frequencies]
    data = np.fromiter(_sample_generator(*mono_channels), dtype=dtype).tobytes()
    # Context manager so the temporary file is removed even on failure.
    with NamedTemporaryFile() as tmpfile:
        # Hand a `Path` to the function under test when requested; only the
        # reference file name used to be converted.
        output = Path(tmpfile.name) if use_pathlib else tmpfile.name
        _save_raw(data, output)
        assert filecmp.cmp(output, filename, shallow=False)


@pytest.mark.parametrize(
    "filename, frequencies, use_pathlib",
    [
        ("mono_400Hz.wav", (400,), False),  # mono
        ("mono_400Hz.wav", (400,), True),  # mono_pathlib
        (
            "3channel_400-800-1600Hz.wav",
            (400, 800, 1600),
            False,
        ),  # three_channel
    ],
    ids=["mono", "mono_pathlib", "three_channel"],
)
def test_save_wave(filename, frequencies, use_pathlib):
    """`_save_wave` writes a file identical to the reference wave file."""
    filename = "tests/data/test_16KHZ_{}".format(filename)
    if use_pathlib:
        # Fixed: was `str(filename)`, a no-op on an existing string.
        filename = Path(filename)
    sampling_rate = 16000
    sample_width = 2
    channels = len(frequencies)
    mono_channels = [PURE_TONE_DICT[freq] for freq in frequencies]
    dtype = SAMPLE_WIDTH_TO_DTYPE[sample_width]
    data = np.fromiter(_sample_generator(*mono_channels), dtype=dtype).tobytes()
    # Context manager so the temporary file is removed even on failure.
    with NamedTemporaryFile() as tmpfile:
        # NOTE(review): assumes `_save_wave` accepts a `Path` - confirm.
        output = Path(tmpfile.name) if use_pathlib else tmpfile.name
        _save_wave(data, output, sampling_rate, sample_width, channels)
        assert filecmp.cmp(output, filename, shallow=False)


@pytest.mark.parametrize(
    "missing_param",
    [
        "sampling_rate",  # missing_sampling_rate
        "sample_width",  # missing_sample_width
        "channels",  # missing_channels
    ],
    ids=["missing_sampling_rate", "missing_sample_width", "missing_channels"],
)
def test_save_wave_missing_audio_param(missing_param):
    """`_save_wave` rejects a None value for the given audio parameter."""
    # Fixed: `missing_param` used to be ignored and all three checks ran in
    # every case; each case now checks exactly one parameter.
    params = {"sampling_rate": 16000, "sample_width": 1, "channels": 1}
    params[missing_param] = None
    with pytest.raises(AudioParameterError):
        _save_wave(b"\0\0", "audio", **params)


def test_save_with_pydub():
    """`_save_with_pydub` exports the data through `AudioSegment.export`."""
    with patch("auditok.io.AudioSegment.export") as export:
        with TemporaryDirectory() as tmpdir:
            filename = os.path.join(tmpdir, "audio.ogg")
            _save_with_pydub(b"\0\0", filename, "ogg", 16000, 2, 1)
            assert export.called


@pytest.mark.parametrize(
    "filename, audio_format",
    [
        ("audio", "raw"),  # raw_with_audio_format
        ("audio.raw", None),  # raw_with_extension
        ("audio.mp3", "raw"),  # raw_with_audio_format_and_extension
        ("audio", None),  # raw_no_audio_format_nor_extension
    ],
    ids=[
        "raw_with_audio_format",
        "raw_with_extension",
        "raw_with_audio_format_and_extension",
        "raw_no_audio_format_nor_extension",
    ],
)
def test_to_file_raw(filename, audio_format):
    """`to_file` writes raw data identical to the reference raw file."""
    exp_filename = "tests/data/test_16KHZ_mono_400Hz.raw"
    data = PURE_TONE_DICT[400].tobytes()
    # Context manager so the directory is removed even on failure.
    with TemporaryDirectory() as tmpdir:
        filename = os.path.join(tmpdir, filename)
        to_file(data, filename, audio_format=audio_format)
        assert filecmp.cmp(filename, exp_filename, shallow=False)


@pytest.mark.parametrize(
    "filename, audio_format",
    [
        ("audio", "wav"),  # wav_with_audio_format
        ("audio.wav", None),  # wav_with_extension
        ("audio.mp3", "wav"),  # wav_with_audio_format_and_extension
        ("audio", "wave"),  # wave_with_audio_format
        ("audio.wave", None),  # wave_with_extension
        ("audio.mp3", "wave"),  # wave_with_audio_format_and_extension
    ],
    ids=[
        "wav_with_audio_format",
        "wav_with_extension",
        "wav_with_audio_format_and_extension",
        "wave_with_audio_format",
        "wave_with_extension",
        "wave_with_audio_format_and_extension",
    ],
)
def test_to_file_wave(filename, audio_format):
    """`to_file` writes a wave file identical to the reference wave file."""
    exp_filename = "tests/data/test_16KHZ_mono_400Hz.wav"
    data = PURE_TONE_DICT[400].tobytes()
    # Context manager so the directory is removed even on failure.
    with TemporaryDirectory() as tmpdir:
        filename = os.path.join(tmpdir, filename)
        to_file(
            data,
            filename,
            audio_format=audio_format,
            sampling_rate=16000,
            sample_width=2,
            channels=1,
        )
        assert filecmp.cmp(filename, exp_filename, shallow=False)


@pytest.mark.parametrize(
    "missing_param",
    [
        "sr",  # missing_sampling_rate
        "sw",  # missing_sample_width
        "ch",  # missing_channels
    ],
    ids=["missing_sampling_rate", "missing_sample_width", "missing_channels"],
)
def test_to_file_missing_audio_param(missing_param):
    """`to_file` needs every audio parameter for wav and mp3 output."""
    params = AUDIO_PARAMS_SHORT.copy()
    del params[missing_param]
    with pytest.raises(AudioParameterError):
        to_file(b"\0\0", "audio", audio_format="wav", **params)
    with pytest.raises(AudioParameterError):
        to_file(b"\0\0", "audio", audio_format="mp3", **params)


def test_to_file_no_pydub():
    """A compressed format cannot be written when pydub is flagged absent."""
    with patch("auditok.io._WITH_PYDUB", False):
        with pytest.raises(AudioIOError):
            # Fixed: data and file name were swapped and no audio parameters
            # were given; valid parameters are passed so that a missing
            # parameter cannot be what triggers the error.
            to_file(b"", "audio", "mp3", **AUDIO_PARAMS_SHORT)


@pytest.mark.parametrize(
    "filename, audio_format",
    [
        ("audio.ogg", None),  # ogg_with_extension
        ("audio", "ogg"),  # ogg_with_audio_format
        ("audio.wav", "ogg"),  # ogg_format_with_wrong_extension
    ],
    ids=[
        "ogg_with_extension",
        "ogg_with_audio_format",
        "ogg_format_with_wrong_extension",
    ],
)
@patch("auditok.io._WITH_PYDUB", True)
def test_to_file_compressed(filename, audio_format):
    """`to_file` exports ogg output through `AudioSegment.export`."""
    with patch("auditok.io.AudioSegment.export") as export:
        # Context manager so the directory is removed even on failure.
        with TemporaryDirectory() as tmpdir:
            filename = os.path.join(tmpdir, filename)
            to_file(b"\0\0", filename, audio_format, **AUDIO_PARAMS_SHORT)
            assert export.called


@pytest.mark.parametrize(
    "source, expected_type, extra_args",
    [
        (
            "tests/data/test_16KHZ_mono_400Hz.wav",
            BufferAudioSource,
            None,
        ),  # string_wave
        (
            "tests/data/test_16KHZ_mono_400Hz.wav",
            WaveAudioSource,
            {"large_file": True},
        ),  # string_wave_large_file
        ("-", StdinAudioSource, None),  # stdin
        (
            "tests/data/test_16KHZ_mono_400Hz.raw",
            BufferAudioSource,
            None,
        ),  # string_raw
        (
            "tests/data/test_16KHZ_mono_400Hz.raw",
            RawAudioSource,
            {"large_file": True},
        ),  # string_raw_large_file
        (b"0" * 8000, BufferAudioSource, None),  # bytes_
    ],
    ids=[
        "string_wave",
        "string_wave_large_file",
        "stdin",
        "string_raw",
        "string_raw_large_file",
        "bytes_",
    ],
)
def test_get_audio_source(source, expected_type, extra_args):
    """`get_audio_source` returns the expected source type and parameters.

    The first parameter was named `input`, which shadowed the builtin.
    """
    kwargs = {"sampling_rate": 16000, "sample_width": 2, "channels": 1}
    if extra_args is not None:
        kwargs.update(extra_args)
    audio_source = get_audio_source(source, **kwargs)
    assert isinstance(audio_source, expected_type)
    assert audio_source.sampling_rate == 16000, (
        "Unexpected sampling rate: audio_source.sampling_rate = "
        + f"{audio_source.sampling_rate} instead of 16000"
    )
    assert audio_source.sr == 16000, (
        "Unexpected sampling rate: audio_source.sr = "
        + f"{audio_source.sr} instead of 16000"
    )
    assert audio_source.sample_width == 2, (
        "Unexpected sample width: audio_source.sample_width = "
        + f"{audio_source.sample_width} instead of 2"
    )
    assert audio_source.sw == 2, (
        "Unexpected sample width: audio_source.sw = "
        + f"{audio_source.sw} instead of 2"
    )
    assert audio_source.channels == 1, (
        "Unexpected number of channels: audio_source.channels = "
        + f"{audio_source.channels} instead of 1"
    )
    assert audio_source.ch == 1, (
        "Unexpected number of channels: audio_source.ch = "
        + f"{audio_source.ch} instead of 1"
    )


def test_get_audio_source_alias_prams():
    """Short aliases (sr, sw, ch) are accepted by `get_audio_source`.

    Each parameter is then read back through both its long and short
    attribute name.
    """
    audio_source = get_audio_source(b"0" * 1600, sr=16000, sw=2, ch=1)
    # (label used in the failure message, attribute name, expected value)
    checks = (
        ("sampling rate", "sampling_rate", 16000),
        ("sampling rate", "sr", 16000),
        ("sample width", "sample_width", 2),
        ("sample width", "sw", 2),
        ("number of channels", "channels", 1),
        ("number of channels", "ch", 1),
    )
    for label, attribute, expected in checks:
        actual = getattr(audio_source, attribute)
        assert actual == expected, (
            f"Unexpected {label}: audio_source.{attribute} = "
            f"{actual} instead of {expected}"
        )