annotate tests/test_io.py @ 406:79bd3de43a5b

Accept pathlib.Path for io
author Amine Sehili <amine.sehili@gmail.com>
date Wed, 19 Jun 2024 22:48:54 +0200
parents f56b4d8adfb8
children 6c33626d0bff
rev   line source
amine@403 1 import filecmp
amine@403 2 import math
amine@106 3 import os
amine@106 4 import sys
amine@406 5 import wave
amine@107 6 from array import array
amine@406 7 from pathlib import Path
amine@133 8 from tempfile import NamedTemporaryFile, TemporaryDirectory
amine@403 9 from unittest.mock import Mock, patch
amine@403 10
amine@405 11 import numpy as np
amine@400 12 import pytest
amine@405 13 from test_AudioSource import (
amine@405 14 PURE_TONE_DICT,
amine@405 15 _generate_pure_tone,
amine@405 16 _sample_generator,
amine@405 17 )
amine@403 18
amine@110 19 from auditok.io import (
amine@121 20 AudioIOError,
amine@110 21 AudioParameterError,
amine@126 22 BufferAudioSource,
amine@162 23 RawAudioSource,
amine@403 24 StdinAudioSource,
amine@162 25 WaveAudioSource,
amine@403 26 _get_audio_parameters,
amine@143 27 _guess_audio_format,
amine@126 28 _load_raw,
amine@129 29 _load_wave,
amine@131 30 _load_with_pydub,
amine@111 31 _save_raw,
amine@110 32 _save_wave,
amine@141 33 _save_with_pydub,
amine@403 34 check_audio_data,
amine@403 35 from_file,
amine@403 36 get_audio_source,
amine@135 37 to_file,
amine@110 38 )
amine@405 39 from auditok.signal import SAMPLE_WIDTH_TO_DTYPE
amine@106 40
amine@405 41 AUDIO_PARAMS = {"sampling_rate": 16000, "sample_width": 2, "channels": 1}
amine@120 42 AUDIO_PARAMS_SHORT = {"sr": 16000, "sw": 2, "ch": 1}
amine@106 43
amine@106 44
amine@400 45 @pytest.mark.parametrize(
amine@400 46 "data, sample_width, channels, valid",
amine@400 47 [
amine@400 48 (b"\0" * 113, 1, 1, True), # valid_mono
amine@400 49 (b"\0" * 160, 1, 2, True), # valid_stereo
amine@400 50 (b"\0" * 113, 2, 1, False), # invalid_mono_sw_2
amine@400 51 (b"\0" * 113, 1, 2, False), # invalid_stereo_sw_1
amine@400 52 (b"\0" * 158, 2, 2, False), # invalid_stereo_sw_2
amine@400 53 ],
amine@400 54 ids=[
amine@400 55 "valid_mono",
amine@400 56 "valid_stereo",
amine@400 57 "invalid_mono_sw_2",
amine@400 58 "invalid_stereo_sw_1",
amine@400 59 "invalid_stereo_sw_2",
amine@400 60 ],
amine@400 61 )
amine@400 62 def test_check_audio_data(data, sample_width, channels, valid):
amine@400 63 if not valid:
amine@400 64 with pytest.raises(AudioParameterError):
amine@400 65 check_audio_data(data, sample_width, channels)
amine@400 66 else:
amine@400 67 assert check_audio_data(data, sample_width, channels) is None
amine@400 68
amine@400 69
amine@400 70 @pytest.mark.parametrize(
amine@406 71 "filename, audio_format, expected",
amine@400 72 [
amine@406 73 ("filename.wav", "wav", "wav"), # extension_and_format_same
amine@406 74 ("filename.mp3", "wav", "wav"), # extension_and_format_different
amine@406 75 ("filename.wav", None, "wav"), # extension_no_format
amine@406 76 ("filename", "wav", "wav"), # format_no_extension
amine@406 77 ("filename", None, None), # no_format_no_extension
amine@406 78 ("filename", "wave", "wav"), # wave_as_wav
amine@406 79 ("filename.wave", None, "wav"), # wave_as_wav_extension
amine@400 80 ],
amine@400 81 ids=[
amine@406 82 "extension_and_format_same",
amine@406 83 "extension_and_format_different",
amine@406 84 "extension_no_format",
amine@400 85 "format_no_extension",
amine@400 86 "no_format_no_extension",
amine@400 87 "wave_as_wav",
amine@400 88 "wave_as_wav_extension",
amine@400 89 ],
amine@400 90 )
amine@406 91 def test_guess_audio_format(filename, audio_format, expected):
amine@406 92 result = _guess_audio_format(filename, audio_format)
amine@406 93 assert result == expected
amine@406 94
amine@406 95 result = _guess_audio_format(Path(filename), audio_format)
amine@400 96 assert result == expected
amine@400 97
amine@400 98
amine@400 99 def test_get_audio_parameters_short_params():
amine@400 100 expected = (8000, 2, 1)
amine@405 101 params = dict(zip(("sr", "sw", "ch"), expected, strict=True))
amine@400 102 result = _get_audio_parameters(params)
amine@400 103 assert result == expected
amine@400 104
amine@400 105
amine@400 106 def test_get_audio_parameters_long_params():
amine@400 107 expected = (8000, 2, 1)
amine@400 108 params = dict(
amine@400 109 zip(
amine@405 110 ("sampling_rate", "sample_width", "channels"),
amine@400 111 expected,
amine@405 112 strict=True,
amine@400 113 )
amine@108 114 )
amine@400 115 result = _get_audio_parameters(params)
amine@400 116 assert result == expected
amine@108 117
amine@110 118
amine@400 119 def test_get_audio_parameters_long_params_shadow_short_ones():
amine@400 120 expected = (8000, 2, 1)
amine@405 121 params = dict(
amine@405 122 zip(
amine@405 123 ("sampling_rate", "sample_width", "channels"), expected, strict=True
amine@405 124 )
amine@405 125 )
amine@405 126 params.update(dict(zip(("sr", "sw", "ch"), "xxx", strict=True)))
amine@400 127 result = _get_audio_parameters(params)
amine@400 128 assert result == expected
amine@143 129
amine@145 130
amine@400 131 @pytest.mark.parametrize(
amine@405 132 "missing_param",
amine@405 133 [
amine@405 134 "sampling_rate", # missing_sampling_rate
amine@405 135 "sample_width", # missing_sample_width
amine@405 136 "channels", # missing_channels
amine@405 137 ],
amine@405 138 ids=["missing_sampling_rate", "missing_sample_width", "missing_channels"],
amine@405 139 )
amine@405 140 def test_get_audio_parameters_missing_parameter(missing_param):
amine@405 141 params = AUDIO_PARAMS.copy()
amine@405 142 del params[missing_param]
amine@405 143 with pytest.raises(AudioParameterError):
amine@405 144 _get_audio_parameters(params)
amine@405 145
amine@405 146
amine@405 147 @pytest.mark.parametrize(
amine@405 148 "missing_param",
amine@405 149 [
amine@405 150 "sr", # missing_sampling_rate
amine@405 151 "sw", # missing_sample_width
amine@405 152 "ch", # missing_channels
amine@405 153 ],
amine@405 154 ids=["missing_sampling_rate", "missing_sample_width", "missing_channels"],
amine@405 155 )
amine@405 156 def test_get_audio_parameters_missing_parameter_short(missing_param):
amine@405 157 params = AUDIO_PARAMS_SHORT.copy()
amine@405 158 del params[missing_param]
amine@405 159 with pytest.raises(AudioParameterError):
amine@405 160 _get_audio_parameters(params)
amine@405 161
amine@405 162
amine@405 163 @pytest.mark.parametrize(
amine@400 164 "values",
amine@400 165 [
amine@400 166 ("x", 2, 1), # str_sampling_rate
amine@400 167 (-8000, 2, 1), # negative_sampling_rate
amine@400 168 (8000, "x", 1), # str_sample_width
amine@400 169 (8000, -2, 1), # negative_sample_width
amine@400 170 (8000, 2, "x"), # str_channels
amine@400 171 (8000, 2, -1), # negative_channels
amine@400 172 ],
amine@400 173 ids=[
amine@400 174 "str_sampling_rate",
amine@400 175 "negative_sampling_rate",
amine@400 176 "str_sample_width",
amine@400 177 "negative_sample_width",
amine@400 178 "str_channels",
amine@400 179 "negative_channels",
amine@400 180 ],
amine@400 181 )
amine@400 182 def test_get_audio_parameters_invalid(values):
amine@405 183 params = dict(
amine@405 184 zip(("sampling_rate", "sample_width", "channels"), values, strict=True)
amine@405 185 )
amine@400 186 with pytest.raises(AudioParameterError):
amine@400 187 _get_audio_parameters(params)
amine@145 188
amine@145 189
amine@400 190 @pytest.mark.parametrize(
amine@400 191 "filename, audio_format, funtion_name, kwargs",
amine@400 192 [
amine@400 193 (
amine@120 194 "audio",
amine@120 195 "raw",
amine@120 196 "_load_raw",
amine@120 197 AUDIO_PARAMS_SHORT,
amine@400 198 ), # raw_with_audio_format
amine@400 199 (
amine@120 200 "audio.raw",
amine@120 201 None,
amine@120 202 "_load_raw",
amine@120 203 AUDIO_PARAMS_SHORT,
amine@400 204 ), # raw_with_extension
amine@400 205 ("audio", "wave", "_load_wave", None), # wave_with_audio_format
amine@400 206 ("audio", "wave", "_load_wave", None), # wav_with_audio_format
amine@400 207 ("audio.wav", None, "_load_wave", None), # wav_with_extension
amine@400 208 (
amine@400 209 "audio.dat",
amine@400 210 "wav",
amine@400 211 "_load_wave",
amine@400 212 None,
amine@400 213 ), # format_and_extension_both_given_a
amine@400 214 (
amine@400 215 "audio.raw",
amine@400 216 "wave",
amine@400 217 "_load_wave",
amine@400 218 None,
amine@400 219 ), # format_and_extension_both_given_b
amine@400 220 ("audio", None, "_load_with_pydub", None), # no_format_nor_extension
amine@400 221 ("audio.ogg", None, "_load_with_pydub", None), # other_formats_ogg
amine@400 222 ("audio", "webm", "_load_with_pydub", None), # other_formats_webm
amine@400 223 ],
amine@400 224 ids=[
amine@400 225 "raw_with_audio_format",
amine@400 226 "raw_with_extension",
amine@400 227 "wave_with_audio_format",
amine@400 228 "wav_with_audio_format",
amine@400 229 "wav_with_extension",
amine@400 230 "format_and_extension_both_given_a",
amine@400 231 "format_and_extension_both_given_b",
amine@400 232 "no_format_nor_extension",
amine@400 233 "other_formats_ogg",
amine@400 234 "other_formats_webm",
amine@400 235 ],
amine@400 236 )
amine@400 237 def test_from_file(filename, audio_format, funtion_name, kwargs):
amine@400 238 funtion_name = "auditok.io." + funtion_name
amine@400 239 if kwargs is None:
amine@400 240 kwargs = {}
amine@400 241 with patch(funtion_name) as patch_function:
amine@400 242 from_file(filename, audio_format, **kwargs)
amine@400 243 assert patch_function.called
amine@400 244
amine@400 245
amine@406 246 @pytest.mark.parametrize(
amine@406 247 "large_file, cls, size, use_pathlib",
amine@406 248 [
amine@406 249 (False, BufferAudioSource, -1, False), # large_file_false_negative_size
amine@406 250 (False, BufferAudioSource, None, False), # large_file_false_None_size
amine@406 251 (True, RawAudioSource, -1, False), # large_file_true_negative_size
amine@406 252 (True, RawAudioSource, None, False), # large_file_true_None_size
amine@406 253 (True, RawAudioSource, -1, True), # large_file_true_negative_size_Path
amine@406 254 ],
amine@406 255 ids=[
amine@406 256 "large_file_false_negative_size",
amine@406 257 "large_file_false_None_size",
amine@406 258 "large_file_true_negative_size",
amine@406 259 "large_file_true_None_size",
amine@406 260 "large_file_true_negative_size_Path",
amine@406 261 ],
amine@406 262 )
amine@406 263 def test_from_file_raw_read_all(large_file, cls, size, use_pathlib):
amine@406 264 filename = Path("tests/data/test_16KHZ_mono_400Hz.raw")
amine@406 265 if use_pathlib:
amine@406 266 filename = Path(filename)
amine@400 267 audio_source = from_file(
amine@400 268 filename,
amine@406 269 large_file=large_file,
amine@400 270 sampling_rate=16000,
amine@400 271 sample_width=2,
amine@400 272 channels=1,
amine@120 273 )
amine@406 274 assert isinstance(audio_source, cls)
amine@120 275
amine@406 276 with open(filename, "rb") as fp:
amine@406 277 expected = fp.read()
amine@406 278 audio_source.open()
amine@406 279 data = audio_source.read(size)
amine@406 280 audio_source.close()
amine@406 281 assert data == expected
amine@162 282
amine@406 283
amine@406 284 @pytest.mark.parametrize(
amine@406 285 "large_file, cls, size, use_pathlib",
amine@406 286 [
amine@406 287 (False, BufferAudioSource, -1, False), # large_file_false_negative_size
amine@406 288 (False, BufferAudioSource, None, False), # large_file_false_None_size
amine@406 289 (True, WaveAudioSource, -1, False), # large_file_true_negative_size
amine@406 290 (True, WaveAudioSource, None, False), # large_file_true_None_size
amine@406 291 (True, WaveAudioSource, -1, True), # large_file_true_negative_size_Path
amine@406 292 ],
amine@406 293 ids=[
amine@406 294 "large_file_false_negative_size",
amine@406 295 "large_file_false_None_size",
amine@406 296 "large_file_true_negative_size",
amine@406 297 "large_file_true_None_size",
amine@406 298 "large_file_true_negative_size_Path",
amine@406 299 ],
amine@406 300 )
amine@406 301 def test_from_file_wave_read_all(large_file, cls, size, use_pathlib):
amine@400 302 filename = "tests/data/test_16KHZ_mono_400Hz.wav"
amine@406 303 if use_pathlib:
amine@406 304 filename = Path(filename)
amine@406 305 audio_source = from_file(
amine@406 306 filename,
amine@406 307 large_file=large_file,
amine@406 308 sampling_rate=16000,
amine@406 309 sample_width=2,
amine@406 310 channels=1,
amine@406 311 )
amine@406 312 assert isinstance(audio_source, cls)
amine@406 313
amine@406 314 with wave.open(str(filename)) as fp:
amine@406 315 expected = fp.readframes(-1)
amine@406 316 audio_source.open()
amine@406 317 data = audio_source.read(size)
amine@406 318 audio_source.close()
amine@406 319 assert data == expected
amine@163 320
amine@162 321
amine@400 322 def test_from_file_large_file_compressed():
amine@400 323 filename = "tests/data/test_16KHZ_mono_400Hz.ogg"
amine@400 324 with pytest.raises(AudioIOError):
amine@400 325 from_file(filename, large_file=True)
amine@137 326
amine@121 327
amine@400 328 @pytest.mark.parametrize(
amine@400 329 "missing_param",
amine@400 330 [
amine@400 331 "sr", # missing_sampling_rate
amine@400 332 "sw", # missing_sample_width
amine@400 333 "ch", # missing_channels
amine@400 334 ],
amine@400 335 ids=["missing_sampling_rate", "missing_sample_width", "missing_channels"],
amine@400 336 )
amine@400 337 def test_from_file_missing_audio_param(missing_param):
amine@405 338 params = AUDIO_PARAMS_SHORT.copy()
amine@405 339 del params[missing_param]
amine@400 340 with pytest.raises(AudioParameterError):
amine@400 341 from_file("audio", audio_format="raw", **params)
amine@240 342
amine@400 343
amine@400 344 def test_from_file_no_pydub():
amine@400 345 with patch("auditok.io._WITH_PYDUB", False):
amine@400 346 with pytest.raises(AudioIOError):
amine@400 347 from_file("audio", "mp3")
amine@400 348
amine@400 349
amine@400 350 @pytest.mark.parametrize(
amine@400 351 "audio_format, function",
amine@400 352 [
amine@400 353 ("ogg", "from_ogg"), # ogg_first_channel
amine@400 354 ("ogg", "from_ogg"), # ogg_second_channel
amine@400 355 ("ogg", "from_ogg"), # ogg_mix
amine@400 356 ("ogg", "from_ogg"), # ogg_default
amine@400 357 ("mp3", "from_mp3"), # mp3_left_channel
amine@400 358 ("mp3", "from_mp3"), # mp3_right_channel
amine@400 359 ("flac", "from_file"), # flac_first_channel
amine@400 360 ("flac", "from_file"), # flac_second_channel
amine@400 361 ("flv", "from_flv"), # flv_left_channel
amine@400 362 ("webm", "from_file"), # webm_right_channel
amine@400 363 ],
amine@400 364 ids=[
amine@400 365 "ogg_first_channel",
amine@400 366 "ogg_second_channel",
amine@400 367 "ogg_mix",
amine@400 368 "ogg_default",
amine@400 369 "mp3_left_channel",
amine@400 370 "mp3_right_channel",
amine@400 371 "flac_first_channel",
amine@400 372 "flac_second_channel",
amine@400 373 "flv_left_channel",
amine@400 374 "webm_right_channel",
amine@400 375 ],
amine@400 376 )
amine@400 377 @patch("auditok.io._WITH_PYDUB", True)
amine@400 378 @patch("auditok.io.BufferAudioSource")
amine@400 379 def test_from_file_multichannel_audio_compressed(
amine@400 380 mock_buffer_audio_source, audio_format, function
amine@400 381 ):
amine@400 382 filename = "audio.{}".format(audio_format)
amine@400 383 segment_mock = Mock()
amine@400 384 segment_mock.sample_width = 2
amine@400 385 segment_mock.channels = 2
amine@400 386 segment_mock._data = b"abcd"
amine@400 387 with patch("auditok.io.AudioSegment.{}".format(function)) as open_func:
amine@400 388 open_func.return_value = segment_mock
amine@400 389 from_file(filename)
amine@400 390 assert open_func.called
amine@400 391
amine@400 392
amine@400 393 @pytest.mark.parametrize(
amine@400 394 "file_id, frequencies, large_file",
amine@400 395 [
amine@400 396 ("mono_400", (400,), False), # mono
amine@400 397 ("3channel_400-800-1600", (400, 800, 1600), False), # three_channel
amine@400 398 ("mono_400", (400,), True), # mono_large_file
amine@400 399 (
amine@313 400 "3channel_400-800-1600",
amine@313 401 (400, 800, 1600),
amine@313 402 True,
amine@400 403 ), # three_channel_large_file
amine@400 404 ],
amine@400 405 ids=[
amine@400 406 "mono",
amine@400 407 "three_channel",
amine@400 408 "mono_large_file",
amine@400 409 "three_channel_large_file",
amine@400 410 ],
amine@400 411 )
amine@400 412 def test_load_raw(file_id, frequencies, large_file):
amine@400 413 filename = "tests/data/test_16KHZ_{}Hz.raw".format(file_id)
amine@400 414 audio_source = _load_raw(
amine@400 415 filename, 16000, 2, len(frequencies), large_file=large_file
amine@126 416 )
amine@400 417 audio_source.open()
amine@400 418 data = audio_source.read(-1)
amine@400 419 audio_source.close()
amine@400 420 expected_class = RawAudioSource if large_file else BufferAudioSource
amine@400 421 assert isinstance(audio_source, expected_class)
amine@400 422 assert audio_source.sampling_rate == 16000
amine@400 423 assert audio_source.sample_width == 2
amine@400 424 assert audio_source.channels == len(frequencies)
amine@400 425 mono_channels = [PURE_TONE_DICT[freq] for freq in frequencies]
amine@405 426 dtype = SAMPLE_WIDTH_TO_DTYPE[audio_source.sample_width]
amine@405 427 expected = np.fromiter(
amine@405 428 _sample_generator(*mono_channels), dtype=dtype
amine@405 429 ).tobytes()
amine@400 430 assert data == expected
amine@126 431
amine@128 432
amine@405 433 def test_load_raw_missing_audio_param():
amine@400 434 with pytest.raises(AudioParameterError):
amine@405 435 _load_raw("audio", sampling_rate=None, sample_width=1, channels=1)
amine@405 436
amine@405 437 with pytest.raises(AudioParameterError):
amine@405 438 _load_raw("audio", sampling_rate=16000, sample_width=None, channels=1)
amine@405 439
amine@405 440 with pytest.raises(AudioParameterError):
amine@405 441 _load_raw("audio", sampling_rate=16000, sample_width=1, channels=None)
amine@400 442
amine@400 443
amine@400 444 @pytest.mark.parametrize(
amine@400 445 "file_id, frequencies, large_file",
amine@400 446 [
amine@400 447 ("mono_400", (400,), False), # mono
amine@400 448 ("3channel_400-800-1600", (400, 800, 1600), False), # three_channel
amine@400 449 ("mono_400", (400,), True), # mono_large_file
amine@400 450 (
amine@313 451 "3channel_400-800-1600",
amine@313 452 (400, 800, 1600),
amine@313 453 True,
amine@400 454 ), # three_channel_large_file
amine@400 455 ],
amine@400 456 ids=[
amine@400 457 "mono",
amine@400 458 "three_channel",
amine@400 459 "mono_large_file",
amine@400 460 "three_channel_large_file",
amine@400 461 ],
amine@400 462 )
amine@400 463 def test_load_wave(file_id, frequencies, large_file):
amine@400 464 filename = "tests/data/test_16KHZ_{}Hz.wav".format(file_id)
amine@400 465 audio_source = _load_wave(filename, large_file=large_file)
amine@400 466 audio_source.open()
amine@400 467 data = audio_source.read(-1)
amine@400 468 audio_source.close()
amine@400 469 expected_class = WaveAudioSource if large_file else BufferAudioSource
amine@400 470 assert isinstance(audio_source, expected_class)
amine@400 471 assert audio_source.sampling_rate == 16000
amine@400 472 assert audio_source.sample_width == 2
amine@400 473 assert audio_source.channels == len(frequencies)
amine@400 474 mono_channels = [PURE_TONE_DICT[freq] for freq in frequencies]
amine@405 475 dtype = SAMPLE_WIDTH_TO_DTYPE[audio_source.sample_width]
amine@405 476 expected = np.fromiter(
amine@405 477 _sample_generator(*mono_channels), dtype=dtype
amine@405 478 ).tobytes()
amine@400 479 assert data == expected
amine@400 480
amine@400 481
amine@400 482 @pytest.mark.parametrize(
amine@400 483 "audio_format, channels, function",
amine@400 484 [
amine@400 485 ("ogg", 2, "from_ogg"), # ogg_default_first_channel
amine@400 486 ("ogg", 1, "from_ogg"), # ogg_first_channel
amine@400 487 ("ogg", 2, "from_ogg"), # ogg_second_channel
amine@400 488 ("ogg", 3, "from_ogg"), # ogg_mix_channels
amine@400 489 ("mp3", 1, "from_mp3"), # mp3_left_channel
amine@400 490 ("mp3", 2, "from_mp3"), # mp3_right_channel
amine@400 491 ("mp3", 3, "from_mp3"), # mp3_mix_channels
amine@400 492 ("flac", 2, "from_file"), # flac_first_channel
amine@400 493 ("flac", 2, "from_file"), # flac_second_channel
amine@400 494 ("flv", 1, "from_flv"), # flv_left_channel
amine@400 495 ("webm", 2, "from_file"), # webm_right_channel
amine@400 496 ("webm", 4, "from_file"), # webm_mix_channels
amine@400 497 ],
amine@400 498 ids=[
amine@400 499 "ogg_default_first_channel",
amine@400 500 "ogg_first_channel",
amine@400 501 "ogg_second_channel",
amine@400 502 "ogg_mix_channels",
amine@400 503 "mp3_left_channel",
amine@400 504 "mp3_right_channel",
amine@400 505 "mp3_mix_channels",
amine@400 506 "flac_first_channel",
amine@400 507 "flac_second_channel",
amine@400 508 "flv_left_channel",
amine@400 509 "webm_right_channel",
amine@400 510 "webm_mix_channels",
amine@400 511 ],
amine@400 512 )
amine@400 513 @patch("auditok.io._WITH_PYDUB", True)
amine@400 514 @patch("auditok.io.BufferAudioSource")
amine@400 515 def test_load_with_pydub(
amine@400 516 mock_buffer_audio_source, audio_format, channels, function
amine@400 517 ):
amine@400 518 filename = "audio.{}".format(audio_format)
amine@400 519 segment_mock = Mock()
amine@400 520 segment_mock.sample_width = 2
amine@400 521 segment_mock.channels = channels
amine@400 522 segment_mock._data = b"abcdefgh"
amine@400 523 with patch("auditok.io.AudioSegment.{}".format(function)) as open_func:
amine@400 524 open_func.return_value = segment_mock
amine@400 525 _load_with_pydub(filename, audio_format)
amine@400 526 assert open_func.called
amine@400 527
amine@400 528
amine@400 529 @pytest.mark.parametrize(
amine@406 530 "filename, frequencies, use_pathlib",
amine@400 531 [
amine@406 532 ("mono_400Hz.raw", (400,), False), # mono
amine@406 533 ("mono_400Hz.raw", (400,), True), # mono_pathlib
amine@406 534 (
amine@406 535 "3channel_400-800-1600Hz.raw",
amine@406 536 (400, 800, 1600),
amine@406 537 False,
amine@406 538 ), # three_channel
amine@400 539 ],
amine@406 540 ids=["mono", "three_channel", "use_pathlib"],
amine@400 541 )
amine@406 542 def test_save_raw(filename, frequencies, use_pathlib):
amine@400 543 filename = "tests/data/test_16KHZ_{}".format(filename)
amine@406 544 if use_pathlib:
amine@406 545 filename = Path(filename)
amine@400 546 sample_width = 2
amine@405 547 dtype = SAMPLE_WIDTH_TO_DTYPE[sample_width]
amine@400 548 mono_channels = [PURE_TONE_DICT[freq] for freq in frequencies]
amine@405 549 data = np.fromiter(_sample_generator(*mono_channels), dtype=dtype).tobytes()
amine@400 550 tmpfile = NamedTemporaryFile()
amine@400 551 _save_raw(data, tmpfile.name)
amine@400 552 assert filecmp.cmp(tmpfile.name, filename, shallow=False)
amine@400 553
amine@400 554
amine@400 555 @pytest.mark.parametrize(
amine@406 556 "filename, frequencies, use_pathlib",
amine@400 557 [
amine@406 558 ("mono_400Hz.wav", (400,), False), # mono
amine@406 559 ("mono_400Hz.wav", (400,), True), # mono_pathlib
amine@406 560 (
amine@406 561 "3channel_400-800-1600Hz.wav",
amine@406 562 (400, 800, 1600),
amine@406 563 False,
amine@406 564 ), # three_channel
amine@400 565 ],
amine@406 566 ids=["mono", "mono_pathlib", "three_channel"],
amine@400 567 )
amine@406 568 def test_save_wave(filename, frequencies, use_pathlib):
amine@400 569 filename = "tests/data/test_16KHZ_{}".format(filename)
amine@406 570 if use_pathlib:
amine@406 571 filename = str(filename)
amine@400 572 sampling_rate = 16000
amine@400 573 sample_width = 2
amine@400 574 channels = len(frequencies)
amine@400 575 mono_channels = [PURE_TONE_DICT[freq] for freq in frequencies]
amine@405 576 dtype = SAMPLE_WIDTH_TO_DTYPE[sample_width]
amine@405 577 data = np.fromiter(_sample_generator(*mono_channels), dtype=dtype).tobytes()
amine@400 578 tmpfile = NamedTemporaryFile()
amine@400 579 _save_wave(data, tmpfile.name, sampling_rate, sample_width, channels)
amine@400 580 assert filecmp.cmp(tmpfile.name, filename, shallow=False)
amine@400 581
amine@400 582
amine@400 583 @pytest.mark.parametrize(
amine@400 584 "missing_param",
amine@400 585 [
amine@400 586 "sr", # missing_sampling_rate
amine@400 587 "sw", # missing_sample_width
amine@400 588 "ch", # missing_channels
amine@400 589 ],
amine@400 590 ids=["missing_sampling_rate", "missing_sample_width", "missing_channels"],
amine@400 591 )
amine@400 592 def test_save_wave_missing_audio_param(missing_param):
amine@400 593 with pytest.raises(AudioParameterError):
amine@405 594 _save_wave(
amine@405 595 b"\0\0", "audio", sampling_rate=None, sample_width=1, channels=1
amine@405 596 )
amine@405 597
amine@405 598 with pytest.raises(AudioParameterError):
amine@405 599 _save_wave(
amine@405 600 b"\0\0", "audio", sampling_rate=16000, sample_width=None, channels=1
amine@405 601 )
amine@405 602
amine@405 603 with pytest.raises(AudioParameterError):
amine@405 604 _save_wave(
amine@405 605 b"\0\0", "audio", sampling_rate=16000, sample_width=1, channels=None
amine@405 606 )
amine@400 607
amine@400 608
amine@400 609 def test_save_with_pydub():
amine@400 610 with patch("auditok.io.AudioSegment.export") as export:
amine@400 611 tmpdir = TemporaryDirectory()
amine@400 612 filename = os.path.join(tmpdir.name, "audio.ogg")
amine@400 613 _save_with_pydub(b"\0\0", filename, "ogg", 16000, 2, 1)
amine@400 614 assert export.called
amine@400 615 tmpdir.cleanup()
amine@400 616
amine@400 617
amine@400 618 @pytest.mark.parametrize(
amine@400 619 "filename, audio_format",
amine@400 620 [
amine@400 621 ("audio", "raw"), # raw_with_audio_format
amine@400 622 ("audio.raw", None), # raw_with_extension
amine@400 623 ("audio.mp3", "raw"), # raw_with_audio_format_and_extension
amine@400 624 ("audio", None), # raw_no_audio_format_nor_extension
amine@400 625 ],
amine@400 626 ids=[
amine@400 627 "raw_with_audio_format",
amine@400 628 "raw_with_extension",
amine@400 629 "raw_with_audio_format_and_extension",
amine@400 630 "raw_no_audio_format_nor_extension",
amine@400 631 ],
amine@400 632 )
amine@400 633 def test_to_file_raw(filename, audio_format):
amine@400 634 exp_filename = "tests/data/test_16KHZ_mono_400Hz.raw"
amine@400 635 tmpdir = TemporaryDirectory()
amine@400 636 filename = os.path.join(tmpdir.name, filename)
amine@400 637 data = PURE_TONE_DICT[400].tobytes()
amine@400 638 to_file(data, filename, audio_format=audio_format)
amine@400 639 assert filecmp.cmp(filename, exp_filename, shallow=False)
amine@400 640 tmpdir.cleanup()
amine@400 641
amine@400 642
amine@400 643 @pytest.mark.parametrize(
amine@400 644 "filename, audio_format",
amine@400 645 [
amine@400 646 ("audio", "wav"), # wav_with_audio_format
amine@400 647 ("audio.wav", None), # wav_with_extension
amine@400 648 ("audio.mp3", "wav"), # wav_with_audio_format_and_extension
amine@400 649 ("audio", "wave"), # wave_with_audio_format
amine@400 650 ("audio.wave", None), # wave_with_extension
amine@400 651 ("audio.mp3", "wave"), # wave_with_audio_format_and_extension
amine@400 652 ],
amine@400 653 ids=[
amine@400 654 "wav_with_audio_format",
amine@400 655 "wav_with_extension",
amine@400 656 "wav_with_audio_format_and_extension",
amine@400 657 "wave_with_audio_format",
amine@400 658 "wave_with_extension",
amine@400 659 "wave_with_audio_format_and_extension",
amine@400 660 ],
amine@400 661 )
amine@400 662 def test_to_file_wave(filename, audio_format):
amine@400 663 exp_filename = "tests/data/test_16KHZ_mono_400Hz.wav"
amine@400 664 tmpdir = TemporaryDirectory()
amine@400 665 filename = os.path.join(tmpdir.name, filename)
amine@400 666 data = PURE_TONE_DICT[400].tobytes()
amine@400 667 to_file(
amine@400 668 data,
amine@400 669 filename,
amine@400 670 audio_format=audio_format,
amine@400 671 sampling_rate=16000,
amine@400 672 sample_width=2,
amine@400 673 channels=1,
amine@129 674 )
amine@400 675 assert filecmp.cmp(filename, exp_filename, shallow=False)
amine@400 676 tmpdir.cleanup()
amine@129 677
amine@240 678
amine@400 679 @pytest.mark.parametrize(
amine@400 680 "missing_param",
amine@400 681 [
amine@400 682 "sr", # missing_sampling_rate
amine@400 683 "sw", # missing_sample_width
amine@400 684 "ch", # missing_channels
amine@400 685 ],
amine@400 686 ids=["missing_sampling_rate", "missing_sample_width", "missing_channels"],
amine@400 687 )
amine@400 688 def test_to_file_missing_audio_param(missing_param):
amine@400 689 params = AUDIO_PARAMS_SHORT.copy()
amine@400 690 del params[missing_param]
amine@400 691 with pytest.raises(AudioParameterError):
amine@400 692 to_file(b"\0\0", "audio", audio_format="wav", **params)
amine@400 693 with pytest.raises(AudioParameterError):
amine@400 694 to_file(b"\0\0", "audio", audio_format="mp3", **params)
amine@132 695
amine@132 696
amine@400 697 def test_to_file_no_pydub():
amine@400 698 with patch("auditok.io._WITH_PYDUB", False):
amine@400 699 with pytest.raises(AudioIOError):
amine@400 700 to_file("audio", b"", "mp3")
amine@133 701
amine@141 702
amine@400 703 @pytest.mark.parametrize(
amine@400 704 "filename, audio_format",
amine@400 705 [
amine@400 706 ("audio.ogg", None), # ogg_with_extension
amine@400 707 ("audio", "ogg"), # ogg_with_audio_format
amine@400 708 ("audio.wav", "ogg"), # ogg_format_with_wrong_extension
amine@400 709 ],
amine@400 710 ids=[
amine@400 711 "ogg_with_extension",
amine@400 712 "ogg_with_audio_format",
amine@400 713 "ogg_format_with_wrong_extension",
amine@400 714 ],
amine@400 715 )
amine@400 716 @patch("auditok.io._WITH_PYDUB", True)
amine@400 717 def test_to_file_compressed(filename, audio_format):
amine@400 718 with patch("auditok.io.AudioSegment.export") as export:
amine@133 719 tmpdir = TemporaryDirectory()
amine@133 720 filename = os.path.join(tmpdir.name, filename)
amine@400 721 to_file(b"\0\0", filename, audio_format, **AUDIO_PARAMS_SHORT)
amine@400 722 assert export.called
amine@133 723 tmpdir.cleanup()
amine@134 724
amine@138 725
amine@400 726 @pytest.mark.parametrize(
amine@400 727 "input, expected_type, extra_args",
amine@400 728 [
amine@400 729 (
amine@190 730 "tests/data/test_16KHZ_mono_400Hz.wav",
amine@190 731 BufferAudioSource,
amine@400 732 None,
amine@400 733 ), # string_wave
amine@400 734 (
amine@190 735 "tests/data/test_16KHZ_mono_400Hz.wav",
amine@190 736 WaveAudioSource,
amine@190 737 {"large_file": True},
amine@400 738 ), # string_wave_large_file
amine@400 739 ("-", StdinAudioSource, None), # stdin
amine@400 740 (
amine@400 741 "tests/data/test_16KHZ_mono_400Hz.raw",
amine@400 742 BufferAudioSource,
amine@400 743 None,
amine@400 744 ), # string_raw
amine@400 745 (
amine@190 746 "tests/data/test_16KHZ_mono_400Hz.raw",
amine@190 747 RawAudioSource,
amine@190 748 {"large_file": True},
amine@400 749 ), # string_raw_large_file
amine@400 750 (b"0" * 8000, BufferAudioSource, None), # bytes_
amine@400 751 ],
amine@400 752 ids=[
amine@400 753 "string_wave",
amine@400 754 "string_wave_large_file",
amine@400 755 "stdin",
amine@400 756 "string_raw",
amine@400 757 "string_raw_large_file",
amine@400 758 "bytes_",
amine@400 759 ],
amine@400 760 )
amine@400 761 def test_get_audio_source(input, expected_type, extra_args):
amine@400 762 kwargs = {"sampling_rate": 16000, "sample_width": 2, "channels": 1}
amine@400 763 if extra_args is not None:
amine@400 764 kwargs.update(extra_args)
amine@400 765 audio_source = get_audio_source(input, **kwargs)
amine@400 766 assert isinstance(audio_source, expected_type)
amine@403 767 assert audio_source.sampling_rate == 16000, (
amine@403 768 "Unexpected sampling rate: audio_source.sampling_rate = "
amine@403 769 + f"{audio_source.sampling_rate} instead of 16000"
amine@403 770 )
amine@403 771 assert audio_source.sr == 16000, (
amine@403 772 "Unexpected sampling rate: audio_source.sr = "
amine@403 773 + f"{audio_source.sr} instead of 16000"
amine@403 774 )
amine@403 775 assert audio_source.sample_width == 2, (
amine@403 776 "Unexpected sample width: audio_source.sample_width = "
amine@403 777 + f"{audio_source.sample_width} instead of 2"
amine@403 778 )
amine@403 779 assert audio_source.sw == 2, (
amine@403 780 "Unexpected sample width: audio_source.sw = "
amine@403 781 + f"{audio_source.sw} instead of 2"
amine@403 782 )
amine@403 783 assert audio_source.channels == 1, (
amine@403 784 "Unexpected number of channels: audio_source.channels = "
amine@403 785 + f"{audio_source.channels} instead of 1"
amine@403 786 )
amine@403 787 assert audio_source.ch == 1, (
amine@403 788 "Unexpected number of channels: audio_source.ch = "
amine@403 789 + f"{audio_source.ch} instead of 1"
amine@403 790 )
amine@403 791
amine@403 792
amine@403 793 def test_get_audio_source_alias_prams():
amine@403 794 audio_source = get_audio_source(b"0" * 1600, sr=16000, sw=2, ch=1)
amine@403 795 assert audio_source.sampling_rate == 16000, (
amine@403 796 "Unexpected sampling rate: audio_source.sampling_rate = "
amine@403 797 + f"{audio_source.sampling_rate} instead of 16000"
amine@403 798 )
amine@403 799 assert audio_source.sr == 16000, (
amine@403 800 "Unexpected sampling rate: audio_source.sr = "
amine@403 801 + f"{audio_source.sr} instead of 16000"
amine@403 802 )
amine@403 803 assert audio_source.sample_width == 2, (
amine@403 804 "Unexpected sample width: audio_source.sample_width = "
amine@403 805 + f"{audio_source.sample_width} instead of 2"
amine@403 806 )
amine@403 807 assert audio_source.sw == 2, (
amine@403 808 "Unexpected sample width: audio_source.sw = "
amine@403 809 + f"{audio_source.sw} instead of 2"
amine@403 810 )
amine@403 811 assert audio_source.channels == 1, (
amine@403 812 "Unexpected number of channels: audio_source.channels = "
amine@403 813 + f"{audio_source.channels} instead of 1"
amine@403 814 )
amine@403 815 assert audio_source.ch == 1, (
amine@403 816 "Unexpected number of channels: audio_source.ch = "
amine@403 817 + f"{audio_source.ch} instead of 1"
amine@403 818 )