Mercurial > hg > auditok
diff tests/test_core.py @ 405:f56b4d8adfb8
Use numpy instead of audioop everywhere
author | Amine Sehili <amine.sehili@gmail.com> |
---|---|
date | Mon, 17 Jun 2024 19:45:51 +0200 |
parents | 996948ada980 |
children | 0e938065a2db |
line wrap: on
line diff
--- a/tests/test_core.py Sun May 26 23:29:33 2024 +0200 +++ b/tests/test_core.py Mon Jun 17 19:45:51 2024 +0200 @@ -1,10 +1,10 @@ import math import os -from array import array as array_ from random import random from tempfile import TemporaryDirectory from unittest.mock import Mock, patch +import numpy as np import pytest from auditok import AudioParameterError, AudioRegion, load, split @@ -15,6 +15,7 @@ _read_offline, ) from auditok.io import get_audio_source +from auditok.signal import to_array from auditok.util import AudioReader @@ -33,13 +34,13 @@ @pytest.mark.parametrize( "skip, max_read, channels", [ - (0, -1, 1), - (0, -1, 2), - (2, -1, 1), - (2, None, 1), - (2, 3, 1), - (2, 3.5, 2), - (2.4, 3.5, 2), + (0, -1, 1), # no_skip_read_all + (0, -1, 2), # no_skip_read_all_stereo + (2, -1, 1), # skip_2_read_all + (2, None, 1), # skip_2_read_all_None + (2, 3, 1), # skip_2_read_3 + (2, 3.5, 2), # skip_2_read_3_5_stereo + (2.4, 3.5, 2), # skip_2_4_read_3_5_stereo ], ids=[ "no_skip_read_all", @@ -77,15 +78,15 @@ @pytest.mark.parametrize( "duration, analysis_window, round_fn, expected, kwargs", [ - (0, 1, None, 0, None), - (0.3, 0.1, round, 3, None), - (0.35, 0.1, math.ceil, 4, None), - (0.35, 0.1, math.floor, 3, None), - (0.05, 0.1, round, 0, None), - (0.05, 0.1, math.ceil, 1, None), - (0.3, 0.1, math.floor, 3, {"epsilon": 1e-6}), - (-0.5, 0.1, math.ceil, ValueError, None), - (0.5, -0.1, math.ceil, ValueError, None), + (0, 1, None, 0, None), # zero_duration + (0.3, 0.1, round, 3, None), # multiple + (0.35, 0.1, math.ceil, 4, None), # not_multiple_ceil + (0.35, 0.1, math.floor, 3, None), # not_multiple_floor + (0.05, 0.1, round, 0, None), # small_duration + (0.05, 0.1, math.ceil, 1, None), # small_duration_ceil + (0.3, 0.1, math.floor, 3, {"epsilon": 1e-6}), # with_round_error + (-0.5, 0.1, math.ceil, ValueError, None), # negative_duration + (0.5, -0.1, math.ceil, ValueError, None), # negative_analysis_window ], ids=[ "zero_duration", @@ -117,14 +118,14 @@ @pytest.mark.parametrize( "channels, skip, max_read", [ - (1, 0, None), - (1, 3, None), - (1, 2, -1), - (1, 2, 3), - (2, 0, None), - (2, 3, None), - (2, 2, -1), - (2, 2, 3), + (1, 0, None), # mono_skip_0_max_read_None + (1, 3, None), # mono_skip_3_max_read_None + (1, 2, -1), # mono_skip_2_max_read_negative + (1, 2, 3), # mono_skip_2_max_read_3 + (2, 0, None), # stereo_skip_0_max_read_None + (2, 3, None), # stereo_skip_3_max_read_None + (2, 2, -1), # stereo_skip_2_max_read_negative + (2, 2, 3), # stereo_skip_2_max_read_3 ], ids=[ "mono_skip_0_max_read_None", @@ -165,9 +166,20 @@ @pytest.mark.parametrize( - "min_dur, max_dur, max_silence, drop_trailing_silence, strict_min_dur, kwargs, expected", + ( + "min_dur, max_dur, max_silence, drop_trailing_silence, " + + "strict_min_dur, kwargs, expected" + ), [ - (0.2, 5, 0.2, False, False, {"eth": 50}, [(2, 16), (17, 31), (34, 76)]), + ( + 0.2, + 5, + 0.2, + False, + False, + {"eth": 50}, + [(2, 16), (17, 31), (34, 76)], + ), # simple ( 0.3, 2, @@ -176,9 +188,9 @@ False, {"eth": 50}, [(2, 16), (17, 31), (34, 54), (54, 74), (74, 76)], - ), - (3, 5, 0.2, False, False, {"eth": 50}, [(34, 76)]), - (0.2, 80, 10, False, False, {"eth": 50}, [(2, 76)]), + ), # short_max_dur + (3, 5, 0.2, False, False, {"eth": 50}, [(34, 76)]), # long_min_dur + (0.2, 80, 10, False, False, {"eth": 50}, [(2, 76)]), # long_max_silence ( 0.2, 5, @@ -187,7 +199,7 @@ False, {"eth": 50}, [(2, 14), (17, 24), (26, 29), (34, 76)], - ), + ), # zero_max_silence ( 0.2, 5, @@ -196,11 +208,43 @@ False, {"energy_threshold": 40}, [(0, 50), (50, 76)], - ), - (0.2, 5, 0.2, False, False, {"energy_threshold": 60}, []), - (0.2, 10, 0.5, True, False, {"eth": 50}, [(2, 76)]), - (0.2, 5, 0.2, True, False, {"eth": 50}, [(2, 14), (17, 29), (34, 76)]), - (1.5, 5, 0.2, True, False, {"eth": 50}, [(34, 76)]), + ), # low_energy_threshold + ( + 0.2, + 5, + 0.2, + False, + False, + {"energy_threshold": 60}, + [], + ), # high_energy_threshold + ( + 0.2, + 10, + 0.5, + True, + False, + {"eth": 50}, + [(2, 76)], + ), # trim_leading_and_trailing_silence + ( + 0.2, + 5, + 0.2, + True, + False, + {"eth": 50}, + [(2, 14), (17, 29), (34, 76)], + ), # drop_trailing_silence + ( + 1.5, + 5, + 0.2, + True, + False, + {"eth": 50}, + [(34, 76)], + ), # drop_trailing_silence_2 ( 0.3, 2, @@ -209,7 +253,7 @@ True, {"eth": 50}, [(2, 16), (17, 31), (34, 54), (54, 74)], - ), + ), # strict_min_dur ], ids=[ "simple", @@ -272,7 +316,7 @@ assert len(regions_ar) == len(expected), err_msg sample_width = 2 - for reg, reg_ar, exp in zip(regions, regions_ar, expected): + for reg, reg_ar, exp in zip(regions, regions_ar, expected, strict=True): onset, offset = exp exp_data = data[onset * sample_width : offset * sample_width] assert bytes(reg) == exp_data @@ -282,21 +326,57 @@ @pytest.mark.parametrize( "channels, kwargs, expected", [ - (2, {}, [(2, 32), (34, 76)]), - (1, {"max_read": 5}, [(2, 16), (17, 31), (34, 50)]), - (1, {"mr": 5}, [(2, 16), (17, 31), (34, 50)]), - (1, {"eth": 50, "use_channel": 0}, [(2, 16), (17, 31), (34, 76)]), - (1, {"eth": 50, "uc": 1}, [(2, 16), (17, 31), (34, 76)]), - (1, {"eth": 50, "use_channel": None}, [(2, 16), (17, 31), (34, 76)]), - (2, {"eth": 50, "use_channel": 0}, [(2, 16), (17, 31), (34, 76)]), - (2, {"eth": 50}, [(2, 32), (34, 76)]), - (2, {"eth": 50, "use_channel": -2}, [(2, 16), (17, 31), (34, 76)]), - (2, {"eth": 50, "uc": 1}, [(10, 32), (36, 76)]), - (2, {"eth": 50, "uc": -1}, [(10, 32), (36, 76)]), - (1, {"eth": 50, "uc": "mix"}, [(2, 16), (17, 31), (34, 76)]), - (2, {"energy_threshold": 53.5, "use_channel": "mix"}, [(54, 76)]), - (2, {"eth": 52, "uc": "mix"}, [(17, 26), (54, 76)]), - (2, {"uc": "mix"}, [(10, 16), (17, 31), (36, 76)]), + (2, {}, [(2, 32), (34, 76)]), # stereo_all_default + (1, {"max_read": 5}, [(2, 16), (17, 31), (34, 50)]), # mono_max_read + ( + 1, + {"mr": 5}, + [(2, 16), (17, 31), (34, 50)], + ), # mono_max_read_short_name + ( + 1, + {"eth": 50, "use_channel": 0}, + [(2, 16), (17, 31), (34, 76)], + ), # mono_use_channel_1 + (1, {"eth": 50, "uc": 1}, [(2, 16), (17, 31), (34, 76)]), # mono_uc_1 + ( + 1, + {"eth": 50, "use_channel": None}, + [(2, 16), (17, 31), (34, 76)], + ), # mono_use_channel_None + ( + 2, + {"eth": 50, "use_channel": 0}, + [(2, 16), (17, 31), (34, 76)], + ), # stereo_use_channel_1 + ( + 2, + {"eth": 50}, + [(2, 32), (34, 76)], + ), # stereo_use_channel_no_use_channel_given + ( + 2, + {"eth": 50, "use_channel": -2}, + [(2, 16), (17, 31), (34, 76)], + ), # stereo_use_channel_minus_2 + (2, {"eth": 50, "uc": 1}, [(10, 32), (36, 76)]), # stereo_uc_2 + (2, {"eth": 50, "uc": -1}, [(10, 32), (36, 76)]), # stereo_uc_minus_1 + ( + 1, + {"eth": 50, "uc": "mix"}, + [(2, 16), (17, 31), (34, 76)], + ), # mono_uc_mix + ( + 2, + {"energy_threshold": 53.5, "use_channel": "mix"}, + [(54, 76)], + ), # stereo_use_channel_mix + (2, {"eth": 52, "uc": "mix"}, [(17, 26), (54, 76)]), # stereo_uc_mix + ( + 2, + {"uc": "mix"}, + [(10, 16), (17, 31), (36, 76)], + ), # stereo_uc_mix_default_eth ], ids=[ "stereo_all_default", @@ -365,7 +445,7 @@ sample_width = 2 sample_size_bytes = sample_width * channels - for reg, reg_ar, exp in zip(regions, regions_ar, expected): + for reg, reg_ar, exp in zip(regions, regions_ar, expected, strict=True): onset, offset = exp exp_data = data[onset * sample_size_bytes : offset * sample_size_bytes] assert len(bytes(reg)) == len(exp_data) @@ -375,19 +455,103 @@ @pytest.mark.parametrize( "min_dur, max_dur, max_silence, channels, kwargs, expected", [ - (0.2, 5, 0.2, 1, {"aw": 0.2}, [(2, 30), (34, 76)]), - (0.2, 5, 0.3, 1, {"aw": 0.2}, [(2, 30), (34, 76)]), - (0.2, 5, 0.4, 1, {"aw": 0.2}, [(2, 32), (34, 76)]), - (0.2, 5, 0, 1, {"aw": 0.2}, [(2, 14), (16, 24), (26, 28), (34, 76)]), - (0.2, 5, 0.2, 1, {"aw": 0.2}, [(2, 30), (34, 76)]), - (0.3, 5, 0, 1, {"aw": 0.3}, [(3, 12), (15, 24), (36, 76)]), - (0.3, 5, 0.3, 1, {"aw": 0.3}, [(3, 27), (36, 76)]), - (0.3, 5, 0.5, 1, {"aw": 0.3}, [(3, 27), (36, 76)]), - (0.3, 5, 0.6, 1, {"aw": 0.3}, [(3, 30), (36, 76)]), - (0.2, 5, 0, 1, {"aw": 0.4}, [(4, 12), (16, 24), (36, 76)]), - (0.2, 5, 0.3, 1, {"aw": 0.4}, [(4, 12), (16, 24), (36, 76)]), - (0.2, 5, 0.4, 1, {"aw": 0.4}, [(4, 28), (36, 76)]), - (0.2, 5, 0.2, 2, {"analysis_window": 0.2}, [(2, 32), (34, 76)]), + ( + 0.2, + 5, + 0.2, + 1, + {"aw": 0.2}, + [(2, 30), (34, 76)], + ), # mono_aw_0_2_max_silence_0_2 + ( + 0.2, + 5, + 0.3, + 1, + {"aw": 0.2}, + [(2, 30), (34, 76)], + ), # mono_aw_0_2_max_silence_0_3 + ( + 0.2, + 5, + 0.4, + 1, + {"aw": 0.2}, + [(2, 32), (34, 76)], + ), # mono_aw_0_2_max_silence_0_4 + ( + 0.2, + 5, + 0, + 1, + {"aw": 0.2}, + [(2, 14), (16, 24), (26, 28), (34, 76)], + ), # mono_aw_0_2_max_silence_0 + (0.2, 5, 0.2, 1, {"aw": 0.2}, [(2, 30), (34, 76)]), # mono_aw_0_2 + ( + 0.3, + 5, + 0, + 1, + {"aw": 0.3}, + [(3, 12), (15, 24), (36, 76)], + ), # mono_aw_0_3_max_silence_0 + ( + 0.3, + 5, + 0.3, + 1, + {"aw": 0.3}, + [(3, 27), (36, 76)], + ), # mono_aw_0_3_max_silence_0_3 + ( + 0.3, + 5, + 0.5, + 1, + {"aw": 0.3}, + [(3, 27), (36, 76)], + ), # mono_aw_0_3_max_silence_0_5 + ( + 0.3, + 5, + 0.6, + 1, + {"aw": 0.3}, + [(3, 30), (36, 76)], + ), # mono_aw_0_3_max_silence_0_6 + ( + 0.2, + 5, + 0, + 1, + {"aw": 0.4}, + [(4, 12), (16, 24), (36, 76)], + ), # mono_aw_0_4_max_silence_0 + ( + 0.2, + 5, + 0.3, + 1, + {"aw": 0.4}, + [(4, 12), (16, 24), (36, 76)], + ), # mono_aw_0_4_max_silence_0_3 + ( + 0.2, + 5, + 0.4, + 1, + {"aw": 0.4}, + [(4, 28), (36, 76)], + ), # mono_aw_0_4_max_silence_0_4 + ( + 0.2, + 5, + 0.2, + 2, + {"analysis_window": 0.2}, + [(2, 32), (34, 76)], + ), # stereo_uc_None_analysis_window_0_2 ( 0.2, 5, @@ -395,7 +559,7 @@ 2, {"uc": None, "analysis_window": 0.2}, [(2, 32), (34, 76)], - ), + ), # stereo_uc_any_analysis_window_0_2 ( 0.2, 5, @@ -403,7 +567,7 @@ 2, {"use_channel": None, "analysis_window": 0.3}, [(3, 30), (36, 76)], - ), + ), # stereo_use_channel_None_aw_0_3_max_silence_0_2 ( 0.2, 5, @@ -411,7 +575,7 @@ 2, {"use_channel": "any", "analysis_window": 0.3}, [(3, 33), (36, 76)], - ), + ), # stereo_use_channel_any_aw_0_3_max_silence_0_3 ( 0.2, 5, @@ -419,7 +583,7 @@ 2, {"use_channel": None, "analysis_window": 0.4}, [(4, 28), (36, 76)], - ), + ), # stereo_use_channel_None_aw_0_4_max_silence_0_2 ( 0.2, 5, @@ -427,7 +591,7 @@ 2, {"use_channel": "any", "analysis_window": 0.4}, [(4, 32), (36, 76)], - ), + ), # stereo_use_channel_any_aw_0_3_max_silence_0_4 ( 0.2, 5, @@ -435,7 +599,7 @@ 2, {"uc": 0, "analysis_window": 0.2}, [(2, 30), (34, 76)], - ), + ), # stereo_uc_0_analysis_window_0_2 ( 0.2, 5, @@ -443,7 +607,7 @@ 2, {"uc": 1, "analysis_window": 0.2}, [(10, 32), (36, 76)], - ), + ), # stereo_uc_1_analysis_window_0_2 ( 0.2, 5, @@ -451,7 +615,7 @@ 2, {"uc": "mix", "analysis_window": 0.1}, [(10, 14), (17, 24), (26, 29), (36, 76)], - ), + ), # stereo_uc_mix_aw_0_1_max_silence_0 ( 0.2, 5, @@ -459,7 +623,7 @@ 2, {"uc": "mix", "analysis_window": 0.1}, [(10, 15), (17, 25), (26, 30), (36, 76)], - ), + ), # stereo_uc_mix_aw_0_1_max_silence_0_1 ( 0.2, 5, @@ -467,7 +631,7 @@ 2, {"uc": "mix", "analysis_window": 0.1}, [(10, 16), (17, 31), (36, 76)], - ), + ), # stereo_uc_mix_aw_0_1_max_silence_0_2 ( 0.2, 5, @@ -475,7 +639,7 @@ 2, {"uc": "mix", "analysis_window": 0.1}, [(10, 32), (36, 76)], - ), + ), # stereo_uc_mix_aw_0_1_max_silence_0_3 ( 0.3, 5, @@ -483,7 +647,7 @@ 2, {"uc": "avg", "analysis_window": 0.2}, [(10, 14), (16, 24), (36, 76)], - ), + ), # stereo_uc_avg_aw_0_2_max_silence_0_min_dur_0_3 ( 0.41, 5, @@ -491,7 +655,7 @@ 2, {"uc": "average", "analysis_window": 0.2}, [(16, 24), (36, 76)], - ), + ), # stereo_uc_average_aw_0_2_max_silence_0_min_dur_0_41 ( 0.2, 5, @@ -499,7 +663,7 @@ 2, {"uc": "mix", "analysis_window": 0.2}, [(10, 14), (16, 24), (26, 28), (36, 76)], - ), + ), # stereo_uc_mix_aw_0_2_max_silence_0_1 ( 0.2, 5, @@ -507,7 +671,7 @@ 2, {"uc": "mix", "analysis_window": 0.2}, [(10, 30), (36, 76)], - ), + ), # stereo_uc_mix_aw_0_2_max_silence_0_2 ( 0.2, 5, @@ -515,7 +679,7 @@ 2, {"uc": "mix", "analysis_window": 0.2}, [(10, 32), (36, 76)], - ), + ), # stereo_uc_mix_aw_0_2_max_silence_0_4 ( 0.2, 5, @@ -523,7 +687,7 @@ 2, {"uc": "mix", "analysis_window": 0.2}, [(10, 32), (36, 76)], - ), + ), # stereo_uc_mix_aw_0_2_max_silence_0_5 ( 0.2, 5, @@ -531,7 +695,7 @@ 2, {"uc": "mix", "analysis_window": 0.2}, [(10, 34), (36, 76)], - ), + ), # stereo_uc_mix_aw_0_2_max_silence_0_6 ( 0.2, 5, @@ -539,7 +703,7 @@ 2, {"uc": "mix", "analysis_window": 0.3}, [(9, 24), (27, 30), (36, 76)], - ), + ), # stereo_uc_mix_aw_0_3_max_silence_0 ( 0.4, 5, @@ -547,7 +711,7 @@ 2, {"uc": "mix", "analysis_window": 0.3}, [(9, 24), (36, 76)], - ), + ), # stereo_uc_mix_aw_0_3_max_silence_0_min_dur_0_3 ( 0.2, 5, @@ -555,7 +719,7 @@ 2, {"uc": "mix", "analysis_window": 0.3}, [(9, 57), (57, 76)], - ), + ), # stereo_uc_mix_aw_0_3_max_silence_0_6 ( 0.2, 5.1, @@ -563,7 +727,7 @@ 2, {"uc": "mix", "analysis_window": 0.3}, [(9, 60), (60, 76)], - ), + ), # stereo_uc_mix_aw_0_3_max_silence_0_6_max_dur_5_1 ( 0.2, 5.2, @@ -571,7 +735,7 @@ 2, {"uc": "mix", "analysis_window": 0.3}, [(9, 60), (60, 76)], - ), + ), # stereo_uc_mix_aw_0_3_max_silence_0_6_max_dur_5_2 ( 0.2, 5.3, @@ -579,7 +743,7 @@ 2, {"uc": "mix", "analysis_window": 0.3}, [(9, 60), (60, 76)], - ), + ), # stereo_uc_mix_aw_0_3_max_silence_0_6_max_dur_5_3 ( 0.2, 5.4, @@ -587,7 +751,7 @@ 2, {"uc": "mix", "analysis_window": 0.3}, [(9, 63), (63, 76)], - ), + ), # stereo_uc_mix_aw_0_3_max_silence_0_6_max_dur_5_4 ( 0.2, 5, @@ -595,7 +759,7 @@ 2, {"uc": "mix", "analysis_window": 0.4}, [(16, 24), (36, 76)], - ), + ), # stereo_uc_mix_aw_0_4_max_silence_0 ( 0.2, 5, @@ -603,7 +767,7 @@ 2, {"uc": "mix", "analysis_window": 0.4}, [(16, 24), (36, 76)], - ), + ), # stereo_uc_mix_aw_0_4_max_silence_0_3 ( 0.2, 5, @@ -611,7 +775,7 @@ 2, {"uc": "mix", "analysis_window": 0.4}, [(16, 28), (36, 76)], - ), + ), # stereo_uc_mix_aw_0_4_max_silence_0_4 ], ids=[ "mono_aw_0_2_max_silence_0_2", @@ -702,7 +866,7 @@ sample_width = 2 sample_size_bytes = sample_width * channels - for reg, reg_ar, exp in zip(regions, regions_ar, expected): + for reg, reg_ar, exp in zip(regions, regions_ar, expected, strict=True): onset, offset = exp exp_data = data[onset * sample_size_bytes : offset * sample_size_bytes] assert bytes(reg) == exp_data @@ -725,7 +889,7 @@ sw=2, ch=1, analysis_window=0.1, - validator=lambda x: array_("h", x)[0] >= 320, + validator=lambda x: to_array(x, sample_width=2, channels=1)[0] >= 320, ) region = AudioRegion(data, 10, 2, 1) @@ -736,7 +900,7 @@ drop_trailing_silence=False, strict_min_dur=False, analysis_window=0.1, - validator=lambda x: array_("h", x)[0] >= 320, + validator=lambda x: to_array(x, sample_width=2, channels=1)[0] >= 320, ) expected = [(2, 16), (17, 31), (34, 76)] @@ -750,7 +914,7 @@ assert len(regions_ar) == len(expected), err_msg sample_size_bytes = 2 - for reg, reg_ar, exp in zip(regions, regions_ar, expected): + for reg, reg_ar, exp in zip(regions, regions_ar, expected, strict=True): onset, offset = exp exp_data = data[onset * sample_size_bytes : offset * sample_size_bytes] assert bytes(reg) == exp_data @@ -763,20 +927,23 @@ ( "tests/data/test_split_10HZ_stereo.raw", {"audio_format": "raw", "sr": 10, "sw": 2, "ch": 2}, - ), + ), # filename_audio_format ( "tests/data/test_split_10HZ_stereo.raw", {"fmt": "raw", "sr": 10, "sw": 2, "ch": 2}, - ), - ("tests/data/test_split_10HZ_stereo.raw", {"sr": 10, "sw": 2, "ch": 2}), + ), # filename_audio_format_short_name + ( + "tests/data/test_split_10HZ_stereo.raw", + {"sr": 10, "sw": 2, "ch": 2}, + ), # filename_no_audio_format ( "tests/data/test_split_10HZ_stereo.raw", {"sampling_rate": 10, "sample_width": 2, "channels": 2}, - ), + ), # filename_no_long_audio_params ( open("tests/data/test_split_10HZ_stereo.raw", "rb").read(), {"sr": 10, "sw": 2, "ch": 2}, - ), + ), # bytes_ ( AudioReader( "tests/data/test_split_10HZ_stereo.raw", @@ -786,7 +953,7 @@ block_dur=0.1, ), {}, - ), + ), # audio_reader ( AudioRegion( open("tests/data/test_split_10HZ_stereo.raw", "rb").read(), @@ -795,13 +962,13 @@ 2, ), {}, - ), + ), # audio_region ( get_audio_source( "tests/data/test_split_10HZ_stereo.raw", sr=10, sw=2, ch=2 ), {}, - ), + ), # audio_source ], ids=[ "filename_audio_format", @@ -835,7 +1002,7 @@ err_msg = "Wrong number of regions after split, expected: " err_msg += "{}, found: {}".format(expected, regions) assert len(regions) == len(expected), err_msg - for reg, exp in zip(regions, expected): + for reg, exp in zip(regions, expected, strict=True): onset, offset = exp exp_data = data[onset * sample_width * 2 : offset * sample_width * 2] assert bytes(reg) == exp_data @@ -884,9 +1051,9 @@ @pytest.mark.parametrize( "max_silence, max_dur, analysis_window", [ - (0.5, 0.5, 0.1), - (0.5, 0.4, 0.1), - (0.44, 0.49, 0.1), + (0.5, 0.5, 0.1), # max_silence_equals_max_dur + (0.5, 0.4, 0.1), # max_silence_greater_than_max_dur + (0.44, 0.49, 0.1), # durations_OK_but_wrong_number_of_analysis_windows ], ids=[ "max_silence_equals_max_dur", @@ -926,13 +1093,13 @@ @pytest.mark.parametrize( "wrong_param", [ - {"min_dur": -1}, - {"min_dur": 0}, - {"max_dur": -1}, - {"max_dur": 0}, - {"max_silence": -1}, - {"analysis_window": 0}, - {"analysis_window": -1}, + {"min_dur": -1}, # negative_min_dur + {"min_dur": 0}, # zero_min_dur + {"max_dur": -1}, # negative_max_dur + {"max_dur": 0}, # zero_max_dur + {"max_silence": -1}, # negative_max_silence + {"analysis_window": 0}, # zero_analysis_window + {"analysis_window": -1}, # negative_analysis_window ], ids=[ "negative_min_dur", @@ -979,7 +1146,7 @@ data = fp.read() region = AudioRegion(data, 10, 2, 1) - with patch("auditok.plotting.plot") as patch_fn: + with patch("auditok.core.plot") as patch_fn: regions = region.split_and_plot( min_dur=0.2, max_dur=5, @@ -1014,23 +1181,125 @@ @pytest.mark.parametrize( - "data, start, sampling_rate, sample_width, channels, expected_end, expected_duration_s, expected_duration_ms", + ( + "data, start, sampling_rate, sample_width, channels, expected_end, " + + "expected_duration_s, expected_duration_ms" + ), [ - (b"\0" * 8000, 0, 8000, 1, 1, 1, 1, 1000), - (b"\0" * 7992, 0, 8000, 1, 1, 0.999, 0.999, 999), - (b"\0" * 7994, 0, 8000, 1, 1, 0.99925, 0.99925, 999), - (b"\0" * 7996, 0, 8000, 1, 1, 0.9995, 0.9995, 1000), - (b"\0" * 7998, 0, 8000, 1, 1, 0.99975, 0.99975, 1000), - (b"\0" * 8000 * 2, 0, 8000, 2, 1, 1, 1, 1000), - (b"\0" * 8000 * 2, 0, 8000, 1, 2, 1, 1, 1000), - (b"\0" * 8000 * 5, 0, 8000, 1, 5, 1, 1, 1000), - (b"\0" * 8000 * 2 * 5, 0, 8000, 2, 5, 1, 1, 1000), - (b"\0" * 7992 * 2 * 5, 0, 8000, 2, 5, 0.999, 0.999, 999), - (b"\0" * 7994 * 2 * 5, 0, 8000, 2, 5, 0.99925, 0.99925, 999), - (b"\0" * 7996 * 2 * 5, 0, 8000, 2, 5, 0.9995, 0.9995, 1000), - (b"\0" * 7998 * 2 * 5, 0, 8000, 2, 5, 0.99975, 0.99975, 1000), - (b"\0" * int(8000 * 1.33), 2.7, 8000, 1, 1, 4.03, 1.33, 1330), - (b"\0" * int(8000 * 0.476), 11.568, 8000, 1, 1, 12.044, 0.476, 476), + (b"\0" * 8000, 0, 8000, 1, 1, 1, 1, 1000), # simple + ( + b"\0" * 7992, + 0, + 8000, + 1, + 1, + 0.999, + 0.999, + 999, + ), # one_ms_less_than_1_sec + ( + b"\0" * 7994, + 0, + 8000, + 1, + 1, + 0.99925, + 0.99925, + 999, + ), # tree_quarter_ms_less_than_1_sec + ( + b"\0" * 7996, + 0, + 8000, + 1, + 1, + 0.9995, + 0.9995, + 1000, + ), # half_ms_less_than_1_sec + ( + b"\0" * 7998, + 0, + 8000, + 1, + 1, + 0.99975, + 0.99975, + 1000, + ), # quarter_ms_less_than_1_sec + (b"\0" * 8000 * 2, 0, 8000, 2, 1, 1, 1, 1000), # simple_sample_width_2 + (b"\0" * 8000 * 2, 0, 8000, 1, 2, 1, 1, 1000), # simple_stereo + (b"\0" * 8000 * 5, 0, 8000, 1, 5, 1, 1, 1000), # simple_multichannel + ( + b"\0" * 8000 * 2 * 5, + 0, + 8000, + 2, + 5, + 1, + 1, + 1000, + ), # simple_sample_width_2_multichannel + ( + b"\0" * 7992 * 2 * 5, + 0, + 8000, + 2, + 5, + 0.999, + 0.999, + 999, + ), # one_ms_less_than_1s_sw_2_multichannel + ( + b"\0" * 7994 * 2 * 5, + 0, + 8000, + 2, + 5, + 0.99925, + 0.99925, + 999, + ), # tree_qrt_ms_lt_1_s_sw_2_multichannel + ( + b"\0" * 7996 * 2 * 5, + 0, + 8000, + 2, + 5, + 0.9995, + 0.9995, + 1000, + ), # half_ms_lt_1s_sw_2_multichannel + ( + b"\0" * 7998 * 2 * 5, + 0, + 8000, + 2, + 5, + 0.99975, + 0.99975, + 1000, + ), # quarter_ms_lt_1s_sw_2_multichannel + ( + b"\0" * int(8000 * 1.33), + 2.7, + 8000, + 1, + 1, + 4.03, + 1.33, + 1330, + ), # arbitrary_length_1 + ( + b"\0" * int(8000 * 0.476), + 11.568, + 8000, + 1, + 1, + 12.044, + 0.476, + 476, + ), # arbitrary_length_2 ( b"\0" * int(8000 * 1.711) * 2 * 3, 9.415, @@ -1040,7 +1309,7 @@ 11.126, 1.711, 1711, - ), + ), # arbitrary_length_sw_2_multichannel ( b"\0" * int(3172 * 1.318), 17.236, @@ -1050,7 +1319,7 @@ 17.236 + int(3172 * 1.318) / 3172, int(3172 * 1.318) / 3172, 1318, - ), + ), # arbitrary_sampling_rate ( b"\0" * int(11317 * 0.716) * 2 * 3, 18.811, @@ -1060,7 +1329,7 @@ 18.811 + int(11317 * 0.716) / 11317, int(11317 * 0.716) / 11317, 716, - ), + ), # arbitrary_sr_sw_2_multichannel ], ids=[ "simple", @@ -1079,7 +1348,7 @@ "arbitrary_length_1", "arbitrary_length_2", "arbitrary_length_sw_2_multichannel", - "arbitrary_samplig_rate", + "arbitrary_sampling_rate", "arbitrary_sr_sw_2_multichannel", ], ) @@ -1122,13 +1391,13 @@ @pytest.mark.parametrize( "skip, max_read, channels", [ - (0, -1, 1), - (0, -1, 2), - (2, -1, 1), - (2, None, 1), - (2, 3, 1), - (2, 3.5, 2), - (2.4, 3.5, 2), + (0, -1, 1), # no_skip_read_all + (0, -1, 2), # no_skip_read_all_stereo + (2, -1, 1), # skip_2_read_all + (2, None, 1), # skip_2_read_all_None + (2, 3, 1), # skip_2_read_3 + (2, 3.5, 2), # skip_2_read_3_5_stereo + (2.4, 3.5, 2), # skip_2_4_read_3_5_stereo ], ids=[ "no_skip_read_all", @@ -1180,11 +1449,11 @@ @pytest.mark.parametrize( "max_read", [ - None, - -1, + None, # None + -1, # negative ], ids=[ - "none", + "None", "negative", ], ) @@ -1207,22 +1476,34 @@ @pytest.mark.parametrize( "format, start, expected", [ - ("output.wav", 1.230, "output.wav"), - ("output_{meta.start:g}.wav", 1.230, "output_1.23.wav"), - ("output_{meta.start}.wav", 1.233712, "output_1.233712.wav"), - ("output_{meta.start:.2f}.wav", 1.2300001, "output_1.23.wav"), - ("output_{meta.start:.3f}.wav", 1.233712, "output_1.234.wav"), - ("output_{meta.start:.8f}.wav", 1.233712, "output_1.23371200.wav"), + ("output.wav", 1.230, "output.wav"), # simple + ("output_{meta.start:g}.wav", 1.230, "output_1.23.wav"), # start + ("output_{meta.start}.wav", 1.233712, "output_1.233712.wav"), # start_2 + ( + "output_{meta.start:.2f}.wav", + 1.2300001, + "output_1.23.wav", + ), # start_3 + ( + "output_{meta.start:.3f}.wav", + 1.233712, + "output_1.234.wav", + ), # start_4 + ( + "output_{meta.start:.8f}.wav", + 1.233712, + "output_1.23371200.wav", + ), # start_5 ( "output_{meta.start}_{meta.end}_{duration}.wav", 1.455, "output_1.455_2.455_1.0.wav", - ), + ), # start_end_duration ( "output_{meta.start}_{meta.end}_{duration}.wav", 1.455321, "output_1.455321_2.455321_1.0.wav", - ), + ), # start_end_duration_2 ], ids=[ "simple", @@ -1260,74 +1541,102 @@ ( AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1), slice(0, 500), - b"a" * 80, + b"a" * 80, # first_half ), ( AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1), slice(500, None), - b"b" * 80, + b"b" * 80, # second_half ), ( AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1), slice(-500, None), - b"b" * 80, + b"b" * 80, # second_half_negative ), ( AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1), slice(200, 750), - b"a" * 48 + b"b" * 40, + b"a" * 48 + b"b" * 40, # middle ), ( AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1), slice(-800, -250), - b"a" * 48 + b"b" * 40, + b"a" * 48 + b"b" * 40, # middle_negative ), ( AudioRegion(b"a" * 160 + b"b" * 160, 160, 2, 1), slice(200, 750), - b"a" * 96 + b"b" * 80, + b"a" * 96 + b"b" * 80, # middle_sw2 ), ( AudioRegion(b"a" * 160 + b"b" * 160, 160, 1, 2), slice(200, 750), - b"a" * 96 + b"b" * 80, + b"a" * 96 + b"b" * 80, # middle_ch2 ), ( AudioRegion(b"a" * 320 + b"b" * 320, 160, 2, 2), slice(200, 750), - b"a" * 192 + b"b" * 160, + b"a" * 192 + b"b" * 160, # middle_sw2_ch2 ), ( AudioRegion(b"a" * 4000 + b"b" * 4000, 8000, 1, 1), slice(1, None), - b"a" * (4000 - 8) + b"b" * 4000, + b"a" * (4000 - 8) + b"b" * 4000, # but_first_sample ), ( AudioRegion(b"a" * 4000 + b"b" * 4000, 8000, 1, 1), slice(-999, None), - b"a" * (4000 - 8) + b"b" * 4000, + b"a" * (4000 - 8) + b"b" * 4000, # but_first_sample_negative ), ( AudioRegion(b"a" * 4000 + b"b" * 4000, 8000, 1, 1), slice(0, 999), - b"a" * 4000 + b"b" * (4000 - 8), + b"a" * 4000 + b"b" * (4000 - 8), # but_last_sample ), ( AudioRegion(b"a" * 4000 + b"b" * 4000, 8000, 1, 1), slice(0, -1), - b"a" * 4000 + b"b" * (4000 - 8), + b"a" * 4000 + b"b" * (4000 - 8), # but_last_sample_negative ), - (AudioRegion(b"a" * 160, 160, 1, 1), slice(-5000, None), b"a" * 160), - (AudioRegion(b"a" * 160, 160, 1, 1), slice(None, -1500), b""), - (AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1), slice(0, 0), b""), - (AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1), slice(200, 100), b""), - (AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1), slice(2000, 3000), b""), - (AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1), slice(-100, -200), b""), - (AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1), slice(0, -2000), b""), + ( + AudioRegion(b"a" * 160, 160, 1, 1), + slice(-5000, None), + b"a" * 160, # big_negative_start + ), + ( + AudioRegion(b"a" * 160, 160, 1, 1), + slice(None, -1500), + b"", # big_negative_stop + ), + ( + AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1), + slice(0, 0), + b"", # empty + ), + ( + AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1), + slice(200, 100), + b"", # empty_start_stop_reversed + ), + ( + AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1), + slice(2000, 3000), + b"", # empty_big_positive_start + ), + ( + AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1), + slice(-100, -200), + b"", # empty_negative_reversed + ), + ( + AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1), + slice(0, -2000), + b"", # empty_big_negative_stop + ), ( AudioRegion(b"a" * 124 + b"b" * 376, 1234, 1, 1), slice(100, 200), - b"a" + b"b" * 123, + b"a" + b"b" * 123, # arbitrary_sampling_rate ), ], ids=[ @@ -1369,112 +1678,127 @@ AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1), slice(0, 80), 0, - b"a" * 80, + b"a" * 80, # first_half ), ( AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1), slice(80, None), 0.5, - b"b" * 80, + b"b" * 80, # second_half ), ( AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1), slice(-80, None), 0.5, - b"b" * 80, + b"b" * 80, # second_half_negative ), ( AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1), slice(160 // 5, 160 // 4 * 3), 0.2, - b"a" * 48 + b"b" * 40, + b"a" * 48 + b"b" * 40, # middle ), ( AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1), slice(-160 // 5 * 4, -160 // 4), 0.2, - b"a" * 48 + b"b" * 40, + b"a" * 48 + b"b" * 40, # middle_negative ), ( AudioRegion(b"a" * 160 + b"b" * 160, 160, 2, 1), slice(160 // 5, 160 // 4 * 3), 0.2, - b"a" * 96 + b"b" * 80, + b"a" * 96 + b"b" * 80, # middle_sw2 ), ( AudioRegion(b"a" * 160 + b"b" * 160, 160, 1, 2), slice(160 // 5, 160 // 4 * 3), 0.2, - b"a" * 96 + b"b" * 80, + b"a" * 96 + b"b" * 80, # middle_ch2 ), ( AudioRegion(b"a" * 320 + b"b" * 320, 160, 2, 2), slice(160 // 5, 160 // 4 * 3), 0.2, - b"a" * 192 + b"b" * 160, + b"a" * 192 + b"b" * 160, # middle_sw2_ch2 ), ( AudioRegion(b"a" * 4000 + b"b" * 4000, 8000, 1, 1), slice(1, None), 1 / 8000, - b"a" * (4000 - 1) + b"b" * 4000, + b"a" * (4000 - 1) + b"b" * 4000, # but_first_sample ), ( AudioRegion(b"a" * 4000 + b"b" * 4000, 8000, 1, 1), slice(-7999, None), 1 / 8000, - b"a" * (4000 - 1) + b"b" * 4000, + b"a" * (4000 - 1) + b"b" * 4000, # but_first_sample_negative ), ( AudioRegion(b"a" * 4000 + b"b" * 4000, 8000, 1, 1), slice(0, 7999), 0, - b"a" * 4000 + b"b" * (4000 - 1), + b"a" * 4000 + b"b" * (4000 - 1), # but_last_sample ), ( AudioRegion(b"a" * 4000 + b"b" * 4000, 8000, 1, 1), slice(0, -1), 0, - b"a" * 4000 + b"b" * (4000 - 1), + b"a" * 4000 + b"b" * (4000 - 1), # but_last_sample_negative ), - (AudioRegion(b"a" * 160, 160, 1, 1), slice(-1600, None), 0, b"a" * 160), - (AudioRegion(b"a" * 160, 160, 1, 1), slice(None, -1600), 0, b""), - (AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1), slice(0, 0), 0, b""), + ( + AudioRegion(b"a" * 160, 160, 1, 1), + slice(-1600, None), + 0, + b"a" * 160, # big_negative_start + ), + ( + AudioRegion(b"a" * 160, 160, 1, 1), + slice(None, -1600), + 0, + b"", # big_negative_stop + ), + ( + AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1), + slice(0, 0), + 0, + b"", # empty + ), ( AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1), slice(80, 40), 0.5, - b"", + b"", # empty_start_stop_reversed ), ( AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1), slice(1600, 3000), 10, - b"", + b"", # empty_big_positive_start ), ( AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1), slice(-16, -32), 0.9, - b"", + b"", # empty_negative_reversed ), ( AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1), slice(0, -2000), 0, - b"", + b"", # empty_big_negative_stop ), ( AudioRegion(b"a" * 124 + b"b" * 376, 1235, 1, 1), slice(100, 200), 100 / 1235, - b"a" * 24 + b"b" * 76, + b"a" * 24 + b"b" * 76, # arbitrary_sampling_rate ), ( AudioRegion(b"a" * 124 + b"b" * 376, 1235, 2, 2), slice(25, 50), 25 / 1235, - b"a" * 24 + b"b" * 76, + b"a" * 24 + b"b" * 76, # arbitrary_sampling_rate_middle_sw2_ch2 ), ], ids=[ @@ -1509,9 +1833,9 @@ @pytest.mark.parametrize( "sampling_rate, sample_width, channels", [ - (8000, 1, 1), - (8000, 2, 2), - (5413, 2, 3), + (8000, 1, 1), # simple + (8000, 2, 2), # stereo_sw_2 + (5413, 2, 3), # arbitrary_sr_multichannel ], ids=[ "simple", @@ -1534,9 +1858,9 @@ @pytest.mark.parametrize( "sampling_rate, sample_width, channels", [ - (8000, 1, 1), - (8000, 2, 2), - (5413, 2, 3), + (8000, 1, 1), # simple + (8000, 2, 2), # stereo_sw_2 + (5413, 2, 3), # arbitrary_sr_multichannel ], ids=[ "simple", @@ -1558,7 +1882,6 @@ def test_concatenation_different_sampling_rate_error(): - region_1 = AudioRegion(b"a" * 100, 8000, 1, 1) region_2 = AudioRegion(b"b" * 100, 3000, 1, 1) @@ -1566,24 +1889,22 @@ region_1 + region_2 assert str(val_err.value) == ( "Can only concatenate AudioRegions of the same " - "sampling rate (8000 != 3000)" + "sampling rate (8000 != 3000)" # different_sampling_rate ) def test_concatenation_different_sample_width_error(): - region_1 = AudioRegion(b"a" * 100, 8000, 2, 1) region_2 = AudioRegion(b"b" * 100, 8000, 4, 1) with pytest.raises(ValueError) as val_err: region_1 + region_2 assert str(val_err.value) == ( - "Can only concatenate AudioRegions of the same " "sample width (2 != 4)" + "Can only concatenate AudioRegions of the same sample width (2 != 4)" ) def test_concatenation_different_number_of_channels_error(): - region_1 = AudioRegion(b"a" * 100, 8000, 1, 1) region_2 = AudioRegion(b"b" * 100, 8000, 1, 2) @@ -1591,16 +1912,16 @@ region_1 + region_2 assert str(val_err.value) == ( "Can only concatenate AudioRegions of the same " - "number of channels (1 != 2)" + "number of channels (1 != 2)" # different_number_of_channels ) @pytest.mark.parametrize( "duration, expected_duration, expected_len, expected_len_ms", [ - (0.01, 0.03, 240, 30), - (0.00575, 0.01725, 138, 17), - (0.00625, 0.01875, 150, 19), + (0.01, 0.03, 240, 30), # simple + (0.00575, 0.01725, 138, 17), # rounded_len_floor + (0.00625, 0.01875, 150, 19), # rounded_len_ceil ], ids=[ "simple", @@ -1630,28 +1951,28 @@ @pytest.mark.parametrize( "factor, _type", [ - ("x", "str"), - (1.4, "float"), + ("x", str), # string + (1.4, float), # float ], ids=[ - "_str", - "_float", + "string", + "float", ], ) def test_multiplication_non_int(factor, _type): with pytest.raises(TypeError) as type_err: AudioRegion(b"0" * 80, 8000, 1, 1) * factor - err_msg = "Can't multiply AudioRegion by a non-int of type '{}'" - assert err_msg.format(_type) == str(type_err.value) + err_msg = "Can't multiply AudioRegion by a non-int of type '{}'" + assert err_msg.format(_type) == str(type_err.value) @pytest.mark.parametrize( "data", [ - [b"a" * 80, b"b" * 80], - [b"a" * 31, b"b" * 31, b"c" * 30], - [b"a" * 31, b"b" * 30, b"c" * 30], - [b"a" * 11, b"b" * 11, b"c" * 10, b"c" * 10], + [b"a" * 80, b"b" * 80], # simple + [b"a" * 31, b"b" * 31, b"c" * 30], # extra_samples_1 + [b"a" * 31, b"b" * 30, b"c" * 30], # extra_samples_2 + [b"a" * 11, b"b" * 11, b"c" * 10, b"c" * 10], # extra_samples_3 ], ids=[ "simple", @@ -1665,17 +1986,17 @@ region = AudioRegion(b"".join(data), 80, 1, 1) sub_regions = region / len(data) - for data_i, region in zip(data, sub_regions): + for data_i, region in zip(data, sub_regions, strict=True): assert len(data_i) == len(bytes(region)) @pytest.mark.parametrize( - "data, sample_width, channels, fmt, expected", + "data, sample_width, channels, expected", [ - (b"a" * 10, 1, 1, "b", [97] * 10), - (b"a" * 10, 2, 1, "h", [24929] * 5), - (b"a" * 8, 4, 1, "i", [1633771873] * 2), - (b"ab" * 5, 1, 2, "b", [[97] * 5, [98] * 5]), + (b"a" * 10, 1, 1, [97] * 10), # mono_sw_1 + (b"a" * 10, 2, 1, [24929] * 5), # mono_sw_2 + (b"a" * 8, 4, 1, [1633771873] * 2), # mono_sw_4 + (b"ab" * 5, 1, 2, [[97] * 5, [98] * 5]), # stereo_sw_1 ], ids=[ "mono_sw_1", @@ -1684,18 +2005,10 @@ "stereo_sw_1", ], ) -def test_samples(data, sample_width, channels, fmt, expected): +def test_samples(data, sample_width, channels, expected): region = AudioRegion(data, 10, sample_width, channels) - if isinstance(expected[0], list): - expected = [array_(fmt, exp) for exp in expected] - else: - expected = array_(fmt, expected) - samples = region.samples - equal = samples == expected - try: - # for numpy - equal = equal.all() - except AttributeError: - pass - assert equal + expected = np.array(expected) + assert (region.samples == expected).all() + assert (region.numpy() == expected).all() + assert (np.array(region) == expected).all()