amine@403
|
1 import math
|
amine@192
|
2 import os
|
amine@411
|
3 from pathlib import Path
|
amine@88
|
4 from random import random
|
amine@192
|
5 from tempfile import TemporaryDirectory
|
amine@403
|
6 from unittest.mock import Mock, patch
|
amine@403
|
7
|
amine@405
|
8 import numpy as np
|
amine@400
|
9 import pytest
|
amine@403
|
10
|
amine@403
|
11 from auditok import AudioParameterError, AudioRegion, load, split
|
amine@323
|
12 from auditok.core import (
|
amine@323
|
13 _duration_to_nb_windows,
|
amine@323
|
14 _make_audio_region,
|
amine@323
|
15 _read_chunks_online,
|
amine@323
|
16 _read_offline,
|
amine@323
|
17 )
|
amine@315
|
18 from auditok.io import get_audio_source
|
amine@405
|
19 from auditok.signal import to_array
|
amine@403
|
20 from auditok.util import AudioReader
|
amine@86
|
21
|
amine@86
|
22
|
amine@299
|
def _make_random_length_regions(
    byte_seq, sampling_rate, sample_width, channels
):
    """Return one ``AudioRegion`` of random length per item of `byte_seq`.

    For every item a duration is drawn as ``round(random() * 10, 6)``; the
    item is then repeated ``int(duration * sampling_rate)`` times and further
    multiplied by `sample_width` and `channels` to form the region's data.
    """

    def _random_region(item):
        # One random draw per item, in iteration order.
        duration = round(random() * 10, 6)
        payload = item * int(duration * sampling_rate) * sample_width * channels
        return AudioRegion(payload, sampling_rate, sample_width, channels)

    return [_random_region(item) for item in byte_seq]
|
amine@88
|
33
|
amine@88
|
34
|
amine@400
|
@pytest.mark.parametrize(
    "skip, max_read, channels",
    [
        pytest.param(0, -1, 1, id="no_skip_read_all"),
        pytest.param(0, -1, 2, id="no_skip_read_all_stereo"),
        pytest.param(2, -1, 1, id="skip_2_read_all"),
        pytest.param(2, None, 1, id="skip_2_read_all_None"),
        pytest.param(2, 3, 1, id="skip_2_read_3"),
        pytest.param(2, 3.5, 2, id="skip_2_read_3_5_stereo"),
        pytest.param(2.4, 3.5, 2, id="skip_2_4_read_3_5_stereo"),
    ],
)
def test_load(skip, max_read, channels):
    """Check the bytes of the region returned by ``load``.

    The reference data is read straight from the same raw file: the first
    ``skip * sampling_rate * sample_width * channels`` bytes are discarded,
    then either everything left (``max_read`` is None or negative) or
    ``max_read * sampling_rate * sample_width * channels`` bytes are read.
    """
    sampling_rate = 10
    sample_width = 2
    layout = "mono" if channels == 1 else "stereo"
    filename = "tests/data/test_split_10HZ_{}.raw".format(layout)

    region = load(
        filename,
        skip=skip,
        max_read=max_read,
        sr=sampling_rate,
        sw=sample_width,
        ch=channels,
    )

    read_everything = max_read is None or max_read < 0
    with open(filename, "rb") as fp:
        # Discard the skipped prefix so the next read starts where load does.
        fp.read(round(skip * sampling_rate * sample_width * channels))
        to_read = (
            -1
            if read_everything
            else round(max_read * sampling_rate * sample_width * channels)
        )
        expected = fp.read(to_read)

    assert bytes(region) == expected
|
amine@400
|
77
|
amine@400
|
78
|
amine@400
|
@pytest.mark.parametrize(
    "duration, analysis_window, round_fn, expected, kwargs",
    [
        pytest.param(0, 1, None, 0, None, id="zero_duration"),
        pytest.param(0.3, 0.1, round, 3, None, id="multiple"),
        pytest.param(0.35, 0.1, math.ceil, 4, None, id="not_multiple_ceil"),
        pytest.param(0.35, 0.1, math.floor, 3, None, id="not_multiple_floor"),
        pytest.param(0.05, 0.1, round, 0, None, id="small_duration"),
        pytest.param(0.05, 0.1, math.ceil, 1, None, id="small_duration_ceil"),
        pytest.param(
            0.3, 0.1, math.floor, 3, {"epsilon": 1e-6}, id="with_round_error"
        ),
        pytest.param(
            -0.5, 0.1, math.ceil, ValueError, None, id="negative_duration"
        ),
        pytest.param(
            0.5,
            -0.1,
            math.ceil,
            ValueError,
            None,
            id="negative_analysis_window",
        ),
    ],
)
def test_duration_to_nb_windows(
    duration, analysis_window, round_fn, expected, kwargs
):
    """Check ``_duration_to_nb_windows`` results and its ``ValueError`` cases.

    When `expected` is ``ValueError`` the call must raise it (extra `kwargs`
    are not forwarded in that case); otherwise the returned value must equal
    `expected`, with `kwargs` (if any) forwarded as keyword arguments.
    """
    if expected == ValueError:
        with pytest.raises(ValueError):
            _duration_to_nb_windows(duration, analysis_window, round_fn)
        return

    extra = {} if kwargs is None else kwargs
    result = _duration_to_nb_windows(
        duration, analysis_window, round_fn, **extra
    )
    assert result == expected
|
amine@371
|
117
|
amine@400
|
118
|
amine@400
|
@pytest.mark.parametrize(
    "channels, skip, max_read",
    [
        pytest.param(1, 0, None, id="mono_skip_0_max_read_None"),
        pytest.param(1, 3, None, id="mono_skip_3_max_read_None"),
        pytest.param(1, 2, -1, id="mono_skip_2_max_read_negative"),
        pytest.param(1, 2, 3, id="mono_skip_2_max_read_3"),
        pytest.param(2, 0, None, id="stereo_skip_0_max_read_None"),
        pytest.param(2, 3, None, id="stereo_skip_3_max_read_None"),
        pytest.param(2, 2, -1, id="stereo_skip_2_max_read_negative"),
        pytest.param(2, 2, 3, id="stereo_skip_2_max_read_3"),
    ],
)
def test_read_offline(channels, skip, max_read):
    """Check the data and audio parameters returned by ``_read_offline``.

    The expected data is the slice ``data[onset:offset]`` of the raw file,
    where ``onset`` is derived from `skip` and ``offset`` either from
    `max_read` or, when `max_read` is ``-1``/``None``, lies past the end of
    the file. The remaining returned values must be
    ``(sampling_rate, sample_width, channels)``.
    """
    sampling_rate = 10
    sample_width = 2
    layout = "mono" if channels == 1 else "stereo"
    filename = "tests/data/test_split_10HZ_{}.raw".format(layout)
    with open(filename, "rb") as fp:
        data = fp.read()

    onset = round(skip * sampling_rate * sample_width * channels)
    offset = (
        len(data) + 1
        if max_read in (-1, None)
        else onset + round(max_read * sampling_rate * sample_width * channels)
    )
    expected_data = data[onset:offset]

    read_data, *audio_params = _read_offline(
        filename,
        skip=skip,
        max_read=max_read,
        sr=sampling_rate,
        sw=sample_width,
        ch=channels,
    )

    assert read_data == expected_data
    assert tuple(audio_params) == (sampling_rate, sample_width, channels)
|
amine@215
|
167
|
amine@323
|
168
|
amine@400
|
@pytest.mark.parametrize(
    (
        "min_dur, max_dur, max_silence, drop_trailing_silence, "
        "strict_min_dur, kwargs, expected"
    ),
    [
        pytest.param(
            0.2,
            5,
            0.2,
            False,
            False,
            {"eth": 50},
            [(2, 16), (17, 31), (34, 76)],
            id="simple",
        ),
        pytest.param(
            0.3,
            2,
            0.2,
            False,
            False,
            {"eth": 50},
            [(2, 16), (17, 31), (34, 54), (54, 74), (74, 76)],
            id="short_max_dur",
        ),
        pytest.param(
            3, 5, 0.2, False, False, {"eth": 50}, [(34, 76)], id="long_min_dur"
        ),
        pytest.param(
            0.2,
            80,
            10,
            False,
            False,
            {"eth": 50},
            [(2, 76)],
            id="long_max_silence",
        ),
        pytest.param(
            0.2,
            5,
            0.0,
            False,
            False,
            {"eth": 50},
            [(2, 14), (17, 24), (26, 29), (34, 76)],
            id="zero_max_silence",
        ),
        pytest.param(
            0.2,
            5,
            0.2,
            False,
            False,
            {"energy_threshold": 40},
            [(0, 50), (50, 76)],
            id="low_energy_threshold",
        ),
        pytest.param(
            0.2,
            5,
            0.2,
            False,
            False,
            {"energy_threshold": 60},
            [],
            id="high_energy_threshold",
        ),
        pytest.param(
            0.2,
            10,
            0.5,
            True,
            False,
            {"eth": 50},
            [(2, 76)],
            id="trim_leading_and_trailing_silence",
        ),
        pytest.param(
            0.2,
            5,
            0.2,
            True,
            False,
            {"eth": 50},
            [(2, 14), (17, 29), (34, 76)],
            id="drop_trailing_silence",
        ),
        pytest.param(
            1.5,
            5,
            0.2,
            True,
            False,
            {"eth": 50},
            [(34, 76)],
            id="drop_trailing_silence_2",
        ),
        pytest.param(
            0.3,
            2,
            0.2,
            False,
            True,
            {"eth": 50},
            [(2, 16), (17, 31), (34, 54), (54, 74)],
            id="strict_min_dur",
        ),
    ],
)
def test_split_params(
    min_dur,
    max_dur,
    max_silence,
    drop_trailing_silence,
    strict_min_dur,
    kwargs,
    expected,
):
    """Check ``split`` and ``AudioRegion.split`` on the mono test file.

    Both entry points receive the same positional duration/silence arguments
    and the same extra `kwargs`. `expected` lists ``(onset, offset)`` pairs
    that are multiplied by the sample width to slice the raw data each
    detected region must match; both calls must also yield equal regions.
    """
    with open("tests/data/test_split_10HZ_mono.raw", "rb") as fp:
        data = fp.read()

    # Positional arguments shared by the function and the method call.
    positional = (
        min_dur,
        max_dur,
        max_silence,
        drop_trailing_silence,
        strict_min_dur,
    )

    regions = split(
        data, *positional, analysis_window=0.1, sr=10, sw=2, ch=1, **kwargs
    )
    regions_ar = AudioRegion(data, 10, 2, 1).split(
        *positional, analysis_window=0.1, **kwargs
    )

    regions = list(regions)
    regions_ar = list(regions_ar)

    err_msg = (
        "Wrong number of regions after split, expected: "
        "{}, found: {}".format(len(expected), len(regions))
    )
    assert len(regions) == len(expected), err_msg
    err_msg = (
        "Wrong number of regions after AudioRegion.split, expected: "
        "{}, found: {}".format(len(expected), len(regions_ar))
    )
    assert len(regions_ar) == len(expected), err_msg

    sample_width = 2
    for reg, reg_ar, (onset, offset) in zip(
        regions, regions_ar, expected, strict=True
    ):
        exp_data = data[onset * sample_width : offset * sample_width]
        assert bytes(reg) == exp_data
        assert reg == reg_ar
|
amine@207
|
325
|
amine@299
|
326
|
amine@400
|
@pytest.mark.parametrize(
    "channels, kwargs, expected",
    [
        (2, {}, [(2, 32), (34, 76)]),  # stereo_all_default
        (1, {"max_read": 5}, [(2, 16), (17, 31), (34, 50)]),  # mono_max_read
        (
            1,
            {"mr": 5},
            [(2, 16), (17, 31), (34, 50)],
        ),  # mono_max_read_short_name
        (
            1,
            {"eth": 50, "use_channel": 0},
            [(2, 16), (17, 31), (34, 76)],
        ),  # mono_use_channel_1
        (1, {"eth": 50, "uc": 1}, [(2, 16), (17, 31), (34, 76)]),  # mono_uc_1
        (
            1,
            {"eth": 50, "use_channel": None},
            [(2, 16), (17, 31), (34, 76)],
        ),  # mono_use_channel_None
        (
            2,
            {"eth": 50, "use_channel": 0},
            [(2, 16), (17, 31), (34, 76)],
        ),  # stereo_use_channel_1
        (
            2,
            {"eth": 50},
            [(2, 32), (34, 76)],
        ),  # stereo_use_channel_no_use_channel_given
        (
            2,
            {"eth": 50, "use_channel": -2},
            [(2, 16), (17, 31), (34, 76)],
        ),  # stereo_use_channel_minus_2
        (2, {"eth": 50, "uc": 1}, [(10, 32), (36, 76)]),  # stereo_uc_2
        (2, {"eth": 50, "uc": -1}, [(10, 32), (36, 76)]),  # stereo_uc_minus_1
        (
            1,
            {"eth": 50, "uc": "mix"},
            [(2, 16), (17, 31), (34, 76)],
        ),  # mono_uc_mix
        (
            2,
            {"energy_threshold": 53.5, "use_channel": "mix"},
            [(54, 76)],
        ),  # stereo_use_channel_mix
        (2, {"eth": 52, "uc": "mix"}, [(17, 26), (54, 76)]),  # stereo_uc_mix
        (
            2,
            {"uc": "mix"},
            [(10, 16), (17, 31), (36, 76)],
        ),  # stereo_uc_mix_default_eth
    ],
    ids=[
        "stereo_all_default",
        "mono_max_read",
        "mono_max_read_short_name",
        "mono_use_channel_1",
        "mono_uc_1",
        "mono_use_channel_None",
        "stereo_use_channel_1",
        "stereo_use_channel_no_use_channel_given",
        "stereo_use_channel_minus_2",
        "stereo_uc_2",
        "stereo_uc_minus_1",
        "mono_uc_mix",
        "stereo_use_channel_mix",
        "stereo_uc_mix",
        "stereo_uc_mix_default_eth",
    ],
)
def test_split_kwargs(channels, kwargs, expected):
    """Check ``split`` and ``AudioRegion.split`` with extra keyword arguments.

    `kwargs` is forwarded as-is to ``split``. For ``AudioRegion.split`` the
    ``max_read``/``mr`` entry (if any) is applied beforehand by slicing the
    region with ``region.sec[:max_read]`` and is therefore removed from the
    forwarded keyword arguments. `expected` lists ``(onset, offset)`` pairs
    that are multiplied by ``sample_width * channels`` to slice the raw data;
    each detected region must have the same byte length as its slice, and
    both entry points must yield equal regions.
    """
    mono_or_stereo = "mono" if channels == 1 else "stereo"
    filename = "tests/data/test_split_10HZ_{}.raw".format(mono_or_stereo)
    with open(filename, "rb") as fp:
        data = fp.read()

    regions = split(
        data,
        min_dur=0.2,
        max_dur=5,
        max_silence=0.2,
        drop_trailing_silence=False,
        strict_min_dur=False,
        analysis_window=0.1,
        sr=10,
        sw=2,
        ch=channels,
        **kwargs,
    )

    region = AudioRegion(data, 10, 2, channels)
    # Work on a copy: pytest hands the parametrized dict to the test without
    # copying it, so popping keys in place would alter the case for any later
    # run of the same parameter set (the first ``split`` call would then lose
    # its ``max_read``).
    region_kwargs = dict(kwargs)
    max_read = kwargs.get("max_read", kwargs.get("mr"))
    if max_read is not None:
        region = region.sec[:max_read]
        region_kwargs.pop("max_read", None)
        region_kwargs.pop("mr", None)

    regions_ar = region.split(
        min_dur=0.2,
        max_dur=5,
        max_silence=0.2,
        drop_trailing_silence=False,
        strict_min_dur=False,
        analysis_window=0.1,
        **region_kwargs,
    )

    regions = list(regions)
    regions_ar = list(regions_ar)
    err_msg = "Wrong number of regions after split, expected: "
    err_msg += "{}, found: {}".format(len(expected), len(regions))
    assert len(regions) == len(expected), err_msg
    err_msg = "Wrong number of regions after AudioRegion.split, expected: "
    err_msg += "{}, found: {}".format(len(expected), len(regions_ar))
    assert len(regions_ar) == len(expected), err_msg

    sample_width = 2
    sample_size_bytes = sample_width * channels
    for reg, reg_ar, exp in zip(regions, regions_ar, expected, strict=True):
        onset, offset = exp
        exp_data = data[onset * sample_size_bytes : offset * sample_size_bytes]
        # Only the byte length is compared against the expected slice here.
        assert len(bytes(reg)) == len(exp_data)
        assert reg == reg_ar
|
amine@255
|
454
|
amine@255
|
455
|
amine@400
|
@pytest.mark.parametrize(
    "min_dur, max_dur, max_silence, channels, kwargs, expected",
    [
        pytest.param(
            0.2,
            5,
            0.2,
            1,
            {"aw": 0.2},
            [(2, 30), (34, 76)],
            id="mono_aw_0_2_max_silence_0_2",
        ),
        pytest.param(
            0.2,
            5,
            0.3,
            1,
            {"aw": 0.2},
            [(2, 30), (34, 76)],
            id="mono_aw_0_2_max_silence_0_3",
        ),
        pytest.param(
            0.2,
            5,
            0.4,
            1,
            {"aw": 0.2},
            [(2, 32), (34, 76)],
            id="mono_aw_0_2_max_silence_0_4",
        ),
        pytest.param(
            0.2,
            5,
            0,
            1,
            {"aw": 0.2},
            [(2, 14), (16, 24), (26, 28), (34, 76)],
            id="mono_aw_0_2_max_silence_0",
        ),
        pytest.param(
            0.2,
            5,
            0.2,
            1,
            {"aw": 0.2},
            [(2, 30), (34, 76)],
            id="mono_aw_0_2",
        ),
        pytest.param(
            0.3,
            5,
            0,
            1,
            {"aw": 0.3},
            [(3, 12), (15, 24), (36, 76)],
            id="mono_aw_0_3_max_silence_0",
        ),
        pytest.param(
            0.3,
            5,
            0.3,
            1,
            {"aw": 0.3},
            [(3, 27), (36, 76)],
            id="mono_aw_0_3_max_silence_0_3",
        ),
        pytest.param(
            0.3,
            5,
            0.5,
            1,
            {"aw": 0.3},
            [(3, 27), (36, 76)],
            id="mono_aw_0_3_max_silence_0_5",
        ),
        pytest.param(
            0.3,
            5,
            0.6,
            1,
            {"aw": 0.3},
            [(3, 30), (36, 76)],
            id="mono_aw_0_3_max_silence_0_6",
        ),
        pytest.param(
            0.2,
            5,
            0,
            1,
            {"aw": 0.4},
            [(4, 12), (16, 24), (36, 76)],
            id="mono_aw_0_4_max_silence_0",
        ),
        pytest.param(
            0.2,
            5,
            0.3,
            1,
            {"aw": 0.4},
            [(4, 12), (16, 24), (36, 76)],
            id="mono_aw_0_4_max_silence_0_3",
        ),
        pytest.param(
            0.2,
            5,
            0.4,
            1,
            {"aw": 0.4},
            [(4, 28), (36, 76)],
            id="mono_aw_0_4_max_silence_0_4",
        ),
        pytest.param(
            0.2,
            5,
            0.2,
            2,
            {"analysis_window": 0.2},
            [(2, 32), (34, 76)],
            id="stereo_uc_None_analysis_window_0_2",
        ),
        pytest.param(
            0.2,
            5,
            0.2,
            2,
            {"uc": None, "analysis_window": 0.2},
            [(2, 32), (34, 76)],
            id="stereo_uc_any_analysis_window_0_2",
        ),
        pytest.param(
            0.2,
            5,
            0.2,
            2,
            {"use_channel": None, "analysis_window": 0.3},
            [(3, 30), (36, 76)],
            id="stereo_use_channel_None_aw_0_3_max_silence_0_2",
        ),
        pytest.param(
            0.2,
            5,
            0.3,
            2,
            {"use_channel": "any", "analysis_window": 0.3},
            [(3, 33), (36, 76)],
            id="stereo_use_channel_any_aw_0_3_max_silence_0_3",
        ),
        pytest.param(
            0.2,
            5,
            0.2,
            2,
            {"use_channel": None, "analysis_window": 0.4},
            [(4, 28), (36, 76)],
            id="stereo_use_channel_None_aw_0_4_max_silence_0_2",
        ),
        pytest.param(
            0.2,
            5,
            0.4,
            2,
            {"use_channel": "any", "analysis_window": 0.4},
            [(4, 32), (36, 76)],
            id="stereo_use_channel_any_aw_0_3_max_silence_0_4",
        ),
        pytest.param(
            0.2,
            5,
            0.2,
            2,
            {"uc": 0, "analysis_window": 0.2},
            [(2, 30), (34, 76)],
            id="stereo_uc_0_analysis_window_0_2",
        ),
        pytest.param(
            0.2,
            5,
            0.2,
            2,
            {"uc": 1, "analysis_window": 0.2},
            [(10, 32), (36, 76)],
            id="stereo_uc_1_analysis_window_0_2",
        ),
        pytest.param(
            0.2,
            5,
            0,
            2,
            {"uc": "mix", "analysis_window": 0.1},
            [(10, 14), (17, 24), (26, 29), (36, 76)],
            id="stereo_uc_mix_aw_0_1_max_silence_0",
        ),
        pytest.param(
            0.2,
            5,
            0.1,
            2,
            {"uc": "mix", "analysis_window": 0.1},
            [(10, 15), (17, 25), (26, 30), (36, 76)],
            id="stereo_uc_mix_aw_0_1_max_silence_0_1",
        ),
        pytest.param(
            0.2,
            5,
            0.2,
            2,
            {"uc": "mix", "analysis_window": 0.1},
            [(10, 16), (17, 31), (36, 76)],
            id="stereo_uc_mix_aw_0_1_max_silence_0_2",
        ),
        pytest.param(
            0.2,
            5,
            0.3,
            2,
            {"uc": "mix", "analysis_window": 0.1},
            [(10, 32), (36, 76)],
            id="stereo_uc_mix_aw_0_1_max_silence_0_3",
        ),
        pytest.param(
            0.3,
            5,
            0,
            2,
            {"uc": "avg", "analysis_window": 0.2},
            [(10, 14), (16, 24), (36, 76)],
            id="stereo_uc_avg_aw_0_2_max_silence_0_min_dur_0_3",
        ),
        pytest.param(
            0.41,
            5,
            0,
            2,
            {"uc": "average", "analysis_window": 0.2},
            [(16, 24), (36, 76)],
            id="stereo_uc_average_aw_0_2_max_silence_0_min_dur_0_41",
        ),
        pytest.param(
            0.2,
            5,
            0.1,
            2,
            {"uc": "mix", "analysis_window": 0.2},
            [(10, 14), (16, 24), (26, 28), (36, 76)],
            id="stereo_uc_mix_aw_0_2_max_silence_0_1",
        ),
        pytest.param(
            0.2,
            5,
            0.2,
            2,
            {"uc": "mix", "analysis_window": 0.2},
            [(10, 30), (36, 76)],
            id="stereo_uc_mix_aw_0_2_max_silence_0_2",
        ),
        pytest.param(
            0.2,
            5,
            0.4,
            2,
            {"uc": "mix", "analysis_window": 0.2},
            [(10, 32), (36, 76)],
            id="stereo_uc_mix_aw_0_2_max_silence_0_4",
        ),
        pytest.param(
            0.2,
            5,
            0.5,
            2,
            {"uc": "mix", "analysis_window": 0.2},
            [(10, 32), (36, 76)],
            id="stereo_uc_mix_aw_0_2_max_silence_0_5",
        ),
        pytest.param(
            0.2,
            5,
            0.6,
            2,
            {"uc": "mix", "analysis_window": 0.2},
            [(10, 34), (36, 76)],
            id="stereo_uc_mix_aw_0_2_max_silence_0_6",
        ),
        pytest.param(
            0.2,
            5,
            0,
            2,
            {"uc": "mix", "analysis_window": 0.3},
            [(9, 24), (27, 30), (36, 76)],
            id="stereo_uc_mix_aw_0_3_max_silence_0",
        ),
        pytest.param(
            0.4,
            5,
            0,
            2,
            {"uc": "mix", "analysis_window": 0.3},
            [(9, 24), (36, 76)],
            id="stereo_uc_mix_aw_0_3_max_silence_0_min_dur_0_3",
        ),
        pytest.param(
            0.2,
            5,
            0.6,
            2,
            {"uc": "mix", "analysis_window": 0.3},
            [(9, 57), (57, 76)],
            id="stereo_uc_mix_aw_0_3_max_silence_0_6",
        ),
        pytest.param(
            0.2,
            5.1,
            0.6,
            2,
            {"uc": "mix", "analysis_window": 0.3},
            [(9, 60), (60, 76)],
            id="stereo_uc_mix_aw_0_3_max_silence_0_6_max_dur_5_1",
        ),
        pytest.param(
            0.2,
            5.2,
            0.6,
            2,
            {"uc": "mix", "analysis_window": 0.3},
            [(9, 60), (60, 76)],
            id="stereo_uc_mix_aw_0_3_max_silence_0_6_max_dur_5_2",
        ),
        pytest.param(
            0.2,
            5.3,
            0.6,
            2,
            {"uc": "mix", "analysis_window": 0.3},
            [(9, 60), (60, 76)],
            id="stereo_uc_mix_aw_0_3_max_silence_0_6_max_dur_5_3",
        ),
        pytest.param(
            0.2,
            5.4,
            0.6,
            2,
            {"uc": "mix", "analysis_window": 0.3},
            [(9, 63), (63, 76)],
            id="stereo_uc_mix_aw_0_3_max_silence_0_6_max_dur_5_4",
        ),
        pytest.param(
            0.2,
            5,
            0,
            2,
            {"uc": "mix", "analysis_window": 0.4},
            [(16, 24), (36, 76)],
            id="stereo_uc_mix_aw_0_4_max_silence_0",
        ),
        pytest.param(
            0.2,
            5,
            0.3,
            2,
            {"uc": "mix", "analysis_window": 0.4},
            [(16, 24), (36, 76)],
            id="stereo_uc_mix_aw_0_4_max_silence_0_3",
        ),
        pytest.param(
            0.2,
            5,
            0.4,
            2,
            {"uc": "mix", "analysis_window": 0.4},
            [(16, 28), (36, 76)],
            id="stereo_uc_mix_aw_0_4_max_silence_0_4",
        ),
    ],
)
def test_split_analysis_window(
    min_dur, max_dur, max_silence, channels, kwargs, expected
):
    """Check ``split`` and ``AudioRegion.split`` for several analysis windows.

    The analysis window (and, for some cases, the channel selection) comes
    from `kwargs`; the energy threshold is fixed to ``eth=49.99``. `expected`
    lists ``(onset, offset)`` pairs that are multiplied by
    ``sample_width * channels`` to slice the raw data each detected region
    must match; both entry points must also yield equal regions.
    """
    layout = "mono" if channels == 1 else "stereo"
    filename = "tests/data/test_split_10HZ_{}.raw".format(layout)
    with open(filename, "rb") as fp:
        data = fp.read()

    # Keyword arguments shared by the function and the method call.
    common = dict(
        min_dur=min_dur,
        max_dur=max_dur,
        max_silence=max_silence,
        drop_trailing_silence=False,
        strict_min_dur=False,
        eth=49.99,
    )

    regions = split(data, sr=10, sw=2, ch=channels, **common, **kwargs)
    regions_ar = AudioRegion(data, 10, 2, channels).split(**common, **kwargs)

    regions = list(regions)
    regions_ar = list(regions_ar)

    err_msg = (
        "Wrong number of regions after split, expected: "
        "{}, found: {}".format(len(expected), len(regions))
    )
    assert len(regions) == len(expected), err_msg
    err_msg = (
        "Wrong number of regions after AudioRegion.split, expected: "
        "{}, found: {}".format(len(expected), len(regions_ar))
    )
    assert len(regions_ar) == len(expected), err_msg

    sample_width = 2
    sample_size_bytes = sample_width * channels
    for reg, reg_ar, (onset, offset) in zip(
        regions, regions_ar, expected, strict=True
    ):
        exp_data = data[onset * sample_size_bytes : offset * sample_size_bytes]
        assert bytes(reg) == exp_data
        assert reg == reg_ar
|
amine@255
|
875
|
amine@255
|
876
|
amine@400
|
def test_split_custom_validator():
    """Check ``split`` and ``AudioRegion.split`` with a custom validator.

    The validator converts each analysis window with ``to_array`` and
    accepts it when the first element of the result is ``>= 320``.
    """
    filename = "tests/data/test_split_10HZ_mono.raw"
    with open(filename, "rb") as fp:
        data = fp.read()

    def validator(window):
        return to_array(window, sample_width=2, channels=1)[0] >= 320

    # Options shared by the module-level function and the region method.
    common = dict(
        min_dur=0.2,
        max_dur=5,
        max_silence=0.2,
        drop_trailing_silence=False,
        strict_min_dur=False,
        analysis_window=0.1,
        validator=validator,
    )
    regions = split(data, sr=10, sw=2, ch=1, **common)
    regions_ar = AudioRegion(data, 10, 2, 1).split(**common)

    expected = [(2, 16), (17, 31), (34, 76)]
    regions = list(regions)
    regions_ar = list(regions_ar)

    err_msg = (
        "Wrong number of regions after split, expected: "
        "{}, found: {}"
    ).format(len(expected), len(regions))
    assert len(regions) == len(expected), err_msg

    err_msg = (
        "Wrong number of regions after AudioRegion.split, expected: "
        "{}, found: {}"
    ).format(len(expected), len(regions_ar))
    assert len(regions_ar) == len(expected), err_msg

    sample_size_bytes = 2
    for reg, reg_ar, (onset, offset) in zip(
        regions, regions_ar, expected, strict=True
    ):
        first_byte = onset * sample_size_bytes
        last_byte = offset * sample_size_bytes
        assert bytes(reg) == data[first_byte:last_byte]
        assert reg == reg_ar
|
amine@299
|
923
|
amine@220
|
924
|
amine@400
|
@pytest.mark.parametrize(
    "input, kwargs",
    [
        (
            "tests/data/test_split_10HZ_stereo.raw",
            {"audio_format": "raw", "sr": 10, "sw": 2, "ch": 2},
        ),  # filename_audio_format
        (
            "tests/data/test_split_10HZ_stereo.raw",
            {"fmt": "raw", "sr": 10, "sw": 2, "ch": 2},
        ),  # filename_audio_format_short_name
        (
            "tests/data/test_split_10HZ_stereo.raw",
            {"sr": 10, "sw": 2, "ch": 2},
        ),  # filename_no_audio_format
        (
            "tests/data/test_split_10HZ_stereo.raw",
            {"sampling_rate": 10, "sample_width": 2, "channels": 2},
        ),  # filename_no_long_audio_params
        (
            # read_bytes() closes the file; a bare open(...).read() here
            # would leak a handle every time the module is collected.
            Path("tests/data/test_split_10HZ_stereo.raw").read_bytes(),
            {"sr": 10, "sw": 2, "ch": 2},
        ),  # bytes_
        (
            AudioReader(
                "tests/data/test_split_10HZ_stereo.raw",
                sr=10,
                sw=2,
                ch=2,
                block_dur=0.1,
            ),
            {},
        ),  # audio_reader
        (
            AudioRegion(
                Path("tests/data/test_split_10HZ_stereo.raw").read_bytes(),
                10,
                2,
                2,
            ),
            {},
        ),  # audio_region
        (
            get_audio_source(
                "tests/data/test_split_10HZ_stereo.raw", sr=10, sw=2, ch=2
            ),
            {},
        ),  # audio_source
    ],
    ids=[
        "filename_audio_format",
        "filename_audio_format_short_name",
        "filename_no_audio_format",
        "filename_no_long_audio_params",
        "bytes_",
        "audio_reader",
        "audio_region",
        "audio_source",
    ],
)
def test_split_input_type(input, kwargs):
    """Check that ``split`` accepts each supported kind of ``input``.

    ``input`` is a file name, raw bytes, an ``AudioReader``, an
    ``AudioRegion`` or the object returned by ``get_audio_source``; ``kwargs``
    carries the audio parameters passed along with it. Every case must yield
    the same two regions of the stereo test file.
    """
    with open("tests/data/test_split_10HZ_stereo.raw", "rb") as fp:
        data = fp.read()

    regions = split(
        input,
        min_dur=0.2,
        max_dur=5,
        max_silence=0.2,
        drop_trailing_silence=False,
        strict_min_dur=False,
        analysis_window=0.1,
        **kwargs
    )
    regions = list(regions)
    expected = [(2, 32), (34, 76)]
    sample_width = 2
    channels = 2
    sample_size_bytes = sample_width * channels

    # Report counts, as the message says and as the sibling split tests do.
    err_msg = "Wrong number of regions after split, expected: "
    err_msg += "{}, found: {}".format(len(expected), len(regions))
    assert len(regions) == len(expected), err_msg

    for reg, (onset, offset) in zip(regions, expected, strict=True):
        exp_data = data[onset * sample_size_bytes : offset * sample_size_bytes]
        assert bytes(reg) == exp_data
|
amine@212
|
1010
|
amine@212
|
1011
|
amine@400
|
@pytest.mark.parametrize(
    "min_dur, max_dur, analysis_window",
    [
        (0.5, 0.4, 0.1),  # min_dur_greater_than_max_dur
        (0.44, 0.49, 0.1),  # durations_OK_but_wrong_number_of_analysis_windows
    ],
    ids=[
        "min_dur_greater_than_max_dur",
        "durations_OK_but_wrong_number_of_analysis_windows",
    ],
)
def test_split_wrong_min_max_dur(min_dur, max_dur, analysis_window):
    """Check the ``ValueError`` message for incompatible min/max durations."""
    with pytest.raises(ValueError) as val_err:
        split(
            b"0" * 16,
            min_dur=min_dur,
            max_dur=max_dur,
            max_silence=0.2,
            sr=16000,
            sw=1,
            ch=1,
            analysis_window=analysis_window,
        )

    # Window counts as they are expected to appear in the message.
    min_dur_windows = math.ceil(min_dur / analysis_window)
    max_dur_windows = math.floor(max_dur / analysis_window)
    template = (
        "'min_dur' ({0} sec.) results in {1} analysis "
        "window(s) ({1} == ceil({0} / {2})) which is "
        "higher than the number of analysis window(s) for "
        "'max_dur' ({3} == floor({4} / {2}))"
    )
    expected_msg = template.format(
        min_dur,
        min_dur_windows,
        analysis_window,
        max_dur_windows,
        max_dur,
    )
    assert expected_msg == str(val_err.value)
|
amine@400
|
1050
|
amine@400
|
1051
|
amine@400
|
@pytest.mark.parametrize(
    "max_silence, max_dur, analysis_window",
    [
        (0.5, 0.5, 0.1),  # max_silence_equals_max_dur
        (0.5, 0.4, 0.1),  # max_silence_greater_than_max_dur
        (0.44, 0.49, 0.1),  # durations_OK_but_wrong_number_of_analysis_windows
    ],
    ids=[
        "max_silence_equals_max_dur",
        "max_silence_greater_than_max_dur",
        "durations_OK_but_wrong_number_of_analysis_windows",
    ],
)
def test_split_wrong_max_silence_max_dur(max_silence, max_dur, analysis_window):
    """Check the ``ValueError`` message when ``max_silence`` is too large."""
    with pytest.raises(ValueError) as val_err:
        split(
            b"0" * 16,
            min_dur=0.2,
            max_dur=max_dur,
            max_silence=max_silence,
            sr=16000,
            sw=1,
            ch=1,
            analysis_window=analysis_window,
        )

    # Window counts as they are expected to appear in the message.
    max_silence_windows = math.floor(max_silence / analysis_window)
    max_dur_windows = math.floor(max_dur / analysis_window)
    template = (
        "'max_silence' ({0} sec.) results in {1} analysis "
        "window(s) ({1} == floor({0} / {2})) which is "
        "higher or equal to the number of analysis window(s) for "
        "'max_dur' ({3} == floor({4} / {2}))"
    )
    expected_msg = template.format(
        max_silence,
        max_silence_windows,
        analysis_window,
        max_dur_windows,
        max_dur,
    )
    assert expected_msg == str(val_err.value)
|
amine@400
|
1092
|
amine@400
|
1093
|
amine@400
|
@pytest.mark.parametrize(
    "wrong_param",
    [
        {"min_dur": -1},  # negative_min_dur
        {"min_dur": 0},  # zero_min_dur
        {"max_dur": -1},  # negative_max_dur
        {"max_dur": 0},  # zero_max_dur
        {"max_silence": -1},  # negative_max_silence
        {"analysis_window": 0},  # zero_analysis_window
        {"analysis_window": -1},  # negative_analysis_window
    ],
    ids=[
        "negative_min_dur",
        "zero_min_dur",
        "negative_max_dur",
        "zero_max_dur",
        "negative_max_silence",
        "zero_analysis_window",
        "negative_analysis_window",
    ],
)
def test_split_negative_temporal_params(wrong_param):
    """Check the ``ValueError`` message for an out-of-range temporal value.

    ``wrong_param`` is a single-entry dict that overrides one otherwise
    valid temporal argument of ``split``.
    """
    valid_params = {
        "min_dur": 0.2,
        "max_dur": 0.5,
        "max_silence": 0.1,
        "analysis_window": 0.1,
    }
    params = {**valid_params, **wrong_param}
    with pytest.raises(ValueError) as val_err:
        split(None, **params)

    name = next(iter(wrong_param))
    value = wrong_param[name]
    # The expected message reads ">= 0" for max_silence and "> 0" otherwise.
    comparator_suffix = "=" if name == "max_silence" else ""
    err_msg = "'{}' ({}) must be >{} 0".format(name, value, comparator_suffix)
    assert err_msg == str(val_err.value)
|
amine@400
|
1133
|
amine@400
|
1134
|
amine@400
|
def test_split_too_small_analysis_window():
    """Check the error raised for an analysis window of 0.09 s at 10 Hz."""
    expected_msg = (
        "Too small 'analysis_window' (0.09) for sampling rate (10)."
        " Analysis window should at least be 1/10 to cover one "
        "data sample"
    )
    with pytest.raises(ValueError) as val_err:
        split(b"", sr=10, sw=1, ch=1, analysis_window=0.09)
    assert expected_msg == str(val_err.value)
|
amine@400
|
1142
|
amine@400
|
1143
|
amine@400
|
def test_split_and_plot():
    """Check ``AudioRegion.split_and_plot`` with ``auditok.core.plot`` patched.

    The patched ``plot`` must have been called and the returned regions must
    equal the regions built from the expected (onset, offset) pairs.
    """
    with open("tests/data/test_split_10HZ_mono.raw", "rb") as fp:
        data = fp.read()

    region = AudioRegion(data, 10, 2, 1)
    with patch("auditok.core.plot") as patch_fn:
        regions = region.split_and_plot(
            min_dur=0.2,
            max_dur=5,
            max_silence=0.2,
            drop_trailing_silence=False,
            strict_min_dur=False,
            analysis_window=0.1,
            sr=10,
            sw=2,
            ch=1,
            eth=50,
        )
    assert patch_fn.called

    expected = [(2, 16), (17, 31), (34, 76)]
    sample_width = 2
    expected_regions = [
        AudioRegion(
            data[onset * sample_width : offset * sample_width], 10, 2, 1
        )
        for onset, offset in expected
    ]
    assert regions == expected_regions
|
amine@211
|
1172
|
amine@223
|
1173
|
amine@400
|
def test_split_exception():
    """Check that ``AudioRegion.split(max_read=...)`` raises RuntimeWarning."""
    with open("tests/data/test_split_10HZ_mono.raw", "rb") as fp:
        raw_audio = fp.read()
    region = AudioRegion(raw_audio, 10, 2, 1)

    # max_read is not accepted when calling AudioRegion.split
    with pytest.raises(RuntimeWarning):
        region.split(max_read=2)
|
amine@223
|
1182
|
amine@223
|
1183
|
amine@400
|
@pytest.mark.parametrize(
    (
        "data, start, sampling_rate, sample_width, channels, expected_end, "
        "expected_duration_s, expected_duration_ms"
    ),
    [
        # simple
        (b"\0" * 8000, 0, 8000, 1, 1, 1, 1, 1000),
        # one_ms_less_than_1_sec
        (b"\0" * 7992, 0, 8000, 1, 1, 0.999, 0.999, 999),
        # tree_quarter_ms_less_than_1_sec
        (b"\0" * 7994, 0, 8000, 1, 1, 0.99925, 0.99925, 999),
        # half_ms_less_than_1_sec
        (b"\0" * 7996, 0, 8000, 1, 1, 0.9995, 0.9995, 1000),
        # quarter_ms_less_than_1_sec
        (b"\0" * 7998, 0, 8000, 1, 1, 0.99975, 0.99975, 1000),
        # simple_sample_width_2
        (b"\0" * 8000 * 2, 0, 8000, 2, 1, 1, 1, 1000),
        # simple_stereo
        (b"\0" * 8000 * 2, 0, 8000, 1, 2, 1, 1, 1000),
        # simple_multichannel
        (b"\0" * 8000 * 5, 0, 8000, 1, 5, 1, 1, 1000),
        # simple_sample_width_2_multichannel
        (b"\0" * 8000 * 2 * 5, 0, 8000, 2, 5, 1, 1, 1000),
        # one_ms_less_than_1s_sw_2_multichannel
        (b"\0" * 7992 * 2 * 5, 0, 8000, 2, 5, 0.999, 0.999, 999),
        # tree_qrt_ms_lt_1_s_sw_2_multichannel
        (b"\0" * 7994 * 2 * 5, 0, 8000, 2, 5, 0.99925, 0.99925, 999),
        # half_ms_lt_1s_sw_2_multichannel
        (b"\0" * 7996 * 2 * 5, 0, 8000, 2, 5, 0.9995, 0.9995, 1000),
        # quarter_ms_lt_1s_sw_2_multichannel
        (b"\0" * 7998 * 2 * 5, 0, 8000, 2, 5, 0.99975, 0.99975, 1000),
        # arbitrary_length_1
        (b"\0" * int(8000 * 1.33), 2.7, 8000, 1, 1, 4.03, 1.33, 1330),
        # arbitrary_length_2
        (b"\0" * int(8000 * 0.476), 11.568, 8000, 1, 1, 12.044, 0.476, 476),
        # arbitrary_length_sw_2_multichannel
        (
            b"\0" * int(8000 * 1.711) * 2 * 3,
            9.415,
            8000,
            2,
            3,
            11.126,
            1.711,
            1711,
        ),
        # arbitrary_sampling_rate
        (
            b"\0" * int(3172 * 1.318),
            17.236,
            3172,
            1,
            1,
            17.236 + int(3172 * 1.318) / 3172,
            int(3172 * 1.318) / 3172,
            1318,
        ),
        # arbitrary_sr_sw_2_multichannel
        (
            b"\0" * int(11317 * 0.716) * 2 * 3,
            18.811,
            11317,
            2,
            3,
            18.811 + int(11317 * 0.716) / 11317,
            int(11317 * 0.716) / 11317,
            716,
        ),
    ],
    ids=[
        "simple",
        "one_ms_less_than_1_sec",
        "tree_quarter_ms_less_than_1_sec",
        "half_ms_less_than_1_sec",
        "quarter_ms_less_than_1_sec",
        "simple_sample_width_2",
        "simple_stereo",
        "simple_multichannel",
        "simple_sample_width_2_multichannel",
        "one_ms_less_than_1s_sw_2_multichannel",
        "tree_qrt_ms_lt_1_s_sw_2_multichannel",
        "half_ms_lt_1s_sw_2_multichannel",
        "quarter_ms_lt_1s_sw_2_multichannel",
        "arbitrary_length_1",
        "arbitrary_length_2",
        "arbitrary_length_sw_2_multichannel",
        "arbitrary_sampling_rate",
        "arbitrary_sr_sw_2_multichannel",
    ],
)
def test_creation(
    data,
    start,
    sampling_rate,
    sample_width,
    channels,
    expected_end,
    expected_duration_s,
    expected_duration_ms,
):
    """Check the attributes of a freshly built ``AudioRegion``.

    Covers the audio parameters under both their long and short names, the
    ``meta.start`` / ``meta.end`` values, the duration in seconds, the length
    of the ``ms`` view and the raw bytes.
    """
    region = AudioRegion(data, sampling_rate, sample_width, channels, start)

    # Audio parameters, long and short spellings.
    assert region.sampling_rate == sampling_rate
    assert region.sr == sampling_rate
    assert region.sample_width == sample_width
    assert region.sw == sample_width
    assert region.channels == channels
    assert region.ch == channels

    # Position and duration.
    assert region.meta.start == start
    assert region.meta.end == expected_end
    assert region.duration == expected_duration_s
    assert len(region.ms) == expected_duration_ms

    # Payload.
    assert bytes(region) == data
|
amine@400
|
1378
|
amine@400
|
1379
|
amine@400
|
def test_creation_invalid_data_exception():
    """Check the error raised for 9 bytes of data with 2-byte mono samples."""
    expected_msg = (
        "The length of audio data must be an integer "
        "multiple of `sample_width * channels`"
    )
    with pytest.raises(AudioParameterError) as audio_param_err:
        AudioRegion(
            data=b"ABCDEFGHI", sampling_rate=8, sample_width=2, channels=1
        )
    assert str(audio_param_err.value) == expected_msg
|
amine@88
|
1389
|
amine@97
|
1390
|
amine@400
|
@pytest.mark.parametrize(
    "skip, max_read, channels",
    [
        (0, -1, 1),  # no_skip_read_all
        (0, -1, 2),  # no_skip_read_all_stereo
        (2, -1, 1),  # skip_2_read_all
        (2, None, 1),  # skip_2_read_all_None
        (2, 3, 1),  # skip_2_read_3
        (2, 3.5, 2),  # skip_2_read_3_5_stereo
        (2.4, 3.5, 2),  # skip_2_4_read_3_5_stereo
    ],
    ids=[
        "no_skip_read_all",
        "no_skip_read_all_stereo",
        "skip_2_read_all",
        "skip_2_read_all_None",
        "skip_2_read_3",
        "skip_2_read_3_5_stereo",
        "skip_2_4_read_3_5_stereo",
    ],
)
def test_load_AudioRegion(skip, max_read, channels):
    """Check ``AudioRegion.load`` against a manual read of the same file.

    The expected bytes are obtained by discarding ``skip`` seconds of data
    and then reading ``max_read`` seconds (everything that is left when
    ``max_read`` is ``None`` or negative).
    """
    sampling_rate = 10
    sample_width = 2
    layout = "mono" if channels == 1 else "stereo"
    filename = "tests/data/test_split_10HZ_{}.raw".format(layout)

    region = AudioRegion.load(
        filename,
        skip=skip,
        max_read=max_read,
        sr=sampling_rate,
        sw=sample_width,
        ch=channels,
    )

    read_everything = max_read is None or max_read < 0
    with open(filename, "rb") as fp:
        # Consume and discard the skipped part.
        fp.read(round(skip * sampling_rate * sample_width * channels))
        to_read = (
            -1
            if read_everything
            else round(max_read * sampling_rate * sample_width * channels)
        )
        expected = fp.read(to_read)
    assert bytes(region) == expected
|
amine@308
|
1433
|
amine@308
|
1434
|
amine@400
|
def test_load_from_microphone():
    """Check the collaborators used by ``AudioRegion.load(None, ...)``.

    ``PyAudioSource``, ``AudioReader.read`` and ``AudioRegion.__init__`` are
    replaced by mocks; each of them must have been called by ``load``.
    """
    with (
        patch("auditok.io.PyAudioSource") as patch_pyaudio_source,
        patch(
            "auditok.core.AudioReader.read", return_value=None
        ) as patch_reader,
        patch(
            "auditok.core.AudioRegion.__init__", return_value=None
        ) as patch_AudioRegion,
    ):
        AudioRegion.load(None, skip=0, max_read=5, sr=16000, sw=2, ch=1)

    assert patch_pyaudio_source.called
    assert patch_reader.called
    assert patch_AudioRegion.called
|
amine@307
|
1447
|
amine@308
|
1448
|
amine@400
|
@pytest.mark.parametrize(
    "max_read",
    [
        None,  # None
        -1,  # negative
    ],
    ids=[
        "None",
        "negative",
    ],
)
def test_load_from_microphone_without_max_read_exception(max_read):
    """Check that loading with no input requires a usable ``max_read``."""
    expected_msg = "'max_read' should not be None when reading from microphone"
    with pytest.raises(ValueError) as val_err:
        AudioRegion.load(None, max_read=max_read, sr=16000, sw=2, ch=1)
    assert str(val_err.value) == expected_msg
|
amine@400
|
1466
|
amine@400
|
1467
|
amine@400
|
def test_load_from_microphone_with_nonzero_skip_exception():
    """Check that loading with no input rejects a non-zero ``skip``."""
    expected_msg = "'skip' should be 0 when reading from microphone"
    with pytest.raises(ValueError) as val_err:
        AudioRegion.load(None, skip=1, max_read=5, sr=16000, sw=2, ch=1)
    assert str(val_err.value) == expected_msg
|
amine@400
|
1474
|
amine@400
|
1475
|
amine@400
|
@pytest.mark.parametrize(
    "format, start, expected",
    [
        # simple
        ("output.wav", 1.230, "output.wav"),
        # start
        ("output_{meta.start:g}.wav", 1.230, "output_1.23.wav"),
        # start_2
        ("output_{meta.start}.wav", 1.233712, "output_1.233712.wav"),
        # start_3
        ("output_{meta.start:.2f}.wav", 1.2300001, "output_1.23.wav"),
        # start_4
        ("output_{meta.start:.3f}.wav", 1.233712, "output_1.234.wav"),
        # start_5
        ("output_{meta.start:.8f}.wav", 1.233712, "output_1.23371200.wav"),
        # start_end_duration
        (
            "output_{meta.start}_{meta.end}_{duration}.wav",
            1.455,
            "output_1.455_2.455_1.0.wav",
        ),
        # start_end_duration_2
        (
            "output_{meta.start}_{meta.end}_{duration}.wav",
            1.455321,
            "output_1.455321_2.455321_1.0.wav",
        ),
    ],
    ids=[
        "simple",
        "start",
        "start_2",
        "start_3",
        "start_4",
        "start_5",
        "start_end_duration",
        "start_end_duration_2",
    ],
)
def test_save(format, start, expected):
    """Check the file name returned by ``AudioRegion.save``.

    ``format`` is a file-name template joined to a temporary directory; the
    directory prefix is stripped from the returned path before comparing it
    with ``expected``.
    """
    with TemporaryDirectory() as tmpdir:
        region = AudioRegion(b"0" * 160, 160, 1, 1, start)
        saved_path = region.save(os.path.join(tmpdir, format))
        # Drop the directory and the path separator that follows it.
        prefix_length = len(tmpdir) + 1
        assert saved_path[prefix_length:] == expected
|
amine@192
|
1525
|
amine@193
|
1526
|
amine@400
|
def test_save_file_exists_exception():
    """Check that ``save(..., exists_ok=False)`` refuses an existing file.

    The target is passed once as a ``str`` and once as a ``Path``.
    """
    with TemporaryDirectory() as tmpdir:
        filename = os.path.join(tmpdir, "output.wav")
        # Create an empty file so that the target already exists.
        with open(filename, "w"):
            pass

        region = AudioRegion(b"0" * 160, 160, 1, 1)
        for target in (filename, Path(filename)):
            with pytest.raises(FileExistsError):
                region.save(target, exists_ok=False)
|
amine@411
|
1537
|
amine@400
|
1538
|
amine@400
|
@pytest.mark.parametrize(
    "sampling_rate, sample_width, channels",
    [
        (16000, 1, 1),  # mono_16K_1byte
        (16000, 2, 1),  # mono_16K_2byte
        (44100, 2, 2),  # stereo_44100_2byte
        (44100, 2, 3),  # 3channel_44100_2byte
    ],
    ids=[
        "mono_16K_1byte",
        "mono_16K_2byte",
        "stereo_44100_2byte",
        "3channel_44100_2byte",
    ],
)
def test_join(sampling_rate, sample_width, channels):
    """Check ``AudioRegion.join`` with a one-second glue region.

    Three regions of 1.5, 0.5 and 0.75 seconds are joined; the result must
    hold the same bytes as ``bytes.join`` on the raw data and last as long as
    the three regions plus two glue segments.
    """

    def make_region(raw):
        return AudioRegion(raw, sampling_rate, sample_width, channels)

    duration = 1
    size = int(duration * sampling_rate * sample_width * channels)
    glue_data = b"\0" * size
    # (fill byte, length as a fraction of one second of data)
    fills = ((b"\1", 1.5), (b"\2", 0.5), (b"\3", 0.75))
    regions_data = [fill * int(size * factor) for fill, factor in fills]

    glue_region = make_region(glue_data)
    regions = [make_region(raw) for raw in regions_data]
    joined = glue_region.join(regions)

    assert joined.data == glue_data.join(regions_data)
    assert joined.duration == duration * 2 + 1.5 + 0.5 + 0.75
|
amine@414
|
1572
|
amine@414
|
1573
|
amine@414
|
@pytest.mark.parametrize(
    "sampling_rate, sample_width, channels",
    [
        (32000, 1, 1),  # different_sampling_rate
        (16000, 2, 1),  # different_sample_width
        (16000, 1, 2),  # different_channels
    ],
    ids=[
        "different_sampling_rate",
        "different_sample_width",
        "different_channels",
    ],
)
def test_join_exception(sampling_rate, sample_width, channels):
    """Check that ``join`` rejects regions whose parameters differ.

    The glue region is 16000 Hz, 1 byte per sample, mono; each case changes
    exactly one of these parameters for the regions to join.
    """
    glue_sampling_rate = 16000
    glue_sample_width = 1
    glue_channels = 1
    duration = 1

    glue_size = int(
        duration * glue_sampling_rate * glue_sample_width * glue_channels
    )
    glue_region = AudioRegion(
        b"\0" * glue_size,
        glue_sampling_rate,
        glue_sample_width,
        glue_channels,
    )

    size = int(duration * sampling_rate * sample_width * channels)
    # (fill byte, length as a fraction of one second of data)
    fills = ((b"\1", 1.5), (b"\2", 0.5), (b"\3", 0.75))
    regions = [
        AudioRegion(
            fill * int(size * factor), sampling_rate, sample_width, channels
        )
        for fill, factor in fills
    ]

    with pytest.raises(AudioParameterError):
        glue_region.join(regions)
|
amine@414
|
1615
|
amine@414
|
1616
|
amine@414
|
@pytest.mark.parametrize(
    "region, slice_, expected_data",
    [
        pytest.param(
            AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1),
            slice(0, 500),
            b"a" * 80,
            id="first_half",
        ),
        pytest.param(
            AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1),
            slice(500, None),
            b"b" * 80,
            id="second_half",
        ),
        pytest.param(
            AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1),
            slice(-500, None),
            b"b" * 80,
            id="second_half_negative",
        ),
        pytest.param(
            AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1),
            slice(200, 750),
            b"a" * 48 + b"b" * 40,
            id="middle",
        ),
        pytest.param(
            AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1),
            slice(-800, -250),
            b"a" * 48 + b"b" * 40,
            id="middle_negative",
        ),
        pytest.param(
            AudioRegion(b"a" * 160 + b"b" * 160, 160, 2, 1),
            slice(200, 750),
            b"a" * 96 + b"b" * 80,
            id="middle_sw2",
        ),
        pytest.param(
            AudioRegion(b"a" * 160 + b"b" * 160, 160, 1, 2),
            slice(200, 750),
            b"a" * 96 + b"b" * 80,
            id="middle_ch2",
        ),
        pytest.param(
            AudioRegion(b"a" * 320 + b"b" * 320, 160, 2, 2),
            slice(200, 750),
            b"a" * 192 + b"b" * 160,
            id="middle_sw2_ch2",
        ),
        pytest.param(
            AudioRegion(b"a" * 4000 + b"b" * 4000, 8000, 1, 1),
            slice(1, None),
            b"a" * (4000 - 8) + b"b" * 4000,
            id="but_first_sample",
        ),
        pytest.param(
            AudioRegion(b"a" * 4000 + b"b" * 4000, 8000, 1, 1),
            slice(-999, None),
            b"a" * (4000 - 8) + b"b" * 4000,
            id="but_first_sample_negative",
        ),
        pytest.param(
            AudioRegion(b"a" * 4000 + b"b" * 4000, 8000, 1, 1),
            slice(0, 999),
            b"a" * 4000 + b"b" * (4000 - 8),
            id="but_last_sample",
        ),
        pytest.param(
            AudioRegion(b"a" * 4000 + b"b" * 4000, 8000, 1, 1),
            slice(0, -1),
            b"a" * 4000 + b"b" * (4000 - 8),
            id="but_last_sample_negative",
        ),
        pytest.param(
            AudioRegion(b"a" * 160, 160, 1, 1),
            slice(-5000, None),
            b"a" * 160,
            id="big_negative_start",
        ),
        pytest.param(
            AudioRegion(b"a" * 160, 160, 1, 1),
            slice(None, -1500),
            b"",
            id="big_negative_stop",
        ),
        pytest.param(
            AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1),
            slice(0, 0),
            b"",
            id="empty",
        ),
        pytest.param(
            AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1),
            slice(200, 100),
            b"",
            id="empty_start_stop_reversed",
        ),
        pytest.param(
            AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1),
            slice(2000, 3000),
            b"",
            id="empty_big_positive_start",
        ),
        pytest.param(
            AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1),
            slice(-100, -200),
            b"",
            id="empty_negative_reversed",
        ),
        pytest.param(
            AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1),
            slice(0, -2000),
            b"",
            id="empty_big_negative_stop",
        ),
        pytest.param(
            AudioRegion(b"a" * 124 + b"b" * 376, 1234, 1, 1),
            slice(100, 200),
            b"a" + b"b" * 123,
            id="arbitrary_sampling_rate",
        ),
    ],
)
def test_region_temporal_slicing(region, slice_, expected_data):
    """Slice a region by time and compare the bytes of the result.

    ``slice_`` holds bounds in milliseconds. It is applied as-is through
    ``region.millis`` and, after dividing each bound by 1000, through
    ``region.sec``; both results must serialize to ``expected_data``.
    """
    by_millis = region.millis[slice_]
    assert bytes(by_millis) == expected_data

    # Convert the millisecond bounds to seconds; an open bound stays open.
    start_sec, stop_sec = (
        None if bound is None else bound / 1000
        for bound in (slice_.start, slice_.stop)
    )
    by_seconds = region.sec[start_sec:stop_sec]
    assert bytes(by_seconds) == expected_data
|
amine@244
|
1751
|
amine@400
|
1752
|
amine@400
|
@pytest.mark.parametrize(
    "region, slice_, time_shift, expected_data",
    [
        pytest.param(
            AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1),
            slice(0, 80),
            0,
            b"a" * 80,
            id="first_half",
        ),
        pytest.param(
            AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1),
            slice(80, None),
            0.5,
            b"b" * 80,
            id="second_half",
        ),
        pytest.param(
            AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1),
            slice(-80, None),
            0.5,
            b"b" * 80,
            id="second_half_negative",
        ),
        pytest.param(
            AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1),
            slice(160 // 5, 160 // 4 * 3),
            0.2,
            b"a" * 48 + b"b" * 40,
            id="middle",
        ),
        pytest.param(
            AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1),
            slice(-160 // 5 * 4, -160 // 4),
            0.2,
            b"a" * 48 + b"b" * 40,
            id="middle_negative",
        ),
        pytest.param(
            AudioRegion(b"a" * 160 + b"b" * 160, 160, 2, 1),
            slice(160 // 5, 160 // 4 * 3),
            0.2,
            b"a" * 96 + b"b" * 80,
            id="middle_sw2",
        ),
        pytest.param(
            AudioRegion(b"a" * 160 + b"b" * 160, 160, 1, 2),
            slice(160 // 5, 160 // 4 * 3),
            0.2,
            b"a" * 96 + b"b" * 80,
            id="middle_ch2",
        ),
        pytest.param(
            AudioRegion(b"a" * 320 + b"b" * 320, 160, 2, 2),
            slice(160 // 5, 160 // 4 * 3),
            0.2,
            b"a" * 192 + b"b" * 160,
            id="middle_sw2_ch2",
        ),
        pytest.param(
            AudioRegion(b"a" * 4000 + b"b" * 4000, 8000, 1, 1),
            slice(1, None),
            1 / 8000,
            b"a" * (4000 - 1) + b"b" * 4000,
            id="but_first_sample",
        ),
        pytest.param(
            AudioRegion(b"a" * 4000 + b"b" * 4000, 8000, 1, 1),
            slice(-7999, None),
            1 / 8000,
            b"a" * (4000 - 1) + b"b" * 4000,
            id="but_first_sample_negative",
        ),
        pytest.param(
            AudioRegion(b"a" * 4000 + b"b" * 4000, 8000, 1, 1),
            slice(0, 7999),
            0,
            b"a" * 4000 + b"b" * (4000 - 1),
            id="but_last_sample",
        ),
        pytest.param(
            AudioRegion(b"a" * 4000 + b"b" * 4000, 8000, 1, 1),
            slice(0, -1),
            0,
            b"a" * 4000 + b"b" * (4000 - 1),
            id="but_last_sample_negative",
        ),
        pytest.param(
            AudioRegion(b"a" * 160, 160, 1, 1),
            slice(-1600, None),
            0,
            b"a" * 160,
            id="big_negative_start",
        ),
        pytest.param(
            AudioRegion(b"a" * 160, 160, 1, 1),
            slice(None, -1600),
            0,
            b"",
            id="big_negative_stop",
        ),
        pytest.param(
            AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1),
            slice(0, 0),
            0,
            b"",
            id="empty",
        ),
        pytest.param(
            AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1),
            slice(80, 40),
            0.5,
            b"",
            id="empty_start_stop_reversed",
        ),
        pytest.param(
            AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1),
            slice(1600, 3000),
            10,
            b"",
            id="empty_big_positive_start",
        ),
        pytest.param(
            AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1),
            slice(-16, -32),
            0.9,
            b"",
            id="empty_negative_reversed",
        ),
        pytest.param(
            AudioRegion(b"a" * 80 + b"b" * 80, 160, 1, 1),
            slice(0, -2000),
            0,
            b"",
            id="empty_big_negative_stop",
        ),
        pytest.param(
            AudioRegion(b"a" * 124 + b"b" * 376, 1235, 1, 1),
            slice(100, 200),
            100 / 1235,
            b"a" * 24 + b"b" * 76,
            id="arbitrary_sampling_rate",
        ),
        pytest.param(
            AudioRegion(b"a" * 124 + b"b" * 376, 1235, 2, 2),
            slice(25, 50),
            25 / 1235,
            b"a" * 24 + b"b" * 76,
            id="arbitrary_sampling_rate_middle_sw2_ch2",
        ),
    ],
)
def test_region_sample_slicing(region, slice_, time_shift, expected_data):
    """Slice a region by sample index and compare the bytes of the result.

    ``slice_`` is applied directly to ``region`` and the result must
    serialize to ``expected_data``.

    NOTE(review): ``time_shift`` is supplied by every case but no assertion
    in this test reads it.
    """
    sliced = region[slice_]
    assert bytes(sliced) == expected_data
|
amine@400
|
1910
|
amine@400
|
1911
|
amine@400
|
@pytest.mark.parametrize(
    "sampling_rate, sample_width, channels",
    [
        pytest.param(8000, 1, 1, id="simple"),
        pytest.param(8000, 2, 2, id="stereo_sw_2"),
        pytest.param(5413, 2, 3, id="arbitrary_sr_multichannel"),
    ],
)
def test_concatenation(sampling_rate, sample_width, channels):
    """Add two regions of random length and check duration and bytes.

    The sum's duration must match the operands' summed durations (within
    1e-6) and its bytes must be the operands' bytes placed back to back.
    """
    first, second = _make_random_length_regions(
        [b"a", b"b"], sampling_rate, sample_width, channels
    )
    total_duration = first.duration + second.duration
    joined_bytes = bytes(first) + bytes(second)

    combined = first + second

    assert combined.duration == pytest.approx(total_duration, abs=1e-6)
    assert bytes(combined) == joined_bytes
|
amine@231
|
1935
|
amine@400
|
1936
|
amine@400
|
@pytest.mark.parametrize(
    "sampling_rate, sample_width, channels",
    [
        pytest.param(8000, 1, 1, id="simple"),
        pytest.param(8000, 2, 2, id="stereo_sw_2"),
        pytest.param(5413, 2, 3, id="arbitrary_sr_multichannel"),
    ],
)
def test_concatenation_many(sampling_rate, sample_width, channels):
    """Combine three regions of random length with the built-in ``sum``.

    The result's duration must match the summed durations (within 1e-6)
    and its bytes must be all regions' bytes joined in order.
    """
    pieces = _make_random_length_regions(
        [b"a", b"b", b"c"], sampling_rate, sample_width, channels
    )
    total_duration = sum(piece.duration for piece in pieces)
    joined_bytes = b"".join(map(bytes, pieces))

    combined = sum(pieces)

    assert combined.duration == pytest.approx(total_duration, abs=1e-6)
    assert bytes(combined) == joined_bytes
|
amine@88
|
1961
|
amine@400
|
1962
|
amine@400
|
def test_concatenation_different_sampling_rate_error():
    """Adding regions with different sampling rates raises an error.

    The addition must raise ``AudioParameterError`` and the error text must
    report both sampling rates.
    """
    left = AudioRegion(b"a" * 100, 8000, 1, 1)
    right = AudioRegion(b"b" * 100, 3000, 1, 1)

    with pytest.raises(AudioParameterError) as exc_info:
        left + right

    # The message is checked after the ``with`` block; code placed after the
    # raising statement inside the block would never execute.
    expected_message = (
        "Can only concatenate AudioRegions of the same "
        "sampling rate (8000 != 3000)"
    )
    assert str(exc_info.value) == expected_message
|
amine@88
|
1973
|
amine@88
|
1974
|
amine@400
|
def test_concatenation_different_sample_width_error():
    """Adding regions with different sample widths raises an error.

    The addition must raise ``AudioParameterError`` and the error text must
    report both sample widths.
    """
    left = AudioRegion(b"a" * 100, 8000, 2, 1)
    right = AudioRegion(b"b" * 100, 8000, 4, 1)

    with pytest.raises(AudioParameterError) as exc_info:
        left + right

    # The message is checked after the ``with`` block; code placed after the
    # raising statement inside the block would never execute.
    expected_message = (
        "Can only concatenate AudioRegions of the same sample width (2 != 4)"
    )
    assert str(exc_info.value) == expected_message
|
amine@88
|
1984
|
amine@88
|
1985
|
amine@400
|
def test_concatenation_different_number_of_channels_error():
    """Adding regions with different channel counts raises an error.

    The addition must raise ``AudioParameterError`` and the error text must
    report both channel counts.
    """
    left = AudioRegion(b"a" * 100, 8000, 1, 1)
    right = AudioRegion(b"b" * 100, 8000, 1, 2)

    with pytest.raises(AudioParameterError) as exc_info:
        left + right

    # The message is checked after the ``with`` block; code placed after the
    # raising statement inside the block would never execute.
    expected_message = (
        "Can only concatenate AudioRegions of the same "
        "number of channels (1 != 2)"
    )
    assert str(exc_info.value) == expected_message
|
amine@88
|
1996
|
amine@88
|
1997
|
amine@400
|
@pytest.mark.parametrize(
    "duration, expected_duration, expected_len, expected_len_ms",
    [
        pytest.param(0.01, 0.03, 240, 30, id="simple"),
        pytest.param(0.00575, 0.01725, 138, 17, id="rounded_len_floor"),
        pytest.param(0.00625, 0.01875, 150, 19, id="rounded_len_ceil"),
    ],
)
def test_multiplication(
    duration, expected_duration, expected_len, expected_len_ms
):
    """Multiply a mono, 2-byte-per-sample, 8000 Hz region by 1 and by 3.

    The product must hold the source bytes three times over, keep the audio
    parameters, and report the expected duration and lengths (in samples,
    seconds and milliseconds).
    """
    sample_width = 2
    payload = b"0" * int(duration * 8000 * sample_width)
    source = AudioRegion(payload, 8000, sample_width, 1)

    # Exercises both the reflected (int * region) and the direct
    # (region * int) multiplication in one expression.
    tripled = 1 * source * 3

    assert bytes(tripled) == payload * 3

    # Audio parameters are carried over unchanged.
    assert tripled.sr == 8000
    assert tripled.sw == 2
    assert tripled.ch == 1

    # Duration and the different length views.
    assert tripled.duration == expected_duration
    assert len(tripled) == expected_len
    assert tripled.len == expected_len
    assert tripled.s.len == expected_duration
    assert len(tripled.ms) == expected_len_ms
    assert tripled.ms.len == expected_len_ms
|
amine@88
|
2028
|
amine@196
|
2029
|
amine@400
|
@pytest.mark.parametrize(
    "factor, _type",
    [
        pytest.param("x", str, id="string"),
        pytest.param(1.4, float, id="float"),
    ],
)
def test_multiplication_non_int(factor, _type):
    """Multiplying a region by a non-integer factor raises ``TypeError``.

    The error text must name the type of the rejected factor.
    """
    with pytest.raises(TypeError) as exc_info:
        AudioRegion(b"0" * 80, 8000, 1, 1) * factor

    template = "Can't multiply AudioRegion by a non-int of type '{}'"
    expected_message = template.format(_type)
    assert expected_message == str(exc_info.value)
|
amine@197
|
2046
|
amine@254
|
2047
|
amine@400
|
@pytest.mark.parametrize(
    "data",
    [
        pytest.param([b"a" * 80, b"b" * 80], id="simple"),
        pytest.param(
            [b"a" * 31, b"b" * 31, b"c" * 30], id="extra_samples_1"
        ),
        pytest.param(
            [b"a" * 31, b"b" * 30, b"c" * 30], id="extra_samples_2"
        ),
        pytest.param(
            [b"a" * 11, b"b" * 11, b"c" * 10, b"c" * 10],
            id="extra_samples_3",
        ),
    ],
)
def test_truediv(data):
    """Divide a region by an integer and check the size of every part.

    ``data`` lists the chunks expected after the split. Their concatenation
    forms the region, which is divided by the number of chunks; each
    resulting part must be as long (in bytes) as its chunk.
    """
    whole = AudioRegion(b"".join(data), 80, 1, 1)
    parts = whole / len(data)

    # strict=True makes a mismatch in the number of parts an error.
    for chunk, part in zip(data, parts, strict=True):
        assert len(chunk) == len(bytes(part))
|
amine@254
|
2070
|
amine@254
|
2071
|
amine@400
|
@pytest.mark.parametrize(
    "data, sample_width, channels, expected",
    [
        pytest.param(b"a" * 10, 1, 1, [97] * 10, id="mono_sw_1"),
        pytest.param(b"a" * 10, 2, 1, [24929] * 5, id="mono_sw_2"),
        pytest.param(b"a" * 8, 4, 1, [1633771873] * 2, id="mono_sw_4"),
        pytest.param(
            b"ab" * 5, 1, 2, [[97] * 5, [98] * 5], id="stereo_sw_1"
        ),
    ],
)
def test_samples(data, sample_width, channels, expected):
    """Check the numeric sample views of a region.

    ``region.samples``, ``region.numpy()`` and ``np.array(region)`` must
    each compare equal, element-wise, to ``expected``.
    """
    region = AudioRegion(data, 10, sample_width, channels)
    reference = np.array(expected)

    assert (region.samples == reference).all()
    assert (region.numpy() == reference).all()
    assert (np.array(region) == reference).all()
|