changeset 232:4282cea451d6

Add epsilon argument to _duration_to_nb_windows to avoid rounding errors
author Amine Sehili <amine.sehili@gmail.com>
date Fri, 19 Jul 2019 21:15:20 +0100
parents 046c445b9699
children d8267b3334f0
files auditok/core.py tests/test_core.py
diffstat 2 files changed, 21 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/auditok/core.py	Thu Jul 18 20:28:45 2019 +0100
+++ b/auditok/core.py	Fri Jul 19 21:15:20 2019 +0100
@@ -18,6 +18,7 @@
 
 DEFAULT_ANALYSIS_WINDOW = 0.05
 DEFAULT_ENERGY_THRESHOLD = 50
+_EPSILON = 1e-6
 
 
 def split(
@@ -127,9 +128,9 @@
     if strict_min_dur:
         mode |= StreamTokenizer.STRICT_MIN_LENGTH
     min_length = _duration_to_nb_windows(min_dur, analysis_window, math.ceil)
-    max_length = _duration_to_nb_windows(max_dur, analysis_window, math.floor)
+    max_length = _duration_to_nb_windows(max_dur, analysis_window, math.floor, _EPSILON)
     max_continuous_silence = _duration_to_nb_windows(
-        max_silence, analysis_window, math.floor
+        max_silence, analysis_window, math.floor, _EPSILON
     )
 
     err_msg = "({0} sec.) results in {1} analysis window(s) "
@@ -163,6 +164,7 @@
             )
         )
 
+    #print(min_length, max_length, max_continuous_silence)
     tokenizer = StreamTokenizer(
         validator, min_length, max_length, max_continuous_silence, mode=mode
     )
@@ -182,7 +184,7 @@
     return region_gen
 
 
-def _duration_to_nb_windows(duration, analysis_window, round_fn=round):
+def _duration_to_nb_windows(duration, analysis_window, round_fn=round, epsilon=0):
     """
     Converts a given duration into a positive integer of analysis windows.
     if `duration / analysis_window` is not an integer, the result will be
@@ -194,9 +196,16 @@
     :Parameters:
 
     duration: float
-        a given duration in seconds or ms
+        a given duration in seconds or ms.
     analysis_window: float
-        size of analysis window, in the same unit as `duration`
+        size of analysis window, in the same unit as `duration`.
+    round_fn: callable
+        function called to round the result. Default: `round`.
+    epsilon: float
+        small value to add to the division result before rounding.
+        E.g., `0.3 / 0.1` evaluates to `2.9999999999999996`, which with
+        `round_fn=math.floor` yields `2` instead of `3`. Adding a small value
+        to `0.3 / 0.1` avoids this error.
 
     Returns:
     --------
@@ -209,7 +218,7 @@
         raise ValueError(err_msg.format(duration, analysis_window))
     if duration == 0:
         return 0
-    return int(round_fn(duration / analysis_window))
+    return int(round_fn(duration / analysis_window + epsilon))
 
 
 def _make_audio_region(
--- a/tests/test_core.py	Thu Jul 18 20:28:45 2019 +0100
+++ b/tests/test_core.py	Fri Jul 19 21:15:20 2019 +0100
@@ -38,18 +38,21 @@
         not_multiple_floor=(0.35, 0.1, math.floor, 3),
         small_duration=(0.05, 0.1, round, 0),
         small_duration_ceil=(0.05, 0.1, math.ceil, 1),
+        with_round_error=(0.3, 0.1, math.floor, 3, {"epsilon":1e-6}),
         negative_duration=(-0.5, 0.1, math.ceil, ValueError),
         negative_analysis_window=(0.5, -0.1, math.ceil, ValueError),
     )
     def test_duration_to_nb_windows(
-        self, duration, analysis_window, round_fn, expected
+        self, duration, analysis_window, round_fn, expected, kwargs=None
     ):
         if expected == ValueError:
             with self.assertRaises(expected):
                 _duration_to_nb_windows(duration, analysis_window, round_fn)
         else:
+            if kwargs is None:
+                kwargs = {}
             result = _duration_to_nb_windows(
-                duration, analysis_window, round_fn
+                duration, analysis_window, round_fn, **kwargs
             )
             self.assertEqual(result, expected)
 
@@ -363,7 +366,7 @@
         mono_aw_0_4_max_silence_0=(
             0.2,
             5,
-            0.,
+            0,
             1,
             {"uc": 1, "aw": 0.4},
             [(4, 12), (16, 24), (36, 76)],