changeset 387:bd242e80455f

Update documentation and configuration
author Amine Sehili <amine.sehili@gmail.com>
date Tue, 02 Mar 2021 20:10:50 +0100
parents c030134b7870
children 5fd9b6b7ff0d 9e143e277d51
files .pre-commit-config.yaml CHANGELOG README.rst auditok/core.py auditok/util.py demos/audio_tokenize_demo.py demos/audio_trim_demo.py demos/echo.py doc/_static/css/custom_style.css doc/conf.py doc/examples.rst pyproject.toml
diffstat 12 files changed, 53 insertions(+), 314 deletions(-) [+]
line wrap: on
line diff
--- a/.pre-commit-config.yaml	Mon Mar 01 23:11:49 2021 +0100
+++ b/.pre-commit-config.yaml	Tue Mar 02 20:10:50 2021 +0100
@@ -1,6 +1,6 @@
 repos:
 -   repo: https://github.com/psf/black
-    rev: stable
+    rev: 20.8b1
     hooks:
     - id: black
       language_version: python3.7
--- a/CHANGELOG	Mon Mar 01 23:11:49 2021 +0100
+++ b/CHANGELOG	Tue Mar 02 20:10:50 2021 +0100
@@ -7,9 +7,11 @@
 - Implement split function as a high-level API for tokenization
 - Implement AudioRegion class for simple audio objects manipulation
 - Use a much faster energy computation method (based on standard audioop module)
+- Make ADSFactory deprecated
 - Choose which channel(s) to use for tokenization
 - Save multi-channel audio data
 - Refactor code in all modules
+- Use genty for tests
 - Improve documentation
 - Use ArgumentParser instead of OptionParser in command-line script
 - Clean up command-line script and move functions and workers to dedicated modules
--- a/README.rst	Mon Mar 01 23:11:49 2021 +0100
+++ b/README.rst	Tue Mar 02 20:10:50 2021 +0100
@@ -10,7 +10,7 @@
 
 ``auditok`` is an **Audio Activity Detection** tool that can process online data
 (read from an audio device or from standard input) as well as audio files.
-It can be used as a command line program or by calling its API.
+It can be used as a command-line program or by calling its API.
 
 The latest version of the documentation can be found on
 `readthedocs. <https://readthedocs.org/projects/auditok/badge/?version=latest>`_
--- a/auditok/core.py	Mon Mar 01 23:11:49 2021 +0100
+++ b/auditok/core.py	Tue Mar 02 20:10:50 2021 +0100
@@ -240,9 +240,7 @@
         validator = AudioEnergyValidator(
             energy_threshold, source.sw, source.ch, use_channel=use_channel
         )
-    mode = (
-        StreamTokenizer.DROP_TRAILING_SILENCE if drop_trailing_silence else 0
-    )
+    mode = StreamTokenizer.DROP_TRAILING_SILENCE if drop_trailing_silence else 0
     if strict_min_dur:
         mode |= StreamTokenizer.STRICT_MIN_LENGTH
     min_length = _duration_to_nb_windows(min_dur, analysis_window, math.ceil)
@@ -532,8 +530,7 @@
 
 
 class _AudioRegionMetadata(dict):
-    """A class to store `AudioRegion`'s metadata.
-    """
+    """A class to store `AudioRegion`'s metadata."""
 
     def __getattr__(self, name):
         if name in self:
@@ -610,8 +607,7 @@
 
     @meta.setter
     def meta(self, new_meta):
-        """Meta data of audio region.
-        """
+        """Meta data of audio region."""
         self._meta = _AudioRegionMetadata(new_meta)
 
     @classmethod
@@ -658,8 +654,7 @@
 
     @property
     def millis(self):
-        """A view to slice audio region by milliseconds (using ``region.millis[start:end]``).
-        """
+        """A view to slice audio region by milliseconds (using ``region.millis[start:end]``)."""
         return self._millis_view
 
     @property
@@ -673,38 +668,32 @@
 
     @property
     def sampling_rate(self):
-        """Samling rate of audio data.
-        """
+        """Sampling rate of audio data."""
         return self._sampling_rate
 
     @property
     def sr(self):
-        """Samling rate of audio data, alias for `sampling_rate`.
-        """
+        """Sampling rate of audio data, alias for `sampling_rate`."""
         return self._sampling_rate
 
     @property
     def sample_width(self):
-        """Number of bytes per sample, one channel considered.
-        """
+        """Number of bytes per sample, one channel considered."""
         return self._sample_width
 
     @property
     def sw(self):
-        """Number of bytes per sample, alias for `sampling_rate`.
-        """
+        """Number of bytes per sample, alias for `sample_width`."""
         return self._sample_width
 
     @property
     def channels(self):
-        """Number of channels of audio data.
-        """
+        """Number of channels of audio data."""
         return self._channels
 
     @property
     def ch(self):
-        """Number of channels of audio data, alias for `channels`.
-        """
+        """Number of channels of audio data, alias for `channels`."""
         return self._channels
 
     def play(self, progress_bar=False, player=None, **progress_bar_kwargs):
@@ -730,9 +719,7 @@
             self._data, progress_bar=progress_bar, **progress_bar_kwargs
         )
 
-    def save(
-        self, file, audio_format=None, exists_ok=True, **audio_parameters
-    ):
+    def save(self, file, audio_format=None, exists_ok=True, **audio_parameters):
         """
         Save audio region to file.
 
@@ -918,8 +905,7 @@
 
     @property
     def samples(self):
-        """Audio region as arrays of samples, one array per channel.
-        """
+        """Audio region as arrays of samples, one array per channel."""
         if self._samples is None:
             self._samples = signal.to_array(
                 self._data, self.sample_width, self.channels
@@ -1005,9 +991,7 @@
 
     def __truediv__(self, n):
         if not isinstance(n, int) or n <= 0:
-            raise TypeError(
-                "AudioRegion can only be divided by a positive int"
-            )
+            raise TypeError("AudioRegion can only be divided by a positive int")
         samples_per_sub_region, rest = divmod(len(self), n)
         onset = 0
         sub_regions = []
@@ -1232,9 +1216,7 @@
             )
 
         if min_length <= 0 or min_length > max_length:
-            err_msg = (
-                "'min_length' must be > 0 and <= 'max_length' (value={0})"
-            )
+            err_msg = "'min_length' must be > 0 and <= 'max_length' (value={0})"
             raise ValueError(err_msg.format(min_length))
 
         if max_continuous_silence >= max_length:
--- a/auditok/util.py	Mon Mar 01 23:11:49 2021 +0100
+++ b/auditok/util.py	Tue Mar 02 20:10:50 2021 +0100
@@ -509,9 +509,7 @@
             "asrc", None
         )
         kwargs["fn"] = kwargs.pop("filename", None) or kwargs.pop("fn", None)
-        kwargs["db"] = kwargs.pop("data_buffer", None) or kwargs.pop(
-            "db", None
-        )
+        kwargs["db"] = kwargs.pop("data_buffer", None) or kwargs.pop("db", None)
 
         record = kwargs.pop("record", False)
         if not record:
--- a/demos/audio_tokenize_demo.py	Mon Mar 01 23:11:49 2021 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,80 +0,0 @@
-"""
-@author: Amine SEHILI <amine.sehili@gmail.com>
-September, 2015
-"""
-
-from auditok import (
-    ADSFactory,
-    AudioEnergyValidator,
-    StreamTokenizer,
-    player_for,
-    dataset,
-)
-import sys
-
-try:
-
-    # We set the `record` argument to True so that we can rewind the source
-    asource = ADSFactory.ads(
-        filename=dataset.one_to_six_arabic_16000_mono_bc_noise, record=True
-    )
-
-    validator = AudioEnergyValidator(
-        sample_width=asource.get_sample_width(), energy_threshold=65
-    )
-
-    # Default analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate())
-    # min_length=20 : minimum length of a valid audio activity is 20 * 10 == 200 ms
-    # max_length=400 :  maximum length of a valid audio activity is 400 * 10 == 4000 ms == 4 seconds
-    # max_continuous_silence=30 : maximum length of a tolerated  silence within a valid audio activity is 30 * 30 == 300 ms
-    tokenizer = StreamTokenizer(
-        validator=validator,
-        min_length=20,
-        max_length=400,
-        max_continuous_silence=30,
-    )
-
-    asource.open()
-    tokens = tokenizer.tokenize(asource)
-
-    # Play detected regions back
-    player = player_for(asource)
-
-    # Rewind and read the whole signal
-    asource.rewind()
-    original_signal = []
-
-    while True:
-        w = asource.read()
-        if w is None:
-            break
-        original_signal.append(w)
-
-    original_signal = b"".join(original_signal)
-    player.play(original_signal)
-
-    print("\n ** playing detected regions...\n")
-    for i, t in enumerate(tokens):
-        print(
-            "Token [{0}] starts at {1} and ends at {2}".format(
-                i + 1, t[1], t[2]
-            )
-        )
-        data = b"".join(t[0])
-        player.play(data)
-
-    assert len(tokens) == 8
-
-    asource.close()
-    player.stop()
-
-except KeyboardInterrupt:
-
-    player.stop()
-    asource.close()
-    sys.exit(0)
-
-except Exception as e:
-
-    sys.stderr.write(str(e) + "\n")
-    sys.exit(1)
--- a/demos/audio_trim_demo.py	Mon Mar 01 23:11:49 2021 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,120 +0,0 @@
-"""
-@author: Amine SEHILI <amine.sehili@gmail.com>
-September, 2015
-"""
-
-# Trim leading and trailing silence from a record
-
-from auditok import (
-    ADSFactory,
-    AudioEnergyValidator,
-    StreamTokenizer,
-    player_for,
-    dataset,
-)
-import pyaudio
-import sys
-
-"""
-The  tokenizer in the following example is set up to remove the silence
-that precedes the first acoustic activity or follows the last activity
-in a record. It preserves whatever it founds between the two activities.
-In other words, it removes the leading and trailing silence.
-
-Sampling rate is 44100 sample per second, we'll use an analysis window of 100 ms
-(i.e. bloc_ksize == 4410)
-
-Energy threshold is 50.
-
-The tokenizer will start accumulating windows up from the moment it encounters
-the first analysis window of an energy >= 50. ALL the following windows will be
-kept regardless of their energy. At the end of the analysis, it will drop trailing
- windows with an energy below 50.
-
-This is an interesting example because the audio file we're analyzing contains a very
-brief noise that occurs within the leading silence. We certainly do want our tokenizer
-to stop at this point and considers whatever it comes after as a useful signal.
-To force the tokenizer to ignore that brief event we use two other parameters `init_min`
-ans `init_max_silence`. By `init_min`=3 and `init_max_silence`=1 we tell the tokenizer
-that a valid event must start with at least 3 noisy windows, between which there
-is at most 1 silent window.
-
-Still with this configuration we can get the tokenizer detect that noise as a valid event
-(if it actually contains 3 consecutive noisy frames). To circummvent this we use an enough
-large analysis window (here of 100 ms) to ensure that the brief noise be surrounded by a much
-longer silence and hence the energy of the overall analysis window will be below 50.
-
-When using a shorter analysis window (of 10ms for instance, block_size == 441), the brief
-noise contributes more to energy calculation which yields an energy of over 50 for the window.
-Again we can deal with this situation by using a higher energy threshold (55 for example)
-
-"""
-
-try:
-    # record = True so that we'll be able to rewind the source.
-    asource = ADSFactory.ads(
-        filename=dataset.was_der_mensch_saet_mono_44100_lead_tail_silence,
-        record=True,
-        block_size=4410,
-    )
-    asource.open()
-
-    original_signal = []
-    # Read the whole signal
-    while True:
-        w = asource.read()
-        if w is None:
-            break
-        original_signal.append(w)
-
-    original_signal = b"".join(original_signal)
-
-    # rewind source
-    asource.rewind()
-
-    # Create a validator with an energy threshold of 50
-    validator = AudioEnergyValidator(
-        sample_width=asource.get_sample_width(), energy_threshold=50
-    )
-
-    # Create a tokenizer with an unlimited token length and continuous silence within a token
-    # Note the DROP_TRAILING_SILENCE mode that will ensure removing trailing silence
-    trimmer = StreamTokenizer(
-        validator,
-        min_length=20,
-        max_length=99999999,
-        max_continuous_silence=9999999,
-        mode=StreamTokenizer.DROP_TRAILING_SILENCE,
-        init_min=3,
-        init_max_silence=1,
-    )
-
-    tokens = trimmer.tokenize(asource)
-
-    # Make sure we only have one token
-    assert len(tokens) == 1, "Should have detected one single token"
-
-    trimmed_signal = b"".join(tokens[0][0])
-
-    player = player_for(asource)
-
-    print(
-        "\n ** Playing original signal (with leading and trailing silence)..."
-    )
-    player.play(original_signal)
-    print("\n ** Playing trimmed signal...")
-    player.play(trimmed_signal)
-
-    player.stop()
-    asource.close()
-
-except KeyboardInterrupt:
-
-    player.stop()
-    asource.close()
-    sys.exit(0)
-
-except Exception as e:
-
-    sys.stderr.write(str(e) + "\n")
-    sys.exit(1)
--- a/demos/echo.py	Mon Mar 01 23:11:49 2021 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,64 +0,0 @@
-from auditok import (
-    ADSFactory,
-    AudioEnergyValidator,
-    StreamTokenizer,
-    player_for,
-)
-import pyaudio
-import sys
-
-try:
-
-    energy_threshold = 45
-    duration = 10  # seconds
-
-    if len(sys.argv) > 1:
-        energy_threshold = float(sys.argv[1])
-
-    if len(sys.argv) > 2:
-        duration = float(sys.argv[2])
-
-    # record = True so that we'll be able to rewind the source.
-    # max_time = 10: read 10 seconds from the microphone
-    asource = ADSFactory.ads(record=True, max_time=duration)
-
-    validator = AudioEnergyValidator(
-        sample_width=asource.get_sample_width(),
-        energy_threshold=energy_threshold,
-    )
-    tokenizer = StreamTokenizer(
-        validator=validator,
-        min_length=20,
-        max_length=250,
-        max_continuous_silence=30,
-    )
-
-    player = player_for(asource)
-
-    def echo(data, start, end):
-        print("Acoustic activity at: {0}--{1}".format(start, end))
-        player.play(b"".join(data))
-
-    asource.open()
-
-    print(
-        "\n  ** Make some noise (dur:{}, energy:{})...".format(
-            duration, energy_threshold
-        )
-    )
-
-    tokenizer.tokenize(asource, callback=echo)
-
-    asource.close()
-    player.stop()
-
-except KeyboardInterrupt:
-
-    player.stop()
-    asource.close()
-    sys.exit(0)
-
-except Exception as e:
-
-    sys.stderr.write(str(e) + "\n")
-    sys.exit(1)
--- a/doc/_static/css/custom_style.css	Mon Mar 01 23:11:49 2021 +0100
+++ b/doc/_static/css/custom_style.css	Tue Mar 02 20:10:50 2021 +0100
@@ -1,3 +1,7 @@
+div.wy-side-nav-search {
+    background-color: #000000;
+}
+
 div.wy-side-nav-search .version {
     color: #DDDDDD;
     font-weight: bold;
--- a/doc/conf.py	Mon Mar 01 23:11:49 2021 +0100
+++ b/doc/conf.py	Tue Mar 02 20:10:50 2021 +0100
@@ -137,7 +137,7 @@
     html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
 html_theme_options = {
     "logo_only": True,
-    "style_nav_header_background": "black",
+    "style_nav_header_background": "#000000",
 }
 
 # Theme options are theme-specific and customize the look and feel of a theme
--- a/doc/examples.rst	Mon Mar 01 23:11:49 2021 +0100
+++ b/doc/examples.rst	Tue Mar 02 20:10:50 2021 +0100
@@ -1,5 +1,5 @@
-Loading audio data
-------------------
+Load audio data
+---------------
 
 Audio data is loaded with the :func:`load` function which can read from audio
 files, the microphone or use raw audio data.
@@ -7,15 +7,15 @@
 From a file
 ===========
 
-If the first argument of :func:`load` is a string, it should be a path to an audio
-file.
+If the first argument of :func:`load` is a string, it should be a path to an
+audio file.
 
 .. code:: python
 
     import auditok
     region = auditok.load("audio.ogg")
 
-If input file contains a raw (headerless) audio data, passing `audio_format="raw"`
+If input file contains raw (headerless) audio data, passing `audio_format="raw"`
 and other audio parameters (`sampling_rate`, `sample_width` and `channels`) is
 mandatory. In the following example we pass audio parameters with their short
 names:
@@ -42,6 +42,8 @@
     data = b"\0" * sr * sw * ch
     region = auditok.load(data, sr=sr, sw=sw, ch=ch)
     print(region)
+    # alternatively you can use
+    #region = auditok.AudioRegion(data, sr, sw, ch)
 
 output:
 
@@ -74,15 +76,30 @@
 Skip part of audio data
 =======================
 
-If the `skip` parameter is > 0, :func:`load` will skip that leading amount of audio
-data:
+If the `skip` parameter is > 0, :func:`load` will skip that amount in seconds
+of leading audio data:
 
 .. code:: python
 
     import auditok
     region = auditok.load("audio.ogg", skip=2) # skip the first 2 seconds
 
-This argument must be 0 when reading from the microphone.
+This argument must be 0 when reading data from the microphone.
+
+
+Limit the amount of read audio
+==============================
+
+If the `max_read` parameter is > 0, :func:`load` will read at most that amount
+in seconds of audio data:
+
+.. code:: python
+
+    import auditok
+    region = auditok.load("audio.ogg", max_read=5)
+    assert region.duration <= 5
+
+This argument is mandatory when reading data from the microphone.
 
 
 Basic split example
@@ -188,8 +205,8 @@
 seconds with the `max_read` argument.
 
 
-Accessing recorded data after split
------------------------------------
+Access recorded data after split
+--------------------------------
 
 Using a :class:`Recorder` object you can get hold of acquired audio data:
 
@@ -362,7 +379,7 @@
     assert len(samples) == region.channels
 
 
-If `numpy` is not installed you can use:
+If `numpy` is installed you can use:
 
 .. code:: python
 
--- a/pyproject.toml	Mon Mar 01 23:11:49 2021 +0100
+++ b/pyproject.toml	Tue Mar 02 20:10:50 2021 +0100
@@ -1,5 +1,5 @@
 [tool.black]
-line-length = 79
+line-length = 80
 include = '\.pyi?$'
 exclude = '''
 /(