Mercurial > hg > auditok

--- a/auditok/__init__.py	Sun May 26 22:43:08 2024 +0200
+++ b/auditok/__init__.py	Sun May 26 23:29:33 2024 +0200
@@ -10,8 +10,8 @@
 """

 from .core import *
+from .exceptions import *
 from .io import *
 from .util import *
-from .exceptions import *

 __version__ = "0.2.0"
--- a/auditok/cmdline.py	Sun May 26 22:43:08 2024 +0200
+++ b/auditok/cmdline.py	Sun May 26 23:29:33 2024 +0200
@@ -14,19 +14,19 @@
 @deffield    updated: 01 Mar 2021
 """

+import os
 import sys
-import os
+import threading
+import time
 from argparse import ArgumentParser
-import time
-import threading

-from auditok import __version__, AudioRegion
+from auditok import AudioRegion, __version__
+
+from . import workers
+from .cmdline_util import initialize_workers, make_kwargs, make_logger
+from .exceptions import AudioEncodingWarning, EndOfProcessing
+from .io import player_for
 from .util import AudioDataSource
-from .exceptions import EndOfProcessing, AudioEncodingWarning
-from .io import player_for
-from .cmdline_util import make_logger, make_kwargs, initialize_workers
-from . import workers
-

 __all__ = []
 __date__ = "2015-11-23"
--- a/auditok/cmdline_util.py	Sun May 26 22:43:08 2024 +0200
+++ b/auditok/cmdline_util.py	Sun May 26 23:29:33 2024 +0200
@@ -1,9 +1,10 @@
+import logging
 import sys
-import logging
 from collections import namedtuple
+
 from . import workers
-from .util import AudioDataSource
 from .io import player_for
+from .util import AudioReader

 _AUDITOK_LOGGER = "AUDITOK_LOGGER"
 KeywordArguments = namedtuple(
@@ -81,7 +82,7 @@

 def initialize_workers(logger=None, **kwargs):
     observers = []
-    reader = AudioDataSource(source=kwargs["input"], **kwargs)
+    reader = AudioReader(source=kwargs["input"], **kwargs)
     if kwargs["save_stream"] is not None:
         reader = workers.StreamSaverWorker(
             reader,
--- a/auditok/core.py	Sun May 26 22:43:08 2024 +0200
+++ b/auditok/core.py	Sun May 26 23:29:33 2024 +0200
@@ -7,11 +7,14 @@
     AudioRegion
     StreamTokenizer
 """
+
+import math
 import os
-import math
-from .util import AudioReader, DataValidator, AudioEnergyValidator
-from .io import check_audio_data, to_file, player_for, get_audio_source
-from .exceptions import TooSamllBlockDuration
+
+from .exceptions import TooSmallBlockDuration
+from .io import check_audio_data, get_audio_source, player_for, to_file
+from .plotting import plot
+from .util import AudioEnergyValidator, AudioReader, DataValidator

 try:
     from . import signal_numpy as signal
@@ -90,7 +93,7 @@
     max_silence=0.3,
     drop_trailing_silence=False,
     strict_min_dur=False,
-    **kwargs
+    **kwargs,
 ):
     """
     Split audio data and return a generator of AudioRegions
@@ -104,17 +107,17 @@
         Every object that is not an `AudioReader` will be transformed into an
         `AudioReader` before processing. If it is an `str` that refers to a raw
         audio file, `bytes` or None, audio parameters should be provided using
-        kwargs (i.e., `samplig_rate`, `sample_width` and `channels` or their
+        kwargs (i.e., `sampling_rate`, `sample_width` and `channels` or their
         alias).
         If `input` is str then audio format will be guessed from file extension.
         `audio_format` (alias `fmt`) kwarg can also be given to specify audio
         format explicitly. If none of these options is available, rely on
         backend (currently only pydub is supported) to load data.
     min_dur : float, default: 0.2
-        minimun duration in seconds of a detected audio event. By using large
+        minimum duration in seconds of a detected audio event. By using large
         values for `min_dur`, very short audio events (e.g., very short 1-word
-        utterances like 'yes' or 'no') can be mis detected. Using very short
-        values might result in a high number of short, unuseful audio events.
+        utterances like 'yes' or 'no') can be missed. Using a very small
+        value may result in a large number of overly short audio events.
     max_dur : float, default: 5
         maximum duration in seconds of a detected audio event. If an audio event
         lasts more than `max_dur` it will be truncated. If the continuation of a
@@ -177,7 +180,7 @@
     max_read, mr : float, default: None, read until end of stream
         maximum data to read from source in seconds.
     validator, val : callable, DataValidator
-        custom data validator. If `None` (default), an `AudioEnergyValidor` is
+        custom data validator. If `None` (default), an `AudioEnergyValidator` is
         used with the given energy threshold. Can be a callable or an instance
         of `DataValidator` that implements `is_valid`. In either case, it'll be
         called with with a window of audio data as the first parameter.
@@ -197,11 +200,11 @@
         a generator of detected :class:`AudioRegion` s.
     """
     if min_dur <= 0:
-        raise ValueError("'min_dur' ({}) must be > 0".format(min_dur))
+        raise ValueError(f"'min_dur' ({min_dur}) must be > 0")
     if max_dur <= 0:
-        raise ValueError("'max_dur' ({}) must be > 0".format(max_dur))
+        raise ValueError(f"'max_dur' ({max_dur}) must be > 0")
     if max_silence < 0:
-        raise ValueError("'max_silence' ({}) must be >= 0".format(max_silence))
+        raise ValueError(f"'max_silence' ({max_silence}) must be >= 0")

     if isinstance(input, AudioReader):
         source = input
@@ -212,7 +215,7 @@
         )
         if analysis_window <= 0:
             raise ValueError(
-                "'analysis_window' ({}) must be > 0".format(analysis_window)
+                f"'analysis_window' ({analysis_window}) must be > 0"
             )

         params = kwargs.copy()
@@ -225,11 +228,12 @@
             input = bytes(input)
         try:
             source = AudioReader(input, block_dur=analysis_window, **params)
-        except TooSamllBlockDuration as exc:
-            err_msg = "Too small 'analysis_windows' ({0}) for sampling rate "
-            err_msg += "({1}). Analysis windows should at least be 1/{1} to "
-            err_msg += "cover one single data sample"
-            raise ValueError(err_msg.format(exc.block_dur, exc.sampling_rate))
+        except TooSmallBlockDuration as exc:
+            err_msg = f"Too small 'analysis_window' ({exc.block_dur}) for "
+            err_msg += f"sampling rate ({exc.sampling_rate}). Analysis window "
+            err_msg += f"should at least be 1/{exc.sampling_rate} to cover "
+            err_msg += "one data sample"
+            raise ValueError(err_msg) from exc

     validator = kwargs.get("validator", kwargs.get("val"))
     if validator is None:
@@ -358,8 +362,8 @@
     frame_duration: float
         duration of analysis window in seconds
     start_frame : int
-        index of the fisrt analysis window
-    samling_rate : int
+        index of the first analysis window
+    sampling_rate : int
         sampling rate of audio data
     sample_width : int
         number of bytes of one audio sample
@@ -369,7 +373,7 @@
     Returns
     -------
     audio_region : AudioRegion
-        AudioRegion whose start time is calculeted as:
+        AudioRegion whose start time is calculated as:
         `1000 * start_frame * frame_duration`
     """
     start = start_frame * frame_duration
@@ -648,13 +652,15 @@
     @property
     def seconds(self):
         """
-        A view to slice audio region by seconds (using ``region.seconds[start:end]``).
+        A view to slice audio region by seconds using
+        ``region.seconds[start:end]``.
         """
         return self._seconds_view

     @property
     def millis(self):
-        """A view to slice audio region by milliseconds (using ``region.millis[start:end]``)."""
+        """A view to slice audio region by milliseconds using
+        ``region.millis[start:end]``."""
         return self._millis_view

     @property
@@ -786,7 +792,7 @@
         max_silence=0.3,
         drop_trailing_silence=False,
         strict_min_dur=False,
-        **kwargs
+        **kwargs,
     ):
         """Split audio region. See :func:`auditok.split()` for a comprehensive
         description of split parameters.
@@ -804,7 +810,7 @@
             max_silence=max_silence,
             drop_trailing_silence=drop_trailing_silence,
             strict_min_dur=strict_min_dur,
-            **kwargs
+            **kwargs,
         )

     def plot(
@@ -816,7 +822,7 @@
         dpi=120,
         theme="auditok",
     ):
-        """Plot audio region, one sub-plot for each channel.
+        """Plot audio region using one sub-plot per channel.

         Parameters
         ----------
@@ -835,20 +841,15 @@
             plot theme to use. Currently only "auditok" theme is implemented. To
             provide you own them see :attr:`auditok.plotting.AUDITOK_PLOT_THEME`.
         """
-        try:
-            from auditok.plotting import plot
-
-            plot(
-                self,
-                scale_signal=scale_signal,
-                show=show,
-                figsize=figsize,
-                save_as=save_as,
-                dpi=dpi,
-                theme=theme,
-            )
-        except ImportError:
-            raise RuntimeWarning("Plotting requires matplotlib")
+        plot(
+            self,
+            scale_signal=scale_signal,
+            show=show,
+            figsize=figsize,
+            save_as=save_as,
+            dpi=dpi,
+            theme=theme,
+        )

     def split_and_plot(
         self,
@@ -863,42 +864,37 @@
         save_as=None,
         dpi=120,
         theme="auditok",
-        **kwargs
+        **kwargs,
     ):
         """Split region and plot signal and detections. Alias: :meth:`splitp`.
         See :func:`auditok.split()` for a comprehensive description of split
         parameters. Also see :meth:`plot` for plot parameters.
         """
-        try:
-            from auditok.plotting import plot
-
-            regions = self.split(
-                min_dur=min_dur,
-                max_dur=max_dur,
-                max_silence=max_silence,
-                drop_trailing_silence=drop_trailing_silence,
-                strict_min_dur=strict_min_dur,
-                **kwargs
-            )
-            regions = list(regions)
-            detections = ((reg.meta.start, reg.meta.end) for reg in regions)
-            eth = kwargs.get(
-                "energy_threshold", kwargs.get("eth", DEFAULT_ENERGY_THRESHOLD)
-            )
-            plot(
-                self,
-                scale_signal=scale_signal,
-                detections=detections,
-                energy_threshold=eth,
-                show=show,
-                figsize=figsize,
-                save_as=save_as,
-                dpi=dpi,
-                theme=theme,
-            )
-            return regions
-        except ImportError:
-            raise RuntimeWarning("Plotting requires matplotlib")
+        regions = self.split(
+            min_dur=min_dur,
+            max_dur=max_dur,
+            max_silence=max_silence,
+            drop_trailing_silence=drop_trailing_silence,
+            strict_min_dur=strict_min_dur,
+            **kwargs,
+        )
+        regions = list(regions)
+        detections = ((reg.meta.start, reg.meta.end) for reg in regions)
+        eth = kwargs.get(
+            "energy_threshold", kwargs.get("eth", DEFAULT_ENERGY_THRESHOLD)
+        )
+        plot(
+            self,
+            scale_signal=scale_signal,
+            detections=detections,
+            energy_threshold=eth,
+            show=show,
+            figsize=figsize,
+            save_as=save_as,
+            dpi=dpi,
+            theme=theme,
+        )
+        return regions

     def __array__(self):
         return self.samples
@@ -1116,13 +1112,11 @@
             In that case the trailing silence can be removed if you use the
             `StreamTokenizer.DROP_TRAILING_SILENCE` mode.

-            -4 `(StreamTokenizer.STRICT_MIN_LENGTH | StreamTokenizer.DROP_TRAILING_SILENCE)`:
+            -4 `(StreamTokenizer.STRICT_MIN_LENGTH | StreamTokenizer.DROP_TRAILING_SILENCE)`:  # noqa: B950
             use both options. That means: first remove tailing silence, then
             check if the token still has a length of at least `min_length`.


-
-
     Examples
     --------
--- a/auditok/io.py	Sun May 26 22:43:08 2024 +0200
+++ b/auditok/io.py	Sun May 26 23:29:33 2024 +0200
@@ -15,12 +15,12 @@
     to_file
     player_for
 """
+
 import os
 import sys
 import wave
-import warnings
 from abc import ABC, abstractmethod
-from functools import partial
+
 from .exceptions import AudioIOError, AudioParameterError

 try:
@@ -104,20 +104,17 @@
     audio_parameters : tuple
         a tuple for audio parameters as (sampling_rate, sample_width, channels).
     """
-    err_message = (
-        "'{ln}' (or '{sn}') must be a positive integer, found: '{val}'"
-    )
     parameters = []
-    for (long_name, short_name) in (
+    for long_name, short_name in (
         ("sampling_rate", "sr"),
         ("sample_width", "sw"),
         ("channels", "ch"),
     ):
         param = param_dict.get(long_name, param_dict.get(short_name))
         if param is None or not isinstance(param, int) or param <= 0:
-            raise AudioParameterError(
-                err_message.format(ln=long_name, sn=short_name, val=param)
-            )
+            err_message = f"{long_name!r} (or {short_name!r}) must be a "
+            err_message += f"positive integer, passed value: {param}."
+            raise AudioParameterError(err_message)
         parameters.append(param)
     sampling_rate, sample_width, channels = parameters
     return sampling_rate, sample_width, channels
@@ -141,7 +138,10 @@
     """

     def __init__(
-        self, sampling_rate, sample_width, channels,
+        self,
+        sampling_rate,
+        sample_width,
+        channels,
     ):

         if sample_width not in (1, 2, 4):
@@ -283,9 +283,13 @@
     """

     def __init__(
-        self, data, sampling_rate=16000, sample_width=2, channels=1,
+        self,
+        data,
+        sampling_rate=16000,
+        sample_width=2,
+        channels=1,
     ):
-        AudioSource.__init__(self, sampling_rate, sample_width, channels)
+        super().__init__(sampling_rate, sample_width, channels)
         check_audio_data(data, sample_width, channels)
         self._data = data
         self._sample_size_all_channels = sample_width * channels
@@ -558,7 +562,10 @@
     """

     def __init__(
-        self, sampling_rate=16000, sample_width=2, channels=1,
+        self,
+        sampling_rate=16000,
+        sample_width=2,
+        channels=1,
     ):
         FileAudioSource.__init__(self, sampling_rate, sample_width, channels)
         self._is_open = False
@@ -610,7 +617,10 @@
     """

     def __init__(
-        self, sampling_rate=16000, sample_width=2, channels=1,
+        self,
+        sampling_rate=16000,
+        sample_width=2,
+        channels=1,
     ):
         if sample_width not in (1, 2, 4):
             raise ValueError("Sample width in bytes must be one of 1, 2 or 4")
@@ -640,7 +650,7 @@
                 chunk_gen,
                 total=nb_chunks,
                 duration=duration,
-                **progress_bar_kwargs
+                **progress_bar_kwargs,
             )
         if self.stream.is_stopped():
             self.stream.start_stream()
@@ -737,7 +747,7 @@
         return PyAudioSource(
             *_get_audio_parameters(kwargs),
             frames_per_buffer=frames_per_buffer,
-            input_device_index=input_device_index
+            input_device_index=input_device_index,
         )


@@ -1004,12 +1014,7 @@
     if audio_format in (None, "raw"):
         _save_raw(data, file)
         return
-    try:
-        sampling_rate, sample_width, channels = _get_audio_parameters(kwargs)
-    except AudioParameterError as exc:
-        err_message = "All audio parameters are required to save formats "
-        "other than raw. Error detail: {}".format(exc)
-        raise AudioParameterError(err_message)
+    sampling_rate, sample_width, channels = _get_audio_parameters(kwargs)
     if audio_format in ("wav", "wave"):
         _save_wave(data, file, sampling_rate, sample_width, channels)
     elif _WITH_PYDUB:
@@ -1017,5 +1022,6 @@
             data, file, audio_format, sampling_rate, sample_width, channels
         )
     else:
-        err_message = "cannot write file format {} (file name: {})"
-        raise AudioIOError(err_message.format(audio_format, file))
+        raise AudioIOError(
+            f"cannot write file format {audio_format} (file name: {file})"
+        )
--- a/auditok/plotting.py	Sun May 26 22:43:08 2024 +0200
+++ b/auditok/plotting.py	Sun May 26 23:29:33 2024 +0200
@@ -40,7 +40,7 @@
     ls = theme.get("linestyle", theme.get("ls"))
     lw = theme.get("linewidth", theme.get("lw"))
     alpha = theme.get("alpha")
-    for (start, end) in detections:
+    for start, end in detections:
         subplot.axvspan(start, end, fc=fc, ec=ec, ls=ls, lw=lw, alpha=alpha)
--- a/auditok/signal.py	Sun May 26 22:43:08 2024 +0200
+++ b/auditok/signal.py	Sun May 26 23:29:33 2024 +0200
@@ -12,9 +12,10 @@
     calculate_energy_single_channel
     calculate_energy_multichannel
 """
-from array import array as array_
+
 import audioop
 import math
+from array import array as array_

 FORMAT = {1: "b", 2: "h", 4: "i"}
 _EPSILON = 1e-10
--- a/auditok/signal_numpy.py	Sun May 26 22:43:08 2024 +0200
+++ b/auditok/signal_numpy.py	Sun May 26 23:29:33 2024 +0200
@@ -1,8 +1,9 @@
 import numpy as np
+
 from .signal import (
+    calculate_energy_multichannel,
+    calculate_energy_single_channel,
     compute_average_channel_stereo,
-    calculate_energy_single_channel,
-    calculate_energy_multichannel,
 )

 FORMAT = {1: np.int8, 2: np.int16, 4: np.int32}
--- a/auditok/workers.py	Sun May 26 22:43:08 2024 +0200
+++ b/auditok/workers.py	Sun May 26 23:29:33 2024 +0200
@@ -1,22 +1,22 @@
 import os
+import subprocess
 import sys
+import wave
+from abc import ABCMeta, abstractmethod
+from collections import namedtuple
+from datetime import datetime, timedelta
+from queue import Empty, Queue
 from tempfile import NamedTemporaryFile
-from abc import ABCMeta, abstractmethod
 from threading import Thread
-from datetime import datetime, timedelta
-from collections import namedtuple
-import wave
-import subprocess
-from queue import Queue, Empty
-from .io import _guess_audio_format
-from .util import AudioDataSource, make_duration_formatter
+
 from .core import split
 from .exceptions import (
-    EndOfProcessing,
     AudioEncodingError,
     AudioEncodingWarning,
+    EndOfProcessing,
 )
-
+from .io import _guess_audio_format
+from .util import AudioReader, make_duration_formatter

 _STOP_PROCESSING = "STOP_PROCESSING"
 _Detection = namedtuple("_Detection", "id start end duration")
@@ -86,7 +86,7 @@
             return None


-class TokenizerWorker(Worker, AudioDataSource):
+class TokenizerWorker(Worker, AudioReader):
     def __init__(self, reader, observers=None, logger=None, **kwargs):
         self._observers = observers if observers is not None else []
         self._reader = reader
@@ -245,7 +245,7 @@
         self.stop()

     def rewind(self):
-        # ensure compatibility with AudioDataSource with record=True
+        # ensure compatibility with AudioReader with record=True
         pass

     @property
--- a/doc/conf.py	Sun May 26 22:43:08 2024 +0200
+++ b/doc/conf.py	Sun May 26 23:29:33 2024 +0200
@@ -12,11 +12,11 @@
 # All configuration values have a default; values that are commented out
 # serve to show the default.

-import sys
+import ast
 import os
 import re
-import ast
 import shlex
+import sys

 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
@@ -63,9 +63,9 @@
 master_doc = "index"

 # General information about the project.
-project = u"auditok"
-copyright = u"2015-2021, Amine Sehili"
-author = u"Amine Sehili"
+project = "auditok"
+copyright = "2015-2021, Amine Sehili"
+author = "Amine Sehili"

 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
@@ -253,8 +253,8 @@
     (
         master_doc,
         "auditok.tex",
-        u"auditok Documentation",
-        u"Amine Sehili",
+        "auditok Documentation",
+        "Amine Sehili",
         "manual",
     ),
 ]
@@ -284,7 +284,7 @@

 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
-man_pages = [(master_doc, "auditok", u"auditok Documentation", [author], 1)]
+man_pages = [(master_doc, "auditok", "auditok Documentation", [author], 1)]

 # If true, show URL addresses after external links.
 # man_show_urls = False
@@ -299,7 +299,7 @@
     (
         master_doc,
         "auditok",
-        u"auditok Documentation",
+        "auditok Documentation",
         author,
         "auditok",
         "Audio Activity Detection tool.",
--- a/setup.py	Sun May 26 22:43:08 2024 +0200
+++ b/setup.py	Sun May 26 23:29:33 2024 +0200
@@ -1,9 +1,9 @@
+import ast
+import re
 import sys
-import re
-import ast
+
 from setuptools import setup

-
 _version_re = re.compile(r"__version__\s+=\s+(.*)")

 with open("auditok/__init__.py", "rt") as f: