Mercurial > hg > auditok

--- a/auditok/core.py	Sun Jan 03 18:46:40 2021 +0100
+++ b/auditok/core.py	Sun Jan 10 17:11:07 2021 +0100
@@ -1,18 +1,16 @@
 """
+.. autosummary::
+    :toctree: generated/

-Summary
-=======
-
-.. autosummary::
-        split
-        AudioRegion
-        StreamTokenizer
+    split
+    AudioRegion
+    StreamTokenizer
 """
 import os
 import math
-from auditok.util import AudioReader, DataValidator, AudioEnergyValidator
-from auditok.io import check_audio_data, to_file, player_for, get_audio_source
-from auditok.exceptions import TooSamllBlockDuration
+from .util import AudioReader, DataValidator, AudioEnergyValidator
+from .io import check_audio_data, to_file, player_for, get_audio_source
+from .exceptions import TooSamllBlockDuration

 try:
     from . import signal_numpy as signal
@@ -24,7 +22,7 @@

 DEFAULT_ANALYSIS_WINDOW = 0.05
 DEFAULT_ENERGY_THRESHOLD = 50
-_EPSILON = 1e-6
+_EPSILON = 1e-10


 def split(
@@ -37,7 +35,7 @@
     **kwargs
 ):
     """
-    Split audio data and return a generator of `AudioRegion`s
+    Split audio data and return a generator of AudioRegions

     Parameters
     ----------
@@ -45,12 +43,12 @@
         input audio data. If str, it should be a path to an existing audio file.
         "-" is interpreted as standard input. If bytes, input is considered as
         raw audio data. If None, read audio from microphone.
-        Every object that is not an ´AudioReader´ will be transformed into an
+        Every object that is not an `AudioReader` will be transformed into an
         `AudioReader` before processing. If it is an `str` that refers to a raw
         audio file, `bytes` or None, audio parameters should be provided using
         kwargs (i.e., `samplig_rate`, `sample_width` and `channels` or their
         alias).
-        If ´input´ is str then audio format will be guessed from file extension.
+        If `input` is str then audio format will be guessed from file extension.
         `audio_format` (alias `fmt`) kwarg can also be given to specify audio
         format explicitly. If none of these options is available, rely on
         backend (currently only pydub is supported) to load data.
@@ -72,22 +70,21 @@
         part of the event if `drop_trailing_silence` is False (default).
     drop_trailing_silence : bool, default: False
         Whether to remove trailing silence from detected events. To avoid abrupt
-        cuts in speech, trailing silence should be kept, therefor
-        `drop_trailing_silence` should be False.s
-        detection, it
+        cuts in speech, trailing silence should be kept, therefore this
+        parameter should be False.
     strict_min_dur : bool, default: False
         strict minimum duration. Do not accept an audio event if it is shorter
-        than ´min_dur´ even if it is contiguous to the latest valid event. This
-        happens if the the latest detected event had reached ´max_dur´.
+        than `min_dur` even if it is contiguous to the latest valid event. This
+        happens if the latest detected event had reached `max_dur`.

-    Kwargs
-    ------
+    Other Parameters
+    ----------------
     analysis_window, aw : float, default: 0.05 (50 ms)
         duration of analysis window in seconds. A value between 0.01 (10 ms) and
         0.1 (100 ms) should be good for most use-cases.
     audio_format, fmt : str
         type of audio data (e.g., wav, ogg, flac, raw, etc.). This will only be
-        used if ´input´ is a string path to an audio file. If not given, audio
+        used if `input` is a string path to an audio file. If not given, audio
         type will be guessed from file name extension or from file header.
     sampling_rate, sr : int
         sampling rate of audio data. Required if `input` is a raw audio file, is
@@ -103,22 +100,26 @@
         Regardless of which channel is used for splitting, returned audio events
         contain data from *all* channels, just as `input`.
         The following values are accepted:
-            - None (alias "any"): accept audio activity from any channel, even
-            if other channels are silent. This is the default behavior.
-            - "mix" ("avg" or "average"): mix down all channels (i.e. compute
-            average channel) and split the resulting channel.
-            - int (0 <=, > `channels`): use one channel, specified by integer
-            id, for split.
+
+        - None (alias "any"): accept audio activity from any channel, even if
+          other channels are silent. This is the default behavior.
+
+        - "mix" ("avg" or "average"): mix down all channels (i.e. compute
+          average channel) and split the resulting channel.
+
+        - int (0 <= id < `channels`): use one channel, specified by its
+          integer id, for split.
+
     large_file : bool, default: False
         If True, AND if `input` is a path to a *wav* of a *raw* audio file
         (and only these two formats) then audio data is lazily loaded to memory
         (i.e., one analysis window a time). Otherwise the whole file is loaded
         to memory before split. Set to True if the size of the file is larger
         than available memory.
-    max_read, mr : float, default: None (read until end of stream)
+    max_read, mr : float, default: None, read until end of stream
         maximum data to read from source in seconds.
     validator, val : callable, DataValidator
-        custom data validator. If ´None´ (default), an `AudioEnergyValidor` is
+        custom data validator. If `None` (default), an `AudioEnergyValidator` is
         used with the given energy threshold. Can be a callable or an instance
         of `DataValidator` that implements `is_valid`. In either case, it'll be
         called with with a window of audio data as the first parameter.
@@ -127,8 +128,15 @@
         enough windows of with a signal energy equal to or above this threshold
         are considered valid audio events. Here we are referring to this amount
         as the energy of the signal but to be more accurate, it is the log
-        energy of computed as: 10 . log10 dot(x, x) / |x|
-        If `validator` is given, this argument is ignored.
+        energy computed as: `20 * log10(sqrt(dot(x, x) / len(x)))` (see
+        :class:`AudioEnergyValidator` and
+        :func:`calculate_energy_single_channel`). If `validator` is given, this
+        argument is ignored.
+
+    Yields
+    ------
+    AudioRegion
+        a generator of detected :class:`AudioRegion` s.
     """
     if min_dur <= 0:
         raise ValueError("'min_dur' ({}) must be > 0".format(min_dur))
@@ -264,7 +272,7 @@

     Returns
     -------
-    nb_windows: int
+    nb_windows : int
         minimum number of `analysis_window`'s to cover `durartion`. That means
         that `analysis_window * nb_windows >= duration`.
     """
@@ -487,32 +495,37 @@


 class AudioRegion(object):
+    """
+    AudioRegion encapsulates raw audio data and provides an interface to
+    perform simple operations on it. Use `AudioRegion.load` to build an
+    `AudioRegion` from different types of objects.
+
+    Parameters
+    ----------
+    data : bytes
+        raw audio data as a bytes object
+    sampling_rate : int
+        sampling rate of audio data
+    sample_width : int
+        number of bytes of one audio sample
+    channels : int
+        number of channels of audio data
+    meta : dict, default: None
+        any collection of <key:value> elements used to build metadata for
+        this `AudioRegion`. Meta data can be accessed via `region.meta.key`
+        if `key` is a valid python attribute name, or via `region.meta[key]`
+        if not. Note that the :func:`split` function (or the
+        :meth:`AudioRegion.split` method) returns `AudioRegion` objects with
+        ``start`` and ``end`` meta values that indicate the location in seconds
+        of the region in the original audio data.
+
+    See also
+    --------
+    AudioRegion.load
+
+    """
+
     def __init__(self, data, sampling_rate, sample_width, channels, meta=None):
-        """
-        AudioRegion encapsulates raw audio data and provides an interface to
-        perform simple operations on it. Use `AudioRegion.load` to build an
-        `AudioRegion` from different types of objects.
-
-        Parameters
-        ----------
-        data : bytes
-            raw audio data as a bytes object
-        sampling_rate : int
-            sampling rate of audio data
-        sample_width : int
-            number of bytes of one audio sample
-        channels : int
-            number of channels of audio data
-        meta : dict, default: None
-            any collection of <key:value> elements used to build metadata for this
-            `AudioRegion. Meta data can be accessed via `region.meta.key` if `key`
-            is a valid python attribute name, or via `region.meta[key]` if not.
-
-        See also
-        --------
-        AudioRegion.load
-
-        """
         check_audio_data(data, sample_width, channels)
         self._data = data
         self._sampling_rate = sampling_rate
@@ -554,7 +567,7 @@
             Input can also an AudioSource object.
         skip : float, default: 0
             amount, in seconds, of audio data to skip from source. If read from
-            microphone, ``skip`` must be 0, otherwise a ValueError is raised.
+            microphone, `skip` must be 0, otherwise a `ValueError` is raised.
         max_read : float, default: None
             amount, in seconds, of audio data to read from source. If read from
             microphone, `max_read` should not be None, otherwise a ValueError is
@@ -566,13 +579,13 @@
             audio type will be guessed from file name extension or from file
             header.
         sampling_rate, sr : int
-            sampling rate of audio data. Reauired if `input` is a raw audio file,
+            sampling rate of audio data. Required if `input` is a raw audio file,
             a bytes object or None (i.e., read from microphone).
         sample_width, sw : int
             number of bytes used to encode one audio sample, typically 1, 2 or 4.
             Required for raw data, see `sampling_rate`.
         channels, ch : int
-            nuumber of channels of audio data. Required for raw data, see
+            number of channels of audio data. Required for raw data, see
             `sampling_rate`.
         large_file : bool, default: False
             If True, AND if `input` is a path to a *wav* of a *raw* audio file
@@ -585,7 +598,8 @@

         Raises
         ------
-        ValueError if `input` is None and `skip` != 0 or `max_read` is None.
+        ValueError
+            raised if `input` is None and `skip` != 0 or `max_read` is None.
         """
         if input is None:
             if skip > 0:
@@ -663,7 +677,7 @@
             to get a new audio player.
         progress_bar_kwargs : kwargs
             keyword arguments to pass to `tqdm` progress_bar builder (e.g.,
-            use `leave=False` to clean up screen when play finishes).
+            use `leave=False` to clean up the screen when play finishes).
         """
         if player is None:
             player = player_for(self)
@@ -680,11 +694,11 @@
         Parameters
         ----------
         file : str
-            path to output audio file. May contain ´{duration}´ placeholder
+            path to output audio file. May contain `{duration}` placeholder
             as well as any place holder that this region's metadata might
             contain (e.g., regions returned by `split` contain metadata with
             `start` and `end` attributes that can be used to build output file
-            name as ´{meta.start}´ and ´{meta.end}´. See examples using
+            name as `{meta.start}` and `{meta.end}`. See examples using
             placeholders with formatting.

         audio_format : str, default: None
@@ -692,32 +706,31 @@
             from file name's extension. If file name has no extension, audio
             data is saved as a raw (headerless) audio file.
         exists_ok : bool, default: True
-            If True, overwrite ´file´ if a file with the same name exists.
-            If False, raise an ´IOError´ if `file` exists.
+            If True, overwrite `file` if a file with the same name exists.
+            If False, raise an `IOError` if `file` exists.
         audio_parameters: dict
             any keyword arguments to be passed to audio saving backend.
-            FIXME: this is not yet implemented!

         Returns
         -------
         file: str
             name of output file with replaced placehoders.
         Raises
-            IOError if ´file´ exists and ´exists_ok´ is False.
+            IOError if `file` exists and `exists_ok` is False.

-        Example
-        -------
-        .. code:: python
-            region = AudioRegion(b'\0' * 2 * 24000,
-                                    sampling_rate=16000,
-                                    sample_width=2,
-                                    channels=1)
-            region.meta.start = 2.25
-            region.meta.end = 2.25 + region.duration
-            region.save('audio_{meta.start}-{meta.end}.wav')
-            audio_2.25-3.75.wav
-            region.save('region_{meta.start:.3f}_{duration:.3f}.wav')
-            audio_2.250_1.500.wav
+
+        Examples
+        --------
+        >>> region = AudioRegion(b'\\0' * 2 * 24000,
+        ...                      sampling_rate=16000,
+        ...                      sample_width=2,
+        ...                      channels=1)
+        >>> region.meta.start = 2.25
+        >>> region.meta.end = 2.25 + region.duration
+        >>> region.save('audio_{meta.start}-{meta.end}.wav')
+        audio_2.25-3.75.wav
+        >>> region.save('region_{meta.start:.3f}_{duration:.3f}.wav')
+        region_2.250_1.500.wav
         """
         if isinstance(file, str):
             file = file.format(duration=self.duration, meta=self.meta)
@@ -743,8 +756,9 @@
         strict_min_dur=False,
         **kwargs
     ):
-        """Split audio region. See `auditok.split()` for split parameters
-        description.
+        """Split audio region. See :func:`auditok.split()` for a comprehensive
+        description of split parameters.
+        See also :meth:`AudioRegion.split_and_plot`.
         """
         if kwargs.get("max_read", kwargs.get("mr")) is not None:
             warn_msg = "'max_read' (or 'mr') should not be used with "
@@ -770,6 +784,8 @@
         dpi=120,
         theme="auditok",
     ):
+        """Plot audio region.
+        """
         try:
             from auditok.plotting import plot

@@ -800,8 +816,9 @@
         theme="auditok",
         **kwargs
     ):
-        """Split region and plot signal and detection. Alias: `splitp`.
-        See :auditok.split() for split parameters description.
+        """Split region and plot signal and detections. Alias: :meth:`splitp`.
+        See :func:`auditok.split()` for a comprehensive description of split
+        parameters.
         """
         try:
             from auditok.plotting import plot
@@ -995,133 +1012,113 @@
         Maximum number of frames of a valid token. This includes all
         tolerated non valid frames within the token.

-    `max_continuous_silence` : *(int)*
+    max_continuous_silence : int
         Maximum number of consecutive non-valid frames within a token.
         Note that, within a valid token, there may be many tolerated
         *silent* regions that contain each a number of non valid frames up
         to `max_continuous_silence`

-    `init_min` : *(int, default=0)*
+    init_min : int
         Minimum number of consecutive valid frames that must be
         **initially** gathered before any sequence of non valid frames can
         be tolerated. This option is not always needed, it can be used to
         drop non-valid tokens as early as possible. **Default = 0** means
         that the option is by default ineffective.

-    `init_max_silence` : *(int, default=0)*
+    init_max_silence : int
         Maximum number of tolerated consecutive non-valid frames if the
         number already gathered valid frames has not yet reached
         'init_min'.This argument is normally used if `init_min` is used.
         **Default = 0**, by default this argument is not taken into
         consideration.

-    `mode` : *(int, default=0)*
-        `mode` can be:
+    mode : int
+        mode can be one of the following:

-    1. `StreamTokenizer.NORMAL`:
-    Do not drop trailing silence, and accept a token shorter than
-    `min_length` if it is the continuation of the latest delivered token.
+            -1 `StreamTokenizer.NORMAL` : do not drop trailing silence, and
+            accept a token shorter than `min_length` if it is the continuation
+            of the latest delivered token.

-    2. `StreamTokenizer.STRICT_MIN_LENGTH`:
-    if token *i* is delivered because `max_length`
-    is reached, and token *i+1* is immediately adjacent to
-    token *i* (i.e. token *i* ends at frame *k* and token *i+1* starts
-    at frame *k+1*) then accept token *i+1* only of it has a size of at
-    least `min_length`. The default behavior is to accept token *i+1*
-    event if it is shorter than `min_length` (given that the above
-    conditions are fulfilled of course).
+            -2 `StreamTokenizer.STRICT_MIN_LENGTH`: if token `i` is delivered
+            because `max_length` is reached, and token `i+1` is immediately
+            adjacent to token `i` (i.e. token `i` ends at frame `k` and token
+            `i+1` starts at frame `k+1`) then accept token `i+1` only if it has
+            a size of at least `min_length`. The default behavior is to accept
+            token `i+1` even if it is shorter than `min_length` (provided that
+            the above conditions are fulfilled of course).

-    :Examples:
+            -3 `StreamTokenizer.DROP_TRAILING_SILENCE`: drop all tailing
+            non-valid frames from a token to be delivered if and only if it
+            is not **truncated**. This can be a bit tricky. A token is actually
+            delivered if:
+
+                - `max_continuous_silence` is reached.
+
+                - Its length reaches `max_length`. This is referred to as a
+                  **truncated** token.
+
+            In the current implementation, a `StreamTokenizer`'s decision is only
+            based on already seen data and on incoming data. Thus, if a token is
+            truncated at a non-valid but tolerated frame (`max_length` is reached
+            but `max_continuous_silence` not yet) any tailing silence will be kept
+            because it can potentially be part of valid token (if `max_length` was
+            bigger). But if `max_continuous_silence` is reached before
+            `max_length`, the delivered token will not be considered as truncated
+            but a result of *normal* end of detection (i.e. no more valid data).
+            In that case the trailing silence can be removed if you use the
+            `StreamTokenizer.DROP_TRAILING_SILENCE` mode.
+
+            -4 `(StreamTokenizer.STRICT_MIN_LENGTH | StreamTokenizer.DROP_TRAILING_SILENCE)`:
+            use both options. That means: first remove tailing silence, then
+            check if the token still has a length of at least `min_length`.
+
+
+
+
+    Examples
+    --------

     In the following code, without `STRICT_MIN_LENGTH`, the 'BB' token is
     accepted although it is shorter than `min_length` (3), because it
     immediately follows the latest delivered token:

-    .. code:: python
+    >>> from auditok.core import StreamTokenizer
+    >>> from auditok import StringDataSource, DataValidator

-        from auditok import (StreamTokenizer,
-                                StringDataSource,
-                                DataValidator)
-
-        class UpperCaseChecker(DataValidator):
-            def is_valid(self, frame):
+    >>> class UpperCaseChecker(DataValidator):
+    >>>     def is_valid(self, frame):
                 return frame.isupper()
-
-
-        dsource = StringDataSource("aaaAAAABBbbb")
-        tokenizer = StreamTokenizer(validator=UpperCaseChecker(),
+    >>> dsource = StringDataSource("aaaAAAABBbbb")
+    >>> tokenizer = StreamTokenizer(validator=UpperCaseChecker(),
                                     min_length=3,
                                     max_length=4,
                                     max_continuous_silence=0)
-
-        tokenizer.tokenize(dsource)
-
-    :output:
-
-        .. code:: python
-
-        [(['A', 'A', 'A', 'A'], 3, 6), (['B', 'B'], 7, 8)]
+    >>> tokenizer.tokenize(dsource)
+    [(['A', 'A', 'A', 'A'], 3, 6), (['B', 'B'], 7, 8)]


     The following tokenizer will however reject the 'BB' token:

-    .. code:: python
-
-        dsource = StringDataSource("aaaAAAABBbbb")
-        tokenizer = StreamTokenizer(validator=UpperCaseChecker(),
+    >>> dsource = StringDataSource("aaaAAAABBbbb")
+    >>> tokenizer = StreamTokenizer(validator=UpperCaseChecker(),
                                     min_length=3, max_length=4,
                                     max_continuous_silence=0,
                                     mode=StreamTokenizer.STRICT_MIN_LENGTH)
-        tokenizer.tokenize(dsource)
+    >>> tokenizer.tokenize(dsource)
+    [(['A', 'A', 'A', 'A'], 3, 6)]

-    :output:

-    .. code:: python

-        [(['A', 'A', 'A', 'A'], 3, 6)]
-
-
-    3. `StreamTokenizer.DROP_TRAILING_SILENCE`: drop all tailing non-valid
-    frames from a token to be delivered if and only if it is not
-    **truncated**. This can be a bit tricky. A token is actually delivered
-    if: - a. `max_continuous_silence` is reached
-
-    :or:
-
-    - b. Its length reaches `max_length`. This is called a **truncated**
-    token
-
-    In the current implementation, a `StreamTokenizer`'s decision is only
-    based on already seen data and on incoming data. Thus, if a token is
-    truncated at a non-valid but tolerated frame (`max_length` is reached
-    but `max_continuous_silence` not yet) any tailing silence will be kept
-    because it can potentially be part of valid token (if `max_length` was
-    bigger). But if `max_continuous_silence` is reached before
-    `max_length`, the delivered token will not be considered as truncated
-    but a result of *normal* end of detection (i.e. no more valid data).
-    In that case the trailing silence can be removed if you use the
-    `StreamTokenizer.DROP_TRAILING_SILENCE` mode.
-
-    :Example:
-
-    .. code:: python
-
-            tokenizer = StreamTokenizer(
-                            validator=UpperCaseChecker(),
-                            min_length=3,
-                            max_length=6,
-                            max_continuous_silence=3,
-                            mode=StreamTokenizer.DROP_TRAILING_SILENCE
-                            )
-
-            dsource = StringDataSource("aaaAAAaaaBBbbbb")
-            tokenizer.tokenize(dsource)
-
-    :output:
-
-    .. code:: python
-
-        [(['A', 'A', 'A', 'a', 'a', 'a'], 3, 8), (['B', 'B'], 9, 10)]
+    >>> tokenizer = StreamTokenizer(
+    ...                validator=UpperCaseChecker(),
+    ...                min_length=3,
+    ...                max_length=6,
+    ...                max_continuous_silence=3,
+    ...                mode=StreamTokenizer.DROP_TRAILING_SILENCE
+    ...                )
+    >>> dsource = StringDataSource("aaaAAAaaaBBbbbb")
+    >>> tokenizer.tokenize(dsource)
+    [(['A', 'A', 'A', 'a', 'a', 'a'], 3, 8), (['B', 'B'], 9, 10)]

     The first token is delivered with its tailing silence because it is
     truncated while the second one has its tailing frames removed.
@@ -1135,11 +1132,6 @@
             (['B', 'B', 'b', 'b', 'b'], 9, 13)
         ]

-
-    4. `(StreamTokenizer.STRICT_MIN_LENGTH |
-            StreamTokenizer.DROP_TRAILING_SILENCE)`:
-    use both options. That means: first remove tailing silence, then ckeck
-    if the token still has at least a length of `min_length`.
     """

     SILENCE = 0
--- a/auditok/dataset.py	Sun Jan 03 18:46:40 2021 +0100
+++ b/auditok/dataset.py	Sun Jan 10 17:11:07 2021 +0100
@@ -1,5 +1,11 @@
 """
 This module contains links to audio files that can be used for test purposes.
+
+.. autosummary::
+    :toctree: generated/
+
+    one_to_six_arabic_16000_mono_bc_noise
+    was_der_mensch_saet_mono_44100_lead_trail_silence
 """

 import os
@@ -22,5 +28,4 @@
 silence.wav".format(
     cd=_current_dir, sep=os.path.sep
 )
-"""A wave file that contains a sentence between long leading and trailing
-periods of silence"""
+"""A wave file that contains a sentence with a long leading and trailing silence"""
--- a/auditok/exceptions.py	Sun Jan 03 18:46:40 2021 +0100
+++ b/auditok/exceptions.py	Sun Jan 10 17:11:07 2021 +0100
@@ -3,7 +3,7 @@


 class TooSamllBlockDuration(ValueError):
-    """Raised when block_dur results in a block_size smaller than one sample"""
+    """Raised when block_dur results in a block_size smaller than one sample."""

     def __init__(self, message, block_dur, sampling_rate):
         self.block_dur = block_dur
@@ -12,12 +12,12 @@


 class TimeFormatError(Exception):
-    """Raised when duration formatting directicve is wrong"""
+    """Raised when a duration formatting directive is unknown."""


 class EndOfProcessing(Exception):
     """Raised within command line script's main function to jump to
-    postprocessing code"""
+    postprocessing code."""


 class AudioIOError(Exception):
@@ -28,7 +28,7 @@
 class AudioParameterError(AudioIOError):
     """Raised when one audio parameter is missing when loading raw data or
     saving data to a format other than raw. Also raised when an audio
-    parameter has a wrong value"""
+    parameter has a wrong value."""


 class AudioEncodingError(Exception):
--- a/auditok/io.py	Sun Jan 03 18:46:40 2021 +0100
+++ b/auditok/io.py	Sun Jan 10 17:11:07 2021 +0100
@@ -1,27 +1,19 @@
 """
 Module for low-level audio input-output operations.

-Class summary
-=============
+.. autosummary::
+    :toctree: generated/

-.. autosummary::
-
-        AudioSource
-        Rewindable
-        BufferAudioSource
-        WaveAudioSource
-        PyAudioSource
-        StdinAudioSource
-        PyAudioPlayer
-
-Function summary
-================
-
-.. autosummary::
-
-        from_file
-        to_file
-        player_for
+    AudioSource
+    Rewindable
+    BufferAudioSource
+    WaveAudioSource
+    PyAudioSource
+    StdinAudioSource
+    PyAudioPlayer
+    from_file
+    to_file
+    player_for
 """
 import os
 import sys
@@ -94,20 +86,23 @@

 def _get_audio_parameters(param_dict):
     """
-    Gets audio parameters from a dictionary of parameters.
-    A parameter can have a long name or a short name. If the long name is
-    present, the short name is ignored. In neither is present then
-    `AudioParameterError` is raised.
+    Get audio parameters from a dictionary of parameters. An audio parameter can
+    have a long name or a short name. If the long name is present, the short
+    name will be ignored. If neither is present then `AudioParameterError` is
+    raised.

     Expected parameters are:

-        `sampling_rate`, `sr`: int, sampling rate.
-        `sample_width`, `sw`: int, sample size in bytes.
-        `channels`, `ch`: int, number of channels.
+        - `sampling_rate`, `sr` : int, sampling rate.

-    :Returns
-        audio_parameters: tuple
-            audio parameters: (sampling_rate, sample_width, channels)
+        - `sample_width`, `sw` : int, sample size in bytes.
+
+        - `channels`, `ch` : int, number of channels.
+
+    Returns
+    -------
+    audio_parameters : tuple
+        a tuple for audio parameters as (sampling_rate, sample_width, channels).
     """
     err_message = (
         "'{ln}' (or '{sn}') must be a positive integer, found: '{val}'"
@@ -135,24 +130,18 @@
     Subclasses should implement methods to open/close and audio stream
     and read the desired amount of audio samples.

-    :Parameters:
-
-        `sampling_rate` : int
-            Number of samples per second of audio stream. Default = 16000.
-
-        `sample_width` : int
-            Size in bytes of one audio sample. Possible values : 1, 2, 4.
-            Default = 2.
-
-        `channels` : int
-            Number of channels of audio stream.
+    Parameters
+    ----------
+    sampling_rate : int
+        number of samples per second of audio data.
+    sample_width : int
+        size in bytes of one audio sample. Possible values: 1, 2 or 4.
+    channels : int
+        number of channels of audio data.
     """

     def __init__(
-        self,
-        sampling_rate=DEFAULT_SAMPLING_RATE,
-        sample_width=DEFAULT_SAMPLE_WIDTH,
-        channels=DEFAULT_NB_CHANNELS,
+        self, sampling_rate, sample_width, channels,
     ):

         if sample_width not in (1, 2, 4):
@@ -166,112 +155,111 @@

     @abstractmethod
     def is_open(self):
-        """ Return True if audio source is open, False otherwise """
+        """Return True if audio source is open, False otherwise."""

     @abstractmethod
     def open(self):
-        """ Open audio source """
+        """Open audio source."""

     @abstractmethod
     def close(self):
-        """ Close audio source """
+        """Close audio source."""

     @abstractmethod
     def read(self, size):
         """
         Read and return `size` audio samples at most.

-        :Parameters:
+        Parameters
+        ----------
+        size : int
+            Number of samples to read.

-            `size` : int
-                the number of samples to read.
+        Returns
+        -------
+        data : bytes
+            Audio data as a bytes object of length `N * sample_width * channels`
+            where `N` equals:

-        :Returns:
+            - `size` if `size` <= remaining samples

-            Audio data as a string of length `N * sample_width * channels`,
-            where `N` is:
-
-            - `size` if `size` < 'left_samples'
-
-            - 'left_samples' if `size` > 'left_samples'
+            - remaining samples if `size` > remaining samples
         """

     @property
     def sampling_rate(self):
-        """ Number of samples per second of audio stream """
+        """Number of samples per second of audio stream."""
         return self._sampling_rate

     @property
     def sr(self):
-        """ Number of samples per second of audio stream """
+        """Number of samples per second of audio stream (alias for
+        `sampling_rate`)."""
         return self._sampling_rate

     @property
     def sample_width(self):
-        """ Number of bytes used to represent one audio sample """
+        """Number of bytes used to represent one audio sample."""
         return self._sample_width

     @property
     def sw(self):
-        """ Number of bytes used to represent one audio sample """
+        """Number of bytes used to represent one audio sample (alias for
+        `sample_width`)."""
         return self._sample_width

     @property
     def channels(self):
-        """ Number of channels of this audio source """
+        """Number of channels in audio stream."""
         return self._channels

     @property
     def ch(self):
-        """ Return the number of channels of this audio source """
+        """Number of channels in audio stream (alias for `channels`)."""
         return self.channels


 class Rewindable(AudioSource):
     """
     Base class for rewindable audio streams.
-    Subclasses should implement methods to return to the beginning of an
-    audio stream as well as method to move to an absolute audio position
-    expressed in time or in number of samples.
+
+    Subclasses should implement a method to return to the start of the stream
+    (`rewind`), as well as a property getter/setter named `position` that
+    reads/sets stream position expressed in number of samples.
     """

-    @property
-    def rewindable(self):
-        return True
-
     @abstractmethod
     def rewind(self):
-        """ Go back to the beginning of audio stream """
-        raise NotImplementedError
+        """Go back to the beginning of audio stream."""

     @property
     @abstractmethod
     def position(self):
-        """Return stream position in number of samples"""
+        """Return stream position in number of samples."""

     @position.setter
     @abstractmethod
     def position(self, position):
-        """Set stream position in number of samples"""
+        """Set stream position in number of samples."""

     @property
     def position_s(self):
-        """Return stream position in seconds"""
+        """Return stream position in seconds."""
         return self.position / self.sampling_rate

     @position_s.setter
     def position_s(self, position_s):
-        """Set stream position in seconds"""
+        """Set stream position in seconds."""
         self.position = int(self.sampling_rate * position_s)

     @property
     def position_ms(self):
-        """Return stream position in milliseconds"""
+        """Return stream position in milliseconds."""
         return (self.position * 1000) // self.sampling_rate

     @position_ms.setter
     def position_ms(self, position_ms):
-        """Set stream position in milliseconds"""
+        """Set stream position in milliseconds."""
         if not isinstance(position_ms, int):
             raise ValueError("position_ms should be an int")
         self.position = int(self.sampling_rate * position_ms / 1000)
@@ -279,17 +267,23 @@

 class BufferAudioSource(Rewindable):
     """
-    An :class:`AudioSource` that encapsulates and reads data from a memory
-    buffer. It implements methods from :class:`Rewindable` and is therefore
-    a navigable :class:`AudioSource`.
+    An `AudioSource` that encapsulates and reads data from a memory buffer.
+
+    This class implements the `Rewindable` interface.
+    Parameters
+    ----------
+    data : bytes
+        audio data
+    sampling_rate : int, default: 16000
+        number of samples per second of audio data.
+    sample_width : int, default: 2
+        size in bytes of one audio sample. Possible values: 1, 2 or 4.
+    channels : int, default: 1
+        number of channels of audio data.
     """

     def __init__(
-        self,
-        data,
-        sampling_rate=DEFAULT_SAMPLING_RATE,
-        sample_width=DEFAULT_SAMPLE_WIDTH,
-        channels=DEFAULT_NB_CHANNELS,
+        self, data, sampling_rate=16000, sample_width=2, channels=1,
     ):
         AudioSource.__init__(self, sampling_rate, sample_width, channels)
         check_audio_data(data, sample_width, channels)
@@ -324,6 +318,7 @@

     @property
     def data(self):
+        """Get raw audio data as a `bytes` object."""
         return self._data

     def rewind(self):
@@ -331,11 +326,12 @@

     @property
     def position(self):
-        """Stream position in number of samples"""
+        """Get stream position in number of samples"""
         return self._current_position_bytes // self._sample_size_all_channels

     @position.setter
     def position(self, position):
+        """Set stream position in number of samples."""
         position *= self._sample_size_all_channels
         if position < 0:
             position += len(self.data)
@@ -345,19 +341,33 @@

     @property
     def position_ms(self):
-        """Stream position in milliseconds"""
+        """Get stream position in milliseconds."""
         return (self._current_position_bytes * 1000) // (
             self._sample_size_all_channels * self.sampling_rate
         )

     @position_ms.setter
     def position_ms(self, position_ms):
+        """Set stream position in milliseconds."""
         if not isinstance(position_ms, int):
             raise ValueError("position_ms should be an int")
         self.position = int(self.sampling_rate * position_ms / 1000)


 class FileAudioSource(AudioSource):
+    """
+    Base class for `AudioSource`s that read audio data from a file.
+
+    Parameters
+    ----------
+    sampling_rate : int, default: 16000
+        number of samples per second of audio data.
+    sample_width : int, default: 2
+        size in bytes of one audio sample. Possible values: 1, 2 or 4.
+    channels : int, default: 1
+        number of channels of audio data.
+    """
+
     def __init__(self, sampling_rate, sample_width, channels):
         AudioSource.__init__(self, sampling_rate, sample_width, channels)
         self._audio_stream = None
@@ -388,6 +398,25 @@


 class RawAudioSource(FileAudioSource):
+    """
+    A class for an `AudioSource` that reads data from a raw (headerless) audio
+    file.
+
+    This class should be used for large raw audio files to avoid loading the
+    whole data to memory.
+
+    Parameters
+    ----------
+    file : str
+        path to a raw audio file.
+    sampling_rate : int
+        Number of samples per second of audio data.
+    sample_width : int
+        Size in bytes of one audio sample. Possible values : 1, 2, 4.
+    channels : int
+        Number of channels of audio data.
+    """
+
     def __init__(self, file, sampling_rate, sample_width, channels):
         FileAudioSource.__init__(self, sampling_rate, sample_width, channels)
         self._file = file
@@ -410,13 +439,14 @@
 class WaveAudioSource(FileAudioSource):
     """
     A class for an `AudioSource` that reads data from a wave file.
-    This class should be used for large wave files to avoid loading
-    the whole data to memory.

-    :Parameters:
+    This class should be used for large wave files to avoid loading the whole
+    data to memory.

-        `filename` :
-            path to a valid wave file.
+    Parameters
+    ----------
+    filename : str
+        path to a valid wave file.
     """

     def __init__(self, filename):
@@ -443,15 +473,29 @@

 class PyAudioSource(AudioSource):
     """
-    A class for an `AudioSource` that reads data built-in microphone using
-    PyAudio.
+    A class for an `AudioSource` that reads data from built-in microphone using
+    PyAudio (https://people.csail.mit.edu/hubert/pyaudio/).
+
+    Parameters
+    ----------
+    sampling_rate : int, default: 16000
+        number of samples per second of audio data.
+    sample_width : int, default: 2
+        size in bytes of one audio sample. Possible values: 1, 2 or 4.
+    channels : int, default: 1
+        number of channels of audio data.
+    frames_per_buffer : int, default: 1024
+        PyAudio number of frames per buffer.
+    input_device_index : None or int, default: None
+        PyAudio index of audio device to read audio data from. If None default
+        device is used.
     """

     def __init__(
         self,
-        sampling_rate=DEFAULT_SAMPLING_RATE,
-        sample_width=DEFAULT_SAMPLE_WIDTH,
-        channels=DEFAULT_NB_CHANNELS,
+        sampling_rate=16000,
+        sample_width=2,
+        channels=1,
         frames_per_buffer=1024,
         input_device_index=None,
     ):
@@ -491,28 +535,31 @@
     def read(self, size):
         if self._audio_stream is None:
             raise IOError("Stream is not open")
-
         if self._audio_stream.is_active():
             data = self._audio_stream.read(size)
             if data is None or len(data) < 1:
                 return None
             return data
-
         return None


 class StdinAudioSource(FileAudioSource):
     """
-    A class for an :class:`AudioSource` that reads data from standard input.
+    A class for an `AudioSource` that reads data from standard input.
+
+    Parameters
+    ----------
+    sampling_rate : int, default: 16000
+        number of samples per second of audio data.
+    sample_width : int, default: 2
+        size in bytes of one audio sample. Possible values: 1, 2 or 4.
+    channels : int, default: 1
+        number of channels of audio data.
     """

     def __init__(
-        self,
-        sampling_rate=DEFAULT_SAMPLING_RATE,
-        sample_width=DEFAULT_SAMPLE_WIDTH,
-        channels=DEFAULT_NB_CHANNELS,
+        self, sampling_rate=16000, sample_width=2, channels=1,
     ):
-
         FileAudioSource.__init__(self, sampling_rate, sample_width, channels)
         self._is_open = False
         self._sample_size = sample_width * channels
@@ -535,7 +582,7 @@
         return None


-def make_tqdm_progress_bar(iterable, total, duration, **tqdm_kwargs):
+def _make_tqdm_progress_bar(iterable, total, duration, **tqdm_kwargs):
     fmt = tqdm_kwargs.get("bar_format", DEFAULT_BAR_FORMAT_TQDM)
     fmt = fmt.replace("{duration}", "{:.3f}".format(duration))
     tqdm_kwargs["bar_format"] = fmt
@@ -550,16 +597,23 @@
 class PyAudioPlayer:
     """
     A class for audio playback using Pyaudio
+    (https://people.csail.mit.edu/hubert/pyaudio/).
+
+    Parameters
+    ----------
+    sampling_rate : int, default: 16000
+        number of samples per second of audio data.
+    sample_width : int, default: 2
+        size in bytes of one audio sample. Possible values: 1, 2 or 4.
+    channels : int, default: 1
+        number of channels of audio data.
     """

     def __init__(
-        self,
-        sampling_rate=DEFAULT_SAMPLING_RATE,
-        sample_width=DEFAULT_SAMPLE_WIDTH,
-        channels=DEFAULT_NB_CHANNELS,
+        self, sampling_rate=16000, sample_width=2, channels=1,
     ):
         if sample_width not in (1, 2, 4):
-            raise ValueError("Sample width must be one of: 1, 2 or 4 (bytes)")
+            raise ValueError("Sample width in bytes must be one of 1, 2 or 4")

         self.sampling_rate = sampling_rate
         self.sample_width = sample_width
@@ -582,7 +636,7 @@
             duration = len(data) / (
                 self.sampling_rate * self.sample_width * self.channels
             )
-            chunk_gen = make_tqdm_progress_bar(
+            chunk_gen = _make_tqdm_progress_bar(
                 chunk_gen,
                 total=nb_chunks,
                 duration=duration,
@@ -620,17 +674,19 @@

 def player_for(source):
     """
-    Return a :class:`AudioPlayer` that can play data from `source`.
+    Return an `AudioPlayer` compatible with `source` (i.e., has the same
+    sampling rate, sample width and number of channels).

-    :Parameters:
+    Parameters
+    ----------
+    source : AudioSource
+        An object that has `sampling_rate`, `sample_width` and `channels`
+        attributes.

-        `source` :
-            a objects that has `sampling_rate`, `sample_width` and
-            `sample_width` attributes.
-
-    :Returns:
-
-        An `AudioPlayer` that has the same sampling rate, sample width
+    Returns
+    -------
+    player : PyAudioPlayer
+        An audio player that has the same sampling rate, sample width
         and number of channels as `source`.
     """
     return PyAudioPlayer(
@@ -642,13 +698,27 @@
     """
     Create and return an AudioSource from input.

-    Parameters:
+    Parameters
+    ----------
+    input : str, bytes, "-" or None (default)
+        source to read audio data from. If str, it should be a path to a valid
+        audio file. If bytes, it is interpreted as raw audio data. If it is "-",
+        raw data will be read from stdin. If None, read audio data from built-in
+        microphone using PyAudio.
+    kwargs
+        audio parameters used to build the `AudioSource` object. Depending on
+        the nature of `input`, these may be omitted (e.g., when `input` is an
+        audio file in a popular audio format such as wav, ogg, flac, etc.) or
+        include parameters such as `sampling_rate`, `sample_width`, `channels`
+        (or their respective short name versions `sr`, `sw` and `ch`) if `input`
+        is a path to a raw (headerless) audio file, a bytes object for raw audio
+        data or None (to read data from built-in microphone). See the respective
+        `AudioSource` classes for more information about possible parameters.

-        ´input´ : str, bytes, "-" or None
-        Source to read audio data from. If str, it should be a path to a valid
-        audio file. If bytes, it is interpreted as raw audio data. if equals to
-        "-", raw data will be read from stdin. If None, read audio data from
-        microphone using PyAudio.
+    Returns
+    -------
+    source : AudioSource
+        audio source created from input parameters
     """
     if input == "-":
         return StdinAudioSource(*_get_audio_parameters(kwargs))
@@ -673,28 +743,28 @@

 def _load_raw(file, sampling_rate, sample_width, channels, large_file=False):
     """
-    Load a raw audio file with standard Python.
-    If `large_file` is True, audio data will be lazily
-    loaded to memory.
+    Load a raw audio file with standard Python. If `large_file` is True, return
+    a `RawAudioSource` object that reads data lazily from disk, otherwise load
+    all data to memory and return a `BufferAudioSource` object.

-    See also :func:`from_file`.
+    Parameters
+    ----------
+    file : str
+        path to a raw audio data file.
+    sampling_rate : int
+        sampling rate of audio data.
+    sample_width : int
+        size in bytes of one audio sample.
+    channels : int
+        number of channels of audio data.
+    large_file : bool
+        if True, return a `RawAudioSource` otherwise a `BufferAudioSource`
+        object.

-    :Parameters:
-        `file` : filelike object or str
-            raw audio file to open
-        `sampling_rate`: int
-            sampling rate of audio data
-        `sample_width`: int
-            sample width of audio data
-        `channels`: int
-            number of channels of audio data
-        `large_file`: bool
-            If True, return a `RawAudioSource` object that reads data lazily
-            from disk, otherwise load all data and return a `BufferAudioSource`
-
-    :Returns:
-
-        `RawAudioSource` if `large_file` is True, `BufferAudioSource` otherwise
+    Returns
+    -------
+    source : RawAudioSource or BufferAudioSource
+        an `AudioSource` that reads data from input file.
     """
     if None in (sampling_rate, sample_width, channels):
         raise AudioParameterError(
@@ -719,16 +789,28 @@
     )


-def _load_wave(filename, large_file=False):
+def _load_wave(file, large_file=False):
     """
-    Load a wave audio file with standard Python.
-    If `large_file` is True, audio data will be lazily
-    loaded to memory.
+    Load a wave audio file with standard Python. If `large_file` is True, return
+    a `WaveAudioSource` object that reads data lazily from disk, otherwise load
+    all data to memory and return a `BufferAudioSource` object.

+    Parameters
+    ----------
+    file : str
+        path to a wav audio data file
+    large_file : bool
+        if True, return a `WaveAudioSource` otherwise a `BufferAudioSource`
+        object.
+
+    Returns
+    -------
+    source : WaveAudioSource or BufferAudioSource
+        an `AudioSource` that reads data from input file.
     """
     if large_file:
-        return WaveAudioSource(filename)
-    with wave.open(filename) as fp:
+        return WaveAudioSource(file)
+    with wave.open(file) as fp:
         channels = fp.getnchannels()
         srate = fp.getframerate()
         swidth = fp.getsampwidth()
@@ -738,18 +820,22 @@
     )


-def _load_with_pydub(filename, audio_format):
-    """Open compressed audio file using pydub. If a video file
+def _load_with_pydub(file, audio_format=None):
+    """
+    Open compressed audio or video file using pydub. If a video file
     is passed, its audio track(s) are extracted and loaded.
-    This function should not be called directely, use :func:`from_file`
-    instead.

-    :Parameters:
+    Parameters
+    ----------
+    file : str
+        path to audio file.
+    audio_format : str, default: None
+        string, audio/video file format if known (e.g. raw, webm, wav, ogg)

-    `filename`:
-        path to audio file.
-    `audio_format`:
-        string, audio file format (e.g. raw, webm, wav, ogg)
+    Returns
+    -------
+    source : BufferAudioSource
+        an `AudioSource` that reads data from input file.
     """
     func_dict = {
         "mp3": AudioSegment.from_mp3,
@@ -757,7 +843,7 @@
         "flv": AudioSegment.from_flv,
     }
     open_function = func_dict.get(audio_format, AudioSegment.from_file)
-    segment = open_function(filename)
+    segment = open_function(file)
     return BufferAudioSource(
         data=segment.raw_data,
         sampling_rate=segment.frame_rate,
@@ -769,10 +855,9 @@
 def from_file(filename, audio_format=None, large_file=False, **kwargs):
     """
     Read audio data from `filename` and return an `AudioSource` object.
-    if `audio_format` is None, the appropriate :class:`AudioSource` class is
-    guessed from file's extension. `filename` can be a compressed audio or
-    video file. This will require installing pydub:
-    (https://github.com/jiaaro/pydub).
+    if `audio_format` is None, the appropriate `AudioSource` class is guessed
+    from file's extension. `filename` can be a compressed audio or video file.
+    This will require installing `pydub` (https://github.com/jiaaro/pydub).

     The normal behavior is to load all audio data to memory from which a
     :class:`BufferAudioSource` object is created. This should be convenient
@@ -783,38 +868,43 @@
     Note that the current implementation supports only wave and raw formats for
     lazy audio loading.

-    See also :func:`to_file`.
+    If the audio format is `raw` then sampling rate, sample width and
+    channels are required.

-    :Parameters:
+    See also
+    --------
+    :func:`to_file`.

-    `filename`: str
+    Parameters
+    ----------
+    filename : str
         path to input audio or video file.
-    `audio_format`: str
-        audio format used to save data  (e.g. raw, webm, wav, ogg)
-    `large_file`: bool
-        If True, audio won't fully be loaded to memory but only when a window
+    audio_format : str
+        audio format used to save data  (e.g. raw, webm, wav, ogg).
+    large_file : bool, default: False
+        if True, audio won't fully be loaded to memory but only when a window
         is read from disk.

-    :kwargs:

-    If an audio format other than `raw` is used, the following keyword
-    arguments are required:
-
-    `sampling_rate`, `sr`: int
+    Other Parameters
+    ----------------
+    sampling_rate, sr: int
         sampling rate of audio data
-    `sample_width`: int
+    sample_width : int
         sample width (i.e. number of bytes used to represent one audio sample)
-    `channels`: int
+    channels : int
         number of channels of audio data

-    :Returns:
+    Returns
+    -------
+    audio_source : AudioSource
+        an :class:`AudioSource` object that reads data from input file.

-    An `AudioSource` object that reads data from input file.
-
-    :Raises:
-
-    An `AudioIOError` is raised if audio data cannot be read in the given
-    format; or if format is `raw` and one or more audio parameters are missing.
+    Raises
+    ------
+    `AudioIOError`
+        raised if audio data cannot be read in the given format or if the
+        format is `raw` and one or more audio parameters are missing.
     """
     audio_format = _guess_audio_format(audio_format, filename)

@@ -884,29 +974,28 @@
     is `None` and `file` comes without an extension then audio
     data will be written as a raw audio file.

-    :Parameters:
+    Parameters
+    ----------
+    data : bytes-like
+        audio data to be written. Can be a `bytes`, `bytearray`,
+        `memoryview`, `array` or `numpy.ndarray` object.
+    file : str
+        path to output audio file
+    audio_format : str
+        audio format used to save data (e.g. raw, webm, wav, ogg)
+    kwargs: dict
+        If an audio format other than raw is used, the following keyword
+        arguments are required:

-        `data`: buffer of bytes
-            audio data to be written. Can be a `bytes`, `bytearray`,
-            `memoryview`, `array` or `numpy.ndarray` object.
-        `file`: str
-            path to output audio file
-        `audio_format`: str
-            audio format used to save data (e.g. raw, webm, wav, ogg)
-        :kwargs:
-            If an audio format other than raw is used, the following
-            keyword arguments are required:
-            `sampling_rate`, `sr`: int
-                sampling rate of audio data
-            `sample_width`, `sw`: int
-                sample width (i.e., number of bytes of one audio sample)
-            `channels`, `ch`: int
-                number of channels of audio data
-    :Raises:
+        - `sampling_rate`, `sr`: int,  sampling rate of audio data.
+        - `sample_width`, `sw`: int, size in bytes of one audio sample.
+        - `channels`, `ch`: int, number of channels of audio data.

-        `AudioParameterError` if output format is different than raw and one
-        or more audio parameters are missing.
-        `AudioIOError` if audio data cannot be written in the desired format.
+    Raises
+    ------
+    `AudioParameterError` if output format is different than raw and one or more
+    audio parameters are missing. `AudioIOError` if audio data cannot be written
+    in the desired format.
     """
     audio_format = _guess_audio_format(audio_format, file)
     if audio_format in (None, "raw"):
--- a/auditok/signal.py	Sun Jan 03 18:46:40 2021 +0100
+++ b/auditok/signal.py	Sun Jan 10 17:11:07 2021 +0100
@@ -1,3 +1,17 @@
+"""
+Module for basic audio signal processing and array operations.
+
+.. autosummary::
+    :toctree: generated/
+
+    to_array
+    extract_single_channel
+    compute_average_channel
+    compute_average_channel_stereo
+    separate_channels
+    calculate_energy_single_channel
+    calculate_energy_multichannel
+"""
 from array import array
 import audioop
 import math
@@ -19,6 +33,28 @@


 def compute_average_channel(data, fmt, channels):
+    """
+    Compute and return average channel of multi-channel audio data. If the
+    number of channels is 2, use :func:`compute_average_channel_stereo` (much
+    faster). This function uses standard `array` module to convert `bytes` data
+    into an array of numeric values.
+
+    Parameters
+    ----------
+    data : bytes
+        multi-channel audio data to mix down.
+    fmt : str
+        format (single character) to pass to `array.array` to convert `data`
+        into an array of samples. This should be "b" if audio data's sample width
+        is 1, "h" if it's 2 and "i" if it's 4.
+    channels : int
+        number of channels of audio data.
+
+    Returns
+    -------
+    mono_audio : bytes
+        mixed down audio data.
+    """
     all_channels = array(fmt, data)
     mono_channels = [
         array(fmt, all_channels[ch::channels]) for ch in range(channels)
@@ -31,9 +67,10 @@


 def compute_average_channel_stereo(data, sample_width):
-    """Compute and return average channel (i.e., mix down channels) of stereo
-    data. When data is 2-channel, using standard `audioop` module is *much*
-    faster.
+    """Compute and return average channel of stereo audio data. This function
+    should be used when the number of channels is exactly 2 because in that
+    case we can use standard `audioop` module which is *much* faster than
+    calling :func:`compute_average_channel`.

     Parameters
     ----------
@@ -53,6 +90,25 @@


 def separate_channels(data, fmt, channels):
+    """Create a list of arrays of audio samples (`array.array` objects), one for
+    each channel.
+
+    Parameters
+    ----------
+    data : bytes
+        multi-channel audio data to separate.
+    fmt : str
+        format (single character) to pass to `array.array` to convert `data`
+        into an array of samples. This should be "b" if audio data's sample width
+        is 1, "h" if it's 2 and "i" if it's 4.
+    channels : int
+        number of channels of audio data.
+
+    Returns
+    -------
+    channels_arr : list
+        list of audio channels, each as a standard `array.array`.
+    """
     all_channels = array(fmt, data)
     mono_channels = [
         array(fmt, all_channels[ch::channels]) for ch in range(channels)
@@ -60,11 +116,48 @@
     return mono_channels


-def calculate_energy_single_channel(x, sample_width):
-    energy_sqrt = max(audioop.rms(x, sample_width), _EPSILON)
+def calculate_energy_single_channel(data, sample_width):
+    """Calculate the energy of mono audio data. Energy is computed as:
+
+    .. math:: energy = 20 \log(\sqrt{{1}/{N}\sum_{i}^{N}{a_i}^2}) # noqa: W605
+
+    where `a_i` is the i-th audio sample and `N` is the number of audio samples
+    in data.
+
+    Parameters
+    ----------
+    data : bytes
+        single-channel audio data.
+    sample_width : int
+        size in bytes of one audio sample.
+
+    Returns
+    -------
+    energy : float
+        energy of audio signal.
+    """
+    energy_sqrt = max(audioop.rms(data, sample_width), _EPSILON)
     return 20 * math.log10(energy_sqrt)


 def calculate_energy_multichannel(x, sample_width, aggregation_fn=max):
+    """Calculate the energy of multi-channel audio data. Energy is calculated
+    channel-wise. An aggregation function is applied to the resulting energies
+    (default: `max`). Also see :func:`calculate_energy_single_channel`.
+
+    Parameters
+    ----------
+    x : iterable
+        per-channel audio data, one single-channel item for each channel.
+    sample_width : int
+        size in bytes of one audio sample.
+    aggregation_fn : callable, default: max
+        aggregation function to apply to the resulting per-channel energies.
+
+    Returns
+    -------
+    energy : float
+        aggregated energy of multi-channel audio signal.
+    """
     energies = (calculate_energy_single_channel(xi, sample_width) for xi in x)
     return aggregation_fn(energies)
--- a/auditok/util.py	Sun Jan 03 18:46:40 2021 +0100
+++ b/auditok/util.py	Sun Jan 10 17:11:07 2021 +0100
@@ -1,19 +1,16 @@
 """
-Class summary
-=============
+.. autosummary::
+    :toctree: generated/

-.. autosummary::
-
-        AudioEnergyValidator
-        AudioReader
-        Recorder
+    AudioEnergyValidator
+    AudioReader
+    Recorder
+    make_duration_formatter
+    make_channel_selector
 """
-from __future__ import division
-import sys
 from abc import ABC, abstractmethod
 import warnings
 from functools import partial
-from audioop import tomono
 from .io import (
     AudioIOError,
     AudioSource,
@@ -36,6 +33,7 @@

 __all__ = [
     "make_duration_formatter",
+    "make_channel_selector",
     "DataSource",
     "DataValidator",
     "StringDataSource",
@@ -49,7 +47,64 @@

 def make_duration_formatter(fmt):
     """
-    Accepted format directives: %i %s %m %h
+    Make and return a function used to format durations in seconds. Accepted
+    format directives are:
+
+    - ``%S`` : absolute number of seconds with 3 decimals. This directive should
+      be used alone.
+    - ``%i`` : milliseconds
+    - ``%s`` : seconds
+    - ``%m`` : minutes
+    - ``%h`` : hours
+
+    These last 4 directives should all be specified. They can be placed anywhere
+    in the input string.
+
+    Parameters
+    ----------
+    fmt : str
+        duration format.
+
+    Returns
+    -------
+    formatter : callable
+        a function that takes a duration in seconds (float) and returns a string
+        that corresponds to that duration.
+
+    Raises
+    ------
+    TimeFormatError
+        if the format contains an unknown directive.
+
+    Examples
+    --------
+
+    Using ``%S``:
+
+    .. code:: python
+
+        formatter = make_duration_formatter("%S")
+        formatter(123.589)
+        '123.589'
+        formatter(123)
+        '123.000'
+
+    Using the other directives:
+
+    .. code:: python
+
+        formatter = make_duration_formatter("%h:%m:%s.%i")
+        formatter(3600+120+3.25)
+        '01:02:03.250'
+
+        formatter = make_duration_formatter("%h hrs, %m min, %s sec and %i ms")
+        formatter(3600+120+3.25)
+        '01 hrs, 02 min, 03 sec and 250 ms'
+
+        # omitting one of the 4 directives might result in a wrong duration
+        formatter = make_duration_formatter("%m min, %s sec and %i ms")
+        formatter(3600+120+3.25)
+        '02 min, 03 sec and 250 ms'
     """
     if fmt == "%S":

@@ -114,7 +169,7 @@
     channels : int
         number of channels of raw audio data that the returned selector should
         expect.
-    selected : int or str
+    selected : int or str, default: None
         audio channel to select and return when calling `selector(raw_data)`. It
         should be an int >= `-channels` and < `channels`. If one of "mix",
         "avg" or "average" is passed then `selector` will return the average
@@ -129,8 +184,9 @@

     Raises
     ------
-    ValueError if `sample_width` is not one of 1, 2 or 4, or if `selected` has
-        an unexpected value.
+    ValueError
+        if `sample_width` is not one of 1, 2 or 4, or if `selected` has an
+        unexpected value.
     """
     fmt = signal.FORMAT.get(sample_width)
     if fmt is None:
@@ -176,15 +232,14 @@

 class DataSource(ABC):
     """
-    Base class for objects passed to
-    :func:`auditok.core.StreamTokenizer.tokenize`.
+    Base class for objects passed to :func:`StreamTokenizer.tokenize`.
     Subclasses should implement a :func:`DataSource.read` method.
     """

     @abstractmethod
     def read(self):
         """
-        Read a piece of data read from this source.
+        Read a block (i.e., window) of data read from this source.
         If no more data is available, return None.
         """

@@ -194,7 +249,7 @@
     Base class for a validator object used by :class:`.core.StreamTokenizer`
     to check if read data is valid.
     Subclasses should implement :func:`is_valid` method.
-   """
+    """

     @abstractmethod
     def is_valid(self, data):
@@ -204,6 +259,40 @@


 class AudioEnergyValidator(DataValidator):
+    """
+    A validator based on audio signal energy. For an input window of `N` audio
+    samples (see :func:`AudioEnergyValidator.is_valid`), the energy is computed
+    as:
+
+    .. math:: energy = 20 \log(\sqrt{{1}/{N}\sum_{i}^{N}{a_i}^2}) # noqa: W605
+
+    where `a_i` is the i-th audio sample.
+
+    Parameters
+    ----------
+    energy_threshold : float
+        minimum energy that audio window should have to be valid.
+    sample_width : int
+        size in bytes of one audio sample.
+    channels : int
+        number of channels of audio data.
+    use_channel : {None, "any", "mix", "avg", "average"} or int
+        channel to use for energy computation. The following values are
+        accepted:
+
+        - None (alias "any") : compute energy for each of the channels and return
+          the maximum value.
+        - "mix" (alias "avg" or "average") : compute the average channel then
+          compute its energy.
+        - int (>= 0 , < `channels`) : compute the energy of the specified channel
+          and ignore the other ones.
+
+    Returns
+    -------
+    energy : float
+        energy of the audio window.
+    """
+
     def __init__(
         self, energy_threshold, sample_width, channels, use_channel=None
     ):
@@ -218,6 +307,18 @@
         self._energy_threshold = energy_threshold

     def is_valid(self, data):
+        """Check whether the energy of `data` reaches the energy threshold.
+
+        Parameters
+        ----------
+        data : bytes-like
+            array of raw audio data
+
+        Returns
+        -------
+        bool
+            True if the energy of audio data is >= threshold, False otherwise.
+        """
         log_energy = self._energy_fn(self._selector(data), self._sample_width)
         return log_energy >= self._energy_threshold

@@ -229,10 +330,10 @@
     step forward. If the end of the buffer is reached, :func:`read` returns
     None.

-    :Parameters:
-
-        `data` :
-            a str object.
+    Parameters
+    ----------
+    data : str
+        a string object used as data.

     """

@@ -246,9 +347,10 @@
         """
         Read one character from buffer.

-        :Returns:
-
-            Current character or None if end of buffer is reached
+        Returns
+        -------
+        char : str
+            current character or None if end of buffer is reached.
         """

         if self._current >= len(self._data):
@@ -260,10 +362,10 @@
         """
         Set a new data buffer.

-        :Parameters:
-
-            `data` : a str object
-                New data buffer.
+        Parameters
+        ----------
+        data : str
+            new data buffer.
         """

         if not isinstance(data, str):
@@ -274,21 +376,25 @@

 class ADSFactory:
     """
+    .. deprecated:: 2.0.0
+          `ADSFactory` will be removed in auditok 2.0.1, use instances of
+          :class:`AudioReader` instead.
+
     Factory class that makes it easy to create an
-    :class:`ADSFactory.AudioDataSource` object that implements
+    :class:`AudioDataSource` object that implements
     :class:`DataSource` and can therefore be passed to
     :func:`auditok.core.StreamTokenizer.tokenize`.

     Whether you read audio data from a file, the microphone or a memory buffer,
     this factory instantiates and returns the right
-    :class:`ADSFactory.AudioDataSource` object.
+    :class:`AudioDataSource` object.

-    There are many other features you want your
-    :class:`ADSFactory.AudioDataSource` object to have, such as: memorize all
-    read audio data so that you can rewind and reuse it (especially useful when
-    reading data from the microphone), read a fixed amount of data (also useful
-    when reading from the microphone), read overlapping audio frames
-    (often needed when dosing a spectral analysis of data).
+    There are many other features you want a :class:`AudioDataSource` object to
+    have, such as: memorize all read audio data so that you can rewind and reuse
+    it (especially useful when reading data from the microphone), read a fixed
+    amount of data (also useful when reading from the microphone), read
+    overlapping audio frames (often needed when doing a spectral analysis of
+    data).

     :func:`ADSFactory.ads` automatically creates and return object with the
     desired behavior according to the supplied keyword arguments.
@@ -440,57 +546,40 @@
     @staticmethod
     def ads(**kwargs):
         """
-        Create an return an :class:`ADSFactory.AudioDataSource`. The type and
+        Create and return an :class:`AudioDataSource`. The type and
         behavior of the object is the result
-        of the supplied parameters.
+        of the supplied parameters. Called without any parameters, the class
+        will read audio data from the available built-in microphone with the
+        default parameters.

-        :Parameters:
-
-        *No parameters* :
-           read audio data from the available built-in microphone with the
-           default parameters. The returned :class:`ADSFactory.AudioDataSource`
-           encapsulate an :class:`io.PyAudioSource` object and hence it accepts
-           the next four parameters are passed to use instead of their default
-           values.
-
-        `sampling_rate`, `sr` : *(int)*
-            number of samples per second. Default = 16000.
-
-        `sample_width`, `sw` : *(int)*
-            number of bytes per sample (must be in (1, 2, 4)). Default = 2
-
-        `channels`, `ch` : *(int)*
-            number of audio channels. Default = 1 (only this value is currently
-            accepted)
-
-        `frames_per_buffer`, `fpb` : *(int)*
-            number of samples of PyAudio buffer. Default = 1024.
-
-        `audio_source`, `asrc` : an `AudioSource` object
-            read data from this audio source
-
-        `filename`, `fn` : *(string)*
-            build an `io.AudioSource` object using this file (currently only
-            wave format is supported)
-
-        `data_buffer`, `db` : *(string)*
+        Parameters
+        ----------
+        sampling_rate, sr : int, default: 16000
+            number of audio samples per second of input audio stream.
+        sample_width, sw : int, default: 2
+            number of bytes per sample, must be one of 1, 2 or 4
+        channels, ch : int, default: 1
+            number of audio channels, only a value of 1 is currently accepted.
+        frames_per_buffer, fpb : int, default: 1024
+            number of samples of PyAudio buffer.
+        audio_source, asrc : `AudioSource`
+            `AudioSource` to read data from
+        filename, fn : str
+            create an `AudioSource` object using this file
+        data_buffer, db : str
             build an `io.BufferAudioSource` using data in `data_buffer`.
             If this keyword is used,
             `sampling_rate`, `sample_width` and `channels` are passed to
             `io.BufferAudioSource` constructor and used instead of default
             values.
-
-        `max_time`, `mt` : *(float)*
+        max_time, mt : float
             maximum time (in seconds) to read. Default behavior: read until
             there is no more data
             available.
-
-        `record`, `rec` : *(bool)*
+        record, rec : bool, default: False
             save all read data in cache. Provide a navigable object which has a
             `rewind` method.
-            Default = False.
-
-        `block_dur`, `bd` : *(float)*
+        block_dur, bd : float
             processing block duration in seconds. This represents the quantity
             of audio data to return each time the :func:`read` method is
             invoked. If `block_dur` is 0.025 (i.e. 25 ms) and the sampling rate
@@ -499,8 +588,7 @@
             be looked for (and used if available) before `block_size`. If
             neither parameter is given, `block_dur` will be set to 0.01 second
             (i.e. 10 ms)
-
-        `hop_dur`, `hd` : *(float)*
+        hop_dur, hd : float
             quantity of data to skip from current processing window. if
             `hop_dur` is supplied then there will be an overlap of `block_dur`
             - `hop_dur` between two adjacent blocks. This parameter will be
@@ -508,173 +596,25 @@
             If neither parameter is given, `hop_dur` will be set to `block_dur`
             which means that there will be no overlap between two consecutively
             read blocks.
-
-        `block_size`, `bs` : *(int)*
+        block_size, bs : int
             number of samples to read each time the `read` method is called.
             Default: a block size that represents a window of 10ms, so for a
             sampling rate of 16000, the default `block_size` is 160 samples,
             for a rate of 44100, `block_size` = 441 samples, etc.
-
-        `hop_size`, `hs` : *(int)*
+        hop_size, hs : int
             determines the number of overlapping samples between two adjacent
             read windows. For a `hop_size` of value *N*, the overlap is
             `block_size` - *N*. Default : `hop_size` = `block_size`, means that
             there is no overlap.

-        :Returns:
-
-        An AudioDataSource object that has the desired features.
-
-        :Exampels:
-
-        1. **Create an AudioDataSource that reads data from the microphone
-        (requires Pyaudio) with default audio parameters:**
-
-        .. code:: python
-
-            from auditok import ADSFactory
-            ads = ADSFactory.ads()
-            ads.get_sampling_rate()
-            16000
-            ads.get_sample_width()
-            2
-            ads.get_channels()
-            1
-
-        2. **Create an AudioDataSource that reads data from the microphone with
-        a sampling rate of 48KHz:**
-
-        .. code:: python
-
-            from auditok import ADSFactory
-            ads = ADSFactory.ads(sr=48000)
-            ads.get_sampling_rate()
-            48000
-
-        3. **Create an AudioDataSource that reads data from a wave file:**
-
-        .. code:: python
-
-            from auditok import ADSFactory
-            from auditok import dataset
-            file = dataset.was_der_mensch_saet_mono_44100_lead_trail_silence
-            ads = ADSFactory.ads(fn=file)
-            ads.get_sampling_rate()
-            44100
-            ads.get_sample_width()
-            2
-            ads.get_channels()
-            1
-
-        4. **Define size of read blocks as 20 ms**
-
-        .. code:: python
-
-            from auditok import ADSFactory
-            from auditok import dataset
-            file = dataset.was_der_mensch_saet_mono_44100_lead_trail_silence
-            #we know samling rate for previous file is 44100 samples/second
-            #so 10 ms are equivalent to 441 samples and 20 ms to 882
-            block_size = 882
-            ads = ADSFactory.ads(bs=882, fn=file)
-            ads.open()
-            # read one block
-            data = ads.read()
-            ads.close()
-            len(data)
-            1764
-            assert len(data) ==  ads.get_sample_width() * block_size
-
-        5. **Define block size as a duration (use block_dur or bd):**
-
-        .. code:: python
-
-            from auditok import ADSFactory
-            from auditok import dataset
-            file = dataset.was_der_mensch_saet_mono_44100_lead_trail_silence
-            dur = 0.25 # second
-            ads = ADSFactory.ads(bd=dur, fn=file)
-
-            # we know samling rate for previous file is 44100 samples/second
-            # for a block duration of 250 ms, block size should be
-            # 0.25 * 44100 = 11025
-            ads.get_block_size()
-            11025
-            assert ads.get_block_size() ==  int(0.25 * 44100)
-            ads.open()
-            # read one block
-            data = ads.read()
-            ads.close()
-            len(data)
-            22050
-            assert len(data) ==  ads.get_sample_width() * ads.get_block_size()
-
-        6. **Read overlapping blocks (when one of hope_size, hs, hop_dur or hd
-            is > 0):**
-
-        For a better readability we'd use :class:`auditok.io.BufferAudioSource`
-        with a string buffer:
-
-        .. code:: python
-
-            from auditok import ADSFactory
-            '''
-            we supply a data beffer instead of a file (keyword 'bata_buffer' or
-            'db')
-            sr : sampling rate = 16 samples/sec
-            sw : sample width = 1 byte
-            ch : channels = 1
-            '''
-            buffer = "abcdefghijklmnop" # 16 bytes = 1 second of data
-            bd = 0.250 # block duration = 250 ms = 4 bytes
-            hd = 0.125 # hop duration = 125 ms = 2 bytes
-            ads = ADSFactory.ads(db="abcdefghijklmnop",
-                                 bd=bd,
-                                 hd=hd,
-                                 sr=16,
-                                 sw=1,
-                                 ch=1)
-            ads.open()
-            ads.read()
-            'abcd'
-            ads.read()
-            'cdef'
-            ads.read()
-            'efgh'
-            ads.read()
-            'ghij'
-            data = ads.read()
-            assert data == 'ijkl'
-
-        7. **Limit amount of read data (use max_time or mt):**
-
-        .. code:: python
-
-            '''
-            We know audio file is larger than 2.25 seconds
-            We want to read up to 2.25 seconds of audio data
-            '''
-            from auditok import dataset
-            from auditok import ADSFactory
-            file = dataset.was_der_mensch_saet_mono_44100_lead_trail_silence
-            ads = ADSFactory.ads(mt=2.25, fn=file)
-            ads.open()
-            data = []
-            while True:
-                d = ads.read()
-                if d is None:
-                    break
-                data.append(d)
-
-            ads.close()
-            data = b''.join(data)
-            assert len(data) == int(ads.get_sampling_rate() *
-                                 2.25 * ads.get_sample_width() *
-                                 ads.get_channels())
+        Returns
+        -------
+        audio_data_source : AudioDataSource
+            an `AudioDataSource` object built with input parameters.
         """
         warnings.warn(
             "'ADSFactory' is deprecated and will be removed in a future "
-            "release. Please use AudioReader(...) instead.",
+            "release. Please use AudioReader class instead.",
             DeprecationWarning,
         )

@@ -982,20 +922,22 @@
     call expect when remaining data does not make up a full window.

     Objects of this class can be set up to return audio windows with a given
-    overlap and to record the whole stream for later access. They can also have
+    overlap and to record the whole stream for later access (useful when
+    reading data from the microphone). They can also have
     a limit for the maximum amount of data to read.

     Parameters
     ----------
     input : str, bytes, AudioSource, AudioReader, AudioRegion or None
-        input audio data. If str, it should be a path to an existing audio file.
-        "-" is interpreted as standard input. If bytes, input is considered as
-        raw audio data. If None, read audio from microphone.
-        Every object that is not an ´AudioReader´ will be transformed into an
-        `AudioReader` before processing. If it is an `str` that refers to a raw
-        audio file, `bytes` or None, audio parameters should be provided using
-        kwargs (i.e., `samplig_rate`, `sample_width` and `channels` or their
-        alias).
+        input audio data. If the type of the passed argument is `str`, it should
+        be a path to an existing audio file. "-" is interpreted as standard
+        input. If the type is `bytes`, input is considered as a buffer of raw
+        audio data. If None, read audio from microphone. Every object that is
+        not an :class:`AudioReader` will be transformed, when possible, into an
+        :class:`AudioSource` before processing. If it is an `str` that refers to
+        a raw audio file, `bytes` or None, audio parameters should be provided
+        using kwargs (i.e., `sampling_rate`, `sample_width` and `channels` or
+        their alias).
     block_dur: float, default: 0.01
         length in seconds of audio windows to return at each `read` call.
     hop_dur: float, default: None
@@ -1017,11 +959,11 @@
     When `input` is None, of type bytes or a raw audio files some of the
     follwing kwargs are mandatory.

-    Kwargs
-    ------
+    Other Parameters
+    ----------------
     audio_format, fmt : str
         type of audio data (e.g., wav, ogg, flac, raw, etc.). This will only be
-        used if ´input´ is a string path to an audio file. If not given, audio
+        used if `input` is a string path to an audio file. If not given, audio
         type will be guessed from file name extension or from file header.
     sampling_rate, sr : int
         sampling rate of audio data. Required if `input` is a raw audio file, is
@@ -1032,17 +974,21 @@
     channels, ch : int
         number of channels of audio data. Required for raw data, see
         `sampling_rate`.
-    use_channel, uc : {None, "mix"} or int
+    use_channel, uc : {None, "any", "mix", "avg", "average"} or int
         which channel to use for split if `input` has multiple audio channels.
         Regardless of which channel is used for splitting, returned audio events
-        contain data from *all* channels, just as `input`.
-        The following values are accepted:
-            - None (alias "any"): accept audio activity from any channel, even
-            if other channels are silent. This is the default behavior.
-            - "mix" ("avg" or "average"): mix down all channels (i.e. compute
-            average channel) and split the resulting channel.
-            - int (0 <=, > `channels`): use one channel, specified by integer
-            id, for split.
+        contain data from *all* the channels of `input`. The following values
+        are accepted:
+
+        - None (alias "any"): accept audio activity from any channel, even if
+          other channels are silent. This is the default behavior.
+
+        - "mix" (alias "avg" or "average"): mix down all channels (i.e., compute
+          average channel) and split the resulting channel.
+
+        - int (>= 0, < `channels`): use one channel, specified by its integer
+          id, for split.
+
     large_file : bool, default: False
         If True, AND if `input` is a path to a *wav* of a *raw* audio file
         (and only these two formats) then audio data is lazily loaded to memory
@@ -1138,11 +1084,21 @@


 # Keep AudioDataSource for compatibility
-# Remove in a future version when ADSFactory is dropped
+# Remove in a future version when ADSFactory is removed
 AudioDataSource = AudioReader


 class Recorder(AudioReader):
+    """Class to read fixed-size chunks of audio data from a source and keep
+    data in a cache. Using this class is equivalent to initializing
+    :class:`AudioReader` with `record=True`. For more information about the
+    other parameters see :class:`AudioReader`.
+
+    Once the desired amount of data is read, you can call the :func:`rewind`
+    method then get the recorded data via the :attr:`data` attribute. You can also
+    re-read cached data one window at a time by calling :func:`read`.
+    """
+
     def __init__(
         self, input, block_dur=0.01, hop_dur=None, max_read=None, **kwargs
     ):