# HG changeset patch # User Amine Sehili # Date 1729349899 -7200 # Node ID 1b78211b7e0718a9d2d1a7136b6873fbc722140d # Parent 12a7f01c633b4ee35832107bc3a3edad8fb2ca9c Update docstrings diff -r 12a7f01c633b -r 1b78211b7e07 auditok/cmdline.py --- a/auditok/cmdline.py Sat Oct 19 15:21:43 2024 +0200 +++ b/auditok/cmdline.py Sat Oct 19 16:58:19 2024 +0200 @@ -1,11 +1,10 @@ #!/usr/bin/env python # encoding: utf-8 -""" -`auditok` -- An Audio Activity Detection tool +"""`auditok` -- An Audio Activity Detection Tool -`auditok` is a program that can be used for Audio/Acoustic -activity detection. It can read audio data from audio files as well -as from the microphone or standard input. +`auditok` is a program designed for audio or acoustic activity detection. +It supports reading audio data from various sources, including audio files, +microphones, and standard input. @author: Mohamed El Amine SEHILI @copyright: 2015-2021 Mohamed El Amine SEHILI diff -r 12a7f01c633b -r 1b78211b7e07 auditok/io.py --- a/auditok/io.py Sat Oct 19 15:21:43 2024 +0200 +++ b/auditok/io.py Sat Oct 19 16:58:19 2024 +0200 @@ -72,16 +72,27 @@ def _guess_audio_format(filename, fmt): - """Helper function to guess audio format from file extension, or by - normalizing a user provided format. + """Guess the audio format from a file extension or normalize a provided + format. - Args: - filename (str, Path): audio file name. - fmt (str): un-normalized user provided format. + This helper function attempts to determine the audio format based on the + file extension of `filename` or by normalizing the format specified by the + user in `fmt`. - Returns: - str, None: guessed audio format or None if no format could be guessed. + Parameters + ---------- + filename : str or Path + The audio file name, including its extension. + fmt : str + The un-normalized format provided by the user. + + Returns + ------- + str or None + The guessed audio format as a string, or None if no format could be + determined. 
""" + if fmt is None: extension = os.path.splitext(filename)[1][1:].lower() if extension: @@ -96,24 +107,36 @@ def _get_audio_parameters(param_dict): """ - Get audio parameters from a dictionary of parameters. An audio parameter can - have a long name or a short name. If the long name is present, the short - name will be ignored. If neither is present then `AudioParameterError` is - raised. + Retrieve audio parameters from a dictionary of parameters. - Expected parameters are: + Each audio parameter can have a long name or a short name. If both are + present, the long name takes precedence. If neither is found, an + `AudioParameterError` is raised. - - `sampling_rate`, `sr` : int, sampling rate. + Expected parameters: + - `sampling_rate`, `sr` : int, the sampling rate. + - `sample_width`, `sw` : int, the sample size in bytes. + - `channels`, `ch` : int, the number of audio channels. - - `sample_width`, `sw` : int, sample size in bytes. - - - `channels`, `ch` : int, number of channels. + Parameters + ---------- + param_dict : dict + A dictionary containing audio parameters, with possible keys as + defined above. Returns ------- - audio_parameters : tuple - a tuple for audio parameters as (sampling_rate, sample_width, channels). + tuple + A tuple containing audio parameters as + (sampling_rate, sample_width, channels). + + Raises + ------ + AudioParameterError + If a required parameter is missing, is not an integer, or is not a + positive value. """ + parameters = [] for long_name, short_name in ( ("sampling_rate", "sr"), @@ -134,17 +157,18 @@ """ Base class for audio source objects. - Subclasses should implement methods to open/close and audio stream - and read the desired amount of audio samples. + This class provides a foundation for audio source objects. Subclasses are + expected to implement methods to open and close an audio stream, as well as + to read the desired number of audio samples. 
Parameters ---------- sampling_rate : int - number of samples per second of audio data. + The number of samples per second of audio data. sample_width : int - size in bytes of one audio sample. Possible values: 1, 2 or 4. + The size, in bytes, of each audio sample. Accepted values are 1, 2, or 4. channels : int - number of channels of audio data. + The number of audio channels. """ def __init__( @@ -177,23 +201,26 @@ @abstractmethod def read(self, size): - """ - Read and return `size` audio samples at most. + """Read and return up to `size` audio samples. + + This abstract method reads audio data and returns it as a bytes object, + containing at most `size` samples. Parameters - ----------- + ---------- size : int - Number of samples to read. + The number of samples to read. Returns ------- - data : bytes - Audio data as a bytes object of length `N * sample_width * channels` - where `N` equals: + bytes + A bytes object containing the audio data, with a length of + `N * sample_width * channels`, where `N` is: - - `size` if `size` <= remaining samples - - - remaining samples if `size` > remaining samples + - `size`, if `size` is less than or equal to the number of remaining + samples + - the number of remaining samples, if `size` exceeds the remaining + samples """ @property @@ -241,12 +268,12 @@ class Rewindable(AudioSource): - """ - Base class for rewindable audio streams. + """Base class for rewindable audio sources. - Subclasses should implement a method to return back to the start of an the - stream (`rewind`), as well as a property getter/setter named `position` that - reads/sets stream position expressed in number of samples. + This class serves as a base for audio sources that support rewinding. + Subclasses should implement a method to return to the beginning of the + stream (`rewind`), and provide a property `position` that allows getting + and setting the current stream position, expressed in number of samples. 
""" @abstractmethod @@ -287,20 +314,22 @@ class BufferAudioSource(Rewindable): - """ - An `AudioSource` that encapsulates and reads data from a memory buffer. + """An `AudioSource` that reads audio data from a memory buffer. - This class implements the `Rewindable` interface. + This class implements the `Rewindable` interface, allowing audio data + stored in a buffer to be read with support for rewinding and position + control. + Parameters ---------- data : bytes - audio data - sampling_rate : int, default: 16000 - number of samples per second of audio data. - sample_width : int, default: 2 - size in bytes of one audio sample. Possible values: 1, 2 or 4. - channels : int, default: 1 - number of channels of audio data. + The audio data stored in a memory buffer. + sampling_rate : int, optional, default=16000 + The number of samples per second of audio data. + sample_width : int, optional, default=2 + The size in bytes of one audio sample. Accepted values are 1, 2, or 4. + channels : int, optional, default=1 + The number of audio channels. """ def __init__( @@ -380,17 +409,19 @@ class FileAudioSource(AudioSource): - """ - Base class `AudioSource`s that read audio data from a file. + """Base class for `AudioSource`s that read audio data from a file. + + This class provides a foundation for audio sources that retrieve audio data + from file sources. Parameters ---------- - sampling_rate : int, default: 16000 - number of samples per second of audio data. - sample_width : int, default: 2 - size in bytes of one audio sample. Possible values: 1, 2 or 4. - channels : int, default: 1 - number of channels of audio data. + sampling_rate : int, optional, default=16000 + The number of samples per second of audio data. + sample_width : int, optional, default=2 + The size in bytes of one audio sample. Accepted values are 1, 2, or 4. + channels : int, optional, default=1 + The number of audio channels. 
""" def __init__(self, sampling_rate, sample_width, channels): @@ -424,22 +455,21 @@ class RawAudioSource(FileAudioSource): """ - A class for an `AudioSource` that reads data from a raw (headerless) audio - file. + An `AudioSource` class for reading data from a raw (headerless) audio file. - This class should be used for large raw audio files to avoid loading the - whole data to memory. + This class is suitable for large raw audio files, allowing for efficient + data handling without loading the entire file into memory. Parameters ---------- - filename : str, Path - path to a raw audio file. + filename : str or Path + The path to the raw audio file. sampling_rate : int - Number of samples per second of audio data. + The number of samples per second of audio data. sample_width : int - Size in bytes of one audio sample. Possible values : 1, 2, 4. + The size in bytes of each audio sample. Accepted values are 1, 2, or 4. channels : int - Number of channels of audio data. + The number of audio channels. """ def __init__(self, filename, sampling_rate, sample_width, channels): @@ -463,15 +493,15 @@ class WaveAudioSource(FileAudioSource): """ - A class for an `AudioSource` that reads data from a wave file. + An `AudioSource` class for reading data from a wave file. - This class should be used for large wave files to avoid loading the whole - data to memory. + This class is suitable for large wave files, allowing for efficient data + handling without loading the entire file into memory. Parameters ---------- - filename : str, Path - path to a valid wave file. + filename : str or Path + The path to a valid wave file. """ def __init__(self, filename): @@ -496,23 +526,25 @@ class PyAudioSource(AudioSource): - """ - A class for an `AudioSource` that reads data from built-in microphone using - PyAudio (https://people.csail.mit.edu/hubert/pyaudio/). + """An `AudioSource` class for reading data from a built-in microphone using + PyAudio. 
+ + This class leverages PyAudio (https://people.csail.mit.edu/hubert/pyaudio/) + to capture audio data directly from a microphone. Parameters ---------- - sampling_rate : int, default: 16000 - number of samples per second of audio data. - sample_width : int, default: 2 - size in bytes of one audio sample. Possible values: 1, 2 or 4. - channels : int, default: 1 - number of channels of audio data. - frames_per_buffer : int, default: 1024 - PyAudio number of frames per buffer. - input_device_index: None or int, default: None - PyAudio index of audio device to read audio data from. If None default - device is used. + sampling_rate : int, optional, default=16000 + The number of samples per second of audio data. + sample_width : int, optional, default=2 + The size in bytes of each audio sample. Accepted values are 1, 2, or 4. + channels : int, optional, default=1 + The number of audio channels. + frames_per_buffer : int, optional, default=1024 + The number of frames per buffer, as specified by PyAudio. + input_device_index : int or None, optional, default=None + The PyAudio index of the audio device to read from. If None, the default + audio device is used. """ def __init__( @@ -569,16 +601,19 @@ class StdinAudioSource(FileAudioSource): """ - A class for an `AudioSource` that reads data from standard input. + An `AudioSource` class for reading audio data from standard input. + + This class is designed to capture audio data directly from standard input, + making it suitable for streaming audio sources. Parameters ---------- - sampling_rate : int, default: 16000 - number of samples per second of audio data. - sample_width : int, default: 2 - size in bytes of one audio sample. Possible values: 1, 2 or 4. - channels : int, default: 1 - number of channels of audio data. + sampling_rate : int, optional, default=16000 + The number of samples per second of audio data. + sample_width : int, optional, default=2 + The size in bytes of each audio sample. 
Accepted values are 1, 2, or 4. + channels : int, optional, default=1 + The number of audio channels. """ def __init__( @@ -622,18 +657,19 @@ class PyAudioPlayer: - """ - A class for audio playback using Pyaudio + """A class for audio playback using PyAudio. + + This class facilitates audio playback through the PyAudio library (https://people.csail.mit.edu/hubert/pyaudio/). Parameters ---------- - sampling_rate : int, default: 16000 - number of samples per second of audio data. - sample_width : int, default: 2 - size in bytes of one audio sample. Possible values: 1, 2 or 4. - channels : int, default: 1 - number of channels of audio data. + sampling_rate : int, optional, default=16000 + The number of samples per second of audio data. + sample_width : int, optional, default=2 + The size in bytes of each audio sample. Accepted values are 1, 2, or 4. + channels : int, optional, default=1 + The number of audio channels. """ def __init__( @@ -704,21 +740,26 @@ def player_for(source): """ - Return an `AudioPlayer` compatible with `source` (i.e., has the same - sampling rate, sample width and number of channels). + Return an `AudioPlayer` compatible with the specified `source`. + + This function creates an `AudioPlayer` instance (currently only + `PyAudioPlayer` is implemented) that matches the audio properties of the + provided `source`, ensuring compatibility in terms of sampling rate, sample + width, and number of channels. Parameters ---------- source : AudioSource - An object that has `sampling_rate`, `sample_width` and `sample_width` + An object with `sampling_rate`, `sample_width`, and `channels` attributes. Returns ------- - player : PyAudioPlayer - An audio player that has the same sampling rate, sample width - and number of channels as `source`. + PyAudioPlayer + An audio player with the same sampling rate, sample width, and number + of channels as `source`. 
""" + return PyAudioPlayer( source.sampling_rate, source.sample_width, source.channels ) @@ -726,30 +767,37 @@ def get_audio_source(input=None, **kwargs): """ - Create and return an AudioSource from input. + Create and return an `AudioSource` based on the specified input. + + This function generates an `AudioSource` instance from various input types, + allowing flexibility for audio data sources such as file paths, raw data, + standard input, or microphone input via PyAudio. Parameters ---------- - input : str, bytes, "-" or None (default) - source to read audio data from. If `str`, it should be a path to a valid - audio file. If `bytes`, it is used as raw audio data. If it is "-", - raw data will be read from stdin. If None, read audio data from the - microphone using PyAudio. - kwargs - audio parameters used to build the `AudioSource` object. Depending on - the nature of `input`, theses may be omitted (e.g., when `input` is an - audio file in a popular audio format such as wav, ogg, flac, etc.) or - include parameters such as `sampling_rate`, `sample_width`, `channels` - (or their respective short name versions `sr`, `sw` and `ch`) if `input` - is a path to a raw (headerless) audio file, a bytes object for raw audio - data or None (to read data from built-in microphone). See the respective - `AudioSource` classes from more information about possible parameters. + input : str, bytes, "-", or None, optional + The source to read audio data from. Possible values are: + - `str`: Path to a valid audio file. + - `bytes`: Raw audio data. + - "-": Read raw data from standard input. + - None (default): Read audio data from the microphone using PyAudio. + kwargs : dict, optional + Additional audio parameters used to construct the `AudioSource` object. + Depending on the `input` type, these may be optional (e.g., for common + audio file formats such as wav, ogg, or flac). 
When required, parameters + include `sampling_rate`, `sample_width`, `channels`, or their short + forms `sr`, `sw`, and `ch`. These parameters are typically needed when + `input` is a path to a raw audio file, a bytes object with raw audio + data, or None (for microphone input). See respective `AudioSource` + classes for detailed parameter requirements. Returns ------- - source : AudioSource - audio source created from input parameters + AudioSource + An audio source created based on the specified input and audio + parameters. """ + if input == "-": return StdinAudioSource(*_get_audio_parameters(kwargs)) @@ -775,28 +823,34 @@ filename, sampling_rate, sample_width, channels, large_file=False ): """ - Load a raw audio file with standard Python. If `large_file` is True, return - a `RawAudioSource` object that reads data lazily from disk, otherwise load - all data to memory and return a `BufferAudioSource` object. + Load a raw audio file using standard Python file handling. + + This function loads audio data from a raw file. If `large_file` is set to + True, it returns a `RawAudioSource` object that reads data lazily from disk. + Otherwise, it loads all data into memory and returns a `BufferAudioSource` + object. Parameters ---------- - filename : str, Path - path to a raw audio data file. + filename : str or Path + The path to the raw audio data file. sampling_rate : int - sampling rate of audio data. + The sampling rate of the audio data. sample_width : int - size in bytes of one audio sample. + The size, in bytes, of each audio sample. channels : int - number of channels of audio data. - large_file : bool - if True, return a `RawAudioSource` otherwise a `BufferAudioSource` - object. + The number of audio channels. + large_file : bool, optional + If True, a `RawAudioSource` is returned to allow lazy data loading from + disk. If False, returns a `BufferAudioSource` with all data loaded into + memory. 
Returns ------- - source : RawAudioSource or BufferAudioSource - an `AudioSource` that reads data from input file. + AudioSource + An `AudioSource` that reads data from the specified file. The source is + either a `RawAudioSource` (for lazy loading) or a `BufferAudioSource` + (for in-memory loading), depending on the value of `large_file`. """ if None in (sampling_rate, sample_width, channels): @@ -824,22 +878,28 @@ def _load_wave(filename, large_file=False): """ - Load a wave audio file with standard Python. If `large_file` is True, return - a `WaveAudioSource` object that reads data lazily from disk, otherwise load - all data to memory and return a `BufferAudioSource` object. + Load a wave audio file using standard Python module `wave`. + + This function loads audio data from a wave (.wav) file. If `large_file` is + set to True, it returns a `WaveAudioSource` object that reads data lazily + from disk. Otherwise, it loads all data into memory and returns a + `BufferAudioSource` object. Parameters ---------- - filename : str, Path - path to a wav audio data file - large_file : bool - if True, return a `WaveAudioSource` otherwise a `BufferAudioSource` - object. + filename : str or Path + The path to the wave audio data file. + large_file : bool, optional + If True, a `WaveAudioSource` is returned to allow lazy data loading from + disk. If False, returns a `BufferAudioSource` with all data loaded into + memory. Returns ------- - source : WaveAudioSource or BufferAudioSource - an `AudioSource` that reads data from input file. + AudioSource + An `AudioSource` that reads data from the specified file. The source is + either a `WaveAudioSource` (for lazy loading) or a `BufferAudioSource` + (for in-memory loading), depending on the value of `large_file`. """ if large_file: @@ -856,20 +916,22 @@ def _load_with_pydub(filename, audio_format=None): """ - Open compressed audio or video file using pydub. If a video file - is passed, its audio track(s) are extracted and loaded. 
+ Load audio from a compressed audio or video file using `pydub`. + + This function uses `pydub` to load compressed audio files. If a video file + is specified, the audio track(s) are extracted and loaded. Parameters ---------- - filename : str, Path - path to audio file. - audio_format : str, default: None - audio file format if known (e.g. raw, webm, wav, ogg) + filename : str or Path + The path to the audio file. + audio_format : str, optional, default=None + The audio file format, if known (e.g., raw, webm, wav, ogg). Returns ------- - source : BufferAudioSource - an `AudioSource` that reads data from input file. + BufferAudioSource + An `AudioSource` that reads data from the specified file. """ func_dict = { @@ -888,62 +950,60 @@ def from_file(filename, audio_format=None, large_file=False, **kwargs): - """ - Read audio data from `filename` and return an `AudioSource` object. - if `audio_format` is None, the appropriate `AudioSource` class is guessed - from file's extension. `filename` can be a compressed audio or video file. - This will require installing `pydub` (https://github.com/jiaaro/pydub). + """Read audio data from `filename` and return an `AudioSource` object. - The normal behavior is to load all audio data to memory from which a - :class:`BufferAudioSource` object is created. This should be convenient - most of the time unless audio file is very large. In that case, and - in order to load audio data in lazy manner (i.e. read data from disk each - time :func:`AudioSource.read` is called), `large_file` should be True. + If `audio_format` is None, the appropriate `AudioSource` class is inferred + from the file extension. The `filename` can refer to a compressed audio or + video file; if a video file is provided, its audio track(s) are extracted. + This functionality requires `pydub` (https://github.com/jiaaro/pydub). - Note that the current implementation supports only wave and raw formats for - lazy audio loading. 
+ By default, all audio data is loaded into memory to create a + `BufferAudioSource` object, suitable for most cases. For very large files, + set `large_file=True` to enable lazy loading, which reads audio data from + disk each time `AudioSource.read` is called. Currently, lazy loading + supports only wave and raw formats. - If an audio format is `raw`, the following keyword arguments are required: + If `audio_format` is `raw`, the following keyword arguments are required: - - `sampling_rate`, `sr`: int, sampling rate of audio data. + - `sampling_rate`, `sr`: int, sampling rate of audio data. - `sample_width`, `sw`: int, size in bytes of one audio sample. - `channels`, `ch`: int, number of channels of audio data. - See also + See Also -------- - :func:`to_file`. + to_file : A related function for saving audio data to a file. Parameters ---------- - filename : str, Path - path to input audio or video file. - audio_format : str - audio format used to save data (e.g. raw, webm, wav, ogg). - large_file : bool, default: False - if True, audio won't fully be loaded to memory but only when a window - is read from disk. - + filename : str or Path + The path to the input audio or video file. + audio_format : str, optional + The audio format (e.g., raw, webm, wav, ogg). + large_file : bool, optional, default=False + If True, the audio data is read lazily from disk rather than being + fully loaded into memory. Other Parameters ---------------- - sampling_rate, sr: int - sampling rate of audio data + sampling_rate, sr : int + The sampling rate of the audio data. sample_width : int - sample width (i.e. number of bytes used to represent one audio sample) + The sample width in bytes (i.e., number of bytes per audio sample). channels : int - number of channels of audio data + The number of audio channels. Returns ------- - audio_source : AudioSource - an :class:`AudioSource` object that reads data from input file. 
+ AudioSource + An `AudioSource` object that reads data from the specified file. Raises ------ - `AudioIOError` - raised if audio data cannot be read in the given - format or if `format` is `raw` and one or more audio parameters are missing. + AudioIOError + If audio data cannot be read in the given format or if `audio_format` + is `raw` and one or more required audio parameters are missing. """ + audio_format = _guess_audio_format(filename, audio_format) if audio_format == "raw": @@ -965,18 +1025,52 @@ def _save_raw(data, file): """ - Saves audio data as a headerless (i.e. raw) file. - See also :func:`to_file`. + Save audio data as a headerless (raw) file. + + This function writes audio data to a file in raw format, without any header + information. + + Parameters + ---------- + data : bytes + The audio data to be saved. + file : str or Path + The path to the file where audio data will be saved. + + See Also + -------- + to_file : A related function for saving audio data in various formats. """ + with open(file, "wb") as fp: fp.write(data) def _save_wave(data, file, sampling_rate, sample_width, channels): """ - Saves audio data to a wave file. - See also :func:`to_file`. + Save audio data to a wave file. + + This function writes audio data to a file in the wave format, including + header information based on the specified audio parameters. + + Parameters + ---------- + data : bytes + The audio data to be saved. + file : str or Path + The path to the file where audio data will be saved. + sampling_rate : int + The sampling rate of the audio data. + sample_width : int + The size, in bytes, of each audio sample. + channels : int + The number of audio channels. + + See Also + -------- + to_file : A related function for saving audio data in various formats. 
""" + if None in (sampling_rate, sample_width, channels): raise AudioParameterError( "All audio parameters are required to save wave audio files" @@ -992,9 +1086,31 @@ data, file, audio_format, sampling_rate, sample_width, channels ): """ - Saves audio data with pydub (https://github.com/jiaaro/pydub). - See also :func:`to_file`. + Save audio data using pydub. + + This function saves audio data to a file in various formats supported by + pydub (https://github.com/jiaaro/pydub), such as mp3, wav, ogg, etc. + + Parameters + ---------- + data : bytes + The audio data to be saved. + file : str or Path + The path to the file where audio data will be saved. + audio_format : str + The audio format to save the file in (e.g., mp3, wav, ogg). + sampling_rate : int + The sampling rate of the audio data. + sample_width : int + The size, in bytes, of each audio sample. + channels : int + The number of audio channels. + + See Also + -------- + to_file : A related function for saving audio data in various formats. """ + segment = AudioSegment( data, frame_rate=sampling_rate, @@ -1007,34 +1123,38 @@ def to_file(data, filename, audio_format=None, **kwargs): """ - Writes audio data to file. If `audio_format` is `None`, output - audio format will be guessed from extension. If `audio_format` - is `None` and `filename` comes without an extension then audio - data will be written as a raw audio file. + Write audio data to a file. + + This function writes audio data to a file in the specified format. If + `audio_format` is None, the output format will be inferred from the file + extension. If `audio_format` is None and `filename` has no extension, + the data will be saved as a raw audio file. Parameters ---------- data : bytes-like - audio data to be written. Can be a `bytes`, `bytearray`, - `memoryview`, `array` or `numpy.ndarray` object. - filename : str, Path - path to output audio file. - audio_format : str - audio format used to save data (e.g. 
raw, webm, wav, ogg) - kwargs: dict - If an audio format other than `raw` is used, the following keyword - arguments are required: + The audio data to be written. Accepts `bytes`, `bytearray`, `memoryview`, + `array`, or `numpy.ndarray` objects. + filename : str or Path + The path to the output audio file. + audio_format : str, optional + The audio format to use for saving the data (e.g., raw, webm, wav, ogg). + kwargs : dict, optional + Additional parameters required for non-raw audio formats: - - `sampling_rate`, `sr`: int, sampling rate of audio data. - - `sample_width`, `sw`: int, size in bytes of one audio sample. - - `channels`, `ch`: int, number of channels of audio data. + - `sampling_rate`, `sr` : int, the sampling rate of the audio data. + - `sample_width`, `sw` : int, the size in bytes of one audio sample. + - `channels`, `ch` : int, the number of audio channels. Raises ------ - `AudioParameterError` if output format is different than raw and one or more - audio parameters are missing. `AudioIOError` if audio data cannot be written - in the desired format. + AudioParameterError + Raised if the output format is not raw and one or more required audio + parameters are missing. + AudioIOError + Raised if the audio data cannot be written in the specified format. """ + audio_format = _guess_audio_format(filename, audio_format) if audio_format in (None, "raw"): _save_raw(data, filename) diff -r 12a7f01c633b -r 1b78211b7e07 auditok/signal.py --- a/auditok/signal.py Sat Oct 19 15:21:43 2024 +0200 +++ b/auditok/signal.py Sat Oct 19 16:58:19 2024 +0200 @@ -1,11 +1,44 @@ +""" +.. 
autosummary:: + :toctree: generated/ + + to_array + calculate_energy +""" + import numpy as np +__all__ = [ + "SAMPLE_WIDTH_TO_DTYPE", + "to_array", + "calculate_energy", +] + SAMPLE_WIDTH_TO_DTYPE = {1: np.int8, 2: np.int16, 4: np.int32} EPSILON = 1e-10 def _get_numpy_dtype(sample_width): - """Helper function to convert sample with to the corresponding numpy type.""" + """ + Helper function to convert a sample width to the corresponding NumPy data + type. + + Parameters + ---------- + sample_width : int + The width of the sample in bytes. Accepted values are 1, 2, or 4. + + Returns + ------- + numpy.dtype + The corresponding NumPy data type for the specified sample width. + + Raises + ------ + ValueError + If `sample_width` is not one of the accepted values (1, 2, or 4). + """ + dtype = SAMPLE_WIDTH_TO_DTYPE.get(sample_width) if dtype is None: err_msg = "'sample_width' must be 1, 2 or 4, given: {}" @@ -17,8 +50,9 @@ """ Convert raw audio data into a NumPy array. - The returned array will have a data type of `numpy.float64` regardless of - the sample width. + This function transforms raw audio data, specified by sample width and + number of channels, into a 2-D NumPy array of `numpy.float64` data type. + The array will be arranged by channels and samples. Parameters ---------- @@ -32,38 +66,48 @@ Returns ------- numpy.ndarray - A 2-D NumPy array representing the audio data. The array will have a - shape of (number of channels, number of samples) and will be of data - type `numpy.float64`. + A 2-D NumPy array representing the audio data. The shape of the array + will be (number of channels, number of samples), with data type + `numpy.float64`. + + Raises + ------ + ValueError + If `sample_width` is not an accepted value for conversion by the helper + function `_get_numpy_dtype`. 
""" + dtype = _get_numpy_dtype(sample_width) array = np.frombuffer(data, dtype=dtype).astype(np.float64) return array.reshape(channels, -1, order="F") def calculate_energy(x, agg_fn=None): - """Calculate the energy of audio data. The energy is calculated as: + """Calculate the energy of audio data. - .. math:: energy = 20 \log(\sqrt({1}/{N}\sum_{i}^{N}{a_i}^2)) % # noqa: W605 + The energy is calculated as: - where `a_i` is the i-th audio sample and `N` is the number of audio samples - in data. + .. math:: + \text{energy} = 20 \log\left(\sqrt{\frac{1}{N} \sum_{i=1}^{N} a_i^2}\right) % # noqa: W605 + + where `a_i` is the i-th audio sample and `N` is the total number of samples + in `x`. Parameters ---------- x : array - array of audio data. - agg_fn : callable - aggregation function to use for multi-channel data. If None, the energy - will be computed and returned for each channel separately. - + Array of audio data, which may contain multiple channels. + agg_fn : callable, optional + Aggregation function to use for multi-channel data. If None, the energy + will be computed and returned separately for each channel. Returns ------- - energy : float, numpy.ndarray - energy of audio signal. If x is multichannel and agg_fn is None, this - an array of energies, one per channel. + float or numpy.ndarray + The energy of the audio signal. If `x` is multichannel and `agg_fn` is + None, this will be an array of energies, one per channel. """ + x = np.array(x).astype(np.float64) energy_sqrt = np.sqrt(np.mean(x**2, axis=-1)) energy_sqrt = np.clip(energy_sqrt, a_min=EPSILON, a_max=None) diff -r 12a7f01c633b -r 1b78211b7e07 auditok/util.py --- a/auditok/util.py Sat Oct 19 15:21:43 2024 +0200 +++ b/auditok/util.py Sat Oct 19 16:58:19 2024 +0200 @@ -40,65 +40,69 @@ def make_duration_formatter(fmt): """ - Make and return a function used to format durations in seconds. 
Accepted - format directives are: + Create and return a function to format durations in seconds using a + specified format. Accepted format directives are: - - ``%S`` : absolute number of seconds with 3 decimals. This direction should - be used alone. + - ``%S`` : absolute seconds with 3 decimals; must be used alone. - ``%i`` : milliseconds - ``%s`` : seconds - ``%m`` : minutes - ``%h`` : hours - These last 4 directives should all be specified. They can be placed anywhere - in the input string. + The last four directives (%i, %s, %m, %h) should all be specified and can + be placed in any order within the input format string. Parameters ---------- fmt : str - duration format. + Format string specifying the duration format. Returns ------- formatter : callable - a function that takes a duration in seconds (float) and returns a string - that corresponds to that duration. + A function that takes a duration in seconds (float) and returns a + formatted string. Raises ------ TimeFormatError - if the format contains an unknown directive. + Raised if the format contains an unknown directive. Examples -------- - - Using ``%S``: + Using ``%S`` for total seconds with three decimal precision: .. code:: python formatter = make_duration_formatter("%S") formatter(123.589) - '123.589' + # '123.589' formatter(123) - '123.000' + # '123.000' - Using the other directives: + Using combined directives: .. 
code:: python formatter = make_duration_formatter("%h:%m:%s.%i") - formatter(3600+120+3.25) - '01:02:03.250' + formatter(3723.25) + # '01:02:03.250' formatter = make_duration_formatter("%h hrs, %m min, %s sec and %i ms") - formatter(3600+120+3.25) - '01 hrs, 02 min, 03 sec and 250 ms' + formatter(3723.25) + # '01 hrs, 02 min, 03 sec and 250 ms' - # omitting one of the 4 directives might result in a wrong duration - formatter = make_duration_formatter("%m min, %s sec and %i ms") - formatter(3600+120+3.25) - '02 min, 03 sec and 250 ms' + Note: + Omitting any of the four main directives (%i, %s, %m, %h) may result + in incorrect formatting: + + .. code:: python + + formatter = make_duration_formatter("%m min, %s sec and %i ms") + formatter(3723.25) + # '02 min, 03 sec and 250 ms' """ + if fmt == "%S": def formatter(seconds): @@ -133,53 +137,46 @@ def make_channel_selector(sample_width, channels, selected=None): - """Create and return a callable used for audio channel selection. The - returned selector can be used as `selector(audio_data)` and returns data - that contains selected channel only. + """ + Create and return a callable for selecting a specific audio channel. The + returned `selector` function can be used as `selector(audio_data)` and + returns data for the specified channel. - Importantly, if `selected` is None or equals "any", `selector(audio_data)` - will separate and return a list of available channels: - `[data_channel_1, data_channel_2, ...].` + If `selected` is None or "any", the `selector` will separate and return a + list of available channels: `[data_channel_1, data_channel_2, ...]`. - Note also that returned `selector` expects `bytes` format for input data but - does not necessarily return a `bytes` object. In fact, in order to extract - the desired channel (or compute the average channel if `selected` = "avg"), - it first converts input data into a `array.array` (or `numpy.ndarray`) - object. 
After the channel of interest is selected/computed, it is returned as - such, without any reconversion to `bytes`. This behavior is wanted for - efficiency purposes because returned objects can be directly used as buffers - of bytes. In any case, returned objects can be converted back to `bytes` - using `bytes(obj)`. + Note that `selector` expects input data in `bytes` format but does not + necessarily return a `bytes` object. To select or compute the desired + channel (or average channel if `selected="avg"`), it converts the input + data into an `array.array` or `numpy.ndarray`. After selection, the data + is returned as is, without reconversion to `bytes`, for efficiency. The + output can be converted back to `bytes` with `bytes(obj)` if needed. - Exception to this is the special case where `channels` = 1 in which input - data is returned without any processing. - + Special case: If `channels=1`, the input data is returned without processing. Parameters ---------- sample_width : int - number of bytes used to encode one audio sample, should be 1, 2 or 4. + Number of bytes per audio sample; should be 1, 2, or 4. channels : int - number of channels of raw audio data that the returned selector should - expect. - selected : int or str, default: None - audio channel to select and return when calling `selector(raw_data)`. It - should be an int >= `-channels` and < `channels`. If one of "mix", - "avg" or "average" is passed then `selector` will return the average - channel of audio data. If None or "any", return a list of all available - channels at each call. + Number of channels in the audio data that the selector should expect. + selected : int or str, optional + Channel to select in each call to `selector(raw_data)`. Acceptable values: + - An integer in range [-channels, channels). + - "mix", "avg", or "average" for averaging across channels. + - None or "any" to return a list of all channels. 
Returns ------- selector : callable - a callable that can be used as `selector(audio_data)` and returns data - that contains channel of interest. + A function that can be called as `selector(audio_data)` and returns data + for the selected channel. Raises ------ ValueError - if `sample_width` is not one of 1, 2 or 4, or if `selected` has an - unexpected value. + If `sample_width` is not one of {1, 2, 4}, or if `selected` has an + unsupported value. """ to_array_ = partial( signal.to_array, sample_width=sample_width, channels=channels @@ -207,29 +204,50 @@ class DataSource(ABC): """ - Base class for objects passed to :func:`StreamTokenizer.tokenize`. - Subclasses should implement a :func:`DataSource.read` method. + Base class for objects used as data sources in + :func:`StreamTokenizer.tokenize`. + + Subclasses should implement a :func:`DataSource.read` method, which is + expected to return a frame (or slice) of data from the source, and None + when there is no more data to read. """ @abstractmethod def read(self): """ - Read a block (i.e., window) of data read from this source. - If no more data is available, return None. + Read a block (or window) of data from this source. + + Returns + ------- + data : object or None + A block of data from the source. If no more data is available, + should return None. """ class DataValidator(ABC): """ - Base class for a validator object used by :class:`.core.StreamTokenizer` - to check if read data is valid. - Subclasses should implement :func:`is_valid` method. + Base class for validator objects used by :class:`.core.StreamTokenizer` + to verify the validity of read data. + + Subclasses should implement the :func:`is_valid` method to define the + specific criteria for data validity. """ @abstractmethod def is_valid(self, data): """ - Check whether `data` is valid + Determine whether the provided `data` meets validity criteria. + + Parameters + ---------- + data : object + The data to be validated. 
+ + Returns ------- + bool + True if `data` is valid, otherwise False. """ @@ -239,33 +257,33 @@ samples (see :func:`AudioEnergyValidator.is_valid`), the energy is computed as: - .. math:: energy = 20 \log(\sqrt({1}/{N}\sum_{i}^{N}{a_i}^2)) % # noqa: W605 + .. math:: + energy = 20 \log(\sqrt{{1}/{N} \sum_{i=1}^{N} {a_i}^2}) % # noqa: W605 - where `a_i` is the i-th audio sample. + where `a_i` represents the i-th audio sample. Parameters ---------- energy_threshold : float - minimum energy that audio window should have to be valid. + Minimum energy required for an audio window to be considered valid. sample_width : int - size in bytes of one audio sample. + Size in bytes of a single audio sample. channels : int - number of channels of audio data. + Number of audio channels in the data. use_channel : {None, "any", "mix", "avg", "average"} or int - channel to use for energy computation. The following values are - accepted: + Specifies the channel used for energy computation: - - None (alias "any") : compute energy for each of the channels and return - the maximum value. - - "mix" (alias "avg" or "average") : compute the average channel then - compute its energy. - - int (>= 0 , < `channels`) : compute the energy of the specified channel - and ignore the other ones. + - None or "any": Compute energy for each channel and return the maximum. + - "mix" (or "avg" / "average"): Average across all channels, then + compute energy. + - int (0 <= value < `channels`): Compute energy for the specified channel + only, ignoring others. Returns ------- energy : float - energy of the audio window. + Computed energy of the audio window, used to validate if the window + meets the `energy_threshold`. """ def __init__( @@ -280,17 +298,20 @@ def is_valid(self, data): """ + Determine if the audio data meets the energy threshold. Parameters ---------- data : bytes-like - array of raw audio data + An array of raw audio data.
Returns ------- bool - True if the energy of audio data is >= threshold, False otherwise. + True if the energy of the audio data is greater than or equal to + the specified threshold; otherwise, False. """ + log_energy = signal.calculate_energy( self._selector(data), self._energy_agg_fn ) @@ -299,16 +320,16 @@ class StringDataSource(DataSource): """ - Class that represent a :class:`DataSource` as a string buffer. - Each call to :func:`DataSource.read` returns on character and moves one - step forward. If the end of the buffer is reached, :func:`read` returns + A :class:`DataSource` implementation that reads from a string buffer. + + Each call to :meth:`read` returns one character from the buffer and advances + by one position. When the end of the buffer is reached, :meth:`read` returns None. Parameters ---------- data : str - a string object used as data. - + The string data to be used as the source. """ def __init__(self, data): @@ -324,7 +345,7 @@ Returns ------- char : str - current character or None if end of buffer is reached. + current character or None if the end of the buffer is reached. """ if self._current >= len(self._data): @@ -389,8 +410,10 @@ class _Recorder(_AudioReadingProxy): """ - Class for `AudioReader` objects that can record all data they read. Useful - when reading data from microphone. + A class for `AudioReader` objects that records all data read from the source. + + This class is particularly useful for capturing audio data when reading from + a microphone or similar live audio sources. """ def __init__(self, audio_source): @@ -406,7 +429,7 @@ @property def data(self): if self._data is None: - err_msg = "Unrewinded recorder. `rewind` should be called before " + err_msg = "Un-rewinded recorder. `rewind` should be called before " err_msg += "accessing recorded data" raise RuntimeError(err_msg) return self._data @@ -437,9 +460,10 @@ class _Limiter(_AudioReadingProxy): """ - Class for `AudioReader` objects that can read a fixed amount of data.
- This can be useful when reading data from the microphone or from large - audio files. + A class for `AudioReader` objects that restricts the amount of data read. + + This class is useful for limiting data intake when reading from a microphone + or large audio files, ensuring only a specified amount of data is processed. """ def __init__(self, audio_source, max_read): @@ -476,7 +500,7 @@ class _FixedSizeAudioReader(_AudioReadingProxy): """ - Class to read fixed-size audio windows from source. + A class to read fixed-size audio windows from a source. """ def __init__(self, audio_source, block_dur): @@ -513,8 +537,11 @@ class _OverlapAudioReader(_FixedSizeAudioReader): """ - Class for `AudioReader` objects that can read and return overlapping audio + A class for `AudioReader` objects that reads and returns overlapping audio windows. + + Useful for applications requiring overlapping segments, such as audio + analysis or feature extraction. """ def __init__(self, audio_source, block_dur, hop_dur): @@ -576,86 +603,74 @@ class AudioReader(DataSource): """ - Class to read fixed-size chunks of audio data from a source. A source can - be a file on disk, standard input (with `input` = "-") or microphone. This - is normally used by tokenization algorithms that expect source objects with - a `read` function that returns a windows of data of the same size at each - call expect when remaining data does not make up a full window. + A class to read fixed-size chunks of audio data from a source, which can + be a file, standard input (with `input` set to "-"), or a microphone. + Typically used by tokenization algorithms that require source objects with + a `read` function to return data windows of consistent size, except for + the last window if remaining data is insufficient. - Objects of this class can be set up to return audio windows with a given - overlap and to record the whole stream for later access (useful when - reading data from the microphone). 
They can also have - a limit for the maximum amount of data to read. + This class supports overlapping audio windows, recording the audio stream + for later access (useful for microphone input), and limiting the maximum + amount of data read. Parameters ---------- - input : str, bytes, AudioSource, AudioReader, AudioRegion or None - input audio data. If the type of the passed argument is `str`, it should - be a path to an existing audio file. "-" is interpreted as standard input. - If the type is `bytes`, input is considered as a buffer of raw audio - data. If None, read audio from microphone. Every object that is not an - :class:`AudioReader` will be transformed, when possible, into an - :class:`AudioSource` before processing. If it is an `str` that refers to - a raw audio file, `bytes` or None, audio parameters should be provided - using kwargs (i.e., `sampling_rate`, `sample_width` and `channels` or - their alias: `sr`, `sw` and `ch`). - block_dur: float, default: 0.01 - length in seconds of audio data to return for each `read` call. - hop_dur: float, default: None - length in seconds of data amount to skip from previous window. If - defined, it is used to compute the temporal overlap between previous and - current window (namely `overlap = block_dur - hop_dur`). Default, None, - means that consecutive windows do not overlap. - record: bool, default: False - whether to record read audio data for later access. If True, audio data - can be retrieved by first calling `rewind()`, then using the `data` - property. Note that once `rewind()` is called, no new data will be read - from source (subsequent `read()` call will read data from cache) and - that there's no need to call `rewind()` again to access `data` property. - max_read: float, default: None - maximum amount of audio data to read in seconds. Default is None meaning - that data will be read until end of stream is reached or, when reading - from microphone a Ctrl-C is sent. 
+ input : str, bytes, AudioSource, AudioReader, AudioRegion, or None + Input audio data. If a string, it should be the path to an audio file + (use "-" for standard input). If bytes, the input is treated as raw + audio data. If None, audio is read from a microphone. Any input that + is not an :class:`AudioReader` will be converted, if possible, to an + :class:`AudioSource` for processing. For raw audio (string path, bytes, + or None), specify audio parameters using kwargs (`sampling_rate`, + `sample_width`, `channels` or their aliases: `sr`, `sw`, `ch`). + block_dur : float, default=0.01 + Duration of audio data (in seconds) to return in each `read` call. + hop_dur : float, optional + Duration of data to skip (in seconds) from the previous window. If set, + it is used to calculate temporal overlap between the current and + previous window (`overlap = block_dur - hop_dur`). If None (default), + windows do not overlap. + record : bool, default=False + Whether to record audio data for later access. If True, recorded audio + can be accessed using the `data` property after calling `rewind()`. + Note: after `rewind()`, no new data is read from the source—subsequent + `read` calls use the cached data. + max_read : float, optional + Maximum duration of audio data to read (in seconds). If None (default), + data is read until the end of the stream or, for microphone input, until + a Ctrl-C interruption. - When `input` is None, of type bytes or a raw audio files some of the - following kwargs are mandatory. + Additional audio parameters may be required if `input` is raw audio + (None, bytes, or raw audio file): Other Parameters ---------------- audio_format, fmt : str - type of audio data (e.g., wav, ogg, flac, raw, etc.). This will only be - used if `input` is a string path to an audio file. If not given, audio - type will be guessed from file name extension or from file header. + Type of audio data (e.g., wav, ogg, flac, raw). Used if `input` is a + file path. 
If not provided, the format is inferred from the file + extension or header. sampling_rate, sr : int - sampling rate of audio data. Required if `input` is a raw audio file, is - a bytes object or None (i.e., read from microphone). + Sampling rate of the audio data. Required for raw audio (bytes, None, + or raw file). sample_width, sw : int - number of bytes used to encode one audio sample, typically 1, 2 or 4. - Required for raw data, see `sampling_rate`. + Number of bytes per audio sample (typically 1, 2, or 4). Required for + raw data. channels, ch : int - number of channels of audio data. Required for raw data, see - `sampling_rate`. + Number of audio channels. Required for raw data. use_channel, uc : {None, "any", "mix", "avg", "average"} or int - which channel to use for split if `input` has multiple audio channels. - Regardless of which channel is used for splitting, returned audio events - contain data from *all* the channels of `input`. The following values - are accepted: + Specifies the channel used for split if `input` has multiple channels. + All returned audio data includes data from *all* input channels. Options: - - None (alias "any"): accept audio activity from any channel, even if - other channels are silent. This is the default behavior. + - None or "any": Use any active channel, regardless of silence in others. + (Default) + - "mix" / "avg" / "average": Combine all channels by averaging. + - int: Use the specified channel ID (0 <= value < `channels`). - - "mix" (alias "avg" or "average"): mix down all channels (i.e., compute - average channel) and split the resulting channel. - - - int (>= 0 , < `channels`): use one channel, specified by its integer - id, for split. - - large_file : bool, default: False - If True, AND if `input` is a path to a *wav* of a *raw* audio file - (and only these two formats) then audio data is lazily loaded to memory - (i.e., one analysis window a time). Otherwise the whole file is loaded - to memory before split. 
Set to True if the size of the file is larger - than available memory. + large_file : bool, default=False + If True and `input` is a path to a *wav* or *raw* file, audio data is + loaded lazily (one analysis window at a time). Otherwise, the entire + file is loaded before processing. Use True for large files exceeding + available memory. """ def __init__( @@ -746,14 +761,16 @@ class Recorder(AudioReader): - """Class to read fixed-size chunks of audio data from a source and keeps - data in a cache. Using this class is equivalent to initializing - :class:`AudioReader` with `record=True`. For more information about the - other parameters see :class:`AudioReader`. + """ + A class to read fixed-size chunks of audio data from a source and store + them in a cache. This class is equivalent to initializing + :class:`AudioReader` with `record=True`. For more details on additional + parameters, refer to :class:`AudioReader`. - Once the desired amount of data is read, you can call the :func:`rewind` - method then get the recorded data via the :attr:`data` attribute. You can also - re-read cached data one window a time by calling :func:`read`. + Once the desired amount of data is read, you can call the :meth:`rewind` + method to access the recorded data via the :attr:`data` attribute. The + cached data can also be re-read in fixed-size windows by calling + :meth:`read`. """ def __init__(