changeset 112:e5813b709634

Add function to read raw audio data
author Amine Sehili <amine.sehili@gmail.com>
date Sun, 20 Jan 2019 16:46:35 +0100
parents 85fbaf34c88b
children 7771f11179fb
files auditok/io.py
diffstat 1 files changed, 214 insertions(+), 85 deletions(-) [+]
line wrap: on
line diff
--- a/auditok/io.py	Wed Jan 16 20:54:50 2019 +0100
+++ b/auditok/io.py	Sun Jan 20 16:46:35 2019 +0100
@@ -31,18 +31,30 @@
 
 try:
     from pydub import AudioSegment
+
     _WITH_PYDUB = True
 except ImportError:
     _WITH_PYDUB = False
 
-__all__ = ["AudioIOError", "AudioParameterError", "AudioSource", "Rewindable",
-           "BufferAudioSource", "WaveAudioSource", "PyAudioSource",
-           "StdinAudioSource", "PyAudioPlayer", "from_file", "player_for"]
+__all__ = [
+    "AudioIOError",
+    "AudioParameterError",
+    "AudioSource",
+    "Rewindable",
+    "BufferAudioSource",
+    "WaveAudioSource",
+    "PyAudioSource",
+    "StdinAudioSource",
+    "PyAudioPlayer",
+    "from_file",
+    "player_for",
+]
 
 DEFAULT_SAMPLE_RATE = 16000
 DEFAULT_SAMPLE_WIDTH = 2
 DEFAULT_NB_CHANNELS = 1
-DATA_FORMAT = {1: 'b', 2: 'h', 4: 'i'}
+DATA_FORMAT = {1: "b", 2: "h", 4: "i"}
+
 
 class AudioIOError(Exception):
     pass
@@ -56,8 +68,10 @@
     sample_size_bytes = int(sample_width * channels)
     nb_samples = len(data) // sample_size_bytes
     if nb_samples * sample_size_bytes != len(data):
-        raise AudioParameterError("The length of audio data must be an integer "
-                                  "multiple of `sample_width * channels`")
+        raise AudioParameterError(
+            "The length of audio data must be an integer "
+            "multiple of `sample_width * channels`"
+        )
 
 
 def _guess_audio_format(fmt, filename):
@@ -118,22 +132,26 @@
                                          channels,
                                          use_channel)
     """
-    err_message = ("'{ln}' (or '{sn}') must be an integer, found: '{val}'")
+    err_message = "'{ln}' (or '{sn}') must be an integer, found: '{val}'"
     parameters = []
-    for (long_name, short_name) in (("sampling_rate", "sr"),
-                                    ("sample_width", "sw"),
-                                    ("channels", "ch")):
-        param = param_dict.get(long_name, None) or param_dict.get(short_name, None)
+    for (long_name, short_name) in (
+        ("sampling_rate", "sr"),
+        ("sample_width", "sw"),
+        ("channels", "ch"),
+    ):
+        param = param_dict.get(long_name, None) or param_dict.get(
+            short_name, None
+        )
         if param is None or not isinstance(param, int):
-            raise AudioParameterError(err_message.format(ln=long_name,
-                                                         sn=short_name,
-                                                         val=param))
+            raise AudioParameterError(
+                err_message.format(ln=long_name, sn=short_name, val=param)
+            )
         parameters.append(param)
     use_channel = param_dict.get("use_channel", param_dict.get("uc", 0))
     return tuple(parameters) + (_normalize_use_channel(use_channel),)
 
 
-class AudioSource():
+class AudioSource:
     """ 
     Base class for audio source objects.
 
@@ -156,12 +174,17 @@
 
     __metaclass__ = ABCMeta
 
-    def __init__(self, sampling_rate=DEFAULT_SAMPLE_RATE,
-                 sample_width=DEFAULT_SAMPLE_WIDTH,
-                 channels=DEFAULT_NB_CHANNELS):
+    def __init__(
+        self,
+        sampling_rate=DEFAULT_SAMPLE_RATE,
+        sample_width=DEFAULT_SAMPLE_WIDTH,
+        channels=DEFAULT_NB_CHANNELS,
+    ):
 
         if sample_width not in (1, 2, 4):
-            raise AudioParameterError("Sample width must be one of: 1, 2 or 4 (bytes)")
+            raise AudioParameterError(
+                "Sample width must be one of: 1, 2 or 4 (bytes)"
+            )
 
         if channels != 1:
             raise AudioParameterError("Only mono audio is currently supported")
@@ -244,7 +267,7 @@
         return self.channels
 
 
-class Rewindable():
+class Rewindable:
     """
     Base class for rewindable audio streams.
     Subclasses should implement methods to return to the beginning of an
@@ -295,10 +318,13 @@
     It implements methods from :class:`Rewindable` and is therefore a navigable :class:`AudioSource`.
     """
 
-    def __init__(self, data_buffer,
-                 sampling_rate=DEFAULT_SAMPLE_RATE,
-                 sample_width=DEFAULT_SAMPLE_WIDTH,
-                 channels=DEFAULT_NB_CHANNELS):
+    def __init__(
+        self,
+        data_buffer,
+        sampling_rate=DEFAULT_SAMPLE_RATE,
+        sample_width=DEFAULT_SAMPLE_WIDTH,
+        channels=DEFAULT_NB_CHANNELS,
+    ):
         AudioSource.__init__(self, sampling_rate, sample_width, channels)
         check_audio_data(data_buffer, sample_width, channels)
         self._buffer = data_buffer
@@ -320,7 +346,10 @@
         if not self._is_open:
             raise AudioIOError("Stream is not open")
         bytes_to_read = self._sample_size_all_channels * size
-        data = self._buffer[self._current_position_bytes: self._current_position_bytes + bytes_to_read]
+        data = self._buffer[
+            self._current_position_bytes : self._current_position_bytes
+            + bytes_to_read
+        ]
         if data:
             self._current_position_bytes += len(data)
             return data
@@ -360,13 +389,17 @@
         return self._current_position_bytes / self._sample_size_all_channels
 
     def get_time_position(self):
-        return float(self._current_position_bytes) / (self._sample_size_all_channels * self.sampling_rate)
+        return float(self._current_position_bytes) / (
+            self._sample_size_all_channels * self.sampling_rate
+        )
 
     def set_position(self, position):
         if position < 0:
             raise ValueError("position must be >= 0")
         position *= self._sample_size_all_channels
-        self._current_position_bytes = position if position < len(self._buffer) else len(self._buffer)
+        self._current_position_bytes = (
+            position if position < len(self._buffer) else len(self._buffer)
+        )
 
     def set_time_position(self, time_position):  # time in seconds
         position = int(self.sampling_rate * time_position)
@@ -389,16 +422,19 @@
         self._audio_stream = None
 
         stream = wave.open(self._filename)
-        AudioSource.__init__(self, stream.getframerate(),
-                             stream.getsampwidth(),
-                             stream.getnchannels())
+        AudioSource.__init__(
+            self,
+            stream.getframerate(),
+            stream.getsampwidth(),
+            stream.getnchannels(),
+        )
         stream.close()
 
     def is_open(self):
         return self._audio_stream is not None
 
     def open(self):
-        if(self._audio_stream is None):
+        if self._audio_stream is None:
             self._audio_stream = wave.open(self._filename)
 
     def close(self):
@@ -421,32 +457,40 @@
     A class for an `AudioSource` that reads data the built-in microphone using PyAudio. 
     """
 
-    def __init__(self, sampling_rate=DEFAULT_SAMPLE_RATE,
-                 sample_width=DEFAULT_SAMPLE_WIDTH,
-                 channels=DEFAULT_NB_CHANNELS,
-                 frames_per_buffer=1024,
-                 input_device_index=None):
+    def __init__(
+        self,
+        sampling_rate=DEFAULT_SAMPLE_RATE,
+        sample_width=DEFAULT_SAMPLE_WIDTH,
+        channels=DEFAULT_NB_CHANNELS,
+        frames_per_buffer=1024,
+        input_device_index=None,
+    ):
 
         AudioSource.__init__(self, sampling_rate, sample_width, channels)
         self._chunk_size = frames_per_buffer
         self.input_device_index = input_device_index
 
         import pyaudio
+
         self._pyaudio_object = pyaudio.PyAudio()
-        self._pyaudio_format = self._pyaudio_object.get_format_from_width(self.sample_width)
+        self._pyaudio_format = self._pyaudio_object.get_format_from_width(
+            self.sample_width
+        )
         self._audio_stream = None
 
     def is_open(self):
         return self._audio_stream is not None
 
     def open(self):
-        self._audio_stream = self._pyaudio_object.open(format=self._pyaudio_format,
-                                                       channels=self.channels,
-                                                       rate=self.sampling_rate,
-                                                       input=True,
-                                                       output=False,
-                                                       input_device_index=self.input_device_index,
-                                                       frames_per_buffer=self._chunk_size)
+        self._audio_stream = self._pyaudio_object.open(
+            format=self._pyaudio_format,
+            channels=self.channels,
+            rate=self.sampling_rate,
+            input=True,
+            output=False,
+            input_device_index=self.input_device_index,
+            frames_per_buffer=self._chunk_size,
+        )
 
     def close(self):
         if self._audio_stream is not None:
@@ -472,9 +516,12 @@
     A class for an :class:`AudioSource` that reads data from standard input.
     """
 
-    def __init__(self, sampling_rate=DEFAULT_SAMPLE_RATE,
-                 sample_width=DEFAULT_SAMPLE_WIDTH,
-                 channels=DEFAULT_NB_CHANNELS):
+    def __init__(
+        self,
+        sampling_rate=DEFAULT_SAMPLE_RATE,
+        sample_width=DEFAULT_SAMPLE_WIDTH,
+        channels=DEFAULT_NB_CHANNELS,
+    ):
 
         AudioSource.__init__(self, sampling_rate, sample_width, channels)
         self._is_open = False
@@ -504,14 +551,17 @@
         return data
 
 
-class PyAudioPlayer():
+class PyAudioPlayer:
     """
     A class for audio playback using Pyaudio
     """
 
-    def __init__(self, sampling_rate=DEFAULT_SAMPLE_RATE,
-                 sample_width=DEFAULT_SAMPLE_WIDTH,
-                 channels=DEFAULT_NB_CHANNELS):
+    def __init__(
+        self,
+        sampling_rate=DEFAULT_SAMPLE_RATE,
+        sample_width=DEFAULT_SAMPLE_WIDTH,
+        channels=DEFAULT_NB_CHANNELS,
+    ):
         if not sample_width in (1, 2, 4):
             raise ValueError("Sample width must be one of: 1, 2 or 4 (bytes)")
 
@@ -520,10 +570,15 @@
         self.channels = channels
 
         import pyaudio
+
         self._p = pyaudio.PyAudio()
-        self.stream = self._p.open(format=self._p.get_format_from_width(self.sample_width),
-                                   channels=self.channels, rate=self.sampling_rate,
-                                   input=False, output=True)
+        self.stream = self._p.open(
+            format=self._p.get_format_from_width(self.sample_width),
+            channels=self.channels,
+            rate=self.sampling_rate,
+            input=False,
+            output=True,
+        )
 
     def play(self, data):
         if self.stream.is_stopped():
@@ -542,13 +597,100 @@
 
     def _chunk_data(self, data):
         # make audio chunks of 100 ms to allow interruption (like ctrl+c)
-        chunk_size = int((self.sampling_rate * self.sample_width * self.channels) / 10)
+        chunk_size = int(
+            (self.sampling_rate * self.sample_width * self.channels) / 10
+        )
         start = 0
         while start < len(data):
-            yield data[start: start + chunk_size]
+            yield data[start : start + chunk_size]
             start += chunk_size
 
 
+def player_for(audio_source):
+    """
+    Return a :class:`PyAudioPlayer` that can play data from `audio_source`.
+
+    :Parameters:
+
+        `audio_source` : 
+            an `AudioSource` object.
+
+    :Returns:
+
+        `PyAudioPlayer` that has the same sampling rate, sample width and number of channels
+        as `audio_source`.
+    """
+
+    return PyAudioPlayer(
+        audio_source.get_sampling_rate(),
+        audio_source.get_sample_width(),
+        audio_source.get_channels(),
+    )
+
+
+def _load_raw(
+    file,
+    sampling_rate,
+    sample_width,
+    channels,
+    use_channel=0,
+    large_file=False,
+):
+    """
+    Load a raw audio file with standard Python.
+    If `large_file` is True, audio data will be lazily
+    loaded to memory.
+
+    See also :func:`from_file`.
+
+    :Parameters:
+        `file` : filelike object or str
+            raw audio file to open
+        `sampling_rate`: int
+            sampling rate of audio data
+        `sample_width`: int
+            sample width of audio data
+        `channels`: int
+            number of channels of audio data
+        `use_channel`: int
+            audio channel to read if file is not mono audio. This must be an integer
+            >= 0 and < channels, or one of 'left' (treated as 0 or first channel), or
+            'right' (treated as 1 or second channel).
+
+    :Returns:
+
+        a `RawAudioSource` if `large_file` is True; otherwise a mono `BufferAudioSource`
+        holding the file's data (only the selected channel if `channels` != 1).
+    """
+    if None in (sampling_rate, sample_width, channels):
+        raise AudioParameterError(
+            "All audio parameters are required for raw audio files"
+        )
+
+    if large_file:
+        return RawAudioSource(
+            file,
+            sampling_rate=sampling_rate,
+            sample_width=sample_width,
+            channels=channels,
+            use_channel=use_channel,
+        )
+    else:
+        with open(file, "rb") as fp:
+            data = fp.read()
+        if channels != 1:
+            # TODO check if striding with mmap doesn't load all data to memory
+            data = _extract_selected_channel(
+                data, channels, sample_width, use_channel
+            )
+        return BufferAudioSource(
+            data,
+            sampling_rate=sampling_rate,
+            sample_width=sample_width,
+            channels=1,
+        )
+
+
 def from_file(filename):
     """
     Create an `AudioSource` object using the audio file specified by `filename`.
@@ -567,28 +709,11 @@
     if filename.lower().endswith(".wav"):
         return WaveAudioSource(filename)
 
-    raise Exception("Can not create an AudioSource object from '%s'" % (filename))
+    raise Exception(
+        "Can not create an AudioSource object from '%s'" % (filename)
+    )
 
 
-def player_for(audio_source):
-    """
-    Return a :class:`PyAudioPlayer` that can play data from `audio_source`.
-
-    :Parameters:
-
-        `audio_source` : 
-            an `AudioSource` object.
-
-    :Returns:
-
-        `PyAudioPlayer` that has the same sampling rate, sample width and number of channels
-        as `audio_source`.
-    """
-
-    return PyAudioPlayer(audio_source.get_sampling_rate(),
-                         audio_source.get_sample_width(),
-                         audio_source.get_channels())
-
 def _save_raw(file, data):
     """
     Saves audio data as a headerless (i.e. raw) file.
@@ -611,16 +736,19 @@
         fp.writeframes(data)
 
 
-def _save_with_pydub(file, data, audio_format, sampling_rate, sample_width,
-                     channels):
+def _save_with_pydub(
+    file, data, audio_format, sampling_rate, sample_width, channels
+):
     """
     Saves audio data with pydub (https://github.com/jiaaro/pydub).
     See also :func:`to_file`.
     """
-    segment = AudioSegment(data,
-                           frame_rate=sampling_rate,
-                           sample_width=sample_width,
-                           channels=channels)
+    segment = AudioSegment(
+        data,
+        frame_rate=sampling_rate,
+        sample_width=sample_width,
+        channels=channels,
+    )
     with open(file, "wb") as fp:
         segment.export(fp, format=audio_format)
 
@@ -670,8 +798,9 @@
     if audio_format in ("wav", "wave"):
         _save_wave(file, data, sampling_rate, sample_width, channels)
     elif _WITH_PYDUB:
-        _save_with_pydub(file, data, audio_format, sampling_rate,
-                         sample_width, channels)
+        _save_with_pydub(
+            file, data, audio_format, sampling_rate, sample_width, channels
+        )
     else:
         err_message = "cannot write file format {} (file name: {})"
-        raise AudioIOError(err_message.format(audio_format, file))
\ No newline at end of file
+        raise AudioIOError(err_message.format(audio_format, file))