annotate auditok/io.py @ 101:a3da97fad36e

Add _normalize_use_channel
author Amine Sehili <amine.sehili@gmail.com>
date Tue, 08 Jan 2019 20:55:52 +0100
parents cee5037f17ad
children 3a72db5f8798
rev   line source
amine@2 1 """
amine@33 2 Module for low-level audio input-output operations.
amine@2 3
amine@32 4 Class summary
amine@32 5 =============
amine@32 6
amine@32 7 .. autosummary::
amine@32 8
amine@32 9 AudioSource
amine@32 10 Rewindable
amine@32 11 BufferAudioSource
amine@32 12 WaveAudioSource
amine@32 13 PyAudioSource
amine@32 14 StdinAudioSource
amine@32 15 PyAudioPlayer
amine@32 16
amine@32 17
amine@32 18 Function summary
amine@32 19 ================
amine@32 20
amine@32 21 .. autosummary::
amine@32 22
amine@32 23 from_file
amine@32 24 player_for
amine@2 25 """
amine@2 26
import os
import sys
import wave
from abc import ABCMeta, abstractmethod
amine@2 30
# Names made available by a star-import of this module.
__all__ = [
    "AudioIOError",
    "AudioParameterError",
    "AudioSource",
    "Rewindable",
    "BufferAudioSource",
    "WaveAudioSource",
    "PyAudioSource",
    "StdinAudioSource",
    "PyAudioPlayer",
    "from_file",
    "player_for",
]
amine@2 34
# Default audio parameters, used as default argument values by the audio
# source and player classes of this module.
DEFAULT_SAMPLE_RATE = 16000  # samples per second
DEFAULT_SAMPLE_WIDTH = 2  # bytes per sample
DEFAULT_NB_CHANNELS = 1  # mono
amine@2 38
class AudioIOError(Exception):
    """Error raised by this module for audio input/output problems."""
amine@89 41
amine@89 42
class AudioParameterError(AudioIOError):
    """Error raised when an audio parameter or audio buffer is not valid."""
amine@89 45
amine@2 46
def check_audio_data(data, sample_width, channels):
    """
    Verify that `data` holds a whole number of multi-channel samples.

    :Parameters:

        `data` :
            audio buffer; only its length is inspected.

        `sample_width` : int
            size in bytes of one sample of one channel.

        `channels` : int
            number of channels.

    :Raises:

        `AudioParameterError` if `len(data)` is not an integer multiple of
        `sample_width * channels`.
    """
    frame_size = int(sample_width * channels)
    leftover_bytes = len(data) % frame_size
    if leftover_bytes != 0:
        raise AudioParameterError(
            "The length of audio data must be an integer "
            "multiple of `sample_width * channels`"
        )
amine@90 53
amine@90 54
amine@100 55 def _guess_audio_format(fmt, filename):
amine@100 56 if fmt is None:
amine@100 57 extension = os.path.splitext(filename.lower())[1][1:]
amine@100 58 return extension if extension else None
amine@100 59 return fmt.lower()
amine@100 60
amine@100 61
amine@101 62 def _normalize_use_channel(use_channel):
amine@101 63 """
amine@101 64 Returns a value of `use_channel` as expected by audio read/write fuctions.
amine@101 65 If `use_channel` is `None`, returns 0. If it's an integer, or the special
amine@101 66 str 'mix' returns it as is. If it's `left` or `right` returns 0 or 1
amine@101 67 respectively.
amine@101 68 """
amine@101 69 if use_channel is None:
amine@101 70 return 0
amine@101 71 if use_channel == "mix" or isinstance(use_channel, int):
amine@101 72 return use_channel
amine@101 73 try:
amine@101 74 return ["left", "right"].index(use_channel)
amine@101 75 except ValueError:
amine@101 76 err_message = "'use_channel' parameter must be an integer "
amine@101 77 "or one of ('left', 'right', 'mix'), found: '{}'".format(use_channel)
amine@101 78 raise AudioParameterError(err_message)
amine@101 79
amine@101 80
class AudioSource():
    """
    Base class for objects that deliver audio data.

    Subclasses should implement the methods that open and close an audio
    stream and read the desired amount of audio samples from it.

    :Parameters:

        `sampling_rate` : int
            Number of samples per second of audio stream. Default = 16000.

        `sample_width` : int
            Size in bytes of one audio sample. Possible values : 1, 2, 4.
            Default = 2.

        `channels` : int
            Number of channels of audio stream. Only mono audio streams
            (i.e. one channel) are accepted.

    :Raises:

        `AudioParameterError` if `sample_width` is not one of 1, 2, 4 or if
        `channels` is not 1.
    """

    # NOTE: this is the Python 2 way of declaring a metaclass. Python 3
    # treats it as a plain class attribute, so the abstract methods below
    # are not enforced there.
    __metaclass__ = ABCMeta

    def __init__(self, sampling_rate=DEFAULT_SAMPLE_RATE,
                 sample_width=DEFAULT_SAMPLE_WIDTH,
                 channels=DEFAULT_NB_CHANNELS):

        supported_widths = (1, 2, 4)
        if sample_width not in supported_widths:
            raise AudioParameterError("Sample width must be one of: 1, 2 or 4 (bytes)")

        if channels != 1:
            raise AudioParameterError("Only mono audio is currently supported")

        self._sampling_rate = sampling_rate
        self._sample_width = sample_width
        self._channels = channels

    # ---- stream life cycle, to be provided by subclasses ----

    @abstractmethod
    def is_open(self):
        """ Tell whether the audio source is open (True) or not (False) """

    @abstractmethod
    def open(self):
        """ Open the audio source """

    @abstractmethod
    def close(self):
        """ Close the audio source """

    @abstractmethod
    def read(self, size):
        """
        Read and return at most `size` audio samples.

        :Parameters:

            `size` : int
                the number of samples to read.

        :Returns:

            Audio data of length 'N' * 'sample_width' * 'channels', where
            'N' is `size` if at least `size` samples are left in the stream,
            and the number of samples left otherwise.
        """

    # ---- audio parameters: getter method, property and short alias ----

    def get_sampling_rate(self):
        """ Getter-style access to `sampling_rate` """
        return self.sampling_rate

    @property
    def sampling_rate(self):
        """ Number of samples per second of audio stream """
        return self._sampling_rate

    @property
    def sr(self):
        """ Short alias: number of samples per second of audio stream """
        return self._sampling_rate

    def get_sample_width(self):
        """ Getter-style access to `sample_width` """
        return self.sample_width

    @property
    def sample_width(self):
        """ Number of bytes used to represent one audio sample """
        return self._sample_width

    @property
    def sw(self):
        """ Short alias: number of bytes used to represent one audio sample """
        return self._sample_width

    def get_channels(self):
        """ Getter-style access to `channels` """
        return self.channels

    @property
    def channels(self):
        """ Number of channels of this audio source """
        return self._channels

    @property
    def ch(self):
        """ Short alias: number of channels of this audio source """
        return self.channels
amine@72 190
amine@2 191
class Rewindable():
    """
    Base class for audio streams whose read position can be moved.

    Subclasses should implement the methods below to go back to the
    beginning of the stream, to report the current position and to jump to
    an absolute position, expressed either in samples or in seconds.
    """

    # NOTE: Python 2 style metaclass declaration; Python 3 treats it as a
    # plain class attribute.
    __metaclass__ = ABCMeta

    @abstractmethod
    def rewind(self):
        """ Go back to the beginning of audio stream """

    @abstractmethod
    def get_position(self):
        """ Return the total number of already read samples """

    @abstractmethod
    def get_time_position(self):
        """ Return the total duration in seconds of already read data """

    @abstractmethod
    def set_position(self, position):
        """ Move to an absolute position

        :Parameters:

            `position` : int
                number of samples to skip from the start of the stream
        """

    @abstractmethod
    def set_time_position(self, time_position):
        """ Move to an absolute position expressed in seconds

        :Parameters:

            `time_position` : float
                seconds to skip from the start of the stream
        """
amine@48 235
amine@2 236
class BufferAudioSource(AudioSource, Rewindable):
    """
    An :class:`AudioSource` that encapsulates and reads data from a memory buffer.
    It implements methods from :class:`Rewindable` and is therefore a navigable :class:`AudioSource`.

    :Parameters:

        `data_buffer` : str, basestring, Bytes
            audio data, with a length multiple of (sample_width * channels)

        `sampling_rate`, `sample_width`, `channels` :
            audio parameters, passed to :class:`AudioSource`.

    :Raises:

        `AudioParameterError` if the audio parameters are rejected by
        :class:`AudioSource` or if the length of `data_buffer` is not a
        multiple of (sample_width * channels).
    """

    def __init__(self, data_buffer,
                 sampling_rate=DEFAULT_SAMPLE_RATE,
                 sample_width=DEFAULT_SAMPLE_WIDTH,
                 channels=DEFAULT_NB_CHANNELS):
        AudioSource.__init__(self, sampling_rate, sample_width, channels)
        check_audio_data(data_buffer, sample_width, channels)
        self._buffer = data_buffer
        # Size in bytes of one sample taken over all channels.
        self._sample_size_all_channels = sample_width * channels
        # The read position is tracked in bytes, not in samples.
        self._current_position_bytes = 0
        self._is_open = False

    def is_open(self):
        """ Return True if the source has been opened and not closed since """
        return self._is_open

    def open(self):
        """ Mark the source as open so that `read` can be used """
        self._is_open = True

    def close(self):
        """ Mark the source as closed and go back to the start of the buffer """
        self._is_open = False
        self.rewind()

    def read(self, size):
        """
        Read and return at most `size` samples from the current position.

        :Returns:

            the audio data read, or `None` if no data is left.

        :Raises:

            `AudioIOError` if the source is not open.
        """
        if not self._is_open:
            raise AudioIOError("Stream is not open")
        start = self._current_position_bytes
        end = start + self._sample_size_all_channels * size
        data = self._buffer[start:end]
        if data:
            self._current_position_bytes += len(data)
            return data
        return None

    def get_data_buffer(self):
        """ Return all audio data as one string buffer. """
        return self._buffer

    def set_data(self, data_buffer):
        """ Set new data for this audio stream and reset the read position.

        :Parameters:

            `data_buffer` : str, basestring, Bytes
                a string buffer with a length multiple of (sample_width * channels)
        """
        check_audio_data(data_buffer, self.sample_width, self.channels)
        self._buffer = data_buffer
        self._current_position_bytes = 0

    def append_data(self, data_buffer):
        """ Append data to this audio stream

        :Parameters:

            `data_buffer` : str, basestring, Bytes
                a buffer with a length multiple of (sample_width * channels)
        """
        check_audio_data(data_buffer, self.sample_width, self.channels)
        self._buffer += data_buffer

    def rewind(self):
        """ Go back to the beginning of the buffer """
        self.set_position(0)

    def get_position(self):
        """ Return the total number of already read samples (an integer) """
        # Floor division keeps the result an integer on Python 3 as well;
        # true division would turn a sample count into a float there.
        return self._current_position_bytes // self._sample_size_all_channels

    def get_time_position(self):
        """ Return the total duration in seconds of already read data """
        bytes_per_second = self._sample_size_all_channels * self.sampling_rate
        return float(self._current_position_bytes) / bytes_per_second

    def set_position(self, position):
        """ Move to an absolute position, clamped to the end of the buffer

        :Parameters:

            `position` : int
                number of samples to skip from the start of the stream

        :Raises:

            `ValueError` if `position` is negative.
        """
        if position < 0:
            raise ValueError("position must be >= 0")
        position *= self._sample_size_all_channels
        self._current_position_bytes = position if position < len(self._buffer) else len(self._buffer)

    def set_time_position(self, time_position):  # time in seconds
        """ Move to an absolute position expressed in seconds

        :Parameters:

            `time_position` : float
                seconds to skip from the start of the stream
        """
        position = int(self.sampling_rate * time_position)
        self.set_position(position)
amine@2 319
amine@48 320
class WaveAudioSource(AudioSource):
    """
    A class for an `AudioSource` that reads data from a wave file.

    The file is opened once at construction time to read its sampling rate,
    sample width and number of channels, then closed again; `open` must be
    called before reading.

    :Parameters:

        `filename` :
            path to a valid wave file

    :Raises:

        `AudioParameterError` if the audio parameters of the file are
        rejected by :class:`AudioSource`.
    """

    def __init__(self, filename):

        self._filename = filename
        self._audio_stream = None

        stream = wave.open(self._filename)
        try:
            AudioSource.__init__(self, stream.getframerate(),
                                 stream.getsampwidth(),
                                 stream.getnchannels())
        finally:
            # Always release the file, even when the audio parameters are
            # rejected and AudioSource.__init__ raises.
            stream.close()

    def is_open(self):
        """ Return True if the wave file is currently open for reading """
        return self._audio_stream is not None

    def open(self):
        """ Open the wave file for reading; does nothing if already open """
        if self._audio_stream is None:
            self._audio_stream = wave.open(self._filename)

    def close(self):
        """ Close the wave file; does nothing if it is not open """
        if self._audio_stream is not None:
            self._audio_stream.close()
            self._audio_stream = None

    def read(self, size):
        """
        Read and return at most `size` frames from the wave file.

        :Returns:

            the audio data read, or `None` if no data is left.

        :Raises:

            `IOError` if the source is not open.
        """
        if self._audio_stream is None:
            raise IOError("Stream is not open")
        data = self._audio_stream.readframes(size)
        if data is None or len(data) < 1:
            return None
        return data
amine@2 362
amine@2 363
class PyAudioSource(AudioSource):
    """
    A class for an `AudioSource` that reads data from an audio input device
    using PyAudio.

    :Parameters:

        `sampling_rate`, `sample_width`, `channels` :
            audio parameters, passed to :class:`AudioSource`.

        `frames_per_buffer` : int
            value passed as `frames_per_buffer` when the PyAudio stream is
            opened. Default = 1024.

        `input_device_index` :
            value passed as `input_device_index` when the PyAudio stream is
            opened. Default = None (presumably PyAudio's default input
            device; confirm against the PyAudio documentation).
    """

    def __init__(self, sampling_rate=DEFAULT_SAMPLE_RATE,
                 sample_width=DEFAULT_SAMPLE_WIDTH,
                 channels=DEFAULT_NB_CHANNELS,
                 frames_per_buffer=1024,
                 input_device_index=None):

        AudioSource.__init__(self, sampling_rate, sample_width, channels)
        self._chunk_size = frames_per_buffer
        self.input_device_index = input_device_index

        # pyaudio is imported here rather than at module level, so the
        # module itself can be imported without it.
        import pyaudio
        self._pyaudio_object = pyaudio.PyAudio()
        self._pyaudio_format = self._pyaudio_object.get_format_from_width(self.sample_width)
        self._audio_stream = None

    def is_open(self):
        """ Return True if a PyAudio stream is currently held by this source """
        return self._audio_stream is not None

    def open(self):
        """ Open an input-only PyAudio stream with this source's parameters """
        stream_options = dict(
            format=self._pyaudio_format,
            channels=self.channels,
            rate=self.sampling_rate,
            input=True,
            output=False,
            input_device_index=self.input_device_index,
            frames_per_buffer=self._chunk_size,
        )
        self._audio_stream = self._pyaudio_object.open(**stream_options)

    def close(self):
        """ Stop and close the PyAudio stream; does nothing if not open """
        if self._audio_stream is None:
            return
        self._audio_stream.stop_stream()
        self._audio_stream.close()
        self._audio_stream = None

    def read(self, size):
        """
        Read `size` frames from the PyAudio stream.

        :Returns:

            the audio data read, or `None` if the stream is not active or
            returned no data.

        :Raises:

            `IOError` if the source is not open.
        """
        if self._audio_stream is None:
            raise IOError("Stream is not open")

        if not self._audio_stream.is_active():
            return None

        data = self._audio_stream.read(size)
        if data is None or len(data) < 1:
            return None
        return data
amine@67 413
amine@2 414
class StdinAudioSource(AudioSource):
    """
    A class for an :class:`AudioSource` that reads data from standard input.

    :Parameters:

        `sampling_rate`, `sample_width`, `channels` :
            audio parameters, passed to :class:`AudioSource`.
    """

    def __init__(self, sampling_rate=DEFAULT_SAMPLE_RATE,
                 sample_width=DEFAULT_SAMPLE_WIDTH,
                 channels=DEFAULT_NB_CHANNELS):

        AudioSource.__init__(self, sampling_rate, sample_width, channels)
        self._is_open = False

    def is_open(self):
        """ Return True if the source has been opened and not closed since """
        return self._is_open

    def open(self):
        """ Mark the source as open so that `read` can be used """
        self._is_open = True

    def close(self):
        """ Mark the source as closed; standard input itself is left untouched """
        self._is_open = False

    def read(self, size):
        """
        Read at most `size` samples from standard input.

        :Returns:

            the audio data read, or `None` if nothing could be read.

        :Raises:

            `IOError` if the source is not open.
        """
        if not self._is_open:
            raise IOError("Stream is not open")

        nb_bytes = size * self.sample_width * self.channels
        # On Python 3 the binary data is read through `sys.stdin.buffer`;
        # on Python 2 `sys.stdin` is read directly.
        if sys.version_info >= (3, 0):
            stream = sys.stdin.buffer
        else:
            stream = sys.stdin
        data = stream.read(nb_bytes)

        if data is None or len(data) < 1:
            return None

        return data
amine@67 450
amine@67 451
class PyAudioPlayer():
    """
    A class for audio playback using Pyaudio

    An output-only PyAudio stream is opened at construction time.

    :Parameters:

        `sampling_rate` : int
            number of samples per second of the audio to play.

        `sample_width` : int
            size in bytes of one audio sample. Possible values : 1, 2, 4.

        `channels` : int
            number of channels of the audio to play.

    :Raises:

        `ValueError` if `sample_width` is not one of 1, 2, 4.
    """

    def __init__(self, sampling_rate=DEFAULT_SAMPLE_RATE,
                 sample_width=DEFAULT_SAMPLE_WIDTH,
                 channels=DEFAULT_NB_CHANNELS):
        if sample_width not in (1, 2, 4):
            raise ValueError("Sample width must be one of: 1, 2 or 4 (bytes)")

        self.sampling_rate = sampling_rate
        self.sample_width = sample_width
        self.channels = channels

        # pyaudio is imported here rather than at module level, so the
        # module itself can be imported without it.
        import pyaudio
        self._p = pyaudio.PyAudio()
        self.stream = self._p.open(format=self._p.get_format_from_width(self.sample_width),
                                   channels=self.channels, rate=self.sampling_rate,
                                   input=False, output=True)

    def play(self, data):
        """ Play `data`, writing it to the output stream chunk by chunk """
        if self.stream.is_stopped():
            self.stream.start_stream()

        for chunk in self._chunk_data(data):
            self.stream.write(chunk)

        self.stream.stop_stream()

    def stop(self):
        """ Stop and close the output stream and terminate PyAudio """
        if not self.stream.is_stopped():
            self.stream.stop_stream()
        self.stream.close()
        self._p.terminate()

    def _chunk_data(self, data):
        """ Yield successive chunks of `data` of about 100 ms of audio each """
        # make audio chunks of 100 ms to allow interruption (like ctrl+c)
        frame_size = self.sample_width * self.channels
        # Count the chunk in whole frames so that its size in bytes is always
        # a multiple of `sample_width * channels`; a chunk that ends in the
        # middle of a sample would misalign all the audio written after it.
        # At least one frame per chunk also guarantees that the loop below
        # makes progress for very low sampling rates.
        frames_per_chunk = max(1, int(self.sampling_rate // 10))
        chunk_size = frames_per_chunk * frame_size
        start = 0
        while start < len(data):
            yield data[start: start + chunk_size]
            start += chunk_size
amine@67 495
amine@2 496
amine@99 497 def _save_raw(filename, data):
amine@99 498 """
amine@99 499 Save audio data as a headerless (i.e. raw) file.
amine@99 500 """
amine@99 501 with open(filename, "wb") as fp:
amine@99 502 fp.write(data)
amine@99 503
amine@99 504
amine@98 505 def _save_wave(filename, data, sampling_rate, sample_width, channels):
amine@98 506 """
amine@98 507 Save audio data to a wave file.
amine@98 508 """
amine@98 509 # use standard python's wave module
amine@98 510 with wave.open(filename, "w") as fp:
amine@98 511 fp.setframerate(sampling_rate)
amine@98 512 fp.setsampwidth(sample_width)
amine@98 513 fp.setnchannels(channels)
amine@98 514 fp.writeframes(data)
amine@98 515
amine@98 516
def from_file(filename):
    """
    Build an `AudioSource` object that reads the audio file `filename`.
    The :class:`AudioSource` class to use is chosen from the file's
    extension; only ".wav" (in any letter case) is recognized.

    :Parameters:

        `filename` :
            path to an audio file.

    :Returns:

        an `AudioSource` object that reads data from the given file.

    :Raises:

        `Exception` if the extension of `filename` is not ".wav".
    """

    is_wave_file = filename.lower().endswith(".wav")
    if not is_wave_file:
        raise Exception("Can not create an AudioSource object from '%s'" % (filename))

    return WaveAudioSource(filename)
amine@2 536
amine@2 537
def player_for(audio_source):
    """
    Create a :class:`PyAudioPlayer` suited to play data from `audio_source`.

    :Parameters:

        `audio_source` :
            an `AudioSource` object.

    :Returns:

        `PyAudioPlayer` built with the sampling rate, sample width and number
        of channels reported by `audio_source`.
    """

    sampling_rate = audio_source.get_sampling_rate()
    sample_width = audio_source.get_sample_width()
    channels = audio_source.get_channels()
    return PyAudioPlayer(sampling_rate, sample_width, channels)