annotate auditok/io.py @ 159:3439ba35aba0

Normalize user_channel in _FileAudioSource and make it a property
author Amine Sehili <amine.sehili@gmail.com>
date Tue, 26 Feb 2019 20:20:18 +0100
parents 6ed3a1eea98d
children 8591a92127de
rev   line source
amine@2 1 """
amine@33 2 Module for low-level audio input-output operations.
amine@2 3
amine@32 4 Class summary
amine@32 5 =============
amine@32 6
amine@32 7 .. autosummary::
amine@32 8
amine@32 9 AudioSource
amine@32 10 Rewindable
amine@32 11 BufferAudioSource
amine@32 12 WaveAudioSource
amine@32 13 PyAudioSource
amine@32 14 StdinAudioSource
amine@32 15 PyAudioPlayer
amine@32 16
amine@32 17
amine@32 18 Function summary
amine@32 19 ================
amine@32 20
amine@32 21 .. autosummary::
amine@32 22
amine@32 23 from_file
amine@32 24 player_for
amine@2 25 """
amine@103 26 import os
amine@103 27 import sys
amine@2 28 import wave
amine@117 29 import audioop
amine@116 30 from array import array
amine@153 31 from functools import partial
amine@116 32
# True when running under Python 3 or later, False otherwise (Python 2).
PYTHON_3 = sys.version_info >= (3, 0)
amine@2 37
amine@104 38 try:
amine@104 39 from pydub import AudioSegment
amine@112 40
amine@104 41 _WITH_PYDUB = True
amine@104 42 except ImportError:
amine@104 43 _WITH_PYDUB = False
amine@104 44
# Names exported by `from <module> import *`.
__all__ = [
    "AudioIOError",
    "AudioParameterError",
    "AudioSource",
    "Rewindable",
    "BufferAudioSource",
    "WaveAudioSource",
    "PyAudioSource",
    "StdinAudioSource",
    "PyAudioPlayer",
    "from_file",
    "player_for",
]
amine@2 58
# Default audio parameters used by the audio source and player classes below.
DEFAULT_SAMPLE_RATE = 16000  # samples per second
DEFAULT_SAMPLE_WIDTH = 2  # bytes per sample
DEFAULT_NB_CHANNELS = 1  # number of channels
# Maps a sample width (in bytes) to the `array.array` type code used to
# decode raw audio data into samples.
DATA_FORMAT = {1: "b", 2: "h", 4: "i"}
amine@112 63
amine@2 64
class AudioIOError(Exception):
    """Base class for the audio input/output errors raised by this module."""
amine@89 67
amine@89 68
class AudioParameterError(AudioIOError):
    """Raised when an audio parameter is missing or has an invalid value."""
amine@89 71
amine@2 72
def check_audio_data(data, sample_width, channels):
    """
    Check that `data` contains a whole number of multi-channel samples.

    :Parameters:

        `data` : bytes-like
            raw audio data.
        `sample_width` : int
            size in bytes of one audio sample.
        `channels` : int
            number of channels of audio data.

    :Raises:

        `AudioParameterError` if the length of `data` is not an integer
        multiple of `sample_width * channels`.
    """
    frame_size = int(sample_width * channels)
    if len(data) % frame_size != 0:
        raise AudioParameterError(
            "The length of audio data must be an integer "
            "multiple of `sample_width * channels`"
        )
amine@90 81
amine@90 82
amine@100 83 def _guess_audio_format(fmt, filename):
amine@100 84 if fmt is None:
amine@100 85 extension = os.path.splitext(filename.lower())[1][1:]
amine@100 86 return extension if extension else None
amine@100 87 return fmt.lower()
amine@100 88
amine@100 89
amine@101 90 def _normalize_use_channel(use_channel):
amine@101 91 """
amine@101 92 Returns a value of `use_channel` as expected by audio read/write fuctions.
amine@101 93 If `use_channel` is `None`, returns 0. If it's an integer, or the special
amine@101 94 str 'mix' returns it as is. If it's `left` or `right` returns 0 or 1
amine@101 95 respectively.
amine@101 96 """
amine@101 97 if use_channel is None:
amine@101 98 return 0
amine@101 99 if use_channel == "mix" or isinstance(use_channel, int):
amine@101 100 return use_channel
amine@101 101 try:
amine@101 102 return ["left", "right"].index(use_channel)
amine@101 103 except ValueError:
amine@101 104 err_message = "'use_channel' parameter must be an integer "
amine@101 105 "or one of ('left', 'right', 'mix'), found: '{}'".format(use_channel)
amine@101 106 raise AudioParameterError(err_message)
amine@101 107
amine@101 108
def _get_audio_parameters(param_dict):
    """
    Extract audio parameters from a dictionary of parameters.

    Each parameter has a long name and a short name; the value stored under
    the long name is looked up first and the short name is used as a
    fallback. `AudioParameterError` is raised if sampling rate, sample width
    or channels is missing, is not an integer or is not strictly positive.
    The `use_channel` (or `uc`) parameter is optional and defaults to 0.

    Expected parameters are:

        `sampling_rate`, `sr`: int, sampling rate.
        `sample_width`, `sw`: int, sample size in bytes.
        `channels`, `ch`: int, number of channels.
        `use_channel`, `uc`: int or str, which channel to use from data.
            Default value is 0 (first channel). The following special str
            values are also accepted:
                `left`: alias for 0
                `right`: alias for 1
                `mix`: indicates that all channels should be mixed up into
                    one single channel

    :Returns

        audio parameters as a tuple (sampling_rate,
                                     sample_width,
                                     channels,
                                     use_channel)
    """
    template = "'{ln}' (or '{sn}') must be a positive integer, found: '{val}'"
    required_names = (
        ("sampling_rate", "sr"),
        ("sample_width", "sw"),
        ("channels", "ch"),
    )
    values = []
    for long_name, short_name in required_names:
        value = param_dict.get(long_name, param_dict.get(short_name))
        is_valid = value is not None and isinstance(value, int) and value > 0
        if not is_valid:
            raise AudioParameterError(
                template.format(ln=long_name, sn=short_name, val=value)
            )
        values.append(value)
    sampling_rate, sample_width, channels = values
    raw_use_channel = param_dict.get("use_channel", param_dict.get("uc", 0))
    use_channel = _normalize_use_channel(raw_use_channel)
    return sampling_rate, sample_width, channels, use_channel
amine@102 160
amine@102 161
def _array_to_bytes(a):
    """
    Return the content of the `array.array` object `a` as `bytes`.

    Uses `tobytes` under Python 3 and `tostring` otherwise.
    """
    return a.tobytes() if PYTHON_3 else a.tostring()
amine@116 170
amine@116 171
amine@117 172 def _mix_audio_channels(data, channels, sample_width):
amine@117 173 if channels == 1:
amine@117 174 return data
amine@117 175 if channels == 2:
amine@117 176 return audioop.tomono(data, sample_width, 0.5, 0.5)
amine@117 177 fmt = DATA_FORMAT[sample_width]
amine@117 178 buffer = array(fmt, data)
amine@117 179 mono_channels = [
amine@117 180 array(fmt, buffer[ch::channels]) for ch in range(channels)
amine@117 181 ]
amine@117 182 avg_arr = array(
amine@117 183 fmt, (sum(samples) // channels for samples in zip(*mono_channels))
amine@117 184 )
amine@117 185 return _array_to_bytes(avg_arr)
amine@117 186
amine@117 187
def _extract_selected_channel(data, channels, sample_width, use_channel):
    """
    Extract one channel from interleaved multi-channel audio `data`.

    :Parameters:

        `data` : bytes-like
            interleaved multi-channel audio data.
        `channels` : int
            number of channels in `data`.
        `sample_width` : int
            size in bytes of one sample.
        `use_channel` : int or str
            'mix' to mix all channels into one, or the index of the channel
            to keep. A negative index counts from the last channel.

    :Returns:

        audio data of the selected (or mixed) channel as bytes.

    :Raises:

        `AudioParameterError` if `use_channel` is an index outside of
        [-channels, channels).
    """
    if use_channel == "mix":
        return _mix_audio_channels(data, channels, sample_width)
    if not -channels <= use_channel < channels:
        plural = "s" if channels > 1 else ""
        err_message = (
            "use_channel == {} but audio data has only {} channel{}."
            " Selected channel must be 'mix' or an integer >= "
            "-channels and < channels"
        ).format(use_channel, channels, plural)
        raise AudioParameterError(err_message)
    if use_channel < 0:
        # convert a negative index into its positive equivalent
        use_channel += channels
    samples = array(DATA_FORMAT[sample_width], data)
    return _array_to_bytes(samples[use_channel::channels])
amine@116 204
amine@116 205
class AudioSource:
    """
    Base class for audio source objects.

    Subclasses should implement methods to open/close an audio stream
    and to read the desired amount of audio samples.

    :Parameters:

        `sampling_rate` : int
            Number of samples per second of audio stream. Default = 16000.

        `sample_width` : int
            Size in bytes of one audio sample. Possible values : 1, 2, 4.
            Default = 2.

        `channels` : int
            Number of channels of audio stream.
    """

    def __init__(
        self,
        sampling_rate=DEFAULT_SAMPLE_RATE,
        sample_width=DEFAULT_SAMPLE_WIDTH,
        channels=DEFAULT_NB_CHANNELS,
    ):
        if sample_width not in (1, 2, 4):
            raise AudioParameterError(
                "Sample width must be one of: 1, 2 or 4 (bytes)"
            )

        self._sampling_rate = sampling_rate
        self._sample_width = sample_width
        self._channels = channels

    def is_open(self):
        """ Tell whether the audio source is open. Must be overridden. """
        raise NotImplementedError

    def open(self):
        """ Open the audio source. Must be overridden. """
        raise NotImplementedError

    def close(self):
        """ Close the audio source. Must be overridden. """
        raise NotImplementedError

    def read(self, size):
        """
        Read and return at most `size` audio samples. Must be overridden.

        :Parameters:

            `size` : int
                the number of samples to read.

        :Returns:

            Audio data as a string of length `N * sample_width * channels`,
            where `N` is:

            - `size` if `size` < 'left_samples'

            - 'left_samples' if `size` > 'left_samples'
        """
        raise NotImplementedError

    def get_sampling_rate(self):
        """ Same value as the `sampling_rate` property """
        return self.sampling_rate

    @property
    def sampling_rate(self):
        """ Number of samples per second of audio stream """
        return self._sampling_rate

    @property
    def sr(self):
        """ Short name for the number of samples per second """
        return self._sampling_rate

    def get_sample_width(self):
        """ Same value as the `sample_width` property """
        return self.sample_width

    @property
    def sample_width(self):
        """ Number of bytes used to represent one audio sample """
        return self._sample_width

    @property
    def sw(self):
        """ Short name for the number of bytes of one audio sample """
        return self._sample_width

    def get_channels(self):
        """ Same value as the `channels` property """
        return self.channels

    @property
    def channels(self):
        """ Number of channels of this audio source """
        return self._channels

    @property
    def ch(self):
        """ Short name for the number of channels of this audio source """
        return self.channels
amine@72 315
amine@2 316
class Rewindable:
    """
    Interface for audio streams that can be rewound.

    Subclasses should implement methods to go back to the beginning of the
    stream and to get or set the current position, expressed either in
    number of samples or in seconds.
    """

    @property
    def rewindable(self):
        """ Always True for this class """
        return True

    def rewind(self):
        """ Go back to the beginning of audio stream. Must be overridden. """
        raise NotImplementedError

    def get_position(self):
        """ Return the number of samples read so far. Must be overridden. """
        raise NotImplementedError

    def get_time_position(self):
        """
        Return the duration in seconds of data read so far.
        Must be overridden.
        """
        raise NotImplementedError

    def set_position(self, position):
        """ Move to an absolute position. Must be overridden.

        :Parameters:

            `position` : int
                number of samples to skip from the start of the stream
        """
        raise NotImplementedError

    def set_time_position(self, time_position):
        """ Move to an absolute position expressed in seconds.
        Must be overridden.

        :Parameters:

            `time_position` : float
                seconds to skip from the start of the stream
        """
        raise NotImplementedError
amine@48 360
amine@2 361
class BufferAudioSource(AudioSource, Rewindable):
    """
    An :class:`AudioSource` that encapsulates and reads data from a memory
    buffer. It implements methods from :class:`Rewindable` and is therefore
    a navigable :class:`AudioSource`.

    :Parameters:

        `data_buffer` : bytes-like
            audio data; its length must be a multiple of
            (sample_width * channels).
        `sampling_rate` : int
            number of samples per second.
        `sample_width` : int
            size in bytes of one audio sample.
        `channels` : int
            number of channels of audio data.
    """

    def __init__(
        self,
        data_buffer,
        sampling_rate=DEFAULT_SAMPLE_RATE,
        sample_width=DEFAULT_SAMPLE_WIDTH,
        channels=DEFAULT_NB_CHANNELS,
    ):
        AudioSource.__init__(self, sampling_rate, sample_width, channels)
        check_audio_data(data_buffer, sample_width, channels)
        self._buffer = data_buffer
        # size in bytes of one sample taken across all channels
        self._sample_size_all_channels = sample_width * channels
        self._current_position_bytes = 0
        self._is_open = False

    def is_open(self):
        """ Return True if the source is open, False otherwise """
        return self._is_open

    def open(self):
        """ Mark the source as open so that `read` can be called """
        self._is_open = True

    def close(self):
        """ Mark the source as closed and go back to the start of data """
        self._is_open = False
        self.rewind()

    def read(self, size):
        """
        Read at most `size` samples from the current position.

        :Returns:

            the data read, or `None` if no data is left.

        :Raises:

            `AudioIOError` if the source is not open.
        """
        if not self._is_open:
            raise AudioIOError("Stream is not open")
        bytes_to_read = self._sample_size_all_channels * size
        start = self._current_position_bytes
        data = self._buffer[start : start + bytes_to_read]
        if data:
            self._current_position_bytes += len(data)
            return data
        return None

    def get_data_buffer(self):
        """ Return all audio data as one string buffer. """
        return self._buffer

    def set_data(self, data_buffer):
        """ Set new data for this audio stream and reset the position to 0.

        :Parameters:

            `data_buffer` : str, basestring, Bytes
                a string buffer with a length multiple of
                (sample_width * channels)
        """
        check_audio_data(data_buffer, self.sample_width, self.channels)
        self._buffer = data_buffer
        self._current_position_bytes = 0

    def append_data(self, data_buffer):
        """ Append data to this audio stream

        :Parameters:

            `data_buffer` : str, basestring, Bytes
                a buffer with a length multiple of (sample_width * channels)
        """
        check_audio_data(data_buffer, self.sample_width, self.channels)
        self._buffer += data_buffer

    def rewind(self):
        """ Go back to the beginning of audio data """
        self.set_position(0)

    def get_position(self):
        """ Return the number of samples read so far as an integer """
        # Floor division keeps the result an int under Python 3 as well;
        # the byte position is always a multiple of the sample size.
        return self._current_position_bytes // self._sample_size_all_channels

    def get_time_position(self):
        """ Return the duration in seconds of data read so far """
        return float(self._current_position_bytes) / (
            self._sample_size_all_channels * self.sampling_rate
        )

    def set_position(self, position):
        """ Move to an absolute position expressed in number of samples.

        A position beyond the end of data is clamped to the end of data.

        :Raises:

            `ValueError` if `position` is negative.
        """
        if position < 0:
            raise ValueError("position must be >= 0")
        position *= self._sample_size_all_channels
        self._current_position_bytes = (
            position if position < len(self._buffer) else len(self._buffer)
        )

    def set_time_position(self, time_position):  # time in seconds
        """ Move to an absolute position expressed in seconds """
        position = int(self.sampling_rate * time_position)
        self.set_position(position)
amine@2 454
amine@48 455
class _FileAudioSource(AudioSource):
    """
    Base class for audio sources that read data from a file-like stream.

    Subclasses must implement `open` and `_read_from_stream`. When the
    stream has more than one channel, data returned by `read` is reduced to
    the channel designated by `use_channel`.
    """

    def __init__(self, sampling_rate, sample_width, channels, use_channel):
        AudioSource.__init__(self, sampling_rate, sample_width, channels)
        self._audio_stream = None
        self._use_channel = _normalize_use_channel(use_channel)
        if channels > 1:
            # bind everything but the data so `read` only passes the data
            channel_selector = partial(
                _extract_selected_channel,
                channels=channels,
                sample_width=sample_width,
                use_channel=self._use_channel,
            )
        else:
            # mono stream: data is returned as is
            channel_selector = lambda x: x
        self._extract_selected_channel = channel_selector

    def __del__(self):
        if self.is_open():
            self.close()

    @property
    def use_channel(self):
        """ Normalized value of the `use_channel` constructor argument """
        return self._use_channel

    def is_open(self):
        """ Return True if the underlying stream is open """
        return self._audio_stream is not None

    def close(self):
        """ Close the underlying stream if it is open """
        if self._audio_stream is None:
            return
        self._audio_stream.close()
        self._audio_stream = None

    def _read_from_stream(self, size):
        """ Read `size` samples from the stream. Must be overridden. """
        raise NotImplementedError

    def read(self, size):
        """
        Read at most `size` samples and return data of the selected channel,
        or `None` if no data could be read.

        :Raises:

            `AudioIOError` if the stream is not open.
        """
        if not self.is_open():
            raise AudioIOError("Audio stream is not open")
        data = self._read_from_stream(size)
        return self._extract_selected_channel(data) if data else None
amine@153 497
amine@153 498
class RawAudioSource(_FileAudioSource, Rewindable):
    """
    An audio source that reads raw (headerless) audio data from a file.
    The file is opened when `open` is called and data is read from disk
    each time `read` is called.

    :Parameters:

        `file` :
            path of the raw audio file, passed to the built-in `open`.
        `sampling_rate` : int
            number of samples per second.
        `sample_width` : int
            size in bytes of one audio sample.
        `channels` : int
            number of channels of audio data.
        `use_channel` : int or str
            channel to return when reading multi-channel data.
    """

    def __init__(
        self, file, sampling_rate, sample_width, channels, use_channel=0
    ):
        _FileAudioSource.__init__(
            self, sampling_rate, sample_width, channels, use_channel
        )
        self._file = file
        self._audio_stream = None
        # size in bytes of one sample taken across all channels
        self._sample_size = sample_width * channels

    def open(self):
        """ Open the file for binary reading unless it is already open """
        if self._audio_stream is not None:
            return
        self._audio_stream = open(self._file, "rb")

    def _read_from_stream(self, size):
        return self._audio_stream.read(size * self._sample_size)
amine@154 518
amine@154 519
class WaveAudioSource(_FileAudioSource, Rewindable):
    """
    A class for an `AudioSource` that reads data from a wave file.
    This class should be used for large wave files to avoid loading
    the whole data to memory.

    :Parameters:

        `filename` :
            path to a valid wave file.
        `use_channel` : int or str
            channel to return when reading multi-channel data.
    """

    def __init__(self, filename, use_channel=0):
        self._filename = filename
        self._audio_stream = None
        # The file is opened only to read audio parameters from its header.
        stream = wave.open(self._filename, "rb")
        try:
            _FileAudioSource.__init__(
                self,
                stream.getframerate(),
                stream.getsampwidth(),
                stream.getnchannels(),
                use_channel,
            )
        finally:
            # Always release the file, even if the parameters are rejected
            # (e.g. unsupported sample width).
            stream.close()

    def open(self):
        """ Open the wave file unless it is already open """
        if self._audio_stream is None:
            self._audio_stream = wave.open(self._filename)

    def _read_from_stream(self, size):
        return self._audio_stream.readframes(size)
amine@2 551
amine@2 552
class PyAudioSource(AudioSource):
    """
    A class for an `AudioSource` that reads data from an audio input device
    (by default, the built-in microphone) using PyAudio.

    :Parameters:

        `sampling_rate` : int
            number of samples per second.
        `sample_width` : int
            size in bytes of one audio sample.
        `channels` : int
            number of channels.
        `frames_per_buffer` : int
            value passed to PyAudio as `frames_per_buffer` when the stream
            is opened.
        `input_device_index` :
            value passed to PyAudio as `input_device_index` when the stream
            is opened.
    """

    def __init__(
        self,
        sampling_rate=DEFAULT_SAMPLE_RATE,
        sample_width=DEFAULT_SAMPLE_WIDTH,
        channels=DEFAULT_NB_CHANNELS,
        frames_per_buffer=1024,
        input_device_index=None,
    ):
        AudioSource.__init__(self, sampling_rate, sample_width, channels)
        self._chunk_size = frames_per_buffer
        self.input_device_index = input_device_index

        # imported here so that pyaudio is only required when this class
        # is actually used
        import pyaudio

        self._pyaudio_object = pyaudio.PyAudio()
        self._pyaudio_format = self._pyaudio_object.get_format_from_width(
            self.sample_width
        )
        self._audio_stream = None

    def is_open(self):
        """ Return True if the input stream is open """
        return self._audio_stream is not None

    def open(self):
        """ Open an input-only PyAudio stream with this source's parameters """
        stream_options = dict(
            format=self._pyaudio_format,
            channels=self.channels,
            rate=self.sampling_rate,
            input=True,
            output=False,
            input_device_index=self.input_device_index,
            frames_per_buffer=self._chunk_size,
        )
        self._audio_stream = self._pyaudio_object.open(**stream_options)

    def close(self):
        """ Stop and close the input stream if it is open """
        if self._audio_stream is None:
            return
        self._audio_stream.stop_stream()
        self._audio_stream.close()
        self._audio_stream = None

    def read(self, size):
        """
        Read `size` samples from the input stream.

        :Returns:

            the data read, or `None` if the stream is not active or no data
            was read.

        :Raises:

            `IOError` if the stream is not open.
        """
        if self._audio_stream is None:
            raise IOError("Stream is not open")

        if not self._audio_stream.is_active():
            return None

        data = self._audio_stream.read(size)
        if data is None or len(data) < 1:
            return None
        return data
amine@67 610
amine@2 611
class StdinAudioSource(_FileAudioSource):
    """
    A class for an :class:`AudioSource` that reads data from standard input.

    :Parameters:

        `sampling_rate` : int
            number of samples per second.
        `sample_width` : int
            size in bytes of one audio sample.
        `channels` : int
            number of channels of audio data.
        `use_channel` : int or str
            channel to return when reading multi-channel data.
    """

    def __init__(
        self,
        sampling_rate=DEFAULT_SAMPLE_RATE,
        sample_width=DEFAULT_SAMPLE_WIDTH,
        channels=DEFAULT_NB_CHANNELS,
        use_channel=0,
    ):
        _FileAudioSource.__init__(
            self, sampling_rate, sample_width, channels, use_channel
        )
        self._is_open = False
        # size in bytes of one sample taken across all channels
        self._sample_size = sample_width * channels
        # under Python 3, binary data must be read from `sys.stdin.buffer`
        self._stream = sys.stdin.buffer if PYTHON_3 else sys.stdin

    def is_open(self):
        """ Return True if the source has been opened and not closed """
        return self._is_open

    def open(self):
        """ Mark the source as open """
        self._is_open = True

    def close(self):
        """ Mark the source as closed; standard input itself is left open """
        self._is_open = False

    def _read_from_stream(self, size):
        data = self._stream.read(size * self._sample_size)
        return data if data else None
amine@67 650
amine@67 651
class PyAudioPlayer:
    """
    A class for audio playback using PyAudio.

    An output stream is opened as soon as the object is created.

    :Parameters:

        `sampling_rate` : int
            number of samples per second.
        `sample_width` : int
            size in bytes of one audio sample. Must be 1, 2 or 4.
        `channels` : int
            number of channels.
    """

    def __init__(
        self,
        sampling_rate=DEFAULT_SAMPLE_RATE,
        sample_width=DEFAULT_SAMPLE_WIDTH,
        channels=DEFAULT_NB_CHANNELS,
    ):
        if sample_width not in (1, 2, 4):
            raise ValueError("Sample width must be one of: 1, 2 or 4 (bytes)")

        self.sampling_rate = sampling_rate
        self.sample_width = sample_width
        self.channels = channels

        # imported here so that pyaudio is only required when this class
        # is actually used
        import pyaudio

        self._p = pyaudio.PyAudio()
        audio_format = self._p.get_format_from_width(self.sample_width)
        self.stream = self._p.open(
            format=audio_format,
            channels=self.channels,
            rate=self.sampling_rate,
            input=False,
            output=True,
        )

    def play(self, data):
        """ Play `data` chunk by chunk, then stop the output stream """
        if self.stream.is_stopped():
            self.stream.start_stream()

        for chunk in self._chunk_data(data):
            self.stream.write(chunk)

        self.stream.stop_stream()

    def stop(self):
        """ Stop and close the output stream and terminate PyAudio """
        if not self.stream.is_stopped():
            self.stream.stop_stream()
        self.stream.close()
        self._p.terminate()

    def _chunk_data(self, data):
        """ Yield successive slices of `data`, each 100 ms long at most """
        # make audio chunks of 100 ms to allow interruption (like ctrl+c)
        bytes_per_second = (
            self.sampling_rate * self.sample_width * self.channels
        )
        chunk_size = int(bytes_per_second / 10)
        offset = 0
        while offset < len(data):
            yield data[offset : offset + chunk_size]
            offset += chunk_size
amine@67 705
amine@2 706
def player_for(audio_source):
    """
    Create a :class:`PyAudioPlayer` suited to play data read from
    `audio_source`.

    :Parameters:

        `audio_source` :
            an `AudioSource` object.

    :Returns:

        `PyAudioPlayer` that has the same sampling rate, sample width and
        number of channels as `audio_source`.
    """
    sampling_rate = audio_source.get_sampling_rate()
    sample_width = audio_source.get_sample_width()
    channels = audio_source.get_channels()
    return PyAudioPlayer(sampling_rate, sample_width, channels)
amine@112 727
amine@112 728
def _load_raw(
    file,
    sampling_rate,
    sample_width,
    channels,
    use_channel=0,
    large_file=False,
):
    """
    Load a raw audio file with standard Python.
    If `large_file` is True, audio data will be lazily
    loaded to memory.

    See also :func:`from_file`.

    :Parameters:
        `file` : str
            path of the raw audio file to open
        `sampling_rate`: int
            sampling rate of audio data
        `sample_width`: int
            sample width of audio data
        `channels`: int
            number of channels of audio data
        `use_channel`: int or str
            audio channel to read if file is not mono audio, as accepted by
            `_extract_selected_channel`.
        `large_file`: bool
            if True, return a `RawAudioSource` that reads data from disk on
            demand instead of loading the whole file.

    :Returns:

        a `RawAudioSource` if `large_file` is True, otherwise a mono
        `BufferAudioSource` that holds data of the selected channel.

    :Raises:

        `AudioParameterError` if `sampling_rate`, `sample_width` or
        `channels` is None.
    """
    if None in (sampling_rate, sample_width, channels):
        raise AudioParameterError(
            "All audio parameters are required for raw audio files"
        )

    if large_file:
        return RawAudioSource(
            file,
            sampling_rate=sampling_rate,
            sample_width=sample_width,
            channels=channels,
            use_channel=use_channel,
        )

    with open(file, "rb") as raw_file:
        data = raw_file.read()
    if channels != 1:
        # TODO check if striding with mmap doesn't load all data to memory
        data = _extract_selected_channel(
            data, channels, sample_width, use_channel
        )
    # only one channel is left once the selected channel has been extracted
    return BufferAudioSource(
        data,
        sampling_rate=sampling_rate,
        sample_width=sample_width,
        channels=1,
    )
amine@112 790
amine@112 791
def _load_wave(filename, large_file=False, use_channel=0):
    """
    Load a wave audio file with standard Python.
    If `large_file` is True, audio data will be lazily
    loaded to memory.

    See also :func:`from_file`.

    :Parameters:

        `filename` :
            wave file to read, passed to `wave.open`.
        `large_file` : bool
            if True, return a `WaveAudioSource` that reads data from disk on
            demand instead of loading the whole file.
        `use_channel` : int or str
            audio channel to read if file is not mono audio.

    :Returns:

        a `WaveAudioSource` if `large_file` is True, otherwise a mono
        `BufferAudioSource` that holds data of the selected channel.
    """
    if large_file:
        return WaveAudioSource(filename, use_channel)
    # try/finally rather than `with`: wave readers are not context managers
    # under Python 2, which this module still supports.
    fp = wave.open(filename)
    try:
        channels = fp.getnchannels()
        srate = fp.getframerate()
        swidth = fp.getsampwidth()
        data = fp.readframes(-1)
    finally:
        fp.close()
    if channels > 1:
        data = _extract_selected_channel(data, channels, swidth, use_channel)
    return BufferAudioSource(
        data, sampling_rate=srate, sample_width=swidth, channels=1
    )
amine@113 812
amine@113 813
def _load_with_pydub(filename, audio_format, use_channel=0):
    """Open compressed audio file using pydub. If a video file
    is passed, its audio track(s) are extracted and loaded.
    This function should not be called directly, use :func:`from_file`
    instead.

    :Parameters:

        `filename`:
            path to audio file.
        `audio_format`:
            string, audio file format (e.g. raw, webm, wav, ogg)
        `use_channel`: int or str
            audio channel to keep if audio data is not mono.

    :Returns:

        a mono `BufferAudioSource` that holds data of the selected channel.
    """
    # formats with a dedicated pydub loader; anything else goes through
    # the generic `AudioSegment.from_file`
    dedicated_loaders = {
        "mp3": AudioSegment.from_mp3,
        "ogg": AudioSegment.from_ogg,
        "flv": AudioSegment.from_flv,
    }
    loader = dedicated_loaders.get(audio_format, AudioSegment.from_file)
    segment = loader(filename)
    data = segment._data
    if segment.channels > 1:
        data = _extract_selected_channel(
            data, segment.channels, segment.sample_width, use_channel
        )
    return BufferAudioSource(
        data_buffer=data,
        sampling_rate=segment.frame_rate,
        sample_width=segment.sample_width,
        channels=1,
    )
amine@114 845
amine@114 846
def from_file(filename, audio_format=None, large_file=False, **kwargs):
    """
    Read audio data from `filename` and return an `AudioSource` object.
    If `audio_format` is None, the format is guessed from the file's
    extension. `filename` can be a compressed audio or video file, in which
    case pydub (https://github.com/jiaaro/pydub) must be installed.

    By default all audio data are loaded to memory and wrapped in a
    :class:`BufferAudioSource`. This is convenient most of the time unless
    the file is very large. To read data lazily instead (i.e. from disk each
    time :func:`AudioSource.read` is called), set `large_file` to True.

    Note that lazy loading is only supported for wave and raw formats.

    See also :func:`to_file`.

    :Parameters:

    `filename`: str
        path to input audio or video file.
    `audio_format`: str
        format of the input file (e.g. raw, webm, wav, ogg).
    `large_file`: bool
        If True, audio is not fully loaded to memory; data are read from
        disk each time a window is requested.

    :kwargs:

    If the audio format is `raw`, the following keyword arguments are
    required:

    `sampling_rate`, `sr`: int
        sampling rate of audio data
    `sample_width`: int
        sample width (i.e. number of bytes used to represent one audio sample)
    `channels`: int
        number of channels of audio data

    The following keyword argument applies to all formats:

    `use_channel`: int, str
        audio channel to extract from input file if file is not mono audio.
        This must be an integer >= 0 and < channels, or one of the special
        values `left` and `right` (treated as 0 and 1 respectively).

    :Returns:

    An `AudioSource` object that reads data from input file.

    :Raises:

    An `AudioIOError` is raised if audio data cannot be read in the given
    format; or if format is `raw` and one or more audio parameters are missing.
    """
    fmt = _guess_audio_format(audio_format, filename)

    # Headerless data: every audio parameter has to come from the caller.
    if fmt == "raw":
        params = _get_audio_parameters(kwargs)
        sampling_rate, sample_width, channels, use_channel = params
        return _load_raw(
            filename,
            sampling_rate,
            sample_width,
            channels,
            use_channel,
            large_file,
        )

    use_channel = _normalize_use_channel(kwargs.get("use_channel"))

    if fmt in ("wav", "wave"):
        return _load_wave(filename, large_file, use_channel)

    # From here on only pydub can decode the file, and it cannot do so lazily.
    if large_file:
        raise AudioIOError("Large file format should be raw or wav")
    if not _WITH_PYDUB:
        raise AudioIOError(
            "pydub is required for audio formats other than raw or wav"
        )
    return _load_with_pydub(
        filename, audio_format=fmt, use_channel=use_channel
    )
amine@2 922
amine@2 923
def _save_raw(data, file):
    """
    Write `data` as is to `file`, producing a headerless (i.e. raw) audio
    file. The file is opened in binary mode and overwritten if it exists.
    See also :func:`to_file`.
    """
    with open(file, "wb") as raw_output:
        raw_output.write(data)
amine@104 931
amine@104 932
amine@136 933 def _save_wave(data, file, sampling_rate, sample_width, channels):
amine@104 934 """
amine@104 935 Saves audio data to a wave file.
amine@104 936 See also :func:`to_file`.
amine@104 937 """
amine@132 938 if None in (sampling_rate, sample_width, channels):
amine@132 939 raise AudioParameterError(
amine@132 940 "All audio parameters are required to save wave audio files"
amine@132 941 )
amine@104 942 with wave.open(file, "w") as fp:
amine@104 943 fp.setframerate(sampling_rate)
amine@104 944 fp.setsampwidth(sample_width)
amine@104 945 fp.setnchannels(channels)
amine@104 946 fp.writeframes(data)
amine@104 947
amine@104 948
def _save_with_pydub(
    data, file, audio_format, sampling_rate, sample_width, channels
):
    """
    Saves audio data with pydub (https://github.com/jiaaro/pydub).

    An `AudioSegment` is built from `data` and the given audio parameters,
    then exported to `file` using `audio_format` as the output format.
    See also :func:`to_file`.
    """
    audio = AudioSegment(
        data,
        channels=channels,
        sample_width=sample_width,
        frame_rate=sampling_rate,
    )
    with open(file, "wb") as output:
        audio.export(output, format=audio_format)
amine@104 964
amine@104 965
def to_file(data, file, audio_format=None, **kwargs):
    """
    Writes audio data to file. If `audio_format` is `None`, output
    audio format will be guessed from extension. If `audio_format`
    is `None` and `file` comes without an extension then audio
    data will be written as a raw audio file.

    :Parameters:

    `data`: buffer of bytes
        audio data to be written. Can be a `bytes`, `bytearray`,
        `memoryview`, `array` or `numpy.ndarray` object.
    `file`: str
        path to output audio file
    `audio_format`: str
        audio format used to save data (e.g. raw, webm, wav, ogg)

    :kwargs:

    If an audio format other than raw is used, the following
    keyword arguments are required:

    `sampling_rate`, `sr`: int
        sampling rate of audio data
    `sample_width`, `sw`: int
        sample width (i.e., number of bytes of one audio sample)
    `channels`, `ch`: int
        number of channels of audio data

    :Raises:

    `AudioParameterError` if output format is different than raw and one
    or more audio parameters are missing.
    `AudioIOError` if audio data cannot be written in the desired format.
    """
    audio_format = _guess_audio_format(audio_format, file)
    if audio_format in (None, "raw"):
        # Raw output needs no audio parameters: bytes are dumped as is.
        _save_raw(data, file)
        return
    try:
        params = _get_audio_parameters(kwargs)
        # The 4th item (channel to use) is irrelevant when writing.
        sampling_rate, sample_width, channels, _ = params
    except AudioParameterError as exc:
        # Both literals must sit inside the parentheses so that they are
        # concatenated before `format` is applied; otherwise the message is
        # cut short and the underlying error detail is lost.
        err_message = (
            "All audio parameters are required to save formats "
            "other than raw. Error detail: {}"
        ).format(exc)
        raise AudioParameterError(err_message)
    if audio_format in ("wav", "wave"):
        _save_wave(data, file, sampling_rate, sample_width, channels)
    elif _WITH_PYDUB:
        _save_with_pydub(
            data, file, audio_format, sampling_rate, sample_width, channels
        )
    else:
        err_message = "cannot write file format {} (file name: {})"
        raise AudioIOError(err_message.format(audio_format, file))