annotate auditok/io.py @ 170:684392cc5019

Add get_audio_source
author Amine Sehili <amine.sehili@gmail.com>
date Thu, 07 Mar 2019 21:28:23 +0100
parents 1fa7fa2857d4
children 00790b3d5aa2
rev   line source
amine@2 1 """
amine@33 2 Module for low-level audio input-output operations.
amine@2 3
amine@32 4 Class summary
amine@32 5 =============
amine@32 6
amine@32 7 .. autosummary::
amine@32 8
amine@32 9 AudioSource
amine@32 10 Rewindable
amine@32 11 BufferAudioSource
amine@32 12 WaveAudioSource
amine@32 13 PyAudioSource
amine@32 14 StdinAudioSource
amine@32 15 PyAudioPlayer
amine@32 16
amine@32 17
amine@32 18 Function summary
amine@32 19 ================
amine@32 20
amine@32 21 .. autosummary::
amine@32 22
amine@32 23 from_file
amine@32 24 player_for
amine@2 25 """
amine@103 26 import os
amine@103 27 import sys
amine@2 28 import wave
amine@168 29 import warnings
amine@117 30 import audioop
amine@116 31 from array import array
amine@153 32 from functools import partial
amine@116 33
# True when the interpreter is Python 3 or newer, False under Python 2.
PYTHON_3 = sys.version_info >= (3, 0)
amine@2 38
amine@104 39 try:
amine@104 40 from pydub import AudioSegment
amine@112 41
amine@104 42 _WITH_PYDUB = True
amine@104 43 except ImportError:
amine@104 44 _WITH_PYDUB = False
amine@104 45
amine@112 46 __all__ = [
amine@112 47 "AudioIOError",
amine@112 48 "AudioParameterError",
amine@112 49 "AudioSource",
amine@112 50 "Rewindable",
amine@112 51 "BufferAudioSource",
amine@112 52 "WaveAudioSource",
amine@112 53 "PyAudioSource",
amine@112 54 "StdinAudioSource",
amine@112 55 "PyAudioPlayer",
amine@112 56 "from_file",
amine@112 57 "player_for",
amine@112 58 ]
amine@2 59
amine@2 60 DEFAULT_SAMPLE_RATE = 16000
amine@2 61 DEFAULT_SAMPLE_WIDTH = 2
amine@2 62 DEFAULT_NB_CHANNELS = 1
amine@112 63 DATA_FORMAT = {1: "b", 2: "h", 4: "i"}
amine@112 64
amine@2 65
class AudioIOError(Exception):
    """Base class of the errors raised by audio input/output operations."""
amine@89 68
amine@89 69
class AudioParameterError(AudioIOError):
    """Raised when an audio parameter or a piece of audio data is invalid."""
amine@89 72
amine@2 73
def check_audio_data(data, sample_width, channels):
    """
    Make sure `data` contains a whole number of multi-channel samples.

    :Parameters:

        `data` : bytes
            audio data to check.
        `sample_width` : int
            size in bytes of one audio sample.
        `channels` : int
            number of channels of audio data.

    :Raises:

        `AudioParameterError` if the length of `data` is not a multiple of
        `sample_width * channels`.
    """
    frame_size = int(sample_width * channels)
    _, leftover = divmod(len(data), frame_size)
    if leftover:
        raise AudioParameterError(
            "The length of audio data must be an integer "
            "multiple of `sample_width * channels`"
        )
amine@90 82
amine@90 83
amine@100 84 def _guess_audio_format(fmt, filename):
amine@100 85 if fmt is None:
amine@100 86 extension = os.path.splitext(filename.lower())[1][1:]
amine@100 87 return extension if extension else None
amine@100 88 return fmt.lower()
amine@100 89
amine@100 90
amine@101 91 def _normalize_use_channel(use_channel):
amine@101 92 """
amine@101 93 Returns a value of `use_channel` as expected by audio read/write fuctions.
amine@101 94 If `use_channel` is `None`, returns 0. If it's an integer, or the special
amine@101 95 str 'mix' returns it as is. If it's `left` or `right` returns 0 or 1
amine@101 96 respectively.
amine@101 97 """
amine@101 98 if use_channel is None:
amine@101 99 return 0
amine@101 100 if use_channel == "mix" or isinstance(use_channel, int):
amine@101 101 return use_channel
amine@101 102 try:
amine@101 103 return ["left", "right"].index(use_channel)
amine@101 104 except ValueError:
amine@101 105 err_message = "'use_channel' parameter must be an integer "
amine@101 106 "or one of ('left', 'right', 'mix'), found: '{}'".format(use_channel)
amine@101 107 raise AudioParameterError(err_message)
amine@101 108
amine@101 109
def _get_audio_parameters(param_dict):
    """
    Gets audio parameters from a dictionary of parameters.

    A parameter can have a long name or a short name. If the long name is
    present, the short name is ignored. If neither is present,
    `AudioParameterError` is raised, except for the `use_channel` (or `uc`)
    parameter for which a default value of 0 is used.

    `AudioParameterError` is also raised if sampling rate, sample width or
    channels is not a positive integer.

    Expected parameters are:

        `sampling_rate`, `sr`: int, sampling rate.
        `sample_width`, `sw`: int, sample size in bytes.
        `channels`, `ch`: int, number of channels.
        `use_channel`, `uc`: int or str, which channel to use from data.
            Default value is 0 (first channel). The following special str
            values are also accepted:
                `left`: alias for 0
                `right`: alias for 1
                `mix`: indicates that all channels should be mixed up into
                one single channel

    :Returns:

        audio parameters as a tuple (sampling_rate,
                                     sample_width,
                                     channels,
                                     use_channel)
    """
    mandatory = (
        ("sampling_rate", "sr"),
        ("sample_width", "sw"),
        ("channels", "ch"),
    )
    values = []
    for long_name, short_name in mandatory:
        value = param_dict.get(long_name, param_dict.get(short_name))
        is_positive_int = isinstance(value, int) and value > 0
        if not is_positive_int:
            raise AudioParameterError(
                "'{ln}' (or '{sn}') must be a positive integer, "
                "found: '{val}'".format(
                    ln=long_name, sn=short_name, val=value
                )
            )
        values.append(value)
    sampling_rate, sample_width, channels = values
    use_channel = _normalize_use_channel(
        param_dict.get("use_channel", param_dict.get("uc", 0))
    )
    return sampling_rate, sample_width, channels, use_channel
amine@102 161
amine@102 162
def _array_to_bytes(a):
    """
    Return the raw content of the `array.array` object `a` as `bytes`.
    """
    # `tobytes` is the Python 3 name of what Python 2 calls `tostring`.
    serialize = a.tobytes if PYTHON_3 else a.tostring
    return serialize()
amine@116 171
amine@116 172
amine@117 173 def _mix_audio_channels(data, channels, sample_width):
amine@117 174 if channels == 1:
amine@117 175 return data
amine@117 176 if channels == 2:
amine@117 177 return audioop.tomono(data, sample_width, 0.5, 0.5)
amine@117 178 fmt = DATA_FORMAT[sample_width]
amine@117 179 buffer = array(fmt, data)
amine@117 180 mono_channels = [
amine@117 181 array(fmt, buffer[ch::channels]) for ch in range(channels)
amine@117 182 ]
amine@117 183 avg_arr = array(
amine@117 184 fmt, (sum(samples) // channels for samples in zip(*mono_channels))
amine@117 185 )
amine@117 186 return _array_to_bytes(avg_arr)
amine@117 187
amine@117 188
def _extract_selected_channel(data, channels, sample_width, use_channel):
    """
    Return the data of one channel of multi-channel audio `data`.

    :Parameters:

        `data` : bytes
            interleaved multi-channel audio data.
        `channels` : int
            number of channels in `data`.
        `sample_width` : int
            size in bytes of one audio sample (1, 2 or 4).
        `use_channel` : int or str
            'mix' to mix all channels into one, or the index of the channel
            to extract. A negative index is counted from the last channel.

    :Raises:

        `AudioParameterError` if `use_channel` is an integer that is not
        >= -channels and < channels.
    """
    if use_channel == "mix":
        return _mix_audio_channels(data, channels, sample_width)

    out_of_range = use_channel >= channels or use_channel < -channels
    if out_of_range:
        plural = "s" if channels > 1 else ""
        raise AudioParameterError(
            (
                "use_channel == {} but audio data has only {} channel{}."
                " Selected channel must be 'mix' or an integer >= "
                "-channels and < channels"
            ).format(use_channel, channels, plural)
        )

    if use_channel < 0:
        use_channel += channels
    samples = array(DATA_FORMAT[sample_width], data)
    return _array_to_bytes(samples[use_channel::channels])
amine@116 205
amine@116 206
class AudioSource(object):
    """
    Base class for audio source objects.

    Subclasses should implement methods to open/close an audio stream
    and read the desired amount of audio samples.

    :Parameters:

        `sampling_rate` : int
            Number of samples per second of audio stream. Default = 16000.

        `sample_width` : int
            Size in bytes of one audio sample. Possible values : 1, 2, 4.
            Default = 2.

        `channels` : int
            Number of channels of audio stream. Default = 1.

    :Raises:

        `AudioParameterError` if `sample_width` is not one of 1, 2 or 4.
    """

    # NOTE: the class explicitly derives from `object` so that it is a
    # new-style class under Python 2 as well; property setters defined by
    # subclasses are not honored by old-style classes.

    def __init__(
        self,
        sampling_rate=DEFAULT_SAMPLE_RATE,
        sample_width=DEFAULT_SAMPLE_WIDTH,
        channels=DEFAULT_NB_CHANNELS,
    ):
        if sample_width not in (1, 2, 4):
            raise AudioParameterError(
                "Sample width must be one of: 1, 2 or 4 (bytes)"
            )

        self._sampling_rate = sampling_rate
        self._sample_width = sample_width
        self._channels = channels

    def is_open(self):
        """ Return True if audio source is open, False otherwise """
        raise NotImplementedError

    def open(self):
        """ Open audio source """
        raise NotImplementedError

    def close(self):
        """ Close audio source """
        raise NotImplementedError

    def read(self, size):
        """
        Read and return `size` audio samples at most.

        :Parameters:

            `size` : int
                the number of samples to read.

        :Returns:

            Audio data as a string of length `N * sample_width * channels`,
            where `N` is the smaller of `size` and the number of samples
            left in the stream.
        """
        raise NotImplementedError

    def get_sampling_rate(self):
        """ Return the number of samples per second of audio stream """
        return self.sampling_rate

    @property
    def sampling_rate(self):
        """ Number of samples per second of audio stream """
        return self._sampling_rate

    @property
    def sr(self):
        """ Number of samples per second of audio stream """
        return self._sampling_rate

    def get_sample_width(self):
        """ Return the number of bytes used to represent one audio sample """
        return self.sample_width

    @property
    def sample_width(self):
        """ Number of bytes used to represent one audio sample """
        return self._sample_width

    @property
    def sw(self):
        """ Number of bytes used to represent one audio sample """
        return self._sample_width

    def get_channels(self):
        """ Return the number of channels of this audio source """
        return self.channels

    @property
    def channels(self):
        """ Number of channels of this audio source """
        return self._channels

    @property
    def ch(self):
        """ Number of channels of this audio source """
        return self.channels
amine@72 316
amine@2 317
class Rewindable(AudioSource):
    """
    Base class for rewindable audio streams.
    Subclasses should implement methods to return to the beginning of an
    audio stream as well as a `position` property to read and set the
    absolute position of the stream expressed in number of samples. The
    time based properties (`position_s`, `position_ms`) and the deprecated
    getter/setter methods are derived from `position`.
    """

    @property
    def rewindable(self):
        """ Always True for this class """
        return True

    def rewind(self):
        """ Go back to the beginning of audio stream """
        raise NotImplementedError

    @property
    def position(self):
        """Stream position in number of samples"""
        raise NotImplementedError

    @position.setter
    def position(self, position):
        raise NotImplementedError

    @property
    def position_s(self):
        """Stream position in seconds"""
        # float() forces a true division under Python 2 too
        return float(self.position) / self.sampling_rate

    @position_s.setter
    def position_s(self, position_s):
        self.position = int(self.sampling_rate * position_s)

    @property
    def position_ms(self):
        """Stream position in milliseconds"""
        return (self.position * 1000) // self.sampling_rate

    @position_ms.setter
    def position_ms(self, position_ms):
        if not isinstance(position_ms, int):
            raise ValueError("position_ms should be an int")
        self.position = int(self.sampling_rate * position_ms / 1000)

    def get_position(self):
        """ Return the total number of already read samples

        Deprecated, use the `position` property instead.
        """
        # stacklevel=2 makes the warning point at the caller's code
        warnings.warn(
            "'get_position' is deprecated, use 'position' property instead",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.position

    def get_time_position(self):
        """ Return the total duration in seconds of already read data

        Deprecated, use the `position_s` or `position_ms` properties instead.
        """
        warnings.warn(
            "'get_time_position' is deprecated, use 'position_s' "
            "or 'position_ms' properties instead",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.position_s

    def set_position(self, position):
        """ Move to an absolute position

        Deprecated, set the `position` property instead.

        :Parameters:

            `position` : int
                number of samples to skip from the start of the stream
        """
        warnings.warn(
            "'set_position' is deprecated, set 'position' property instead",
            DeprecationWarning,
            stacklevel=2,
        )
        self.position = position

    def set_time_position(self, time_position):
        """ Move to an absolute position expressed in seconds

        Deprecated, set the `position_s` or `position_ms` properties instead.

        :Parameters:

            `time_position` : float
                seconds to skip from the start of the stream
        """
        warnings.warn(
            "'set_time_position' is deprecated, set 'position_s' "
            "or 'position_ms' properties instead",
            DeprecationWarning,
            stacklevel=2,
        )
        self.position_s = time_position
amine@48 406
amine@2 407
class BufferAudioSource(Rewindable):
    """
    An :class:`AudioSource` that encapsulates and reads data from a memory
    buffer. It implements methods from :class:`Rewindable` and is therefore
    a navigable :class:`AudioSource`.

    :Parameters:

        `data_buffer` : bytes
            audio data, its length must be a multiple of
            `sample_width * channels`.

        `sampling_rate` : int
            Number of samples per second of audio stream. Default = 16000.

        `sample_width` : int
            Size in bytes of one audio sample. Possible values : 1, 2, 4.
            Default = 2.

        `channels` : int
            Number of channels of audio stream. Default = 1.
    """

    def __init__(
        self,
        data_buffer,
        sampling_rate=DEFAULT_SAMPLE_RATE,
        sample_width=DEFAULT_SAMPLE_WIDTH,
        channels=DEFAULT_NB_CHANNELS,
    ):
        AudioSource.__init__(self, sampling_rate, sample_width, channels)
        check_audio_data(data_buffer, sample_width, channels)
        self._buffer = data_buffer
        self._sample_size_all_channels = sample_width * channels
        self._current_position_bytes = 0
        self._is_open = False

    def is_open(self):
        return self._is_open

    def open(self):
        self._is_open = True

    def close(self):
        """ Close the stream and go back to the beginning of the buffer """
        self._is_open = False
        self.rewind()

    def read(self, size):
        """
        Read and return at most `size` samples from the current position,
        or `None` if there is no data left.

        :Raises:

            `AudioIOError` if the stream is not open.
        """
        if not self._is_open:
            raise AudioIOError("Stream is not open")
        start = self._current_position_bytes
        end = start + self._sample_size_all_channels * size
        data = self._buffer[start:end]
        if data:
            self._current_position_bytes += len(data)
            return data
        return None

    @property
    def data(self):
        """ All audio data as one buffer """
        return self._buffer

    def get_data_buffer(self):
        """ Return all audio data as one string buffer. """
        return self._buffer

    def set_data(self, data_buffer):
        """ Set new data for this audio stream and go back to its beginning.

        :Parameters:

            `data_buffer` : str, basestring, Bytes
                a string buffer with a length multiple of
                (sample_width * channels)
        """
        check_audio_data(data_buffer, self.sample_width, self.channels)
        self._buffer = data_buffer
        self._current_position_bytes = 0

    def append_data(self, data_buffer):
        """ Append data to this audio stream

        :Parameters:

            `data_buffer` : str, basestring, Bytes
                a buffer with a length multiple of (sample_width * channels)
        """
        check_audio_data(data_buffer, self.sample_width, self.channels)
        self._buffer += data_buffer

    def rewind(self):
        """ Go back to the beginning of audio stream """
        # Use the property rather than the deprecated `set_position` so that
        # rewinding/closing does not emit a DeprecationWarning.
        self.position = 0

    @property
    def position(self):
        """Stream position in number of samples"""
        return self._current_position_bytes // self._sample_size_all_channels

    @position.setter
    def position(self, position):
        # work in bytes; a negative position is counted from the end of data
        position *= self._sample_size_all_channels
        if position < 0:
            position += len(self.data)
        if position < 0 or position > len(self.data):
            raise IndexError("Position out of range")
        self._current_position_bytes = position

    @property
    def position_ms(self):
        """Stream position in milliseconds"""
        return (self._current_position_bytes * 1000) // (
            self._sample_size_all_channels * self.sampling_rate
        )

    @position_ms.setter
    def position_ms(self, position_ms):
        if not isinstance(position_ms, int):
            raise ValueError("position_ms should be an int")
        self.position = int(self.sampling_rate * position_ms / 1000)
amine@165 511
amine@48 512
class _FileAudioSource(AudioSource):
    """
    Base class for audio sources that read their data from a file-like
    stream kept in `self._audio_stream`.

    Subclasses must implement `open` and `_read_from_stream`. Data returned
    by `_read_from_stream` goes through channel selection (see
    `use_channel`) before being returned by `read`.

    :Parameters:

        `sampling_rate` : int
            Number of samples per second of audio stream.
        `sample_width` : int
            Size in bytes of one audio sample. Possible values : 1, 2, 4.
        `channels` : int
            Number of channels of audio stream.
        `use_channel` : int or str
            channel to return when reading multi-channel data; any value
            accepted by `_normalize_use_channel`.
    """

    def __init__(self, sampling_rate, sample_width, channels, use_channel):
        AudioSource.__init__(self, sampling_rate, sample_width, channels)
        self._audio_stream = None
        self._use_channel = _normalize_use_channel(use_channel)
        if channels > 1:
            self._extract_selected_channel = partial(
                _extract_selected_channel,
                channels=channels,
                sample_width=sample_width,
                use_channel=self._use_channel,
            )
        else:
            # mono data: nothing to extract
            self._extract_selected_channel = lambda x: x

    def __del__(self):
        # `__init__` (of this class or of a subclass) may have raised before
        # the attributes read by `is_open` were set; in that case there is
        # nothing to close and no error should be reported at collection.
        try:
            still_open = self.is_open()
        except AttributeError:
            return
        if still_open:
            self.close()

    @property
    def use_channel(self):
        """ Normalized value of the `use_channel` parameter """
        return self._use_channel

    def is_open(self):
        return self._audio_stream is not None

    def close(self):
        if self._audio_stream is not None:
            self._audio_stream.close()
            self._audio_stream = None

    def _read_from_stream(self, size):
        """ Read `size` samples (all channels) from the underlying stream """
        raise NotImplementedError

    def read(self, size):
        """
        Read at most `size` samples and return the data of the selected
        channel, or `None` if no data could be read.

        :Raises:

            `AudioIOError` if the stream is not open.
        """
        if not self.is_open():
            raise AudioIOError("Audio stream is not open")
        data = self._read_from_stream(size)
        if data:
            return self._extract_selected_channel(data)
        return None
amine@153 554
amine@153 555
class RawAudioSource(_FileAudioSource, Rewindable):
    """
    A class for an `AudioSource` that reads data from a raw audio file,
    the audio parameters of which are supplied by the caller.

    :Parameters:

        `file` :
            path to the raw audio file; it is opened in binary mode by
            `open`.
        `sampling_rate` : int
            Number of samples per second of audio stream.
        `sample_width` : int
            Size in bytes of one audio sample.
        `channels` : int
            Number of channels of audio stream.
        `use_channel` : int or str
            channel to return when reading multi-channel data. Default = 0.
    """

    def __init__(
        self, file, sampling_rate, sample_width, channels, use_channel=0
    ):
        _FileAudioSource.__init__(
            self, sampling_rate, sample_width, channels, use_channel
        )
        self._file = file
        self._audio_stream = None
        # size in bytes of one sample of all channels
        self._sample_size = sample_width * channels

    def open(self):
        """ Open the file unless it is already open """
        if self._audio_stream is not None:
            return
        self._audio_stream = open(self._file, "rb")

    def _read_from_stream(self, size):
        return self._audio_stream.read(size * self._sample_size)
amine@154 575
amine@154 576
class WaveAudioSource(_FileAudioSource, Rewindable):
    """
    A class for an `AudioSource` that reads data from a wave file.
    This class should be used for large wave files to avoid loading
    the whole data to memory.

    :Parameters:

        `filename` :
            path to a valid wave file.
        `use_channel` : int or str
            channel to return when reading multi-channel data. Default = 0.
    """

    def __init__(self, filename, use_channel=0):
        self._filename = filename
        self._audio_stream = None
        # The file is opened once just to fetch its audio parameters; the
        # stream actually used for reading is created by `open`.
        stream = wave.open(self._filename, "rb")
        try:
            _FileAudioSource.__init__(
                self,
                stream.getframerate(),
                stream.getsampwidth(),
                stream.getnchannels(),
                use_channel,
            )
        finally:
            # close the probe stream even if the parameters are rejected
            # (e.g. unsupported sample width)
            stream.close()

    def open(self):
        """ Open the wave file unless it is already open """
        if self._audio_stream is None:
            self._audio_stream = wave.open(self._filename, "rb")

    def _read_from_stream(self, size):
        return self._audio_stream.readframes(size)
amine@2 608
amine@2 609
class PyAudioSource(AudioSource):
    """
    A class for an `AudioSource` that reads data from an audio input device
    (e.g. the built-in microphone) using PyAudio.

    :Parameters:

        `sampling_rate` : int
            Number of samples per second of audio stream. Default = 16000.
        `sample_width` : int
            Size in bytes of one audio sample. Default = 2.
        `channels` : int
            Number of channels of audio stream. Default = 1.
        `frames_per_buffer` : int
            value of PyAudio's `frames_per_buffer` used to open the stream.
            Default = 1024.
        `input_device_index` : int
            value of PyAudio's `input_device_index` used to open the stream.
            Default = None.
    """

    def __init__(
        self,
        sampling_rate=DEFAULT_SAMPLE_RATE,
        sample_width=DEFAULT_SAMPLE_WIDTH,
        channels=DEFAULT_NB_CHANNELS,
        frames_per_buffer=1024,
        input_device_index=None,
    ):
        AudioSource.__init__(self, sampling_rate, sample_width, channels)
        self._chunk_size = frames_per_buffer
        self.input_device_index = input_device_index

        # imported here so that the module can be used without pyaudio
        import pyaudio

        self._pyaudio_object = pyaudio.PyAudio()
        self._pyaudio_format = self._pyaudio_object.get_format_from_width(
            self.sample_width
        )
        self._audio_stream = None

    def is_open(self):
        return self._audio_stream is not None

    def open(self):
        stream_options = dict(
            format=self._pyaudio_format,
            channels=self.channels,
            rate=self.sampling_rate,
            input=True,
            output=False,
            input_device_index=self.input_device_index,
            frames_per_buffer=self._chunk_size,
        )
        self._audio_stream = self._pyaudio_object.open(**stream_options)

    def close(self):
        stream = self._audio_stream
        if stream is None:
            return
        stream.stop_stream()
        stream.close()
        self._audio_stream = None

    def read(self, size):
        """
        Read `size` samples from the input stream. Return `None` if the
        stream is not active or if no data was read.

        :Raises:

            `IOError` if the stream is not open.
        """
        if self._audio_stream is None:
            raise IOError("Stream is not open")

        if not self._audio_stream.is_active():
            return None

        data = self._audio_stream.read(size)
        if data is None or len(data) < 1:
            return None
        return data
amine@67 667
amine@2 668
class StdinAudioSource(_FileAudioSource):
    """
    A class for an :class:`AudioSource` that reads data from standard input.

    :Parameters:

        `sampling_rate` : int
            Number of samples per second of audio stream. Default = 16000.
        `sample_width` : int
            Size in bytes of one audio sample. Default = 2.
        `channels` : int
            Number of channels of audio stream. Default = 1.
        `use_channel` : int or str
            channel to return when reading multi-channel data. Default = 0.
    """

    def __init__(
        self,
        sampling_rate=DEFAULT_SAMPLE_RATE,
        sample_width=DEFAULT_SAMPLE_WIDTH,
        channels=DEFAULT_NB_CHANNELS,
        use_channel=0,
    ):
        _FileAudioSource.__init__(
            self, sampling_rate, sample_width, channels, use_channel
        )
        self._is_open = False
        # size in bytes of one sample of all channels
        self._sample_size = sample_width * channels
        # binary data is read from `sys.stdin.buffer` under Python 3
        self._stream = sys.stdin.buffer if PYTHON_3 else sys.stdin

    def is_open(self):
        return self._is_open

    def open(self):
        self._is_open = True

    def close(self):
        self._is_open = False

    def _read_from_stream(self, size):
        data = self._stream.read(size * self._sample_size)
        return data or None
amine@67 707
amine@67 708
class PyAudioPlayer:
    """
    A class for audio playback using Pyaudio

    :Parameters:

        `sampling_rate` : int
            Number of samples per second of audio data. Default = 16000.
        `sample_width` : int
            Size in bytes of one audio sample. Possible values : 1, 2, 4.
            Default = 2.
        `channels` : int
            Number of channels of audio data. Default = 1.

    :Raises:

        `ValueError` if `sample_width` is not one of 1, 2 or 4.
    """

    def __init__(
        self,
        sampling_rate=DEFAULT_SAMPLE_RATE,
        sample_width=DEFAULT_SAMPLE_WIDTH,
        channels=DEFAULT_NB_CHANNELS,
    ):
        if sample_width not in (1, 2, 4):
            raise ValueError("Sample width must be one of: 1, 2 or 4 (bytes)")

        self.sampling_rate = sampling_rate
        self.sample_width = sample_width
        self.channels = channels

        # imported here so that the module can be used without pyaudio
        import pyaudio

        self._p = pyaudio.PyAudio()
        self.stream = self._p.open(
            format=self._p.get_format_from_width(self.sample_width),
            channels=self.channels,
            rate=self.sampling_rate,
            input=False,
            output=True,
        )

    def play(self, data):
        """ Play `data` chunk by chunk, then stop the output stream """
        if self.stream.is_stopped():
            self.stream.start_stream()

        for chunk in self._chunk_data(data):
            self.stream.write(chunk)

        self.stream.stop_stream()

    def stop(self):
        """ Stop and close the output stream and terminate PyAudio """
        if not self.stream.is_stopped():
            self.stream.stop_stream()
        self.stream.close()
        self._p.terminate()

    def _chunk_data(self, data):
        """ Yield successive chunks of about 100 ms of `data` """
        # make audio chunks of 100 ms to allow interruption (like ctrl+c).
        # A chunk always holds a whole number of frames (otherwise samples
        # would be split across chunks) and at least one frame (otherwise
        # the loop would never advance for very low sampling rates).
        frame_size = self.sample_width * self.channels
        frames_per_chunk = max(1, int(self.sampling_rate // 10))
        chunk_size = frames_per_chunk * frame_size
        for start in range(0, len(data), chunk_size):
            yield data[start : start + chunk_size]
amine@67 762
amine@2 763
def player_for(audio_source):
    """
    Build a :class:`PyAudioPlayer` suited to play data read from
    `audio_source`.

    :Parameters:

        `audio_source` :
            an `AudioSource` object.

    :Returns:

        `PyAudioPlayer` created with the sampling rate, sample width and
        number of channels reported by `audio_source`.
    """
    sampling_rate = audio_source.get_sampling_rate()
    sample_width = audio_source.get_sample_width()
    channels = audio_source.get_channels()
    return PyAudioPlayer(sampling_rate, sample_width, channels)
amine@112 784
def get_audio_source(input=None, **kwargs):
    """
    Create and return an audio source object according to `input`.

    :Parameters:

        `input` : str, bytes or None
            - "-": read audio data from standard input
              (:class:`StdinAudioSource`).
            - bytes: use `input` as raw audio data
              (:class:`BufferAudioSource`).
            - any other value that is not None: path to an audio file,
              loaded with :func:`from_file`.
            - None (default): read audio data from an input device
              (:class:`PyAudioSource`).

        `kwargs` :
            keyword arguments passed to the class or function used to create
            the audio source. For a file, `audio_format` and `large_file`
            are passed to :func:`from_file` as such and default to None and
            False respectively.
    """
    # read data from standard input
    if input == "-":
        return StdinAudioSource(**kwargs)

    # create AudioSource from raw data
    if isinstance(input, bytes):
        return BufferAudioSource(input, **kwargs)

    # read data from a file
    if input is not None:
        # pop these two so they are not passed a second time via **kwargs
        audio_format = kwargs.pop("audio_format", None)
        large_file = kwargs.pop("large_file", False)
        return from_file(
            filename=input,
            audio_format=audio_format,
            large_file=large_file,
            **kwargs
        )

    # read data from microphone via pyaudio
    return PyAudioSource(**kwargs)
amine@170 805
amine@112 806
def _load_raw(
    file,
    sampling_rate,
    sample_width,
    channels,
    use_channel=0,
    large_file=False,
):
    """
    Load a raw audio file with standard Python.
    If `large_file` is True, audio data will be lazily
    loaded to memory.

    See also :func:`from_file`.

    :Parameters:
        `file` : str
            path to the raw audio file to open
        `sampling_rate`: int
            sampling rate of audio data
        `sample_width`: int
            sample width of audio data
        `channels`: int
            number of channels of audio data
        `use_channel`: int or str
            audio channel to read if file is not mono audio. This must be an
            integer >= -channels and < channels, or one of 'left' (treated as
            0 or first channel), 'right' (treated as 1 or second channel) or
            'mix' (all channels are mixed into one).
        `large_file`: bool
            if True, return an object that reads data from the file on
            demand instead of loading the whole file. Default = False.

    :Returns:

        a `RawAudioSource` if `large_file` is True, otherwise a
        `BufferAudioSource` with one channel that contains the data of the
        selected channel.

    :Raises:

        `AudioParameterError` if one of `sampling_rate`, `sample_width` or
        `channels` is None.
    """
    if None in (sampling_rate, sample_width, channels):
        raise AudioParameterError(
            "All audio parameters are required for raw audio files"
        )

    if large_file:
        return RawAudioSource(
            file,
            sampling_rate=sampling_rate,
            sample_width=sample_width,
            channels=channels,
            use_channel=use_channel,
        )

    with open(file, "rb") as fp:
        data = fp.read()
    if channels != 1:
        # Accept the same `use_channel` values as the `large_file` branch
        # (normalization is a no-op for already normalized values).
        use_channel = _normalize_use_channel(use_channel)
        # TODO check if striding with mmap doesn't load all data to memory
        data = _extract_selected_channel(
            data, channels, sample_width, use_channel
        )
    return BufferAudioSource(
        data,
        sampling_rate=sampling_rate,
        sample_width=sample_width,
        channels=1,
    )
amine@112 868
amine@112 869
def _load_wave(filename, large_file=False, use_channel=0):
    """
    Load a wave audio file with standard Python.
    If `large_file` is True, a :class:`WaveAudioSource` that reads data
    from the file on demand is returned. Otherwise the whole file is read
    and a mono :class:`BufferAudioSource` holding the data of the selected
    channel is returned.

    See also :func:`from_file`.
    """
    if large_file:
        return WaveAudioSource(filename, use_channel)

    with wave.open(filename) as wave_file:
        channels = wave_file.getnchannels()
        sampling_rate = wave_file.getframerate()
        sample_width = wave_file.getsampwidth()
        # -1: read all frames
        frames = wave_file.readframes(-1)

    if channels > 1:
        frames = _extract_selected_channel(
            frames, channels, sample_width, use_channel
        )
    return BufferAudioSource(
        frames,
        sampling_rate=sampling_rate,
        sample_width=sample_width,
        channels=1,
    )
amine@113 890
amine@113 891
def _load_with_pydub(filename, audio_format, use_channel=0):
    """
    Load an audio (or video) file through pydub and return its audio
    data as a mono :class:`BufferAudioSource`.

    This function should not be called directly, use :func:`from_file`
    instead.

    :Parameters:

        `filename`:
            path to audio file.
        `audio_format`:
            string, audio file format (e.g. raw, webm, wav, ogg)
        `use_channel`:
            channel to keep when the decoded audio has more than one
            channel.
    """
    # Formats for which pydub exposes a dedicated constructor; any other
    # format goes through the generic `AudioSegment.from_file`.
    dedicated_readers = {
        "mp3": AudioSegment.from_mp3,
        "ogg": AudioSegment.from_ogg,
        "flv": AudioSegment.from_flv,
    }
    if audio_format in dedicated_readers:
        reader = dedicated_readers[audio_format]
    else:
        reader = AudioSegment.from_file

    segment = reader(filename)
    n_channels = segment.channels
    width = segment.sample_width
    raw_bytes = segment._data
    if n_channels > 1:
        raw_bytes = _extract_selected_channel(
            raw_bytes, n_channels, width, use_channel
        )
    return BufferAudioSource(
        data_buffer=raw_bytes,
        sampling_rate=segment.frame_rate,
        sample_width=width,
        channels=1,
    )
amine@114 923
amine@114 924
def from_file(filename, audio_format=None, large_file=False, **kwargs):
    """
    Read audio data from `filename` and return an `AudioSource` object.
    If `audio_format` is None, the format is guessed from `filename`.
    `filename` can be a compressed audio or video file; reading such
    files requires pydub (https://github.com/jiaaro/pydub).

    By default all audio data is loaded to memory and wrapped in a
    :class:`BufferAudioSource`. This is convenient most of the time unless
    the file is very large. In that case pass `large_file=True` so that
    data is read from disk lazily, each time :func:`AudioSource.read` is
    called.

    Note that lazy loading is only supported for wave and raw formats.

    See also :func:`to_file`.

    :Parameters:

        `filename`: str
            path to input audio or video file.
        `audio_format`: str
            audio format of the input file (e.g. raw, webm, wav, ogg)
        `large_file`: bool
            If True, audio is not fully loaded to memory; data is read
            from disk only when a window is requested.

    :kwargs:

        If the audio format is `raw`, the following keyword arguments are
        required:

        `sampling_rate`, `sr`: int
            sampling rate of audio data
        `sample_width`: int
            sample width (i.e. number of bytes used to represent one audio
            sample)
        `channels`: int
            number of channels of audio data

        For any format, the following keyword argument is optional:

        `use_channel`: int, str
            audio channel to extract from input file if file is not mono
            audio. This must be an integer >= 0 and < channels, or one of
            the special values `left` and `right` (treated as 0 and 1
            respectively).

    :Returns:

        An `AudioSource` object that reads data from input file.

    :Raises:

        An `AudioIOError` is raised if audio data cannot be read in the
        given format; or if format is `raw` and one or more audio
        parameters are missing.
    """
    fmt = _guess_audio_format(audio_format, filename)

    # Headerless data: every audio parameter has to come from the caller.
    if fmt == "raw":
        raw_params = _get_audio_parameters(kwargs)
        sampling_rate, sample_width, channels, selected = raw_params
        return _load_raw(
            filename,
            sampling_rate,
            sample_width,
            channels,
            selected,
            large_file,
        )

    selected = _normalize_use_channel(kwargs.get("use_channel"))
    if fmt in ("wav", "wave"):
        return _load_wave(filename, large_file, selected)

    # Everything else is decoded by pydub, which cannot load lazily.
    if large_file:
        raise AudioIOError("Large file format should be raw or wav")
    if not _WITH_PYDUB:
        raise AudioIOError(
            "pydub is required for audio formats other than raw or wav"
        )
    return _load_with_pydub(filename, audio_format=fmt, use_channel=selected)
amine@2 1000
amine@2 1001
def _save_raw(data, file):
    """
    Write `data` as-is to `file`, producing a headerless (raw) audio file.

    `file` is opened in binary write mode, so an existing file at that
    path is overwritten. See also :func:`to_file`.
    """
    with open(file, "wb") as raw_out:
        raw_out.write(data)
amine@104 1009
amine@104 1010
def _save_wave(data, file, sampling_rate, sample_width, channels):
    """
    Write `data` to `file` as a wave file with the given audio parameters.

    Raises `AudioParameterError` if any of `sampling_rate`, `sample_width`
    or `channels` is None. See also :func:`to_file`.
    """
    required = (sampling_rate, sample_width, channels)
    if None in required:
        raise AudioParameterError(
            "All audio parameters are required to save wave audio files"
        )
    with wave.open(file, "w") as wav_out:
        # Header fields must be set before any frame is written.
        wav_out.setframerate(sampling_rate)
        wav_out.setsampwidth(sample_width)
        wav_out.setnchannels(channels)
        wav_out.writeframes(data)
amine@104 1025
amine@104 1026
def _save_with_pydub(
    data, file, audio_format, sampling_rate, sample_width, channels
):
    """
    Encode `data` as `audio_format` and write it to `file` using pydub
    (https://github.com/jiaaro/pydub).

    An `AudioSegment` is built from the raw bytes and the given audio
    parameters, then exported to `file`. See also :func:`to_file`.
    """
    segment_params = {
        "frame_rate": sampling_rate,
        "sample_width": sample_width,
        "channels": channels,
    }
    # Build the segment first so that no output file is created if the
    # parameters are rejected.
    audio = AudioSegment(data, **segment_params)
    with open(file, "wb") as encoded_out:
        audio.export(encoded_out, format=audio_format)
amine@104 1042
amine@104 1043
def to_file(data, file, audio_format=None, **kwargs):
    """
    Writes audio data to file. If `audio_format` is `None`, output
    audio format will be guessed from extension. If `audio_format`
    is `None` and `file` comes without an extension then audio
    data will be written as a raw audio file.

    :Parameters:

        `data`: buffer of bytes
            audio data to be written. Can be a `bytes`, `bytearray`,
            `memoryview`, `array` or `numpy.ndarray` object.
        `file`: str
            path to output audio file
        `audio_format`: str
            audio format used to save data (e.g. raw, webm, wav, ogg)

    :kwargs:

        If an audio format other than raw is used, the following
        keyword arguments are required:

        `sampling_rate`, `sr`: int
            sampling rate of audio data
        `sample_width`, `sw`: int
            sample width (i.e., number of bytes of one audio sample)
        `channels`, `ch`: int
            number of channels of audio data

    :Raises:

        `AudioParameterError` if output format is different than raw and one
        or more audio parameters are missing.
        `AudioIOError` if audio data cannot be written in the desired format.
    """
    audio_format = _guess_audio_format(audio_format, file)

    # Raw output is a plain byte dump: no audio parameters are needed.
    if audio_format in (None, "raw"):
        _save_raw(data, file)
        return

    try:
        sampling_rate, sample_width, channels, _ = _get_audio_parameters(
            kwargs
        )
    except AudioParameterError as exc:
        # The two literals must live in one parenthesized expression:
        # as separate statements the second half of the message (and the
        # original error detail) would be silently discarded.
        err_message = (
            "All audio parameters are required to save formats "
            "other than raw. Error detail: {}"
        ).format(exc)
        raise AudioParameterError(err_message)

    if audio_format in ("wav", "wave"):
        _save_wave(data, file, sampling_rate, sample_width, channels)
    elif _WITH_PYDUB:
        _save_with_pydub(
            data, file, audio_format, sampling_rate, sample_width, channels
        )
    else:
        err_message = "cannot write file format {} (file name: {})"
        raise AudioIOError(err_message.format(audio_format, file))