annotate auditok/io.py @ 102:3a72db5f8798

Add _get_audio_parameters
author Amine Sehili <amine.sehili@gmail.com>
date Thu, 10 Jan 2019 20:52:20 +0100
parents a3da97fad36e
children 0335e53c8391
rev   line source
amine@2 1 """
amine@33 2 Module for low-level audio input-output operations.
amine@2 3
amine@32 4 Class summary
amine@32 5 =============
amine@32 6
amine@32 7 .. autosummary::
amine@32 8
amine@32 9 AudioSource
amine@32 10 Rewindable
amine@32 11 BufferAudioSource
amine@32 12 WaveAudioSource
amine@32 13 PyAudioSource
amine@32 14 StdinAudioSource
amine@32 15 PyAudioPlayer
amine@32 16
amine@32 17
amine@32 18 Function summary
amine@32 19 ================
amine@32 20
amine@32 21 .. autosummary::
amine@32 22
amine@32 23 from_file
amine@32 24 player_for
amine@2 25 """
amine@2 26
import os
import sys
import wave
from abc import ABCMeta, abstractmethod
amine@2 30
# Public names exported by a star-import of this module.
__all__ = ["AudioIOError", "AudioParameterError", "AudioSource", "Rewindable",
           "BufferAudioSource", "WaveAudioSource", "PyAudioSource",
           "StdinAudioSource", "PyAudioPlayer", "from_file", "player_for"]

# Default audio parameters used by the source and player constructors below.
DEFAULT_SAMPLE_RATE = 16000  # samples per second
DEFAULT_SAMPLE_WIDTH = 2  # bytes per sample
DEFAULT_NB_CHANNELS = 1  # mono
amine@2 38
class AudioIOError(Exception):
    """Base class for the errors raised by this module."""
amine@89 41
amine@89 42
class AudioParameterError(AudioIOError):
    """Error raised when an audio parameter or audio data is not valid."""
amine@89 45
amine@2 46
def check_audio_data(data, sample_width, channels):
    """
    Check that `data` holds a whole number of multi-channel samples.

    :Parameters:

        `data` : sized buffer
            audio data; only its length is inspected.

        `sample_width` : int
            size in bytes of one audio sample.

        `channels` : int
            number of channels.

    :Raises:

        `AudioParameterError` if `len(data)` is not an integer multiple of
        `sample_width * channels`.
    """
    frame_size = int(sample_width * channels)
    if len(data) % frame_size:
        raise AudioParameterError("The length of audio data must be an integer "
                                  "multiple of `sample_width * channels`")
amine@90 53
amine@90 54
amine@100 55 def _guess_audio_format(fmt, filename):
amine@100 56 if fmt is None:
amine@100 57 extension = os.path.splitext(filename.lower())[1][1:]
amine@100 58 return extension if extension else None
amine@100 59 return fmt.lower()
amine@100 60
amine@100 61
amine@101 62 def _normalize_use_channel(use_channel):
amine@101 63 """
amine@101 64 Returns a value of `use_channel` as expected by audio read/write fuctions.
amine@101 65 If `use_channel` is `None`, returns 0. If it's an integer, or the special
amine@101 66 str 'mix' returns it as is. If it's `left` or `right` returns 0 or 1
amine@101 67 respectively.
amine@101 68 """
amine@101 69 if use_channel is None:
amine@101 70 return 0
amine@101 71 if use_channel == "mix" or isinstance(use_channel, int):
amine@101 72 return use_channel
amine@101 73 try:
amine@101 74 return ["left", "right"].index(use_channel)
amine@101 75 except ValueError:
amine@101 76 err_message = "'use_channel' parameter must be an integer "
amine@101 77 "or one of ('left', 'right', 'mix'), found: '{}'".format(use_channel)
amine@101 78 raise AudioParameterError(err_message)
amine@101 79
amine@101 80
def _get_audio_parameters(param_dict):
    """
    Get audio parameters from a dictionary of parameters.

    A parameter can have a long name or a short name. If the long name is
    present (with a value other than `None`), the short name is ignored. If
    neither is present then `AudioParameterError` is raised, except for the
    `use_channel` (or `uc`) parameter for which a default value of 0 is
    returned.

    Also raises `AudioParameterError` if sampling rate, sample width or
    channels is not an integer.

    Expected parameters are:

        `sampling_rate`, `sr`: int, sampling rate.
        `sample_width`, `sw`: int, sample size in bytes.
        `channels`, `ch`: int, number of channels.
        `use_channel`, `uc`: int or str, which channel to use from data.
            Default value is 0 (first channel). The following special str
            values are also accepted:
                `left`: alias for 0
                `right`: alias for 1
                `mix`: indicates that all channels should be mixed up into
                one single channel

    :Returns:

        audio parameters as a tuple (sampling_rate,
                                     sample_width,
                                     channels,
                                     use_channel)
    """
    err_message = "'{ln}' (or '{sn}') must be an integer, found: '{val}'"
    parameters = []
    for long_name, short_name in (("sampling_rate", "sr"),
                                  ("sample_width", "sw"),
                                  ("channels", "ch")):
        # Compare against None explicitly: chaining the two lookups with
        # `or` would discard a falsy long-name value (e.g. 0) in favour of
        # the short name and report the wrong value in the error message.
        param = param_dict.get(long_name)
        if param is None:
            param = param_dict.get(short_name)
        if not isinstance(param, int):
            raise AudioParameterError(err_message.format(ln=long_name,
                                                         sn=short_name,
                                                         val=param))
        parameters.append(param)
    use_channel = param_dict.get("use_channel", param_dict.get("uc", 0))
    return tuple(parameters) + (_normalize_use_channel(use_channel),)
amine@102 126
amine@102 127
class AudioSource:
    """
    Abstract base for objects that deliver audio data.

    Concrete sources implement the methods that open and close an audio
    stream and that read a requested amount of audio samples from it.

    :Parameters:

        `sampling_rate` : int
            Number of samples per second of audio stream. Default = 16000.

        `sample_width` : int
            Size in bytes of one audio sample. Possible values : 1, 2, 4.
            Default = 2.

        `channels` : int
            Number of channels of audio stream. The current version supports
            only mono audio streams (i.e. one channel).

    :Raises:

        `AudioParameterError` if `sample_width` is not 1, 2 or 4, or if
        `channels` is not 1.
    """

    __metaclass__ = ABCMeta

    def __init__(self, sampling_rate=DEFAULT_SAMPLE_RATE,
                 sample_width=DEFAULT_SAMPLE_WIDTH,
                 channels=DEFAULT_NB_CHANNELS):

        # The sample width is validated before the number of channels.
        if sample_width not in (1, 2, 4):
            raise AudioParameterError("Sample width must be one of: 1, 2 or 4 (bytes)")

        if channels != 1:
            raise AudioParameterError("Only mono audio is currently supported")

        self._sampling_rate = sampling_rate
        self._sample_width = sample_width
        self._channels = channels

    # -- stream interface, to be provided by subclasses ------------------

    @abstractmethod
    def is_open(self):
        """ Tell whether the audio source is open (True) or not (False) """

    @abstractmethod
    def open(self):
        """ Open the audio source """

    @abstractmethod
    def close(self):
        """ Close the audio source """

    @abstractmethod
    def read(self, size):
        """
        Read at most `size` audio samples and return them.

        :Parameters:

            `size` : int
                the number of samples to read.

        :Returns:

            Audio data as a string of length 'N' * 'sample_width' * 'channels',
            where 'N' is:

            - `size` if `size` < 'left_samples'

            - 'left_samples' if `size` > 'left_samples'
        """

    # -- audio parameters --------------------------------------------------

    def get_sampling_rate(self):
        """ Return the number of samples per second of audio stream """
        return self.sampling_rate

    @property
    def sampling_rate(self):
        """ Samples per second of audio stream """
        return self._sampling_rate

    @property
    def sr(self):
        """ Samples per second of audio stream (short name) """
        return self._sampling_rate

    def get_sample_width(self):
        """ Return the number of bytes used to represent one audio sample """
        return self.sample_width

    @property
    def sample_width(self):
        """ Bytes used to represent one audio sample """
        return self._sample_width

    @property
    def sw(self):
        """ Bytes used to represent one audio sample (short name) """
        return self._sample_width

    def get_channels(self):
        """ Return the number of channels of this audio source """
        return self.channels

    @property
    def channels(self):
        """ Number of channels of this audio source """
        return self._channels

    @property
    def ch(self):
        """ Number of channels of this audio source (short name) """
        return self.channels
amine@72 237
amine@2 238
class Rewindable:
    """
    Abstract base for audio streams that can be rewound.

    Concrete classes implement the methods that go back to the start of the
    audio stream and that jump to an absolute position, given either in
    seconds or as a number of samples.
    """

    __metaclass__ = ABCMeta

    @abstractmethod
    def rewind(self):
        """ Return to the beginning of the audio stream """

    @abstractmethod
    def get_position(self):
        """ Return how many samples have been read so far """

    @abstractmethod
    def get_time_position(self):
        """ Return the duration, in seconds, of the data read so far """

    @abstractmethod
    def set_position(self, position):
        """ Jump to an absolute position

        :Parameters:

            `position` : int
                number of samples to skip from the start of the stream
        """

    @abstractmethod
    def set_time_position(self, time_position):
        """ Jump to an absolute position given in seconds

        :Parameters:

            `time_position` : float
                seconds to skip from the start of the stream
        """
amine@48 282
amine@2 283
class BufferAudioSource(AudioSource, Rewindable):
    """
    An :class:`AudioSource` that encapsulates and reads data from a memory buffer.
    It implements methods from :class:`Rewindable` and is therefore a navigable
    :class:`AudioSource`.

    :Parameters:

        `data_buffer` : str, basestring, Bytes
            audio data with a length multiple of (sample_width * channels)

        `sampling_rate`, `sample_width`, `channels` : int
            audio parameters, passed on to :class:`AudioSource`.

    :Raises:

        `AudioParameterError` if an audio parameter is rejected by
        :class:`AudioSource` or if the length of `data_buffer` is not a
        multiple of (sample_width * channels).
    """

    def __init__(self, data_buffer,
                 sampling_rate=DEFAULT_SAMPLE_RATE,
                 sample_width=DEFAULT_SAMPLE_WIDTH,
                 channels=DEFAULT_NB_CHANNELS):
        AudioSource.__init__(self, sampling_rate, sample_width, channels)
        check_audio_data(data_buffer, sample_width, channels)
        self._buffer = data_buffer
        # Size in bytes of one sample taken across all channels.
        self._sample_size_all_channels = sample_width * channels
        # The read position is tracked in bytes, not in samples.
        self._current_position_bytes = 0
        self._is_open = False

    def is_open(self):
        """ Return True if the source is open, False otherwise """
        return self._is_open

    def open(self):
        """ Mark the source as open so that `read` can be called """
        self._is_open = True

    def close(self):
        """ Mark the source as closed and go back to the start of the buffer """
        self._is_open = False
        self.rewind()

    def read(self, size):
        """
        Read and return at most `size` samples from the current position.

        :Returns:

            the data read, or `None` if no data is left.

        :Raises:

            `AudioIOError` if the source is not open.
        """
        if not self._is_open:
            raise AudioIOError("Stream is not open")
        bytes_to_read = self._sample_size_all_channels * size
        start = self._current_position_bytes
        data = self._buffer[start: start + bytes_to_read]
        if data:
            self._current_position_bytes += len(data)
            return data
        return None

    def get_data_buffer(self):
        """ Return all audio data as one string buffer. """
        return self._buffer

    def set_data(self, data_buffer):
        """ Set new data for this audio stream and go back to position 0.

        :Parameters:

            `data_buffer` : str, basestring, Bytes
                a string buffer with a length multiple of (sample_width * channels)
        """
        check_audio_data(data_buffer, self.sample_width, self.channels)
        self._buffer = data_buffer
        self._current_position_bytes = 0

    def append_data(self, data_buffer):
        """ Append data to this audio stream

        :Parameters:

            `data_buffer` : str, basestring, Bytes
                a buffer with a length multiple of (sample_width * channels)
        """
        check_audio_data(data_buffer, self.sample_width, self.channels)
        self._buffer += data_buffer

    def rewind(self):
        """ Go back to the beginning of the buffer """
        self.set_position(0)

    def get_position(self):
        """ Return the total number of already read samples, as an integer """
        # Floor division: a plain `/` yields a float on Python 3, whereas a
        # sample count must be an integer (as it already was on Python 2).
        return self._current_position_bytes // self._sample_size_all_channels

    def get_time_position(self):
        """ Return the total duration in seconds of already read data """
        bytes_per_second = self._sample_size_all_channels * self.sampling_rate
        return float(self._current_position_bytes) / bytes_per_second

    def set_position(self, position):
        """ Move to an absolute position, clamped to the end of the buffer

        :Parameters:

            `position` : int
                number of samples to skip from the start of the stream

        :Raises:

            `ValueError` if `position` is negative.
        """
        if position < 0:
            raise ValueError("position must be >= 0")
        position *= self._sample_size_all_channels
        self._current_position_bytes = min(len(self._buffer), position)

    def set_time_position(self, time_position):  # time in seconds
        """ Move to an absolute position expressed in seconds

        :Parameters:

            `time_position` : float
                seconds to skip from the start of the stream
        """
        position = int(self.sampling_rate * time_position)
        self.set_position(position)
amine@2 366
amine@48 367
class WaveAudioSource(AudioSource):
    """
    A class for an `AudioSource` that reads data from a wave file.

    The file is opened once at construction time to read its sampling rate,
    sample width and number of channels, then closed again until `open` is
    called.

    :Parameters:

        `filename` :
            path to a valid wave file

    :Raises:

        `AudioParameterError` if the audio parameters of the file are
        rejected by :class:`AudioSource`.
    """

    def __init__(self, filename):

        self._filename = filename
        self._audio_stream = None

        stream = wave.open(self._filename)
        try:
            AudioSource.__init__(self, stream.getframerate(),
                                 stream.getsampwidth(),
                                 stream.getnchannels())
        finally:
            # Close the file even when the parameters are rejected (e.g. a
            # stereo file), otherwise the file handle would leak.
            stream.close()

    def is_open(self):
        """ Return True if the wave file is currently open """
        return self._audio_stream is not None

    def open(self):
        """ Open the wave file; does nothing if it is already open """
        if self._audio_stream is None:
            self._audio_stream = wave.open(self._filename)

    def close(self):
        """ Close the wave file; does nothing if it is not open """
        if self._audio_stream is not None:
            self._audio_stream.close()
            self._audio_stream = None

    def read(self, size):
        """
        Read and return at most `size` frames from the wave file.

        :Returns:

            the data read, or `None` if no data is left.

        :Raises:

            `IOError` if the file is not open.
        """
        if self._audio_stream is None:
            raise IOError("Stream is not open")
        data = self._audio_stream.readframes(size)
        if data is None or len(data) < 1:
            return None
        return data
amine@2 409
amine@2 410
class PyAudioSource(AudioSource):
    """
    An `AudioSource` that captures its data from an audio input device
    (such as the built-in microphone) through PyAudio.

    :Parameters:

        `sampling_rate`, `sample_width`, `channels` : int
            audio parameters, passed on to :class:`AudioSource`.

        `frames_per_buffer` : int
            value given to PyAudio as `frames_per_buffer` when the stream is
            opened. Default = 1024.

        `input_device_index` :
            value given to PyAudio as `input_device_index` when the stream
            is opened. Default = None.
    """

    def __init__(self, sampling_rate=DEFAULT_SAMPLE_RATE,
                 sample_width=DEFAULT_SAMPLE_WIDTH,
                 channels=DEFAULT_NB_CHANNELS,
                 frames_per_buffer=1024,
                 input_device_index=None):

        AudioSource.__init__(self, sampling_rate, sample_width, channels)
        self._chunk_size = frames_per_buffer
        self.input_device_index = input_device_index

        # pyaudio is imported here so that it is only needed when a
        # PyAudioSource is actually created.
        import pyaudio
        self._pyaudio_object = pyaudio.PyAudio()
        self._pyaudio_format = self._pyaudio_object.get_format_from_width(self.sample_width)
        self._audio_stream = None

    def is_open(self):
        """ Return True if an input stream is currently open """
        return self._audio_stream is not None

    def open(self):
        """ Open a PyAudio input stream with this source's parameters """
        self._audio_stream = self._pyaudio_object.open(
            format=self._pyaudio_format,
            channels=self.channels,
            rate=self.sampling_rate,
            input=True,
            output=False,
            input_device_index=self.input_device_index,
            frames_per_buffer=self._chunk_size)

    def close(self):
        """ Stop and close the input stream, if there is one """
        if self._audio_stream is None:
            return
        self._audio_stream.stop_stream()
        self._audio_stream.close()
        self._audio_stream = None

    def read(self, size):
        """
        Read `size` frames from the input stream.

        :Returns:

            the data read, or `None` if the stream is not active or no data
            was read.

        :Raises:

            `IOError` if the stream is not open.
        """
        if self._audio_stream is None:
            raise IOError("Stream is not open")

        if not self._audio_stream.is_active():
            return None

        data = self._audio_stream.read(size)
        if data is None or len(data) < 1:
            return None
        return data
amine@67 460
amine@2 461
class StdinAudioSource(AudioSource):
    """
    An :class:`AudioSource` whose data comes from the standard input.
    """

    def __init__(self, sampling_rate=DEFAULT_SAMPLE_RATE,
                 sample_width=DEFAULT_SAMPLE_WIDTH,
                 channels=DEFAULT_NB_CHANNELS):

        AudioSource.__init__(self, sampling_rate, sample_width, channels)
        self._is_open = False

    def is_open(self):
        """ Return True if the source is open, False otherwise """
        return self._is_open

    def open(self):
        """ Mark the source as open so that `read` can be called """
        self._is_open = True

    def close(self):
        """ Mark the source as closed """
        self._is_open = False

    def read(self, size):
        """
        Read at most `size` samples from the standard input.

        :Returns:

            the data read, or `None` if no data was read.

        :Raises:

            `IOError` if the source is not open.
        """
        if not self._is_open:
            raise IOError("Stream is not open")

        nb_bytes = size * self.sample_width * self.channels
        # On Python 3 binary data is read from the underlying buffer of stdin.
        if sys.version_info >= (3, 0):
            stream = sys.stdin.buffer
        else:
            stream = sys.stdin
        data = stream.read(nb_bytes)

        if data is None or len(data) < 1:
            return None

        return data
amine@67 497
amine@67 498
class PyAudioPlayer():
    """
    A class for audio playback using Pyaudio

    A PyAudio output stream is opened as soon as the player is created.

    :Parameters:

        `sampling_rate` : int
            Number of samples per second. Default = 16000.

        `sample_width` : int
            Size in bytes of one audio sample. Possible values : 1, 2, 4.
            Default = 2.

        `channels` : int
            Number of channels. Default = 1.

    :Raises:

        `ValueError` if `sample_width` is not 1, 2 or 4.
    """

    def __init__(self, sampling_rate=DEFAULT_SAMPLE_RATE,
                 sample_width=DEFAULT_SAMPLE_WIDTH,
                 channels=DEFAULT_NB_CHANNELS):
        if sample_width not in (1, 2, 4):
            raise ValueError("Sample width must be one of: 1, 2 or 4 (bytes)")

        self.sampling_rate = sampling_rate
        self.sample_width = sample_width
        self.channels = channels

        import pyaudio
        self._p = pyaudio.PyAudio()
        self.stream = self._p.open(format=self._p.get_format_from_width(self.sample_width),
                                   channels=self.channels, rate=self.sampling_rate,
                                   input=False, output=True)

    def play(self, data):
        """ Play `data`, restarting the output stream first if it is stopped.

        The stream is stopped again once all data has been written.
        """
        if self.stream.is_stopped():
            self.stream.start_stream()

        for chunk in self._chunk_data(data):
            self.stream.write(chunk)

        self.stream.stop_stream()

    def stop(self):
        """ Stop and close the output stream and terminate PyAudio """
        if not self.stream.is_stopped():
            self.stream.stop_stream()
        self.stream.close()
        self._p.terminate()

    def _chunk_data(self, data):
        """ Yield successive chunks of `data` of about 100 ms each """
        # make audio chunks of 100 ms to allow interruption (like ctrl+c)
        frame_size = self.sample_width * self.channels
        chunk_size = int((self.sampling_rate * frame_size) / 10)
        # Round down to a whole number of frames so that no chunk ends in
        # the middle of a sample (e.g. 11025 Hz, 16-bit gives 2205 bytes).
        chunk_size -= chunk_size % frame_size
        # Always advance by at least one frame: a chunk size of 0 (very low
        # sampling rates) would make the loop below run forever.
        chunk_size = max(chunk_size, frame_size)
        start = 0
        while start < len(data):
            yield data[start: start + chunk_size]
            start += chunk_size
amine@67 542
amine@2 543
amine@99 544 def _save_raw(filename, data):
amine@99 545 """
amine@99 546 Save audio data as a headerless (i.e. raw) file.
amine@99 547 """
amine@99 548 with open(filename, "wb") as fp:
amine@99 549 fp.write(data)
amine@99 550
amine@99 551
amine@98 552 def _save_wave(filename, data, sampling_rate, sample_width, channels):
amine@98 553 """
amine@98 554 Save audio data to a wave file.
amine@98 555 """
amine@98 556 # use standard python's wave module
amine@98 557 with wave.open(filename, "w") as fp:
amine@98 558 fp.setframerate(sampling_rate)
amine@98 559 fp.setsampwidth(sample_width)
amine@98 560 fp.setnchannels(channels)
amine@98 561 fp.writeframes(data)
amine@98 562
amine@98 563
def from_file(filename):
    """
    Build the `AudioSource` object that reads the audio file `filename`.
    The :class:`AudioSource` class to use is chosen from the file's
    extension; only ".wav" (in any letter case) is handled.

    :Parameters:

        `filename` :
            path to an audio file.

    :Returns:

        an `AudioSource` object that reads data from the given file.

    :Raises:

        `Exception` if the extension of `filename` is not ".wav".
    """

    if not filename.lower().endswith(".wav"):
        raise Exception("Can not create an AudioSource object from '%s'" % (filename))

    return WaveAudioSource(filename)
amine@2 583
amine@2 584
def player_for(audio_source):
    """
    Create a :class:`PyAudioPlayer` suited to play data from `audio_source`.

    :Parameters:

        `audio_source` :
            an `AudioSource` object.

    :Returns:

        `PyAudioPlayer` that has the same sampling rate, sample width and
        number of channels as `audio_source`.
    """

    sampling_rate = audio_source.get_sampling_rate()
    sample_width = audio_source.get_sample_width()
    channels = audio_source.get_channels()
    return PyAudioPlayer(sampling_rate, sample_width, channels)