auditok: auditok/io.py annotate

annotate auditok/io.py @ 170:684392cc5019

Add get_audio_source

author	Amine Sehili <amine.sehili@gmail.com>
date	Thu, 07 Mar 2019 21:28:23 +0100
parents	1fa7fa2857d4
children	00790b3d5aa2

rev	line source
amine@2	1 """
amine@33	2 Module for low-level audio input-output operations.
amine@2	3
amine@32	4 Class summary
amine@32	5 =============
amine@32	6
amine@32	7 .. autosummary::
amine@32	8
amine@32	9 AudioSource
amine@32	10 Rewindable
amine@32	11 BufferAudioSource
amine@32	12 WaveAudioSource
amine@32	13 PyAudioSource
amine@32	14 StdinAudioSource
amine@32	15 PyAudioPlayer
amine@32	16
amine@32	17
amine@32	18 Function summary
amine@32	19 ================
amine@32	20
amine@32	21 .. autosummary::
amine@32	22
amine@32	23 from_file
amine@32	24 player_for
amine@2	25 """
amine@103	26 import os
amine@103	27 import sys
amine@2	28 import wave
amine@168	29 import warnings
amine@117	30 import audioop
amine@116	31 from array import array
amine@153	32 from functools import partial
amine@116	33
# Interpreter flag: True when running on Python 3 or later, False otherwise.
PYTHON_3 = sys.version_info >= (3, 0)
amine@2	38
amine@104	39 try:
amine@104	40 from pydub import AudioSegment
amine@112	41
amine@104	42 _WITH_PYDUB = True
amine@104	43 except ImportError:
amine@104	44 _WITH_PYDUB = False
amine@104	45
amine@112	46 __all__ = [
amine@112	47 "AudioIOError",
amine@112	48 "AudioParameterError",
amine@112	49 "AudioSource",
amine@112	50 "Rewindable",
amine@112	51 "BufferAudioSource",
amine@112	52 "WaveAudioSource",
amine@112	53 "PyAudioSource",
amine@112	54 "StdinAudioSource",
amine@112	55 "PyAudioPlayer",
amine@112	56 "from_file",
amine@112	57 "player_for",
amine@112	58 ]
amine@2	59
# Default audio parameters used by audio sources and players.
DEFAULT_SAMPLE_RATE = 16000  # samples per second
DEFAULT_SAMPLE_WIDTH = 2  # bytes per sample
DEFAULT_NB_CHANNELS = 1

# Maps a sample width in bytes to the matching `array.array` type code.
DATA_FORMAT = dict(zip((1, 2, 4), "bhi"))
amine@112	64
amine@2	65
class AudioIOError(Exception):
    """Base exception raised for audio input/output errors."""


class AudioParameterError(AudioIOError):
    """Raised when an audio parameter (or audio data layout) is invalid."""
amine@89	72
amine@2	73
def check_audio_data(data, sample_width, channels):
    """
    Check that `data` holds a whole number of multi-channel samples.

    :Parameters:

        `data` : bytes
            raw audio data.
        `sample_width` : int
            size in bytes of one audio sample.
        `channels` : int
            number of channels of audio data.

    Raises `AudioParameterError` if `len(data)` is not an integer multiple
    of `sample_width * channels`. Returns nothing otherwise.
    """
    frame_size = int(sample_width * channels)
    if len(data) % frame_size:
        raise AudioParameterError(
            "The length of audio data must be an integer "
            "multiple of `sample_width * channels`"
        )
amine@90	82
amine@90	83
amine@100	84 def _guess_audio_format(fmt, filename):
amine@100	85 if fmt is None:
amine@100	86 extension = os.path.splitext(filename.lower())[1][1:]
amine@100	87 return extension if extension else None
amine@100	88 return fmt.lower()
amine@100	89
amine@100	90
amine@101	91 def _normalize_use_channel(use_channel):
amine@101	92 """
amine@101	93 Returns a value of `use_channel` as expected by audio read/write fuctions.
amine@101	94 If `use_channel` is `None`, returns 0. If it's an integer, or the special
amine@101	95 str 'mix' returns it as is. If it's `left` or `right` returns 0 or 1
amine@101	96 respectively.
amine@101	97 """
amine@101	98 if use_channel is None:
amine@101	99 return 0
amine@101	100 if use_channel == "mix" or isinstance(use_channel, int):
amine@101	101 return use_channel
amine@101	102 try:
amine@101	103 return ["left", "right"].index(use_channel)
amine@101	104 except ValueError:
amine@101	105 err_message = "'use_channel' parameter must be an integer "
amine@101	106 "or one of ('left', 'right', 'mix'), found: '{}'".format(use_channel)
amine@101	107 raise AudioParameterError(err_message)
amine@101	108
amine@101	109
def _get_audio_parameters(param_dict):
    """
    Extract audio parameters from a dictionary.

    Each parameter may be given under a long or a short name; when the long
    name is present the short one is ignored. `AudioParameterError` is raised
    if sampling rate, sample width or channels is missing or is not a
    positive integer. `use_channel` (or `uc`) is optional and defaults to 0.

    Expected parameters are:

        `sampling_rate`, `sr`: int, sampling rate.
        `sample_width`, `sw`: int, sample size in bytes.
        `channels`, `ch`: int, number of channels.
        `use_channel`, `uc`: int or str, which channel to use from data.
            Default value is 0 (first channel). The following special str
            values are also accepted:
                `left`: alias for 0
                `right`: alias for 1
                `mix`: indicates that all channels should be mixed up into
                one single channel

    :Returns:

        audio parameters as a tuple
        (sampling_rate, sample_width, channels, use_channel)
    """
    template = "'{ln}' (or '{sn}') must be a positive integer, found: '{val}'"
    name_pairs = (
        ("sampling_rate", "sr"),
        ("sample_width", "sw"),
        ("channels", "ch"),
    )
    values = []
    for long_name, short_name in name_pairs:
        value = param_dict.get(long_name, param_dict.get(short_name))
        is_positive_int = isinstance(value, int) and value > 0
        if not is_positive_int:
            raise AudioParameterError(
                template.format(ln=long_name, sn=short_name, val=value)
            )
        values.append(value)
    sampling_rate, sample_width, channels = values
    raw_use_channel = param_dict.get("use_channel", param_dict.get("uc", 0))
    return (
        sampling_rate,
        sample_width,
        channels,
        _normalize_use_channel(raw_use_channel),
    )
amine@102	161
amine@102	162
def _array_to_bytes(a):
    """
    Return the content of the `array.array` object `a` as `bytes`.

    Uses `tobytes` on Python 3 and `tostring` otherwise.
    """
    serialize = a.tobytes if PYTHON_3 else a.tostring
    return serialize()
amine@116	171
amine@116	172
amine@117	173 def _mix_audio_channels(data, channels, sample_width):
amine@117	174 if channels == 1:
amine@117	175 return data
amine@117	176 if channels == 2:
amine@117	177 return audioop.tomono(data, sample_width, 0.5, 0.5)
amine@117	178 fmt = DATA_FORMAT[sample_width]
amine@117	179 buffer = array(fmt, data)
amine@117	180 mono_channels = [
amine@117	181 array(fmt, buffer[ch::channels]) for ch in range(channels)
amine@117	182 ]
amine@117	183 avg_arr = array(
amine@117	184 fmt, (sum(samples) // channels for samples in zip(*mono_channels))
amine@117	185 )
amine@117	186 return _array_to_bytes(avg_arr)
amine@117	187
amine@117	188
def _extract_selected_channel(data, channels, sample_width, use_channel):
    """
    Return the bytes of one channel out of interleaved multi-channel `data`.

    `use_channel` is either 'mix' (all channels are mixed into one) or an
    integer index such that -channels <= use_channel < channels; a negative
    index counts from the last channel. `AudioParameterError` is raised for
    an index out of that range.
    """
    if use_channel == "mix":
        return _mix_audio_channels(data, channels, sample_width)

    within_bounds = use_channel < channels and use_channel >= -channels
    if not within_bounds:
        plural = "s" if channels > 1 else ""
        template = (
            "use_channel == {} but audio data has only {} channel{}."
            " Selected channel must be 'mix' or an integer >= "
            "-channels and < channels"
        )
        raise AudioParameterError(
            template.format(use_channel, channels, plural)
        )

    # Translate a negative index into its positive equivalent.
    channel_index = use_channel + channels if use_channel < 0 else use_channel
    samples = array(DATA_FORMAT[sample_width], data)
    return _array_to_bytes(samples[channel_index::channels])
amine@116	205
amine@116	206
class AudioSource(object):
    """
    Base class for audio source objects.

    Subclasses should implement methods to open/close an audio stream
    and read the desired amount of audio samples.

    The class derives explicitly from `object` so that it is a new-style
    class on Python 2 as well; property setters defined by subclasses are
    only honoured on new-style classes.

    :Parameters:

        `sampling_rate` : int
            Number of samples per second of audio stream. Default = 16000.

        `sample_width` : int
            Size in bytes of one audio sample. Possible values : 1, 2, 4.
            Default = 2.

        `channels` : int
            Number of channels of audio stream.

    Raises `AudioParameterError` if `sample_width` is not 1, 2 or 4.
    """

    def __init__(
        self,
        sampling_rate=DEFAULT_SAMPLE_RATE,
        sample_width=DEFAULT_SAMPLE_WIDTH,
        channels=DEFAULT_NB_CHANNELS,
    ):

        if sample_width not in (1, 2, 4):
            raise AudioParameterError(
                "Sample width must be one of: 1, 2 or 4 (bytes)"
            )

        self._sampling_rate = sampling_rate
        self._sample_width = sample_width
        self._channels = channels

    def is_open(self):
        """ Return True if audio source is open, False otherwise """
        raise NotImplementedError

    def open(self):
        """ Open audio source """
        raise NotImplementedError

    def close(self):
        """ Close audio source """
        raise NotImplementedError

    def read(self, size):
        """
        Read and return `size` audio samples at most.

        :Parameters:

            `size` : int
                the number of samples to read.

        :Returns:

            Audio data as a string of length `N * sample_width * channels`,
            where `N` is:

            - `size` if `size` < 'left_samples'

            - 'left_samples' if `size` > 'left_samples'
        """
        raise NotImplementedError

    def get_sampling_rate(self):
        """ Return the number of samples per second of audio stream """
        return self.sampling_rate

    @property
    def sampling_rate(self):
        """ Number of samples per second of audio stream """
        return self._sampling_rate

    @property
    def sr(self):
        """ Number of samples per second of audio stream """
        return self._sampling_rate

    def get_sample_width(self):
        """ Return the number of bytes used to represent one audio sample """
        return self.sample_width

    @property
    def sample_width(self):
        """ Number of bytes used to represent one audio sample """
        return self._sample_width

    @property
    def sw(self):
        """ Number of bytes used to represent one audio sample """
        return self._sample_width

    def get_channels(self):
        """ Return the number of channels of this audio source """
        return self.channels

    @property
    def channels(self):
        """ Number of channels of this audio source """
        return self._channels

    @property
    def ch(self):
        """ Return the number of channels of this audio source """
        return self.channels
amine@72	316
amine@2	317
class Rewindable(AudioSource):
    """
    Base class for rewindable audio streams.
    Subclasses should implement methods to return to the beginning of an
    audio stream as well as method to move to an absolute audio position
    expressed in time or in number of samples.

    The `get_*`/`set_*` position methods are deprecated wrappers around the
    `position`, `position_s` and `position_ms` properties.
    """

    @property
    def rewindable(self):
        return True

    def rewind(self):
        """ Go back to the beginning of audio stream """
        raise NotImplementedError

    @property
    def position(self):
        """Stream position in number of samples"""
        raise NotImplementedError

    @position.setter
    def position(self, position):
        raise NotImplementedError

    @property
    def position_s(self):
        """Stream position in seconds"""
        return self.position / self.sampling_rate

    @position_s.setter
    def position_s(self, position_s):
        self.position = int(self.sampling_rate * position_s)

    @property
    def position_ms(self):
        """Stream position in milliseconds"""
        return (self.position * 1000) // self.sampling_rate

    @position_ms.setter
    def position_ms(self, position_ms):
        if not isinstance(position_ms, int):
            raise ValueError("position_ms should be an int")
        self.position = int(self.sampling_rate * position_ms / 1000)

    def get_position(self):
        """ Return the total number of already read samples """
        # stacklevel=2 attributes the warning to the caller's line rather
        # than to this wrapper.
        warnings.warn(
            "'get_position' is deprecated, use 'position' property instead",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.position

    def get_time_position(self):
        """ Return the total duration in seconds of already read data """
        warnings.warn(
            "'get_time_position' is deprecated, use 'position_s' or 'position_ms' properties instead",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.position_s

    def set_position(self, position):
        """ Move to an absolute position

        :Parameters:

            `position` : int
                number of samples to skip from the start of the stream
        """
        warnings.warn(
            "'set_position' is deprecated, set 'position' property instead",
            DeprecationWarning,
            stacklevel=2,
        )
        self.position = position

    def set_time_position(self, time_position):
        """ Move to an absolute position expressed in seconds

        :Parameters:

            `time_position` : float
                seconds to skip from the start of the stream
        """
        warnings.warn(
            "'set_time_position' is deprecated, set 'position_s' or 'position_ms' properties instead",
            DeprecationWarning,
            stacklevel=2,
        )
        self.position_s = time_position
amine@48	406
amine@2	407
class BufferAudioSource(Rewindable):
    """
    An :class:`AudioSource` that encapsulates and reads data from a memory buffer.
    It implements methods from :class:`Rewindable` and is therefore a navigable :class:`AudioSource`.

    :Parameters:

        `data_buffer` : bytes
            audio data, its length must be a multiple of
            (sample_width * channels), otherwise `AudioParameterError`
            is raised.
        `sampling_rate`, `sample_width`, `channels` : int
            audio parameters, see :class:`AudioSource`.
    """

    def __init__(
        self,
        data_buffer,
        sampling_rate=DEFAULT_SAMPLE_RATE,
        sample_width=DEFAULT_SAMPLE_WIDTH,
        channels=DEFAULT_NB_CHANNELS,
    ):
        AudioSource.__init__(self, sampling_rate, sample_width, channels)
        check_audio_data(data_buffer, sample_width, channels)
        self._buffer = data_buffer
        # Size in bytes of one sample taken over all channels.
        self._sample_size_all_channels = sample_width * channels
        self._current_position_bytes = 0
        self._is_open = False

    def is_open(self):
        return self._is_open

    def open(self):
        self._is_open = True

    def close(self):
        """ Mark the source as closed and go back to the start of the buffer """
        self._is_open = False
        self.rewind()

    def read(self, size):
        """
        Read at most `size` samples from the current position.

        Returns the data read, or `None` when no data is left. Raises
        `AudioIOError` if the source is not open.
        """
        if not self._is_open:
            raise AudioIOError("Stream is not open")
        bytes_to_read = self._sample_size_all_channels * size
        data = self._buffer[
            self._current_position_bytes : self._current_position_bytes
            + bytes_to_read
        ]
        if data:
            self._current_position_bytes += len(data)
            return data
        return None

    @property
    def data(self):
        return self._buffer

    def get_data_buffer(self):
        """ Return all audio data as one string buffer. """
        return self._buffer

    def set_data(self, data_buffer):
        """ Set new data for this audio stream.

        :Parameters:

            `data_buffer` : str, basestring, Bytes
                a string buffer with a length multiple of (sample_width * channels)
        """
        check_audio_data(data_buffer, self.sample_width, self.channels)
        self._buffer = data_buffer
        self._current_position_bytes = 0

    def append_data(self, data_buffer):
        """ Append data to this audio stream

        :Parameters:

            `data_buffer` : str, basestring, Bytes
                a buffer with a length multiple of (sample_width * channels)
        """
        check_audio_data(data_buffer, self.sample_width, self.channels)
        self._buffer += data_buffer

    def rewind(self):
        """ Go back to the beginning of audio stream """
        # Assign the property directly: going through the deprecated
        # `set_position` made every rewind()/close() emit a
        # DeprecationWarning.
        self.position = 0

    @property
    def position(self):
        """Stream position in number of samples"""
        return self._current_position_bytes // self._sample_size_all_channels

    @position.setter
    def position(self, position):
        # Work in bytes; a negative position is counted from the end of data.
        position *= self._sample_size_all_channels
        if position < 0:
            position += len(self.data)
        if position < 0 or position > len(self.data):
            raise IndexError("Position out of range")
        self._current_position_bytes = position

    @property
    def position_ms(self):
        """Stream position in milliseconds"""
        return (self._current_position_bytes * 1000) // (
            self._sample_size_all_channels * self.sampling_rate
        )

    @position_ms.setter
    def position_ms(self, position_ms):
        if not isinstance(position_ms, int):
            raise ValueError("position_ms should be an int")
        self.position = int(self.sampling_rate * position_ms / 1000)
amine@165	511
amine@48	512
class _FileAudioSource(AudioSource):
    """
    Base class for audio sources that read from a stream object.

    Subclasses provide `open` and `_read_from_stream`. Data returned by
    `read` is reduced to the channel selected with `use_channel` when the
    source has more than one channel.
    """

    def __init__(self, sampling_rate, sample_width, channels, use_channel):
        # Set before anything that can raise, so `is_open` (called from
        # `__del__`) does not fail with AttributeError on a half-built object.
        self._audio_stream = None
        AudioSource.__init__(self, sampling_rate, sample_width, channels)
        self._use_channel = _normalize_use_channel(use_channel)
        if channels > 1:
            self._extract_selected_channel = partial(
                _extract_selected_channel,
                channels=channels,
                sample_width=sample_width,
                use_channel=self._use_channel,
            )
        else:
            # Mono data is passed through unchanged.
            self._extract_selected_channel = lambda x: x

    def __del__(self):
        if self.is_open():
            self.close()

    @property
    def use_channel(self):
        """ Normalized value of the `use_channel` constructor argument """
        return self._use_channel

    def is_open(self):
        return self._audio_stream is not None

    def close(self):
        if self._audio_stream is not None:
            self._audio_stream.close()
            self._audio_stream = None

    def _read_from_stream(self, size):
        """ Read `size` samples of raw data; implemented by subclasses """
        raise NotImplementedError

    def read(self, size):
        """
        Read `size` samples and return the data of the selected channel, or
        `None` if no data was read. Raises `AudioIOError` if the source is
        not open.
        """
        if not self.is_open():
            raise AudioIOError("Audio stream is not open")
        data = self._read_from_stream(size)
        if data:
            return self._extract_selected_channel(data)
        return None
amine@153	554
amine@153	555
class RawAudioSource(_FileAudioSource, Rewindable):
    """
    An audio source that reads headerless (raw) audio data from a file.

    The file is opened in binary mode when `open` is called and data is
    read from it on demand.

    :Parameters:

        `file` :
            file to read, passed as is to the built-in `open`.
        `sampling_rate`, `sample_width`, `channels` : int
            parameters of audio data stored in the file.
        `use_channel` :
            channel to return when reading, default is 0.
    """

    def __init__(
        self, file, sampling_rate, sample_width, channels, use_channel=0
    ):
        _FileAudioSource.__init__(
            self, sampling_rate, sample_width, channels, use_channel
        )
        self._file = file
        self._audio_stream = None
        # number of bytes of one sample over all channels
        self._sample_size = sample_width * channels

    def open(self):
        """ Open the underlying file unless it is already open """
        if self._audio_stream is not None:
            return
        self._audio_stream = open(self._file, "rb")

    def _read_from_stream(self, size):
        """ Read `size` samples (all channels) from the file """
        return self._audio_stream.read(size * self._sample_size)
amine@154	575
amine@154	576
class WaveAudioSource(_FileAudioSource, Rewindable):
    """
    A class for an `AudioSource` that reads data from a wave file.
    This class should be used for large wave files to avoid loading
    the whole data to memory.

    :Parameters:

        `filename` :
            path to a valid wave file.
        `use_channel` :
            channel to return when reading, default is 0.
    """

    def __init__(self, filename, use_channel=0):
        self._filename = filename
        self._audio_stream = None
        # The file is opened here only to read its audio parameters.
        stream = wave.open(self._filename, "rb")
        try:
            _FileAudioSource.__init__(
                self,
                stream.getframerate(),
                stream.getsampwidth(),
                stream.getnchannels(),
                use_channel,
            )
        finally:
            # Close even if parameter validation raises, so the file handle
            # does not leak.
            stream.close()

    def open(self):
        """ Open the wave file unless it is already open """
        if self._audio_stream is None:
            self._audio_stream = wave.open(self._filename)

    def _read_from_stream(self, size):
        """ Read `size` frames from the wave file """
        return self._audio_stream.readframes(size)
amine@2	608
amine@2	609
class PyAudioSource(AudioSource):
    """
    An `AudioSource` that reads audio data from an input device using
    PyAudio.

    :Parameters:

        `sampling_rate`, `sample_width`, `channels` : int
            audio parameters, see :class:`AudioSource`.
        `frames_per_buffer` : int
            value passed as `frames_per_buffer` when the PyAudio stream is
            opened. Default = 1024.
        `input_device_index` :
            value passed as `input_device_index` when the PyAudio stream is
            opened. Default = None.
    """

    def __init__(
        self,
        sampling_rate=DEFAULT_SAMPLE_RATE,
        sample_width=DEFAULT_SAMPLE_WIDTH,
        channels=DEFAULT_NB_CHANNELS,
        frames_per_buffer=1024,
        input_device_index=None,
    ):
        AudioSource.__init__(self, sampling_rate, sample_width, channels)
        self._chunk_size = frames_per_buffer
        self.input_device_index = input_device_index

        # pyaudio is imported only when a PyAudioSource is created
        import pyaudio

        self._pyaudio_object = pyaudio.PyAudio()
        self._pyaudio_format = self._pyaudio_object.get_format_from_width(
            self.sample_width
        )
        self._audio_stream = None

    def is_open(self):
        return self._audio_stream is not None

    def open(self):
        """ Open an input-only PyAudio stream """
        stream_options = dict(
            format=self._pyaudio_format,
            channels=self.channels,
            rate=self.sampling_rate,
            input=True,
            output=False,
            input_device_index=self.input_device_index,
            frames_per_buffer=self._chunk_size,
        )
        self._audio_stream = self._pyaudio_object.open(**stream_options)

    def close(self):
        """ Stop and close the stream if it is open """
        if self._audio_stream is None:
            return
        self._audio_stream.stop_stream()
        self._audio_stream.close()
        self._audio_stream = None

    def read(self, size):
        """
        Read `size` frames from the stream.

        Returns `None` if the stream is not active or no data was read.
        Raises `IOError` if the stream is not open.
        """
        if self._audio_stream is None:
            raise IOError("Stream is not open")

        if not self._audio_stream.is_active():
            return None

        data = self._audio_stream.read(size)
        if data is not None and len(data) >= 1:
            return data
        return None
amine@67	667
amine@2	668
class StdinAudioSource(_FileAudioSource):
    """
    An :class:`AudioSource` that reads raw audio data from standard input.

    On Python 3 data is read from `sys.stdin.buffer`, otherwise from
    `sys.stdin`.
    """

    def __init__(
        self,
        sampling_rate=DEFAULT_SAMPLE_RATE,
        sample_width=DEFAULT_SAMPLE_WIDTH,
        channels=DEFAULT_NB_CHANNELS,
        use_channel=0,
    ):
        _FileAudioSource.__init__(
            self, sampling_rate, sample_width, channels, use_channel
        )
        self._is_open = False
        # number of bytes of one sample over all channels
        self._sample_size = sample_width * channels
        self._stream = sys.stdin.buffer if PYTHON_3 else sys.stdin

    def is_open(self):
        return self._is_open

    def open(self):
        """ Mark the source as open """
        self._is_open = True

    def close(self):
        """ Mark the source as closed; standard input itself is not closed """
        self._is_open = False

    def _read_from_stream(self, size):
        """ Read `size` samples (all channels); return `None` if no data """
        data = self._stream.read(size * self._sample_size)
        return data or None
amine@67	707
amine@67	708
class PyAudioPlayer:
    """
    Audio playback based on PyAudio.

    An output-only PyAudio stream is opened when the player is created.
    `ValueError` is raised if `sample_width` is not 1, 2 or 4.
    """

    def __init__(
        self,
        sampling_rate=DEFAULT_SAMPLE_RATE,
        sample_width=DEFAULT_SAMPLE_WIDTH,
        channels=DEFAULT_NB_CHANNELS,
    ):
        if sample_width not in (1, 2, 4):
            raise ValueError("Sample width must be one of: 1, 2 or 4 (bytes)")

        self.sampling_rate = sampling_rate
        self.sample_width = sample_width
        self.channels = channels

        # pyaudio is imported only when a player is created
        import pyaudio

        self._p = pyaudio.PyAudio()
        sample_format = self._p.get_format_from_width(self.sample_width)
        self.stream = self._p.open(
            format=sample_format,
            channels=self.channels,
            rate=self.sampling_rate,
            input=False,
            output=True,
        )

    def play(self, data):
        """ Write `data` to the output stream chunk by chunk, then stop it """
        if self.stream.is_stopped():
            self.stream.start_stream()

        for piece in self._chunk_data(data):
            self.stream.write(piece)

        self.stream.stop_stream()

    def stop(self):
        """ Stop and close the stream and terminate PyAudio """
        if not self.stream.is_stopped():
            self.stream.stop_stream()
        self.stream.close()
        self._p.terminate()

    def _chunk_data(self, data):
        """ Yield successive slices of `data`, each about 100 ms of audio """
        # chunks of 100 ms allow playback to be interrupted (like ctrl+c)
        bytes_per_second = (
            self.sampling_rate * self.sample_width * self.channels
        )
        chunk_size = int(bytes_per_second / 10)
        offset = 0
        while offset < len(data):
            yield data[offset : offset + chunk_size]
            offset += chunk_size
amine@67	762
amine@2	763
def player_for(audio_source):
    """
    Build a :class:`PyAudioPlayer` suited to play data from `audio_source`.

    :Parameters:

        `audio_source` :
            an `AudioSource` object.

    :Returns:

        a `PyAudioPlayer` created with the sampling rate, sample width and
        number of channels reported by `audio_source`.
    """
    sampling_rate = audio_source.get_sampling_rate()
    sample_width = audio_source.get_sample_width()
    channels = audio_source.get_channels()
    return PyAudioPlayer(sampling_rate, sample_width, channels)
amine@112	784
def get_audio_source(input=None, **kwargs):
    """
    Create and return an audio source object according to `input`.

    :Parameters:

        `input` : str, bytes or None
            - "-": read data from standard input (`StdinAudioSource`).
            - `bytes` object: use it as raw audio data
              (`BufferAudioSource`).
            - any other value that is not None: treat it as a file name and
              open it with :func:`from_file`.
            - None (default): read data from the microphone via pyaudio
              (`PyAudioSource`).
        `kwargs` :
            keyword arguments passed to the selected class or function. When
            a file is read, `audio_format` (default None) and `large_file`
            (default False) are taken out of `kwargs` and passed explicitly
            to :func:`from_file`.
    """
    # read data from standard input
    if input == "-":
        return StdinAudioSource(**kwargs)

    # create AudioSource from raw data
    if isinstance(input, bytes):
        return BufferAudioSource(input, **kwargs)

    # read data from microphone via pyaudio
    if input is None:
        return PyAudioSource(**kwargs)

    # read data from a file. `audio_format` and `large_file` are popped so
    # they are not passed a second time through **kwargs, which raised
    # "TypeError: got multiple values for keyword argument".
    audio_format = kwargs.pop("audio_format", None)
    large_file = kwargs.pop("large_file", False)
    return from_file(
        filename=input,
        audio_format=audio_format,
        large_file=large_file,
        **kwargs
    )
amine@170	805
amine@112	806
def _load_raw(
    file,
    sampling_rate,
    sample_width,
    channels,
    use_channel=0,
    large_file=False,
):
    """
    Load a raw (headerless) audio file with standard Python.
    If `large_file` is True, data is read lazily through a
    `RawAudioSource`, otherwise the whole file is read into memory.

    See also :func:`from_file`.

    :Parameters:
        `file` : str
            raw audio file to open, passed to the built-in `open`.
        `sampling_rate`: int
            sampling rate of audio data
        `sample_width`: int
            sample width of audio data
        `channels`: int
            number of channels of audio data
        `use_channel`: int or str
            audio channel to use if file is not mono audio.
        `large_file`: bool
            if True, do not load the whole file into memory.

    :Returns:

        a `RawAudioSource` if `large_file` is True, otherwise a
        single-channel `BufferAudioSource` holding the data of the selected
        channel.

    Raises `AudioParameterError` if sampling rate, sample width or channels
    is None.
    """
    if None in (sampling_rate, sample_width, channels):
        raise AudioParameterError(
            "All audio parameters are required for raw audio files"
        )

    if large_file:
        return RawAudioSource(
            file,
            sampling_rate=sampling_rate,
            sample_width=sample_width,
            channels=channels,
            use_channel=use_channel,
        )

    with open(file, "rb") as raw_file:
        content = raw_file.read()
    if channels != 1:
        # TODO check if striding with mmap doesn't load all data to memory
        content = _extract_selected_channel(
            content, channels, sample_width, use_channel
        )
    return BufferAudioSource(
        content,
        sampling_rate=sampling_rate,
        sample_width=sample_width,
        channels=1,
    )
amine@112	868
amine@112	869
def _load_wave(filename, large_file=False, use_channel=0):
    """
    Load a wave audio file with standard Python.
    If `large_file` is True, audio data will be lazily
    loaded to memory.

    See also :func:`from_file`.

    :Parameters:

        `filename` :
            path to a wave file.
        `large_file` : bool
            if True return a `WaveAudioSource` that reads data on demand.
        `use_channel` :
            channel to use if the file has more than one channel.

    :Returns:

        a `WaveAudioSource` if `large_file` is True, otherwise a
        single-channel `BufferAudioSource` holding the data of the selected
        channel.
    """
    # Objects returned by `wave.open` are context managers only on
    # Python >= 3.4; `closing` works on every supported version.
    from contextlib import closing

    if large_file:
        return WaveAudioSource(filename, use_channel)
    with closing(wave.open(filename)) as fp:
        channels = fp.getnchannels()
        srate = fp.getframerate()
        swidth = fp.getsampwidth()
        data = fp.readframes(-1)
    if channels > 1:
        data = _extract_selected_channel(data, channels, swidth, use_channel)
    return BufferAudioSource(
        data, sampling_rate=srate, sample_width=swidth, channels=1
    )
amine@113	890
amine@113	891
def _load_with_pydub(filename, audio_format, use_channel=0):
    """Open compressed audio file using pydub. If a video file
    is passed, its audio track(s) are extracted and loaded.
    This function should not be called directly, use :func:`from_file`
    instead.

    :Parameters:

    `filename`:
        path to audio file.
    `audio_format`:
        string, audio file format (e.g. raw, webm, wav, ogg)
    `use_channel`:
        channel to keep when the decoded audio has more than one channel
        (passed as is to `_extract_selected_channel`), default: 0.

    :Returns:

    A `BufferAudioSource` created with `channels=1` and the frame rate and
    sample width reported by pydub.
    """
    # Formats for which pydub exposes a dedicated loader; anything else goes
    # through the generic `AudioSegment.from_file`.
    func_dict = {
        "mp3": AudioSegment.from_mp3,
        "ogg": AudioSegment.from_ogg,
        "flv": AudioSegment.from_flv,
    }
    open_function = func_dict.get(audio_format, AudioSegment.from_file)
    segment = open_function(filename)
    # Use pydub's public accessor rather than the private `_data` attribute.
    data = segment.raw_data
    if segment.channels > 1:
        data = _extract_selected_channel(
            data, segment.channels, segment.sample_width, use_channel
        )
    return BufferAudioSource(
        data_buffer=data,
        sampling_rate=segment.frame_rate,
        sample_width=segment.sample_width,
        channels=1,
    )
amine@114	923
amine@114	924
def from_file(filename, audio_format=None, large_file=False, **kwargs):
    """
    Read audio data from `filename` and return an `AudioSource` object.
    If `audio_format` is None, the format is guessed from `filename`.
    `filename` can be a compressed audio or video file; this requires pydub
    (https://github.com/jiaaro/pydub) to be installed.

    By default all audio data are loaded to memory and a
    :class:`BufferAudioSource` object is created from them. This is convenient
    most of the time unless the audio file is very large. In that case, and
    in order to load audio data in a lazy manner (i.e. read data from disk
    each time :func:`AudioSource.read` is called), `large_file` should be
    True.

    Note that the current implementation supports only wave and raw formats
    for lazy audio loading.

    See also :func:`to_file`.

    :Parameters:

    `filename`: str
        path to input audio or video file.
    `audio_format`: str
        format of the audio data to read (e.g. raw, webm, wav, ogg)
    `large_file`: bool
        If True, audio won't be fully loaded to memory; data are only read
        from disk when a window is requested.

    :kwargs:

    If the audio format is `raw`, the following keyword arguments are
    required:

    `sampling_rate`, `sr`: int
        sampling rate of audio data
    `sample_width`: int
        sample width (i.e. number of bytes used to represent one audio sample)
    `channels`: int
        number of channels of audio data

    The following keyword argument is optional for all formats:

    `use_channel`: int, str
        audio channel to extract from input file if file is not mono audio.
        This must be an integer >= 0 and < channels, or one of the special
        values `left` and `right` (treated as 0 and 1 respectively).

    :Returns:

    An `AudioSource` object that reads data from input file.

    :Raises:

    An `AudioIOError` is raised if audio data cannot be read in the given
    format; or if format is `raw` and one or more audio parameters are
    missing.
    """
    audio_format = _guess_audio_format(audio_format, filename)

    # Headerless data: every audio parameter has to come from the caller.
    if audio_format == "raw":
        raw_params = _get_audio_parameters(kwargs)
        srate, swidth, channels, use_channel = raw_params
        return _load_raw(
            filename, srate, swidth, channels, use_channel, large_file
        )

    selected_channel = _normalize_use_channel(kwargs.get("use_channel"))

    if audio_format in ("wav", "wave"):
        return _load_wave(filename, large_file, selected_channel)

    # Beyond this point only pydub can decode the file, and it cannot do so
    # lazily.
    if large_file:
        raise AudioIOError("Large file format should be raw or wav")

    if not _WITH_PYDUB:
        raise AudioIOError(
            "pydub is required for audio formats other than raw or wav"
        )

    return _load_with_pydub(
        filename, audio_format=audio_format, use_channel=selected_channel
    )
amine@2	1000
amine@2	1001
def _save_raw(data, file):
    """
    Write `data` unchanged to `file`, producing a headerless (i.e. raw)
    audio file.
    See also :func:`to_file`.
    """
    with open(file, "wb") as raw_output:
        raw_output.write(data)
amine@104	1009
amine@104	1010
def _save_wave(data, file, sampling_rate, sample_width, channels):
    """
    Write audio data to `file` in wave format.
    An `AudioParameterError` is raised if any of `sampling_rate`,
    `sample_width` or `channels` is None.
    See also :func:`to_file`.
    """
    required_params = (sampling_rate, sample_width, channels)
    if None in required_params:
        raise AudioParameterError(
            "All audio parameters are required to save wave audio files"
        )

    with wave.open(file, "w") as wave_writer:
        # The header fields must be set before any frame is written.
        wave_writer.setframerate(sampling_rate)
        wave_writer.setsampwidth(sample_width)
        wave_writer.setnchannels(channels)
        wave_writer.writeframes(data)
amine@104	1025
amine@104	1026
def _save_with_pydub(
    data, file, audio_format, sampling_rate, sample_width, channels
):
    """
    Export audio data to `file` in `audio_format` using pydub
    (https://github.com/jiaaro/pydub).
    See also :func:`to_file`.
    """
    segment_params = {
        "frame_rate": sampling_rate,
        "sample_width": sample_width,
        "channels": channels,
    }
    audio_segment = AudioSegment(data, **segment_params)
    with open(file, "wb") as output:
        audio_segment.export(output, format=audio_format)
amine@104	1042
amine@104	1043
def to_file(data, file, audio_format=None, **kwargs):
    """
    Writes audio data to file. If `audio_format` is `None`, output
    audio format will be guessed from extension. If `audio_format`
    is `None` and `file` comes without an extension then audio
    data will be written as a raw audio file.

    :Parameters:

    `data`: buffer of bytes
        audio data to be written. Can be a `bytes`, `bytearray`,
        `memoryview`, `array` or `numpy.ndarray` object.
    `file`: str
        path to output audio file
    `audio_format`: str
        audio format used to save data (e.g. raw, webm, wav, ogg)

    :kwargs:

    If an audio format other than raw is used, the following
    keyword arguments are required:

    `sampling_rate`, `sr`: int
        sampling rate of audio data
    `sample_width`, `sw`: int
        sample width (i.e., number of bytes of one audio sample)
    `channels`, `ch`: int
        number of channels of audio data

    :Raises:

    `AudioParameterError` if output format is different than raw and one
    or more audio parameters are missing.
    `AudioIOError` if audio data cannot be written in the desired format.
    """
    audio_format = _guess_audio_format(audio_format, file)
    if audio_format in (None, "raw"):
        _save_raw(data, file)
        return
    try:
        params = _get_audio_parameters(kwargs)
        sampling_rate, sample_width, channels, _ = params
    except AudioParameterError as exc:
        # The literals must be parenthesised: written as two separate
        # statements, the second one (and the error detail) is discarded.
        err_message = (
            "All audio parameters are required to save formats "
            "other than raw. Error detail: {}"
        ).format(exc)
        raise AudioParameterError(err_message)
    if audio_format in ("wav", "wave"):
        _save_wave(data, file, sampling_rate, sample_width, channels)
    elif _WITH_PYDUB:
        _save_with_pydub(
            data, file, audio_format, sampling_rate, sample_width, channels
        )
    else:
        err_message = "cannot write file format {} (file name: {})"
        raise AudioIOError(err_message.format(audio_format, file))

Mercurial > hg > auditok

annotate auditok/io.py @ 170:684392cc5019