amine@33
|
1 """
|
amine@33
|
2 This module gathers processing (i.e. tokenization) classes.
|
amine@33
|
3
|
amine@33
|
4 Class summary
|
amine@33
|
5 =============
|
amine@33
|
6
|
amine@33
|
7 .. autosummary::
|
amine@33
|
8
|
amine@33
|
9 StreamTokenizer
|
amine@33
|
10 """
|
amine@187
|
11 import os
|
amine@222
|
12 import math
|
amine@179
|
13 from auditok.util import AudioDataSource, DataValidator, AudioEnergyValidator
|
amine@239
|
14 from auditok.io import check_audio_data, to_file, player_for, get_audio_source
|
amine@236
|
15 from auditok.exceptions import TooSamllBlockDuration
|
amine@2
|
16
|
amine@246
|
17 try:
|
amine@246
|
18 from . import signal_numpy as signal
|
amine@246
|
19 except ImportError:
|
amine@246
|
20 from . import signal
|
amine@246
|
21
|
amine@179
|
22 __all__ = ["split", "AudioRegion", "StreamTokenizer"]
|
amine@179
|
23
|
amine@179
|
24
|
amine@179
|
# Default tokenization parameters used by `split` when the caller does not
# override them via keyword arguments.
DEFAULT_ANALYSIS_WINDOW = 0.05  # analysis window duration, in seconds
DEFAULT_ENERGY_THRESHOLD = 50  # default energy threshold for AudioEnergyValidator
# Small value added to a float division result before floor-rounding to
# counter float imprecision (e.g., 0.3 / 0.1 == 2.9999999999999996).
_EPSILON = 1e-6
|
amine@179
|
28
|
amine@179
|
29
|
amine@179
|
def split(
    input,
    min_dur=0.2,
    max_dur=5,
    max_silence=0.3,
    drop_trailing_silence=False,
    strict_min_dur=False,
    **kwargs
):
    """Split audio data and return a generator of `AudioRegion`s.

    TODO: implement max_trailing_silence

    :Parameters:

    input: str, bytes, AudioSource, AudioRegion, AudioDataSource
        input audio data. If str, it should be a path to an existing audio
        file. If bytes, input is considered as raw audio data.
    min_dur: float
        minimum duration in seconds of a detected audio event. Default: 0.2.
        Using large values, very short audio events (e.g., very short 1-word
        utterances like 'yes' or 'no') can be missed.
        Using very short values might result in a high number of short,
        unuseful audio events.
    max_dur: float
        maximum duration in seconds of a detected audio event. Default: 5.
    max_silence: float
        maximum duration of consecutive silence within an audio event. There
        might be many silent gaps of this duration within an audio event.
    drop_trailing_silence: bool
        drop trailing silence from detected events
    strict_min_dur: bool
        strict minimum duration. Drop an event if it is shorter than ´min_dur´
        even if it is contiguous to the latest valid event. This happens if
        the latest event had reached ´max_dur´.
    analysis_window, aw: float
        duration of analysis window in seconds. Default: 0.05 second (50 ms).
        A value up to 0.1 second (100 ms) should be good for most use-cases.
        You might need a different value, especially if you use a custom
        validator.
    audio_format, fmt: str
        type of audio data (e.g., wav, ogg, raw, etc.). This will only be used
        if ´input´ is a string path to an audio file. If not given, audio type
        will be guessed from file name extension or from file header.
    sampling_rate, sr: int
        sampling rate of audio data. Only needed for raw audio files/data.
    sample_width, sw: int
        number of bytes used to encode an audio sample, typically 1, 2 or 4.
        Only needed for raw audio files/data.
    channels, ch: int
        number of channels of audio data. Only needed for raw audio files.
    use_channel, uc: int, str
        which channel to use if input has multichannel audio data. Can be an
        int (0 being the first channel), or one of the following values:
        - None, "any": a valid frame from any given channel makes
          parallel frames from all other channels automatically valid.
        - 'mix': compute average channel (i.e. mix down all channels)
    max_read, mr: float
        maximum data to read in seconds. Default: `None`, read until there is
        no more data to read.
    validator, val: DataValidator
        custom data validator. If ´None´ (default), an `AudioEnergyValidator`
        is used with the given energy threshold.
    energy_threshold, eth: float
        energy threshold for audio activity detection, default: 50. If a
        custom validator is given, this argument will be ignored.

    :Returns:

    A generator of `AudioRegion` objects, one per detected audio event.

    :Raises:

    ValueError if `min_dur`, `max_dur` or `analysis_window` is not > 0, if
    `max_silence` is negative, or if the duration parameters are mutually
    inconsistent (e.g., `min_dur` maps to more analysis windows than
    `max_dur`).
    """
    if min_dur <= 0:
        raise ValueError("'min_dur' ({}) must be > 0".format(min_dur))
    if max_dur <= 0:
        raise ValueError("'max_dur' ({}) must be > 0".format(max_dur))
    if max_silence < 0:
        raise ValueError("'max_silence' ({}) must be >= 0".format(max_silence))

    if isinstance(input, AudioDataSource):
        # An already-built source carries its own analysis window duration.
        source = input
        analysis_window = source.block_dur
    else:
        analysis_window = kwargs.get(
            "analysis_window", kwargs.get("aw", DEFAULT_ANALYSIS_WINDOW)
        )
        if analysis_window <= 0:
            raise ValueError(
                "'analysis_window' ({}) must be > 0".format(analysis_window)
            )

        # Normalize short/long keyword aliases before building the source.
        params = kwargs.copy()
        params["max_read"] = params.get("max_read", params.get("mr"))
        params["audio_format"] = params.get("audio_format", params.get("fmt"))
        if isinstance(input, AudioRegion):
            # An AudioRegion carries its own audio parameters; reuse them
            # and pass its raw data bytes along.
            params["sampling_rate"] = input.sr
            params["sample_width"] = input.sw
            params["channels"] = input.ch
            input = bytes(input)
        try:
            source = AudioDataSource(
                input, block_dur=analysis_window, **params
            )
        except TooSamllBlockDuration as exc:
            err_msg = "Too small 'analysis_windows' ({0}) for sampling rate "
            err_msg += "({1}). Analysis windows should at least be 1/{1} to "
            err_msg += "cover one single data sample"
            # Chain the original exception so the low-level cause is kept.
            raise ValueError(
                err_msg.format(exc.block_dur, exc.sampling_rate)
            ) from exc

    validator = kwargs.get("validator", kwargs.get("val"))
    if validator is None:
        energy_threshold = kwargs.get(
            "energy_threshold", kwargs.get("eth", DEFAULT_ENERGY_THRESHOLD)
        )
        use_channel = kwargs.get("use_channel", kwargs.get("uc"))
        validator = AudioEnergyValidator(
            energy_threshold, source.sw, source.ch, use_channel=use_channel
        )
    mode = (
        StreamTokenizer.DROP_TRAILING_SILENCE if drop_trailing_silence else 0
    )
    if strict_min_dur:
        mode |= StreamTokenizer.STRICT_MIN_LENGTH
    # Convert second-based durations into numbers of analysis windows.
    # `min_dur` is rounded up (an event must fully cover min_dur) while
    # `max_dur` and `max_silence` are rounded down, with an epsilon to
    # compensate float division error.
    min_length = _duration_to_nb_windows(min_dur, analysis_window, math.ceil)
    max_length = _duration_to_nb_windows(
        max_dur, analysis_window, math.floor, _EPSILON
    )
    max_continuous_silence = _duration_to_nb_windows(
        max_silence, analysis_window, math.floor, _EPSILON
    )

    err_msg = "({0} sec.) results in {1} analysis window(s) "
    err_msg += "({1} == {6}({0} / {2})) which is {5} the number "
    err_msg += "of analysis window(s) for 'max_dur' ({3} == floor({4} / {2}))"
    if min_length > max_length:
        err_msg = "'min_dur' " + err_msg
        raise ValueError(
            err_msg.format(
                min_dur,
                min_length,
                analysis_window,
                max_length,
                max_dur,
                "higher than",
                "ceil",
            )
        )

    if max_continuous_silence >= max_length:
        err_msg = "'max_silence' " + err_msg
        raise ValueError(
            err_msg.format(
                max_silence,
                max_continuous_silence,
                analysis_window,
                max_length,
                max_dur,
                "higher or equal to",
                "floor",
            )
        )

    tokenizer = StreamTokenizer(
        validator, min_length, max_length, max_continuous_silence, mode=mode
    )
    source.open()
    token_gen = tokenizer.tokenize(source, generator=True)
    # Each token is (frames, start_frame, end_frame); wrap frames into an
    # AudioRegion carrying the source's audio parameters.
    region_gen = (
        _make_audio_region(
            source.block_dur,
            token[1],
            token[0],
            source.sr,
            source.sw,
            source.ch,
        )
        for token in token_gen
    )
    return region_gen
|
amine@179
|
204
|
amine@179
|
205
|
amine@236
|
206 def _duration_to_nb_windows(
|
amine@236
|
207 duration, analysis_window, round_fn=round, epsilon=0
|
amine@236
|
208 ):
|
amine@179
|
209 """
|
amine@215
|
210 Converts a given duration into a positive integer of analysis windows.
|
amine@179
|
211 if `duration / analysis_window` is not an integer, the result will be
|
amine@179
|
212 rounded to the closest bigger integer. If `duration == 0`, returns `0`.
|
amine@215
|
213 If `duration < analysis_window`, returns 1.
|
amine@179
|
214 `duration` and `analysis_window` can be in seconds or milliseconds but
|
amine@179
|
215 must be in the same unit.
|
amine@179
|
216
|
amine@179
|
217 :Parameters:
|
amine@179
|
218
|
amine@179
|
219 duration: float
|
amine@232
|
220 a given duration in seconds or ms.
|
amine@179
|
221 analysis_window: float
|
amine@232
|
222 size of analysis window, in the same unit as `duration`.
|
amine@232
|
223 round_fn: callable
|
amine@232
|
224 function called to round the result. Default: `round`.
|
amine@232
|
225 epsilon: float
|
amine@232
|
226 small value to add to the division result before rounding.
|
amine@232
|
227 E.g., `0.3 / 0.1 = 2.9999999999999996`, when called with
|
amine@232
|
228 `round_fn=math.floor` returns `2` instead of `3`. Adding a small value
|
amine@232
|
229 to `0.3 / 0.1` avoids this error.
|
amine@179
|
230
|
amine@179
|
231 Returns:
|
amine@179
|
232 --------
|
amine@179
|
233 nb_windows: int
|
amine@179
|
234 minimum number of `analysis_window`'s to cover `durartion`. That means
|
amine@179
|
235 that `analysis_window * nb_windows >= duration`.
|
amine@179
|
236 """
|
amine@215
|
237 if duration < 0 or analysis_window <= 0:
|
amine@215
|
238 err_msg = "'duration' ({}) must be >= 0 and 'analysis_window' ({}) > 0"
|
amine@215
|
239 raise ValueError(err_msg.format(duration, analysis_window))
|
amine@179
|
240 if duration == 0:
|
amine@179
|
241 return 0
|
amine@232
|
242 return int(round_fn(duration / analysis_window + epsilon))
|
amine@179
|
243
|
amine@179
|
244
|
amine@179
|
def _make_audio_region(
    frame_duration,
    start_frame,
    data_frames,
    sampling_rate,
    sample_width,
    channels,
):
    """Create and return an `AudioRegion` from a sequence of audio frames.

    :Parameters:

    frame_duration: float
        duration of one analysis window (frame) in seconds
    start_frame: int
        index of the first analysis window of the region
    data_frames: iterable of bytes
        the region's audio frames, concatenated to form the region's data
    sampling_rate: int
        sampling rate of audio data
    sample_width: int
        number of bytes of one audio sample
    channels: int
        number of channels of audio data

    Returns:
    audio_region: AudioRegion
        AudioRegion whose `meta.start` (in seconds) is
        `start_frame * frame_duration` and whose `meta.end` is
        `meta.start` plus the region's duration.
    """
    # Onset of the region in seconds, derived from the window index.
    start = start_frame * frame_duration
    data = b"".join(data_frames)
    # bytes per second == sampling_rate * sample_width * channels
    duration = len(data) / (sampling_rate * sample_width * channels)
    meta = {"start": start, "end": start + duration}
    return AudioRegion(data, sampling_rate, sample_width, channels, meta)
|
amine@81
|
278
|
amine@81
|
279
|
amine@228
|
280 def _check_convert_index(index, types, err_msg):
|
amine@228
|
281 if not isinstance(index, slice) or index.step is not None:
|
amine@228
|
282 raise TypeError(err_msg)
|
amine@228
|
283 start = index.start if index.start is not None else 0
|
amine@228
|
284 stop = index.stop
|
amine@228
|
285 for index in (start, stop):
|
amine@228
|
286 if index is not None and not isinstance(index, types):
|
amine@228
|
287 raise TypeError(err_msg)
|
amine@228
|
288 return start, stop
|
amine@228
|
289
|
amine@228
|
290
|
amine@228
|
class _SecondsView:
    """A view for slicing an `AudioRegion` with bounds given in seconds."""

    def __init__(self, region):
        self._region = region

    def __getitem__(self, index):
        err_msg = (
            "Slicing AudioRegion by seconds requires indices of type "
            "'int' or 'float' without a step (e.g. region.sec[7.5:10])"
        )
        from_sec, to_sec = _check_convert_index(index, (int, float), err_msg)
        rate = self._region.sampling_rate
        # Translate second bounds into sample indices and delegate to the
        # region's sample-based slicing.
        first_sample = int(from_sec * rate)
        last_sample = None if to_sec is None else round(to_sec * rate)
        return self._region[first_sample:last_sample]

    @property
    def len(self):
        """Return region duration in seconds."""
        return self._region.duration
|
amine@245
|
310
|
amine@228
|
311
|
amine@228
|
class _MillisView(_SecondsView):
    """A view for slicing an `AudioRegion` with bounds given in milliseconds."""

    def __getitem__(self, index):
        # Fix: the error message used to show `region.sec[...]`; this view
        # is reached through `region.millis` / `region.ms`.
        err_msg = (
            "Slicing AudioRegion by milliseconds requires indices of type "
        )
        err_msg += "'int' without a step (e.g. region.millis[500:1500])"
        # Fix: pass a real tuple of types; `(int)` is just `int`.
        start_ms, stop_ms = _check_convert_index(index, (int,), err_msg)
        # Convert millisecond bounds to seconds and delegate to the
        # seconds-based view.
        start_sec = start_ms / 1000
        stop_sec = None if stop_ms is None else stop_ms / 1000
        index = slice(start_sec, stop_sec)
        return super(_MillisView, self).__getitem__(index)

    def __len__(self):
        """
        Return region duration in milliseconds.
        """
        return round(self._region.duration * 1000)

    @property
    def len(self):
        """
        Return region duration in milliseconds.
        """
        return len(self)
|
amine@245
|
336
|
amine@228
|
337
|
amine@244
|
338 class _AudioRegionMetadata(dict):
|
amine@244
|
339 def __getattr__(self, name):
|
amine@244
|
340 if name in self:
|
amine@244
|
341 return self[name]
|
amine@244
|
342 else:
|
amine@244
|
343 err_msg = "AudioRegion metadata has no entry '{}'"
|
amine@244
|
344 raise AttributeError(err_msg.format(name))
|
amine@244
|
345
|
amine@244
|
346 def __setattr__(self, name, value):
|
amine@244
|
347 self[name] = value
|
amine@244
|
348
|
amine@244
|
349 def __str__(self):
|
amine@244
|
350 return "\n".join("{}: {}".format(k, v) for k, v in self.items())
|
amine@244
|
351
|
amine@244
|
352 def __repr__(self):
|
amine@244
|
353 return str(self)
|
amine@244
|
354
|
amine@244
|
355
|
amine@81
|
class AudioRegion(object):
    """A detected audio event: raw audio data plus its audio parameters."""

    def __init__(self, data, sampling_rate, sample_width, channels, meta=None):
        """
        A class for detected audio events.

        :Parameters:

        data: bytes
            audio data
        sampling_rate: int
            sampling rate of audio data
        sample_width: int
            number of bytes of one audio sample
        channels: int
            number of channels of audio data
        meta: dict, default: None
            optional metadata for the region (e.g., 'start' and 'end' times
            of the event), exposed through the `meta` property.
        """
        check_audio_data(data, sample_width, channels)
        self._data = data
        self._sampling_rate = sampling_rate
        self._sample_width = sample_width
        self._channels = channels
        # Lazily-computed sample array; see the `samples` property.
        self._samples = None

        if meta is not None:
            self._meta = _AudioRegionMetadata(meta)
        else:
            self._meta = None

        # Views for slicing by seconds/milliseconds, with short aliases.
        self._seconds_view = _SecondsView(self)
        self.s = self.sec

        self._millis_view = _MillisView(self)
        self.ms = self.millis

    @property
    def meta(self):
        """Region metadata (an attribute-accessible dict), or None."""
        return self._meta

    @meta.setter
    def meta(self, new_meta):
        """Replace region metadata, wrapping it for attribute access."""
        self._meta = _AudioRegionMetadata(new_meta)

    @classmethod
    def load(cls, file, skip=0, max_read=None, **kwargs):
        """Create an AudioRegion by reading audio data from `file`.

        :Parameters:

        file: str, file-like object
            audio file to read from; passed to `get_audio_source` along
            with `kwargs` (e.g., sampling_rate/sample_width for raw data).
        skip: float, default: 0
            amount of leading audio to skip, in seconds.
        max_read: float, default: None
            maximum amount of audio to read, in seconds. If None or
            negative, read until there is no more data.
        """
        audio_source = get_audio_source(file, **kwargs)
        audio_source.open()
        if skip is not None and skip > 0:
            skip_samples = int(skip * audio_source.sampling_rate)
            audio_source.read(skip_samples)
        if max_read is None or max_read < 0:
            # None makes the source read to the end of the stream.
            max_read_samples = None
        else:
            max_read_samples = round(max_read * audio_source.sampling_rate)
        data = audio_source.read(max_read_samples)
        audio_source.close()
        return cls(
            data,
            audio_source.sampling_rate,
            audio_source.sample_width,
            audio_source.channels,
        )

    @property
    def sec(self):
        """View for slicing the region by seconds (alias: `s`)."""
        return self._seconds_view

    @property
    def millis(self):
        """View for slicing the region by milliseconds (alias: `ms`)."""
        return self._millis_view

    @property
    def duration(self):
        """
        Returns region duration in seconds.
        """
        return len(self._data) / (
            self.sampling_rate * self.sample_width * self.channels
        )

    @property
    def sampling_rate(self):
        """Sampling rate of audio data."""
        return self._sampling_rate

    @property
    def sr(self):
        """Alias for `sampling_rate`."""
        return self._sampling_rate

    @property
    def sample_width(self):
        """Number of bytes of one audio sample."""
        return self._sample_width

    @property
    def sw(self):
        """Alias for `sample_width`."""
        return self._sample_width

    @property
    def channels(self):
        """Number of channels of audio data."""
        return self._channels

    @property
    def ch(self):
        """Alias for `channels`."""
        return self._channels

    def play(self, player=None, progress_bar=False, **progress_bar_kwargs):
        """Play audio region

        :Parameters:

        player: AudioPlayer, default: None
            audio player to use. if None (default), use `player_for(self)`
            to get a new audio player.

        progress_bar bool, default: False
            whether to use a progress bar while playing audio. Default: False.

        progress_bar_kwargs: kwargs
            keyword arguments to pass to progress_bar object. Currently only
            `tqdm` is supported.
        """
        if player is None:
            player = player_for(self)
        player.play(
            self._data, progress_bar=progress_bar, **progress_bar_kwargs
        )

    def save(self, file, format=None, exists_ok=True, **audio_parameters):
        """Save audio region to file.

        :Parameters:

        file: str, file-like object
            path to output file or a file-like object. If ´str´, it may contain
            a ´{duration}´ place holder as well as any place holder that
            this region's metadata might contain (e.g., ´{meta.start}´).

        format: str
            type of audio file. If None (default), file type is guessed from
            `file`'s extension. If `file` is not a ´str´ or does not have
            an extension, audio data is saved as a raw (headerless) audio file.
        exists_ok: bool, default: True
            If True, overwrite ´file´ if a file with the same name exists.
            If False, raise an ´IOError´ if the file exists.
        audio_parameters: dict
            any keyword arguments to be passed to audio saving backend
            (e.g. bitrate, etc.)

        :Returns:

        file: str, file-like object
            name of the file or file-like object to which audio data was
            written, with any ´{duration}´ / metadata place holders expanded.

        :Raises:

        IOError if ´file´ exists and ´exists_ok´ is False.

        Example:

        .. code:: python
            region = AudioRegion(b'\0' * 2 * 24000,
                                 sampling_rate=16000,
                                 sample_width=2,
                                 channels=1)
            region.meta = {"start": 2.25, "end": 2.25 + region.duration}
            region.save('audio_{meta.start}-{meta.end}.wav')
            audio_2.25-3.75.wav
            region.save('region_{meta.start:.3f}_{duration:.3f}.wav')
            audio_2.250_1.500.wav
        """
        if isinstance(file, str):
            file = file.format(duration=self.duration, meta=self.meta)
            if not exists_ok and os.path.exists(file):
                raise FileExistsError("file '{file}' exists".format(file=file))
        to_file(
            self._data,
            file,
            format,
            sr=self.sr,
            sw=self.sw,
            ch=self.ch,
            audio_parameters=audio_parameters,
        )
        return file

    def __array__(self):
        # numpy interoperability: np.asarray(region) returns the samples.
        return self.samples

    @property
    def samples(self):
        """Audio samples as an array (computed once, then cached)."""
        if self._samples is None:
            fmt = signal.FORMAT[self.sample_width]
            if self.channels == 1:
                self._samples = signal.to_array(self._data, fmt)
            else:
                self._samples = signal.separate_channels(
                    self._data, fmt, self.channels
                )
        return self._samples

    def __len__(self):
        """
        Return region length in number of samples.
        """
        return len(self._data) // (self.sample_width * self.channels)

    @property
    def len(self):
        """
        Return region length in number of samples.
        """
        return len(self)

    def __bytes__(self):
        return self._data

    def __str__(self):
        return (
            "AudioRegion(duration={:.3f}, "
            "sampling_rate={}, sample_width={}, channels={})".format(
                self.duration, self.sr, self.sw, self.ch
            )
        )

    def __repr__(self):
        return str(self)

    def __add__(self, other):
        """
        Concatenates this region and `other` and return a new region.
        Both regions must have the same sampling rate, sample width
        and number of channels. If not, raises a `ValueError`.
        """
        if not isinstance(other, AudioRegion):
            raise TypeError(
                "Can only concatenate AudioRegion, "
                'not "{}"'.format(type(other))
            )
        if other.sr != self.sr:
            raise ValueError(
                "Can only concatenate AudioRegions of the same "
                "sampling rate ({} != {})".format(self.sr, other.sr)
            )
        if other.sw != self.sw:
            raise ValueError(
                "Can only concatenate AudioRegions of the same "
                "sample width ({} != {})".format(self.sw, other.sw)
            )
        if other.ch != self.ch:
            raise ValueError(
                "Can only concatenate AudioRegions of the same "
                "number of channels ({} != {})".format(self.ch, other.ch)
            )
        data = self._data + other._data
        return AudioRegion(data, self.sr, self.sw, self.ch)

    def __radd__(self, other):
        """
        Concatenates `other` and this region. `other` should be an
        `AudioRegion` with the same audio parameters as this region
        but can exceptionally be `0` to make it possible to concatenate
        many regions with `sum`.
        """
        if other == 0:
            return self
        # Fix: original called the nonexistent `other.add`; delegate to
        # `other.__add__` to perform the concatenation.
        return other.__add__(self)

    def __mul__(self, n):
        if not isinstance(n, int):
            err_msg = "Can't multiply AudioRegion by a non-int of type '{}'"
            raise TypeError(err_msg.format(type(n)))
        data = self._data * n
        return AudioRegion(data, self.sr, self.sw, self.ch)

    def __rmul__(self, n):
        return self * n

    def __eq__(self, other):
        if other is self:
            return True
        if not isinstance(other, AudioRegion):
            return False
        return (
            (self._data == other._data)
            and (self.sr == other.sr)
            and (self.sw == other.sw)
            and (self.ch == other.ch)
        )

    def __getitem__(self, index):
        # Fix: the error example used to show `region.sec[...]` although
        # this is the sample-based slicing entry point.
        err_msg = "Slicing AudioRegion by samples requires indices of type "
        err_msg += "'int' without a step (e.g. region[1600:3200])"
        # Fix: pass a real tuple of types; `(int)` is just `int`.
        start_sample, stop_sample = _check_convert_index(
            index, (int,), err_msg
        )

        bytes_per_sample = self.sample_width * self.channels
        len_samples = len(self._data) // bytes_per_sample

        if start_sample < 0:
            start_sample = max(start_sample + len_samples, 0)
        onset = start_sample * bytes_per_sample

        if stop_sample is not None:
            if stop_sample < 0:
                stop_sample = max(stop_sample + len_samples, 0)
            # Fix: use the normalized `stop_sample`, not the raw slice
            # bound, so negative stop indices slice correctly.
            offset = stop_sample * bytes_per_sample
        else:
            offset = None

        data = self._data[onset:offset]
        return AudioRegion(data, self.sr, self.sw, self.ch)
|
amine@188
|
667
|
amine@2
|
668
|
amine@178
|
669 class StreamTokenizer:
|
amine@32
|
670 """
|
amine@32
|
671 Class for stream tokenizers. It implements a 4-state automaton scheme
|
amine@32
|
672 to extract sub-sequences of interest on the fly.
|
amine@67
|
673
|
amine@32
|
674 :Parameters:
|
amine@67
|
675
|
amine@5
|
676 `validator` :
|
amine@5
|
677 instance of `DataValidator` that implements `is_valid` method.
|
amine@67
|
678
|
amine@5
|
679 `min_length` : *(int)*
|
amine@5
|
680 Minimum number of frames of a valid token. This includes all \
|
amine@5
|
681 tolerated non valid frames within the token.
|
amine@67
|
682
|
amine@5
|
683 `max_length` : *(int)*
|
amine@5
|
684 Maximum number of frames of a valid token. This includes all \
|
amine@5
|
685 tolerated non valid frames within the token.
|
amine@67
|
686
|
amine@5
|
687 `max_continuous_silence` : *(int)*
|
amine@5
|
688 Maximum number of consecutive non-valid frames within a token.
|
amine@5
|
689 Note that, within a valid token, there may be many tolerated \
|
amine@5
|
690 *silent* regions that contain each a number of non valid frames up to \
|
amine@5
|
691 `max_continuous_silence`
|
amine@67
|
692
|
amine@5
|
693 `init_min` : *(int, default=0)*
|
amine@5
|
694 Minimum number of consecutive valid frames that must be **initially** \
|
amine@5
|
695 gathered before any sequence of non valid frames can be tolerated. This
|
amine@5
|
696 option is not always needed, it can be used to drop non-valid tokens as
|
amine@5
|
697 early as possible. **Default = 0** means that the option is by default
|
amine@5
|
698 ineffective.
|
amine@67
|
699
|
amine@5
|
700 `init_max_silence` : *(int, default=0)*
|
amine@5
|
701 Maximum number of tolerated consecutive non-valid frames if the \
|
amine@5
|
702 number already gathered valid frames has not yet reached 'init_min'.
|
amine@5
|
703 This argument is normally used if `init_min` is used. **Default = 0**,
|
amine@5
|
704 by default this argument is not taken into consideration.
|
amine@67
|
705
|
amine@5
|
706 `mode` : *(int, default=0)*
|
amine@5
|
707 `mode` can be:
|
amine@67
|
708
|
amine@35
|
709 1. `StreamTokenizer.STRICT_MIN_LENGTH`:
|
amine@32
|
710 if token *i* is delivered because `max_length`
|
amine@32
|
711 is reached, and token *i+1* is immediately adjacent to
|
amine@32
|
712 token *i* (i.e. token *i* ends at frame *k* and token *i+1* starts
|
amine@32
|
713 at frame *k+1*) then accept token *i+1* only of it has a size of at
|
amine@32
|
714 least `min_length`. The default behavior is to accept token *i+1*
|
amine@32
|
715 event if it is shorter than `min_length` (given that the above conditions
|
amine@32
|
716 are fulfilled of course).
|
amine@67
|
717
|
amine@32
|
718 :Examples:
|
amine@67
|
719
|
amine@32
|
720 In the following code, without `STRICT_MIN_LENGTH`, the 'BB' token is
|
amine@32
|
721 accepted although it is shorter than `min_length` (3), because it immediately
|
amine@32
|
722 follows the latest delivered token:
|
amine@67
|
723
|
amine@32
|
724 .. code:: python
|
amine@67
|
725
|
amine@32
|
726 from auditok import StreamTokenizer, StringDataSource, DataValidator
|
amine@67
|
727
|
amine@32
|
728 class UpperCaseChecker(DataValidator):
|
amine@32
|
729 def is_valid(self, frame):
|
amine@32
|
730 return frame.isupper()
|
amine@67
|
731
|
amine@67
|
732
|
amine@32
|
733 dsource = StringDataSource("aaaAAAABBbbb")
|
amine@32
|
734 tokenizer = StreamTokenizer(validator=UpperCaseChecker(),
|
amine@32
|
735 min_length=3,
|
amine@32
|
736 max_length=4,
|
amine@32
|
737 max_continuous_silence=0)
|
amine@67
|
738
|
amine@32
|
739 tokenizer.tokenize(dsource)
|
amine@67
|
740
|
amine@32
|
741 :output:
|
amine@67
|
742
|
amine@32
|
743 .. code:: python
|
amine@67
|
744
|
amine@32
|
745 [(['A', 'A', 'A', 'A'], 3, 6), (['B', 'B'], 7, 8)]
|
amine@32
|
746
|
amine@32
|
747
|
amine@32
|
748 The following tokenizer will however reject the 'BB' token:
|
amine@67
|
749
|
amine@32
|
750 .. code:: python
|
amine@67
|
751
|
amine@32
|
752 dsource = StringDataSource("aaaAAAABBbbb")
|
amine@32
|
753 tokenizer = StreamTokenizer(validator=UpperCaseChecker(),
|
amine@32
|
754 min_length=3, max_length=4,
|
amine@32
|
755 max_continuous_silence=0,
|
amine@32
|
756 mode=StreamTokenizer.STRICT_MIN_LENGTH)
|
amine@32
|
757 tokenizer.tokenize(dsource)
|
amine@67
|
758
|
amine@32
|
759 :output:
|
amine@67
|
760
|
amine@32
|
761 .. code:: python
|
amine@67
|
762
|
amine@32
|
763 [(['A', 'A', 'A', 'A'], 3, 6)]
|
amine@67
|
764
|
amine@67
|
765
|
amine@35
|
766 2. `StreamTokenizer.DROP_TRAILING_SILENCE`: drop all tailing non-valid frames
|
amine@32
|
767 from a token to be delivered if and only if it is not **truncated**.
|
amine@32
|
768 This can be a bit tricky. A token is actually delivered if:
|
amine@67
|
769
|
amine@32
|
770 - a. `max_continuous_silence` is reached
|
amine@67
|
771
|
amine@32
|
772 :or:
|
amine@67
|
773
|
amine@32
|
774 - b. Its length reaches `max_length`. This is called a **truncated** token
|
amine@67
|
775
|
amine@32
|
776 In the current implementation, a `StreamTokenizer`'s decision is only based on already seen
|
amine@32
|
777 data and on incoming data. Thus, if a token is truncated at a non-valid but tolerated
|
amine@32
|
778 frame (`max_length` is reached but `max_continuous_silence` not yet) any tailing
|
amine@32
|
779 silence will be kept because it can potentially be part of valid token (if `max_length`
|
amine@32
|
780 was bigger). But if `max_continuous_silence` is reached before `max_length`, the delivered
|
amine@32
|
781 token will not be considered as truncated but a result of *normal* end of detection
|
amine@32
|
782 (i.e. no more valid data). In that case the tailing silence can be removed if you use
|
amine@32
|
783 the `StreamTokenizer.DROP_TRAILING_SILENCE` mode.
|
amine@67
|
784
|
amine@32
|
785 :Example:
|
amine@67
|
786
|
amine@32
|
787 .. code:: python
|
amine@67
|
788
|
amine@32
|
789 tokenizer = StreamTokenizer(validator=UpperCaseChecker(), min_length=3,
|
amine@32
|
790 max_length=6, max_continuous_silence=3,
|
amine@32
|
791 mode=StreamTokenizer.DROP_TRAILING_SILENCE)
|
amine@67
|
792
|
amine@32
|
793 dsource = StringDataSource("aaaAAAaaaBBbbbb")
|
amine@32
|
794 tokenizer.tokenize(dsource)
|
amine@67
|
795
|
amine@32
|
796 :output:
|
amine@67
|
797
|
amine@32
|
798 .. code:: python
|
amine@67
|
799
|
amine@32
|
800 [(['A', 'A', 'A', 'a', 'a', 'a'], 3, 8), (['B', 'B'], 9, 10)]
|
amine@67
|
801
|
amine@32
|
802 The first token is delivered with its tailing silence because it is truncated
|
amine@32
|
803 while the second one has its tailing frames removed.
|
amine@67
|
804
|
amine@32
|
805 Without `StreamTokenizer.DROP_TRAILING_SILENCE` the output would be:
|
amine@67
|
806
|
amine@32
|
807 .. code:: python
|
amine@67
|
808
|
amine@32
|
809 [(['A', 'A', 'A', 'a', 'a', 'a'], 3, 8), (['B', 'B', 'b', 'b', 'b'], 9, 13)]
|
amine@67
|
810
|
amine@67
|
811
|
amine@32
|
812 3. `StreamTokenizer.STRICT_MIN_LENGTH | StreamTokenizer.DROP_TRAILING_SILENCE`:
|
amine@32
|
813 use both options. That means: first remove trailing silence, then check if the
|
amine@32
|
814 token still has at least a length of `min_length`.
|
amine@32
|
815 """
|
amine@67
|
816
|
amine@32
|
    # Internal states of the tokenizer's finite-state machine (see `_process`).
    SILENCE = 0
    POSSIBLE_SILENCE = 1
    POSSIBLE_NOISE = 2
    NOISE = 3

    # Mode bit flags; combine with the bitwise `|` operator.
    STRICT_MIN_LENGTH = 2
    DROP_TRAILING_SILENCE = 4
    # alias
    # NOTE(review): kept for backward compatibility with the historical
    # misspelling "tailing"; prefer DROP_TRAILING_SILENCE in new code.
    DROP_TAILING_SILENCE = 4
|
amine@67
|
826
|
amine@178
|
827 def __init__(
|
amine@178
|
828 self,
|
amine@178
|
829 validator,
|
amine@178
|
830 min_length,
|
amine@178
|
831 max_length,
|
amine@178
|
832 max_continuous_silence,
|
amine@178
|
833 init_min=0,
|
amine@178
|
834 init_max_silence=0,
|
amine@178
|
835 mode=0,
|
amine@178
|
836 ):
|
amine@67
|
837
|
amine@2
|
838 if not isinstance(validator, DataValidator):
|
amine@185
|
839 raise TypeError(
|
amine@185
|
840 "'validator' must be an instance of 'DataValidator'"
|
amine@185
|
841 )
|
amine@67
|
842
|
amine@2
|
843 if max_length <= 0:
|
amine@185
|
844 raise ValueError(
|
amine@185
|
845 "'max_length' must be > 0 (value={0})".format(max_length)
|
amine@185
|
846 )
|
amine@67
|
847
|
amine@2
|
848 if min_length <= 0 or min_length > max_length:
|
amine@178
|
849 raise ValueError(
|
amine@178
|
850 "'min_length' must be > 0 and <= 'max_length' (value={0})".format(
|
amine@178
|
851 min_length
|
amine@178
|
852 )
|
amine@178
|
853 )
|
amine@67
|
854
|
amine@2
|
855 if max_continuous_silence >= max_length:
|
amine@178
|
856 raise ValueError(
|
amine@178
|
857 "'max_continuous_silence' must be < 'max_length' (value={0})".format(
|
amine@178
|
858 max_continuous_silence
|
amine@178
|
859 )
|
amine@178
|
860 )
|
amine@67
|
861
|
amine@5
|
862 if init_min >= max_length:
|
amine@178
|
863 raise ValueError(
|
amine@178
|
864 "'init_min' must be < 'max_length' (value={0})".format(
|
amine@178
|
865 max_continuous_silence
|
amine@178
|
866 )
|
amine@178
|
867 )
|
amine@67
|
868
|
amine@2
|
869 self.validator = validator
|
amine@2
|
870 self.min_length = min_length
|
amine@2
|
871 self.max_length = max_length
|
amine@2
|
872 self.max_continuous_silence = max_continuous_silence
|
amine@2
|
873 self.init_min = init_min
|
amine@2
|
874 self.init_max_silent = init_max_silence
|
amine@67
|
875
|
amine@2
|
876 self._mode = None
|
amine@2
|
877 self.set_mode(mode)
|
amine@2
|
878 self._strict_min_length = (mode & self.STRICT_MIN_LENGTH) != 0
|
amine@67
|
879 self._drop_tailing_silence = (mode & self.DROP_TRAILING_SILENCE) != 0
|
amine@67
|
880
|
amine@2
|
881 self._deliver = None
|
amine@2
|
882 self._tokens = None
|
amine@2
|
883 self._state = None
|
amine@2
|
884 self._data = None
|
amine@2
|
885 self._contiguous_token = False
|
amine@67
|
886
|
amine@2
|
887 self._init_count = 0
|
amine@2
|
888 self._silence_length = 0
|
amine@2
|
889 self._start_frame = 0
|
amine@2
|
890 self._current_frame = 0
|
amine@67
|
891
|
amine@2
|
892 def set_mode(self, mode):
|
amine@177
|
893 # TODO: use properties and make these deprecated
|
amine@2
|
894 """
|
amine@32
|
895 :Parameters:
|
amine@67
|
896
|
amine@32
|
897 `mode` : *(int)*
|
amine@32
|
898 New mode, must be one of:
|
amine@67
|
899
|
amine@67
|
900
|
amine@33
|
901 - `StreamTokenizer.STRICT_MIN_LENGTH`
|
amine@67
|
902
|
amine@33
|
903 - `StreamTokenizer.DROP_TRAILING_SILENCE`
|
amine@67
|
904
|
amine@33
|
905 - `StreamTokenizer.STRICT_MIN_LENGTH | StreamTokenizer.DROP_TRAILING_SILENCE`
|
amine@67
|
906
|
amine@177
|
907 - `0` TODO: this mode should have a name
|
amine@67
|
908
|
amine@3
|
909 See `StreamTokenizer.__init__` for more information about the mode.
|
amine@2
|
910 """
|
amine@67
|
911
|
amine@178
|
912 if not mode in [
|
amine@178
|
913 self.STRICT_MIN_LENGTH,
|
amine@178
|
914 self.DROP_TRAILING_SILENCE,
|
amine@178
|
915 self.STRICT_MIN_LENGTH | self.DROP_TRAILING_SILENCE,
|
amine@178
|
916 0,
|
amine@178
|
917 ]:
|
amine@67
|
918
|
amine@2
|
919 raise ValueError("Wrong value for mode")
|
amine@67
|
920
|
amine@2
|
921 self._mode = mode
|
amine@2
|
922 self._strict_min_length = (mode & self.STRICT_MIN_LENGTH) != 0
|
amine@67
|
923 self._drop_tailing_silence = (mode & self.DROP_TRAILING_SILENCE) != 0
|
amine@67
|
924
|
amine@2
|
925 def get_mode(self):
|
amine@2
|
926 """
|
amine@2
|
927 Return the current mode. To check whether a specific mode is activated use
|
amine@2
|
928 the bitwise 'and' operator `&`. Example:
|
amine@67
|
929
|
amine@32
|
930 .. code:: python
|
amine@67
|
931
|
amine@2
|
932 if mode & self.STRICT_MIN_LENGTH != 0:
|
amine@32
|
933 do_something()
|
amine@2
|
934 """
|
amine@2
|
935 return self._mode
|
amine@67
|
936
|
amine@2
|
937 def _reinitialize(self):
|
amine@2
|
938 self._contiguous_token = False
|
amine@2
|
939 self._data = []
|
amine@2
|
940 self._tokens = []
|
amine@2
|
941 self._state = self.SILENCE
|
amine@2
|
942 self._current_frame = -1
|
amine@2
|
943 self._deliver = self._append_token
|
amine@67
|
944
|
amine@177
|
945 def tokenize(self, data_source, callback=None, generator=False):
|
amine@2
|
946 """
|
amine@2
|
947 Read data from `data_source`, one frame a time, and process the read frames in
|
amine@2
|
948 order to detect sequences of frames that make up valid tokens.
|
amine@67
|
949
|
amine@32
|
950 :Parameters:
|
amine@47
|
951 `data_source` : instance of the :class:`DataSource` class that implements a `read` method.
|
amine@32
|
952 'read' should return a slice of signal, i.e. frame (of whatever \
|
amine@32
|
953 type as long as it can be processed by validator) and None if \
|
amine@32
|
954 there is no more signal.
|
amine@67
|
955
|
amine@32
|
956 `callback` : an optional 3-argument function.
|
amine@32
|
957 If a `callback` function is given, it will be called each time a valid token
|
amine@32
|
958 is found.
|
amine@67
|
959
|
amine@67
|
960
|
amine@32
|
961 :Returns:
|
amine@32
|
962 A list of tokens if `callback` is None. Each token is tuple with the following elements:
|
amine@67
|
963
|
amine@32
|
964 .. code python
|
amine@67
|
965
|
amine@32
|
966 (data, start, end)
|
amine@67
|
967
|
amine@32
|
968 where `data` is a list of read frames, `start`: index of the first frame in the
|
amine@32
|
969 original data and `end` : index of the last frame.
|
amine@67
|
970
|
amine@2
|
971 """
|
amine@177
|
972 token_gen = self._iter_tokens(data_source)
|
amine@177
|
973 if callback:
|
amine@177
|
974 for token in token_gen:
|
amine@177
|
975 callback(*token)
|
amine@177
|
976 return
|
amine@177
|
977 if generator:
|
amine@177
|
978 return token_gen
|
amine@177
|
979 return list(token_gen)
|
amine@67
|
980
|
amine@177
|
981 def _iter_tokens(self, data_source):
|
amine@2
|
982 self._reinitialize()
|
amine@2
|
983 while True:
|
amine@67
|
984 frame = data_source.read()
|
amine@177
|
985 self._current_frame += 1
|
amine@47
|
986 if frame is None:
|
amine@177
|
987 token = self._post_process()
|
amine@177
|
988 if token is not None:
|
amine@177
|
989 yield token
|
amine@2
|
990 break
|
amine@177
|
991 token = self._process(frame)
|
amine@177
|
992 if token is not None:
|
amine@177
|
993 yield token
|
amine@67
|
994
|
amine@2
|
    def _process(self, frame):
        """Feed one frame to the tokenizer's state machine.

        Returns a `(data, start, end)` token when this frame finalizes a
        token (via `_process_end_of_detection`), otherwise returns None
        and only updates internal state.
        """
        frame_is_valid = self.validator.is_valid(frame)

        if self._state == self.SILENCE:

            if frame_is_valid:
                # seems we got a valid frame after a silence
                self._init_count = 1
                self._silence_length = 0
                self._start_frame = self._current_frame
                self._data.append(frame)

                if self._init_count >= self.init_min:
                    self._state = self.NOISE
                    if len(self._data) >= self.max_length:
                        return self._process_end_of_detection(True)
                else:
                    self._state = self.POSSIBLE_NOISE

        elif self._state == self.POSSIBLE_NOISE:

            if frame_is_valid:
                self._silence_length = 0
                self._init_count += 1
                self._data.append(frame)
                if self._init_count >= self.init_min:
                    self._state = self.NOISE
                    if len(self._data) >= self.max_length:
                        return self._process_end_of_detection(True)

            else:
                self._silence_length += 1
                if (
                    self._silence_length > self.init_max_silent
                    or len(self._data) + 1 >= self.max_length
                ):
                    # either init_max_silent or max_length is reached
                    # before _init_count, back to silence
                    self._data = []
                    self._state = self.SILENCE
                else:
                    # silence still tolerated during initialization
                    self._data.append(frame)

        elif self._state == self.NOISE:

            if frame_is_valid:
                self._data.append(frame)
                if len(self._data) >= self.max_length:
                    return self._process_end_of_detection(True)

            elif self.max_continuous_silence <= 0:
                # max token reached at this frame will _deliver if _contiguous_token
                # and not _strict_min_length
                self._state = self.SILENCE
                return self._process_end_of_detection()
            else:
                # this is the first silent frame following a valid one
                # and it is tolerated
                self._silence_length = 1
                self._data.append(frame)
                self._state = self.POSSIBLE_SILENCE
                if len(self._data) == self.max_length:
                    return self._process_end_of_detection(True)
                # don't reset _silence_length because we still
                # need to know the total number of silent frames

        elif self._state == self.POSSIBLE_SILENCE:

            if frame_is_valid:
                # valid frame interrupts the silence run: back to NOISE
                self._data.append(frame)
                self._silence_length = 0
                self._state = self.NOISE
                if len(self._data) >= self.max_length:
                    return self._process_end_of_detection(True)

            else:
                if self._silence_length >= self.max_continuous_silence:
                    self._state = self.SILENCE
                    if self._silence_length < len(self._data):
                        # _deliver only gathered frames aren't all silent
                        return self._process_end_of_detection()
                    self._data = []
                    self._silence_length = 0
                else:
                    self._data.append(frame)
                    self._silence_length += 1
                    if len(self._data) >= self.max_length:
                        return self._process_end_of_detection(True)
                    # don't reset _silence_length because we still
                    # need to know the total number of silent frames
|
amine@67
|
1086
|
amine@2
|
1087 def _post_process(self):
|
amine@2
|
1088 if self._state == self.NOISE or self._state == self.POSSIBLE_SILENCE:
|
amine@2
|
1089 if len(self._data) > 0 and len(self._data) > self._silence_length:
|
amine@177
|
1090 return self._process_end_of_detection()
|
amine@67
|
1091
|
amine@2
|
    def _process_end_of_detection(self, truncated=False):
        """Finalize the currently gathered frames as a candidate token.

        :Parameters:

        `truncated` : bool
            True when detection ended because `max_length` was reached
            (the token may continue contiguously in the next frames).

        Returns a `(data, start_frame, end_frame)` tuple if the gathered
        data qualifies as a token (long enough, or contiguous with a
        just-truncated one when `_strict_min_length` is off), else None.
        Always resets `_data`.
        """
        if (
            not truncated
            and self._drop_tailing_silence
            and self._silence_length > 0
        ):
            # happens if max_continuous_silence is reached
            # or max_length is reached at a silent frame
            self._data = self._data[0 : -self._silence_length]

        if (len(self._data) >= self.min_length) or (
            len(self._data) > 0
            and not self._strict_min_length
            and self._contiguous_token
        ):

            start_frame = self._start_frame
            end_frame = self._start_frame + len(self._data) - 1
            data = self._data
            self._data = []
            token = (data, start_frame, end_frame)

            if truncated:
                # next token (if any) will start at _current_frame + 1
                self._start_frame = self._current_frame + 1
                # remember that it is contiguous with the just delivered one
                self._contiguous_token = True
            else:
                self._contiguous_token = False
            return token
        else:
            # gathered data too short to deliver: drop it
            self._contiguous_token = False

        self._data = []
|
amine@67
|
1127
|
amine@2
|
1128 def _append_token(self, data, start, end):
|
amine@178
|
1129 self._tokens.append((data, start, end))
|