"""
This module gathers processing (i.e. tokenization) classes.

Class summary
=============

.. autosummary::

    AudioRegion
    StreamTokenizer
"""

from auditok.util import DataValidator
from auditok.io import check_audio_data

__all__ = ["AudioRegion", "StreamTokenizer"]


class AudioRegion(object):

    def __init__(self, data, start, sampling_rate, sample_width, channels):
        """
        A class for detected audio events.

        :Parameters:

            data: bytes
                audio data
            start: float
                start time in seconds
            sampling_rate: int
                sampling rate of audio data
            sample_width: int
                number of bytes of one audio sample
            channels: int
                number of channels of audio data
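
        :Example:

            A minimal, illustrative sketch (the byte string below stands in
            for one second of 8 kHz, 8-bit mono audio; real data would come
            from an actual audio source):

            .. code:: python

                region = AudioRegion(b"0" * 8000, start=0,
                                     sampling_rate=8000, sample_width=1,
                                     channels=1)
                region.duration    # 1.0 (seconds)
                len(region)        # 1000 (milliseconds)
                bytes(region)      # the raw audio data
                longer = region + region
                longer.duration    # 2.0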
        """
        check_audio_data(data, sample_width, channels)
        self._data = data
        self._start = start
        self._sampling_rate = sampling_rate
        self._sample_width = sample_width
        self._channels = channels

    @property
    def start(self):
        return self._start

    @property
    def end(self):
        return self.start + self.duration

    @property
    def duration(self):
        """
        Returns region duration in seconds.
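
        For instance (illustrative values): 16000 bytes of audio sampled at
        8000 Hz with 2-byte samples and a single channel last
        16000 / (8000 * 2 * 1) = 1.0 second.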
        """
        return len(self._data) / (self.sampling_rate *
                                  self.sample_width *
                                  self.channels)

    @property
    def sampling_rate(self):
        return self._sampling_rate

    @property
    def sr(self):
        return self._sampling_rate

    @property
    def sample_width(self):
        return self._sample_width

    @property
    def sw(self):
        return self._sample_width

    @property
    def channels(self):
        return self._channels

    @property
    def ch(self):
        return self._channels

    def __len__(self):
        """
        Returns region duration in milliseconds.
        """
        return round(self.duration * 1000)

    def __bytes__(self):
        return self._data

    def __repr__(self):
        return ('AudioRegion(data, start={:.3f}, end={:.3f}, '
                'sampling_rate={}, sample_width={}, '
                'channels={})'.format(self.start, self.end,
                                      self.sr, self.sw, self.ch))

    def __str__(self):
        return ('AudioRegion(start={:.3f}, end={:.3f}, '
                'duration={:.3f})'.format(self.start, self.end, self.duration))

    def __add__(self, other):
        """
        Concatenates this region and `other` and returns a new region.
        Both regions must have the same sampling rate, sample width
        and number of channels. If not, raises a `ValueError`.
        """
        if not isinstance(other, AudioRegion):
            raise TypeError('Can only concatenate AudioRegion, '
                            'not "{}"'.format(type(other)))
        if other.sr != self.sr:
            raise ValueError('Can only concatenate AudioRegions of the same '
                             'sampling rate ({} != {})'.format(self.sr,
                                                               other.sr))
        if other.sw != self.sw:
            raise ValueError('Can only concatenate AudioRegions of the same '
                             'sample width ({} != {})'.format(self.sw,
                                                              other.sw))
        if other.ch != self.ch:
            raise ValueError('Can only concatenate AudioRegions of the same '
                             'number of channels ({} != {})'.format(self.ch,
                                                                    other.ch))
        data = self._data + other._data
        return AudioRegion(data, self.start, self.sr, self.sw, self.ch)

    def __radd__(self, other):
        """
        Concatenates `other` and this region. `other` should be an
        `AudioRegion` with the same audio parameters as this region,
        but it can exceptionally be `0` to make it possible to concatenate
        many regions with `sum`.
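
        :Example:

            An illustrative sketch, assuming `regions` is a list of
            `AudioRegion` objects sharing the same audio parameters:

            .. code:: python

                combined = sum(regions)  # concatenation of all regions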
        """
        if other == 0:
            return self
        return other.__add__(self)


class StreamTokenizer():
    """
    Class for stream tokenizers. It implements a 4-state automaton scheme
    to extract sub-sequences of interest on the fly.

    :Parameters:

        `validator` :
            instance of `DataValidator` that implements the `is_valid` method.

        `min_length` : *(int)*
            Minimum number of frames of a valid token. This includes all \
            tolerated non-valid frames within the token.

        `max_length` : *(int)*
            Maximum number of frames of a valid token. This includes all \
            tolerated non-valid frames within the token.

        `max_continuous_silence` : *(int)*
            Maximum number of consecutive non-valid frames within a token.
            Note that, within a valid token, there may be many tolerated \
            *silent* regions, each containing up to `max_continuous_silence` \
            non-valid frames.

        `init_min` : *(int, default=0)*
            Minimum number of consecutive valid frames that must be **initially** \
            gathered before any sequence of non-valid frames can be tolerated. This
            option is not always needed; it can be used to drop non-valid tokens as
            early as possible. **Default = 0** means that the option is by default
            ineffective.

        `init_max_silence` : *(int, default=0)*
            Maximum number of tolerated consecutive non-valid frames if the \
            number of already gathered valid frames has not yet reached `init_min`.
            This argument is normally used together with `init_min`. **Default = 0**,
            i.e. by default this argument is not taken into consideration.

        `mode` : *(int, default=0)*
            `mode` can be:

            1. `StreamTokenizer.STRICT_MIN_LENGTH`:
               if token *i* is delivered because `max_length`
               is reached, and token *i+1* is immediately adjacent to
               token *i* (i.e. token *i* ends at frame *k* and token *i+1* starts
               at frame *k+1*) then accept token *i+1* only if it has a size of at
               least `min_length`. The default behavior is to accept token *i+1*
               even if it is shorter than `min_length` (given that the above conditions
               are fulfilled of course).

               :Examples:

               In the following code, without `STRICT_MIN_LENGTH`, the 'BB' token is
               accepted although it is shorter than `min_length` (3), because it immediately
               follows the latest delivered token:

               .. code:: python

                   from auditok import StreamTokenizer, StringDataSource, DataValidator

                   class UpperCaseChecker(DataValidator):
                       def is_valid(self, frame):
                           return frame.isupper()


                   dsource = StringDataSource("aaaAAAABBbbb")
                   tokenizer = StreamTokenizer(validator=UpperCaseChecker(),
                                               min_length=3,
                                               max_length=4,
                                               max_continuous_silence=0)

                   tokenizer.tokenize(dsource)

               :output:

               .. code:: python

                   [(['A', 'A', 'A', 'A'], 3, 6), (['B', 'B'], 7, 8)]


               The following tokenizer will however reject the 'BB' token:

               .. code:: python

                   dsource = StringDataSource("aaaAAAABBbbb")
                   tokenizer = StreamTokenizer(validator=UpperCaseChecker(),
                                               min_length=3, max_length=4,
                                               max_continuous_silence=0,
                                               mode=StreamTokenizer.STRICT_MIN_LENGTH)
                   tokenizer.tokenize(dsource)

               :output:

               .. code:: python

                   [(['A', 'A', 'A', 'A'], 3, 6)]


            2. `StreamTokenizer.DROP_TRAILING_SILENCE`: drop all trailing non-valid frames
               from a token to be delivered if and only if it is not **truncated**.
               This can be a bit tricky. A token is actually delivered if:

               - a. `max_continuous_silence` is reached

               :or:

               - b. Its length reaches `max_length`. This is called a **truncated** token.

               In the current implementation, a `StreamTokenizer`'s decision is only based on already seen
               data and on incoming data. Thus, if a token is truncated at a non-valid but tolerated
               frame (`max_length` is reached but `max_continuous_silence` is not yet) any trailing
               silence will be kept because it can potentially be part of a valid token (if `max_length`
               were bigger). But if `max_continuous_silence` is reached before `max_length`, the delivered
               token will not be considered as truncated but as the result of a *normal* end of detection
               (i.e. no more valid data). In that case the trailing silence can be removed if you use
               the `StreamTokenizer.DROP_TRAILING_SILENCE` mode.

               :Example:

               .. code:: python

                   tokenizer = StreamTokenizer(validator=UpperCaseChecker(), min_length=3,
                                               max_length=6, max_continuous_silence=3,
                                               mode=StreamTokenizer.DROP_TRAILING_SILENCE)

                   dsource = StringDataSource("aaaAAAaaaBBbbbb")
                   tokenizer.tokenize(dsource)

               :output:

               .. code:: python

                   [(['A', 'A', 'A', 'a', 'a', 'a'], 3, 8), (['B', 'B'], 9, 10)]

               The first token is delivered with its trailing silence because it is truncated
               while the second one has its trailing frames removed.

               Without `StreamTokenizer.DROP_TRAILING_SILENCE` the output would be:

               .. code:: python

                   [(['A', 'A', 'A', 'a', 'a', 'a'], 3, 8), (['B', 'B', 'b', 'b', 'b'], 9, 13)]


            3. `StreamTokenizer.STRICT_MIN_LENGTH | StreamTokenizer.DROP_TRAILING_SILENCE`:
               use both options. That means: first remove trailing silence, then check if the
               token still has at least a length of `min_length`.
    """

    SILENCE = 0
    POSSIBLE_SILENCE = 1
    POSSIBLE_NOISE = 2
    NOISE = 3
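    # The four constants above are the automaton states used by _process().
    # A rough sketch of the transitions, as implemented below:
    #   SILENCE          -> POSSIBLE_NOISE or NOISE on the first valid frame
    #   POSSIBLE_NOISE   -> NOISE once `init_min` valid frames are gathered,
    #                       back to SILENCE if `init_max_silence` is exceeded
    #   NOISE            -> POSSIBLE_SILENCE on a tolerated non-valid frame
    #   POSSIBLE_SILENCE -> NOISE on a valid frame, or SILENCE (with the token
    #                       delivered) when `max_continuous_silence` is reached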

    STRICT_MIN_LENGTH = 2
    DROP_TRAILING_SILENCE = 4
    # alias
    DROP_TAILING_SILENCE = 4

    def __init__(self, validator,
                 min_length, max_length, max_continuous_silence,
                 init_min=0, init_max_silence=0,
                 mode=0):

        if not isinstance(validator, DataValidator):
            raise TypeError("'validator' must be an instance of 'DataValidator'")

        if max_length <= 0:
            raise ValueError("'max_length' must be > 0 (value={0})".format(max_length))

        if min_length <= 0 or min_length > max_length:
            raise ValueError("'min_length' must be > 0 and <= 'max_length' (value={0})".format(min_length))

        if max_continuous_silence >= max_length:
            raise ValueError("'max_continuous_silence' must be < 'max_length' (value={0})".format(max_continuous_silence))

        if init_min >= max_length:
            raise ValueError("'init_min' must be < 'max_length' (value={0})".format(init_min))

        self.validator = validator
        self.min_length = min_length
        self.max_length = max_length
        self.max_continuous_silence = max_continuous_silence
        self.init_min = init_min
        self.init_max_silent = init_max_silence

        self._mode = None
        self.set_mode(mode)
        self._strict_min_length = (mode & self.STRICT_MIN_LENGTH) != 0
        self._drop_trailing_silence = (mode & self.DROP_TRAILING_SILENCE) != 0

        self._deliver = None
        self._tokens = None
        self._state = None
        self._data = None
        self._contiguous_token = False

        self._init_count = 0
        self._silence_length = 0
        self._start_frame = 0
        self._current_frame = 0

    def set_mode(self, mode):
        # TODO: use properties and make these deprecated
        """
        :Parameters:

            `mode` : *(int)*
                New mode; must be one of:

                - `StreamTokenizer.STRICT_MIN_LENGTH`

                - `StreamTokenizer.DROP_TRAILING_SILENCE`

                - `StreamTokenizer.STRICT_MIN_LENGTH | StreamTokenizer.DROP_TRAILING_SILENCE`

                - `0` TODO: this mode should have a name

        See `StreamTokenizer.__init__` for more information about the mode.
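
        :Example:

            For instance, to activate both options on an existing `tokenizer`
            (illustrative):

            .. code:: python

                tokenizer.set_mode(StreamTokenizer.STRICT_MIN_LENGTH |
                                   StreamTokenizer.DROP_TRAILING_SILENCE)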
        """

        if mode not in [self.STRICT_MIN_LENGTH, self.DROP_TRAILING_SILENCE,
                        self.STRICT_MIN_LENGTH | self.DROP_TRAILING_SILENCE, 0]:

            raise ValueError("Wrong value for mode")

        self._mode = mode
        self._strict_min_length = (mode & self.STRICT_MIN_LENGTH) != 0
        self._drop_trailing_silence = (mode & self.DROP_TRAILING_SILENCE) != 0

    def get_mode(self):
        """
        Return the current mode. To check whether a specific mode is activated use
        the bitwise 'and' operator `&`. Example:

        .. code:: python

            if mode & self.STRICT_MIN_LENGTH != 0:
                do_something()
        """
        return self._mode

    def _reinitialize(self):
        self._contiguous_token = False
        self._data = []
        self._tokens = []
        self._state = self.SILENCE
        self._current_frame = -1
        self._deliver = self._append_token

    def tokenize(self, data_source, callback=None, generator=False):
        """
        Read data from `data_source`, one frame at a time, and process the read frames in
        order to detect sequences of frames that make up valid tokens.

        :Parameters:

            `data_source` : instance of the :class:`DataSource` class that implements a `read` method.
                `read` should return a slice of signal, i.e. a frame (of whatever \
                type as long as it can be processed by validator) and None if \
                there is no more signal.

            `callback` : an optional 3-argument function.
                If a `callback` function is given, it will be called each time a valid token
                is found.

            `generator` : *(bool, default=False)*
                If True, return a generator of tokens instead of a list.

        :Returns:
            A list of tokens if `callback` is None and `generator` is False, a
            generator of tokens if `generator` is True, and None if `callback`
            is given. Each token is a tuple with the following elements:

            .. code:: python

                (data, start, end)

            where `data` is a list of read frames, `start` is the index of the first frame
            in the original data and `end` is the index of the last frame.

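        :Example:

            An illustrative sketch using a callback (reusing the `UpperCaseChecker`
            validator from the class docstring above):

            .. code:: python

                def on_token(data, start, end):
                    print("token from {} to {}: {}".format(start, end, data))

                dsource = StringDataSource("aaaAAAABBbbb")
                tokenizer = StreamTokenizer(validator=UpperCaseChecker(),
                                            min_length=3, max_length=4,
                                            max_continuous_silence=0)
                tokenizer.tokenize(dsource, callback=on_token)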
        """
        token_gen = self._iter_tokens(data_source)
        if callback:
            for token in token_gen:
                callback(*token)
            return
        if generator:
            return token_gen
        return list(token_gen)

    def _iter_tokens(self, data_source):
        self._reinitialize()
        while True:
            frame = data_source.read()
            self._current_frame += 1
            if frame is None:
                token = self._post_process()
                if token is not None:
                    yield token
                break
            token = self._process(frame)
            if token is not None:
                yield token

    def _process(self, frame):

        frame_is_valid = self.validator.is_valid(frame)

        if self._state == self.SILENCE:

            if frame_is_valid:
                # seems we got a valid frame after a silence
                self._init_count = 1
                self._silence_length = 0
                self._start_frame = self._current_frame
                self._data.append(frame)

                if self._init_count >= self.init_min:
                    self._state = self.NOISE
                    if len(self._data) >= self.max_length:
                        return self._process_end_of_detection(True)
                else:
                    self._state = self.POSSIBLE_NOISE

        elif self._state == self.POSSIBLE_NOISE:

            if frame_is_valid:
                self._silence_length = 0
                self._init_count += 1
                self._data.append(frame)
                if self._init_count >= self.init_min:
                    self._state = self.NOISE
                    if len(self._data) >= self.max_length:
                        return self._process_end_of_detection(True)

            else:
                self._silence_length += 1
                if self._silence_length > self.init_max_silent or \
                   len(self._data) + 1 >= self.max_length:
                    # either init_max_silence or max_length is reached
                    # before init_min valid frames were gathered: back to silence
                    self._data = []
                    self._state = self.SILENCE
                else:
                    self._data.append(frame)

        elif self._state == self.NOISE:

            if frame_is_valid:
                self._data.append(frame)
                if len(self._data) >= self.max_length:
                    return self._process_end_of_detection(True)

            elif self.max_continuous_silence <= 0:
                # a non-valid frame and no silence is tolerated: end of detection.
                # The gathered frames are delivered if they make up a valid token
                # (or a token contiguous with the previous one when
                # _strict_min_length is not set)
                self._state = self.SILENCE
                return self._process_end_of_detection()
            else:
                # this is the first silent frame following a valid one
                # and it is tolerated
                self._silence_length = 1
                self._data.append(frame)
                self._state = self.POSSIBLE_SILENCE
                if len(self._data) == self.max_length:
                    return self._process_end_of_detection(True)
                # don't reset _silence_length because we still
                # need to know the total number of silent frames

        elif self._state == self.POSSIBLE_SILENCE:

            if frame_is_valid:
                self._data.append(frame)
                self._silence_length = 0
                self._state = self.NOISE
                if len(self._data) >= self.max_length:
                    return self._process_end_of_detection(True)

            else:
                if self._silence_length >= self.max_continuous_silence:
                    self._state = self.SILENCE
                    if self._silence_length < len(self._data):
                        # deliver only if the gathered frames aren't all silent
                        return self._process_end_of_detection()
                    self._data = []
                    self._silence_length = 0
                else:
                    self._data.append(frame)
                    self._silence_length += 1
                    if len(self._data) >= self.max_length:
                        return self._process_end_of_detection(True)
                    # don't reset _silence_length because we still
                    # need to know the total number of silent frames

    def _post_process(self):
        if self._state == self.NOISE or self._state == self.POSSIBLE_SILENCE:
            if len(self._data) > 0 and len(self._data) > self._silence_length:
                return self._process_end_of_detection()

    def _process_end_of_detection(self, truncated=False):

        if not truncated and self._drop_trailing_silence and self._silence_length > 0:
            # happens if max_continuous_silence is reached
            # or max_length is reached at a silent frame
            self._data = self._data[0:-self._silence_length]

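        # A token is delivered if it is long enough (>= min_length), or if it
        # is non-empty and contiguous with the previously delivered (truncated)
        # token while STRICT_MIN_LENGTH is not set.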
        if (len(self._data) >= self.min_length) or \
           (len(self._data) > 0 and
            not self._strict_min_length and self._contiguous_token):

            start_frame = self._start_frame
            end_frame = self._start_frame + len(self._data) - 1
            data = self._data
            self._data = []
            token = (data, start_frame, end_frame)

            if truncated:
                # next token (if any) will start at _current_frame + 1
                self._start_frame = self._current_frame + 1
                # remember that it is contiguous with the just delivered one
                self._contiguous_token = True
            else:
                self._contiguous_token = False
            return token
        else:
            self._contiguous_token = False

        self._data = []

    def _append_token(self, data, start, end):
        self._tokens.append((data, start, end))