"""
This module gathers processing (i.e. tokenization) classes.

Class summary
=============

.. autosummary::

    AudioRegion
    StreamTokenizer
"""

from auditok.util import DataValidator
from auditok.io import check_audio_data

__all__ = ["AudioRegion", "StreamTokenizer"]


class AudioRegion(object):

    def __init__(self, data, start, sampling_rate, sample_width, channels):
        """
        A class for detected audio events.

        :Parameters:

            data: bytes
                audio data
            start: float
                start time in seconds
            sampling_rate: int
                sampling rate of audio data
            sample_width: int
                number of bytes of one audio sample
            channels: int
                number of channels of audio data
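
        :Example:

            A minimal, illustrative sketch (the byte string below stands in
            for one second of 8 kHz, 8-bit mono audio; real data would come
            from an actual audio source):

            .. code:: python

                region = AudioRegion(b"0" * 8000, start=0,
                                     sampling_rate=8000, sample_width=1,
                                     channels=1)
                region.duration    # 1.0 (seconds)
                len(region)        # 1000 (milliseconds)
                bytes(region)      # the raw audio data
                longer = region + region
                longer.duration    # 2.0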
        """
        check_audio_data(data, sample_width, channels)
        self._data = data
        self._start = start
        self._sampling_rate = sampling_rate
        self._sample_width = sample_width
        self._channels = channels

    @property
    def start(self):
        return self._start

    @property
    def end(self):
        return self.start + self.duration

    @property
    def duration(self):
        """
        Returns region duration in seconds.
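
        For instance (illustrative values): 16000 bytes of audio sampled at
        8000 Hz with 2-byte samples and a single channel last
        16000 / (8000 * 2 * 1) = 1.0 second.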
        """
        return len(self._data) / (self.sampling_rate *
                                  self.sample_width *
                                  self.channels)

    @property
    def sampling_rate(self):
        return self._sampling_rate

    @property
    def sr(self):
        return self._sampling_rate

    @property
    def sample_width(self):
        return self._sample_width

    @property
    def sw(self):
        return self._sample_width

    @property
    def channels(self):
        return self._channels

    @property
    def ch(self):
        return self._channels

    def __len__(self):
        """
        Returns region duration in milliseconds.
        """
        return round(self.duration * 1000)

    def __bytes__(self):
        return self._data

    def __repr__(self):
        return ('AudioRegion(data, start={:.3f}, end={:.3f}, '
                'sampling_rate={}, sample_width={}, '
                'channels={})'.format(self.start, self.end,
                                      self.sr, self.sw, self.ch))

    def __str__(self):
        return ('AudioRegion(start={:.3f}, end={:.3f}, '
                'duration={:.3f})'.format(self.start, self.end, self.duration))

    def __add__(self, other):
        """
        Concatenates this region and `other` and returns a new region.
        Both regions must have the same sampling rate, sample width
        and number of channels. If not, raises a `ValueError`.
        """
        if not isinstance(other, AudioRegion):
            raise TypeError('Can only concatenate AudioRegion, '
                            'not "{}"'.format(type(other)))
        if other.sr != self.sr:
            raise ValueError('Can only concatenate AudioRegions of the same '
                             'sampling rate ({} != {})'.format(self.sr,
                                                               other.sr))
        if other.sw != self.sw:
            raise ValueError('Can only concatenate AudioRegions of the same '
                             'sample width ({} != {})'.format(self.sw,
                                                              other.sw))
        if other.ch != self.ch:
            raise ValueError('Can only concatenate AudioRegions of the same '
                             'number of channels ({} != {})'.format(self.ch,
                                                                    other.ch))
        data = self._data + other._data
        return AudioRegion(data, self.start, self.sr, self.sw, self.ch)

    def __radd__(self, other):
        """
        Concatenates `other` and this region. `other` should be an
        `AudioRegion` with the same audio parameters as this region,
        but it can exceptionally be `0` to make it possible to concatenate
        many regions with `sum`.
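
        :Example:

            An illustrative sketch, assuming `regions` is a list of
            `AudioRegion` objects sharing the same audio parameters:

            .. code:: python

                combined = sum(regions)  # concatenation of all regions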
        """
        if other == 0:
            return self
        return other.__add__(self)


class StreamTokenizer():
    """
    Class for stream tokenizers. It implements a 4-state automaton scheme
    to extract sub-sequences of interest on the fly.

    :Parameters:

        `validator` :
            instance of `DataValidator` that implements the `is_valid` method.

        `min_length` : *(int)*
            Minimum number of frames of a valid token. This includes all \
            tolerated non-valid frames within the token.

        `max_length` : *(int)*
            Maximum number of frames of a valid token. This includes all \
            tolerated non-valid frames within the token.

        `max_continuous_silence` : *(int)*
            Maximum number of consecutive non-valid frames within a token.
            Note that, within a valid token, there may be many tolerated \
            *silent* regions, each containing up to `max_continuous_silence` \
            non-valid frames.

        `init_min` : *(int, default=0)*
            Minimum number of consecutive valid frames that must be **initially** \
            gathered before any sequence of non-valid frames can be tolerated. This
            option is not always needed; it can be used to drop non-valid tokens as
            early as possible. **Default = 0** means that the option is by default
            ineffective.

        `init_max_silence` : *(int, default=0)*
            Maximum number of tolerated consecutive non-valid frames if the \
            number of already gathered valid frames has not yet reached `init_min`.
            This argument is normally used together with `init_min`. **Default = 0**,
            i.e. by default this argument is not taken into consideration.

        `mode` : *(int, default=0)*
            `mode` can be:

            1. `StreamTokenizer.STRICT_MIN_LENGTH`:
               if token *i* is delivered because `max_length`
               is reached, and token *i+1* is immediately adjacent to
               token *i* (i.e. token *i* ends at frame *k* and token *i+1* starts
               at frame *k+1*) then accept token *i+1* only if it has a size of at
               least `min_length`. The default behavior is to accept token *i+1*
               even if it is shorter than `min_length` (given that the above conditions
               are fulfilled of course).

               :Examples:

               In the following code, without `STRICT_MIN_LENGTH`, the 'BB' token is
               accepted although it is shorter than `min_length` (3), because it immediately
               follows the latest delivered token:

               .. code:: python

                   from auditok import StreamTokenizer, StringDataSource, DataValidator

                   class UpperCaseChecker(DataValidator):
                       def is_valid(self, frame):
                           return frame.isupper()


                   dsource = StringDataSource("aaaAAAABBbbb")
                   tokenizer = StreamTokenizer(validator=UpperCaseChecker(),
                                               min_length=3,
                                               max_length=4,
                                               max_continuous_silence=0)

                   tokenizer.tokenize(dsource)

               :output:

               .. code:: python

                   [(['A', 'A', 'A', 'A'], 3, 6), (['B', 'B'], 7, 8)]


               The following tokenizer will however reject the 'BB' token:

               .. code:: python

                   dsource = StringDataSource("aaaAAAABBbbb")
                   tokenizer = StreamTokenizer(validator=UpperCaseChecker(),
                                               min_length=3, max_length=4,
                                               max_continuous_silence=0,
                                               mode=StreamTokenizer.STRICT_MIN_LENGTH)
                   tokenizer.tokenize(dsource)

               :output:

               .. code:: python

                   [(['A', 'A', 'A', 'A'], 3, 6)]


            2. `StreamTokenizer.DROP_TRAILING_SILENCE`: drop all trailing non-valid frames
               from a token to be delivered if and only if it is not **truncated**.
               This can be a bit tricky. A token is actually delivered if:

               - a. `max_continuous_silence` is reached

               :or:

               - b. Its length reaches `max_length`. This is called a **truncated** token.

               In the current implementation, a `StreamTokenizer`'s decision is only based on already seen
               data and on incoming data. Thus, if a token is truncated at a non-valid but tolerated
               frame (`max_length` is reached but `max_continuous_silence` is not yet) any trailing
               silence will be kept because it can potentially be part of a valid token (if `max_length`
               were bigger). But if `max_continuous_silence` is reached before `max_length`, the delivered
               token will not be considered as truncated but as the result of a *normal* end of detection
               (i.e. no more valid data). In that case the trailing silence can be removed if you use
               the `StreamTokenizer.DROP_TRAILING_SILENCE` mode.

               :Example:

               .. code:: python

                   tokenizer = StreamTokenizer(validator=UpperCaseChecker(), min_length=3,
                                               max_length=6, max_continuous_silence=3,
                                               mode=StreamTokenizer.DROP_TRAILING_SILENCE)

                   dsource = StringDataSource("aaaAAAaaaBBbbbb")
                   tokenizer.tokenize(dsource)

               :output:

               .. code:: python

                   [(['A', 'A', 'A', 'a', 'a', 'a'], 3, 8), (['B', 'B'], 9, 10)]

               The first token is delivered with its trailing silence because it is truncated
               while the second one has its trailing frames removed.

               Without `StreamTokenizer.DROP_TRAILING_SILENCE` the output would be:

               .. code:: python

                   [(['A', 'A', 'A', 'a', 'a', 'a'], 3, 8), (['B', 'B', 'b', 'b', 'b'], 9, 13)]


            3. `StreamTokenizer.STRICT_MIN_LENGTH | StreamTokenizer.DROP_TRAILING_SILENCE`:
               use both options. That means: first remove trailing silence, then check if the
               token still has at least a length of `min_length`.
    """

    SILENCE = 0
    POSSIBLE_SILENCE = 1
    POSSIBLE_NOISE = 2
    NOISE = 3
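    # The four constants above are the automaton states used by _process().
    # A rough sketch of the transitions, as implemented below:
    #   SILENCE          -> POSSIBLE_NOISE or NOISE on the first valid frame
    #   POSSIBLE_NOISE   -> NOISE once `init_min` valid frames are gathered,
    #                       back to SILENCE if `init_max_silence` is exceeded
    #   NOISE            -> POSSIBLE_SILENCE on a tolerated non-valid frame
    #   POSSIBLE_SILENCE -> NOISE on a valid frame, or SILENCE (with the token
    #                       delivered) when `max_continuous_silence` is reached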

    STRICT_MIN_LENGTH = 2
    DROP_TRAILING_SILENCE = 4
    # alias
    DROP_TAILING_SILENCE = 4

    def __init__(self, validator,
                 min_length, max_length, max_continuous_silence,
                 init_min=0, init_max_silence=0,
                 mode=0):

        if not isinstance(validator, DataValidator):
            raise TypeError("'validator' must be an instance of 'DataValidator'")

        if max_length <= 0:
            raise ValueError("'max_length' must be > 0 (value={0})".format(max_length))

        if min_length <= 0 or min_length > max_length:
            raise ValueError("'min_length' must be > 0 and <= 'max_length' (value={0})".format(min_length))

        if max_continuous_silence >= max_length:
            raise ValueError("'max_continuous_silence' must be < 'max_length' (value={0})".format(max_continuous_silence))

        if init_min >= max_length:
            raise ValueError("'init_min' must be < 'max_length' (value={0})".format(init_min))

        self.validator = validator
        self.min_length = min_length
        self.max_length = max_length
        self.max_continuous_silence = max_continuous_silence
        self.init_min = init_min
        self.init_max_silent = init_max_silence

        self._mode = None
        self.set_mode(mode)
        self._strict_min_length = (mode & self.STRICT_MIN_LENGTH) != 0
        self._drop_trailing_silence = (mode & self.DROP_TRAILING_SILENCE) != 0

        self._deliver = None
        self._tokens = None
        self._state = None
        self._data = None
        self._contiguous_token = False

        self._init_count = 0
        self._silence_length = 0
        self._start_frame = 0
        self._current_frame = 0

    def set_mode(self, mode):
        # TODO: use properties and make these deprecated
        """
        :Parameters:

            `mode` : *(int)*
                New mode; must be one of:

                - `StreamTokenizer.STRICT_MIN_LENGTH`

                - `StreamTokenizer.DROP_TRAILING_SILENCE`

                - `StreamTokenizer.STRICT_MIN_LENGTH | StreamTokenizer.DROP_TRAILING_SILENCE`

                - `0` TODO: this mode should have a name

        See `StreamTokenizer.__init__` for more information about the mode.
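
        :Example:

            For instance, to activate both options on an existing `tokenizer`
            (illustrative):

            .. code:: python

                tokenizer.set_mode(StreamTokenizer.STRICT_MIN_LENGTH |
                                   StreamTokenizer.DROP_TRAILING_SILENCE)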
        """

        if mode not in [self.STRICT_MIN_LENGTH, self.DROP_TRAILING_SILENCE,
                        self.STRICT_MIN_LENGTH | self.DROP_TRAILING_SILENCE, 0]:

            raise ValueError("Wrong value for mode")

        self._mode = mode
        self._strict_min_length = (mode & self.STRICT_MIN_LENGTH) != 0
        self._drop_trailing_silence = (mode & self.DROP_TRAILING_SILENCE) != 0

    def get_mode(self):
        """
        Return the current mode. To check whether a specific mode is activated use
        the bitwise 'and' operator `&`. Example:

        .. code:: python

            if mode & self.STRICT_MIN_LENGTH != 0:
                do_something()
        """
        return self._mode

    def _reinitialize(self):
        self._contiguous_token = False
        self._data = []
        self._tokens = []
        self._state = self.SILENCE
        self._current_frame = -1
        self._deliver = self._append_token

    def tokenize(self, data_source, callback=None, generator=False):
        """
        Read data from `data_source`, one frame at a time, and process the read frames in
        order to detect sequences of frames that make up valid tokens.

        :Parameters:

            `data_source` : instance of the :class:`DataSource` class that implements a `read` method.
                `read` should return a slice of signal, i.e. a frame (of whatever \
                type as long as it can be processed by validator) and None if \
                there is no more signal.

            `callback` : an optional 3-argument function.
                If a `callback` function is given, it will be called each time a valid token
                is found.

            `generator` : *(bool, default=False)*
                If True, return a generator of tokens instead of a list.

        :Returns:
            A list of tokens if `callback` is None and `generator` is False, a
            generator of tokens if `generator` is True, and None if `callback`
            is given. Each token is a tuple with the following elements:

            .. code:: python

                (data, start, end)

            where `data` is a list of read frames, `start` is the index of the first frame
            in the original data and `end` is the index of the last frame.

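        :Example:

            An illustrative sketch using a callback (reusing the `UpperCaseChecker`
            validator from the class docstring above):

            .. code:: python

                def on_token(data, start, end):
                    print("token from {} to {}: {}".format(start, end, data))

                dsource = StringDataSource("aaaAAAABBbbb")
                tokenizer = StreamTokenizer(validator=UpperCaseChecker(),
                                            min_length=3, max_length=4,
                                            max_continuous_silence=0)
                tokenizer.tokenize(dsource, callback=on_token)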
        """
        token_gen = self._iter_tokens(data_source)
        if callback:
            for token in token_gen:
                callback(*token)
            return
        if generator:
            return token_gen
        return list(token_gen)

    def _iter_tokens(self, data_source):
        self._reinitialize()
        while True:
            frame = data_source.read()
            self._current_frame += 1
            if frame is None:
                token = self._post_process()
                if token is not None:
                    yield token
                break
            token = self._process(frame)
            if token is not None:
                yield token

    def _process(self, frame):

        frame_is_valid = self.validator.is_valid(frame)

        if self._state == self.SILENCE:

            if frame_is_valid:
                # seems we got a valid frame after a silence
                self._init_count = 1
                self._silence_length = 0
                self._start_frame = self._current_frame
                self._data.append(frame)

                if self._init_count >= self.init_min:
                    self._state = self.NOISE
                    if len(self._data) >= self.max_length:
                        return self._process_end_of_detection(True)
                else:
                    self._state = self.POSSIBLE_NOISE

        elif self._state == self.POSSIBLE_NOISE:

            if frame_is_valid:
                self._silence_length = 0
                self._init_count += 1
                self._data.append(frame)
                if self._init_count >= self.init_min:
                    self._state = self.NOISE
                    if len(self._data) >= self.max_length:
                        return self._process_end_of_detection(True)

            else:
                self._silence_length += 1
                if self._silence_length > self.init_max_silent or \
                   len(self._data) + 1 >= self.max_length:
                    # either init_max_silence or max_length is reached
                    # before init_min valid frames were gathered: back to silence
                    self._data = []
                    self._state = self.SILENCE
                else:
                    self._data.append(frame)

        elif self._state == self.NOISE:

            if frame_is_valid:
                self._data.append(frame)
                if len(self._data) >= self.max_length:
                    return self._process_end_of_detection(True)

            elif self.max_continuous_silence <= 0:
                # a non-valid frame and no silence is tolerated: end of detection.
                # The gathered frames are delivered if they make up a valid token
                # (or a token contiguous with the previous one when
                # _strict_min_length is not set)
                self._state = self.SILENCE
                return self._process_end_of_detection()
            else:
                # this is the first silent frame following a valid one
                # and it is tolerated
                self._silence_length = 1
                self._data.append(frame)
                self._state = self.POSSIBLE_SILENCE
                if len(self._data) == self.max_length:
                    return self._process_end_of_detection(True)
                # don't reset _silence_length because we still
                # need to know the total number of silent frames

        elif self._state == self.POSSIBLE_SILENCE:

            if frame_is_valid:
                self._data.append(frame)
                self._silence_length = 0
                self._state = self.NOISE
                if len(self._data) >= self.max_length:
                    return self._process_end_of_detection(True)

            else:
                if self._silence_length >= self.max_continuous_silence:
                    self._state = self.SILENCE
                    if self._silence_length < len(self._data):
                        # deliver only if the gathered frames aren't all silent
                        return self._process_end_of_detection()
                    self._data = []
                    self._silence_length = 0
                else:
                    self._data.append(frame)
                    self._silence_length += 1
                    if len(self._data) >= self.max_length:
                        return self._process_end_of_detection(True)
                    # don't reset _silence_length because we still
                    # need to know the total number of silent frames

    def _post_process(self):
        if self._state == self.NOISE or self._state == self.POSSIBLE_SILENCE:
            if len(self._data) > 0 and len(self._data) > self._silence_length:
                return self._process_end_of_detection()

    def _process_end_of_detection(self, truncated=False):

        if not truncated and self._drop_trailing_silence and self._silence_length > 0:
            # happens if max_continuous_silence is reached
            # or max_length is reached at a silent frame
            self._data = self._data[0:-self._silence_length]

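        # A token is delivered if it is long enough (>= min_length), or if it
        # is non-empty and contiguous with the previously delivered (truncated)
        # token while STRICT_MIN_LENGTH is not set.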
        if (len(self._data) >= self.min_length) or \
           (len(self._data) > 0 and
            not self._strict_min_length and self._contiguous_token):

            start_frame = self._start_frame
            end_frame = self._start_frame + len(self._data) - 1
            data = self._data
            self._data = []
            token = (data, start_frame, end_frame)

            if truncated:
                # next token (if any) will start at _current_frame + 1
                self._start_frame = self._current_frame + 1
                # remember that it is contiguous with the just delivered one
                self._contiguous_token = True
            else:
                self._contiguous_token = False
            return token
        else:
            self._contiguous_token = False

        self._data = []

    def _append_token(self, data, start, end):
        self._tokens.append((data, start, end))