annotate auditok/core.py @ 246:936511b60745

Implement __array__ and samples in AudioRegion
author Amine Sehili <amine.sehili@gmail.com>
date Tue, 20 Aug 2019 20:26:04 +0100
parents 1dfba457a9e1
children 56ff493c2b97
rev   line source
amine@33 1 """
amine@33 2 This module gathers processing (i.e. tokenization) classes.
amine@33 3
amine@33 4 Class summary
amine@33 5 =============
amine@33 6
amine@33 7 .. autosummary::
amine@33 8
amine@33 9 StreamTokenizer
amine@33 10 """
amine@187 11 import os
amine@222 12 import math
amine@179 13 from auditok.util import AudioDataSource, DataValidator, AudioEnergyValidator
amine@239 14 from auditok.io import check_audio_data, to_file, player_for, get_audio_source
amine@236 15 from auditok.exceptions import TooSamllBlockDuration
amine@2 16
amine@246 17 try:
amine@246 18 from . import signal_numpy as signal
amine@246 19 except ImportError:
amine@246 20 from . import signal
amine@246 21
__all__ = ["split", "AudioRegion", "StreamTokenizer"]


# Default duration, in seconds, of one analysis window used by `split`.
DEFAULT_ANALYSIS_WINDOW = 0.05
# Default energy threshold used when no custom validator is supplied.
DEFAULT_ENERGY_THRESHOLD = 50
# Small value added before flooring to counter float rounding errors
# (e.g., 0.3 / 0.1 == 2.9999999999999996).
_EPSILON = 1e-6
amine@179 28
amine@179 29
def split(
    input,
    min_dur=0.2,
    max_dur=5,
    max_silence=0.3,
    drop_trailing_silence=False,
    strict_min_dur=False,
    **kwargs
):
    """Split audio data and return a generator of `AudioRegion`s.
    TODO: implement max_trailing_silence

    :Parameters:

    input: str, bytes, AudioSource, AudioRegion, AudioDataSource
        input audio data. If str, it should be a path to an existing audio
        file. If bytes, input is considered as raw audio data.
    min_dur: float
        minimum duration in seconds of a detected audio event. Default: 0.2.
        Using large values, very short audio events (e.g., very short 1-word
        utterances like 'yes' or 'no') can be missed.
        Using very short values might result in a high number of short,
        unuseful audio events.
    max_dur: float
        maximum duration in seconds of a detected audio event. Default: 5.
    max_silence: float
        maximum duration of consecutive silence within an audio event. There
        might be many silent gaps of this duration within an audio event.
    drop_trailing_silence: bool
        drop trailing silence from detected events
    strict_min_dur: bool
        strict minimum duration. Drop an event if it is shorter than `min_dur`
        even if it is contiguous to the latest valid event. This happens if
        the latest event had reached `max_dur`.
    analysis_window, aw: float
        duration of analysis window in seconds. Default: 0.05 second (50 ms).
        A value up to 0.1 second (100 ms) should be good for most use-cases.
        You might need a different value, especially if you use a custom
        validator.
    audio_format, fmt: str
        type of audio data (e.g., wav, ogg, raw, etc.). This will only be
        used if `input` is a string path to an audio file. If not given,
        audio type will be guessed from file name extension or from file
        header.
    sampling_rate, sr: int
        sampling rate of audio data. Only needed for raw audio files/data.
    sample_width, sw: int
        number of bytes used to encode an audio sample, typically 1, 2 or 4.
        Only needed for raw audio files/data.
    channels, ch: int
        number of channels of audio data. Only needed for raw audio files.
    use_channel, uc: int, str
        which channel to use if input has multichannel audio data. Can be an
        int (0 being the first channel), or one of the following values:
        - None, "any": a valid frame from any given channel makes
          parallel frames from all other channels automatically valid.
        - 'mix': compute average channel (i.e. mix down all channels)
    max_read, mr: float
        maximum data to read in seconds. Default: `None`, read until there is
        no more data to read.
    validator, val: DataValidator
        custom data validator. If `None` (default), an `AudioEnergyValidator`
        is used with the given energy threshold.
    energy_threshold, eth: float
        energy threshold for audio activity detection, default: 50. If a
        custom validator is given, this argument will be ignored.
    """
    # Validate user-facing duration parameters up front.
    if min_dur <= 0:
        raise ValueError("'min_dur' ({}) must be > 0".format(min_dur))
    if max_dur <= 0:
        raise ValueError("'max_dur' ({}) must be > 0".format(max_dur))
    if max_silence < 0:
        raise ValueError("'max_silence' ({}) must be >= 0".format(max_silence))

    if isinstance(input, AudioDataSource):
        source = input
        analysis_window = source.block_dur
    else:
        analysis_window = kwargs.get(
            "analysis_window", kwargs.get("aw", DEFAULT_ANALYSIS_WINDOW)
        )
        if analysis_window <= 0:
            raise ValueError(
                "'analysis_window' ({}) must be > 0".format(analysis_window)
            )

        # Normalize short-form aliases before building the data source.
        params = kwargs.copy()
        params["max_read"] = params.get("max_read", params.get("mr"))
        params["audio_format"] = params.get("audio_format", params.get("fmt"))
        if isinstance(input, AudioRegion):
            # Reuse the region's audio parameters and its raw data.
            params["sampling_rate"] = input.sr
            params["sample_width"] = input.sw
            params["channels"] = input.ch
            input = bytes(input)
        try:
            source = AudioDataSource(
                input, block_dur=analysis_window, **params
            )
        except TooSamllBlockDuration as exc:
            err_msg = "Too small 'analysis_windows' ({0}) for sampling rate "
            err_msg += "({1}). Analysis windows should at least be 1/{1} to "
            err_msg += "cover one single data sample"
            # Chain the original exception for a complete traceback.
            raise ValueError(
                err_msg.format(exc.block_dur, exc.sampling_rate)
            ) from exc

    validator = kwargs.get("validator", kwargs.get("val"))
    if validator is None:
        energy_threshold = kwargs.get(
            "energy_threshold", kwargs.get("eth", DEFAULT_ENERGY_THRESHOLD)
        )
        use_channel = kwargs.get("use_channel", kwargs.get("uc"))
        validator = AudioEnergyValidator(
            energy_threshold, source.sw, source.ch, use_channel=use_channel
        )
    mode = (
        StreamTokenizer.DROP_TRAILING_SILENCE if drop_trailing_silence else 0
    )
    if strict_min_dur:
        mode |= StreamTokenizer.STRICT_MIN_LENGTH
    # Convert second-based parameters to numbers of analysis windows.
    # ceil for min_dur (cover at least that long), floor for the maxima.
    min_length = _duration_to_nb_windows(min_dur, analysis_window, math.ceil)
    max_length = _duration_to_nb_windows(
        max_dur, analysis_window, math.floor, _EPSILON
    )
    max_continuous_silence = _duration_to_nb_windows(
        max_silence, analysis_window, math.floor, _EPSILON
    )

    err_msg = "({0} sec.) results in {1} analysis window(s) "
    err_msg += "({1} == {6}({0} / {2})) which is {5} the number "
    err_msg += "of analysis window(s) for 'max_dur' ({3} == floor({4} / {2}))"
    if min_length > max_length:
        err_msg = "'min_dur' " + err_msg
        raise ValueError(
            err_msg.format(
                min_dur,
                min_length,
                analysis_window,
                max_length,
                max_dur,
                "higher than",
                "ceil",
            )
        )

    if max_continuous_silence >= max_length:
        err_msg = "'max_silence' " + err_msg
        raise ValueError(
            err_msg.format(
                max_silence,
                max_continuous_silence,
                analysis_window,
                max_length,
                max_dur,
                "higher or equal to",
                "floor",
            )
        )

    tokenizer = StreamTokenizer(
        validator, min_length, max_length, max_continuous_silence, mode=mode
    )
    source.open()
    token_gen = tokenizer.tokenize(source, generator=True)
    # Lazily wrap each token (frames, start, end) into an AudioRegion.
    region_gen = (
        _make_audio_region(
            source.block_dur,
            token[1],
            token[0],
            source.sr,
            source.sw,
            source.ch,
        )
        for token in token_gen
    )
    return region_gen
amine@179 204
amine@179 205
amine@236 206 def _duration_to_nb_windows(
amine@236 207 duration, analysis_window, round_fn=round, epsilon=0
amine@236 208 ):
amine@179 209 """
amine@215 210 Converts a given duration into a positive integer of analysis windows.
amine@179 211 if `duration / analysis_window` is not an integer, the result will be
amine@179 212 rounded to the closest bigger integer. If `duration == 0`, returns `0`.
amine@215 213 If `duration < analysis_window`, returns 1.
amine@179 214 `duration` and `analysis_window` can be in seconds or milliseconds but
amine@179 215 must be in the same unit.
amine@179 216
amine@179 217 :Parameters:
amine@179 218
amine@179 219 duration: float
amine@232 220 a given duration in seconds or ms.
amine@179 221 analysis_window: float
amine@232 222 size of analysis window, in the same unit as `duration`.
amine@232 223 round_fn: callable
amine@232 224 function called to round the result. Default: `round`.
amine@232 225 epsilon: float
amine@232 226 small value to add to the division result before rounding.
amine@232 227 E.g., `0.3 / 0.1 = 2.9999999999999996`, when called with
amine@232 228 `round_fn=math.floor` returns `2` instead of `3`. Adding a small value
amine@232 229 to `0.3 / 0.1` avoids this error.
amine@179 230
amine@179 231 Returns:
amine@179 232 --------
amine@179 233 nb_windows: int
amine@179 234 minimum number of `analysis_window`'s to cover `durartion`. That means
amine@179 235 that `analysis_window * nb_windows >= duration`.
amine@179 236 """
amine@215 237 if duration < 0 or analysis_window <= 0:
amine@215 238 err_msg = "'duration' ({}) must be >= 0 and 'analysis_window' ({}) > 0"
amine@215 239 raise ValueError(err_msg.format(duration, analysis_window))
amine@179 240 if duration == 0:
amine@179 241 return 0
amine@232 242 return int(round_fn(duration / analysis_window + epsilon))
amine@179 243
amine@179 244
def _make_audio_region(
    frame_duration,
    start_frame,
    data_frames,
    sampling_rate,
    sample_width,
    channels,
):
    """Create and return an `AudioRegion`.

    :Parameters:

    frame_duration: float
        duration of analysis window in seconds
    start_frame: int
        index of the first analysis window
    data_frames: iterable of bytes
        analysis windows (raw audio frames) making up the region's data
    sampling_rate: int
        sampling rate of audio data
    sample_width: int
        number of bytes of one audio sample
    channels: int
        number of channels of audio data

    Returns:
    audio_region: AudioRegion
        AudioRegion whose `meta.start` (in seconds) is calculated as:
        `start_frame * frame_duration`
    """
    start = start_frame * frame_duration
    data = b"".join(data_frames)
    # Duration in seconds derived from byte length and audio parameters.
    duration = len(data) / (sampling_rate * sample_width * channels)
    meta = {"start": start, "end": start + duration}
    return AudioRegion(data, sampling_rate, sample_width, channels, meta)
amine@81 278
amine@81 279
amine@228 280 def _check_convert_index(index, types, err_msg):
amine@228 281 if not isinstance(index, slice) or index.step is not None:
amine@228 282 raise TypeError(err_msg)
amine@228 283 start = index.start if index.start is not None else 0
amine@228 284 stop = index.stop
amine@228 285 for index in (start, stop):
amine@228 286 if index is not None and not isinstance(index, types):
amine@228 287 raise TypeError(err_msg)
amine@228 288 return start, stop
amine@228 289
amine@228 290
amine@228 291 class _SecondsView:
amine@228 292 def __init__(self, region):
amine@228 293 self._region = region
amine@228 294
amine@228 295 def __getitem__(self, index):
amine@228 296 err_msg = "Slicing AudioRegion by seconds requires indices of type "
amine@228 297 err_msg += "'int' or 'float' without a step (e.g. region.sec[7.5:10])"
amine@228 298 start_s, stop_s = _check_convert_index(index, (int, float), err_msg)
amine@228 299 sr = self._region.sampling_rate
amine@228 300 start_sample = int(start_s * sr)
amine@228 301 stop_sample = None if stop_s is None else round(stop_s * sr)
amine@228 302 return self._region[start_sample:stop_sample]
amine@228 303
amine@245 304 @property
amine@245 305 def len(self):
amine@245 306 """
amine@245 307 Return region duration in seconds.
amine@245 308 """
amine@245 309 return self._region.duration
amine@245 310
amine@228 311
class _MillisView(_SecondsView):
    """A view over an `AudioRegion` that slices audio by milliseconds."""

    def __getitem__(self, index):
        err_msg = (
            "Slicing AudioRegion by milliseconds requires indices of type "
        )
        # Fixed example: this view is reached via `region.millis` (or
        # `region.ms`), not `region.sec` as the message previously said.
        err_msg += "'int' without a step (e.g. region.millis[500:1500])"
        start_ms, stop_ms = _check_convert_index(index, (int,), err_msg)
        # Convert to seconds and reuse the parent's second-based slicing.
        start_sec = start_ms / 1000
        stop_sec = None if stop_ms is None else stop_ms / 1000
        index = slice(start_sec, stop_sec)
        return super(_MillisView, self).__getitem__(index)

    def __len__(self):
        """
        Return region duration in milliseconds.
        """
        return round(self._region.duration * 1000)

    @property
    def len(self):
        """
        Return region duration in milliseconds.
        """
        return len(self)
amine@245 336
amine@228 337
amine@244 338 class _AudioRegionMetadata(dict):
amine@244 339 def __getattr__(self, name):
amine@244 340 if name in self:
amine@244 341 return self[name]
amine@244 342 else:
amine@244 343 err_msg = "AudioRegion metadata has no entry '{}'"
amine@244 344 raise AttributeError(err_msg.format(name))
amine@244 345
amine@244 346 def __setattr__(self, name, value):
amine@244 347 self[name] = value
amine@244 348
amine@244 349 def __str__(self):
amine@244 350 return "\n".join("{}: {}".format(k, v) for k, v in self.items())
amine@244 351
amine@244 352 def __repr__(self):
amine@244 353 return str(self)
amine@244 354
amine@244 355
class AudioRegion(object):
    def __init__(self, data, sampling_rate, sample_width, channels, meta=None):
        """
        A class for detected audio events.

        :Parameters:

        data: bytes
            audio data
        sampling_rate: int
            sampling rate of audio data
        sample_width: int
            number of bytes of one audio sample
        channels: int
            number of channels of audio data
        meta: dict
            optional metadata (e.g., the region's start/end within the
            original stream), exposed through the `meta` attribute.
        """
        check_audio_data(data, sample_width, channels)
        self._data = data
        self._sampling_rate = sampling_rate
        self._sample_width = sample_width
        self._channels = channels
        # Lazily computed sample array; see the `samples` property.
        self._samples = None

        if meta is not None:
            self._meta = _AudioRegionMetadata(meta)
        else:
            self._meta = None

        # `sec`/`s` and `millis`/`ms` allow slicing by seconds/milliseconds.
        self._seconds_view = _SecondsView(self)
        self.s = self.sec

        self._millis_view = _MillisView(self)
        self.ms = self.millis

    @property
    def meta(self):
        """Region metadata (an `_AudioRegionMetadata`) or None."""
        return self._meta

    @meta.setter
    def meta(self, new_meta):
        """Set region metadata from any mapping."""
        self._meta = _AudioRegionMetadata(new_meta)

    @classmethod
    def load(cls, file, skip=0, max_read=None, **kwargs):
        """Create an AudioRegion by reading data from `file`.

        `skip` seconds are read and discarded first; then at most
        `max_read` seconds are read (all remaining data if `max_read`
        is None or negative).
        """
        audio_source = get_audio_source(file, **kwargs)
        audio_source.open()
        if skip is not None and skip > 0:
            skip_samples = int(skip * audio_source.sampling_rate)
            audio_source.read(skip_samples)
        if max_read is None or max_read < 0:
            max_read_samples = None
        else:
            max_read_samples = round(max_read * audio_source.sampling_rate)
        data = audio_source.read(max_read_samples)
        audio_source.close()
        return cls(
            data,
            audio_source.sampling_rate,
            audio_source.sample_width,
            audio_source.channels,
        )

    @property
    def sec(self):
        """View for slicing this region by seconds."""
        return self._seconds_view

    @property
    def millis(self):
        """View for slicing this region by milliseconds."""
        return self._millis_view

    @property
    def duration(self):
        """
        Returns region duration in seconds.
        """
        return len(self._data) / (
            self.sampling_rate * self.sample_width * self.channels
        )

    @property
    def sampling_rate(self):
        return self._sampling_rate

    @property
    def sr(self):
        """Alias for `sampling_rate`."""
        return self._sampling_rate

    @property
    def sample_width(self):
        return self._sample_width

    @property
    def sw(self):
        """Alias for `sample_width`."""
        return self._sample_width

    @property
    def channels(self):
        return self._channels

    @property
    def ch(self):
        """Alias for `channels`."""
        return self._channels

    def play(self, player=None, progress_bar=False, **progress_bar_kwargs):
        """Play audio region

        :Parameters:

        player: AudioPlayer, default: None
            audio player to use. If None (default), use `player_for(self)`
            to get a new audio player.

        progress_bar: bool, default: False
            whether to use a progress bar while playing audio.

        progress_bar_kwargs: kwargs
            keyword arguments to pass to progress_bar object. Currently only
            `tqdm` is supported.
        """
        if player is None:
            player = player_for(self)
        player.play(
            self._data, progress_bar=progress_bar, **progress_bar_kwargs
        )

    def save(self, file, format=None, exists_ok=True, **audio_parameters):
        """Save audio region to file.

        :Parameters:

        file: str, file-like object
            path to output file or a file-like object. If `str`, it may
            contain `{duration}` place holders as well as any place holder
            that this region's metadata might contain (e.g., `{meta.start}`).

        format: str
            type of audio file. If None (default), file type is guessed from
            `file`'s extension. If `file` is not a `str` or does not have
            an extension, audio data is saved as a raw (headerless) audio
            file.
        exists_ok: bool, default: True
            If True, overwrite `file` if a file with the same name exists.
            If False, raise an `IOError` if the file exists.
        audio_parameters: dict
            any keyword arguments to be passed to audio saving backend
            (e.g. bitrate, etc.)

        :Returns:

        file: str, file-like object
            name of the file or file-like object to which audio data was
            written, with any `{duration}`/metadata place holders resolved.

        :Raises:

        IOError if `file` exists and `exists_ok` is False.

        Example:

        .. code:: python
            region = AudioRegion(b'\0' * 2 * 24000,
                                 sampling_rate=16000,
                                 sample_width=2,
                                 channels=1)
            region.meta = {"start": 2.25, "end": 2.25 + region.duration}
            region.save('audio_{meta.start}-{meta.end}.wav')
            audio_2.25-3.75.wav
            region.save('region_{meta.start:.3f}_{duration:.3f}.wav')
            audio_2.250_1.500.wav
        """
        if isinstance(file, str):
            file = file.format(duration=self.duration, meta=self.meta)
            if not exists_ok and os.path.exists(file):
                raise FileExistsError("file '{file}' exists".format(file=file))
        to_file(
            self._data,
            file,
            format,
            sr=self.sr,
            sw=self.sw,
            ch=self.ch,
            audio_parameters=audio_parameters,
        )
        return file

    def __array__(self):
        # Numpy interoperability: np.asarray(region) returns the samples.
        return self.samples

    @property
    def samples(self):
        """
        Audio data as an array of samples (one array per channel when
        multichannel). Computed once and cached.
        """
        if self._samples is None:
            fmt = signal.FORMAT[self.sample_width]
            if self.channels == 1:
                self._samples = signal.to_array(self._data, fmt)
            else:
                self._samples = signal.separate_channels(
                    self._data, fmt, self.channels
                )
        return self._samples

    def __len__(self):
        """
        Return region length in number of samples.
        """
        return len(self._data) // (self.sample_width * self.channels)

    @property
    def len(self):
        """
        Return region length in number of samples.
        """
        return len(self)

    def __bytes__(self):
        return self._data

    def __str__(self):
        return (
            "AudioRegion(duration={:.3f}, "
            "sampling_rate={}, sample_width={}, channels={})".format(
                self.duration, self.sr, self.sw, self.ch
            )
        )

    def __repr__(self):
        return str(self)

    def __add__(self, other):
        """
        Concatenates this region and `other` and return a new region.
        Both regions must have the same sampling rate, sample width
        and number of channels. If not, raises a `ValueError`.
        """
        if not isinstance(other, AudioRegion):
            raise TypeError(
                "Can only concatenate AudioRegion, "
                'not "{}"'.format(type(other))
            )
        if other.sr != self.sr:
            raise ValueError(
                "Can only concatenate AudioRegions of the same "
                "sampling rate ({} != {})".format(self.sr, other.sr)
            )
        if other.sw != self.sw:
            raise ValueError(
                "Can only concatenate AudioRegions of the same "
                "sample width ({} != {})".format(self.sw, other.sw)
            )
        if other.ch != self.ch:
            raise ValueError(
                "Can only concatenate AudioRegions of the same "
                "number of channels ({} != {})".format(self.ch, other.ch)
            )
        data = self._data + other._data
        return AudioRegion(data, self.sr, self.sw, self.ch)

    def __radd__(self, other):
        """
        Concatenates `other` and this region. `other` should be an
        `AudioRegion` with the same audio parameters as this region
        but can exceptionally be `0` to make it possible to concatenate
        many regions with `sum`.
        """
        if other == 0:
            return self
        # Fix: AudioRegion has no `add` method (the previous code called
        # `other.add(self)`, which raised AttributeError); use `+`.
        return other + self

    def __mul__(self, n):
        if not isinstance(n, int):
            err_msg = "Can't multiply AudioRegion by a non-int of type '{}'"
            raise TypeError(err_msg.format(type(n)))
        data = self._data * n
        return AudioRegion(data, self.sr, self.sw, self.ch)

    def __rmul__(self, n):
        return self * n

    def __eq__(self, other):
        # NOTE(review): defining __eq__ without __hash__ makes instances
        # unhashable -- confirm this is intended.
        if other is self:
            return True
        if not isinstance(other, AudioRegion):
            return False
        return (
            (self._data == other._data)
            and (self.sr == other.sr)
            and (self.sw == other.sw)
            and (self.ch == other.ch)
        )

    def __getitem__(self, index):
        err_msg = "Slicing AudioRegion by samples requires indices of type "
        err_msg += "'int' without a step (e.g. region[1600:3200])"
        start_sample, stop_sample = _check_convert_index(
            index, (int,), err_msg
        )

        bytes_per_sample = self.sample_width * self.channels
        len_samples = len(self._data) // bytes_per_sample

        # Normalize negative indices relative to the region's length.
        if start_sample < 0:
            start_sample = max(start_sample + len_samples, 0)
        onset = start_sample * bytes_per_sample

        if stop_sample is not None:
            if stop_sample < 0:
                stop_sample = max(stop_sample + len_samples, 0)
            # Fix: use the normalized `stop_sample` (previous code read
            # `index.stop`, discarding the normalization above).
            offset = stop_sample * bytes_per_sample
        else:
            offset = None

        data = self._data[onset:offset]
        return AudioRegion(data, self.sr, self.sw, self.ch)
amine@188 667
amine@2 668
amine@178 669 class StreamTokenizer:
amine@32 670 """
amine@32 671 Class for stream tokenizers. It implements a 4-state automaton scheme
amine@32 672 to extract sub-sequences of interest on the fly.
amine@67 673
amine@32 674 :Parameters:
amine@67 675
amine@5 676 `validator` :
amine@5 677 instance of `DataValidator` that implements `is_valid` method.
amine@67 678
amine@5 679 `min_length` : *(int)*
amine@5 680 Minimum number of frames of a valid token. This includes all \
amine@5 681 tolerated non valid frames within the token.
amine@67 682
amine@5 683 `max_length` : *(int)*
amine@5 684 Maximum number of frames of a valid token. This includes all \
amine@5 685 tolerated non valid frames within the token.
amine@67 686
amine@5 687 `max_continuous_silence` : *(int)*
amine@5 688 Maximum number of consecutive non-valid frames within a token.
amine@5 689 Note that, within a valid token, there may be many tolerated \
amine@5 690 *silent* regions that contain each a number of non valid frames up to \
amine@5 691 `max_continuous_silence`
amine@67 692
amine@5 693 `init_min` : *(int, default=0)*
amine@5 694 Minimum number of consecutive valid frames that must be **initially** \
amine@5 695 gathered before any sequence of non valid frames can be tolerated. This
amine@5 696 option is not always needed, it can be used to drop non-valid tokens as
amine@5 697 early as possible. **Default = 0** means that the option is by default
amine@5 698 ineffective.
amine@67 699
amine@5 700 `init_max_silence` : *(int, default=0)*
amine@5 701 Maximum number of tolerated consecutive non-valid frames if the \
amine@5 702 number already gathered valid frames has not yet reached 'init_min'.
amine@5 703 This argument is normally used if `init_min` is used. **Default = 0**,
amine@5 704 by default this argument is not taken into consideration.
amine@67 705
amine@5 706 `mode` : *(int, default=0)*
amine@5 707 `mode` can be:
amine@67 708
amine@35 709 1. `StreamTokenizer.STRICT_MIN_LENGTH`:
amine@32 710 if token *i* is delivered because `max_length`
amine@32 711 is reached, and token *i+1* is immediately adjacent to
amine@32 712 token *i* (i.e. token *i* ends at frame *k* and token *i+1* starts
amine@32 713 at frame *k+1*) then accept token *i+1* only of it has a size of at
amine@32 714 least `min_length`. The default behavior is to accept token *i+1*
amine@32 715 event if it is shorter than `min_length` (given that the above conditions
amine@32 716 are fulfilled of course).
amine@67 717
amine@32 718 :Examples:
amine@67 719
amine@32 720 In the following code, without `STRICT_MIN_LENGTH`, the 'BB' token is
amine@32 721 accepted although it is shorter than `min_length` (3), because it immediately
amine@32 722 follows the latest delivered token:
amine@67 723
amine@32 724 .. code:: python
amine@67 725
amine@32 726 from auditok import StreamTokenizer, StringDataSource, DataValidator
amine@67 727
amine@32 728 class UpperCaseChecker(DataValidator):
amine@32 729 def is_valid(self, frame):
amine@32 730 return frame.isupper()
amine@67 731
amine@67 732
amine@32 733 dsource = StringDataSource("aaaAAAABBbbb")
amine@32 734 tokenizer = StreamTokenizer(validator=UpperCaseChecker(),
amine@32 735 min_length=3,
amine@32 736 max_length=4,
amine@32 737 max_continuous_silence=0)
amine@67 738
amine@32 739 tokenizer.tokenize(dsource)
amine@67 740
amine@32 741 :output:
amine@67 742
amine@32 743 .. code:: python
amine@67 744
amine@32 745 [(['A', 'A', 'A', 'A'], 3, 6), (['B', 'B'], 7, 8)]
amine@32 746
amine@32 747
amine@32 748 The following tokenizer will however reject the 'BB' token:
amine@67 749
amine@32 750 .. code:: python
amine@67 751
amine@32 752 dsource = StringDataSource("aaaAAAABBbbb")
amine@32 753 tokenizer = StreamTokenizer(validator=UpperCaseChecker(),
amine@32 754 min_length=3, max_length=4,
amine@32 755 max_continuous_silence=0,
amine@32 756 mode=StreamTokenizer.STRICT_MIN_LENGTH)
amine@32 757 tokenizer.tokenize(dsource)
amine@67 758
amine@32 759 :output:
amine@67 760
amine@32 761 .. code:: python
amine@67 762
amine@32 763 [(['A', 'A', 'A', 'A'], 3, 6)]
amine@67 764
amine@67 765
amine@35 766 2. `StreamTokenizer.DROP_TRAILING_SILENCE`: drop all tailing non-valid frames
amine@32 767 from a token to be delivered if and only if it is not **truncated**.
amine@32 768 This can be a bit tricky. A token is actually delivered if:
amine@67 769
amine@32 770 - a. `max_continuous_silence` is reached
amine@67 771
amine@32 772 :or:
amine@67 773
amine@32 774 - b. Its length reaches `max_length`. This is called a **truncated** token
amine@67 775
amine@32 776 In the current implementation, a `StreamTokenizer`'s decision is only based on already seen
amine@32 777 data and on incoming data. Thus, if a token is truncated at a non-valid but tolerated
amine@32 778 frame (`max_length` is reached but `max_continuous_silence` not yet) any tailing
amine@32 779 silence will be kept because it can potentially be part of valid token (if `max_length`
amine@32 780 was bigger). But if `max_continuous_silence` is reached before `max_length`, the delivered
amine@32 781 token will not be considered as truncated but a result of *normal* end of detection
amine@32 782 (i.e. no more valid data). In that case the tailing silence can be removed if you use
amine@32 783 the `StreamTokenizer.DROP_TRAILING_SILENCE` mode.
amine@67 784
amine@32 785 :Example:
amine@67 786
amine@32 787 .. code:: python
amine@67 788
amine@32 789 tokenizer = StreamTokenizer(validator=UpperCaseChecker(), min_length=3,
amine@32 790 max_length=6, max_continuous_silence=3,
amine@32 791 mode=StreamTokenizer.DROP_TRAILING_SILENCE)
amine@67 792
amine@32 793 dsource = StringDataSource("aaaAAAaaaBBbbbb")
amine@32 794 tokenizer.tokenize(dsource)
amine@67 795
amine@32 796 :output:
amine@67 797
amine@32 798 .. code:: python
amine@67 799
amine@32 800 [(['A', 'A', 'A', 'a', 'a', 'a'], 3, 8), (['B', 'B'], 9, 10)]
amine@67 801
amine@32 802 The first token is delivered with its tailing silence because it is truncated
amine@32 803 while the second one has its tailing frames removed.
amine@67 804
amine@32 805 Without `StreamTokenizer.DROP_TRAILING_SILENCE` the output would be:
amine@67 806
amine@32 807 .. code:: python
amine@67 808
amine@32 809 [(['A', 'A', 'A', 'a', 'a', 'a'], 3, 8), (['B', 'B', 'b', 'b', 'b'], 9, 13)]
amine@67 810
amine@67 811
amine@32 812 3. `StreamTokenizer.STRICT_MIN_LENGTH | StreamTokenizer.DROP_TRAILING_SILENCE`:
    use both options. That means: first remove trailing silence, then check if the
    token still has at least a length of `min_length`.
amine@32 815 """
amine@67 816
    # States of the internal finite-state machine driven by `_process`.
    SILENCE = 0
    POSSIBLE_SILENCE = 1
    POSSIBLE_NOISE = 2
    NOISE = 3

    # Bit flags for `mode`; they can be OR'ed together (see `set_mode`).
    STRICT_MIN_LENGTH = 2
    DROP_TRAILING_SILENCE = 4
    # alias (same value; presumably kept for backward compatibility with
    # the older "tailing" spelling)
    DROP_TAILING_SILENCE = 4
amine@67 826
amine@178 827 def __init__(
amine@178 828 self,
amine@178 829 validator,
amine@178 830 min_length,
amine@178 831 max_length,
amine@178 832 max_continuous_silence,
amine@178 833 init_min=0,
amine@178 834 init_max_silence=0,
amine@178 835 mode=0,
amine@178 836 ):
amine@67 837
amine@2 838 if not isinstance(validator, DataValidator):
amine@185 839 raise TypeError(
amine@185 840 "'validator' must be an instance of 'DataValidator'"
amine@185 841 )
amine@67 842
amine@2 843 if max_length <= 0:
amine@185 844 raise ValueError(
amine@185 845 "'max_length' must be > 0 (value={0})".format(max_length)
amine@185 846 )
amine@67 847
amine@2 848 if min_length <= 0 or min_length > max_length:
amine@178 849 raise ValueError(
amine@178 850 "'min_length' must be > 0 and <= 'max_length' (value={0})".format(
amine@178 851 min_length
amine@178 852 )
amine@178 853 )
amine@67 854
amine@2 855 if max_continuous_silence >= max_length:
amine@178 856 raise ValueError(
amine@178 857 "'max_continuous_silence' must be < 'max_length' (value={0})".format(
amine@178 858 max_continuous_silence
amine@178 859 )
amine@178 860 )
amine@67 861
amine@5 862 if init_min >= max_length:
amine@178 863 raise ValueError(
amine@178 864 "'init_min' must be < 'max_length' (value={0})".format(
amine@178 865 max_continuous_silence
amine@178 866 )
amine@178 867 )
amine@67 868
amine@2 869 self.validator = validator
amine@2 870 self.min_length = min_length
amine@2 871 self.max_length = max_length
amine@2 872 self.max_continuous_silence = max_continuous_silence
amine@2 873 self.init_min = init_min
amine@2 874 self.init_max_silent = init_max_silence
amine@67 875
amine@2 876 self._mode = None
amine@2 877 self.set_mode(mode)
amine@2 878 self._strict_min_length = (mode & self.STRICT_MIN_LENGTH) != 0
amine@67 879 self._drop_tailing_silence = (mode & self.DROP_TRAILING_SILENCE) != 0
amine@67 880
amine@2 881 self._deliver = None
amine@2 882 self._tokens = None
amine@2 883 self._state = None
amine@2 884 self._data = None
amine@2 885 self._contiguous_token = False
amine@67 886
amine@2 887 self._init_count = 0
amine@2 888 self._silence_length = 0
amine@2 889 self._start_frame = 0
amine@2 890 self._current_frame = 0
amine@67 891
amine@2 892 def set_mode(self, mode):
amine@177 893 # TODO: use properties and make these deprecated
amine@2 894 """
amine@32 895 :Parameters:
amine@67 896
amine@32 897 `mode` : *(int)*
amine@32 898 New mode, must be one of:
amine@67 899
amine@67 900
amine@33 901 - `StreamTokenizer.STRICT_MIN_LENGTH`
amine@67 902
amine@33 903 - `StreamTokenizer.DROP_TRAILING_SILENCE`
amine@67 904
amine@33 905 - `StreamTokenizer.STRICT_MIN_LENGTH | StreamTokenizer.DROP_TRAILING_SILENCE`
amine@67 906
amine@177 907 - `0` TODO: this mode should have a name
amine@67 908
amine@3 909 See `StreamTokenizer.__init__` for more information about the mode.
amine@2 910 """
amine@67 911
amine@178 912 if not mode in [
amine@178 913 self.STRICT_MIN_LENGTH,
amine@178 914 self.DROP_TRAILING_SILENCE,
amine@178 915 self.STRICT_MIN_LENGTH | self.DROP_TRAILING_SILENCE,
amine@178 916 0,
amine@178 917 ]:
amine@67 918
amine@2 919 raise ValueError("Wrong value for mode")
amine@67 920
amine@2 921 self._mode = mode
amine@2 922 self._strict_min_length = (mode & self.STRICT_MIN_LENGTH) != 0
amine@67 923 self._drop_tailing_silence = (mode & self.DROP_TRAILING_SILENCE) != 0
amine@67 924
amine@2 925 def get_mode(self):
amine@2 926 """
amine@2 927 Return the current mode. To check whether a specific mode is activated use
amine@2 928 the bitwise 'and' operator `&`. Example:
amine@67 929
amine@32 930 .. code:: python
amine@67 931
amine@2 932 if mode & self.STRICT_MIN_LENGTH != 0:
amine@32 933 do_something()
amine@2 934 """
amine@2 935 return self._mode
amine@67 936
amine@2 937 def _reinitialize(self):
amine@2 938 self._contiguous_token = False
amine@2 939 self._data = []
amine@2 940 self._tokens = []
amine@2 941 self._state = self.SILENCE
amine@2 942 self._current_frame = -1
amine@2 943 self._deliver = self._append_token
amine@67 944
amine@177 945 def tokenize(self, data_source, callback=None, generator=False):
amine@2 946 """
amine@2 947 Read data from `data_source`, one frame a time, and process the read frames in
amine@2 948 order to detect sequences of frames that make up valid tokens.
amine@67 949
amine@32 950 :Parameters:
amine@47 951 `data_source` : instance of the :class:`DataSource` class that implements a `read` method.
amine@32 952 'read' should return a slice of signal, i.e. frame (of whatever \
amine@32 953 type as long as it can be processed by validator) and None if \
amine@32 954 there is no more signal.
amine@67 955
amine@32 956 `callback` : an optional 3-argument function.
amine@32 957 If a `callback` function is given, it will be called each time a valid token
amine@32 958 is found.
amine@67 959
amine@67 960
amine@32 961 :Returns:
amine@32 962 A list of tokens if `callback` is None. Each token is tuple with the following elements:
amine@67 963
amine@32 964 .. code python
amine@67 965
amine@32 966 (data, start, end)
amine@67 967
amine@32 968 where `data` is a list of read frames, `start`: index of the first frame in the
amine@32 969 original data and `end` : index of the last frame.
amine@67 970
amine@2 971 """
amine@177 972 token_gen = self._iter_tokens(data_source)
amine@177 973 if callback:
amine@177 974 for token in token_gen:
amine@177 975 callback(*token)
amine@177 976 return
amine@177 977 if generator:
amine@177 978 return token_gen
amine@177 979 return list(token_gen)
amine@67 980
amine@177 981 def _iter_tokens(self, data_source):
amine@2 982 self._reinitialize()
amine@2 983 while True:
amine@67 984 frame = data_source.read()
amine@177 985 self._current_frame += 1
amine@47 986 if frame is None:
amine@177 987 token = self._post_process()
amine@177 988 if token is not None:
amine@177 989 yield token
amine@2 990 break
amine@177 991 token = self._process(frame)
amine@177 992 if token is not None:
amine@177 993 yield token
amine@67 994
    def _process(self, frame):
        """
        Feed one frame to the state machine.

        Returns a `(data, start, end)` token when this frame completes one
        (via `_process_end_of_detection`), otherwise None. The machine moves
        between SILENCE, POSSIBLE_NOISE, NOISE and POSSIBLE_SILENCE based on
        frame validity and the length/silence thresholds set in `__init__`.
        """

        frame_is_valid = self.validator.is_valid(frame)

        if self._state == self.SILENCE:

            if frame_is_valid:
                # first valid frame after silence: start a candidate token
                self._init_count = 1
                self._silence_length = 0
                self._start_frame = self._current_frame
                self._data.append(frame)

                if self._init_count >= self.init_min:
                    # enough initial valid frames: token confirmed
                    self._state = self.NOISE
                    if len(self._data) >= self.max_length:
                        # already at max size: deliver as truncated
                        return self._process_end_of_detection(True)
                else:
                    # need more consecutive valid frames before confirming
                    self._state = self.POSSIBLE_NOISE

        elif self._state == self.POSSIBLE_NOISE:

            if frame_is_valid:
                self._silence_length = 0
                self._init_count += 1
                self._data.append(frame)
                if self._init_count >= self.init_min:
                    # initial-validity requirement met: token confirmed
                    self._state = self.NOISE
                    if len(self._data) >= self.max_length:
                        return self._process_end_of_detection(True)

            else:
                self._silence_length += 1
                if (
                    self._silence_length > self.init_max_silent
                    or len(self._data) + 1 >= self.max_length
                ):
                    # either init_max_silent or max_length is reached
                    # before _init_count: discard and go back to silence
                    self._data = []
                    self._state = self.SILENCE
                else:
                    # tolerated silence within the initialization phase
                    self._data.append(frame)

        elif self._state == self.NOISE:

            if frame_is_valid:
                self._data.append(frame)
                if len(self._data) >= self.max_length:
                    return self._process_end_of_detection(True)

            elif self.max_continuous_silence <= 0:
                # no silence tolerated at all: the token ends here; it is
                # delivered (non-truncated) if _contiguous_token is set or
                # min_length is satisfied
                self._state = self.SILENCE
                return self._process_end_of_detection()
            else:
                # this is the first silent frame following a valid one
                # and it is tolerated
                self._silence_length = 1
                self._data.append(frame)
                self._state = self.POSSIBLE_SILENCE
                if len(self._data) == self.max_length:
                    return self._process_end_of_detection(True)
                # don't reset _silence_length because we still
                # need to know the total number of silent frames

        elif self._state == self.POSSIBLE_SILENCE:

            if frame_is_valid:
                # valid frame within tolerated silence: token continues
                self._data.append(frame)
                self._silence_length = 0
                self._state = self.NOISE
                if len(self._data) >= self.max_length:
                    return self._process_end_of_detection(True)

            else:
                if self._silence_length >= self.max_continuous_silence:
                    self._state = self.SILENCE
                    if self._silence_length < len(self._data):
                        # deliver only if the gathered frames aren't all silent
                        return self._process_end_of_detection()
                    self._data = []
                    self._silence_length = 0
                else:
                    # still within the tolerated run of silent frames
                    self._data.append(frame)
                    self._silence_length += 1
                    if len(self._data) >= self.max_length:
                        return self._process_end_of_detection(True)
                    # don't reset _silence_length because we still
                    # need to know the total number of silent frames
amine@67 1086
amine@2 1087 def _post_process(self):
amine@2 1088 if self._state == self.NOISE or self._state == self.POSSIBLE_SILENCE:
amine@2 1089 if len(self._data) > 0 and len(self._data) > self._silence_length:
amine@177 1090 return self._process_end_of_detection()
amine@67 1091
    def _process_end_of_detection(self, truncated=False):
        """
        Finalize the frames gathered in `self._data` into a token.

        :Parameters:

            `truncated` : *(bool)*
                True when the token is closed because `max_length` was
                reached (the next token may then be contiguous with this
                one), False when it ends by silence or end of stream.

        Returns a `(data, start_frame, end_frame)` tuple if the gathered
        frames qualify as a token, otherwise None (frames are dropped).
        """

        if (
            not truncated
            and self._drop_tailing_silence
            and self._silence_length > 0
        ):
            # token ended on silence (max_continuous_silence reached
            # or max_length hit at a silent frame): strip tailing silence
            self._data = self._data[0 : -self._silence_length]

        # deliver if long enough, or if it is a short continuation of a
        # truncated token and strict minimum length is not enforced
        if (len(self._data) >= self.min_length) or (
            len(self._data) > 0
            and not self._strict_min_length
            and self._contiguous_token
        ):

            start_frame = self._start_frame
            end_frame = self._start_frame + len(self._data) - 1
            data = self._data
            self._data = []
            token = (data, start_frame, end_frame)

            if truncated:
                # next token (if any) will start at _current_frame + 1
                self._start_frame = self._current_frame + 1
                # remember that it is contiguous with the just delivered one
                self._contiguous_token = True
            else:
                self._contiguous_token = False
            return token
        else:
            # too short to deliver: drop the gathered frames
            self._contiguous_token = False

        self._data = []
amine@67 1127
amine@2 1128 def _append_token(self, data, start, end):
amine@178 1129 self._tokens.append((data, start, end))