# HG changeset patch # User www-data # Date 1614716277 0 # Node ID 5fd9b6b7ff0d513d1ddf928caf643edb8dcb47a9 # Parent 059599823c343cd4b30845ff0e378ee33ff37f87# Parent bd242e80455f4df6adf48123e67a455d89e3b0ff Merge branch 'master' of https://github.com/amsehili/auditok diff -r 059599823c34 -r 5fd9b6b7ff0d CHANGELOG --- a/CHANGELOG Tue Mar 02 19:17:59 2021 +0000 +++ b/CHANGELOG Tue Mar 02 20:17:57 2021 +0000 @@ -7,6 +7,7 @@ - Implement split function as a high-level API for tokenization - Implement AudioRegion class for simple audio objects manipulation - Use a much faster energy computation method (based on standard audioop module) +- Make ADSFactory deprecated - Choose which channel(s) to use for tokenization - Save multi-channel audio data - Refactor code in all modules diff -r 059599823c34 -r 5fd9b6b7ff0d README.rst --- a/README.rst Tue Mar 02 19:17:59 2021 +0000 +++ b/README.rst Tue Mar 02 20:17:57 2021 +0000 @@ -10,7 +10,7 @@ ``auditok`` is an **Audio Activity Detection** tool that can process online data (read from an audio device or from standard input) as well as audio files. -It can be used as a command line program or by calling its API. +It can be used as a command-line program or by calling its API. The latest version of the documentation can be found on `readthedocs. `_ diff -r 059599823c34 -r 5fd9b6b7ff0d demos/audio_tokenize_demo.py --- a/demos/audio_tokenize_demo.py Tue Mar 02 19:17:59 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,80 +0,0 @@ -""" -@author: Amine SEHILI -September, 2015 -""" - -from auditok import ( - ADSFactory, - AudioEnergyValidator, - StreamTokenizer, - player_for, - dataset, -) -import sys - -try: - - # We set the `record` argument to True so that we can rewind the source - asource = ADSFactory.ads( - filename=dataset.one_to_six_arabic_16000_mono_bc_noise, record=True - ) - - validator = AudioEnergyValidator( - sample_width=asource.get_sample_width(), energy_threshold=65 - ) - - # Default analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate()) - # min_length=20 : minimum length of a valid audio activity is 20 * 10 == 200 ms - # max_length=400 : maximum length of a valid audio activity is 400 * 10 == 4000 ms == 4 seconds - # max_continuous_silence=30 : maximum length of a tolerated silence within a valid audio activity is 30 * 30 == 300 ms - tokenizer = StreamTokenizer( - validator=validator, - min_length=20, - max_length=400, - max_continuous_silence=30, - ) - - asource.open() - tokens = tokenizer.tokenize(asource) - - # Play detected regions back - player = player_for(asource) - - # Rewind and read the whole signal - asource.rewind() - original_signal = [] - - while True: - w = asource.read() - if w is None: - break - original_signal.append(w) - - original_signal = b"".join(original_signal) - player.play(original_signal) - - print("\n ** playing detected regions...\n") - for i, t in enumerate(tokens): - print( - "Token [{0}] starts at {1} and ends at {2}".format( - i + 1, t[1], t[2] - ) - ) - data = b"".join(t[0]) - player.play(data) - - assert len(tokens) == 8 - - asource.close() - player.stop() - -except KeyboardInterrupt: - - player.stop() - asource.close() - sys.exit(0) - -except Exception as e: - - sys.stderr.write(str(e) + "\n") - sys.exit(1) diff -r 059599823c34 -r 5fd9b6b7ff0d demos/audio_trim_demo.py --- a/demos/audio_trim_demo.py Tue Mar 02 19:17:59 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,120 +0,0 @@ -""" -@author: Amine SEHILI -September, 2015 -""" - -# Trim leading and trailing silence from a record - -from auditok import ( - ADSFactory, - AudioEnergyValidator, - StreamTokenizer, - player_for, - dataset, -) -import pyaudio -import sys - -""" -The tokenizer in the following example is set up to remove the silence -that precedes the first acoustic activity or follows the last activity -in a record. It preserves whatever it founds between the two activities. -In other words, it removes the leading and trailing silence. - -Sampling rate is 44100 sample per second, we'll use an analysis window of 100 ms -(i.e. bloc_ksize == 4410) - -Energy threshold is 50. - -The tokenizer will start accumulating windows up from the moment it encounters -the first analysis window of an energy >= 50. ALL the following windows will be -kept regardless of their energy. At the end of the analysis, it will drop trailing - windows with an energy below 50. - -This is an interesting example because the audio file we're analyzing contains a very -brief noise that occurs within the leading silence. We certainly do want our tokenizer -to stop at this point and considers whatever it comes after as a useful signal. -To force the tokenizer to ignore that brief event we use two other parameters `init_min` -ans `init_max_silence`. By `init_min`=3 and `init_max_silence`=1 we tell the tokenizer -that a valid event must start with at least 3 noisy windows, between which there -is at most 1 silent window. - -Still with this configuration we can get the tokenizer detect that noise as a valid event -(if it actually contains 3 consecutive noisy frames). To circummvent this we use an enough -large analysis window (here of 100 ms) to ensure that the brief noise be surrounded by a much -longer silence and hence the energy of the overall analysis window will be below 50. - -When using a shorter analysis window (of 10ms for instance, block_size == 441), the brief -noise contributes more to energy calculation which yields an energy of over 50 for the window. -Again we can deal with this situation by using a higher energy threshold (55 for example) - -""" - -try: - # record = True so that we'll be able to rewind the source. - asource = ADSFactory.ads( - filename=dataset.was_der_mensch_saet_mono_44100_lead_tail_silence, - record=True, - block_size=4410, - ) - asource.open() - - original_signal = [] - # Read the whole signal - while True: - w = asource.read() - if w is None: - break - original_signal.append(w) - - original_signal = b"".join(original_signal) - - # rewind source - asource.rewind() - - # Create a validator with an energy threshold of 50 - validator = AudioEnergyValidator( - sample_width=asource.get_sample_width(), energy_threshold=50 - ) - - # Create a tokenizer with an unlimited token length and continuous silence within a token - # Note the DROP_TRAILING_SILENCE mode that will ensure removing trailing silence - trimmer = StreamTokenizer( - validator, - min_length=20, - max_length=99999999, - max_continuous_silence=9999999, - mode=StreamTokenizer.DROP_TRAILING_SILENCE, - init_min=3, - init_max_silence=1, - ) - - tokens = trimmer.tokenize(asource) - - # Make sure we only have one token - assert len(tokens) == 1, "Should have detected one single token" - - trimmed_signal = b"".join(tokens[0][0]) - - player = player_for(asource) - - print( - "\n ** Playing original signal (with leading and trailing silence)..." - ) - player.play(original_signal) - print("\n ** Playing trimmed signal...") - player.play(trimmed_signal) - - player.stop() - asource.close() - -except KeyboardInterrupt: - - player.stop() - asource.close() - sys.exit(0) - -except Exception as e: - - sys.stderr.write(str(e) + "\n") - sys.exit(1) diff -r 059599823c34 -r 5fd9b6b7ff0d demos/echo.py --- a/demos/echo.py Tue Mar 02 19:17:59 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,64 +0,0 @@ -from auditok import ( - ADSFactory, - AudioEnergyValidator, - StreamTokenizer, - player_for, -) -import pyaudio -import sys - -try: - - energy_threshold = 45 - duration = 10 # seconds - - if len(sys.argv) > 1: - energy_threshold = float(sys.argv[1]) - - if len(sys.argv) > 2: - duration = float(sys.argv[2]) - - # record = True so that we'll be able to rewind the source. - # max_time = 10: read 10 seconds from the microphone - asource = ADSFactory.ads(record=True, max_time=duration) - - validator = AudioEnergyValidator( - sample_width=asource.get_sample_width(), - energy_threshold=energy_threshold, - ) - tokenizer = StreamTokenizer( - validator=validator, - min_length=20, - max_length=250, - max_continuous_silence=30, - ) - - player = player_for(asource) - - def echo(data, start, end): - print("Acoustic activity at: {0}--{1}".format(start, end)) - player.play(b"".join(data)) - - asource.open() - - print( - "\n ** Make some noise (dur:{}, energy:{})...".format( - duration, energy_threshold - ) - ) - - tokenizer.tokenize(asource, callback=echo) - - asource.close() - player.stop() - -except KeyboardInterrupt: - - player.stop() - asource.close() - sys.exit(0) - -except Exception as e: - - sys.stderr.write(str(e) + "\n") - sys.exit(1)