changeset 388:5fd9b6b7ff0d

Merge branch 'master' of https://github.com/amsehili/auditok
author www-data <www-data@c4dm-xenserv-virt2.eecs.qmul.ac.uk>
date Tue, 02 Mar 2021 20:17:57 +0000
parents 059599823c34 (current diff) bd242e80455f (diff)
children ecdb95580c4e
files CHANGELOG README.rst demos/audio_tokenize_demo.py demos/audio_trim_demo.py demos/echo.py
diffstat 5 files changed, 2 insertions(+), 265 deletions(-) [+]
line wrap: on
line diff
--- a/CHANGELOG	Tue Mar 02 19:17:59 2021 +0000
+++ b/CHANGELOG	Tue Mar 02 20:17:57 2021 +0000
@@ -7,6 +7,7 @@
 - Implement split function as a high-level API for tokenization
 - Implement AudioRegion class for simple audio objects manipulation
 - Use a much faster energy computation method (based on standard audioop module)
+- Make ADSFactory deprecated
 - Choose which channel(s) to use for tokenization
 - Save multi-channel audio data
 - Refactor code in all modules
--- a/README.rst	Tue Mar 02 19:17:59 2021 +0000
+++ b/README.rst	Tue Mar 02 20:17:57 2021 +0000
@@ -10,7 +10,7 @@
 
 ``auditok`` is an **Audio Activity Detection** tool that can process online data
 (read from an audio device or from standard input) as well as audio files.
-It can be used as a command line program or by calling its API.
+It can be used as a command-line program or by calling its API.
 
 The latest version of the documentation can be found on
 `readthedocs. <https://readthedocs.org/projects/auditok/badge/?version=latest>`_
--- a/demos/audio_tokenize_demo.py	Tue Mar 02 19:17:59 2021 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,80 +0,0 @@
-"""
-@author: Amine SEHILI <amine.sehili@gmail.com>
-September, 2015
-"""
-
-from auditok import (
-    ADSFactory,
-    AudioEnergyValidator,
-    StreamTokenizer,
-    player_for,
-    dataset,
-)
-import sys
-
-try:
-
-    # We set the `record` argument to True so that we can rewind the source
-    asource = ADSFactory.ads(
-        filename=dataset.one_to_six_arabic_16000_mono_bc_noise, record=True
-    )
-
-    validator = AudioEnergyValidator(
-        sample_width=asource.get_sample_width(), energy_threshold=65
-    )
-
-    # Default analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate())
-    # min_length=20 : minimum length of a valid audio activity is 20 * 10 == 200 ms
-    # max_length=400 :  maximum length of a valid audio activity is 400 * 10 == 4000 ms == 4 seconds
-    # max_continuous_silence=30 : maximum length of a tolerated silence within a valid audio activity is 30 * 10 == 300 ms
-    tokenizer = StreamTokenizer(
-        validator=validator,
-        min_length=20,
-        max_length=400,
-        max_continuous_silence=30,
-    )
-
-    asource.open()
-    tokens = tokenizer.tokenize(asource)
-
-    # Play detected regions back
-    player = player_for(asource)
-
-    # Rewind and read the whole signal
-    asource.rewind()
-    original_signal = []
-
-    while True:
-        w = asource.read()
-        if w is None:
-            break
-        original_signal.append(w)
-
-    original_signal = b"".join(original_signal)
-    player.play(original_signal)
-
-    print("\n ** playing detected regions...\n")
-    for i, t in enumerate(tokens):
-        print(
-            "Token [{0}] starts at {1} and ends at {2}".format(
-                i + 1, t[1], t[2]
-            )
-        )
-        data = b"".join(t[0])
-        player.play(data)
-
-    assert len(tokens) == 8
-
-    asource.close()
-    player.stop()
-
-except KeyboardInterrupt:
-
-    player.stop()
-    asource.close()
-    sys.exit(0)
-
-except Exception as e:
-
-    sys.stderr.write(str(e) + "\n")
-    sys.exit(1)
--- a/demos/audio_trim_demo.py	Tue Mar 02 19:17:59 2021 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,120 +0,0 @@
-"""
-@author: Amine SEHILI <amine.sehili@gmail.com>
-September, 2015
-"""
-
-# Trim leading and trailing silence from a record
-
-from auditok import (
-    ADSFactory,
-    AudioEnergyValidator,
-    StreamTokenizer,
-    player_for,
-    dataset,
-)
-import pyaudio
-import sys
-
-"""
-The tokenizer in the following example is set up to remove the silence
-that precedes the first acoustic activity or follows the last activity
-in a record. It preserves whatever it finds between the two activities.
-In other words, it removes the leading and trailing silence.
-
-Sampling rate is 44100 samples per second; we'll use an analysis window of 100 ms
-(i.e. block_size == 4410)
-
-Energy threshold is 50.
-
-The tokenizer will start accumulating windows up from the moment it encounters
-the first analysis window of an energy >= 50. ALL the following windows will be
-kept regardless of their energy. At the end of the analysis, it will drop trailing
- windows with an energy below 50.
-
-This is an interesting example because the audio file we're analyzing contains a very
-brief noise that occurs within the leading silence. We certainly do not want our tokenizer
-to stop at this point and consider whatever comes after it as a useful signal.
-To force the tokenizer to ignore that brief event we use two other parameters, `init_min`
-and `init_max_silence`. By `init_min`=3 and `init_max_silence`=1 we tell the tokenizer
-that a valid event must start with at least 3 noisy windows, between which there
-is at most 1 silent window.
-
-Even with this configuration, the tokenizer may still detect that noise as a valid event
-(if it actually contains 3 consecutive noisy frames). To circumvent this we use a large
-enough analysis window (here 100 ms) to ensure that the brief noise is surrounded by a much
-longer silence and hence the energy of the overall analysis window will be below 50.
-
-When using a shorter analysis window (of 10ms for instance, block_size == 441), the brief
-noise contributes more to energy calculation which yields an energy of over 50 for the window.
-Again we can deal with this situation by using a higher energy threshold (55 for example)
-
-"""
-
-try:
-    # record = True so that we'll be able to rewind the source.
-    asource = ADSFactory.ads(
-        filename=dataset.was_der_mensch_saet_mono_44100_lead_tail_silence,
-        record=True,
-        block_size=4410,
-    )
-    asource.open()
-
-    original_signal = []
-    # Read the whole signal
-    while True:
-        w = asource.read()
-        if w is None:
-            break
-        original_signal.append(w)
-
-    original_signal = b"".join(original_signal)
-
-    # rewind source
-    asource.rewind()
-
-    # Create a validator with an energy threshold of 50
-    validator = AudioEnergyValidator(
-        sample_width=asource.get_sample_width(), energy_threshold=50
-    )
-
-    # Create a tokenizer with an unlimited token length and continuous silence within a token
-    # Note the DROP_TRAILING_SILENCE mode that will ensure removing trailing silence
-    trimmer = StreamTokenizer(
-        validator,
-        min_length=20,
-        max_length=99999999,
-        max_continuous_silence=9999999,
-        mode=StreamTokenizer.DROP_TRAILING_SILENCE,
-        init_min=3,
-        init_max_silence=1,
-    )
-
-    tokens = trimmer.tokenize(asource)
-
-    # Make sure we only have one token
-    assert len(tokens) == 1, "Should have detected one single token"
-
-    trimmed_signal = b"".join(tokens[0][0])
-
-    player = player_for(asource)
-
-    print(
-        "\n ** Playing original signal (with leading and trailing silence)..."
-    )
-    player.play(original_signal)
-    print("\n ** Playing trimmed signal...")
-    player.play(trimmed_signal)
-
-    player.stop()
-    asource.close()
-
-except KeyboardInterrupt:
-
-    player.stop()
-    asource.close()
-    sys.exit(0)
-
-except Exception as e:
-
-    sys.stderr.write(str(e) + "\n")
-    sys.exit(1)
--- a/demos/echo.py	Tue Mar 02 19:17:59 2021 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,64 +0,0 @@
-from auditok import (
-    ADSFactory,
-    AudioEnergyValidator,
-    StreamTokenizer,
-    player_for,
-)
-import pyaudio
-import sys
-
-try:
-
-    energy_threshold = 45
-    duration = 10  # seconds
-
-    if len(sys.argv) > 1:
-        energy_threshold = float(sys.argv[1])
-
-    if len(sys.argv) > 2:
-        duration = float(sys.argv[2])
-
-    # record = True so that we'll be able to rewind the source.
-    # max_time = 10: read 10 seconds from the microphone
-    asource = ADSFactory.ads(record=True, max_time=duration)
-
-    validator = AudioEnergyValidator(
-        sample_width=asource.get_sample_width(),
-        energy_threshold=energy_threshold,
-    )
-    tokenizer = StreamTokenizer(
-        validator=validator,
-        min_length=20,
-        max_length=250,
-        max_continuous_silence=30,
-    )
-
-    player = player_for(asource)
-
-    def echo(data, start, end):
-        print("Acoustic activity at: {0}--{1}".format(start, end))
-        player.play(b"".join(data))
-
-    asource.open()
-
-    print(
-        "\n  ** Make some noise (dur:{}, energy:{})...".format(
-            duration, energy_threshold
-        )
-    )
-
-    tokenizer.tokenize(asource, callback=echo)
-
-    asource.close()
-    player.stop()
-
-except KeyboardInterrupt:
-
-    player.stop()
-    asource.close()
-    sys.exit(0)
-
-except Exception as e:
-
-    sys.stderr.write(str(e) + "\n")
-    sys.exit(1)