annotate demos/audio_trim_demo.py @ 338:f424ac9193b7

Make sure all modules define __all__
author Amine Sehili <amine.sehili@gmail.com>
date Sun, 27 Oct 2019 15:23:00 +0100
parents 9741b52f194a
children
rev   line source
amine@2 1 """
amine@2 2 @author: Amine SEHILI <amine.sehili@gmail.com>
amine@2 3 September, 2015
amine@2 4 """
amine@2 5
amine@10 6 # Trim leading and trailing silence from a record
amine@2 7
amine@331 8 from auditok import (
amine@331 9 ADSFactory,
amine@331 10 AudioEnergyValidator,
amine@331 11 StreamTokenizer,
amine@331 12 player_for,
amine@331 13 dataset,
amine@331 14 )
amine@2 15 import pyaudio
amine@10 16 import sys
amine@2 17
amine@2 18 """
amine@2 19 The tokenizer in the following example is set up to remove the silence
amine@331 20 that precedes the first acoustic activity or follows the last activity
amine@2 21 in a record. It preserves whatever it finds between the two activities.
amine@10 22 In other words, it removes the leading and trailing silence.
amine@2 23
amine@2 24 Sampling rate is 44100 samples per second, we'll use an analysis window of 100 ms
amine@2 25 (i.e. block_size == 4410)
amine@2 26
amine@2 27 Energy threshold is 50.
amine@2 28
amine@2 29 The tokenizer will start accumulating windows up from the moment it encounters
amine@331 30 the first analysis window of an energy >= 50. ALL the following windows will be
amine@10 31 kept regardless of their energy. At the end of the analysis, it will drop trailing
amine@2 32 windows with an energy below 50.
amine@2 33
amine@2 34 This is an interesting example because the audio file we're analyzing contains a very
amine@331 35 brief noise that occurs within the leading silence. We certainly do not want our tokenizer
amine@2 36 to stop at this point and consider whatever comes after as a useful signal.
amine@2 37 To force the tokenizer to ignore that brief event we use two other parameters `init_min`
amine@2 38 and `init_max_silence`. By `init_min`=3 and `init_max_silence`=1 we tell the tokenizer
amine@2 39 that a valid event must start with at least 3 noisy windows, between which there
amine@2 40 is at most 1 silent window.
amine@2 41
amine@2 42 Still, with this configuration the tokenizer may detect that noise as a valid event
amine@2 43 (if it actually contains 3 consecutive noisy frames). To circumvent this we use a large
amine@2 44 enough analysis window (here of 100 ms) to ensure that the brief noise is surrounded by a much
amine@2 45 longer silence and hence the energy of the overall analysis window will be below 50.
amine@2 46
amine@2 47 When using a shorter analysis window (of 10ms for instance, block_size == 441), the brief
amine@2 48 noise contributes more to energy calculation which yields an energy of over 50 for the window.
amine@2 49 Again we can deal with this situation by using a higher energy threshold (55 for example)
amine@331 50
amine@2 51 """
amine@2 52
# Demo script body: read a whole audio file, trim its leading and trailing
# silence with a StreamTokenizer, then play the original and the trimmed
# signal one after the other.
#
# Both handles start as None so that the cleanup in ``finally`` can tell which
# resources were actually acquired. Without this, a Ctrl-C (or an error)
# before ``player`` is created would raise NameError in the handler.
asource = None
player = None

try:
    # record=True so that we'll be able to rewind the source.
    asource = ADSFactory.ads(
        filename=dataset.was_der_mensch_saet_mono_44100_lead_tail_silence,
        record=True,
        block_size=4410,
    )
    asource.open()

    # Read the whole signal, one analysis window at a time, until the
    # source returns None.
    windows = []
    while True:
        w = asource.read()
        if w is None:
            break
        windows.append(w)

    original_signal = b"".join(windows)

    # Rewind the source so the tokenizer reads it again from the start.
    asource.rewind()

    # Create a validator with an energy threshold of 50
    validator = AudioEnergyValidator(
        sample_width=asource.get_sample_width(), energy_threshold=50
    )

    # Create a tokenizer with an unlimited token length and continuous
    # silence within a token.
    # Note the DROP_TRAILING_SILENCE mode that will ensure removing
    # trailing silence.
    trimmer = StreamTokenizer(
        validator,
        min_length=20,
        max_length=99999999,
        max_continuous_silence=9999999,
        mode=StreamTokenizer.DROP_TRAILING_SILENCE,
        init_min=3,
        init_max_silence=1,
    )

    tokens = trimmer.tokenize(asource)

    # Make sure we only have one token. An explicit raise (rather than
    # ``assert``) keeps the check active under ``python -O``; the message
    # and the resulting exit code (1, via the handler below) are unchanged.
    if len(tokens) != 1:
        raise ValueError("Should have detected one single token")

    # tokens[0][0] holds the windows of the single detected token.
    trimmed_signal = b"".join(tokens[0][0])

    player = player_for(asource)

    print(
        "\n ** Playing original signal (with leading and trailing silence)..."
    )
    player.play(original_signal)
    print("\n ** Playing trimmed signal...")
    player.play(trimmed_signal)

except KeyboardInterrupt:
    # User aborted: exit quietly; resources are released in ``finally``.
    sys.exit(0)

except Exception as e:
    # Report the error on stderr and signal failure through the exit code.
    sys.stderr.write(str(e) + "\n")
    sys.exit(1)

finally:
    # Runs on success, on Ctrl-C and on error (before SystemExit
    # propagates), releasing only what was actually acquired.
    if player is not None:
        player.stop()
    if asource is not None:
        asource.close()