amine@2
|
1 """
|
amine@2
|
2 @author: Amine SEHILI <amine.sehili@gmail.com>
|
amine@2
|
3 September, 2015
|
amine@2
|
4 """
|
amine@2
|
5
|
amine@2
|
6 # Trim leading and trailing silence from a record
|
amine@2
|
7
|
amine@2
|
8 from auditok import ADSFactory, AudioEnergyValidator, StreamTokenizer, player_for, dataset
|
amine@2
|
9 import pyaudio
|
amine@2
|
10
|
amine@2
|
11 """
|
amine@2
|
12 The tokenizer in the following example is set up to remove the silence
|
amine@2
|
13 that precedes the first acoustic activity or follows the last activity
|
amine@2
|
14 in a record. It preserves whatever it finds between the two activities.
|
amine@2
|
15 In other words, it removes the leading and trailing silence.
|
amine@2
|
16
|
amine@2
|
17 Sampling rate is 44100 samples per second; we'll use an analysis window of 100 ms
|
amine@2
|
18 (i.e. block_size == 4410)
|
amine@2
|
19
|
amine@2
|
20 Energy threshold is 50.
|
amine@2
|
21
|
amine@2
|
22 The tokenizer will start accumulating windows from the moment it encounters
|
amine@2
|
23 the first analysis window of an energy >= 50. ALL the following windows will be
|
amine@2
|
24 kept regardless of their energy. At the end of the analysis, it will drop trailing
|
amine@2
|
25 windows with an energy below 50.
|
amine@2
|
26
|
amine@2
|
27 This is an interesting example because the audio file we're analyzing contains a very
|
amine@2
|
28 brief noise that occurs within the leading silence. We certainly do not want our tokenizer
|
amine@2
|
29 to stop at this point and consider whatever comes after it as a useful signal.
|
amine@2
|
30 To force the tokenizer to ignore that brief event we use two other parameters `init_min`
|
amine@2
|
31 and `init_max_silence`. With `init_min`=3 and `init_max_silence`=1 we tell the tokenizer
|
amine@2
|
32 that a valid event must start with at least 3 noisy windows, between which there
|
amine@2
|
33 is at most 1 silent window.
|
amine@2
|
34
|
amine@2
|
35 Even with this configuration the tokenizer may still detect that noise as a valid event
|
amine@2
|
36 (if it actually contains 3 consecutive noisy frames). To circumvent this we use a sufficiently
|
amine@2
|
37 large analysis window (here of 100 ms) to ensure that the brief noise is surrounded by a much
|
amine@2
|
38 longer silence and hence the energy of the overall analysis window will be below 50.
|
amine@2
|
39
|
amine@2
|
40 When using a shorter analysis window (of 10ms for instance, block_size == 441), the brief
|
amine@2
|
41 noise contributes more to energy calculation which yields an energy of over 50 for the window.
|
amine@2
|
42 Again we can deal with this situation by using a higher energy threshold (55 for example).
|
amine@2
|
43
|
amine@2
|
44 """
|
amine@2
|
45
|
amine@2
|
46
|
# Open the example recording. record=True makes the source keep the data it
# reads so that it can be rewound and read a second time by the tokenizer.
# block_size=4410 is the number of samples returned by each read() call.
asource = ADSFactory.ads(
    filename=dataset.was_der_mensch_saet_mono_44100_lead_trail_silence,
    record=True,
    block_size=4410,
)
asource.open()

try:
    # First pass: read the whole signal, one analysis window per read() call.
    # read() returns None once the source is exhausted, which is the sentinel
    # that ends the iteration.
    # The windows are raw audio data, so they are joined with a bytes
    # separator: b'' is the same as '' on Python 2, and on Python 3 joining
    # bytes objects with a str separator would raise TypeError.
    original_signal = b''.join(iter(asource.read, None))

    # Rewind the source so that the tokenizer starts from the first window.
    asource.rewind()

    # Create a validator with an energy threshold of 50.
    validator = AudioEnergyValidator(
        sample_width=asource.get_sample_width(),
        energy_threshold=50,
    )

    # Create a tokenizer with a practically unlimited token length and
    # continuous silence within a token, so that everything after the first
    # valid window ends up in one token.
    # DROP_TRAILING_SILENCE removes the silent windows at the end of the token.
    # init_min / init_max_silence: a token must start with at least 3 valid
    # windows with at most 1 silent window among them.
    trimmer = StreamTokenizer(
        validator,
        min_length=20,
        max_length=99999999,
        max_continuous_silence=9999999,
        mode=StreamTokenizer.DROP_TRAILING_SILENCE,
        init_min=3,
        init_max_silence=1,
    )

    tokens = trimmer.tokenize(asource)

    # Make sure we only have one token.
    assert len(tokens) == 1, "Should have detected one single token"

    # The first element of a token is its sequence of data windows
    # (presumably followed by its start/end positions -- confirm against the
    # auditok documentation).
    trimmed_signal = b''.join(tokens[0][0])

    player = player_for(asource)
    try:
        print("\n ** Playing original signal (with leading and trailing silence)...")
        player.play(original_signal)
        print("\n ** Playing trimmed signal...")
        player.play(trimmed_signal)
    finally:
        # Stop the player even if playback fails.
        player.stop()
finally:
    # Always close the audio source, including when tokenizing, the
    # single-token check or playback raises.
    asource.close()