amine@2
|
1 """
|
amine@2
|
2 @author: Amine SEHILI <amine.sehili@gmail.com>
|
amine@2
|
3 September, 2015
|
amine@2
|
4 """
|
amine@2
|
5
|
amine@10
|
6 # Trim leading and trailing silence from a record
|
amine@2
|
7
|
amine@331
|
8 from auditok import (
|
amine@331
|
9 ADSFactory,
|
amine@331
|
10 AudioEnergyValidator,
|
amine@331
|
11 StreamTokenizer,
|
amine@331
|
12 player_for,
|
amine@331
|
13 dataset,
|
amine@331
|
14 )
|
amine@2
|
15 import pyaudio
|
amine@10
|
16 import sys
|
amine@2
|
17
|
amine@2
|
18 """
|
amine@2
|
19 The tokenizer in the following example is set up to remove the silence
|
amine@331
|
20 that precedes the first acoustic activity or follows the last activity
|
amine@2
|
21 in a record. It preserves whatever it finds between the two activities.
|
amine@10
|
22 In other words, it removes the leading and trailing silence.
|
amine@2
|
23
|
amine@2
|
24 Sampling rate is 44100 samples per second, we'll use an analysis window of 100 ms
|
amine@2
|
25 (i.e. block_size == 4410)
|
amine@2
|
26
|
amine@2
|
27 Energy threshold is 50.
|
amine@2
|
28
|
amine@2
|
29 The tokenizer will start accumulating windows up from the moment it encounters
|
amine@331
|
30 the first analysis window of an energy >= 50. ALL the following windows will be
|
amine@10
|
31 kept regardless of their energy. At the end of the analysis, it will drop trailing
|
amine@2
|
32 windows with an energy below 50.
|
amine@2
|
33
|
amine@2
|
34 This is an interesting example because the audio file we're analyzing contains a very
|
amine@331
|
35 brief noise that occurs within the leading silence. We certainly do not want our tokenizer
|
amine@2
|
36 to stop at this point and consider whatever comes after it as a useful signal.
|
amine@2
|
37 To force the tokenizer to ignore that brief event we use two other parameters `init_min`
|
amine@2
|
38 and `init_max_silence`. By `init_min`=3 and `init_max_silence`=1 we tell the tokenizer
|
amine@2
|
39 that a valid event must start with at least 3 noisy windows, between which there
|
amine@2
|
40 is at most 1 silent window.
|
amine@2
|
41
|
amine@2
|
42 Even with this configuration, the tokenizer may still detect that noise as a valid event
|
amine@2
|
43 (if it actually contains 3 consecutive noisy frames). To circumvent this we use a sufficiently
|
amine@2
|
44 large analysis window (here of 100 ms) to ensure that the brief noise is surrounded by a much
|
amine@2
|
45 longer silence and hence the energy of the overall analysis window will be below 50.
|
amine@2
|
46
|
amine@2
|
47 When using a shorter analysis window (of 10ms for instance, block_size == 441), the brief
|
amine@2
|
48 noise contributes more to energy calculation which yields an energy of over 50 for the window.
|
amine@2
|
49 Again we can deal with this situation by using a higher energy threshold (55 for example)
|
amine@331
|
50
|
amine@2
|
51 """
|
amine@2
|
52
|
# Trim the leading and trailing silence from a recording, then play the
# original signal followed by the trimmed one.
#
# `asource` and `player` start out as None so that the cleanup code in the
# `finally` clause can tell which resources were actually created, even when
# the script is interrupted (Ctrl-C) or fails before reaching playback.
asource = None
player = None

try:
    # record=True so that we'll be able to rewind the source.
    asource = ADSFactory.ads(
        filename=dataset.was_der_mensch_saet_mono_44100_lead_tail_silence,
        record=True,
        block_size=4410,
    )
    asource.open()

    # Read the whole signal: keep calling `read()` until it returns None,
    # which is the end-of-stream marker this script relies on.
    original_signal = b"".join(iter(asource.read, None))

    # Rewind the source so that the tokenizer reads it from the beginning.
    asource.rewind()

    # Create a validator with an energy threshold of 50
    validator = AudioEnergyValidator(
        sample_width=asource.get_sample_width(), energy_threshold=50
    )

    # Create a tokenizer with an unlimited token length and continuous silence
    # within a token.
    # Note the DROP_TRAILING_SILENCE mode that will ensure removing trailing
    # silence.
    trimmer = StreamTokenizer(
        validator,
        min_length=20,
        max_length=99999999,
        max_continuous_silence=9999999,
        mode=StreamTokenizer.DROP_TRAILING_SILENCE,
        init_min=3,
        init_max_silence=1,
    )

    tokens = trimmer.tokenize(asource)

    # Make sure we only have one token. This is an explicit check rather than
    # an `assert` so that it is still enforced when Python runs with -O.
    if len(tokens) != 1:
        raise RuntimeError("Should have detected one single token")

    # tokens[0][0] holds the audio frames of the single detected token.
    trimmed_signal = b"".join(tokens[0][0])

    player = player_for(asource)

    print(
        "\n ** Playing original signal (with leading and trailing silence)..."
    )
    player.play(original_signal)
    print("\n ** Playing trimmed signal...")
    player.play(trimmed_signal)

except KeyboardInterrupt:
    # User aborted (e.g. during playback): exit quietly with a success code.
    sys.exit(0)

except Exception as e:
    # Report the failure on stderr and signal it through the exit code.
    sys.stderr.write(str(e) + "\n")
    sys.exit(1)

finally:
    # Runs on success, on Ctrl-C and on errors alike (SystemExit raised by the
    # handlers above still passes through here), so the player and the source
    # are always released -- but only if they were actually created.
    if player is not None:
        player.stop()
    if asource is not None:
        asource.close()