"""
Demo: detect audio activity regions in a recorded file and play them back.

The script tokenizes one of the audio files shipped with auditok using an
energy-based validator, plays the whole original signal, then plays each
detected region in turn.

@author: Amine SEHILI <amine.sehili@gmail.com>
September, 2015
"""

from auditok import ADSFactory, AudioEnergyValidator, StreamTokenizer, player_for, dataset

# We set the `record` argument to True so that we can rewind the source
asource = ADSFactory.ads(filename=dataset.one_to_six_arabic_16000_mono_bc_noise, record=True)

validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=65)

# Default analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate())
# min_length=20 : minimum length of a valid audio activity is 20 * 10 == 200 ms
# max_length=400 : maximum length of a valid audio activity is 400 * 10 == 4000 ms == 4 seconds
# max_continuous_silence=30 : maximum length of a tolerated silence within a valid audio activity is 30 * 10 == 300 ms
tokenizer = StreamTokenizer(validator=validator, min_length=20, max_length=400, max_continuous_silence=30)

asource.open()
player = None

# Everything that uses the opened source runs inside `try` so that the source
# and the player are released even if playback or the sanity check fails.
try:
    tokens = tokenizer.tokenize(asource)

    # Play detected regions back
    player = player_for(asource)

    # Rewind and read the whole signal. `read()` returns None once the source
    # is exhausted, which is used here as the iteration sentinel.
    asource.rewind()
    # Audio frames are byte strings, so they must be joined with a bytes
    # separator (`''.join` raises TypeError on Python 3).
    original_signal = b''.join(iter(asource.read, None))

    print("\n ** Playing original file...")
    player.play(original_signal)

    print("\n ** playing detected regions...\n")
    # Each token is (list of frames, start frame index, end frame index).
    for i, (frames, start, end) in enumerate(tokens, 1):
        print("Token [{0}] starts at {1} and ends at {2}".format(i, start, end))
        data = b''.join(frames)
        player.play(data)

    # Sanity check for this particular file and these tokenizer settings.
    assert len(tokens) == 8
finally:
    asource.close()
    if player is not None:
        player.stop()