changeset 180:8ee7f461b82e

Use ArgumentParser instead of OptionParser; import workers from workers.py; add {timestamp} keyword to --printf option; add --timestamp-format option for timestamp formatting; add --strict-min-duration option to reject a short event adjacent to a valid one; refactor main
author Amine Sehili <amine.sehili@gmail.com>
date Wed, 27 Mar 2019 20:35:02 +0100
parents 45875e458c7e
children 678c1c1a2617
files auditok/cmdline.py
diffstat 1 files changed, 367 insertions(+), 760 deletions(-) [+]
line wrap: on
line diff
--- a/auditok/cmdline.py	Sun Mar 17 18:36:19 2019 +0100
+++ b/auditok/cmdline.py	Wed Mar 27 20:35:02 2019 +0100
@@ -1,794 +1,401 @@
 #!/usr/bin/env python
 # encoding: utf-8
-'''
+"""
 auditok.auditok -- Audio Activity Detection tool
 
-auditok.auditok is a program that can be used for Audio/Acoustic activity detection.
-It can read audio data from audio files as well as from built-in device(s) or standard input 
-
+auditok.auditok is a program that can be used for Audio/Acoustic
+activity detection. It can read audio data from audio files as well
+as from built-in device(s) or standard input.
 
 @author:     Mohamed El Amine SEHILI
-
-@copyright:  2015-2018 Mohamed El Amine SEHILI
-
+@copyright:  2015-2019 Mohamed El Amine SEHILI
 @license:    GPL v3
-
 @contact:    amine.sehili@gmail.com
-@deffield    updated: 01 Nov 2018
-'''
+@deffield    updated: 01 Dec 2018
+"""
 
 import sys
 import os
-
-from optparse import OptionParser, OptionGroup
-from threading import Thread
-import tempfile
-import wave
+from argparse import ArgumentParser
 import time
 import threading
-import logging
 
-try:
-    import future
-    from queue import Queue, Empty
-except ImportError:
-    if sys.version_info >= (3, 0):
-        from queue import Queue, Empty
-    else:
-        from Queue import Queue, Empty
+from auditok import __version__
+from .util import AudioDataSource
+from .cmdline_util import make_logger, make_kwargs
+from . import workers
 
-try:
-    from pydub import AudioSegment
-    WITH_PYDUB = True
-except ImportError:
-    WITH_PYDUB = False
-    
-
-from .core import StreamTokenizer
-from .io import PyAudioSource, BufferAudioSource, StdinAudioSource, player_for
-from .util import ADSFactory, AudioEnergyValidator
-from auditok import __version__ as version
 
 __all__ = []
-__version__ = version
-__date__ = '2015-11-23'
-__updated__ = '2018-10-06'
-
-DEBUG = 0
-TESTRUN = 1
-PROFILE = 0
-
-LOGGER_NAME = "AUDITOK_LOGGER"
-
-class AudioFileFormatError(Exception):
-    pass
-
-class TimeFormatError(Exception):
-    pass
-
-def file_to_audio_source(filename, filetype=None, **kwargs):
-    
-    lower_fname = filename.lower()
-    rawdata = False
-    
-    if filetype is not None:
-        filetype = filetype.lower()
-    
-    if filetype == "raw" or (filetype is None and lower_fname.endswith(".raw")):
-        
-        srate = kwargs.pop("sampling_rate", None)
-        if srate is None:
-            srate = kwargs.pop("sr", None)
-            
-        swidth = kwargs.pop("sample_width", None)
-        if swidth is None:
-            swidth = kwargs.pop("sw", None)
-        
-        ch = kwargs.pop("channels", None)
-        if ch is None:
-            ch = kwargs.pop("ch", None)
-        
-        if None in (swidth, srate, ch):
-            raise Exception("All audio parameters are required for raw data") 
-        
-        data = open(filename).read()
-        rawdata = True
-        
-    # try first with pydub
-    if WITH_PYDUB:
-        
-        use_channel = kwargs.pop("use_channel", None)
-        if use_channel is None:
-            use_channel = kwargs.pop("uc", None)
-        
-        if use_channel is None:
-            use_channel = 1
-        else:
-            try:
-                use_channel = int(use_channel)
-            except ValueError:
-                pass
-        
-        if not isinstance(use_channel, (int)) and not use_channel.lower() in ["left", "right", "mix"] :
-            raise ValueError("channel must be an integer or one of 'left', 'right' or 'mix'")
-        
-        asegment = None
-        
-        if rawdata:
-            asegment = AudioSegment(data, sample_width=swidth, frame_rate=srate, channels=ch)
-        if filetype in("wave", "wav") or (filetype is None and lower_fname.endswith(".wav")):
-            asegment = AudioSegment.from_wav(filename)
-        elif filetype == "mp3" or (filetype is None and lower_fname.endswith(".mp3")):
-            asegment = AudioSegment.from_mp3(filename)
-        elif filetype == "ogg" or (filetype is None and lower_fname.endswith(".ogg")):
-            asegment = AudioSegment.from_ogg(filename)
-        elif filetype == "flv" or (filetype is None and lower_fname.endswith(".flv")):
-            asegment = AudioSegment.from_flv(filename)
-        else:
-            asegment = AudioSegment.from_file(filename)
-            
-        if asegment.channels > 1:
-            
-            if isinstance(use_channel, int):
-                if use_channel > asegment.channels:
-                    raise ValueError("Can not use channel '{0}', audio file has only {1} channels".format(use_channel, asegment.channels))
-                else:
-                    asegment = asegment.split_to_mono()[use_channel - 1]
-            else:
-                ch_lower = use_channel.lower()
-                
-                if ch_lower == "mix":
-                    asegment = asegment.set_channels(1)
-                    
-                elif use_channel.lower() == "left":
-                    asegment = asegment.split_to_mono()[0]
-                    
-                elif use_channel.lower() == "right":
-                    asegment = asegment.split_to_mono()[1]
-        
-        return BufferAudioSource(data_buffer = asegment._data,
-                                     sampling_rate = asegment.frame_rate,
-                                     sample_width = asegment.sample_width,
-                                     channels = asegment.channels)
-    # fall back to standard python
-    else:
-        if rawdata:
-            if ch != 1:
-                raise ValueError("Cannot handle multi-channel audio without pydub")
-            return BufferAudioSource(data, srate, swidth, ch)
-    
-        if filetype in ("wav", "wave") or (filetype is None and lower_fname.endswith(".wav")):
-            
-            wfp = wave.open(filename)
-            
-            ch = wfp.getnchannels()
-            if ch != 1:
-                wfp.close()
-                raise ValueError("Cannot handle multi-channel audio without pydub")
-           
-            srate = wfp.getframerate()
-            swidth = wfp.getsampwidth()
-            data = wfp.readframes(wfp.getnframes())
-            wfp.close()
-            return BufferAudioSource(data, srate, swidth, ch)
-        
-        raise AudioFileFormatError("Cannot read audio file format")
-
-
-def save_audio_data(data, filename, filetype=None, **kwargs):
-    
-    lower_fname = filename.lower()
-    if filetype is not None:
-        filetype = filetype.lower()
-        
-    # save raw data
-    if filetype == "raw" or (filetype is None and lower_fname.endswith(".raw")):
-        fp = open(filename, "w")
-        fp.write(data)
-        fp.close()
-        return
-    
-    # save other types of data
-    # requires all audio parameters
-    srate = kwargs.pop("sampling_rate", None)
-    if srate is None:
-        srate = kwargs.pop("sr", None)
-        
-    swidth = kwargs.pop("sample_width", None)
-    if swidth is None:
-        swidth = kwargs.pop("sw", None)
-    
-    ch = kwargs.pop("channels", None)
-    if ch is None:
-        ch = kwargs.pop("ch", None)
-    
-    if None in (swidth, srate, ch):
-        raise Exception("All audio parameters are required to save no raw data")
-        
-    if filetype in ("wav", "wave") or (filetype is None and lower_fname.endswith(".wav")):
-        # use standard python's wave module
-        fp = wave.open(filename, "w")
-        fp.setnchannels(ch)
-        fp.setsampwidth(swidth)
-        fp.setframerate(srate)
-        fp.writeframes(data)
-        fp.close()
-    
-    elif WITH_PYDUB:
-        
-        asegment = AudioSegment(data, sample_width=swidth, frame_rate=srate, channels=ch)
-        asegment.export(filename, format=filetype)
-    
-    else:
-        raise AudioFileFormatError("cannot write file format {0} (file name: {1})".format(filetype, filename))
-
-
-def plot_all(signal, sampling_rate, energy_as_amp, detections=[], show=True, save_as=None):
-    
-    import matplotlib.pyplot as plt
-    import numpy as np
-    t = np.arange(0., np.ceil(float(len(signal))) / sampling_rate, 1./sampling_rate )
-    if len(t) > len(signal):
-        t = t[: len(signal) - len(t)]
-    
-    for start, end in detections:
-        p = plt.axvspan(start, end, facecolor='g', ec = 'r', lw = 2,  alpha=0.4)
-    
-    line = plt.axhline(y=energy_as_amp, lw=1, ls="--", c="r", label="Energy threshold as normalized amplitude")
-    plt.plot(t, signal)
-    legend = plt.legend(["Detection threshold"], bbox_to_anchor=(0., 1.02, 1., .102), loc=1, fontsize=16)
-    ax = plt.gca().add_artist(legend)
-
-    plt.xlabel("Time (s)", fontsize=24)
-    plt.ylabel("Amplitude (normalized)", fontsize=24)
-    
-    if save_as is not None:
-        plt.savefig(save_as, dpi=120)
-    
-    if show:
-        plt.show()
-
-
-def seconds_to_str_fromatter(_format):
-    """
-    Accepted format directives: %i %s %m %h
-    """
-    # check directives are correct 
-    
-    if _format == "%S":
-        def _fromatter(seconds):
-            return "{:.2f}".format(seconds)
-    
-    elif _format == "%I":
-        def _fromatter(seconds):
-            return "{0}".format(int(seconds * 1000))
-    
-    else:
-        _format = _format.replace("%h", "{hrs:02d}")
-        _format = _format.replace("%m", "{mins:02d}")
-        _format = _format.replace("%s", "{secs:02d}")
-        _format = _format.replace("%i", "{millis:03d}")
-        
-        try:
-            i = _format.index("%")
-            raise TimeFormatError("Unknow time format directive '{0}'".format(_format[i:i+2]))
-        except ValueError:
-            pass
-        
-        def _fromatter(seconds):
-            millis = int(seconds * 1000)
-            hrs, millis = divmod(millis, 3600000)
-            mins, millis = divmod(millis, 60000)
-            secs, millis = divmod(millis, 1000)
-            return _format.format(hrs=hrs, mins=mins, secs=secs, millis=millis)
-    
-    return _fromatter
-
-
-
-class Worker(Thread):
-    
-    def __init__(self, timeout=0.2, debug=False, logger=None):
-        self.timeout = timeout
-        self.debug = debug
-        self.logger = logger
-        
-        if self.debug and self.logger is None:
-            self.logger = logging.getLogger(LOGGER_NAME)
-            self.logger.setLevel(logging.DEBUG)
-            handler = logging.StreamHandler(sys.stdout)
-            self.logger.addHandler(handler)
-            
-        self._inbox = Queue()
-        self._stop_request = Queue()
-        Thread.__init__(self)
-    
-    
-    def debug_message(self, message):
-        self.logger.debug(message)
-        
-    def _stop_requested(self):
-        
-        try:
-            message = self._stop_request.get_nowait()
-            if message == "stop":
-                return True
-
-        except Empty:
-            return False
-    
-    def stop(self):
-        self._stop_request.put("stop")
-        self.join()
-        
-    def send(self, message):
-        self._inbox.put(message)
-    
-    def _get_message(self):
-        try:
-            message = self._inbox.get(timeout=self.timeout)
-            return message        
-        except Empty:
-            return None
-
-
-class TokenizerWorker(Worker):
-    
-    END_OF_PROCESSING = "END_OF_PROCESSING"
-    
-    def __init__(self, ads, tokenizer, analysis_window, observers):
-        self.ads = ads
-        self.tokenizer = tokenizer
-        self.analysis_window = analysis_window
-        self.observers = observers
-        self._inbox = Queue()
-        self.count = 0
-        Worker.__init__(self)
-        
-    def run(self):
-        
-        def notify_observers(data, start, end):
-            audio_data = b''.join(data)
-            self.count += 1
-            
-            start_time = start * self.analysis_window
-            end_time = (end+1) * self.analysis_window
-            duration = (end - start + 1) * self.analysis_window
-            
-            # notify observers
-            for observer in self.observers:
-                observer.notify({"id" : self.count,
-                                 "audio_data" : audio_data,
-                                 "start" : start,
-                                 "end" : end,
-                                 "start_time" : start_time,
-                                 "end_time" : end_time,
-                                 "duration" : duration}
-                                )
-        
-        self.ads.open()
-        self.tokenizer.tokenize(data_source=self, callback=notify_observers)
-        for observer in self.observers:
-            observer.notify(TokenizerWorker.END_OF_PROCESSING)
-            
-    def add_observer(self, observer):
-        self.observers.append(observer)
-       
-    def remove_observer(self, observer):
-        self.observers.remove(observer)
-    
-    def read(self):
-        if self._stop_requested():
-            return None
-        else:
-            return self.ads.read()
-    
-        
-class PlayerWorker(Worker):
-    
-    def __init__(self, player, timeout=0.2, debug=False, logger=None):
-        self.player = player
-        Worker.__init__(self, timeout=timeout, debug=debug, logger=logger)
-        
-    def run(self):
-        while True:
-            if self._stop_requested():
-                break
-            
-            message = self._get_message()
-            if message is not None:
-                if message == TokenizerWorker.END_OF_PROCESSING:
-                    break
-                
-                audio_data = message.pop("audio_data", None)
-                start_time = message.pop("start_time", None)
-                end_time = message.pop("end_time", None)
-                dur = message.pop("duration", None)
-                _id = message.pop("id", None)
-                
-                if audio_data is not None:
-                    if self.debug:
-                        self.debug_message("[PLAY]: Detection {id} played (start:{start}, end:{end}, dur:{dur})".format(id=_id, 
-                        start="{:5.2f}".format(start_time), end="{:5.2f}".format(end_time), dur="{:5.2f}".format(dur)))
-                    self.player.play(audio_data)
-    
-    def notify(self, message):
-        self.send(message)
-        
-               
-class CommandLineWorker(Worker):
-    
-    def __init__(self, command, timeout=0.2, debug=False, logger=None):
-        self.command = command
-        Worker.__init__(self, timeout=timeout, debug=debug, logger=logger)
-    
-    def run(self):
-        while True:
-            if self._stop_requested():
-                break
-            
-            message = self._get_message()
-            if message is not None:
-                if message == TokenizerWorker.END_OF_PROCESSING:
-                    break
-                
-                audio_data = message.pop("audio_data", None)
-                _id = message.pop("id", None)
-                if audio_data is not None:
-                    raw_audio_file = tempfile.NamedTemporaryFile(delete=False)
-                    raw_audio_file.write(audio_data)
-                    cmd = self.command.replace("$", raw_audio_file.name)
-                    if self.debug:
-                        self.debug_message("[CMD ]: Detection {id} command: {cmd}".format(id=_id, cmd=cmd))
-                    os.system(cmd)
-                    os.unlink(raw_audio_file.name)
-                
-    def notify(self, message):
-        self.send(message)
-        
-
-class TokenSaverWorker(Worker):
-    
-    def __init__(self, name_format, filetype, timeout=0.2, debug=False, logger=None, **kwargs):
-        self.name_format = name_format
-        self.filetype = filetype
-        self.kwargs = kwargs
-        Worker.__init__(self, timeout=timeout, debug=debug, logger=logger)
-    
-    def run(self):
-        while True:
-            if self._stop_requested():
-                break
-            
-            message = self._get_message()
-            if message is not None:
-                if message == TokenizerWorker.END_OF_PROCESSING:
-                    break
-                
-                audio_data = message.pop("audio_data", None)
-                start_time = message.pop("start_time", None)
-                end_time = message.pop("end_time", None)
-                _id = message.pop("id", None)
-                if audio_data is not None and len(audio_data) > 0:
-                    fname = self.name_format.format(N=_id, start = "{:.2f}".format(start_time), end = "{:.2f}".format(end_time))
-                    try:
-                        if self.debug:
-                            self.debug_message("[SAVE]: Detection {id} saved as {fname}".format(id=_id, fname=fname))
-                        save_audio_data(audio_data, fname, filetype=self.filetype, **self.kwargs)
-                    except Exception as e:
-                        sys.stderr.write(str(e) + "\n")
-    
-    def notify(self, message):
-        self.send(message)
-
-
-class LogWorker(Worker):
-    
-    def __init__(self, print_detections=False, output_format="{start} {end}",
-                 time_formatter=seconds_to_str_fromatter("%S"), timeout=0.2, debug=False, logger=None):
-        
-        self.print_detections = print_detections
-        self.output_format = output_format
-        self.time_formatter = time_formatter
-        self.detections = []
-        Worker.__init__(self, timeout=timeout, debug=debug, logger=logger)
-        
-    def run(self):
-        while True:
-            if self._stop_requested():
-                break
-            
-            message = self._get_message()
-            
-            if message is not None:
-                
-                if message == TokenizerWorker.END_OF_PROCESSING:
-                    break
-                
-                audio_data = message.pop("audio_data", None)
-                _id = message.pop("id", None)
-                start = message.pop("start", None)
-                end = message.pop("end", None)
-                start_time = message.pop("start_time", None)
-                end_time = message.pop("end_time", None)
-                duration = message.pop("duration", None)
-                if audio_data is not None and len(audio_data) > 0:
-                    
-                    if self.debug:
-                        self.debug_message("[DET ]: Detection {id} (start:{start}, end:{end})".format(id=_id, 
-                            start="{:5.2f}".format(start_time),
-                            end="{:5.2f}".format(end_time)))
-                    
-                    if self.print_detections:
-                        print(self.output_format.format(id = _id,
-                            start = self.time_formatter(start_time),
-                            end = self.time_formatter(end_time), duration = self.time_formatter(duration)))
-                        
-                    self.detections.append((_id, start, end, start_time, end_time))
-                   
-    
-    def notify(self, message):
-        self.send(message)
-
+version = __version__
+__date__ = "2015-11-23"
+__updated__ = "2018-12-01"
 
 
 def main(argv=None):
-    '''Command line options.'''
-
     program_name = os.path.basename(sys.argv[0])
-    program_version = version
-    program_build_date = "%s" % __updated__
-
-    program_version_string = '%%prog %s (%s)' % (program_version, program_build_date)
-    #program_usage = '''usage: spam two eggs''' # optional - will be autogenerated by optparse
-    program_longdesc = '''''' # optional - give further explanation about what the program does
-    program_license = "Copyright 2015-2018 Mohamed El Amine SEHILI                                            \
-                Licensed under the General Public License (GPL) Version 3 \nhttp://www.gnu.org/licenses/"
-
     if argv is None:
         argv = sys.argv[1:]
     try:
-        # setup option parser
-        parser = OptionParser(version=program_version_string, epilog=program_longdesc, description=program_license)
-        
-        group = OptionGroup(parser, "[Input-Output options]")
-        group.add_option("-i", "--input", dest="input", help="Input audio or video file. Use - for stdin [default: read from microphone using pyaudio]", metavar="FILE")
-        group.add_option("-t", "--input-type", dest="input_type", help="Input audio file type. Mandatory if file name has no extension [default: %default]", type=str, default=None, metavar="String")
-        group.add_option("-M", "--max_time", dest="max_time", help="Max data (in seconds) to read from microphone/file [default: read until the end of file/stream]", type=float, default=None, metavar="FLOAT")
-        group.add_option("-O", "--output-main", dest="output_main", help="Save main stream as. If omitted main stream will not be saved [default: omitted]", type=str, default=None, metavar="FILE")
-        group.add_option("-o", "--output-tokens", dest="output_tokens", help="Output file name format for detections. Use {N} and {start} and {end} to build file names, example: 'Det_{N}_{start}-{end}.wav'", type=str, default=None, metavar="STRING")
-        group.add_option("-T", "--output-type", dest="output_type", help="Audio type used to save detections and/or main stream. If not supplied will: (1). guess from extension or (2). use wav format", type=str, default=None, metavar="STRING")
-        group.add_option("-u", "--use-channel", dest="use_channel", help="Choose channel to use from a multi-channel audio file (requires pydub). 'left', 'right' and 'mix' are accepted values. [Default: 1 (i.e. 1st or left channel)]", type=str, default="1", metavar="STRING")
-        parser.add_option_group(group)
-        
-        
-        group = OptionGroup(parser, "[Tokenization options]", "Set tokenizer options and energy threshold.")
-        group.add_option("-a", "--analysis-window", dest="analysis_window", help="Size of analysis window in seconds [default: %default (10ms)]", type=float, default=0.01, metavar="FLOAT")
-        group.add_option("-n", "--min-duration", dest="min_duration", help="Min duration of a valid audio event in seconds [default: %default]", type=float, default=0.2, metavar="FLOAT")
-        group.add_option("-m", "--max-duration", dest="max_duration", help="Max duration of a valid audio event in seconds [default: %default]", type=float, default=5, metavar="FLOAT")
-        group.add_option("-s", "--max-silence", dest="max_silence", help="Max duration of a consecutive silence within a valid audio event in seconds [default: %default]", type=float, default=0.3, metavar="FLOAT")
-        group.add_option("-d", "--drop-trailing-silence", dest="drop_trailing_silence", help="Drop trailing silence from a detection [default: keep trailing silence]",  action="store_true", default=False)
-        group.add_option("-e", "--energy-threshold", dest="energy_threshold", help="Log energy threshold for detection [default: %default]", type=float, default=50, metavar="FLOAT")
-        parser.add_option_group(group)
-        
-        
-        group = OptionGroup(parser, "[Audio parameters]", "Define audio parameters if data is read from a headerless file (raw or stdin) or you want to use different microphone parameters.")        
-        group.add_option("-r", "--rate", dest="sampling_rate", help="Sampling rate of audio data [default: %default]", type=int, default=16000, metavar="INT")
-        group.add_option("-c", "--channels", dest="channels", help="Number of channels of audio data [default: %default]", type=int, default=1, metavar="INT")
-        group.add_option("-w", "--width", dest="sample_width", help="Number of bytes per audio sample [default: %default]", type=int, default=2, metavar="INT")
-        group.add_option("-I", "--input-device-index", dest="input_device_index", help="Audio device index [default: %default] - only when using PyAudio", type=int, default=None, metavar="INT")
-        group.add_option("-F", "--audio-frame-per-buffer", dest="frame_per_buffer", help="Audio frame per buffer [default: %default] - only when using PyAudio", type=int, default=1024, metavar="INT")
-        parser.add_option_group(group)
-        
-        group = OptionGroup(parser, "[Do something with detections]", "Use these options to print, play or plot detections.") 
-        group.add_option("-C", "--command", dest="command", help="Command to call when an audio detection occurs. Use $ to represent the file name to use with the command (e.g. -C 'du -h $')", default=None, type=str, metavar="STRING")
-        group.add_option("-E", "--echo", dest="echo", help="Play back each detection immediately using pyaudio [default: do not play]",  action="store_true", default=False)
-        group.add_option("-p", "--plot", dest="plot", help="Plot and show audio signal and detections (requires matplotlib)",  action="store_true", default=False)
-        group.add_option("", "--save-image", dest="save_image", help="Save plotted audio signal and detections as a picture or a PDF file (requires matplotlib)",  type=str, default=None, metavar="FILE")
-        group.add_option("", "--printf", dest="printf", help="print detections, one per line, using a user supplied format (e.g. '[{id}]: {start} -- {end}'). Available keywords {id}, {start}, {end} and {duration}",  type=str, default="{id} {start} {end}", metavar="STRING")
-        group.add_option("", "--time-format", dest="time_format", help="format used to print {start} and {end}. [Default= %default]. %S: absolute time in sec. %I: absolute time in ms. If at least one of (%h, %m, %s, %i) is used, convert time into hours, minutes, seconds and millis (e.g. %h:%m:%s.%i). Only required fields are printed",  type=str, default="%S", metavar="STRING")
-        parser.add_option_group(group)
-        
-        parser.add_option("-q", "--quiet", dest="quiet", help="Do not print any information about detections [default: print 'id', 'start' and 'end' of each detection]",  action="store_true", default=False)
-        parser.add_option("-D", "--debug", dest="debug", help="Print processing operations to STDOUT",  action="store_true", default=False)
-        parser.add_option("", "--debug-file", dest="debug_file", help="Print processing operations to FILE",  type=str, default=None, metavar="FILE")
-        
-        
+        parser = ArgumentParser(
+            prog=program_name, description="An Audio Tokenization tool"
+        )
+        parser.add_argument("--version", "-v", action="version", version=version)
+        group = parser.add_argument_group("Input-Output options")
+        group.add_argument(
+            "-i",
+            "--input",
+            dest="input",
+            help="Input audio or video file. Use - for stdin "
+            "[default: read from microphone using pyaudio]",
+            metavar="FILE",
+        )
+        group.add_argument(
+            "-I",
+            "--input-device-index",
+            dest="input_device_index",
+            help="Audio device index [default: %(default)s] - only when using PyAudio",
+            type=int,
+            default=None,
+            metavar="INT",
+        )
+        group.add_argument(
+            "-F",
+            "--audio-frame-per-buffer",
+            dest="frame_per_buffer",
+            help="Audio frame per buffer [default: %(default)s] - only when using PyAudio",
+            type=int,
+            default=1024,
+            metavar="INT",
+        )
+        group.add_argument(
+            "-t",
+            "--input-type",
+            dest="input_type",
+            type=str,
+            default=None,
+            help="Input audio file type. Mandatory if file name has no extension",
+            metavar="STRING",
+        )
+        group.add_argument(
+            "-M",
+            "--max-time",
+            dest="max_time",
+            type=float,
+            default=None,
+            help="Max data (in seconds) to read from microphone or file "
+            "[default: read until the end of file/stream]",
+            metavar="FLOAT",
+        )
+        group.add_argument(
+            "-O",
+            "--output-main",
+            dest="output_main",
+            type=str,
+            default=None,
+            help="Save acquired audio data to disk. If omitted no data will be saved "
+            "[default: omitted]",
+            metavar="FILE",
+        )
+        group.add_argument(
+            "-o",
+            "--output-tokens",
+            dest="output_tokens",
+            type=str,
+            default=None,
+            help="Output file name format for detections."
+            "Use {N}, {start} and {end} to build file names,"
+            "example: 'Det_{N}_{start}-{end}.wav'",
+            metavar="STRING",
+        )
+        group.add_argument(
+            "-T",
+            "--output-type",
+            dest="output_type",
+            type=str,
+            default=None,
+            help="Audio type used to save detections and/or main stream. "
+            "If not supplied, then it will: (1. be guessed from extension or (2. "
+            "use raw format",
+            metavar="STRING",
+        )
+        group.add_argument(
+            "-u",
+            "--use-channel",
+            dest="use_channel",
+            type=str,
+            default="1",
+            help="Choose channel to use from a multi-channel audio file "
+            "'left' (1st channel), 'right' (2nd channel) and 'mix' "
+            "(average of all channels) are accepted values. "
+            "[Default: 1]",
+            metavar="INT/STRING",
+        )
 
-        # process options
-        (opts, args) = parser.parse_args(argv)
-        
-        if opts.input == "-":
-            asource = StdinAudioSource(sampling_rate = opts.sampling_rate,
-                                       sample_width = opts.sample_width,
-                                       channels = opts.channels)
-        #read data from a file
-        elif opts.input is not None:
-            asource = file_to_audio_source(filename=opts.input, filetype=opts.input_type, uc=opts.use_channel)
-        
-        # read data from microphone via pyaudio
-        else:
-            try:
-                asource = PyAudioSource(sampling_rate = opts.sampling_rate,
-                                        sample_width = opts.sample_width,
-                                        channels = opts.channels,
-                                        frames_per_buffer = opts.frame_per_buffer,
-                                        input_device_index = opts.input_device_index)
-            except Exception:
-                sys.stderr.write("Cannot read data from audio device!\n")
-                sys.stderr.write("You should either install pyaudio or read data from STDIN\n")
-                sys.exit(2)
-               
-        logger = logging.getLogger(LOGGER_NAME)
-        logger.setLevel(logging.DEBUG)
-        
-        handler = logging.StreamHandler(sys.stdout)
-        if opts.quiet or not opts.debug:
-            # only critical messages will be printed
-            handler.setLevel(logging.CRITICAL)
-        else:
-            handler.setLevel(logging.DEBUG)
-        
-        logger.addHandler(handler)
-        
-        if opts.debug_file is not None:
-            logger.setLevel(logging.DEBUG)
-            opts.debug = True
-            handler = logging.FileHandler(opts.debug_file, "w")
-            fmt = logging.Formatter('[%(asctime)s] | %(message)s')
-            handler.setFormatter(fmt)
-            handler.setLevel(logging.DEBUG)
-            logger.addHandler(handler)
-        
-        record = opts.output_main is not None or opts.plot or opts.save_image is not None
-                        
-        ads = ADSFactory.ads(audio_source = asource, block_dur = opts.analysis_window, max_time = opts.max_time, record = record)
-        validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=opts.energy_threshold)
-        
-        
-        if opts.drop_trailing_silence:
-            mode = StreamTokenizer.DROP_TRAILING_SILENCE
-        else:
-            mode = 0
-        
-        analysis_window_per_second = 1. / opts.analysis_window
-        tokenizer = StreamTokenizer(validator=validator, min_length=opts.min_duration * analysis_window_per_second,
-                                    max_length=int(opts.max_duration * analysis_window_per_second),
-                                    max_continuous_silence=opts.max_silence * analysis_window_per_second,
-                                    mode = mode)
-        
-        
+        group = parser.add_argument_group(
+            "Tokenization options", "Set tokenizer options."
+        )
+        group.add_argument(
+            "-a",
+            "--analysis-window",
+            dest="analysis_window",
+            default=0.01,
+            type=float,
+            help="Size of analysis window in seconds [default: %(default)s (10ms)]",
+            metavar="FLOAT",
+        )
+        group.add_argument(
+            "-n",
+            "--min-duration",
+            dest="min_duration",
+            type=float,
+            default=0.2,
+            help="Min duration of a valid audio event in seconds [default: %(default)s]",
+            metavar="FLOAT",
+        )
+        group.add_argument(
+            "-m",
+            "--max-duration",
+            dest="max_duration",
+            type=float,
+            default=5,
+            help="Max duration of a valid audio event in seconds [default: %(default)s]",
+            metavar="FLOAT",
+        )
+        group.add_argument(
+            "-s",
+            "--max-silence",
+            dest="max_silence",
+            type=float,
+            default=0.3,
+            help="Max duration of a consecutive silence within a valid audio event "
+            "in seconds [default: %(default)s]",
+            metavar="FLOAT",
+        )
+        group.add_argument(
+            "-d",
+            "--drop-trailing-silence",
+            dest="drop_trailing_silence",
+            action="store_true",
+            default=False,
+            help="Drop trailing silence from a detection [default: keep "
+            "trailing silence]",
+        )
+
+        group.add_argument(
+            "-R",
+            "--strict-min-duration",
+            dest="strict_min_duration",
+            action="store_true",
+            default=False,
+            help="Reject an event shorter than --min-duration even if it's "
+            "adjacent to the latest valid event that reached max-duration "
+            "[default: keep such events]",
+        )
+
+        group.add_argument(
+            "-e",
+            "--energy-threshold",
+            dest="energy_threshold",
+            type=float,
+            default=50,
+            help="Log energy threshold for detection [default: %(default)s]",
+            metavar="FLOAT",
+        )
+
+        group = parser.add_argument_group(
+            "Audio parameters",
+            "Define audio parameters if data is read from a "
+            "headerless file (raw or stdin) or you want to use "
+            "different microphone parameters.",
+        )
+        group.add_argument(
+            "-r",
+            "--rate",
+            dest="sampling_rate",
+            type=int,
+            default=16000,
+            help="Sampling rate of audio data [default: %(default)s]",
+            metavar="INT",
+        )
+        group.add_argument(
+            "-c",
+            "--channels",
+            dest="channels",
+            type=int,
+            default=1,
+            help="Number of channels of audio data [default: %(default)s]",
+            metavar="INT",
+        )
+        group.add_argument(
+            "-w",
+            "--width",
+            dest="sample_width",
+            type=int,
+            default=2,
+            help="Number of bytes per audio sample [default: %(default)s]",
+            metavar="INT",
+        )
+
+        group = parser.add_argument_group(
+            "Do something with audio events",
+            "Use these options to print, play or plot detections.",
+        )
+        group.add_argument(
+            "-C",
+            "--command",
+            dest="command",
+            type=str,
+            help="Command to call when an audio detection occurs. Use $ to "
+            "represent the file name to use with the command (e.g. -C "
+            "'du -h $')",
+            metavar="STRING",
+        )
+        group.add_argument(
+            "-E",
+            "--echo",
+            dest="echo",
+            action="store_true",
+            default=False,
+            help="Play back each detection immediately using pyaudio",
+        )
+        group.add_argument(
+            "-p",
+            "--plot",
+            dest="plot",
+            action="store_true",
+            default=False,
+            help="Plot and show audio signal and detections (requires matplotlib)",
+        )
+        group.add_argument(
+            "--save-image",
+            dest="save_image",
+            type=str,
+            help="Save plotted audio signal and detections as a picture or a PDF "
+            "file (requires matplotlib)",
+            metavar="FILE",
+        )
+        group.add_argument(
+            "--printf",
+            dest="printf",
+            type=str,
+            default="{id} {start} {end}",
+            help="print detections, one per line, using a user supplied format "
+            "(e.g. '[{id}]: {start} -- {end}'). Available keywords are: "
+            "{id}, {start}, {end}, {duration} and {timestamp} "
+            "(i.e., system date and time)",
+            metavar="STRING",
+        )
+        group.add_argument(
+            "--time-format",
+            dest="time_format",
+            type=str,
+            default="%S",
+            help="format used to print {start} and {end}. [default: %(default)s]. "
+            "%%S: absolute time in seconds. %%I: absolute time in ms. If at least "
+            "one of (%%h, %%m, %%s, %%i) is used, convert time into hours, "
+            "minutes, seconds and millis (e.g. %%h:%%m:%%s.%%i). Only supplied "
+            "fields are printed. Note that %%S and %%I can only be used alone",
+            metavar="STRING",
+        )
+
+        group.add_argument(
+            "--timestamp-format",
+            dest="timestamp_format",
+            type=str,
+            default="%Y/%m/%d %H:%M:%S",
+            help="format used to print {timestamp}. Should be a format accepted by "
+            "datetime. [default: %(default)s]",
+        )
+
+        parser.add_argument(
+            "-q",
+            "--quiet",
+            dest="quiet",
+            action="store_true",
+            default=False,
+            help="Do not print any information about detections [default: print "
+            "'id', 'start' and 'end' of each detection]",
+        )
+        parser.add_argument(
+            "-D",
+            "--debug",
+            dest="debug",
+            action="store_true",
+            default=False,
+            help="Print processing operations to STDOUT",
+        )
+        parser.add_argument(
+            "--debug-file",
+            dest="debug_file",
+            type=str,
+            default=None,
+            help="Print processing operations to FILE",
+            metavar="FILE",
+        )
+
+        args = parser.parse_args(argv)
+        logger = make_logger(args.debug, args.debug_file)
+        kwargs = make_kwargs(args)
         observers = []
-        tokenizer_worker = None
-        
-        if opts.output_tokens is not None:
-            
-            try:
-                # check user format is correct
-                fname  = opts.output_tokens.format(N=0, start=0, end=0)
-                
-                # find file type for detections
-                tok_type =  opts.output_type
-                if tok_type is None:
-                    tok_type = os.path.splitext(opts.output_tokens)[1][1:]
-                if tok_type == "": 
-                    tok_type = "wav"
-                
-                token_saver = TokenSaverWorker(name_format=opts.output_tokens, filetype=tok_type,
-                                               debug=opts.debug, logger=logger, sr=asource.get_sampling_rate(),
-                                               sw=asource.get_sample_width(),
-                                               ch=asource.get_channels())
-                observers.append(token_saver)
-            
-            except Exception:
-                sys.stderr.write("Wrong format for detections file name: '{0}'\n".format(opts.output_tokens))
-                sys.exit(2)
-            
-        if opts.echo:
-            try:
-                player = player_for(asource)
-                player_worker = PlayerWorker(player=player, debug=opts.debug, logger=logger)
-                observers.append(player_worker)
-            except Exception:
-                sys.stderr.write("Cannot get an audio player!\n")
-                sys.stderr.write("You should either install pyaudio or supply a command (-C option) to play audio\n")
-                sys.exit(2)
-                
-        if opts.command is not None and len(opts.command) > 0:
-            cmd_worker = CommandLineWorker(command=opts.command, debug=opts.debug, logger=logger)
-            observers.append(cmd_worker)
-        
-        if not opts.quiet or opts.plot is not None or opts.save_image is not None:    
-            oformat = opts.printf.replace("\\n", "\n").replace("\\t", "\t").replace("\\r", "\r")
-            converter = seconds_to_str_fromatter(opts.time_format)
-            log_worker = LogWorker(print_detections = not opts.quiet, output_format=oformat,
-                                   time_formatter=converter, logger=logger, debug=opts.debug)
-            observers.append(log_worker)
-        
-        tokenizer_worker = TokenizerWorker(ads, tokenizer, opts.analysis_window, observers)
-        
-        def _save_main_stream():
-            # find file type
-            main_type =  opts.output_type
-            if main_type is None:
-                main_type = os.path.splitext(opts.output_main)[1][1:]
-            if main_type == "": 
-                main_type = "wav"
-            ads.close()
-            ads.rewind()
-            data = ads.get_audio_source().get_data_buffer()
-            if len(data) > 0:
-                save_audio_data(data=data, filename=opts.output_main, filetype=main_type, sr=asource.get_sampling_rate(),
-                                sw = asource.get_sample_width(),
-                                ch = asource.get_channels())
-        
-        def _plot():
-            import numpy as np
-            ads.close()
-            ads.rewind()
-            data = ads.get_audio_source().get_data_buffer()
-            signal = AudioEnergyValidator._convert(data, asource.get_sample_width())
-            detections = [(det[3] , det[4]) for det in log_worker.detections]
-            max_amplitude = 2**(asource.get_sample_width() * 8 - 1) - 1
-            energy_as_amp = np.sqrt(np.exp(opts.energy_threshold * np.log(10) / 10)) / max_amplitude
-            plot_all(signal / max_amplitude, asource.get_sampling_rate(), energy_as_amp, detections, show = opts.plot, save_as = opts.save_image)
-        
-        
-        # start observer threads
-        for obs in observers:
-            obs.start()
-        # start tokenization thread
-        tokenizer_worker.start()
-        
+
+        if args.output_tokens is not None:
+            worker = workers.RegionSaverWorker(
+                args.output_tokens, args.output_type, logger=logger
+            )
+            observers.append(worker)
+
+        if args.echo:
+            progress_bar = args.quiet and not args.debug
+            worker = workers.PlayerWorker(progress_bar=progress_bar, logger=logger)
+            observers.append(worker)
+
+        if args.command is not None:
+            worker = workers.CommandLineWorker(command=args.command, logger=logger)
+            observers.append(worker)
+
+        if not args.quiet:
+            print_format = (
+                args.printf.replace("\\n", "\n")
+                .replace("\\t", "\t")
+                .replace("\\r", "\r")
+            )
+            time_format = args.time_format
+            timestamp_format = args.timestamp_format
+            worker = workers.PrintWorker(print_format, time_format, timestamp_format)
+            observers.append(worker)
+
+        reader = AudioDataSource(args.input, **kwargs.io_kwargs)
+        if args.output_main is not None:
+            reader = workers.StreamSaverWorker(reader, args.output_main)
+            reader.start()
+
+        tokenizer_worker = workers.TokenizerWorker(
+            reader, observers, logger=logger, **kwargs.split_kwargs
+        )
+        tokenizer_worker.start_all()
+
         while True:
             time.sleep(1)
             if len(threading.enumerate()) == 1:
-                break
-            
-        tokenizer_worker = None
-            
-        if opts.output_main is not None:
-            _save_main_stream()
-        if opts.plot or opts.save_image is not None:
-            _plot()
-            
-        return 0
-            
-    except KeyboardInterrupt:
-        
+                raise workers.EndOfProcessing
+
+    except (KeyboardInterrupt, workers.EndOfProcessing):
         if tokenizer_worker is not None:
-            tokenizer_worker.stop()
-        for obs in observers:
-            obs.stop()
-            
-        if opts.output_main is not None:
-            _save_main_stream()
-        if opts.plot or opts.save_image is not None:
-            _plot()
-        
+            tokenizer_worker.stop_all()
+            if args.output_main is not None:
+                reader.save_stream()
+            if args.plot or args.save_image is not None:
+                from plotting import plot_signal_and_detections
+                import numpy as np
+
+                formats = {1: np.int8, 2: np.int16, 4: np.int32}
+                reader.rewind()
+                signal = np.frombuffer(reader.data, dtype=formats[reader.sw])
+                regions = tokenizer_worker.audio_regions
+                plot_signal_and_detections(signal, regions, args.save_image)
         return 0
 
-    except Exception as e:
-        sys.stderr.write(program_name + ": " + str(e) + "\n")
-        sys.stderr.write("for help use -h\n")
-        
-        return 2
 
 if __name__ == "__main__":
-    if DEBUG:
-        sys.argv.append("-h")
-    if TESTRUN:
-        import doctest
-        doctest.testmod()
-    if PROFILE:
-        import cProfile
-        import pstats
-        profile_filename = 'auditok.auditok_profile.txt'
-        cProfile.run('main()', profile_filename)
-        statsfile = open("profile_stats.txt", "wb")
-        p = pstats.Stats(profile_filename, stream=statsfile)
-        stats = p.strip_dirs().sort_stats('cumulative')
-        stats.print_stats()
-        statsfile.close()
-        sys.exit(0)
-    sys.exit(main())
+    sys.exit(main(None))