changeset 69:d52581cfc9b6

Merge branch 'master' of https://github.com/amsehili/auditok into dev
author Amine SEHILI <amine.sehili@gmail.com>
date Sat, 28 Jan 2017 13:03:37 +0100
parents a0843d57697b (diff) 8e1ac2ebdcad (current diff)
children 8cfe5e7dc91e
files
diffstat 7 files changed, 479 insertions(+), 544 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.pre-commit-config.yaml	Sat Jan 28 13:03:37 2017 +0100
@@ -0,0 +1,7 @@
+
+- repo: git://github.com/pre-commit/mirrors-autopep8
+  sha: 575d256007455a3fdf2a6198de7d8168874d89d2
+  hooks:
+  - id: autopep8
+    args: ['-i', '--max-line-length=130']
+
--- a/.travis.yml	Tue Oct 18 21:38:13 2016 +0200
+++ b/.travis.yml	Sat Jan 28 13:03:37 2017 +0100
@@ -8,5 +8,7 @@
   - "3.2"
   - "3.3"
   - "3.4"
+  - "3.5"
+  - "3.6"
 script:
   - python -m unittest discover tests/
--- a/auditok/core.py	Tue Oct 18 21:38:13 2016 +0200
+++ b/auditok/core.py	Sat Jan 28 13:03:37 2017 +0100
@@ -18,42 +18,42 @@
     """
     Class for stream tokenizers. It implements a 4-state automaton scheme
     to extract sub-sequences of interest on the fly.
-    
+
     :Parameters:
-    
+
         `validator` :
             instance of `DataValidator` that implements `is_valid` method.
-        
+
         `min_length` : *(int)*
             Minimum number of frames of a valid token. This includes all \
             tolerated non valid frames within the token.
-            
+
         `max_length` : *(int)*
             Maximum number of frames of a valid token. This includes all \
             tolerated non valid frames within the token.
-        
+
         `max_continuous_silence` : *(int)*
             Maximum number of consecutive non-valid frames within a token.
             Note that, within a valid token, there may be many tolerated \
             *silent* regions that contain each a number of non valid frames up to \
             `max_continuous_silence`
-        
+
         `init_min` : *(int, default=0)*
             Minimum number of consecutive valid frames that must be **initially** \
             gathered before any sequence of non valid frames can be tolerated. This
             option is not always needed, it can be used to drop non-valid tokens as
             early as possible. **Default = 0** means that the option is by default 
             ineffective. 
-                
+
         `init_max_silence` : *(int, default=0)*
             Maximum number of tolerated consecutive non-valid frames if the \
             number already gathered valid frames has not yet reached 'init_min'.
             This argument is normally used if `init_min` is used. **Default = 0**,
             by default this argument is not taken into consideration.
-            
+
         `mode` : *(int, default=0)*
             `mode` can be:
-        
+
         1. `StreamTokenizer.STRICT_MIN_LENGTH`: 
         if token *i* is delivered because `max_length`
         is reached, and token *i+1* is immediately adjacent to
@@ -62,66 +62,65 @@
         least `min_length`. The default behavior is to accept token *i+1*
         event if it is shorter than `min_length` (given that the above conditions
         are fulfilled of course).
-           
+
         :Examples:
-               
+
         In the following code, without `STRICT_MIN_LENGTH`, the 'BB' token is
         accepted although it is shorter than `min_length` (3), because it immediately
         follows the latest delivered token:
-            
+
         .. code:: python
-        
+
             from auditok import StreamTokenizer, StringDataSource, DataValidator
-    
+
             class UpperCaseChecker(DataValidator):
                 def is_valid(self, frame):
                     return frame.isupper()
-                   
-    
+
+
             dsource = StringDataSource("aaaAAAABBbbb")
             tokenizer = StreamTokenizer(validator=UpperCaseChecker(),
                                         min_length=3,
                                         max_length=4,
                                         max_continuous_silence=0)
-         
+
             tokenizer.tokenize(dsource)
-                    
-                    
+
         :output:
-    
+
          .. code:: python
-         
+
             [(['A', 'A', 'A', 'A'], 3, 6), (['B', 'B'], 7, 8)]
 
 
         The following tokenizer will however reject the 'BB' token:
-     
+
         .. code:: python
-                
+
             dsource = StringDataSource("aaaAAAABBbbb")
             tokenizer = StreamTokenizer(validator=UpperCaseChecker(), 
                                         min_length=3, max_length=4,
                                         max_continuous_silence=0,
                                         mode=StreamTokenizer.STRICT_MIN_LENGTH)
             tokenizer.tokenize(dsource)
-        
+
         :output:
-            
+
         .. code:: python
-            
+
             [(['A', 'A', 'A', 'A'], 3, 6)]
-            
-           
+
+
         2. `StreamTokenizer.DROP_TRAILING_SILENCE`: drop all tailing non-valid frames
         from a token to be delivered if and only if it is not **truncated**.
         This can be a bit tricky. A token is actually delivered if:
-           
+
         - a. `max_continuous_silence` is reached
-           
+
         :or:
-           
+
         - b. Its length reaches `max_length`. This is called a **truncated** token
-           
+
         In the current implementation, a `StreamTokenizer`'s decision is only based on already seen
         data and on incoming data. Thus, if a token is truncated at a non-valid but tolerated
         frame (`max_length` is reached but `max_continuous_silence` not yet) any tailing
@@ -130,135 +129,132 @@
         token will not be considered as truncated but a result of *normal* end of detection
         (i.e. no more valid data). In that case the tailing silence can be removed if you use
         the `StreamTokenizer.DROP_TRAILING_SILENCE` mode.
-    
+
         :Example:
-    
+
         .. code:: python
-                       
+
              tokenizer = StreamTokenizer(validator=UpperCaseChecker(), min_length=3,
                                          max_length=6, max_continuous_silence=3,
                                          mode=StreamTokenizer.DROP_TRAILING_SILENCE)
-            
+
              dsource = StringDataSource("aaaAAAaaaBBbbbb")
              tokenizer.tokenize(dsource)
-        
+
         :output:
-            
+
         .. code:: python
-                
+
             [(['A', 'A', 'A', 'a', 'a', 'a'], 3, 8), (['B', 'B'], 9, 10)]
-                    
+
         The first token is delivered with its tailing silence because it is truncated
         while the second one has its tailing frames removed.
-                    
+
         Without `StreamTokenizer.DROP_TRAILING_SILENCE` the output would be:
-                        
+
         .. code:: python
-         
+
             [(['A', 'A', 'A', 'a', 'a', 'a'], 3, 8), (['B', 'B', 'b', 'b', 'b'], 9, 13)]
-    
-        
-        
+
+
         3. `StreamTokenizer.STRICT_MIN_LENGTH | StreamTokenizer.DROP_TRAILING_SILENCE`:
         use both options. That means: first remove tailing silence, then ckeck if the
         token still has at least a length of `min_length`.
     """
-    
-    
+
     SILENCE = 0
     POSSIBLE_SILENCE = 1
-    POSSIBLE_NOISE = 2 
+    POSSIBLE_NOISE = 2
     NOISE = 3
-    
+
     STRICT_MIN_LENGTH = 2
     DROP_TRAILING_SILENCE = 4
     # alias
     DROP_TAILING_SILENCE = 4
-    
-    def __init__(self, validator, 
+
+    def __init__(self, validator,
                  min_length, max_length, max_continuous_silence,
                  init_min=0, init_max_silence=0,
                  mode=0):
-        
+
         if not isinstance(validator, DataValidator):
             raise TypeError("'validator' must be an instance of 'DataValidator'")
-        
+
         if max_length <= 0:
             raise ValueError("'max_length' must be > 0 (value={0})".format(max_length))
-        
+
         if min_length <= 0 or min_length > max_length:
             raise ValueError("'min_length' must be > 0 and <= 'max_length' (value={0})".format(min_length))
-        
+
         if max_continuous_silence >= max_length:
             raise ValueError("'max_continuous_silence' must be < 'max_length' (value={0})".format(max_continuous_silence))
-        
+
         if init_min >= max_length:
             raise ValueError("'init_min' must be < 'max_length' (value={0})".format(max_continuous_silence))
-            
+
         self.validator = validator
         self.min_length = min_length
         self.max_length = max_length
         self.max_continuous_silence = max_continuous_silence
         self.init_min = init_min
         self.init_max_silent = init_max_silence
-        
+
         self._mode = None
         self.set_mode(mode)
         self._strict_min_length = (mode & self.STRICT_MIN_LENGTH) != 0
-        self._drop_tailing_silence  = (mode & self.DROP_TRAILING_SILENCE) != 0
-        
+        self._drop_tailing_silence = (mode & self.DROP_TRAILING_SILENCE) != 0
+
         self._deliver = None
         self._tokens = None
         self._state = None
         self._data = None
         self._contiguous_token = False
-        
+
         self._init_count = 0
         self._silence_length = 0
         self._start_frame = 0
         self._current_frame = 0
-    
+
     def set_mode(self, mode):
         """
         :Parameters:
-        
+
             `mode` : *(int)*
                 New mode, must be one of:
-                    
-                
+
+
             - `StreamTokenizer.STRICT_MIN_LENGTH`
-            
+
             - `StreamTokenizer.DROP_TRAILING_SILENCE`
-            
+
             - `StreamTokenizer.STRICT_MIN_LENGTH | StreamTokenizer.DROP_TRAILING_SILENCE`
-                   
+
             - `0`
-                       
+
         See `StreamTokenizer.__init__` for more information about the mode.
         """
-        
+
         if not mode in [self.STRICT_MIN_LENGTH, self.DROP_TRAILING_SILENCE,
-           self.STRICT_MIN_LENGTH | self.DROP_TRAILING_SILENCE, 0]:
-            
+                        self.STRICT_MIN_LENGTH | self.DROP_TRAILING_SILENCE, 0]:
+
             raise ValueError("Wrong value for mode")
-        
+
         self._mode = mode
         self._strict_min_length = (mode & self.STRICT_MIN_LENGTH) != 0
-        self._drop_tailing_silence  = (mode & self.DROP_TRAILING_SILENCE) != 0
-        
-    
+        self._drop_tailing_silence = (mode & self.DROP_TRAILING_SILENCE) != 0
+
     def get_mode(self):
         """
         Return the current mode. To check whether a specific mode is activated use
         the bitwise 'and' operator `&`. Example:
-           
+
         .. code:: python 
-                
+
             if mode & self.STRICT_MIN_LENGTH != 0:
                do_something()
         """
         return self._mode
-        
+
     def _reinitialize(self):
         self._contiguous_token = False
         self._data = []
@@ -266,112 +262,109 @@
         self._state = self.SILENCE
         self._current_frame = -1
         self._deliver = self._append_token
-    
-    
+
     def tokenize(self, data_source, callback=None):
         """
         Read data from `data_source`, one frame a time, and process the read frames in
         order to detect sequences of frames that make up valid tokens.
-        
+
         :Parameters:
            `data_source` : instance of the :class:`DataSource` class that implements a `read` method.
                'read' should return a slice of signal, i.e. frame (of whatever \
                type as long as it can be processed by validator) and None if \
                there is no more signal.
-        
+
            `callback` : an optional 3-argument function.
                If a `callback` function is given, it will be called each time a valid token
                is found.
-           
-           
+
+
         :Returns:
            A list of tokens if `callback` is None. Each token is tuple with the following elements:
-        
+
             .. code python
-            
+
                 (data, start, end)
-            
+
            where `data` is a list of read frames, `start`: index of the first frame in the
            original data and `end` : index of the last frame. 
-        
+
         """
-        
+
         self._reinitialize()
-        
+
         if callback is not None:
             self._deliver = callback
-        
+
         while True:
-            frame =  data_source.read()
+            frame = data_source.read()
             if frame is None:
                 break
             self._current_frame += 1
             self._process(frame)
-            
+
         self._post_process()
-        
+
         if callback is None:
             _ret = self._tokens
             self._tokens = None
             return _ret
-        
-        
+
     def _process(self, frame):
-        
+
         frame_is_valid = self.validator.is_valid(frame)
-        
+
         if self._state == self.SILENCE:
-            
+
             if frame_is_valid:
                 # seems we got a valid frame after a silence
                 self._init_count = 1
                 self._silence_length = 0
                 self._start_frame = self._current_frame
                 self._data.append(frame)
-                
-                if self._init_count  >= self.init_min:
+
+                if self._init_count >= self.init_min:
                     self._state = self.NOISE
                     if len(self._data) >= self.max_length:
                         self._process_end_of_detection(True)
                 else:
                     self._state = self.POSSIBLE_NOISE
-        
+
         elif self._state == self.POSSIBLE_NOISE:
-            
+
             if frame_is_valid:
                 self._silence_length = 0
                 self._init_count += 1
                 self._data.append(frame)
-                if self._init_count  >= self.init_min:
+                if self._init_count >= self.init_min:
                     self._state = self.NOISE
                     if len(self._data) >= self.max_length:
                         self._process_end_of_detection(True)
-            
-            else:                
+
+            else:
                 self._silence_length += 1
                 if self._silence_length > self.init_max_silent or \
-                len(self._data) + 1 >= self.max_length:
+                        len(self._data) + 1 >= self.max_length:
                     # either init_max_silent or max_length is reached
                     # before _init_count, back to silence
                     self._data = []
                     self._state = self.SILENCE
                 else:
                     self._data.append(frame)
-                    
-                
+
         elif self._state == self.NOISE:
-            
+
             if frame_is_valid:
                 self._data.append(frame)
                 if len(self._data) >= self.max_length:
                     self._process_end_of_detection(True)
-            
-            elif self.max_continuous_silence <= 0 :
+
+            elif self.max_continuous_silence <= 0:
                 # max token reached at this frame will _deliver if _contiguous_token
                 # and not _strict_min_length
                 self._process_end_of_detection()
                 self._state = self.SILENCE
-                
+
             else:
                 # this is the first silent frame following a valid one
                 # and it is tolerated
@@ -380,24 +373,22 @@
                 self._state = self.POSSIBLE_SILENCE
                 if len(self._data) == self.max_length:
                     self._process_end_of_detection(True)
-                    # don't reset _silence_length because we still 
+                    # don't reset _silence_length because we still
                     # need to know the total number of silent frames
-                                   
-                                
-    
+
         elif self._state == self.POSSIBLE_SILENCE:
-            
+
             if frame_is_valid:
                 self._data.append(frame)
                 self._silence_length = 0
                 self._state = self.NOISE
                 if len(self._data) >= self.max_length:
                     self._process_end_of_detection(True)
-                
+
             else:
                 if self._silence_length >= self.max_continuous_silence:
                     if self._silence_length < len(self._data):
-                        # _deliver only gathered frames aren't all silent                    
+                        # _deliver only gathered frames aren't all silent
                         self._process_end_of_detection()
                     else:
                         self._data = []
@@ -408,32 +399,28 @@
                     self._silence_length += 1
                     if len(self._data) >= self.max_length:
                         self._process_end_of_detection(True)
-                        # don't reset _silence_length because we still 
+                        # don't reset _silence_length because we still
                         # need to know the total number of silent frames
-                        
-    
+
     def _post_process(self):
         if self._state == self.NOISE or self._state == self.POSSIBLE_SILENCE:
             if len(self._data) > 0 and len(self._data) > self._silence_length:
                 self._process_end_of_detection()
-    
-    
+
     def _process_end_of_detection(self, truncated=False):
-        
+
         if not truncated and self._drop_tailing_silence and self._silence_length > 0:
             # happens if max_continuous_silence is reached
             # or max_length is reached at a silent frame
             self._data = self._data[0: - self._silence_length]
-        
+
         if (len(self._data) >= self.min_length) or \
-           (len(self._data) > 0 and \
-            not self._strict_min_length and self._contiguous_token):
-            
-            
-            
+           (len(self._data) > 0 and
+                not self._strict_min_length and self._contiguous_token):
+
             _end_frame = self._start_frame + len(self._data) - 1
             self._deliver(self._data, self._start_frame, _end_frame)
-            
+
             if truncated:
                 # next token (if any) will start at _current_frame + 1
                 self._start_frame = self._current_frame + 1
@@ -442,11 +429,9 @@
             else:
                 self._contiguous_token = False
         else:
-            self._contiguous_token = False       
-        
+            self._contiguous_token = False
+
         self._data = []
-            
-    
-    
+
     def _append_token(self, data, start, end):
         self._tokens.append((data, start, end))
--- a/auditok/dataset.py	Tue Oct 18 21:38:13 2016 +0200
+++ b/auditok/dataset.py	Sat Jan 28 13:03:37 2017 +0100
@@ -12,8 +12,7 @@
 16000_mono_bc_noise.wav".format(cd=_current_dir, sep=os.path.sep)
 """A wave file that contains a pronunciation of Arabic numbers from 1 to 6"""
 
-
 was_der_mensch_saet_mono_44100_lead_trail_silence = "{cd}{sep}data{sep}was_\
 der_mensch_saet_das_wird_er_vielfach_ernten_44100Hz_mono_lead_trail_\
 silence.wav".format(cd=_current_dir, sep=os.path.sep)
-""" A wave file that contains a sentence between long leading and trailing periods of silence"""
\ No newline at end of file
+""" A wave file that contains a sentence between long leading and trailing periods of silence"""
--- a/auditok/exceptions.py	Tue Oct 18 21:38:13 2016 +0200
+++ b/auditok/exceptions.py	Sat Jan 28 13:03:37 2017 +0100
@@ -3,7 +3,6 @@
 @author: Amine SEHILI <amine.sehili@gmail.com>
 """
 
+
 class DuplicateArgument(Exception):
     pass
-
-
--- a/auditok/io.py	Tue Oct 18 21:38:13 2016 +0200
+++ b/auditok/io.py	Sat Jan 28 13:03:37 2017 +0100
@@ -39,84 +39,82 @@
 class AudioSource():
     """ 
     Base class for audio source objects.
-        
+
     Subclasses should implement methods to open/close and audio stream 
     and read the desired amount of audio samples.
-    
+
     :Parameters:
-        
+
         `sampling_rate` : int
             Number of samples per second of audio stream. Default = 16000.
-        
+
         `sample_width` : int
             Size in bytes of one audio sample. Possible values : 1, 2, 4.
             Default = 2.
-            
+
         `channels` : int
             Number of channels of audio stream. The current version supports
             only mono audio streams (i.e. one channel).
     """
-    
+
     __metaclass__ = ABCMeta
 
-    def __init__(self, sampling_rate = DEFAULT_SAMPLE_RATE,
-                 sample_width = DEFAULT_SAMPLE_WIDTH,
-                 channels = DEFAULT_NB_CHANNELS):
-  
+    def __init__(self, sampling_rate=DEFAULT_SAMPLE_RATE,
+                 sample_width=DEFAULT_SAMPLE_WIDTH,
+                 channels=DEFAULT_NB_CHANNELS):
+
         if not sample_width in (1, 2, 4):
             raise ValueError("Sample width must be one of: 1, 2 or 4 (bytes)")
-        
+
         if channels != 1:
             raise ValueError("Only mono audio is currently handled")
-            
+
         self.sampling_rate = sampling_rate
         self.sample_width = sample_width
         self.channels = channels
-      
+
     @abstractmethod
     def is_open(self):
         """ Return True if audio source is open, False otherwise """
-    
+
     @abstractmethod
     def open(self):
         """ Open audio source """
-    
+
     @abstractmethod
     def close(self):
         """ Close audio source """
-    
+
     @abstractmethod
     def read(self, size):
         """
         Read and return `size` audio samples at most.
-        
+
         :Parameters:
-        
+
             `size` : int
                 the number of samples to read.
-            
+
         :Returns:
-            
+
             Audio data as a string of length 'N' * 'sample_width' * 'channels', where 'N' is:
-            
+
             - `size` if `size` < 'left_samples'
-            
+
             - 'left_samples' if `size` > 'left_samples' 
-        
-        """ 
-    
+        """
+
     def get_sampling_rate(self):
         """ Return the number of samples per second of audio stream """
         return self.sampling_rate
-    
+
     def get_sample_width(self):
         """ Return the number of bytes used to represent one audio sample """
         return self.sample_width
-    
+
     def get_channels(self):
         """ Return the number of channels of this audio source """
         return self.channels
-    
 
 
 class Rewindable():
@@ -126,192 +124,185 @@
     audio stream as well as method to move to an absolute audio position
     expressed in time or in number of samples. 
     """
-    
+
     __metaclass__ = ABCMeta
-    
+
     @abstractmethod
     def rewind(self):
         """ Go back to the beginning of audio stream """
         pass
-    
+
     @abstractmethod
     def get_position(self):
         """ Return the total number of already read samples """
-    
+
     @abstractmethod
     def get_time_position(self):
         """ Return the total duration in seconds of already read data """
-    
+
     @abstractmethod
     def set_position(self, position):
         """ Move to an absolute position 
-        
+
         :Parameters:
-        
+
             `position` : int
                 number of samples to skip from the start of the stream
         """
-    
+
     @abstractmethod
     def set_time_position(self, time_position):
         """ Move to an absolute position expressed in seconds
-        
+
         :Parameters:
-        
+
             `time_position` : float
                 seconds to skip from the start of the stream
         """
         pass
 
-    
 
 class BufferAudioSource(AudioSource, Rewindable):
     """
     An :class:`AudioSource` that encapsulates and reads data from a memory buffer.
     It implements methods from :class:`Rewindable` and is therefore a navigable :class:`AudioSource`.
     """
-    
+
     def __init__(self, data_buffer,
-                 sampling_rate = DEFAULT_SAMPLE_RATE,
-                 sample_width = DEFAULT_SAMPLE_WIDTH,
-                 channels = DEFAULT_NB_CHANNELS):
-        
-        if len(data_buffer) % (sample_width * channels) !=0:
+                 sampling_rate=DEFAULT_SAMPLE_RATE,
+                 sample_width=DEFAULT_SAMPLE_WIDTH,
+                 channels=DEFAULT_NB_CHANNELS):
+
+        if len(data_buffer) % (sample_width * channels) != 0:
             raise ValueError("length of data_buffer must be a multiple of (sample_width * channels)")
-        
+
         AudioSource.__init__(self, sampling_rate, sample_width, channels)
         self._buffer = data_buffer
         self._index = 0
         self._left = 0 if self._buffer is None else len(self._buffer)
         self._is_open = False
-    
+
     def is_open(self):
         return self._is_open
-        
+
     def open(self):
         self._is_open = True
-    
+
     def close(self):
         self._is_open = False
         self.rewind()
-    
+
     def read(self, size):
         if not self._is_open:
             raise IOError("Stream is not open")
-        
+
         if self._left > 0:
-            
-            to_read = size * self.sample_width * self.channels       
+
+            to_read = size * self.sample_width * self.channels
             if to_read > self._left:
-                to_read = self._left 
-                            
+                to_read = self._left
+
             data = self._buffer[self._index: self._index + to_read]
             self._index += to_read
             self._left -= to_read
-            
+
             return data
-        
+
         return None
-    
+
     def get_data_buffer(self):
         """ Return all audio data as one string buffer. """
         return self._buffer
-    
+
     def set_data(self, data_buffer):
         """ Set new data for this audio stream. 
-        
+
         :Parameters:
-        
+
             `data_buffer` : str, basestring, Bytes
                 a string buffer with a length multiple of (sample_width * channels)
         """
-        if len(data_buffer) % (self.sample_width * self.channels) !=0:
+        if len(data_buffer) % (self.sample_width * self.channels) != 0:
             raise ValueError("length of data_buffer must be a multiple of (sample_width * channels)")
         self._buffer = data_buffer
         self._index = 0
         self._left = 0 if self._buffer is None else len(self._buffer)
-    
+
     def append_data(self, data_buffer):
         """ Append data to this audio stream
-        
+
         :Parameters:
-        
+
             `data_buffer` : str, basestring, Bytes
                 a buffer with a length multiple of (sample_width * channels)
         """
-        
-        if len(data_buffer) % (self.sample_width * self.channels) !=0:
+
+        if len(data_buffer) % (self.sample_width * self.channels) != 0:
             raise ValueError("length of data_buffer must be a multiple of (sample_width * channels)")
-        
+
         self._buffer += data_buffer
         self._left += len(data_buffer)
 
-    
     def rewind(self):
         self.set_position(0)
-    
+
     def get_position(self):
         return self._index / self.sample_width
-    
+
     def get_time_position(self):
-        return float(self._index) / (self.sample_width * self.sampling_rate) 
-    
+        return float(self._index) / (self.sample_width * self.sampling_rate)
+
     def set_position(self, position):
         if position < 0:
             raise ValueError("position must be >= 0")
-        
+
         if self._buffer is None:
             self._index = 0
             self._left = 0
             return
-         
-        position *= self.sample_width 
+
+        position *= self.sample_width
         self._index = position if position < len(self._buffer) else len(self._buffer)
         self._left = len(self._buffer) - self._index
 
-
-    def set_time_position(self, time_position): # time in seconds
+    def set_time_position(self, time_position):  # time in seconds
         position = int(self.sampling_rate * time_position)
         self.set_position(position)
 
 
-
 class WaveAudioSource(AudioSource):
     """
     A class for an `AudioSource` that reads data from a wave file.
-    
+
     :Parameters:
-        
+
         `filename` :
             path to a valid wave file
     """
-    
+
     def __init__(self, filename):
-        
+
         self._filename = filename
         self._audio_stream = None
-        
+
         stream = wave.open(self._filename)
         AudioSource.__init__(self, stream.getframerate(),
-                                   stream.getsampwidth(),
-                                   stream.getnchannels())
+                             stream.getsampwidth(),
+                             stream.getnchannels())
         stream.close()
-    
-    
+
     def is_open(self):
         return self._audio_stream is not None
- 
+
     def open(self):
         if(self._audio_stream is None):
             self._audio_stream = wave.open(self._filename)
-      
-        
+
     def close(self):
         if self._audio_stream is not None:
             self._audio_stream.close()
             self._audio_stream = None
-        
-    
+
     def read(self, size):
         if self._audio_stream is None:
             raise IOError("Stream is not open")
@@ -326,174 +317,165 @@
     """
     A class for an `AudioSource` that reads data the built-in microphone using PyAudio. 
     """
-    
-    def __init__(self, sampling_rate = DEFAULT_SAMPLE_RATE,
-                 sample_width = DEFAULT_SAMPLE_WIDTH,
-                 channels = DEFAULT_NB_CHANNELS,
-                 frames_per_buffer = 1024):
-        
-        
+
+    def __init__(self, sampling_rate=DEFAULT_SAMPLE_RATE,
+                 sample_width=DEFAULT_SAMPLE_WIDTH,
+                 channels=DEFAULT_NB_CHANNELS,
+                 frames_per_buffer=1024):
+
         AudioSource.__init__(self, sampling_rate, sample_width, channels)
         self._chunk_size = frames_per_buffer
-        
+
         import pyaudio
         self._pyaudio_object = pyaudio.PyAudio()
-        self._pyaudio_format = self._pyaudio_object.get_format_from_width(self.sample_width) 
+        self._pyaudio_format = self._pyaudio_object.get_format_from_width(self.sample_width)
         self._audio_stream = None
 
-    
     def is_open(self):
         return self._audio_stream is not None
-    
+
     def open(self):
-        self._audio_stream = self._pyaudio_object.open(format = self._pyaudio_format,
-                                                   channels = self.channels,
-                                                   rate = self.sampling_rate,
-                                                   input = True,
-                                                   output = False,
-                                                   frames_per_buffer = self._chunk_size)
-        
-        
+        self._audio_stream = self._pyaudio_object.open(format=self._pyaudio_format,
+                                                       channels=self.channels,
+                                                       rate=self.sampling_rate,
+                                                       input=True,
+                                                       output=False,
+                                                       frames_per_buffer=self._chunk_size)
+
     def close(self):
         if self._audio_stream is not None:
             self._audio_stream.stop_stream()
             self._audio_stream.close()
             self._audio_stream = None
-            
-    
+
     def read(self, size):
         if self._audio_stream is None:
             raise IOError("Stream is not open")
-        
+
         if self._audio_stream.is_active():
             data = self._audio_stream.read(size)
             if data is None or len(data) < 1:
                 return None
             return data
-        
+
         return None
-    
+
 
 class StdinAudioSource(AudioSource):
     """
     A class for an :class:`AudioSource` that reads data from standard input.
     """
-    
-    def __init__(self, sampling_rate = DEFAULT_SAMPLE_RATE,
-                 sample_width = DEFAULT_SAMPLE_WIDTH,
-                 channels = DEFAULT_NB_CHANNELS):
-    
+
+    def __init__(self, sampling_rate=DEFAULT_SAMPLE_RATE,
+                 sample_width=DEFAULT_SAMPLE_WIDTH,
+                 channels=DEFAULT_NB_CHANNELS):
+
         AudioSource.__init__(self, sampling_rate, sample_width, channels)
         self._is_open = False
-    
-    
+
     def is_open(self):
         return self._is_open
-        
+
     def open(self):
         self._is_open = True
-    
+
     def close(self):
         self._is_open = False
-        
+
     def read(self, size):
         if not self._is_open:
             raise IOError("Stream is not open")
-        
+
         to_read = size * self.sample_width * self.channels
         data = sys.stdin.read(to_read)
-        
+
         if data is None or len(data) < 1:
             return None
-        
+
         return data
-       
-           
+
+
 class PyAudioPlayer():
     """
     A class for audio playback using Pyaudio
     """
-    
-    def __init__(self, sampling_rate = DEFAULT_SAMPLE_RATE,
-                 sample_width = DEFAULT_SAMPLE_WIDTH,
-                 channels = DEFAULT_NB_CHANNELS):
+
+    def __init__(self, sampling_rate=DEFAULT_SAMPLE_RATE,
+                 sample_width=DEFAULT_SAMPLE_WIDTH,
+                 channels=DEFAULT_NB_CHANNELS):
         if not sample_width in (1, 2, 4):
             raise ValueError("Sample width must be one of: 1, 2 or 4 (bytes)")
-        
+
         self.sampling_rate = sampling_rate
         self.sample_width = sample_width
         self.channels = channels
-        
+
         import pyaudio
         self._p = pyaudio.PyAudio()
-        self.stream = self._p.open(format = self._p.get_format_from_width(self.sample_width),
-         channels = self.channels, rate = self.sampling_rate,
-         input = False, output = True)
-        
+        self.stream = self._p.open(format=self._p.get_format_from_width(self.sample_width),
+                                   channels=self.channels, rate=self.sampling_rate,
+                                   input=False, output=True)
+
     def play(self, data):
         if self.stream.is_stopped():
             self.stream.start_stream()
-        
+
         for chunk in self._chunk_data(data):
             self.stream.write(chunk)
-            
+
         self.stream.stop_stream()
-    
-    def  stop(self):
+
+    def stop(self):
         if not self.stream.is_stopped():
             self.stream.stop_stream()
         self.stream.close()
         self._p.terminate()
-    
+
     def _chunk_data(self, data):
         # make audio chunks of 100 ms to allow interruption (like ctrl+c)
         chunk_size = int((self.sampling_rate * self.sample_width * self.channels) / 10)
         start = 0
         while start < len(data):
-            yield data[start : start + chunk_size]
+            yield data[start: start + chunk_size]
             start += chunk_size
-        
+
 
 def from_file(filename):
     """
     Create an `AudioSource` object using the audio file specified by `filename`.
     The appropriate :class:`AudioSource` class is guessed from file's extension.
-    
+
     :Parameters:
-    
+
         `filename` :
             path to an audio file.
-        
+
     :Returns:
-    
+
         an `AudioSource` object that reads data from the given file.
-    
     """
-    
+
     if filename.lower().endswith(".wav"):
         return WaveAudioSource(filename)
-    
-    raise Exception("Can not create an AudioSource object from '%s'" %(filename))
+
+    raise Exception("Can not create an AudioSource object from '%s'" % (filename))
 
 
 def player_for(audio_source):
     """
     Return a :class:`PyAudioPlayer` that can play data from `audio_source`.
-    
+
     :Parameters:
-    
+
         `audio_source` : 
             an `AudioSource` object.
-    
+
     :Returns:
-    
+
         `PyAudioPlayer` that has the same sampling rate, sample width and number of channels
         as `audio_source`.
     """
-    
+
     return PyAudioPlayer(audio_source.get_sampling_rate(),
-            audio_source.get_sample_width(),
-            audio_source.get_channels())
-    
-    
-
+                         audio_source.get_sample_width(),
+                         audio_source.get_channels())
--- a/auditok/util.py	Tue Oct 18 21:38:13 2016 +0200
+++ b/auditok/util.py	Sat Jan 28 13:03:37 2017 +0100
@@ -17,7 +17,6 @@
 
 """
 
-
 from abc import ABCMeta, abstractmethod
 import math
 from array import array
@@ -25,24 +24,21 @@
 from .exceptions import DuplicateArgument
 import sys
 
-
 try:
     import numpy
     _WITH_NUMPY = True
 except ImportError as e:
     _WITH_NUMPY = False
-    
+
 try:
     from builtins import str
     basestring = str
 except ImportError as e:
     if sys.version_info >= (3, 0):
         basestring = str
-    
-    
 
 __all__ = ["DataSource", "DataValidator", "StringDataSource", "ADSFactory", "AudioEnergyValidator"]
-    
+
 
 class DataSource():
     """
@@ -50,15 +46,15 @@
     Subclasses should implement a :func:`DataSource.read` method.
     """
     __metaclass__ = ABCMeta
-    
+
     @abstractmethod
     def read(self):
         """
         Read a piece of data read from this source.
         If no more data is available, return None.
         """
-    
-    
+
+
 class DataValidator():
     """
     Base class for a validator object used by :class:`.core.StreamTokenizer` to check
@@ -66,133 +62,130 @@
     Subclasses should implement :func:`is_valid` method.
     """
     __metaclass__ = ABCMeta
-    
+
     @abstractmethod
     def is_valid(self, data):
         """
         Check whether `data` is valid
         """
 
+
 class StringDataSource(DataSource):
     """
     A class that represent a :class:`DataSource` as a string buffer.
     Each call to :func:`DataSource.read` returns on character and moves one step forward.
     If the end of the buffer is reached, :func:`read` returns None.
-   
+
     :Parameters:
-        
+
         `data` : 
             a basestring object.
-     
+
     """
-     
+
     def __init__(self, data):
 
         self._data = None
         self._current = 0
         self.set_data(data)
-        
-    
+
     def read(self):
         """
         Read one character from buffer.
-        
+
         :Returns:
-        
+
             Current character or None if end of buffer is reached
         """
-        
+
         if self._current >= len(self._data):
             return None
         self._current += 1
         return self._data[self._current - 1]
-    
+
     def set_data(self, data):
         """
         Set a new data buffer.
-        
+
         :Parameters:
-        
+
             `data` : a basestring object 
                 New data buffer.
         """
-        
+
         if not isinstance(data, basestring):
             raise ValueError("data must an instance of basestring")
         self._data = data
         self._current = 0
-        
 
 
 class ADSFactory:
     """
     Factory class that makes it easy to create an :class:`ADSFactory.AudioDataSource` object that implements
     :class:`DataSource` and can therefore be passed to :func:`auditok.core.StreamTokenizer.tokenize`.
-    
+
     Whether you read audio data from a file, the microphone or a memory buffer, this factory
     instantiates and returns the right :class:`ADSFactory.AudioDataSource` object.
-    
+
     There are many other features you want your :class:`ADSFactory.AudioDataSource` object to have, such as: 
     memorize all read audio data so that you can rewind and reuse it (especially useful when 
     reading data from the microphone), read a fixed amount of data (also useful when reading 
     from the microphone), read overlapping audio frames (often needed when dosing a spectral
     analysis of data).
-    
+
     :func:`ADSFactory.ads` automatically creates and return object with the desired behavior according
     to the supplied keyword arguments. 
-     
     """
-    
+
     @staticmethod
     def _check_normalize_args(kwargs):
-        
+
         for k in kwargs:
             if not k in ["block_dur", "hop_dur", "block_size", "hop_size", "max_time", "record",
                          "audio_source", "filename", "data_buffer", "frames_per_buffer", "sampling_rate",
                          "sample_width", "channels", "sr", "sw", "ch", "asrc", "fn", "fpb", "db", "mt",
                          "rec", "bd", "hd", "bs", "hs"]:
                 raise ValueError("Invalid argument: {0}".format(k))
-        
+
         if "block_dur" in kwargs and "bd" in kwargs:
             raise DuplicateArgument("Either 'block_dur' or 'bd' must be specified, not both")
-        
+
         if "hop_dur" in kwargs and "hd" in kwargs:
             raise DuplicateArgument("Either 'hop_dur' or 'hd' must be specified, not both")
-        
+
         if "block_size" in kwargs and "bs" in kwargs:
             raise DuplicateArgument("Either 'block_size' or 'bs' must be specified, not both")
-        
+
         if "hop_size" in kwargs and "hs" in kwargs:
             raise DuplicateArgument("Either 'hop_size' or 'hs' must be specified, not both")
-        
+
         if "max_time" in kwargs and "mt" in kwargs:
             raise DuplicateArgument("Either 'max_time' or 'mt' must be specified, not both")
-        
+
         if "audio_source" in kwargs and "asrc" in kwargs:
             raise DuplicateArgument("Either 'audio_source' or 'asrc' must be specified, not both")
-        
+
         if "filename" in kwargs and "fn" in kwargs:
             raise DuplicateArgument("Either 'filename' or 'fn' must be specified, not both")
-        
+
         if "data_buffer" in kwargs and "db" in kwargs:
             raise DuplicateArgument("Either 'filename' or 'db' must be specified, not both")
-        
+
         if "frames_per_buffer" in kwargs and "fbb" in kwargs:
             raise DuplicateArgument("Either 'frames_per_buffer' or 'fpb' must be specified, not both")
-        
+
         if "sampling_rate" in kwargs and "sr" in kwargs:
             raise DuplicateArgument("Either 'sampling_rate' or 'sr' must be specified, not both")
-        
+
         if "sample_width" in kwargs and "sw" in kwargs:
             raise DuplicateArgument("Either 'sample_width' or 'sw' must be specified, not both")
-        
+
         if "channels" in kwargs and "ch" in kwargs:
             raise DuplicateArgument("Either 'channels' or 'ch' must be specified, not both")
-        
+
         if "record" in kwargs and "rec" in kwargs:
             raise DuplicateArgument("Either 'record' or 'rec' must be specified, not both")
-        
-        
+
         kwargs["bd"] = kwargs.pop("block_dur", None) or kwargs.pop("bd", None)
         kwargs["hd"] = kwargs.pop("hop_dur", None) or kwargs.pop("hd", None)
         kwargs["bs"] = kwargs.pop("block_size", None) or kwargs.pop("bs", None)
@@ -201,114 +194,106 @@
         kwargs["asrc"] = kwargs.pop("audio_source", None) or kwargs.pop("asrc", None)
         kwargs["fn"] = kwargs.pop("filename", None) or kwargs.pop("fn", None)
         kwargs["db"] = kwargs.pop("data_buffer", None) or kwargs.pop("db", None)
-        
+
         record = kwargs.pop("record", False)
         if not record:
             record = kwargs.pop("rec", False)
             if not isinstance(record, bool):
                 raise TypeError("'record' must be a boolean")
-            
+
         kwargs["rec"] = record
-        
+
         # keep long names for arguments meant for BufferAudioSource and PyAudioSource
         if "frames_per_buffer" in kwargs or "fpb" in kwargs:
             kwargs["frames_per_buffer"] = kwargs.pop("frames_per_buffer", None) or kwargs.pop("fpb", None)
-        
+
         if "sampling_rate" in kwargs or "sr" in kwargs:
             kwargs["sampling_rate"] = kwargs.pop("sampling_rate", None) or kwargs.pop("sr", None)
-        
-        if "sample_width" in kwargs or "sw" in kwargs:    
+
+        if "sample_width" in kwargs or "sw" in kwargs:
             kwargs["sample_width"] = kwargs.pop("sample_width", None) or kwargs.pop("sw", None)
-        
+
         if "channels" in kwargs or "ch" in kwargs:
             kwargs["channels"] = kwargs.pop("channels", None) or kwargs.pop("ch", None)
-        
-        
-        
-        
-            
-            
-    
+
     @staticmethod
     def ads(**kwargs):
-        
         """
         Create an return an :class:`ADSFactory.AudioDataSource`. The type and behavior of the object is the result
         of the supplied parameters.
-        
+
         :Parameters:
-        
+
         *No parameters* :  
            read audio data from the available built-in microphone with the default parameters.
            The returned :class:`ADSFactory.AudioDataSource` encapsulate an :class:`io.PyAudioSource` object and hence 
            it accepts the next four parameters are passed to use instead of their default values.
-        
+
         `sampling_rate`, `sr` : *(int)*
             number of samples per second. Default = 16000.
-        
+
         `sample_width`, `sw` : *(int)*
             number of bytes per sample (must be in (1, 2, 4)). Default = 2
-        
+
         `channels`, `ch` : *(int)*
             number of audio channels. Default = 1 (only this value is currently accepted)  
-            
+
         `frames_per_buffer`, `fpb` : *(int)*
             number of samples of PyAudio buffer. Default = 1024.
-        
+
         `audio_source`, `asrc` : an `AudioSource` object
             read data from this audio source
-            
+
         `filename`, `fn` : *(string)*
             build an `io.AudioSource` object using this file (currently only wave format is supported)
-            
+
         `data_buffer`, `db` : *(string)*
             build an `io.BufferAudioSource` using data in `data_buffer`. If this keyword is used,
             `sampling_rate`, `sample_width` and `channels` are passed to `io.BufferAudioSource`
             constructor and used instead of default values.
-            
+
         `max_time`, `mt` : *(float)*
             maximum time (in seconds) to read. Default behavior: read until there is no more data
             available. 
-         
+
         `record`, `rec` : *(bool)*
             save all read data in cache. Provide a navigable object which boasts a `rewind` method.
             Default = False.
-        
+
         `block_dur`, `bd` : *(float)*
             processing block duration in seconds. This represents the quantity of audio data to return 
             each time the :func:`read` method is invoked. If `block_dur` is 0.025 (i.e. 25 ms) and the sampling
             rate is 8000 and the sample width is 2 bytes, :func:`read` returns a buffer of 0.025 * 8000 * 2 = 400
             bytes at most. This parameter will be looked for (and used if available) before `block_size`.
             If neither parameter is given, `block_dur` will be set to 0.01 second (i.e. 10 ms)
-            
-            
+
         `hop_dur`, `hd` : *(float)*
             quantity of data to skip from current processing window. if `hop_dur` is supplied then there
             will be an overlap of `block_dur` - `hop_dur` between two adjacent blocks. This
             parameter will be looked for (and used if available) before `hop_size`. If neither parameter
             is given, `hop_dur` will be set to `block_dur` which means that there will be no overlap
             between two consecutively read blocks.
-             
+
         `block_size`, `bs` : *(int)*
             number of samples to read each time the `read` method is called. Default: a block size
             that represents a window of 10ms, so for a sampling rate of 16000, the default `block_size`
             is 160 samples, for a rate of 44100, `block_size` = 441 samples, etc.
-        
+
         `hop_size`, `hs` : *(int)*
             determines the number of overlapping samples between two adjacent read windows. For a
             `hop_size` of value *N*, the overlap is `block_size` - *N*. Default : `hop_size` = `block_size`,
             means that there is no overlap.
-            
+
         :Returns:
-        
+
         An AudioDataSource object that has the desired features.
-        
+
+        :Examples:
-        
+
         1. **Create an AudioDataSource that reads data from the microphone (requires Pyaudio) with default audio parameters:**
-        
+
         .. code:: python
-        
+
             from auditok import ADSFactory
             ads = ADSFactory.ads()
             ads.get_sampling_rate()
@@ -317,21 +302,20 @@
             2
             ads.get_channels()
             1
-        
-        
+
         2. **Create an AudioDataSource that reads data from the microphone with a sampling rate of 48KHz:**
-        
+
         .. code:: python
-        
+
             from auditok import ADSFactory
             ads = ADSFactory.ads(sr=48000)
             ads.get_sampling_rate()
             48000
-        
+
         3. **Create an AudioDataSource that reads data from a wave file:**
-        
+
         .. code:: python
-        
+
             import auditok
             from auditok import ADSFactory
             ads = ADSFactory.ads(fn=auditok.dataset.was_der_mensch_saet_mono_44100_lead_trail_silence)
@@ -341,11 +325,11 @@
             2
             ads.get_channels()
             1
-        
+
         4. **Define size of read blocks as 20 ms**
-        
+
         .. code:: python
-        
+
             import auditok
             from auditok import ADSFactory
             '''
@@ -361,11 +345,11 @@
             len(data)
             1764
             assert len(data) ==  ads.get_sample_width() * block_size
-        
+
         5. **Define block size as a duration (use block_dur or bd):**
-        
+
         .. code:: python
-        
+
             import auditok
             from auditok import ADSFactory
             dur = 0.25 # second
@@ -384,9 +368,9 @@
             len(data)
             22050
             assert len(data) ==  ads.get_sample_width() * ads.get_block_size()
-            
+
+        6. **Read overlapping blocks (one of hop_size, hs, hop_dur or hd > 0):**
-        
+
         For better readability we'd better use :class:`auditok.io.BufferAudioSource` with a string buffer:
 
         .. code:: python
@@ -414,11 +398,11 @@
             'ghij'
             data = ads.read()
             assert data == 'ijkl'
-        
+
         7. **Limit amount of read data (use max_time or mt):**
-        
+
         .. code:: python
-        
+
             '''
             We know audio file is larger than 2.25 seconds
             We want to read up to 2.25 seconds of audio data
@@ -431,18 +415,18 @@
                 if d is None:
                     break
                 data.append(d)
-                
+
             ads.close()
             data = b''.join(data)
             assert len(data) == int(ads.get_sampling_rate() * 2.25 * ads.get_sample_width() * ads.get_channels())
         """
-        
+
         # copy user's dicionary (shallow copy)
         kwargs = kwargs.copy()
-        
+
         # check and normalize keyword arguments
         ADSFactory._check_normalize_args(kwargs)
-        
+
         block_dur = kwargs.pop("bd")
         hop_dur = kwargs.pop("hd")
         block_size = kwargs.pop("bs")
@@ -452,29 +436,28 @@
         filename = kwargs.pop("fn")
         data_buffer = kwargs.pop("db")
         record = kwargs.pop("rec")
-        
+
         # Case 1: an audio source is supplied
         if audio_source is not None:
             if (filename, data_buffer) != (None, None):
                 raise Warning("You should provide one of 'audio_source', 'filename' or 'data_buffer'\
                  keyword parameters. 'audio_source' will be used")
-            
+
         # Case 2: a file name is supplied
         elif filename is not None:
             if data_buffer is not None:
                 raise Warning("You should provide one of 'filename' or 'data_buffer'\
                  keyword parameters. 'filename' will be used")
             audio_source = from_file(filename)
-            
-        # Case 3: a data_buffer is supplied 
+
+        # Case 3: a data_buffer is supplied
         elif data_buffer is not None:
-            audio_source = BufferAudioSource(data_buffer = data_buffer, **kwargs)
-            
+            audio_source = BufferAudioSource(data_buffer=data_buffer, **kwargs)
+
         # Case 4: try to access native audio input
         else:
             audio_source = PyAudioSource(**kwargs)
-             
-             
+
         if block_dur is not None:
             if block_size is not None:
                 raise DuplicateArgument("Either 'block_dur' or 'block_size' can be specified, not both")
@@ -484,100 +467,94 @@
             # Set default block_size to 10 ms
             block_size = int(audio_source.get_sampling_rate() / 100)
 
-        # Instantiate base AudioDataSource  
+        # Instantiate base AudioDataSource
         ads = ADSFactory.AudioDataSource(audio_source=audio_source, block_size=block_size)
-        
+
         # Limit data to be read
         if max_time is not None:
             ads = ADSFactory.LimiterADS(ads=ads, max_time=max_time)
-        
+
         # Record, rewind and reuse data
         if record:
             ads = ADSFactory.RecorderADS(ads=ads)
-            
+
         # Read overlapping blocks of data
         if hop_dur is not None:
             if hop_size is not None:
                 raise DuplicateArgument("Either 'hop_dur' or 'hop_size' can be specified, not both")
             else:
                 hop_size = int(audio_source.get_sampling_rate() * hop_dur)
-            
+
         if hop_size is not None:
-            if hop_size <= 0 or  hop_size > block_size:
+            if hop_size <= 0 or hop_size > block_size:
                 raise ValueError("hop_size must be > 0 and <= block_size")
             if hop_size < block_size:
                 ads = ADSFactory.OverlapADS(ads=ads, hop_size=hop_size)
-        
+
         return ads
-        
-        
+
     class AudioDataSource(DataSource):
         """
         Base class for AudioDataSource objects.
         It inherits from DataSource and encapsulates an AudioSource object.
         """
-        
+
         def __init__(self, audio_source, block_size):
-            
+
             self.audio_source = audio_source
             self.block_size = block_size
-                
+
         def get_block_size(self):
             return self.block_size
-        
+
         def set_block_size(self, size):
             self.block_size = size
 
         def get_audio_source(self):
             return self.audio_source
-        
+
         def set_audio_source(self, audio_source):
             self.audio_source = audio_source
-            
+
         def open(self):
             self.audio_source.open()
-        
+
         def close(self):
             self.audio_source.close()
-            
+
         def is_open(self):
             return self.audio_source.is_open()
-        
+
         def get_sampling_rate(self):
             return self.audio_source.get_sampling_rate()
-        
+
         def get_sample_width(self):
             return self.audio_source.get_sample_width()
-        
+
         def get_channels(self):
             return self.audio_source.get_channels()
-        
-        
+
         def rewind(self):
             if isinstance(self.audio_source, Rewindable):
                 self.audio_source.rewind()
             else:
                 raise Exception("Audio source is not rewindable")
-            
-            
-        
+
         def is_rewindable(self):
             return isinstance(self.audio_source, Rewindable)
-        
-            
+
         def read(self):
             return self.audio_source.read(self.block_size)
 
-
     class ADSDecorator(AudioDataSource):
         """
         Base decorator class for AudioDataSource objects.
         """
         __metaclass__ = ABCMeta
-        
+
         def __init__(self, ads):
             self.ads = ads
-            
+
             self.get_block_size = self.ads.get_block_size
             self.set_block_size = self.ads.set_block_size
             self.get_audio_source = self.ads.get_audio_source
@@ -587,70 +564,68 @@
             self.get_sampling_rate = self.ads.get_sampling_rate
             self.get_sample_width = self.ads.get_sample_width
             self.get_channels = self.ads.get_channels
-        
+
         def is_rewindable(self):
             return self.ads.is_rewindable
-            
+
         def rewind(self):
             self.ads.rewind()
             self._reinit()
-            
+
         def set_audio_source(self, audio_source):
             self.ads.set_audio_source(audio_source)
             self._reinit()
-        
+
         def open(self):
             if not self.ads.is_open():
                 self.ads.open()
                 self._reinit()
-            
+
         @abstractmethod
         def _reinit(self):
-            pass            
-        
-        
+            pass
+
     class OverlapADS(ADSDecorator):
         """
-        A class for AudioDataSource objects that can read and return overlapping audio frames
+        A class for AudioDataSource objects that can read and return overlapping
+        audio frames
         """
-        
+
         def __init__(self, ads, hop_size):
             ADSFactory.ADSDecorator.__init__(self, ads)
-            
+
             if hop_size <= 0 or hop_size > self.get_block_size():
                 raise ValueError("hop_size must be either 'None' or \
                  between 1 and block_size (both inclusive)")
             self.hop_size = hop_size
             self._actual_block_size = self.get_block_size()
             self._reinit()
-            
-            
+
             def _get_block_size():
                 return self._actual_block_size
-            
-            
+
         def _read_first_block(self):
             # For the first call, we need an entire block of size 'block_size'
             block = self.ads.read()
             if block is None:
                 return None
-            
+
             # Keep a slice of data in cache and append it in the next call
             if len(block) > self._hop_size_bytes:
                 self._cache = block[self._hop_size_bytes:]
-            
+
             # Up from the next call, we will use '_read_next_blocks'
             # and we only read 'hop_size'
             self.ads.set_block_size(self.hop_size)
             self.read = self._read_next_blocks
-            
+
             return block
-                
+
         def _read_next_blocks(self):
             block = self.ads.read()
             if block is None:
                 return None
-            
+
             # Append block to cache data to ensure overlap
             block = self._cache + block
             # Keep a slice of data in cache only if we have a full length block
@@ -659,82 +634,76 @@
                 self._cache = block[self._hop_size_bytes:]
             else:
                 self._cache = None
-                
+
             return block
 
         def read(self):
             pass
-        
+
         def _reinit(self):
             self._cache = None
             self.ads.set_block_size(self._actual_block_size)
             self._hop_size_bytes = self.hop_size * \
-                               self.get_sample_width() * \
-                               self.get_channels()
+                self.get_sample_width() * \
+                self.get_channels()
             self._block_size_bytes = self.get_block_size() * \
-                               self.get_sample_width() * \
-                               self.get_channels()
+                self.get_sample_width() * \
+                self.get_channels()
             self.read = self._read_first_block
 
-
-
     class LimiterADS(ADSDecorator):
         """
         A class for AudioDataSource objects that can read a fixed amount of data.
         This can be useful when reading data from the microphone or from large audio files.
         """
-        
+
         def __init__(self, ads, max_time):
             ADSFactory.ADSDecorator.__init__(self, ads)
-            
+
             self.max_time = max_time
             self._reinit()
-            
+
         def read(self):
-            if self._total_read_bytes >=  self._max_read_bytes:
+            if self._total_read_bytes >= self._max_read_bytes:
                 return None
             block = self.ads.read()
             if block is None:
                 return None
             self._total_read_bytes += len(block)
-            
-            if self._total_read_bytes >=  self._max_read_bytes:
+
+            if self._total_read_bytes >= self._max_read_bytes:
                 self.close()
-            
+
             return block
-                
-                
+
         def _reinit(self):
             self._max_read_bytes = int(self.max_time  * self.get_sampling_rate()) * \
-                                  self.get_sample_width() * \
-                                  self.get_channels()
+                self.get_sample_width() * \
+                self.get_channels()
             self._total_read_bytes = 0
 
-            
-
     class RecorderADS(ADSDecorator):
         """
         A class for AudioDataSource objects that can record all audio data they read,
         with a rewind facility.
         """
-        
+
         def __init__(self, ads):
             ADSFactory.ADSDecorator.__init__(self, ads)
-            
+
             self._reinit()
-            
+
         def read(self):
             pass
-        
+
         def _read_and_rec(self):
             # Read and save read data
             block = self.ads.read()
             if block is not None:
                 self._cache.append(block)
-            
+
             return block
-            
-            
+
         def _read_simple(self):
             # Read without recording
             return self.ads.read()
@@ -745,31 +714,29 @@
                 # from recorded data
                 dbuffer = self._concatenate(self._cache)
                 asource = BufferAudioSource(dbuffer, self.get_sampling_rate(),
-                                             self.get_sample_width(),
-                                             self.get_channels())
-                
-                
+                                            self.get_sample_width(),
+                                            self.get_channels())
+
                 self.set_audio_source(asource)
                 self.open()
                 self._cache = []
                 self._record = False
                 self.read = self._read_simple
-            
+
             else:
                 self.ads.rewind()
                 if not self.is_open():
                     self.open()
-                    
-        
+
         def is_rewindable(self):
             return True
-        
+
         def _reinit(self):
             # when audio_source is replaced, start recording again
             self._record = True
             self._cache = []
             self.read = self._read_and_rec
-        
+
         def _concatenate(self, data):
             try:
                 # should always work for python 2
@@ -787,96 +754,90 @@
     This validator computes the log energy of an input audio frame
     and return True if the result is >= a given threshold, False 
     otherwise.
-    
+
     :Parameters:
-    
+
     `sample_width` : *(int)*
         Number of bytes of one audio sample. This is used to convert data from `basestring` or `Bytes` to
         an array of floats.
-        
+
     `energy_threshold` : *(float)*
         A threshold used to check whether an input data buffer is valid.
     """
-    
-    
+
     if _WITH_NUMPY:
-        
-        _formats = {1: numpy.int8 , 2: numpy.int16, 4: numpy.int32}
+        _formats = {1: numpy.int8, 2: numpy.int16, 4: numpy.int32}
 
         @staticmethod
         def _convert(signal, sample_width):
-            return numpy.array(numpy.frombuffer(signal, dtype=AudioEnergyValidator._formats[sample_width]), dtype=numpy.float64)                             
-            
+            return numpy.array(numpy.frombuffer(signal, dtype=AudioEnergyValidator._formats[sample_width]),
+                               dtype=numpy.float64)
+
         @staticmethod
         def _signal_energy(signal):
             return float(numpy.dot(signal, signal)) / len(signal)
-        
-        @staticmethod    
+
+        @staticmethod
         def _signal_log_energy(signal):
             energy = AudioEnergyValidator._signal_energy(signal)
             if energy <= 0:
                 return -200
             return 10. * numpy.log10(energy)
-        
+
     else:
-        
-        
-        _formats = {1: 'b' , 2: 'h', 4: 'i'}
-        
+        _formats = {1: 'b', 2: 'h', 4: 'i'}
+
         @staticmethod
         def _convert(signal, sample_width):
             return array("d", array(AudioEnergyValidator._formats[sample_width], signal))
-        
+
         @staticmethod
         def _signal_energy(signal):
             energy = 0.
             for a in signal:
                 energy += a * a
             return energy / len(signal)
-        
-        @staticmethod    
+
+        @staticmethod
         def _signal_log_energy(signal):
             energy = AudioEnergyValidator._signal_energy(signal)
             if energy <= 0:
                 return -200
             return 10. * math.log10(energy)
-            
-    
+
     def __init__(self, sample_width, energy_threshold=45):
         self.sample_width = sample_width
         self._energy_threshold = energy_threshold
-        
-            
+
     def is_valid(self, data):
         """
         Check if data is valid. Audio data will be converted into an array (of
         signed values) of which the log energy is computed. Log energy is computed
         as follows:
-        
+
         .. code:: python
-        
+
             arr = AudioEnergyValidator._convert(signal, sample_width)
             energy = float(numpy.dot(arr, arr)) / len(arr)
             log_energy = 10. * numpy.log10(energy)
-        
-        
+
+
         :Parameters:
-        
+
         `data` : either a *string* or a *Bytes* buffer
             `data` is converted into a numerical array using the `sample_width`
             given in the constructor.
-        
+
         :Returns:
-        
+
         True if `log_energy` >= `energy_threshold`, False otherwise.
         """
-        
+
         signal = AudioEnergyValidator._convert(data, self.sample_width)
         return AudioEnergyValidator._signal_log_energy(signal) >= self._energy_threshold
-    
+
     def get_energy_threshold(self):
         return self._energy_threshold
-    
+
     def set_energy_threshold(self, threshold):
         self._energy_threshold = threshold
-