changeset 297:7259b1eb9329

Refactor StreamTokenizer: remove unused code, accept a callable validator, update documentation
author Amine Sehili <amine.sehili@gmail.com>
date Tue, 08 Oct 2019 19:48:13 +0100
parents 5af0974b3446
children d5cbf4fc1416
files auditok/core.py tests/test_StreamTokenizer.py
diffstat 2 files changed, 879 insertions(+), 392 deletions(-)
--- a/auditok/core.py	Mon Oct 07 20:58:23 2019 +0100
+++ b/auditok/core.py	Tue Oct 08 19:48:13 2019 +0100
@@ -129,9 +129,7 @@
             params["channels"] = input.ch
             input = bytes(input)
         try:
-            source = AudioReader(
-                input, block_dur=analysis_window, **params
-            )
+            source = AudioReader(input, block_dur=analysis_window, **params)
         except TooSamllBlockDuration as exc:
             err_msg = "Too small 'analysis_windows' ({0}) for sampling rate "
             err_msg += "({1}). Analysis windows should at least be 1/{1} to "
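For illustration, a minimal sketch of the error path above, assuming this hunk sits inside auditok's split() function (as its `input` and `analysis_window` parameters suggest); "audio.wav" is a hypothetical file:

    # A sketch, not part of the changeset: a sub-sample analysis window
    # makes AudioReader raise TooSamllBlockDuration, re-raised as ValueError.
    import auditok

    try:
        auditok.split("audio.wav", analysis_window=1e-9)
    except ValueError as exc:
        print(exc)  # "Too small 'analysis_window' ..."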
@@ -763,7 +761,8 @@
     :Parameters:
 
         `validator` :
-            instance of `DataValidator` that implements `is_valid` method.
+            A callable or an instance of `DataValidator` that implements
+            the `is_valid` method.
 
         `min_length` : *(int)*
             Minimum number of frames of a valid token. This includes all \
@@ -795,7 +794,11 @@
         `mode` : *(int, default=0)*
             `mode` can be:
 
-        1. `StreamTokenizer.STRICT_MIN_LENGTH`: 
+        1. `StreamTokenizer.NORMAL`:
+        Do not drop trailing silence, and accept a token shorter than
+        `min_length` if it is the continuation of the latest delivered token.
+
+        2. `StreamTokenizer.STRICT_MIN_LENGTH`:
         if token *i* is delivered because `max_length`
         is reached, and token *i+1* is immediately adjacent to
         token *i* (i.e. token *i* ends at frame *k* and token *i+1* starts
@@ -852,7 +855,7 @@
             [(['A', 'A', 'A', 'A'], 3, 6)]
 
 
-        2. `StreamTokenizer.DROP_TRAILING_SILENCE`: drop all tailing non-valid frames
+        3. `StreamTokenizer.DROP_TRAILING_SILENCE`: drop all trailing non-valid frames
         from a token to be delivered if and only if it is not **truncated**.
         This can be a bit tricky. A token is actually delivered if:
 
@@ -898,7 +901,7 @@
             [(['A', 'A', 'A', 'a', 'a', 'a'], 3, 8), (['B', 'B', 'b', 'b', 'b'], 9, 13)]
 
 
-        3. `StreamTokenizer.STRICT_MIN_LENGTH | StreamTokenizer.DROP_TRAILING_SILENCE`:
+        4. `StreamTokenizer.STRICT_MIN_LENGTH | StreamTokenizer.DROP_TRAILING_SILENCE`:
         use both options. That means: first remove trailing silence, then check if the
         token still has a length of at least `min_length`.
     """
@@ -907,11 +910,9 @@
     POSSIBLE_SILENCE = 1
     POSSIBLE_NOISE = 2
     NOISE = 3
-
+    NORMAL = 0
     STRICT_MIN_LENGTH = 2
     DROP_TRAILING_SILENCE = 4
-    # alias
-    DROP_TAILING_SILENCE = 4
 
     def __init__(
         self,
@@ -923,10 +924,13 @@
         init_max_silence=0,
         mode=0,
     ):
-
-        if not isinstance(validator, DataValidator):
+        if callable(validator):
+            self._is_valid = validator
+        elif isinstance(validator, DataValidator):
+            self._is_valid = validator.is_valid
+        else:
             raise TypeError(
-                "'validator' must be an instance of 'DataValidator'"
+                "'validator' must be a callable or an instance of DataValidator"
             )
 
         if max_length <= 0:
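For illustration, the two validator styles accepted by the new __init__ above are interchangeable; a sketch assuming auditok's public exports:

    # A sketch, not part of the changeset: DataValidator instance vs. callable.
    from auditok import StreamTokenizer, DataValidator

    class UpperA(DataValidator):
        def is_valid(self, frame):
            return frame == "A"

    # Both tokenizers behave identically.
    tok_a = StreamTokenizer(UpperA(), min_length=5, max_length=20,
                            max_continuous_silence=4)
    tok_b = StreamTokenizer(lambda frame: frame == "A", min_length=5,
                            max_length=20, max_continuous_silence=4)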
@@ -961,67 +965,30 @@
         self.max_continuous_silence = max_continuous_silence
         self.init_min = init_min
         self.init_max_silent = init_max_silence
-
-        self._mode = None
-        self.set_mode(mode)
-        self._strict_min_length = (mode & self.STRICT_MIN_LENGTH) != 0
-        self._drop_tailing_silence = (mode & self.DROP_TRAILING_SILENCE) != 0
-
+        self._set_mode(mode)
         self._deliver = None
         self._tokens = None
         self._state = None
         self._data = None
         self._contiguous_token = False
-
         self._init_count = 0
         self._silence_length = 0
         self._start_frame = 0
         self._current_frame = 0
 
-    def set_mode(self, mode):
-        # TODO: use properties and make these deprecated
-        """
-        :Parameters:
-
-            `mode` : *(int)*
-                New mode, must be one of:
-
-
-            - `StreamTokenizer.STRICT_MIN_LENGTH`
-
-            - `StreamTokenizer.DROP_TRAILING_SILENCE`
-
-            - `StreamTokenizer.STRICT_MIN_LENGTH | StreamTokenizer.DROP_TRAILING_SILENCE`
-
-            - `0` TODO: this mode should have a name
-
-        See `StreamTokenizer.__init__` for more information about the mode.
-        """
-
+    def _set_mode(self, mode):
+        strict_min_and_drop_trailing = StreamTokenizer.STRICT_MIN_LENGTH
+        strict_min_and_drop_trailing |= StreamTokenizer.DROP_TRAILING_SILENCE
         if not mode in [
-            self.STRICT_MIN_LENGTH,
-            self.DROP_TRAILING_SILENCE,
-            self.STRICT_MIN_LENGTH | self.DROP_TRAILING_SILENCE,
-            0,
+            StreamTokenizer.NORMAL,
+            StreamTokenizer.STRICT_MIN_LENGTH,
+            StreamTokenizer.DROP_TRAILING_SILENCE,
+            strict_min_and_drop_trailing,
         ]:
-
             raise ValueError("Wrong value for mode")
-
         self._mode = mode
         self._strict_min_length = (mode & self.STRICT_MIN_LENGTH) != 0
-        self._drop_tailing_silence = (mode & self.DROP_TRAILING_SILENCE) != 0
-
-    def get_mode(self):
-        """
-        Return the current mode. To check whether a specific mode is activated use
-        the bitwise 'and' operator `&`. Example:
-
-        .. code:: python 
-
-            if mode & self.STRICT_MIN_LENGTH != 0:
-               do_something()
-        """
-        return self._mode
+        self._drop_trailing_silence = (mode & self.DROP_TRAILING_SILENCE) != 0
 
     def _reinitialize(self):
         self._contiguous_token = False
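For illustration, _set_mode above accepts exactly four values (NORMAL, STRICT_MIN_LENGTH, DROP_TRAILING_SILENCE, or both flags OR'ed); a sketch of the rejection path:

    # A sketch, not part of the changeset: any other value raises ValueError.
    from auditok import StreamTokenizer

    try:
        StreamTokenizer(lambda frame: True, min_length=1, max_length=2,
                        max_continuous_silence=0, mode=8)  # 8 is not a flag
    except ValueError as exc:
        print(exc)  # Wrong value for mode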
@@ -1056,7 +1023,6 @@
 
            where `data` is a list of read frames, `start`: index of the first frame in the
            original data and `end` : index of the last frame. 
-
         """
         token_gen = self._iter_tokens(data_source)
         if callback:
@@ -1083,7 +1049,7 @@
 
     def _process(self, frame):
 
-        frame_is_valid = self.validator.is_valid(frame)
+        frame_is_valid = self._is_valid(frame)
 
         if self._state == self.SILENCE:
 
@@ -1182,7 +1148,7 @@
 
         if (
             not truncated
-            and self._drop_tailing_silence
+            and self._drop_trailing_silence
             and self._silence_length > 0
         ):
             # happens if max_continuous_silence is reached
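For illustration, a minimal sketch of the callback path kept by tokenize() above and exercised by the updated tests below, assuming auditok's public exports:

    # A sketch, not part of the changeset: callback-style tokenization.
    from auditok import StreamTokenizer, StringDataSource

    found = []

    def on_token(data, start, end):
        # data is a list of frames; start and end are inclusive frame indices
        found.append(("".join(data), start, end))

    tokenizer = StreamTokenizer(lambda frame: frame == "A", min_length=5,
                                max_length=8, max_continuous_silence=3,
                                init_min=3, init_max_silence=3)
    tokenizer.tokenize(StringDataSource("aaAAAAAAAAAAAAa"), callback=on_token)
    # found -> [('AAAAAAAA', 2, 9), ('AAAAa', 10, 14)]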
--- a/tests/test_StreamTokenizer.py	Mon Oct 07 20:58:23 2019 +0100
+++ b/tests/test_StreamTokenizer.py	Tue Oct 08 19:48:13 2019 +0100
@@ -1,500 +1,1021 @@
-'''
+"""
 @author: Amine Sehili <amine.sehili@gmail.com>
 September 2015
 
-'''
+"""
 
 import unittest
 from auditok import StreamTokenizer, StringDataSource, DataValidator
 
 
 class AValidator(DataValidator):
-    
     def is_valid(self, frame):
         return frame == "A"
 
 
 class TestStreamTokenizerInitParams(unittest.TestCase):
-    
-    
     def setUp(self):
         self.A_validator = AValidator()
-        
+
     # Completely deactivate init_min and init_max_silence
     # The tokenizer will only rely on the other parameters
     # Note that if init_min = 0, the value of init_max_silence
     # will have no effect
     def test_init_min_0_init_max_silence_0(self):
-        
-        tokenizer = StreamTokenizer(self.A_validator, min_length = 5, max_length=20,
-                                     max_continuous_silence=4, init_min = 0,
-                                     init_max_silence = 0, mode=0)
-        
-        
+
+        tokenizer = StreamTokenizer(
+            self.A_validator,
+            min_length=5,
+            max_length=20,
+            max_continuous_silence=4,
+            init_min=0,
+            init_max_silence=0,
+            mode=0,
+        )
+
         data_source = StringDataSource("aAaaaAaAaaAaAaaaaaaaAAAAAAAA")
         #                                ^              ^   ^      ^
         #                                2              16  20     27
         tokens = tokenizer.tokenize(data_source)
-                
-        self.assertEqual(len(tokens), 2, msg="wrong number of tokens, expected: 2, found: {0} ".format(len(tokens)))
+
+        self.assertEqual(
+            len(tokens),
+            2,
+            msg="wrong number of tokens, expected: 2, found: {0} ".format(
+                len(tokens)
+            ),
+        )
         tok1, tok2 = tokens[0], tokens[1]
-        
+
         # tok1[0]: data
         # tok1[1]: start frame (included)
         # tok1[2]: end frame (included)
-        
-        data = ''.join(tok1[0])
+
+        data = "".join(tok1[0])
         start = tok1[1]
         end = tok1[2]
-        self.assertEqual(data, "AaaaAaAaaAaAaaaa",
-                        msg="wrong data for token 1, expected: 'AaaaAaAaaAaAaaaa', found: {0} ".format(data))
-        self.assertEqual(start, 1, msg="wrong start frame for token 1, expected: 1, found: {0} ".format(start))
-        self.assertEqual(end, 16, msg="wrong end frame for token 1, expected: 16, found: {0} ".format(end))
-        
-        
-        data = ''.join(tok2[0])
+        self.assertEqual(
+            data,
+            "AaaaAaAaaAaAaaaa",
+            msg="wrong data for token 1, expected: 'AaaaAaAaaAaAaaaa', found: {0} ".format(
+                data
+            ),
+        )
+        self.assertEqual(
+            start,
+            1,
+            msg="wrong start frame for token 1, expected: 1, found: {0} ".format(
+                start
+            ),
+        )
+        self.assertEqual(
+            end,
+            16,
+            msg="wrong end frame for token 1, expected: 16, found: {0} ".format(
+                end
+            ),
+        )
+
+        data = "".join(tok2[0])
         start = tok2[1]
         end = tok2[2]
-        self.assertEqual(data, "AAAAAAAA",
-                        msg="wrong data for token 1, expected: 'AAAAAAAA', found: {0} ".format(data))
-        self.assertEqual(start, 20, msg="wrong start frame for token 2, expected: 20, found: {0} ".format(start))
-        self.assertEqual(end, 27, msg="wrong end frame for token 2, expected: 27, found: {0} ".format(end))
-    
-    
-        
+        self.assertEqual(
+            data,
+            "AAAAAAAA",
+            msg="wrong data for token 1, expected: 'AAAAAAAA', found: {0} ".format(
+                data
+            ),
+        )
+        self.assertEqual(
+            start,
+            20,
+            msg="wrong start frame for token 2, expected: 20, found: {0} ".format(
+                start
+            ),
+        )
+        self.assertEqual(
+            end,
+            27,
+            msg="wrong end frame for token 2, expected: 27, found: {0} ".format(
+                end
+            ),
+        )
+
     # A valid token is considered as such iff the tokenizer encounters
     # at least 3 valid frames (init_min = 3) between which there
     # are at most 0 consecutive non-valid frames (init_max_silence = 0)
     # In other words, a valid token must start with 3 valid frames
     def test_init_min_3_init_max_silence_0(self):
-        
-        tokenizer = StreamTokenizer(self.A_validator, min_length = 5, max_length=20,
-                                     max_continuous_silence=4, init_min = 3,
-                                     init_max_silence = 0, mode=0)
-        
-        
-        
-        data_source = StringDataSource("aAaaaAaAaaAaAaaaaaAAAAAAAAAaaaaaaAAAAA")
+
+        tokenizer = StreamTokenizer(
+            self.A_validator,
+            min_length=5,
+            max_length=20,
+            max_continuous_silence=4,
+            init_min=3,
+            init_max_silence=0,
+            mode=0,
+        )
+
+        data_source = StringDataSource(
+            "aAaaaAaAaaAaAaaaaaAAAAAAAAAaaaaaaAAAAA"
+        )
         #                                                 ^           ^  ^   ^
         #                                                 18          30 33  37
-        
+
         tokens = tokenizer.tokenize(data_source)
-                
-        self.assertEqual(len(tokens), 2, msg="wrong number of tokens, expected: 2, found: {0} ".format(len(tokens)))
+
+        self.assertEqual(
+            len(tokens),
+            2,
+            msg="wrong number of tokens, expected: 2, found: {0} ".format(
+                len(tokens)
+            ),
+        )
         tok1, tok2 = tokens[0], tokens[1]
-        
-        data = ''.join(tok1[0])
+
+        data = "".join(tok1[0])
         start = tok1[1]
         end = tok1[2]
-        self.assertEqual(data, "AAAAAAAAAaaaa",
-                        msg="wrong data for token 1, expected: 'AAAAAAAAAaaaa', found: '{0}' ".format(data))
-        self.assertEqual(start, 18, msg="wrong start frame for token 1, expected: 18, found: {0} ".format(start))
-        self.assertEqual(end, 30, msg="wrong end frame for token 1, expected: 30, found: {0} ".format(end))
-        
-        
-        data = ''.join(tok2[0])
+        self.assertEqual(
+            data,
+            "AAAAAAAAAaaaa",
+            msg="wrong data for token 1, expected: 'AAAAAAAAAaaaa', found: '{0}' ".format(
+                data
+            ),
+        )
+        self.assertEqual(
+            start,
+            18,
+            msg="wrong start frame for token 1, expected: 18, found: {0} ".format(
+                start
+            ),
+        )
+        self.assertEqual(
+            end,
+            30,
+            msg="wrong end frame for token 1, expected: 30, found: {0} ".format(
+                end
+            ),
+        )
+
+        data = "".join(tok2[0])
         start = tok2[1]
         end = tok2[2]
-        self.assertEqual(data, "AAAAA",
-                        msg="wrong data for token 1, expected: 'AAAAA', found: '{0}' ".format(data))
-        self.assertEqual(start, 33, msg="wrong start frame for token 2, expected: 33, found: {0} ".format(start))
-        self.assertEqual(end, 37, msg="wrong end frame for token 2, expected: 37, found: {0} ".format(end))
-        
-    
+        self.assertEqual(
+            data,
+            "AAAAA",
+            msg="wrong data for token 1, expected: 'AAAAA', found: '{0}' ".format(
+                data
+            ),
+        )
+        self.assertEqual(
+            start,
+            33,
+            msg="wrong start frame for token 2, expected: 33, found: {0} ".format(
+                start
+            ),
+        )
+        self.assertEqual(
+            end,
+            37,
+            msg="wrong end frame for token 2, expected: 37, found: {0} ".format(
+                end
+            ),
+        )
+
     # A valid token is considered as such iff the tokenizer encounters
     # at least 3 valid frames (init_min = 3) between which there
     # are at most 2 consecutive non-valid frames (init_max_silence = 2)
     def test_init_min_3_init_max_silence_2(self):
-        
-        tokenizer = StreamTokenizer(self.A_validator, min_length = 5, max_length=20,
-                                     max_continuous_silence=4, init_min = 3,
-                                     init_max_silence = 2, mode=0)
-        
-        
-        data_source = StringDataSource("aAaaaAaAaaAaAaaaaaaAAAAAAAAAaaaaaaaAAAAA")
+
+        tokenizer = StreamTokenizer(
+            self.A_validator,
+            min_length=5,
+            max_length=20,
+            max_continuous_silence=4,
+            init_min=3,
+            init_max_silence=2,
+            mode=0,
+        )
+
+        data_source = StringDataSource(
+            "aAaaaAaAaaAaAaaaaaaAAAAAAAAAaaaaaaaAAAAA"
+        )
         #                                    ^          ^  ^           ^   ^   ^
         #                                    5          16 19          31  35  39
         tokens = tokenizer.tokenize(data_source)
-                
-        self.assertEqual(len(tokens), 3, msg="wrong number of tokens, expected: 3, found: {0} ".format(len(tokens)))
+
+        self.assertEqual(
+            len(tokens),
+            3,
+            msg="wrong number of tokens, expected: 3, found: {0} ".format(
+                len(tokens)
+            ),
+        )
         tok1, tok2, tok3 = tokens[0], tokens[1], tokens[2]
-        
-        
-        data = ''.join(tok1[0])
+
+        data = "".join(tok1[0])
         start = tok1[1]
         end = tok1[2]
-        self.assertEqual(data, "AaAaaAaAaaaa",
-                        msg="wrong data for token 1, expected: 'AaAaaAaA', found: '{0}' ".format(data))
-        self.assertEqual(start, 5, msg="wrong start frame for token 1, expected: 5, found: {0} ".format(start))
-        self.assertEqual(end, 16, msg="wrong end frame for token 1, expected: 16, found: {0} ".format(end))
-        
-        
-        data = ''.join(tok2[0])
+        self.assertEqual(
+            data,
+            "AaAaaAaAaaaa",
+            msg="wrong data for token 1, expected: 'AaAaaAaA', found: '{0}' ".format(
+                data
+            ),
+        )
+        self.assertEqual(
+            start,
+            5,
+            msg="wrong start frame for token 1, expected: 5, found: {0} ".format(
+                start
+            ),
+        )
+        self.assertEqual(
+            end,
+            16,
+            msg="wrong end frame for token 1, expected: 16, found: {0} ".format(
+                end
+            ),
+        )
+
+        data = "".join(tok2[0])
         start = tok2[1]
         end = tok2[2]
-        self.assertEqual(data, "AAAAAAAAAaaaa",
-                        msg="wrong data for token 2, expected: 'AAAAAAAAAaaaa', found: '{0}' ".format(data))
-        self.assertEqual(start, 19, msg="wrong start frame for token 2, expected: 19, found: {0} ".format(start))
-        self.assertEqual(end, 31, msg="wrong end frame for token 2, expected: 31, found: {0} ".format(end))
-        
-        
-        data = ''.join(tok3[0])
+        self.assertEqual(
+            data,
+            "AAAAAAAAAaaaa",
+            msg="wrong data for token 2, expected: 'AAAAAAAAAaaaa', found: '{0}' ".format(
+                data
+            ),
+        )
+        self.assertEqual(
+            start,
+            19,
+            msg="wrong start frame for token 2, expected: 19, found: {0} ".format(
+                start
+            ),
+        )
+        self.assertEqual(
+            end,
+            31,
+            msg="wrong end frame for token 2, expected: 31, found: {0} ".format(
+                end
+            ),
+        )
+
+        data = "".join(tok3[0])
         start = tok3[1]
         end = tok3[2]
-        self.assertEqual(data, "AAAAA",
-                        msg="wrong data for token 3, expected: 'AAAAA', found: '{0}' ".format(data))
-        self.assertEqual(start, 35, msg="wrong start frame for token 2, expected: 35, found: {0} ".format(start))
-        self.assertEqual(end, 39, msg="wrong end frame for token 2, expected: 39, found: {0} ".format(end))    
-               
-        
-    
+        self.assertEqual(
+            data,
+            "AAAAA",
+            msg="wrong data for token 3, expected: 'AAAAA', found: '{0}' ".format(
+                data
+            ),
+        )
+        self.assertEqual(
+            start,
+            35,
+            msg="wrong start frame for token 2, expected: 35, found: {0} ".format(
+                start
+            ),
+        )
+        self.assertEqual(
+            end,
+            39,
+            msg="wrong end frame for token 2, expected: 39, found: {0} ".format(
+                end
+            ),
+        )
+
+
 class TestStreamTokenizerMinMaxLength(unittest.TestCase):
-  
     def setUp(self):
         self.A_validator = AValidator()
-    
-    
+
     def test_min_length_6_init_max_length_20(self):
-    
-        tokenizer = StreamTokenizer(self.A_validator, min_length = 6, max_length=20,
-                                     max_continuous_silence=2, init_min = 3,
-                                     init_max_silence = 3, mode=0)
-        
-        
+
+        tokenizer = StreamTokenizer(
+            self.A_validator,
+            min_length=6,
+            max_length=20,
+            max_continuous_silence=2,
+            init_min=3,
+            init_max_silence=3,
+            mode=0,
+        )
+
         data_source = StringDataSource("aAaaaAaAaaAaAaaaaaAAAAAAAAAaaaaaAAAAA")
         #                                ^            ^   ^         ^
         #                                1            14  18        28
-        
+
         tokens = tokenizer.tokenize(data_source)
-                
-        self.assertEqual(len(tokens), 2, msg="wrong number of tokens, expected: 2, found: {0} ".format(len(tokens)))
+
+        self.assertEqual(
+            len(tokens),
+            2,
+            msg="wrong number of tokens, expected: 2, found: {0} ".format(
+                len(tokens)
+            ),
+        )
         tok1, tok2 = tokens[0], tokens[1]
-        
-        
-        data = ''.join(tok1[0])
+
+        data = "".join(tok1[0])
         start = tok1[1]
         end = tok1[2]
-        self.assertEqual(data, "AaaaAaAaaAaAaa",
-                        msg="wrong data for token 1, expected: 'AaaaAaAaaAaAaa', found: '{0}' ".format(data))
-        self.assertEqual(start, 1, msg="wrong start frame for token 1, expected: 1, found: {0} ".format(start))
-        self.assertEqual(end, 14, msg="wrong end frame for token 1, expected: 14, found: {0} ".format(end))
-        
-        
-        data = ''.join(tok2[0])
+        self.assertEqual(
+            data,
+            "AaaaAaAaaAaAaa",
+            msg="wrong data for token 1, expected: 'AaaaAaAaaAaAaa', found: '{0}' ".format(
+                data
+            ),
+        )
+        self.assertEqual(
+            start,
+            1,
+            msg="wrong start frame for token 1, expected: 1, found: {0} ".format(
+                start
+            ),
+        )
+        self.assertEqual(
+            end,
+            14,
+            msg="wrong end frame for token 1, expected: 14, found: {0} ".format(
+                end
+            ),
+        )
+
+        data = "".join(tok2[0])
         start = tok2[1]
         end = tok2[2]
-        self.assertEqual(data, "AAAAAAAAAaa",
-                        msg="wrong data for token 2, expected: 'AAAAAAAAAaa', found: '{0}' ".format(data))
-        self.assertEqual(start, 18, msg="wrong start frame for token 2, expected: 18, found: {0} ".format(start))
-        self.assertEqual(end, 28, msg="wrong end frame for token 2, expected: 28, found: {0} ".format(end))
-    
-    
+        self.assertEqual(
+            data,
+            "AAAAAAAAAaa",
+            msg="wrong data for token 2, expected: 'AAAAAAAAAaa', found: '{0}' ".format(
+                data
+            ),
+        )
+        self.assertEqual(
+            start,
+            18,
+            msg="wrong start frame for token 2, expected: 18, found: {0} ".format(
+                start
+            ),
+        )
+        self.assertEqual(
+            end,
+            28,
+            msg="wrong end frame for token 2, expected: 28, found: {0} ".format(
+                end
+            ),
+        )
+
     def test_min_length_1_init_max_length_1(self):
-    
-        tokenizer = StreamTokenizer(self.A_validator, min_length = 1, max_length=1,
-                                     max_continuous_silence=0, init_min = 0,
-                                     init_max_silence = 0, mode=0)
-        
-        
-        data_source = StringDataSource("AAaaaAaaaAaAaaAaAaaaaaAAAAAAAAAaaaaaAAAAA")
-        
+
+        tokenizer = StreamTokenizer(
+            self.A_validator,
+            min_length=1,
+            max_length=1,
+            max_continuous_silence=0,
+            init_min=0,
+            init_max_silence=0,
+            mode=0,
+        )
+
+        data_source = StringDataSource(
+            "AAaaaAaaaAaAaaAaAaaaaaAAAAAAAAAaaaaaAAAAA"
+        )
+
         tokens = tokenizer.tokenize(data_source)
-                        
-        self.assertEqual(len(tokens), 21, msg="wrong number of tokens, expected: 21, found: {0} ".format(len(tokens)))
-        
-        
+
+        self.assertEqual(
+            len(tokens),
+            21,
+            msg="wrong number of tokens, expected: 21, found: {0} ".format(
+                len(tokens)
+            ),
+        )
+
     def test_min_length_10_init_max_length_20(self):
-    
-        tokenizer = StreamTokenizer(self.A_validator, min_length = 10, max_length=20,
-                                     max_continuous_silence=4, init_min = 3,
-                                     init_max_silence = 3, mode=0)
-        
-        
-        data_source = StringDataSource("aAaaaAaAaaAaAaaaaaaAAAAAaaaaaaAAAAAaaAAaaAAA")
+
+        tokenizer = StreamTokenizer(
+            self.A_validator,
+            min_length=10,
+            max_length=20,
+            max_continuous_silence=4,
+            init_min=3,
+            init_max_silence=3,
+            mode=0,
+        )
+
+        data_source = StringDataSource(
+            "aAaaaAaAaaAaAaaaaaaAAAAAaaaaaaAAAAAaaAAaaAAA"
+        )
         #                                ^              ^             ^            ^
         #                                1              16            30           45
-        
+
         tokens = tokenizer.tokenize(data_source)
-                
-        self.assertEqual(len(tokens), 2, msg="wrong number of tokens, expected: 2, found: {0} ".format(len(tokens)))
+
+        self.assertEqual(
+            len(tokens),
+            2,
+            msg="wrong number of tokens, expected: 2, found: {0} ".format(
+                len(tokens)
+            ),
+        )
         tok1, tok2 = tokens[0], tokens[1]
-        
-        
-        data = ''.join(tok1[0])
+
+        data = "".join(tok1[0])
         start = tok1[1]
         end = tok1[2]
-        self.assertEqual(data, "AaaaAaAaaAaAaaaa",
-                        msg="wrong data for token 1, expected: 'AaaaAaAaaAaAaaaa', found: '{0}' ".format(data))
-        self.assertEqual(start, 1, msg="wrong start frame for token 1, expected: 1, found: {0} ".format(start))
-        self.assertEqual(end, 16, msg="wrong end frame for token 1, expected: 16, found: {0} ".format(end))
-        
-        
-        data = ''.join(tok2[0])
+        self.assertEqual(
+            data,
+            "AaaaAaAaaAaAaaaa",
+            msg="wrong data for token 1, expected: 'AaaaAaAaaAaAaaaa', found: '{0}' ".format(
+                data
+            ),
+        )
+        self.assertEqual(
+            start,
+            1,
+            msg="wrong start frame for token 1, expected: 1, found: {0} ".format(
+                start
+            ),
+        )
+        self.assertEqual(
+            end,
+            16,
+            msg="wrong end frame for token 1, expected: 16, found: {0} ".format(
+                end
+            ),
+        )
+
+        data = "".join(tok2[0])
         start = tok2[1]
         end = tok2[2]
-        self.assertEqual(data, "AAAAAaaAAaaAAA",
-                        msg="wrong data for token 2, expected: 'AAAAAaaAAaaAAA', found: '{0}' ".format(data))
-        self.assertEqual(start, 30, msg="wrong start frame for token 2, expected: 30, found: {0} ".format(start))
-        self.assertEqual(end, 43, msg="wrong end frame for token 2, expected: 43, found: {0} ".format(end))
-    
-    
-        
+        self.assertEqual(
+            data,
+            "AAAAAaaAAaaAAA",
+            msg="wrong data for token 2, expected: 'AAAAAaaAAaaAAA', found: '{0}' ".format(
+                data
+            ),
+        )
+        self.assertEqual(
+            start,
+            30,
+            msg="wrong start frame for token 2, expected: 30, found: {0} ".format(
+                start
+            ),
+        )
+        self.assertEqual(
+            end,
+            43,
+            msg="wrong end frame for token 2, expected: 43, found: {0} ".format(
+                end
+            ),
+        )
+
     def test_min_length_4_init_max_length_5(self):
-    
-        tokenizer = StreamTokenizer(self.A_validator, min_length = 4, max_length=5,
-                                     max_continuous_silence=4, init_min = 3,
-                                     init_max_silence = 3, mode=0)
-        
-        
-        data_source = StringDataSource("aAaaaAaAaaAaAaaaaaAAAAAAAAaaaaaaAAAAAaaaaaAAaaAaa")
+
+        tokenizer = StreamTokenizer(
+            self.A_validator,
+            min_length=4,
+            max_length=5,
+            max_continuous_silence=4,
+            init_min=3,
+            init_max_silence=3,
+            mode=0,
+        )
+
+        data_source = StringDataSource(
+            "aAaaaAaAaaAaAaaaaaAAAAAAAAaaaaaaAAAAAaaaaaAAaaAaa"
+        )
         #                                                 ^   ^^   ^    ^   ^     ^   ^
         #                                                 18 2223  27   32  36    42  46
-        
+
         tokens = tokenizer.tokenize(data_source)
-               
-        self.assertEqual(len(tokens), 4, msg="wrong number of tokens, expected: 4, found: {0} ".format(len(tokens)))
+
+        self.assertEqual(
+            len(tokens),
+            4,
+            msg="wrong number of tokens, expected: 4, found: {0} ".format(
+                len(tokens)
+            ),
+        )
         tok1, tok2, tok3, tok4 = tokens[0], tokens[1], tokens[2], tokens[3]
-        
-        
-        data = ''.join(tok1[0])
+
+        data = "".join(tok1[0])
         start = tok1[1]
         end = tok1[2]
-        self.assertEqual(data, "AAAAA",
-                        msg="wrong data for token 1, expected: 'AAAAA', found: '{0}' ".format(data))
-        self.assertEqual(start, 18, msg="wrong start frame for token 1, expected: 18, found: {0} ".format(start))
-        self.assertEqual(end, 22, msg="wrong end frame for token 1, expected: 22, found: {0} ".format(end))
-        
-        
-        data = ''.join(tok2[0])
+        self.assertEqual(
+            data,
+            "AAAAA",
+            msg="wrong data for token 1, expected: 'AAAAA', found: '{0}' ".format(
+                data
+            ),
+        )
+        self.assertEqual(
+            start,
+            18,
+            msg="wrong start frame for token 1, expected: 18, found: {0} ".format(
+                start
+            ),
+        )
+        self.assertEqual(
+            end,
+            22,
+            msg="wrong end frame for token 1, expected: 22, found: {0} ".format(
+                end
+            ),
+        )
+
+        data = "".join(tok2[0])
         start = tok2[1]
         end = tok2[2]
-        self.assertEqual(data, "AAAaa",
-                        msg="wrong data for token 1, expected: 'AAAaa', found: '{0}' ".format(data))
-        self.assertEqual(start, 23, msg="wrong start frame for token 1, expected: 23, found: {0} ".format(start))
-        self.assertEqual(end, 27, msg="wrong end frame for token 1, expected: 27, found: {0} ".format(end))
-        
-        
-        data = ''.join(tok3[0])
+        self.assertEqual(
+            data,
+            "AAAaa",
+            msg="wrong data for token 1, expected: 'AAAaa', found: '{0}' ".format(
+                data
+            ),
+        )
+        self.assertEqual(
+            start,
+            23,
+            msg="wrong start frame for token 1, expected: 23, found: {0} ".format(
+                start
+            ),
+        )
+        self.assertEqual(
+            end,
+            27,
+            msg="wrong end frame for token 1, expected: 27, found: {0} ".format(
+                end
+            ),
+        )
+
+        data = "".join(tok3[0])
         start = tok3[1]
         end = tok3[2]
-        self.assertEqual(data, "AAAAA",
-                        msg="wrong data for token 1, expected: 'AAAAA', found: '{0}' ".format(data))
-        self.assertEqual(start, 32, msg="wrong start frame for token 1, expected: 1, found: {0} ".format(start))
-        self.assertEqual(end, 36, msg="wrong end frame for token 1, expected: 7, found: {0} ".format(end))
-        
-        
-        data = ''.join(tok4[0])
+        self.assertEqual(
+            data,
+            "AAAAA",
+            msg="wrong data for token 1, expected: 'AAAAA', found: '{0}' ".format(
+                data
+            ),
+        )
+        self.assertEqual(
+            start,
+            32,
+            msg="wrong start frame for token 1, expected: 1, found: {0} ".format(
+                start
+            ),
+        )
+        self.assertEqual(
+            end,
+            36,
+            msg="wrong end frame for token 1, expected: 7, found: {0} ".format(
+                end
+            ),
+        )
+
+        data = "".join(tok4[0])
         start = tok4[1]
         end = tok4[2]
-        self.assertEqual(data, "AAaaA",
-                        msg="wrong data for token 2, expected: 'AAaaA', found: '{0}' ".format(data))
-        self.assertEqual(start, 42, msg="wrong start frame for token 2, expected: 17, found: {0} ".format(start))
-        self.assertEqual(end, 46, msg="wrong end frame for token 2, expected: 22, found: {0} ".format(end))
-        
-        
+        self.assertEqual(
+            data,
+            "AAaaA",
+            msg="wrong data for token 2, expected: 'AAaaA', found: '{0}' ".format(
+                data
+            ),
+        )
+        self.assertEqual(
+            start,
+            42,
+            msg="wrong start frame for token 2, expected: 17, found: {0} ".format(
+                start
+            ),
+        )
+        self.assertEqual(
+            end,
+            46,
+            msg="wrong end frame for token 2, expected: 22, found: {0} ".format(
+                end
+            ),
+        )
+
+
 class TestStreamTokenizerMaxContinuousSilence(unittest.TestCase):
-    
     def setUp(self):
         self.A_validator = AValidator()
-    
-    
+
     def test_min_5_max_10_max_continuous_silence_0(self):
 
-        tokenizer = StreamTokenizer(self.A_validator, min_length = 5, max_length=10,
-                                    max_continuous_silence=0, init_min = 3,
-                                    init_max_silence = 3, mode=0)
-        
+        tokenizer = StreamTokenizer(
+            self.A_validator,
+            min_length=5,
+            max_length=10,
+            max_continuous_silence=0,
+            init_min=3,
+            init_max_silence=3,
+            mode=0,
+        )
+
         data_source = StringDataSource("aaaAAAAAaAAAAAAaaAAAAAAAAAa")
         #                                  ^   ^ ^    ^  ^       ^
         #                                  3   7 9   14 17      25
-        
+
         tokens = tokenizer.tokenize(data_source)
-                
-        self.assertEqual(len(tokens), 3, msg="wrong number of tokens, expected: 3, found: {0} ".format(len(tokens)))
+
+        self.assertEqual(
+            len(tokens),
+            3,
+            msg="wrong number of tokens, expected: 3, found: {0} ".format(
+                len(tokens)
+            ),
+        )
         tok1, tok2, tok3 = tokens[0], tokens[1], tokens[2]
-        
-        
-        data = ''.join(tok1[0])
+
+        data = "".join(tok1[0])
         start = tok1[1]
         end = tok1[2]
-        self.assertEqual(data, "AAAAA",
-                        msg="wrong data for token 1, expected: 'AAAAA', found: '{0}' ".format(data))
-        self.assertEqual(start, 3, msg="wrong start frame for token 1, expected: 3, found: {0} ".format(start))
-        self.assertEqual(end, 7, msg="wrong end frame for token 1, expected: 7, found: {0} ".format(end))
-        
-        
-        data = ''.join(tok2[0])
+        self.assertEqual(
+            data,
+            "AAAAA",
+            msg="wrong data for token 1, expected: 'AAAAA', found: '{0}' ".format(
+                data
+            ),
+        )
+        self.assertEqual(
+            start,
+            3,
+            msg="wrong start frame for token 1, expected: 3, found: {0} ".format(
+                start
+            ),
+        )
+        self.assertEqual(
+            end,
+            7,
+            msg="wrong end frame for token 1, expected: 7, found: {0} ".format(
+                end
+            ),
+        )
+
+        data = "".join(tok2[0])
         start = tok2[1]
         end = tok2[2]
-        self.assertEqual(data, "AAAAAA",
-                        msg="wrong data for token 1, expected: 'AAAAAA', found: '{0}' ".format(data))
-        self.assertEqual(start, 9, msg="wrong start frame for token 1, expected: 9, found: {0} ".format(start))
-        self.assertEqual(end, 14, msg="wrong end frame for token 1, expected: 14, found: {0} ".format(end))
-        
-        
-        data = ''.join(tok3[0])
+        self.assertEqual(
+            data,
+            "AAAAAA",
+            msg="wrong data for token 1, expected: 'AAAAAA', found: '{0}' ".format(
+                data
+            ),
+        )
+        self.assertEqual(
+            start,
+            9,
+            msg="wrong start frame for token 1, expected: 9, found: {0} ".format(
+                start
+            ),
+        )
+        self.assertEqual(
+            end,
+            14,
+            msg="wrong end frame for token 1, expected: 14, found: {0} ".format(
+                end
+            ),
+        )
+
+        data = "".join(tok3[0])
         start = tok3[1]
         end = tok3[2]
-        self.assertEqual(data, "AAAAAAAAA",
-                        msg="wrong data for token 1, expected: 'AAAAAAAAA', found: '{0}' ".format(data))
-        self.assertEqual(start, 17, msg="wrong start frame for token 1, expected: 17, found: {0} ".format(start))
-        self.assertEqual(end, 25, msg="wrong end frame for token 1, expected: 25, found: {0} ".format(end))
-        
-        
-        
-        
+        self.assertEqual(
+            data,
+            "AAAAAAAAA",
+            msg="wrong data for token 1, expected: 'AAAAAAAAA', found: '{0}' ".format(
+                data
+            ),
+        )
+        self.assertEqual(
+            start,
+            17,
+            msg="wrong start frame for token 1, expected: 17, found: {0} ".format(
+                start
+            ),
+        )
+        self.assertEqual(
+            end,
+            25,
+            msg="wrong end frame for token 1, expected: 25, found: {0} ".format(
+                end
+            ),
+        )
+
     def test_min_5_max_10_max_continuous_silence_1(self):
 
-        tokenizer = StreamTokenizer(self.A_validator, min_length = 5, max_length=10,
-                                    max_continuous_silence=1, init_min = 3,
-                                    init_max_silence = 3, mode=0)
-        
+        tokenizer = StreamTokenizer(
+            self.A_validator,
+            min_length=5,
+            max_length=10,
+            max_continuous_silence=1,
+            init_min=3,
+            init_max_silence=3,
+            mode=0,
+        )
+
         data_source = StringDataSource("aaaAAAAAaAAAAAAaaAAAAAAAAAa")
         #                                  ^        ^^ ^ ^        ^
         #                                  3       12131517      26
         #                                         (12 13 15 17)
-        
+
         tokens = tokenizer.tokenize(data_source)
-                
-        self.assertEqual(len(tokens), 3, msg="wrong number of tokens, expected: 3, found: {0} ".format(len(tokens)))
+
+        self.assertEqual(
+            len(tokens),
+            3,
+            msg="wrong number of tokens, expected: 3, found: {0} ".format(
+                len(tokens)
+            ),
+        )
         tok1, tok2, tok3 = tokens[0], tokens[1], tokens[2]
-        
-        
-        data = ''.join(tok1[0])
+
+        data = "".join(tok1[0])
         start = tok1[1]
         end = tok1[2]
-        self.assertEqual(data, "AAAAAaAAAA",
-                        msg="wrong data for token 1, expected: 'AAAAAaAAAA', found: '{0}' ".format(data))
-        self.assertEqual(start, 3, msg="wrong start frame for token 1, expected: 3, found: {0} ".format(start))
-        self.assertEqual(end, 12, msg="wrong end frame for token 1, expected: 10, found: {0} ".format(end))
-        
-        
-        data = ''.join(tok2[0])
+        self.assertEqual(
+            data,
+            "AAAAAaAAAA",
+            msg="wrong data for token 1, expected: 'AAAAAaAAAA', found: '{0}' ".format(
+                data
+            ),
+        )
+        self.assertEqual(
+            start,
+            3,
+            msg="wrong start frame for token 1, expected: 3, found: {0} ".format(
+                start
+            ),
+        )
+        self.assertEqual(
+            end,
+            12,
+            msg="wrong end frame for token 1, expected: 10, found: {0} ".format(
+                end
+            ),
+        )
+
+        data = "".join(tok2[0])
         start = tok2[1]
         end = tok2[2]
-        self.assertEqual(data, "AAa",
-                        msg="wrong data for token 1, expected: 'AAa', found: '{0}' ".format(data))
-        self.assertEqual(start, 13, msg="wrong start frame for token 1, expected: 9, found: {0} ".format(start))
-        self.assertEqual(end, 15, msg="wrong end frame for token 1, expected: 14, found: {0} ".format(end))
-        
-        
-        data = ''.join(tok3[0])
+        self.assertEqual(
+            data,
+            "AAa",
+            msg="wrong data for token 1, expected: 'AAa', found: '{0}' ".format(
+                data
+            ),
+        )
+        self.assertEqual(
+            start,
+            13,
+            msg="wrong start frame for token 1, expected: 9, found: {0} ".format(
+                start
+            ),
+        )
+        self.assertEqual(
+            end,
+            15,
+            msg="wrong end frame for token 1, expected: 14, found: {0} ".format(
+                end
+            ),
+        )
+
+        data = "".join(tok3[0])
         start = tok3[1]
         end = tok3[2]
-        self.assertEqual(data, "AAAAAAAAAa",
-                        msg="wrong data for token 1, expected: 'AAAAAAAAAa', found: '{0}' ".format(data))
-        self.assertEqual(start, 17, msg="wrong start frame for token 1, expected: 17, found: {0} ".format(start))
-        self.assertEqual(end, 26, msg="wrong end frame for token 1, expected: 26, found: {0} ".format(end))
-        
-        
+        self.assertEqual(
+            data,
+            "AAAAAAAAAa",
+            msg="wrong data for token 1, expected: 'AAAAAAAAAa', found: '{0}' ".format(
+                data
+            ),
+        )
+        self.assertEqual(
+            start,
+            17,
+            msg="wrong start frame for token 1, expected: 17, found: {0} ".format(
+                start
+            ),
+        )
+        self.assertEqual(
+            end,
+            26,
+            msg="wrong end frame for token 1, expected: 26, found: {0} ".format(
+                end
+            ),
+        )
+
+
 class TestStreamTokenizerModes(unittest.TestCase):
-    
     def setUp(self):
         self.A_validator = AValidator()
-    
+
     def test_STRICT_MIN_LENGTH(self):
-        
-        tokenizer = StreamTokenizer(self.A_validator, min_length = 5, max_length=8,
-                                    max_continuous_silence=3, init_min = 3,
-                                    init_max_silence = 3, mode=StreamTokenizer.STRICT_MIN_LENGTH)
-        
+
+        tokenizer = StreamTokenizer(
+            self.A_validator,
+            min_length=5,
+            max_length=8,
+            max_continuous_silence=3,
+            init_min=3,
+            init_max_silence=3,
+            mode=StreamTokenizer.STRICT_MIN_LENGTH,
+        )
+
         data_source = StringDataSource("aaAAAAAAAAAAAA")
         #                                 ^      ^
         #                                 2      9
-        
+
         tokens = tokenizer.tokenize(data_source)
-                
-        self.assertEqual(len(tokens), 1, msg="wrong number of tokens, expected: 1, found: {0} ".format(len(tokens)))
+
+        self.assertEqual(
+            len(tokens),
+            1,
+            msg="wrong number of tokens, expected: 1, found: {0} ".format(
+                len(tokens)
+            ),
+        )
         tok1 = tokens[0]
-        
-        
-        data = ''.join(tok1[0])
+
+        data = "".join(tok1[0])
         start = tok1[1]
         end = tok1[2]
-        self.assertEqual(data, "AAAAAAAA",
-                        msg="wrong data for token 1, expected: 'AAAAAAAA', found: '{0}' ".format(data))
-        self.assertEqual(start, 2, msg="wrong start frame for token 1, expected: 2, found: {0} ".format(start))
-        self.assertEqual(end, 9, msg="wrong end frame for token 1, expected: 9, found: {0} ".format(end))
-    
-    
+        self.assertEqual(
+            data,
+            "AAAAAAAA",
+            msg="wrong data for token 1, expected: 'AAAAAAAA', found: '{0}' ".format(
+                data
+            ),
+        )
+        self.assertEqual(
+            start,
+            2,
+            msg="wrong start frame for token 1, expected: 2, found: {0} ".format(
+                start
+            ),
+        )
+        self.assertEqual(
+            end,
+            9,
+            msg="wrong end frame for token 1, expected: 9, found: {0} ".format(
+                end
+            ),
+        )
+
     def test_DROP_TAILING_SILENCE(self):
-        
-        tokenizer = StreamTokenizer(self.A_validator, min_length = 5, max_length=10,
-                                    max_continuous_silence=2, init_min = 3,
-                                    init_max_silence = 3, mode=StreamTokenizer.DROP_TAILING_SILENCE)
-        
+
+        tokenizer = StreamTokenizer(
+            self.A_validator,
+            min_length=5,
+            max_length=10,
+            max_continuous_silence=2,
+            init_min=3,
+            init_max_silence=3,
+            mode=StreamTokenizer.DROP_TRAILING_SILENCE,
+        )
+
         data_source = StringDataSource("aaAAAAAaaaaa")
         #                                 ^   ^
         #                                 2   6
-        
+
         tokens = tokenizer.tokenize(data_source)
-                
-        self.assertEqual(len(tokens), 1, msg="wrong number of tokens, expected: 1, found: {0} ".format(len(tokens)))
+
+        self.assertEqual(
+            len(tokens),
+            1,
+            msg="wrong number of tokens, expected: 1, found: {0} ".format(
+                len(tokens)
+            ),
+        )
         tok1 = tokens[0]
-        
-        
-        data = ''.join(tok1[0])
+
+        data = "".join(tok1[0])
         start = tok1[1]
         end = tok1[2]
-        self.assertEqual(data, "AAAAA",
-                        msg="wrong data for token 1, expected: 'AAAAA', found: '{0}' ".format(data))
-        self.assertEqual(start, 2, msg="wrong start frame for token 1, expected: 2, found: {0} ".format(start))
-        self.assertEqual(end, 6, msg="wrong end frame for token 1, expected: 6, found: {0} ".format(end))
-        
-        
+        self.assertEqual(
+            data,
+            "AAAAA",
+            msg="wrong data for token 1, expected: 'AAAAA', found: '{0}' ".format(
+                data
+            ),
+        )
+        self.assertEqual(
+            start,
+            2,
+            msg="wrong start frame for token 1, expected: 2, found: {0} ".format(
+                start
+            ),
+        )
+        self.assertEqual(
+            end,
+            6,
+            msg="wrong end frame for token 1, expected: 6, found: {0} ".format(
+                end
+            ),
+        )
+
     def test_STRICT_MIN_LENGTH_and_DROP_TAILING_SILENCE(self):
-        
-        tokenizer = StreamTokenizer(self.A_validator, min_length = 5, max_length=8,
-                                    max_continuous_silence=3, init_min = 3,
-                                    init_max_silence = 3, mode=StreamTokenizer.STRICT_MIN_LENGTH | StreamTokenizer.DROP_TAILING_SILENCE)
-        
+
+        tokenizer = StreamTokenizer(
+            self.A_validator,
+            min_length=5,
+            max_length=8,
+            max_continuous_silence=3,
+            init_min=3,
+            init_max_silence=3,
+            mode=StreamTokenizer.STRICT_MIN_LENGTH
+            | StreamTokenizer.DROP_TRAILING_SILENCE,
+        )
+
         data_source = StringDataSource("aaAAAAAAAAAAAAaa")
         #                                 ^      ^
         #                                 2      8
-        
+
         tokens = tokenizer.tokenize(data_source)
-                
-        self.assertEqual(len(tokens), 1, msg="wrong number of tokens, expected: 1, found: {0} ".format(len(tokens)))
+
+        self.assertEqual(
+            len(tokens),
+            1,
+            msg="wrong number of tokens, expected: 1, found: {0} ".format(
+                len(tokens)
+            ),
+        )
         tok1 = tokens[0]
-        
-        
-        data = ''.join(tok1[0])
+
+        data = "".join(tok1[0])
         start = tok1[1]
         end = tok1[2]
-        self.assertEqual(data, "AAAAAAAA",
-                        msg="wrong data for token 1, expected: 'AAAAAAAA', found: '{0}' ".format(data))
-        self.assertEqual(start, 2, msg="wrong start frame for token 1, expected: 2, found: {0} ".format(start))
-        self.assertEqual(end, 9, msg="wrong end frame for token 1, expected: 9, found: {0} ".format(end))
-        
-    
+        self.assertEqual(
+            data,
+            "AAAAAAAA",
+            msg="wrong data for token 1, expected: 'AAAAAAAA', found: '{0}' ".format(
+                data
+            ),
+        )
+        self.assertEqual(
+            start,
+            2,
+            msg="wrong start frame for token 1, expected: 2, found: {0} ".format(
+                start
+            ),
+        )
+        self.assertEqual(
+            end,
+            9,
+            msg="wrong end frame for token 1, expected: 9, found: {0} ".format(
+                end
+            ),
+        )
+
+
 class TestStreamTokenizerCallback(unittest.TestCase):
-    
     def setUp(self):
         self.A_validator = AValidator()
-    
+
     def test_callback(self):
-        
+
         tokens = []
-        
+
         def callback(data, start, end):
             tokens.append((data, start, end))
-            
-        
-        tokenizer = StreamTokenizer(self.A_validator, min_length = 5, max_length=8,
-                                    max_continuous_silence=3, init_min = 3,
-                                    init_max_silence = 3, mode=0)
-        
+
+        tokenizer = StreamTokenizer(
+            self.A_validator,
+            min_length=5,
+            max_length=8,
+            max_continuous_silence=3,
+            init_min=3,
+            init_max_silence=3,
+            mode=0,
+        )
+
         data_source = StringDataSource("aaAAAAAAAAAAAAa")
         #                                 ^      ^^   ^
         #                                 2      910  14
-        
+
         tokenizer.tokenize(data_source, callback=callback)
-        
-        self.assertEqual(len(tokens), 2, msg="wrong number of tokens, expected: 1, found: {0} ".format(len(tokens)))
-        
+
+        self.assertEqual(
+            len(tokens),
+            2,
+            msg="wrong number of tokens, expected: 1, found: {0} ".format(
+                len(tokens)
+            ),
+        )
 
 
 if __name__ == "__main__":
-    #import sys;sys.argv = ['', 'Test.testName']
+    # import sys;sys.argv = ['', 'Test.testName']
     unittest.main()