amine@297
|
1 """
|
amine@2
|
2 @author: Amine Sehili <amine.sehili@gmail.com>
|
amine@2
|
3 September 2015
|
amine@2
|
4
|
amine@297
|
5 """
|
amine@2
|
6
|
amine@2
|
7 import unittest
|
amine@2
|
8 from auditok import StreamTokenizer, StringDataSource, DataValidator
|
amine@2
|
9
|
amine@2
|
10
|
amine@2
|
class AValidator(DataValidator):
    """Validator that deems a frame valid exactly when it is "A"."""

    def is_valid(self, frame):
        """Return True for the frame "A", False for anything else."""
        valid_frame = "A"
        return frame == valid_frame
|
amine@2
|
14
|
amine@2
|
15
|
amine@2
|
class TestStreamTokenizerInitParams(unittest.TestCase):
    """Tests for StreamTokenizer's init_min / init_max_silence parameters."""

    def setUp(self):
        self.A_validator = AValidator()

    def _assert_ntokens(self, tokens, expected):
        # Single place to build the count-mismatch message so the reported
        # expected value can never drift from the asserted one.
        self.assertEqual(
            len(tokens),
            expected,
            msg="wrong number of tokens, expected: {0}, found: {1}".format(
                expected, len(tokens)
            ),
        )

    def _assert_token(self, token, index, data, start, end):
        # Check one (data, start, end) token; start and end frames are both
        # included. `index` is 1-based and only appears in failure messages.
        found = "".join(token[0])
        self.assertEqual(
            found,
            data,
            msg="wrong data for token {0}, expected: '{1}', "
            "found: '{2}'".format(index, data, found),
        )
        self.assertEqual(
            token[1],
            start,
            msg="wrong start frame for token {0}, expected: {1}, "
            "found: {2}".format(index, start, token[1]),
        )
        self.assertEqual(
            token[2],
            end,
            msg="wrong end frame for token {0}, expected: {1}, "
            "found: {2}".format(index, end, token[2]),
        )

    # Completely deactivate init_min and init_max_silence.
    # The tokenizer will only rely on the other parameters.
    # Note that if init_min = 0, the value of init_max_silence
    # will have no effect.
    def test_init_min_0_init_max_silence_0(self):
        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=5,
            max_length=20,
            max_continuous_silence=4,
            init_min=0,
            init_max_silence=0,
            mode=0,
        )
        data_source = StringDataSource("aAaaaAaAaaAaAaaaaaaaAAAAAAAA")
        # Expected tokens: frames 1..16 and 20..27 (bounds included).
        tokens = tokenizer.tokenize(data_source)

        self._assert_ntokens(tokens, 2)
        self._assert_token(tokens[0], 1, "AaaaAaAaaAaAaaaa", 1, 16)
        self._assert_token(tokens[1], 2, "AAAAAAAA", 20, 27)

    # A token is considered valid iff the tokenizer encounters at least
    # 3 valid frames (init_min = 3) between which there are at most 0
    # consecutive non-valid frames (init_max_silence = 0).
    # In other words, a valid token must start with 3 valid frames.
    def test_init_min_3_init_max_silence_0(self):
        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=5,
            max_length=20,
            max_continuous_silence=4,
            init_min=3,
            init_max_silence=0,
            mode=0,
        )
        data_source = StringDataSource(
            "aAaaaAaAaaAaAaaaaaAAAAAAAAAaaaaaaAAAAA"
        )
        # Expected tokens: frames 18..30 and 33..37 (bounds included).
        tokens = tokenizer.tokenize(data_source)

        self._assert_ntokens(tokens, 2)
        self._assert_token(tokens[0], 1, "AAAAAAAAAaaaa", 18, 30)
        self._assert_token(tokens[1], 2, "AAAAA", 33, 37)

    # A token is considered valid iff the tokenizer encounters at least
    # 3 valid frames (init_min = 3) between which there are at most 2
    # consecutive non-valid frames (init_max_silence = 2).
    def test_init_min_3_init_max_silence_2(self):
        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=5,
            max_length=20,
            max_continuous_silence=4,
            init_min=3,
            init_max_silence=2,
            mode=0,
        )
        data_source = StringDataSource(
            "aAaaaAaAaaAaAaaaaaaAAAAAAAAAaaaaaaaAAAAA"
        )
        # Expected tokens: frames 5..16, 19..31 and 35..39 (bounds included).
        tokens = tokenizer.tokenize(data_source)

        self._assert_ntokens(tokens, 3)
        self._assert_token(tokens[0], 1, "AaAaaAaAaaaa", 5, 16)
        self._assert_token(tokens[1], 2, "AAAAAAAAAaaaa", 19, 31)
        self._assert_token(tokens[2], 3, "AAAAA", 35, 39)
|
amine@297
|
296
|
amine@297
|
297
|
amine@2
|
class TestStreamTokenizerMinMaxLength(unittest.TestCase):
    """Tests for StreamTokenizer's min_length / max_length parameters."""

    def setUp(self):
        self.A_validator = AValidator()

    def _assert_ntokens(self, tokens, expected):
        # Single place to build the count-mismatch message so the reported
        # expected value can never drift from the asserted one.
        self.assertEqual(
            len(tokens),
            expected,
            msg="wrong number of tokens, expected: {0}, found: {1}".format(
                expected, len(tokens)
            ),
        )

    def _assert_token(self, token, index, data, start, end):
        # Check one (data, start, end) token; start and end frames are both
        # included. `index` is 1-based and only appears in failure messages.
        found = "".join(token[0])
        self.assertEqual(
            found,
            data,
            msg="wrong data for token {0}, expected: '{1}', "
            "found: '{2}'".format(index, data, found),
        )
        self.assertEqual(
            token[1],
            start,
            msg="wrong start frame for token {0}, expected: {1}, "
            "found: {2}".format(index, start, token[1]),
        )
        self.assertEqual(
            token[2],
            end,
            msg="wrong end frame for token {0}, expected: {1}, "
            "found: {2}".format(index, end, token[2]),
        )

    def test_min_length_6_init_max_length_20(self):
        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=6,
            max_length=20,
            max_continuous_silence=2,
            init_min=3,
            init_max_silence=3,
            mode=0,
        )
        data_source = StringDataSource("aAaaaAaAaaAaAaaaaaAAAAAAAAAaaaaaAAAAA")
        # Expected tokens: frames 1..14 and 18..28 (bounds included).
        tokens = tokenizer.tokenize(data_source)

        self._assert_ntokens(tokens, 2)
        self._assert_token(tokens[0], 1, "AaaaAaAaaAaAaa", 1, 14)
        self._assert_token(tokens[1], 2, "AAAAAAAAAaa", 18, 28)

    def test_min_length_1_init_max_length_1(self):
        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=1,
            max_length=1,
            max_continuous_silence=0,
            init_min=0,
            init_max_silence=0,
            mode=0,
        )
        data_source = StringDataSource(
            "AAaaaAaaaAaAaaAaAaaaaaAAAAAAAAAaaaaaAAAAA"
        )
        tokens = tokenizer.tokenize(data_source)
        # With min_length == max_length == 1, every valid frame is its own
        # token; the input above contains 21 'A' frames.
        self._assert_ntokens(tokens, 21)

    def test_min_length_10_init_max_length_20(self):
        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=10,
            max_length=20,
            max_continuous_silence=4,
            init_min=3,
            init_max_silence=3,
            mode=0,
        )
        data_source = StringDataSource(
            "aAaaaAaAaaAaAaaaaaaAAAAAaaaaaaAAAAAaaAAaaAAA"
        )
        # Expected tokens: frames 1..16 and 30..43 (bounds included).
        tokens = tokenizer.tokenize(data_source)

        self._assert_ntokens(tokens, 2)
        self._assert_token(tokens[0], 1, "AaaaAaAaaAaAaaaa", 1, 16)
        self._assert_token(tokens[1], 2, "AAAAAaaAAaaAAA", 30, 43)

    def test_min_length_4_init_max_length_5(self):
        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=4,
            max_length=5,
            max_continuous_silence=4,
            init_min=3,
            init_max_silence=3,
            mode=0,
        )
        data_source = StringDataSource(
            "aAaaaAaAaaAaAaaaaaAAAAAAAAaaaaaaAAAAAaaaaaAAaaAaa"
        )
        # Expected tokens: frames 18..22, 23..27, 32..36 and 42..46
        # (bounds included).
        tokens = tokenizer.tokenize(data_source)

        self._assert_ntokens(tokens, 4)
        self._assert_token(tokens[0], 1, "AAAAA", 18, 22)
        self._assert_token(tokens[1], 2, "AAAaa", 23, 27)
        self._assert_token(tokens[2], 3, "AAAAA", 32, 36)
        self._assert_token(tokens[3], 4, "AAaaA", 42, 46)
|
amine@297
|
616
|
amine@297
|
617
|
amine@2
|
class TestStreamTokenizerMaxContinuousSilence(unittest.TestCase):
    """Tests for StreamTokenizer's max_continuous_silence parameter."""

    def setUp(self):
        self.A_validator = AValidator()

    def _assert_ntokens(self, tokens, expected):
        # Single place to build the count-mismatch message so the reported
        # expected value can never drift from the asserted one.
        self.assertEqual(
            len(tokens),
            expected,
            msg="wrong number of tokens, expected: {0}, found: {1}".format(
                expected, len(tokens)
            ),
        )

    def _assert_token(self, token, index, data, start, end):
        # Check one (data, start, end) token; start and end frames are both
        # included. `index` is 1-based and only appears in failure messages.
        found = "".join(token[0])
        self.assertEqual(
            found,
            data,
            msg="wrong data for token {0}, expected: '{1}', "
            "found: '{2}'".format(index, data, found),
        )
        self.assertEqual(
            token[1],
            start,
            msg="wrong start frame for token {0}, expected: {1}, "
            "found: {2}".format(index, start, token[1]),
        )
        self.assertEqual(
            token[2],
            end,
            msg="wrong end frame for token {0}, expected: {1}, "
            "found: {2}".format(index, end, token[2]),
        )

    def test_min_5_max_10_max_continuous_silence_0(self):
        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=5,
            max_length=10,
            max_continuous_silence=0,
            init_min=3,
            init_max_silence=3,
            mode=0,
        )
        data_source = StringDataSource("aaaAAAAAaAAAAAAaaAAAAAAAAAa")
        # Expected tokens: frames 3..7, 9..14 and 17..25 (bounds included);
        # zero tolerated silence splits the runs at every 'a'.
        tokens = tokenizer.tokenize(data_source)

        self._assert_ntokens(tokens, 3)
        self._assert_token(tokens[0], 1, "AAAAA", 3, 7)
        self._assert_token(tokens[1], 2, "AAAAAA", 9, 14)
        self._assert_token(tokens[2], 3, "AAAAAAAAA", 17, 25)

    def test_min_5_max_10_max_continuous_silence_1(self):
        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=5,
            max_length=10,
            max_continuous_silence=1,
            init_min=3,
            init_max_silence=3,
            mode=0,
        )
        data_source = StringDataSource("aaaAAAAAaAAAAAAaaAAAAAAAAAa")
        # Expected tokens: frames 3..12, 13..15 and 17..26 (bounds
        # included); the first token is truncated at max_length=10 and the
        # leftover "AAa" becomes a short second token.
        tokens = tokenizer.tokenize(data_source)

        self._assert_ntokens(tokens, 3)
        self._assert_token(tokens[0], 1, "AAAAAaAAAA", 3, 12)
        self._assert_token(tokens[1], 2, "AAa", 13, 15)
        self._assert_token(tokens[2], 3, "AAAAAAAAAa", 17, 26)
|
amine@297
|
828
|
amine@297
|
829
|
amine@2
|
class TestStreamTokenizerModes(unittest.TestCase):
    """Tests for StreamTokenizer's mode flags (STRICT_MIN_LENGTH,
    DROP_TRAILING_SILENCE and their combination)."""

    def setUp(self):
        self.A_validator = AValidator()

    def _assert_ntokens(self, tokens, expected):
        # Single place to build the count-mismatch message so the reported
        # expected value can never drift from the asserted one.
        self.assertEqual(
            len(tokens),
            expected,
            msg="wrong number of tokens, expected: {0}, found: {1}".format(
                expected, len(tokens)
            ),
        )

    def _assert_token(self, token, index, data, start, end):
        # Check one (data, start, end) token; start and end frames are both
        # included. `index` is 1-based and only appears in failure messages.
        found = "".join(token[0])
        self.assertEqual(
            found,
            data,
            msg="wrong data for token {0}, expected: '{1}', "
            "found: '{2}'".format(index, data, found),
        )
        self.assertEqual(
            token[1],
            start,
            msg="wrong start frame for token {0}, expected: {1}, "
            "found: {2}".format(index, start, token[1]),
        )
        self.assertEqual(
            token[2],
            end,
            msg="wrong end frame for token {0}, expected: {1}, "
            "found: {2}".format(index, end, token[2]),
        )

    def test_STRICT_MIN_LENGTH(self):
        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=5,
            max_length=8,
            max_continuous_silence=3,
            init_min=3,
            init_max_silence=3,
            mode=StreamTokenizer.STRICT_MIN_LENGTH,
        )
        data_source = StringDataSource("aaAAAAAAAAAAAA")
        # Expected token: frames 2..9 (bounds included); the 4-frame
        # leftover after the max_length cut is dropped as too short.
        tokens = tokenizer.tokenize(data_source)

        self._assert_ntokens(tokens, 1)
        self._assert_token(tokens[0], 1, "AAAAAAAA", 2, 9)

    # NOTE(review): "TAILING" in the method name looks like a typo for
    # "TRAILING"; kept as-is because the name is the test's public
    # identifier.
    def test_DROP_TAILING_SILENCE(self):
        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=5,
            max_length=10,
            max_continuous_silence=2,
            init_min=3,
            init_max_silence=3,
            mode=StreamTokenizer.DROP_TRAILING_SILENCE,
        )
        data_source = StringDataSource("aaAAAAAaaaaa")
        # Expected token: frames 2..6 (bounds included); the trailing
        # silence frames are dropped from the token.
        tokens = tokenizer.tokenize(data_source)

        self._assert_ntokens(tokens, 1)
        self._assert_token(tokens[0], 1, "AAAAA", 2, 6)

    def test_STRICT_MIN_LENGTH_and_DROP_TAILING_SILENCE(self):
        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=5,
            max_length=8,
            max_continuous_silence=3,
            init_min=3,
            init_max_silence=3,
            mode=StreamTokenizer.STRICT_MIN_LENGTH
            | StreamTokenizer.DROP_TRAILING_SILENCE,
        )
        data_source = StringDataSource("aaAAAAAAAAAAAAaa")
        # Expected token: frames 2..9 (bounds included).
        tokens = tokenizer.tokenize(data_source)

        self._assert_ntokens(tokens, 1)
        self._assert_token(tokens[0], 1, "AAAAAAAA", 2, 9)
|
amine@297
|
990
|
amine@297
|
991
|
amine@2
|
class TestStreamTokenizerCallback(unittest.TestCase):
    """Tests for delivering tokens through a callback instead of the
    returned list."""

    def setUp(self):
        self.A_validator = AValidator()

    def test_callback(self):
        tokens = []

        def callback(data, start, end):
            # Collect each delivered token for later inspection.
            tokens.append((data, start, end))

        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=5,
            max_length=8,
            max_continuous_silence=3,
            init_min=3,
            init_max_silence=3,
            mode=0,
        )
        data_source = StringDataSource("aaAAAAAAAAAAAAa")
        # Two tokens expected: frames 2..9 (cut at max_length=8) and
        # 10..14 (bounds included).
        tokenizer.tokenize(data_source, callback=callback)

        self.assertEqual(
            len(tokens),
            2,
            msg="wrong number of tokens, expected: 2, found: {0} ".format(
                len(tokens)
            ),
        )
|
amine@2
|
1026
|
amine@2
|
1027
|
amine@2
|
# Run the whole test suite when this file is executed directly.
if __name__ == "__main__":
    unittest.main()
|