comparison tests/test_StreamTokenizer.py @ 334:f7cbf707a34e

Refactor test_StreamTokenizer
author Amine Sehili <amine.sehili@gmail.com>
date Fri, 25 Oct 2019 20:56:12 +0100
parents 7259b1eb9329
children 9f17aa9a4018
comparison
equal deleted inserted replaced
333:6fc2d27bd2ef 334:f7cbf707a34e
55 start = tok1[1] 55 start = tok1[1]
56 end = tok1[2] 56 end = tok1[2]
57 self.assertEqual( 57 self.assertEqual(
58 data, 58 data,
59 "AaaaAaAaaAaAaaaa", 59 "AaaaAaAaaAaAaaaa",
60 msg="wrong data for token 1, expected: 'AaaaAaAaaAaAaaaa', found: {0} ".format( 60 msg=(
61 data 61 "wrong data for token 1, expected: 'AaaaAaAaaAaAaaaa', "
62 ), 62 "found: {0} "
63 ).format(data),
63 ) 64 )
64 self.assertEqual( 65 self.assertEqual(
65 start, 66 start,
66 1, 67 1,
67 msg="wrong start frame for token 1, expected: 1, found: {0} ".format( 68 msg=(
68 start 69 "wrong start frame for token 1, expected: 1, found: {0} "
69 ), 70 ).format(start),
70 ) 71 )
71 self.assertEqual( 72 self.assertEqual(
72 end, 73 end,
73 16, 74 16,
74 msg="wrong end frame for token 1, expected: 16, found: {0} ".format( 75 msg=(
75 end 76 "wrong end frame for token 1, expected: 16, found: {0} "
76 ), 77 ).format(end),
77 ) 78 )
78 79
79 data = "".join(tok2[0]) 80 data = "".join(tok2[0])
80 start = tok2[1] 81 start = tok2[1]
81 end = tok2[2] 82 end = tok2[2]
82 self.assertEqual( 83 self.assertEqual(
83 data, 84 data,
84 "AAAAAAAA", 85 "AAAAAAAA",
85 msg="wrong data for token 1, expected: 'AAAAAAAA', found: {0} ".format( 86 msg=(
86 data 87 "wrong data for token 1, expected: 'AAAAAAAA', found: {0} "
87 ), 88 ).format(data),
88 ) 89 )
89 self.assertEqual( 90 self.assertEqual(
90 start, 91 start,
91 20, 92 20,
92 msg="wrong start frame for token 2, expected: 20, found: {0} ".format( 93 msg=(
93 start 94 "wrong start frame for token 2, expected: 20, found: {0} "
94 ), 95 ).format(start),
95 ) 96 )
96 self.assertEqual( 97 self.assertEqual(
97 end, 98 end,
98 27, 99 27,
99 msg="wrong end frame for token 2, expected: 27, found: {0} ".format( 100 msg=(
100 end 101 "wrong end frame for token 2, expected: 27, found: {0} "
101 ), 102 ).format(end),
102 ) 103 )
103 104
104 # A token is considered valid iff the tokenizer encounters 105 # A token is considered valid iff the tokenizer encounters
105 # at least 3 valid frames (init_min = 3) between which there 106 # at least 3 valid frames (init_min = 3) between which there
106 # are at most 0 consecutive non-valid frames (init_max_silence = 0) 107 # are at most 0 consecutive non-valid frames (init_max_silence = 0)
119 ) 120 )
120 121
121 data_source = StringDataSource( 122 data_source = StringDataSource(
122 "aAaaaAaAaaAaAaaaaaAAAAAAAAAaaaaaaAAAAA" 123 "aAaaaAaAaaAaAaaaaaAAAAAAAAAaaaaaaAAAAA"
123 ) 124 )
124 # ^ ^ ^ ^ 125 # ^ ^ ^ ^
125 # 18 30 33 37 126 # 18 30 33 37
126 127
127 tokens = tokenizer.tokenize(data_source) 128 tokens = tokenizer.tokenize(data_source)
128 129
129 self.assertEqual( 130 self.assertEqual(
130 len(tokens), 131 len(tokens),
139 start = tok1[1] 140 start = tok1[1]
140 end = tok1[2] 141 end = tok1[2]
141 self.assertEqual( 142 self.assertEqual(
142 data, 143 data,
143 "AAAAAAAAAaaaa", 144 "AAAAAAAAAaaaa",
144 msg="wrong data for token 1, expected: 'AAAAAAAAAaaaa', found: '{0}' ".format( 145 msg=(
145 data 146 "wrong data for token 1, expected: 'AAAAAAAAAaaaa', "
146 ), 147 "found: '{0}' "
148 ).format(data),
147 ) 149 )
148 self.assertEqual( 150 self.assertEqual(
149 start, 151 start,
150 18, 152 18,
151 msg="wrong start frame for token 1, expected: 18, found: {0} ".format( 153 msg=(
152 start 154 "wrong start frame for token 1, expected: 18, found: {0} "
153 ), 155 ).format(start),
154 ) 156 )
155 self.assertEqual( 157 self.assertEqual(
156 end, 158 end,
157 30, 159 30,
158 msg="wrong end frame for token 1, expected: 30, found: {0} ".format( 160 msg=(
159 end 161 "wrong end frame for token 1, expected: 30, found: {0} "
160 ), 162 ).format(end),
161 ) 163 )
162 164
163 data = "".join(tok2[0]) 165 data = "".join(tok2[0])
164 start = tok2[1] 166 start = tok2[1]
165 end = tok2[2] 167 end = tok2[2]
166 self.assertEqual( 168 self.assertEqual(
167 data, 169 data,
168 "AAAAA", 170 "AAAAA",
169 msg="wrong data for token 1, expected: 'AAAAA', found: '{0}' ".format( 171 msg=(
170 data 172 "wrong data for token 1, expected: 'AAAAA', found: '{0}' "
171 ), 173 ).format(data),
172 ) 174 )
173 self.assertEqual( 175 self.assertEqual(
174 start, 176 start,
175 33, 177 33,
176 msg="wrong start frame for token 2, expected: 33, found: {0} ".format( 178 msg=(
177 start 179 "wrong start frame for token 2, expected: 33, found: {0} "
178 ), 180 ).format(start),
179 ) 181 )
180 self.assertEqual( 182 self.assertEqual(
181 end, 183 end,
182 37, 184 37,
183 msg="wrong end frame for token 2, expected: 37, found: {0} ".format( 185 msg=(
184 end 186 "wrong end frame for token 2, expected: 37, found: {0} "
185 ), 187 ).format(end),
186 ) 188 )
187 189
188 # A token is considered valid iff the tokenizer encounters 190 # A token is considered valid iff the tokenizer encounters
189 # at least 3 valid frames (init_min = 3) between which there 191 # at least 3 valid frames (init_min = 3) between which there
190 # are at most 2 consecutive non-valid frames (init_max_silence = 2) 192 # are at most 2 consecutive non-valid frames (init_max_silence = 2)
201 ) 203 )
202 204
203 data_source = StringDataSource( 205 data_source = StringDataSource(
204 "aAaaaAaAaaAaAaaaaaaAAAAAAAAAaaaaaaaAAAAA" 206 "aAaaaAaAaaAaAaaaaaaAAAAAAAAAaaaaaaaAAAAA"
205 ) 207 )
206 # ^ ^ ^ ^ ^ ^ 208 # ^ ^ ^ ^ ^ ^
207 # 5 16 19 31 35 39 209 # 5 16 19 31 35 39
208 tokens = tokenizer.tokenize(data_source) 210 tokens = tokenizer.tokenize(data_source)
209 211
210 self.assertEqual( 212 self.assertEqual(
211 len(tokens), 213 len(tokens),
212 3, 214 3,
220 start = tok1[1] 222 start = tok1[1]
221 end = tok1[2] 223 end = tok1[2]
222 self.assertEqual( 224 self.assertEqual(
223 data, 225 data,
224 "AaAaaAaAaaaa", 226 "AaAaaAaAaaaa",
225 msg="wrong data for token 1, expected: 'AaAaaAaA', found: '{0}' ".format( 227 msg=(
226 data 228 "wrong data for token 1, expected: 'AaAaaAaA', found: '{0}' "
227 ), 229 ).format(data),
228 ) 230 )
229 self.assertEqual( 231 self.assertEqual(
230 start, 232 start,
231 5, 233 5,
232 msg="wrong start frame for token 1, expected: 5, found: {0} ".format( 234 msg=(
233 start 235 "wrong start frame for token 1, expected: 5, found: {0} "
234 ), 236 ).format(start),
235 ) 237 )
236 self.assertEqual( 238 self.assertEqual(
237 end, 239 end,
238 16, 240 16,
239 msg="wrong end frame for token 1, expected: 16, found: {0} ".format( 241 msg=(
240 end 242 "wrong end frame for token 1, expected: 16, found: {0} "
241 ), 243 ).format(end),
242 ) 244 )
243 245
244 data = "".join(tok2[0]) 246 data = "".join(tok2[0])
245 start = tok2[1] 247 start = tok2[1]
246 end = tok2[2] 248 end = tok2[2]
247 self.assertEqual( 249 self.assertEqual(
248 data, 250 data,
249 "AAAAAAAAAaaaa", 251 "AAAAAAAAAaaaa",
250 msg="wrong data for token 2, expected: 'AAAAAAAAAaaaa', found: '{0}' ".format( 252 msg=(
251 data 253 "wrong data for token 2, expected: 'AAAAAAAAAaaaa', "
252 ), 254 "found: '{0}' "
255 ).format(data),
253 ) 256 )
254 self.assertEqual( 257 self.assertEqual(
255 start, 258 start,
256 19, 259 19,
257 msg="wrong start frame for token 2, expected: 19, found: {0} ".format( 260 msg=(
258 start 261 "wrong start frame for token 2, expected: 19, found: {0} "
259 ), 262 ).format(start),
260 ) 263 )
261 self.assertEqual( 264 self.assertEqual(
262 end, 265 end,
263 31, 266 31,
264 msg="wrong end frame for token 2, expected: 31, found: {0} ".format( 267 msg=(
265 end 268 "wrong end frame for token 2, expected: 31, found: {0} "
266 ), 269 ).format(end),
267 ) 270 )
268 271
269 data = "".join(tok3[0]) 272 data = "".join(tok3[0])
270 start = tok3[1] 273 start = tok3[1]
271 end = tok3[2] 274 end = tok3[2]
272 self.assertEqual( 275 self.assertEqual(
273 data, 276 data,
274 "AAAAA", 277 "AAAAA",
275 msg="wrong data for token 3, expected: 'AAAAA', found: '{0}' ".format( 278 msg=(
276 data 279 "wrong data for token 3, expected: 'AAAAA', found: '{0}' "
277 ), 280 ).format(data),
278 ) 281 )
279 self.assertEqual( 282 self.assertEqual(
280 start, 283 start,
281 35, 284 35,
282 msg="wrong start frame for token 2, expected: 35, found: {0} ".format( 285 msg=(
283 start 286 "wrong start frame for token 2, expected: 35, found: {0} "
284 ), 287 ).format(start),
285 ) 288 )
286 self.assertEqual( 289 self.assertEqual(
287 end, 290 end,
288 39, 291 39,
289 msg="wrong end frame for token 2, expected: 39, found: {0} ".format( 292 msg=(
290 end 293 "wrong end frame for token 2, expected: 39, found: {0} "
291 ), 294 ).format(end),
292 ) 295 )
293 296
294 297
295 class TestStreamTokenizerMinMaxLength(unittest.TestCase): 298 class TestStreamTokenizerMinMaxLength(unittest.TestCase):
296 def setUp(self): 299 def setUp(self):
327 start = tok1[1] 330 start = tok1[1]
328 end = tok1[2] 331 end = tok1[2]
329 self.assertEqual( 332 self.assertEqual(
330 data, 333 data,
331 "AaaaAaAaaAaAaa", 334 "AaaaAaAaaAaAaa",
332 msg="wrong data for token 1, expected: 'AaaaAaAaaAaAaa', found: '{0}' ".format( 335 msg=(
333 data 336 "wrong data for token 1, expected: 'AaaaAaAaaAaAaa', "
334 ), 337 "found: '{0}' "
338 ).format(data),
335 ) 339 )
336 self.assertEqual( 340 self.assertEqual(
337 start, 341 start,
338 1, 342 1,
339 msg="wrong start frame for token 1, expected: 1, found: {0} ".format( 343 msg=(
340 start 344 "wrong start frame for token 1, expected: 1, found: {0} "
341 ), 345 ).format(start),
342 ) 346 )
343 self.assertEqual( 347 self.assertEqual(
344 end, 348 end,
345 14, 349 14,
346 msg="wrong end frame for token 1, expected: 14, found: {0} ".format( 350 msg=(
347 end 351 "wrong end frame for token 1, expected: 14, found: {0} "
348 ), 352 ).format(end),
349 ) 353 )
350 354
351 data = "".join(tok2[0]) 355 data = "".join(tok2[0])
352 start = tok2[1] 356 start = tok2[1]
353 end = tok2[2] 357 end = tok2[2]
354 self.assertEqual( 358 self.assertEqual(
355 data, 359 data,
356 "AAAAAAAAAaa", 360 "AAAAAAAAAaa",
357 msg="wrong data for token 2, expected: 'AAAAAAAAAaa', found: '{0}' ".format( 361 msg=(
358 data 362 "wrong data for token 2, expected: 'AAAAAAAAAaa', "
359 ), 363 "found: '{0}' "
364 ).format(data),
360 ) 365 )
361 self.assertEqual( 366 self.assertEqual(
362 start, 367 start,
363 18, 368 18,
364 msg="wrong start frame for token 2, expected: 18, found: {0} ".format( 369 msg=(
365 start 370 "wrong start frame for token 2, expected: 18, found: {0} "
366 ), 371 ).format(start),
367 ) 372 )
368 self.assertEqual( 373 self.assertEqual(
369 end, 374 end,
370 28, 375 28,
371 msg="wrong end frame for token 2, expected: 28, found: {0} ".format( 376 msg=(
372 end 377 "wrong end frame for token 2, expected: 28, found: {0} "
373 ), 378 ).format(end),
374 ) 379 )
375 380
376 def test_min_length_1_init_max_length_1(self): 381 def test_min_length_1_init_max_length_1(self):
377 382
378 tokenizer = StreamTokenizer( 383 tokenizer = StreamTokenizer(
412 ) 417 )
413 418
414 data_source = StringDataSource( 419 data_source = StringDataSource(
415 "aAaaaAaAaaAaAaaaaaaAAAAAaaaaaaAAAAAaaAAaaAAA" 420 "aAaaaAaAaaAaAaaaaaaAAAAAaaaaaaAAAAAaaAAaaAAA"
416 ) 421 )
417 # ^ ^ ^ ^ 422 # ^ ^ ^ ^
418 # 1 16 30 45 423 # 1 16 30 45
419 424
420 tokens = tokenizer.tokenize(data_source) 425 tokens = tokenizer.tokenize(data_source)
421 426
422 self.assertEqual( 427 self.assertEqual(
423 len(tokens), 428 len(tokens),
432 start = tok1[1] 437 start = tok1[1]
433 end = tok1[2] 438 end = tok1[2]
434 self.assertEqual( 439 self.assertEqual(
435 data, 440 data,
436 "AaaaAaAaaAaAaaaa", 441 "AaaaAaAaaAaAaaaa",
437 msg="wrong data for token 1, expected: 'AaaaAaAaaAaAaaaa', found: '{0}' ".format( 442 msg=(
438 data 443 "wrong data for token 1, expected: 'AaaaAaAaaAaAaaaa', "
439 ), 444 "found: '{0}' "
445 ).format(data),
440 ) 446 )
441 self.assertEqual( 447 self.assertEqual(
442 start, 448 start,
443 1, 449 1,
444 msg="wrong start frame for token 1, expected: 1, found: {0} ".format( 450 msg=(
445 start 451 "wrong start frame for token 1, expected: 1, found: {0} "
446 ), 452 ).format(start),
447 ) 453 )
448 self.assertEqual( 454 self.assertEqual(
449 end, 455 end,
450 16, 456 16,
451 msg="wrong end frame for token 1, expected: 16, found: {0} ".format( 457 msg=(
452 end 458 "wrong end frame for token 1, expected: 16, found: {0} "
453 ), 459 ).format(end),
454 ) 460 )
455 461
456 data = "".join(tok2[0]) 462 data = "".join(tok2[0])
457 start = tok2[1] 463 start = tok2[1]
458 end = tok2[2] 464 end = tok2[2]
459 self.assertEqual( 465 self.assertEqual(
460 data, 466 data,
461 "AAAAAaaAAaaAAA", 467 "AAAAAaaAAaaAAA",
462 msg="wrong data for token 2, expected: 'AAAAAaaAAaaAAA', found: '{0}' ".format( 468 msg=(
463 data 469 "wrong data for token 2, expected: 'AAAAAaaAAaaAAA', "
464 ), 470 "found: '{0}' "
471 ).format(data),
465 ) 472 )
466 self.assertEqual( 473 self.assertEqual(
467 start, 474 start,
468 30, 475 30,
469 msg="wrong start frame for token 2, expected: 30, found: {0} ".format( 476 msg=(
470 start 477 "wrong start frame for token 2, expected: 30, found: {0} "
471 ), 478 ).format(start),
472 ) 479 )
473 self.assertEqual( 480 self.assertEqual(
474 end, 481 end,
475 43, 482 43,
476 msg="wrong end frame for token 2, expected: 43, found: {0} ".format( 483 msg=(
477 end 484 "wrong end frame for token 2, expected: 43, found: {0} "
478 ), 485 ).format(end),
479 ) 486 )
480 487
481 def test_min_length_4_init_max_length_5(self): 488 def test_min_length_4_init_max_length_5(self):
482 489
483 tokenizer = StreamTokenizer( 490 tokenizer = StreamTokenizer(
491 ) 498 )
492 499
493 data_source = StringDataSource( 500 data_source = StringDataSource(
494 "aAaaaAaAaaAaAaaaaaAAAAAAAAaaaaaaAAAAAaaaaaAAaaAaa" 501 "aAaaaAaAaaAaAaaaaaAAAAAAAAaaaaaaAAAAAaaaaaAAaaAaa"
495 ) 502 )
496 # ^ ^^ ^ ^ ^ ^ ^ 503 # ^ ^^ ^ ^ ^ ^ ^
497 # 18 2223 27 32 36 42 46 504 # 18 2223 27 32 36 42 46
498 505
499 tokens = tokenizer.tokenize(data_source) 506 tokens = tokenizer.tokenize(data_source)
500 507
501 self.assertEqual( 508 self.assertEqual(
502 len(tokens), 509 len(tokens),
511 start = tok1[1] 518 start = tok1[1]
512 end = tok1[2] 519 end = tok1[2]
513 self.assertEqual( 520 self.assertEqual(
514 data, 521 data,
515 "AAAAA", 522 "AAAAA",
516 msg="wrong data for token 1, expected: 'AAAAA', found: '{0}' ".format( 523 msg=(
517 data 524 "wrong data for token 1, expected: 'AAAAA', found: '{0}' "
518 ), 525 ).format(data),
519 ) 526 )
520 self.assertEqual( 527 self.assertEqual(
521 start, 528 start,
522 18, 529 18,
523 msg="wrong start frame for token 1, expected: 18, found: {0} ".format( 530 msg=(
524 start 531 "wrong start frame for token 1, expected: 18, found: {0} "
525 ), 532 ).format(start),
526 ) 533 )
527 self.assertEqual( 534 self.assertEqual(
528 end, 535 end,
529 22, 536 22,
530 msg="wrong end frame for token 1, expected: 22, found: {0} ".format( 537 msg=(
531 end 538 "wrong end frame for token 1, expected: 22, found: {0} "
532 ), 539 ).format(end),
533 ) 540 )
534 541
535 data = "".join(tok2[0]) 542 data = "".join(tok2[0])
536 start = tok2[1] 543 start = tok2[1]
537 end = tok2[2] 544 end = tok2[2]
538 self.assertEqual( 545 self.assertEqual(
539 data, 546 data,
540 "AAAaa", 547 "AAAaa",
541 msg="wrong data for token 1, expected: 'AAAaa', found: '{0}' ".format( 548 msg=(
542 data 549 "wrong data for token 1, expected: 'AAAaa', found: '{0}' "
543 ), 550 ).format(data),
544 ) 551 )
545 self.assertEqual( 552 self.assertEqual(
546 start, 553 start,
547 23, 554 23,
548 msg="wrong start frame for token 1, expected: 23, found: {0} ".format( 555 msg=(
549 start 556 "wrong start frame for token 1, expected: 23, found: {0} "
550 ), 557 ).format(start),
551 ) 558 )
552 self.assertEqual( 559 self.assertEqual(
553 end, 560 end,
554 27, 561 27,
555 msg="wrong end frame for token 1, expected: 27, found: {0} ".format( 562 msg=(
556 end 563 "wrong end frame for token 1, expected: 27, found: {0} "
557 ), 564 ).format(end),
558 ) 565 )
559 566
560 data = "".join(tok3[0]) 567 data = "".join(tok3[0])
561 start = tok3[1] 568 start = tok3[1]
562 end = tok3[2] 569 end = tok3[2]
563 self.assertEqual( 570 self.assertEqual(
564 data, 571 data,
565 "AAAAA", 572 "AAAAA",
566 msg="wrong data for token 1, expected: 'AAAAA', found: '{0}' ".format( 573 msg=(
567 data 574 "wrong data for token 1, expected: 'AAAAA', found: '{0}' "
568 ), 575 ).format(data),
569 ) 576 )
570 self.assertEqual( 577 self.assertEqual(
571 start, 578 start,
572 32, 579 32,
573 msg="wrong start frame for token 1, expected: 1, found: {0} ".format( 580 msg=(
574 start 581 "wrong start frame for token 1, expected: 1, found: {0} "
575 ), 582 ).format(start),
576 ) 583 )
577 self.assertEqual( 584 self.assertEqual(
578 end, 585 end,
579 36, 586 36,
580 msg="wrong end frame for token 1, expected: 7, found: {0} ".format( 587 msg=(
581 end 588 "wrong end frame for token 1, expected: 7, found: {0} "
582 ), 589 ).format(end),
583 ) 590 )
584 591
585 data = "".join(tok4[0]) 592 data = "".join(tok4[0])
586 start = tok4[1] 593 start = tok4[1]
587 end = tok4[2] 594 end = tok4[2]
588 self.assertEqual( 595 self.assertEqual(
589 data, 596 data,
590 "AAaaA", 597 "AAaaA",
591 msg="wrong data for token 2, expected: 'AAaaA', found: '{0}' ".format( 598 msg=(
592 data 599 "wrong data for token 2, expected: 'AAaaA', found: '{0}' "
593 ), 600 ).format(data),
594 ) 601 )
595 self.assertEqual( 602 self.assertEqual(
596 start, 603 start,
597 42, 604 42,
598 msg="wrong start frame for token 2, expected: 17, found: {0} ".format( 605 msg=(
599 start 606 "wrong start frame for token 2, expected: 17, found: {0} "
600 ), 607 ).format(start),
601 ) 608 )
602 self.assertEqual( 609 self.assertEqual(
603 end, 610 end,
604 46, 611 46,
605 msg="wrong end frame for token 2, expected: 22, found: {0} ".format( 612 msg=(
606 end 613 "wrong end frame for token 2, expected: 22, found: {0} "
607 ), 614 ).format(end),
608 ) 615 )
609 616
610 617
611 class TestStreamTokenizerMaxContinuousSilence(unittest.TestCase): 618 class TestStreamTokenizerMaxContinuousSilence(unittest.TestCase):
612 def setUp(self): 619 def setUp(self):
643 start = tok1[1] 650 start = tok1[1]
644 end = tok1[2] 651 end = tok1[2]
645 self.assertEqual( 652 self.assertEqual(
646 data, 653 data,
647 "AAAAA", 654 "AAAAA",
648 msg="wrong data for token 1, expected: 'AAAAA', found: '{0}' ".format( 655 msg=(
649 data 656 "wrong data for token 1, expected: 'AAAAA', found: '{0}' "
650 ), 657 ).format(data),
651 ) 658 )
652 self.assertEqual( 659 self.assertEqual(
653 start, 660 start,
654 3, 661 3,
655 msg="wrong start frame for token 1, expected: 3, found: {0} ".format( 662 msg=(
656 start 663 "wrong start frame for token 1, expected: 3, found: {0} "
657 ), 664 ).format(start),
658 ) 665 )
659 self.assertEqual( 666 self.assertEqual(
660 end, 667 end,
661 7, 668 7,
662 msg="wrong end frame for token 1, expected: 7, found: {0} ".format( 669 msg=(
663 end 670 "wrong end frame for token 1, expected: 7, found: {0} "
664 ), 671 ).format(end),
665 ) 672 )
666 673
667 data = "".join(tok2[0]) 674 data = "".join(tok2[0])
668 start = tok2[1] 675 start = tok2[1]
669 end = tok2[2] 676 end = tok2[2]
670 self.assertEqual( 677 self.assertEqual(
671 data, 678 data,
672 "AAAAAA", 679 "AAAAAA",
673 msg="wrong data for token 1, expected: 'AAAAAA', found: '{0}' ".format( 680 msg=(
674 data 681 "wrong data for token 1, expected: 'AAAAAA', found: '{0}' "
675 ), 682 ).format(data),
676 ) 683 )
677 self.assertEqual( 684 self.assertEqual(
678 start, 685 start,
679 9, 686 9,
680 msg="wrong start frame for token 1, expected: 9, found: {0} ".format( 687 msg=(
681 start 688 "wrong start frame for token 1, expected: 9, found: {0} "
682 ), 689 ).format(start),
683 ) 690 )
684 self.assertEqual( 691 self.assertEqual(
685 end, 692 end,
686 14, 693 14,
687 msg="wrong end frame for token 1, expected: 14, found: {0} ".format( 694 msg=(
688 end 695 "wrong end frame for token 1, expected: 14, found: {0} "
689 ), 696 ).format(end),
690 ) 697 )
691 698
692 data = "".join(tok3[0]) 699 data = "".join(tok3[0])
693 start = tok3[1] 700 start = tok3[1]
694 end = tok3[2] 701 end = tok3[2]
695 self.assertEqual( 702 self.assertEqual(
696 data, 703 data,
697 "AAAAAAAAA", 704 "AAAAAAAAA",
698 msg="wrong data for token 1, expected: 'AAAAAAAAA', found: '{0}' ".format( 705 msg=(
699 data 706 "wrong data for token 1, expected: 'AAAAAAAAA', found: '{0}' "
700 ), 707 ).format(data),
701 ) 708 )
702 self.assertEqual( 709 self.assertEqual(
703 start, 710 start,
704 17, 711 17,
705 msg="wrong start frame for token 1, expected: 17, found: {0} ".format( 712 msg=(
706 start 713 "wrong start frame for token 1, expected: 17, found: {0} "
707 ), 714 ).format(start),
708 ) 715 )
709 self.assertEqual( 716 self.assertEqual(
710 end, 717 end,
711 25, 718 25,
712 msg="wrong end frame for token 1, expected: 25, found: {0} ".format( 719 msg=(
713 end 720 "wrong end frame for token 1, expected: 25, found: {0} "
714 ), 721 ).format(end),
715 ) 722 )
716 723
717 def test_min_5_max_10_max_continuous_silence_1(self): 724 def test_min_5_max_10_max_continuous_silence_1(self):
718 725
719 tokenizer = StreamTokenizer( 726 tokenizer = StreamTokenizer(
746 start = tok1[1] 753 start = tok1[1]
747 end = tok1[2] 754 end = tok1[2]
748 self.assertEqual( 755 self.assertEqual(
749 data, 756 data,
750 "AAAAAaAAAA", 757 "AAAAAaAAAA",
751 msg="wrong data for token 1, expected: 'AAAAAaAAAA', found: '{0}' ".format( 758 msg=(
752 data 759 "wrong data for token 1, expected: 'AAAAAaAAAA', "
753 ), 760 "found: '{0}' "
761 ).format(data),
754 ) 762 )
755 self.assertEqual( 763 self.assertEqual(
756 start, 764 start,
757 3, 765 3,
758 msg="wrong start frame for token 1, expected: 3, found: {0} ".format( 766 msg=(
759 start 767 "wrong start frame for token 1, expected: 3, found: {0} "
760 ), 768 ).format(start),
761 ) 769 )
762 self.assertEqual( 770 self.assertEqual(
763 end, 771 end,
764 12, 772 12,
765 msg="wrong end frame for token 1, expected: 10, found: {0} ".format( 773 msg=(
766 end 774 "wrong end frame for token 1, expected: 10, found: {0} "
767 ), 775 ).format(end),
768 ) 776 )
769 777
770 data = "".join(tok2[0]) 778 data = "".join(tok2[0])
771 start = tok2[1] 779 start = tok2[1]
772 end = tok2[2] 780 end = tok2[2]
773 self.assertEqual( 781 self.assertEqual(
774 data, 782 data,
775 "AAa", 783 "AAa",
776 msg="wrong data for token 1, expected: 'AAa', found: '{0}' ".format( 784 msg=(
777 data 785 "wrong data for token 1, expected: 'AAa', found: '{0}' "
778 ), 786 ).format(data),
779 ) 787 )
780 self.assertEqual( 788 self.assertEqual(
781 start, 789 start,
782 13, 790 13,
783 msg="wrong start frame for token 1, expected: 9, found: {0} ".format( 791 msg=(
784 start 792 "wrong start frame for token 1, expected: 9, found: {0} "
785 ), 793 ).format(start),
786 ) 794 )
787 self.assertEqual( 795 self.assertEqual(
788 end, 796 end,
789 15, 797 15,
790 msg="wrong end frame for token 1, expected: 14, found: {0} ".format( 798 msg=(
791 end 799 "wrong end frame for token 1, expected: 14, found: {0} "
792 ), 800 ).format(end),
793 ) 801 )
794 802
795 data = "".join(tok3[0]) 803 data = "".join(tok3[0])
796 start = tok3[1] 804 start = tok3[1]
797 end = tok3[2] 805 end = tok3[2]
798 self.assertEqual( 806 self.assertEqual(
799 data, 807 data,
800 "AAAAAAAAAa", 808 "AAAAAAAAAa",
801 msg="wrong data for token 1, expected: 'AAAAAAAAAa', found: '{0}' ".format( 809 msg=(
802 data 810 "wrong data for token 1, expected: 'AAAAAAAAAa', "
803 ), 811 "found: '{0}' "
812 ).format(data),
804 ) 813 )
805 self.assertEqual( 814 self.assertEqual(
806 start, 815 start,
807 17, 816 17,
808 msg="wrong start frame for token 1, expected: 17, found: {0} ".format( 817 msg=(
809 start 818 "wrong start frame for token 1, expected: 17, found: {0} "
810 ), 819 ).format(start),
811 ) 820 )
812 self.assertEqual( 821 self.assertEqual(
813 end, 822 end,
814 26, 823 26,
815 msg="wrong end frame for token 1, expected: 26, found: {0} ".format( 824 msg=(
816 end 825 "wrong end frame for token 1, expected: 26, found: {0} "
817 ), 826 ).format(end),
818 ) 827 )
819 828
820 829
821 class TestStreamTokenizerModes(unittest.TestCase): 830 class TestStreamTokenizerModes(unittest.TestCase):
822 def setUp(self): 831 def setUp(self):
853 start = tok1[1] 862 start = tok1[1]
854 end = tok1[2] 863 end = tok1[2]
855 self.assertEqual( 864 self.assertEqual(
856 data, 865 data,
857 "AAAAAAAA", 866 "AAAAAAAA",
858 msg="wrong data for token 1, expected: 'AAAAAAAA', found: '{0}' ".format( 867 msg=(
859 data 868 "wrong data for token 1, expected: 'AAAAAAAA', found: '{0}' "
860 ), 869 ).format(data),
861 ) 870 )
862 self.assertEqual( 871 self.assertEqual(
863 start, 872 start,
864 2, 873 2,
865 msg="wrong start frame for token 1, expected: 2, found: {0} ".format( 874 msg=(
866 start 875 "wrong start frame for token 1, expected: 2, found: {0} "
867 ), 876 ).format(start),
868 ) 877 )
869 self.assertEqual( 878 self.assertEqual(
870 end, 879 end,
871 9, 880 9,
872 msg="wrong end frame for token 1, expected: 9, found: {0} ".format( 881 msg=(
873 end 882 "wrong end frame for token 1, expected: 9, found: {0} "
874 ), 883 ).format(end),
875 ) 884 )
876 885
877 def test_DROP_TAILING_SILENCE(self): 886 def test_DROP_TAILING_SILENCE(self):
878 887
879 tokenizer = StreamTokenizer( 888 tokenizer = StreamTokenizer(
905 start = tok1[1] 914 start = tok1[1]
906 end = tok1[2] 915 end = tok1[2]
907 self.assertEqual( 916 self.assertEqual(
908 data, 917 data,
909 "AAAAA", 918 "AAAAA",
910 msg="wrong data for token 1, expected: 'AAAAA', found: '{0}' ".format( 919 msg=(
911 data 920 "wrong data for token 1, expected: 'AAAAA', found: '{0}' "
912 ), 921 ).format(data),
913 ) 922 )
914 self.assertEqual( 923 self.assertEqual(
915 start, 924 start,
916 2, 925 2,
917 msg="wrong start frame for token 1, expected: 2, found: {0} ".format( 926 msg=(
918 start 927 "wrong start frame for token 1, expected: 2, found: {0} "
919 ), 928 ).format(start),
920 ) 929 )
921 self.assertEqual( 930 self.assertEqual(
922 end, 931 end,
923 6, 932 6,
924 msg="wrong end frame for token 1, expected: 6, found: {0} ".format( 933 msg=(
925 end 934 "wrong end frame for token 1, expected: 6, found: {0} "
926 ), 935 ).format(end),
927 ) 936 )
928 937
929 def test_STRICT_MIN_LENGTH_and_DROP_TAILING_SILENCE(self): 938 def test_STRICT_MIN_LENGTH_and_DROP_TAILING_SILENCE(self):
930 939
931 tokenizer = StreamTokenizer( 940 tokenizer = StreamTokenizer(
958 start = tok1[1] 967 start = tok1[1]
959 end = tok1[2] 968 end = tok1[2]
960 self.assertEqual( 969 self.assertEqual(
961 data, 970 data,
962 "AAAAAAAA", 971 "AAAAAAAA",
963 msg="wrong data for token 1, expected: 'AAAAAAAA', found: '{0}' ".format( 972 msg=(
964 data 973 "wrong data for token 1, expected: 'AAAAAAAA', found: '{0}' "
965 ), 974 ).format(data),
966 ) 975 )
967 self.assertEqual( 976 self.assertEqual(
968 start, 977 start,
969 2, 978 2,
970 msg="wrong start frame for token 1, expected: 2, found: {0} ".format( 979 msg=(
971 start 980 "wrong start frame for token 1, expected: 2, found: {0} "
972 ), 981 ).format(start),
973 ) 982 )
974 self.assertEqual( 983 self.assertEqual(
975 end, 984 end,
976 9, 985 9,
977 msg="wrong end frame for token 1, expected: 9, found: {0} ".format( 986 msg=(
978 end 987 "wrong end frame for token 1, expected: 9, found: {0} "
979 ), 988 ).format(end),
980 ) 989 )
981 990
982 991
983 class TestStreamTokenizerCallback(unittest.TestCase): 992 class TestStreamTokenizerCallback(unittest.TestCase):
984 def setUp(self): 993 def setUp(self):