Chris@0
|
1 #! /usr/bin/env python3
|
Chris@0
|
2 import sys
|
Chris@0
|
3 from os.path import abspath, expanduser, dirname, join
|
Chris@0
|
4 from itertools import chain
|
Chris@0
|
5 import json
|
Chris@0
|
6 import argparse
|
Chris@0
|
7
|
Chris@0
|
8 from vis import vis, unvis, VIS_WHITE
|
Chris@0
|
9
|
Chris@0
|
10
|
Chris@0
|
# Directory containing this script.
# Deliberately NOT named ``__dir__``: a module-level ``__dir__`` is the
# PEP 562 hook invoked by ``dir(module)``, so binding it to a string makes
# ``dir()`` on this module raise ``TypeError`` (Python 3.7+).
_SCRIPT_DIR = dirname(abspath(__file__))

# Default destination of the generated JSON fixtures; the ``-o`` command-line
# option replaces it when the script is run directly.
OUTPUT_FILE = join(_SCRIPT_DIR, '..', 'fixtures', 'unvis_fixtures.json')
|
Chris@0
|
14
|
Chris@0
|
# Hand-written inputs that are round-tripped through vis/unvis in addition
# to the per-codepoint fixtures. Add custom fixtures here.
CUSTOM_FIXTURES = [
    # long multibyte string: every codepoint from U+0000 up to U+03FF
    ''.join(map(chr, range(1024))),
    # whitespace inside short ASCII strings
    'foo bar',
    'foo\nbar',
    # PHP-looking snippets mixing quotes, backslashes, tabs and newlines
    "$bar = 'baz';",
    r'$foo = "\x20\\x20\\\x20\\\\x20"',
    '$foo = function($bar) use($baz) {\n\treturn $baz->getFoo()\n};',
]
|
Chris@0
|
25
|
Chris@0
|
# Named codepoint ranges selectable from the command line.
# NOTE: each value is an ``itertools.chain`` iterator, so it can be consumed
# only once per process.
RANGES = {
    # All valid codepoints in the BMP: U+0000..U+FFFF minus the surrogate
    # block U+D800..U+DFFF. ``range`` ends are exclusive, so the upper bound
    # must be 0x10000 for U+FFFF itself to be included.
    'bmp': chain(range(0x0000, 0xD800), range(0xE000, 0x10000)),
    # Smaller set of pertinent? codepoints inside BMP
    # see: http://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane
    'small': chain(
        # latin blocks
        range(0x0000, 0x0250),
        # Greek, Cyrillic
        range(0x0370, 0x0530),
        # Hebrew, Arabic
        range(0x590, 0x0700),
        # CJK radicals
        range(0x2E80, 0x2F00),
        # Hiragana, Katakana
        range(0x3040, 0x3100),
    ),
}
|
Chris@0
|
44
|
Chris@0
|
45
|
Chris@0
|
if __name__ == '__main__':
    # Script entry point: parse the options, round-trip every selected
    # codepoint and every custom fixture through vis/unvis, then write the
    # (encoded, decoded) pairs to the output file as JSON.

    parser = argparse.ArgumentParser(
        description='Generates test data for Psy\\Test\\Util\\StrTest')
    parser.add_argument(
        '-f', '--format-output', action='store_true',
        help='Indent JSON output to ease debugging')
    parser.add_argument(
        '-a', '--all', action='store_true',
        help="""Generates test data for all codepoints of the BMP.
                      (same as --range=bmp). WARNING: You will need quite
                      a lot of RAM to run the testsuite !
                      """)
    parser.add_argument(
        '-r', '--range',
        help="""Choose the range of codepoints used to generate
                      test data.""",
        choices=list(RANGES),
        default='small')
    parser.add_argument(
        '-o', '--output-file',
        help="""Write test data to OUTPUT_FILE
                      (defaults to PSYSH_DIR/test/fixtures)""")
    options = parser.parse_args()

    # --all takes precedence over whatever --range says.
    range_name = 'bmp' if options.all else options.range
    codepoints = RANGES[range_name]

    if options.format_output:
        json_indent = 2
    else:
        json_indent = None

    if options.output_file:
        OUTPUT_FILE = abspath(expanduser(options.output_file))

    # Single characters come first; the custom fixtures go last, since they
    # would fail anyway if one of the single-character cases did.
    samples = chain((chr(cp) for cp in codepoints), CUSTOM_FIXTURES)

    pairs = []
    for sample in samples:
        visible = vis(sample, VIS_WHITE)
        pairs.append((visible, unvis(visible)))

    # JSON sidesteps the backslash/quote escaping differences between
    # PHP and Python.
    with open(OUTPUT_FILE, 'w') as out:
        json.dump(pairs, out, indent=json_indent)

    sys.exit(0)
|