annotate vendor/psy/psysh/test/tools/gen_unvis_fixtures.py @ 19:fa3358dc1485 tip

Add ndrum files
author Chris Cannam
date Wed, 28 Aug 2019 13:14:47 +0100
parents 4c8ae668cc8c
children
rev   line source
Chris@0 1 #! /usr/bin/env python3
Chris@0 2 import sys
Chris@0 3 from os.path import abspath, expanduser, dirname, join
Chris@0 4 from itertools import chain
Chris@0 5 import json
Chris@0 6 import argparse
Chris@0 7
Chris@0 8 from vis import vis, unvis, VIS_WHITE
Chris@0 9
Chris@0 10
# Directory containing this script.
# Deliberately not named __dir__: a module-level __dir__ is the hook that
# dir(module) calls (PEP 562), so binding it to a string would make dir()
# on this module raise TypeError.
_SCRIPT_DIR = dirname(abspath(__file__))

# Default destination of the generated fixtures: ../fixtures relative to
# this script. The --output-file option takes precedence over it.
OUTPUT_FILE = join(_SCRIPT_DIR, '..', 'fixtures', 'unvis_fixtures.json')
Chris@0 14
# Add custom fixtures here.
# Each entry is a plain (not yet encoded) string; the script body encodes it
# with vis() and decodes it back with unvis() before writing the pair out.
CUSTOM_FIXTURES = [
    # test long multibyte string
    ''.join(chr(cp) for cp in range(1024)),
    'foo bar',
    'foo\nbar',
    "$bar = 'baz';",
    # raw string: the backslashes below are literal characters
    r'$foo = "\x20\\x20\\\x20\\\\x20"',
    '$foo = function($bar) use($baz) {\n\treturn $baz->getFoo()\n};',
]
Chris@0 25
# Named codepoint ranges, keyed by the value accepted by --range.
# Each value is a one-shot iterator of integer codepoints (itertools.chain),
# so it can be walked only once per process.
RANGES = {
    # All valid codepoints in the BMP: U+0000..U+FFFF minus the surrogate
    # block U+D800..U+DFFF. range() stops are exclusive, so the upper bound
    # must be 0x10000 for U+FFFF itself to be included.
    'bmp': chain(range(0x0000, 0xD800), range(0xE000, 0x10000)),
    # Smaller set of pertinent? codepoints inside BMP
    # see: http://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane
    'small': chain(
        # latin blocks
        range(0x0000, 0x0250),
        # Greek, Cyrillic
        range(0x0370, 0x0530),
        # Hebrew, Arabic
        range(0x0590, 0x0700),
        # CJK radicals
        range(0x2E80, 0x2F00),
        # Hiragana, Katakana
        range(0x3040, 0x3100),
    ),
}
Chris@0 44
Chris@0 45
def _fixture_pair(text):
    """Encode *text* with ``vis`` and decode the result with ``unvis``.

    Returns an ``(encoded, decoded)`` tuple, which is the shape of one
    entry in the generated fixture list.
    """
    encoded = vis(text, VIS_WHITE)
    return encoded, unvis(encoded)


def main(argv=None):
    """Generate the unvis fixture file from the command-line options.

    ``argv`` is the list of arguments to parse; ``None`` lets argparse
    read ``sys.argv[1:]``. One fixture pair is produced per codepoint of
    the selected range, followed by one per entry of ``CUSTOM_FIXTURES``,
    and the whole list is written as JSON to the output file.
    """
    argp = argparse.ArgumentParser(
        description='Generates test data for Psy\\Test\\Util\\StrTest')
    argp.add_argument('-f', '--format-output', action='store_true',
                      help='Indent JSON output to ease debugging')
    argp.add_argument('-a', '--all', action='store_true',
                      help="""Generates test data for all codepoints of the BMP.
                      (same as --range=bmp). WARNING: You will need quite
                      a lot of RAM to run the testsuite !
                      """)
    argp.add_argument('-r', '--range',
                      help="""Choose the range of codepoints used to generate
                      test data.""",
                      choices=list(RANGES.keys()),
                      default='small')
    argp.add_argument('-o', '--output-file',
                      help="""Write test data to OUTPUT_FILE
                      (defaults to PSYSH_DIR/test/fixtures)""")
    args = argp.parse_args(argv)

    # --all wins over whatever --range selected.
    cp_range = RANGES['bmp'] if args.all else RANGES[args.range]
    indent = 2 if args.format_output else None
    # Resolve the destination locally instead of rebinding the module-level
    # OUTPUT_FILE default.
    if args.output_file:
        output_file = abspath(expanduser(args.output_file))
    else:
        output_file = OUTPUT_FILE

    # The default 'small' range should be enough;
    # use 'bmp' (or --all) for a more complete smoke test.
    fixtures = [_fixture_pair(chr(codepoint)) for codepoint in cp_range]

    # Add our own custom fixtures at the end,
    # since they would fail anyway if one of the previous did.
    fixtures.extend(_fixture_pair(fixture) for fixture in CUSTOM_FIXTURES)

    with open(output_file, 'w', encoding='utf-8') as fp:
        # Dump as JSON to avoid a backslashing and quoting nightmare
        # between PHP and Python. The (encoded, decoded) tuples are
        # serialized as two-element JSON arrays.
        json.dump(fixtures, fp, indent=indent)


if __name__ == '__main__':
    main()
    sys.exit(0)