#! /usr/bin/env python3
"""Generate vis-encoded / unvis-decoded string pairs as a JSON fixture file.

For every codepoint of the selected range, and for every entry of
CUSTOM_FIXTURES, the string is encoded with ``vis(..., VIS_WHITE)`` and the
result is decoded again with ``unvis``.  The ``(encoded, decoded)`` pairs are
written as a JSON array to the output file.
"""
import sys
from os.path import abspath, expanduser, dirname, join
from itertools import chain
import json
import argparse

from vis import vis, unvis, VIS_WHITE


# Directory containing this script.  Deliberately NOT named ``__dir__``:
# a module-level ``__dir__`` is the hook used by ``dir(module)`` (PEP 562)
# and must be callable.
SCRIPT_DIR = dirname(abspath(__file__))

# Default destination of the generated fixtures.
OUTPUT_FILE = join(SCRIPT_DIR, '..', 'fixtures', 'unvis_fixtures.json')

# Add custom fixtures here
CUSTOM_FIXTURES = [
    # test long multibyte string
    ''.join(chr(cp) for cp in range(1024)),
    'foo bar',
    'foo\nbar',
    "$bar = 'baz';",
    r'$foo = "\x20\\x20\\\x20\\\\x20"',
    '$foo = function($bar) use($baz) {\n\treturn $baz->getFoo()\n};'
]

# Named codepoint ranges selectable from the command line.
# NOTE: the values are single-use iterators; the script consumes exactly one
# of them, exactly once.
RANGES = {
    # All valid codepoints in the BMP: everything from U+0000 to U+FFFF
    # except the surrogates U+D800..U+DFFF.  range() excludes its upper
    # bound, hence 0x10000 to include U+FFFF.
    'bmp': chain(range(0x0000, 0xD800), range(0xE000, 0x10000)),
    # Smaller set of pertinent? codepoints inside BMP
    # see: http://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane
    'small': chain(
        # latin blocks
        range(0x0000, 0x0250),
        # Greek, Cyrillic
        range(0x0370, 0x0530),
        # Hebrew, Arabic
        range(0x590, 0x0700),
        # CJK radicals
        range(0x2E80, 0x2F00),
        # Hiragana, Katakana
        range(0x3040, 0x3100)
    )
}


def _parse_args(argv=None):
    """Parse the command line (``sys.argv[1:]`` when *argv* is None)."""
    argp = argparse.ArgumentParser(
        description='Generates test data for Psy\\Test\\Util\\StrTest')
    argp.add_argument('-f', '--format-output', action='store_true',
                      help='Indent JSON output to ease debugging')
    argp.add_argument('-a', '--all', action='store_true',
                      help="""Generates test data for all codepoints of the BMP.
                      (same as --range=bmp). WARNING: You will need quite
                      a lot of RAM to run the testsuite !
                      """)
    argp.add_argument('-r', '--range',
                      help="""Choose the range of codepoints used to generate
                      test data.""",
                      choices=list(RANGES),
                      default='small')
    argp.add_argument('-o', '--output-file',
                      help="""Write test data to OUTPUT_FILE
                      (defaults to PSYSH_DIR/test/fixtures)""")
    return argp.parse_args(argv)


def _make_fixtures(strings):
    """Return one ``(encoded, decoded)`` pair per string, in input order."""
    fixtures = []
    for text in strings:
        encoded = vis(text, VIS_WHITE)
        fixtures.append((encoded, unvis(encoded)))
    return fixtures


def main(argv=None):
    """Generate the fixtures and write them as JSON; return the exit status.

    *argv* is the list of command-line arguments; ``sys.argv[1:]`` is used
    when it is None.
    """
    args = _parse_args(argv)

    # --all takes precedence over --range.
    cp_range = RANGES['bmp'] if args.all else RANGES[args.range]
    indent = 2 if args.format_output else None
    if args.output_file:
        output_file = abspath(expanduser(args.output_file))
    else:
        output_file = OUTPUT_FILE

    # The 'small' range (the default) should be enough; use the 'bmp' range
    # for a more complete smoke test.
    # Custom fixtures go at the end, since they would fail anyway if one of
    # the single-codepoint fixtures did.
    fixtures = _make_fixtures(chain(map(chr, cp_range), CUSTOM_FIXTURES))

    with open(output_file, 'w', encoding='utf-8') as fp:
        # dump as json to avoid a backslashing and quoting nightmare
        # between php and python
        json.dump(fixtures, fp, indent=indent)

    return 0


if __name__ == '__main__':
    sys.exit(main())