'Php5',
    __DIR__ . '/php7.y' => 'Php7',
];

// Inputs (token list, templates), scratch files and output locations used below.
$tokensFile        = __DIR__ . '/tokens.y';
$tokensTemplate    = __DIR__ . '/tokens.template';
$skeletonFile      = __DIR__ . '/parser.template';
$tmpGrammarFile    = __DIR__ . '/tmp_parser.phpy';
$tmpResultFile     = __DIR__ . '/tmp_parser.php';
$resultDir         = __DIR__ . '/../lib/PhpParser/Parser';
$tokensResultsFile = $resultDir . '/Tokens.php';

// check for kmyacc.exe binary in this directory, otherwise fall back to global name
$kmyacc = __DIR__ . '/kmyacc.exe';
if (!file_exists($kmyacc)) {
    $kmyacc = 'kmyacc';
}

// Command line flags: flipping $argv turns each argument into a key for isset() lookups.
$options = array_flip($argv);
$optionDebug = isset($options['--debug']);
$optionKeepTmpGrammar = isset($options['--keep-tmp-grammar']);

///////////////////////////////
/// Utility regex constants ///
///////////////////////////////

// Subroutine library prepended to every pattern built by regex(). The groups
// live inside (?(DEFINE)...) so they never match on their own; they are only
// invoked through (?&string), (?&comment) and (?&code).
const LIB = '(?(DEFINE)
    (?<singleQuotedString>\'[^\\\\\']*+(?:\\\\.[^\\\\\']*+)*+\')
    (?<doubleQuotedString>"[^\\\\"]*+(?:\\\\.[^\\\\"]*+)*+")
    (?<string>(?&singleQuotedString)|(?&doubleQuotedString))
    (?<comment>/\*[^*]*+(?:\*(?!/)[^*]*+)*+\*/)
    (?<code>\{[^\'"/{}]*+(?:(?:(?&string)|(?&comment)|(?&code)|/)[^\'"/{}]*+)*+})
)';

// Balanced [...] and (...) matchers; the inner text is captured as "params" / "args".
const PARAMS = '\[(?<params>[^[\]]*+(?:\[(?&params)\][^[\]]*+)*+)\]';
const ARGS   = '\((?<args>[^()]*+(?:\((?&args)\)[^()]*+)*+)\)';

///////////////////
/// Main script ///
///////////////////

$tokens = readFileOrDie($tokensFile);

foreach ($grammarFileToName as $grammarFile => $name) {
    echo "Building temporary $name grammar file.\n";

    $grammarCode = readFileOrDie($grammarFile);
    $grammarCode = str_replace('%tokens', $tokens, $grammarCode);

    $grammarCode = resolveNodes($grammarCode);
    $grammarCode = resolveMacros($grammarCode);
    $grammarCode = resolveStackAccess($grammarCode);

    file_put_contents($tmpGrammarFile, $grammarCode);

    $additionalArgs = $optionDebug ? '-t -v' : '';

    echo "Building $name parser.\n";
    // Every path is shell-escaped so that a checkout directory containing
    // spaces does not split the command line. shell_exec() returns null when
    // the command prints nothing, hence the string cast before trim().
    $parserCommand = sprintf(
        '%s %s -l -m %s -p %s %s 2>&1',
        escapeshellarg($kmyacc),
        $additionalArgs,
        escapeshellarg($skeletonFile),
        escapeshellarg($name),
        escapeshellarg($tmpGrammarFile)
    );
    $output = trim((string) shell_exec($parserCommand));
    echo "Output: \"$output\"\n";

    // Abort rather than overwrite an existing parser with empty content when
    // kmyacc did not produce a result file.
    $resultCode = readFileOrDie($tmpResultFile);
    $resultCode = removeTrailingWhitespace($resultCode);

    ensureDirExists($resultDir);
    file_put_contents("$resultDir/$name.php", $resultCode);
    unlink($tmpResultFile);

    echo "Building token definition.\n";
    $tokensCommand = sprintf(
        '%s -l -m %s %s 2>&1',
        escapeshellarg($kmyacc),
        escapeshellarg($tokensTemplate),
        escapeshellarg($tmpGrammarFile)
    );
    $output = trim((string) shell_exec($tokensCommand));
    assert($output === '');
    rename($tmpResultFile, $tokensResultsFile);

    if (!$optionKeepTmpGrammar) {
        unlink($tmpGrammarFile);
    }
}

///////////////////////////////
/// Preprocessing functions ///
///////////////////////////////

/**
 * Rewrites the node shorthand "Name[a, b]" into "new Name(a, b, attributes())".
 *
 * Nested shorthands inside the brackets are resolved first. The trailing
 * attributes() call is itself a macro that resolveMacros() expands later.
 *
 * @param string $code Grammar code
 *
 * @return string Grammar code with node shorthands replaced
 */
function resolveNodes($code) {
    return preg_replace_callback(
        '~\b(?<name>[A-Z][a-zA-Z_\\\\]++)\s*' . PARAMS . '~',
        function($matches) {
            // recurse
            $matches['params'] = resolveNodes($matches['params']);

            // Split on top-level commas only: commas inside [...] or (...) are skipped.
            $params = magicSplit(
                '(?:' . PARAMS . '|' . ARGS . ')(*SKIP)(*FAIL)|,',
                $matches['params']
            );

            $paramCode = '';
            foreach ($params as $param) {
                $paramCode .= $param . ', ';
            }

            return 'new ' . $matches['name'] . '(' . $paramCode . 'attributes())';
        },
        $code
    );
}

/**
 * Expands the grammar macros (attributes(), init(), push(), ...) into PHP code.
 *
 * Only lower-case initial identifiers followed by a parenthesised argument
 * list are considered; "array(" and names preceded by "::" or "->" are not.
 * Names that are not a known macro are returned exactly as matched, including
 * their original argument text.
 *
 * @param string $code Grammar code
 *
 * @return string Grammar code with macros expanded
 */
function resolveMacros($code) {
    return preg_replace_callback(
        '~\b(?<!::|->)(?!array\()(?<name>[a-z][A-Za-z]++)' . ARGS . '~',
        function($matches) {
            // recurse
            $matches['args'] = resolveMacros($matches['args']);

            $name = $matches['name'];
            $args = magicSplit(
                '(?:' . PARAMS . '|' . ARGS . ')(*SKIP)(*FAIL)|,',
                $matches['args']
            );

            switch ($name) {
                case 'attributes':
                    assertArgs(0, $args, $name);
                    return '$this->startAttributeStack[#1] + $this->endAttributes';

                case 'stackAttributes':
                    assertArgs(1, $args, $name);
                    return '$this->startAttributeStack[' . $args[0] . ']'
                         . ' + $this->endAttributeStack[' . $args[0] . ']';

                case 'init':
                    return '$$ = array(' . implode(', ', $args) . ')';

                case 'push':
                    assertArgs(2, $args, $name);
                    return $args[0] . '[] = ' . $args[1] . '; $$ = ' . $args[0];

                case 'pushNormalizing':
                    assertArgs(2, $args, $name);
                    return 'if (is_array(' . $args[1] . ')) { $$ = array_merge(' . $args[0] . ', ' . $args[1] . '); }'
                         . ' else { ' . $args[0] . '[] = ' . $args[1] . '; $$ = ' . $args[0] . '; }';

                case 'toArray':
                    assertArgs(1, $args, $name);
                    return 'is_array(' . $args[0] . ') ? ' . $args[0] . ' : array(' . $args[0] . ')';

                case 'parseVar':
                    assertArgs(1, $args, $name);
                    return 'substr(' . $args[0] . ', 1)';

                case 'parseEncapsed':
                    assertArgs(3, $args, $name);
                    return 'foreach (' . $args[0] . ' as $s) { if ($s instanceof Node\Scalar\EncapsedStringPart) {'
                         . ' $s->value = Node\Scalar\String_::parseEscapeSequences($s->value, ' . $args[1] . ', ' . $args[2] . '); } }';

                case 'makeNop':
                    assertArgs(3, $args, $name);
                    return '$startAttributes = ' . $args[1] . ';'
                         . ' if (isset($startAttributes[\'comments\']))'
                         . ' { ' . $args[0] . ' = new Stmt\Nop($startAttributes + ' . $args[2] . '); }'
                         . ' else { ' . $args[0] . ' = null; }';

                case 'strKind':
                    assertArgs(1, $args, $name);
                    return '(' . $args[0] . '[0] === "\'" || (' . $args[0] . '[1] === "\'" && '
                         . '(' . $args[0] . '[0] === \'b\' || ' . $args[0] . '[0] === \'B\')) '
                         . '? Scalar\String_::KIND_SINGLE_QUOTED : Scalar\String_::KIND_DOUBLE_QUOTED)';

                case 'prependLeadingComments':
                    assertArgs(1, $args, $name);
                    return '$attrs = $this->startAttributeStack[#1]; $stmts = ' . $args[0] . '; '
                         . 'if (!empty($attrs[\'comments\'])) {'
                         . '$stmts[0]->setAttribute(\'comments\', '
                         . 'array_merge($attrs[\'comments\'], $stmts[0]->getAttribute(\'comments\', []))); }';
            }

            // Not a macro: leave the call as written.
            return $matches[0];
        },
        $code
    );
}

/**
 * Terminates the script if a macro received an unexpected number of arguments.
 *
 * @param int      $num  Expected argument count
 * @param string[] $args Arguments the macro was called with
 * @param string   $name Macro name, used in the error message
 */
function assertArgs($num, $args, $name) {
    if ((int) $num !== count($args)) {
        die('Wrong argument count for ' . $name . '().');
    }
}

/**
 * Translates stack placeholders into parser code.
 *
 * "$N" becomes "$this->semStack[$N]" first; afterwards "#N" becomes "$N".
 * The order matters: "#N" results must not be picked up by the first rule.
 *
 * @param string $code Grammar code
 *
 * @return string Grammar code with stack accesses resolved
 */
function resolveStackAccess($code) {
    $code = preg_replace('/\$\d+/', '$this->semStack[$0]', $code);
    $code = preg_replace('/#(\d+)/', '$$1', $code);
    return $code;
}

/**
 * Strips trailing whitespace from every "\n"-separated line.
 *
 * @param string $code
 *
 * @return string
 */
function removeTrailingWhitespace($code) {
    $lines = explode("\n", $code);
    $lines = array_map('rtrim', $lines);
    return implode("\n", $lines);
}

/**
 * Creates the directory (including missing parents) if it does not exist yet.
 *
 * @param string $dir
 */
function ensureDirExists($dir) {
    if (!is_dir($dir)) {
        mkdir($dir, 0777, true);
    }
}

/**
 * Reads a whole file, terminating the script when it cannot be read.
 *
 * @param string $path
 *
 * @return string File contents
 */
function readFileOrDie($path) {
    $contents = file_get_contents($path);
    if ($contents === false) {
        die('Unable to read ' . $path . '.');
    }
    return $contents;
}

//////////////////////////////
/// Regex helper functions ///
//////////////////////////////

/**
 * Wraps a pattern fragment into a complete "~"-delimited regex with LIB prepended.
 *
 * @param string $regex Pattern fragment; "~" characters in it are escaped
 *
 * @return string Complete pattern
 */
function regex($regex) {
    return '~' . LIB . '(?:' . str_replace('~', '\~', $regex) . ')~';
}

/**
 * Splits a string on the given pattern while ignoring matches that fall
 * inside strings, comments or {...} code blocks, and trims every piece.
 *
 * @param string $regex  Pattern fragment describing the separator
 * @param string $string Text to split
 *
 * @return string[] Trimmed pieces; an empty array when the input is blank
 */
function magicSplit($regex, $string) {
    $pieces = preg_split(regex('(?:(?&string)|(?&comment)|(?&code))(*SKIP)(*FAIL)|' . $regex), $string);
    if ($pieces === false) {
        die('Failed to split "' . $string . '" (PCRE error ' . preg_last_error() . ').');
    }

    $pieces = array_map('trim', $pieces);

    if ($pieces === ['']) {
        return [];
    }

    return $pieces;
}