annotate vendor/theseer/tokenizer/src/Tokenizer.php @ 19:fa3358dc1485 tip

Add ndrum files
author Chris Cannam
date Wed, 28 Aug 2019 13:14:47 +0100
parents af1871eacc83
children
rev   line source
Chris@14 1 <?php declare(strict_types = 1);
Chris@14 2 namespace TheSeer\Tokenizer;
Chris@14 3
class Tokenizer {

    /**
     * Token Map for "non-tokens"
     *
     * token_get_all() reports single-character tokens as plain strings
     * rather than as [id, text, line] arrays; this map gives each of those
     * characters a symbolic name.
     *
     * @var array<string, string>
     */
    private $map = [
        '(' => 'T_OPEN_BRACKET',
        ')' => 'T_CLOSE_BRACKET',
        '[' => 'T_OPEN_SQUARE',
        ']' => 'T_CLOSE_SQUARE',
        '{' => 'T_OPEN_CURLY',
        '}' => 'T_CLOSE_CURLY',
        ';' => 'T_SEMICOLON',
        '.' => 'T_DOT',
        ',' => 'T_COMMA',
        '=' => 'T_EQUAL',
        '<' => 'T_LT',
        '>' => 'T_GT',
        '+' => 'T_PLUS',
        '-' => 'T_MINUS',
        '*' => 'T_MULT',
        '/' => 'T_DIV',
        '?' => 'T_QUESTION_MARK',
        '!' => 'T_EXCLAMATION_MARK',
        ':' => 'T_COLON',
        '"' => 'T_DOUBLE_QUOTES',
        '@' => 'T_AT',
        '&' => 'T_AMPERSAND',
        '%' => 'T_PERCENT',
        '|' => 'T_PIPE',
        '$' => 'T_DOLLAR',
        '^' => 'T_CARET',
        '~' => 'T_TILDE',
        '`' => 'T_BACKTICK'
    ];

    /**
     * Tokenize PHP source code into a TokenCollection.
     *
     * Every token reported by token_get_all() is added to the collection.
     * Array tokens whose text spans several lines are split at the line
     * breaks into one Token per line, with the line number incremented for
     * each part. Single-character string tokens carry no line information,
     * so they take the line of the most recently created token.
     *
     * @param string $source PHP source code to tokenize; an empty string
     *                       yields an empty collection
     *
     * @return TokenCollection the tokens in source order
     */
    public function parse(string $source): TokenCollection {
        $result = new TokenCollection();

        if ($source === '') {
            return $result;
        }

        $tokens = token_get_all($source);

        // Seed "last token" with the line of the first token so that a
        // string token always has a predecessor to take its line from.
        // The placeholder itself is never added to the result.
        $lastToken = new Token(
            $tokens[0][2],
            'Placeholder',
            ''
        );

        foreach ($tokens as $tok) {
            if (is_string($tok)) {
                $token = new Token(
                    $lastToken->getLine(),
                    $this->map[$tok],
                    $tok
                );
                $result->addToken($token);
                $lastToken = $token;
                continue;
            }

            $line   = $tok[2];
            $name   = token_name($tok[0]);
            $values = preg_split('/\R+/Uu', $tok[1]);

            if ($values === false) {
                // With the `u` modifier preg_split() fails (returns false)
                // when the token text is not valid UTF-8, e.g. raw binary
                // data inside a string literal. Emit a single placeholder
                // token instead of iterating over `false`, which would raise
                // a warning and silently drop the token from the stream.
                $token = new Token(
                    $line,
                    $name,
                    '{binary data}'
                );
                $result->addToken($token);
                $lastToken = $token;
                continue;
            }

            foreach ($values as $v) {
                $token = new Token(
                    $line,
                    $name,
                    $v
                );
                $result->addToken($token);
                $line++;
                $lastToken = $token;
            }
        }

        return $result;
    }

}