Mercurial > hg > isophonics-drupal-site
annotate vendor/theseer/tokenizer/src/Tokenizer.php @ 19:fa3358dc1485 tip
Add ndrum files
author | Chris Cannam |
---|---|
date | Wed, 28 Aug 2019 13:14:47 +0100 |
parents | af1871eacc83 |
children |
rev | line source |
---|---|
Chris@14 | 1 <?php declare(strict_types = 1); |
Chris@14 | 2 namespace TheSeer\Tokenizer; |
Chris@14 | 3 |
/**
 * Turns PHP source code into a TokenCollection.
 *
 * Thin wrapper around token_get_all(): single-character tokens, which PHP
 * reports as bare strings, receive a symbolic name from the internal map, and
 * tokens whose text spans several lines are split into one Token per line.
 */
class Tokenizer {

    /**
     * Token Map for "non-tokens"
     *
     * Maps each single-character token that token_get_all() returns as a
     * plain string (rather than as an [id, text, line] array) to the
     * symbolic name used for it in the resulting Token.
     *
     * @var array
     */
    private $map = [
        '(' => 'T_OPEN_BRACKET',
        ')' => 'T_CLOSE_BRACKET',
        '[' => 'T_OPEN_SQUARE',
        ']' => 'T_CLOSE_SQUARE',
        '{' => 'T_OPEN_CURLY',
        '}' => 'T_CLOSE_CURLY',
        ';' => 'T_SEMICOLON',
        '.' => 'T_DOT',
        ',' => 'T_COMMA',
        '=' => 'T_EQUAL',
        '<' => 'T_LT',
        '>' => 'T_GT',
        '+' => 'T_PLUS',
        '-' => 'T_MINUS',
        '*' => 'T_MULT',
        '/' => 'T_DIV',
        '?' => 'T_QUESTION_MARK',
        '!' => 'T_EXCLAMATION_MARK',
        ':' => 'T_COLON',
        '"' => 'T_DOUBLE_QUOTES',
        '@' => 'T_AT',
        '&' => 'T_AMPERSAND',
        '%' => 'T_PERCENT',
        '|' => 'T_PIPE',
        '$' => 'T_DOLLAR',
        '^' => 'T_CARET',
        '~' => 'T_TILDE',
        '`' => 'T_BACKTICK'
    ];

    /**
     * Tokenize the given PHP source.
     *
     * @param string $source PHP source code; an empty string yields an empty collection
     *
     * @return TokenCollection the tokens in source order
     */
    public function parse(string $source): TokenCollection {
        $result = new TokenCollection();

        if ($source === '') {
            return $result;
        }

        $tokens = token_get_all($source);

        // Single-character tokens carry no line number of their own, so they
        // inherit the line of whatever token preceded them. Seed that with the
        // line of the first token.
        $lastToken = new Token(
            $tokens[0][2],
            'Placeholder',
            ''
        );

        foreach ($tokens as $tok) {
            if (is_string($tok)) {
                $token = new Token(
                    $lastToken->getLine(),
                    $this->map[$tok],
                    $tok
                );
                $result->addToken($token);
                $lastToken = $token;
                continue;
            }

            $line = $tok[2];
            $name = token_name($tok[0]);

            // The U modifier makes "+" lazy, so every line break is its own
            // delimiter; blank lines therefore yield empty-valued tokens that
            // keep the line counter below in step with the source.
            $values = preg_split('/\R+/Uu', $tok[1]);

            if ($values === false) {
                // With the u modifier preg_split() fails (returns false) when
                // the token text is not valid UTF-8, e.g. raw binary bytes in
                // a string literal. Record a single placeholder token instead
                // of dropping it; line breaks inside such a token are not
                // counted.
                $token = new Token(
                    $line,
                    $name,
                    '{binary data}'
                );
                $result->addToken($token);
                $lastToken = $token;
                continue;
            }

            foreach ($values as $v) {
                $token = new Token(
                    $line,
                    $name,
                    $v
                );
                $result->addToken($token);
                $line++;
                $lastToken = $token;
            }
        }

        return $result;
    }

}