Chris@14
|
1 <?php declare(strict_types = 1);
|
Chris@14
|
2 namespace TheSeer\Tokenizer;
|
Chris@14
|
3
|
Chris@14
|
/**
 * Turns PHP source code into a TokenCollection, one Token per source line
 * of every token reported by token_get_all().
 */
class Tokenizer {

    /**
     * Token Map for "non-tokens"
     *
     * token_get_all() reports some single characters as plain strings
     * instead of [id, text, line] arrays; this map supplies the token
     * name used for each of them.
     *
     * @var array<string, string>
     */
    private $map = [
        '(' => 'T_OPEN_BRACKET',
        ')' => 'T_CLOSE_BRACKET',
        '[' => 'T_OPEN_SQUARE',
        ']' => 'T_CLOSE_SQUARE',
        '{' => 'T_OPEN_CURLY',
        '}' => 'T_CLOSE_CURLY',
        ';' => 'T_SEMICOLON',
        '.' => 'T_DOT',
        ',' => 'T_COMMA',
        '=' => 'T_EQUAL',
        '<' => 'T_LT',
        '>' => 'T_GT',
        '+' => 'T_PLUS',
        '-' => 'T_MINUS',
        '*' => 'T_MULT',
        '/' => 'T_DIV',
        '?' => 'T_QUESTION_MARK',
        '!' => 'T_EXCLAMATION_MARK',
        ':' => 'T_COLON',
        '"' => 'T_DOUBLE_QUOTES',
        '@' => 'T_AT',
        '&' => 'T_AMPERSAND',
        '%' => 'T_PERCENT',
        '|' => 'T_PIPE',
        '$' => 'T_DOLLAR',
        '^' => 'T_CARET',
        '~' => 'T_TILDE',
        '`' => 'T_BACKTICK',
    ];

    /**
     * Tokenize the given PHP source.
     *
     * Tokens whose text spans several lines are split at the line breaks
     * into one Token per line, with the line number incremented for each
     * part. Single-character string tokens carry no line number of their
     * own and inherit the line of the token created just before them.
     *
     * @param string $source PHP source code to tokenize
     *
     * @return TokenCollection the tokens in source order; empty when the
     *                         source produces no tokens
     */
    public function parse(string $source): TokenCollection {
        $result = new TokenCollection();
        $tokens = token_get_all($source);

        // Nothing to tokenize (e.g. an empty source): there is no first
        // token to take the starting line from, so return early.
        if ($tokens === []) {
            return $result;
        }

        // Seed for the "previous token" so that a leading string token has
        // a line number to inherit. It is never added to the result.
        $lastToken = new Token(
            is_array($tokens[0]) ? $tokens[0][2] : 1,
            'Placeholder',
            ''
        );

        foreach ($tokens as $tok) {
            if (is_string($tok)) {
                $token = new Token(
                    $lastToken->getLine(),
                    $this->map[$tok],
                    $tok
                );
                $result->addToken($token);
                $lastToken = $token;

                continue;
            }

            $line = $tok[2];

            foreach ($this->splitLines($tok[1]) as $v) {
                $token = new Token(
                    $line,
                    token_name($tok[0]),
                    $v
                );
                $result->addToken($token);
                $line++;
                $lastToken = $token;
            }
        }

        return $result;
    }

    /**
     * Split a token's text into its individual lines.
     *
     * @param string $text raw token text as reported by token_get_all()
     *
     * @return string[] the text parts between line breaks, in order
     */
    private function splitLines(string $text): array {
        $values = preg_split('/\R+/Uu', $text);
        if ($values !== false) {
            return $values;
        }

        // The "u" modifier makes preg_split() fail on malformed UTF-8.
        // Retry with a byte-safe pattern so the token is not dropped.
        $values = preg_split('/\r\n|\r|\n/', $text);

        return $values === false ? [$text] : $values;
    }

}
|