Mercurial > hg > isophonics-drupal-site
comparison vendor/doctrine/lexer/lib/Doctrine/Common/Lexer/AbstractLexer.php @ 0:4c8ae668cc8c
Initial import (non-working)
author | Chris Cannam |
---|---|
date | Wed, 29 Nov 2017 16:09:58 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4c8ae668cc8c |
---|---|
1 <?php | |
2 /* | |
3 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
4 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
5 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
6 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
7 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
8 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
9 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
10 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
11 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
12 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
13 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
14 * | |
15 * This software consists of voluntary contributions made by many individuals | |
16 * and is licensed under the MIT license. For more information, see | |
17 * <http://www.doctrine-project.org>. | |
18 */ | |
19 | |
20 namespace Doctrine\Common\Lexer; | |
21 | |
22 /** | |
23 * Base class for writing simple lexers, i.e. for creating small DSLs. | |
24 * | |
25 * @since 2.0 | |
26 * @author Guilherme Blanco <guilhermeblanco@hotmail.com> | |
27 * @author Jonathan Wage <jonwage@gmail.com> | |
28 * @author Roman Borschel <roman@code-factory.org> | |
29 */ | |
abstract class AbstractLexer
{
    /**
     * The original input string handed to the lexer.
     *
     * @var string
     */
    private $input;

    /**
     * Array of scanned tokens.
     *
     * Each token is an associative array containing three items:
     *  - 'value'    : the string value of the token in the input string
     *  - 'type'     : the type of the token, as returned by getType()
     *  - 'position' : the offset of the token in the input string
     *
     * @var array
     */
    private $tokens = array();

    /**
     * Index into the token array of the next token moveNext() will load.
     *
     * @var integer
     */
    private $position = 0;

    /**
     * Offset, relative to the current position, of the next token peek() returns.
     *
     * @var integer
     */
    private $peek = 0;

    /**
     * The next token in the input, or NULL before the first moveNext() call
     * and after the last token has been consumed.
     *
     * @var array|null
     */
    public $lookahead;

    /**
     * The last matched/seen token (the previous lookahead), or NULL.
     *
     * @var array|null
     */
    public $token;

    /**
     * Composed regular expression used by scan(), built lazily on first use.
     *
     * Kept per instance on purpose: a function-level static would be shared by
     * every subclass inheriting scan() on PHP >= 8.1, so the first lexer to
     * scan would dictate the patterns used by all other lexers.
     *
     * @var string|null
     */
    private $regex;

    /**
     * Sets the input data to be tokenized.
     *
     * The lexer is immediately reset and the new input tokenized.
     * Any unprocessed tokens from any previous input are lost.
     *
     * @param string $input The input to be tokenized.
     *
     * @return void
     */
    public function setInput($input)
    {
        $this->input  = $input;
        $this->tokens = array();

        $this->reset();
        $this->scan($input);
    }

    /**
     * Resets the lexer: clears lookahead and current token and rewinds both
     * the position and the peek pointer. The scanned tokens are kept.
     *
     * @return void
     */
    public function reset()
    {
        $this->lookahead = null;
        $this->token     = null;
        $this->peek      = 0;
        $this->position  = 0;
    }

    /**
     * Resets the peek pointer to 0.
     *
     * @return void
     */
    public function resetPeek()
    {
        $this->peek = 0;
    }

    /**
     * Resets the lexer position (token index) to the given position.
     *
     * @param integer $position Position to place the lexical scanner.
     *
     * @return void
     */
    public function resetPosition($position = 0)
    {
        $this->position = $position;
    }

    /**
     * Retrieves the original lexer input up to (not including) a given offset.
     *
     * @param integer $position
     *
     * @return string
     */
    public function getInputUntilPosition($position)
    {
        return substr($this->input, 0, $position);
    }

    /**
     * Checks whether the type of the current lookahead is identical to the given token type.
     *
     * @param integer|string $token
     *
     * @return boolean FALSE when there is no lookahead.
     */
    public function isNextToken($token)
    {
        return null !== $this->lookahead && $this->lookahead['type'] === $token;
    }

    /**
     * Checks whether the type of the current lookahead is one of the given token types.
     *
     * @param array $tokens
     *
     * @return boolean FALSE when there is no lookahead.
     */
    public function isNextTokenAny(array $tokens)
    {
        return null !== $this->lookahead && in_array($this->lookahead['type'], $tokens, true);
    }

    /**
     * Moves to the next token in the input string.
     *
     * The previous lookahead becomes the current token and the peek pointer
     * is reset.
     *
     * @return boolean TRUE if a new lookahead token is available, FALSE at end of input.
     */
    public function moveNext()
    {
        $this->peek      = 0;
        $this->token     = $this->lookahead;
        $this->lookahead = (isset($this->tokens[$this->position]))
            ? $this->tokens[$this->position++] : null;

        return $this->lookahead !== null;
    }

    /**
     * Tells the lexer to skip input tokens until the lookahead has the given type
     * or the input is exhausted.
     *
     * @param string $type The token type to skip until.
     *
     * @return void
     */
    public function skipUntil($type)
    {
        while ($this->lookahead !== null && $this->lookahead['type'] !== $type) {
            $this->moveNext();
        }
    }

    /**
     * Checks whether the type computed for the given value is identical to the given token type.
     *
     * @param mixed   $value
     * @param integer $token
     *
     * @return boolean
     */
    public function isA($value, $token)
    {
        return $this->getType($value) === $token;
    }

    /**
     * Returns the token at the peek pointer and advances the peek pointer.
     *
     * The lookahead and the current token are left untouched.
     *
     * @return array|null The next token or NULL if there are no more tokens ahead.
     */
    public function peek()
    {
        if (isset($this->tokens[$this->position + $this->peek])) {
            return $this->tokens[$this->position + $this->peek++];
        }

        return null;
    }

    /**
     * Peeks at the next token, returns it and immediately resets the peek.
     *
     * @return array|null The next token or NULL if there are no more tokens ahead.
     */
    public function glimpse()
    {
        $peek       = $this->peek();
        $this->peek = 0;

        return $peek;
    }

    /**
     * Scans the input string and appends the tokens found to the token list.
     *
     * @param string $input A query string.
     *
     * @return void
     */
    protected function scan($input)
    {
        if ($this->regex === null) {
            // Catchable patterns become capture groups (returned as tokens);
            // non-catchable patterns only act as separators.
            $this->regex = sprintf(
                '/(%s)|%s/%s',
                implode(')|(', $this->getCatchablePatterns()),
                implode('|', $this->getNonCatchablePatterns()),
                $this->getModifiers()
            );
        }

        $flags   = PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_OFFSET_CAPTURE;
        $matches = preg_split($this->regex, $input, -1, $flags);

        if ($matches === false) {
            // preg_split() failed (PCRE error); rather than iterating over
            // FALSE, treat the whole input as a single token.
            $matches = array(array($input, 0));
        }

        foreach ($matches as $match) {
            // Must remain before 'value' assignment since getType() receives
            // the value by reference and can change its content.
            $type = $this->getType($match[0]);

            $this->tokens[] = array(
                'value'    => $match[0],
                'type'     => $type,
                'position' => $match[1],
            );
        }
    }

    /**
     * Gets the literal for a given token.
     *
     * Looks for a constant of the concrete lexer class whose value is
     * identical to the token.
     *
     * @param integer|string $token
     *
     * @return integer|string "ClassName::CONSTANT" for the first matching constant,
     *                        otherwise the token itself, unchanged.
     */
    public function getLiteral($token)
    {
        $className = get_class($this);
        $reflClass = new \ReflectionClass($className);
        $constants = $reflClass->getConstants();

        foreach ($constants as $name => $value) {
            if ($value === $token) {
                return $className . '::' . $name;
            }
        }

        return $token;
    }

    /**
     * Regex modifiers appended to the composed pattern.
     *
     * @return string
     */
    protected function getModifiers()
    {
        return 'i';
    }

    /**
     * Lexical catchable patterns.
     *
     * @return array
     */
    abstract protected function getCatchablePatterns();

    /**
     * Lexical non-catchable patterns.
     *
     * @return array
     */
    abstract protected function getNonCatchablePatterns();

    /**
     * Retrieve token type. Also processes the token value if necessary.
     *
     * @param string $value Passed by reference; implementations may rewrite it.
     *
     * @return integer
     */
    abstract protected function getType(&$value);
}