Chris@0
|
1 <?php
|
Chris@0
|
2 /*
|
Chris@0
|
3 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
Chris@0
|
4 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
Chris@0
|
5 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
Chris@0
|
6 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
Chris@0
|
7 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
Chris@0
|
8 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
Chris@0
|
9 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
Chris@0
|
10 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
Chris@0
|
11 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
Chris@0
|
12 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
Chris@0
|
13 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
Chris@0
|
14 *
|
Chris@0
|
15 * This software consists of voluntary contributions made by many individuals
|
Chris@0
|
16 * and is licensed under the MIT license. For more information, see
|
Chris@0
|
17 * <http://www.doctrine-project.org>.
|
Chris@0
|
18 */
|
Chris@0
|
19
|
Chris@0
|
20 namespace Doctrine\Common\Lexer;
|
Chris@0
|
21
|
Chris@0
|
/**
 * Base class for writing simple lexers, i.e. for creating small DSLs.
 *
 * A concrete lexer supplies the regular-expression fragments that delimit
 * tokens (getCatchablePatterns() / getNonCatchablePatterns()) and a
 * getType() implementation that classifies each matched fragment. This base
 * class splits the input into a token list and offers cursor-style access to
 * it (moveNext(), peek(), glimpse(), skipUntil(), ...).
 *
 * @since  2.0
 * @author Guilherme Blanco <guilhermeblanco@hotmail.com>
 * @author Jonathan Wage <jonwage@gmail.com>
 * @author Roman Borschel <roman@code-factory.org>
 */
abstract class AbstractLexer
{
    /**
     * Lexer original input string.
     *
     * @var string
     */
    private $input;

    /**
     * Array of scanned tokens.
     *
     * Each token is an associative array containing three items:
     *  - 'value'    : the string value of the token in the input string
     *  - 'type'     : the type of the token as returned by getType()
     *  - 'position' : the offset of the token in the input string, as
     *                 reported by preg_split()
     *
     * @var array
     */
    private $tokens = array();

    /**
     * Index into the token list of the token that the next moveNext() call
     * will load into the lookahead.
     *
     * @var integer
     */
    private $position = 0;

    /**
     * Number of tokens already consumed by peek() since the peek pointer was
     * last reset; it is an offset relative to $position.
     *
     * @var integer
     */
    private $peek = 0;

    /**
     * Composed regular expression used by scan() to split the input.
     *
     * Built on the first scan and cached on the instance. It is deliberately
     * NOT a function-level static: such a static would be shared between
     * lexer instances (and, as of PHP 8.1, between every subclass inheriting
     * scan()), so a second lexer would silently reuse the first one's
     * patterns.
     *
     * @var string|null
     */
    private $regex;

    /**
     * The next token in the input, or NULL before the first moveNext() call
     * and once the end of the token list has been reached.
     *
     * @var array|null
     */
    public $lookahead;

    /**
     * The last matched/seen token, or NULL while no token has been consumed.
     *
     * @var array|null
     */
    public $token;

    /**
     * Sets the input data to be tokenized.
     *
     * The Lexer is immediately reset and the new input tokenized.
     * Any unprocessed tokens from any previous input are lost.
     *
     * @param string $input The input to be tokenized.
     *
     * @return void
     */
    public function setInput($input)
    {
        $this->input  = $input;
        $this->tokens = array();

        $this->reset();
        $this->scan($input);
    }

    /**
     * Resets the lexer: clears the current and lookahead tokens and rewinds
     * both the position and the peek pointer. The scanned tokens are kept.
     *
     * @return void
     */
    public function reset()
    {
        $this->lookahead = null;
        $this->token     = null;
        $this->peek      = 0;
        $this->position  = 0;
    }

    /**
     * Resets the peek pointer to 0.
     *
     * @return void
     */
    public function resetPeek()
    {
        $this->peek = 0;
    }

    /**
     * Resets the lexer position on the input to the given position.
     *
     * Only the position is changed; the current token, the lookahead and the
     * peek pointer are left untouched.
     *
     * @param integer $position Index in the token list to continue from.
     *
     * @return void
     */
    public function resetPosition($position = 0)
    {
        $this->position = $position;
    }

    /**
     * Retrieve the original lexer's input until a given position.
     *
     * @param integer $position Length of the prefix to return, as used by substr().
     *
     * @return string
     */
    public function getInputUntilPosition($position)
    {
        return substr($this->input, 0, $position);
    }

    /**
     * Checks whether a given token type matches the current lookahead.
     *
     * The comparison is strict (===) and is always FALSE when there is no
     * lookahead token.
     *
     * @param integer|string $token
     *
     * @return boolean
     */
    public function isNextToken($token)
    {
        return null !== $this->lookahead && $this->lookahead['type'] === $token;
    }

    /**
     * Checks whether any of the given token types matches the current lookahead.
     *
     * The comparison is strict and is always FALSE when there is no
     * lookahead token.
     *
     * @param array $tokens
     *
     * @return boolean
     */
    public function isNextTokenAny(array $tokens)
    {
        return null !== $this->lookahead && in_array($this->lookahead['type'], $tokens, true);
    }

    /**
     * Moves to the next token in the input string.
     *
     * The previous lookahead becomes the current token, the peek pointer is
     * reset, and the token at the current position (if any) becomes the new
     * lookahead.
     *
     * @return boolean TRUE if a lookahead token is available afterwards, FALSE otherwise.
     */
    public function moveNext()
    {
        $this->peek      = 0;
        $this->token     = $this->lookahead;
        $this->lookahead = (isset($this->tokens[$this->position]))
            ? $this->tokens[$this->position++] : null;

        return $this->lookahead !== null;
    }

    /**
     * Tells the lexer to skip input tokens until the lookahead has the given type.
     *
     * Stops as soon as the lookahead's type is identical to $type or the
     * lookahead is NULL; if the lookahead is NULL on entry nothing is skipped.
     *
     * @param string $type The token type to skip until.
     *
     * @return void
     */
    public function skipUntil($type)
    {
        while ($this->lookahead !== null && $this->lookahead['type'] !== $type) {
            $this->moveNext();
        }
    }

    /**
     * Checks if the type of the given value is identical to the given token type.
     *
     * @param mixed   $value
     * @param integer $token
     *
     * @return boolean
     */
    public function isA($value, $token)
    {
        // getType() receives $value by reference and may rewrite it; only the
        // local copy is affected here.
        return $this->getType($value) === $token;
    }

    /**
     * Moves the lookahead token forward.
     *
     * Each call returns the next not-yet-peeked token after the current
     * position and advances the peek pointer; the position itself is not
     * changed.
     *
     * @return array|null The next token or NULL if there are no more tokens ahead.
     */
    public function peek()
    {
        if (isset($this->tokens[$this->position + $this->peek])) {
            return $this->tokens[$this->position + $this->peek++];
        }

        return null;
    }

    /**
     * Peeks at the next token, returns it and immediately resets the peek.
     *
     * @return array|null The next token or NULL if there are no more tokens ahead.
     */
    public function glimpse()
    {
        $peek       = $this->peek();
        $this->peek = 0;

        return $peek;
    }

    /**
     * Scans the input string for tokens and appends them to the token list.
     *
     * @param string $input A query string.
     *
     * @return void
     */
    protected function scan($input)
    {
        // Cache the composed pattern per instance (see the $regex property
        // for why a function-level static must not be used here).
        if (! isset($this->regex)) {
            $this->regex = sprintf(
                '/(%s)|%s/%s',
                implode(')|(', $this->getCatchablePatterns()),
                implode('|', $this->getNonCatchablePatterns()),
                $this->getModifiers()
            );
        }

        $flags   = PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_OFFSET_CAPTURE;
        $matches = preg_split($this->regex, $input, -1, $flags);

        if (false === $matches) {
            // preg_split() reports failure with FALSE. Iterating that would
            // raise a warning and leave the lexer without any token, so the
            // whole input is exposed as one single token at offset 0 instead.
            $matches = array(array($input, 0));
        }

        foreach ($matches as $match) {
            // Must remain before 'value' assignment since it can change content
            $type = $this->getType($match[0]);

            $this->tokens[] = array(
                'value'    => $match[0],
                'type'     => $type,
                'position' => $match[1],
            );
        }
    }

    /**
     * Gets the literal for a given token.
     *
     * Looks for a class constant of the concrete lexer whose value is
     * identical (===) to $token.
     *
     * @param integer $token
     *
     * @return integer|string "ClassName::CONSTANT_NAME" of the first matching
     *                        constant, or $token unchanged if none matches.
     */
    public function getLiteral($token)
    {
        $className = get_class($this);
        $reflClass = new \ReflectionClass($className);
        $constants = $reflClass->getConstants();

        foreach ($constants as $name => $value) {
            if ($value === $token) {
                return $className . '::' . $name;
            }
        }

        return $token;
    }

    /**
     * Regex modifiers appended to the composed pattern used by scan().
     *
     * @return string
     */
    protected function getModifiers()
    {
        return 'i';
    }

    /**
     * Lexical catchable patterns.
     *
     * Each pattern is wrapped in its own capturing group by scan(), so the
     * text it matches is kept as a token.
     *
     * @return array
     */
    abstract protected function getCatchablePatterns();

    /**
     * Lexical non-catchable patterns.
     *
     * These are joined as plain alternatives by scan(); text they match only
     * separates tokens, unless a pattern brings its own capturing group.
     *
     * @return array
     */
    abstract protected function getNonCatchablePatterns();

    /**
     * Retrieve token type. Also processes the token value if necessary.
     *
     * @param string $value Passed by reference so the implementation can rewrite it.
     *
     * @return integer
     */
    abstract protected function getType(&$value);
}
|