annotate vendor/doctrine/lexer/lib/Doctrine/Common/Lexer/AbstractLexer.php @ 19:fa3358dc1485 tip

Add ndrum files
author Chris Cannam
date Wed, 28 Aug 2019 13:14:47 +0100
parents 4c8ae668cc8c
children
rev   line source
Chris@0 1 <?php
Chris@0 2 /*
Chris@0 3 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
Chris@0 4 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
Chris@0 5 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
Chris@0 6 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
Chris@0 7 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
Chris@0 8 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
Chris@0 9 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
Chris@0 10 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
Chris@0 11 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
Chris@0 12 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
Chris@0 13 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Chris@0 14 *
Chris@0 15 * This software consists of voluntary contributions made by many individuals
Chris@0 16 * and is licensed under the MIT license. For more information, see
Chris@0 17 * <http://www.doctrine-project.org>.
Chris@0 18 */
Chris@0 19
Chris@0 20 namespace Doctrine\Common\Lexer;
Chris@0 21
/**
 * Base class for writing simple lexers, i.e. for creating small DSLs.
 *
 * A concrete lexer supplies the regular-expression fragments that delimit
 * tokens (getCatchablePatterns() / getNonCatchablePatterns()) and a
 * getType() implementation that classifies each scanned value. The whole
 * input is tokenized eagerly by setInput(); the resulting token list is then
 * walked with moveNext() / peek() / glimpse().
 *
 * @since  2.0
 * @author Guilherme Blanco <guilhermeblanco@hotmail.com>
 * @author Jonathan Wage <jonwage@gmail.com>
 * @author Roman Borschel <roman@code-factory.org>
 */
abstract class AbstractLexer
{
    /**
     * Lexer original input string.
     *
     * @var string
     */
    private $input;

    /**
     * Array of scanned tokens.
     *
     * Each token is an associative array containing three items:
     *  - 'value'    : the string value of the token in the input string
     *  - 'type'     : the type of the token (identifier, numeric, string, input
     *                 parameter, none)
     *  - 'position' : the position of the token in the input string
     *
     * @var array
     */
    private $tokens = array();

    /**
     * Index into the token list of the next token moveNext() will load into
     * the lookahead.
     *
     * @var integer
     */
    private $position = 0;

    /**
     * Offset, relative to the current position, of the token the next call
     * to peek() will return.
     *
     * @var integer
     */
    private $peek = 0;

    /**
     * Composed regular expression used to split the input into tokens.
     *
     * Built lazily on the first scan and kept per instance, so that lexer
     * subclasses with different patterns never see each other's expression.
     *
     * @var string|null
     */
    private $regex;

    /**
     * The next token in the input.
     *
     * @var array|null
     */
    public $lookahead;

    /**
     * The last matched/seen token.
     *
     * @var array|null
     */
    public $token;

    /**
     * Sets the input data to be tokenized.
     *
     * The Lexer is immediately reset and the new input tokenized.
     * Any unprocessed tokens from any previous input are lost.
     *
     * @param string $input The input to be tokenized.
     *
     * @return void
     */
    public function setInput($input)
    {
        $this->input  = $input;
        $this->tokens = array();

        $this->reset();
        $this->scan($input);
    }

    /**
     * Resets the lexer.
     *
     * Clears the current and lookahead tokens and rewinds both the position
     * and the peek offset; the scanned token list itself is kept.
     *
     * @return void
     */
    public function reset()
    {
        $this->lookahead = null;
        $this->token     = null;
        $this->peek      = 0;
        $this->position  = 0;
    }

    /**
     * Resets the peek pointer to 0.
     *
     * @return void
     */
    public function resetPeek()
    {
        $this->peek = 0;
    }

    /**
     * Resets the lexer position to the given position.
     *
     * The position is an index into the list of scanned tokens, not a
     * character offset in the input string.
     *
     * @param integer $position Position to place the lexical scanner.
     *
     * @return void
     */
    public function resetPosition($position = 0)
    {
        $this->position = $position;
    }

    /**
     * Retrieve the original lexer's input until a given position.
     *
     * @param integer $position Offset in the input string, as passed to substr().
     *
     * @return string
     */
    public function getInputUntilPosition($position)
    {
        return substr($this->input, 0, $position);
    }

    /**
     * Checks whether a given token type matches the type of the current lookahead.
     *
     * @param integer|string $token
     *
     * @return boolean
     */
    public function isNextToken($token)
    {
        return null !== $this->lookahead && $this->lookahead['type'] === $token;
    }

    /**
     * Checks whether any of the given token types matches the type of the
     * current lookahead.
     *
     * @param array $tokens
     *
     * @return boolean
     */
    public function isNextTokenAny(array $tokens)
    {
        return null !== $this->lookahead && in_array($this->lookahead['type'], $tokens, true);
    }

    /**
     * Moves to the next token in the input string.
     *
     * The previous lookahead becomes the current token and the peek offset
     * is reset.
     *
     * @return boolean TRUE if a lookahead token is available after the move,
     *                 FALSE once the end of the token list has been reached.
     */
    public function moveNext()
    {
        $this->peek      = 0;
        $this->token     = $this->lookahead;
        $this->lookahead = (isset($this->tokens[$this->position]))
            ? $this->tokens[$this->position++] : null;

        return $this->lookahead !== null;
    }

    /**
     * Tells the lexer to skip input tokens until it sees a token with the given type.
     *
     * Stops with the matching token as the lookahead, or with a NULL
     * lookahead if no such token remains.
     *
     * @param integer|string $type The token type to skip until.
     *
     * @return void
     */
    public function skipUntil($type)
    {
        while ($this->lookahead !== null && $this->lookahead['type'] !== $type) {
            $this->moveNext();
        }
    }

    /**
     * Checks whether the type resolved for the given value is identical to
     * the given token type.
     *
     * @param mixed          $value
     * @param integer|string $token
     *
     * @return boolean
     */
    public function isA($value, $token)
    {
        return $this->getType($value) === $token;
    }

    /**
     * Returns the token at the current peek offset and moves the peek
     * offset forward, without consuming anything.
     *
     * @return array|null The next token or NULL if there are no more tokens ahead.
     */
    public function peek()
    {
        if (isset($this->tokens[$this->position + $this->peek])) {
            return $this->tokens[$this->position + $this->peek++];
        }

        return null;
    }

    /**
     * Peeks at the next token, returns it and immediately resets the peek.
     *
     * @return array|null The next token or NULL if there are no more tokens ahead.
     */
    public function glimpse()
    {
        $peek       = $this->peek();
        $this->peek = 0;

        return $peek;
    }

    /**
     * Scans the input string for tokens.
     *
     * @param string $input A query string.
     *
     * @return void
     */
    protected function scan($input)
    {
        // The expression is cached on the instance rather than in a
        // function-level static: statics of an inherited method can be shared
        // across subclasses, which would leak one lexer's patterns into another.
        if ( ! isset($this->regex)) {
            $this->regex = sprintf(
                '/(%s)|%s/%s',
                implode(')|(', $this->getCatchablePatterns()),
                implode('|', $this->getNonCatchablePatterns()),
                $this->getModifiers()
            );
        }

        $flags   = PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_OFFSET_CAPTURE;
        $matches = preg_split($this->regex, $input, -1, $flags);

        if ($matches === false) {
            // preg_split() failed; fall back to treating the whole input as a
            // single token at offset 0 instead of iterating over FALSE.
            $matches = array(array($input, 0));
        }

        foreach ($matches as $match) {
            // Must remain before 'value' assignment since it can change content
            $type = $this->getType($match[0]);

            $this->tokens[] = array(
                'value'    => $match[0],
                'type'     => $type,
                'position' => $match[1],
            );
        }
    }

    /**
     * Gets the literal for a given token.
     *
     * Looks for a class constant of the concrete lexer whose value is
     * identical to the token and returns it as "ClassName::CONSTANT_NAME".
     *
     * @param integer|string $token
     *
     * @return integer|string The qualified constant name, or the token itself
     *                        unchanged when no constant matches.
     */
    public function getLiteral($token)
    {
        $className = get_class($this);
        $reflClass = new \ReflectionClass($className);
        $constants = $reflClass->getConstants();

        foreach ($constants as $name => $value) {
            if ($value === $token) {
                return $className . '::' . $name;
            }
        }

        return $token;
    }

    /**
     * Regex modifiers
     *
     * @return string
     */
    protected function getModifiers()
    {
        return 'i';
    }

    /**
     * Lexical catchable patterns.
     *
     * Each pattern is wrapped in its own capturing group, so the text it
     * matches is returned as a token.
     *
     * @return array
     */
    abstract protected function getCatchablePatterns();

    /**
     * Lexical non-catchable patterns.
     *
     * These patterns are not wrapped in a capturing group by the lexer, so
     * the text they match acts as a separator and produces no token.
     *
     * @return array
     */
    abstract protected function getNonCatchablePatterns();

    /**
     * Retrieve token type. Also processes the token value if necessary.
     *
     * @param string $value Passed by reference so the implementation may
     *                      rewrite the value that gets stored in the token.
     *
     * @return integer|string
     */
    abstract protected function getType(&$value);
}