namespace Doctrine\Common\Lexer;

/**
 * Base class for writing simple lexers, i.e. for creating small DSLs.
 *
 * A concrete lexer supplies the regular-expression fragments to split on
 * (getCatchablePatterns() / getNonCatchablePatterns()) and a getType()
 * implementation that classifies each chunk. setInput() tokenizes the whole
 * input eagerly; the token stream is then walked with moveNext(), peek() and
 * the related helpers.
 *
 * @since  2.0
 * @author Guilherme Blanco
 * @author Jonathan Wage
 * @author Roman Borschel
 */
abstract class AbstractLexer
{
    /**
     * Lexer original input string.
     *
     * @var string
     */
    private $input;

    /**
     * Array of scanned tokens.
     *
     * Each token is an associative array containing three items:
     *  - 'value'    : the string value of the token in the input string
     *  - 'type'     : the type of the token, as returned by getType()
     *  - 'position' : the offset of the token in the input string, as
     *                 reported by preg_split() (a byte offset)
     *
     * @var array
     */
    private $tokens = array();

    /**
     * Index into $tokens of the token the next moveNext() call will load.
     *
     * @var int
     */
    private $position = 0;

    /**
     * Offset, relative to $position, of the token the next peek() call returns.
     *
     * @var int
     */
    private $peek = 0;

    /**
     * Compiled split pattern, built lazily on the first scan() call.
     *
     * Kept per instance on purpose; see scan().
     *
     * @var string|null
     */
    private $regex;

    /**
     * The next token in the input, or NULL before the first moveNext() call
     * and once the input is exhausted.
     *
     * @var array|null
     */
    public $lookahead;

    /**
     * The last matched/seen token, or NULL if none has been consumed yet.
     *
     * @var array|null
     */
    public $token;

    /**
     * Sets the input data to be tokenized.
     *
     * The Lexer is immediately reset and the new input tokenized.
     * Any unprocessed tokens from any previous input are lost.
     *
     * @param string $input The input to be tokenized.
     *
     * @return void
     */
    public function setInput($input)
    {
        $this->input  = $input;
        $this->tokens = array();

        $this->reset();
        $this->scan($input);
    }

    /**
     * Resets the lexer: clears the current and lookahead tokens and rewinds
     * both the position and the peek pointer. The scanned tokens are kept.
     *
     * @return void
     */
    public function reset()
    {
        $this->lookahead = null;
        $this->token     = null;
        $this->peek      = 0;
        $this->position  = 0;
    }

    /**
     * Resets the peek pointer to 0.
     *
     * @return void
     */
    public function resetPeek()
    {
        $this->peek = 0;
    }

    /**
     * Resets the lexer position to the given index in the token list.
     *
     * @param int $position Index of the token the next moveNext() call loads.
     *
     * @return void
     */
    public function resetPosition($position = 0)
    {
        $this->position = $position;
    }

    /**
     * Retrieves the original lexer's input until a given position.
     *
     * @param int $position Offset into the input string.
     *
     * @return string
     */
    public function getInputUntilPosition($position)
    {
        // substr() is byte-based, which matches the byte offsets that
        // preg_split() stores in each token's 'position'.
        return substr($this->input, 0, $position);
    }

    /**
     * Checks whether the type of the current lookahead is identical to the
     * given token type.
     *
     * @param int|string $token
     *
     * @return bool
     */
    public function isNextToken($token)
    {
        return null !== $this->lookahead && $this->lookahead['type'] === $token;
    }

    /**
     * Checks whether the type of the current lookahead is identical to any of
     * the given token types.
     *
     * @param array $tokens
     *
     * @return bool
     */
    public function isNextTokenAny(array $tokens)
    {
        return null !== $this->lookahead && in_array($this->lookahead['type'], $tokens, true);
    }

    /**
     * Moves to the next token in the input string.
     *
     * The previous lookahead becomes the current token and the peek pointer
     * is rewound.
     *
     * @return bool TRUE if a lookahead token is available afterwards, FALSE
     *              once the end of the token list has been reached.
     */
    public function moveNext()
    {
        $this->peek      = 0;
        $this->token     = $this->lookahead;
        $this->lookahead = isset($this->tokens[$this->position])
            ? $this->tokens[$this->position++]
            : null;

        return $this->lookahead !== null;
    }

    /**
     * Tells the lexer to skip input tokens until the lookahead has the given
     * type, or until the input is exhausted.
     *
     * @param int|string $type The token type to skip until.
     *
     * @return void
     */
    public function skipUntil($type)
    {
        while ($this->lookahead !== null && $this->lookahead['type'] !== $type) {
            $this->moveNext();
        }
    }

    /**
     * Checks if the type computed for the given value is identical to the
     * given token type.
     *
     * @param mixed      $value
     * @param int|string $token
     *
     * @return bool
     */
    public function isA($value, $token)
    {
        return $this->getType($value) === $token;
    }

    /**
     * Returns the next not-yet-peeked token after the lookahead and advances
     * the peek pointer. The lexer position itself is left untouched.
     *
     * @return array|null The next token or NULL if there are no more tokens ahead.
     */
    public function peek()
    {
        $index = $this->position + $this->peek;

        if ( ! isset($this->tokens[$index])) {
            return null;
        }

        // Only advance when a token was actually found.
        $this->peek++;

        return $this->tokens[$index];
    }

    /**
     * Peeks at the next token, returns it and immediately resets the peek.
     *
     * @return array|null The next token or NULL if there are no more tokens ahead.
     */
    public function glimpse()
    {
        $peek       = $this->peek();
        $this->peek = 0;

        return $peek;
    }

    /**
     * Scans the input string for tokens and appends them to the token list.
     *
     * @param string $input A query string.
     *
     * @return void
     */
    protected function scan($input)
    {
        // The pattern is cached on the instance. A function-level `static`
        // would be shared by every subclass inheriting this method on
        // PHP >= 8.1, so the first lexer to scan would impose its patterns on
        // all other lexers in the process.
        if ($this->regex === null) {
            $this->regex = sprintf(
                '/(%s)|%s/%s',
                implode(')|(', $this->getCatchablePatterns()),
                implode('|', $this->getNonCatchablePatterns()),
                $this->getModifiers()
            );
        }

        $flags   = PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_OFFSET_CAPTURE;
        $matches = preg_split($this->regex, $input, -1, $flags);

        if ($matches === false) {
            // preg_split() failed; iterating over FALSE would only raise a
            // warning and drop the input, so treat it as one single chunk.
            $matches = array(array($input, 0));
        }

        foreach ($matches as $match) {
            // Must remain before 'value' assignment since it can change content
            $type = $this->getType($match[0]);

            $this->tokens[] = array(
                'value'    => $match[0],
                'type'     => $type,
                'position' => $match[1],
            );
        }
    }

    /**
     * Gets the literal for a given token.
     *
     * Looks for a class constant of the concrete lexer whose value is
     * identical to the token.
     *
     * @param int|string $token
     *
     * @return int|string "ClassName::CONSTANT_NAME" for the first matching
     *                    constant, or the token itself if none matches.
     */
    public function getLiteral($token)
    {
        $className = get_class($this);
        $reflClass = new \ReflectionClass($className);

        foreach ($reflClass->getConstants() as $name => $value) {
            if ($value === $token) {
                return $className . '::' . $name;
            }
        }

        return $token;
    }

    /**
     * Regex modifiers appended to the split pattern built in scan().
     *
     * @return string
     */
    protected function getModifiers()
    {
        return 'i';
    }

    /**
     * Lexical catchable patterns.
     *
     * Each fragment is wrapped in its own capture group by scan().
     *
     * @return array
     */
    abstract protected function getCatchablePatterns();

    /**
     * Lexical non-catchable patterns.
     *
     * These fragments are added to the split pattern without a capture group.
     *
     * @return array
     */
    abstract protected function getNonCatchablePatterns();

    /**
     * Retrieve token type. Also processes the token value if necessary.
     *
     * The value is passed by reference, so an implementation may rewrite it;
     * scan() stores the rewritten value in the token.
     *
     * @param string $value
     *
     * @return int|string
     */
    abstract protected function getType(&$value);
}