annotate vendor/masterminds/html5/src/HTML5/Parser/Scanner.php @ 0:4c8ae668cc8c

Initial import (non-working)
author Chris Cannam
date Wed, 29 Nov 2017 16:09:58 +0000
parents
children 129ea1e6d783
rev   line source
Chris@0 1 <?php
Chris@0 2 namespace Masterminds\HTML5\Parser;
Chris@0 3
/**
 * The scanner.
 *
 * A thin facade over an input stream: every method delegates to the wrapped
 * stream object, adding only end-of-input handling (returning false) and
 * optional debug tracing in next().
 */
class Scanner
{

    // NOTE: the trailing '0' is duplicated in CHARS_HEX and CHARS_ALNUM. The
    // values are kept as-is because the constants are public; when used as a
    // character mask the duplicate should be harmless.
    const CHARS_HEX = 'abcdefABCDEF01234567890';

    const CHARS_ALNUM = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890';

    const CHARS_ALPHA = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';

    /**
     * The input stream being scanned.
     */
    protected $is;

    /**
     * When true, next() echoes every character it returns to standard output.
     */
    public $debug = false;

    /**
     * Create a new Scanner.
     *
     * @param \Masterminds\HTML5\Parser\InputStream $input
     *            An InputStream to be scanned.
     */
    public function __construct($input)
    {
        $this->is = $input;
    }

    /**
     * Get the current position.
     *
     * @return int The current integer byte position, as reported by the
     *         stream's key().
     */
    public function position()
    {
        return $this->is->key();
    }

    /**
     * Take a peek at the next character in the data.
     *
     * @return string The next character.
     */
    public function peek()
    {
        return $this->is->peek();
    }

    /**
     * Get the next character.
     *
     * Note: This advances the pointer.
     *
     * @return string|false The next character, or false when the stream is
     *         no longer valid after advancing (end of input).
     */
    public function next()
    {
        $this->is->next();

        if (!$this->is->valid()) {
            return false;
        }

        $char = $this->is->current();

        if ($this->debug) {
            $line = sprintf("> %s\n", $char);
            if (defined('STDOUT')) {
                fwrite(STDOUT, $line);
            } else {
                // The STDOUT constant only exists under the CLI SAPI; fall
                // back to the stream wrapper so enabling debug elsewhere does
                // not fail on an undefined constant.
                file_put_contents('php://stdout', $line);
            }
        }

        return $char;
    }

    /**
     * Get the current character.
     *
     * Note, this does not advance the pointer.
     *
     * @return string|false The current character, or false when the stream
     *         is not valid (end of input).
     */
    public function current()
    {
        if ($this->is->valid()) {
            return $this->is->current();
        }

        return false;
    }

    /**
     * Silently consume N chars.
     *
     * Implemented as repeated next() calls, so debug tracing (when enabled)
     * still fires for each consumed character.
     *
     * @param int $count
     *            The number of characters to skip. Values below 1 consume
     *            nothing.
     */
    public function consume($count = 1)
    {
        for ($i = 0; $i < $count; ++ $i) {
            $this->next();
        }
    }

    /**
     * Unconsume some of the data.
     * This moves the data pointer backwards.
     *
     * @param int $howMany
     *            The number of characters to move the pointer back.
     */
    public function unconsume($howMany = 1)
    {
        $this->is->unconsume($howMany);
    }

    /**
     * Get the next group of characters that are hex characters.
     *
     * Note, along with getting the characters the pointer in the data will be
     * moved as well.
     *
     * @return string The next group that is hex characters.
     */
    public function getHex()
    {
        return $this->is->charsWhile(static::CHARS_HEX);
    }

    /**
     * Get the next group of characters that are ASCII Alpha characters.
     *
     * Note, along with getting the characters the pointer in the data will be
     * moved as well.
     *
     * @return string The next group of ASCII alpha characters.
     */
    public function getAsciiAlpha()
    {
        return $this->is->charsWhile(static::CHARS_ALPHA);
    }

    /**
     * Get the next group of characters that are ASCII Alpha characters and numbers.
     *
     * Note, along with getting the characters the pointer in the data will be
     * moved as well.
     *
     * @return string The next group of ASCII alpha characters and numbers.
     */
    public function getAsciiAlphaNum()
    {
        return $this->is->charsWhile(static::CHARS_ALNUM);
    }

    /**
     * Get the next group of numbers.
     *
     * Note, along with getting the characters the pointer in the data will be
     * moved as well.
     *
     * @return string The next group of numbers.
     */
    public function getNumeric()
    {
        return $this->is->charsWhile('0123456789');
    }

    /**
     * Consume whitespace.
     *
     * The mask used here is: newline, tab, formfeed, space. Carriage return
     * is not in the mask (presumably the input stream normalizes line
     * endings beforehand -- TODO confirm).
     *
     * @return string The run of whitespace that was consumed (same return
     *         value as charsWhile()).
     */
    public function whitespace()
    {
        return $this->is->charsWhile("\n\t\f ");
    }

    /**
     * Returns the current line that is being consumed.
     *
     * @return int The current line number.
     */
    public function currentLine()
    {
        return $this->is->currentLine();
    }

    /**
     * Read chars until something in the mask is encountered.
     *
     * @param string $mask
     *            The characters that stop the read.
     *
     * @return mixed Whatever the stream's charsUntil() returns for the mask
     *         (presumably the characters read -- TODO confirm).
     */
    public function charsUntil($mask)
    {
        return $this->is->charsUntil($mask);
    }

    /**
     * Read chars as long as the mask matches.
     *
     * @param string $mask
     *            The characters that may be read.
     *
     * @return mixed Whatever the stream's charsWhile() returns for the mask
     *         (presumably the characters read -- TODO confirm).
     */
    public function charsWhile($mask)
    {
        return $this->is->charsWhile($mask);
    }

    /**
     * Returns the current column of the current line that the tokenizer is at.
     *
     * Newlines are column 0. The first char after a newline is column 1.
     *
     * @return int The column number.
     */
    public function columnOffset()
    {
        return $this->is->columnOffset();
    }

    /**
     * Get all characters until EOF.
     *
     * This consumes characters until the EOF.
     *
     * NOTE(review): this method was previously documented as returning an
     * int count, which contradicts the summary above; confirm the actual
     * return type against the InputStream implementation.
     *
     * @return mixed The value returned by the stream's remainingChars().
     */
    public function remainingChars()
    {
        return $this->is->remainingChars();
    }
}