comparison vendor/masterminds/html5/src/HTML5/Parser/Scanner.php @ 0:4c8ae668cc8c

Initial import (non-working)
author Chris Cannam
date Wed, 29 Nov 2017 16:09:58 +0000
parents
children 129ea1e6d783
comparison
equal deleted inserted replaced
-1:000000000000 0:4c8ae668cc8c
1 <?php
2 namespace Masterminds\HTML5\Parser;
3
/**
 * The scanner.
 *
 * A thin cursor over an input stream: every method forwards to the wrapped
 * stream object, adding only end-of-input handling (see next() and
 * current()) and an optional per-character debug trace.
 */
class Scanner
{

    // Character masks handed to the stream's charsWhile() by the helpers
    // below. They are resolved through static:: so a subclass may override
    // them. NOTE(review): the digit run in CHARS_HEX and CHARS_ALNUM ends
    // with a repeated '0'; presumably harmless for a character mask, and
    // kept unchanged so the constant values stay the same.
    const CHARS_HEX = 'abcdefABCDEF01234567890';

    const CHARS_ALNUM = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890';

    const CHARS_ALPHA = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';

    /**
     * The input stream being scanned, as passed to the constructor.
     *
     * @var \Masterminds\HTML5\Parser\InputStream
     */
    protected $is;

    /**
     * When true, next() prints each character it returns as "> X".
     *
     * @var bool
     */
    public $debug = false;

    /**
     * Create a new Scanner.
     *
     * @param \Masterminds\HTML5\Parser\InputStream $input
     *            An InputStream to be scanned.
     */
    public function __construct($input)
    {
        $this->is = $input;
    }

    /**
     * Get the current position.
     *
     * @return int The current integer byte position, as reported by the
     *             stream's key().
     */
    public function position()
    {
        return $this->is->key();
    }

    /**
     * Take a peek at the next character in the data.
     *
     * @return string The next character, as reported by the stream's peek().
     */
    public function peek()
    {
        return $this->is->peek();
    }

    /**
     * Advance the pointer by one and return the character now under it.
     *
     * When debugging is enabled, the returned character is also printed as
     * "> X" followed by a newline.
     *
     * @return string|false The new current character, or false when the
     *                      stream is no longer valid after advancing.
     */
    public function next()
    {
        $this->is->next();
        if (!$this->is->valid()) {
            return false;
        }

        $char = $this->is->current();

        if ($this->debug) {
            // STDOUT is only defined by the CLI SAPI; referencing it under
            // any other SAPI fails with an undefined-constant error, so fall
            // back to the regular output channel there.
            if (defined('STDOUT')) {
                fprintf(STDOUT, "> %s\n", $char);
            } else {
                printf("> %s\n", $char);
            }
        }

        return $char;
    }

    /**
     * Get the current character.
     *
     * Note, this does not advance the pointer.
     *
     * @return string|false The current character, or false when the stream
     *                      is not valid.
     */
    public function current()
    {
        if ($this->is->valid()) {
            return $this->is->current();
        }

        return false;
    }

    /**
     * Silently consume N chars.
     *
     * Calls next() $count times and discards the results.
     *
     * @param int $count
     *            The number of characters to skip. Values below 1 skip
     *            nothing.
     */
    public function consume($count = 1)
    {
        for ($i = 0; $i < $count; ++$i) {
            $this->next();
        }
    }

    /**
     * Unconsume some of the data.
     * This moves the data pointer backwards.
     *
     * @param int $howMany
     *            The number of characters to move the pointer back.
     */
    public function unconsume($howMany = 1)
    {
        $this->is->unconsume($howMany);
    }

    /**
     * Get the next group of characters that are hex characters.
     *
     * Note, along with getting the characters the pointer in the data will be
     * moved as well.
     *
     * @return string The next group that is hex characters.
     */
    public function getHex()
    {
        return $this->is->charsWhile(static::CHARS_HEX);
    }

    /**
     * Get the next group of characters that are ASCII Alpha characters.
     *
     * Note, along with getting the characters the pointer in the data will be
     * moved as well.
     *
     * @return string The next group of ASCII alpha characters.
     */
    public function getAsciiAlpha()
    {
        return $this->is->charsWhile(static::CHARS_ALPHA);
    }

    /**
     * Get the next group of characters that are ASCII Alpha characters and numbers.
     *
     * Note, along with getting the characters the pointer in the data will be
     * moved as well.
     *
     * @return string The next group of ASCII alpha characters and numbers.
     */
    public function getAsciiAlphaNum()
    {
        return $this->is->charsWhile(static::CHARS_ALNUM);
    }

    /**
     * Get the next group of numbers.
     *
     * Note, along with getting the characters the pointer in the data will be
     * moved as well.
     *
     * @return string The next group of numbers.
     */
    public function getNumeric()
    {
        return $this->is->charsWhile('0123456789');
    }

    /**
     * Consume whitespace.
     *
     * The mask used here is: newline, tab, formfeed, space.
     *
     * @return string The run of whitespace consumed, as returned by the
     *                stream's charsWhile().
     */
    public function whitespace()
    {
        return $this->is->charsWhile("\n\t\f ");
    }

    /**
     * Returns the current line that is being consumed.
     *
     * @return int The current line number.
     */
    public function currentLine()
    {
        return $this->is->currentLine();
    }

    /**
     * Read chars until something in the mask is encountered.
     *
     * @param string $mask
     *            The characters that stop the read.
     *
     * @return mixed Whatever the stream's charsUntil() returns (presumably
     *               the characters read; confirm against the InputStream in
     *               use).
     */
    public function charsUntil($mask)
    {
        return $this->is->charsUntil($mask);
    }

    /**
     * Read chars as long as the mask matches.
     *
     * @param string $mask
     *            The characters that may be read.
     *
     * @return string The run of matching characters, as returned by the
     *                stream's charsWhile().
     */
    public function charsWhile($mask)
    {
        return $this->is->charsWhile($mask);
    }

    /**
     * Returns the current column of the current line that the tokenizer is at.
     *
     * Newlines are column 0. The first char after a newline is column 1.
     *
     * @return int The column number.
     */
    public function columnOffset()
    {
        return $this->is->columnOffset();
    }

    /**
     * Get all characters until EOF.
     *
     * This consumes characters until the EOF.
     *
     * NOTE(review): this docblock previously declared "@return int The number
     * of characters remaining", which contradicts the summary above. The
     * stream implementation is not visible from this class, so the return
     * type below is an assumption to confirm.
     *
     * @return mixed Whatever the stream's remainingChars() returns
     *               (presumably the remaining characters as a string).
     */
    public function remainingChars()
    {
        return $this->is->remainingChars();
    }
}