Chris@0
|
1 <?php
|
Chris@0
|
2 namespace Masterminds\HTML5\Parser;
|
Chris@0
|
3
|
Chris@0
|
/**
 * The scanner.
 *
 * A thin facade over an input stream. Every method delegates to the wrapped
 * stream object; the scanner itself only adds the "return false when the
 * stream is no longer valid" convention used by next() and current(), and an
 * optional per-character debug trace.
 */
class Scanner
{

    /**
     * Character mask accepted by getHex().
     *
     * Note: the digit "0" appears twice in the literal. The value is left
     * untouched because the constant is part of the public interface.
     */
    const CHARS_HEX = 'abcdefABCDEF01234567890';

    /**
     * Character mask accepted by getAsciiAlphaNum().
     *
     * Note: the digit "0" appears twice in the literal (see CHARS_HEX).
     */
    const CHARS_ALNUM = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890';

    /**
     * Character mask accepted by getAsciiAlpha().
     */
    const CHARS_ALPHA = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';

    /**
     * The input stream being scanned.
     */
    protected $is;

    /**
     * When true, next() prints every character it returns as "> X".
     */
    public $debug = false;

    /**
     * Create a new Scanner.
     *
     * @param \Masterminds\HTML5\Parser\InputStream $input
     *            An InputStream to be scanned.
     */
    public function __construct($input)
    {
        $this->is = $input;
    }

    /**
     * Get the current position.
     *
     * @return int The current integer byte position, as reported by the
     *         stream's key().
     */
    public function position()
    {
        return $this->is->key();
    }

    /**
     * Take a peek at the next character in the data.
     *
     * @return string The next character, as reported by the stream's peek().
     */
    public function peek()
    {
        return $this->is->peek();
    }

    /**
     * Get the next character.
     *
     * Note: This advances the pointer.
     *
     * @return string|false The next character, or false when the stream is
     *         no longer valid after advancing.
     */
    public function next()
    {
        $this->is->next();
        if (!$this->is->valid()) {
            return false;
        }

        $char = $this->is->current();
        if ($this->debug) {
            $this->writeDebug($char);
        }

        return $char;
    }

    /**
     * Get the current character.
     *
     * Note, this does not advance the pointer.
     *
     * @return string|false The current character, or false when the stream
     *         is not valid.
     */
    public function current()
    {
        if ($this->is->valid()) {
            return $this->is->current();
        }

        return false;
    }

    /**
     * Silently consume N chars.
     *
     * Implemented as $count calls to next(), so the debug trace (when
     * enabled) is still emitted for each consumed character.
     *
     * @param int $count
     *            How many characters to advance past. Defaults to 1.
     */
    public function consume($count = 1)
    {
        for ($i = 0; $i < $count; ++$i) {
            $this->next();
        }
    }

    /**
     * Unconsume some of the data.
     * This moves the data pointer backwards.
     *
     * @param int $howMany
     *            The number of characters to move the pointer back.
     */
    public function unconsume($howMany = 1)
    {
        $this->is->unconsume($howMany);
    }

    /**
     * Get the next group of characters that are hex characters.
     *
     * Note, along with getting the characters the pointer in the data will be
     * moved as well.
     *
     * @return string The next group that is hex characters.
     */
    public function getHex()
    {
        return $this->is->charsWhile(static::CHARS_HEX);
    }

    /**
     * Get the next group of characters that are ASCII Alpha characters.
     *
     * Note, along with getting the characters the pointer in the data will be
     * moved as well.
     *
     * @return string The next group of ASCII alpha characters.
     */
    public function getAsciiAlpha()
    {
        return $this->is->charsWhile(static::CHARS_ALPHA);
    }

    /**
     * Get the next group of characters that are ASCII Alpha characters and numbers.
     *
     * Note, along with getting the characters the pointer in the data will be
     * moved as well.
     *
     * @return string The next group of ASCII alpha characters and numbers.
     */
    public function getAsciiAlphaNum()
    {
        return $this->is->charsWhile(static::CHARS_ALNUM);
    }

    /**
     * Get the next group of numbers.
     *
     * Note, along with getting the characters the pointer in the data will be
     * moved as well.
     *
     * @return string The next group of numbers.
     */
    public function getNumeric()
    {
        return $this->is->charsWhile('0123456789');
    }

    /**
     * Consume whitespace.
     *
     * The mask used here is: newline, tab, formfeed, space.
     * NOTE(review): carriage return is not in the mask; presumably the input
     * stream normalizes line endings before scanning -- confirm.
     *
     * @return mixed Whatever the stream's charsWhile() returns for the
     *         whitespace mask (presumably the consumed whitespace).
     */
    public function whitespace()
    {
        return $this->is->charsWhile("\n\t\f ");
    }

    /**
     * Returns the current line that is being consumed.
     *
     * @return int The current line number.
     */
    public function currentLine()
    {
        return $this->is->currentLine();
    }

    /**
     * Read chars until something in the mask is encountered.
     *
     * @param string $mask
     *            Passed through unchanged to the stream's charsUntil().
     *
     * @return mixed The stream's charsUntil() result.
     */
    public function charsUntil($mask)
    {
        return $this->is->charsUntil($mask);
    }

    /**
     * Read chars as long as the mask matches.
     *
     * @param string $mask
     *            Passed through unchanged to the stream's charsWhile().
     *
     * @return mixed The stream's charsWhile() result.
     */
    public function charsWhile($mask)
    {
        return $this->is->charsWhile($mask);
    }

    /**
     * Returns the current column of the current line that the tokenizer is at.
     *
     * Newlines are column 0. The first char after a newline is column 1.
     *
     * @return int The column number.
     */
    public function columnOffset()
    {
        return $this->is->columnOffset();
    }

    /**
     * Get all characters until EOF.
     *
     * This consumes characters until the EOF.
     *
     * NOTE(review): this docblock previously declared "@return int The number
     * of characters remaining", which contradicts the summary above. The
     * value is whatever the stream's remainingChars() returns; confirm the
     * actual type against the InputStream implementation.
     *
     * @return mixed The stream's remainingChars() result.
     */
    public function remainingChars()
    {
        return $this->is->remainingChars();
    }

    /**
     * Print one character of debug trace in the "> X" format.
     *
     * The STDOUT constant is only defined by the CLI SAPI; referencing it
     * elsewhere is an error. When it is missing, fall back to opening the
     * php://stdout stream so enabling $debug never breaks scanning.
     *
     * @param string $char
     *            The character that next() is about to return.
     */
    private function writeDebug($char)
    {
        if (defined('STDOUT')) {
            fprintf(STDOUT, "> %s\n", $char);

            return;
        }

        $out = fopen('php://stdout', 'w');
        if (false !== $out) {
            fprintf($out, "> %s\n", $char);
            fclose($out);
        }
    }
}
|