Mercurial > hg > isophonics-drupal-site
comparison vendor/masterminds/html5/src/HTML5/Parser/Scanner.php @ 0:4c8ae668cc8c
Initial import (non-working)
author | Chris Cannam |
---|---|
date | Wed, 29 Nov 2017 16:09:58 +0000 |
parents | |
children | 129ea1e6d783 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4c8ae668cc8c |
---|---|
1 <?php | |
2 namespace Masterminds\HTML5\Parser; | |
3 | |
/**
 * The scanner.
 *
 * A thin facade over an input stream: every method forwards to the wrapped
 * stream object, adding only end-of-input checks (see next() and current())
 * and a few predefined character masks.
 */
class Scanner
{

    // Character masks handed to the stream's charsWhile().
    // NOTE(review): the trailing '0' is repeated in CHARS_HEX and CHARS_ALNUM;
    // presumably harmless for a character mask, so the values are left as-is.
    const CHARS_HEX = 'abcdefABCDEF01234567890';

    const CHARS_ALNUM = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890';

    const CHARS_ALPHA = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';

    /**
     * The input stream being scanned.
     */
    protected $is;

    /**
     * When true, next() echoes every character it advances onto.
     */
    public $debug = false;

    /**
     * Create a new Scanner.
     *
     * @param \Masterminds\HTML5\Parser\InputStream $input
     *            An InputStream to be scanned.
     */
    public function __construct($input)
    {
        $this->is = $input;
    }

    /**
     * Get the current position.
     *
     * @return int The current position, as reported by the stream's key().
     */
    public function position()
    {
        return $this->is->key();
    }

    /**
     * Take a peek at the next character in the data.
     *
     * @return string The next character, as reported by the stream's peek().
     */
    public function peek()
    {
        return $this->is->peek();
    }

    /**
     * Get the next character.
     *
     * Note: This advances the pointer.
     *
     * @return string|false The character the pointer now rests on, or false
     *                      when the stream is no longer valid (end of input).
     */
    public function next()
    {
        $this->is->next();
        if (!$this->is->valid()) {
            return false;
        }

        $char = $this->is->current();
        if ($this->debug) {
            $line = sprintf("> %s\n", $char);
            // STDOUT is only defined by the CLI SAPI; fall back to regular
            // output elsewhere instead of failing on an undefined constant.
            if (defined('STDOUT')) {
                fwrite(STDOUT, $line);
            } else {
                echo $line;
            }
        }

        return $char;
    }

    /**
     * Get the current character.
     *
     * Note, this does not advance the pointer.
     *
     * @return string|false The current character, or false when the stream
     *                      is no longer valid (end of input).
     */
    public function current()
    {
        if ($this->is->valid()) {
            return $this->is->current();
        }

        return false;
    }

    /**
     * Silently consume N chars.
     *
     * Calls next() $count times and discards the results; a count of zero
     * or less consumes nothing.
     *
     * @param int $count
     *            The number of characters to skip.
     */
    public function consume($count = 1)
    {
        for ($i = 0; $i < $count; ++$i) {
            $this->next();
        }
    }

    /**
     * Unconsume some of the data.
     * This moves the data pointer backwards.
     *
     * @param int $howMany
     *            The number of characters to move the pointer back.
     */
    public function unconsume($howMany = 1)
    {
        $this->is->unconsume($howMany);
    }

    /**
     * Get the next group of characters that are hex characters.
     *
     * Note, along with getting the characters the pointer in the data will be
     * moved as well.
     *
     * @return string The next group that is hex characters.
     */
    public function getHex()
    {
        return $this->is->charsWhile(static::CHARS_HEX);
    }

    /**
     * Get the next group of characters that are ASCII Alpha characters.
     *
     * Note, along with getting the characters the pointer in the data will be
     * moved as well.
     *
     * @return string The next group of ASCII alpha characters.
     */
    public function getAsciiAlpha()
    {
        return $this->is->charsWhile(static::CHARS_ALPHA);
    }

    /**
     * Get the next group of characters that are ASCII Alpha characters and numbers.
     *
     * Note, along with getting the characters the pointer in the data will be
     * moved as well.
     *
     * @return string The next group of ASCII alpha characters and numbers.
     */
    public function getAsciiAlphaNum()
    {
        return $this->is->charsWhile(static::CHARS_ALNUM);
    }

    /**
     * Get the next group of numbers.
     *
     * Note, along with getting the characters the pointer in the data will be
     * moved as well.
     *
     * @return string The next group of numbers.
     */
    public function getNumeric()
    {
        return $this->is->charsWhile('0123456789');
    }

    /**
     * Consume whitespace.
     *
     * The mask used here is: newline, tab, formfeed, space.
     *
     * @return mixed Whatever the stream's charsWhile() returns for that mask.
     */
    public function whitespace()
    {
        return $this->is->charsWhile("\n\t\f ");
    }

    /**
     * Returns the current line that is being consumed.
     *
     * @return int The current line number.
     */
    public function currentLine()
    {
        return $this->is->currentLine();
    }

    /**
     * Read chars until something in the mask is encountered.
     *
     * @param string $mask
     *            The characters to stop at; passed straight to the stream.
     *
     * @return mixed Whatever the stream's charsUntil() returns.
     */
    public function charsUntil($mask)
    {
        return $this->is->charsUntil($mask);
    }

    /**
     * Read chars as long as the mask matches.
     *
     * @param string $mask
     *            The characters to accept; passed straight to the stream.
     *
     * @return mixed Whatever the stream's charsWhile() returns.
     */
    public function charsWhile($mask)
    {
        return $this->is->charsWhile($mask);
    }

    /**
     * Returns the current column of the current line that the tokenizer is at.
     *
     * Newlines are column 0. The first char after a newline is column 1.
     *
     * @return int The column number.
     */
    public function columnOffset()
    {
        return $this->is->columnOffset();
    }

    /**
     * Get all characters until EOF.
     *
     * Forwards to the stream's remainingChars().
     *
     * NOTE(review): the previous docblock declared "@return int The number of
     * characters remaining", which contradicts the summary above; the stream
     * implementation is not visible here, so confirm the actual return type.
     *
     * @return mixed Whatever the stream's remainingChars() returns.
     */
    public function remainingChars()
    {
        return $this->is->remainingChars();
    }
}