Chris@18
|
1 <?php
|
Chris@18
|
2
|
Chris@18
|
3 namespace Egulias\EmailValidator\Parser;
|
Chris@18
|
4
|
Chris@18
|
5 use Egulias\EmailValidator\EmailLexer;
|
Chris@18
|
6 use Egulias\EmailValidator\Exception\CharNotAllowed;
|
Chris@18
|
7 use Egulias\EmailValidator\Exception\CommaInDomain;
|
Chris@18
|
8 use Egulias\EmailValidator\Exception\ConsecutiveAt;
|
Chris@18
|
9 use Egulias\EmailValidator\Exception\CRLFAtTheEnd;
|
Chris@18
|
10 use Egulias\EmailValidator\Exception\CRNoLF;
|
Chris@18
|
11 use Egulias\EmailValidator\Exception\DomainHyphened;
|
Chris@18
|
12 use Egulias\EmailValidator\Exception\DotAtEnd;
|
Chris@18
|
13 use Egulias\EmailValidator\Exception\DotAtStart;
|
Chris@18
|
14 use Egulias\EmailValidator\Exception\ExpectingATEXT;
|
Chris@18
|
15 use Egulias\EmailValidator\Exception\ExpectingDomainLiteralClose;
|
Chris@18
|
16 use Egulias\EmailValidator\Exception\ExpectingDTEXT;
|
Chris@18
|
17 use Egulias\EmailValidator\Exception\NoDomainPart;
|
Chris@18
|
18 use Egulias\EmailValidator\Exception\UnopenedComment;
|
Chris@18
|
19 use Egulias\EmailValidator\Warning\AddressLiteral;
|
Chris@18
|
20 use Egulias\EmailValidator\Warning\CFWSWithFWS;
|
Chris@18
|
21 use Egulias\EmailValidator\Warning\DeprecatedComment;
|
Chris@18
|
22 use Egulias\EmailValidator\Warning\DomainLiteral;
|
Chris@18
|
23 use Egulias\EmailValidator\Warning\DomainTooLong;
|
Chris@18
|
24 use Egulias\EmailValidator\Warning\IPV6BadChar;
|
Chris@18
|
25 use Egulias\EmailValidator\Warning\IPV6ColonEnd;
|
Chris@18
|
26 use Egulias\EmailValidator\Warning\IPV6ColonStart;
|
Chris@18
|
27 use Egulias\EmailValidator\Warning\IPV6Deprecated;
|
Chris@18
|
28 use Egulias\EmailValidator\Warning\IPV6DoubleColon;
|
Chris@18
|
29 use Egulias\EmailValidator\Warning\IPV6GroupCount;
|
Chris@18
|
30 use Egulias\EmailValidator\Warning\IPV6MaxGroups;
|
Chris@18
|
31 use Egulias\EmailValidator\Warning\LabelTooLong;
|
Chris@18
|
32 use Egulias\EmailValidator\Warning\ObsoleteDTEXT;
|
Chris@18
|
33 use Egulias\EmailValidator\Warning\TLD;
|
Chris@18
|
34
|
Chris@18
|
35 class DomainPart extends Parser
|
Chris@18
|
36 {
|
Chris@18
|
37 const DOMAIN_MAX_LENGTH = 254;
|
Chris@18
|
38 protected $domainPart = '';
|
Chris@18
|
39
|
Chris@18
|
/**
 * Parses the domain part from the lexer's current position and stores it.
 *
 * The lexer is advanced one token, the first token is validated, a leading
 * comment is consumed, and the remaining tokens are handed to
 * doParseDomainPart(). The last consumed token is then validated and the
 * collected text is saved for getDomainPart().
 *
 * @param string $domainPart Not read by this method; tokens come from the lexer.
 *
 * @throws DotAtStart     When the first token is a dot.
 * @throws NoDomainPart   When the first token is the empty token.
 * @throws DomainHyphened When the domain starts or ends with a hyphen.
 * @throws DotAtEnd       When the last consumed token is a dot.
 * @throws CRLFAtTheEnd   When the last consumed token is a CR.
 *
 * Helpers called from here may throw further exceptions.
 */
public function parse($domainPart)
{
    $this->lexer->moveNext();

    // Nothing below moves the lexer until the comment branch, so the type of
    // the first token can be read once.
    $firstType = $this->lexer->token['type'];

    if ($firstType === EmailLexer::S_DOT) {
        throw new DotAtStart();
    }
    if ($firstType === EmailLexer::S_EMPTY) {
        throw new NoDomainPart();
    }
    if ($firstType === EmailLexer::S_HYPHEN) {
        throw new DomainHyphened();
    }
    if ($firstType === EmailLexer::S_OPENPARENTHESIS) {
        $this->warnings[DeprecatedComment::CODE] = new DeprecatedComment();
        $this->parseDomainComments();
    }

    $parsed = $this->doParseDomainPart();
    $last = $this->lexer->getPrevious();

    if ($last['type'] === EmailLexer::S_DOT) {
        throw new DotAtEnd();
    }
    if ($last['type'] === EmailLexer::S_HYPHEN) {
        throw new DomainHyphened();
    }

    // An over-long domain is only a warning; it is recorded before the CR
    // check so the warning exists even if that check throws.
    if (strlen($parsed) > self::DOMAIN_MAX_LENGTH) {
        $this->warnings[DomainTooLong::CODE] = new DomainTooLong();
    }
    if ($last['type'] === EmailLexer::S_CR) {
        throw new CRLFAtTheEnd();
    }

    $this->domainPart = $parsed;
}
|
Chris@18
|
79
|
Chris@18
|
/**
 * Returns the domain text most recently stored by parse().
 *
 * The property starts out as an empty string.
 *
 * @return string
 */
public function getDomainPart()
{
    $storedDomain = $this->domainPart;

    return $storedDomain;
}
|
Chris@18
|
84
|
Chris@18
|
/**
 * Records warnings about the IPv6 portion of an address literal.
 *
 * The first five characters of $addressLiteral are skipped (presumably the
 * "IPv6:" tag - confirm against callers) and the rest is split on ':' into
 * groups. Findings are reported only as entries in $this->warnings.
 *
 * Group counting follows Daniel Marschall's IPv6 testing strategy.
 *
 * @param string $addressLiteral Literal text including its five-character prefix.
 * @param int    $maxGroups      Number of groups expected when no "::" is present.
 *
 * @return void
 */
public function checkIPV6Tag($addressLiteral, $maxGroups = 8)
{
    $lastToken = $this->lexer->getPrevious();
    if ($lastToken['type'] === EmailLexer::S_COLON) {
        $this->warnings[IPV6ColonEnd::CODE] = new IPV6ColonEnd();
    }

    $IPv6 = substr($addressLiteral, 5);
    $groups = explode(':', $IPv6);
    $groupCount = count($groups);
    $firstElision = strpos($IPv6, '::');

    // Any group that is not 0-4 hexadecimal digits is a bad group.
    $badGroups = preg_grep('/^[0-9A-Fa-f]{0,4}$/', $groups, PREG_GREP_INVERT);
    if (count($badGroups) !== 0) {
        $this->warnings[IPV6BadChar::CODE] = new IPV6BadChar();
    }

    if ($firstElision === false) {
        // Without "::" the group count has to match exactly.
        if ($groupCount !== $maxGroups) {
            $this->warnings[IPV6GroupCount::CODE] = new IPV6GroupCount();
        }

        return;
    }

    // First and last "::" at different offsets means more than one elision.
    $lastElision = strrpos($IPv6, '::');
    if ($firstElision !== $lastElision) {
        $this->warnings[IPV6DoubleColon::CODE] = new IPV6DoubleColon();

        return;
    }

    $elisionAtStart = $firstElision === 0;
    $elisionAtEnd = $firstElision === (strlen($IPv6) - 2);
    if ($elisionAtStart || $elisionAtEnd) {
        // RFC 4291 allows :: at the start or end of an address
        // with 7 other groups in addition
        ++$maxGroups;
    }

    if ($groupCount > $maxGroups) {
        $this->warnings[IPV6MaxGroups::CODE] = new IPV6MaxGroups();

        return;
    }

    if ($groupCount === $maxGroups) {
        $this->warnings[IPV6Deprecated::CODE] = new IPV6Deprecated();
    }
}
|
Chris@18
|
127
|
Chris@18
|
/**
 * Walks the remaining lexer tokens and concatenates their values.
 *
 * Each token is run through the per-token checks (forbidden characters,
 * comments, consecutive dots, domain-part exceptions, domain literals,
 * label length, folding white space) before its value is appended. The
 * loop ends when the lexer has no current token.
 *
 * @return string The concatenated token values.
 *
 * @throws UnopenedComment When a closing parenthesis appears with no open comment.
 *
 * The helpers called from the loop may throw further exceptions.
 */
protected function doParseDomainPart()
{
    $collected = '';
    $commentDepth = 0;

    while (true) {
        // Captured before any helper can advance the lexer.
        $previousToken = $this->lexer->getPrevious();

        $this->checkNotAllowedChars($this->lexer->token);

        if ($this->lexer->token['type'] === EmailLexer::S_OPENPARENTHESIS) {
            $this->parseComments();
            $commentDepth += $this->getOpenedParenthesis();
            $this->lexer->moveNext();

            // The token just stepped over closed a comment.
            $steppedOver = $this->lexer->getPrevious();
            if ($steppedOver['type'] === EmailLexer::S_CLOSEPARENTHESIS) {
                $commentDepth--;
            }
        }

        if ($this->lexer->token['type'] === EmailLexer::S_CLOSEPARENTHESIS) {
            if ($commentDepth === 0) {
                throw new UnopenedComment();
            }
            $commentDepth--;
        }

        $this->checkConsecutiveDots();
        $this->checkDomainPartExceptions($previousToken);

        if ($this->hasBrackets()) {
            $this->parseDomainLiteral();
        }

        $this->checkLabelLength($previousToken);

        if ($this->isFWS()) {
            $this->parseFWS();
        }

        $collected .= $this->lexer->token['value'];
        $this->lexer->moveNext();

        if (!$this->lexer->token) {
            break;
        }
    }

    return $collected;
}
|
Chris@18
|
173
|
Chris@18
|
/**
 * Rejects tokens whose type is a backslash or a slash.
 *
 * @param array $token Lexer token; only its 'type' entry is read.
 *
 * @throws CharNotAllowed When the token type is S_BACKSLASH or S_SLASH.
 */
private function checkNotAllowedChars($token)
{
    $forbiddenTypes = [
        EmailLexer::S_BACKSLASH => true,
        EmailLexer::S_SLASH => true,
    ];

    $type = $token['type'];
    if (!isset($forbiddenTypes[$type])) {
        return;
    }

    throw new CharNotAllowed();
}
|
Chris@18
|
181
|
Chris@18
|
/**
 * Records the "colon at start" warning if needed, then parses the literal.
 *
 * The warning is raised when the next token is a colon, or when the next
 * token is the IPv6 tag and the token after it is a double colon. That
 * second look-ahead is done on a clone so the real lexer is not advanced.
 *
 * @return string|false Whatever doParseDomainLiteral() returns.
 */
protected function parseDomainLiteral()
{
    $colonAtStart = $this->lexer->isNextToken(EmailLexer::S_COLON);

    if ($this->lexer->isNextToken(EmailLexer::S_IPV6TAG)) {
        $peek = clone $this->lexer;
        $peek->moveNext();
        if ($peek->isNextToken(EmailLexer::S_DOUBLECOLON)) {
            $colonAtStart = true;
        }
    }

    if ($colonAtStart) {
        $this->warnings[IPV6ColonStart::CODE] = new IPV6ColonStart();
    }

    return $this->doParseDomainLiteral();
}
|
Chris@18
|
197
|
Chris@18
|
/**
 * Consumes the tokens of a domain literal and classifies the result.
 *
 * Token values are accumulated until a closing token is reached or the
 * lexer runs out of tokens. Any '[' is then stripped and the text is passed
 * through checkIPV4Tag(). Depending on whether an IPv6 tag token was seen,
 * either a DomainLiteral warning is recorded, or an AddressLiteral warning
 * is recorded and checkIPV6Tag() is run.
 *
 * @return string|false The literal text (possibly rewritten by checkIPV4Tag()),
 *                      or false when checkIPV4Tag() returns false.
 *
 * @throws ExpectingDTEXT When the current token is NUL or an opening bracket follows.
 * @throws CRNoLF         When the next token is a bare CR.
 *
 * parseFWS() and validateQuotedPair() may throw further exceptions.
 */
protected function doParseDomainLiteral()
{
    $IPv6TAG = false;
    $addressLiteral = '';
    do {
        if ($this->lexer->token['type'] === EmailLexer::C_NUL) {
            throw new ExpectingDTEXT();
        }

        if ($this->lexer->token['type'] === EmailLexer::INVALID ||
            $this->lexer->token['type'] === EmailLexer::C_DEL ||
            $this->lexer->token['type'] === EmailLexer::S_LF
        ) {
            $this->warnings[ObsoleteDTEXT::CODE] = new ObsoleteDTEXT();
        }

        if ($this->lexer->isNextTokenAny([EmailLexer::S_OPENQBRACKET, EmailLexer::S_OPENBRACKET])) {
            throw new ExpectingDTEXT();
        }

        // Folding white space ahead: HTAB, SP or CRLF. The third entry must be
        // the CRLF token type itself. It used to be the boolean result of
        // comparing the current token type with CRLF, which is not a token
        // type, so an upcoming CRLF was not recognised here.
        if ($this->lexer->isNextTokenAny([EmailLexer::S_HTAB, EmailLexer::S_SP, EmailLexer::CRLF])) {
            $this->warnings[CFWSWithFWS::CODE] = new CFWSWithFWS();
            $this->parseFWS();
        }

        if ($this->lexer->isNextToken(EmailLexer::S_CR)) {
            throw new CRNoLF();
        }

        if ($this->lexer->token['type'] === EmailLexer::S_BACKSLASH) {
            // Keep the backslash, then step onto the escaped character.
            $this->warnings[ObsoleteDTEXT::CODE] = new ObsoleteDTEXT();
            $addressLiteral .= $this->lexer->token['value'];
            $this->lexer->moveNext();
            $this->validateQuotedPair();
        }
        if ($this->lexer->token['type'] === EmailLexer::S_IPV6TAG) {
            $IPv6TAG = true;
        }
        // NOTE(review): hasBrackets() searches for S_CLOSEBRACKET while this loop
        // stops on S_CLOSEQBRACKET - confirm against EmailLexer which is intended.
        if ($this->lexer->token['type'] === EmailLexer::S_CLOSEQBRACKET) {
            break;
        }

        $addressLiteral .= $this->lexer->token['value'];
    } while ($this->lexer->moveNext());

    $addressLiteral = str_replace('[', '', $addressLiteral);
    $addressLiteral = $this->checkIPV4Tag($addressLiteral);

    if (false === $addressLiteral) {
        return $addressLiteral;
    }

    if (!$IPv6TAG) {
        $this->warnings[DomainLiteral::CODE] = new DomainLiteral();

        return $addressLiteral;
    }

    $this->warnings[AddressLiteral::CODE] = new AddressLiteral();

    $this->checkIPV6Tag($addressLiteral);

    return $addressLiteral;
}
|
Chris@18
|
264
|
Chris@18
|
/**
 * Detects a dotted-quad IPv4 address at the end of an address literal.
 *
 * - No trailing IPv4 match: the literal is returned unchanged.
 * - The match starts at offset 0: an AddressLiteral warning is recorded and
 *   false is returned.
 * - Otherwise the IPv4 text is replaced with '0:0' so the remainder can be
 *   checked as IPv6 groups.
 *
 * @param string $addressLiteral Literal text to inspect.
 *
 * @return string|false
 */
protected function checkIPV4Tag($addressLiteral)
{
    $ipv4Match = [];
    $trailingIpv4 = '/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/';

    // Extract IPv4 part from the end of the address-literal (if there is one)
    $found = preg_match($trailingIpv4, $addressLiteral, $ipv4Match) > 0;
    if (!$found) {
        return $addressLiteral;
    }

    $offset = strrpos($addressLiteral, $ipv4Match[0]);
    if ($offset === 0) {
        $this->warnings[AddressLiteral::CODE] = new AddressLiteral();

        return false;
    }

    // Convert IPv4 part to IPv6 format for further testing
    return substr($addressLiteral, 0, $offset) . '0:0';
}
|
Chris@18
|
287
|
Chris@18
|
/**
 * Throws when the current token is not acceptable inside a domain.
 *
 * Checks run in a fixed order; the first that matches throws.
 *
 * @param array $prev Token that preceded the current one; only 'type' is read.
 *
 * @throws ExpectingATEXT For a double quote, semicolon, '>' or '<' token,
 *                        for an S_OPENQBRACKET token not preceded by '@', and
 *                        for a backslash followed by a GENERIC token.
 * @throws CommaInDomain  For a comma token.
 * @throws ConsecutiveAt  For an '@' token.
 * @throws DomainHyphened For a hyphen immediately followed by a dot.
 */
protected function checkDomainPartExceptions($prev)
{
    // Only look-ahead calls happen below, so the current type is stable.
    $currentType = $this->lexer->token['type'];

    $rejectedTypes = [
        EmailLexer::S_DQUOTE => true,
        EmailLexer::S_SEMICOLON => true,
        EmailLexer::S_GREATERTHAN => true,
        EmailLexer::S_LOWERTHAN => true,
    ];
    if (isset($rejectedTypes[$currentType])) {
        throw new ExpectingATEXT();
    }

    if ($currentType === EmailLexer::S_COMMA) {
        throw new CommaInDomain();
    }

    if ($currentType === EmailLexer::S_AT) {
        throw new ConsecutiveAt();
    }

    if ($currentType === EmailLexer::S_OPENQBRACKET) {
        if ($prev['type'] !== EmailLexer::S_AT) {
            throw new ExpectingATEXT();
        }
    }

    if ($currentType === EmailLexer::S_HYPHEN) {
        if ($this->lexer->isNextToken(EmailLexer::S_DOT)) {
            throw new DomainHyphened();
        }
    }

    if ($currentType === EmailLexer::S_BACKSLASH) {
        if ($this->lexer->isNextToken(EmailLexer::GENERIC)) {
            throw new ExpectingATEXT();
        }
    }
}
|
Chris@18
|
322
|
Chris@18
|
/**
 * Tells whether the current token opens a bracketed domain literal.
 *
 * When it does, the lexer is asked to find the matching S_CLOSEBRACKET.
 * A \RuntimeException from that search is translated into
 * ExpectingDomainLiteralClose.
 *
 * @return bool True when the current token is S_OPENBRACKET and the search
 *              did not raise; false when the current token is anything else.
 *
 * @throws ExpectingDomainLiteralClose When the search for the closing bracket fails.
 */
protected function hasBrackets()
{
    if ($this->lexer->token['type'] === EmailLexer::S_OPENBRACKET) {
        try {
            $this->lexer->find(EmailLexer::S_CLOSEBRACKET);
        } catch (\RuntimeException $e) {
            throw new ExpectingDomainLiteralClose();
        }

        return true;
    }

    return false;
}
|
Chris@18
|
337
|
Chris@18
|
/**
 * Records a LabelTooLong warning for a label longer than 63 bytes.
 *
 * The check only fires when the current token is a dot and the previous
 * token is a GENERIC token; the previous token's value is the label.
 *
 * @param array $prev Token preceding the current one ('type' and 'value' are read).
 *
 * @return void
 */
protected function checkLabelLength($prev)
{
    if ($this->lexer->token['type'] !== EmailLexer::S_DOT) {
        return;
    }

    if ($prev['type'] !== EmailLexer::GENERIC) {
        return;
    }

    if (strlen($prev['value']) <= 63) {
        return;
    }

    $this->warnings[LabelTooLong::CODE] = new LabelTooLong();
}
|
Chris@18
|
347
|
Chris@18
|
/**
 * Skips over a comment at the start of the domain.
 *
 * After isUnclosedComment() has run, tokens are consumed (calling
 * warnEscaping() for each) until the next token is a closing parenthesis.
 * The lexer is then advanced once more, and a dot directly afterwards is
 * rejected.
 *
 * @throws ExpectingATEXT When a dot is the next token after the comment.
 *
 * isUnclosedComment() and warnEscaping() are defined outside this view and
 * may throw as well.
 */
protected function parseDomainComments()
{
    $this->isUnclosedComment();

    for (;;) {
        if ($this->lexer->isNextToken(EmailLexer::S_CLOSEPARENTHESIS)) {
            break;
        }
        $this->warnEscaping();
        $this->lexer->moveNext();
    }

    $this->lexer->moveNext();

    $dotFollows = $this->lexer->isNextToken(EmailLexer::S_DOT);
    if ($dotFollows) {
        throw new ExpectingATEXT();
    }
}
|
Chris@18
|
361
|
Chris@18
|
/**
 * Adds a TLD warning when a DomainLiteral warning has already been recorded.
 *
 * The lookup uses !empty() so that a missing DomainLiteral entry is treated
 * as "not recorded" without an undefined-index notice. The truthiness test
 * is otherwise the same as reading the entry directly.
 *
 * @return void
 */
protected function addTLDWarnings()
{
    if (!empty($this->warnings[DomainLiteral::CODE])) {
        $this->warnings[TLD::CODE] = new TLD();
    }
}
|
Chris@18
|
368 }
|