annotate vendor/egulias/email-validator/EmailValidator/Parser/DomainPart.php @ 19:fa3358dc1485 tip

Add ndrum files
author Chris Cannam
date Wed, 28 Aug 2019 13:14:47 +0100
parents af1871eacc83
children
rev   line source
Chris@18 1 <?php
Chris@18 2
Chris@18 3 namespace Egulias\EmailValidator\Parser;
Chris@18 4
Chris@18 5 use Egulias\EmailValidator\EmailLexer;
Chris@18 6 use Egulias\EmailValidator\Exception\CharNotAllowed;
Chris@18 7 use Egulias\EmailValidator\Exception\CommaInDomain;
Chris@18 8 use Egulias\EmailValidator\Exception\ConsecutiveAt;
Chris@18 9 use Egulias\EmailValidator\Exception\CRLFAtTheEnd;
Chris@18 10 use Egulias\EmailValidator\Exception\CRNoLF;
Chris@18 11 use Egulias\EmailValidator\Exception\DomainHyphened;
Chris@18 12 use Egulias\EmailValidator\Exception\DotAtEnd;
Chris@18 13 use Egulias\EmailValidator\Exception\DotAtStart;
Chris@18 14 use Egulias\EmailValidator\Exception\ExpectingATEXT;
Chris@18 15 use Egulias\EmailValidator\Exception\ExpectingDomainLiteralClose;
Chris@18 16 use Egulias\EmailValidator\Exception\ExpectingDTEXT;
Chris@18 17 use Egulias\EmailValidator\Exception\NoDomainPart;
Chris@18 18 use Egulias\EmailValidator\Exception\UnopenedComment;
Chris@18 19 use Egulias\EmailValidator\Warning\AddressLiteral;
Chris@18 20 use Egulias\EmailValidator\Warning\CFWSWithFWS;
Chris@18 21 use Egulias\EmailValidator\Warning\DeprecatedComment;
Chris@18 22 use Egulias\EmailValidator\Warning\DomainLiteral;
Chris@18 23 use Egulias\EmailValidator\Warning\DomainTooLong;
Chris@18 24 use Egulias\EmailValidator\Warning\IPV6BadChar;
Chris@18 25 use Egulias\EmailValidator\Warning\IPV6ColonEnd;
Chris@18 26 use Egulias\EmailValidator\Warning\IPV6ColonStart;
Chris@18 27 use Egulias\EmailValidator\Warning\IPV6Deprecated;
Chris@18 28 use Egulias\EmailValidator\Warning\IPV6DoubleColon;
Chris@18 29 use Egulias\EmailValidator\Warning\IPV6GroupCount;
Chris@18 30 use Egulias\EmailValidator\Warning\IPV6MaxGroups;
Chris@18 31 use Egulias\EmailValidator\Warning\LabelTooLong;
Chris@18 32 use Egulias\EmailValidator\Warning\ObsoleteDTEXT;
Chris@18 33 use Egulias\EmailValidator\Warning\TLD;
Chris@18 34
/**
 * Parses and validates the domain part of an e-mail address, working on the
 * token stream exposed by $this->lexer.
 *
 * Fatal problems are reported by throwing one of the exception classes
 * imported at the top of this file. Non-fatal findings are recorded in
 * $this->warnings, keyed by the CODE constant of the warning class.
 */
class DomainPart extends Parser
{
    /** A parsed domain longer than this many bytes gets a DomainTooLong warning. */
    const DOMAIN_MAX_LENGTH = 254;

    /** @var string Domain text stored by the last parse() call that completed. */
    protected $domainPart = '';

    /**
     * Validates the domain tokens and stores the resulting domain string.
     *
     * @param string $domainPart Not read by this method; all input is taken
     *                           from $this->lexer.
     *
     * @throws DotAtStart     when the first token is a dot
     * @throws NoDomainPart   when the first token is the "empty" token
     * @throws DomainHyphened when the domain starts or ends with a hyphen
     * @throws DotAtEnd       when the last consumed token is a dot
     * @throws CRLFAtTheEnd   when the last consumed token is a CR
     */
    public function parse($domainPart)
    {
        $this->lexer->moveNext();

        if ($this->lexer->token['type'] === EmailLexer::S_DOT) {
            throw new DotAtStart();
        }

        if ($this->lexer->token['type'] === EmailLexer::S_EMPTY) {
            throw new NoDomainPart();
        }
        if ($this->lexer->token['type'] === EmailLexer::S_HYPHEN) {
            throw new DomainHyphened();
        }

        // A comment right at the start of the domain is tolerated but flagged.
        if ($this->lexer->token['type'] === EmailLexer::S_OPENPARENTHESIS) {
            $this->warnings[DeprecatedComment::CODE] = new DeprecatedComment();
            $this->parseDomainComments();
        }

        $domain = $this->doParseDomainPart();

        // After the loop above, "previous" is the last token that was consumed.
        $prev = $this->lexer->getPrevious();
        $length = strlen($domain);

        if ($prev['type'] === EmailLexer::S_DOT) {
            throw new DotAtEnd();
        }
        if ($prev['type'] === EmailLexer::S_HYPHEN) {
            throw new DomainHyphened();
        }
        if ($length > self::DOMAIN_MAX_LENGTH) {
            $this->warnings[DomainTooLong::CODE] = new DomainTooLong();
        }
        if ($prev['type'] === EmailLexer::S_CR) {
            throw new CRLFAtTheEnd();
        }
        $this->domainPart = $domain;
    }

    /**
     * Returns the domain string stored by parse().
     *
     * @return string Empty string until parse() has completed successfully.
     */
    public function getDomainPart()
    {
        return $this->domainPart;
    }

    /**
     * Checks the IPv6 portion of an address literal and records warnings.
     *
     * This method never throws; every finding is stored in $this->warnings.
     *
     * @param string $addressLiteral Literal text; its first five bytes are
     *                               skipped (presumably the "IPv6:" tag —
     *                               confirm against the lexer).
     * @param int    $maxGroups      Number of colon-separated groups expected
     *                               when no "::" is present.
     */
    public function checkIPV6Tag($addressLiteral, $maxGroups = 8)
    {
        $prev = $this->lexer->getPrevious();
        if ($prev['type'] === EmailLexer::S_COLON) {
            $this->warnings[IPV6ColonEnd::CODE] = new IPV6ColonEnd();
        }

        $IPv6 = substr($addressLiteral, 5);
        //Daniel Marschall's new IPv6 testing strategy
        $matchesIP = explode(':', $IPv6);
        $groupCount = count($matchesIP);
        $colons = strpos($IPv6, '::');

        // Any group that is not 0-4 hexadecimal digits is a bad character.
        if (count(preg_grep('/^[0-9A-Fa-f]{0,4}$/', $matchesIP, PREG_GREP_INVERT)) !== 0) {
            $this->warnings[IPV6BadChar::CODE] = new IPV6BadChar();
        }

        if ($colons === false) {
            // We need exactly the right number of groups
            if ($groupCount !== $maxGroups) {
                $this->warnings[IPV6GroupCount::CODE] = new IPV6GroupCount();
            }
            return;
        }

        // First and last "::" positions differ, so "::" occurs more than once.
        if ($colons !== strrpos($IPv6, '::')) {
            $this->warnings[IPV6DoubleColon::CODE] = new IPV6DoubleColon();
            return;
        }

        if ($colons === 0 || $colons === (strlen($IPv6) - 2)) {
            // RFC 4291 allows :: at the start or end of an address
            //with 7 other groups in addition
            ++$maxGroups;
        }

        if ($groupCount > $maxGroups) {
            $this->warnings[IPV6MaxGroups::CODE] = new IPV6MaxGroups();
        } elseif ($groupCount === $maxGroups) {
            $this->warnings[IPV6Deprecated::CODE] = new IPV6Deprecated();
        }
    }

    /**
     * Walks the remaining tokens, validating each one and concatenating the
     * token values into the domain string.
     *
     * The order of the checks inside the loop matters: several of them advance
     * the lexer, which changes what the following checks see.
     *
     * @return string Concatenation of the values of the tokens that were
     *                current at the end of each loop iteration.
     *
     * @throws UnopenedComment when ")" appears with no comment open
     */
    protected function doParseDomainPart()
    {
        $domain = '';
        $openedParenthesis = 0;
        do {
            $prev = $this->lexer->getPrevious();

            $this->checkNotAllowedChars($this->lexer->token);

            if ($this->lexer->token['type'] === EmailLexer::S_OPENPARENTHESIS) {
                // parseComments()/getOpenedParenthesis() live outside this
                // class; presumably they consume the comment and report how
                // many "(" were opened — confirm against the parent.
                $this->parseComments();
                $openedParenthesis += $this->getOpenedParenthesis();
                $this->lexer->moveNext();
                $tmpPrev = $this->lexer->getPrevious();
                if ($tmpPrev['type'] === EmailLexer::S_CLOSEPARENTHESIS) {
                    $openedParenthesis--;
                }
            }
            if ($this->lexer->token['type'] === EmailLexer::S_CLOSEPARENTHESIS) {
                if ($openedParenthesis === 0) {
                    throw new UnopenedComment();
                } else {
                    $openedParenthesis--;
                }
            }

            $this->checkConsecutiveDots();
            $this->checkDomainPartExceptions($prev);

            if ($this->hasBrackets()) {
                $this->parseDomainLiteral();
            }

            $this->checkLabelLength($prev);

            if ($this->isFWS()) {
                $this->parseFWS();
            }

            $domain .= $this->lexer->token['value'];
            $this->lexer->moveNext();
        } while ($this->lexer->token);

        return $domain;
    }

    /**
     * Rejects tokens that may never appear in a domain.
     *
     * @param array $token Lexer token; only its 'type' entry is read.
     *
     * @throws CharNotAllowed for a backslash or slash token
     */
    private function checkNotAllowedChars($token)
    {
        $notAllowed = [EmailLexer::S_BACKSLASH => true, EmailLexer::S_SLASH=> true];
        if (isset($notAllowed[$token['type']])) {
            throw new CharNotAllowed();
        }
    }

    /**
     * Records IPv6 "leading colon" warnings based on lookahead, then hands
     * over to doParseDomainLiteral().
     *
     * @return string|false Whatever doParseDomainLiteral() returns.
     */
    protected function parseDomainLiteral()
    {
        if ($this->lexer->isNextToken(EmailLexer::S_COLON)) {
            $this->warnings[IPV6ColonStart::CODE] = new IPV6ColonStart();
        }
        if ($this->lexer->isNextToken(EmailLexer::S_IPV6TAG)) {
            // Look two tokens ahead on a copy so the real lexer is not advanced.
            $lexer = clone $this->lexer;
            $lexer->moveNext();
            if ($lexer->isNextToken(EmailLexer::S_DOUBLECOLON)) {
                $this->warnings[IPV6ColonStart::CODE] = new IPV6ColonStart();
            }
        }

        return $this->doParseDomainLiteral();
    }

    /**
     * Consumes the tokens of a domain literal, validating them and collecting
     * the literal text, then classifies the literal as IPv4, IPv6 or generic.
     *
     * @return string|false The collected literal (with a trailing IPv4 part
     *                      rewritten by checkIPV4Tag()), or false when the
     *                      whole literal is an IPv4 address.
     *
     * @throws ExpectingDTEXT for a NUL token or a nested opening bracket
     * @throws CRNoLF         when the next token is a bare CR
     */
    protected function doParseDomainLiteral()
    {
        $IPv6TAG = false;
        $addressLiteral = '';
        do {
            if ($this->lexer->token['type'] === EmailLexer::C_NUL) {
                throw new ExpectingDTEXT();
            }

            if ($this->lexer->token['type'] === EmailLexer::INVALID ||
                $this->lexer->token['type'] === EmailLexer::C_DEL ||
                $this->lexer->token['type'] === EmailLexer::S_LF
            ) {
                $this->warnings[ObsoleteDTEXT::CODE] = new ObsoleteDTEXT();
            }

            if ($this->lexer->isNextTokenAny(array(EmailLexer::S_OPENQBRACKET, EmailLexer::S_OPENBRACKET))) {
                throw new ExpectingDTEXT();
            }

            // Folding white space inside the literal: tab, space or CRLF.
            // The third entry used to be the boolean result of comparing the
            // current token type with CRLF, which is not a token type and so
            // could not match a lookahead type (assuming the lexer compares
            // strictly — confirm). Listing the CRLF token type itself makes a
            // CRLF lookahead take this branch.
            if ($this->lexer->isNextTokenAny(
                array(EmailLexer::S_HTAB, EmailLexer::S_SP, EmailLexer::CRLF)
            )) {
                $this->warnings[CFWSWithFWS::CODE] = new CFWSWithFWS();
                $this->parseFWS();
            }

            if ($this->lexer->isNextToken(EmailLexer::S_CR)) {
                throw new CRNoLF();
            }

            if ($this->lexer->token['type'] === EmailLexer::S_BACKSLASH) {
                // Keep the backslash, step onto the escaped token and let the
                // parent validate the pair.
                $this->warnings[ObsoleteDTEXT::CODE] = new ObsoleteDTEXT();
                $addressLiteral .= $this->lexer->token['value'];
                $this->lexer->moveNext();
                $this->validateQuotedPair();
            }
            if ($this->lexer->token['type'] === EmailLexer::S_IPV6TAG) {
                $IPv6TAG = true;
            }
            if ($this->lexer->token['type'] === EmailLexer::S_CLOSEQBRACKET) {
                break;
            }

            $addressLiteral .= $this->lexer->token['value'];

        } while ($this->lexer->moveNext());

        $addressLiteral = str_replace('[', '', $addressLiteral);
        $addressLiteral = $this->checkIPV4Tag($addressLiteral);

        // false means the literal was a bare IPv4 address; nothing more to do.
        if (false === $addressLiteral) {
            return $addressLiteral;
        }

        if (!$IPv6TAG) {
            $this->warnings[DomainLiteral::CODE] = new DomainLiteral();
            return $addressLiteral;
        }

        $this->warnings[AddressLiteral::CODE] = new AddressLiteral();

        $this->checkIPV6Tag($addressLiteral);

        return $addressLiteral;
    }

    /**
     * Detects a dotted-quad IPv4 address at the end of an address literal.
     *
     * @param string $addressLiteral Literal text without the opening bracket.
     *
     * @return string|false false (plus an AddressLiteral warning) when the
     *                      IPv4 address starts at offset 0; otherwise the
     *                      literal, with a trailing IPv4 address replaced by
     *                      "0:0" so it can be checked as IPv6 groups.
     */
    protected function checkIPV4Tag($addressLiteral)
    {
        $matchesIP = array();

        // Extract IPv4 part from the end of the address-literal (if there is one)
        if (preg_match(
            '/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/',
            $addressLiteral,
            $matchesIP
        ) > 0
        ) {
            $index = strrpos($addressLiteral, $matchesIP[0]);
            if ($index === 0) {
                $this->warnings[AddressLiteral::CODE] = new AddressLiteral();
                return false;
            }
            // Convert IPv4 part to IPv6 format for further testing
            $addressLiteral = substr($addressLiteral, 0, $index) . '0:0';
        }

        return $addressLiteral;
    }

    /**
     * Throws for tokens, or token combinations, that are invalid in a domain.
     *
     * @param array $prev Token that preceded the current one; only its 'type'
     *                    entry is read.
     *
     * @throws ExpectingATEXT for a double quote, semicolon, ">" or "<"; for an
     *                        S_OPENQBRACKET token not directly after "@"; and
     *                        for a backslash followed by a GENERIC token
     * @throws CommaInDomain  for a comma
     * @throws ConsecutiveAt  for a second "@"
     * @throws DomainHyphened for a hyphen directly followed by a dot
     */
    protected function checkDomainPartExceptions($prev)
    {
        $invalidDomainTokens = array(
            EmailLexer::S_DQUOTE => true,
            EmailLexer::S_SEMICOLON => true,
            EmailLexer::S_GREATERTHAN => true,
            EmailLexer::S_LOWERTHAN => true,
        );

        if (isset($invalidDomainTokens[$this->lexer->token['type']])) {
            throw new ExpectingATEXT();
        }

        if ($this->lexer->token['type'] === EmailLexer::S_COMMA) {
            throw new CommaInDomain();
        }

        if ($this->lexer->token['type'] === EmailLexer::S_AT) {
            throw new ConsecutiveAt();
        }

        if ($this->lexer->token['type'] === EmailLexer::S_OPENQBRACKET && $prev['type'] !== EmailLexer::S_AT) {
            throw new ExpectingATEXT();
        }

        if ($this->lexer->token['type'] === EmailLexer::S_HYPHEN && $this->lexer->isNextToken(EmailLexer::S_DOT)) {
            throw new DomainHyphened();
        }

        if ($this->lexer->token['type'] === EmailLexer::S_BACKSLASH
            && $this->lexer->isNextToken(EmailLexer::GENERIC)) {
            throw new ExpectingATEXT();
        }
    }

    /**
     * Tells whether the current token opens a bracketed domain literal.
     *
     * @return bool false when the current token is not S_OPENBRACKET; true
     *              when it is and a closing bracket is found.
     *
     * @throws ExpectingDomainLiteralClose when the lexer's find() raises a
     *                                     RuntimeException for the closing
     *                                     bracket
     */
    protected function hasBrackets()
    {
        if ($this->lexer->token['type'] !== EmailLexer::S_OPENBRACKET) {
            return false;
        }

        try {
            $this->lexer->find(EmailLexer::S_CLOSEBRACKET);
        } catch (\RuntimeException $e) {
            throw new ExpectingDomainLiteralClose();
        }

        return true;
    }

    /**
     * Records a LabelTooLong warning when a dot follows a GENERIC token whose
     * value is longer than 63 bytes.
     *
     * Only the single token before the dot is measured, not a label built
     * from several tokens.
     *
     * @param array $prev Token that preceded the current one.
     */
    protected function checkLabelLength($prev)
    {
        if ($this->lexer->token['type'] === EmailLexer::S_DOT &&
            $prev['type'] === EmailLexer::GENERIC &&
            strlen($prev['value']) > 63
        ) {
            $this->warnings[LabelTooLong::CODE] = new LabelTooLong();
        }
    }

    /**
     * Skips a comment at the start of the domain, up to its closing
     * parenthesis.
     *
     * isUnclosedComment() and warnEscaping() are defined outside this class;
     * presumably the first guards against a missing ")" so that the loop
     * below terminates — confirm against the parent.
     *
     * @throws ExpectingATEXT when a dot directly follows the comment
     */
    protected function parseDomainComments()
    {
        $this->isUnclosedComment();
        while (!$this->lexer->isNextToken(EmailLexer::S_CLOSEPARENTHESIS)) {
            $this->warnEscaping();
            $this->lexer->moveNext();
        }

        // Step onto the closing parenthesis itself.
        $this->lexer->moveNext();
        if ($this->lexer->isNextToken(EmailLexer::S_DOT)) {
            throw new ExpectingATEXT();
        }
    }

    /**
     * Adds a TLD warning when a DomainLiteral warning has been recorded.
     *
     * isset() is used so that a missing DomainLiteral entry does not raise an
     * undefined-index notice; the outcome is unchanged when the entry exists.
     *
     * NOTE(review): tying the TLD warning to the presence of a DomainLiteral
     * warning looks inverted — confirm the intended condition.
     */
    protected function addTLDWarnings()
    {
        if (isset($this->warnings[DomainLiteral::CODE])) {
            $this->warnings[TLD::CODE] = new TLD();
        }
    }
}