Chris@0
|
1 <?php
|
Chris@0
|
2
|
Chris@0
|
3 namespace Egulias\EmailValidator;
|
Chris@0
|
4
|
Chris@0
|
5 use Doctrine\Common\Lexer\AbstractLexer;
|
Chris@0
|
6
|
Chris@0
|
/**
 * Lexer that splits an e-mail address string into typed tokens.
 *
 * Tokenisation itself is inherited from AbstractLexer; this class supplies
 * the regular-expression fragments, the mapping from matched text to a token
 * type, and a small amount of extra state (the previously consumed token and
 * a flag recording whether an invalid token was seen).
 */
class EmailLexer extends AbstractLexer
{
    // Token type identifiers.
    // NOTE(review): the first group looks like ASCII codes, but 49 is not the
    // ASCII code of '(' (that is 40). The value is kept unchanged because it
    // is a public constant that external code may compare against.
    const C_DEL              = 127;
    const C_NUL              = 0;
    const S_AT               = 64;
    const S_BACKSLASH        = 92;
    const S_DOT              = 46;
    const S_DQUOTE           = 34;
    const S_OPENPARENTHESIS  = 49;
    const S_CLOSEPARENTHESIS = 261;
    const S_OPENBRACKET      = 262;
    const S_CLOSEBRACKET     = 263;
    const S_HYPHEN           = 264;
    const S_COLON            = 265;
    const S_DOUBLECOLON      = 266;
    const S_SP               = 267;
    const S_HTAB             = 268;
    const S_CR               = 269;
    const S_LF               = 270;
    const S_IPV6TAG          = 271;
    const S_LOWERTHAN        = 272;
    const S_GREATERTHAN      = 273;
    const S_COMMA            = 274;
    const S_SEMICOLON        = 275;
    const S_OPENQBRACKET     = 276;
    const S_CLOSEQBRACKET    = 277;
    const S_SLASH            = 278;
    const S_EMPTY            = null;
    const GENERIC            = 300;
    const CRLF               = 301;
    const INVALID            = 302;
    const ASCII_INVALID_FROM = 127;
    const ASCII_INVALID_TO   = 199;

    /**
     * US-ASCII visible characters not valid for atext (@link http://tools.ietf.org/html/rfc5322#section-3.2.3)
     *
     * Maps the exact text of a token to its token type constant.
     *
     * @var array
     */
    protected $charValue = array(
        '('    => self::S_OPENPARENTHESIS,
        ')'    => self::S_CLOSEPARENTHESIS,
        '<'    => self::S_LOWERTHAN,
        '>'    => self::S_GREATERTHAN,
        '['    => self::S_OPENBRACKET,
        ']'    => self::S_CLOSEBRACKET,
        ':'    => self::S_COLON,
        ';'    => self::S_SEMICOLON,
        '@'    => self::S_AT,
        '\\'   => self::S_BACKSLASH,
        '/'    => self::S_SLASH,
        ','    => self::S_COMMA,
        '.'    => self::S_DOT,
        '"'    => self::S_DQUOTE,
        '-'    => self::S_HYPHEN,
        '::'   => self::S_DOUBLECOLON,
        ' '    => self::S_SP,
        "\t"   => self::S_HTAB,
        "\r"   => self::S_CR,
        "\n"   => self::S_LF,
        "\r\n" => self::CRLF,
        'IPv6' => self::S_IPV6TAG,
        '{'    => self::S_OPENQBRACKET,
        '}'    => self::S_CLOSEQBRACKET,
        ''     => self::S_EMPTY,
        // Double quotes are required here: in single quotes '\0' is the
        // two-character string backslash + "0", not the NUL byte.
        "\0"   => self::C_NUL,
    );

    /**
     * Set to true once getType() has classified a token as INVALID.
     *
     * @var bool
     */
    protected $hasInvalidTokens = false;

    /**
     * The value $this->token held just before the most recent moveNext().
     *
     * @var array|null
     */
    protected $previous;

    /**
     * Resets the lexer, including the state this subclass adds on top of the
     * parent (invalid-token flag and remembered previous token).
     */
    public function reset()
    {
        $this->hasInvalidTokens = false;
        parent::reset();
        // Without this, getPrevious() would keep returning a token that
        // belongs to the input processed before the reset.
        $this->previous = null;
    }

    /**
     * Tells whether an INVALID token has been seen since the last reset().
     *
     * @return boolean
     */
    public function hasInvalidTokens()
    {
        return $this->hasInvalidTokens;
    }

    /**
     * Checks that a token of the given type exists ahead of the current
     * position. The search runs on a clone, so this lexer's own position is
     * not advanced.
     *
     * @param $type
     * @throws \UnexpectedValueException when the clone's lookahead is empty after skipping
     * @return boolean always true when no exception is thrown
     */
    public function find($type)
    {
        $search = clone $this;
        $search->skipUntil($type);

        if (!$search->lookahead) {
            throw new \UnexpectedValueException($type . ' not found');
        }

        return true;
    }

    /**
     * getPrevious
     *
     * @return array|null token that was current before the last moveNext(),
     *                    or null if nothing has been remembered yet
     */
    public function getPrevious()
    {
        return $this->previous;
    }

    /**
     * moveNext
     *
     * Remembers the current token as "previous" and then delegates to the
     * parent implementation.
     *
     * @return boolean
     */
    public function moveNext()
    {
        $this->previous = $this->token;

        return parent::moveNext();
    }

    /**
     * Lexical catchable patterns.
     *
     * @return string[]
     */
    protected function getCatchablePatterns()
    {
        return array(
            '[a-zA-Z_]+[46]?', //ASCII and domain literal
            '[^\x00-\x7F]',  //UTF-8
            '[0-9]+',
            '\r\n',
            '::',
            '\s+?',
            '.',
        );
    }

    /**
     * Lexical non-catchable patterns.
     *
     * @return string[]
     */
    protected function getNonCatchablePatterns()
    {
        return array('[\xA0-\xff]+');
    }

    /**
     * Retrieve token type. Also processes the token value if necessary.
     *
     * Checks are applied in order: NUL byte, known entry in $charValue,
     * control characters (flagged as INVALID), and finally GENERIC.
     *
     * @param string $value
     * @throws \InvalidArgumentException
     * @return integer
     */
    protected function getType(&$value)
    {
        if ($this->isNullType($value)) {
            return self::C_NUL;
        }

        if ($this->isValid($value)) {
            return $this->charValue[$value];
        }

        if ($this->isUTF8Invalid($value)) {
            // Record the fact so callers can query hasInvalidTokens() later.
            $this->hasInvalidTokens = true;

            return self::INVALID;
        }

        return self::GENERIC;
    }

    /**
     * Tells whether the value has a non-null entry in $charValue.
     *
     * isset() is false for entries whose value is null, so the
     * '' => S_EMPTY entry is never reported as valid here.
     *
     * @param $value
     * @return bool
     */
    protected function isValid($value)
    {
        return isset($this->charValue[$value]);
    }

    /**
     * Tells whether the value is exactly the single NUL byte.
     *
     * @param $value
     * @return bool
     */
    protected function isNullType($value)
    {
        return $value === "\0";
    }

    /**
     * Tells whether the value contains at least one character of the Unicode
     * "Cc" (control) category.
     *
     * @param $value
     * @return bool
     */
    protected function isUTF8Invalid($value)
    {
        // preg_match() yields 1 on a match, 0 on no match and false on
        // error; only a real match counts as invalid, as before.
        return preg_match('/\p{Cc}+/u', $value) === 1;
    }

    /**
     * Regular-expression modifiers: "i" (case-insensitive) and "u" (UTF-8).
     * Presumably consumed by AbstractLexer when it builds its pattern;
     * confirm against the parent class.
     *
     * @return string
     */
    protected function getModifiers()
    {
        return 'iu';
    }
}
|