Chris@210
|
1 module CodeRay
|
Chris@210
|
2 module Scanners
|
Chris@210
|
3
|
Chris@210
|
4 load :html
|
Chris@210
|
5
|
Chris@210
|
6 # Original by Stefan Walk.
|
Chris@210
|
7 class PHP < Scanner
|
Chris@210
|
8
|
Chris@210
|
9 register_for :php
|
Chris@210
|
10 file_extension 'php'
|
Chris@210
|
11
|
Chris@210
|
12 KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
|
Chris@210
|
13
|
Chris@210
|
# Creates the nested HTML scanner used for everything outside PHP sections.
# It is handed this scanner's @tokens object and told to keep its tokens and
# state between calls (see the :keep_tokens / :keep_state options).
def setup
  @html_scanner = CodeRay.scanner(
    :html,
    :tokens      => @tokens,
    :keep_tokens => true,
    :keep_state  => true
  )
end
|
Chris@210
|
17
|
Chris@210
|
# Resets this scanner through the superclass implementation, then resets the
# nested HTML scanner as well so both start from a clean state.
def reset_instance
  super()
  @html_scanner.reset
end
|
Chris@210
|
22
|
Chris@210
|
# Word lists used to classify PHP identifiers and variables.
#
# IDENT_KIND maps a bare identifier (case-insensitively) to a token kind and
# falls back to :ident; VARIABLE_KIND maps "$name" variables to :predefined
# for the well-known superglobals and falls back to :local_variable.
module Words

  # according to http://www.php.net/manual/en/reserved.keywords.php
  KEYWORDS = %w[
    abstract and array as break case catch class clone const continue declare default do else elseif
    enddeclare endfor endforeach endif endswitch endwhile extends final for foreach function global
    goto if implements interface instanceof namespace new or private protected public static switch
    throw try use var while xor
    cfunction old_function
  ]

  TYPES = %w[ int integer float double bool boolean string array object resource ]

  LANGUAGE_CONSTRUCTS = %w[
    die echo empty exit eval include include_once isset list
    require require_once return print unset
  ]

  CLASSES = %w[ Directory stdClass __PHP_Incomplete_Class exception php_user_filter Closure ]

  # according to http://php.net/quickref.php on 2009-04-21;
  # all functions with _ excluded (module functions) and selected additional functions
  BUILTIN_FUNCTIONS = %w[
    abs acos acosh addcslashes addslashes aggregate array arsort ascii2ebcdic asin asinh asort assert atan atan2
    atanh basename bcadd bccomp bcdiv bcmod bcmul bcpow bcpowmod bcscale bcsqrt bcsub bin2hex bindec
    bindtextdomain bzclose bzcompress bzdecompress bzerrno bzerror bzerrstr bzflush bzopen bzread bzwrite
    calculhmac ceil chdir checkdate checkdnsrr chgrp chmod chop chown chr chroot clearstatcache closedir closelog
    compact constant copy cos cosh count crc32 crypt current date dcgettext dcngettext deaggregate decbin dechex
    decoct define defined deg2rad delete dgettext die dirname diskfreespace dl dngettext doubleval each
    ebcdic2ascii echo empty end ereg eregi escapeshellarg escapeshellcmd eval exec exit exp explode expm1 extract
    fclose feof fflush fgetc fgetcsv fgets fgetss file fileatime filectime filegroup fileinode filemtime fileowner
    fileperms filepro filesize filetype floatval flock floor flush fmod fnmatch fopen fpassthru fprintf fputcsv
    fputs fread frenchtojd fscanf fseek fsockopen fstat ftell ftok ftruncate fwrite getallheaders getcwd getdate
    getenv gethostbyaddr gethostbyname gethostbynamel getimagesize getlastmod getmxrr getmygid getmyinode getmypid
    getmyuid getopt getprotobyname getprotobynumber getrandmax getrusage getservbyname getservbyport gettext
    gettimeofday gettype glob gmdate gmmktime gmstrftime gregoriantojd gzclose gzcompress gzdecode gzdeflate
    gzencode gzeof gzfile gzgetc gzgets gzgetss gzinflate gzopen gzpassthru gzputs gzread gzrewind gzseek gztell
    gzuncompress gzwrite hash header hebrev hebrevc hexdec htmlentities htmlspecialchars hypot iconv idate
    implode include intval ip2long iptcembed iptcparse isset
    jddayofweek jdmonthname jdtofrench jdtogregorian jdtojewish jdtojulian jdtounix jewishtojd join jpeg2wbmp
    juliantojd key krsort ksort lcfirst lchgrp lchown levenshtein link linkinfo list localeconv localtime log
    log10 log1p long2ip lstat ltrim mail main max md5 metaphone mhash microtime min mkdir mktime msql natcasesort
    natsort next ngettext nl2br nthmac octdec opendir openlog
    ord overload pack passthru pathinfo pclose pfsockopen phpcredits phpinfo phpversion pi png2wbmp popen pos pow
    prev print printf putenv quotemeta rad2deg rand range rawurldecode rawurlencode readdir readfile readgzfile
    readline readlink realpath recode rename require reset rewind rewinddir rmdir round rsort rtrim scandir
    serialize setcookie setlocale setrawcookie settype sha1 shuffle signeurlpaiement sin sinh sizeof sleep snmpget
    snmpgetnext snmprealwalk snmpset snmpwalk snmpwalkoid sort soundex split spliti sprintf sqrt srand sscanf stat
    strcasecmp strchr strcmp strcoll strcspn strftime stripcslashes stripos stripslashes stristr strlen
    strnatcasecmp strnatcmp strncasecmp strncmp strpbrk strpos strptime strrchr strrev strripos strrpos strspn
    strstr strtok strtolower strtotime strtoupper strtr strval substr symlink syslog system tan tanh tempnam
    textdomain time tmpfile touch trim uasort ucfirst ucwords uksort umask uniqid unixtojd unlink unpack
    unserialize unset urldecode urlencode usleep usort vfprintf virtual vprintf vsprintf wordwrap
    array_change_key_case array_chunk array_combine array_count_values array_diff array_diff_assoc
    array_diff_key array_diff_uassoc array_diff_ukey array_fill array_fill_keys array_filter array_flip
    array_intersect array_intersect_assoc array_intersect_key array_intersect_uassoc array_intersect_ukey
    array_key_exists array_keys array_map array_merge array_merge_recursive array_multisort array_pad
    array_pop array_product array_push array_rand array_reduce array_reverse array_search array_shift
    array_slice array_splice array_sum array_udiff array_udiff_assoc array_udiff_uassoc array_uintersect
    array_uintersect_assoc array_uintersect_uassoc array_unique array_unshift array_values array_walk
    array_walk_recursive
    assert_options base_convert base64_decode base64_encode
    chunk_split class_exists class_implements class_parents
    count_chars debug_backtrace debug_print_backtrace debug_zval_dump
    error_get_last error_log error_reporting extension_loaded
    file_exists file_get_contents file_put_contents load_file
    func_get_arg func_get_args func_num_args function_exists
    get_browser get_called_class get_cfg_var get_class get_class_methods get_class_vars
    get_current_user get_declared_classes get_declared_interfaces get_defined_constants
    get_defined_functions get_defined_vars get_extension_funcs get_headers get_html_translation_table
    get_include_path get_included_files get_loaded_extensions get_magic_quotes_gpc get_magic_quotes_runtime
    get_meta_tags get_object_vars get_parent_class get_required_files get_resource_type
    gc_collect_cycles gc_disable gc_enable gc_enabled
    halt_compiler headers_list headers_sent highlight_file highlight_string
    html_entity_decode htmlspecialchars_decode
    in_array include_once inclued_get_data
    is_a is_array is_binary is_bool is_buffer is_callable is_dir is_double is_executable is_file is_finite
    is_float is_infinite is_int is_integer is_link is_long is_nan is_null is_numeric is_object is_readable
    is_real is_resource is_scalar is_soap_fault is_string is_subclass_of is_unicode is_uploaded_file
    is_writable is_writeable
    locale_get_default locale_set_default
    number_format override_function parse_str parse_url
    php_check_syntax php_ini_loaded_file php_ini_scanned_files php_logo_guid php_sapi_name
    php_strip_whitespace php_uname
    preg_filter preg_grep preg_last_error preg_match preg_match_all preg_quote preg_replace
    preg_replace_callback preg_split print_r
    require_once register_shutdown_function register_tick_function
    set_error_handler set_exception_handler set_file_buffer set_include_path
    set_magic_quotes_runtime set_time_limit shell_exec
    str_getcsv str_ireplace str_pad str_repeat str_replace str_rot13 str_shuffle str_split str_word_count
    strip_tags substr_compare substr_count substr_replace
    time_nanosleep time_sleep_until
    token_get_all token_name trigger_error
    unregister_tick_function use_soap_error_handler user_error
    utf8_decode utf8_encode var_dump var_export
    version_compare
    zend_logo_guid zend_thread_id zend_version
    create_function call_user_func_array
    posix_access posix_ctermid posix_get_last_error posix_getcwd posix_getegid
    posix_geteuid posix_getgid posix_getgrgid posix_getgrnam posix_getgroups
    posix_getlogin posix_getpgid posix_getpgrp posix_getpid posix_getppid
    posix_getpwnam posix_getpwuid posix_getrlimit posix_getsid posix_getuid
    posix_initgroups posix_isatty posix_kill posix_mkfifo posix_mknod
    posix_setegid posix_seteuid posix_setgid posix_setpgid posix_setsid
    posix_setuid posix_strerror posix_times posix_ttyname posix_uname
    pcntl_alarm pcntl_exec pcntl_fork pcntl_getpriority pcntl_setpriority
    pcntl_signal pcntl_signal_dispatch pcntl_sigprocmask pcntl_sigtimedwait
    pcntl_sigwaitinfo pcntl_wait pcntl_waitpid pcntl_wexitstatus pcntl_wifexited
    pcntl_wifsignaled pcntl_wifstopped pcntl_wstopsig pcntl_wtermsig
  ]
  # TODO: more built-in PHP functions?

  # Error-level constants (E_*); highlighted with the :exception kind.
  EXCEPTIONS = %w[
    E_ERROR E_WARNING E_PARSE E_NOTICE E_CORE_ERROR E_CORE_WARNING E_COMPILE_ERROR E_COMPILE_WARNING
    E_USER_ERROR E_USER_WARNING E_USER_NOTICE E_DEPRECATED E_USER_DEPRECATED E_ALL E_STRICT
  ]

  CONSTANTS = %w[
    null true false self parent
    __DIR__ __FILE__ __LINE__
    __CLASS__ __NAMESPACE__ __METHOD__ __FUNCTION__
    PHP_VERSION PHP_MAJOR_VERSION PHP_MINOR_VERSION PHP_RELEASE_VERSION PHP_VERSION_ID PHP_EXTRA_VERSION PHP_ZTS
    PHP_DEBUG PHP_MAXPATHLEN PHP_OS PHP_SAPI PHP_EOL PHP_INT_MAX PHP_INT_SIZE DEFAULT_INCLUDE_PATH
    PEAR_INSTALL_DIR PEAR_EXTENSION_DIR PHP_EXTENSION_DIR PHP_PREFIX PHP_BINDIR PHP_LIBDIR PHP_DATADIR
    PHP_SYSCONFDIR PHP_LOCALSTATEDIR PHP_CONFIG_FILE_PATH PHP_CONFIG_FILE_SCAN_DIR PHP_SHLIB_SUFFIX
    PHP_OUTPUT_HANDLER_START PHP_OUTPUT_HANDLER_CONT PHP_OUTPUT_HANDLER_END
    __COMPILER_HALT_OFFSET__
    EXTR_OVERWRITE EXTR_SKIP EXTR_PREFIX_SAME EXTR_PREFIX_ALL EXTR_PREFIX_INVALID EXTR_PREFIX_IF_EXISTS
    EXTR_IF_EXISTS SORT_ASC SORT_DESC SORT_REGULAR SORT_NUMERIC SORT_STRING CASE_LOWER CASE_UPPER COUNT_NORMAL
    COUNT_RECURSIVE ASSERT_ACTIVE ASSERT_CALLBACK ASSERT_BAIL ASSERT_WARNING ASSERT_QUIET_EVAL CONNECTION_ABORTED
    CONNECTION_NORMAL CONNECTION_TIMEOUT INI_USER INI_PERDIR INI_SYSTEM INI_ALL M_E M_LOG2E M_LOG10E M_LN2 M_LN10
    M_PI M_PI_2 M_PI_4 M_1_PI M_2_PI M_2_SQRTPI M_SQRT2 M_SQRT1_2 CRYPT_SALT_LENGTH CRYPT_STD_DES CRYPT_EXT_DES
    CRYPT_MD5 CRYPT_BLOWFISH DIRECTORY_SEPARATOR SEEK_SET SEEK_CUR SEEK_END LOCK_SH LOCK_EX LOCK_UN LOCK_NB
    HTML_SPECIALCHARS HTML_ENTITIES ENT_COMPAT ENT_QUOTES ENT_NOQUOTES INFO_GENERAL INFO_CREDITS
    INFO_CONFIGURATION INFO_MODULES INFO_ENVIRONMENT INFO_VARIABLES INFO_LICENSE INFO_ALL CREDITS_GROUP
    CREDITS_GENERAL CREDITS_SAPI CREDITS_MODULES CREDITS_DOCS CREDITS_FULLPAGE CREDITS_QA CREDITS_ALL STR_PAD_LEFT
    STR_PAD_RIGHT STR_PAD_BOTH PATHINFO_DIRNAME PATHINFO_BASENAME PATHINFO_EXTENSION PATH_SEPARATOR CHAR_MAX
    LC_CTYPE LC_NUMERIC LC_TIME LC_COLLATE LC_MONETARY LC_ALL LC_MESSAGES ABDAY_1 ABDAY_2 ABDAY_3 ABDAY_4 ABDAY_5
    ABDAY_6 ABDAY_7 DAY_1 DAY_2 DAY_3 DAY_4 DAY_5 DAY_6 DAY_7 ABMON_1 ABMON_2 ABMON_3 ABMON_4 ABMON_5 ABMON_6
    ABMON_7 ABMON_8 ABMON_9 ABMON_10 ABMON_11 ABMON_12 MON_1 MON_2 MON_3 MON_4 MON_5 MON_6 MON_7 MON_8 MON_9
    MON_10 MON_11 MON_12 AM_STR PM_STR D_T_FMT D_FMT T_FMT T_FMT_AMPM ERA ERA_YEAR ERA_D_T_FMT ERA_D_FMT ERA_T_FMT
    ALT_DIGITS INT_CURR_SYMBOL CURRENCY_SYMBOL CRNCYSTR MON_DECIMAL_POINT MON_THOUSANDS_SEP MON_GROUPING
    POSITIVE_SIGN NEGATIVE_SIGN INT_FRAC_DIGITS FRAC_DIGITS P_CS_PRECEDES P_SEP_BY_SPACE N_CS_PRECEDES
    N_SEP_BY_SPACE P_SIGN_POSN N_SIGN_POSN DECIMAL_POINT RADIXCHAR THOUSANDS_SEP THOUSEP GROUPING YESEXPR NOEXPR
    YESSTR NOSTR CODESET LOG_EMERG LOG_ALERT LOG_CRIT LOG_ERR LOG_WARNING LOG_NOTICE LOG_INFO LOG_DEBUG LOG_KERN
    LOG_USER LOG_MAIL LOG_DAEMON LOG_AUTH LOG_SYSLOG LOG_LPR LOG_NEWS LOG_UUCP LOG_CRON LOG_AUTHPRIV LOG_LOCAL0
    LOG_LOCAL1 LOG_LOCAL2 LOG_LOCAL3 LOG_LOCAL4 LOG_LOCAL5 LOG_LOCAL6 LOG_LOCAL7 LOG_PID LOG_CONS LOG_ODELAY
    LOG_NDELAY LOG_NOWAIT LOG_PERROR
  ]

  # Predefined variables (superglobals and friends), including the leading "$".
  PREDEFINED = %w[
    $GLOBALS $_SERVER $_GET $_POST $_FILES $_REQUEST $_SESSION $_ENV
    $_COOKIE $php_errormsg $HTTP_RAW_POST_DATA $http_response_header
    $argc $argv
  ]

  # Case-insensitive identifier lookup; unknown words yield :ident.
  # NOTE(review): some words occur in several lists (e.g. "array", "echo",
  # "include"); which kind wins presumably depends on the order of the add
  # calls below -- confirm against WordList#add before reordering them.
  IDENT_KIND = CaseIgnoringWordList.new(:ident).
    add(KEYWORDS, :reserved).
    add(TYPES, :pre_type).
    add(LANGUAGE_CONSTRUCTS, :reserved).
    add(BUILTIN_FUNCTIONS, :predefined).
    add(CLASSES, :pre_constant).
    add(EXCEPTIONS, :exception).
    add(CONSTANTS, :pre_constant)

  # Case-sensitive variable lookup; unknown variables yield :local_variable.
  VARIABLE_KIND = WordList.new(:local_variable).
    add(PREDEFINED, :predefined)
end
|
Chris@210
|
191
|
Chris@210
|
# Regular expressions shared by the PHP scanner.
module RE

  # Start of a PHP section: a <script language="php"> tag (either quote
  # style), "<?php" with an optional digit, or a short "<?" that is not
  # the start of an "<?xml" declaration.
  PHP_START = /
    <script\s+[^>]*?language\s*=\s*"php"[^>]*?> |
    <script\s+[^>]*?language\s*=\s*'php'[^>]*?> |
    <\?php\d? |
    <\?(?!xml)
  /xi

  # End of a PHP section: a closing script tag or "?>".
  PHP_END = %r!
    </script> |
    \?>
  !xi

  # Markup whose presence makes the scanner treat the input as HTML with
  # embedded PHP rather than as bare PHP code.
  HTML_INDICATOR = /<!DOCTYPE html|<(?:html|body|div|p)[> ]/i

  # PHP identifier: letters, digits, underscore and any byte 0x7f-0xFF.
  # The n flag makes this a binary (ASCII-8BIT) pattern. Without it the
  # \xFF escape is rejected as an "invalid multibyte escape" when this file
  # is read as UTF-8 (the default source encoding since Ruby 2.0), and the
  # scanner matches against an ASCII-8BIT string anyway.
  IDENTIFIER = /[a-z_\x7f-\xFF][a-z0-9_\x7f-\xFF]*/in
  VARIABLE = /\$#{IDENTIFIER}/

  OPERATOR = /
    \.(?!\d)=? |      # dot that is not decimal point, string concatenation
    && | \|\| |       # logic
    :: | -> | => |    # scope, member, dictionary
    \\(?!\n) |        # namespace
    \+\+ | -- |       # increment, decrement
    [,;?:()\[\]{}] |  # simple delimiters
    [-+*\/%&|^]=? |   # ordinary math, binary logic, assignment shortcuts
    [~$] |            # whatever
    =& |              # reference assignment
    [=!]=?=? | <> |   # comparison and assignment
    <<=? | >>=? | [<>]=?  # comparison and shift
  /x

end
|
Chris@210
|
226
|
Chris@210
|
# Tokenizes the scanner's input as PHP, optionally embedded in HTML.
#
# tokens  - the token list; [text, kind] pairs and [:open/:close, kind]
#           markers are appended to it with <<.
# options - accepted for interface compatibility; not read in this method.
#
# Returns the tokens object that was passed in.
#
# The method is a state machine driven by the +states+ stack:
#   :initial            - HTML text, delegated to @html_scanner
#   :php                - PHP code
#   :sqstring           - inside '...' or a nowdoc (<<<'ID')
#   :dqstring           - inside "...", `...` or a heredoc
#   :class_expected     - just after the keyword "class"
#   :function_expected  - just after the keyword "function"
# While a "{$...}" expression inside a double-quoted string is scanned, the
# string state on the stack is temporarily replaced by the pair
# [state, delimiter] so the delimiter can be restored at the closing brace.
def scan_tokens tokens, options
  # The patterns in RE work on raw bytes, so scan an ASCII-8BIT copy of the
  # input. Characters that cannot be converted are replaced by '?'.
  if string.respond_to?(:encoding)
    unless string.encoding == Encoding::ASCII_8BIT
      self.string = string.encode Encoding::ASCII_8BIT,
        :invalid => :replace, :undef => :replace, :replace => '?'
    end
  end

  if check(RE::PHP_START) ||  # starts with <?
   (match?(/\s*<\S/) && exist?(RE::PHP_START)) || # starts with tag and contains <?
   exist?(RE::HTML_INDICATOR) ||
   check(/.{1,100}#{RE::PHP_START}/om)  # PHP start after max 100 chars
    # is HTML with embedded PHP, so start with HTML
    states = [:initial]
  else
    # is just PHP, so start with PHP surrounded by HTML
    states = [:initial, :php]
  end

  label_expected = true   # an "identifier:" at this position would be a label
  case_expected = false   # a "case"/"default" was seen; its ":" is still pending

  heredoc_delimiter = nil # Regexp matching the closing heredoc identifier, if inside one
  delimiter = nil         # opening quote character of the current "..."/`...` string
  modifier = nil          # pending "b" prefix of a binary string literal

  until eos?

    match = nil
    kind = nil

    case states.last

    when :initial  # HTML
      if scan RE::PHP_START
        kind = :inline_delimiter
        label_expected = true
        states << :php
      else
        # Hand everything up to the next PHP start (or the end of input)
        # to the HTML scanner.
        match = scan_until(/(?=#{RE::PHP_START})/o) || scan_until(/\z/)
        @html_scanner.tokenize match unless match.empty?
        next
      end

    when :php
      if match = scan(/\s+/)
        tokens << [match, :space]
        next

      elsif scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
        kind = :comment

      elsif match = scan(RE::IDENTIFIER)
        kind = Words::IDENT_KIND[match]
        if kind == :ident && label_expected && check(/:(?!:)/)
          kind = :label
          label_expected = true
        else
          label_expected = false
          if kind == :ident && match =~ /^[A-Z]/
            kind = :constant
          elsif kind == :reserved
            # PHP keywords are case-insensitive (and IDENT_KIND ignores
            # case), so compare the lower-cased word here as well.
            case match.downcase
            when 'class'
              states << :class_expected
            when 'function'
              states << :function_expected
            when 'case', 'default'
              case_expected = true
            end
          elsif match == 'b' && check(/['"]/)  # binary string literal
            # Remember the prefix; it is emitted right after the string's
            # :open marker by the quote branches below.
            modifier = match
            next
          end
        end

      elsif scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
        label_expected = false
        kind = :float

      elsif scan(/0x[0-9a-fA-F]+/)
        label_expected = false
        kind = :hex

      elsif scan(/\d+/)
        label_expected = false
        kind = :integer

      elsif scan(/'/)
        tokens << [:open, :string]
        if modifier
          tokens << [modifier, :modifier]
          modifier = nil
        end
        kind = :delimiter
        states.push :sqstring

      elsif match = scan(/["`]/)
        tokens << [:open, :string]
        if modifier
          tokens << [modifier, :modifier]
          modifier = nil
        end
        delimiter = match
        kind = :delimiter
        states.push :dqstring

      elsif match = scan(RE::VARIABLE)
        label_expected = false
        kind = Words::VARIABLE_KIND[match]

      elsif scan(/\{/)
        kind = :operator
        label_expected = true
        states.push :php

      elsif scan(/\}/)
        if states.size == 1
          kind = :error
        else
          states.pop
          if states.last.is_a?(::Array)
            # This brace closes a "{$...}" expression inside a string:
            # restore the string state and its delimiter.
            delimiter = states.last[1]
            states[-1] = states.last[0]
            tokens << [matched, :delimiter]
            tokens << [:close, :inline]
            next
          else
            kind = :operator
            label_expected = true
          end
        end

      elsif scan(/@/)
        label_expected = false
        kind = :exception

      elsif scan RE::PHP_END
        kind = :inline_delimiter
        states = [:initial]

      elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
        tokens << [:open, :string]
        warn 'heredoc in heredoc?' if heredoc_delimiter
        heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
        kind = :delimiter
        # A single-quoted identifier (group 3) starts a nowdoc, which is
        # scanned like a single-quoted string.
        states.push self[3] ? :sqstring : :dqstring
        heredoc_delimiter = /#{heredoc_delimiter}(?=;?$)/

      elsif match = scan(/#{RE::OPERATOR}/o)
        label_expected = match == ';'
        if case_expected
          label_expected = true if match == ':'
          case_expected = false
        end
        kind = :operator

      else
        getch
        kind = :error

      end

    when :sqstring
      if scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
        kind = :content
      elsif !heredoc_delimiter && scan(/'/)
        tokens << [matched, :delimiter]
        tokens << [:close, :string]
        delimiter = nil
        label_expected = false
        states.pop
        next
      elsif heredoc_delimiter && match = scan(/\n/)
        kind = :content
        # A closing identifier directly after the newline ends the string.
        if scan heredoc_delimiter
          tokens << ["\n", :content]
          tokens << [matched, :delimiter]
          tokens << [:close, :string]
          heredoc_delimiter = nil
          label_expected = false
          states.pop
          next
        end
      elsif scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
        kind = :char
      elsif scan(/\\./m)
        kind = :content
      elsif scan(/\\/)
        kind = :error
      end

    when :dqstring
      if scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
        kind = :content
      elsif !heredoc_delimiter && scan(delimiter == '"' ? /"/ : /`/)
        tokens << [matched, :delimiter]
        tokens << [:close, :string]
        delimiter = nil
        label_expected = false
        states.pop
        next
      elsif heredoc_delimiter && match = scan(/\n/)
        kind = :content
        # A closing identifier directly after the newline ends the heredoc.
        if scan heredoc_delimiter
          tokens << ["\n", :content]
          tokens << [matched, :delimiter]
          tokens << [:close, :string]
          heredoc_delimiter = nil
          label_expected = false
          states.pop
          next
        end
      elsif scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
        kind = :char
      elsif scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
        kind = :char
      elsif scan(/\\./m)
        kind = :content
      elsif scan(/\\/)
        kind = :error
      elsif match = scan(/#{RE::VARIABLE}/o)
        kind = :local_variable
        # Simple interpolation: "$var[key]" and "$var->prop" are emitted
        # as an inline group; malformed forms are flagged as :error.
        if check(/\[#{RE::IDENTIFIER}\]/o)
          tokens << [:open, :inline]
          tokens << [match, :local_variable]
          tokens << [scan(/\[/), :operator]
          tokens << [scan(/#{RE::IDENTIFIER}/o), :ident]
          tokens << [scan(/\]/), :operator]
          tokens << [:close, :inline]
          next
        elsif check(/\[/)
          match << scan(/\[['"]?#{RE::IDENTIFIER}?['"]?\]?/o)
          kind = :error
        elsif check(/->#{RE::IDENTIFIER}/o)
          tokens << [:open, :inline]
          tokens << [match, :local_variable]
          tokens << [scan(/->/), :operator]
          tokens << [scan(/#{RE::IDENTIFIER}/o), :ident]
          tokens << [:close, :inline]
          next
        elsif check(/->/)
          match << scan(/->/)
          kind = :error
        end
      elsif match = scan(/\{/)
        if check(/\$/)
          # "{$...}" complex interpolation: park the string state together
          # with its delimiter and scan the expression as PHP code.
          kind = :delimiter
          states[-1] = [states.last, delimiter]
          delimiter = nil
          states.push :php
          tokens << [:open, :inline]
        else
          kind = :string
        end
      elsif scan(/\$\{#{RE::IDENTIFIER}\}/o)
        kind = :local_variable
      elsif scan(/\$/)
        kind = :content
      end

    when :class_expected
      if scan(/\s+/)
        kind = :space
      elsif match = scan(/#{RE::IDENTIFIER}/o)
        kind = :class
        states.pop
      else
        # No class name follows; fall back to the previous state.
        states.pop
        next
      end

    when :function_expected
      if scan(/\s+/)
        kind = :space
      elsif scan(/&/)
        kind = :operator
      elsif match = scan(/#{RE::IDENTIFIER}/o)
        kind = :function
        states.pop
      else
        # No function name follows (e.g. an anonymous function); fall
        # back to the previous state.
        states.pop
        next
      end

    else
      raise_inspect 'Unknown state!', tokens, states
    end

    # Branches that did not assign +match+ use the text of the last scan.
    match ||= matched
    if $CODERAY_DEBUG and not kind
      raise_inspect 'Error token %p in line %d' %
        [[match, kind], line], tokens, states
    end
    raise_inspect 'Empty token', tokens, states unless match

    tokens << [match, kind]

  end

  tokens
end
|
Chris@210
|
529
|
Chris@210
|
530 end
|
Chris@210
|
531
|
Chris@210
|
532 end
|
Chris@210
|
533 end
|