annotate vendor/gems/coderay-1.0.0/lib/coderay/scanners/php.rb @ 1169:492ff72268e3 bug_521

Close obsolete branch bug_521
author Chris Cannam
date Thu, 18 Oct 2012 10:42:48 +0100
parents cbb26bc654de
children
rev   line source
Chris@909 1 module CodeRay
Chris@909 2 module Scanners
Chris@909 3
Chris@909 4 load :html
Chris@909 5
Chris@909 6 # Scanner for PHP.
Chris@909 7 #
Chris@909 8 # Original by Stefan Walk.
Chris@909 9 class PHP < Scanner
Chris@909 10
Chris@909 11 register_for :php # register this scanner class under the :php key
Chris@909 12 file_extension 'php' # declare 'php' as the file extension for this scanner
Chris@909 13 encoding 'BINARY' # declared BINARY; RE::IDENTIFIER below matches raw bytes \x7f-\xFF
Chris@909 14 # Reuse the HTML scanner's KINDS_NOT_LOC constant unchanged.
Chris@909 15 KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
Chris@909 16
Chris@909 17 protected
Chris@909 18
Chris@909 19 def setup # Creates the nested HTML scanner that scan_tokens uses for text outside PHP tags.
Chris@909 20 @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true # writes into this scanner's @tokens; the keep_* options presumably preserve tokens/state across tokenize calls - confirm in Scanner
Chris@909 21 end
Chris@909 22
Chris@909 23 def reset_instance # Resets this scanner and then the nested HTML scanner.
Chris@909 24 super # parent reset first
Chris@909 25 @html_scanner.reset # the nested scanner is a separate object, so it is reset explicitly
Chris@909 26 end
Chris@909 27
Chris@909 28 module Words # :nodoc:
Chris@909 29
Chris@909 30 # according to http://www.php.net/manual/en/reserved.keywords.php
Chris@909 31 KEYWORDS = %w[
Chris@909 32 abstract and array as break case catch class clone const continue declare default do else elseif
Chris@909 33 enddeclare endfor endforeach endif endswitch endwhile extends final for foreach function global
Chris@909 34 goto if implements interface instanceof namespace new or private protected public static switch
Chris@909 35 throw try use var while xor
Chris@909 36 cfunction old_function
Chris@909 37 ]
Chris@909 38
Chris@909 39 TYPES = %w[ int integer float double bool boolean string array object resource ]
Chris@909 40
Chris@909 41 LANGUAGE_CONSTRUCTS = %w[
Chris@909 42 die echo empty exit eval include include_once isset list
Chris@909 43 require require_once return print unset
Chris@909 44 ]
Chris@909 45
Chris@909 46 CLASSES = %w[ Directory stdClass __PHP_Incomplete_Class exception php_user_filter Closure ]
Chris@909 47
Chris@909 48 # according to http://php.net/quickref.php on 2009-04-21;
Chris@909 49 # all functions with _ excluded (module functions) and selected additional functions
Chris@909 50 BUILTIN_FUNCTIONS = %w[
Chris@909 51 abs acos acosh addcslashes addslashes aggregate array arsort ascii2ebcdic asin asinh asort assert atan atan2
Chris@909 52 atanh basename bcadd bccomp bcdiv bcmod bcmul bcpow bcpowmod bcscale bcsqrt bcsub bin2hex bindec
Chris@909 53 bindtextdomain bzclose bzcompress bzdecompress bzerrno bzerror bzerrstr bzflush bzopen bzread bzwrite
Chris@909 54 calculhmac ceil chdir checkdate checkdnsrr chgrp chmod chop chown chr chroot clearstatcache closedir closelog
Chris@909 55 compact constant copy cos cosh count crc32 crypt current date dcgettext dcngettext deaggregate decbin dechex
Chris@909 56 decoct define defined deg2rad delete dgettext die dirname diskfreespace dl dngettext doubleval each
Chris@909 57 ebcdic2ascii echo empty end ereg eregi escapeshellarg escapeshellcmd eval exec exit exp explode expm1 extract
Chris@909 58 fclose feof fflush fgetc fgetcsv fgets fgetss file fileatime filectime filegroup fileinode filemtime fileowner
Chris@909 59 fileperms filepro filesize filetype floatval flock floor flush fmod fnmatch fopen fpassthru fprintf fputcsv
Chris@909 60 fputs fread frenchtojd fscanf fseek fsockopen fstat ftell ftok ftruncate fwrite getallheaders getcwd getdate
Chris@909 61 getenv gethostbyaddr gethostbyname gethostbynamel getimagesize getlastmod getmxrr getmygid getmyinode getmypid
Chris@909 62 getmyuid getopt getprotobyname getprotobynumber getrandmax getrusage getservbyname getservbyport gettext
Chris@909 63 gettimeofday gettype glob gmdate gmmktime gmstrftime gregoriantojd gzclose gzcompress gzdecode gzdeflate
Chris@909 64 gzencode gzeof gzfile gzgetc gzgets gzgetss gzinflate gzopen gzpassthru gzputs gzread gzrewind gzseek gztell
Chris@909 65 gzuncompress gzwrite hash header hebrev hebrevc hexdec htmlentities htmlspecialchars hypot iconv idate
Chris@909 66 implode include intval ip2long iptcembed iptcparse isset
Chris@909 67 jddayofweek jdmonthname jdtofrench jdtogregorian jdtojewish jdtojulian jdtounix jewishtojd join jpeg2wbmp
Chris@909 68 juliantojd key krsort ksort lcfirst lchgrp lchown levenshtein link linkinfo list localeconv localtime log
Chris@909 69 log10 log1p long2ip lstat ltrim mail main max md5 metaphone mhash microtime min mkdir mktime msql natcasesort
Chris@909 70 natsort next ngettext nl2br nthmac octdec opendir openlog
Chris@909 71 ord overload pack passthru pathinfo pclose pfsockopen phpcredits phpinfo phpversion pi png2wbmp popen pos pow
Chris@909 72 prev print printf putenv quotemeta rad2deg rand range rawurldecode rawurlencode readdir readfile readgzfile
Chris@909 73 readline readlink realpath recode rename require reset rewind rewinddir rmdir round rsort rtrim scandir
Chris@909 74 serialize setcookie setlocale setrawcookie settype sha1 shuffle signeurlpaiement sin sinh sizeof sleep snmpget
Chris@909 75 snmpgetnext snmprealwalk snmpset snmpwalk snmpwalkoid sort soundex split spliti sprintf sqrt srand sscanf stat
Chris@909 76 strcasecmp strchr strcmp strcoll strcspn strftime stripcslashes stripos stripslashes stristr strlen
Chris@909 77 strnatcasecmp strnatcmp strncasecmp strncmp strpbrk strpos strptime strrchr strrev strripos strrpos strspn
Chris@909 78 strstr strtok strtolower strtotime strtoupper strtr strval substr symlink syslog system tan tanh tempnam
Chris@909 79 textdomain time tmpfile touch trim uasort ucfirst ucwords uksort umask uniqid unixtojd unlink unpack
Chris@909 80 unserialize unset urldecode urlencode usleep usort vfprintf virtual vprintf vsprintf wordwrap
Chris@909 81 array_change_key_case array_chunk array_combine array_count_values array_diff array_diff_assoc
Chris@909 82 array_diff_key array_diff_uassoc array_diff_ukey array_fill array_fill_keys array_filter array_flip
Chris@909 83 array_intersect array_intersect_assoc array_intersect_key array_intersect_uassoc array_intersect_ukey
Chris@909 84 array_key_exists array_keys array_map array_merge array_merge_recursive array_multisort array_pad
Chris@909 85 array_pop array_product array_push array_rand array_reduce array_reverse array_search array_shift
Chris@909 86 array_slice array_splice array_sum array_udiff array_udiff_assoc array_udiff_uassoc array_uintersect
Chris@909 87 array_uintersect_assoc array_uintersect_uassoc array_unique array_unshift array_values array_walk
Chris@909 88 array_walk_recursive
Chris@909 89 assert_options base_convert base64_decode base64_encode
Chris@909 90 chunk_split class_exists class_implements class_parents
Chris@909 91 count_chars debug_backtrace debug_print_backtrace debug_zval_dump
Chris@909 92 error_get_last error_log error_reporting extension_loaded
Chris@909 93 file_exists file_get_contents file_put_contents load_file
Chris@909 94 func_get_arg func_get_args func_num_args function_exists
Chris@909 95 get_browser get_called_class get_cfg_var get_class get_class_methods get_class_vars
Chris@909 96 get_current_user get_declared_classes get_declared_interfaces get_defined_constants
Chris@909 97 get_defined_functions get_defined_vars get_extension_funcs get_headers get_html_translation_table
Chris@909 98 get_include_path get_included_files get_loaded_extensions get_magic_quotes_gpc get_magic_quotes_runtime
Chris@909 99 get_meta_tags get_object_vars get_parent_class get_required_filesget_resource_type
Chris@909 100 gc_collect_cycles gc_disable gc_enable gc_enabled
Chris@909 101 halt_compiler headers_list headers_sent highlight_file highlight_string
Chris@909 102 html_entity_decode htmlspecialchars_decode
Chris@909 103 in_array include_once inclued_get_data
Chris@909 104 is_a is_array is_binary is_bool is_buffer is_callable is_dir is_double is_executable is_file is_finite
Chris@909 105 is_float is_infinite is_int is_integer is_link is_long is_nan is_null is_numeric is_object is_readable
Chris@909 106 is_real is_resource is_scalar is_soap_fault is_string is_subclass_of is_unicode is_uploaded_file
Chris@909 107 is_writable is_writeable
Chris@909 108 locale_get_default locale_set_default
Chris@909 109 number_format override_function parse_str parse_url
Chris@909 110 php_check_syntax php_ini_loaded_file php_ini_scanned_files php_logo_guid php_sapi_name
Chris@909 111 php_strip_whitespace php_uname
Chris@909 112 preg_filter preg_grep preg_last_error preg_match preg_match_all preg_quote preg_replace
Chris@909 113 preg_replace_callback preg_split print_r
Chris@909 114 require_once register_shutdown_function register_tick_function
Chris@909 115 set_error_handler set_exception_handler set_file_buffer set_include_path
Chris@909 116 set_magic_quotes_runtime set_time_limit shell_exec
Chris@909 117 str_getcsv str_ireplace str_pad str_repeat str_replace str_rot13 str_shuffle str_split str_word_count
Chris@909 118 strip_tags substr_compare substr_count substr_replace
Chris@909 119 time_nanosleep time_sleep_until
Chris@909 120 token_get_all token_name trigger_error
Chris@909 121 unregister_tick_function use_soap_error_handler user_error
Chris@909 122 utf8_decode utf8_encode var_dump var_export
Chris@909 123 version_compare
Chris@909 124 zend_logo_guid zend_thread_id zend_version
Chris@909 125 create_function call_user_func_array
Chris@909 126 posix_access posix_ctermid posix_get_last_error posix_getcwd posix_getegid
Chris@909 127 posix_geteuid posix_getgid posix_getgrgid posix_getgrnam posix_getgroups
Chris@909 128 posix_getlogin posix_getpgid posix_getpgrp posix_getpid posix_getppid
Chris@909 129 posix_getpwnam posix_getpwuid posix_getrlimit posix_getsid posix_getuid
Chris@909 130 posix_initgroups posix_isatty posix_kill posix_mkfifo posix_mknod
Chris@909 131 posix_setegid posix_seteuid posix_setgid posix_setpgid posix_setsid
Chris@909 132 posix_setuid posix_strerror posix_times posix_ttyname posix_uname
Chris@909 133 pcntl_alarm pcntl_exec pcntl_fork pcntl_getpriority pcntl_setpriority
Chris@909 134 pcntl_signal pcntl_signal_dispatch pcntl_sigprocmask pcntl_sigtimedwait
Chris@909 135 pcntl_sigwaitinfo pcntl_wait pcntl_waitpid pcntl_wexitstatus pcntl_wifexited
Chris@909 136 pcntl_wifsignaled pcntl_wifstopped pcntl_wstopsig pcntl_wtermsig
Chris@909 137 ]
Chris@909 138 # TODO: more built-in PHP functions?
Chris@909 139
Chris@909 140 EXCEPTIONS = %w[
Chris@909 141 E_ERROR E_WARNING E_PARSE E_NOTICE E_CORE_ERROR E_CORE_WARNING E_COMPILE_ERROR E_COMPILE_WARNING
Chris@909 142 E_USER_ERROR E_USER_WARNING E_USER_NOTICE E_DEPRECATED E_USER_DEPRECATED E_ALL E_STRICT
Chris@909 143 ]
Chris@909 144
Chris@909 145 CONSTANTS = %w[
Chris@909 146 null true false self parent
Chris@909 147 __LINE__ __DIR__ __FILE__ __LINE__
Chris@909 148 __CLASS__ __NAMESPACE__ __METHOD__ __FUNCTION__
Chris@909 149 PHP_VERSION PHP_MAJOR_VERSION PHP_MINOR_VERSION PHP_RELEASE_VERSION PHP_VERSION_ID PHP_EXTRA_VERSION PHP_ZTS
Chris@909 150 PHP_DEBUG PHP_MAXPATHLEN PHP_OS PHP_SAPI PHP_EOL PHP_INT_MAX PHP_INT_SIZE DEFAULT_INCLUDE_PATH
Chris@909 151 PEAR_INSTALL_DIR PEAR_EXTENSION_DIR PHP_EXTENSION_DIR PHP_PREFIX PHP_BINDIR PHP_LIBDIR PHP_DATADIR
Chris@909 152 PHP_SYSCONFDIR PHP_LOCALSTATEDIR PHP_CONFIG_FILE_PATH PHP_CONFIG_FILE_SCAN_DIR PHP_SHLIB_SUFFIX
Chris@909 153 PHP_OUTPUT_HANDLER_START PHP_OUTPUT_HANDLER_CONT PHP_OUTPUT_HANDLER_END
Chris@909 154 __COMPILER_HALT_OFFSET__
Chris@909 155 EXTR_OVERWRITE EXTR_SKIP EXTR_PREFIX_SAME EXTR_PREFIX_ALL EXTR_PREFIX_INVALID EXTR_PREFIX_IF_EXISTS
Chris@909 156 EXTR_IF_EXISTS SORT_ASC SORT_DESC SORT_REGULAR SORT_NUMERIC SORT_STRING CASE_LOWER CASE_UPPER COUNT_NORMAL
Chris@909 157 COUNT_RECURSIVE ASSERT_ACTIVE ASSERT_CALLBACK ASSERT_BAIL ASSERT_WARNING ASSERT_QUIET_EVAL CONNECTION_ABORTED
Chris@909 158 CONNECTION_NORMAL CONNECTION_TIMEOUT INI_USER INI_PERDIR INI_SYSTEM INI_ALL M_E M_LOG2E M_LOG10E M_LN2 M_LN10
Chris@909 159 M_PI M_PI_2 M_PI_4 M_1_PI M_2_PI M_2_SQRTPI M_SQRT2 M_SQRT1_2 CRYPT_SALT_LENGTH CRYPT_STD_DES CRYPT_EXT_DES
Chris@909 160 CRYPT_MD5 CRYPT_BLOWFISH DIRECTORY_SEPARATOR SEEK_SET SEEK_CUR SEEK_END LOCK_SH LOCK_EX LOCK_UN LOCK_NB
Chris@909 161 HTML_SPECIALCHARS HTML_ENTITIES ENT_COMPAT ENT_QUOTES ENT_NOQUOTES INFO_GENERAL INFO_CREDITS
Chris@909 162 INFO_CONFIGURATION INFO_MODULES INFO_ENVIRONMENT INFO_VARIABLES INFO_LICENSE INFO_ALL CREDITS_GROUP
Chris@909 163 CREDITS_GENERAL CREDITS_SAPI CREDITS_MODULES CREDITS_DOCS CREDITS_FULLPAGE CREDITS_QA CREDITS_ALL STR_PAD_LEFT
Chris@909 164 STR_PAD_RIGHT STR_PAD_BOTH PATHINFO_DIRNAME PATHINFO_BASENAME PATHINFO_EXTENSION PATH_SEPARATOR CHAR_MAX
Chris@909 165 LC_CTYPE LC_NUMERIC LC_TIME LC_COLLATE LC_MONETARY LC_ALL LC_MESSAGES ABDAY_1 ABDAY_2 ABDAY_3 ABDAY_4 ABDAY_5
Chris@909 166 ABDAY_6 ABDAY_7 DAY_1 DAY_2 DAY_3 DAY_4 DAY_5 DAY_6 DAY_7 ABMON_1 ABMON_2 ABMON_3 ABMON_4 ABMON_5 ABMON_6
Chris@909 167 ABMON_7 ABMON_8 ABMON_9 ABMON_10 ABMON_11 ABMON_12 MON_1 MON_2 MON_3 MON_4 MON_5 MON_6 MON_7 MON_8 MON_9
Chris@909 168 MON_10 MON_11 MON_12 AM_STR PM_STR D_T_FMT D_FMT T_FMT T_FMT_AMPM ERA ERA_YEAR ERA_D_T_FMT ERA_D_FMT ERA_T_FMT
Chris@909 169 ALT_DIGITS INT_CURR_SYMBOL CURRENCY_SYMBOL CRNCYSTR MON_DECIMAL_POINT MON_THOUSANDS_SEP MON_GROUPING
Chris@909 170 POSITIVE_SIGN NEGATIVE_SIGN INT_FRAC_DIGITS FRAC_DIGITS P_CS_PRECEDES P_SEP_BY_SPACE N_CS_PRECEDES
Chris@909 171 N_SEP_BY_SPACE P_SIGN_POSN N_SIGN_POSN DECIMAL_POINT RADIXCHAR THOUSANDS_SEP THOUSEP GROUPING YESEXPR NOEXPR
Chris@909 172 YESSTR NOSTR CODESET LOG_EMERG LOG_ALERT LOG_CRIT LOG_ERR LOG_WARNING LOG_NOTICE LOG_INFO LOG_DEBUG LOG_KERN
Chris@909 173 LOG_USER LOG_MAIL LOG_DAEMON LOG_AUTH LOG_SYSLOG LOG_LPR LOG_NEWS LOG_UUCP LOG_CRON LOG_AUTHPRIV LOG_LOCAL0
Chris@909 174 LOG_LOCAL1 LOG_LOCAL2 LOG_LOCAL3 LOG_LOCAL4 LOG_LOCAL5 LOG_LOCAL6 LOG_LOCAL7 LOG_PID LOG_CONS LOG_ODELAY
Chris@909 175 LOG_NDELAY LOG_NOWAIT LOG_PERROR
Chris@909 176 ]
Chris@909 177
Chris@909 178 PREDEFINED = %w[
Chris@909 179 $GLOBALS $_SERVER $_GET $_POST $_FILES $_REQUEST $_SESSION $_ENV
Chris@909 180 $_COOKIE $php_errormsg $HTTP_RAW_POST_DATA $http_response_header
Chris@909 181 $argc $argv
Chris@909 182 ]
Chris@909 183
Chris@909 184 IDENT_KIND = WordList::CaseIgnoring.new(:ident).
Chris@909 185 add(KEYWORDS, :keyword).
Chris@909 186 add(TYPES, :predefined_type).
Chris@909 187 add(LANGUAGE_CONSTRUCTS, :keyword).
Chris@909 188 add(BUILTIN_FUNCTIONS, :predefined).
Chris@909 189 add(CLASSES, :predefined_constant).
Chris@909 190 add(EXCEPTIONS, :exception).
Chris@909 191 add(CONSTANTS, :predefined_constant)
Chris@909 192
Chris@909 193 VARIABLE_KIND = WordList.new(:local_variable).
Chris@909 194 add(PREDEFINED, :predefined)
Chris@909 195 end
Chris@909 196
Chris@909 197 module RE # :nodoc:
Chris@909 198 # PHP_START: a <script ... language="php"> tag (double- or single-quoted), "<?php" with an optional digit, or "<?" not followed by "xml"; case-insensitive.
Chris@909 199 PHP_START = /
Chris@909 200 <script\s+[^>]*?language\s*=\s*"php"[^>]*?> |
Chris@909 201 <script\s+[^>]*?language\s*=\s*'php'[^>]*?> |
Chris@909 202 <\?php\d? |
Chris@909 203 <\?(?!xml)
Chris@909 204 /xi
Chris@909 205 # PHP_END: "</script>" or "?>"; case-insensitive.
Chris@909 206 PHP_END = %r!
Chris@909 207 </script> |
Chris@909 208 \?>
Chris@909 209 !xi
Chris@909 210 # HTML_INDICATOR: "<!DOCTYPE html" or an opening html/body/div/p tag followed by ">" or a space; scan_tokens uses it to guess whether the input starts as HTML.
Chris@909 211 HTML_INDICATOR = /<!DOCTYPE html|<(?:html|body|div|p)[> ]/i
Chris@909 212 # IDENTIFIER: a letter, underscore or byte \x7f-\xFF, followed by any of those or digits. VARIABLE: "$" plus an IDENTIFIER.
Chris@909 213 IDENTIFIER = /[a-z_\x7f-\xFF][a-z0-9_\x7f-\xFF]*/i
Chris@909 214 VARIABLE = /\$#{IDENTIFIER}/
Chris@909 215 # OPERATOR: operators and delimiters. scan_tokens tries its own rules for "{", "}", "@", PHP_END and "<<<" heredoc openers before this pattern.
Chris@909 216 OPERATOR = /
Chris@909 217 \.(?!\d)=? | # dot that is not decimal point, string concatenation
Chris@909 218 && | \|\| | # logic
Chris@909 219 :: | -> | => | # scope, member, dictionary
Chris@909 220 \\(?!\n) | # namespace
Chris@909 221 \+\+ | -- | # increment, decrement
Chris@909 222 [,;?:()\[\]{}] | # simple delimiters
Chris@909 223 [-+*\/%&|^]=? | # ordinary math, binary logic, assignment shortcuts
Chris@909 224 [~$] | # whatever
Chris@909 225 =& | # reference assignment
Chris@909 226 [=!]=?=? | <> | # comparison and assignment
Chris@909 227 <<=? | >>=? | [<>]=? # comparison and shift
Chris@909 228 /x
Chris@909 229
Chris@909 230 end
Chris@909 231
Chris@909 232 protected
Chris@909 233
Chris@909 234 def scan_tokens encoder, options # Tokenizes the input into encoder using a stack of states (:initial = HTML, :php, string states, ...); returns encoder. options is not read here.
Chris@909 235 # Pick the initial state stack by sniffing the start of the input.
Chris@909 236 if check(RE::PHP_START) || # starts with <?
Chris@909 237 (match?(/\s*<\S/) && check(/.{1,1000}#{RE::PHP_START}/om)) || # starts with tag and contains <?
Chris@909 238 check(/.{0,1000}#{RE::HTML_INDICATOR}/om) || # an HTML indicator after at most 1000 chars
Chris@909 239 check(/.{1,100}#{RE::PHP_START}/om) # PHP start after max 100 chars
Chris@909 240 # is HTML with embedded PHP, so start with HTML
Chris@909 241 states = [:initial]
Chris@909 242 else
Chris@909 243 # is just PHP, so start with PHP surrounded by HTML
Chris@909 244 states = [:initial, :php]
Chris@909 245 end
Chris@909 246
Chris@909 247 label_expected = true # when true, an unlisted identifier followed by a single ':' is emitted as :label
Chris@909 248 case_expected = false # set after 'case'/'default' so that the next ':' operator re-enables labels
Chris@909 249
Chris@909 250 heredoc_delimiter = nil # Regexp for the closing heredoc/nowdoc identifier while inside one, else nil
Chris@909 251 delimiter = nil # opening quote character of the current quoted string
Chris@909 252 modifier = nil # pending 'b' prefix of a binary string literal
Chris@909 253
Chris@909 254 until eos?
Chris@909 255
Chris@909 256 case states.last # dispatch on the innermost state
Chris@909 257
Chris@909 258 when :initial # HTML
Chris@909 259 if match = scan(RE::PHP_START)
Chris@909 260 encoder.text_token match, :inline_delimiter
Chris@909 261 label_expected = true
Chris@909 262 states << :php
Chris@909 263 else
Chris@909 264 match = scan_until(/(?=#{RE::PHP_START})/o) || scan_rest # everything up to the next PHP opener, or the rest of the input
Chris@909 265 @html_scanner.tokenize match unless match.empty?
Chris@909 266 end
Chris@909 267
Chris@909 268 when :php
Chris@909 269 if match = scan(/\s+/)
Chris@909 270 encoder.text_token match, :space
Chris@909 271
Chris@909 272 elsif match = scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
Chris@909 273 encoder.text_token match, :comment
Chris@909 274
Chris@909 275 elsif match = scan(RE::IDENTIFIER)
Chris@909 276 kind = Words::IDENT_KIND[match]
Chris@909 277 if kind == :ident && label_expected && check(/:(?!:)/) # single ':' follows (not '::')
Chris@909 278 kind = :label
Chris@909 279 label_expected = true
Chris@909 280 else
Chris@909 281 label_expected = false
Chris@909 282 if kind == :ident && match =~ /^[A-Z]/ # unlisted identifier starting with an uppercase letter
Chris@909 283 kind = :constant
Chris@909 284 elsif kind == :keyword
Chris@909 285 case match
Chris@909 286 when 'class'
Chris@909 287 states << :class_expected
Chris@909 288 when 'function'
Chris@909 289 states << :function_expected
Chris@909 290 when 'case', 'default'
Chris@909 291 case_expected = true
Chris@909 292 end
Chris@909 293 elsif match == 'b' && check(/['"]/) # binary string literal
Chris@909 294 modifier = match
Chris@909 295 next # emit nothing yet; the 'b' is output as :modifier when the quote is scanned below
Chris@909 296 end
Chris@909 297 end
Chris@909 298 encoder.text_token match, kind
Chris@909 299
Chris@909 300 elsif match = scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
Chris@909 301 label_expected = false
Chris@909 302 encoder.text_token match, :float
Chris@909 303
Chris@909 304 elsif match = scan(/0x[0-9a-fA-F]+/)
Chris@909 305 label_expected = false
Chris@909 306 encoder.text_token match, :hex
Chris@909 307
Chris@909 308 elsif match = scan(/\d+/)
Chris@909 309 label_expected = false
Chris@909 310 encoder.text_token match, :integer
Chris@909 311
Chris@909 312 elsif match = scan(/['"`]/)
Chris@909 313 encoder.begin_group :string
Chris@909 314 if modifier
Chris@909 315 encoder.text_token modifier, :modifier
Chris@909 316 modifier = nil
Chris@909 317 end
Chris@909 318 delimiter = match
Chris@909 319 encoder.text_token match, :delimiter
Chris@909 320 states.push match == "'" ? :sqstring : :dqstring # backtick strings share the :dqstring state
Chris@909 321
Chris@909 322 elsif match = scan(RE::VARIABLE)
Chris@909 323 label_expected = false
Chris@909 324 encoder.text_token match, Words::VARIABLE_KIND[match]
Chris@909 325
Chris@909 326 elsif match = scan(/\{/)
Chris@909 327 encoder.text_token match, :operator
Chris@909 328 label_expected = true
Chris@909 329 states.push :php # one nested :php state per '{' so the matching '}' can pop it
Chris@909 330
Chris@909 331 elsif match = scan(/\}/)
Chris@909 332 if states.size == 1 # NOTE(review): this branch runs only when states.last is :php, and every stack built in this method starts with :initial, so size 1 looks unreachable here - confirm
Chris@909 333 encoder.text_token match, :error
Chris@909 334 else
Chris@909 335 states.pop
Chris@909 336 if states.last.is_a?(::Array) # a [state, delimiter] pair marks the string that a '{$...}' interpolation returns to
Chris@909 337 delimiter = states.last[1]
Chris@909 338 states[-1] = states.last[0]
Chris@909 339 encoder.text_token match, :delimiter
Chris@909 340 encoder.end_group :inline
Chris@909 341 else
Chris@909 342 encoder.text_token match, :operator
Chris@909 343 label_expected = true
Chris@909 344 end
Chris@909 345 end
Chris@909 346
Chris@909 347 elsif match = scan(/@/)
Chris@909 348 label_expected = false
Chris@909 349 encoder.text_token match, :exception
Chris@909 350
Chris@909 351 elsif match = scan(RE::PHP_END)
Chris@909 352 encoder.text_token match, :inline_delimiter
Chris@909 353 states = [:initial] # back to HTML: any nested states are discarded
Chris@909 354
Chris@909 355 elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
Chris@909 356 encoder.begin_group :string
Chris@909 357 # warn 'heredoc in heredoc?' if heredoc_delimiter
Chris@909 358 heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
Chris@909 359 encoder.text_token match, :delimiter
Chris@909 360 states.push self[3] ? :sqstring : :dqstring # a single-quoted identifier (nowdoc) is scanned as :sqstring
Chris@909 361 heredoc_delimiter = /#{heredoc_delimiter}(?=;?$)/ # closing identifier must be followed by an optional ';' and end of line
Chris@909 362
Chris@909 363 elsif match = scan(/#{RE::OPERATOR}/o)
Chris@909 364 label_expected = match == ';'
Chris@909 365 if case_expected
Chris@909 366 label_expected = true if match == ':'
Chris@909 367 case_expected = false
Chris@909 368 end
Chris@909 369 encoder.text_token match, :operator
Chris@909 370
Chris@909 371 else
Chris@909 372 encoder.text_token getch, :error
Chris@909 373
Chris@909 374 end
Chris@909 375
Chris@909 376 when :sqstring
Chris@909 377 if match = scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
Chris@909 378 encoder.text_token match, :content
Chris@909 379 elsif !heredoc_delimiter && match = scan(/'/)
Chris@909 380 encoder.text_token match, :delimiter
Chris@909 381 encoder.end_group :string
Chris@909 382 delimiter = nil
Chris@909 383 label_expected = false
Chris@909 384 states.pop
Chris@909 385 elsif heredoc_delimiter && match = scan(/\n/)
Chris@909 386 if scan heredoc_delimiter # closing identifier right after the newline ends the string
Chris@909 387 encoder.text_token "\n", :content
Chris@909 388 encoder.text_token matched, :delimiter
Chris@909 389 encoder.end_group :string
Chris@909 390 heredoc_delimiter = nil
Chris@909 391 label_expected = false
Chris@909 392 states.pop
Chris@909 393 else
Chris@909 394 encoder.text_token match, :content
Chris@909 395 end
Chris@909 396 elsif match = scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
Chris@909 397 encoder.text_token match, :char
Chris@909 398 elsif match = scan(/\\./m)
Chris@909 399 encoder.text_token match, :content
Chris@909 400 elsif match = scan(/\\/)
Chris@909 401 encoder.text_token match, :error
Chris@909 402 else
Chris@909 403 states.pop # nothing matched: leave the string state (no end_group is emitted here)
Chris@909 404 end
Chris@909 405
Chris@909 406 when :dqstring
Chris@909 407 if match = scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
Chris@909 408 encoder.text_token match, :content
Chris@909 409 elsif !heredoc_delimiter && match = scan(delimiter == '"' ? /"/ : /`/)
Chris@909 410 encoder.text_token match, :delimiter
Chris@909 411 encoder.end_group :string
Chris@909 412 delimiter = nil
Chris@909 413 label_expected = false
Chris@909 414 states.pop
Chris@909 415 elsif heredoc_delimiter && match = scan(/\n/)
Chris@909 416 if scan heredoc_delimiter # closing identifier right after the newline ends the string
Chris@909 417 encoder.text_token "\n", :content
Chris@909 418 encoder.text_token matched, :delimiter
Chris@909 419 encoder.end_group :string
Chris@909 420 heredoc_delimiter = nil
Chris@909 421 label_expected = false
Chris@909 422 states.pop
Chris@909 423 else
Chris@909 424 encoder.text_token match, :content
Chris@909 425 end
Chris@909 426 elsif match = scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
Chris@909 427 encoder.text_token match, :char
Chris@909 428 elsif match = scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
Chris@909 429 encoder.text_token match, :char
Chris@909 430 elsif match = scan(/\\./m)
Chris@909 431 encoder.text_token match, :content
Chris@909 432 elsif match = scan(/\\/)
Chris@909 433 encoder.text_token match, :error
Chris@909 434 elsif match = scan(/#{RE::VARIABLE}/o)
Chris@909 435 if check(/\[#{RE::IDENTIFIER}\]/o) # $var[ident] inside the string
Chris@909 436 encoder.begin_group :inline
Chris@909 437 encoder.text_token match, :local_variable
Chris@909 438 encoder.text_token scan(/\[/), :operator
Chris@909 439 encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
Chris@909 440 encoder.text_token scan(/\]/), :operator
Chris@909 441 encoder.end_group :inline
Chris@909 442 elsif check(/\[/) # any other '[' form after the variable is emitted as :error
Chris@909 443 match << scan(/\[['"]?#{RE::IDENTIFIER}?['"]?\]?/o)
Chris@909 444 encoder.text_token match, :error
Chris@909 445 elsif check(/->#{RE::IDENTIFIER}/o) # $var->ident inside the string
Chris@909 446 encoder.begin_group :inline
Chris@909 447 encoder.text_token match, :local_variable
Chris@909 448 encoder.text_token scan(/->/), :operator
Chris@909 449 encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
Chris@909 450 encoder.end_group :inline
Chris@909 451 elsif check(/->/) # '->' without an identifier is emitted as :error
Chris@909 452 match << scan(/->/)
Chris@909 453 encoder.text_token match, :error
Chris@909 454 else
Chris@909 455 encoder.text_token match, :local_variable
Chris@909 456 end
Chris@909 457 elsif match = scan(/\{/)
Chris@909 458 if check(/\$/) # '{$' starts an interpolated expression
Chris@909 459 encoder.begin_group :inline
Chris@909 460 states[-1] = [states.last, delimiter] # remember the string state and its delimiter for the matching '}'
Chris@909 461 delimiter = nil
Chris@909 462 states.push :php
Chris@909 463 encoder.text_token match, :delimiter
Chris@909 464 else
Chris@909 465 encoder.text_token match, :content
Chris@909 466 end
Chris@909 467 elsif match = scan(/\$\{#{RE::IDENTIFIER}\}/o)
Chris@909 468 encoder.text_token match, :local_variable
Chris@909 469 elsif match = scan(/\$/)
Chris@909 470 encoder.text_token match, :content
Chris@909 471 else
Chris@909 472 states.pop # nothing matched: leave the string state (no end_group is emitted here)
Chris@909 473 end
Chris@909 474
Chris@909 475 when :class_expected
Chris@909 476 if match = scan(/\s+/)
Chris@909 477 encoder.text_token match, :space
Chris@909 478 elsif match = scan(/#{RE::IDENTIFIER}/o)
Chris@909 479 encoder.text_token match, :class
Chris@909 480 states.pop
Chris@909 481 else
Chris@909 482 states.pop # no name follows 'class': resume normal scanning
Chris@909 483 end
Chris@909 484
Chris@909 485 when :function_expected
Chris@909 486 if match = scan(/\s+/)
Chris@909 487 encoder.text_token match, :space
Chris@909 488 elsif match = scan(/&/)
Chris@909 489 encoder.text_token match, :operator
Chris@909 490 elsif match = scan(/#{RE::IDENTIFIER}/o)
Chris@909 491 encoder.text_token match, :function
Chris@909 492 states.pop
Chris@909 493 else
Chris@909 494 states.pop # no name follows 'function': resume normal scanning
Chris@909 495 end
Chris@909 496
Chris@909 497 else
Chris@909 498 raise_inspect 'Unknown state!', encoder, states
Chris@909 499 end
Chris@909 500
Chris@909 501 end
Chris@909 502 # NOTE(review): if the input ends inside a string or '{$...}' interpolation, the :string/:inline groups opened above are not closed by this method - confirm the encoder tolerates open groups.
Chris@909 503 encoder
Chris@909 504 end
Chris@909 505
Chris@909 506 end
Chris@909 507
Chris@909 508 end
Chris@909 509 end