annotate vendor/gems/coderay-0.9.7/lib/coderay/scanners/php.rb @ 855:7294e8db2515 bug_162

Close obsolete branch bug_162
author Chris Cannam
date Thu, 14 Jul 2011 11:59:19 +0100
parents 0579821a129a
children
rev   line source
Chris@210 1 module CodeRay
Chris@210 2 module Scanners
Chris@210 3
Chris@210 4 load :html
Chris@210 5
Chris@210 6 # Original by Stefan Walk.
Chris@210 7 class PHP < Scanner
Chris@210 8
Chris@210 9 register_for :php
Chris@210 10 file_extension 'php'
Chris@210 11
Chris@210 12 KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
Chris@210 13
# Builds the nested HTML scanner that handles everything outside PHP
# sections. It is created with this scanner's @tokens as its :tokens option
# and with :keep_tokens and :keep_state switched on (presumably so that it
# appends to the shared token list and keeps its state between successive
# tokenize calls -- confirm against CodeRay.scanner / Scanner#tokenize).
def setup
  @html_scanner = CodeRay.scanner(
    :html,
    :tokens => @tokens,
    :keep_tokens => true,
    :keep_state => true
  )
end
Chris@210 17
# Resets this scanner through the superclass implementation, then resets
# the nested HTML scanner created in #setup as well, so both start from a
# clean state.
def reset_instance
  super
  @html_scanner.reset
end
Chris@210 22
# Word lists used to classify PHP identifiers and variables.
#
# IDENT_KIND and VARIABLE_KIND are the lookup tables consulted by the
# scanner; the other constants are the raw word arrays they are built from.
module Words

  # according to http://www.php.net/manual/en/reserved.keywords.php
  KEYWORDS = %w[
    abstract and array as break case catch class clone const continue declare default do else elseif
    enddeclare endfor endforeach endif endswitch endwhile extends final for foreach function global
    goto if implements interface instanceof namespace new or private protected public static switch
    throw try use var while xor
    cfunction old_function
  ]

  # Names of PHP's built-in value types.
  TYPES = %w[ int integer float double bool boolean string array object resource ]

  # Constructs that look like function calls but are part of the language.
  LANGUAGE_CONSTRUCTS = %w[
    die echo empty exit eval include include_once isset list
    require require_once return print unset
  ]

  # Predefined classes.
  CLASSES = %w[ Directory stdClass __PHP_Incomplete_Class exception php_user_filter Closure ]

  # according to http://php.net/quickref.php on 2009-04-21;
  # all functions with _ excluded (module functions) and selected additional functions
  BUILTIN_FUNCTIONS = %w[
    abs acos acosh addcslashes addslashes aggregate array arsort ascii2ebcdic asin asinh asort assert atan atan2
    atanh basename bcadd bccomp bcdiv bcmod bcmul bcpow bcpowmod bcscale bcsqrt bcsub bin2hex bindec
    bindtextdomain bzclose bzcompress bzdecompress bzerrno bzerror bzerrstr bzflush bzopen bzread bzwrite
    calculhmac ceil chdir checkdate checkdnsrr chgrp chmod chop chown chr chroot clearstatcache closedir closelog
    compact constant copy cos cosh count crc32 crypt current date dcgettext dcngettext deaggregate decbin dechex
    decoct define defined deg2rad delete dgettext die dirname diskfreespace dl dngettext doubleval each
    ebcdic2ascii echo empty end ereg eregi escapeshellarg escapeshellcmd eval exec exit exp explode expm1 extract
    fclose feof fflush fgetc fgetcsv fgets fgetss file fileatime filectime filegroup fileinode filemtime fileowner
    fileperms filepro filesize filetype floatval flock floor flush fmod fnmatch fopen fpassthru fprintf fputcsv
    fputs fread frenchtojd fscanf fseek fsockopen fstat ftell ftok ftruncate fwrite getallheaders getcwd getdate
    getenv gethostbyaddr gethostbyname gethostbynamel getimagesize getlastmod getmxrr getmygid getmyinode getmypid
    getmyuid getopt getprotobyname getprotobynumber getrandmax getrusage getservbyname getservbyport gettext
    gettimeofday gettype glob gmdate gmmktime gmstrftime gregoriantojd gzclose gzcompress gzdecode gzdeflate
    gzencode gzeof gzfile gzgetc gzgets gzgetss gzinflate gzopen gzpassthru gzputs gzread gzrewind gzseek gztell
    gzuncompress gzwrite hash header hebrev hebrevc hexdec htmlentities htmlspecialchars hypot iconv idate
    implode include intval ip2long iptcembed iptcparse isset
    jddayofweek jdmonthname jdtofrench jdtogregorian jdtojewish jdtojulian jdtounix jewishtojd join jpeg2wbmp
    juliantojd key krsort ksort lcfirst lchgrp lchown levenshtein link linkinfo list localeconv localtime log
    log10 log1p long2ip lstat ltrim mail main max md5 metaphone mhash microtime min mkdir mktime msql natcasesort
    natsort next ngettext nl2br nthmac octdec opendir openlog
    ord overload pack passthru pathinfo pclose pfsockopen phpcredits phpinfo phpversion pi png2wbmp popen pos pow
    prev print printf putenv quotemeta rad2deg rand range rawurldecode rawurlencode readdir readfile readgzfile
    readline readlink realpath recode rename require reset rewind rewinddir rmdir round rsort rtrim scandir
    serialize setcookie setlocale setrawcookie settype sha1 shuffle signeurlpaiement sin sinh sizeof sleep snmpget
    snmpgetnext snmprealwalk snmpset snmpwalk snmpwalkoid sort soundex split spliti sprintf sqrt srand sscanf stat
    strcasecmp strchr strcmp strcoll strcspn strftime stripcslashes stripos stripslashes stristr strlen
    strnatcasecmp strnatcmp strncasecmp strncmp strpbrk strpos strptime strrchr strrev strripos strrpos strspn
    strstr strtok strtolower strtotime strtoupper strtr strval substr symlink syslog system tan tanh tempnam
    textdomain time tmpfile touch trim uasort ucfirst ucwords uksort umask uniqid unixtojd unlink unpack
    unserialize unset urldecode urlencode usleep usort vfprintf virtual vprintf vsprintf wordwrap
    array_change_key_case array_chunk array_combine array_count_values array_diff array_diff_assoc
    array_diff_key array_diff_uassoc array_diff_ukey array_fill array_fill_keys array_filter array_flip
    array_intersect array_intersect_assoc array_intersect_key array_intersect_uassoc array_intersect_ukey
    array_key_exists array_keys array_map array_merge array_merge_recursive array_multisort array_pad
    array_pop array_product array_push array_rand array_reduce array_reverse array_search array_shift
    array_slice array_splice array_sum array_udiff array_udiff_assoc array_udiff_uassoc array_uintersect
    array_uintersect_assoc array_uintersect_uassoc array_unique array_unshift array_values array_walk
    array_walk_recursive
    assert_options base_convert base64_decode base64_encode
    chunk_split class_exists class_implements class_parents
    count_chars debug_backtrace debug_print_backtrace debug_zval_dump
    error_get_last error_log error_reporting extension_loaded
    file_exists file_get_contents file_put_contents load_file
    func_get_arg func_get_args func_num_args function_exists
    get_browser get_called_class get_cfg_var get_class get_class_methods get_class_vars
    get_current_user get_declared_classes get_declared_interfaces get_defined_constants
    get_defined_functions get_defined_vars get_extension_funcs get_headers get_html_translation_table
    get_include_path get_included_files get_loaded_extensions get_magic_quotes_gpc get_magic_quotes_runtime
    get_meta_tags get_object_vars get_parent_class get_required_files get_resource_type
    gc_collect_cycles gc_disable gc_enable gc_enabled
    halt_compiler headers_list headers_sent highlight_file highlight_string
    html_entity_decode htmlspecialchars_decode
    in_array include_once inclued_get_data
    is_a is_array is_binary is_bool is_buffer is_callable is_dir is_double is_executable is_file is_finite
    is_float is_infinite is_int is_integer is_link is_long is_nan is_null is_numeric is_object is_readable
    is_real is_resource is_scalar is_soap_fault is_string is_subclass_of is_unicode is_uploaded_file
    is_writable is_writeable
    locale_get_default locale_set_default
    number_format override_function parse_str parse_url
    php_check_syntax php_ini_loaded_file php_ini_scanned_files php_logo_guid php_sapi_name
    php_strip_whitespace php_uname
    preg_filter preg_grep preg_last_error preg_match preg_match_all preg_quote preg_replace
    preg_replace_callback preg_split print_r
    require_once register_shutdown_function register_tick_function
    set_error_handler set_exception_handler set_file_buffer set_include_path
    set_magic_quotes_runtime set_time_limit shell_exec
    str_getcsv str_ireplace str_pad str_repeat str_replace str_rot13 str_shuffle str_split str_word_count
    strip_tags substr_compare substr_count substr_replace
    time_nanosleep time_sleep_until
    token_get_all token_name trigger_error
    unregister_tick_function use_soap_error_handler user_error
    utf8_decode utf8_encode var_dump var_export
    version_compare
    zend_logo_guid zend_thread_id zend_version
    create_function call_user_func_array
    posix_access posix_ctermid posix_get_last_error posix_getcwd posix_getegid
    posix_geteuid posix_getgid posix_getgrgid posix_getgrnam posix_getgroups
    posix_getlogin posix_getpgid posix_getpgrp posix_getpid posix_getppid
    posix_getpwnam posix_getpwuid posix_getrlimit posix_getsid posix_getuid
    posix_initgroups posix_isatty posix_kill posix_mkfifo posix_mknod
    posix_setegid posix_seteuid posix_setgid posix_setpgid posix_setsid
    posix_setuid posix_strerror posix_times posix_ttyname posix_uname
    pcntl_alarm pcntl_exec pcntl_fork pcntl_getpriority pcntl_setpriority
    pcntl_signal pcntl_signal_dispatch pcntl_sigprocmask pcntl_sigtimedwait
    pcntl_sigwaitinfo pcntl_wait pcntl_waitpid pcntl_wexitstatus pcntl_wifexited
    pcntl_wifsignaled pcntl_wifstopped pcntl_wstopsig pcntl_wtermsig
  ]
  # TODO: more built-in PHP functions?

  # Error-level constants; these get their own :exception kind in IDENT_KIND.
  EXCEPTIONS = %w[
    E_ERROR E_WARNING E_PARSE E_NOTICE E_CORE_ERROR E_CORE_WARNING E_COMPILE_ERROR E_COMPILE_WARNING
    E_USER_ERROR E_USER_WARNING E_USER_NOTICE E_DEPRECATED E_USER_DEPRECATED E_ALL E_STRICT
  ]

  # Literal-like words, magic constants and predefined constants.
  CONSTANTS = %w[
    null true false self parent
    __LINE__ __DIR__ __FILE__
    __CLASS__ __NAMESPACE__ __METHOD__ __FUNCTION__
    PHP_VERSION PHP_MAJOR_VERSION PHP_MINOR_VERSION PHP_RELEASE_VERSION PHP_VERSION_ID PHP_EXTRA_VERSION PHP_ZTS
    PHP_DEBUG PHP_MAXPATHLEN PHP_OS PHP_SAPI PHP_EOL PHP_INT_MAX PHP_INT_SIZE DEFAULT_INCLUDE_PATH
    PEAR_INSTALL_DIR PEAR_EXTENSION_DIR PHP_EXTENSION_DIR PHP_PREFIX PHP_BINDIR PHP_LIBDIR PHP_DATADIR
    PHP_SYSCONFDIR PHP_LOCALSTATEDIR PHP_CONFIG_FILE_PATH PHP_CONFIG_FILE_SCAN_DIR PHP_SHLIB_SUFFIX
    PHP_OUTPUT_HANDLER_START PHP_OUTPUT_HANDLER_CONT PHP_OUTPUT_HANDLER_END
    __COMPILER_HALT_OFFSET__
    EXTR_OVERWRITE EXTR_SKIP EXTR_PREFIX_SAME EXTR_PREFIX_ALL EXTR_PREFIX_INVALID EXTR_PREFIX_IF_EXISTS
    EXTR_IF_EXISTS SORT_ASC SORT_DESC SORT_REGULAR SORT_NUMERIC SORT_STRING CASE_LOWER CASE_UPPER COUNT_NORMAL
    COUNT_RECURSIVE ASSERT_ACTIVE ASSERT_CALLBACK ASSERT_BAIL ASSERT_WARNING ASSERT_QUIET_EVAL CONNECTION_ABORTED
    CONNECTION_NORMAL CONNECTION_TIMEOUT INI_USER INI_PERDIR INI_SYSTEM INI_ALL M_E M_LOG2E M_LOG10E M_LN2 M_LN10
    M_PI M_PI_2 M_PI_4 M_1_PI M_2_PI M_2_SQRTPI M_SQRT2 M_SQRT1_2 CRYPT_SALT_LENGTH CRYPT_STD_DES CRYPT_EXT_DES
    CRYPT_MD5 CRYPT_BLOWFISH DIRECTORY_SEPARATOR SEEK_SET SEEK_CUR SEEK_END LOCK_SH LOCK_EX LOCK_UN LOCK_NB
    HTML_SPECIALCHARS HTML_ENTITIES ENT_COMPAT ENT_QUOTES ENT_NOQUOTES INFO_GENERAL INFO_CREDITS
    INFO_CONFIGURATION INFO_MODULES INFO_ENVIRONMENT INFO_VARIABLES INFO_LICENSE INFO_ALL CREDITS_GROUP
    CREDITS_GENERAL CREDITS_SAPI CREDITS_MODULES CREDITS_DOCS CREDITS_FULLPAGE CREDITS_QA CREDITS_ALL STR_PAD_LEFT
    STR_PAD_RIGHT STR_PAD_BOTH PATHINFO_DIRNAME PATHINFO_BASENAME PATHINFO_EXTENSION PATH_SEPARATOR CHAR_MAX
    LC_CTYPE LC_NUMERIC LC_TIME LC_COLLATE LC_MONETARY LC_ALL LC_MESSAGES ABDAY_1 ABDAY_2 ABDAY_3 ABDAY_4 ABDAY_5
    ABDAY_6 ABDAY_7 DAY_1 DAY_2 DAY_3 DAY_4 DAY_5 DAY_6 DAY_7 ABMON_1 ABMON_2 ABMON_3 ABMON_4 ABMON_5 ABMON_6
    ABMON_7 ABMON_8 ABMON_9 ABMON_10 ABMON_11 ABMON_12 MON_1 MON_2 MON_3 MON_4 MON_5 MON_6 MON_7 MON_8 MON_9
    MON_10 MON_11 MON_12 AM_STR PM_STR D_T_FMT D_FMT T_FMT T_FMT_AMPM ERA ERA_YEAR ERA_D_T_FMT ERA_D_FMT ERA_T_FMT
    ALT_DIGITS INT_CURR_SYMBOL CURRENCY_SYMBOL CRNCYSTR MON_DECIMAL_POINT MON_THOUSANDS_SEP MON_GROUPING
    POSITIVE_SIGN NEGATIVE_SIGN INT_FRAC_DIGITS FRAC_DIGITS P_CS_PRECEDES P_SEP_BY_SPACE N_CS_PRECEDES
    N_SEP_BY_SPACE P_SIGN_POSN N_SIGN_POSN DECIMAL_POINT RADIXCHAR THOUSANDS_SEP THOUSEP GROUPING YESEXPR NOEXPR
    YESSTR NOSTR CODESET LOG_EMERG LOG_ALERT LOG_CRIT LOG_ERR LOG_WARNING LOG_NOTICE LOG_INFO LOG_DEBUG LOG_KERN
    LOG_USER LOG_MAIL LOG_DAEMON LOG_AUTH LOG_SYSLOG LOG_LPR LOG_NEWS LOG_UUCP LOG_CRON LOG_AUTHPRIV LOG_LOCAL0
    LOG_LOCAL1 LOG_LOCAL2 LOG_LOCAL3 LOG_LOCAL4 LOG_LOCAL5 LOG_LOCAL6 LOG_LOCAL7 LOG_PID LOG_CONS LOG_ODELAY
    LOG_NDELAY LOG_NOWAIT LOG_PERROR
  ]

  # Predefined variables, including the leading "$".
  PREDEFINED = %w[
    $GLOBALS $_SERVER $_GET $_POST $_FILES $_REQUEST $_SESSION $_ENV
    $_COOKIE $php_errormsg $HTTP_RAW_POST_DATA $http_response_header
    $argc $argv
  ]

  # Token kind for a bare identifier, looked up without regard to case.
  # The list is constructed with :ident, which the scanner treats as the
  # "not a known word" result.
  # NOTE(review): several words (e.g. "array", "echo") appear in more than
  # one list; which kind wins depends on how WordList#add resolves
  # duplicates -- presumably the last add -- confirm against WordList.
  IDENT_KIND = CaseIgnoringWordList.new(:ident).
    add(KEYWORDS, :reserved).
    add(TYPES, :pre_type).
    add(LANGUAGE_CONSTRUCTS, :reserved).
    add(BUILTIN_FUNCTIONS, :predefined).
    add(CLASSES, :pre_constant).
    add(EXCEPTIONS, :exception).
    add(CONSTANTS, :pre_constant)

  # Token kind for a "$name" variable: the list is constructed with
  # :local_variable, and the PREDEFINED names are registered as :predefined.
  VARIABLE_KIND = WordList.new(:local_variable).
    add(PREDEFINED, :predefined)
end
Chris@210 191
# Regular expressions shared by the PHP scanner.
module RE

  # Start of a PHP section: <script language="php"> (either quote style),
  # "<?php" with an optional trailing digit, or a short "<?" tag that is
  # not the beginning of an "<?xml" prolog. Case-insensitive.
  PHP_START = /
    <script\s+[^>]*?language\s*=\s*"php"[^>]*?> |
    <script\s+[^>]*?language\s*=\s*'php'[^>]*?> |
    <\?php\d? |
    <\?(?!xml)
  /xi

  # End of a PHP section: "</script>" or "?>". Case-insensitive.
  PHP_END = %r!
    </script> |
    \?>
  !xi

  # Markup whose presence anywhere in the input makes the scanner start in
  # HTML mode.
  HTML_INDICATOR = /<!DOCTYPE html|<(?:html|body|div|p)[> ]/i

  # A PHP identifier: a letter, underscore or byte in 0x7f-0xFF, followed by
  # any number of those or digits.
  #
  # The "n" flag makes this a binary (ASCII-8BIT) regexp. Without it the raw
  # byte range \x7f-\xFF is rejected as an "invalid multibyte escape" when the
  # source file is read as UTF-8. scan_tokens converts its input to
  # ASCII-8BIT before matching, so byte-wise matching is what is wanted here.
  IDENTIFIER = /[a-z_\x7f-\xFF][a-z0-9_\x7f-\xFF]*/in

  # A variable: "$" followed by an identifier.
  VARIABLE = /\$#{IDENTIFIER}/

  # Operators and delimiters. Alternatives are tried in the order written.
  OPERATOR = /
    \.(?!\d)=? |     # dot that is not decimal point, string concatenation
    && | \|\| |      # logic
    :: | -> | => |   # scope, member, dictionary
    \\(?!\n) |       # namespace
    \+\+ | -- |      # increment, decrement
    [,;?:()\[\]{}] | # simple delimiters
    [-+*\/%&|^]=? |  # ordinary math, binary logic, assignment shortcuts
    [~$] |           # whatever
    =& |             # reference assignment
    [=!]=?=? | <> |  # comparison and assignment
    <<=? | >>=? | [<>]=? # comparison and shift
  /x

end
Chris@210 226
# Tokenizes the scanner's input as PHP, optionally embedded in HTML.
#
# tokens  - collection that receives [text, kind] pairs, plus
#           [:open, kind] / [:close, kind] markers, via <<; it is also
#           the return value.
# options - not referenced anywhere in this method body.
#
# The method is a state machine driven by the +states+ stack:
#   :initial            HTML outside of PHP; handed to @html_scanner
#   :php                PHP code
#   :sqstring           single-quoted string, or heredoc with a '...' label
#   :dqstring           double-quoted or backtick string, or any other heredoc
#   :class_expected     directly after the reserved word 'class'
#   :function_expected  directly after the reserved word 'function'
# While a "{$...}" interpolation inside a :dqstring is being scanned, the
# string's stack entry is temporarily replaced by [state, delimiter].
Chris@210 227 def scan_tokens tokens, options
# On Rubies whose strings carry an encoding, convert the input to
# ASCII-8BIT; characters that cannot be converted are replaced by '?'.
Chris@210 228 if string.respond_to?(:encoding)
Chris@210 229 unless string.encoding == Encoding::ASCII_8BIT
Chris@210 230 self.string = string.encode Encoding::ASCII_8BIT,
Chris@210 231 :invalid => :replace, :undef => :replace, :replace => '?'
Chris@210 232 end
Chris@210 233 end
Chris@210 234
# Decide whether the input is HTML with embedded PHP or plain PHP code.
Chris@210 235 if check(RE::PHP_START) || # starts with <?
Chris@210 236 (match?(/\s*<\S/) && exist?(RE::PHP_START)) || # starts with tag and contains <?
Chris@210 237 exist?(RE::HTML_INDICATOR) ||
Chris@210 238 check(/.{1,100}#{RE::PHP_START}/om) # PHP start after max 100 chars
Chris@210 239 # is HTML with embedded PHP, so start with HTML
Chris@210 240 states = [:initial]
Chris@210 241 else
Chris@210 242 # is just PHP, so start with PHP surrounded by HTML
Chris@210 243 states = [:initial, :php]
Chris@210 244 end
Chris@210 245
# label_expected: an unknown identifier followed by a single ':' at this
#   point is tokenized as :label.
# case_expected: set after 'case' / 'default'; the next operator clears it,
#   and if that operator is ':' a label may follow again.
Chris@210 246 label_expected = true
Chris@210 247 case_expected = false
Chris@210 248
# heredoc_delimiter: nil, or a Regexp matching the closing label of the
#   heredoc currently being scanned.
# delimiter: the quote character ('"' or '`') of the current :dqstring.
# modifier: a pending "b" binary-string prefix, emitted once the string opens.
Chris@210 249 heredoc_delimiter = nil
Chris@210 250 delimiter = nil
Chris@210 251 modifier = nil
Chris@210 252
Chris@210 253 until eos?
Chris@210 254
# Branches either set kind (and optionally match) and fall through to the
# common "tokens << [match, kind]" at the bottom of the loop, or push their
# tokens themselves and skip it with `next`.
Chris@210 255 match = nil
Chris@210 256 kind = nil
Chris@210 257
Chris@210 258 case states.last
Chris@210 259
Chris@210 260 when :initial # HTML
Chris@210 261 if scan RE::PHP_START
Chris@210 262 kind = :inline_delimiter
Chris@210 263 label_expected = true
Chris@210 264 states << :php
Chris@210 265 else
# Everything up to the next PHP start (or the end of input) is passed to
# the nested HTML scanner.
Chris@210 266 match = scan_until(/(?=#{RE::PHP_START})/o) || scan_until(/\z/)
Chris@210 267 @html_scanner.tokenize match unless match.empty?
Chris@210 268 next
Chris@210 269 end
Chris@210 270
Chris@210 271 when :php
Chris@210 272 if match = scan(/\s+/)
Chris@210 273 tokens << [match, :space]
Chris@210 274 next
Chris@210 275
# /* ... */ comments (also when unterminated), and // or # comments that
# run to the end of the line or up to a PHP end tag.
Chris@210 276 elsif scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
Chris@210 277 kind = :comment
Chris@210 278
Chris@210 279 elsif match = scan(RE::IDENTIFIER)
Chris@210 280 kind = Words::IDENT_KIND[match]
Chris@210 281 if kind == :ident && label_expected && check(/:(?!:)/)
Chris@210 282 kind = :label
Chris@210 283 label_expected = true
Chris@210 284 else
Chris@210 285 label_expected = false
# Unknown identifiers starting with an uppercase ASCII letter become :constant.
Chris@210 286 if kind == :ident && match =~ /^[A-Z]/
Chris@210 287 kind = :constant
Chris@210 288 elsif kind == :reserved
Chris@210 289 case match
Chris@210 290 when 'class'
Chris@210 291 states << :class_expected
Chris@210 292 when 'function'
Chris@210 293 states << :function_expected
Chris@210 294 when 'case', 'default'
Chris@210 295 case_expected = true
Chris@210 296 end
Chris@210 297 elsif match == 'b' && check(/['"]/) # binary string literal
# The "b" is not emitted here; it is kept in modifier and pushed as a
# :modifier token right after the string's [:open, :string] marker.
Chris@210 298 modifier = match
Chris@210 299 next
Chris@210 300 end
Chris@210 301 end
Chris@210 302
# Numbers: floats are tried before hex and plain integers.
Chris@210 303 elsif scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
Chris@210 304 label_expected = false
Chris@210 305 kind = :float
Chris@210 306
Chris@210 307 elsif scan(/0x[0-9a-fA-F]+/)
Chris@210 308 label_expected = false
Chris@210 309 kind = :hex
Chris@210 310
Chris@210 311 elsif scan(/\d+/)
Chris@210 312 label_expected = false
Chris@210 313 kind = :integer
Chris@210 314
# Opening quote of a single-quoted string.
Chris@210 315 elsif scan(/'/)
Chris@210 316 tokens << [:open, :string]
Chris@210 317 if modifier
Chris@210 318 tokens << [modifier, :modifier]
Chris@210 319 modifier = nil
Chris@210 320 end
Chris@210 321 kind = :delimiter
Chris@210 322 states.push :sqstring
Chris@210 323
# Opening quote of a double-quoted or backtick string; the quote character
# is remembered in delimiter so the matching close can be recognised.
Chris@210 324 elsif match = scan(/["`]/)
Chris@210 325 tokens << [:open, :string]
Chris@210 326 if modifier
Chris@210 327 tokens << [modifier, :modifier]
Chris@210 328 modifier = nil
Chris@210 329 end
Chris@210 330 delimiter = match
Chris@210 331 kind = :delimiter
Chris@210 332 states.push :dqstring
Chris@210 333
Chris@210 334 elsif match = scan(RE::VARIABLE)
Chris@210 335 label_expected = false
Chris@210 336 kind = Words::VARIABLE_KIND[match]
Chris@210 337
# "{" opens a nested :php level so the matching "}" can pop it again.
Chris@210 338 elsif scan(/\{/)
Chris@210 339 kind = :operator
Chris@210 340 label_expected = true
Chris@210 341 states.push :php
Chris@210 342
Chris@210 343 elsif scan(/\}/)
Chris@210 344 if states.size == 1
Chris@210 345 kind = :error
Chris@210 346 else
Chris@210 347 states.pop
# An Array entry on top means this "}" closes a "{$...}" interpolation:
# restore the string state and its delimiter, then close the inline group.
Chris@210 348 if states.last.is_a?(::Array)
Chris@210 349 delimiter = states.last[1]
Chris@210 350 states[-1] = states.last[0]
Chris@210 351 tokens << [matched, :delimiter]
Chris@210 352 tokens << [:close, :inline]
Chris@210 353 next
Chris@210 354 else
Chris@210 355 kind = :operator
Chris@210 356 label_expected = true
Chris@210 357 end
Chris@210 358 end
Chris@210 359
Chris@210 360 elsif scan(/@/)
Chris@210 361 label_expected = false
Chris@210 362 kind = :exception
Chris@210 363
# A PHP end tag drops the whole state stack and returns to HTML.
Chris@210 364 elsif scan RE::PHP_END
Chris@210 365 kind = :inline_delimiter
Chris@210 366 states = [:initial]
Chris@210 367
# Heredoc start: <<<LABEL, <<<"LABEL" or <<<'LABEL'. The single-quoted
# form is scanned in the :sqstring state, the others in :dqstring.
Chris@210 368 elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
Chris@210 369 tokens << [:open, :string]
Chris@210 370 warn 'heredoc in heredoc?' if heredoc_delimiter
Chris@210 371 heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
Chris@210 372 kind = :delimiter
Chris@210 373 states.push self[3] ? :sqstring : :dqstring
# The closing label must be followed by an optional ";" and the end of the line.
Chris@210 374 heredoc_delimiter = /#{heredoc_delimiter}(?=;?$)/
Chris@210 375
Chris@210 376 elsif match = scan(/#{RE::OPERATOR}/o)
Chris@210 377 label_expected = match == ';'
Chris@210 378 if case_expected
Chris@210 379 label_expected = true if match == ':'
Chris@210 380 case_expected = false
Chris@210 381 end
Chris@210 382 kind = :operator
Chris@210 383
# Nothing matched: consume a single character as an error token.
Chris@210 384 else
Chris@210 385 getch
Chris@210 386 kind = :error
Chris@210 387
Chris@210 388 end
Chris@210 389
Chris@210 390 when :sqstring
# Inside a heredoc the content is scanned line by line, because each
# newline may be followed by the closing label.
Chris@210 391 if scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
Chris@210 392 kind = :content
Chris@210 393 elsif !heredoc_delimiter && scan(/'/)
Chris@210 394 tokens << [matched, :delimiter]
Chris@210 395 tokens << [:close, :string]
Chris@210 396 delimiter = nil
Chris@210 397 label_expected = false
Chris@210 398 states.pop
Chris@210 399 next
Chris@210 400 elsif heredoc_delimiter && match = scan(/\n/)
Chris@210 401 kind = :content
# If the closing label follows the newline, emit the newline, the label
# and the string close, then leave the string state.
Chris@210 402 if scan heredoc_delimiter
Chris@210 403 tokens << ["\n", :content]
Chris@210 404 tokens << [matched, :delimiter]
Chris@210 405 tokens << [:close, :string]
Chris@210 406 heredoc_delimiter = nil
Chris@210 407 label_expected = false
Chris@210 408 states.pop
Chris@210 409 next
Chris@210 410 end
# Recognised escapes become :char; any other backslash pair is plain content.
Chris@210 411 elsif scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
Chris@210 412 kind = :char
Chris@210 413 elsif scan(/\\./m)
Chris@210 414 kind = :content
Chris@210 415 elsif scan(/\\/)
Chris@210 416 kind = :error
Chris@210 417 end
Chris@210 418
Chris@210 419 when :dqstring
# Plain content stops at "$", "{", a backslash, and either the newline
# (heredoc) or the closing quote character.
Chris@210 420 if scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
Chris@210 421 kind = :content
Chris@210 422 elsif !heredoc_delimiter && scan(delimiter == '"' ? /"/ : /`/)
Chris@210 423 tokens << [matched, :delimiter]
Chris@210 424 tokens << [:close, :string]
Chris@210 425 delimiter = nil
Chris@210 426 label_expected = false
Chris@210 427 states.pop
Chris@210 428 next
Chris@210 429 elsif heredoc_delimiter && match = scan(/\n/)
Chris@210 430 kind = :content
Chris@210 431 if scan heredoc_delimiter
Chris@210 432 tokens << ["\n", :content]
Chris@210 433 tokens << [matched, :delimiter]
Chris@210 434 tokens << [:close, :string]
Chris@210 435 heredoc_delimiter = nil
Chris@210 436 label_expected = false
Chris@210 437 states.pop
Chris@210 438 next
Chris@210 439 end
# Hex (\xH, \xHH) and octal (\N .. \NNN) escapes.
Chris@210 440 elsif scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
Chris@210 441 kind = :char
Chris@210 442 elsif scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
Chris@210 443 kind = :char
Chris@210 444 elsif scan(/\\./m)
Chris@210 445 kind = :content
Chris@210 446 elsif scan(/\\/)
Chris@210 447 kind = :error
# Simple interpolation: $var, $var[key] and $var->prop are recognised;
# "$var[" or "$var->" not followed by a bare identifier is flagged as :error.
Chris@210 448 elsif match = scan(/#{RE::VARIABLE}/o)
Chris@210 449 kind = :local_variable
Chris@210 450 if check(/\[#{RE::IDENTIFIER}\]/o)
Chris@210 451 tokens << [:open, :inline]
Chris@210 452 tokens << [match, :local_variable]
Chris@210 453 tokens << [scan(/\[/), :operator]
Chris@210 454 tokens << [scan(/#{RE::IDENTIFIER}/o), :ident]
Chris@210 455 tokens << [scan(/\]/), :operator]
Chris@210 456 tokens << [:close, :inline]
Chris@210 457 next
Chris@210 458 elsif check(/\[/)
Chris@210 459 match << scan(/\[['"]?#{RE::IDENTIFIER}?['"]?\]?/o)
Chris@210 460 kind = :error
Chris@210 461 elsif check(/->#{RE::IDENTIFIER}/o)
Chris@210 462 tokens << [:open, :inline]
Chris@210 463 tokens << [match, :local_variable]
Chris@210 464 tokens << [scan(/->/), :operator]
Chris@210 465 tokens << [scan(/#{RE::IDENTIFIER}/o), :ident]
Chris@210 466 tokens << [:close, :inline]
Chris@210 467 next
Chris@210 468 elsif check(/->/)
Chris@210 469 match << scan(/->/)
Chris@210 470 kind = :error
Chris@210 471 end
# "{$" starts a complex interpolation: the string state and its delimiter
# are stashed as an Array on the stack and PHP scanning resumes until the
# matching "}". A "{" not followed by "$" is ordinary string text.
Chris@210 472 elsif match = scan(/\{/)
Chris@210 473 if check(/\$/)
Chris@210 474 kind = :delimiter
Chris@210 475 states[-1] = [states.last, delimiter]
Chris@210 476 delimiter = nil
Chris@210 477 states.push :php
Chris@210 478 tokens << [:open, :inline]
Chris@210 479 else
Chris@210 480 kind = :string
Chris@210 481 end
Chris@210 482 elsif scan(/\$\{#{RE::IDENTIFIER}\}/o)
Chris@210 483 kind = :local_variable
Chris@210 484 elsif scan(/\$/)
Chris@210 485 kind = :content
Chris@210 486 end
Chris@210 487
# After 'class': whitespace is skipped, the next identifier becomes the
# :class token; anything else leaves this state without consuming input.
Chris@210 488 when :class_expected
Chris@210 489 if scan(/\s+/)
Chris@210 490 kind = :space
Chris@210 491 elsif match = scan(/#{RE::IDENTIFIER}/o)
Chris@210 492 kind = :class
Chris@210 493 states.pop
Chris@210 494 else
Chris@210 495 states.pop
Chris@210 496 next
Chris@210 497 end
Chris@210 498
# After 'function': like :class_expected, but a "&" (return by reference)
# is accepted before the name, which becomes the :function token.
Chris@210 499 when :function_expected
Chris@210 500 if scan(/\s+/)
Chris@210 501 kind = :space
Chris@210 502 elsif scan(/&/)
Chris@210 503 kind = :operator
Chris@210 504 elsif match = scan(/#{RE::IDENTIFIER}/o)
Chris@210 505 kind = :function
Chris@210 506 states.pop
Chris@210 507 else
Chris@210 508 states.pop
Chris@210 509 next
Chris@210 510 end
Chris@210 511
Chris@210 512 else
Chris@210 513 raise_inspect 'Unknown state!', tokens, states
Chris@210 514 end
Chris@210 515
# Branches that did not assign match themselves use the text of the most
# recent scan. A token without a kind is reported only when
# $CODERAY_DEBUG is set; a token without text always raises.
Chris@210 516 match ||= matched
Chris@210 517 if $CODERAY_DEBUG and not kind
Chris@210 518 raise_inspect 'Error token %p in line %d' %
Chris@210 519 [[match, kind], line], tokens, states
Chris@210 520 end
Chris@210 521 raise_inspect 'Empty token', tokens, states unless match
Chris@210 522
Chris@210 523 tokens << [match, kind]
Chris@210 524
Chris@210 525 end
Chris@210 526
Chris@210 527 tokens
Chris@210 528 end
Chris@210 529
Chris@210 530 end
Chris@210 531
Chris@210 532 end
Chris@210 533 end