Chris@210: module CodeRay Chris@210: module Scanners Chris@210: Chris@210: load :html Chris@210: Chris@210: # Original by Stefan Walk. Chris@210: class PHP < Scanner Chris@210: Chris@210: register_for :php Chris@210: file_extension 'php' Chris@210: Chris@210: KINDS_NOT_LOC = HTML::KINDS_NOT_LOC Chris@210: Chris@210: def setup Chris@210: @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true Chris@210: end Chris@210: Chris@210: def reset_instance Chris@210: super Chris@210: @html_scanner.reset Chris@210: end Chris@210: Chris@210: module Words Chris@210: Chris@210: # according to http://www.php.net/manual/en/reserved.keywords.php Chris@210: KEYWORDS = %w[ Chris@210: abstract and array as break case catch class clone const continue declare default do else elseif Chris@210: enddeclare endfor endforeach endif endswitch endwhile extends final for foreach function global Chris@210: goto if implements interface instanceof namespace new or private protected public static switch Chris@210: throw try use var while xor Chris@210: cfunction old_function Chris@210: ] Chris@210: Chris@210: TYPES = %w[ int integer float double bool boolean string array object resource ] Chris@210: Chris@210: LANGUAGE_CONSTRUCTS = %w[ Chris@210: die echo empty exit eval include include_once isset list Chris@210: require require_once return print unset Chris@210: ] Chris@210: Chris@210: CLASSES = %w[ Directory stdClass __PHP_Incomplete_Class exception php_user_filter Closure ] Chris@210: Chris@210: # according to http://php.net/quickref.php on 2009-04-21; Chris@210: # all functions with _ excluded (module functions) and selected additional functions Chris@210: BUILTIN_FUNCTIONS = %w[ Chris@210: abs acos acosh addcslashes addslashes aggregate array arsort ascii2ebcdic asin asinh asort assert atan atan2 Chris@210: atanh basename bcadd bccomp bcdiv bcmod bcmul bcpow bcpowmod bcscale bcsqrt bcsub bin2hex bindec Chris@210: bindtextdomain bzclose bzcompress 
bzdecompress bzerrno bzerror bzerrstr bzflush bzopen bzread bzwrite Chris@210: calculhmac ceil chdir checkdate checkdnsrr chgrp chmod chop chown chr chroot clearstatcache closedir closelog Chris@210: compact constant copy cos cosh count crc32 crypt current date dcgettext dcngettext deaggregate decbin dechex Chris@210: decoct define defined deg2rad delete dgettext die dirname diskfreespace dl dngettext doubleval each Chris@210: ebcdic2ascii echo empty end ereg eregi escapeshellarg escapeshellcmd eval exec exit exp explode expm1 extract Chris@210: fclose feof fflush fgetc fgetcsv fgets fgetss file fileatime filectime filegroup fileinode filemtime fileowner Chris@210: fileperms filepro filesize filetype floatval flock floor flush fmod fnmatch fopen fpassthru fprintf fputcsv Chris@210: fputs fread frenchtojd fscanf fseek fsockopen fstat ftell ftok ftruncate fwrite getallheaders getcwd getdate Chris@210: getenv gethostbyaddr gethostbyname gethostbynamel getimagesize getlastmod getmxrr getmygid getmyinode getmypid Chris@210: getmyuid getopt getprotobyname getprotobynumber getrandmax getrusage getservbyname getservbyport gettext Chris@210: gettimeofday gettype glob gmdate gmmktime gmstrftime gregoriantojd gzclose gzcompress gzdecode gzdeflate Chris@210: gzencode gzeof gzfile gzgetc gzgets gzgetss gzinflate gzopen gzpassthru gzputs gzread gzrewind gzseek gztell Chris@210: gzuncompress gzwrite hash header hebrev hebrevc hexdec htmlentities htmlspecialchars hypot iconv idate Chris@210: implode include intval ip2long iptcembed iptcparse isset Chris@210: jddayofweek jdmonthname jdtofrench jdtogregorian jdtojewish jdtojulian jdtounix jewishtojd join jpeg2wbmp Chris@210: juliantojd key krsort ksort lcfirst lchgrp lchown levenshtein link linkinfo list localeconv localtime log Chris@210: log10 log1p long2ip lstat ltrim mail main max md5 metaphone mhash microtime min mkdir mktime msql natcasesort Chris@210: natsort next ngettext nl2br nthmac octdec opendir openlog Chris@210: ord 
overload pack passthru pathinfo pclose pfsockopen phpcredits phpinfo phpversion pi png2wbmp popen pos pow Chris@210: prev print printf putenv quotemeta rad2deg rand range rawurldecode rawurlencode readdir readfile readgzfile Chris@210: readline readlink realpath recode rename require reset rewind rewinddir rmdir round rsort rtrim scandir Chris@210: serialize setcookie setlocale setrawcookie settype sha1 shuffle signeurlpaiement sin sinh sizeof sleep snmpget Chris@210: snmpgetnext snmprealwalk snmpset snmpwalk snmpwalkoid sort soundex split spliti sprintf sqrt srand sscanf stat Chris@210: strcasecmp strchr strcmp strcoll strcspn strftime stripcslashes stripos stripslashes stristr strlen Chris@210: strnatcasecmp strnatcmp strncasecmp strncmp strpbrk strpos strptime strrchr strrev strripos strrpos strspn Chris@210: strstr strtok strtolower strtotime strtoupper strtr strval substr symlink syslog system tan tanh tempnam Chris@210: textdomain time tmpfile touch trim uasort ucfirst ucwords uksort umask uniqid unixtojd unlink unpack Chris@210: unserialize unset urldecode urlencode usleep usort vfprintf virtual vprintf vsprintf wordwrap Chris@210: array_change_key_case array_chunk array_combine array_count_values array_diff array_diff_assoc Chris@210: array_diff_key array_diff_uassoc array_diff_ukey array_fill array_fill_keys array_filter array_flip Chris@210: array_intersect array_intersect_assoc array_intersect_key array_intersect_uassoc array_intersect_ukey Chris@210: array_key_exists array_keys array_map array_merge array_merge_recursive array_multisort array_pad Chris@210: array_pop array_product array_push array_rand array_reduce array_reverse array_search array_shift Chris@210: array_slice array_splice array_sum array_udiff array_udiff_assoc array_udiff_uassoc array_uintersect Chris@210: array_uintersect_assoc array_uintersect_uassoc array_unique array_unshift array_values array_walk Chris@210: array_walk_recursive Chris@210: assert_options base_convert base64_decode 
base64_encode Chris@210: chunk_split class_exists class_implements class_parents Chris@210: count_chars debug_backtrace debug_print_backtrace debug_zval_dump Chris@210: error_get_last error_log error_reporting extension_loaded Chris@210: file_exists file_get_contents file_put_contents load_file Chris@210: func_get_arg func_get_args func_num_args function_exists Chris@210: get_browser get_called_class get_cfg_var get_class get_class_methods get_class_vars Chris@210: get_current_user get_declared_classes get_declared_interfaces get_defined_constants Chris@210: get_defined_functions get_defined_vars get_extension_funcs get_headers get_html_translation_table Chris@210: get_include_path get_included_files get_loaded_extensions get_magic_quotes_gpc get_magic_quotes_runtime Chris@210: get_meta_tags get_object_vars get_parent_class get_required_files get_resource_type Chris@210: gc_collect_cycles gc_disable gc_enable gc_enabled Chris@210: halt_compiler headers_list headers_sent highlight_file highlight_string Chris@210: html_entity_decode htmlspecialchars_decode Chris@210: in_array include_once inclued_get_data Chris@210: is_a is_array is_binary is_bool is_buffer is_callable is_dir is_double is_executable is_file is_finite Chris@210: is_float is_infinite is_int is_integer is_link is_long is_nan is_null is_numeric is_object is_readable Chris@210: is_real is_resource is_scalar is_soap_fault is_string is_subclass_of is_unicode is_uploaded_file Chris@210: is_writable is_writeable Chris@210: locale_get_default locale_set_default Chris@210: number_format override_function parse_str parse_url Chris@210: php_check_syntax php_ini_loaded_file php_ini_scanned_files php_logo_guid php_sapi_name Chris@210: php_strip_whitespace php_uname Chris@210: preg_filter preg_grep preg_last_error preg_match preg_match_all preg_quote preg_replace Chris@210: preg_replace_callback preg_split print_r Chris@210: require_once register_shutdown_function register_tick_function Chris@210: set_error_handler
set_exception_handler set_file_buffer set_include_path Chris@210: set_magic_quotes_runtime set_time_limit shell_exec Chris@210: str_getcsv str_ireplace str_pad str_repeat str_replace str_rot13 str_shuffle str_split str_word_count Chris@210: strip_tags substr_compare substr_count substr_replace Chris@210: time_nanosleep time_sleep_until Chris@210: token_get_all token_name trigger_error Chris@210: unregister_tick_function use_soap_error_handler user_error Chris@210: utf8_decode utf8_encode var_dump var_export Chris@210: version_compare Chris@210: zend_logo_guid zend_thread_id zend_version Chris@210: create_function call_user_func_array Chris@210: posix_access posix_ctermid posix_get_last_error posix_getcwd posix_getegid Chris@210: posix_geteuid posix_getgid posix_getgrgid posix_getgrnam posix_getgroups Chris@210: posix_getlogin posix_getpgid posix_getpgrp posix_getpid posix_getppid Chris@210: posix_getpwnam posix_getpwuid posix_getrlimit posix_getsid posix_getuid Chris@210: posix_initgroups posix_isatty posix_kill posix_mkfifo posix_mknod Chris@210: posix_setegid posix_seteuid posix_setgid posix_setpgid posix_setsid Chris@210: posix_setuid posix_strerror posix_times posix_ttyname posix_uname Chris@210: pcntl_alarm pcntl_exec pcntl_fork pcntl_getpriority pcntl_setpriority Chris@210: pcntl_signal pcntl_signal_dispatch pcntl_sigprocmask pcntl_sigtimedwait Chris@210: pcntl_sigwaitinfo pcntl_wait pcntl_waitpid pcntl_wexitstatus pcntl_wifexited Chris@210: pcntl_wifsignaled pcntl_wifstopped pcntl_wstopsig pcntl_wtermsig Chris@210: ] Chris@210: # TODO: more built-in PHP functions? 
Chris@210: Chris@210: EXCEPTIONS = %w[ Chris@210: E_ERROR E_WARNING E_PARSE E_NOTICE E_CORE_ERROR E_CORE_WARNING E_COMPILE_ERROR E_COMPILE_WARNING Chris@210: E_USER_ERROR E_USER_WARNING E_USER_NOTICE E_DEPRECATED E_USER_DEPRECATED E_ALL E_STRICT Chris@210: ] Chris@210: Chris@210: CONSTANTS = %w[ Chris@210: null true false self parent Chris@210: __LINE__ __DIR__ __FILE__ __LINE__ Chris@210: __CLASS__ __NAMESPACE__ __METHOD__ __FUNCTION__ Chris@210: PHP_VERSION PHP_MAJOR_VERSION PHP_MINOR_VERSION PHP_RELEASE_VERSION PHP_VERSION_ID PHP_EXTRA_VERSION PHP_ZTS Chris@210: PHP_DEBUG PHP_MAXPATHLEN PHP_OS PHP_SAPI PHP_EOL PHP_INT_MAX PHP_INT_SIZE DEFAULT_INCLUDE_PATH Chris@210: PEAR_INSTALL_DIR PEAR_EXTENSION_DIR PHP_EXTENSION_DIR PHP_PREFIX PHP_BINDIR PHP_LIBDIR PHP_DATADIR Chris@210: PHP_SYSCONFDIR PHP_LOCALSTATEDIR PHP_CONFIG_FILE_PATH PHP_CONFIG_FILE_SCAN_DIR PHP_SHLIB_SUFFIX Chris@210: PHP_OUTPUT_HANDLER_START PHP_OUTPUT_HANDLER_CONT PHP_OUTPUT_HANDLER_END Chris@210: __COMPILER_HALT_OFFSET__ Chris@210: EXTR_OVERWRITE EXTR_SKIP EXTR_PREFIX_SAME EXTR_PREFIX_ALL EXTR_PREFIX_INVALID EXTR_PREFIX_IF_EXISTS Chris@210: EXTR_IF_EXISTS SORT_ASC SORT_DESC SORT_REGULAR SORT_NUMERIC SORT_STRING CASE_LOWER CASE_UPPER COUNT_NORMAL Chris@210: COUNT_RECURSIVE ASSERT_ACTIVE ASSERT_CALLBACK ASSERT_BAIL ASSERT_WARNING ASSERT_QUIET_EVAL CONNECTION_ABORTED Chris@210: CONNECTION_NORMAL CONNECTION_TIMEOUT INI_USER INI_PERDIR INI_SYSTEM INI_ALL M_E M_LOG2E M_LOG10E M_LN2 M_LN10 Chris@210: M_PI M_PI_2 M_PI_4 M_1_PI M_2_PI M_2_SQRTPI M_SQRT2 M_SQRT1_2 CRYPT_SALT_LENGTH CRYPT_STD_DES CRYPT_EXT_DES Chris@210: CRYPT_MD5 CRYPT_BLOWFISH DIRECTORY_SEPARATOR SEEK_SET SEEK_CUR SEEK_END LOCK_SH LOCK_EX LOCK_UN LOCK_NB Chris@210: HTML_SPECIALCHARS HTML_ENTITIES ENT_COMPAT ENT_QUOTES ENT_NOQUOTES INFO_GENERAL INFO_CREDITS Chris@210: INFO_CONFIGURATION INFO_MODULES INFO_ENVIRONMENT INFO_VARIABLES INFO_LICENSE INFO_ALL CREDITS_GROUP Chris@210: CREDITS_GENERAL CREDITS_SAPI CREDITS_MODULES CREDITS_DOCS 
CREDITS_FULLPAGE CREDITS_QA CREDITS_ALL STR_PAD_LEFT Chris@210: STR_PAD_RIGHT STR_PAD_BOTH PATHINFO_DIRNAME PATHINFO_BASENAME PATHINFO_EXTENSION PATH_SEPARATOR CHAR_MAX Chris@210: LC_CTYPE LC_NUMERIC LC_TIME LC_COLLATE LC_MONETARY LC_ALL LC_MESSAGES ABDAY_1 ABDAY_2 ABDAY_3 ABDAY_4 ABDAY_5 Chris@210: ABDAY_6 ABDAY_7 DAY_1 DAY_2 DAY_3 DAY_4 DAY_5 DAY_6 DAY_7 ABMON_1 ABMON_2 ABMON_3 ABMON_4 ABMON_5 ABMON_6 Chris@210: ABMON_7 ABMON_8 ABMON_9 ABMON_10 ABMON_11 ABMON_12 MON_1 MON_2 MON_3 MON_4 MON_5 MON_6 MON_7 MON_8 MON_9 Chris@210: MON_10 MON_11 MON_12 AM_STR PM_STR D_T_FMT D_FMT T_FMT T_FMT_AMPM ERA ERA_YEAR ERA_D_T_FMT ERA_D_FMT ERA_T_FMT Chris@210: ALT_DIGITS INT_CURR_SYMBOL CURRENCY_SYMBOL CRNCYSTR MON_DECIMAL_POINT MON_THOUSANDS_SEP MON_GROUPING Chris@210: POSITIVE_SIGN NEGATIVE_SIGN INT_FRAC_DIGITS FRAC_DIGITS P_CS_PRECEDES P_SEP_BY_SPACE N_CS_PRECEDES Chris@210: N_SEP_BY_SPACE P_SIGN_POSN N_SIGN_POSN DECIMAL_POINT RADIXCHAR THOUSANDS_SEP THOUSEP GROUPING YESEXPR NOEXPR Chris@210: YESSTR NOSTR CODESET LOG_EMERG LOG_ALERT LOG_CRIT LOG_ERR LOG_WARNING LOG_NOTICE LOG_INFO LOG_DEBUG LOG_KERN Chris@210: LOG_USER LOG_MAIL LOG_DAEMON LOG_AUTH LOG_SYSLOG LOG_LPR LOG_NEWS LOG_UUCP LOG_CRON LOG_AUTHPRIV LOG_LOCAL0 Chris@210: LOG_LOCAL1 LOG_LOCAL2 LOG_LOCAL3 LOG_LOCAL4 LOG_LOCAL5 LOG_LOCAL6 LOG_LOCAL7 LOG_PID LOG_CONS LOG_ODELAY Chris@210: LOG_NDELAY LOG_NOWAIT LOG_PERROR Chris@210: ] Chris@210: Chris@210: PREDEFINED = %w[ Chris@210: $GLOBALS $_SERVER $_GET $_POST $_FILES $_REQUEST $_SESSION $_ENV Chris@210: $_COOKIE $php_errormsg $HTTP_RAW_POST_DATA $http_response_header Chris@210: $argc $argv Chris@210: ] Chris@210: Chris@210: IDENT_KIND = CaseIgnoringWordList.new(:ident). Chris@210: add(KEYWORDS, :reserved). Chris@210: add(TYPES, :pre_type). Chris@210: add(LANGUAGE_CONSTRUCTS, :reserved). Chris@210: add(BUILTIN_FUNCTIONS, :predefined). Chris@210: add(CLASSES, :pre_constant). Chris@210: add(EXCEPTIONS, :exception). 
Chris@210: add(CONSTANTS, :pre_constant) Chris@210: Chris@210: VARIABLE_KIND = WordList.new(:local_variable). Chris@210: add(PREDEFINED, :predefined) Chris@210: end Chris@210: Chris@210: module RE Chris@210: Chris@210: PHP_START = / Chris@210: ]*?language\s*=\s*"php"[^>]*?> | Chris@210: ]*?language\s*=\s*'php'[^>]*?> | Chris@210: <\?php\d? | Chris@210: <\?(?!xml) Chris@210: /xi Chris@210: Chris@210: PHP_END = %r! Chris@210: | Chris@210: \?> Chris@210: !xi Chris@210: Chris@210: HTML_INDICATOR = / ]/i Chris@210: Chris@210: IDENTIFIER = /[a-z_\x7f-\xFF][a-z0-9_\x7f-\xFF]*/i Chris@210: VARIABLE = /\$#{IDENTIFIER}/ Chris@210: Chris@210: OPERATOR = / Chris@210: \.(?!\d)=? | # dot that is not decimal point, string concatenation Chris@210: && | \|\| | # logic Chris@210: :: | -> | => | # scope, member, dictionary Chris@210: \\(?!\n) | # namespace Chris@210: \+\+ | -- | # increment, decrement Chris@210: [,;?:()\[\]{}] | # simple delimiters Chris@210: [-+*\/%&|^]=? | # ordinary math, binary logic, assignment shortcuts Chris@210: [~$] | # whatever Chris@210: =& | # reference assignment Chris@210: [=!]=?=? | <> | # comparison and assignment Chris@210: <<=? | >>=? | [<>]=? # comparison and shift Chris@210: /x Chris@210: Chris@210: end Chris@210: Chris@210: def scan_tokens tokens, options Chris@210: if string.respond_to?(:encoding) Chris@210: unless string.encoding == Encoding::ASCII_8BIT Chris@210: self.string = string.encode Encoding::ASCII_8BIT, Chris@210: :invalid => :replace, :undef => :replace, :replace => '?' 
Chris@210: end Chris@210: end Chris@210: Chris@210: if check(RE::PHP_START) || # starts with #{RE::IDENTIFIER}/o) Chris@210: tokens << [:open, :inline] Chris@210: tokens << [match, :local_variable] Chris@210: tokens << [scan(/->/), :operator] Chris@210: tokens << [scan(/#{RE::IDENTIFIER}/o), :ident] Chris@210: tokens << [:close, :inline] Chris@210: next Chris@210: elsif check(/->/) Chris@210: match << scan(/->/) Chris@210: kind = :error Chris@210: end Chris@210: elsif match = scan(/\{/) Chris@210: if check(/\$/) Chris@210: kind = :delimiter Chris@210: states[-1] = [states.last, delimiter] Chris@210: delimiter = nil Chris@210: states.push :php Chris@210: tokens << [:open, :inline] Chris@210: else Chris@210: kind = :string Chris@210: end Chris@210: elsif scan(/\$\{#{RE::IDENTIFIER}\}/o) Chris@210: kind = :local_variable Chris@210: elsif scan(/\$/) Chris@210: kind = :content Chris@210: end Chris@210: Chris@210: when :class_expected Chris@210: if scan(/\s+/) Chris@210: kind = :space Chris@210: elsif match = scan(/#{RE::IDENTIFIER}/o) Chris@210: kind = :class Chris@210: states.pop Chris@210: else Chris@210: states.pop Chris@210: next Chris@210: end Chris@210: Chris@210: when :function_expected Chris@210: if scan(/\s+/) Chris@210: kind = :space Chris@210: elsif scan(/&/) Chris@210: kind = :operator Chris@210: elsif match = scan(/#{RE::IDENTIFIER}/o) Chris@210: kind = :function Chris@210: states.pop Chris@210: else Chris@210: states.pop Chris@210: next Chris@210: end Chris@210: Chris@210: else Chris@210: raise_inspect 'Unknown state!', tokens, states Chris@210: end Chris@210: Chris@210: match ||= matched Chris@210: if $CODERAY_DEBUG and not kind Chris@210: raise_inspect 'Error token %p in line %d' % Chris@210: [[match, kind], line], tokens, states Chris@210: end Chris@210: raise_inspect 'Empty token', tokens, states unless match Chris@210: Chris@210: tokens << [match, kind] Chris@210: Chris@210: end Chris@210: Chris@210: tokens Chris@210: end Chris@210: Chris@210: end 
Chris@210: Chris@210: end Chris@210: end