To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / vendor / gems / coderay-0.9.7 / lib / coderay / scanners / php.rb @ 442:753f1380d6bc

History | View | Annotate | Download (23.2 KB)

1
module CodeRay
2
module Scanners
3
  
4
  load :html
5
  
6
  # Original by Stefan Walk.
7
  class PHP < Scanner
8
    
9
    # Class-level declarations: scanner id and file extension.
    # NOTE(review): register_for / file_extension are presumably inherited
    # Scanner class macros - confirm against the Scanner base class.
    register_for :php
    file_extension 'php'
    
    # Reuses the HTML scanner's KINDS_NOT_LOC list unchanged.
    KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
13
    
14
    # Creates the nested HTML scanner and stores it in @html_scanner.
    #
    # scan_tokens hands every stretch of input outside a PHP section to this
    # scanner. It is created with this scanner's @tokens as its :tokens
    # option (presumably so both scanners append to one token list - confirm
    # against CodeRay.scanner), plus :keep_tokens and :keep_state.
    def setup
      @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
    end
17
    
18
    # Resets this scanner through the superclass implementation and then
    # resets the nested HTML scanner as well, so both start over together.
    def reset_instance
      super
      @html_scanner.reset
    end
22
    
23
    # Word lists used to classify PHP identifiers and variables.
    #
    # IDENT_KIND maps an identifier (case-insensitively) to a token kind and
    # falls back to :ident; VARIABLE_KIND maps a "$name" variable to
    # :predefined or falls back to :local_variable.
    #
    # NOTE(review): the lists are added to IDENT_KIND in order. If
    # WordList#add overwrites existing entries, a word that appears in several
    # lists (e.g. "array", "echo") gets the kind of the last list containing
    # it - confirm that this is intended.
    module Words
      
      # according to http://www.php.net/manual/en/reserved.keywords.php
      KEYWORDS = %w[
        abstract and array as break case catch class clone const continue declare default do else elseif
        enddeclare endfor endforeach endif endswitch endwhile extends final for foreach function global
        goto if implements interface instanceof namespace new or private protected public static switch
        throw try use var while xor
        cfunction old_function
      ]
      
      TYPES = %w[ int integer float double bool boolean string array object resource ]
      
      LANGUAGE_CONSTRUCTS = %w[
        die echo empty exit eval include include_once isset list
        require require_once return print unset
      ]
      
      CLASSES = %w[ Directory stdClass __PHP_Incomplete_Class exception php_user_filter Closure ]
      
      # according to http://php.net/quickref.php on 2009-04-21;
      # all functions with _ excluded (module functions) and selected additional functions
      BUILTIN_FUNCTIONS = %w[
        abs acos acosh addcslashes addslashes aggregate array arsort ascii2ebcdic asin asinh asort assert atan atan2
        atanh basename bcadd bccomp bcdiv bcmod bcmul bcpow bcpowmod bcscale bcsqrt bcsub bin2hex bindec
        bindtextdomain bzclose bzcompress bzdecompress bzerrno bzerror bzerrstr bzflush bzopen bzread bzwrite
        calculhmac ceil chdir checkdate checkdnsrr chgrp chmod chop chown chr chroot clearstatcache closedir closelog
        compact constant copy cos cosh count crc32 crypt current date dcgettext dcngettext deaggregate decbin dechex
        decoct define defined deg2rad delete dgettext die dirname diskfreespace dl dngettext doubleval each
        ebcdic2ascii echo empty end ereg eregi escapeshellarg escapeshellcmd eval exec exit exp explode expm1 extract
        fclose feof fflush fgetc fgetcsv fgets fgetss file fileatime filectime filegroup fileinode filemtime fileowner
        fileperms filepro filesize filetype floatval flock floor flush fmod fnmatch fopen fpassthru fprintf fputcsv
        fputs fread frenchtojd fscanf fseek fsockopen fstat ftell ftok ftruncate fwrite getallheaders getcwd getdate
        getenv gethostbyaddr gethostbyname gethostbynamel getimagesize getlastmod getmxrr getmygid getmyinode getmypid
        getmyuid getopt getprotobyname getprotobynumber getrandmax getrusage getservbyname getservbyport gettext
        gettimeofday gettype glob gmdate gmmktime gmstrftime gregoriantojd gzclose gzcompress gzdecode gzdeflate
        gzencode gzeof gzfile gzgetc gzgets gzgetss gzinflate gzopen gzpassthru gzputs gzread gzrewind gzseek gztell
        gzuncompress gzwrite hash header hebrev hebrevc hexdec htmlentities htmlspecialchars hypot iconv idate
        implode include intval ip2long iptcembed iptcparse isset
        jddayofweek jdmonthname jdtofrench jdtogregorian jdtojewish jdtojulian jdtounix jewishtojd join jpeg2wbmp
        juliantojd key krsort ksort lcfirst lchgrp lchown levenshtein link linkinfo list localeconv localtime log
        log10 log1p long2ip lstat ltrim mail main max md5 metaphone mhash microtime min mkdir mktime msql natcasesort
        natsort next ngettext nl2br nthmac octdec opendir openlog
        ord overload pack passthru pathinfo pclose pfsockopen phpcredits phpinfo phpversion pi png2wbmp popen pos pow
        prev print printf putenv quotemeta rad2deg rand range rawurldecode rawurlencode readdir readfile readgzfile
        readline readlink realpath recode rename require reset rewind rewinddir rmdir round rsort rtrim scandir
        serialize setcookie setlocale setrawcookie settype sha1 shuffle signeurlpaiement sin sinh sizeof sleep snmpget
        snmpgetnext snmprealwalk snmpset snmpwalk snmpwalkoid sort soundex split spliti sprintf sqrt srand sscanf stat
        strcasecmp strchr strcmp strcoll strcspn strftime stripcslashes stripos stripslashes stristr strlen
        strnatcasecmp strnatcmp strncasecmp strncmp strpbrk strpos strptime strrchr strrev strripos strrpos strspn
        strstr strtok strtolower strtotime strtoupper strtr strval substr symlink syslog system tan tanh tempnam
        textdomain time tmpfile touch trim uasort ucfirst ucwords uksort umask uniqid unixtojd unlink unpack
        unserialize unset urldecode urlencode usleep usort vfprintf virtual vprintf vsprintf wordwrap
        array_change_key_case array_chunk array_combine array_count_values array_diff array_diff_assoc
        array_diff_key array_diff_uassoc array_diff_ukey array_fill array_fill_keys array_filter array_flip
        array_intersect array_intersect_assoc array_intersect_key array_intersect_uassoc array_intersect_ukey
        array_key_exists array_keys array_map array_merge array_merge_recursive array_multisort array_pad
        array_pop array_product array_push array_rand array_reduce array_reverse array_search array_shift
        array_slice array_splice array_sum array_udiff array_udiff_assoc array_udiff_uassoc array_uintersect
        array_uintersect_assoc array_uintersect_uassoc array_unique array_unshift array_values array_walk
        array_walk_recursive
        assert_options base_convert base64_decode base64_encode
        chunk_split class_exists class_implements class_parents
        count_chars debug_backtrace debug_print_backtrace debug_zval_dump
        error_get_last error_log error_reporting extension_loaded
        file_exists file_get_contents file_put_contents load_file
        func_get_arg func_get_args func_num_args function_exists
        get_browser get_called_class get_cfg_var get_class get_class_methods get_class_vars
        get_current_user get_declared_classes get_declared_interfaces get_defined_constants
        get_defined_functions get_defined_vars get_extension_funcs get_headers get_html_translation_table
        get_include_path get_included_files get_loaded_extensions get_magic_quotes_gpc get_magic_quotes_runtime
        get_meta_tags get_object_vars get_parent_class get_required_files get_resource_type
        gc_collect_cycles gc_disable gc_enable gc_enabled
        halt_compiler headers_list headers_sent highlight_file highlight_string
        html_entity_decode htmlspecialchars_decode
        in_array include_once inclued_get_data
        is_a is_array is_binary is_bool is_buffer is_callable is_dir is_double is_executable is_file is_finite
        is_float is_infinite is_int is_integer is_link is_long is_nan is_null is_numeric is_object is_readable
        is_real is_resource is_scalar is_soap_fault is_string is_subclass_of is_unicode is_uploaded_file
        is_writable is_writeable
        locale_get_default locale_set_default
        number_format override_function parse_str parse_url
        php_check_syntax php_ini_loaded_file php_ini_scanned_files php_logo_guid php_sapi_name
        php_strip_whitespace php_uname
        preg_filter preg_grep preg_last_error preg_match preg_match_all preg_quote preg_replace
        preg_replace_callback preg_split print_r
        require_once register_shutdown_function register_tick_function
        set_error_handler set_exception_handler set_file_buffer set_include_path
        set_magic_quotes_runtime set_time_limit shell_exec
        str_getcsv str_ireplace str_pad str_repeat str_replace str_rot13 str_shuffle str_split str_word_count
        strip_tags substr_compare substr_count substr_replace
        time_nanosleep time_sleep_until
        token_get_all token_name trigger_error
        unregister_tick_function use_soap_error_handler user_error
        utf8_decode utf8_encode var_dump var_export
        version_compare
        zend_logo_guid zend_thread_id zend_version
        create_function call_user_func_array
        posix_access posix_ctermid posix_get_last_error posix_getcwd posix_getegid
        posix_geteuid posix_getgid posix_getgrgid posix_getgrnam posix_getgroups
        posix_getlogin posix_getpgid posix_getpgrp posix_getpid posix_getppid
        posix_getpwnam posix_getpwuid posix_getrlimit posix_getsid posix_getuid
        posix_initgroups posix_isatty posix_kill posix_mkfifo posix_mknod
        posix_setegid posix_seteuid posix_setgid posix_setpgid posix_setsid
        posix_setuid posix_strerror posix_times posix_ttyname posix_uname
        pcntl_alarm pcntl_exec pcntl_fork pcntl_getpriority pcntl_setpriority
        pcntl_signal pcntl_signal_dispatch pcntl_sigprocmask pcntl_sigtimedwait
        pcntl_sigwaitinfo pcntl_wait pcntl_waitpid pcntl_wexitstatus pcntl_wifexited
        pcntl_wifsignaled pcntl_wifstopped pcntl_wstopsig pcntl_wtermsig
      ]
      # TODO: more built-in PHP functions?
      
      # Error level constants; classified with the :exception kind below.
      EXCEPTIONS = %w[
        E_ERROR E_WARNING E_PARSE E_NOTICE E_CORE_ERROR E_CORE_WARNING E_COMPILE_ERROR E_COMPILE_WARNING
        E_USER_ERROR E_USER_WARNING E_USER_NOTICE E_DEPRECATED E_USER_DEPRECATED E_ALL E_STRICT
      ]
      
      CONSTANTS = %w[
        null true false self parent
        __DIR__ __FILE__ __LINE__
        __CLASS__ __NAMESPACE__ __METHOD__ __FUNCTION__
        PHP_VERSION PHP_MAJOR_VERSION PHP_MINOR_VERSION PHP_RELEASE_VERSION PHP_VERSION_ID PHP_EXTRA_VERSION PHP_ZTS
        PHP_DEBUG PHP_MAXPATHLEN PHP_OS PHP_SAPI PHP_EOL PHP_INT_MAX PHP_INT_SIZE DEFAULT_INCLUDE_PATH
        PEAR_INSTALL_DIR PEAR_EXTENSION_DIR PHP_EXTENSION_DIR PHP_PREFIX PHP_BINDIR PHP_LIBDIR PHP_DATADIR
        PHP_SYSCONFDIR PHP_LOCALSTATEDIR PHP_CONFIG_FILE_PATH PHP_CONFIG_FILE_SCAN_DIR PHP_SHLIB_SUFFIX
        PHP_OUTPUT_HANDLER_START PHP_OUTPUT_HANDLER_CONT PHP_OUTPUT_HANDLER_END
        __COMPILER_HALT_OFFSET__
        EXTR_OVERWRITE EXTR_SKIP EXTR_PREFIX_SAME EXTR_PREFIX_ALL EXTR_PREFIX_INVALID EXTR_PREFIX_IF_EXISTS
        EXTR_IF_EXISTS SORT_ASC SORT_DESC SORT_REGULAR SORT_NUMERIC SORT_STRING CASE_LOWER CASE_UPPER COUNT_NORMAL
        COUNT_RECURSIVE ASSERT_ACTIVE ASSERT_CALLBACK ASSERT_BAIL ASSERT_WARNING ASSERT_QUIET_EVAL CONNECTION_ABORTED
        CONNECTION_NORMAL CONNECTION_TIMEOUT INI_USER INI_PERDIR INI_SYSTEM INI_ALL M_E M_LOG2E M_LOG10E M_LN2 M_LN10
        M_PI M_PI_2 M_PI_4 M_1_PI M_2_PI M_2_SQRTPI M_SQRT2 M_SQRT1_2 CRYPT_SALT_LENGTH CRYPT_STD_DES CRYPT_EXT_DES
        CRYPT_MD5 CRYPT_BLOWFISH DIRECTORY_SEPARATOR SEEK_SET SEEK_CUR SEEK_END LOCK_SH LOCK_EX LOCK_UN LOCK_NB
        HTML_SPECIALCHARS HTML_ENTITIES ENT_COMPAT ENT_QUOTES ENT_NOQUOTES INFO_GENERAL INFO_CREDITS
        INFO_CONFIGURATION INFO_MODULES INFO_ENVIRONMENT INFO_VARIABLES INFO_LICENSE INFO_ALL CREDITS_GROUP
        CREDITS_GENERAL CREDITS_SAPI CREDITS_MODULES CREDITS_DOCS CREDITS_FULLPAGE CREDITS_QA CREDITS_ALL STR_PAD_LEFT
        STR_PAD_RIGHT STR_PAD_BOTH PATHINFO_DIRNAME PATHINFO_BASENAME PATHINFO_EXTENSION PATH_SEPARATOR CHAR_MAX
        LC_CTYPE LC_NUMERIC LC_TIME LC_COLLATE LC_MONETARY LC_ALL LC_MESSAGES ABDAY_1 ABDAY_2 ABDAY_3 ABDAY_4 ABDAY_5
        ABDAY_6 ABDAY_7 DAY_1 DAY_2 DAY_3 DAY_4 DAY_5 DAY_6 DAY_7 ABMON_1 ABMON_2 ABMON_3 ABMON_4 ABMON_5 ABMON_6
        ABMON_7 ABMON_8 ABMON_9 ABMON_10 ABMON_11 ABMON_12 MON_1 MON_2 MON_3 MON_4 MON_5 MON_6 MON_7 MON_8 MON_9
        MON_10 MON_11 MON_12 AM_STR PM_STR D_T_FMT D_FMT T_FMT T_FMT_AMPM ERA ERA_YEAR ERA_D_T_FMT ERA_D_FMT ERA_T_FMT
        ALT_DIGITS INT_CURR_SYMBOL CURRENCY_SYMBOL CRNCYSTR MON_DECIMAL_POINT MON_THOUSANDS_SEP MON_GROUPING
        POSITIVE_SIGN NEGATIVE_SIGN INT_FRAC_DIGITS FRAC_DIGITS P_CS_PRECEDES P_SEP_BY_SPACE N_CS_PRECEDES
        N_SEP_BY_SPACE P_SIGN_POSN N_SIGN_POSN DECIMAL_POINT RADIXCHAR THOUSANDS_SEP THOUSEP GROUPING YESEXPR NOEXPR
        YESSTR NOSTR CODESET LOG_EMERG LOG_ALERT LOG_CRIT LOG_ERR LOG_WARNING LOG_NOTICE LOG_INFO LOG_DEBUG LOG_KERN
        LOG_USER LOG_MAIL LOG_DAEMON LOG_AUTH LOG_SYSLOG LOG_LPR LOG_NEWS LOG_UUCP LOG_CRON LOG_AUTHPRIV LOG_LOCAL0
        LOG_LOCAL1 LOG_LOCAL2 LOG_LOCAL3 LOG_LOCAL4 LOG_LOCAL5 LOG_LOCAL6 LOG_LOCAL7 LOG_PID LOG_CONS LOG_ODELAY
        LOG_NDELAY LOG_NOWAIT LOG_PERROR
      ]
      
      # Predefined variables, including the leading "$".
      PREDEFINED = %w[
        $GLOBALS $_SERVER $_GET $_POST $_FILES $_REQUEST $_SESSION $_ENV
        $_COOKIE $php_errormsg $HTTP_RAW_POST_DATA $http_response_header
        $argc $argv
      ]
      
      # Identifier -> token kind; anything not listed is a plain :ident.
      IDENT_KIND = CaseIgnoringWordList.new(:ident).
        add(KEYWORDS, :reserved).
        add(TYPES, :pre_type).
        add(LANGUAGE_CONSTRUCTS, :reserved).
        add(BUILTIN_FUNCTIONS, :predefined).
        add(CLASSES, :pre_constant).
        add(EXCEPTIONS, :exception).
        add(CONSTANTS, :pre_constant)
      
      # Variable -> token kind; anything not listed is a :local_variable.
      VARIABLE_KIND = WordList.new(:local_variable).
        add(PREDEFINED, :predefined)
    end
191
    
192
    # Regular expressions shared by the scanner states.
    module RE
      
      # Start of a PHP section: a <script language="php"> tag (either quote
      # style), "<?php" with an optional digit, or the short "<?" form unless
      # it begins an "<?xml" prolog.
      PHP_START = /
        <script\s+[^>]*?language\s*=\s*"php"[^>]*?> |
        <script\s+[^>]*?language\s*=\s*'php'[^>]*?> |
        <\?php\d? |
        <\?(?!xml)
      /xi
      
      # End of a PHP section: "</script>" or "?>".
      PHP_END = %r!
        </script> |
        \?>
      !xi
      
      # Markup whose presence makes scan_tokens treat the input as HTML with
      # embedded PHP rather than as bare PHP.
      HTML_INDICATOR = /<!DOCTYPE html|<(?:html|body|div|p)[> ]/i
      
      # PHP identifier: ASCII letters, digits and underscore, plus any byte in
      # 0x7f-0xFF. The /n flag makes this an ASCII-8BIT pattern; without it the
      # \x7f-\xFF range is rejected as an invalid multibyte escape when this
      # file is read as UTF-8. scan_tokens converts its input to ASCII-8BIT,
      # so a byte-oriented pattern is what the scanner needs.
      IDENTIFIER = /[a-z_\x7f-\xFF][a-z0-9_\x7f-\xFF]*/in
      # A "$" immediately followed by an identifier.
      VARIABLE = /\$#{IDENTIFIER}/
      
      OPERATOR = /
        \.(?!\d)=? |      # dot that is not decimal point, string concatenation
        && | \|\| |       # logic
        :: | -> | => |    # scope, member, dictionary
        \\(?!\n) |        # namespace
        \+\+ | -- |       # increment, decrement
        [,;?:()\[\]{}] |  # simple delimiters
        [-+*\/%&|^]=? |   # ordinary math, binary logic, assignment shortcuts
        [~$] |            # whatever
        =& |              # reference assignment
        [=!]=?=? | <> |   # comparison and assignment
        <<=? | >>=? | [<>]=?  # comparison and shift
      /x
      
    end
226
    
227
    # Tokenizes the input as PHP, optionally embedded in HTML.
    #
    # tokens  - list that receives [text, kind] pairs as well as
    #           [:open, kind] / [:close, kind] group markers.
    # options - not consulted by this method.
    #
    # Returns the tokens list that was passed in.
    #
    # The scanner is a state machine driven by a stack (+states+):
    #
    #   :initial            - outside PHP; text up to the next PHP start tag is
    #                         handed to @html_scanner
    #   :php                - regular PHP code; pushed again for every "{"
    #   :sqstring           - single-quoted string or 'IDENT' heredoc
    #   :dqstring           - "..." / `...` string or interpolating heredoc
    #   :class_expected     - directly after the "class" keyword
    #   :function_expected  - directly after the "function" keyword
    #
    # While a "{$...}" interpolation is scanned as PHP, the string state below
    # it is temporarily replaced by [state, delimiter] so the delimiter can be
    # restored when the closing "}" is reached.
    def scan_tokens tokens, options
      # Work on bytes. Characters that cannot be converted are replaced
      # with '?'.
      if string.respond_to?(:encoding) && string.encoding != Encoding::ASCII_8BIT
        self.string = string.encode Encoding::ASCII_8BIT,
          :invalid => :replace, :undef => :replace, :replace => '?'
      end
      
      if check(RE::PHP_START) ||  # starts with <?
       (match?(/\s*<\S/) && exist?(RE::PHP_START)) || # starts with tag and contains <?
       exist?(RE::HTML_INDICATOR) ||
       check(/.{1,100}#{RE::PHP_START}/om)  # PHP start after max 100 chars
        # is HTML with embedded PHP, so start with HTML
        states = [:initial]
      else
        # is just PHP, so start with PHP surrounded by HTML
        states = [:initial, :php]
      end
      
      label_expected = true   # an identifier followed by ":" would be a label here
      case_expected = false   # a "case"/"default" keyword is waiting for its ":"
      
      heredoc_delimiter = nil # Regexp for the closing identifier while inside a heredoc
      delimiter = nil         # opening quote character of the current dqstring
      modifier = nil          # pending "b" prefix of a binary string literal
      
      until eos?
        
        match = nil
        kind = nil
        
        case states.last
        
        when :initial  # HTML
          if scan RE::PHP_START
            kind = :inline_delimiter
            label_expected = true
            states << :php
          else
            # everything up to the next PHP start tag (or the end of input) is HTML
            match = scan_until(/(?=#{RE::PHP_START})/o) || scan_until(/\z/)
            @html_scanner.tokenize match unless match.empty?
            next
          end
        
        when :php
          if match = scan(/\s+/)
            tokens << [match, :space]
            next
          
          elsif scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
            kind = :comment
          
          elsif match = scan(RE::IDENTIFIER)
            kind = Words::IDENT_KIND[match]
            if kind == :ident && label_expected && check(/:(?!:)/)
              # label_expected stays true: another label may follow
              kind = :label
            else
              label_expected = false
              if kind == :ident && match =~ /^[A-Z]/
                kind = :constant
              elsif kind == :reserved
                case match
                when 'class'
                  states << :class_expected
                when 'function'
                  states << :function_expected
                when 'case', 'default'
                  case_expected = true
                end
              elsif match == 'b' && check(/['"]/)  # binary string literal
                # emitted as a :modifier token once the string has been opened
                modifier = match
                next
              end
            end
          
          elsif scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
            label_expected = false
            kind = :float
          
          elsif scan(/0x[0-9a-fA-F]+/)
            label_expected = false
            kind = :hex
          
          elsif scan(/\d+/)
            label_expected = false
            kind = :integer
          
          elsif scan(/'/)
            tokens << [:open, :string]
            if modifier
              tokens << [modifier, :modifier]
              modifier = nil
            end
            kind = :delimiter
            states.push :sqstring
          
          elsif match = scan(/["`]/)
            tokens << [:open, :string]
            if modifier
              tokens << [modifier, :modifier]
              modifier = nil
            end
            delimiter = match
            kind = :delimiter
            states.push :dqstring
          
          elsif match = scan(RE::VARIABLE)
            label_expected = false
            kind = Words::VARIABLE_KIND[match]
          
          elsif scan(/\{/)
            kind = :operator
            label_expected = true
            states.push :php
          
          elsif scan(/\}/)
            if states.size == 1
              kind = :error
            else
              states.pop
              if states.last.is_a?(::Array)
                # end of a "{$...}" interpolation: restore the string state
                # and its delimiter
                delimiter = states.last[1]
                states[-1] = states.last[0]
                tokens << [matched, :delimiter]
                tokens << [:close, :inline]
                next
              else
                kind = :operator
                label_expected = true
              end
            end
          
          elsif scan(/@/)
            label_expected = false
            kind = :exception
          
          elsif scan RE::PHP_END
            kind = :inline_delimiter
            states = [:initial]
          
          elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
            tokens << [:open, :string]
            warn 'heredoc in heredoc?' if heredoc_delimiter
            heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
            kind = :delimiter
            # a single-quoted identifier starts a heredoc without interpolation
            states.push self[3] ? :sqstring : :dqstring
            heredoc_delimiter = /#{heredoc_delimiter}(?=;?$)/
          
          elsif match = scan(/#{RE::OPERATOR}/o)
            label_expected = match == ';'
            if case_expected
              label_expected = true if match == ':'
              case_expected = false
            end
            kind = :operator
          
          else
            getch
            kind = :error
          
          end
        
        when :sqstring
          if scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
            kind = :content
          elsif !heredoc_delimiter && scan(/'/)
            tokens << [matched, :delimiter]
            tokens << [:close, :string]
            delimiter = nil
            label_expected = false
            states.pop
            next
          elsif heredoc_delimiter && match = scan(/\n/)
            kind = :content
            # the closing identifier must directly follow a newline
            if scan heredoc_delimiter
              tokens << ["\n", :content]
              tokens << [matched, :delimiter]
              tokens << [:close, :string]
              heredoc_delimiter = nil
              label_expected = false
              states.pop
              next
            end
          elsif scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
            kind = :char
          elsif scan(/\\./m)
            kind = :content
          elsif scan(/\\/)
            kind = :error
          end
        
        when :dqstring
          if scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
            kind = :content
          elsif !heredoc_delimiter && scan(delimiter == '"' ? /"/ : /`/)
            tokens << [matched, :delimiter]
            tokens << [:close, :string]
            delimiter = nil
            label_expected = false
            states.pop
            next
          elsif heredoc_delimiter && match = scan(/\n/)
            kind = :content
            # the closing identifier must directly follow a newline
            if scan heredoc_delimiter
              tokens << ["\n", :content]
              tokens << [matched, :delimiter]
              tokens << [:close, :string]
              heredoc_delimiter = nil
              label_expected = false
              states.pop
              next
            end
          elsif scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
            kind = :char
          elsif scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
            kind = :char
          elsif scan(/\\./m)
            kind = :content
          elsif scan(/\\/)
            kind = :error
          elsif match = scan(/#{RE::VARIABLE}/o)
            kind = :local_variable
            if check(/\[#{RE::IDENTIFIER}\]/o)
              # simple interpolation: $var[key]
              tokens << [:open, :inline]
              tokens << [match, :local_variable]
              tokens << [scan(/\[/), :operator]
              tokens << [scan(/#{RE::IDENTIFIER}/o), :ident]
              tokens << [scan(/\]/), :operator]
              tokens << [:close, :inline]
              next
            elsif check(/\[/)
              # any other "[" form after the variable is flagged as an error
              match << scan(/\[['"]?#{RE::IDENTIFIER}?['"]?\]?/o)
              kind = :error
            elsif check(/->#{RE::IDENTIFIER}/o)
              # simple interpolation: $var->member
              tokens << [:open, :inline]
              tokens << [match, :local_variable]
              tokens << [scan(/->/), :operator]
              tokens << [scan(/#{RE::IDENTIFIER}/o), :ident]
              tokens << [:close, :inline]
              next
            elsif check(/->/)
              match << scan(/->/)
              kind = :error
            end
          elsif match = scan(/\{/)
            if check(/\$/)
              # "{$": scan the interpolated expression as PHP and remember the
              # string delimiter alongside the string state
              kind = :delimiter
              states[-1] = [states.last, delimiter]
              delimiter = nil
              states.push :php
              tokens << [:open, :inline]
            else
              kind = :string
            end
          elsif scan(/\$\{#{RE::IDENTIFIER}\}/o)
            kind = :local_variable
          elsif scan(/\$/)
            kind = :content
          end
        
        when :class_expected
          if scan(/\s+/)
            kind = :space
          elsif match = scan(/#{RE::IDENTIFIER}/o)
            kind = :class
            states.pop
          else
            # no class name follows; rescan from here in the previous state
            states.pop
            next
          end
        
        when :function_expected
          if scan(/\s+/)
            kind = :space
          elsif scan(/&/)
            kind = :operator
          elsif match = scan(/#{RE::IDENTIFIER}/o)
            kind = :function
            states.pop
          else
            # no function name follows; rescan from here in the previous state
            states.pop
            next
          end
        
        else
          raise_inspect 'Unknown state!', tokens, states
        end
        
        # branches that did not capture the text themselves use the last match
        match ||= matched
        if $CODERAY_DEBUG and not kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens, states
        end
        raise_inspect 'Empty token', tokens, states unless match
        
        tokens << [match, kind]
        
      end
      
      tokens
    end
529
    
530
  end
531
  
532
end
533
end