annotate vendor/gems/coderay-0.9.7/lib/coderay/scanners/java.rb @ 855:7294e8db2515 bug_162

Close obsolete branch bug_162
author Chris Cannam
date Thu, 14 Jul 2011 11:59:19 +0100
parents 0579821a129a
children
rev   line source
module CodeRay
module Scanners

  # Scanner that splits Java source text into [text, kind] token pairs.
  #
  # Identifiers are classified through IDENT_KIND; string and character
  # literals are wrapped in [:open, :string] / [:close, :string] markers.
  class Java < Scanner

    include Streamable
    register_for :java
    helper :builtin_types

    # http://java.sun.com/docs/books/tutorial/java/nutsandbolts/_keywords.html
    KEYWORDS = %w[
      assert break case catch continue default do else
      finally for if instanceof import new package
      return switch throw try typeof while
      debugger export
    ].freeze
    RESERVED = %w[ const goto ].freeze
    CONSTANTS = %w[ false null true ].freeze
    MAGIC_VARIABLES = %w[ this super ].freeze
    TYPES = (%w[
      boolean byte char class double enum float int interface long
      short void
    ] << '[]').freeze  # because int[] should be highlighted as a type
    DIRECTIVES = %w[
      abstract extends final implements native private protected public
      static strictfp synchronized throws transient volatile
    ].freeze

    # Maps an identifier to its token kind; unknown words yield :ident.
    # Later #add calls override earlier ones for the same word, so builtin
    # types ending in Error/Exception end up as :exception.
    IDENT_KIND = WordList.new(:ident).
      add(KEYWORDS, :keyword).
      add(RESERVED, :reserved).
      add(CONSTANTS, :pre_constant).
      add(MAGIC_VARIABLES, :local_variable).
      add(TYPES, :type).
      add(BuiltinTypes::List, :pre_type).
      add(BuiltinTypes::List.select { |builtin| builtin[/(Error|Exception)$/] }, :exception).
      add(DIRECTIVES, :directive)

    # Body of a backslash escape (the part after the backslash).
    ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
    # Plain (non-escape, non-delimiter) literal content, keyed by opening delimiter.
    STRING_CONTENT_PATTERN = {
      "'" => /[^\\']+/,
      '"' => /[^\\"]+/,
      '/' => /[^\\\/]+/,
    }.freeze
    IDENT = /[a-zA-Z_][A-Za-z_0-9]*/

    # Scans the whole input and appends the resulting tokens to +tokens+.
    #
    # tokens::  collection receiving [text, kind] pairs via <<
    # options:: accepted for interface compatibility; not read here
    #
    # Returns +tokens+.
    def scan_tokens tokens, options

      state = :initial
      string_delimiter = nil
      import_clause = class_name_follows = last_token_dot = false

      until eos?

        kind = nil
        match = nil

        case state

        when :initial

          if match = scan(/ \s+ | \\\n /x)
            tokens << [match, :space]
            next

          elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            tokens << [match, :comment]
            next

          elsif import_clause && scan(/ #{IDENT} (?: \. #{IDENT} )* /ox)
            # match stays nil here; it is filled in from +matched+ below.
            kind = :include

          elsif match = scan(/ #{IDENT} | \[\] /ox)
            kind = IDENT_KIND[match]
            if last_token_dot
              # A word after '.' is a member name, whatever it spells.
              kind = :ident
            elsif class_name_follows
              kind = :class
              class_name_follows = false
            else
              import_clause = true if match == 'import'
              class_name_follows = true if match == 'class' || match == 'interface'
            end

          elsif scan(/ \.(?!\d) | [,?:()\[\]}] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? | <<<?=? | >>>?=? /x)
            kind = :operator

          elsif scan(/;/)
            import_clause = false
            kind = :operator

          elsif scan(/\{/)
            class_name_follows = false
            kind = :operator

          elsif check(/[\d.]/)
            # Hex and octal literals may carry the long suffix (l/L), just
            # like decimal integers.
            if scan(/0[xX][0-9A-Fa-f]+[lL]?/)
              kind = :hex
            # The lookahead rejects anything that makes this a decimal or
            # floating-point literal (e.g. 089, 01.5, 01e3, 017f, 017d).
            elsif scan(/(?>0[0-7]+)(?![89.eEfFdD])[lL]?/)
              kind = :oct
            elsif scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
              kind = :float
            elsif scan(/\d+[lL]?/)
              kind = :integer
            end

          elsif match = scan(/["']/)
            tokens << [:open, :string]
            state = :string
            string_delimiter = match
            kind = :delimiter

          elsif scan(/ @ #{IDENT} /ox)
            kind = :annotation

          else
            getch
            kind = :error

          end

        when :string
          if scan(STRING_CONTENT_PATTERN[string_delimiter])
            kind = :content
          elsif match = scan(/["'\/]/)
            # The content pattern above consumes every character except the
            # backslash and the active delimiter, so only the matching
            # closing delimiter can reach this branch.
            tokens << [match, :delimiter]
            tokens << [:close, state]
            string_delimiter = nil
            state = :initial
            next
          elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            # NOTE(review): inside single-quoted literals only \\ and \' are
            # tagged :char; every other escape is emitted as :content.
            # Confirm this is intended for Java character literals.
            if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
              kind = :content
            else
              kind = :char
            end
          elsif scan(/\\./m)
            # Unrecognised escape: keep it as ordinary content.
            kind = :content
          elsif scan(/ \\ | $ /x)
            # Dangling backslash or end of line: close the literal as an error.
            tokens << [:close, state]
            kind = :error
            state = :initial
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
          end

        else
          raise_inspect 'Unknown state', tokens

        end

        # Branches that called scan without capturing its result rely on this.
        match ||= matched
        if $CODERAY_DEBUG && !kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens unless match

        last_token_dot = match == '.'

        tokens << [match, kind]

      end

      # Input ended inside an unterminated literal: balance the :open marker.
      if state == :string
        tokens << [:close, state]
      end

      tokens
    end

  end

end
end