module CodeRay
  module Scanners

    # Scanner for Java.
    #
    # Splits Java source into tokens and reports them to an encoder. The
    # scanner is a two-state machine: +:initial+ (ordinary code) and
    # +:string+ (inside a quoted string or char literal).
    class Java < Scanner

      register_for :java

      autoload :BuiltinTypes, 'coderay/scanners/java/builtin_types'

      # http://java.sun.com/docs/books/tutorial/java/nutsandbolts/_keywords.html
      # NOTE(review): +typeof+, +debugger+ and +export+ do not look like Java
      # keywords; they are kept here so existing highlighting is unchanged.
      KEYWORDS = %w[
        assert break case catch continue default do else
        finally for if instanceof import new package
        return switch throw try typeof while
        debugger export
      ]  # :nodoc:
      RESERVED = %w[ const goto ]  # :nodoc:
      CONSTANTS = %w[ false null true ]  # :nodoc:
      MAGIC_VARIABLES = %w[ this super ]  # :nodoc:
      TYPES = %w[
        boolean byte char class double enum float int interface long
        short void
      ] << '[]'  # :nodoc: because int[] should be highlighted as a type
      DIRECTIVES = %w[
        abstract extends final implements native private protected public
        static strictfp synchronized throws transient volatile
      ]  # :nodoc:

      # Word list used to classify a scanned word (or the literal "[]").
      # Presumably words that are not listed resolve to :ident, and a later
      # +add+ wins over an earlier one for the same word (so built-in types
      # ending in Error/Exception end up as :exception) -- TODO confirm
      # against WordList.
      IDENT_KIND = WordList.new(:ident).
        add(KEYWORDS, :keyword).
        add(RESERVED, :reserved).
        add(CONSTANTS, :predefined_constant).
        add(MAGIC_VARIABLES, :local_variable).
        add(TYPES, :type).
        add(BuiltinTypes::List, :predefined_type).
        add(BuiltinTypes::List.select { |builtin| builtin[/(Error|Exception)$/] }, :exception).
        add(DIRECTIVES, :directive)  # :nodoc:

      # Escape sequence bodies, i.e. the part that follows the backslash.
      ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
      UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x  # :nodoc:
      # Plain string content, keyed by the opening delimiter: everything up to
      # the next backslash or closing delimiter.
      STRING_CONTENT_PATTERN = {
        "'" => /[^\\']+/,
        '"' => /[^\\"]+/,
        '/' => /[^\\\/]+/,
      }  # :nodoc:
      IDENT = /[a-zA-Z_][A-Za-z_0-9]*/  # :nodoc:

    protected

      # Scans the whole input and sends every token to +encoder+.
      #
      # encoder:: receives +text_token+, +begin_group+ and +end_group+ calls.
      # options:: accepted for interface compatibility; not read here.
      #
      # Returns +encoder+.
      def scan_tokens encoder, options

        state = :initial
        string_delimiter = nil
        # false, or the token kind (:include / :namespace) to use for the
        # dotted name that follows an +import+ / +package+ keyword.
        package_name_expected = false
        # true after +class+ / +interface+ until the next word or "{".
        class_name_follows = false
        # true when the last token that was not whitespace or a comment
        # was ".".
        last_token_dot = false

        until eos?

          case state

          when :initial

            if match = scan(/ \s+ | \\\n /x)
              encoder.text_token match, :space
              # +next+ skips the last_token_dot update at the loop's end.
              next

            elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
              encoder.text_token match, :comment
              # +next+ skips the last_token_dot update at the loop's end.
              next

            elsif package_name_expected && (match = scan(/ #{IDENT} (?: \. #{IDENT} )* /ox))
              # The flag holds the token kind chosen when the keyword was seen.
              encoder.text_token match, package_name_expected

            elsif match = scan(/ #{IDENT} | \[\] /ox)
              kind = IDENT_KIND[match]
              if last_token_dot
                # A word right after "." is always a plain identifier,
                # whatever the word list says.
                kind = :ident
              elsif class_name_follows
                kind = :class
                class_name_follows = false
              else
                case match
                when 'import'
                  package_name_expected = :include
                when 'package'
                  package_name_expected = :namespace
                when 'class', 'interface'
                  class_name_follows = true
                end
              end
              encoder.text_token match, kind

            elsif match = scan(/ \.(?!\d) | [,?:()\[\]}] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? | <<<?=? | >>>?=? /x)
              encoder.text_token match, :operator

            elsif match = scan(/;/)
              # A semicolon ends the import / package statement.
              package_name_expected = false
              encoder.text_token match, :operator

            elsif match = scan(/\{/)
              class_name_follows = false
              encoder.text_token match, :operator

            elsif check(/[\d.]/)
              # Integer literals of every radix may carry an l/L suffix.
              if match = scan(/0[xX][0-9A-Fa-f]+[lL]?/)
                encoder.text_token match, :hex
              # The atomic group plus lookahead rejects digit runs that are
              # really decimal or floating-point literals (e.g. 09, 07.5,
              # 010d); those fall through to the branches below.
              elsif match = scan(/(?>0[0-7]+)(?![89.eEfFdD])[lL]?/)
                encoder.text_token match, :octal
              elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
                encoder.text_token match, :float
              elsif match = scan(/\d+[lL]?/)
                encoder.text_token match, :integer
              end

            elsif match = scan(/["']/)
              state = :string
              encoder.begin_group state
              string_delimiter = match
              encoder.text_token match, :delimiter

            elsif match = scan(/ @ #{IDENT} /ox)
              encoder.text_token match, :annotation

            else
              # Nothing matched: consume one character so the loop advances.
              encoder.text_token getch, :error

            end

          when :string
            if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
              encoder.text_token match, :content
            elsif match = scan(/["'\/]/)
              # The content pattern already consumed every character except a
              # backslash and the active delimiter, so a quote here closes
              # the string.
              encoder.text_token match, :delimiter
              encoder.end_group state
              state = :initial
              string_delimiter = nil
            elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
              # NOTE(review): inside '...' only \\ and \' are reported as
              # :char; every other escape is reported as :content. Confirm
              # that this is intended for Java char literals.
              if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
                encoder.text_token match, :content
              else
                encoder.text_token match, :char
              end
            elsif match = scan(/\\./m)
              # Backslash followed by a character that is no known escape.
              encoder.text_token match, :content
            elsif match = scan(/ \\ | $ /x)
              # Broken string: close the group and fall back to code.
              encoder.end_group state
              state = :initial
              string_delimiter = nil
              # "$" matches the empty string; do not emit an empty token.
              encoder.text_token match, :error unless match.empty?
            else
              raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
            end

          else
            raise_inspect 'Unknown state', encoder

          end

          last_token_dot = match == '.'

        end

        # Input ended inside a string: close the group that is still open.
        if state == :string
          encoder.end_group state
        end

        encoder
      end

    end

  end
end