module CodeRay
module Scanners

  # Scanner that splits Java source text into [text, kind] token pairs.
  #
  # Identifiers are classified through IDENT_KIND; strings and character
  # literals are emitted as an [:open, :string] ... [:close, :string] group
  # containing :delimiter, :content, :char and (on malformed input) :error
  # tokens.
  class Java < Scanner

    include Streamable
    register_for :java
    helper :builtin_types

    # http://java.sun.com/docs/books/tutorial/java/nutsandbolts/_keywords.html
    #
    # NOTE(review): typeof, debugger and export are not Java keywords; they
    # look carried over from a JavaScript word list. They are kept here so
    # existing highlighting output does not change - confirm before removing.
    KEYWORDS = %w[
      assert break case catch continue default do else
      finally for if instanceof import new package
      return switch throw try typeof while
      debugger export
    ]
    RESERVED = %w[ const goto ]
    CONSTANTS = %w[ false null true ]
    MAGIC_VARIABLES = %w[ this super ]
    TYPES = %w[
      boolean byte char class double enum float int interface long
      short void
    ] << '[]'  # because int[] should be highlighted as a type
    DIRECTIVES = %w[
      abstract extends final implements native private protected public
      static strictfp synchronized throws transient volatile
    ]

    # Maps an identifier to its token kind; words not listed are :ident.
    # Later add calls are applied after earlier ones, so the
    # Error/Exception subset of the builtin list is registered after the
    # general :pre_type entry for the same words.
    IDENT_KIND = WordList.new(:ident).
      add(KEYWORDS, :keyword).
      add(RESERVED, :reserved).
      add(CONSTANTS, :pre_constant).
      add(MAGIC_VARIABLES, :local_variable).
      add(TYPES, :type).
      add(BuiltinTypes::List, :pre_type).
      add(BuiltinTypes::List.select { |builtin| builtin[/(Error|Exception)$/] }, :exception).
      add(DIRECTIVES, :directive)

    # Escape bodies recognised after a backslash inside a string.
    ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
    # Plain (non-escape, non-delimiter) string content, keyed by the
    # delimiter that opened the string.
    # NOTE(review): the '/' entry is never selected by the visible code,
    # which only opens strings on " or '.
    STRING_CONTENT_PATTERN = {
      "'" => /[^\\']+/,
      '"' => /[^\\"]+/,
      '/' => /[^\\\/]+/,
    }
    IDENT = /[a-zA-Z_][A-Za-z_0-9]*/

    # Scans the whole input and appends tokens to +tokens+.
    #
    # tokens  - collection receiving [text, kind] pairs as well as
    #           [:open, kind] / [:close, kind] group markers (via <<).
    # options - not read by this method.
    #
    # Returns +tokens+.
    def scan_tokens(tokens, options)

      state = :initial
      string_delimiter = nil
      # import_clause:      inside "import ...;" - dotted names become :include
      # class_name_follows: previous word was class/interface
      # last_token_dot:     previous non-space token was ".", so the next
      #                     identifier is a member name, not a keyword/type
      import_clause = class_name_follows = last_token_dot = false

      until eos?

        kind = nil
        match = nil

        case state

        when :initial

          if match = scan(/ \s+ | \\\n /x)
            tokens << [match, :space]
            next

          elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            tokens << [match, :comment]
            next

          elsif import_clause && scan(/ #{IDENT} (?: \. #{IDENT} )* /ox)
            kind = :include

          elsif match = scan(/ #{IDENT} | \[\] /ox)
            kind = IDENT_KIND[match]
            if last_token_dot
              kind = :ident
            elsif class_name_follows
              kind = :class
              class_name_follows = false
            else
              import_clause = true if match == 'import'
              class_name_follows = true if match == 'class' || match == 'interface'
            end

          elsif scan(/ \.(?!\d) | [,?:()\[\]}] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? | <<<?=? | >>>?=? /x)
            kind = :operator

          elsif scan(/;/)
            # A semicolon terminates an import clause.
            import_clause = false
            kind = :operator

          elsif scan(/\{/)
            class_name_follows = false
            kind = :operator

          elsif check(/[\d.]/)
            if scan(/0[xX][0-9A-Fa-f]+/)
              kind = :hex
            elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
              kind = :oct
            elsif scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
              kind = :float
            elsif scan(/\d+[lL]?/)
              kind = :integer
            end

          elsif match = scan(/["']/)
            tokens << [:open, :string]
            state = :string
            string_delimiter = match
            kind = :delimiter

          elsif scan(/ @ #{IDENT} /ox)
            kind = :annotation

          else
            getch
            kind = :error

          end

        when :string
          if scan(STRING_CONTENT_PATTERN[string_delimiter])
            kind = :content
          elsif match = scan(/["'\/]/)
            tokens << [match, :delimiter]
            tokens << [:close, state]
            string_delimiter = nil
            state = :initial
            next
          elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
              kind = :content
            else
              kind = :char
            end
          elsif scan(/\\./m)
            kind = :content
          elsif scan(/ \\ | $ /x)
            # Unterminated string. Close the :string group opened together
            # with the opening delimiter so that open/close markers stay
            # balanced, then report the offending text as an error token.
            tokens << [:close, :string]
            kind = :error
            state = :initial
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
          end

        else
          raise_inspect 'Unknown state', tokens

        end

        # Branches that did not capture the text themselves rely on the
        # scanner's last match.
        match ||= matched
        if $CODERAY_DEBUG && !kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens unless match

        last_token_dot = match == '.'

        tokens << [match, kind]

      end

      # Input ended inside a string: close the still-open group.
      if state == :string
        tokens << [:close, state]
      end

      tokens
    end

  end

end
end