module CodeRay
  module Scanners

    load :java

    # Scanner for Groovy.
    #
    # Builds on the Java scanner: it reuses Java's identifier table and adds
    # the Groovy-specific keywords, the magic variable +it+, single- and
    # triple-quoted strings, slash-delimited regexps, and <tt>$ident</tt> /
    # <tt>${ ... }</tt> interpolation inside string and regexp literals.
    class Groovy < Java

      register_for :groovy

      # TODO: check list of keywords
      GROOVY_KEYWORDS = %w[
        as assert def in
      ]  # :nodoc:
      # Keywords after which a value is expected, so that a following "/"
      # opens a regexp instead of being treated as the division operator.
      KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
        case instanceof new return throw typeof while as assert in
      ]  # :nodoc:
      GROOVY_MAGIC_VARIABLES = %w[ it ]  # :nodoc:

      # Java's identifier kinds, extended with the Groovy additions above.
      IDENT_KIND = Java::IDENT_KIND.dup.
        add(GROOVY_KEYWORDS, :keyword).
        add(GROOVY_MAGIC_VARIABLES, :local_variable)  # :nodoc:

      # What may follow a backslash inside a string (ESCAPE, UNICODE_ESCAPE)
      # or inside a regexp (REGEXP_ESCAPE, UNICODE_ESCAPE).
      ESCAPE = / [bfnrtv$\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
      UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x  # :nodoc: no 4-byte unicode chars? U[a-fA-F0-9]{8}
      REGEXP_ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | \d | [bBdDsSwW\/] /x  # :nodoc:

      # Plain-content pattern for each opening delimiter.
      # TODO: interpretation inside ', ", /
      STRING_CONTENT_PATTERN = {
        "'" => /(?>\\[^\\'\n]+|[^\\'\n]+)+/,
        '"' => /[^\\$"\n]+/,
        "'''" => /(?>[^\\']+|'(?!''))+/,
        '"""' => /(?>[^\\$"]+|"(?!""))+/,
        '/' => /[^\\$\/\n]+/,
      }  # :nodoc:

    protected

      # Tokenizes the input and sends every token to +encoder+.
      #
      # The scanner is a small state machine:
      # * <tt>:initial</tt> - ordinary code;
      # * <tt>:string</tt>, <tt>:multiline_string</tt>, <tt>:regexp</tt> -
      #   inside a literal whose opening delimiter is +string_delimiter+.
      #
      # A <tt>${</tt> inside a literal pushes the current literal state onto
      # +inline_block_stack+ and switches back to <tt>:initial</tt>; the
      # matching <tt>}</tt> pops it again.
      #
      # encoder:: receives +text_token+, +begin_group+ and +end_group+ calls.
      # options:: accepted for interface compatibility; not read here.
      #
      # Returns +encoder+.
      def scan_tokens encoder, options

        state = :initial
        inline_block_stack = []
        inline_block_paren_depth = nil
        string_delimiter = nil
        import_clause = class_name_follows = last_token = after_def = false
        value_expected = true

        until eos?

          # Reset for every token; otherwise the check at the bottom of the
          # loop would look at the kind of some earlier identifier.
          kind = nil

          case state

          when :initial

            if match = scan(/ \s+ | \\\n /x)
              encoder.text_token match, :space
              if match.index ?\n
                # A line break ends an import clause and a pending "def".
                import_clause = after_def = false
                value_expected = true unless value_expected
              end
              next

            elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
              kind = :comment  # keeps last_token untouched, see end of loop
              value_expected = true
              after_def = false
              encoder.text_token match, :comment

            elsif bol? && match = scan(/ \#!.* /x)
              kind = :doctype  # keeps last_token untouched, see end of loop
              encoder.text_token match, :doctype

            elsif import_clause && match = scan(/ (?!as) #{IDENT} (?: \. #{IDENT} )* (?: \.\* )? /ox)
              after_def = value_expected = false
              encoder.text_token match, :include

            elsif match = scan(/ #{IDENT} | \[\] /ox)
              kind = IDENT_KIND[match]
              value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
              if last_token == '.'
                # Whatever follows a dot is a member name, never a keyword.
                kind = :ident
              elsif class_name_follows
                kind = :class
                class_name_follows = false
              elsif after_def && check(/\s*[({]/)
                kind = :method
                after_def = false
              elsif kind == :ident && last_token != '?' && check(/:/)
                # "name:" is a key, unless the colon belongs to "? :".
                kind = :key
              else
                class_name_follows = true if match == 'class' || (import_clause && match == 'as')
                import_clause = match == 'import'
                after_def = true if match == 'def'
              end
              encoder.text_token match, kind

            elsif match = scan(/;/)
              import_clause = after_def = false
              value_expected = true
              encoder.text_token match, :operator

            elsif match = scan(/\{/)
              class_name_follows = after_def = false
              value_expected = true
              encoder.text_token match, :operator
              if !inline_block_stack.empty?
                # Count braces so the "}" closing the ${ ... } block is found.
                inline_block_paren_depth += 1
              end

            # TODO: ~'...', ~"..." and ~/.../ style regexps
            elsif match = scan(/ \.\.<? | \*?\.(?!\d)@? | \.& | \?:? | [,?:(\[] | -[->] | \+\+ |
                && | \|\| | \*\*=? | ==?~ | <=?>? | [-+*%^~&|>=!]=? | <<<?=? | >>>?=? /x)
              value_expected = true
              value_expected = :regexp if match == '~'
              after_def = false
              encoder.text_token match, :operator

            elsif match = scan(/ [)\]}] /x)
              value_expected = after_def = false
              if !inline_block_stack.empty? && match == '}'
                inline_block_paren_depth -= 1
                if inline_block_paren_depth == 0  # closing brace of inline block reached
                  encoder.text_token match, :inline_delimiter
                  encoder.end_group :inline
                  # Resume the literal that contained the ${ ... } block.
                  state, string_delimiter, inline_block_paren_depth = inline_block_stack.pop
                  next
                end
              end
              encoder.text_token match, :operator

            elsif check(/[\d.]/)
              after_def = value_expected = false
              if match = scan(/0[xX][0-9A-Fa-f]+/)
                encoder.text_token match, :hex
              elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
                encoder.text_token match, :octal
              elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
                encoder.text_token match, :float
              elsif match = scan(/\d+[lLgG]?/)
                encoder.text_token match, :integer
              end

            elsif match = scan(/'''|"""/)
              after_def = value_expected = false
              state = :multiline_string
              # Multiline strings are emitted as ordinary :string groups.
              encoder.begin_group :string
              string_delimiter = match
              encoder.text_token match, :delimiter

            # TODO: record.'name' syntax
            elsif match = scan(/["']/)
              after_def = value_expected = false
              # Only quotes can match here; "/" is handled by the next branch.
              state = :string
              encoder.begin_group :string
              string_delimiter = match
              encoder.text_token match, :delimiter

            elsif value_expected && match = scan(/\//)
              # A slash where a value is expected opens a regexp literal.
              after_def = value_expected = false
              encoder.begin_group :regexp
              state = :regexp
              string_delimiter = '/'
              encoder.text_token match, :delimiter

            elsif match = scan(/ @ #{IDENT} /ox)
              after_def = value_expected = false
              encoder.text_token match, :annotation

            elsif match = scan(/\//)
              # Any other slash is the division operator.
              after_def = false
              value_expected = true
              encoder.text_token match, :operator

            else
              encoder.text_token getch, :error

            end

          when :string, :regexp, :multiline_string
            if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
              encoder.text_token match, :content

            elsif match = scan(state == :multiline_string ? /'''|"""/ : /["'\/]/)
              encoder.text_token match, :delimiter
              if state == :regexp
                # TODO: regexp modifiers? s, m, x, i?
                modifiers = scan(/[ix]+/)
                encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
              end
              state = :string if state == :multiline_string
              encoder.end_group state
              string_delimiter = nil
              after_def = value_expected = false
              state = :initial
              next

            elsif (state == :string || state == :multiline_string) &&
                (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
              # In '...' and '''...''' literals only \\ and \' are shown as
              # escapes; every other sequence is emitted as plain content.
              if string_delimiter[0] == ?' && !(match == "\\\\" || match == "\\'")
                encoder.text_token match, :content
              else
                encoder.text_token match, :char
              end
            elsif state == :regexp && match = scan(/ \\ (?: #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
              encoder.text_token match, :char

            elsif match = scan(/ \$ #{IDENT} /mox)
              # $name interpolation: a complete inline group of its own.
              encoder.begin_group :inline
              encoder.text_token '$', :inline_delimiter
              match = match[1..-1]
              encoder.text_token match, IDENT_KIND[match]
              encoder.end_group :inline
              next
            elsif match = scan(/ \$ \{ /x)
              # ${ ... } interpolation: remember the literal and scan code.
              encoder.begin_group :inline
              encoder.text_token match, :inline_delimiter
              inline_block_stack << [state, string_delimiter, inline_block_paren_depth]
              inline_block_paren_depth = 1
              state = :initial
              next

            elsif match = scan(/ \$ /mx)
              encoder.text_token match, :content

            elsif match = scan(/ \\. /mx)
              encoder.text_token match, :content  # TODO: Shouldn't this be :error?

            elsif match = scan(/ \\ | \n /x)
              # The group was opened as :string or :regexp (never as
              # :multiline_string), so it has to be closed under that name.
              encoder.end_group(state == :regexp ? :regexp : :string)
              encoder.text_token match, :error
              after_def = value_expected = false
              state = :initial

            else
              raise_inspect "else case \" reached; %p not handled." % peek(1), encoder

            end

          else
            raise_inspect 'Unknown state', encoder

          end

          # Comments and the shebang line must not hide the previous token
          # from the "after a dot" / "after a question mark" checks above.
          last_token = match unless [:space, :comment, :doctype].include? kind

        end

        # Input ended inside a literal: close its group under the name it was
        # opened with.
        if [:multiline_string, :string, :regexp].include? state
          encoder.end_group(state == :regexp ? :regexp : :string)
        end

        # Input ended inside one or more ${ ... } blocks: close each inline
        # group together with the literal that contains it.
        until inline_block_stack.empty?
          outer_state, = inline_block_stack.pop
          encoder.end_group :inline
          encoder.end_group(outer_state == :regexp ? :regexp : :string)
        end

        encoder
      end

    end

  end
end