module CodeRay
module Scanners

  load :java

  # Scanner for Groovy source code.
  #
  # Subclasses the Java scanner and extends its identifier table with the
  # Groovy-specific keywords and the magic variable +it+. On top of that it
  # tokenizes single- and triple-quoted strings, slashy regexps, and
  # <tt>$ident</tt> / <tt>${...}</tt> interpolation inside them.
  #
  # NOTE(review): IDENT, WordList, Streamable, raise_inspect and the
  # StringScanner-style helpers (scan, check, eos?, bol?, getch, peek,
  # matched, line) are not defined in this file; presumably they come from
  # the Java scanner and the CodeRay scanner base class.
  class Groovy < Java

    include Streamable
    register_for :groovy

    # TODO: Check this!
    # Words that are keywords in Groovy in addition to the Java ones.
    GROOVY_KEYWORDS = %w[
      as assert def in
    ]
    # Keywords after which a value is expected, so that a following "/"
    # starts a regexp instead of being treated as an operator.
    KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
      case instanceof new return throw typeof while as assert in
    ]
    GROOVY_MAGIC_VARIABLES = %w[ it ]

    # Maps an identifier to its token kind: the Java table plus the
    # Groovy additions above.
    IDENT_KIND = Java::IDENT_KIND.dup.
      add(GROOVY_KEYWORDS, :keyword).
      add(GROOVY_MAGIC_VARIABLES, :local_variable)

    # Escape bodies (the part following the backslash).
    ESCAPE = / [bfnrtv$\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
    UNICODE_ESCAPE =  / u[a-fA-F0-9]{4} /x  # no 4-byte unicode chars? U[a-fA-F0-9]{8}
    REGEXP_ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | \d | [bBdDsSwW\/] /x

    # TODO: interpretation inside ', ", /
    # Plain-content pattern for each opening delimiter. Each pattern stops
    # before a backslash and before its own closing delimiter; all but the
    # single-quoted forms also stop before "$" (interpolation).
    STRING_CONTENT_PATTERN = {
      "'" => /(?>\\[^\\'\n]+|[^\\'\n]+)+/,
      '"' => /[^\\$"\n]+/,
      "'''" => /(?>[^\\']+|'(?!''))+/,
      '"""' => /(?>[^\\$"]+|"(?!""))+/,
      '/' => /[^\\$\/\n]+/,
    }

    # Tokenizes the scanner's input and appends the tokens to +tokens+.
    #
    # tokens::  token collection; receives <tt>[text, kind]</tt> pairs as
    #           well as <tt>[:open, kind]</tt> / <tt>[:close, kind]</tt>
    #           group markers via <tt><<</tt>.
    # options:: not used by this method.
    #
    # Returns +tokens+.
    def scan_tokens tokens, options

      state = :initial
      # Saved [state, string_delimiter, paren_depth] for every enclosing
      # string while scanning code inside a ${...} interpolation.
      inline_block_stack = []
      inline_block_paren_depth = nil
      string_delimiter = nil
      import_clause = class_name_follows = last_token = after_def = false
      # Decides whether a "/" opens a regexp (truthy) or is the division
      # operator (falsy).
      value_expected = true

      until eos?

        kind = nil
        match = nil

        case state

        when :initial

          if match = scan(/ \s+ | \\\n /x)
            tokens << [match, :space]
            if match.index ?\n
              # A line break ends an import clause and a pending "def".
              import_clause = after_def = false
              value_expected = true unless value_expected
            end
            next

          elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            value_expected = true
            after_def = false
            kind = :comment

          elsif bol? && scan(/ \#!.* /x)
            kind = :doctype

          # The lookahead only excludes the keyword "as" itself (import
          # alias); package names that merely start with "as", such as
          # "asm.Foo", are still part of the import path.
          elsif import_clause && scan(/ (?!as\b) #{IDENT} (?: \. #{IDENT} )* (?: \.\* )? /ox)
            after_def = value_expected = false
            kind = :include

          elsif match = scan(/ #{IDENT} | \[\] /ox)
            kind = IDENT_KIND[match]
            value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
            if last_token == '.'
              # Member access: never a keyword.
              kind = :ident
            elsif class_name_follows
              kind = :class
              class_name_follows = false
            elsif after_def && check(/\s*[({]/)
              kind = :method
              after_def = false
            elsif kind == :ident && last_token != '?' && check(/:/)
              # "name:" is a key unless the colon belongs to a ternary.
              kind = :key
            else
              class_name_follows = true if match == 'class' || (import_clause && match == 'as')
              import_clause = match == 'import'
              after_def = true if match == 'def'
            end

          elsif scan(/;/)
            import_clause = after_def = false
            value_expected = true
            kind = :operator

          elsif scan(/\{/)
            class_name_follows = after_def = false
            value_expected = true
            kind = :operator
            if !inline_block_stack.empty?
              # Count nested braces so the "}" closing ${...} can be found.
              inline_block_paren_depth += 1
            end

          # TODO: ~'...', ~"..." and ~/.../ style regexps
          elsif match = scan(/ \.\.<? | \*?\.(?!\d)@? | \.& | \?:? | [,?:(\[] | -[->] | \+\+ |
              && | \|\| | \*\*=? | ==?~ | <=?>? | [-+*%^~&|>=!]=? | <<<?=? | >>>?=? /x)
            value_expected = true
            value_expected = :regexp if match == '~'
            after_def = false
            kind = :operator

          elsif match = scan(/ [)\]}] /x)
            value_expected = after_def = false
            if !inline_block_stack.empty? && match == '}'
              inline_block_paren_depth -= 1
              if inline_block_paren_depth == 0  # closing brace of inline block reached
                tokens << [match, :inline_delimiter]
                tokens << [:close, :inline]
                # Resume the string that contained the interpolation.
                state, string_delimiter, inline_block_paren_depth = inline_block_stack.pop
                next
              end
            end
            kind = :operator

          elsif check(/[\d.]/)
            after_def = value_expected = false
            if scan(/0[xX][0-9A-Fa-f]+/)
              kind = :hex
            elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
              kind = :oct
            elsif scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
              kind = :float
            elsif scan(/\d+[lLgG]?/)
              kind = :integer
            end

          elsif match = scan(/'''|"""/)
            after_def = value_expected = false
            state = :multiline_string
            # Triple-quoted strings are reported as ordinary :string groups.
            tokens << [:open, :string]
            string_delimiter = match
            kind = :delimiter

          # TODO: record.'name'
          elsif match = scan(/["']/)
            after_def = value_expected = false
            # Only quotes can match here; "/" regexps are handled below.
            state = :string
            tokens << [:open, state]
            string_delimiter = match
            kind = :delimiter

          elsif value_expected && (match = scan(/\//))
            after_def = value_expected = false
            tokens << [:open, :regexp]
            state = :regexp
            string_delimiter = '/'
            kind = :delimiter

          elsif scan(/ @ #{IDENT} /ox)
            after_def = value_expected = false
            kind = :annotation

          elsif scan(/\//)
            # No value expected here, so "/" is the division operator.
            after_def = false
            value_expected = true
            kind = :operator

          else
            getch
            kind = :error

          end

        when :string, :regexp, :multiline_string
          if scan(STRING_CONTENT_PATTERN[string_delimiter])
            kind = :content

          elsif match = scan(state == :multiline_string ? /'''|"""/ : /["'\/]/)
            tokens << [match, :delimiter]
            if state == :regexp
              # TODO: regexp modifiers? s, m, x, i?
              modifiers = scan(/[ix]+/)
              tokens << [modifiers, :modifier] if modifiers && !modifiers.empty?
            end
            state = :string if state == :multiline_string
            tokens << [:close, state]
            string_delimiter = nil
            after_def = value_expected = false
            state = :initial
            next

          elsif (state == :string || state == :multiline_string) &&
              (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
            # In single-quoted strings only \\ and \' are highlighted as
            # escapes; everything else is plain content.
            if string_delimiter[0] == ?' && !(match == "\\\\" || match == "\\'")
              kind = :content
            else
              kind = :char
            end
          elsif state == :regexp && scan(/ \\ (?: #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            kind = :char

          elsif match = scan(/ \$ #{IDENT} /mox)
            # $name interpolation: emitted as a complete inline group.
            tokens << [:open, :inline]
            tokens << ['$', :inline_delimiter]
            match = match[1..-1]
            tokens << [match, IDENT_KIND[match]]
            tokens << [:close, :inline]
            next
          elsif match = scan(/ \$ \{ /x)
            # ${...} interpolation: remember the string context and scan
            # the embedded code in the :initial state.
            tokens << [:open, :inline]
            tokens << ['${', :inline_delimiter]
            inline_block_stack << [state, string_delimiter, inline_block_paren_depth]
            inline_block_paren_depth = 1
            state = :initial
            next

          elsif scan(/ \$ /mx)
            kind = :content

          elsif scan(/ \\. /mx)
            kind = :content

          elsif scan(/ \\ | \n /x)
            # Broken string: close the group with the kind it was opened
            # with (triple-quoted strings are opened as :string).
            tokens << [:close, state == :multiline_string ? :string : state]
            kind = :error
            after_def = value_expected = false
            state = :initial

          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
          end

        else
          raise_inspect 'Unknown state', tokens

        end

        # Branches that called scan without assigning leave match nil.
        match ||= matched
        if $CODERAY_DEBUG and not kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens unless match

        last_token = match unless [:space, :comment, :doctype].include? kind

        tokens << [match, kind]

      end

      # Input ended inside a string or regexp: close the open group, again
      # using :string for triple-quoted strings so open/close kinds match.
      if [:multiline_string, :string, :regexp].include? state
        tokens << [:close, state == :multiline_string ? :string : state]
      end

      tokens
    end

  end

end
end