comparison vendor/gems/coderay-1.0.0/lib/coderay/scanners/ruby.rb @ 909:cbb26bc654de redmine-1.3

Update to Redmine 1.3-stable branch (Redmine SVN rev 8964)
author Chris Cannam
date Fri, 24 Feb 2012 19:09:32 +0000
parents
children
comparison
equal deleted inserted replaced
908:c6c2cbd0afee 909:cbb26bc654de
1 module CodeRay
2 module Scanners
3
4 # This scanner is really complex, since Ruby _is_ a complex language!
5 #
6 # It tries to highlight 100% of all common code,
7 # and 90% of strange codes.
8 #
9 # It is optimized for HTML highlighting, and is not very useful for
10 # parsing or pretty printing.
11 class Ruby < Scanner
12
13 register_for :ruby
14 file_extension 'rb'
15
16 autoload :Patterns, 'coderay/scanners/ruby/patterns'
17 autoload :StringState, 'coderay/scanners/ruby/string_state'
18
# Builds a new StringState of type :string whose delimiter is a double quote.
# NOTE(review): the second argument (true) looks like the "interpreted" flag
# that scan_tokens later reads through state.interpreted -- confirm against
# StringState.
19 def interpreted_string_state
20 StringState.new :string, true, '"'
21 end
22
23 protected
24
# Sets @state, the value scan_tokens starts from when options[:state] is not
# given, back to :initial.
25 def setup
26 @state = :initial
27 end
28
# Scans the input until eos? and reports every token to +encoder+.
#
# encoder - receives the text_token, begin_group and end_group calls.
# options - Hash; the keys read here are:
#           :state      - where to start: a single state or a
#                         [state, heredocs] pair. Falls back to @state.
#           :keep_state - when truthy, the final [state, heredocs] pair is
#                         stored in @state.
#
# Returns +encoder+.
#
# +state+ is either a Symbol (scanning plain code, e.g. :initial or
# :def_expected) or a StringState (scanning inside a string-like literal).
# The main loop dispatches on that distinction first.
29 def scan_tokens encoder, options
# Multiple assignment accepts both a lone state and a [state, heredocs] pair.
30 state, heredocs = options[:state] || @state
# Work on a copy, because the list is consumed with shift below.
31 heredocs = heredocs.dup if heredocs.is_a?(Array)
32
# Starting inside a string-like literal: open its group again.
33 if state && state.instance_of?(StringState)
34 encoder.begin_group state.type
35 end
36
# State to switch to after the next pass through the :initial branch
# (set by :def_expected and by '#$' / '#@' inside a string).
37 last_state = nil
38
# Holds '.' or '::' right after such an operator was scanned (see OPERATORS),
# false or nil otherwise.
39 method_call_expected = false
# Truthy when the next token may start a value; this decides, for example,
# whether '/' opens a regexp or is an operator.
40 value_expected = true
41
# Stack of [state, curly depth, heredocs] triples saved on entering '#{'.
42 inline_block_stack = nil
43 inline_block_curly_depth = 0
44
# Pending heredocs: continue with the first one.
45 if heredocs
46 state = heredocs.shift
47 encoder.begin_group state.type
48 heredocs = nil if heredocs.empty?
49 end
50
51 # def_object_stack = nil
52 # def_object_paren_depth = 0
53
54 patterns = Patterns # avoid constant lookup
55
# Selects the /u variants of the patterns below when the input is UTF-8.
56 unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
57
58 until eos?
59
# Code mode: state is a Symbol.
60 if state.instance_of? ::Symbol
61
62 if match = scan(/[ \t\f\v]+/)
63 encoder.text_token match, :space
64
65 elsif match = scan(/\n/)
66 if heredocs
67 unscan # heredoc scanning needs \n at start
68 state = heredocs.shift
69 encoder.begin_group state.type
70 heredocs = nil if heredocs.empty?
71 else
# A newline ends an undef list.
72 state = :initial if state == :undef_comma_expected
73 encoder.text_token match, :space
74 value_expected = true
75 end
76
# At the beginning of a line this also matches a '#!' line (capture group 1
# set, reported as :doctype) and patterns::RUBYDOC_OR_DATA.
77 elsif match = scan(bol? ? / \#(!)?.* | #{patterns::RUBYDOC_OR_DATA} /ox : /\#.*/)
78 encoder.text_token match, self[1] ? :doctype : :comment
79
# Backslash directly followed by a newline.
80 elsif match = scan(/\\\n/)
81 if heredocs
82 unscan # heredoc scanning needs \n at start
83 encoder.text_token scan(/\\/), :space
84 state = heredocs.shift
85 encoder.begin_group state.type
86 heredocs = nil if heredocs.empty?
87 else
88 encoder.text_token match, :space
89 end
90
91 elsif state == :initial
92
93 # IDENTS #
94 if !method_call_expected &&
95 match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
96 /#{patterns::METHOD_NAME}/o)
97 value_expected = false
98 kind = patterns::IDENT_KIND[match]
99 if kind == :ident
# Capitalized, not ending in ! or ?, and not followed by '(': a constant.
100 if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/))
101 kind = :constant
102 end
103 elsif kind == :keyword
104 state = patterns::KEYWORD_NEW_STATE[match]
105 value_expected = true if patterns::KEYWORDS_EXPECTING_VALUE[match]
106 end
107 value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o)
108 encoder.text_token match, kind
109
110 elsif method_call_expected &&
111 match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo :
112 /#{patterns::METHOD_AFTER_DOT}/o)
# After '::' a capitalized name that is not followed by '(' is a constant.
113 if method_call_expected == '::' && match[/\A[A-Z]/] && !match?(/\(/)
114 encoder.text_token match, :constant
115 else
116 encoder.text_token match, :ident
117 end
118 method_call_expected = false
119 value_expected = check(/#{patterns::VALUE_FOLLOWS}/o)
120
121 # OPERATORS #
# Capture group 1 ('.' or '::') means a method name may follow. The empty
# group 2 only participates for the operators after which a value is expected.
122 elsif !method_call_expected && match = scan(/ (\.(?!\.)|::) | (?: \.\.\.? | ==?=? | [,\(\[\{] )() | [\)\]\}] /x)
123 method_call_expected = self[1]
124 value_expected = !method_call_expected && self[2]
# Inside '#{...}': count curly braces to find the one that closes the block.
125 if inline_block_stack
126 case match
127 when '{'
128 inline_block_curly_depth += 1
129 when '}'
130 inline_block_curly_depth -= 1
131 if inline_block_curly_depth == 0 # closing brace of inline block reached
132 state, inline_block_curly_depth, heredocs = inline_block_stack.pop
133 inline_block_stack = nil if inline_block_stack.empty?
134 heredocs = nil if heredocs && heredocs.empty?
135 encoder.text_token match, :inline_delimiter
136 encoder.end_group :inline
137 next
138 end
139 end
140 end
141 encoder.text_token match, :operator
142
143 elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
144 /#{patterns::SYMBOL}/o)
# A quote as second character of the match means a quoted symbol (:'..' or
# :".."); its content is scanned in string mode.
145 case delim = match[1]
146 when ?', ?"
147 encoder.begin_group :symbol
148 encoder.text_token ':', :symbol
149 match = delim.chr
150 encoder.text_token match, :delimiter
151 state = self.class::StringState.new :symbol, delim == ?", match
152 else
153 encoder.text_token match, :symbol
154 value_expected = false
155 end
156
# Matches either just the opening quote, or a complete string that contains
# no backslash (and, for double quotes, no '#').
157 elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx)
158 encoder.begin_group :string
159 if match.size == 1
160 encoder.text_token match, :delimiter
161 state = self.class::StringState.new :string, match == '"', match # important for streaming
162 else
# Complete string: opening delimiter, optional content, closing delimiter.
163 encoder.text_token match[0,1], :delimiter
164 encoder.text_token match[1..-2], :content if match.size > 2
165 encoder.text_token match[-1,1], :delimiter
166 encoder.end_group :string
167 value_expected = false
168 end
169
170 elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
171 /#{patterns::INSTANCE_VARIABLE}/o)
172 value_expected = false
173 encoder.text_token match, :instance_variable
174
# '/' opens a regexp only where a value is expected; otherwise it is matched
# by the operator pattern further down.
175 elsif value_expected && match = scan(/\//)
176 encoder.begin_group :regexp
177 encoder.text_token match, :delimiter
178 state = self.class::StringState.new :regexp, true, '/'
179
# A leading sign belongs to the number only where a value is expected.
180 elsif match = scan(value_expected ? /[-+]?#{patterns::NUMERIC}/o : /#{patterns::NUMERIC}/o)
181 if method_call_expected
182 encoder.text_token match, :error
183 method_call_expected = false
184 else
185 encoder.text_token match, self[1] ? :float : :integer # TODO: send :hex/:octal/:binary
186 end
187 value_expected = false
188
189 elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x)
190 value_expected = true
191 encoder.text_token match, :operator
192
# Heredoc opener: only the opener is emitted here. The StringState for the
# body is queued in heredocs and picked up at the next newline.
193 elsif value_expected && match = scan(/#{patterns::HEREDOC_OPEN}/o)
194 quote = self[3]
195 delim = self[quote ? 4 : 2]
196 kind = patterns::QUOTE_TO_TYPE[quote]
197 encoder.begin_group kind
198 encoder.text_token match, :delimiter
199 encoder.end_group kind
200 heredocs ||= [] # create heredocs if empty
201 heredocs << self.class::StringState.new(kind, quote != "'", delim,
202 self[1] == '-' ? :indented : :linestart)
203 value_expected = false
204
205 elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o)
206 kind = patterns::FANCY_STRING_KIND[self[1]]
207 encoder.begin_group kind
208 state = self.class::StringState.new kind, patterns::FANCY_STRING_INTERPRETED[self[1]], self[2]
209 encoder.text_token match, :delimiter
210
211 elsif value_expected && match = scan(/#{patterns::CHARACTER}/o)
212 value_expected = false
213 encoder.text_token match, :integer
214
215 elsif match = scan(/ %=? | <(?:<|=>?)? | \? /x)
216 value_expected = true
217 encoder.text_token match, :operator
218
219 elsif match = scan(/`/)
220 encoder.begin_group :shell
221 encoder.text_token match, :delimiter
222 state = self.class::StringState.new :shell, true, match
223
224 elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
225 /#{patterns::GLOBAL_VARIABLE}/o)
226 encoder.text_token match, :global_variable
227 value_expected = false
228
229 elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
230 /#{patterns::CLASS_VARIABLE}/o)
231 encoder.text_token match, :class_variable
232 value_expected = false
233
# A backslash as the very last character of the input.
234 elsif match = scan(/\\\z/)
235 encoder.text_token match, :space
236
# Nothing above matched at this position.
237 else
# Try the same position again without expecting a method name.
238 if method_call_expected
239 method_call_expected = false
240 next
241 end
242 unless unicode
243 # check for unicode
# $DEBUG is switched off for the duration of the check and restored in ensure.
244 $DEBUG_BEFORE, $DEBUG = $DEBUG, false
245 begin
246 if check(/./mu).size > 1
247 # seems like we should try again with unicode
248 unicode = true
249 end
250 rescue
251 # bad unicode char; use getch
252 ensure
253 $DEBUG = $DEBUG_BEFORE
254 end
255 next if unicode
256 end
257
# Give up on this character and report it as an error.
258 encoder.text_token getch, :error
259
260 end
261
# Switch to the state that was postponed while this pass ran in :initial.
262 if last_state
263 state = last_state
264 last_state = nil
265 end
266
267 elsif state == :def_expected
# A method name that is not followed by '.' or '::'.
268 if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
269 /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
270 encoder.text_token match, :method
271 state = :initial
272 else
# Otherwise do one pass in :initial, then continue with :dot_expected.
273 last_state = :dot_expected
274 state = :initial
275 end
276
277 elsif state == :dot_expected
278 if match = scan(/\.|::/)
279 # invalid definition
280 state = :def_expected
281 encoder.text_token match, :operator
282 else
283 state = :initial
284 end
285
286 elsif state == :module_expected
# '<<' is emitted as an operator and the state is kept; anything else leaves
# this state, and an optionally '::'-qualified name is emitted as :class.
287 if match = scan(/<</)
288 encoder.text_token match, :operator
289 else
290 state = :initial
291 if match = scan(unicode ? / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /oux :
292 / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
293 encoder.text_token match, :class
294 end
295 end
296
297 elsif state == :undef_expected
# Expect a method name or a symbol; afterwards a comma may continue the list.
298 state = :undef_comma_expected
299 if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
300 /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
301 encoder.text_token match, :method
302 elsif match = scan(/#{patterns::SYMBOL}/o)
303 case delim = match[1]
304 when ?', ?"
305 encoder.begin_group :symbol
306 encoder.text_token ':', :symbol
307 match = delim.chr
308 encoder.text_token match, :delimiter
309 state = self.class::StringState.new :symbol, delim == ?", match
# Return to the undef list once the quoted symbol has been closed.
310 state.next_state = :undef_comma_expected
311 else
312 encoder.text_token match, :symbol
313 end
314 else
315 state = :initial
316 end
317
318 elsif state == :undef_comma_expected
319 if match = scan(/,/)
320 encoder.text_token match, :operator
321 state = :undef_expected
322 else
323 state = :initial
324 end
325
326 elsif state == :alias_expected
# Two names separated by spaces or tabs, matched in a single scan.
327 match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
328 /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
329
330 if match
# A name starting with ':' is reported as :symbol, any other as :method.
331 encoder.text_token self[1], (self[1][0] == ?: ? :symbol : :method)
332 encoder.text_token self[2], :space
333 encoder.text_token self[3], (self[3][0] == ?: ? :symbol : :method)
334 end
335 state = :initial
336
337 else
338 #:nocov:
339 raise_inspect 'Unknown state: %p' % [state], encoder
340 #:nocov:
341 end
342
343 else # StringState
344
# Whatever scan_until(state.pattern) consumes, or else the rest of the input,
# is emitted as :content.
345 match = scan_until(state.pattern) || scan_rest
346 unless match.empty?
347 encoder.text_token match, :content
348 break if eos?
349 end
350
351 if state.heredoc && self[1] # end of heredoc
# The rest of the terminator line is emitted as the closing delimiter.
352 match = getch
353 match << scan_until(/$/) unless eos?
354 encoder.text_token match, :delimiter unless match.empty?
355 encoder.end_group state.type
356 state = state.next_state
357 next
358 end
359
# Dispatch on the single character that stopped the content scan.
360 case match = getch
361
362 when state.delim
# When nesting is tracked, only the outermost closing delimiter ends the
# literal; inner ones are plain content.
363 if state.paren_depth
364 state.paren_depth -= 1
365 if state.paren_depth > 0
366 encoder.text_token match, :content
367 next
368 end
369 end
370 encoder.text_token match, :delimiter
371 if state.type == :regexp && !eos?
372 match = scan(/#{patterns::REGEXP_MODIFIERS}/o)
373 encoder.text_token match, :modifier unless match.empty?
374 end
375 encoder.end_group state.type
376 value_expected = false
377 state = state.next_state
378
379 when '\\'
380 if state.interpreted
381 if esc = scan(/#{patterns::ESCAPE}/o)
382 encoder.text_token match + esc, :char
383 else
384 encoder.text_token match, :error
385 end
386 else
# Not interpreted: only an escaped delimiter or backslash is reported as
# :char; any other pair is plain content.
387 case esc = getch
388 when nil
389 encoder.text_token match, :content
390 when state.delim, '\\'
391 encoder.text_token match + esc, :char
392 else
393 encoder.text_token match + esc, :content
394 end
395 end
396
397 when '#'
398 case peek(1)
399 when '{'
# '#{': save the current context and scan the embedded code in :initial
# until the matching closing brace is found.
400 inline_block_stack ||= []
401 inline_block_stack << [state, inline_block_curly_depth, heredocs]
402 value_expected = true
403 state = :initial
404 inline_block_curly_depth = 1
405 encoder.begin_group :inline
406 encoder.text_token match + getch, :inline_delimiter
407 when '$', '@'
# '#$' or '#@': do one pass in :initial, then return to this string state
# through last_state.
408 encoder.text_token match, :escape
409 last_state = state
410 state = :initial
411 else
412 #:nocov:
413 raise_inspect 'else-case # reached; #%p not handled' % [peek(1)], encoder
414 #:nocov:
415 end
416
417 when state.opening_paren
418 state.paren_depth += 1
419 encoder.text_token match, :content
420
421 else
422 #:nocov:
423 raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], encoder
424 #:nocov:
425
426 end
427
428 end
429
430 end
431
432 # cleaning up
# The input ended inside a string-like literal: close its group.
433 if state.is_a? StringState
434 encoder.end_group state.type
435 end
436
437 if options[:keep_state]
# An unfinished heredoc goes back to the front of the pending list.
438 if state.is_a?(StringState) && state.heredoc
439 (heredocs ||= []).unshift state
440 state = :initial
441 elsif heredocs && heredocs.empty?
442 heredocs = nil
443 end
444 @state = state, heredocs
445 end
446
# Close the groups that unfinished '#{' blocks left open.
447 if inline_block_stack
448 until inline_block_stack.empty?
449 state, = *inline_block_stack.pop
450 encoder.end_group :inline
451 encoder.end_group state.type
452 end
453 end
454
455 encoder
456 end
457
458 end
459
460 end
461 end