comparison vendor/gems/coderay-0.9.7/lib/coderay/scanners/.svn/text-base/ruby.rb.svn-base @ 210:0579821a129a

Update to Redmine trunk rev 4802
author Chris Cannam
date Tue, 08 Feb 2011 13:51:46 +0000
parents
children
comparison
equal deleted inserted replaced
128:07fa8a8b56a8 210:0579821a129a
1 # encoding: utf-8
module CodeRay
module Scanners

  # This scanner is really complex, since Ruby _is_ a complex language!
  #
  # It tries to highlight 100% of all common code,
  # and 90% of strange codes.
  #
  # It is optimized for HTML highlighting, and is not very useful for
  # parsing or pretty printing.
  #
  # For now, I think it's better than the scanners in VIM or Syntax, or
  # any highlighter I was able to find, except Caleb's RubyLexer.
  #
  # I hope it's also better than the rdoc/irb lexer.
  class Ruby < Scanner

    include Streamable

    register_for :ruby
    file_extension 'rb'

    helper :patterns

    # Define a stand-in only when no EncodingError constant is visible from
    # here (presumably older Ruby versions without encoding support).
    if not defined? EncodingError
      EncodingError = Class.new Exception
    end

  private
    # Scans the whole input and appends the resulting tokens to +tokens+.
    #
    # tokens::  collection that receives entries through <tt><<</tt>. Text
    #           tokens are appended as <tt>[text, kind]</tt>; nested regions
    #           (strings, regexps, inline blocks, ...) are bracketed by
    #           <tt>[:open, kind]</tt> and <tt>[:close, kind]</tt>.
    # options:: not read by this method.
    #
    # Returns +tokens+.
    #
    # The scanner is a state machine. +state+ is either a symbol
    # (<tt>:initial</tt>, <tt>:def_expected</tt>, <tt>:module_expected</tt>,
    # <tt>:undef_expected</tt>, <tt>:undef_comma_expected</tt>,
    # <tt>:alias_expected</tt>) or a Patterns::StringState instance while the
    # scanner is inside a string-like literal.
    def scan_tokens tokens, options
      if string.respond_to?(:encoding)
        # Encoding-aware strings: normalise the input to UTF-8, replacing
        # anything that cannot be converted with '?'.
        unless string.encoding == Encoding::UTF_8
          self.string = string.encode Encoding::UTF_8,
            :invalid => :replace, :undef => :replace, :replace => '?'
        end
        unicode = false
      else
        # No encoding support: use the /u regexp variants as soon as the
        # input contains a non-ASCII byte.
        unicode = exist?(/[^\x00-\x7f]/)
      end

      # Set to :set right after a '.' or '::' operator and downgraded to
      # true for the token that follows it, false afterwards.
      last_token_dot = false
      # Whether a value (rather than an operator) may start at this position.
      # Takes the values true, false, :set and :expect_colon.
      value_expected = true
      # Heredocs that were opened on the current line; their bodies start at
      # the next newline. nil when there are none.
      heredocs = nil
      # State to return to after exactly one normal token ("#$var"/"#@var").
      last_state = nil
      state = :initial
      # Brace nesting depth inside the current "#{...}" inline block.
      depth = nil
      # Saved [state, depth, heredocs] triples of enclosing inline blocks.
      inline_block_stack = []


      patterns = Patterns  # avoid constant lookup

      until eos?
        match = nil
        kind = nil

        if state.instance_of? patterns::StringState
# {{{
          # Inside a string-like literal: consume plain content up to the
          # next character the state's pattern stops at (or to end of input).
          match = scan_until(state.pattern) || scan_until(/\z/)
          tokens << [match, :content] unless match.empty?
          break if eos?

          if state.heredoc and self[1]  # end of heredoc
            match = getch.to_s
            match << scan_until(/$/) unless eos?
            tokens << [match, :delimiter]
            tokens << [:close, state.type]
            state = state.next_state
            next
          end

          case match = getch

          when state.delim
            if state.paren
              # Closing bracket of a bracketed literal: only the outermost
              # one terminates the literal.
              state.paren_depth -= 1
              if state.paren_depth > 0
                tokens << [match, :nesting_delimiter]
                next
              end
            end
            tokens << [match, :delimiter]
            if state.type == :regexp and not eos?
              modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
              # Guard against a nil result so a non-matching scan cannot
              # raise NoMethodError on +empty?+.
              tokens << [modifiers, :modifier] unless modifiers.nil? || modifiers.empty?
            end
            tokens << [:close, state.type]
            value_expected = false
            state = state.next_state

          when '\\'
            if state.interpreted
              if esc = scan(/ #{patterns::ESCAPE} /ox)
                tokens << [match + esc, :char]
              else
                tokens << [match, :error]
              end
            else
              # Non-interpreted literal: only the delimiter and the backslash
              # itself are highlighted as escaped characters.
              case m = getch
              when state.delim, '\\'
                tokens << [match + m, :char]
              when nil
                tokens << [match, :error]
              else
                tokens << [match + m, :content]
              end
            end

          when '#'
            case peek(1)
            when '{'
              # Start of an inline block: remember where we are and scan the
              # block contents as normal code.
              inline_block_stack << [state, depth, heredocs]
              value_expected = true
              state = :initial
              depth = 1
              tokens << [:open, :inline]
              tokens << [match + getch, :inline_delimiter]
            when '$', '@'
              tokens << [match, :escape]
              last_state = state  # scan one token as normal code, then return here
              state = :initial
            else
              raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
            end

          when state.paren
            # Nested opening bracket inside a bracketed literal.
            state.paren_depth += 1
            tokens << [match, :nesting_delimiter]

          when /#{patterns::REGEXP_SYMBOLS}/ox
            tokens << [match, :function]

          else
            raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens

          end
          next
# }}}
        else
# {{{
          if match = scan(/[ \t\f]+/)
            kind = :space
            # While heredocs are pending the newline must be left for the
            # newline branch below, so do not swallow further whitespace.
            match << scan(/\s*/) unless eos? || heredocs
            value_expected = true if match.index(?\n)
            tokens << [match, kind]
            next

          elsif match = scan(/\\?\n/)
            kind = :space
            if match == "\n"
              value_expected = true
              state = :initial if state == :undef_comma_expected
            end
            if heredocs
              unscan  # heredoc scanning needs \n at start
              state = heredocs.shift
              tokens << [:open, state.type]
              heredocs = nil if heredocs.empty?
              next
            else
              match << scan(/\s*/) unless eos?
            end
            tokens << [match, kind]
            next

          elsif bol? && match = scan(/\#!.*/)
            tokens << [match, :doctype]
            next

          elsif match = scan(/\#.*/) or
            ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
              kind = :comment
              tokens << [match, kind]
              next

          elsif state == :initial

            # IDENTS #
            if match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
                                      /#{patterns::METHOD_NAME}/o)
              if last_token_dot
                # After '.' or '::' keywords are ordinary names.
                kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
              else
                if value_expected != :expect_colon && scan(/:(?= )/)
                  # "name: " is emitted as a key followed by the ':' operator.
                  tokens << [match, :key]
                  match = ':'
                  kind = :operator
                else
                  kind = patterns::IDENT_KIND[match]
                  if kind == :ident
                    if match[/\A[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
                      kind = :constant
                    end
                  elsif kind == :reserved
                    state = patterns::DEF_NEW_STATE[match]
                    value_expected = :set if patterns::KEYWORDS_EXPECTING_VALUE[match]
                  end
                end
              end
              value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)

            elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}|\(/o)
              kind = :ident
              value_expected = :set if check(unicode ? /#{patterns::VALUE_FOLLOWS}/uo :
                                                       /#{patterns::VALUE_FOLLOWS}/o)

            # OPERATORS #
            elsif not last_token_dot and match = scan(/ \.\.\.? | (?:\.|::)() | [,\(\)\[\]\{\}] | ==?=? /x)
              if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
                value_expected = :set
              end
              # The empty capture group only participates for '.' and '::'.
              last_token_dot = :set if self[1]
              kind = :operator
              unless inline_block_stack.empty?
                case match
                when '{'
                  depth += 1
                when '}'
                  depth -= 1
                  if depth == 0  # closing brace of inline block reached
                    state, depth, heredocs = inline_block_stack.pop
                    heredocs = nil if heredocs && heredocs.empty?
                    tokens << [match, :inline_delimiter]
                    kind = :inline
                    match = :close
                  end
                end
              end

            elsif match = scan(/ ['"] /mx)
              tokens << [:open, :string]
              kind = :delimiter
              state = patterns::StringState.new :string, match == '"', match  # important for streaming

            elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
                                         /#{patterns::INSTANCE_VARIABLE}/o)
              kind = :instance_variable

            elsif value_expected and match = scan(/\//)
              tokens << [:open, :regexp]
              kind = :delimiter
              interpreted = true
              state = patterns::StringState.new :regexp, interpreted, match

            # elsif match = scan(/[-+]?#{patterns::NUMERIC}/o)
            elsif match = value_expected ? scan(/[-+]?#{patterns::NUMERIC}/o) : scan(/#{patterns::NUMERIC}/o)
              # NOTE(review): capture group 1 of NUMERIC presumably marks the
              # float-only part of the literal - confirm against Patterns.
              kind = self[1] ? :float : :integer

            elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
                                         /#{patterns::SYMBOL}/o)
              case delim = match[1]
              when ?', ?"
                # Quoted symbol: emit the colon, then scan the rest like a string.
                tokens << [:open, :symbol]
                tokens << [':', :symbol]
                match = delim.chr
                kind = :delimiter
                state = patterns::StringState.new :symbol, delim == ?", match
              else
                kind = :symbol
              end

            elsif match = scan(/ -[>=]? | [+!~^]=? | [*|&]{1,2}=? | >>? /x)
              value_expected = :set
              kind = :operator

            elsif value_expected and match = scan(unicode ? /#{patterns::HEREDOC_OPEN}/uo :
                                                            /#{patterns::HEREDOC_OPEN}/o)
              # NOTE(review): relies on HEREDOC_OPEN's capture groups
              # (1 = '-', 3 = quote, 2/4 = delimiter) - confirm against Patterns.
              indented = self[1] == '-'
              quote = self[3]
              delim = self[quote ? 4 : 2]
              kind = patterns::QUOTE_TO_TYPE[quote]
              tokens << [:open, kind]
              tokens << [match, :delimiter]
              match = :close
              heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart )
              heredocs ||= []  # create heredocs if empty
              heredocs << heredoc

            elsif value_expected and match = scan(/#{patterns::FANCY_START_CORRECT}/o)
              # Keep the type character in a local so the error message below
              # can report it (the original referenced an undefined +k+).
              fancy_type = self[1]
              kind, interpreted = *patterns::FancyStringType.fetch(fancy_type) do
                raise_inspect 'Unknown fancy string: %%%p' % fancy_type, tokens
              end
              tokens << [:open, kind]
              state = patterns::StringState.new kind, interpreted, self[2]
              kind = :delimiter

            elsif value_expected and match = scan(unicode ? /#{patterns::CHARACTER}/uo :
                                                            /#{patterns::CHARACTER}/o)
              kind = :integer

            elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
              value_expected = :set
              kind = :operator

            elsif match = scan(/`/)
              if last_token_dot
                kind = :operator
              else
                tokens << [:open, :shell]
                kind = :delimiter
                state = patterns::StringState.new :shell, true, match
              end

            elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
                                         /#{patterns::GLOBAL_VARIABLE}/o)
              kind = :global_variable

            elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
                                         /#{patterns::CLASS_VARIABLE}/o)
              kind = :class_variable

            else
              if !unicode && !string.respond_to?(:encoding)
                # check for unicode
                # $DEBUG is switched off for the duration of the probe and
                # restored in the ensure clause.
                debug, $DEBUG = $DEBUG, false
                begin
                  if check(/./mu).size > 1
                    # seems like we should try again with unicode
                    unicode = true
                  end
                rescue
                  # bad unicode char; use getch
                ensure
                  $DEBUG = debug
                end
                next if unicode
              end
              # Nothing matched: emit one character as an error token.
              kind = :error
              match = scan(unicode ? /./mu : /./m)

            end

          elsif state == :def_expected
            state = :initial
            if scan(/self\./)
              tokens << ['self', :pre_constant]
              tokens << ['.', :operator]
            end
            if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
                                      /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
              kind = :method
            else
              next
            end

          elsif state == :module_expected
            if match = scan(/<</)
              # "class << ..." stays in :module_expected.
              kind = :operator
            else
              state = :initial
              if match = scan(unicode ? /(?:#{patterns::IDENT}::)*#{patterns::IDENT}/uo :
                                        /(?:#{patterns::IDENT}::)*#{patterns::IDENT}/o)
                kind = :class
              else
                next
              end
            end

          elsif state == :undef_expected
            state = :undef_comma_expected
            if match = scan(unicode ? /#{patterns::METHOD_NAME_EX}/uo :
                                      /#{patterns::METHOD_NAME_EX}/o)
              kind = :method
            elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
                                         /#{patterns::SYMBOL}/o)
              case delim = match[1]
              when ?', ?"
                tokens << [:open, :symbol]
                tokens << [':', :symbol]
                match = delim.chr
                kind = :delimiter
                state = patterns::StringState.new :symbol, delim == ?", match
                # Continue the undef list once the quoted symbol is closed.
                state.next_state = :undef_comma_expected
              else
                kind = :symbol
              end
            else
              state = :initial
              next
            end

          elsif state == :alias_expected
            match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
                                   /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)

            if match
              # Emit both names and the whitespace between them directly.
              tokens << [self[1], (self[1][0] == ?: ? :symbol : :method)]
              tokens << [self[2], :space]
              tokens << [self[3], (self[3][0] == ?: ? :symbol : :method)]
            end
            state = :initial
            next

          elsif state == :undef_comma_expected
            if match = scan(/,/)
              kind = :operator
              state = :undef_expected
            else
              state = :initial
              next
            end

          end
# }}}

          unless kind == :error
            # The one-shot :set markers become true for the next token and
            # false for the one after it.
            if value_expected = value_expected == :set
              value_expected = :expect_colon if match == '?' || match == 'when'
            end
            last_token_dot = last_token_dot == :set
          end

          if $CODERAY_DEBUG and not kind
            raise_inspect 'Error token %p in line %d' %
              [[match, kind], line], tokens, state
          end
          raise_inspect 'Empty token', tokens unless match

          tokens << [match, kind]

          if last_state
            state = last_state
            last_state = nil
          end
        end
      end

      # End of input: close every literal and inline block that is still open.
      inline_block_stack << [state] if state.is_a? patterns::StringState
      until inline_block_stack.empty?
        this_block = inline_block_stack.pop
        # Entries saved for "#{" hold three elements; the entry pushed just
        # above holds only the state.
        tokens << [:close, :inline] if this_block.size > 1
        state = this_block.first
        tokens << [:close, state.type]
      end

      tokens
    end

  end

end
end
443
444 # vim:fdm=marker