comparison vendor/plugins/coderay-0.9.2/lib/coderay/scanners/.svn/text-base/ruby.rb.svn-base @ 0:513646585e45

* Import Redmine trunk SVN rev 3859
author Chris Cannam
date Fri, 23 Jul 2010 15:52:44 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:513646585e45
module CodeRay
module Scanners

  # This scanner is really complex, since Ruby _is_ a complex language!
  #
  # It tries to highlight 100% of all common code,
  # and 90% of strange codes.
  #
  # It is optimized for HTML highlighting, and is not very useful for
  # parsing or pretty printing.
  #
  # For now, I think it's better than the scanners in VIM or Syntax, or
  # any highlighter I was able to find, except Caleb's RubyLexer.
  #
  # I hope it's also better than the rdoc/irb lexer.
  class Ruby < Scanner

    include Streamable

    register_for :ruby
    file_extension 'rb'

    helper :patterns

    # Define EncodingError only when the running Ruby does not provide it.
    if not defined? EncodingError
      EncodingError = Class.new Exception
    end

  private
    # Tokenizes the scanner input as Ruby code and appends the result to
    # +tokens+, which is also the return value.
    #
    # Appended entries are either <tt>[text, kind]</tt> pairs or
    # <tt>[:open, kind]</tt> / <tt>[:close, kind]</tt> markers that bracket
    # strings, regexps, symbols, shell strings, heredocs and inline
    # <tt>#{...}</tt> blocks.
    #
    # tokens::  collection the tokens are appended to (with <tt><<</tt>).
    # options:: accepted, but not read by this method.
    #
    # The loop is a state machine. +state+ is either a symbol (:initial,
    # :def_expected, :module_expected, :undef_expected,
    # :undef_comma_expected, :alias_expected) or a Patterns::StringState
    # while inside a string-like literal.
    def scan_tokens tokens, options
      # true right after a '.' or '::' operator: the next token is a method name
      last_token_dot = false
      # true when the next token may start a value; this decides whether
      # '/', '<<', '%', '?x' and a leading sign open a literal or are operators
      value_expected = true
      # heredocs opened on the current line; their bodies start at the next newline
      heredocs = nil
      # string state to resume after one token of code (for "#$var" / "#@var")
      last_state = nil
      state = :initial
      # brace nesting depth inside the current inline #{...} block
      depth = nil
      # saved [state, depth, heredocs] for every enclosing inline block
      inline_block_stack = []
      unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'

      patterns = Patterns  # avoid constant lookup

      until eos?
        match = nil
        kind = nil

        if state.instance_of? patterns::StringState
# {{{
          # consume plain content up to the next special character of this
          # string state, or to the end of input if there is none
          match = scan_until(state.pattern) || scan_until(/\z/)
          tokens << [match, :content] unless match.empty?
          break if eos?

          if state.heredoc and self[1]  # end of heredoc
            match = getch.to_s
            match << scan_until(/$/) unless eos?
            tokens << [match, :delimiter]
            tokens << [:close, state.type]
            state = state.next_state
            next
          end

          case match = getch

          when state.delim
            if state.paren
              # a closing bracket only ends the literal at nesting depth 0
              state.paren_depth -= 1
              if state.paren_depth > 0
                tokens << [match, :nesting_delimiter]
                next
              end
            end
            tokens << [match, :delimiter]
            if state.type == :regexp and not eos?
              modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
              tokens << [modifiers, :modifier] unless modifiers.empty?
            end
            tokens << [:close, state.type]
            value_expected = false
            state = state.next_state

          when '\\'
            if state.interpreted
              if esc = scan(/ #{patterns::ESCAPE} /ox)
                tokens << [match + esc, :char]
              else
                tokens << [match, :error]
              end
            else
              # non-interpreted literal: only the delimiter and the
              # backslash itself are reported as escaped characters
              case m = getch
              when state.delim, '\\'
                tokens << [match + m, :char]
              when nil
                tokens << [match, :error]
              else
                tokens << [match + m, :content]
              end
            end

          when '#'
            case peek(1)
            when '{'
              # enter an inline code block; the string state is restored
              # when the matching '}' is reached
              inline_block_stack << [state, depth, heredocs]
              value_expected = true
              state = :initial
              depth = 1
              tokens << [:open, :inline]
              tokens << [match + getch, :inline_delimiter]
            when '$', '@'
              tokens << [match, :escape]
              last_state = state  # scan one token as normal code, then return here
              state = :initial
            else
              raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
            end

          when state.paren
            state.paren_depth += 1
            tokens << [match, :nesting_delimiter]

          when /#{patterns::REGEXP_SYMBOLS}/ox
            tokens << [match, :function]

          else
            raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens

          end
          next
# }}}
        else
# {{{
          if match = scan(/[ \t\f]+/)
            kind = :space
            # newlines are left alone while heredocs are pending, so that
            # the newline branch below can start the heredoc body
            match << scan(/\s*/) unless eos? || heredocs
            value_expected = true if match.index(?\n)
            tokens << [match, kind]
            next

          elsif match = scan(/\\?\n/)
            kind = :space
            if match == "\n"
              value_expected = true
              state = :initial if state == :undef_comma_expected
            end
            if heredocs
              unscan  # heredoc scanning needs \n at start
              state = heredocs.shift
              tokens << [:open, state.type]
              heredocs = nil if heredocs.empty?
              next
            else
              match << scan(/\s*/) unless eos?
            end
            tokens << [match, kind]
            next

          elsif bol? && match = scan(/\#!.*/)
            tokens << [match, :doctype]
            next

          elsif match = scan(/\#.*/) or
            ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
            kind = :comment
            tokens << [match, kind]
            next

          elsif state == :initial

            # IDENTS #
            if match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
                                      /#{patterns::METHOD_NAME}/o)
              if last_token_dot
                kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
              else
                kind = patterns::IDENT_KIND[match]
                if kind == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
                  kind = :constant
                elsif kind == :reserved
                  state = patterns::DEF_NEW_STATE[match]
                  value_expected = :set if patterns::KEYWORDS_EXPECTING_VALUE[match]
                end
              end
              value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)

            elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}|\(/o)
              kind = :ident
              value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)

            # OPERATORS #
            elsif not last_token_dot and match = scan(/ \.\.\.? | (?:\.|::)() | [,\(\)\[\]\{\}] | ==?=? /x)
              if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
                value_expected = :set
              end
              # the empty capture group only participates for '.' and '::'
              last_token_dot = :set if self[1]
              kind = :operator
              unless inline_block_stack.empty?
                case match
                when '{'
                  depth += 1
                when '}'
                  depth -= 1
                  if depth == 0  # closing brace of inline block reached
                    state, depth, heredocs = inline_block_stack.pop
                    heredocs = nil if heredocs && heredocs.empty?
                    tokens << [match, :inline_delimiter]
                    kind = :inline
                    match = :close
                  end
                end
              end

            elsif match = scan(/ ['"] /mx)
              tokens << [:open, :string]
              kind = :delimiter
              state = patterns::StringState.new :string, match == '"', match  # important for streaming

            elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o)
              kind = :instance_variable

            elsif value_expected and match = scan(/\//)
              tokens << [:open, :regexp]
              kind = :delimiter
              interpreted = true
              state = patterns::StringState.new :regexp, interpreted, match

            # elsif match = scan(/[-+]?#{patterns::NUMERIC}/o)
            elsif match = value_expected ? scan(/[-+]?#{patterns::NUMERIC}/o) : scan(/#{patterns::NUMERIC}/o)
              kind = self[1] ? :float : :integer

            elsif match = scan(/#{patterns::SYMBOL}/o)
              case delim = match[1]
              when ?', ?"
                # quoted symbol: emit the colon, then scan the rest as a string
                tokens << [:open, :symbol]
                tokens << [':', :symbol]
                match = delim.chr
                kind = :delimiter
                state = patterns::StringState.new :symbol, delim == ?", match
              else
                kind = :symbol
              end

            elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x)
              value_expected = :set
              kind = :operator

            elsif value_expected and match = scan(/#{patterns::HEREDOC_OPEN}/o)
              indented = self[1] == '-'
              quote = self[3]
              delim = self[quote ? 4 : 2]
              kind = patterns::QUOTE_TO_TYPE[quote]
              tokens << [:open, kind]
              tokens << [match, :delimiter]
              match = :close
              heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart )
              heredocs ||= []  # create heredocs if empty
              heredocs << heredoc

            elsif value_expected and match = scan(/#{patterns::FANCY_START_CORRECT}/o)
              # the fetch block receives the key that was not found, so the
              # error message can report the unknown %-literal type
              kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do |k|
                raise_inspect 'Unknown fancy string: %%%p' % k, tokens
              end
              tokens << [:open, kind]
              state = patterns::StringState.new kind, interpreted, self[2]
              kind = :delimiter

            elsif value_expected and match = scan(/#{patterns::CHARACTER}/o)
              kind = :integer

            elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
              value_expected = :set
              kind = :operator

            elsif match = scan(/`/)
              if last_token_dot
                kind = :operator
              else
                tokens << [:open, :shell]
                kind = :delimiter
                state = patterns::StringState.new :shell, true, match
              end

            elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o)
              kind = :global_variable

            elsif match = scan(/#{patterns::CLASS_VARIABLE}/o)
              kind = :class_variable

            else
              if !unicode
                # check for unicode
                # ($DEBUG is switched off while probing and restored afterwards)
                debug, $DEBUG = $DEBUG, false
                begin
                  if check(/./mu).size > 1
                    # seems like we should try again with unicode
                    unicode = true
                  end
                rescue
                  # bad unicode char; use getch
                ensure
                  $DEBUG = debug
                end
                next if unicode
              end
              kind = :error
              match = getch

            end

          elsif state == :def_expected
            state = :initial
            if scan(/self\./)
              tokens << ['self', :pre_constant]
              tokens << ['.', :operator]
            end
            if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
                                      /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
              kind = :method
            else
              next
            end

          elsif state == :module_expected
            if match = scan(/<</)
              kind = :operator
            else
              state = :initial
              if match = scan(/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
                kind = :class
              else
                next
              end
            end

          elsif state == :undef_expected
            state = :undef_comma_expected
            if match = scan(/#{patterns::METHOD_NAME_EX}/o)
              kind = :method
            elsif match = scan(/#{patterns::SYMBOL}/o)
              case delim = match[1]
              when ?', ?"
                tokens << [:open, :symbol]
                tokens << [':', :symbol]
                match = delim.chr
                kind = :delimiter
                state = patterns::StringState.new :symbol, delim == ?", match
                # come back to the undef argument list after the symbol ends
                state.next_state = :undef_comma_expected
              else
                kind = :symbol
              end
            else
              state = :initial
              next
            end

          elsif state == :alias_expected
            match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
                                   /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)

            if match
              tokens << [self[1], (self[1][0] == ?: ? :symbol : :method)]
              tokens << [self[2], :space]
              tokens << [self[3], (self[3][0] == ?: ? :symbol : :method)]
            end
            state = :initial
            next

          elsif state == :undef_comma_expected
            if match = scan(/,/)
              kind = :operator
              state = :undef_expected
            else
              state = :initial
              next
            end

          end
# }}}

          # Flags assigned :set during this iteration become true for the
          # next token; any older value decays to false.
          unless kind == :error
            value_expected = value_expected == :set
            last_token_dot = last_token_dot == :set
          end

          if $CODERAY_DEBUG and not kind
            raise_inspect 'Error token %p in line %d' %
              [[match, kind], line], tokens, state
          end
          raise_inspect 'Empty token', tokens unless match

          tokens << [match, kind]

          if last_state
            state = last_state
            last_state = nil
          end
        end
      end

      # Input ended: close the string that is still open (if any) and every
      # inline block / string still on the stack.
      inline_block_stack << [state] if state.is_a? patterns::StringState
      until inline_block_stack.empty?
        this_block = inline_block_stack.pop
        # entries saved on entering an inline block have three elements; the
        # one-element entry pushed just above has no inline block to close
        tokens << [:close, :inline] if this_block.size > 1
        state = this_block.first
        tokens << [:close, state.type]
      end

      tokens
    end

  end

end
end
412
413 # vim:fdm=marker