Mercurial > hg > soundsoftware-site
comparison vendor/gems/coderay-0.9.7/lib/coderay/scanners/.svn/text-base/ruby.rb.svn-base @ 210:0579821a129a
Update to Redmine trunk rev 4802
author | Chris Cannam |
---|---|
date | Tue, 08 Feb 2011 13:51:46 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
128:07fa8a8b56a8 | 210:0579821a129a |
---|---|
1 # encoding: utf-8 | |
module CodeRay
  module Scanners

    # This scanner is really complex, since Ruby _is_ a complex language!
    #
    # It tries to highlight 100% of all common code,
    # and 90% of strange codes.
    #
    # It is optimized for HTML highlighting, and is not very useful for
    # parsing or pretty printing.
    #
    # For now, I think it's better than the scanners in VIM or Syntax, or
    # any highlighter I was able to find, except Caleb's RubyLexer.
    #
    # I hope it's also better than the rdoc/irb lexer.
    class Ruby < Scanner

      include Streamable

      register_for :ruby
      file_extension 'rb'

      helper :patterns

      # Fallback for interpreters that do not define EncodingError.
      # Derived from StandardError (like the built-in class) so that a
      # plain `rescue` treats it the same way on every Ruby version.
      if not defined? EncodingError
        EncodingError = Class.new StandardError
      end

    private
      # Tokenizes the scanner's string as Ruby source.
      #
      # Appends to +tokens+:
      # * <tt>[text, kind]</tt> pairs for scanned text, and
      # * <tt>[:open, kind]</tt> / <tt>[:close, kind]</tt> markers around
      #   strings, regexps, symbols, shell commands, heredocs and
      #   <tt>#{...}</tt> inline blocks.
      #
      # +options+ is accepted but not read by this method.
      #
      # Returns +tokens+.
      def scan_tokens tokens, options
        if string.respond_to?(:encoding)
          # Encoding-aware strings: normalize to UTF-8 once, replacing
          # anything that cannot be converted with '?'.
          unless string.encoding == Encoding::UTF_8
            self.string = string.encode Encoding::UTF_8,
              :invalid => :replace, :undef => :replace, :replace => '?'
          end
          unicode = false
        else
          # No encoding support: use the /u regexp variants only if the
          # input contains non-ASCII bytes.
          unicode = exist?(/[^\x00-\x7f]/)
        end

        # Scanner state. +value_expected+ and +last_token_dot+ are set to
        # :set by the branch that matched and are collapsed to true/false
        # after the token has been emitted (see the bottom of the loop).
        last_token_dot = false      # previous token was '.' or '::'
        value_expected = true       # a value (not an operator) may follow
        heredocs = nil              # pending heredoc states; started at the next newline
        last_state = nil            # state to restore after scanning exactly one token
        state = :initial            # a Symbol, or a patterns::StringState inside a literal
        depth = nil                 # brace depth inside the current #{...} inline block
        inline_block_stack = []     # saved [state, depth, heredocs] of enclosing literals


        patterns = Patterns  # avoid constant lookup

        until eos?
          match = nil
          kind = nil

          if state.instance_of? patterns::StringState
            # {{{
            # Inside a string-like literal: consume plain content up to the
            # next character that needs special handling.
            match = scan_until(state.pattern) || scan_until(/\z/)
            tokens << [match, :content] unless match.empty?
            break if eos?

            if state.heredoc and self[1]  # end of heredoc
              match = getch.to_s
              match << scan_until(/$/) unless eos?
              tokens << [match, :delimiter]
              tokens << [:close, state.type]
              state = state.next_state
              next
            end

            case match = getch

            when state.delim
              if state.paren
                # Nested closing bracket: only the outermost one ends the literal.
                state.paren_depth -= 1
                if state.paren_depth > 0
                  tokens << [match, :nesting_delimiter]
                  next
                end
              end
              tokens << [match, :delimiter]
              if state.type == :regexp and not eos?
                modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
                tokens << [modifiers, :modifier] unless modifiers.empty?
              end
              tokens << [:close, state.type]
              value_expected = false
              state = state.next_state

            when '\\'
              if state.interpreted
                if esc = scan(/ #{patterns::ESCAPE} /ox)
                  tokens << [match + esc, :char]
                else
                  tokens << [match, :error]
                end
              else
                # Non-interpreted literal: only the delimiter and the
                # backslash itself are treated as escapes.
                case m = getch
                when state.delim, '\\'
                  tokens << [match + m, :char]
                when nil
                  tokens << [match, :error]
                else
                  tokens << [match + m, :content]
                end
              end

            when '#'
              case peek(1)
              when '{'
                # Start of #{...}: remember where we are and scan the
                # block contents as normal code.
                inline_block_stack << [state, depth, heredocs]
                value_expected = true
                state = :initial
                depth = 1
                tokens << [:open, :inline]
                tokens << [match + getch, :inline_delimiter]
              when '$', '@'
                tokens << [match, :escape]
                last_state = state  # scan one token as normal code, then return here
                state = :initial
              else
                raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
              end

            when state.paren
              state.paren_depth += 1
              tokens << [match, :nesting_delimiter]

            when /#{patterns::REGEXP_SYMBOLS}/ox
              tokens << [match, :function]

            else
              raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens

            end
            next
            # }}}
          else
            # {{{
            if match = scan(/[ \t\f]+/)
              kind = :space
              # Do not swallow the newline while heredocs are pending:
              # the newline branch below has to start them.
              match << scan(/\s*/) unless eos? || heredocs
              value_expected = true if match.index(?\n)
              tokens << [match, kind]
              next

            elsif match = scan(/\\?\n/)
              kind = :space
              if match == "\n"
                value_expected = true
                state = :initial if state == :undef_comma_expected
              end
              if heredocs
                unscan  # heredoc scanning needs \n at start
                state = heredocs.shift
                tokens << [:open, state.type]
                heredocs = nil if heredocs.empty?
                next
              else
                match << scan(/\s*/) unless eos?
              end
              tokens << [match, kind]
              next

            elsif bol? && match = scan(/\#!.*/)
              tokens << [match, :doctype]
              next

            elsif match = scan(/\#.*/) or
              ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
              kind = :comment
              tokens << [match, kind]
              next

            elsif state == :initial

              # IDENTS #
              if match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
                                        /#{patterns::METHOD_NAME}/o)
                if last_token_dot
                  kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
                else
                  if value_expected != :expect_colon && scan(/:(?= )/)
                    # "name: " is emitted as a :key token followed by a ':' operator.
                    tokens << [match, :key]
                    match = ':'
                    kind = :operator
                  else
                    kind = patterns::IDENT_KIND[match]
                    if kind == :ident
                      if match[/\A[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
                        kind = :constant
                      end
                    elsif kind == :reserved
                      state = patterns::DEF_NEW_STATE[match]
                      value_expected = :set if patterns::KEYWORDS_EXPECTING_VALUE[match]
                    end
                  end
                end
                value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)

              elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}|\(/o)
                kind = :ident
                value_expected = :set if check(unicode ? /#{patterns::VALUE_FOLLOWS}/uo :
                                                         /#{patterns::VALUE_FOLLOWS}/o)

              # OPERATORS #
              elsif not last_token_dot and match = scan(/ \.\.\.? | (?:\.|::)() | [,\(\)\[\]\{\}] | ==?=? /x)
                if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
                  value_expected = :set
                end
                # The empty capture group participates only for '.' and '::'.
                last_token_dot = :set if self[1]
                kind = :operator
                unless inline_block_stack.empty?
                  # Track braces so the '}' that ends a #{...} block is recognized.
                  case match
                  when '{'
                    depth += 1
                  when '}'
                    depth -= 1
                    if depth == 0  # closing brace of inline block reached
                      state, depth, heredocs = inline_block_stack.pop
                      heredocs = nil if heredocs && heredocs.empty?
                      tokens << [match, :inline_delimiter]
                      kind = :inline
                      match = :close
                    end
                  end
                end

              elsif match = scan(/ ['"] /mx)
                tokens << [:open, :string]
                kind = :delimiter
                state = patterns::StringState.new :string, match == '"', match  # important for streaming

              elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
                                           /#{patterns::INSTANCE_VARIABLE}/o)
                kind = :instance_variable

              elsif value_expected and match = scan(/\//)
                tokens << [:open, :regexp]
                kind = :delimiter
                interpreted = true
                state = patterns::StringState.new :regexp, interpreted, match

              # elsif match = scan(/[-+]?#{patterns::NUMERIC}/o)
              elsif match = value_expected ? scan(/[-+]?#{patterns::NUMERIC}/o) : scan(/#{patterns::NUMERIC}/o)
                kind = self[1] ? :float : :integer

              elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
                                           /#{patterns::SYMBOL}/o)
                case delim = match[1]
                when ?', ?"
                  # Quoted symbol (:'...' / :"..."): scan the rest like a string.
                  tokens << [:open, :symbol]
                  tokens << [':', :symbol]
                  match = delim.chr
                  kind = :delimiter
                  state = patterns::StringState.new :symbol, delim == ?", match
                else
                  kind = :symbol
                end

              elsif match = scan(/ -[>=]? | [+!~^]=? | [*|&]{1,2}=? | >>? /x)
                value_expected = :set
                kind = :operator

              elsif value_expected and match = scan(unicode ? /#{patterns::HEREDOC_OPEN}/uo :
                                                              /#{patterns::HEREDOC_OPEN}/o)
                indented = self[1] == '-'
                quote = self[3]
                delim = self[quote ? 4 : 2]
                kind = patterns::QUOTE_TO_TYPE[quote]
                tokens << [:open, kind]
                tokens << [match, :delimiter]
                match = :close
                # The heredoc body is not scanned here; it is queued and
                # started by the newline branch above.
                heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart )
                heredocs ||= []  # create heredocs if empty
                heredocs << heredoc

              elsif value_expected and match = scan(/#{patterns::FANCY_START_CORRECT}/o)
                kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do
                  # Report the key that was looked up. The previous code
                  # referenced an undefined local +k+ here, which raised
                  # NameError instead of producing this diagnostic.
                  raise_inspect 'Unknown fancy string: %%%p' % self[1], tokens
                end
                tokens << [:open, kind]
                state = patterns::StringState.new kind, interpreted, self[2]
                kind = :delimiter

              elsif value_expected and match = scan(unicode ? /#{patterns::CHARACTER}/uo :
                                                              /#{patterns::CHARACTER}/o)
                kind = :integer

              elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
                value_expected = :set
                kind = :operator

              elsif match = scan(/`/)
                if last_token_dot
                  kind = :operator
                else
                  tokens << [:open, :shell]
                  kind = :delimiter
                  state = patterns::StringState.new :shell, true, match
                end

              elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
                                           /#{patterns::GLOBAL_VARIABLE}/o)
                kind = :global_variable

              elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
                                           /#{patterns::CLASS_VARIABLE}/o)
                kind = :class_variable

              else
                if !unicode && !string.respond_to?(:encoding)
                  # check for unicode
                  # ($DEBUG is switched off temporarily and restored in +ensure+.)
                  debug, $DEBUG = $DEBUG, false
                  begin
                    if check(/./mu).size > 1
                      # seems like we should try again with unicode
                      unicode = true
                    end
                  rescue
                    # bad unicode char; use getch
                  ensure
                    $DEBUG = debug
                  end
                  next if unicode
                end
                # Nothing matched: emit a single character as an error token.
                kind = :error
                match = scan(unicode ? /./mu : /./m)

              end

            elsif state == :def_expected
              state = :initial
              if scan(/self\./)
                tokens << ['self', :pre_constant]
                tokens << ['.', :operator]
              end
              if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
                                        /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
                kind = :method
              else
                next
              end

            elsif state == :module_expected
              if match = scan(/<</)
                kind = :operator
              else
                state = :initial
                if match = scan(unicode ? /(?:#{patterns::IDENT}::)*#{patterns::IDENT}/uo :
                                          /(?:#{patterns::IDENT}::)*#{patterns::IDENT}/o)
                  kind = :class
                else
                  next
                end
              end

            elsif state == :undef_expected
              state = :undef_comma_expected
              if match = scan(unicode ? /#{patterns::METHOD_NAME_EX}/uo :
                                        /#{patterns::METHOD_NAME_EX}/o)
                kind = :method
              elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
                                           /#{patterns::SYMBOL}/o)
                case delim = match[1]
                when ?', ?"
                  tokens << [:open, :symbol]
                  tokens << [':', :symbol]
                  match = delim.chr
                  kind = :delimiter
                  state = patterns::StringState.new :symbol, delim == ?", match
                  # After the quoted symbol ends, keep looking for a comma.
                  state.next_state = :undef_comma_expected
                else
                  kind = :symbol
                end
              else
                state = :initial
                next
              end

            elsif state == :alias_expected
              match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
                                     /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)

              if match
                tokens << [self[1], (self[1][0] == ?: ? :symbol : :method)]
                tokens << [self[2], :space]
                tokens << [self[3], (self[3][0] == ?: ? :symbol : :method)]
              end
              state = :initial
              next

            elsif state == :undef_comma_expected
              if match = scan(/,/)
                kind = :operator
                state = :undef_expected
              else
                state = :initial
                next
              end

            end
            # }}}

            unless kind == :error
              # Collapse the :set markers written above into booleans for
              # the next iteration; '?' and 'when' additionally arm
              # :expect_colon, which disables the "name: " key shortcut.
              if value_expected = value_expected == :set
                value_expected = :expect_colon if match == '?' || match == 'when'
              end
              last_token_dot = last_token_dot == :set
            end

            if $CODERAY_DEBUG and not kind
              raise_inspect 'Error token %p in line %d' %
                [[match, kind], line], tokens, state
            end
            raise_inspect 'Empty token', tokens unless match

            tokens << [match, kind]

            if last_state
              state = last_state
              last_state = nil
            end
          end
        end

        # End of input: close every literal and inline block that is still
        # open. Entries pushed by #{ have more than one element, which is
        # how they are told apart from the bare [state] pushed here.
        inline_block_stack << [state] if state.is_a? patterns::StringState
        until inline_block_stack.empty?
          this_block = inline_block_stack.pop
          tokens << [:close, :inline] if this_block.size > 1
          state = this_block.first
          tokens << [:close, state.type]
        end

        tokens
      end

    end

  end
end
443 | |
444 # vim:fdm=marker |