Mercurial > hg > soundsoftware-site
comparison vendor/plugins/coderay-0.9.2/lib/coderay/scanners/.svn/text-base/ruby.rb.svn-base @ 0:513646585e45
* Import Redmine trunk SVN rev 3859
author | Chris Cannam |
---|---|
date | Fri, 23 Jul 2010 15:52:44 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:513646585e45 |
---|---|
module CodeRay
module Scanners

  # This scanner is really complex, since Ruby _is_ a complex language!
  #
  # It tries to highlight 100% of all common code,
  # and 90% of strange code.
  #
  # It is optimized for HTML highlighting, and is not very useful for
  # parsing or pretty printing.
  #
  # For now, I think it's better than the scanners in VIM or Syntax, or
  # any highlighter I was able to find, except Caleb's RubyLexer.
  #
  # I hope it's also better than the rdoc/irb lexer.
  class Ruby < Scanner

    include Streamable

    register_for :ruby
    file_extension 'rb'

    helper :patterns

    # Define a stand-in only when no EncodingError constant is visible here.
    if not defined? EncodingError
      EncodingError = Class.new Exception
    end

  private
    # Scans the input and appends tokens to +tokens+.
    #
    # Text tokens are pushed as [text, kind] pairs; nested constructs
    # (strings, regexps, symbols, shell strings, heredocs, inline "#{}"
    # blocks) are bracketed by [:open, kind] and [:close, kind] markers.
    # Anything still open at the end of the input is closed before returning.
    #
    # tokens::  the collection tokens are appended to (only << is used).
    # options:: not read by this method.
    #
    # Returns +tokens+.
    def scan_tokens tokens, options
      # Scanner state. Flags set to :set survive exactly one more token:
      # the normalisation near the bottom of the loop turns :set into true
      # and everything else into false.
      last_token_dot = false      # previous operator token was "." or "::"
      value_expected = true       # a value may start here (decides "/" as regexp, heredocs, ...)
      heredocs = nil              # heredoc states waiting for the next newline, or nil
      last_state = nil            # string state to return to after one token ("#$x" / "#@x")
      state = :initial            # a Symbol, or a patterns::StringState while inside a string
      depth = nil                 # brace depth inside the current inline "#{}" block
      inline_block_stack = []     # saved [state, depth, heredocs] of enclosing strings
      unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'

      patterns = Patterns  # avoid constant lookup

      until eos?
        match = nil
        kind = nil

        if state.instance_of? patterns::StringState
# {{{
          # Inside a string-like literal: consume plain content up to the
          # next interesting character (or to the end of the input).
          match = scan_until(state.pattern) || scan_until(/\z/)
          tokens << [match, :content] unless match.empty?
          break if eos?

          if state.heredoc and self[1]  # end of heredoc
            match = getch.to_s
            match << scan_until(/$/) unless eos?
            tokens << [match, :delimiter]
            tokens << [:close, state.type]
            state = state.next_state
            next
          end

          case match = getch

          when state.delim
            if state.paren
              # Nested closing bracket: only the outermost one ends the literal.
              state.paren_depth -= 1
              if state.paren_depth > 0
                tokens << [match, :nesting_delimiter]
                next
              end
            end
            tokens << [match, :delimiter]
            if state.type == :regexp and not eos?
              modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
              tokens << [modifiers, :modifier] unless modifiers.empty?
            end
            tokens << [:close, state.type]
            value_expected = false
            state = state.next_state

          when '\\'
            if state.interpreted
              if esc = scan(/ #{patterns::ESCAPE} /ox)
                tokens << [match + esc, :char]
              else
                tokens << [match, :error]
              end
            else
              # Non-interpreted literal: only the delimiter and the backslash
              # itself are reported as escaped characters.
              case m = getch
              when state.delim, '\\'
                tokens << [match + m, :char]
              when nil
                tokens << [match, :error]
              else
                tokens << [match + m, :content]
              end
            end

          when '#'
            case peek(1)
            when '{'
              # Start of an inline code block: remember the string context
              # and scan the block contents as normal code.
              inline_block_stack << [state, depth, heredocs]
              value_expected = true
              state = :initial
              depth = 1
              tokens << [:open, :inline]
              tokens << [match + getch, :inline_delimiter]
            when '$', '@'
              tokens << [match, :escape]
              last_state = state  # scan one token as normal code, then return here
              state = :initial
            else
              raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
            end

          when state.paren
            state.paren_depth += 1
            tokens << [match, :nesting_delimiter]

          when /#{patterns::REGEXP_SYMBOLS}/ox
            tokens << [match, :function]

          else
            raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens

          end
          next
# }}}
        else
# {{{
          if match = scan(/[ \t\f]+/)
            kind = :space
            # Do not swallow the newline while heredocs are pending; their
            # bodies start right after it.
            match << scan(/\s*/) unless eos? || heredocs
            value_expected = true if match.index(?\n)
            tokens << [match, kind]
            next

          elsif match = scan(/\\?\n/)
            kind = :space
            if match == "\n"
              value_expected = true
              state = :initial if state == :undef_comma_expected
            end
            if heredocs
              unscan  # heredoc scanning needs \n at start
              state = heredocs.shift
              tokens << [:open, state.type]
              heredocs = nil if heredocs.empty?
              next
            else
              match << scan(/\s*/) unless eos?
            end
            tokens << [match, kind]
            next

          elsif bol? && match = scan(/\#!.*/)
            tokens << [match, :doctype]
            next

          elsif match = scan(/\#.*/) or
            ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
              kind = :comment
              tokens << [match, kind]
              next

          elsif state == :initial

            # IDENTS #
            if match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
                                      /#{patterns::METHOD_NAME}/o)
              if last_token_dot
                # After "." or "::" keywords are plain method names.
                kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
              else
                kind = patterns::IDENT_KIND[match]
                if kind == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
                  kind = :constant
                elsif kind == :reserved
                  state = patterns::DEF_NEW_STATE[match]
                  value_expected = :set if patterns::KEYWORDS_EXPECTING_VALUE[match]
                end
              end
              value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)

            elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}|\(/o)
              kind = :ident
              value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)

            # OPERATORS #
            elsif not last_token_dot and match = scan(/ \.\.\.? | (?:\.|::)() | [,\(\)\[\]\{\}] | ==?=? /x)
              if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
                value_expected = :set
              end
              # The empty capture group only participates for "." and "::".
              last_token_dot = :set if self[1]
              kind = :operator
              unless inline_block_stack.empty?
                case match
                when '{'
                  depth += 1
                when '}'
                  depth -= 1
                  if depth == 0  # closing brace of inline block reached
                    state, depth, heredocs = inline_block_stack.pop
                    heredocs = nil if heredocs && heredocs.empty?
                    tokens << [match, :inline_delimiter]
                    kind = :inline
                    match = :close
                  end
                end
              end

            elsif match = scan(/ ['"] /mx)
              tokens << [:open, :string]
              kind = :delimiter
              state = patterns::StringState.new :string, match == '"', match  # important for streaming

            elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o)
              kind = :instance_variable

            elsif value_expected and match = scan(/\//)
              tokens << [:open, :regexp]
              kind = :delimiter
              interpreted = true
              state = patterns::StringState.new :regexp, interpreted, match

            # elsif match = scan(/[-+]?#{patterns::NUMERIC}/o)
            elsif match = value_expected ? scan(/[-+]?#{patterns::NUMERIC}/o) : scan(/#{patterns::NUMERIC}/o)
              kind = self[1] ? :float : :integer

            elsif match = scan(/#{patterns::SYMBOL}/o)
              case delim = match[1]
              when ?', ?"
                # Quoted symbol: emit the colon, then scan the rest as a string.
                tokens << [:open, :symbol]
                tokens << [':', :symbol]
                match = delim.chr
                kind = :delimiter
                state = patterns::StringState.new :symbol, delim == ?", match
              else
                kind = :symbol
              end

            elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x)
              value_expected = :set
              kind = :operator

            elsif value_expected and match = scan(/#{patterns::HEREDOC_OPEN}/o)
              indented = self[1] == '-'
              quote = self[3]
              delim = self[quote ? 4 : 2]
              kind = patterns::QUOTE_TO_TYPE[quote]
              tokens << [:open, kind]
              tokens << [match, :delimiter]
              match = :close
              # The heredoc body is scanned later, after the next newline.
              heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart )
              heredocs ||= []  # create heredocs if empty
              heredocs << heredoc

            elsif value_expected and match = scan(/#{patterns::FANCY_START_CORRECT}/o)
              # fetch yields the missing key to the block, so it can be
              # reported in the error message.
              kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do |k|
                raise_inspect 'Unknown fancy string: %%%p' % k, tokens
              end
              tokens << [:open, kind]
              state = patterns::StringState.new kind, interpreted, self[2]
              kind = :delimiter

            elsif value_expected and match = scan(/#{patterns::CHARACTER}/o)
              kind = :integer

            elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
              value_expected = :set
              kind = :operator

            elsif match = scan(/`/)
              if last_token_dot
                kind = :operator
              else
                tokens << [:open, :shell]
                kind = :delimiter
                state = patterns::StringState.new :shell, true, match
              end

            elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o)
              kind = :global_variable

            elsif match = scan(/#{patterns::CLASS_VARIABLE}/o)
              kind = :class_variable

            else
              if !unicode
                # check for unicode
                # $DEBUG is switched off while probing and restored afterwards.
                debug, $DEBUG = $DEBUG, false
                begin
                  if check(/./mu).size > 1
                    # seems like we should try again with unicode
                    unicode = true
                  end
                rescue
                  # bad unicode char; use getch
                ensure
                  $DEBUG = debug
                end
                next if unicode
              end
              kind = :error
              match = getch

            end

          elsif state == :def_expected
            state = :initial
            if scan(/self\./)
              tokens << ['self', :pre_constant]
              tokens << ['.', :operator]
            end
            if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
                                      /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
              kind = :method
            else
              next
            end

          elsif state == :module_expected
            if match = scan(/<</)
              kind = :operator
            else
              state = :initial
              if match = scan(/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
                kind = :class
              else
                next
              end
            end

          elsif state == :undef_expected
            state = :undef_comma_expected
            if match = scan(/#{patterns::METHOD_NAME_EX}/o)
              kind = :method
            elsif match = scan(/#{patterns::SYMBOL}/o)
              case delim = match[1]
              when ?', ?"
                tokens << [:open, :symbol]
                tokens << [':', :symbol]
                match = delim.chr
                kind = :delimiter
                state = patterns::StringState.new :symbol, delim == ?", match
                # Continue the undef argument list once the symbol is closed.
                state.next_state = :undef_comma_expected
              else
                kind = :symbol
              end
            else
              state = :initial
              next
            end

          elsif state == :alias_expected
            match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
                                   /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)

            if match
              tokens << [self[1], (self[1][0] == ?: ? :symbol : :method)]
              tokens << [self[2], :space]
              tokens << [self[3], (self[3][0] == ?: ? :symbol : :method)]
            end
            state = :initial
            next

          elsif state == :undef_comma_expected
            if match = scan(/,/)
              kind = :operator
              state = :undef_expected
            else
              state = :initial
              next
            end

          end
# }}}

          # Expire the one-token flags: :set becomes true, anything else false.
          # Error tokens leave the flags untouched.
          unless kind == :error
            value_expected = value_expected == :set
            last_token_dot = last_token_dot == :set
          end

          if $CODERAY_DEBUG and not kind
            raise_inspect 'Error token %p in line %d' %
              [[match, kind], line], tokens, state
          end
          raise_inspect 'Empty token', tokens unless match

          tokens << [match, kind]

          # Return to the interrupted string after the single "#$x"/"#@x" token.
          if last_state
            state = last_state
            last_state = nil
          end
        end
      end

      # Close everything that is still open at the end of the input:
      # the current string (if any) and all enclosing inline blocks.
      inline_block_stack << [state] if state.is_a? patterns::StringState
      until inline_block_stack.empty?
        this_block = inline_block_stack.pop
        tokens << [:close, :inline] if this_block.size > 1
        state = this_block.first
        tokens << [:close, state.type]
      end

      tokens
    end

  end

end
end
412 | |
413 # vim:fdm=marker |