Mercurial > hg > soundsoftware-site
comparison vendor/gems/coderay-1.0.0/lib/coderay/scanners/ruby.rb @ 909:cbb26bc654de redmine-1.3
Update to Redmine 1.3-stable branch (Redmine SVN rev 8964)
author | Chris Cannam |
---|---|
date | Fri, 24 Feb 2012 19:09:32 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
908:c6c2cbd0afee | 909:cbb26bc654de |
---|---|
1 module CodeRay | |
2 module Scanners | |
3 | |
4 # This scanner is really complex, since Ruby _is_ a complex language! | |
5 # | |
6 # It tries to highlight 100% of all common code, | |
7 # and 90% of strange code. | |
8 # | |
9 # It is optimized for HTML highlighting, and is not very useful for | |
10 # parsing or pretty printing. | |
11 class Ruby < Scanner | |
12 | |
13 register_for :ruby | |
14 file_extension 'rb' | |
15 | |
16 autoload :Patterns, 'coderay/scanners/ruby/patterns' | |
17 autoload :StringState, 'coderay/scanners/ruby/string_state' | |
18 | |
# Builds a new StringState of type :string, passing +true+ and '"' as the | |
# remaining constructor arguments. | |
# NOTE(review): judging by the other StringState.new calls in this class, the | |
# second argument looks like the "interpreted" flag and the third the | |
# delimiter - confirm against StringState. | |
19 def interpreted_string_state | |
20 StringState.new :string, true, '"' | |
21 end | |
22 | |
23 protected | |
24 | |
# Sets the stored scanner state to :initial. scan_tokens falls back to this | |
# value when no :state option is given. | |
25 def setup | |
26 @state = :initial | |
27 end | |
28 | |
# Scans the input until end of string and reports every token to +encoder+. | |
# | |
# While +state+ is a Symbol the scanner is in ordinary code: :initial or one | |
# of the other Symbol states handled below (for example :def_expected). | |
# Otherwise +state+ is a StringState and the scanner is inside a string-like | |
# literal (for example a string, symbol, regexp, shell command or heredoc). | |
# | |
# encoder:: receives the begin_group / end_group / text_token calls. | |
# options:: +:state+ overrides the start state (default: @state); | |
# +:keep_state+ stores the final state and pending heredocs in @state. | |
# | |
# Returns +encoder+. | |
29 def scan_tokens encoder, options | |
# The start state may be a [state, heredocs] pair, which is the shape stored | |
# by the keep_state branch at the end of this method. | |
30 state, heredocs = options[:state] || @state | |
31 heredocs = heredocs.dup if heredocs.is_a?(Array) | |
32 | |
# Resuming inside a string-like literal: reopen its token group. | |
33 if state && state.instance_of?(StringState) | |
34 encoder.begin_group state.type | |
35 end | |
36 | |
# When set, the :initial branch switches back to this state after one pass. | |
37 last_state = nil | |
38 | |
# Holds the matched '.' or '::' (truthy) right after such an operator. | |
39 method_call_expected = false | |
# Decides whether '/', '<<', '%', '?' and a leading sign start a value. | |
40 value_expected = true | |
41 | |
# Stack of [state, curly depth, heredocs] entries saved when an inline block | |
# is opened inside a string. | |
42 inline_block_stack = nil | |
43 inline_block_curly_depth = 0 | |
44 | |
# A pending heredoc becomes the current state straight away. | |
45 if heredocs | |
46 state = heredocs.shift | |
47 encoder.begin_group state.type | |
48 heredocs = nil if heredocs.empty? | |
49 end | |
50 | |
51 # def_object_stack = nil | |
52 # def_object_paren_depth = 0 | |
53 | |
54 patterns = Patterns # avoid constant lookup | |
55 | |
# Selects the /u variants of the patterns below when the input is UTF-8. | |
56 unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8' | |
57 | |
58 until eos? | |
59 | |
60 if state.instance_of? ::Symbol | |
61 | |
62 if match = scan(/[ \t\f\v]+/) | |
63 encoder.text_token match, :space | |
64 | |
65 elsif match = scan(/\n/) | |
66 if heredocs | |
67 unscan # heredoc scanning needs \n at start | |
68 state = heredocs.shift | |
69 encoder.begin_group state.type | |
70 heredocs = nil if heredocs.empty? | |
71 else | |
72 state = :initial if state == :undef_comma_expected | |
73 encoder.text_token match, :space | |
74 value_expected = true | |
75 end | |
76 | |
77 elsif match = scan(bol? ? / \#(!)?.* | #{patterns::RUBYDOC_OR_DATA} /ox : /\#.*/) | |
78 encoder.text_token match, self[1] ? :doctype : :comment | |
79 | |
80 elsif match = scan(/\\\n/) | |
81 if heredocs | |
82 unscan # heredoc scanning needs \n at start | |
83 encoder.text_token scan(/\\/), :space | |
84 state = heredocs.shift | |
85 encoder.begin_group state.type | |
86 heredocs = nil if heredocs.empty? | |
87 else | |
88 encoder.text_token match, :space | |
89 end | |
90 | |
91 elsif state == :initial | |
92 | |
93 # IDENTS # | |
94 if !method_call_expected && | |
95 match = scan(unicode ? /#{patterns::METHOD_NAME}/uo : | |
96 /#{patterns::METHOD_NAME}/o) | |
97 value_expected = false | |
98 kind = patterns::IDENT_KIND[match] | |
99 if kind == :ident | |
# A capitalized name counts as a constant unless it ends in ! or ? or is | |
# directly followed by an opening parenthesis. | |
100 if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/)) | |
101 kind = :constant | |
102 end | |
103 elsif kind == :keyword | |
104 state = patterns::KEYWORD_NEW_STATE[match] | |
105 value_expected = true if patterns::KEYWORDS_EXPECTING_VALUE[match] | |
106 end | |
107 value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o) | |
108 encoder.text_token match, kind | |
109 | |
110 elsif method_call_expected && | |
111 match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo : | |
112 /#{patterns::METHOD_AFTER_DOT}/o) | |
113 if method_call_expected == '::' && match[/\A[A-Z]/] && !match?(/\(/) | |
114 encoder.text_token match, :constant | |
115 else | |
116 encoder.text_token match, :ident | |
117 end | |
118 method_call_expected = false | |
119 value_expected = check(/#{patterns::VALUE_FOLLOWS}/o) | |
120 | |
121 # OPERATORS # | |
122 elsif !method_call_expected && match = scan(/ (\.(?!\.)|::) | (?: \.\.\.? | ==?=? | [,\(\[\{] )() | [\)\]\}] /x) | |
# Group 1 is set for '.' and '::'; the empty group 2 is set for the | |
# alternatives after which a value is expected. | |
123 method_call_expected = self[1] | |
124 value_expected = !method_call_expected && self[2] | |
125 if inline_block_stack | |
126 case match | |
127 when '{' | |
128 inline_block_curly_depth += 1 | |
129 when '}' | |
130 inline_block_curly_depth -= 1 | |
131 if inline_block_curly_depth == 0 # closing brace of inline block reached | |
# Restore what was saved when this inline block was opened. | |
132 state, inline_block_curly_depth, heredocs = inline_block_stack.pop | |
133 inline_block_stack = nil if inline_block_stack.empty? | |
134 heredocs = nil if heredocs && heredocs.empty? | |
135 encoder.text_token match, :inline_delimiter | |
136 encoder.end_group :inline | |
137 next | |
138 end | |
139 end | |
140 end | |
141 encoder.text_token match, :operator | |
142 | |
143 elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo : | |
144 /#{patterns::SYMBOL}/o) | |
145 case delim = match[1] | |
146 when ?', ?" | |
147 encoder.begin_group :symbol | |
148 encoder.text_token ':', :symbol | |
149 match = delim.chr | |
150 encoder.text_token match, :delimiter | |
151 state = self.class::StringState.new :symbol, delim == ?", match | |
152 else | |
153 encoder.text_token match, :symbol | |
154 value_expected = false | |
155 end | |
156 | |
# Matches either a lone opening quote or a complete simple string, i.e. one | |
# without backslashes (and, for double quotes, without '#'). | |
157 elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx) | |
158 encoder.begin_group :string | |
159 if match.size == 1 | |
160 encoder.text_token match, :delimiter | |
161 state = self.class::StringState.new :string, match == '"', match # important for streaming | |
162 else | |
163 encoder.text_token match[0,1], :delimiter | |
164 encoder.text_token match[1..-2], :content if match.size > 2 | |
165 encoder.text_token match[-1,1], :delimiter | |
166 encoder.end_group :string | |
167 value_expected = false | |
168 end | |
169 | |
170 elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo : | |
171 /#{patterns::INSTANCE_VARIABLE}/o) | |
172 value_expected = false | |
173 encoder.text_token match, :instance_variable | |
174 | |
# A slash opens a regexp only where a value is expected; otherwise it is | |
# picked up by the operator pattern further down. | |
175 elsif value_expected && match = scan(/\//) | |
176 encoder.begin_group :regexp | |
177 encoder.text_token match, :delimiter | |
178 state = self.class::StringState.new :regexp, true, '/' | |
179 | |
180 elsif match = scan(value_expected ? /[-+]?#{patterns::NUMERIC}/o : /#{patterns::NUMERIC}/o) | |
181 if method_call_expected | |
182 encoder.text_token match, :error | |
183 method_call_expected = false | |
184 else | |
185 encoder.text_token match, self[1] ? :float : :integer # TODO: send :hex/:octal/:binary | |
186 end | |
187 value_expected = false | |
188 | |
189 elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x) | |
190 value_expected = true | |
191 encoder.text_token match, :operator | |
192 | |
193 elsif value_expected && match = scan(/#{patterns::HEREDOC_OPEN}/o) | |
194 quote = self[3] | |
195 delim = self[quote ? 4 : 2] | |
196 kind = patterns::QUOTE_TO_TYPE[quote] | |
197 encoder.begin_group kind | |
198 encoder.text_token match, :delimiter | |
199 encoder.end_group kind | |
# Only the opener is emitted here; the heredoc is queued and picked up as | |
# the current state at the next newline. | |
200 heredocs ||= [] # create heredocs if empty | |
201 heredocs << self.class::StringState.new(kind, quote != "'", delim, | |
202 self[1] == '-' ? :indented : :linestart) | |
203 value_expected = false | |
204 | |
205 elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o) | |
206 kind = patterns::FANCY_STRING_KIND[self[1]] | |
207 encoder.begin_group kind | |
208 state = self.class::StringState.new kind, patterns::FANCY_STRING_INTERPRETED[self[1]], self[2] | |
209 encoder.text_token match, :delimiter | |
210 | |
211 elsif value_expected && match = scan(/#{patterns::CHARACTER}/o) | |
212 value_expected = false | |
213 encoder.text_token match, :integer | |
214 | |
215 elsif match = scan(/ %=? | <(?:<|=>?)? | \? /x) | |
216 value_expected = true | |
217 encoder.text_token match, :operator | |
218 | |
219 elsif match = scan(/`/) | |
220 encoder.begin_group :shell | |
221 encoder.text_token match, :delimiter | |
222 state = self.class::StringState.new :shell, true, match | |
223 | |
224 elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo : | |
225 /#{patterns::GLOBAL_VARIABLE}/o) | |
226 encoder.text_token match, :global_variable | |
227 value_expected = false | |
228 | |
229 elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo : | |
230 /#{patterns::CLASS_VARIABLE}/o) | |
231 encoder.text_token match, :class_variable | |
232 value_expected = false | |
233 | |
234 elsif match = scan(/\\\z/) | |
235 encoder.text_token match, :space | |
236 | |
237 else | |
# Nothing matched. If a method name was expected, drop that expectation | |
# and retry the same position with the other patterns. | |
238 if method_call_expected | |
239 method_call_expected = false | |
240 next | |
241 end | |
242 unless unicode | |
243 # check for unicode | |
# $DEBUG is switched off around the probe and restored in the ensure clause. | |
244 $DEBUG_BEFORE, $DEBUG = $DEBUG, false | |
245 begin | |
246 if check(/./mu).size > 1 | |
247 # seems like we should try again with unicode | |
248 unicode = true | |
249 end | |
250 rescue | |
251 # bad unicode char; use getch | |
252 ensure | |
253 $DEBUG = $DEBUG_BEFORE | |
254 end | |
255 next if unicode | |
256 end | |
257 | |
258 encoder.text_token getch, :error | |
259 | |
260 end | |
261 | |
# One-shot return to a saved state. last_state is set by the '#$' / '#@' | |
# string escape and by the :def_expected branch. | |
262 if last_state | |
263 state = last_state | |
264 last_state = nil | |
265 end | |
266 | |
267 elsif state == :def_expected | |
268 if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo : | |
269 /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o) | |
270 encoder.text_token match, :method | |
271 state = :initial | |
272 else | |
# No plain method name here: scan one token in :initial, then expect | |
# '.' or '::'. | |
273 last_state = :dot_expected | |
274 state = :initial | |
275 end | |
276 | |
277 elsif state == :dot_expected | |
278 if match = scan(/\.|::/) | |
279 # invalid definition | |
280 state = :def_expected | |
281 encoder.text_token match, :operator | |
282 else | |
283 state = :initial | |
284 end | |
285 | |
286 elsif state == :module_expected | |
287 if match = scan(/<</) | |
288 encoder.text_token match, :operator | |
289 else | |
290 state = :initial | |
291 if match = scan(unicode ? / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /oux : | |
292 / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox) | |
293 encoder.text_token match, :class | |
294 end | |
295 end | |
296 | |
297 elsif state == :undef_expected | |
298 state = :undef_comma_expected | |
299 if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo : | |
300 /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o) | |
301 encoder.text_token match, :method | |
# NOTE(review): unlike the SYMBOL scan in the :initial branch, this one has | |
# no /u variant for unicode input - confirm whether that is intended. | |
302 elsif match = scan(/#{patterns::SYMBOL}/o) | |
303 case delim = match[1] | |
304 when ?', ?" | |
305 encoder.begin_group :symbol | |
306 encoder.text_token ':', :symbol | |
307 match = delim.chr | |
308 encoder.text_token match, :delimiter | |
309 state = self.class::StringState.new :symbol, delim == ?", match | |
310 state.next_state = :undef_comma_expected | |
311 else | |
312 encoder.text_token match, :symbol | |
313 end | |
314 else | |
315 state = :initial | |
316 end | |
317 | |
318 elsif state == :undef_comma_expected | |
319 if match = scan(/,/) | |
320 encoder.text_token match, :operator | |
321 state = :undef_expected | |
322 else | |
323 state = :initial | |
324 end | |
325 | |
326 elsif state == :alias_expected | |
327 match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo : | |
328 /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o) | |
329 | |
330 if match | |
# Each name is a :symbol when it starts with ':' and a :method otherwise. | |
331 encoder.text_token self[1], (self[1][0] == ?: ? :symbol : :method) | |
332 encoder.text_token self[2], :space | |
333 encoder.text_token self[3], (self[3][0] == ?: ? :symbol : :method) | |
334 end | |
335 state = :initial | |
336 | |
337 else | |
338 #:nocov: | |
339 raise_inspect 'Unknown state: %p' % [state], encoder | |
340 #:nocov: | |
341 end | |
342 | |
343 else # StringState | |
344 | |
# Text before the next match of state.pattern (or the rest of the input) is | |
# emitted as content. | |
345 match = scan_until(state.pattern) || scan_rest | |
346 unless match.empty? | |
347 encoder.text_token match, :content | |
348 break if eos? | |
349 end | |
350 | |
351 if state.heredoc && self[1] # end of heredoc | |
352 match = getch | |
353 match << scan_until(/$/) unless eos? | |
354 encoder.text_token match, :delimiter unless match.empty? | |
355 encoder.end_group state.type | |
356 state = state.next_state | |
357 next | |
358 end | |
359 | |
# Dispatch on the single character consumed next. | |
360 case match = getch | |
361 | |
362 when state.delim | |
# With paren_depth tracking, a closing delimiter ends the literal only once | |
# the depth has dropped to zero; until then it is content. | |
363 if state.paren_depth | |
364 state.paren_depth -= 1 | |
365 if state.paren_depth > 0 | |
366 encoder.text_token match, :content | |
367 next | |
368 end | |
369 end | |
370 encoder.text_token match, :delimiter | |
371 if state.type == :regexp && !eos? | |
372 match = scan(/#{patterns::REGEXP_MODIFIERS}/o) | |
373 encoder.text_token match, :modifier unless match.empty? | |
374 end | |
375 encoder.end_group state.type | |
376 value_expected = false | |
377 state = state.next_state | |
378 | |
379 when '\\' | |
380 if state.interpreted | |
381 if esc = scan(/#{patterns::ESCAPE}/o) | |
382 encoder.text_token match + esc, :char | |
383 else | |
384 encoder.text_token match, :error | |
385 end | |
386 else | |
# Non-interpreted literal: only an escaped delimiter or an escaped | |
# backslash is emitted as :char. | |
387 case esc = getch | |
388 when nil | |
389 encoder.text_token match, :content | |
390 when state.delim, '\\' | |
391 encoder.text_token match + esc, :char | |
392 else | |
393 encoder.text_token match + esc, :content | |
394 end | |
395 end | |
396 | |
397 when '#' | |
398 case peek(1) | |
399 when '{' | |
# Inline block: save the string context, then scan code in :initial until | |
# the matching closing brace. | |
400 inline_block_stack ||= [] | |
401 inline_block_stack << [state, inline_block_curly_depth, heredocs] | |
402 value_expected = true | |
403 state = :initial | |
404 inline_block_curly_depth = 1 | |
405 encoder.begin_group :inline | |
406 encoder.text_token match + getch, :inline_delimiter | |
407 when '$', '@' | |
# Variable shorthand: scan one token in :initial, then return to this | |
# string state via last_state. | |
408 encoder.text_token match, :escape | |
409 last_state = state | |
410 state = :initial | |
411 else | |
412 #:nocov: | |
413 raise_inspect 'else-case # reached; #%p not handled' % [peek(1)], encoder | |
414 #:nocov: | |
415 end | |
416 | |
417 when state.opening_paren | |
418 state.paren_depth += 1 | |
419 encoder.text_token match, :content | |
420 | |
421 else | |
422 #:nocov: | |
423 raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], encoder | |
424 #:nocov: | |
425 | |
426 end | |
427 | |
428 end | |
429 | |
430 end | |
431 | |
432 # cleaning up | |
433 if state.is_a? StringState | |
434 encoder.end_group state.type | |
435 end | |
436 | |
437 if options[:keep_state] | |
# An unfinished heredoc goes back to the front of the queue, so the stored | |
# state is :initial plus the pending heredocs. | |
438 if state.is_a?(StringState) && state.heredoc | |
439 (heredocs ||= []).unshift state | |
440 state = :initial | |
441 elsif heredocs && heredocs.empty? | |
442 heredocs = nil | |
443 end | |
444 @state = state, heredocs | |
445 end | |
446 | |
# Input ended inside one or more inline blocks: close each :inline group | |
# and the group of the literal that contained it. | |
447 if inline_block_stack | |
448 until inline_block_stack.empty? | |
449 state, = *inline_block_stack.pop | |
450 encoder.end_group :inline | |
451 encoder.end_group state.type | |
452 end | |
453 end | |
454 | |
455 encoder | |
456 end | |
457 | |
458 end | |
459 | |
460 end | |
461 end |