To check out this repository, run `hg clone` on the following URL, or open the URL in EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / .svn / pristine / d2 / d27acaba5e51d3baf547fd1e551a5deb117304a0.svn-base @ 1297:0a574315af3e

History | View | Annotate | Download (16.5 KB)

1
module CodeRay
2
module Scanners
3
  
4
  # This scanner is really complex, since Ruby _is_ a complex language!
5
  #
6
  # It tries to highlight 100% of all common code,
7
  # and 90% of strange code.
8
  #
9
  # It is optimized for HTML highlighting, and is not very useful for
10
  # parsing or pretty printing.
11
  class Ruby < Scanner
12
    
13
    # Class-level declarations: the scanner is registered under :ruby and
    # associated with the 'rb' file extension.
    register_for :ruby
    file_extension 'rb'

    # Helper constants are autoloaded from these paths on first reference.
    autoload :Patterns,    'coderay/scanners/ruby/patterns'
    autoload :StringState, 'coderay/scanners/ruby/string_state'
18
    
19
    # Builds a fresh StringState for a double-quoted string.
    #
    # The arguments passed are :string, true and '"'; judging by how
    # scan_tokens reads StringState objects these appear to be the token
    # group type, the "interpreted" flag and the closing delimiter
    # (NOTE(review): confirm against StringState#initialize).
    #
    # Returns the new StringState.
    def interpreted_string_state
      StringState.new :string, true, '"'
    end
22
    
23
  protected
24
    
25
    # Initialises the scanner's stored state to :initial.
    # scan_tokens reads @state as its starting state when no :state option
    # is given, and writes it back when :keep_state is set.
    def setup
      @state = :initial
    end
28
    
29
    # Scans the remaining input and sends the resulting tokens to +encoder+.
    #
    # The scanner is a state machine. +state+ is either a Symbol
    # (:initial, :def_expected, :dot_expected, :module_expected,
    # :undef_expected, :undef_comma_expected, :alias_expected) while
    # ordinary code is scanned, or a StringState object while the scanner
    # is inside a string-like literal (string, symbol, regexp, shell
    # command or heredoc body).
    #
    # encoder:: receives +text_token+, +begin_group+ and +end_group+ calls.
    # options:: +:state+ overrides the stored start state (either a single
    #           state or a [state, heredocs] pair); when +:keep_state+ is
    #           truthy the final [state, heredocs] pair is stored in @state.
    #
    # Returns +encoder+.
    def scan_tokens encoder, options
      state, heredocs = options[:state] || @state
      # work on a copy so the caller's/stored heredoc list is not mutated
      heredocs = heredocs.dup if heredocs.is_a?(Array)

      # resuming inside a string-like literal: reopen its token group
      if state && state.instance_of?(StringState)
        encoder.begin_group state.type
      end

      # state to return to after a single token scanned in :initial state
      # (set when "#$var" / "#@var" is found inside a string)
      last_state = nil

      # false, or the operator string ('.' / '::') that was just scanned
      method_call_expected = false
      # whether a value may start here; gates regexp, heredoc, %-literal,
      # character literal and signed number recognition below
      value_expected = true

      # stack of [state, curly depth, heredocs] saved when "#{" opens an
      # inline block inside a string
      inline_block_stack = nil
      inline_block_curly_depth = 0

      # pending heredocs: continue with the first one
      if heredocs
        state = heredocs.shift
        encoder.begin_group state.type
        heredocs = nil if heredocs.empty?
      end

      # def_object_stack = nil
      # def_object_paren_depth = 0

      patterns = Patterns  # avoid constant lookup

      unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'

      until eos?

        if state.instance_of? ::Symbol

          if match = scan(/[ \t\f\v]+/)
            encoder.text_token match, :space

          elsif match = scan(/\n/)
            if heredocs
              unscan  # heredoc scanning needs \n at start
              state = heredocs.shift
              encoder.begin_group state.type
              heredocs = nil if heredocs.empty?
            else
              state = :initial if state == :undef_comma_expected
              encoder.text_token match, :space
              value_expected = true
            end

          # at the beginning of a line, "#!" lines and RUBYDOC_OR_DATA
          # sections are recognised in addition to plain comments
          elsif match = scan(bol? ? / \#(!)?.* | #{patterns::RUBYDOC_OR_DATA} /ox : /\#.*/)
            encoder.text_token match, self[1] ? :doctype : :comment

          elsif match = scan(/\\\n/)
            if heredocs
              unscan  # heredoc scanning needs \n at start
              encoder.text_token scan(/\\/), :space
              state = heredocs.shift
              encoder.begin_group state.type
              heredocs = nil if heredocs.empty?
            else
              encoder.text_token match, :space
            end

          elsif state == :initial

            # IDENTS #
            if !method_call_expected &&
               match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
                                      /#{patterns::METHOD_NAME}/o)
              value_expected = false
              kind = patterns::IDENT_KIND[match]
              if kind == :ident
                # capitalised, no trailing ! or ?, not followed by "(":
                # treat as a constant rather than a method call
                if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/))
                  kind = :constant
                end
              elsif kind == :keyword
                state = patterns::KEYWORD_NEW_STATE[match]
                value_expected = true if patterns::KEYWORDS_EXPECTING_VALUE[match]
              end
              value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o)
              encoder.text_token match, kind

            elsif method_call_expected &&
               match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo :
                                      /#{patterns::METHOD_AFTER_DOT}/o)
              if method_call_expected == '::' && match[/\A[A-Z]/] && !match?(/\(/)
                encoder.text_token match, :constant
              else
                encoder.text_token match, :ident
              end
              method_call_expected = false
              value_expected = check(/#{patterns::VALUE_FOLLOWS}/o)

            # OPERATORS #
            # group 1: "." / "::" (a method name is expected next)
            # group 2: empty capture set by operators after which a value follows
            elsif !method_call_expected && match = scan(/ (\.(?!\.)|::) | (?: \.\.\.? | ==?=? | [,\(\[\{] )() | [\)\]\}] /x)
              method_call_expected = self[1]
              value_expected = !method_call_expected && self[2]
              if inline_block_stack
                case match
                when '{'
                  inline_block_curly_depth += 1
                when '}'
                  inline_block_curly_depth -= 1
                  if inline_block_curly_depth == 0  # closing brace of inline block reached
                    state, inline_block_curly_depth, heredocs = inline_block_stack.pop
                    inline_block_stack = nil if inline_block_stack.empty?
                    heredocs = nil if heredocs && heredocs.empty?
                    encoder.text_token match, :inline_delimiter
                    encoder.end_group :inline
                    next
                  end
                end
              end
              encoder.text_token match, :operator

            elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
                                         /#{patterns::SYMBOL}/o)
              case delim = match[1]
              when ?', ?"
                # quoted symbol (:'...' / :"..."): continue as a string state
                encoder.begin_group :symbol
                encoder.text_token ':', :symbol
                match = delim.chr
                encoder.text_token match, :delimiter
                state = self.class::StringState.new :symbol, delim == ?", match
              else
                encoder.text_token match, :symbol
                value_expected = false
              end

            # a quote, optionally followed by a complete simple string
            # (no backslashes; for "..." also no "#")
            elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx)
              encoder.begin_group :string
              if match.size == 1
                encoder.text_token match, :delimiter
                state = self.class::StringState.new :string, match == '"', match  # important for streaming
              else
                encoder.text_token match[0,1], :delimiter
                encoder.text_token match[1..-2], :content if match.size > 2
                encoder.text_token match[-1,1], :delimiter
                encoder.end_group :string
                value_expected = false
              end

            elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
                                         /#{patterns::INSTANCE_VARIABLE}/o)
              value_expected = false
              encoder.text_token match, :instance_variable

            # "/" opens a regexp only where a value is expected
            elsif value_expected && match = scan(/\//)
              encoder.begin_group :regexp
              encoder.text_token match, :delimiter
              state = self.class::StringState.new :regexp, true, '/'

            # a leading sign belongs to the number only where a value is expected
            elsif match = scan(value_expected ? /[-+]?#{patterns::NUMERIC}/o : /#{patterns::NUMERIC}/o)
              if method_call_expected
                encoder.text_token match, :error
                method_call_expected = false
              else
                encoder.text_token match, self[1] ? :float : :integer  # TODO: send :hex/:octal/:binary
              end
              value_expected = false

            elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x)
              value_expected = true
              encoder.text_token match, :operator

            elsif value_expected && match = scan(/#{patterns::HEREDOC_OPEN}/o)
              quote = self[3]
              delim = self[quote ? 4 : 2]
              kind = patterns::QUOTE_TO_TYPE[quote]
              encoder.begin_group kind
              encoder.text_token match, :delimiter
              encoder.end_group kind
              # the heredoc body is scanned later, starting at the next newline
              heredocs ||= []  # create heredocs if empty
              heredocs << self.class::StringState.new(kind, quote != "'", delim,
                self[1] == '-' ? :indented : :linestart)
              value_expected = false

            elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o)
              kind = patterns::FANCY_STRING_KIND[self[1]]
              encoder.begin_group kind
              state = self.class::StringState.new kind, patterns::FANCY_STRING_INTERPRETED[self[1]], self[2]
              encoder.text_token match, :delimiter

            elsif value_expected && match = scan(/#{patterns::CHARACTER}/o)
              value_expected = false
              encoder.text_token match, :integer

            elsif match = scan(/ %=? | <(?:<|=>?)? | \? /x)
              value_expected = true
              encoder.text_token match, :operator

            elsif match = scan(/`/)
              encoder.begin_group :shell
              encoder.text_token match, :delimiter
              state = self.class::StringState.new :shell, true, match

            elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
                                         /#{patterns::GLOBAL_VARIABLE}/o)
              encoder.text_token match, :global_variable
              value_expected = false

            elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
                                         /#{patterns::CLASS_VARIABLE}/o)
              encoder.text_token match, :class_variable
              value_expected = false

            # a lone backslash at the very end of the input
            elsif match = scan(/\\\z/)
              encoder.text_token match, :space

            else
              # nothing matched while a method name was expected:
              # drop the expectation and rescan at the same position
              if method_call_expected
                method_call_expected = false
                next
              end
              unless unicode
                # check for unicode
                # $DEBUG is switched off while probing and restored afterwards
                # (presumably to keep the rescued error from being reported)
                $DEBUG_BEFORE, $DEBUG = $DEBUG, false
                begin
                  if check(/./mu).size > 1
                    # seems like we should try again with unicode
                    unicode = true
                  end
                rescue
                  # bad unicode char; use getch
                ensure
                  $DEBUG = $DEBUG_BEFORE
                end
                next if unicode
              end

              encoder.text_token getch, :error

            end

            # return to the interrupted string state after one token
            if last_state
              state = last_state
              last_state = nil
            end

          elsif state == :def_expected
            if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
                                      /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
              encoder.text_token match, :method
              state = :initial
            else
              # scan one token in :initial state, then expect "." or "::"
              last_state = :dot_expected
              state = :initial
            end

          elsif state == :dot_expected
            if match = scan(/\.|::/)
              # invalid definition
              state = :def_expected
              encoder.text_token match, :operator
            else
              state = :initial
            end

          elsif state == :module_expected
            if match = scan(/<</)
              encoder.text_token match, :operator
            else
              state = :initial
              if match = scan(unicode ? / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /oux :
                                        / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
                encoder.text_token match, :class
              end
            end

          elsif state == :undef_expected
            state = :undef_comma_expected
            if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
                                      /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
              encoder.text_token match, :method
            elsif match = scan(/#{patterns::SYMBOL}/o)
              case delim = match[1]
              when ?', ?"
                encoder.begin_group :symbol
                encoder.text_token ':', :symbol
                match = delim.chr
                encoder.text_token match, :delimiter
                state = self.class::StringState.new :symbol, delim == ?", match
                # continue the undef list once the quoted symbol is closed
                state.next_state = :undef_comma_expected
              else
                encoder.text_token match, :symbol
              end
            else
              state = :initial
            end

          elsif state == :undef_comma_expected
            if match = scan(/,/)
              encoder.text_token match, :operator
              state = :undef_expected
            else
              state = :initial
            end

          elsif state == :alias_expected
            match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
                                   /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)

            if match
              # each name is a :symbol when it starts with ":", else a :method
              encoder.text_token self[1], (self[1][0] == ?: ? :symbol : :method)
              encoder.text_token self[2], :space
              encoder.text_token self[3], (self[3][0] == ?: ? :symbol : :method)
            end
            state = :initial

          else
            #:nocov:
            raise_inspect 'Unknown state: %p' % [state], encoder
            #:nocov:
          end

        else  # StringState

          # plain content up to the next position matched by the state's
          # pattern, or everything that is left
          match = scan_until(state.pattern) || scan_rest
          unless match.empty?
            encoder.text_token match, :content
            break if eos?
          end

          if state.heredoc && self[1]  # end of heredoc
            match = getch
            match << scan_until(/$/) unless eos?
            encoder.text_token match, :delimiter unless match.empty?
            encoder.end_group state.type
            state = state.next_state
            next
          end

          case match = getch

          when state.delim
            # with nested parens, only the outermost closer ends the literal
            if state.paren_depth
              state.paren_depth -= 1
              if state.paren_depth > 0
                encoder.text_token match, :content
                next
              end
            end
            encoder.text_token match, :delimiter
            if state.type == :regexp && !eos?
              match = scan(/#{patterns::REGEXP_MODIFIERS}/o)
              encoder.text_token match, :modifier unless match.empty?
            end
            encoder.end_group state.type
            value_expected = false
            state = state.next_state

          when '\\'
            if state.interpreted
              if esc = scan(/#{patterns::ESCAPE}/o)
                encoder.text_token match + esc, :char
              else
                encoder.text_token match, :error
              end
            else
              # non-interpreted literal: only an escaped delimiter or
              # backslash is emitted as :char
              case esc = getch
              when nil
                encoder.text_token match, :content
              when state.delim, '\\'
                encoder.text_token match + esc, :char
              else
                encoder.text_token match + esc, :content
              end
            end

          when '#'
            case peek(1)
            when '{'
              # "#{": save the string state and scan the inline block as code
              inline_block_stack ||= []
              inline_block_stack << [state, inline_block_curly_depth, heredocs]
              value_expected = true
              state = :initial
              inline_block_curly_depth = 1
              encoder.begin_group :inline
              encoder.text_token match + getch, :inline_delimiter
            when '$', '@'
              # "#$var" / "#@var": scan one token as code, then come back
              encoder.text_token match, :escape
              last_state = state
              state = :initial
            else
              #:nocov:
              raise_inspect 'else-case # reached; #%p not handled' % [peek(1)], encoder
              #:nocov:
            end

          when state.opening_paren
            state.paren_depth += 1
            encoder.text_token match, :content

          else
            #:nocov:
            raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], encoder
            #:nocov:

          end

        end

      end

      # cleaning up
      if state.is_a? StringState
        encoder.end_group state.type
      end

      if options[:keep_state]
        # an unfinished heredoc is put back at the front of the pending
        # list so the next call resumes with it
        if state.is_a?(StringState) && state.heredoc
          (heredocs ||= []).unshift state
          state = :initial
        elsif heredocs && heredocs.empty?
          heredocs = nil
        end
        @state = state, heredocs
      end

      # close every inline block still open, and the string around it
      if inline_block_stack
        until inline_block_stack.empty?
          state, = *inline_block_stack.pop
          encoder.end_group :inline
          encoder.end_group state.type
        end
      end

      encoder
    end
457
    
458
  end
459
  
460
end
461
end