To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / vendor / gems / coderay-0.9.7 / lib / coderay / scanners / ruby.rb @ 442:753f1380d6bc

History | View | Annotate | Download (15 KB)

1
# encoding: utf-8
2
module CodeRay
module Scanners

  # This scanner is really complex, since Ruby _is_ a complex language!
  #
  # It tries to highlight 100% of all common code,
  # and 90% of strange code.
  #
  # It is optimized for HTML highlighting, and is not very useful for
  # parsing or pretty printing.
  #
  # For now, I think it's better than the scanners in VIM or Syntax, or
  # any highlighter I was able to find, except Caleb's RubyLexer.
  #
  # I hope it's also better than the rdoc/irb lexer.
  class Ruby < Scanner

    include Streamable

    register_for :ruby
    file_extension 'rb'

    helper :patterns

    # Provide a stand-in EncodingError constant when the runtime does not
    # define one.
    # NOTE(review): presumably needed for Ruby versions without encoding
    # support (1.8) -- confirm.
    if not defined? EncodingError
      EncodingError = Class.new Exception
    end

  private
    # Scans the scanner's string as Ruby code and appends the resulting
    # tokens to +tokens+.
    #
    # Tokens are appended with <tt><<</tt> as two-element arrays: either
    # <tt>[text, kind]</tt>, or <tt>[:open, kind]</tt> / <tt>[:close, kind]</tt>
    # markers that bracket strings, regexps, symbols, heredocs, shell
    # commands and <tt>#{...}</tt> inline blocks.
    #
    # tokens::  the collector the tokens are appended to; it is also the
    #           return value.
    # options:: accepted for interface compatibility; not read by this method.
    #
    # The main loop has two modes: while +state+ is a Patterns::StringState
    # the input is scanned as string content; otherwise +state+ is a Symbol
    # (:initial, :def_expected, :module_expected, :undef_expected,
    # :undef_comma_expected, :alias_expected) and the input is scanned as
    # ordinary code.
    def scan_tokens tokens, options
      if string.respond_to?(:encoding)
        # Encoding-aware strings: normalize the input to UTF-8, replacing
        # anything that cannot be converted with '?'.
        unless string.encoding == Encoding::UTF_8
          self.string = string.encode Encoding::UTF_8,
            :invalid => :replace, :undef => :replace, :replace => '?'
        end
        unicode = false
      else
        # No encoding support: use the /u regexp variants only if the input
        # contains non-ASCII bytes.
        unicode = exist?(/[^\x00-\x7f]/)
      end

      # :set right after a '.' or '::' operator; becomes true for exactly
      # the following token, then false again.
      last_token_dot = false
      # true / false / :set / :expect_colon -- whether a value (rather than
      # an operator) may follow. Branches request it with :set; the
      # bookkeeping after each token turns :set into true (or :expect_colon
      # after '?' / 'when') and everything else into false.
      value_expected = true
      # Heredoc string states opened on the current line; they are entered
      # one by one when the next newline is reached. nil when none pending.
      heredocs = nil
      # String state to return to after scanning a single code token
      # (used for "#$global" and "#@ivar" interpolation).
      last_state = nil
      state = :initial
      # Brace nesting depth inside the current #{...} inline block.
      depth = nil
      # One [state, depth, heredocs] entry per enclosing #{...} block.
      inline_block_stack = []


      patterns = Patterns  # avoid constant lookup

      until eos?
        match = nil
        kind = nil

        if state.instance_of? patterns::StringState
# {{{
          # Consume plain content up to the next position of interest
          # (or to the end of input if the pattern never matches).
          match = scan_until(state.pattern) || scan_until(/\z/)
          tokens << [match, :content] unless match.empty?
          break if eos?

          if state.heredoc and self[1]  # end of heredoc
            match = getch.to_s
            match << scan_until(/$/) unless eos?
            tokens << [match, :delimiter]
            tokens << [:close, state.type]
            state = state.next_state
            next
          end

          # Dispatch on the single special character that stopped the scan.
          case match = getch

          when state.delim
            if state.paren
              # Closing half of a paired delimiter: only the outermost one
              # terminates the string.
              state.paren_depth -= 1
              if state.paren_depth > 0
                tokens << [match, :nesting_delimiter]
                next
              end
            end
            tokens << [match, :delimiter]
            if state.type == :regexp and not eos?
              modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
              tokens << [modifiers, :modifier] unless modifiers.empty?
            end
            tokens << [:close, state.type]
            value_expected = false
            state = state.next_state

          when '\\'
            if state.interpreted
              if esc = scan(/ #{patterns::ESCAPE} /ox)
                tokens << [match + esc, :char]
              else
                tokens << [match, :error]
              end
            else
              # Non-interpreted strings: only the delimiter and the
              # backslash itself are emitted as :char; any other escaped
              # character is plain content.
              case m = getch
              when state.delim, '\\'
                tokens << [match + m, :char]
              when nil
                tokens << [match, :error]
              else
                tokens << [match + m, :content]
              end
            end

          when '#'
            case peek(1)
            when '{'
              # Start of #{...}: remember where we are and scan the block
              # body as ordinary code.
              inline_block_stack << [state, depth, heredocs]
              value_expected = true
              state = :initial
              depth = 1
              tokens << [:open, :inline]
              tokens << [match + getch, :inline_delimiter]
            when '$', '@'
              tokens << [match, :escape]
              last_state = state  # scan one token as normal code, then return here
              state = :initial
            else
              raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
            end

          when state.paren
            # Opening half of a paired delimiter nested inside the string.
            state.paren_depth += 1
            tokens << [match, :nesting_delimiter]

          when /#{patterns::REGEXP_SYMBOLS}/ox
            tokens << [match, :function]

          else
            raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens

          end
          next
# }}}
        else
# {{{
          if match = scan(/[ \t\f]+/)
            kind = :space
            # Do not swallow the newline while heredocs are pending: the
            # newline branch below has to see it to start them.
            match << scan(/\s*/) unless eos? || heredocs
            value_expected = true if match.index(?\n)
            tokens << [match, kind]
            next

          elsif match = scan(/\\?\n/)
            kind = :space
            if match == "\n"
              value_expected = true
              state = :initial if state == :undef_comma_expected
            end
            if heredocs
              unscan  # heredoc scanning needs \n at start
              state = heredocs.shift
              tokens << [:open, state.type]
              heredocs = nil if heredocs.empty?
              next
            else
              match << scan(/\s*/) unless eos?
            end
            tokens << [match, kind]
            next

          elsif bol? && match = scan(/\#!.*/)
            tokens << [match, :doctype]
            next

          elsif match = scan(/\#.*/) or
            ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
              kind = :comment
              tokens << [match, kind]
              next

          elsif state == :initial

            # IDENTS #
            if match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
                                      /#{patterns::METHOD_NAME}/o)
              if last_token_dot
                # After '.' or '::': a capitalized name is a constant unless
                # it is directly followed by '(' (then it is a method call).
                kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
              else
                # "name: " is a hash key, except right after '?' or 'when'
                # (value_expected == :expect_colon), where the colon belongs
                # to the surrounding construct.
                if value_expected != :expect_colon && scan(/:(?= )/)
                  tokens << [match, :key]
                  match = ':'
                  kind = :operator
                else
                  kind = patterns::IDENT_KIND[match]
                  if kind == :ident
                    if match[/\A[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
                      kind = :constant
                    end
                  elsif kind == :reserved
                    state = patterns::DEF_NEW_STATE[match]
                    value_expected = :set if patterns::KEYWORDS_EXPECTING_VALUE[match]
                  end
                end
              end
              value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)

            elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}|\(/o)
              kind = :ident
              value_expected = :set if check(unicode ? /#{patterns::VALUE_FOLLOWS}/uo :
                                                       /#{patterns::VALUE_FOLLOWS}/o)

            # OPERATORS #
            # The empty capture group () makes self[1] non-nil only when '.'
            # or '::' matched; that is what sets last_token_dot below.
            elsif not last_token_dot and match = scan(/ \.\.\.? | (?:\.|::)() | [,\(\)\[\]\{\}] | ==?=? /x)
              if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
                value_expected = :set
              end
              last_token_dot = :set if self[1]
              kind = :operator
              unless inline_block_stack.empty?
                # Inside #{...}: track braces to find the one closing the block.
                case match
                when '{'
                  depth += 1
                when '}'
                  depth -= 1
                  if depth == 0  # closing brace of inline block reached
                    state, depth, heredocs = inline_block_stack.pop
                    heredocs = nil if heredocs && heredocs.empty?
                    tokens << [match, :inline_delimiter]
                    kind = :inline
                    match = :close
                  end
                end
              end

            elsif match = scan(/ ['"] /mx)
              tokens << [:open, :string]
              kind = :delimiter
              state = patterns::StringState.new :string, match == '"', match  # important for streaming

            elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
                                         /#{patterns::INSTANCE_VARIABLE}/o)
              kind = :instance_variable

            elsif value_expected and match = scan(/\//)
              tokens << [:open, :regexp]
              kind = :delimiter
              interpreted = true
              state = patterns::StringState.new :regexp, interpreted, match

            # elsif match = scan(/[-+]?#{patterns::NUMERIC}/o)
            # A leading sign belongs to the number only where a value is expected.
            elsif match = value_expected ? scan(/[-+]?#{patterns::NUMERIC}/o) : scan(/#{patterns::NUMERIC}/o)
              # NOTE(review): group 1 of NUMERIC presumably captures the
              # float-only part -- confirm in Patterns.
              kind = self[1] ? :float : :integer

            elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
                                         /#{patterns::SYMBOL}/o)
              case delim = match[1]
              when ?', ?"
                # Quoted symbol (:'...' / :"..."): emit the colon and scan
                # the rest like a string.
                tokens << [:open, :symbol]
                tokens << [':', :symbol]
                match = delim.chr
                kind = :delimiter
                state = patterns::StringState.new :symbol, delim == ?", match
              else
                kind = :symbol
              end

            elsif match = scan(/ -[>=]? | [+!~^]=? | [*|&]{1,2}=? | >>? /x)
              value_expected = :set
              kind = :operator

            elsif value_expected and match = scan(unicode ? /#{patterns::HEREDOC_OPEN}/uo :
                                                            /#{patterns::HEREDOC_OPEN}/o)
              indented = self[1] == '-'
              quote = self[3]
              delim = self[quote ? 4 : 2]
              kind = patterns::QUOTE_TO_TYPE[quote]
              # The opener is emitted as a complete open/delimiter/close
              # group; the heredoc body is queued and scanned after the
              # next newline.
              tokens << [:open, kind]
              tokens << [match, :delimiter]
              match = :close
              heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart )
              heredocs ||= []  # create heredocs if empty
              heredocs << heredoc

            elsif value_expected and match = scan(/#{patterns::FANCY_START_CORRECT}/o)
              # The fallback block must declare the key it reports.
              # NOTE(review): assumes FancyStringType is a Hash, so that
              # fetch yields the missing key to the block -- confirm.
              kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do |k|
                raise_inspect 'Unknown fancy string: %%%p' % k, tokens
              end
              tokens << [:open, kind]
              state = patterns::StringState.new kind, interpreted, self[2]
              kind = :delimiter

            elsif value_expected and match = scan(unicode ? /#{patterns::CHARACTER}/uo :
                                                            /#{patterns::CHARACTER}/o)
              kind = :integer

            elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
              value_expected = :set
              kind = :operator

            elsif match = scan(/`/)
              if last_token_dot
                kind = :operator
              else
                tokens << [:open, :shell]
                kind = :delimiter
                state = patterns::StringState.new :shell, true, match
              end

            elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
                                         /#{patterns::GLOBAL_VARIABLE}/o)
              kind = :global_variable

            elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
                                         /#{patterns::CLASS_VARIABLE}/o)
              kind = :class_variable

            else
              if !unicode && !string.respond_to?(:encoding)
                # check for unicode
                # ($DEBUG is switched off around the probe and restored in
                # the ensure clause.)
                debug, $DEBUG = $DEBUG, false
                begin
                  if check(/./mu).size > 1
                    # seems like we should try again with unicode
                    unicode = true
                  end
                rescue
                  # bad unicode char; use getch
                ensure
                  $DEBUG = debug
                end
                next if unicode
              end
              # Nothing matched: emit a single character as an error token.
              kind = :error
              match = scan(unicode ? /./mu : /./m)

            end

          elsif state == :def_expected
            state = :initial
            if scan(/self\./)
              tokens << ['self', :pre_constant]
              tokens << ['.', :operator]
            end
            if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
                                      /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
              kind = :method
            else
              next
            end

          elsif state == :module_expected
            if match = scan(/<</)
              kind = :operator
            else
              state = :initial
              if match = scan(unicode ? /(?:#{patterns::IDENT}::)*#{patterns::IDENT}/uo :
                                        /(?:#{patterns::IDENT}::)*#{patterns::IDENT}/o)
                kind = :class
              else
                next
              end
            end

          elsif state == :undef_expected
            state = :undef_comma_expected
            if match = scan(unicode ? /#{patterns::METHOD_NAME_EX}/uo :
                                      /#{patterns::METHOD_NAME_EX}/o)
              kind = :method
            elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
                                         /#{patterns::SYMBOL}/o)
              case delim = match[1]
              when ?', ?"
                tokens << [:open, :symbol]
                tokens << [':', :symbol]
                match = delim.chr
                kind = :delimiter
                state = patterns::StringState.new :symbol, delim == ?", match
                # Continue the undef argument list once the symbol is closed.
                state.next_state = :undef_comma_expected
              else
                kind = :symbol
              end
            else
              state = :initial
              next
            end

          elsif state == :alias_expected
            match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
                                   /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)

            if match
              # Both alias operands and the space between them are emitted
              # here; a leading ':' marks an operand as a symbol.
              tokens << [self[1], (self[1][0] == ?: ? :symbol : :method)]
              tokens << [self[2], :space]
              tokens << [self[3], (self[3][0] == ?: ? :symbol : :method)]
            end
            state = :initial
            next

          elsif state == :undef_comma_expected
            if match = scan(/,/)
              kind = :operator
              state = :undef_expected
            else
              state = :initial
              next
            end

          end
# }}}

          unless kind == :error
            # Intentional assignment: collapse :set to true and everything
            # else to false, then special-case '?' and 'when'.
            if value_expected = value_expected == :set
              value_expected = :expect_colon if match == '?' || match == 'when'
            end
            last_token_dot = last_token_dot == :set
          end

          if $CODERAY_DEBUG and not kind
            raise_inspect 'Error token %p in line %d' %
              [[match, kind], line], tokens, state
          end
          raise_inspect 'Empty token', tokens unless match

          tokens << [match, kind]

          # Return to the interrupted string after one "#$x" / "#@x" token.
          if last_state
            state = last_state
            last_state = nil
          end
        end
      end

      # End of input: close every string and inline block that is still open.
      # Entries with more than one element were pushed for #{...} blocks.
      inline_block_stack << [state] if state.is_a? patterns::StringState
      until inline_block_stack.empty?
        this_block = inline_block_stack.pop
        tokens << [:close, :inline] if this_block.size > 1
        state = this_block.first
        tokens << [:close, state.type]
      end

      tokens
    end

  end

end
end
443

    
444
# vim:fdm=marker