view vendor/gems/coderay-1.0.0/lib/coderay/scanners/ruby.rb @ 1082:997f6d7738f7 bug_531

In repo controller entry action, show the page for the file even if it's binary (so user still has access to history etc links). This makes it possible to use the entry action as the default when a file is clicked on
author Chris Cannam <chris.cannam@soundsoftware.ac.uk>
date Thu, 22 Nov 2012 18:04:17 +0000
parents cbb26bc654de
children
line wrap: on
line source
module CodeRay
module Scanners
  
  # This scanner is really complex, since Ruby _is_ a complex language!
  #
  # It tries to highlight 100% of all common code,
  # and 90% of strange code.
  #
  # It is optimized for HTML highlighting, and is not very useful for
  # parsing or pretty printing.
  class Ruby < Scanner
    
    # Class-level declarations: the scanner is declared for the :ruby key
    # and the 'rb' file extension.
    # NOTE(review): register_for and file_extension are not defined in this
    # file; presumably they come from Scanner or its plugin machinery - confirm.
    register_for :ruby
    file_extension 'rb'
    
    # Helper constants are autoloaded: Ruby requires the given file the first
    # time the constant is referenced inside this class.
    autoload :Patterns,    'coderay/scanners/ruby/patterns'
    autoload :StringState, 'coderay/scanners/ruby/string_state'
    
    # Returns a fresh StringState of type :string, flagged as interpreted
    # and delimited by a double quote - the same arguments scan_tokens uses
    # when it meets an opening '"'.
    def interpreted_string_state
      kind        = :string
      interpreted = true
      delimiter   = '"'
      StringState.new(kind, interpreted, delimiter)
    end
    
  protected
    
    # Resets the remembered scan state to :initial. scan_tokens reads @state
    # as its start state when no :state option is given.
    # NOTE(review): presumably invoked by the Scanner base class as a setup
    # hook - confirm against Scanner.
    def setup
      initial_state = :initial
      @state = initial_state
    end
    
    # Tokenizes the scanner's input as Ruby source and reports the result to
    # +encoder+ through text_token / begin_group / end_group calls.
    #
    # The main loop works in one of two modes, chosen by the class of +state+:
    # * a Symbol (:initial, :def_expected, :dot_expected, :module_expected,
    #   :undef_expected, :undef_comma_expected, :alias_expected) while
    #   scanning ordinary code;
    # * a StringState while scanning the inside of a string-like literal
    #   (e.g. :string, :symbol, :regexp, :shell, or a heredoc).
    #
    # encoder:: object receiving the tokens and groups.
    # options:: +:state+ overrides the remembered @state as start state;
    #           +:keep_state+, when truthy, stores the final state together
    #           with any pending heredocs back into @state.
    #
    # Returns +encoder+.
    def scan_tokens encoder, options
      # The start state may be a bare state or a [state, heredocs] pair (the
      # form written to @state at the end of this method); the multiple
      # assignment handles both.
      state, heredocs = options[:state] || @state
      # Work on a copy, since the queue is consumed with #shift below.
      heredocs = heredocs.dup if heredocs.is_a?(Array)
      
      # Resuming inside a string-like literal: reopen its token group.
      if state && state.instance_of?(StringState)
        encoder.begin_group state.type
      end
      
      # State to switch back to after one token has been scanned in :initial
      # (applied at the end of the :initial branch).
      last_state = nil
      
      # Holds the matched '.' or '::' right after such an operator, so that the
      # next token is scanned as a method or constant name; false/nil otherwise.
      method_call_expected = false
      # Truthy where a value may begin. It guards the branches for regexps,
      # signed numbers, heredocs, fancy strings and character literals.
      value_expected = true
      
      # Stack of [state, curly depth, heredocs] saved at every "#{" that is
      # still open, plus the brace depth inside the innermost one.
      inline_block_stack = nil
      inline_block_curly_depth = 0
      
      # Heredoc bodies still pending from the start state: continue with the
      # first one.
      if heredocs
        state = heredocs.shift
        encoder.begin_group state.type
        heredocs = nil if heredocs.empty?
      end
      
      # def_object_stack = nil
      # def_object_paren_depth = 0
      
      patterns = Patterns  # avoid constant lookup
      
      # Selects the /u variants of the identifier regexps below. True when the
      # input String knows its encoding and that encoding is UTF-8; may also be
      # switched on later by the probe in the fallback branch.
      unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
      
      until eos?
        
        if state.instance_of? ::Symbol
          
          if match = scan(/[ \t\f\v]+/)
            encoder.text_token match, :space
            
          elsif match = scan(/\n/)
            if heredocs
              # A heredoc was opened on this line: its body starts here.
              unscan  # heredoc scanning needs \n at start
              state = heredocs.shift
              encoder.begin_group state.type
              heredocs = nil if heredocs.empty?
            else
              # A newline ends an undef list.
              state = :initial if state == :undef_comma_expected
              encoder.text_token match, :space
              value_expected = true
            end
            
          elsif match = scan(bol? ? / \#(!)?.* | #{patterns::RUBYDOC_OR_DATA} /ox : /\#.*/)
            # At the beginning of a line a comment starting with "#!" is
            # reported as :doctype (capture group 1 is the "!"), and the
            # RUBYDOC_OR_DATA pattern is accepted as a comment as well.
            # NOTE(review): presumably =begin/=end blocks and __END__ data -
            # confirm in Patterns.
            encoder.text_token match, self[1] ? :doctype : :comment
            
          elsif match = scan(/\\\n/)
            # Backslash followed by a newline.
            if heredocs
              unscan  # heredoc scanning needs \n at start
              # Re-scan only the backslash; the newline is left for the heredoc.
              encoder.text_token scan(/\\/), :space
              state = heredocs.shift
              encoder.begin_group state.type
              heredocs = nil if heredocs.empty?
            else
              encoder.text_token match, :space
            end
            
          elsif state == :initial
            
            # IDENTS #
            if !method_call_expected &&
               match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
                                      /#{patterns::METHOD_NAME}/o)
              value_expected = false
              kind = patterns::IDENT_KIND[match]
              if kind == :ident
                # Capitalized, not ending in ! or ?, and not followed by "(":
                # treat as a constant rather than a method call.
                if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/))
                  kind = :constant
                end
              elsif kind == :keyword
                # Keywords select the follow-up state from a lookup table.
                state = patterns::KEYWORD_NEW_STATE[match]
                value_expected = true if patterns::KEYWORDS_EXPECTING_VALUE[match]
              end
              value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o)
              encoder.text_token match, kind
              
            elsif method_call_expected &&
               match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo :
                                      /#{patterns::METHOD_AFTER_DOT}/o)
              # Name after "." or "::". Only a capitalized name after "::" that
              # is not followed by "(" counts as a constant.
              if method_call_expected == '::' && match[/\A[A-Z]/] && !match?(/\(/)
                encoder.text_token match, :constant
              else
                encoder.text_token match, :ident
              end
              method_call_expected = false
              value_expected = check(/#{patterns::VALUE_FOLLOWS}/o)
              
            # OPERATORS #
            elsif !method_call_expected && match = scan(/ (\.(?!\.)|::) | (?: \.\.\.? | ==?=? | [,\(\[\{] )() | [\)\]\}] /x)
              # Group 1 captures "." / "::" (a method name follows). Group 2 is
              # an empty group that only participates for the operators after
              # which a value is expected; "" is truthy in Ruby, nil is not.
              method_call_expected = self[1]
              value_expected = !method_call_expected && self[2]
              if inline_block_stack
                # Inside "#{...}": count braces to find the closing one.
                case match
                when '{'
                  inline_block_curly_depth += 1
                when '}'
                  inline_block_curly_depth -= 1
                  if inline_block_curly_depth == 0  # closing brace of inline block reached
                    # Restore what was saved when the inline block was opened.
                    state, inline_block_curly_depth, heredocs = inline_block_stack.pop
                    inline_block_stack = nil if inline_block_stack.empty?
                    heredocs = nil if heredocs && heredocs.empty?
                    encoder.text_token match, :inline_delimiter
                    encoder.end_group :inline
                    next
                  end
                end
              end
              encoder.text_token match, :operator
              
            elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
                                         /#{patterns::SYMBOL}/o)
              # match[1] is the character after the colon. A quote starts a
              # quoted symbol whose content is scanned in a StringState
              # (interpreted only for double quotes).
              case delim = match[1]
              when ?', ?"
                encoder.begin_group :symbol
                encoder.text_token ':', :symbol
                match = delim.chr
                encoder.text_token match, :delimiter
                state = self.class::StringState.new :symbol, delim == ?", match
              else
                encoder.text_token match, :symbol
                value_expected = false
              end
              
            elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx)
              # Either a complete simple string (no backslash and, for double
              # quotes, no "#") matched in one go, or just the opening quote.
              encoder.begin_group :string
              if match.size == 1
                # Only the opening quote matched: scan the rest in a StringState.
                encoder.text_token match, :delimiter
                state = self.class::StringState.new :string, match == '"', match  # important for streaming
              else
                encoder.text_token match[0,1], :delimiter
                encoder.text_token match[1..-2], :content if match.size > 2
                encoder.text_token match[-1,1], :delimiter
                encoder.end_group :string
                value_expected = false
              end
              
            elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
                                         /#{patterns::INSTANCE_VARIABLE}/o)
              value_expected = false
              encoder.text_token match, :instance_variable
              
            elsif value_expected && match = scan(/\//)
              # A slash where a value may begin opens a regexp; otherwise it is
              # picked up by the operator branch further down.
              encoder.begin_group :regexp
              encoder.text_token match, :delimiter
              state = self.class::StringState.new :regexp, true, '/'
              
            elsif match = scan(value_expected ? /[-+]?#{patterns::NUMERIC}/o : /#{patterns::NUMERIC}/o)
              # A leading sign belongs to the number only where a value may begin.
              if method_call_expected
                # A number directly after "." or "::" is reported as an error.
                encoder.text_token match, :error
                method_call_expected = false
              else
                # Capture group 1 (from NUMERIC) being set marks a float.
                encoder.text_token match, self[1] ? :float : :integer  # TODO: send :hex/:octal/:binary
              end
              value_expected = false
              
            elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x)
              value_expected = true
              encoder.text_token match, :operator
              
            elsif value_expected && match = scan(/#{patterns::HEREDOC_OPEN}/o)
              # Heredoc opener: emit it as a group of its own and queue a
              # StringState; the body is scanned from the next newline on.
              # Capture 3 is the quote (if any), capture 4 or 2 the terminator,
              # capture 1 the optional "-". A single-quoted heredoc is not
              # interpreted.
              quote = self[3]
              delim = self[quote ? 4 : 2]
              kind = patterns::QUOTE_TO_TYPE[quote]
              encoder.begin_group kind
              encoder.text_token match, :delimiter
              encoder.end_group kind
              heredocs ||= []  # create heredocs if empty
              heredocs << self.class::StringState.new(kind, quote != "'", delim,
                self[1] == '-' ? :indented : :linestart)
              value_expected = false
              
            elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o)
              # NOTE(review): presumably %-literals (%w, %q, %r, ...) - confirm
              # in Patterns. Capture 1 selects kind and interpretation via
              # lookup tables, capture 2 is the delimiter.
              kind = patterns::FANCY_STRING_KIND[self[1]]
              encoder.begin_group kind
              state = self.class::StringState.new kind, patterns::FANCY_STRING_INTERPRETED[self[1]], self[2]
              encoder.text_token match, :delimiter
              
            elsif value_expected && match = scan(/#{patterns::CHARACTER}/o)
              # Character literals are reported with the :integer kind.
              value_expected = false
              encoder.text_token match, :integer
              
            elsif match = scan(/ %=? | <(?:<|=>?)? | \? /x)
              value_expected = true
              encoder.text_token match, :operator
              
            elsif match = scan(/`/)
              encoder.begin_group :shell
              encoder.text_token match, :delimiter
              state = self.class::StringState.new :shell, true, match
              
            elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
                                         /#{patterns::GLOBAL_VARIABLE}/o)
              encoder.text_token match, :global_variable
              value_expected = false
              
            elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
                                         /#{patterns::CLASS_VARIABLE}/o)
              encoder.text_token match, :class_variable
              value_expected = false
              
            elsif match = scan(/\\\z/)
              # Lone backslash at the very end of the input.
              encoder.text_token match, :space
              
            else
              # Nothing matched at this position.
              if method_call_expected
                # Drop the "method name expected" mode and retry the same
                # position with the regular rules.
                method_call_expected = false
                next
              end
              unless unicode
                # check for unicode
                # A single /u-matched character whose size is greater than 1
                # switches the scanner to the /u regexps, and the position is
                # retried.
                # NOTE(review): size > 1 presumably only happens where
                # String#size counts bytes (Ruby 1.8) - confirm.
                # NOTE(review): $DEBUG is switched off around the probe,
                # presumably to keep a failing match quiet in debug mode; the
                # old value is parked in a global ($DEBUG_BEFORE), not a local.
                $DEBUG_BEFORE, $DEBUG = $DEBUG, false
                begin
                  if check(/./mu).size > 1
                    # seems like we should try again with unicode
                    unicode = true
                  end
                rescue
                  # bad unicode char; use getch
                ensure
                  $DEBUG = $DEBUG_BEFORE
                end
                next if unicode
              end
              
              # Give up on this character: report it as an error and move on.
              encoder.text_token getch, :error
              
            end
            
            # One-shot return to the state that requested a single :initial token.
            if last_state
              state = last_state
              last_state = nil
            end
            
          elsif state == :def_expected
            # After "def": a name not followed by "." or "::" is the method
            # name. Otherwise scan one token in :initial (the receiver) and
            # continue in :dot_expected.
            if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
                                      /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
              encoder.text_token match, :method
              state = :initial
            else
              last_state = :dot_expected
              state = :initial
            end
            
          elsif state == :dot_expected
            # After the receiver token: "." or "::" leads back to
            # :def_expected for the method name.
            if match = scan(/\.|::/)
              # invalid definition
              state = :def_expected
              encoder.text_token match, :operator
            else
              state = :initial
            end
            
          elsif state == :module_expected
            # "<<" keeps this state; anything else leaves it, and a (possibly
            # "::"-qualified) name is reported as :class.
            if match = scan(/<</)
              encoder.text_token match, :operator
            else
              state = :initial
              if match = scan(unicode ? / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /oux :
                                        / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
                encoder.text_token match, :class
              end
            end
            
          elsif state == :undef_expected
            # One item of an undef list: a method name or a symbol. A comma is
            # expected next, unless neither form matches.
            state = :undef_comma_expected
            if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
                                      /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
              encoder.text_token match, :method
            # NOTE(review): unlike the :initial branch, SYMBOL is scanned here
            # without a /u variant - confirm whether that is intended.
            elsif match = scan(/#{patterns::SYMBOL}/o)
              case delim = match[1]
              when ?', ?"
                encoder.begin_group :symbol
                encoder.text_token ':', :symbol
                match = delim.chr
                encoder.text_token match, :delimiter
                state = self.class::StringState.new :symbol, delim == ?", match
                # Come back to the undef list once the quoted symbol is closed.
                state.next_state = :undef_comma_expected
              else
                encoder.text_token match, :symbol
              end
            else
              state = :initial
            end
            
          elsif state == :undef_comma_expected
            # A comma continues the undef list; anything else ends it.
            if match = scan(/,/)
              encoder.text_token match, :operator
              state = :undef_expected
            else
              state = :initial
            end
            
          elsif state == :alias_expected
            # After "alias": two names or symbols separated by blanks are
            # matched in one go; each is a :symbol if it starts with ":",
            # otherwise a :method. The state is left either way.
            match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
                                   /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
            
            if match
              encoder.text_token self[1], (self[1][0] == ?: ? :symbol : :method)
              encoder.text_token self[2], :space
              encoder.text_token self[3], (self[3][0] == ?: ? :symbol : :method)
            end
            state = :initial
            
          else
            #:nocov:
            raise_inspect 'Unknown state: %p' % [state], encoder
            #:nocov:
          end
          
        else  # StringState
          
          # Plain content up to the next position matched by the state's
          # pattern, or everything that is left if it never matches.
          # NOTE(review): the pattern presumably matches without consuming the
          # special character, since getch below reads it - confirm in
          # StringState.
          match = scan_until(state.pattern) || scan_rest
          unless match.empty?
            encoder.text_token match, :content
            break if eos?
          end
          
          if state.heredoc && self[1]  # end of heredoc
            # The rest of the line is reported as the closing delimiter.
            match = getch
            match << scan_until(/$/) unless eos?
            encoder.text_token match, :delimiter unless match.empty?
            encoder.end_group state.type
            state = state.next_state
            next
          end
          
          # Dispatch on the special character itself.
          case match = getch
          
          when state.delim
            # With nesting delimiters only the outermost closer ends the
            # literal; inner ones are content.
            if state.paren_depth
              state.paren_depth -= 1
              if state.paren_depth > 0
                encoder.text_token match, :content
                next
              end
            end
            encoder.text_token match, :delimiter
            if state.type == :regexp && !eos?
              # NOTE(review): match.empty? assumes REGEXP_MODIFIERS can match
              # the empty string (scan would return nil otherwise) - confirm.
              match = scan(/#{patterns::REGEXP_MODIFIERS}/o)
              encoder.text_token match, :modifier unless match.empty?
            end
            encoder.end_group state.type
            value_expected = false
            state = state.next_state
            
          when '\\'
            if state.interpreted
              # Interpreted literal: a valid escape is a :char, a lone
              # backslash an :error.
              if esc = scan(/#{patterns::ESCAPE}/o)
                encoder.text_token match + esc, :char
              else
                encoder.text_token match, :error
              end
            else
              # Non-interpreted literal: only the delimiter and the backslash
              # itself are escapes; everything else is plain content.
              case esc = getch
              when nil
                encoder.text_token match, :content
              when state.delim, '\\'
                encoder.text_token match + esc, :char
              else
                encoder.text_token match + esc, :content
              end
            end
            
          when '#'
            case peek(1)
            when '{'
              # "#{": save the string state and scan the embedded code in
              # :initial until the matching "}" (handled in the operator branch).
              inline_block_stack ||= []
              inline_block_stack << [state, inline_block_curly_depth, heredocs]
              value_expected = true
              state = :initial
              inline_block_curly_depth = 1
              encoder.begin_group :inline
              encoder.text_token match + getch, :inline_delimiter
            when '$', '@'
              # "#$var" / "#@var": scan one token in :initial, then return to
              # this string state via last_state.
              encoder.text_token match, :escape
              last_state = state
              state = :initial
            else
              #:nocov:
              raise_inspect 'else-case # reached; #%p not handled' % [peek(1)], encoder
              #:nocov:
            end
            
          when state.opening_paren
            # Nested opening delimiter: one more closer is needed.
            state.paren_depth += 1
            encoder.text_token match, :content
            
          else
            #:nocov:
            raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], encoder
            #:nocov:
            
          end
          
        end
        
      end
      
      # cleaning up
      # Input ended inside a string-like literal: close its group.
      if state.is_a? StringState
        encoder.end_group state.type
      end
      
      if options[:keep_state]
        # An unfinished heredoc goes back to the front of the heredoc queue, so
        # that the next run resumes it from the queue.
        if state.is_a?(StringState) && state.heredoc
          (heredocs ||= []).unshift state
          state = :initial
        elsif heredocs && heredocs.empty?
          heredocs = nil
        end
        @state = state, heredocs
      end
      
      # Close the groups of every "#{" that was never closed, together with
      # the group of the literal that contained it.
      if inline_block_stack
        until inline_block_stack.empty?
          state, = *inline_block_stack.pop
          encoder.end_group :inline
          encoder.end_group state.type
        end
      end
      
      encoder
    end
    
  end
  
end
end