diff vendor/gems/coderay-1.0.0/lib/coderay/scanner.rb @ 909:cbb26bc654de redmine-1.3

Update to Redmine 1.3-stable branch (Redmine SVN rev 8964)
author Chris Cannam
date Fri, 24 Feb 2012 19:09:32 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vendor/gems/coderay-1.0.0/lib/coderay/scanner.rb	Fri Feb 24 19:09:32 2012 +0000
@@ -0,0 +1,323 @@
+# encoding: utf-8
+require 'strscan'
+
+module CodeRay
+
+  autoload :WordList, 'coderay/helpers/word_list'
+  
+  # = Scanners
+  #
+  # This module holds the Scanner class and its subclasses.
+  # For example, the Ruby scanner is named CodeRay::Scanners::Ruby and
+  # can be found in coderay/scanners/ruby.
+  #
+  # Scanner also provides methods and constants for the register
+  # mechanism and the [] method that returns the Scanner class
+  # belonging to the given lang.
+  #
+  # See PluginHost.
+  module Scanners
+    extend PluginHost
+    plugin_path File.dirname(__FILE__), 'scanners'
+    
+    
+    # = Scanner
+    #
+    # The base class for all Scanners.
+    #
+    # It is a subclass of Ruby's great +StringScanner+, which
+    # makes it easy to access the scanning methods inside.
+    #
+    # It is also +Enumerable+, so you can use it like an Array of
+    # Tokens:
+    #
+    #   require 'coderay'
+    #   
+    #   c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;"
+    #   
+    #   for text, kind in c_scanner
+    #     puts text if kind == :operator
+    #   end
+    #   
+    #   # prints: (*==)++;
+    #
+    # OK, this is a very simple example :)
+    # You can also use +map+, +any?+, +find+ and even +sort_by+,
+    # if you want.
+    class Scanner < StringScanner
+      
+      extend Plugin
+      plugin_host Scanners
+      
+      # Raised if a Scanner fails while scanning
+      ScanError = Class.new StandardError
+      
+      # The default options for all scanner classes.
+      #
+      # Subclasses override them by defining their own DEFAULT_OPTIONS constant.
+      DEFAULT_OPTIONS = { }
+      
+      KINDS_NOT_LOC = [:comment, :doctype, :docstring]
+      
+      attr_accessor :state
+      
+      class << self
+        
+        # Normalizes the given code into a string with UNIX newlines, in the
+        # scanner's internal encoding, with invalid and undefined characters
+        # replaced by placeholders. May return the very object it was given.
+        def normalize code
+          # Anything that is not a String is converted with #to_s first.
+          code = code.to_s unless code.is_a? ::String
+          return code if code.empty?
+          
+          if code.respond_to? :encoding
+            code = encode_with_encoding code, self.encoding
+          else
+            code = to_unix code
+          end
+          # No dup is made here, so an unchanged String comes back as the same object.
+          code
+        end
+        
+        # The typical filename suffix for this scanner's language (fixed by the first call).
+        def file_extension extension = lang
+          @file_extension ||= extension.to_s
+        end
+        
+        # The encoding used internally by this scanner, looked up by +name+ and then cached.
+        def encoding name = 'UTF-8'
+          @encoding ||= defined?(Encoding.find) && Encoding.find(name)
+        end
+        
+        # The lang of this Scanner class, which is equal to its Plugin ID (read from @plugin_id).
+        def lang
+          @plugin_id
+        end
+        
+      protected
+        
+        def encode_with_encoding code, target_encoding
+          if code.encoding == target_encoding
+            if code.valid_encoding?
+              return to_unix(code)  # already valid in the target encoding: only fix newlines
+            else
+              source_encoding = guess_encoding code  # the declared encoding does not fit the bytes
+            end
+          else
+            source_encoding = code.encoding
+          end
+          # Transcode with universal newlines, replacing invalid and undefined characters.
+          code.encode target_encoding, source_encoding, :universal_newline => true, :undef => :replace, :invalid => :replace
+        end
+        
+        def to_unix code
+          code.index(?\r) ? code.gsub(/\r\n?/, "\n") : code  # CRLF and lone CR become LF; no CR, no copy
+        end
+        
+        def guess_encoding s
+          #:nocov:
+          IO.popen("file -b --mime -", "w+") do |file|
+            file.write s[0, 1024]
+            file.close_write
+            begin
+              Encoding.find file.gets[/charset=([-\w]+)/, 1]
+            rescue ArgumentError
+              Encoding::BINARY
+            end
+          end
+          #:nocov:
+        end
+        
+      end
+      
+      # Create a new Scanner.
+      #
+      # * +code+ is the input String; it is normalized and handed to the
+      #   superclass StringScanner.
+      # * +options+ is a Hash with Symbols as keys.
+      #   It is merged with the default options of the class (you can
+      #   overwrite default options here.)
+      #
+      # Tokens go to options[:tokens] if given. Else, a Tokens object is used.
+      def initialize code = '', options = {}
+        if self.class == Scanner
+          raise NotImplementedError, "I am only the basic Scanner class. I can't scan anything. :( Use my subclasses."
+        end
+        
+        @options = self.class::DEFAULT_OPTIONS.merge options
+        
+        super self.class.normalize(code)
+        
+        @tokens = options[:tokens] || Tokens.new
+        @tokens.scanner = self if @tokens.respond_to? :scanner=
+        
+        setup  # per-instance hook for subclasses
+      end
+      
+      # Resets the scanner and its per-scan state. Subclasses should redefine
+      # the reset_instance method instead of this one.
+      def reset
+        super
+        reset_instance
+      end
+      
+      # Set a new string to be scanned; it is normalized first and per-scan state is reset.
+      def string= code
+        code = self.class.normalize(code)
+        super code
+        reset_instance
+      end
+      
+      # The Plugin ID for this scanner, as reported by its class.
+      def lang
+        self.class.lang
+      end
+      
+      # The default file extension for this scanner, as reported by its class.
+      def file_extension
+        self.class.file_extension
+      end
+      
+      # Scans +source+ and returns all tokens, by default in a Tokens object.
+      def tokenize source = nil, options = {}
+        options = @options.merge(options)
+        @tokens = options[:tokens] || @tokens || Tokens.new
+        @tokens.scanner = self if @tokens.respond_to? :scanner=
+        case source
+        when Array  # the parts are scanned as one string; the result is split again below
+          self.string = self.class.normalize(source.join)
+        when nil  # no new source: rescan the current string
+          reset
+        else
+          self.string = self.class.normalize(source)
+        end
+        
+        begin
+          scan_tokens @tokens, options
+        rescue => e
+          message = "Error in %s#scan_tokens, initial state was: %p" % [self.class, defined?(state) && state]
+          raise_inspect e.message, @tokens, message, 30, e.backtrace
+        end
+        
+        @cached_tokens = @tokens
+        if source.is_a? Array
+          @tokens.split_into_parts(*source.map { |part| part.size })
+        else
+          @tokens
+        end
+      end
+      
+      # The tokens of the current string: tokenizes on first use and caches the result.
+      def tokens
+        @cached_tokens ||= tokenize
+      end
+      
+      # Traverse the tokens (see #tokens), passing the block on to their #each.
+      def each &block
+        tokens.each(&block)
+      end
+      include Enumerable
+      
+      # The line number at byte position +pos+ (default: the current scanner
+      # position), starting with 1. See also: #column.
+      #
+      # Beware, this is implemented inefficiently. It should be used
+      # for debugging only.
+      def line pos = self.pos
+        return 1 if pos <= 0
+        binary_string[0...pos].count("\n") + 1
+      end
+      
+      # The column at byte position +pos+ (default: the current scanner
+      # position), counted in bytes and starting with 1. See also: #line.
+      def column pos = self.pos
+        return 1 if pos <= 0
+        pos - (binary_string.rindex(?\n, pos - 1) || -1)
+      end
+      
+      # The string in binary encoding (cached; the string itself is used
+      # when its byte count and character count agree).
+      # To be used with #pos, which is the index of the byte the scanner
+      # will scan next.
+      def binary_string
+        @binary_string ||=
+          if string.respond_to?(:bytesize) && string.bytesize != string.size
+            #:nocov:
+            string.dup.force_encoding('binary')
+            #:nocov:
+          else
+            string
+          end
+      end
+      
+    protected
+      
+      # Can be implemented by subclasses to do some initialization
+      # that has to be done once per instance.
+      #
+      # Use reset_instance for initialization that has to be done once
+      # per scan.
+      def setup  # :doc:
+      end
+      
+      # This is the central method, and commonly the only one a
+      # subclass implements. This base version raises NotImplementedError.
+      #
+      # Subclasses must implement this method; it must return +tokens+
+      # and must only use Tokens#<< for storing scanned tokens!
+      def scan_tokens tokens, options  # :doc:
+        raise NotImplementedError, "#{self.class}#scan_tokens not implemented."
+      end
+      
+      # Resets per-scan state: clears the tokens (unless :keep_tokens is set) and drops the caches.
+      def reset_instance
+        @tokens.clear if @tokens.respond_to?(:clear) && !@options[:keep_tokens]
+        @cached_tokens = nil
+        @binary_string = nil if defined? @binary_string
+      end
+      
+      # Scanner error with additional status information
+      def raise_inspect msg, tokens, state = self.state || 'No state given!', ambit = 30, backtrace = caller
+        raise ScanError, <<-EOE % [
+
+
+***ERROR in %s: %s (after %d tokens)
+
+tokens:
+%s
+
+current line: %d  column: %d  pos: %d
+matched: %p  state: %p
+bol? = %p,  eos? = %p
+
+surrounding code:
+%p  ~~  %p
+
+
+***ERROR***
+
+        EOE
+          File.basename(caller[0]),
+          msg,
+          tokens.respond_to?(:size) ? tokens.size : 0,
+          tokens.respond_to?(:last) ? tokens.last(10).map { |t| t.inspect }.join("\n") : '',
+          line, column, pos,
+          matched, state, bol?, eos?,
+          binary_string[pos - ambit, ambit],
+          binary_string[pos, ambit],
+        ], backtrace
+      end
+      
+      # Shorthand for scan_until(/\z/): returns the rest and moves the scanner to the end.
+      # This method also avoids a JRuby 1.9 mode bug.
+      def scan_rest
+        rest = self.rest
+        terminate
+        rest
+      end
+      
+    end
+    
+  end
+end
\ No newline at end of file