module CodeRay

  # GZip library for writing and reading token dumps.
  autoload :GZip, 'coderay/helpers/gzip'

  # = Tokens TODO: Rewrite!
  #
  # The Tokens class represents a list of tokens returned from
  # a Scanner.
  #
  # A token is not a special object, just a two-element Array
  # consisting of
  # * the _token_ _text_ (the original source of the token in a String) or
  #   a _token_ _action_ (begin_group, end_group, begin_line, end_line)
  # * the _token_ _kind_ (a Symbol representing the type of the token)
  #
  # A token looks like this:
  #
  #   ['# It looks like this', :comment]
  #   ['3.1415926', :float]
  #   ['$^', :error]
  #
  # Some scanners also yield sub-tokens, represented by special
  # token actions, namely begin_group and end_group.
  #
  # The Ruby scanner, for example, splits "a string" into:
  #
  #   [
  #    [:begin_group, :string],
  #    ['"', :delimiter],
  #    ['a string', :content],
  #    ['"', :delimiter],
  #    [:end_group, :string]
  #   ]
  #
  # Tokens is the interface between Scanners and Encoders:
  # The input is split and saved into a Tokens object. The Encoder
  # then builds the output from this object.
  #
  # Thus, the syntax below becomes clear:
  #
  #   CodeRay.scan('price = 2.59', :ruby).html
  #   # the Tokens object is here -------^
  #
  # See how small it is? ;)
  #
  # Tokens gives you the power to handle pre-scanned code very easily:
  # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
  # that you put in your DB.
  #
  # It also allows you to generate tokens directly (without using a scanner),
  # to load them from a file, and still use any Encoder that CodeRay provides.
  class Tokens < Array

    # The Scanner instance that created the tokens.
    attr_accessor :scanner

    # Encode the tokens using encoder.
    #
    # encoder can be
    # * a symbol like :html or :statistic
    # * an Encoder class
    # * an Encoder object
    #
    # options are passed to the encoder.
    def encode encoder, options = {}
      # Symbols and Strings respond to :to_sym and are looked up in the
      # Encoders plugin registry; Encoder classes/instances are used as given.
      encoder = Encoders[encoder].new options if encoder.respond_to? :to_sym
      encoder.encode_tokens self, options
    end

    # Turn tokens into a string by concatenating them.
    def to_s
      encode CodeRay::Encoders::Encoder.new
    end

    # Redirects unknown methods to encoder calls.
    #
    # For example, if you call +tokens.html+, the HTML encoder
    # is used to highlight the tokens.
    def method_missing meth, options = {}
      encode meth, options
    rescue PluginHost::PluginNotFound
      super
    end

    # Split the tokens into parts of the given +sizes+.
    #
    # The result will be an Array of Tokens objects. The parts have
    # the text size specified by the parameter. In addition, each
    # part closes all opened tokens. This is useful to insert tokens
    # between them.
    #
    # This method is used by @Scanner#tokenize@ when called with an Array
    # of source strings. The Diff encoder uses it for inline highlighting.
    def split_into_parts *sizes
      parts = []
      opened = []
      content = nil
      part = Tokens.new
      part_size = 0
      size = sizes.first
      i = 0
      for item in self
        case content
        when nil
          content = item
        when String
          if size && part_size + content.size > size  # token must be cut
            if part_size < size  # some part of the token goes into this part
              content = content.dup  # content may not be safe to change
              part << content.slice!(0, size - part_size) << item
            end
            # close all open groups and lines...
            closing = opened.reverse.flatten.map do |content_or_kind|
              case content_or_kind
              when :begin_group
                :end_group
              when :begin_line
                :end_line
              else
                content_or_kind
              end
            end
            part.concat closing
            begin
              parts << part
              part = Tokens.new
              size = sizes[i += 1]
            end until size.nil? || size > 0
            # ...and open them again.
            part.concat opened.flatten
            part_size = 0
            redo unless content.empty?
          else
            part << content << item
            part_size += content.size
          end
          content = nil
        when Symbol
          case content
          when :begin_group, :begin_line
            opened << [content, item]
          when :end_group, :end_line
            opened.pop
          else
            raise ArgumentError, 'Unknown token action: %p, kind = %p' % [content, item]
          end
          part << content << item
          content = nil
        else
          raise ArgumentError, 'Token input junk: %p, kind = %p' % [content, item]
        end
      end
      parts << part
      parts << Tokens.new while parts.size < sizes.size
      parts
    end

    # Dumps the object into a String that can be saved
    # in files or databases.
    #
    # The dump is created with Marshal.dump;
    # In addition, it is gzipped using GZip.gzip.
    #
    # The returned String object includes Undumping
    # so it has an #undump method. See Tokens.load.
    #
    # You can configure the level of compression,
    # but the default value 7 should be what you want
    # in most cases as it is a good compromise between
    # speed and compression rate.
    #
    # See GZip module.
    def dump gzip_level = 7
      dump = Marshal.dump self
      dump = GZip.gzip dump, gzip_level
      dump.extend Undumping
    end

    # Return the actual number of tokens.
    #
    # Each token occupies two consecutive Array slots
    # (text-or-action, kind), so this is half the raw size.
    def count
      size / 2
    end

    # Include this module to give an object an #undump
    # method.
    #
    # The string returned by Tokens.dump includes Undumping.
    module Undumping
      # Calls Tokens.load with itself.
      def undump
        Tokens.load self
      end
    end

    # Undump the object by unzipping it using GZip.gunzip,
    # then restoring it with Marshal.load.
    #
    # The result is commonly a Tokens object, but
    # this is not guaranteed.
    def self.load dump
      dump = GZip.gunzip dump
      # SECURITY NOTE: Marshal.load can execute arbitrary code when fed a
      # crafted dump — only load dumps from trusted sources.
      Marshal.load dump
    end

    alias text_token push
    def begin_group kind; push :begin_group, kind end
    def end_group kind; push :end_group, kind end
    def begin_line kind; push :begin_line, kind end
    def end_line kind; push :end_line, kind end
    alias tokens concat

  end

end