module CodeRay

  # GZip library for writing and reading token dumps.
  autoload :GZip, 'coderay/helpers/gzip'

  # = Tokens TODO: Rewrite!
  #
  # The Tokens class represents a list of tokens returned from
  # a Scanner.
  #
  # A token is not a special object, just a two-element Array
  # consisting of
  # * the _token_ _text_ (the original source of the token in a String) or
  #   a _token_ _action_ (begin_group, end_group, begin_line, end_line)
  # * the _token_ _kind_ (a Symbol representing the type of the token)
  #
  # A token looks like this:
  #
  #   ['# It looks like this', :comment]
  #   ['3.1415926', :float]
  #   ['$^', :error]
  #
  # Some scanners also yield sub-tokens, represented by special
  # token actions, namely begin_group and end_group.
  #
  # The Ruby scanner, for example, splits "a string" into:
  #
  #   [
  #    [:begin_group, :string],
  #    ['"', :delimiter],
  #    ['a string', :content],
  #    ['"', :delimiter],
  #    [:end_group, :string]
  #   ]
  #
  # Tokens is the interface between Scanners and Encoders:
  # The input is split and saved into a Tokens object. The Encoder
  # then builds the output from this object.
  #
  # Thus, the syntax below becomes clear:
  #
  #   CodeRay.scan('price = 2.59', :ruby).html
  #   # the Tokens object is here -------^
  #
  # See how small it is? ;)
  #
  # Tokens gives you the power to handle pre-scanned code very easily:
  # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
  # that you put in your DB.
  #
  # It also allows you to generate tokens directly (without using a scanner),
  # to load them from a file, and still use any Encoder that CodeRay provides.
  class Tokens < Array

    # The Scanner instance that created the tokens.
    attr_accessor :scanner

    # Encode the tokens using encoder.
    #
    # encoder can be
    # * a symbol like :html or :statistic
    # * an Encoder class
    # * an Encoder object
    #
    # options are passed to the encoder.
    def encode encoder, options = {}
      # Symbols and Strings respond to :to_sym and are looked up in the
      # Encoders plugin registry; Encoder classes/instances are used as given.
      encoder = Encoders[encoder].new options if encoder.respond_to? :to_sym
      encoder.encode_tokens self, options
    end

    # Turn tokens into a string by concatenating them.
    def to_s
      encode CodeRay::Encoders::Encoder.new
    end

    # Redirects unknown methods to encoder calls.
    #
    # For example, if you call +tokens.html+, the HTML encoder
    # is used to highlight the tokens.
    def method_missing meth, options = {}
      encode meth, options
    rescue PluginHost::PluginNotFound
      super
    end

    # Split the tokens into parts of the given +sizes+.
    #
    # The result will be an Array of Tokens objects. The parts have
    # the text size specified by the parameter. In addition, each
    # part closes all opened tokens. This is useful to insert tokens
    # between them.
    #
    # This method is used by @Scanner#tokenize@ when called with an Array
    # of source strings. The Diff encoder uses it for inline highlighting.
    def split_into_parts *sizes
      parts = []
      opened = []
      content = nil
      part = Tokens.new
      part_size = 0
      size = sizes.first
      i = 0
      for item in self
        case content
        when nil
          content = item
        when String
          if size && part_size + content.size > size  # token must be cut
            if part_size < size  # some part of the token goes into this part
              content = content.dup  # content may not be safe to change
              part << content.slice!(0, size - part_size) << item
            end
            # close all open groups and lines...
            closing = opened.reverse.flatten.map do |content_or_kind|
              case content_or_kind
              when :begin_group
                :end_group
              when :begin_line
                :end_line
              else
                content_or_kind
              end
            end
            part.concat closing
            begin
              parts << part
              part = Tokens.new
              size = sizes[i += 1]
            end until size.nil? || size > 0
            # ...and open them again.
            part.concat opened.flatten
            part_size = 0
            redo unless content.empty?
          else
            part << content << item
            part_size += content.size
          end
          content = nil
        when Symbol
          case content
          when :begin_group, :begin_line
            opened << [content, item]
          when :end_group, :end_line
            opened.pop
          else
            raise ArgumentError, 'Unknown token action: %p, kind = %p' % [content, item]
          end
          part << content << item
          content = nil
        else
          raise ArgumentError, 'Token input junk: %p, kind = %p' % [content, item]
        end
      end
      parts << part
      parts << Tokens.new while parts.size < sizes.size
      parts
    end

    # Dumps the object into a String that can be saved
    # in files or databases.
    #
    # The dump is created with Marshal.dump;
    # In addition, it is gzipped using GZip.gzip.
    #
    # The returned String object includes Undumping
    # so it has an #undump method. See Tokens.load.
    #
    # You can configure the level of compression,
    # but the default value 7 should be what you want
    # in most cases as it is a good compromise between
    # speed and compression rate.
    #
    # See GZip module.
    def dump gzip_level = 7
      dump = Marshal.dump self
      dump = GZip.gzip dump, gzip_level
      dump.extend Undumping
    end

    # Return the actual number of tokens.
    #
    # Each token occupies two consecutive Array slots
    # (text-or-action, kind), so this is half the raw size.
    def count
      size / 2
    end

    # Include this module to give an object an #undump
    # method.
    #
    # The string returned by Tokens.dump includes Undumping.
    module Undumping
      # Calls Tokens.load with itself.
      def undump
        Tokens.load self
      end
    end

    # Undump the object by unzipping it using GZip.gunzip,
    # then restoring it with Marshal.load.
    #
    # The result is commonly a Tokens object, but
    # this is not guaranteed.
    def self.load dump
      dump = GZip.gunzip dump
      # SECURITY NOTE: Marshal.load can execute arbitrary code when fed a
      # crafted dump — only load dumps from trusted sources.
      Marshal.load dump
    end

    alias text_token push
    def begin_group kind; push :begin_group, kind end
    def end_group kind; push :end_group, kind end
    def begin_line kind; push :begin_line, kind end
    def end_line kind; push :end_line, kind end
    alias tokens concat

  end

end