soundsoftware-site: vendor/gems/coderay-1.0.0/lib/coderay/tokens.rb annotate

annotate vendor/gems/coderay-1.0.0/lib/coderay/tokens.rb @ 1169:492ff72268e3 bug_521

Close obsolete branch bug_521

author	Chris Cannam
date	Thu, 18 Oct 2012 10:42:48 +0100
parents	cbb26bc654de
children

rev	line source
module CodeRay

  # GZip library for writing and reading token dumps.
  autoload :GZip, 'coderay/helpers/gzip'

  # = Tokens  TODO: Rewrite!
  #
  # The Tokens class represents a list of tokens returned from
  # a Scanner.
  #
  # A token is not a special object, just a pair consisting of
  # * the _token_ _text_ (the original source of the token in a String) or
  #   a _token_ _action_ (begin_group, end_group, begin_line, end_line)
  # * the _token_ _kind_ (a Symbol representing the type of the token)
  #
  # Conceptually, a token looks like this:
  #
  #   ['# It looks like this', :comment]
  #   ['3.1415926', :float]
  #   ['$^', :error]
  #
  # Some scanners also yield sub-tokens, represented by special
  # token actions, namely begin_group and end_group.
  #
  # The Ruby scanner, for example, splits "a string" into:
  #
  #  [
  #   [:begin_group, :string],
  #   ['"', :delimiter],
  #   ['a string', :content],
  #   ['"', :delimiter],
  #   [:end_group, :string]
  #  ]
  #
  # Note that this class stores the pairs flat: the text (or action) and
  # the kind of each token are pushed as two consecutive elements, which
  # is why #count is half of #size.
  #
  # Tokens is the interface between Scanners and Encoders:
  # The input is split and saved into a Tokens object. The Encoder
  # then builds the output from this object.
  #
  # Thus, the syntax below becomes clear:
  #
  #   CodeRay.scan('price = 2.59', :ruby).html
  #   # the Tokens object is here -------^
  #
  # See how small it is? ;)
  #
  # Tokens gives you the power to handle pre-scanned code very easily:
  # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
  # that you put in your DB.
  #
  # It also allows you to generate tokens directly (without using a scanner),
  # to load them from a file, and still use any Encoder that CodeRay provides.
  class Tokens < Array

    # The Scanner instance that created the tokens.
    attr_accessor :scanner

    # Encode the tokens using encoder.
    #
    # encoder can be
    # * a symbol like :html or :statistic
    # * an Encoder class
    # * an Encoder object
    #
    # Anything that responds to +to_sym+ is looked up in Encoders and
    # instantiated with +options+; any other object is used as given.
    #
    # options are passed to the encoder.
    def encode encoder, options = {}
      encoder = Encoders[encoder].new options if encoder.respond_to? :to_sym
      encoder.encode_tokens self, options
    end

    # Turn tokens into a string by concatenating them.
    def to_s
      encode CodeRay::Encoders::Encoder.new
    end

    # Redirects unknown methods to encoder calls.
    #
    # For example, if you call +tokens.html+, the HTML encoder
    # is used to highlight the tokens.
    #
    # If no encoder plugin is found for the method name, the call falls
    # through to the default +method_missing+ (raising NoMethodError).
    def method_missing meth, options = {}
      encode meth, options
    rescue PluginHost::PluginNotFound
      super
    end

    # Split the tokens into parts of the given +sizes+.
    #
    # The result will be an Array of Tokens objects. The parts have
    # the text size specified by the parameter. In addition, each
    # part closes all opened tokens. This is useful to insert tokens
    # between them.
    #
    # Text tokens that cross a part boundary are cut; the original
    # strings are never modified (a copy is cut instead). Text left over
    # after all sizes are used up goes into one additional, final part.
    # If fewer parts than sizes were produced, empty Tokens objects are
    # appended so that at least <tt>sizes.size</tt> parts are returned.
    #
    # Raises ArgumentError if an unknown token action (a Symbol other than
    # the four begin/end actions) or a content that is neither String
    # nor Symbol is encountered.
    #
    # This method is used by @Scanner#tokenize@ when called with an Array
    # of source strings. The Diff encoder uses it for inline highlighting.
    def split_into_parts *sizes
      parts = []
      opened = []      # stack of [action, kind] pairs that are currently open
      content = nil    # first half of the pair being read; nil means "expect content"
      part = Tokens.new
      part_size = 0
      size = sizes.first
      i = 0
      each do |item|
        case content
        when nil
          # First element of a pair: remember it and wait for the kind.
          content = item
        when String
          if size && part_size + content.size > size  # token must be cut
            if part_size < size  # some part of the token goes into this part
              content = content.dup  # content may not be safe to change
              part << content.slice!(0, size - part_size) << item
            end
            # close all open groups and lines...
            closing = opened.reverse.flatten.map do |content_or_kind|
              case content_or_kind
              when :begin_group
                :end_group
              when :begin_line
                :end_line
              else
                content_or_kind
              end
            end
            part.concat closing
            # Finish the current part and advance to the next size; sizes
            # that are not positive each yield an empty part.
            loop do
              parts << part
              part = Tokens.new
              size = sizes[i += 1]
              break if size.nil? || size > 0
            end
            # ...and open them again.
            part.concat opened.flatten
            part_size = 0
            # Process the rest of the cut token against the new part.
            redo unless content.empty?
          else
            part << content << item
            part_size += content.size
          end
          content = nil
        when Symbol
          case content
          when :begin_group, :begin_line
            opened << [content, item]
          when :end_group, :end_line
            opened.pop
          else
            raise ArgumentError, 'Unknown token action: %p, kind = %p' % [content, item]
          end
          part << content << item
          content = nil
        else
          raise ArgumentError, 'Token input junk: %p, kind = %p' % [content, item]
        end
      end
      parts << part
      parts << Tokens.new while parts.size < sizes.size
      parts
    end

    # Dumps the object into a String that can be saved
    # in files or databases.
    #
    # The dump is created with Marshal.dump;
    # In addition, it is gzipped using GZip.gzip.
    #
    # The returned String object includes Undumping
    # so it has an #undump method. See Tokens.load.
    #
    # You can configure the level of compression,
    # but the default value 7 should be what you want
    # in most cases as it is a good compromise between
    # speed and compression rate.
    #
    # See GZip module.
    def dump gzip_level = 7
      marshalled = Marshal.dump self
      zipped = GZip.gzip marshalled, gzip_level
      zipped.extend Undumping
    end

    # Return the actual number of tokens.
    #
    # Each token occupies two elements (content and kind), so this is
    # half of the Array size.
    def count
      size / 2
    end

    # Include this module to give an object an #undump
    # method.
    #
    # The string returned by Tokens.dump includes Undumping.
    module Undumping
      # Calls Tokens.load with itself.
      def undump
        Tokens.load self
      end
    end

    # Undump the object: unzip it using GZip.gunzip, then
    # restore it using Marshal.load.
    #
    # The result is commonly a Tokens object, but
    # this is not guaranteed.
    #
    # WARNING: Marshal.load can instantiate arbitrary objects. Only pass
    # dumps that come from a trusted source.
    def Tokens.load dump
      Marshal.load GZip.gunzip(dump)
    end

    # Token-building helpers: each appends a content/kind pair.
    alias text_token push
    def begin_group kind; push :begin_group, kind end
    def end_group kind; push :end_group, kind end
    def begin_line kind; push :begin_line, kind end
    def end_line kind; push :end_line, kind end
    alias tokens concat

  end

end

Mercurial > hg > soundsoftware-site

annotate vendor/gems/coderay-1.0.0/lib/coderay/tokens.rb @ 1169:492ff72268e3 bug_521