Chris@210
|
1 module CodeRay
|
Chris@210
|
2
|
Chris@210
|
3 # = Tokens
|
Chris@210
|
4 #
|
Chris@210
|
5 # The Tokens class represents a list of tokens returned from
|
Chris@210
|
6 # a Scanner.
|
Chris@210
|
7 #
|
Chris@210
|
8 # A token is not a special object, just a two-element Array
|
Chris@210
|
9 # consisting of
|
Chris@210
|
10 # * the _token_ _text_ (the original source of the token in a String) or
|
Chris@210
|
11 # a _token_ _action_ (:open, :close, :begin_line, :end_line)
|
Chris@210
|
12 # * the _token_ _kind_ (a Symbol representing the type of the token)
|
Chris@210
|
13 #
|
Chris@210
|
14 # A token looks like this:
|
Chris@210
|
15 #
|
Chris@210
|
16 # ['# It looks like this', :comment]
|
Chris@210
|
17 # ['3.1415926', :float]
|
Chris@210
|
18 # ['$^', :error]
|
Chris@210
|
19 #
|
Chris@210
|
20 # Some scanners also yield sub-tokens, represented by special
|
Chris@210
|
21 # token actions, namely :open and :close.
|
Chris@210
|
22 #
|
Chris@210
|
23 # The Ruby scanner, for example, splits "a string" into:
|
Chris@210
|
24 #
|
Chris@210
|
25 # [
|
Chris@210
|
26 # [:open, :string],
|
Chris@210
|
27 # ['"', :delimiter],
|
Chris@210
|
28 # ['a string', :content],
|
Chris@210
|
29 # ['"', :delimiter],
|
Chris@210
|
30 # [:close, :string]
|
Chris@210
|
31 # ]
|
Chris@210
|
32 #
|
Chris@210
|
33 # Tokens is the interface between Scanners and Encoders:
|
Chris@210
|
34 # The input is split and saved into a Tokens object. The Encoder
|
Chris@210
|
35 # then builds the output from this object.
|
Chris@210
|
36 #
|
Chris@210
|
37 # Thus, the syntax below becomes clear:
|
Chris@210
|
38 #
|
Chris@210
|
39 # CodeRay.scan('price = 2.59', :ruby).html
|
Chris@210
|
40 # # the Tokens object is here -------^
|
Chris@210
|
41 #
|
Chris@210
|
42 # See how small it is? ;)
|
Chris@210
|
43 #
|
Chris@210
|
44 # Tokens gives you the power to handle pre-scanned code very easily:
|
Chris@210
|
45 # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
|
Chris@210
|
46 # that you put in your DB.
|
Chris@210
|
47 #
|
Chris@210
|
48 # It also allows you to generate tokens directly (without using a scanner),
|
Chris@210
|
49 # to load them from a file, and still use any Encoder that CodeRay provides.
|
Chris@210
|
50 #
|
Chris@210
|
51 # Tokens' subclass TokenStream allows streaming to save memory.
|
Chris@210
|
52 class Tokens < Array
|
Chris@210
|
53
|
Chris@210
|
54 # The Scanner instance that created the tokens.
|
Chris@210
|
55 attr_accessor :scanner
|
Chris@210
|
56
|
Chris@210
|
    # Whether the object is a TokenStream.
    #
    # Returns false; the streaming counterpart TokenStream#stream?
    # returns true.
    def stream?
      false
    end
|
Chris@210
|
63
|
Chris@210
|
64 # Iterates over all tokens.
|
Chris@210
|
65 #
|
Chris@210
|
66 # If a filter is given, only tokens of that kind are yielded.
|
Chris@210
|
67 def each kind_filter = nil, &block
|
Chris@210
|
68 unless kind_filter
|
Chris@210
|
69 super(&block)
|
Chris@210
|
70 else
|
Chris@210
|
71 super() do |text, kind|
|
Chris@210
|
72 next unless kind == kind_filter
|
Chris@210
|
73 yield text, kind
|
Chris@210
|
74 end
|
Chris@210
|
75 end
|
Chris@210
|
76 end
|
Chris@210
|
77
|
Chris@210
|
78 # Iterates over all text tokens.
|
Chris@210
|
79 # Range tokens like [:open, :string] are left out.
|
Chris@210
|
80 #
|
Chris@210
|
81 # Example:
|
Chris@210
|
82 # tokens.each_text_token { |text, kind| text.replace html_escape(text) }
|
Chris@210
|
83 def each_text_token
|
Chris@210
|
84 each do |text, kind|
|
Chris@210
|
85 next unless text.is_a? ::String
|
Chris@210
|
86 yield text, kind
|
Chris@210
|
87 end
|
Chris@210
|
88 end
|
Chris@210
|
89
|
Chris@210
|
90 # Encode the tokens using encoder.
|
Chris@210
|
91 #
|
Chris@210
|
92 # encoder can be
|
Chris@210
|
93 # * a symbol like :html oder :statistic
|
Chris@210
|
94 # * an Encoder class
|
Chris@210
|
95 # * an Encoder object
|
Chris@210
|
96 #
|
Chris@210
|
97 # options are passed to the encoder.
|
Chris@210
|
98 def encode encoder, options = {}
|
Chris@210
|
99 unless encoder.is_a? Encoders::Encoder
|
Chris@210
|
100 unless encoder.is_a? Class
|
Chris@210
|
101 encoder_class = Encoders[encoder]
|
Chris@210
|
102 end
|
Chris@210
|
103 encoder = encoder_class.new options
|
Chris@210
|
104 end
|
Chris@210
|
105 encoder.encode_tokens self, options
|
Chris@210
|
106 end
|
Chris@210
|
107
|
Chris@210
|
108
|
Chris@210
|
    # Turn into a string using Encoders::Text.
    #
    # +options+ are passed to the encoder if given.
    # (Delegates to #encode with the :text encoder.)
    def to_s options = {}
      encode :text, options
    end
|
Chris@210
|
115
|
Chris@210
|
    # Redirects unknown methods to encoder calls.
    #
    # For example, if you call +tokens.html+, the HTML encoder
    # is used to highlight the tokens.
    #
    # NOTE(review): there is no matching respond_to_missing? override,
    # so tokens.respond_to?(:html) returns false even though the call
    # works. Also, a block given to the missing method is not forwarded.
    def method_missing meth, options = {}
      Encoders[meth].new(options).encode_tokens self
    end
|
Chris@210
|
123
|
Chris@210
|
124 # Returns the tokens compressed by joining consecutive
|
Chris@210
|
125 # tokens of the same kind.
|
Chris@210
|
126 #
|
Chris@210
|
127 # This can not be undone, but should yield the same output
|
Chris@210
|
128 # in most Encoders. It basically makes the output smaller.
|
Chris@210
|
129 #
|
Chris@210
|
130 # Combined with dump, it saves space for the cost of time.
|
Chris@210
|
131 #
|
Chris@210
|
132 # If the scanner is written carefully, this is not required -
|
Chris@210
|
133 # for example, consecutive //-comment lines could already be
|
Chris@210
|
134 # joined in one comment token by the Scanner.
|
Chris@210
|
135 def optimize
|
Chris@210
|
136 last_kind = last_text = nil
|
Chris@210
|
137 new = self.class.new
|
Chris@210
|
138 for text, kind in self
|
Chris@210
|
139 if text.is_a? String
|
Chris@210
|
140 if kind == last_kind
|
Chris@210
|
141 last_text << text
|
Chris@210
|
142 else
|
Chris@210
|
143 new << [last_text, last_kind] if last_kind
|
Chris@210
|
144 last_text = text
|
Chris@210
|
145 last_kind = kind
|
Chris@210
|
146 end
|
Chris@210
|
147 else
|
Chris@210
|
148 new << [last_text, last_kind] if last_kind
|
Chris@210
|
149 last_kind = last_text = nil
|
Chris@210
|
150 new << [text, kind]
|
Chris@210
|
151 end
|
Chris@210
|
152 end
|
Chris@210
|
153 new << [last_text, last_kind] if last_kind
|
Chris@210
|
154 new
|
Chris@210
|
155 end
|
Chris@210
|
156
|
Chris@210
|
    # Compact the object itself; see optimize.
    #
    # Returns self (Array#replace returns the receiver).
    def optimize!
      replace optimize
    end
|
Chris@210
|
161
|
Chris@210
|
    # Ensure that all :open tokens have a correspondent :close one.
    #
    # Returns a new token list where:
    # * a :close/:end_line that was never opened is dropped,
    # * a :close/:end_line for a token opened earlier first emits the
    #   closes for everything opened in between,
    # * tokens still open at the end are closed.
    #
    # NOTE(review): when an unexpected close is *skipped*, the entry
    # popped into +expected+ is not pushed back onto the stack —
    # confirm whether losing that entry is intended.
    #
    # TODO: Test this!
    def fix
      tokens = self.class.new
      # Check token nesting using a stack of kinds.
      opened = []
      for type, kind in self
        case type
        when :open
          # Remember the close action we will expect for this kind.
          opened.push [:close, kind]
        when :begin_line
          opened.push [:end_line, kind]
        when :close, :end_line
          expected = opened.pop
          if [type, kind] != expected
            # Unexpected :close; decide what to do based on the kind:
            # - token was never opened: delete the :close (just skip it)
            next unless opened.rindex expected
            # - token was opened earlier: also close tokens in between
            tokens << token until (token = opened.pop) == expected
          end
        end
        tokens << [type, kind]
      end
      # Close remaining opened tokens
      tokens << token while token = opened.pop
      tokens
    end
|
Chris@210
|
191
|
Chris@210
|
    # Fix the token nesting of the object itself; see #fix.
    #
    # Returns self (Array#replace returns the receiver).
    def fix!
      replace fix
    end
|
Chris@210
|
195
|
Chris@210
|
    # TODO: Scanner#split_into_lines
    #
    # Makes sure that:
    # - newlines are single tokens
    #   (which means all other token are single-line)
    # - there are no open tokens at the end the line
    #
    # This makes it simple for encoders that work line-oriented,
    # like HTML with list-style numeration.
    #
    # Not yet implemented; always raises NotImplementedError.
    def split_into_lines
      raise NotImplementedError
    end
|
Chris@210
|
208
|
Chris@210
|
    # In-place version of #split_into_lines.
    # Currently always raises NotImplementedError, because
    # #split_into_lines is not implemented yet.
    def split_into_lines!
      replace split_into_lines
    end
|
Chris@210
|
212
|
Chris@210
|
213 # Dumps the object into a String that can be saved
|
Chris@210
|
214 # in files or databases.
|
Chris@210
|
215 #
|
Chris@210
|
216 # The dump is created with Marshal.dump;
|
Chris@210
|
217 # In addition, it is gzipped using GZip.gzip.
|
Chris@210
|
218 #
|
Chris@210
|
219 # The returned String object includes Undumping
|
Chris@210
|
220 # so it has an #undump method. See Tokens.load.
|
Chris@210
|
221 #
|
Chris@210
|
222 # You can configure the level of compression,
|
Chris@210
|
223 # but the default value 7 should be what you want
|
Chris@210
|
224 # in most cases as it is a good compromise between
|
Chris@210
|
225 # speed and compression rate.
|
Chris@210
|
226 #
|
Chris@210
|
227 # See GZip module.
|
Chris@210
|
228 def dump gzip_level = 7
|
Chris@210
|
229 require 'coderay/helpers/gzip_simple'
|
Chris@210
|
230 dump = Marshal.dump self
|
Chris@210
|
231 dump = dump.gzip gzip_level
|
Chris@210
|
232 dump.extend Undumping
|
Chris@210
|
233 end
|
Chris@210
|
234
|
Chris@210
|
235 # The total size of the tokens.
|
Chris@210
|
236 # Should be equal to the input size before
|
Chris@210
|
237 # scanning.
|
Chris@210
|
238 def text_size
|
Chris@210
|
239 size = 0
|
Chris@210
|
240 each_text_token do |t, k|
|
Chris@210
|
241 size + t.size
|
Chris@210
|
242 end
|
Chris@210
|
243 size
|
Chris@210
|
244 end
|
Chris@210
|
245
|
Chris@210
|
246 # Return all text tokens joined into a single string.
|
Chris@210
|
247 def text
|
Chris@210
|
248 map { |t, k| t if t.is_a? ::String }.join
|
Chris@210
|
249 end
|
Chris@210
|
250
|
Chris@210
|
    # Include this module to give an object an #undump
    # method.
    #
    # The string returned by Tokens.dump includes Undumping.
    module Undumping
      # Calls Tokens.load with itself.
      #
      # Returns the deserialized value - commonly a Tokens object,
      # but this is not guaranteed (see Tokens.load).
      def undump
        Tokens.load self
      end
    end
|
Chris@210
|
261
|
Chris@210
|
262 # Undump the object using Marshal.load, then
|
Chris@210
|
263 # unzip it using GZip.gunzip.
|
Chris@210
|
264 #
|
Chris@210
|
265 # The result is commonly a Tokens object, but
|
Chris@210
|
266 # this is not guaranteed.
|
Chris@210
|
267 def Tokens.load dump
|
Chris@210
|
268 require 'coderay/helpers/gzip_simple'
|
Chris@210
|
269 dump = dump.gunzip
|
Chris@210
|
270 @dump = Marshal.load dump
|
Chris@210
|
271 end
|
Chris@210
|
272
|
Chris@210
|
273 end
|
Chris@210
|
274
|
Chris@210
|
275
|
Chris@210
|
  # = TokenStream
  #
  # The TokenStream class is a fake Array without elements.
  #
  # It redirects the method << to a block given at creation.
  #
  # This allows scanners and Encoders to use streaming (no
  # tokens are saved, the input is highlighted the same time it
  # is scanned) with the same code.
  #
  # See CodeRay.encode_stream and CodeRay.scan_stream
  class TokenStream < Tokens

    # Whether the object is a TokenStream.
    #
    # Returns true.
    def stream?
      true
    end

    # The Array is empty, but size counts the tokens given by <<.
    attr_reader :size

    # Creates a new TokenStream that calls +block+ whenever
    # its << method is called.
    #
    # Raises ArgumentError if no block is given.
    #
    # Example:
    #
    #   require 'coderay'
    #
    #   token_stream = CodeRay::TokenStream.new do |text, kind|
    #     puts 'kind: %s, text size: %d.' % [kind, text.size]
    #   end
    #
    #   token_stream << ['/\d+/', :regexp]
    #   #-> kind: regexp, text size: 5.
    #
    def initialize &block
      raise ArgumentError, 'Block expected for streaming.' unless block
      @callback = block
      @size = 0
    end

    # Calls +block+ with +token+ and increments size.
    # The token is splatted, so the block receives (text, kind).
    #
    # Returns self.
    def << token
      @callback.call(*token)
      @size += 1
      self
    end

    # This method is not implemented due to speed reasons. Use Tokens.
    def text_size
      raise NotImplementedError,
        'This method is not implemented due to speed reasons.'
    end

    # A TokenStream cannot be dumped. Use Tokens.
    def dump
      raise NotImplementedError, 'A TokenStream cannot be dumped.'
    end

    # A TokenStream cannot be optimized. Use Tokens.
    def optimize
      raise NotImplementedError, 'A TokenStream cannot be optimized.'
    end

  end
|
Chris@210
|
345
|
Chris@210
|
346 end
|
Chris@210
|
347
|
Chris@210
|
# Run the embedded unit tests (in the DATA section below __END__) when
# this file is executed directly: enable warnings, put the lib directory
# on the load path, and eval the tests with correct file/line info.
if $0 == __FILE__
  $VERBOSE = true
  $: << File.join(File.dirname(__FILE__), '..')
  eval DATA.read, nil, $0, __LINE__ + 4
end
|
Chris@210
|
353
|
Chris@210
|
354 __END__
|
Chris@210
|
355 require 'test/unit'
|
Chris@210
|
356
|
Chris@210
|
# Unit tests for CodeRay::Tokens (evaluated via the DATA hook above).
class TokensTest < Test::Unit::TestCase

  # Tokens is an Array subclass and can be created without arguments.
  def test_creation
    assert CodeRay::Tokens < Array
    tokens = nil
    assert_nothing_raised do
      tokens = CodeRay::Tokens.new
    end
    assert_kind_of Array, tokens
  end

  # Tokens accepts [text, kind] pairs via <<.
  def test_adding_tokens
    tokens = CodeRay::Tokens.new
    assert_nothing_raised do
      tokens << ['string', :type]
      tokens << ['()', :operator]
    end
    # Fix: assert_equal takes (expected, actual); the arguments were
    # swapped, which produces misleading failure messages.
    assert_equal 2, tokens.size
  end

  # dump/undump must round-trip a token list unchanged.
  def test_dump_undump
    tokens = CodeRay::Tokens.new
    assert_nothing_raised do
      tokens << ['string', :type]
      tokens << ['()', :operator]
    end
    tokens2 = nil
    assert_nothing_raised do
      tokens2 = tokens.dump.undump
    end
    assert_equal tokens, tokens2
  end

end