To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.
root / .svn / pristine / d9 / d9545294de024a7e3a5f7c74daf7f80255e0f5d7.svn-base @ 1297:0a574315af3e
History | View | Annotate | Download (6.4 KB)
| 1 |
module CodeRay

  # GZip library for writing and reading token dumps.
  autoload :GZip, 'coderay/helpers/gzip'

  # = Tokens TODO: Rewrite!
  #
  # The Tokens class represents a list of tokens returned from
  # a Scanner.
  #
  # A token is not a special object, just a two-element Array
  # consisting of
  # * the _token_ _text_ (the original source of the token in a String) or
  #   a _token_ _action_ (begin_group, end_group, begin_line, end_line)
  # * the _token_ _kind_ (a Symbol representing the type of the token)
  #
  # A token looks like this:
  #
  #   ['# It looks like this', :comment]
  #   ['3.1415926', :float]
  #   ['$^', :error]
  #
  # Some scanners also yield sub-tokens, represented by special
  # token actions, namely begin_group and end_group.
  #
  # The Ruby scanner, for example, splits "a string" into:
  #
  #  [
  #   [:begin_group, :string],
  #   ['"', :delimiter],
  #   ['a string', :content],
  #   ['"', :delimiter],
  #   [:end_group, :string]
  #  ]
  #
  # NOTE: inside this class the pairs are stored flat: the text (or action)
  # and the kind of a token are two consecutive elements of the Array. This
  # is why #count returns size / 2 and why #text_token pushes two values.
  #
  # Tokens is the interface between Scanners and Encoders:
  # The input is split and saved into a Tokens object. The Encoder
  # then builds the output from this object.
  #
  # Thus, the syntax below becomes clear:
  #
  #   CodeRay.scan('price = 2.59', :ruby).html
  #   # the Tokens object is here -------^
  #
  # See how small it is? ;)
  #
  # Tokens gives you the power to handle pre-scanned code very easily:
  # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
  # that you put in your DB.
  #
  # It also allows you to generate tokens directly (without using a scanner),
  # to load them from a file, and still use any Encoder that CodeRay provides.
  class Tokens < Array

    # The Scanner instance that created the tokens.
    attr_accessor :scanner

    # Encode the tokens using encoder.
    #
    # encoder can be
    # * a symbol like :html or :statistic (anything responding to +to_sym+
    #   is looked up in Encoders and instantiated with +options+)
    # * an Encoder class (instantiated with +options+)
    # * an Encoder object (used as is)
    #
    # options are passed to the encoder.
    #
    # Returns whatever the encoder's +encode_tokens+ returns.
    def encode encoder, options = {}
      if encoder.respond_to? :to_sym
        encoder = Encoders[encoder].new options
      elsif encoder.is_a? Class
        # The documented "Encoder class" case: without this branch,
        # encode_tokens would be sent to the class itself.
        encoder = encoder.new options
      end
      encoder.encode_tokens self, options
    end

    # Turn tokens into a string by concatenating them.
    def to_s
      encode CodeRay::Encoders::Encoder.new
    end

    # Redirects unknown methods to encoder calls.
    #
    # For example, if you call +tokens.html+, the HTML encoder
    # is used to highlight the tokens.
    #
    # Only a single optional +options+ argument is accepted. If no encoder
    # plugin is found for +meth+, the call falls through to the default
    # +method_missing+ (which raises NoMethodError).
    def method_missing meth, options = {}
      encode meth, options
    rescue PluginHost::PluginNotFound
      super
    end

    # Split the tokens into parts of the given +sizes+.
    #
    # The result will be an Array of Tokens objects. The parts have
    # the text size specified by the parameter. In addition, each
    # part closes all opened tokens. This is useful to insert tokens
    # between them.
    #
    # If the sizes are used up before the tokens are, everything that is
    # left goes into one additional part. If the tokens are used up first,
    # empty Tokens objects are appended until there is one part per size.
    #
    # Raises ArgumentError for an unknown token action, or when a text or
    # action slot holds something that is neither a String nor a Symbol.
    #
    # This method is used by @Scanner#tokenize@ when called with an Array
    # of source strings. The Diff encoder uses it for inline highlighting.
    def split_into_parts *sizes
      parts = []
      opened = []    # stack of [action, kind] pairs for groups/lines still open
      content = nil  # pending text or action; nil means "expecting one next"
      part = Tokens.new
      part_size = 0
      size = sizes.first
      i = 0
      each do |item|
        case content
        when nil
          # First half of a pair: remember it until its kind arrives.
          content = item
        when String
          if size && part_size + content.size > size  # token must be cut
            if part_size < size  # some part of the token goes into this part
              content = content.dup  # content may not be safe to change
              part << content.slice!(0, size - part_size) << item
            end
            # close all open groups and lines (innermost first)...
            opened.reverse_each do |action, kind|
              part << (action == :begin_group ? :end_group : :end_line) << kind
            end
            # Finish this part and skip over zero-sized parts, which stay empty.
            loop do
              parts << part
              part = Tokens.new
              size = sizes[i += 1]
              break if size.nil? || size > 0
            end
            # ...and open them again.
            part.concat opened.flatten
            part_size = 0
            # Process the rest of the same token against the new part.
            redo unless content.empty?
          else
            part << content << item
            part_size += content.size
          end
          content = nil
        when Symbol
          case content
          when :begin_group, :begin_line
            opened << [content, item]
          when :end_group, :end_line
            opened.pop
          else
            raise ArgumentError, 'Unknown token action: %p, kind = %p' % [content, item]
          end
          part << content << item
          content = nil
        else
          raise ArgumentError, 'Token input junk: %p, kind = %p' % [content, item]
        end
      end
      parts << part
      parts << Tokens.new while parts.size < sizes.size
      parts
    end

    # Dumps the object into a String that can be saved
    # in files or databases.
    #
    # The dump is created with Marshal.dump;
    # In addition, it is gzipped using GZip.gzip.
    #
    # The returned String object includes Undumping
    # so it has an #undump method. See Tokens.load.
    #
    # You can configure the level of compression,
    # but the default value 7 should be what you want
    # in most cases as it is a good compromise between
    # speed and compression rate.
    #
    # See GZip module.
    def dump gzip_level = 7
      marshalled = Marshal.dump self
      zipped = GZip.gzip marshalled, gzip_level
      zipped.extend Undumping
    end

    # Return the actual number of tokens.
    #
    # Each token occupies two consecutive elements (text or action, then
    # kind), so this is half the Array size. Note that this replaces
    # Array#count and takes no argument or block.
    def count
      size / 2
    end

    # Include this module to give an object an #undump
    # method.
    #
    # The string returned by Tokens.dump includes Undumping.
    module Undumping
      # Calls Tokens.load with itself.
      def undump
        Tokens.load self
      end
    end

    # Undump the object: unzip it using GZip.gunzip, then
    # restore it using Marshal.load.
    #
    # The result is commonly a Tokens object, but
    # this is not guaranteed.
    #
    # SECURITY: Marshal.load can instantiate arbitrary objects. Only pass
    # dumps that come from a trusted source.
    def self.load dump
      Marshal.load GZip.gunzip(dump)
    end

    # Token-building helpers: each appends one flat pair to the list.
    alias text_token push
    def begin_group kind; push :begin_group, kind end
    def end_group kind; push :end_group, kind end
    def begin_line kind; push :begin_line, kind end
    def end_line kind; push :end_line, kind end
    # Appends an already flat list of token pairs.
    alias tokens concat

  end

end