Chris@909
|
1 module CodeRay
|
Chris@909
|
2
|
Chris@909
|
3 # GZip library for writing and reading token dumps.
|
Chris@909
|
4 autoload :GZip, 'coderay/helpers/gzip'
|
Chris@909
|
5
|
Chris@909
|
6 # = Tokens TODO: Rewrite!
|
Chris@909
|
7 #
|
Chris@909
|
8 # The Tokens class represents a list of tokens returned from
|
Chris@909
|
9 # a Scanner.
|
Chris@909
|
10 #
|
Chris@909
|
11 # A token is not a special object, just a two-element Array
|
Chris@909
|
12 # consisting of
|
Chris@909
|
13 # * the _token_ _text_ (the original source of the token in a String) or
|
Chris@909
|
14 # a _token_ _action_ (begin_group, end_group, begin_line, end_line)
|
Chris@909
|
15 # * the _token_ _kind_ (a Symbol representing the type of the token)
|
Chris@909
|
16 #
|
Chris@909
|
17 # A token looks like this:
|
Chris@909
|
18 #
|
Chris@909
|
19 # ['# It looks like this', :comment]
|
Chris@909
|
20 # ['3.1415926', :float]
|
Chris@909
|
21 # ['$^', :error]
|
Chris@909
|
22 #
|
Chris@909
|
23 # Some scanners also yield sub-tokens, represented by special
|
Chris@909
|
24 # token actions, namely begin_group and end_group.
|
Chris@909
|
25 #
|
Chris@909
|
26 # The Ruby scanner, for example, splits "a string" into:
|
Chris@909
|
27 #
|
Chris@909
|
28 # [
|
Chris@909
|
29 # [:begin_group, :string],
|
Chris@909
|
30 # ['"', :delimiter],
|
Chris@909
|
31 # ['a string', :content],
|
Chris@909
|
32 # ['"', :delimiter],
|
Chris@909
|
33 # [:end_group, :string]
|
Chris@909
|
34 # ]
|
Chris@909
|
35 #
|
Chris@909
|
36 # Tokens is the interface between Scanners and Encoders:
|
Chris@909
|
37 # The input is split and saved into a Tokens object. The Encoder
|
Chris@909
|
38 # then builds the output from this object.
|
Chris@909
|
39 #
|
Chris@909
|
40 # Thus, the syntax below becomes clear:
|
Chris@909
|
41 #
|
Chris@909
|
42 # CodeRay.scan('price = 2.59', :ruby).html
|
Chris@909
|
43 # # the Tokens object is here -------^
|
Chris@909
|
44 #
|
Chris@909
|
45 # See how small it is? ;)
|
Chris@909
|
46 #
|
Chris@909
|
47 # Tokens gives you the power to handle pre-scanned code very easily:
|
Chris@909
|
48 # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
|
Chris@909
|
49 # that you put in your DB.
|
Chris@909
|
50 #
|
Chris@909
|
51 # It also allows you to generate tokens directly (without using a scanner),
|
Chris@909
|
52 # to load them from a file, and still use any Encoder that CodeRay provides.
|
Chris@909
|
class Tokens < Array

  # The Scanner instance that created the tokens.
  attr_accessor :scanner

  # Encode the tokens using encoder.
  #
  # encoder can be
  # * anything that responds to +to_sym+ (for example a Symbol like :html
  #   or :statistic); it is looked up with <tt>Encoders[encoder]</tt> and
  #   instantiated with +options+
  # * any other object; it is used as is and must respond to
  #   +encode_tokens+
  #
  # options are passed to the encoder, both on instantiation (first case)
  # and to +encode_tokens+.
  #
  # Returns whatever the encoder's +encode_tokens+ returns.
  def encode encoder, options = {}
    encoder = Encoders[encoder].new options if encoder.respond_to? :to_sym
    encoder.encode_tokens self, options
  end

  # Turn tokens into a string by encoding them with a plain
  # CodeRay::Encoders::Encoder instance.
  def to_s
    encode CodeRay::Encoders::Encoder.new
  end

  # Redirects unknown methods to encoder calls.
  #
  # For example, if you call +tokens.html+, the HTML encoder
  # is used to highlight the tokens.
  #
  # If no encoder plugin is found for +meth+ (PluginHost::PluginNotFound),
  # the call falls through to the regular +method_missing+ handling.
  def method_missing meth, options = {}
    encode meth, options
  rescue PluginHost::PluginNotFound
    super
  end

  # Split the tokens into parts of the given +sizes+.
  #
  # The result will be an Array of Tokens objects. The parts have
  # the text size specified by the parameter. In addition, each
  # part closes all opened tokens. This is useful to insert tokens
  # between them.
  #
  # Text that does not fit into the given sizes ends up in one extra,
  # final part. If fewer parts than sizes were produced, the result is
  # padded with empty Tokens objects.
  #
  # Raises ArgumentError if the token stream contains an unknown token
  # action or something that is neither a String nor a Symbol in content
  # position.
  #
  # This method is used by @Scanner#tokenize@ when called with an Array
  # of source strings. The Diff encoder uses it for inline highlighting.
  def split_into_parts *sizes
    parts = []
    opened = []      # stack of [action, kind] pairs for groups/lines still open
    content = nil    # first half of the current (content, kind) pair
    part = Tokens.new
    part_size = 0    # text size collected in the current part so far
    size = sizes.first
    i = 0
    # The token list is flat: content, kind, content, kind, ...
    # +content+ is nil while waiting for the first half of a pair.
    for item in self
      case content
      when nil
        content = item
      when String
        if size && part_size + content.size > size  # token must be cut
          if part_size < size  # some part of the token goes into this part
            content = content.dup  # content may not be safe to change
            part << content.slice!(0, size - part_size) << item
          end
          # close all open groups and lines...
          closing = opened.reverse.flatten.map do |content_or_kind|
            case content_or_kind
            when :begin_group
              :end_group
            when :begin_line
              :end_line
            else
              content_or_kind
            end
          end
          part.concat closing
          # finish this part and skip over sizes that are not positive,
          # emitting an empty part for each of them
          begin
            parts << part
            part = Tokens.new
            size = sizes[i += 1]
          end until size.nil? || size > 0
          # ...and open them again.
          part.concat opened.flatten
          part_size = 0
          # handle the rest of the token with the same kind (+item+)
          redo unless content.empty?
        else
          part << content << item
          part_size += content.size
        end
        content = nil
      when Symbol
        case content
        when :begin_group, :begin_line
          opened << [content, item]
        when :end_group, :end_line
          opened.pop
        else
          raise ArgumentError, 'Unknown token action: %p, kind = %p' % [content, item]
        end
        part << content << item
        content = nil
      else
        raise ArgumentError, 'Token input junk: %p, kind = %p' % [content, item]
      end
    end
    parts << part
    parts << Tokens.new while parts.size < sizes.size
    parts
  end

  # Dumps the object into a String that can be saved
  # in files or databases.
  #
  # The dump is created with Marshal.dump;
  # In addition, it is gzipped using GZip.gzip.
  #
  # The returned String object includes Undumping
  # so it has an #undump method. See Tokens.load.
  #
  # You can configure the level of compression,
  # but the default value 7 should be what you want
  # in most cases as it is a good compromise between
  # speed and compression rate.
  #
  # See GZip module.
  def dump gzip_level = 7
    data = Marshal.dump self
    data = GZip.gzip data, gzip_level
    data.extend Undumping
  end

  # Return the actual number of tokens.
  #
  # Every token occupies two slots of the underlying Array (content or
  # action, then kind), so this is half the Array size. Note that this
  # replaces Array#count and accepts neither an argument nor a block.
  def count
    size / 2
  end

  # Include this module to give an object an #undump
  # method.
  #
  # The string returned by Tokens.dump includes Undumping.
  module Undumping
    # Calls Tokens.load with itself.
    def undump
      Tokens.load self
    end
  end

  # Unzip the dump using GZip.gunzip, then undump the
  # object using Marshal.load.
  #
  # The result is commonly a Tokens object, but
  # this is not guaranteed.
  #
  # NOTE(review): Marshal.load must not be fed untrusted data; only load
  # dumps that come from a trusted source.
  def Tokens.load dump
    dump = GZip.gunzip dump
    # Return the object directly; do not keep a reference to it on the class.
    Marshal.load dump
  end

  # Token-building interface: each call appends one (content, kind) or
  # (action, kind) pair to the flat list.
  alias text_token push
  def begin_group kind; push :begin_group, kind end
  def end_group kind; push :end_group, kind end
  def begin_line kind; push :begin_line, kind end
  def end_line kind; push :end_line, kind end
  alias tokens concat

end
|
Chris@909
|
214
|
Chris@909
|
215 end
|