Mercurial > hg > soundsoftware-site
comparison vendor/gems/coderay-0.9.7/lib/coderay/tokens.rb @ 210:0579821a129a
Update to Redmine trunk rev 4802
author | Chris Cannam |
---|---|
date | Tue, 08 Feb 2011 13:51:46 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
128:07fa8a8b56a8 | 210:0579821a129a |
---|---|
1 module CodeRay | |
2 | |
3 # = Tokens | |
4 # | |
5 # The Tokens class represents a list of tokens returnd from | |
6 # a Scanner. | |
7 # | |
8 # A token is not a special object, just a two-element Array | |
9 # consisting of | |
10 # * the _token_ _text_ (the original source of the token in a String) or | |
11 # a _token_ _action_ (:open, :close, :begin_line, :end_line) | |
12 # * the _token_ _kind_ (a Symbol representing the type of the token) | |
13 # | |
14 # A token looks like this: | |
15 # | |
16 # ['# It looks like this', :comment] | |
17 # ['3.1415926', :float] | |
18 # ['$^', :error] | |
19 # | |
20 # Some scanners also yield sub-tokens, represented by special | |
21 # token actions, namely :open and :close. | |
22 # | |
23 # The Ruby scanner, for example, splits "a string" into: | |
24 # | |
25 # [ | |
26 # [:open, :string], | |
27 # ['"', :delimiter], | |
28 # ['a string', :content], | |
29 # ['"', :delimiter], | |
30 # [:close, :string] | |
31 # ] | |
32 # | |
33 # Tokens is the interface between Scanners and Encoders: | |
34 # The input is split and saved into a Tokens object. The Encoder | |
35 # then builds the output from this object. | |
36 # | |
37 # Thus, the syntax below becomes clear: | |
38 # | |
39 # CodeRay.scan('price = 2.59', :ruby).html | |
40 # # the Tokens object is here -------^ | |
41 # | |
42 # See how small it is? ;) | |
43 # | |
44 # Tokens gives you the power to handle pre-scanned code very easily: | |
45 # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string | |
46 # that you put in your DB. | |
47 # | |
48 # It also allows you to generate tokens directly (without using a scanner), | |
49 # to load them from a file, and still use any Encoder that CodeRay provides. | |
50 # | |
51 # Tokens' subclass TokenStream allows streaming to save memory. | |
52 class Tokens < Array | |
53 | |
54 # The Scanner instance that created the tokens. | |
55 attr_accessor :scanner | |
56 | |
57 # Whether the object is a TokenStream. | |
58 # | |
59 # Returns false. | |
60 def stream? | |
61 false | |
62 end | |
63 | |
64 # Iterates over all tokens. | |
65 # | |
66 # If a filter is given, only tokens of that kind are yielded. | |
67 def each kind_filter = nil, &block | |
68 unless kind_filter | |
69 super(&block) | |
70 else | |
71 super() do |text, kind| | |
72 next unless kind == kind_filter | |
73 yield text, kind | |
74 end | |
75 end | |
76 end | |
77 | |
78 # Iterates over all text tokens. | |
79 # Range tokens like [:open, :string] are left out. | |
80 # | |
81 # Example: | |
82 # tokens.each_text_token { |text, kind| text.replace html_escape(text) } | |
83 def each_text_token | |
84 each do |text, kind| | |
85 next unless text.is_a? ::String | |
86 yield text, kind | |
87 end | |
88 end | |
89 | |
90 # Encode the tokens using encoder. | |
91 # | |
92 # encoder can be | |
93 # * a symbol like :html oder :statistic | |
94 # * an Encoder class | |
95 # * an Encoder object | |
96 # | |
97 # options are passed to the encoder. | |
98 def encode encoder, options = {} | |
99 unless encoder.is_a? Encoders::Encoder | |
100 unless encoder.is_a? Class | |
101 encoder_class = Encoders[encoder] | |
102 end | |
103 encoder = encoder_class.new options | |
104 end | |
105 encoder.encode_tokens self, options | |
106 end | |
107 | |
108 | |
109 # Turn into a string using Encoders::Text. | |
110 # | |
111 # +options+ are passed to the encoder if given. | |
112 def to_s options = {} | |
113 encode :text, options | |
114 end | |
115 | |
116 # Redirects unknown methods to encoder calls. | |
117 # | |
118 # For example, if you call +tokens.html+, the HTML encoder | |
119 # is used to highlight the tokens. | |
120 def method_missing meth, options = {} | |
121 Encoders[meth].new(options).encode_tokens self | |
122 end | |
123 | |
124 # Returns the tokens compressed by joining consecutive | |
125 # tokens of the same kind. | |
126 # | |
127 # This can not be undone, but should yield the same output | |
128 # in most Encoders. It basically makes the output smaller. | |
129 # | |
130 # Combined with dump, it saves space for the cost of time. | |
131 # | |
132 # If the scanner is written carefully, this is not required - | |
133 # for example, consecutive //-comment lines could already be | |
134 # joined in one comment token by the Scanner. | |
135 def optimize | |
136 last_kind = last_text = nil | |
137 new = self.class.new | |
138 for text, kind in self | |
139 if text.is_a? String | |
140 if kind == last_kind | |
141 last_text << text | |
142 else | |
143 new << [last_text, last_kind] if last_kind | |
144 last_text = text | |
145 last_kind = kind | |
146 end | |
147 else | |
148 new << [last_text, last_kind] if last_kind | |
149 last_kind = last_text = nil | |
150 new << [text, kind] | |
151 end | |
152 end | |
153 new << [last_text, last_kind] if last_kind | |
154 new | |
155 end | |
156 | |
157 # Compact the object itself; see optimize. | |
158 def optimize! | |
159 replace optimize | |
160 end | |
161 | |
162 # Ensure that all :open tokens have a correspondent :close one. | |
163 # | |
164 # TODO: Test this! | |
165 def fix | |
166 tokens = self.class.new | |
167 # Check token nesting using a stack of kinds. | |
168 opened = [] | |
169 for type, kind in self | |
170 case type | |
171 when :open | |
172 opened.push [:close, kind] | |
173 when :begin_line | |
174 opened.push [:end_line, kind] | |
175 when :close, :end_line | |
176 expected = opened.pop | |
177 if [type, kind] != expected | |
178 # Unexpected :close; decide what to do based on the kind: | |
179 # - token was never opened: delete the :close (just skip it) | |
180 next unless opened.rindex expected | |
181 # - token was opened earlier: also close tokens in between | |
182 tokens << token until (token = opened.pop) == expected | |
183 end | |
184 end | |
185 tokens << [type, kind] | |
186 end | |
187 # Close remaining opened tokens | |
188 tokens << token while token = opened.pop | |
189 tokens | |
190 end | |
191 | |
192 def fix! | |
193 replace fix | |
194 end | |
195 | |
196 # TODO: Scanner#split_into_lines | |
197 # | |
198 # Makes sure that: | |
199 # - newlines are single tokens | |
200 # (which means all other token are single-line) | |
201 # - there are no open tokens at the end the line | |
202 # | |
203 # This makes it simple for encoders that work line-oriented, | |
204 # like HTML with list-style numeration. | |
205 def split_into_lines | |
206 raise NotImplementedError | |
207 end | |
208 | |
209 def split_into_lines! | |
210 replace split_into_lines | |
211 end | |
212 | |
213 # Dumps the object into a String that can be saved | |
214 # in files or databases. | |
215 # | |
216 # The dump is created with Marshal.dump; | |
217 # In addition, it is gzipped using GZip.gzip. | |
218 # | |
219 # The returned String object includes Undumping | |
220 # so it has an #undump method. See Tokens.load. | |
221 # | |
222 # You can configure the level of compression, | |
223 # but the default value 7 should be what you want | |
224 # in most cases as it is a good compromise between | |
225 # speed and compression rate. | |
226 # | |
227 # See GZip module. | |
228 def dump gzip_level = 7 | |
229 require 'coderay/helpers/gzip_simple' | |
230 dump = Marshal.dump self | |
231 dump = dump.gzip gzip_level | |
232 dump.extend Undumping | |
233 end | |
234 | |
235 # The total size of the tokens. | |
236 # Should be equal to the input size before | |
237 # scanning. | |
238 def text_size | |
239 size = 0 | |
240 each_text_token do |t, k| | |
241 size + t.size | |
242 end | |
243 size | |
244 end | |
245 | |
246 # Return all text tokens joined into a single string. | |
247 def text | |
248 map { |t, k| t if t.is_a? ::String }.join | |
249 end | |
250 | |
251 # Include this module to give an object an #undump | |
252 # method. | |
253 # | |
254 # The string returned by Tokens.dump includes Undumping. | |
255 module Undumping | |
256 # Calls Tokens.load with itself. | |
257 def undump | |
258 Tokens.load self | |
259 end | |
260 end | |
261 | |
262 # Undump the object using Marshal.load, then | |
263 # unzip it using GZip.gunzip. | |
264 # | |
265 # The result is commonly a Tokens object, but | |
266 # this is not guaranteed. | |
267 def Tokens.load dump | |
268 require 'coderay/helpers/gzip_simple' | |
269 dump = dump.gunzip | |
270 @dump = Marshal.load dump | |
271 end | |
272 | |
273 end | |
274 | |
275 | |
276 # = TokenStream | |
277 # | |
278 # The TokenStream class is a fake Array without elements. | |
279 # | |
280 # It redirects the method << to a block given at creation. | |
281 # | |
282 # This allows scanners and Encoders to use streaming (no | |
283 # tokens are saved, the input is highlighted the same time it | |
284 # is scanned) with the same code. | |
285 # | |
286 # See CodeRay.encode_stream and CodeRay.scan_stream | |
287 class TokenStream < Tokens | |
288 | |
289 # Whether the object is a TokenStream. | |
290 # | |
291 # Returns true. | |
292 def stream? | |
293 true | |
294 end | |
295 | |
296 # The Array is empty, but size counts the tokens given by <<. | |
297 attr_reader :size | |
298 | |
299 # Creates a new TokenStream that calls +block+ whenever | |
300 # its << method is called. | |
301 # | |
302 # Example: | |
303 # | |
304 # require 'coderay' | |
305 # | |
306 # token_stream = CodeRay::TokenStream.new do |text, kind| | |
307 # puts 'kind: %s, text size: %d.' % [kind, text.size] | |
308 # end | |
309 # | |
310 # token_stream << ['/\d+/', :regexp] | |
311 # #-> kind: rexpexp, text size: 5. | |
312 # | |
313 def initialize &block | |
314 raise ArgumentError, 'Block expected for streaming.' unless block | |
315 @callback = block | |
316 @size = 0 | |
317 end | |
318 | |
319 # Calls +block+ with +token+ and increments size. | |
320 # | |
321 # Returns self. | |
322 def << token | |
323 @callback.call(*token) | |
324 @size += 1 | |
325 self | |
326 end | |
327 | |
328 # This method is not implemented due to speed reasons. Use Tokens. | |
329 def text_size | |
330 raise NotImplementedError, | |
331 'This method is not implemented due to speed reasons.' | |
332 end | |
333 | |
334 # A TokenStream cannot be dumped. Use Tokens. | |
335 def dump | |
336 raise NotImplementedError, 'A TokenStream cannot be dumped.' | |
337 end | |
338 | |
339 # A TokenStream cannot be optimized. Use Tokens. | |
340 def optimize | |
341 raise NotImplementedError, 'A TokenStream cannot be optimized.' | |
342 end | |
343 | |
344 end | |
345 | |
346 end | |
347 | |
# When this file is executed directly (ruby tokens.rb), run the unit
# tests embedded after __END__ below.
if $0 == __FILE__
  $VERBOSE = true
  # Make the coderay lib directory loadable so the tests can require it.
  $: << File.join(File.dirname(__FILE__), '..')
  # Evaluate the embedded test code; passing $0 and a __LINE__ offset keeps
  # error backtraces pointing at the right lines of this file.
  eval DATA.read, nil, $0, __LINE__ + 4
end
353 | |
__END__
require 'test/unit'

# Unit tests for CodeRay::Tokens, evaluated via the DATA/eval hook above
# when this file is run directly.
class TokensTest < Test::Unit::TestCase

  # Tokens is an Array subclass and can be created without arguments.
  def test_creation
    assert CodeRay::Tokens < Array
    tokens = nil
    assert_nothing_raised do
      tokens = CodeRay::Tokens.new
    end
    assert_kind_of Array, tokens
  end

  # Tokens can be appended with <<, and size counts them.
  def test_adding_tokens
    tokens = CodeRay::Tokens.new
    assert_nothing_raised do
      tokens << ['string', :type]
      tokens << ['()', :operator]
    end
    # assert_equal takes (expected, actual); the original call had the
    # arguments swapped, which garbles failure messages.
    assert_equal 2, tokens.size
  end

  # dump/undump round-trips a Tokens object.
  def test_dump_undump
    tokens = CodeRay::Tokens.new
    assert_nothing_raised do
      tokens << ['string', :type]
      tokens << ['()', :operator]
    end
    tokens2 = nil
    assert_nothing_raised do
      tokens2 = tokens.dump.undump
    end
    assert_equal tokens, tokens2
  end

end