Mercurial > hg > soundsoftware-site
comparison vendor/plugins/coderay-0.9.2/lib/coderay/.svn/text-base/tokens.rb.svn-base @ 0:513646585e45
* Import Redmine trunk SVN rev 3859
author | Chris Cannam |
---|---|
date | Fri, 23 Jul 2010 15:52:44 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:513646585e45 |
---|---|
1 module CodeRay | |
2 | |
3 # = Tokens | |
4 # | |
5 # The Tokens class represents a list of tokens returned from | |
6 # a Scanner. | |
7 # | |
8 # A token is not a special object, just a two-element Array | |
9 # consisting of | |
10 # * the _token_ _kind_ (a Symbol representing the type of the token) | |
11 # * the _token_ _text_ (the original source of the token in a String) | |
12 # | |
13 # A token looks like this: | |
14 # | |
15 # [:comment, '# It looks like this'] | |
16 # [:float, '3.1415926'] | |
17 # [:error, '$^'] | |
18 # | |
19 # Some scanners also yield some kind of sub-tokens, represented by special | |
20 # token texts, namely :open and :close . | |
21 # | |
22 # The Ruby scanner, for example, splits "a string" into: | |
23 # | |
24 # [ | |
25 # [:open, :string], | |
26 # [:delimiter, '"'], | |
27 # [:content, 'a string'], | |
28 # [:delimiter, '"'], | |
29 # [:close, :string] | |
30 # ] | |
31 # | |
32 # Tokens is also the interface between Scanners and Encoders: | |
33 # The input is split and saved into a Tokens object. The Encoder | |
34 # then builds the output from this object. | |
35 # | |
36 # Thus, the syntax below becomes clear: | |
37 # | |
38 # CodeRay.scan('price = 2.59', :ruby).html | |
39 # # the Tokens object is here -------^ | |
40 # | |
41 # See how small it is? ;) | |
42 # | |
43 # Tokens gives you the power to handle pre-scanned code very easily: | |
44 # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string | |
45 # that you put in your DB. | |
46 # | |
47 # Tokens' subclass TokenStream allows streaming to save memory. | |
# The list of tokens a Scanner produces; also the interface between
# Scanners and Encoders. Each element is a two-element Array of
# [text, kind] (or [:open/:close/:begin_line/:end_line, kind] for
# range tokens).
class Tokens < Array

  # The Scanner instance that created the tokens.
  attr_accessor :scanner

  # Whether the object is a TokenStream.
  #
  # Returns false.
  def stream?
    false
  end

  # Iterates over all tokens.
  #
  # If +kind_filter+ is given, only tokens of that kind are yielded.
  def each kind_filter = nil, &block
    unless kind_filter
      super(&block)
    else
      super() do |text, kind|
        next unless kind == kind_filter
        yield text, kind
      end
    end
  end

  # Iterates over all text tokens.
  # Range tokens like [:open, :string] are left out.
  #
  # Example:
  #   tokens.each_text_token { |text, kind| text.replace html_escape(text) }
  def each_text_token
    each do |text, kind|
      next unless text.is_a? ::String
      yield text, kind
    end
  end

  # Encode the tokens using +encoder+.
  #
  # +encoder+ can be
  # * a symbol like :html or :statistic
  # * an Encoder class
  # * an Encoder object
  #
  # +options+ are passed to the encoder.
  def encode encoder, options = {}
    unless encoder.is_a? Encoders::Encoder
      # Fix: the old code only assigned encoder_class in the symbol
      # branch, so passing an Encoder *class* crashed with
      # NoMethodError on nil. Use the class directly in that case.
      encoder_class = encoder.is_a?(Class) ? encoder : Encoders[encoder]
      encoder = encoder_class.new options
    end
    encoder.encode_tokens self, options
  end

  # Turn into a string using Encoders::Text.
  #
  # +options+ are passed to the encoder if given.
  def to_s options = {}
    encode :text, options
  end

  # Redirects unknown methods to encoder calls.
  #
  # For example, if you call +tokens.html+, the HTML encoder
  # is used to highlight the tokens.
  def method_missing meth, options = {}
    Encoders[meth].new(options).encode_tokens self
  end

  # Returns the tokens compressed by joining consecutive
  # tokens of the same kind.
  #
  # This can not be undone, but should yield the same output
  # in most Encoders. It basically makes the output smaller.
  #
  # Combined with dump, it saves space for the cost of time.
  #
  # If the scanner is written carefully, this is not required -
  # for example, consecutive //-comment lines could already be
  # joined in one comment token by the Scanner.
  def optimize
    last_kind = last_text = nil
    new = self.class.new
    # NOTE: `last_text << text` appends in place, so the first string of
    # each joined run (an element of self) is mutated; this matches the
    # original implementation's behaviour.
    each do |text, kind|
      if text.is_a? String
        if kind == last_kind
          last_text << text
        else
          new << [last_text, last_kind] if last_kind
          last_text = text
          last_kind = kind
        end
      else
        # Range token: flush any pending text run, then copy it through.
        new << [last_text, last_kind] if last_kind
        last_kind = last_text = nil
        new << [text, kind]
      end
    end
    new << [last_text, last_kind] if last_kind
    new
  end

  # Compact the object itself; see optimize.
  def optimize!
    replace optimize
  end

  # Ensure that all :open tokens have a corresponding :close one.
  #
  # TODO: Test this!
  def fix
    tokens = self.class.new
    # Check token nesting using a stack of expected closing tokens.
    opened = []
    each do |type, kind|
      case type
      when :open
        opened.push [:close, kind]
      when :begin_line
        opened.push [:end_line, kind]
      when :close, :end_line
        expected = opened.pop
        if [type, kind] != expected
          # Unexpected :close; decide what to do based on the kind:
          # - token was never opened: delete the :close (just skip it)
          next unless opened.rindex expected
          # - token was opened earlier: also close tokens in between
          tokens << token until (token = opened.pop) == expected
        end
      end
      tokens << [type, kind]
    end
    # Close remaining opened tokens.
    tokens << token while token = opened.pop
    tokens
  end

  def fix!
    replace fix
  end

  # TODO: Scanner#split_into_lines
  #
  # Makes sure that:
  # - newlines are single tokens
  #   (which means all other tokens are single-line)
  # - there are no open tokens at the end of a line
  #
  # This makes it simple for encoders that work line-oriented,
  # like HTML with list-style numeration.
  def split_into_lines
    raise NotImplementedError
  end

  def split_into_lines!
    replace split_into_lines
  end

  # Dumps the object into a String that can be saved
  # in files or databases.
  #
  # The dump is created with Marshal.dump;
  # in addition, it is gzipped using GZip.gzip.
  #
  # The returned String object includes Undumping
  # so it has an #undump method. See Tokens.load.
  #
  # You can configure the level of compression,
  # but the default value 7 should be what you want
  # in most cases as it is a good compromise between
  # speed and compression rate.
  #
  # See GZip module.
  def dump gzip_level = 7
    require 'coderay/helpers/gzip_simple'
    dump = Marshal.dump self
    dump = dump.gzip gzip_level
    dump.extend Undumping
  end

  # The total size (in characters) of all text tokens.
  # Should be equal to the input size before scanning.
  def text_size
    size = 0
    each_text_token do |t, k|
      # Fix: the original computed `size + t.size` and discarded the
      # result, so text_size always returned 0.
      size += t.size
    end
    size
  end

  # The concatenated text of all text tokens.
  # Range tokens like [:open, :string] are skipped.
  def text
    map { |t, k| t if t.is_a? ::String }.join
  end

  # Include this module to give an object an #undump
  # method.
  #
  # The string returned by Tokens.dump includes Undumping.
  module Undumping
    # Calls Tokens.load with itself.
    def undump
      Tokens.load self
    end
  end

  # Unzip the dump using GZip.gunzip, then undump it
  # using Marshal.load (the original doc comment had the
  # order reversed).
  #
  # The result is commonly a Tokens object, but
  # this is not guaranteed.
  #
  # SECURITY NOTE: Marshal.load must never be fed untrusted data;
  # only load dumps this application created itself.
  def Tokens.load dump
    require 'coderay/helpers/gzip_simple'
    dump = dump.gunzip
    @dump = Marshal.load dump
  end

end
272 | |
273 | |
274 # = TokenStream | |
275 # | |
276 # The TokenStream class is a fake Array without elements. | |
277 # | |
278 # It redirects the method << to a block given at creation. | |
279 # | |
280 # This allows scanners and Encoders to use streaming (no | |
281 # tokens are saved, the input is highlighted the same time it | |
282 # is scanned) with the same code. | |
283 # | |
284 # See CodeRay.encode_stream and CodeRay.scan_stream | |
class TokenStream < Tokens

  # A TokenStream is a stream, so this always returns true.
  def stream?
    true
  end

  # Number of tokens pushed via <<; the Array itself stays empty.
  attr_reader :size

  # Builds a TokenStream that hands every token pushed with <<
  # straight to +block+.
  #
  # Example:
  #
  #  require 'coderay'
  #
  #  token_stream = CodeRay::TokenStream.new do |kind, text|
  #    puts 'kind: %s, text size: %d.' % [kind, text.size]
  #  end
  #
  #  token_stream << [:regexp, '/\d+/']
  #  #-> kind: regexp, text size: 5.
  #
  def initialize &block
    raise ArgumentError, 'Block expected for streaming.' unless block
    @consumer = block
    @size = 0
  end

  # Feeds +token+ to the callback block, then bumps the counter.
  #
  # Returns self.
  def << token
    @consumer.call(*token)
    @size += 1
    self
  end

  # Not available for streams due to speed reasons; use Tokens.
  def text_size
    raise NotImplementedError,
      'This method is not implemented due to speed reasons.'
  end

  # Streams cannot be dumped; use Tokens.
  def dump
    raise NotImplementedError, 'A TokenStream cannot be dumped.'
  end

  # Streams cannot be optimized; use Tokens.
  def optimize
    raise NotImplementedError, 'A TokenStream cannot be optimized.'
  end

end
343 | |
344 end | |
345 | |
# When this file is executed directly, run the embedded unit tests
# stored after __END__ (read via the DATA constant).
if $0 == __FILE__
  $VERBOSE = true
  # Make the coderay lib directory loadable for the test code below.
  $: << File.join(File.dirname(__FILE__), '..')
  # Evaluate the embedded suite; the filename and line offset keep
  # error messages pointing at the right place in this file.
  eval DATA.read, nil, $0, __LINE__ + 4
end
351 | |
__END__
require 'test/unit'

# Basic unit tests for CodeRay::Tokens; this code lives after __END__
# and is only executed via `eval DATA.read` when the file is run
# directly (see the bootstrap above __END__).
class TokensTest < Test::Unit::TestCase

  # Tokens is an Array subclass and can be created without arguments.
  def test_creation
    assert CodeRay::Tokens < Array
    tokens = nil
    assert_nothing_raised do
      tokens = CodeRay::Tokens.new
    end
    assert_kind_of Array, tokens
  end

  # Tokens accepts [text, kind] pairs via <<.
  def test_adding_tokens
    tokens = CodeRay::Tokens.new
    assert_nothing_raised do
      tokens << ['string', :type]
      tokens << ['()', :operator]
    end
    assert_equal tokens.size, 2
  end

  # dump followed by undump round-trips the token list
  # (requires coderay/helpers/gzip_simple at runtime).
  def test_dump_undump
    tokens = CodeRay::Tokens.new
    assert_nothing_raised do
      tokens << ['string', :type]
      tokens << ['()', :operator]
    end
    tokens2 = nil
    assert_nothing_raised do
      tokens2 = tokens.dump.undump
    end
    assert_equal tokens, tokens2
  end

end