Mercurial > hg > soundsoftware-site
comparison vendor/gems/coderay-1.0.0/lib/coderay/tokens.rb @ 909:cbb26bc654de redmine-1.3
Update to Redmine 1.3-stable branch (Redmine SVN rev 8964)
author | Chris Cannam |
---|---|
date | Fri, 24 Feb 2012 19:09:32 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
908:c6c2cbd0afee | 909:cbb26bc654de |
---|---|
module CodeRay

  # GZip library for writing and reading token dumps.
  autoload :GZip, 'coderay/helpers/gzip'

  # = Tokens  TODO: Rewrite!
  #
  # The Tokens class represents a list of tokens returned from
  # a Scanner.
  #
  # A token is not a special object, just a two-element Array
  # consisting of
  # * the _token_ _text_ (the original source of the token in a String) or
  #   a _token_ _action_ (begin_group, end_group, begin_line, end_line)
  # * the _token_ _kind_ (a Symbol representing the type of the token)
  #
  # A token looks like this:
  #
  #   ['# It looks like this', :comment]
  #   ['3.1415926', :float]
  #   ['$^', :error]
  #
  # Some scanners also yield sub-tokens, represented by special
  # token actions, namely begin_group and end_group.
  #
  # The Ruby scanner, for example, splits "a string" into:
  #
  #  [
  #   [:begin_group, :string],
  #   ['"', :delimiter],
  #   ['a string', :content],
  #   ['"', :delimiter],
  #   [:end_group, :string]
  #  ]
  #
  # Tokens is the interface between Scanners and Encoders:
  # The input is split and saved into a Tokens object. The Encoder
  # then builds the output from this object.
  #
  # Thus, the syntax below becomes clear:
  #
  #   CodeRay.scan('price = 2.59', :ruby).html
  #   # the Tokens object is here -------^
  #
  # See how small it is? ;)
  #
  # Tokens gives you the power to handle pre-scanned code very easily:
  # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
  # that you put in your DB.
  #
  # It also allows you to generate tokens directly (without using a scanner),
  # to load them from a file, and still use any Encoder that CodeRay provides.
  class Tokens < Array

    # The Scanner instance that created the tokens.
    attr_accessor :scanner

    # Encode the tokens using encoder.
    #
    # encoder can be
    # * a symbol like :html or :statistic
    # * an Encoder class
    # * an Encoder object
    #
    # Anything that responds to +to_sym+ is looked up through Encoders[] and
    # instantiated with +options+; any other object is used as the encoder
    # as it is.
    #
    # options are passed to the encoder.
    def encode encoder, options = {}
      encoder = Encoders[encoder].new options if encoder.respond_to? :to_sym
      encoder.encode_tokens self, options
    end

    # Turn tokens into a string by concatenating them.
    def to_s
      encode CodeRay::Encoders::Encoder.new
    end

    # Redirects unknown methods to encoder calls.
    #
    # For example, if you call +tokens.html+, the HTML encoder
    # is used to highlight the tokens.
    #
    # If no encoder plugin is found for the method name, the call falls
    # through to the default +method_missing+ (and thus NoMethodError).
    def method_missing meth, options = {}
      encode meth, options
    rescue PluginHost::PluginNotFound
      super
    end

    # Split the tokens into parts of the given +sizes+.
    #
    # The result will be an Array of Tokens objects. The parts have
    # the text size specified by the parameter. In addition, each
    # part closes all opened tokens. This is useful to insert tokens
    # between them.
    #
    # Sizes that are not positive yield empty parts. Text that does not fit
    # into the given sizes is collected in one additional, trailing part.
    # The result always has at least as many parts as sizes were given;
    # missing ones are filled up with empty Tokens objects.
    #
    # Raises ArgumentError when the list contains an unknown token action
    # or something that is neither a String nor a Symbol in content position.
    #
    # This method is used by @Scanner#tokenize@ when called with an Array
    # of source strings. The Diff encoder uses it for inline highlighting.
    def split_into_parts *sizes
      parts = []
      opened = []    # stack of [action, kind] pairs that are currently open
      content = nil  # first half of the pair being read; nil between pairs
      part = Tokens.new
      part_size = 0
      limit = sizes.first
      index = 0
      # The list is flat (content, kind, content, kind, ...), so every
      # second element completes a token.
      each do |item|
        case content
        when nil
          content = item
        when String
          if limit && part_size + content.size > limit  # token must be cut
            if part_size < limit  # some part of the token goes into this part
              content = content.dup  # content may not be safe to change
              part << content.slice!(0, limit - part_size) << item
            end
            # close all open groups and lines...
            closing = opened.reverse.flatten.map do |content_or_kind|
              case content_or_kind
              when :begin_group then :end_group
              when :begin_line then :end_line
              else content_or_kind
              end
            end
            part.concat closing
            # Finish the current part; also emit an empty part for every
            # following size that is not positive.
            loop do
              parts << part
              part = Tokens.new
              limit = sizes[index += 1]
              break if limit.nil? || limit > 0
            end
            # ...and open them again.
            part.concat opened.flatten
            part_size = 0
            # Process the rest of this token again, now against the new part.
            redo unless content.empty?
          else
            part << content << item
            part_size += content.size
          end
          content = nil
        when Symbol
          case content
          when :begin_group, :begin_line
            opened << [content, item]
          when :end_group, :end_line
            opened.pop
          else
            raise ArgumentError, 'Unknown token action: %p, kind = %p' % [content, item]
          end
          part << content << item
          content = nil
        else
          raise ArgumentError, 'Token input junk: %p, kind = %p' % [content, item]
        end
      end
      parts << part
      parts << Tokens.new while parts.size < sizes.size
      parts
    end

    # Dumps the object into a String that can be saved
    # in files or databases.
    #
    # The dump is created with Marshal.dump;
    # In addition, it is gzipped using GZip.gzip.
    #
    # The returned String object includes Undumping
    # so it has an #undump method. See Tokens.load.
    #
    # You can configure the level of compression,
    # but the default value 7 should be what you want
    # in most cases as it is a good compromise between
    # speed and compression rate.
    #
    # See GZip module.
    def dump gzip_level = 7
      marshalled = Marshal.dump self
      zipped = GZip.gzip marshalled, gzip_level
      zipped.extend Undumping
    end

    # Return the actual number of tokens.
    #
    # Every token occupies two consecutive elements of the list
    # (text or action, followed by kind), hence half of the Array size.
    def count
      size / 2
    end

    # Include this module to give an object an #undump
    # method.
    #
    # The string returned by Tokens.dump includes Undumping.
    module Undumping
      # Calls Tokens.load with itself.
      def undump
        Tokens.load self
      end
    end

    # Undump the object: unzip it using GZip.gunzip, then
    # restore it using Marshal.load.
    #
    # The result is commonly a Tokens object, but
    # this is not guaranteed.
    #
    # SECURITY: Marshal.load can instantiate arbitrary objects. Only pass
    # dumps that come from a trusted source.
    def self.load dump
      # Return the object directly; nothing is cached on the class.
      Marshal.load GZip.gunzip(dump)
    end

    # Token builders. Each one appends two elements to the flat list:
    # the text (or the action Symbol) followed by the kind.
    alias text_token push
    def begin_group kind; push :begin_group, kind end
    def end_group kind; push :end_group, kind end
    def begin_line kind; push :begin_line, kind end
    def end_line kind; push :end_line, kind end
    # Appends all elements of another (flat) token list.
    alias tokens concat

  end

end