Mercurial > hg > soundsoftware-site
comparison vendor/gems/coderay-0.9.7/lib/coderay/.svn/text-base/scanner.rb.svn-base @ 210:0579821a129a
Update to Redmine trunk rev 4802
author | Chris Cannam |
---|---|
date | Tue, 08 Feb 2011 13:51:46 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
128:07fa8a8b56a8 | 210:0579821a129a |
---|---|
1 module CodeRay | |
2 | |
3 require 'coderay/helpers/plugin' | |
4 | |
5 # = Scanners | |
6 # | |
7 # This module holds the Scanner class and its subclasses. | |
8 # For example, the Ruby scanner is named CodeRay::Scanners::Ruby | |
9 # and can be found in coderay/scanners/ruby. | |
10 # | |
11 # Scanner also provides methods and constants for the register | |
12 # mechanism and the [] method that returns the Scanner class | |
13 # belonging to the given lang. | |
14 # | |
15 # See PluginHost. | |
16 module Scanners | |
17 extend PluginHost | |
18 plugin_path File.dirname(__FILE__), 'scanners' | |
19 | |
20 require 'strscan' | |
21 | |
22 # = Scanner | |
23 # | |
24 # The base class for all Scanners. | |
25 # | |
26 # It is a subclass of Ruby's great +StringScanner+, which | |
27 # makes it easy to access the scanning methods inside. | |
28 # | |
29 # It is also +Enumerable+, so you can use it like an Array of | |
30 # Tokens: | |
31 # | |
32 # require 'coderay' | |
33 # | |
34 # c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;" | |
35 # | |
36 # for text, kind in c_scanner | |
37 # puts text if kind == :operator | |
38 # end | |
39 # | |
40 # # prints: (*==)++; | |
41 # | |
42 # OK, this is a very simple example :) | |
43 # You can also use +map+, +any?+, +find+ and even +sort_by+, | |
44 # if you want. | |
45 class Scanner < StringScanner | |
46 | |
47 extend Plugin | |
48 plugin_host Scanners | |
49 | |
# Raised if a Scanner fails while scanning.
#
# Derives from StandardError (not Exception) so that an ordinary
# +rescue+ / +rescue StandardError+ around a scan catches it. Handlers
# that rescue ScanError or Exception explicitly keep working.
ScanError = Class.new(StandardError)
52 | |
53 require 'coderay/helpers/word_list' | |
54 | |
55 # The default options for all scanner classes. | |
56 # | |
57 # Define @default_options for subclasses. | |
58 DEFAULT_OPTIONS = { :stream => false } | |
59 | |
60 KINDS_NOT_LOC = [:comment, :doctype] | |
61 | |
62 class << self | |
63 | |
# Returns whether the Scanner class can be used in streaming mode.
#
# Scanner#initialize accepts the :stream option only when the instance
# is kind_of?(Streamable), which holds for classes that *include*
# Streamable. The class-level predicate therefore checks for inclusion
# too; the original is_a? check (class *extended* with Streamable) is
# kept so existing behaviour is preserved.
def streamable?
  is_a?(Streamable) || include?(Streamable)
end
68 | |
# Converts +code+ to a String, adjusts its encoding and normalizes line
# endings via String#to_unix.
#
# Strings without encoding support, and valid UTF-8 strings, only get
# their newlines normalized. Anything else is duplicated and re-tagged
# as Windows-1252; if that is not a valid interpretation, the original
# encoding is restored and the text is transcoded to UTF-8, replacing
# invalid or undefined sequences with '?'.
def normify code
  code = code.to_s
  return code.to_unix unless code.respond_to?(:encoding)
  return code.to_unix if code.encoding.name == 'UTF-8' && code.valid_encoding?

  # Work on a copy so the caller's string keeps its encoding tag.
  code = code.dup
  source_encoding = code.encoding
  code.force_encoding 'Windows-1252'
  return code.to_unix if code.valid_encoding?

  code.force_encoding source_encoding
  if code.encoding.name == 'UTF-8'
    # Round-trip through UTF-16BE so the invalid bytes get replaced.
    code.encode! 'UTF-16BE', :invalid => :replace, :undef => :replace, :replace => '?'
  end
  code.encode! 'UTF-8', :invalid => :replace, :undef => :replace, :replace => '?'
  code.to_unix
end
85 | |
# Combined reader/writer for the scanner's file extension.
#
# With an argument, stores its string form and returns it. Without one,
# returns the stored value, defaulting (and memoizing) to the string
# form of +plugin_id+.
def file_extension extension = nil
  return @file_extension = extension.to_s if extension
  @file_extension ||= plugin_id.to_s
end
93 | |
94 end | |
95 | |
96 =begin | |
97 ## Excluded for speed reasons; protected seems to make methods slow. | |
98 | |
99 # Save the StringScanner methods from being called. | |
100 # This would not be useful for highlighting. | |
101 strscan_public_methods = | |
102 StringScanner.instance_methods - | |
103 StringScanner.ancestors[1].instance_methods | |
104 protected(*strscan_public_methods) | |
105 =end | |
106 | |
# Create a new Scanner.
#
# * +code+ is the input String; it is normalized with Scanner.normify
#   and handed to the StringScanner superclass.
# * +options+ is a Hash with Symbols as keys. It is merged over the
#   class's DEFAULT_OPTIONS, so defaults can be overridden here.
#   A token container may be supplied as options[:tokens].
# * +block+ is the callback for streamed highlighting.
#
# If :stream is true in the options, a TokenStream using +block+ as
# callback collects the tokens (the scanner must be Streamable,
# otherwise NotStreamableError is raised). Else, a Tokens object is used.
#
# Raises a RuntimeError when called on the Scanner base class itself.
def initialize code='', options = {}, &block
  if self.class == Scanner
    raise "I am only the basic Scanner class. I can't scan "\
      "anything. :( Use my subclasses."
  end

  @options = self.class::DEFAULT_OPTIONS.merge options

  super Scanner.normify(code)

  # Only a container passed explicitly by the caller is reused.
  @tokens = options[:tokens]
  if @options[:stream]
    unless block_given?
      warn "warning in CodeRay::Scanner.new: :stream is set, "\
        "but no block was given"
    end
    raise NotStreamableError, self unless kind_of? Streamable
    @tokens ||= TokenStream.new(&block)
  else
    if block_given?
      warn "warning in CodeRay::Scanner.new: Block given, "\
        "but :stream is #{@options[:stream]}"
    end
    @tokens ||= Tokens.new
  end
  @tokens.scanner = self

  setup
end
143 | |
# Rewinds the underlying StringScanner, then clears this scanner's
# per-scan state (see reset_instance).
def reset
  super()
  reset_instance
end
148 | |
# Replaces the input string. The new code is normalized with
# Scanner.normify first, and the per-scan state is cleared afterwards.
def string= code
  normalized = Scanner.normify(code)
  # Rubinius 1.0.1 gets a hand-rolled assignment instead of the
  # superclass setter (presumably a workaround for that release).
  on_rubinius_1_0_1 = defined?(RUBY_DESCRIPTION) && RUBY_DESCRIPTION['rubinius 1.0.1']
  if on_rubinius_1_0_1
    reset_state
    @string = normalized
  else
    super normalized
  end
  reset_instance
end
159 | |
160 # More mnemonic accessor name for the input string. | |
161 alias code string | |
162 alias code= string= | |
163 | |
# Returns the Plugin ID of this scanner's class.
def lang
  scanner_class = self.class
  scanner_class.plugin_id
end
168 | |
# Scans the code and returns all tokens.
#
# * +new_string+ replaces the current input when given.
# * +options+ is merged over the scanner's options and passed on to
#   scan_tokens.
#
# The result is remembered for #tokens. In streaming mode the token
# container itself is returned, and the scanner is reset first unless a
# new string was assigned. Whether streaming is active is decided by
# the options given at construction time, not by +options+.
def tokenize new_string=nil, options = {}
  scan_options = @options.merge(options)
  self.string = new_string if new_string

  unless @options[:stream] # :stream must have been set already
    return @cached_tokens = scan_tokens(@tokens, scan_options)
  end

  reset unless new_string
  scan_tokens @tokens, scan_options
  @cached_tokens = @tokens
end
182 | |
# Returns the tokens of the last scan, running tokenize first if no
# result has been cached yet.
def tokens
  @cached_tokens = tokenize unless @cached_tokens
  @cached_tokens
end
186 | |
# Whether the scanner is in streaming mode (the :stream option is
# truthy). Always returns +true+ or +false+.
def streaming?
  @options[:stream] ? true : false
end
191 | |
# Traverses the tokens, yielding each one to +block+.
#
# Raises ArgumentError in streaming mode, where tokens cannot be
# traversed.
def each &block
  if @options[:stream]
    raise ArgumentError, 'Cannot traverse TokenStream.'
  end
  tokens.each(&block)
end
198 include Enumerable | |
199 | |
# The current line position of the scanner, starting with 1.
#
# Counts the newlines strictly before the scan position: while the
# scanner sits on a newline character it is still on the line that the
# newline terminates. +pos+ is a byte offset, so the prefix is taken by
# bytes where the Ruby version supports it; this keeps the result
# correct when multibyte characters precede the position.
#
# Beware, this is implemented inefficiently. It should be used
# for debugging only.
def line
  prefix =
    if string.respond_to?(:byteslice)
      # Binary tagging makes counting safe regardless of the encoding.
      string.byteslice(0, pos).force_encoding('binary')
    else
      string[0, pos]
    end
  prefix.count("\n") + 1
end
207 | |
# Column of +pos+ (a byte offset; defaults to the current scan position).
#
# Returns 0 for positions <= 0. Otherwise the result is +pos+ minus the
# index of the last newline at or before +pos+; when there is no such
# newline (first line), +pos+ itself is returned.
#
# When the string contains multibyte characters, a binary copy is cached
# in @bin_string so that indices are byte based like +pos+.
def column pos = self.pos
  return 0 if pos <= 0
  text = string
  if text.respond_to?(:bytesize)
    if defined?(@bin_string) || text.bytesize != text.size
      @bin_string ||= text.dup.force_encoding('binary')
      text = @bin_string
    end
  end
  newline_index = text.rindex(?\n, pos)
  newline_index ? pos - newline_index : pos
end
217 | |
# Marshal hook: only the options Hash is serialized.
def marshal_dump
  @options
end
221 | |
# Marshal hook: restores the options Hash written by marshal_dump.
# No other instance state is restored here.
def marshal_load options
  @options = options
end
225 | |
226 protected | |
227 | |
# Hook for subclasses: initialization that has to be done once per
# instance. It is called at the end of #initialize; the default does
# nothing.
#
# Use reset for initialization that has to be done once per scan.
def setup
  nil
end
235 | |
# This is the central method, and commonly the only one a subclass
# implements.
#
# Subclasses must implement this method; it must return +tokens+
# and must only use Tokens#<< for storing scanned tokens!
#
# The base implementation raises NotImplementedError.
def scan_tokens tokens, options
  message = "#{self.class}#scan_tokens not implemented."
  raise NotImplementedError, message
end
245 | |
# Clears the per-scan state: empties the token container (unless the
# :keep_tokens option is set), forgets the cached scan result and drops
# the cached binary copy of the input if one was ever created.
def reset_instance
  keep_tokens = @options[:keep_tokens]
  @tokens.clear unless keep_tokens
  @cached_tokens = nil
  if defined?(@bin_string)
    @bin_string = nil
  end
end
251 | |
# Raises a ScanError with additional status information.
#
# * +msg+ is the error description.
# * +tokens+ are the tokens scanned so far; the last 10 are printed.
# * +state+ is the scanner state to report.
# * +ambit+ is the number of characters of input shown before and
#   after the current position.
def raise_inspect msg, tokens, state = 'No state given!', ambit = 30
  # Clamp the start of the leading excerpt at 0: a negative start would
  # index from the end of the string and show unrelated code (or nil)
  # whenever the error occurs within the first +ambit+ characters.
  excerpt_start = [pos - ambit, 0].max
  code_before = string[excerpt_start, pos - excerpt_start]
  code_after = string[pos, ambit]

  details = [
    File.basename(caller[0]),
    msg,
    tokens.size,
    tokens.last(10).map { |t| t.inspect }.join("\n"),
    line, column, pos,
    matched, state, bol?, eos?,
    code_before,
    code_after,
  ]
  raise ScanError, <<-EOE % details


***ERROR in %s: %s (after %d tokens)

tokens:
%s

current line: %d column: %d pos: %d
matched: %p state: %p
bol? = %p, eos? = %p

surrounding code:
%p ~~ %p


***ERROR***

  EOE
end
283 | |
284 end | |
285 | |
286 end | |
287 end | |
288 | |
class String
  # Normalizes line endings: every "\r\n" pair and every lone "\r"
  # becomes "\n". Strings without any carriage return are returned
  # as-is (the receiver itself, not a copy).
  def to_unix
    return self unless include?("\r")
    gsub(/\r\n?/, "\n")
  end
end