Mercurial > hg > soundsoftware-site
comparison vendor/plugins/coderay-0.9.2/lib/coderay/scanner.rb @ 0:513646585e45
* Import Redmine trunk SVN rev 3859
author | Chris Cannam |
---|---|
date | Fri, 23 Jul 2010 15:52:44 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:513646585e45 |
---|---|
1 module CodeRay | |
2 | |
3 require 'coderay/helpers/plugin' | |
4 | |
5 # = Scanners | |
6 # | |
7 # This module holds the Scanner class and its subclasses. | |
8 # For example, the Ruby scanner is named CodeRay::Scanners::Ruby and | |
9 # can be found in coderay/scanners/ruby. | |
10 # | |
11 # Scanner also provides methods and constants for the register | |
12 # mechanism and the [] method that returns the Scanner class | |
13 # belonging to the given lang. | |
14 # | |
15 # See PluginHost. | |
16 module Scanners | |
17 extend PluginHost | |
18 plugin_path File.dirname(__FILE__), 'scanners' | |
19 | |
20 require 'strscan' | |
21 | |
22 # = Scanner | |
23 # | |
24 # The base class for all Scanners. | |
25 # | |
26 # It is a subclass of Ruby's great +StringScanner+, which | |
27 # makes it easy to access the scanning methods inside. | |
28 # | |
29 # It is also +Enumerable+, so you can use it like an Array of | |
30 # Tokens: | |
31 # | |
32 # require 'coderay' | |
33 # | |
34 # c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;" | |
35 # | |
36 # for text, kind in c_scanner | |
37 # puts text if kind == :operator | |
38 # end | |
39 # | |
40 # # prints: (*==)++; | |
41 # | |
42 # OK, this is a very simple example :) | |
43 # You can also use +map+, +any?+, +find+ and even +sort_by+, | |
44 # if you want. | |
45 class Scanner < StringScanner | |
46 | |
47 extend Plugin | |
48 plugin_host Scanners | |
49 | |
# Raised if a Scanner fails while scanning.
#
# Derives from StandardError so that a plain +rescue+ clause catches it
# and it cannot be mistaken for interpreter-level exceptions such as
# SystemExit or Interrupt.
ScanError = Class.new(StandardError)
52 | |
53 require 'coderay/helpers/word_list' | |
54 | |
# The default options for all scanner classes.
#
# #initialize looks this constant up through self.class and merges the
# options given by the caller over it, so a subclass can supply its own
# DEFAULT_OPTIONS. Frozen because it is shared by every instance.
DEFAULT_OPTIONS = { :stream => false }.freeze

# Token kinds singled out by name as "not LOC" - presumably kinds that
# do not count as lines of code; confirm against the users of this
# constant. Frozen because it is shared.
KINDS_NOT_LOC = [:comment, :doctype].freeze
61 | |
62 class << self | |
63 | |
# Returns whether the Scanner class can be used in streaming mode.
#
# #initialize tests instances with kind_of? Streamable, which is true
# for classes that +include+ Streamable, so the class-level check asks
# whether the module is among the included modules. Classes that
# +extend+ Streamable are still reported as streamable, as before.
def streamable?
  include?(Streamable) || is_a?(Streamable)
end
68 | |
# Converts +code+ into a String that is ready to be scanned.
#
# * +code+ is converted with to_s.
# * On Rubies that have String#force_encoding, the text is tagged as
#   UTF-8 when its bytes form valid UTF-8 and as binary otherwise.
# * Line endings are normalized with String#to_unix.
#
# The encoding is changed on a copy, so the caller's string keeps its
# own encoding and frozen strings are accepted.
def normify code
  code = code.to_s
  if code.respond_to? :force_encoding
    # force_encoding changes its receiver in place, hence the copy.
    code = code.dup.force_encoding('utf-8')
    code.force_encoding('binary') unless code.valid_encoding?
  end
  code.to_unix
end
84 | |
# Reads or sets the file extension associated with this scanner class.
#
# With an argument, stores extension.to_s and returns it. Without one,
# returns the stored value; when nothing has been stored yet,
# plugin_id.to_s is stored and returned.
def file_extension(extension = nil)
  return @file_extension = extension.to_s if extension
  @file_extension ||= plugin_id.to_s
end
92 | |
93 end | |
94 | |
95 =begin | |
96 ## Excluded for speed reasons; protected seems to make methods slow. | |
97 | |
98 # Save the StringScanner methods from being called. | |
99 # This would not be useful for highlighting. | |
100 strscan_public_methods = | |
101 StringScanner.instance_methods - | |
102 StringScanner.ancestors[1].instance_methods | |
103 protected(*strscan_public_methods) | |
104 =end | |
105 | |
# Builds a new Scanner around +code+.
#
# * +code+ is the text to scan; it is passed through Scanner.normify
#   and then handed to the StringScanner superclass.
# * +options+ is a Hash with Symbols as keys. It is merged over the
#   DEFAULT_OPTIONS of the receiver's class, so entries given here win.
# * +block+ is the callback for streamed highlighting.
#
# With :stream set, tokens are collected in a TokenStream built from
# +block+; otherwise a Tokens object is used. An object passed as
# options[:tokens] takes the place of a newly created collector.
#
# Raises a RuntimeError when called on Scanner itself and
# NotStreamableError when :stream is set for a scanner that is not
# Streamable.
def initialize(code = '', options = {}, &block)
  if self.class == Scanner
    raise "I am only the basic Scanner class. I can't scan "\
      "anything. :( Use my subclasses."
  end

  @options = self.class::DEFAULT_OPTIONS.merge(options)

  super(Scanner.normify(code))

  @tokens = options[:tokens]
  if @options[:stream]
    unless block_given?
      warn "warning in CodeRay::Scanner.new: :stream is set, "\
        "but no block was given"
    end
    raise NotStreamableError, self unless kind_of?(Streamable)
    @tokens ||= TokenStream.new(&block)
  else
    if block_given?
      warn "warning in CodeRay::Scanner.new: Block given, "\
        "but :stream is #{@options[:stream]}"
    end
    @tokens ||= Tokens.new
  end
  # Let the token collector know which scanner fills it.
  @tokens.scanner = self

  setup
end
142 | |
# Rewinds the scanner through the superclass implementation and then
# clears the per-scan state with reset_instance.
def reset
  super()
  reset_instance
end
147 | |
# Replaces the input with +code+ after passing it through
# Scanner.normify, then clears the per-scan state with reset_instance.
def string=(code)
  super(Scanner.normify(code))
  reset_instance
end

# More mnemonic accessor names for the input string.
alias code string
alias code= string=
157 | |
# Returns the Plugin ID of this scanner's class.
def lang
  scanner_class = self.class
  scanner_class.plugin_id
end
162 | |
# Scans the code and returns all tokens.
#
# * +new_string+, when given, replaces the scanner's input first.
# * +options+ is merged over the instance options and handed to
#   scan_tokens.
#
# In streaming mode the token collector itself is returned, after a
# reset when no new string was given. Otherwise the value returned by
# scan_tokens is returned. Either result is remembered for #tokens.
def tokenize(new_string = nil, options = {})
  scan_options = @options.merge(options)
  self.string = new_string if new_string

  unless @options[:stream]
    return @cached_tokens = scan_tokens(@tokens, scan_options)
  end

  # :stream must have been set already
  reset unless new_string
  scan_tokens(@tokens, scan_options)
  @cached_tokens = @tokens
end
176 | |
# Returns the cached result of the last scan, running #tokenize first
# when there is no cached result.
def tokens
  return @cached_tokens if @cached_tokens
  @cached_tokens = tokenize
end
180 | |
# Whether the scanner is in streaming mode, i.e. whether the :stream
# option is set. Always answers with +true+ or +false+.
def streaming?
  @options[:stream] ? true : false
end
185 | |
# Traverses the tokens, passing +block+ on to the token collection.
#
# Raises ArgumentError when the :stream option is set.
def each(&block)
  if @options[:stream]
    raise ArgumentError, 'Cannot traverse TokenStream.'
  end
  tokens.each(&block)
end
192 include Enumerable | |
193 | |
# The current line position of the scanner (1-based).
#
# Only the text before the scan pointer is counted, so a newline that
# has not been consumed yet does not advance the line number. Where
# String#byteslice exists the prefix is taken by bytes, because the
# scan pointer is a byte offset.
#
# Beware, this is implemented inefficiently. It should be used
# for debugging only.
def line
  source = string
  consumed =
    if source.respond_to?(:byteslice)
      source.byteslice(0, pos)
    else
      source[0, pos]
    end
  consumed.count("\n") + 1
end
201 | |
# Distance from +pos+ back to the nearest newline found at or before
# +pos+; when there is none, +pos+ itself. Returns 0 for positions
# that are not positive.
#
# When the input contains multi-byte characters (or a binary copy was
# made earlier), the search runs on a binary-encoded copy of the input
# that is cached in @bin_string.
def column(pos = self.pos)
  return 0 unless pos > 0

  text = string
  if text.respond_to?(:bytesize)
    if defined?(@bin_string) || text.bytesize != text.size
      @bin_string ||= text.dup.force_encoding('binary')
      text = @bin_string
    end
  end

  newline_at = text.rindex(?\n, pos)
  newline_at ? pos - newline_at : pos
end
211 | |
# Marshal hook: the options Hash is the only state that gets dumped.
def marshal_dump()
  @options
end
215 | |
# Marshal hook: restores the options Hash written by marshal_dump.
def marshal_load(options)
  @options = options
end
219 | |
220 protected | |
221 | |
# Hook for subclasses: initialization that has to happen once per
# instance. #initialize calls it as its last step. The default
# implementation does nothing.
#
# Use reset for initialization that has to be done once per scan.
def setup; end
229 | |
# The central method, and commonly the only one a subclass implements.
#
# Subclasses must implement this method; it must return +tokens+
# and must only use Tokens#<< for storing scanned tokens!
#
# This default implementation always raises NotImplementedError.
def scan_tokens(tokens, options)
  message = "#{self.class}#scan_tokens not implemented."
  raise NotImplementedError, message
end
239 | |
# Clears the per-scan state: empties the token collector (unless the
# :keep_tokens option is set), drops the cached scan result and
# forgets the binary copy of the input that #column may have made.
def reset_instance
  keep = @options[:keep_tokens]
  @tokens.clear unless keep
  @cached_tokens = nil
  # Only touch @bin_string when it exists already; #column checks
  # whether the variable is defined.
  @bin_string = nil if instance_variable_defined?(:@bin_string)
  nil
end
245 | |
# Raises a ScanError with additional status information.
#
# * +msg+ describes what went wrong.
# * +tokens+ are the tokens scanned so far; their number and the last
#   ten of them are part of the report.
# * +state+ is the scanner state to report.
# * +ambit+ is the number of characters of code shown before and after
#   the current position.
def raise_inspect msg, tokens, state = 'No state given!', ambit = 30
  # Clamp the start of the preceding excerpt: with a negative start,
  # String#[] would count from the end of the string instead.
  before_start = [pos - ambit, 0].max
  raise ScanError, <<-EOE % [


***ERROR in %s: %s (after %d tokens)

tokens:
%s

current line: %d column: %d pos: %d
matched: %p state: %p
bol? = %p, eos? = %p

surrounding code:
%p ~~ %p


***ERROR***

  EOE
    File.basename(caller[0]),
    msg,
    tokens.size,
    tokens.last(10).map { |t| t.inspect }.join("\n"),
    line, column, pos,
    matched, state, bol?, eos?,
    string[before_start, pos - before_start],
    string[pos, ambit],
  ]
end
277 | |
278 end | |
279 | |
280 end | |
281 end | |
282 | |
class String
  # Newline normalizer: returns a copy in which every "\r\n" pair and
  # every lone "\r" is replaced by "\n". A string without any "\r" is
  # returned as it is (the receiver itself, not a copy).
  def to_unix
    return self unless include?("\r")
    gsub(/\r\n?/, "\n")
  end
end