Chris@0
|
1 module CodeRay
|
Chris@0
|
2
|
Chris@0
|
3 require 'coderay/helpers/plugin'
|
Chris@0
|
4
|
Chris@0
|
5 # = Scanners
|
Chris@0
|
6 #
|
Chris@0
|
7 # This module holds the Scanner class and its subclasses.
|
Chris@0
|
8 # For example, the Ruby scanner is named CodeRay::Scanners::Ruby
|
Chris@0
|
9 # and can be found in coderay/scanners/ruby.
|
Chris@0
|
10 #
|
Chris@0
|
11 # Scanner also provides methods and constants for the register
|
Chris@0
|
12 # mechanism and the [] method that returns the Scanner class
|
Chris@0
|
13 # belonging to the given lang.
|
Chris@0
|
14 #
|
Chris@0
|
15 # See PluginHost.
|
Chris@0
|
16 module Scanners
|
Chris@0
|
17 extend PluginHost
|
Chris@0
|
18 plugin_path File.dirname(__FILE__), 'scanners'
|
Chris@0
|
19
|
Chris@0
|
20 require 'strscan'
|
Chris@0
|
21
|
Chris@0
|
22 # = Scanner
|
Chris@0
|
23 #
|
Chris@0
|
24 # The base class for all Scanners.
|
Chris@0
|
25 #
|
Chris@0
|
26 # It is a subclass of Ruby's great +StringScanner+, which
|
Chris@0
|
27 # makes it easy to access the scanning methods inside.
|
Chris@0
|
28 #
|
Chris@0
|
29 # It is also +Enumerable+, so you can use it like an Array of
|
Chris@0
|
30 # Tokens:
|
Chris@0
|
31 #
|
Chris@0
|
32 # require 'coderay'
|
Chris@0
|
33 #
|
Chris@0
|
34 # c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;"
|
Chris@0
|
35 #
|
Chris@0
|
36 # for text, kind in c_scanner
|
Chris@0
|
37 # puts text if kind == :operator
|
Chris@0
|
38 # end
|
Chris@0
|
39 #
|
Chris@0
|
40 # # prints: (*==)++;
|
Chris@0
|
41 #
|
Chris@0
|
42 # OK, this is a very simple example :)
|
Chris@0
|
43 # You can also use +map+, +any?+, +find+ and even +sort_by+,
|
Chris@0
|
44 # if you want.
|
Chris@0
|
45 class Scanner < StringScanner
|
Chris@0
|
46
|
Chris@0
|
47 extend Plugin
|
Chris@0
|
48 plugin_host Scanners
|
Chris@0
|
49
|
Chris@0
|
# Raised if a Scanner fails while scanning.
#
# Derives from StandardError so that a plain +rescue+ catches it;
# <tt>rescue ScanError</tt> and <tt>rescue Exception</tt> keep working.
ScanError = Class.new(StandardError)
|
Chris@0
|
52
|
Chris@0
|
53 require 'coderay/helpers/word_list'
|
Chris@0
|
54
|
Chris@0
|
# The default options for all scanner classes.
#
# Scanner#initialize reads this constant through
# <tt>self.class::DEFAULT_OPTIONS</tt> and merges the caller's options
# into it.
DEFAULT_OPTIONS = { :stream => false }

# Token kinds listed here are presumably the ones that do not count as
# lines of code (going by the constant's name; not used in this file).
KINDS_NOT_LOC = [:comment, :doctype]
|
Chris@0
|
61
|
Chris@0
|
62 class << self
|
Chris@0
|
63
|
Chris@0
|
# Returns whether the Scanner class can be used in streaming mode.
#
# A class counts as streamable when the class object itself was extended
# with Streamable, or when it includes Streamable (the same condition
# Scanner#initialize checks on the instance with +kind_of?+).
def streamable?
  is_a?(Streamable) || include?(Streamable)
end
|
Chris@0
|
68
|
Chris@0
|
# Normalizes +code+ before it is handed to the scanner.
#
# * Converts +code+ to a String with +to_s+.
# * On Rubies with string encodings: tags the string as UTF-8, or as
#   binary when its bytes are not valid UTF-8.
# * Converts DOS/Mac line endings to "\n" via String#to_unix.
#
# A non-frozen input String has its encoding changed in place; a frozen
# one (for example the result of <tt>nil.to_s</tt>) is duplicated first
# so that +force_encoding+ does not raise.
def normify code
  code = code.to_s
  if code.respond_to? :force_encoding
    code = code.dup if code.frozen?
    code.force_encoding 'utf-8'
    # valid_encoding? replaces the old "match a regexp and rescue
    # ArgumentError" probe, so $DEBUG no longer has to be silenced.
    code.force_encoding 'binary' unless code.valid_encoding?
  end
  code.to_unix
end
|
Chris@0
|
84
|
Chris@0
|
# Reads or sets the file extension associated with this scanner class.
#
# With an argument, stores its +to_s+ form and returns it. Without one,
# returns the stored value, falling back to (and memoizing)
# <tt>plugin_id.to_s</tt>.
def file_extension extension = nil
  return @file_extension = extension.to_s if extension
  @file_extension ||= plugin_id.to_s
end
|
Chris@0
|
92
|
Chris@0
|
93 end
|
Chris@0
|
94
|
Chris@0
|
95 =begin
|
Chris@0
|
96 ## Excluded for speed reasons; protected seems to make methods slow.
|
Chris@0
|
97
|
Chris@0
|
98 # Save the StringScanner methods from being called.
|
Chris@0
|
99 # This would not be useful for highlighting.
|
Chris@0
|
100 strscan_public_methods =
|
Chris@0
|
101 StringScanner.instance_methods -
|
Chris@0
|
102 StringScanner.ancestors[1].instance_methods
|
Chris@0
|
103 protected(*strscan_public_methods)
|
Chris@0
|
104 =end
|
Chris@0
|
105
|
Chris@0
|
# Create a new Scanner.
#
# * +code+ is the input String; it is normalized with Scanner.normify
#   and then handled by the superclass StringScanner.
# * +options+ is a Hash with Symbols as keys.
#   It is merged with the default options of the class (you can
#   overwrite default options here.) <tt>options[:tokens]</tt>, when
#   given, is used as the token container.
# * +block+ is the callback for streamed highlighting.
#
# If you set :stream to +true+ in the options, the Scanner uses a
# TokenStream with the +block+ as callback to handle the tokens.
#
# Else, a Tokens object is used.
#
# Raises a RuntimeError when called on Scanner itself, and
# NotStreamableError when :stream is set but the scanner is not a
# Streamable.
def initialize code = '', options = {}, &block
  if self.class == Scanner
    raise "I am only the basic Scanner class. I can't scan "\
      "anything. :( Use my subclasses."
  end

  @options = self.class::DEFAULT_OPTIONS.merge options

  super Scanner.normify(code)

  @tokens = options[:tokens]
  if @options[:stream]
    unless block_given?
      warn "warning in CodeRay::Scanner.new: :stream is set, "\
        "but no block was given"
    end
    raise NotStreamableError, self unless kind_of? Streamable
    @tokens ||= TokenStream.new(&block)
  else
    if block_given?
      warn "warning in CodeRay::Scanner.new: Block given, "\
        "but :stream is #{@options[:stream]}"
    end
    @tokens ||= Tokens.new
  end
  @tokens.scanner = self

  setup
end
|
Chris@0
|
142
|
Chris@0
|
# Rewinds the scanner (StringScanner#reset) and then clears the
# per-scan state via reset_instance.
def reset
  super
  reset_instance
end
|
Chris@0
|
147
|
Chris@0
|
# Replaces the input string. +code+ is normalized with Scanner.normify
# before it is passed to StringScanner#string=, and the per-scan state
# is cleared afterwards via reset_instance.
def string= code
  super Scanner.normify(code)
  reset_instance
end

# More mnemonic accessor names for the input string.
alias code string
alias code= string=
|
Chris@0
|
157
|
Chris@0
|
# Returns the Plugin ID for this scanner, as reported by its class.
def lang
  self.class.plugin_id
end
|
Chris@0
|
162
|
Chris@0
|
# Scans the code and returns all tokens.
#
# * +new_string+, when given, replaces the input first (which also
#   resets the per-scan state through #string=).
# * +options+ is merged over the instance options and handed to
#   scan_tokens.
#
# In streaming mode the scanner is reset when no new string was given,
# and the token container itself is returned. Otherwise the return
# value of scan_tokens is returned. Either way the result is cached for
# #tokens.
def tokenize new_string = nil, options = {}
  options = @options.merge(options)
  self.string = new_string if new_string
  if @options[:stream] # :stream must have been set already
    reset unless new_string
    scan_tokens @tokens, options
    @cached_tokens = @tokens
  else
    @cached_tokens = scan_tokens @tokens, options
  end
end
|
Chris@0
|
176
|
Chris@0
|
# Returns the cached result of the last scan, running #tokenize first
# when nothing has been cached yet.
def tokens
  @cached_tokens ||= tokenize
end
|
Chris@0
|
180
|
Chris@0
|
# Whether the scanner is in streaming mode, i.e. whether the :stream
# option is set. Always returns +true+ or +false+.
def streaming?
  @options[:stream] ? true : false
end
|
Chris@0
|
185
|
Chris@0
|
# Traverses the tokens, yielding each one to +block+.
#
# Raises ArgumentError in streaming mode, because a TokenStream cannot
# be traversed.
def each &block
  if @options[:stream]
    raise ArgumentError, 'Cannot traverse TokenStream.'
  end
  tokens.each(&block)
end
include Enumerable
|
Chris@0
|
193
|
Chris@0
|
# The line number (1-based) of byte position +pos+, which defaults to
# the current scanner position.
#
# Only newlines strictly before +pos+ are counted, so a scanner sitting
# on a "\n" is still on the line that newline terminates. StringScanner
# positions are byte offsets, so the prefix is taken by bytes where the
# String supports it.
#
# Beware, this is implemented inefficiently. It should be used
# for debugging only.
def line pos = self.pos
  return 1 if pos <= 0
  source = string
  head = source.respond_to?(:byteslice) ? source.byteslice(0, pos) : source[0, pos]
  head.count("\n") + 1
end
|
Chris@0
|
201
|
Chris@0
|
# The column (1-based, counted in bytes) of byte position +pos+, which
# defaults to the current scanner position.
#
# The first byte of every line, including the first line, is column 1.
# A newline belongs to the line it terminates.
def column pos = self.pos
  return 1 if pos <= 0
  source = string
  # Scanner positions are byte offsets; once the input is known to hold
  # multibyte characters, search a cached binary copy instead.
  if source.respond_to?(:bytesize) && (defined?(@bin_string) || source.bytesize != source.size)
    @bin_string ||= source.dup.force_encoding('binary')
    source = @bin_string
  end
  # Look for the previous newline strictly before +pos+; -1 stands in for
  # an imaginary newline in front of the first line.
  pos - (source.rindex("\n", pos - 1) || -1)
end
|
Chris@0
|
211
|
Chris@0
|
# Marshal hook: only the options Hash is serialized.
def marshal_dump
  @options
end
|
Chris@0
|
215
|
Chris@0
|
# Marshal hook: restores the options Hash written by marshal_dump.
def marshal_load options
  @options = options
end
|
Chris@0
|
219
|
Chris@0
|
220 protected
|
Chris@0
|
221
|
Chris@0
|
# Hook called at the end of #initialize. Can be implemented by
# subclasses to do some initialization that has to be done once per
# instance. The default does nothing.
#
# Use reset for initialization that has to be done once per
# scan.
def setup
end
|
Chris@0
|
229
|
Chris@0
|
# This is the central method, and commonly the only one a
# subclass implements.
#
# Subclasses must implement this method; it must return +tokens+
# and must only use Tokens#<< for storing scanned tokens!
#
# The base implementation always raises NotImplementedError.
def scan_tokens tokens, options
  raise NotImplementedError,
    "#{self.class}#scan_tokens not implemented."
end
|
Chris@0
|
239
|
Chris@0
|
# Clears the per-scan state: empties the token container (unless the
# :keep_tokens option is set), drops the cached scan result, and
# invalidates the cached binary copy of the input if one was created.
def reset_instance
  @tokens.clear unless @options[:keep_tokens]
  @cached_tokens = nil
  # Only touch @bin_string when it already exists, so it is not
  # introduced on scanners that never needed it.
  @bin_string = nil if defined? @bin_string
end
|
Chris@0
|
245
|
Chris@0
|
# Raises a ScanError with additional status information.
#
# * +msg+ is the error description.
# * +tokens+ are the tokens scanned so far; their number and the last
#   ten of them are reported.
# * +state+ is the scanner state to report.
# * +ambit+ is how many characters of input to show before and after
#   the current position.
def raise_inspect msg, tokens, state = 'No state given!', ambit = 30
  # Clamp the window start: a negative start index would count from the
  # end of the input and show the wrong text (or nil) near the beginning.
  window_start = [pos - ambit, 0].max
  details = [
    File.basename(caller[0]),
    msg,
    tokens.size,
    tokens.last(10).map { |t| t.inspect }.join("\n"),
    line, column, pos,
    matched, state, bol?, eos?,
    string[window_start, pos - window_start],
    string[pos, ambit],
  ]
  raise ScanError, <<-EOE % details


***ERROR in %s: %s (after %d tokens)

tokens:
%s

current line: %d column: %d pos: %d
matched: %p state: %p
bol? = %p, eos? = %p

surrounding code:
%p ~~ %p


***ERROR***

  EOE
end
|
Chris@0
|
277
|
Chris@0
|
278 end
|
Chris@0
|
279
|
Chris@0
|
280 end
|
Chris@0
|
281 end
|
Chris@0
|
282
|
Chris@0
|
class String
  # Returns the string with DOS ("\r\n") and old Mac ("\r") line endings
  # converted to "\n".
  #
  # When the string contains no "\r" at all, the receiver itself is
  # returned (no copy is made); otherwise a new String is returned.
  def to_unix
    return self unless index("\r")
    gsub(/\r\n?/, "\n")
  end
end
|