comparison vendor/plugins/coderay-0.9.2/lib/coderay/scanner.rb @ 0:513646585e45

* Import Redmine trunk SVN rev 3859
author Chris Cannam
date Fri, 23 Jul 2010 15:52:44 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:513646585e45
1 module CodeRay
2
3 require 'coderay/helpers/plugin'
4
5 # = Scanners
6 #
7 # This module holds the Scanner class and its subclasses.
8 # For example, the Ruby scanner is named CodeRay::Scanners::Ruby
9 # and can be found in coderay/scanners/ruby.
10 #
11 # Scanner also provides methods and constants for the register
12 # mechanism and the [] method that returns the Scanner class
13 # belonging to the given lang.
14 #
15 # See PluginHost.
16 module Scanners
17 extend PluginHost
18 plugin_path File.dirname(__FILE__), 'scanners'
19
20 require 'strscan'
21
22 # = Scanner
23 #
24 # The base class for all Scanners.
25 #
26 # It is a subclass of Ruby's great +StringScanner+, which
27 # makes it easy to access the scanning methods inside.
28 #
29 # It is also +Enumerable+, so you can use it like an Array of
30 # Tokens:
31 #
32 # require 'coderay'
33 #
34 # c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;"
35 #
36 # for text, kind in c_scanner
37 # puts text if kind == :operator
38 # end
39 #
40 # # prints: (*==)++;
41 #
42 # OK, this is a very simple example :)
43 # You can also use +map+, +any?+, +find+ and even +sort_by+,
44 # if you want.
45 class Scanner < StringScanner
46
47 extend Plugin
48 plugin_host Scanners
49
# Raised if a Scanner fails while scanning.
#
# Derived from StandardError (not Exception) so that a plain +rescue+
# clause can handle scan failures; <tt>rescue ScanError</tt> and
# <tt>rescue Exception</tt> keep working as before.
ScanError = Class.new(StandardError)
52
53 require 'coderay/helpers/word_list'
54
55 # The default options for all scanner classes.
56 #
57 # Define @default_options for subclasses.
58 DEFAULT_OPTIONS = { :stream => false }
59
60 KINDS_NOT_LOC = [:comment, :doctype]
61
62 class << self
63
# Returns whether this Scanner class can be used in streaming mode.
#
# This is a class-level method, so +self+ is the scanner class. A
# scanner is streamable when it mixes in Streamable via +include+
# (which is what the instance-level check in +initialize+ tests with
# <tt>kind_of? Streamable</tt>). Classes that were extended with
# Streamable are still reported as streamable, as before.
def streamable?
  include?(Streamable) || is_a?(Streamable)
end
68
# Normalizes +code+ for scanning and returns the resulting String.
#
# * +code+ is converted with +to_s+.
# * On Rubies with String encodings, the string is tagged as UTF-8;
#   if its bytes are not valid UTF-8 it is tagged as binary instead.
# * Line endings are converted with String#to_unix.
#
# +force_encoding+ changes its receiver in place, so a frozen input
# (for example the result of <tt>nil.to_s</tt> on recent Rubies) is
# copied first instead of raising.
def normify code
  code = code.to_s
  if code.respond_to? :force_encoding
    code = code.dup if code.frozen?
    code.force_encoding 'utf-8'
    # valid_encoding? answers the question directly; no need to provoke
    # and rescue an ArgumentError (and silence $DEBUG while doing so).
    code.force_encoding 'binary' unless code.valid_encoding?
  end
  code.to_unix
end
84
# Reads or sets the file extension associated with this scanner class.
#
# With an argument, stores <tt>extension.to_s</tt> and returns it.
# Without one, returns the stored value, falling back to (and
# remembering) <tt>plugin_id.to_s</tt>.
def file_extension(extension = nil)
  return @file_extension = extension.to_s if extension
  @file_extension ||= plugin_id.to_s
end
92
93 end
94
95 =begin
96 ## Excluded for speed reasons; protected seems to make methods slow.
97
98 # Save the StringScanner methods from being called.
99 # This would not be useful for highlighting.
100 strscan_public_methods =
101 StringScanner.instance_methods -
102 StringScanner.ancestors[1].instance_methods
103 protected(*strscan_public_methods)
104 =end
105
# Create a new Scanner.
#
# * +code+ is the input; it is passed through Scanner.normify and
#   handed to the StringScanner superclass.
# * +options+ is a Hash with Symbols as keys. It is merged over the
#   DEFAULT_OPTIONS of the concrete class. A token container may be
#   supplied under the :tokens key.
# * +block+ is the callback for streamed highlighting.
#
# With :stream set, the scanner must be Streamable (otherwise
# NotStreamableError is raised) and a TokenStream built from +block+
# collects the tokens. Otherwise a Tokens object is used.
#
# Raises a RuntimeError when called on the Scanner base class itself.
def initialize(code = '', options = {}, &block)
  if self.class == Scanner
    raise "I am only the basic Scanner class. I can't scan "\
      "anything. :( Use my subclasses."
  end

  @options = self.class::DEFAULT_OPTIONS.merge options

  super Scanner.normify(code)

  # A caller-supplied container takes precedence over a fresh one.
  @tokens = options[:tokens]
  if @options[:stream]
    unless block_given?
      warn "warning in CodeRay::Scanner.new: :stream is set, "\
        "but no block was given"
    end
    raise NotStreamableError, self unless kind_of? Streamable
    @tokens ||= TokenStream.new(&block)
  else
    if block_given?
      warn "warning in CodeRay::Scanner.new: Block given, "\
        "but :stream is #{@options[:stream]}"
    end
    @tokens ||= Tokens.new
  end
  @tokens.scanner = self

  setup
end
142
# Rewinds the scanner via the superclass and then clears the
# per-scan state (see reset_instance).
def reset
  super()
  reset_instance
end
147
# Replaces the input string. The new +code+ is normalized with
# Scanner.normify before it is handed to the superclass, and the
# per-scan state is cleared afterwards.
def string=(code)
  super(Scanner.normify(code))
  reset_instance
end
153
154 # More mnemonic accessor name for the input string.
155 alias code string
156 alias code= string=
157
# The Plugin ID of this scanner's class.
def lang
  scanner_class = self.class
  scanner_class.plugin_id
end
162
# Scans the code and returns the resulting tokens, which are also
# remembered for #tokens.
#
# * +new_string+, if given, replaces the current input first.
# * +options+ are merged over the scanner's own options for this
#   scan only.
#
# In streaming mode the scanner is reset when no new input was given,
# and the token container itself is returned. Otherwise the return
# value of +scan_tokens+ is returned.
def tokenize(new_string = nil, options = {})
  scan_options = @options.merge(options)
  self.string = new_string if new_string
  if @options[:stream] # :stream must have been set already
    reset unless new_string
    scan_tokens @tokens, scan_options
    @cached_tokens = @tokens
  else
    @cached_tokens = scan_tokens(@tokens, scan_options)
  end
end
176
# The tokens of the last scan; runs #tokenize first when nothing has
# been cached yet.
def tokens
  @cached_tokens || (@cached_tokens = tokenize)
end
180
# Whether the scanner is in streaming mode, i.e. whether its :stream
# option is set. Always returns +true+ or +false+.
def streaming?
  @options[:stream] ? true : false
end
185
# Traverses the tokens, yielding each one to +block+.
#
# Raises ArgumentError in streaming mode.
def each(&block)
  if @options[:stream]
    raise ArgumentError, 'Cannot traverse TokenStream.'
  end
  tokens.each(&block)
end
192 include Enumerable
193
# The current line position of the scanner, counted from 1.
#
# Only the newlines *before* the current position are counted, so a
# scanner standing on a newline character is still on the line that
# the newline terminates.
#
# +pos+ is a byte offset, so input containing multibyte characters is
# sliced through a binary-tagged copy.
#
# Beware, this is implemented inefficiently. It should be used
# for debugging only.
def line
  offset = pos
  return 1 if offset <= 0
  text = string
  if text.respond_to?(:bytesize) && text.bytesize != text.size
    text = text.dup.force_encoding('binary')
  end
  text[0...offset].count("\n") + 1
end
201
# The column of byte offset +pos+ (default: the current position),
# counted from 1 on every line.
#
# The column is the distance to the closest newline strictly before
# +pos+, so a newline character belongs to the line it terminates.
#
# Offsets are bytes; when the input contains multibyte characters a
# binary-tagged copy of the input is cached in @bin_string and used
# for the search.
def column pos = self.pos
  return 1 if pos <= 0
  string = string()
  if string.respond_to?(:bytesize) && (defined?(@bin_string) || string.bytesize != string.size)
    @bin_string ||= string.dup.force_encoding('binary')
    string = @bin_string
  end
  # -1 stands for a virtual newline right before the first byte.
  pos - (string.rindex(?\n, pos - 1) || -1)
end
211
# Marshal support: only the options Hash is serialized.
def marshal_dump
  @options
end

# Marshal support: restores the options Hash written by marshal_dump.
# No other instance state is set here.
def marshal_load(options)
  @options = options
end
219
220 protected
221
# Hook for subclasses: initialization that has to happen once per
# instance. It is called at the end of +initialize+.
#
# Use reset for initialization that has to be done once per scan.
#
# The default implementation does nothing.
def setup
  nil
end
229
# The central method, and commonly the only one a subclass
# implements.
#
# Subclasses must override it; the override must return +tokens+
# and must only use Tokens#<< for storing scanned tokens!
#
# This base implementation always raises NotImplementedError.
def scan_tokens(tokens, options)
  message = "#{self.class}#scan_tokens not implemented."
  raise NotImplementedError, message
end
239
# Clears the per-scan state: empties the token container (unless the
# :keep_tokens option is set), forgets the cached tokens and drops
# the cached binary copy of the input if one was ever created.
def reset_instance
  keep = @options[:keep_tokens]
  @tokens.clear unless keep
  @cached_tokens = nil
  if instance_variable_defined?(:@bin_string)
    @bin_string = nil
  end
end
245
# Scanner error with additional status information.
#
# Raises ScanError with a report built from the scanner's state.
#
# * +msg+ is the error description.
# * +tokens+ is the token container; its size and the +inspect+ of
#   its last 10 entries are reported.
# * +state+ is reported with <tt>%p</tt>.
# * +ambit+ is the number of characters of input shown before and
#   after the current position.
#
# NOTE(review): when +pos+ is smaller than +ambit+ the start index
# <tt>pos - ambit</tt> is negative, and String#[] then counts from the
# end of the string - confirm whether that is intended.
def raise_inspect msg, tokens, state = 'No state given!', ambit = 30
  raise ScanError, <<-EOE % [


***ERROR in %s: %s (after %d tokens)

tokens:
%s

current line: %d column: %d pos: %d
matched: %p state: %p
bol? = %p, eos? = %p

surrounding code:
%p ~~ %p


***ERROR***

  EOE
    # The arguments below fill the format directives above, in order.
    File.basename(caller[0]),
    msg,
    tokens.size,
    tokens.last(10).map { |t| t.inspect }.join("\n"),
    line, column, pos,
    matched, state, bol?, eos?,
    string[pos - ambit, ambit],
    string[pos, ambit],
  ]
end
277
278 end
279
280 end
281 end
282
class String
  # Converts DOS ("\r\n") and old Mac ("\r") line endings to Unix
  # ("\n") ones.
  #
  # Returns a converted copy when the string contains a carriage
  # return, and the receiver itself otherwise.
  def to_unix
    index(?\r) ? gsub(/\r\n?/, "\n") : self
  end
end