comparison vendor/gems/coderay-0.9.7/lib/coderay/.svn/text-base/scanner.rb.svn-base @ 210:0579821a129a

Update to Redmine trunk rev 4802
author Chris Cannam
date Tue, 08 Feb 2011 13:51:46 +0000
parents
children
comparison
equal deleted inserted replaced
128:07fa8a8b56a8 210:0579821a129a
1 module CodeRay
2
3 require 'coderay/helpers/plugin'
4
5 # = Scanners
6 #
7 # This module holds the Scanner class and its subclasses.
8 # For example, the Ruby scanner is named CodeRay::Scanners::Ruby
9 # and can be found in coderay/scanners/ruby.
10 #
11 # Scanner also provides methods and constants for the register
12 # mechanism and the [] method that returns the Scanner class
13 # belonging to the given lang.
14 #
15 # See PluginHost.
16 module Scanners
17 extend PluginHost
18 plugin_path File.dirname(__FILE__), 'scanners'
19
20 require 'strscan'
21
22 # = Scanner
23 #
24 # The base class for all Scanners.
25 #
26 # It is a subclass of Ruby's great +StringScanner+, which
27 # makes it easy to access the scanning methods inside.
28 #
29 # It is also +Enumerable+, so you can use it like an Array of
30 # Tokens:
31 #
32 # require 'coderay'
33 #
34 # c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;"
35 #
36 # for text, kind in c_scanner
37 # puts text if kind == :operator
38 # end
39 #
40 # # prints: (*==)++;
41 #
42 # OK, this is a very simple example :)
43 # You can also use +map+, +any?+, +find+ and even +sort_by+,
44 # if you want.
45 class Scanner < StringScanner
46
47 extend Plugin
48 plugin_host Scanners
49
# Raised if a Scanner fails while scanning.
#
# Derives from StandardError (not directly from Exception) so that a plain
# +rescue+ clause, which only handles StandardError and its descendants,
# is able to catch a failed scan. Code that rescues ScanError or Exception
# explicitly keeps working unchanged.
ScanError = Class.new(StandardError)
52
53 require 'coderay/helpers/word_list'
54
# The default options for all scanner classes.
#
# Scanner#initialize merges the caller's options into
# self.class::DEFAULT_OPTIONS, so a subclass can supply its own defaults by
# defining a DEFAULT_OPTIONS constant of its own.
#
# Frozen: the hash is shared by every scanner and is only ever read
# (Hash#merge returns a new, unfrozen hash).
DEFAULT_OPTIONS = { :stream => false }.freeze

# Token kinds flagged as "not LOC" (presumably: not counted as lines of
# code; the consumer of this list lives outside this file).
KINDS_NOT_LOC = [:comment, :doctype].freeze
61
62 class << self
63
# Returns whether the Scanner can be used in streaming mode.
#
# Scanner#initialize decides this per instance with
# <tt>kind_of? Streamable</tt>, so a scanner class qualifies when
# Streamable is among its ancestors. Asking the class object itself
# (<tt>is_a? Streamable</tt>) misses classes that merely include the
# module; that check is kept as a fallback for classes that were
# extended with it.
def streamable?
  ancestors.include?(Streamable) || is_a?(Streamable)
end
68
# Turns +code+ into a String with unix newlines.
#
# +code+ is converted with to_s. On Rubies whose strings carry an
# encoding, anything that is not valid UTF-8 is copied and re-tagged as
# Windows-1252. Should the copy not be valid in that encoding either, its
# original encoding is restored and it is transcoded to UTF-8 with '?'
# standing in for invalid or undefined characters (going through UTF-16BE
# first when the string claimed to be UTF-8).
#
# The result is passed through String#to_unix.
def normify code
  code = code.to_s
  encoding_aware = code.respond_to?(:encoding)
  if encoding_aware && !(code.encoding.name == 'UTF-8' && code.valid_encoding?)
    declared_encoding = code.encoding
    code = code.dup
    code.force_encoding 'Windows-1252'
    if !code.valid_encoding?
      code.force_encoding declared_encoding
      claims_utf8 = code.encoding.name == 'UTF-8'
      if claims_utf8
        code.encode! 'UTF-16BE', :invalid => :replace, :undef => :replace, :replace => '?'
      end
      code.encode! 'UTF-8', :invalid => :replace, :undef => :replace, :replace => '?'
    end
  end
  code.to_unix
end
85
# Reads or sets the file extension associated with this scanner class.
#
# With an argument, stores its to_s and returns it. Without one, returns
# the stored value, defaulting to (and remembering) plugin_id.to_s.
def file_extension extension = nil
  return @file_extension = extension.to_s if extension
  @file_extension ||= plugin_id.to_s
end
93
94 end
95
96 =begin
97 ## Excluded for speed reasons; protected seems to make methods slow.
98
99 # Save the StringScanner methods from being called.
100 # This would not be useful for highlighting.
101 strscan_public_methods =
102 StringScanner.instance_methods -
103 StringScanner.ancestors[1].instance_methods
104 protected(*strscan_public_methods)
105 =end
106
# Creates a new Scanner.
#
# * +code+ is the input; it is run through Scanner.normify and handed to
#   the StringScanner superclass.
# * +options+ is a Hash with Symbols as keys. It is merged over the
#   DEFAULT_OPTIONS of the receiver's class, so defaults can be
#   overridden here. options[:tokens], if given, is used as the token
#   container instead of a freshly created one.
# * +block+ is the callback for streamed highlighting.
#
# With :stream set, the scanner must be Streamable (otherwise
# NotStreamableError is raised) and writes to a TokenStream built around
# +block+; a warning is printed when no block was given.
#
# Without :stream, a Tokens object is used and a warning is printed if a
# block was passed anyway.
#
# Scanner itself cannot be instantiated; only its subclasses can.
def initialize code='', options = {}, &block
  if self.class == Scanner
    raise "I am only the basic Scanner class. I can't scan "\
      "anything. :( Use my subclasses."
  end

  @options = self.class::DEFAULT_OPTIONS.merge options

  super Scanner.normify(code)

  # A caller-supplied container wins; otherwise one is created below.
  @tokens = options[:tokens]
  if @options[:stream]
    unless block_given?
      warn "warning in CodeRay::Scanner.new: :stream is set, "\
        "but no block was given"
    end
    raise NotStreamableError, self unless kind_of? Streamable
    @tokens ||= TokenStream.new(&block)
  else
    if block_given?
      warn "warning in CodeRay::Scanner.new: Block given, "\
        "but :stream is #{@options[:stream]}"
    end
    @tokens ||= Tokens.new
  end
  @tokens.scanner = self

  setup
end
143
# Rewinds the underlying StringScanner, then clears the per-scan state
# of this scanner (see reset_instance).
def reset
  super()
  reset_instance
end
148
# Replaces the input with +code+ (normalized through Scanner.normify) and
# clears the per-scan state.
def string= code
  normalized = Scanner.normify(code)
  # Rubinius 1.0.1 gets the string assigned by hand instead of through
  # the inherited setter.
  on_rubinius_1_0_1 = defined?(RUBY_DESCRIPTION) && RUBY_DESCRIPTION['rubinius 1.0.1']
  if on_rubinius_1_0_1
    reset_state
    @string = normalized
  else
    super normalized
  end
  reset_instance
end
159
# +code+ / +code=+ are more mnemonic names for the input string accessors
# +string+ / +string=+.
alias :code :string
alias :code= :string=
163
# Returns the plugin ID of this scanner's class.
def lang
  scanner_class = self.class
  scanner_class.plugin_id
end
168
# Scans the code and returns the resulting tokens.
#
# * +new_string+, when given, replaces the current input first.
# * +options+ are merged over the scanner's own options and passed on to
#   scan_tokens.
#
# In streaming mode (:stream must have been set at construction time) the
# scanner is reset when no new input was supplied, and the scanner's own
# token container is returned. Otherwise the return value of scan_tokens
# is returned. Either way the result is remembered for #tokens.
def tokenize new_string=nil, options = {}
  options = @options.merge(options)
  self.string = new_string if new_string

  unless @options[:stream]
    return @cached_tokens = scan_tokens(@tokens, options)
  end

  reset unless new_string
  scan_tokens @tokens, options
  @cached_tokens = @tokens
end
182
# Returns the tokens of the last scan, running tokenize first when
# nothing has been cached yet.
def tokens
  return @cached_tokens if @cached_tokens
  @cached_tokens = tokenize
end
186
# Whether the scanner is in streaming mode, i.e. whether its :stream
# option is truthy. Always returns true or false.
def streaming?
  @options[:stream] ? true : false
end
191
# Traverses the tokens, yielding each one to +block+.
#
# Raises ArgumentError in streaming mode, where there is no token list
# to traverse.
def each &block
  if @options[:stream]
    raise ArgumentError, 'Cannot traverse TokenStream.'
  end
  tokens.each(&block)
end
198 include Enumerable
199
# The current line position of the scanner (1-based).
#
# Counts the newlines in the input up to and including the byte at the
# current position.
#
# +pos+ is a byte offset, so for input containing multibyte characters
# the string is sliced through a binary copy; indexing the original by
# characters would cover more text than has actually been scanned and
# report too high a line number. The binary copy is kept in @bin_string,
# the same cache #column uses and reset_instance clears.
#
# Beware, this is implemented inefficiently. It should be used
# for debugging only.
def line
  code = string
  if code.respond_to?(:bytesize) && (defined?(@bin_string) || code.bytesize != code.size)
    @bin_string ||= code.dup.force_encoding('binary')
    code = @bin_string
  end
  code[0..pos].count("\n") + 1
end
207
# The column of byte offset +pos+ (default: the current position).
#
# Returns 0 for positions at or before the start of the input. Otherwise
# returns the distance from +pos+ back to the closest newline found at or
# before it, or +pos+ itself when there is none.
#
# Input containing multibyte characters is searched through a binary
# copy cached in @bin_string, so that +pos+ is interpreted in bytes.
def column pos = self.pos
  return 0 unless pos > 0
  text = string
  if text.respond_to?(:bytesize) && (defined?(@bin_string) || text.bytesize != text.size)
    @bin_string ||= text.dup.force_encoding('binary')
    text = @bin_string
  end
  newline_at = text.rindex("\n", pos)
  newline_at ? pos - newline_at : pos
end
217
# Marshal hook: only the options hash of a scanner is serialized.
def marshal_dump
  @options
end
221
# Marshal hook: restores the options hash written by marshal_dump.
# Nothing else is restored.
def marshal_load(options)
  @options = options
end
225
226 protected
227
# Hook for subclasses: initialization that has to happen once per
# instance. Called at the end of #initialize. Does nothing by default.
#
# Initialization that has to happen once per scan belongs in reset
# instead.
def setup
  nil
end
235
# The central method, and commonly the only one a subclass implements.
#
# Implementations must return +tokens+ and must store scanned tokens
# through Tokens#<< only. +options+ are the scanner options merged with
# those passed to tokenize.
#
# This base version always raises NotImplementedError.
def scan_tokens tokens, options
  message = "#{self.class}#scan_tokens not implemented."
  raise NotImplementedError, message
end
245
# Clears the per-scan state: empties the token container (unless the
# :keep_tokens option is set), forgets the cached tokens and drops the
# cached binary copy of the input if one has been created.
def reset_instance
  keep_tokens = @options[:keep_tokens]
  @tokens.clear unless keep_tokens
  @cached_tokens = nil
  @bin_string = nil if instance_variable_defined?(:@bin_string)
end
251
# Raises a ScanError carrying additional status information.
#
# * +msg+ is the error description.
# * +tokens+ are the tokens scanned so far; their number and the last
#   ten of them are included in the report.
# * +state+ is the scanner state to report.
# * +ambit+ is how much of the input to show on either side of the
#   current position.
#
# The report also contains the file name taken from the caller's stack
# frame, line/column/pos, the last match, and bol?/eos?.
def raise_inspect msg, tokens, state = 'No state given!', ambit = 30
  # Clamp the start of the "before" excerpt to the beginning of the
  # input: a negative start would be counted from the END of the string
  # and show unrelated text (or nil) while scanning the first +ambit+
  # characters.
  context_start = [pos - ambit, 0].max
  code_before = string[context_start, pos - context_start]
  code_after = string[pos, ambit]

  raise ScanError, <<-EOE % [


***ERROR in %s: %s (after %d tokens)

tokens:
%s

current line: %d column: %d pos: %d
matched: %p state: %p
bol? = %p, eos? = %p

surrounding code:
%p ~~ %p


***ERROR***

  EOE
    File.basename(caller[0]),
    msg,
    tokens.size,
    tokens.last(10).map { |t| t.inspect }.join("\n"),
    line, column, pos,
    matched, state, bol?, eos?,
    code_before,
    code_after,
  ]
end
283
284 end
285
286 end
287 end
288
class String
  # Returns the string with DOS ("\r\n") and old Mac ("\r") line endings
  # converted to unix newlines. A string without any carriage return is
  # returned as is (the receiver itself, not a copy).
  def to_unix
    return self unless include?("\r")
    gsub(/\r\n?/, "\n")
  end
end