Mercurial > hg > soundsoftware-site
comparison vendor/gems/coderay-1.0.0/lib/coderay/scanner.rb @ 909:cbb26bc654de redmine-1.3
Update to Redmine 1.3-stable branch (Redmine SVN rev 8964)
author | Chris Cannam |
---|---|
date | Fri, 24 Feb 2012 19:09:32 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
908:c6c2cbd0afee | 909:cbb26bc654de |
---|---|
1 # encoding: utf-8 | |
2 require 'strscan' | |
3 | |
4 module CodeRay | |
5 | |
6 autoload :WordList, 'coderay/helpers/word_list' | |
7 | |
8 # = Scanners | |
9 # | |
10 # This module holds the Scanner class and its subclasses. | |
11 # For example, the Ruby scanner is named CodeRay::Scanners::Ruby | |
12 # and can be found in coderay/scanners/ruby. | |
13 # | |
14 # Scanner also provides methods and constants for the register | |
15 # mechanism and the [] method that returns the Scanner class | |
16 # belonging to the given lang. | |
17 # | |
18 # See PluginHost. | |
19 module Scanners | |
20 extend PluginHost | |
21 plugin_path File.dirname(__FILE__), 'scanners' | |
22 | |
23 | |
24 # = Scanner | |
25 # | |
26 # The base class for all Scanners. | |
27 # | |
28 # It is a subclass of Ruby's great +StringScanner+, which | |
29 # makes it easy to access the scanning methods inside. | |
30 # | |
31 # It is also +Enumerable+, so you can use it like an Array of | |
32 # Tokens: | |
33 # | |
34 # require 'coderay' | |
35 # | |
36 # c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;" | |
37 # | |
38 # for text, kind in c_scanner | |
39 # puts text if kind == :operator | |
40 # end | |
41 # | |
42 # # prints: (*==)++; | |
43 # | |
44 # OK, this is a very simple example :) | |
45 # You can also use +map+, +any?+, +find+ and even +sort_by+, | |
46 # if you want. | |
47 class Scanner < StringScanner | |
48 | |
49 extend Plugin | |
50 plugin_host Scanners | |
51 | |
# Error raised when a Scanner fails while scanning.
ScanError = Class.new(StandardError)

# Default options shared by all scanner classes.
#
# A new scanner merges the caller's options into
# self.class::DEFAULT_OPTIONS, so a subclass can supply its own constant.
DEFAULT_OPTIONS = {}

# Token kinds :comment, :doctype and :docstring.
# NOTE(review): presumably the kinds that are not counted as lines of
# code -- confirm against the users of this constant.
KINDS_NOT_LOC = [
  :comment,
  :doctype,
  :docstring,
]
61 | |
62 attr_accessor :state | |
63 | |
64 class << self | |
65 | |
66 # Normalizes the given code into a string with UNIX newlines, in the | |
67 # scanner's internal encoding, with invalid and undefined characters | |
68 # replaced by placeholders. May return the given object itself when no conversion is needed. | |
def normalize code
  # Anything that is not already a String is converted with #to_s.
  text = code.is_a?(::String) ? code : code.to_s
  return text if text.empty?

  # Strings without encoding support only get their newlines converted.
  return to_unix(text) unless text.respond_to? :encoding

  encode_with_encoding text, encoding
end
82 | |
83 # The typical filename suffix for this scanner's language. | |
def file_extension extension = lang
  # The first value ever computed is memoized; later arguments are ignored.
  return @file_extension if @file_extension
  @file_extension = extension.to_s
end
87 | |
88 # The encoding used internally by this scanner. | |
def encoding name = 'UTF-8'
  # Memoized. Stays nil when Encoding.find is not available.
  @encoding ||= (Encoding.find(name) if defined?(Encoding.find))
end
92 | |
93 # The lang of this Scanner class, which is equal to its Plugin ID. | |
def lang
  # Read straight from the class-level @plugin_id instance variable.
  @plugin_id
end
97 | |
98 protected | |
99 | |
def encode_with_encoding code, target_encoding
  already_target = code.encoding == target_encoding

  # Valid text in the target encoding only needs its newlines converted.
  return to_unix(code) if already_target && code.valid_encoding?

  # Text that claims the target encoding but is invalid gets its real
  # encoding guessed; otherwise the declared encoding is trusted.
  source_encoding = already_target ? guess_encoding(code) : code.encoding

  code.encode target_encoding, source_encoding, :universal_newline => true, :undef => :replace, :invalid => :replace
end
113 | |
def to_unix code
  # Strings without a carriage return are handed back untouched.
  return code unless code.index(?\r)
  code.gsub(/\r\n?/, "\n")
end
117 | |
# Guesses the encoding of +s+ by piping its first 1024 characters to the
# external <tt>file -b --mime -</tt> command and reading the reported
# charset.
#
# Returns an Encoding. Falls back to Encoding::BINARY when the command
# cannot be run, prints nothing, reports no charset, or reports a charset
# Ruby does not know.
def guess_encoding s
  #:nocov:
  IO.popen("file -b --mime -", "w+") do |file|
    file.write s[0, 1024]
    file.close_write
    # gets returns nil when the command printed nothing; to_s keeps the
    # pattern match from failing on nil.
    charset = file.gets.to_s[/charset=([-\w]+)/, 1]
    charset ? Encoding.find(charset) : Encoding::BINARY
  end
rescue ArgumentError, SystemCallError, IOError
  # ArgumentError: charset unknown to Ruby.
  # SystemCallError / IOError: the command is missing or the pipe broke.
  Encoding::BINARY
  #:nocov:
end
131 | |
132 end | |
133 | |
134 # Create a new Scanner. | |
135 # | |
136 # * +code+ is the input String and is handled by the superclass | |
137 # StringScanner. | |
138 # * +options+ is a Hash with Symbols as keys. | |
139 # It is merged with the default options of the class (you can | |
140 # overwrite default options here.) | |
141 # | |
142 # If options[:tokens] is given, it is used to store the tokens; otherwise, a new Tokens object is used. | |
def initialize code = '', options = {}
  # The base class is abstract; only subclasses can be instantiated.
  raise NotImplementedError, "I am only the basic Scanner class. I can't scan anything. :( Use my subclasses." if self.class == Scanner

  @options = self.class::DEFAULT_OPTIONS.merge options

  # StringScanner receives the normalized input.
  normalized = self.class.normalize(code)
  super normalized

  # A token container passed in by the caller takes precedence.
  @tokens = options[:tokens] || Tokens.new
  @tokens.scanner = self if @tokens.respond_to? :scanner=

  setup
end
157 | |
158 # Sets back the scanner. Subclasses should redefine the reset_instance | |
159 # method instead of this one. | |
def reset
  # Rewind the underlying StringScanner, then clear this scanner's state.
  super()
  reset_instance
end
164 | |
165 # Set a new string to be scanned. | |
def string= code
  # The new input is normalized before StringScanner sees it.
  normalized = self.class.normalize(code)
  super normalized
  reset_instance
end
171 | |
172 # the Plugin ID for this scanner | |
def lang
  # Delegates to the class-level reader.
  scanner_class = self.class
  scanner_class.lang
end
176 | |
177 # the default file extension for this scanner | |
def file_extension
  # Delegates to the class-level reader.
  scanner_class = self.class
  scanner_class.file_extension
end
181 | |
182 # Scan the code and returns all tokens in a Tokens object. | |
def tokenize source = nil, options = {}
  options = @options.merge(options)
  @tokens = options[:tokens] || @tokens || Tokens.new
  @tokens.scanner = self if @tokens.respond_to? :scanner=

  # Load the input: nil rescans the current string, an Array of parts is
  # joined into one string first.
  if source.nil?
    reset
  else
    text = source.is_a?(Array) ? source.join : source
    self.string = self.class.normalize(text)
  end

  begin
    scan_tokens @tokens, options
  rescue => e
    # Re-raise as a ScanError carrying the scanner status, keeping the
    # original backtrace.
    message = "Error in %s#scan_tokens, initial state was: %p" % [self.class, defined?(state) && state]
    raise_inspect e.message, @tokens, message, 30, e.backtrace
  end

  @cached_tokens = @tokens
  return @tokens unless source.is_a? Array

  # For Array input, split the tokens again using the size of each part.
  @tokens.split_into_parts(*source.map { |part| part.size })
end
210 | |
211 # Cache the result of tokenize. | |
def tokens
  # tokenize runs only while no cached result exists.
  return @cached_tokens if @cached_tokens
  @cached_tokens = tokenize
end
215 | |
216 # Traverse the tokens. | |
def each &block
  # Iteration is forwarded to the (cached) token list.
  token_list = tokens
  token_list.each(&block)
end
220 include Enumerable | |
221 | |
222 # The current line position of the scanner, starting with 1. | |
223 # See also: #column. | |
224 # | |
225 # Beware, this is implemented inefficiently. It should be used | |
226 # for debugging only. | |
def line pos = self.pos
  return 1 if pos <= 0
  # Newlines in the text before +pos+, plus one for the first line.
  scanned = binary_string[0...pos]
  scanned.count("\n") + 1
end
231 | |
232 # The current column position of the scanner, starting with 1. | |
233 # See also: #line. | |
def column pos = self.pos
  return 1 if pos <= 0
  # Index of the last newline before +pos+; -1 stands in for "none", so
  # the first line also counts from 1.
  line_break = binary_string.rindex(?\n, pos - 1) || -1
  pos - line_break
end
238 | |
239 # The string in binary encoding. | |
240 # | |
241 # To be used with #pos, which is the index of the byte the scanner | |
242 # will scan next. | |
def binary_string
  @binary_string ||= begin
    source = string
    # A binary copy is made only when byte and character counts differ.
    multibyte = source.respond_to?(:bytesize) && source.bytesize != source.size
    #:nocov:
    multibyte ? source.dup.force_encoding('binary') : source
    #:nocov:
  end
end
253 | |
254 protected | |
255 | |
256 # Can be implemented by subclasses to do some initialization | |
257 # that has to be done once per instance. | |
258 # | |
259 # Use reset for initialization that has to be done once per | |
260 # scan. | |
def setup # :doc:
  # Intentionally empty: a hook that subclasses may override.
  nil
end
263 | |
264 # This is the central method, and commonly the only one a | |
265 # subclass implements. | |
266 # | |
267 # Subclasses must implement this method; it must return +tokens+ | |
268 # and must only use Tokens#<< for storing scanned tokens! | |
def scan_tokens tokens, options # :doc:
  # The base implementation only reports that the subclass lacks one.
  complaint = "#{self.class}#scan_tokens not implemented."
  raise NotImplementedError, complaint
end
272 | |
273 # Resets the scanner. | |
def reset_instance
  # Tokens are kept when the :keep_tokens option is set.
  clearable = @tokens.respond_to?(:clear)
  @tokens.clear if clearable && !@options[:keep_tokens]

  # Drop everything derived from the previous input.
  @cached_tokens = nil
  @binary_string = nil if instance_variable_defined?(:@binary_string)
end
279 | |
280 # Scanner error with additional status information | |
def raise_inspect msg, tokens, state = self.state || 'No state given!', ambit = 30, backtrace = caller
  # Raises a ScanError whose message is a multi-line report: the file that
  # called this method, +msg+, the token count and last ten tokens, the
  # current line / column / pos, the last match, +state+, bol? / eos?, and
  # up to +ambit+ bytes of input on either side of the current position.
  #
  # * +msg+       - description of the failure.
  # * +tokens+    - tokens so far; size and last are used only if supported.
  # * +state+     - shown verbatim in the report.
  # * +ambit+     - number of bytes of context shown on each side.
  # * +backtrace+ - backtrace attached to the raised ScanError.

  # Clamp the start of the preceding context at 0. A negative start would
  # make String#[] count from the END of the string, so near the beginning
  # of the input the report would show unrelated text (or nil).
  context_start = [pos - ambit, 0].max

  raise ScanError, <<-EOE % [


***ERROR in %s: %s (after %d tokens)

tokens:
%s

current line: %d column: %d pos: %d
matched: %p state: %p
bol? = %p, eos? = %p

surrounding code:
%p ~~ %p


***ERROR***

  EOE
    File.basename(caller[0]),
    msg,
    tokens.respond_to?(:size) ? tokens.size : 0,
    tokens.respond_to?(:last) ? tokens.last(10).map { |t| t.inspect }.join("\n") : '',
    line, column, pos,
    matched, state, bol?, eos?,
    binary_string[context_start, pos - context_start],
    binary_string[pos, ambit],
  ], backtrace
end
311 | |
312 # Shorthand for scan_until(/\z/). | |
313 # This method also avoids a JRuby 1.9 mode bug. | |
def scan_rest
  # Capture the unscanned tail before the scan pointer jumps to the end.
  remainder = rest
  terminate
  remainder
end
319 | |
320 end | |
321 | |
322 end | |
323 end |