To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.
root / vendor / gems / coderay-0.9.7 / lib / coderay / scanner.rb @ 442:753f1380d6bc
History | View | Annotate | Download (7.97 KB)
| 1 |
module CodeRay |
|---|---|
| 2 |
|
| 3 |
require 'coderay/helpers/plugin'
|
| 4 |
|
| 5 |
# = Scanners
|
| 6 |
#
|
| 7 |
# This module holds the Scanner class and its subclasses.
|
| 8 |
# For example, the Ruby scanner is named CodeRay::Scanners::Ruby
|
| 9 |
# and can be found in coderay/scanners/ruby.
|
| 10 |
#
|
| 11 |
# Scanner also provides methods and constants for the register
|
| 12 |
# mechanism and the [] method that returns the Scanner class
|
| 13 |
# belonging to the given lang.
|
| 14 |
#
|
| 15 |
# See PluginHost.
|
| 16 |
module Scanners |
| 17 |
extend PluginHost
|
| 18 |
plugin_path File.dirname(__FILE__), 'scanners' |
| 19 |
|
| 20 |
require 'strscan'
|
| 21 |
|
| 22 |
# = Scanner
|
| 23 |
#
|
| 24 |
# The base class for all Scanners.
|
| 25 |
#
|
| 26 |
# It is a subclass of Ruby's great +StringScanner+, which
|
| 27 |
# makes it easy to access the scanning methods inside.
|
| 28 |
#
|
| 29 |
# It is also +Enumerable+, so you can use it like an Array of
|
| 30 |
# Tokens:
|
| 31 |
#
|
| 32 |
# require 'coderay'
|
| 33 |
#
|
| 34 |
# c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;"
|
| 35 |
#
|
| 36 |
# for text, kind in c_scanner
|
| 37 |
# puts text if kind == :operator
|
| 38 |
# end
|
| 39 |
#
|
| 40 |
# # prints: (*==)++;
|
| 41 |
#
|
| 42 |
# OK, this is a very simple example :)
|
| 43 |
# You can also use +map+, +any?+, +find+ and even +sort_by+,
|
| 44 |
# if you want.
|
| 45 |
class Scanner < StringScanner |
| 46 |
|
| 47 |
extend Plugin
|
| 48 |
plugin_host Scanners
|
| 49 |
|
| 50 |
# Raised if a Scanner fails while scanning.
#
# Derives from StandardError rather than Exception so that a plain
# +rescue+ clause catches it; an explicit <tt>rescue ScanError</tt> (or
# <tt>rescue Exception</tt>) keeps working as before.
ScanError = Class.new(StandardError)
| 52 |
|
| 53 |
require 'coderay/helpers/word_list'
|
| 54 |
|
| 55 |
# The default options for all scanner classes.
#
# Scanner#initialize reads this constant through the instance's own class
# (<tt>self.class::DEFAULT_OPTIONS</tt>), so a subclass customizes its
# defaults by defining a DEFAULT_OPTIONS constant of its own.
DEFAULT_OPTIONS = { :stream => false }

# Token kinds :comment and :doctype.
# NOTE(review): presumably the kinds that are not counted as lines of
# code; nothing in this file reads the constant -- confirm with its users.
KINDS_NOT_LOC = [:comment, :doctype]
| 61 |
|
| 62 |
class << self |
| 63 |
|
| 64 |
# Returns whether the Scanner class can be used in streaming mode.
#
# +self+ is the scanner class here, so the question is whether the class
# mixes in Streamable. That mirrors the instance-level
# <tt>kind_of? Streamable</tt> check made in Scanner#initialize.
# <tt>is_a?</tt> alone only recognizes a class that was *extended* with
# the module; it is kept as a fallback so such classes still answer true.
def streamable?
  include?(Streamable) || is_a?(Streamable)
end
|
| 68 |
|
| 69 |
# Normalizes +code+ for scanning and returns the resulting String.
#
# * +code+ is converted with +to_s+.
# * On Rubies whose Strings respond to +encoding+, input that is not
#   tagged UTF-8, or is tagged UTF-8 but invalid, is copied and re-tagged
#   as Windows-1252. Only if that tagging is invalid too is the original
#   tag restored and the copy transcoded to UTF-8, with '?' substituted
#   for invalid or undefined sequences.
# * Finally the line endings are unified by String#to_unix.
#
# The caller's String is never modified; all re-tagging happens on a dup.
def normify code
  code = code.to_s
  if code.respond_to?(:encoding) && (code.encoding.name != 'UTF-8' || !code.valid_encoding?)
    code = code.dup
    original_encoding = code.encoding
    code.force_encoding 'Windows-1252'
    unless code.valid_encoding?
      code.force_encoding original_encoding
      # NOTE(review): the detour through UTF-16BE is presumably needed
      # because a UTF-8 -> UTF-8 encode! would not replace bad bytes.
      if code.encoding.name == 'UTF-8'
        code.encode! 'UTF-16BE', :invalid => :replace, :undef => :replace, :replace => '?'
      end
      code.encode! 'UTF-8', :invalid => :replace, :undef => :replace, :replace => '?'
    end
  end
  code.to_unix
end
|
| 85 |
|
| 86 |
# Reads or sets the file extension stored for this scanner class.
#
# With an +extension+ argument, stores its +to_s+ form and returns it.
# Without one, returns the stored value; if nothing is stored yet it is
# initialized to <tt>plugin_id.to_s</tt> first.
def file_extension extension = nil
  return @file_extension = extension.to_s if extension
  @file_extension ||= plugin_id.to_s
end
|
| 93 |
|
| 94 |
end
|
| 95 |
|
| 96 |
=begin
|
| 97 |
## Excluded for speed reasons; protected seems to make methods slow.
|
| 98 |
|
| 99 |
# Save the StringScanner methods from being called.
|
| 100 |
# This would not be useful for highlighting.
|
| 101 |
strscan_public_methods =
|
| 102 |
StringScanner.instance_methods -
|
| 103 |
StringScanner.ancestors[1].instance_methods
|
| 104 |
protected(*strscan_public_methods)
|
| 105 |
=end
|
| 106 |
|
| 107 |
# Create a new Scanner.
#
# * +code+ is the input String. It is passed through Scanner.normify and
#   then handed to the superclass StringScanner.
# * +options+ is a Hash with Symbols as keys.
#   It is merged over the DEFAULT_OPTIONS of the instance's class (so you
#   can overwrite default options here). A value given as
#   <tt>options[:tokens]</tt> is used as the token container instead of a
#   freshly created one.
# * +block+ is the callback for streamed highlighting.
#
# If you set :stream to +true+ in the options, the Scanner uses a
# TokenStream with the +block+ as callback to handle the tokens.
#
# Else, a Tokens object is used.
#
# A warning is printed when :stream is set without a block, or a block is
# given without :stream.
#
# Raises a RuntimeError when Scanner itself (not a subclass) is
# instantiated, and NotStreamableError when :stream is set but the
# scanner is not a Streamable.
def initialize code='', options = {}, &block
  raise "I am only the basic Scanner class. I can't scan "\
    "anything. :( Use my subclasses." if self.class == Scanner

  @options = self.class::DEFAULT_OPTIONS.merge options

  super Scanner.normify(code)

  # Read from the raw +options+: a caller-supplied container wins over
  # the default one created below.
  @tokens = options[:tokens]
  if @options[:stream]
    warn "warning in CodeRay::Scanner.new: :stream is set, "\
      "but no block was given" unless block_given?
    raise NotStreamableError, self unless kind_of? Streamable
    @tokens ||= TokenStream.new(&block)
  else
    warn "warning in CodeRay::Scanner.new: Block given, "\
      "but :stream is #{@options[:stream]}" if block_given?
    @tokens ||= Tokens.new
  end
  @tokens.scanner = self

  setup
end
|
| 143 |
|
| 144 |
# Resets the scanner through the superclass +reset+ and then discards the
# per-scan state via reset_instance.
def reset
  super
  reset_instance
end
|
| 148 |
|
| 149 |
# Replaces the input string with the normalized form of +code+ (see
# Scanner.normify) and discards the per-scan state via reset_instance.
def string= code
  code = Scanner.normify(code)
  if defined?(RUBY_DESCRIPTION) && RUBY_DESCRIPTION['rubinius 1.0.1']
    # Rubinius 1.0.1 gets the string assigned directly instead of going
    # through the superclass setter.
    reset_state
    @string = code
  else
    super code
  end
  reset_instance
end
|
| 159 |
|
| 160 |
# More mnemonic accessor names for the input string:
# +code+ reads it (alias of +string+), +code=+ replaces it (alias of
# +string=+).
alias code string
alias code= string=
| 163 |
|
| 164 |
# Returns the Plugin ID for this scanner, as reported by the +plugin_id+
# of the scanner's class.
def lang
  self.class.plugin_id
end
|
| 168 |
|
| 169 |
# Scans the code and returns the result, which is also cached for #tokens.
#
# * +new_string+, when given, replaces the input (through +string=+)
#   before scanning.
# * +options+ is merged over the instance options; the merged Hash is
#   what scan_tokens receives.
#
# In streaming mode the scanner is reset first unless new input was
# given, and the token container itself is returned. Otherwise the return
# value of scan_tokens is returned.
def tokenize new_string=nil, options = {}
  options = @options.merge(options)
  self.string = new_string if new_string
  if @options[:stream]  # :stream must have been set already
    reset unless new_string
    scan_tokens @tokens, options
    @cached_tokens = @tokens
  else
    @cached_tokens = scan_tokens(@tokens, options)
  end
end
|
| 182 |
|
| 183 |
# Returns the cached result of the last scan, running tokenize first when
# nothing is cached.
def tokens
  @cached_tokens ||= tokenize
end
|
| 186 |
|
| 187 |
# Whether the scanner is in streaming mode, i.e. whether the :stream
# option is truthy. Always returns +true+ or +false+.
def streaming?
  @options[:stream] ? true : false
end
|
| 191 |
|
| 192 |
# Traverses the tokens by forwarding +block+ to <tt>tokens.each</tt>.
#
# Raises ArgumentError in streaming mode (the :stream option is set).
def each &block
  raise ArgumentError, 'Cannot traverse TokenStream.' if @options[:stream]
  tokens.each(&block)
end
|
| 198 |
include Enumerable
|
| 199 |
|
| 200 |
# The current line position of the scanner (1-based).
#
# Counts the newlines strictly before +pos+, so a scanner positioned ON a
# newline character still reports the line that newline terminates.
#
# +pos+ is a byte offset, so for input containing multi-byte characters
# the count is taken on a binary-tagged copy of the string. The copy is
# kept in @bin_string, the same cache #column uses and reset_instance
# clears.
#
# Beware, this is implemented inefficiently. It should be used
# for debugging only.
def line
  code = string
  if code.respond_to?(:bytesize) && (defined?(@bin_string) || code.bytesize != code.size)
    @bin_string ||= code.dup.force_encoding('binary')
    code = @bin_string
  end
  # Exclusive range: the character at +pos+ itself has not been passed yet.
  code[0...pos].count("\n") + 1
end
|
| 207 |
|
| 208 |
# The column of byte offset +pos+ (default: the current scan position).
#
# Returns 0 for positions <= 0. Otherwise returns +pos+ minus the index
# of the last newline at or before +pos+, or +pos+ itself when there is
# no such newline.
#
# NOTE(review): as a consequence the first line is numbered from 0 while
# the first character after a newline gets column 1, and a position on a
# newline gets 0 -- confirm whether this mixed origin is intended.
#
# For input containing multi-byte characters the search runs on a
# binary-tagged copy of the string, cached in @bin_string (cleared by
# reset_instance), because +pos+ counts bytes.
def column pos = self.pos
  return 0 if pos <= 0
  code = string
  if code.respond_to?(:bytesize) && (defined?(@bin_string) || code.bytesize != code.size)
    @bin_string ||= code.dup.force_encoding('binary')
    code = @bin_string
  end
  pos - (code.rindex("\n", pos) || 0)
end
|
| 217 |
|
| 218 |
# Marshal hook: only the options Hash is serialized.
def marshal_dump
  @options
end
|
| 221 |
|
| 222 |
# Marshal hook: restores the options Hash produced by marshal_dump.
# Nothing else is restored by this method.
def marshal_load options
  @options = options
end
|
| 225 |
|
| 226 |
protected |
| 227 |
|
| 228 |
# Can be implemented by subclasses to do some initialization
# that has to be done once per instance. It is called at the end of
# Scanner#initialize; this default implementation does nothing.
#
# Use reset for initialization that has to be done once per
# scan.
def setup
end
|
| 235 |
|
| 236 |
# This is the central method, and commonly the only one a
# subclass implements.
#
# Subclasses must implement this method; it must return +tokens+
# and must only use Tokens#<< for storing scanned tokens!
#
# This base implementation always raises NotImplementedError, naming the
# class of the receiver.
def scan_tokens tokens, options
  raise NotImplementedError, "#{self.class}#scan_tokens not implemented."
end
|
| 245 |
|
| 246 |
# Discards the per-scan state: empties the token container (unless the
# :keep_tokens option is set), forgets the cached scan result, and
# invalidates the binary string cache used by #column.
def reset_instance
  @tokens.clear unless @options[:keep_tokens]
  @cached_tokens = nil
  # Only touch @bin_string if it already exists: #column tests it with
  # defined?, so it must not be created here.
  @bin_string = nil if defined? @bin_string
end
|
| 251 |
|
| 252 |
# Raises a ScanError carrying additional status information.
#
# * +msg+ is the error description.
# * +tokens+ is the token list scanned so far; its size and its last 10
#   entries are reported.
# * +state+ is the scanner state to report.
# * +ambit+ is how many characters of code before and after the current
#   position are included in the report.
#
# The report also names the caller's location and the scanner's line,
# column, pos, last match, bol? and eos?.
def raise_inspect msg, tokens, state = 'No state given!', ambit = 30
  # Clamp the start of the "before" window at 0: a negative start index
  # would make String#[] count from the END of the string.
  before_start = [pos - ambit, 0].max
  raise ScanError, <<-EOE % [


***ERROR in %s: %s (after %d tokens)

tokens:
%s

current line: %d column: %d pos: %d
matched: %p state: %p
bol? = %p, eos? = %p

surrounding code:
%p ~~ %p


***ERROR***

  EOE
    File.basename(caller[0]),
    msg,
    tokens.size,
    tokens.last(10).map { |t| t.inspect }.join("\n"),
    line, column, pos,
    matched, state, bol?, eos?,
    string[before_start, pos - before_start],
    string[pos, ambit],
  ]
end
|
| 283 |
|
| 284 |
end
|
| 285 |
|
| 286 |
end
|
| 287 |
end
|
| 288 |
|
| 289 |
class String
  # I love this hack. It seems to silence all dos/unix/mac newline problems.
  #
  # Returns a copy with every "\r\n" and every lone "\r" replaced by "\n".
  # A string without any "\r" is returned as is (+self+, not a copy).
  def to_unix
    return self unless include?("\r")
    gsub(/\r\n?/, "\n")
  end
end
|