annotate vendor/plugins/coderay-0.9.2/lib/coderay/.svn/text-base/scanner.rb.svn-base @ 877:e97cef3bd5d0 bug_70

Close obsolete branch bug_70
author Chris Cannam
date Wed, 30 Mar 2011 10:48:32 +0100
parents 513646585e45
children
rev   line source
Chris@0 1 module CodeRay
Chris@0 2
Chris@0 3 require 'coderay/helpers/plugin'
Chris@0 4
Chris@0 5 # = Scanners
Chris@0 6 #
Chris@0 7 # This module holds the Scanner class and its subclasses.
Chris@0 8 # For example, the Ruby scanner is named CodeRay::Scanners::Ruby
Chris@0 9 # and can be found in coderay/scanners/ruby.
Chris@0 10 #
Chris@0 11 # Scanner also provides methods and constants for the register
Chris@0 12 # mechanism and the [] method that returns the Scanner class
Chris@0 13 # belonging to the given lang.
Chris@0 14 #
Chris@0 15 # See PluginHost.
Chris@0 16 module Scanners
Chris@0 17 extend PluginHost
Chris@0 18 plugin_path File.dirname(__FILE__), 'scanners'
Chris@0 19
Chris@0 20 require 'strscan'
Chris@0 21
Chris@0 22 # = Scanner
Chris@0 23 #
Chris@0 24 # The base class for all Scanners.
Chris@0 25 #
Chris@0 26 # It is a subclass of Ruby's great +StringScanner+, which
Chris@0 27 # makes it easy to access the scanning methods inside.
Chris@0 28 #
Chris@0 29 # It is also +Enumerable+, so you can use it like an Array of
Chris@0 30 # Tokens:
Chris@0 31 #
Chris@0 32 # require 'coderay'
Chris@0 33 #
Chris@0 34 # c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;"
Chris@0 35 #
Chris@0 36 # for text, kind in c_scanner
Chris@0 37 # puts text if kind == :operator
Chris@0 38 # end
Chris@0 39 #
Chris@0 40 # # prints: (*==)++;
Chris@0 41 #
Chris@0 42 # OK, this is a very simple example :)
Chris@0 43 # You can also use +map+, +any?+, +find+ and even +sort_by+,
Chris@0 44 # if you want.
Chris@0 45 class Scanner < StringScanner
Chris@0 46
Chris@0 47 extend Plugin
Chris@0 48 plugin_host Scanners
Chris@0 49
# Raised if a Scanner fails while scanning.
#
# Derives from StandardError (not Exception) so that an ordinary
# +rescue+ / +rescue => e+ clause catches it, the same as any other
# application-level error.
ScanError = Class.new(StandardError)
Chris@0 52
Chris@0 53 require 'coderay/helpers/word_list'
Chris@0 54
Chris@0 55 # The default options for all scanner classes.
Chris@0 56 #
Chris@0 57 # Define @default_options for subclasses.
Chris@0 58 DEFAULT_OPTIONS = { :stream => false }
Chris@0 59
Chris@0 60 KINDS_NOT_LOC = [:comment, :doctype]
Chris@0 61
Chris@0 62 class << self
Chris@0 63
# Returns whether the Scanner can be used in streaming mode.
#
# A scanner class counts as streamable when it mixes in Streamable
# (which is what the instance-level +kind_of? Streamable+ check in
# Scanner#initialize looks for) or when the class object itself has
# been extended with Streamable.
def streamable?
  # is_a? alone only inspects the class object and therefore misses
  # the usual "include Streamable" case.
  include?(Streamable) || is_a?(Streamable)
end
Chris@0 68
# Normalizes +code+ before it is handed to the scanner.
#
# * +code+ is converted with +to_s+.
# * Where Strings know about encodings, a copy is tagged as UTF-8; if
#   its bytes are not valid UTF-8 the copy is tagged as binary instead.
# * Newlines are converted with String#to_unix.
#
# The caller's object is never retagged, so frozen input (for example
# the result of +nil.to_s+) is accepted as well.
def normify code
  code = code.to_s
  if code.respond_to? :force_encoding
    # Work on a copy: force_encoding changes the receiver in place.
    code = code.dup
    code.force_encoding 'utf-8'
    code.force_encoding 'binary' unless code.valid_encoding?
  end
  code.to_unix
end
Chris@0 84
# Reads or sets the file extension associated with this scanner class.
#
# With an argument, stores its +to_s+ form and returns it.
# Without one, returns the stored value, falling back to (and then
# remembering) the String form of +plugin_id+.
def file_extension extension = nil
  return @file_extension = extension.to_s if extension

  @file_extension ||= plugin_id.to_s
end
Chris@0 92
Chris@0 93 end
Chris@0 94
Chris@0 95 =begin
Chris@0 96 ## Excluded for speed reasons; protected seems to make methods slow.
Chris@0 97
Chris@0 98 # Save the StringScanner methods from being called.
Chris@0 99 # This would not be useful for highlighting.
Chris@0 100 strscan_public_methods =
Chris@0 101 StringScanner.instance_methods -
Chris@0 102 StringScanner.ancestors[1].instance_methods
Chris@0 103 protected(*strscan_public_methods)
Chris@0 104 =end
Chris@0 105
# Create a new Scanner.
#
# * +code+ is the input; it is passed through Scanner.normify and
#   then handed to the superclass StringScanner.
# * +options+ is a Hash with Symbols as keys. It is merged over the
#   DEFAULT_OPTIONS of the scanner's class. A token container to
#   fill can be supplied as <tt>options[:tokens]</tt>.
# * +block+ is the callback for streamed highlighting.
#
# With :stream set, a TokenStream built around +block+ collects the
# tokens (NotStreamableError is raised unless the scanner is a
# Streamable); otherwise a Tokens object is used.
#
# The base class itself cannot be instantiated.
def initialize code='', options = {}, &block
  if self.class == Scanner
    raise "I am only the basic Scanner class. I can't scan "\
      "anything. :( Use my subclasses."
  end

  @options = self.class::DEFAULT_OPTIONS.merge options

  super Scanner.normify(code)

  # Only an explicitly passed container is honoured here.
  @tokens = options[:tokens]
  if @options[:stream]
    unless block_given?
      warn "warning in CodeRay::Scanner.new: :stream is set, "\
        "but no block was given"
    end
    raise NotStreamableError, self unless kind_of? Streamable
    @tokens ||= TokenStream.new(&block)
  else
    if block_given?
      warn "warning in CodeRay::Scanner.new: Block given, "\
        "but :stream is #{@options[:stream]}"
    end
    @tokens ||= Tokens.new
  end
  @tokens.scanner = self

  setup
end
Chris@0 142
# Rewinds the scanner via the superclass, then clears the
# per-scan state with reset_instance.
def reset
  super()
  reset_instance
end
Chris@0 147
# Replaces the input string. The new code is normalized with
# Scanner.normify before the superclass stores it, and the
# per-scan state is cleared afterwards.
def string= code
  normalized = Scanner.normify(code)
  super(normalized)
  reset_instance
end
Chris@0 153
Chris@0 154 # More mnemonic accessor name for the input string.
Chris@0 155 alias code string
Chris@0 156 alias code= string=
Chris@0 157
# The Plugin ID of this scanner, as reported by its class.
def lang
  scanner_class = self.class
  scanner_class.plugin_id
end
Chris@0 162
# Scans the code and returns the tokens.
#
# * +new_string+, when given, replaces the current input first.
# * +options+ are merged over the scanner's own options and passed
#   on to scan_tokens for this run.
#
# In streaming mode (:stream must have been set already) the scanner
# is reset when no new input was given, and the token container
# itself is returned; otherwise the return value of scan_tokens is
# returned. Either way the result is remembered for #tokens.
def tokenize new_string=nil, options = {}
  options = @options.merge(options)
  self.string = new_string if new_string

  streaming = @options[:stream]
  reset if streaming && !new_string

  scanned = scan_tokens @tokens, options
  @cached_tokens = streaming ? @tokens : scanned
end
Chris@0 176
# The tokens of the last scan; runs tokenize first when nothing
# has been cached yet.
def tokens
  @cached_tokens || (@cached_tokens = tokenize)
end
Chris@0 180
# Whether the scanner is in streaming mode, i.e. whether its
# :stream option is set. Always answers +true+ or +false+.
def streaming?
  @options[:stream] ? true : false
end
Chris@0 185
# Traverses the tokens, handing each one to +block+.
#
# Raises ArgumentError in streaming mode.
def each &block
  if @options[:stream]
    raise ArgumentError, 'Cannot traverse TokenStream.'
  end
  tokens.each(&block)
end
Chris@0 192 include Enumerable
Chris@0 193
# The current line position of the scanner (the first line is 1).
#
# Only newlines in front of the current position are counted, so a
# newline that has not been scanned yet does not advance the result.
# +pos+ is treated as a byte offset where the String supports that.
#
# Beware, this is implemented inefficiently. It should be used
# for debugging only.
def line
  input = string
  scanned =
    if input.respond_to? :byteslice
      input.byteslice(0, pos)
    else
      input[0, pos]
    end
  scanned.count("\n") + 1
end
Chris@0 201
# The column of +pos+ (defaults to the current scanner position).
#
# Returns 0 for positions <= 0. Otherwise the result is the distance
# from the last newline found at or before +pos+; when there is no
# such newline the result is +pos+ itself.
#
# When the input contains multibyte characters, a binary copy of it
# is created once, kept in @bin_string and searched instead.
def column pos = self.pos
  return 0 if pos <= 0

  haystack = string()
  if haystack.respond_to?(:bytesize)
    multibyte = haystack.bytesize != haystack.size
    if defined?(@bin_string) || multibyte
      @bin_string ||= haystack.dup.force_encoding('binary')
      haystack = @bin_string
    end
  end

  newline_at = haystack.rindex(?\n, pos)
  newline_at ? pos - newline_at : pos
end
Chris@0 211
# Marshal support: only the options Hash is dumped ...
def marshal_dump()
  @options
end

# ... and it is the only state put back when loading.
def marshal_load(options)
  @options = options
end
Chris@0 219
Chris@0 220 protected
Chris@0 221
# Hook for subclasses: initialization that has to happen once per
# instance. It is called at the end of #initialize and does nothing
# by default.
#
# Use reset for initialization that has to be done once per scan.
def setup; end
Chris@0 229
# The central method, and commonly the only one a subclass
# implements.
#
# Subclasses must override it; the override must return +tokens+
# and must only use Tokens#<< for storing scanned tokens!
#
# This default implementation raises NotImplementedError.
def scan_tokens tokens, options
  message = "#{self.class}#scan_tokens not implemented."
  raise NotImplementedError, message
end
Chris@0 239
# Clears the per-scan state: empties the token container (unless
# the :keep_tokens option is set), forgets the cached tokens and
# drops the binary copy of the input if one had been created.
def reset_instance
  keep = @options[:keep_tokens]
  @tokens.clear unless keep
  @cached_tokens = nil
  return nil unless defined? @bin_string

  @bin_string = nil
end
Chris@0 245
# Raises a ScanError carrying additional status information.
#
# * +msg+ is the error description.
# * +tokens+ are the tokens scanned so far; their number and the
#   last ten of them are included in the report.
# * +state+ is the scanner state to report.
# * +ambit+ is the amount of input shown before and after the
#   current position.
#
# The report also contains line, column, pos, matched, bol? and eos?.
def raise_inspect msg, tokens, state = 'No state given!', ambit = 30
  # Never start before the beginning of the input: a negative start
  # would make String#[] count from the end of the string.
  context_start = [pos - ambit, 0].max

  details = [
    File.basename(caller[0]),
    msg,
    tokens.size,
    tokens.last(10).map { |t| t.inspect }.join("\n"),
    line, column, pos,
    matched, state, bol?, eos?,
    string[context_start, pos - context_start],
    string[pos, ambit],
  ]

  raise ScanError, <<-EOE % details


***ERROR in %s: %s (after %d tokens)

tokens:
%s

current line: %d column: %d pos: %d
matched: %p state: %p
bol? = %p, eos? = %p

surrounding code:
%p ~~ %p


***ERROR***

  EOE
end
Chris@0 277
Chris@0 278 end
Chris@0 279
Chris@0 280 end
Chris@0 281 end
Chris@0 282
class String
  # Converts DOS ("\r\n") and lone "\r" line endings to "\n".
  #
  # A string that contains no carriage return is returned as is (the
  # receiver itself); otherwise a converted copy is returned.
  def to_unix
    return self unless include?("\r")

    gsub(/\r\n?/, "\n")
  end
end