annotate vendor/gems/coderay-1.0.0/lib/coderay/scanners/ruby.rb @ 1171:b4558bc5837f bug_505

Close obsolete branch bug_505
author Chris Cannam
date Fri, 03 Aug 2012 19:40:23 +0100
parents cbb26bc654de
children
rev   line source
module CodeRay
module Scanners
  
  # This scanner is really complex, since Ruby _is_ a complex language!
  #
  # It tries to highlight 100% of all common code,
  # and 90% of unusual code.
  #
  # It is optimized for HTML highlighting, and is not very useful for
  # parsing or pretty printing.
  class Ruby < Scanner
    
    register_for :ruby
    file_extension 'rb'
    
    autoload :Patterns,    'coderay/scanners/ruby/patterns'
    autoload :StringState, 'coderay/scanners/ruby/string_state'
    
    # Returns a new StringState of type :string whose delimiter is a double
    # quote. The second argument (+true+) is the "interpreted" flag that
    # scan_tokens consults when it decides how to treat backslash escapes.
    def interpreted_string_state
      StringState.new :string, true, '"'
    end
    
  protected
    
    # Resets the state remembered between scans to :initial.
    def setup
      @state = :initial
    end
    
    # Tokenizes the input and sends the tokens to +encoder+.
    #
    # The scanner is a state machine. +state+ is either a Symbol (:initial,
    # :def_expected, :dot_expected, :module_expected, :undef_expected,
    # :undef_comma_expected, :alias_expected) or a StringState while the
    # scanner is inside a string-like literal (string, symbol, regexp,
    # shell command, heredoc).
    #
    # Options read here:
    # * <tt>options[:state]</tt> - state to start from; either a single state
    #   or a <tt>[state, heredocs]</tt> pair. Falls back to <tt>@state</tt>.
    # * <tt>options[:keep_state]</tt> - when truthy, the final
    #   <tt>[state, heredocs]</tt> pair is stored in <tt>@state</tt>.
    #
    # Returns +encoder+.
    def scan_tokens encoder, options
      state, heredocs = options[:state] || @state
      # Work on a copy so that the caller's list of pending heredocs is not
      # modified by the shift calls below.
      heredocs = heredocs.dup if heredocs.is_a?(Array)
      
      # Resuming inside a string-like literal: reopen its token group.
      if state && state.instance_of?(StringState)
        encoder.begin_group state.type
      end
      
      # State to return to after a single token was scanned in :initial
      # (used for "#$var" / "#@var" inside strings and for "def obj.meth").
      last_state = nil
      
      # false, or the operator ('.' or '::') that was just scanned.
      method_call_expected = false
      # Whether the next token may start a value; decides between e.g. a
      # regexp and a division operator when a slash is seen.
      value_expected = true
      
      # Stack of [state, curly_depth, heredocs] entries, one per open #{...}.
      inline_block_stack = nil
      inline_block_curly_depth = 0
      
      # Resuming with pending heredocs: continue with the first one.
      if heredocs
        state = heredocs.shift
        encoder.begin_group state.type
        heredocs = nil if heredocs.empty?
      end
      
      # def_object_stack = nil
      # def_object_paren_depth = 0
      
      patterns = Patterns  # avoid constant lookup
      
      # Selects the /u variants of the patterns below.
      unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
      
      until eos?
        
        if state.instance_of? ::Symbol
          
          if match = scan(/[ \t\f\v]+/)
            encoder.text_token match, :space
            
          elsif match = scan(/\n/)
            if heredocs
              unscan  # heredoc scanning needs \n at start
              state = heredocs.shift
              encoder.begin_group state.type
              heredocs = nil if heredocs.empty?
            else
              state = :initial if state == :undef_comma_expected
              encoder.text_token match, :space
              value_expected = true
            end
            
          elsif match = scan(bol? ? / \#(!)?.* | #{patterns::RUBYDOC_OR_DATA} /ox : /\#.*/)
            # self[1] is the "!" of a "#!" line, which is only matched at the
            # beginning of a line.
            encoder.text_token match, self[1] ? :doctype : :comment
            
          elsif match = scan(/\\\n/)
            if heredocs
              unscan  # heredoc scanning needs \n at start
              encoder.text_token scan(/\\/), :space
              state = heredocs.shift
              encoder.begin_group state.type
              heredocs = nil if heredocs.empty?
            else
              encoder.text_token match, :space
            end
            
          elsif state == :initial
            
            # IDENTS #
            if !method_call_expected &&
               match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
                                      /#{patterns::METHOD_NAME}/o)
              value_expected = false
              kind = patterns::IDENT_KIND[match]
              if kind == :ident
                # Capitalized and neither ending in !/? nor followed by "(":
                # highlight as a constant.
                if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/))
                  kind = :constant
                end
              elsif kind == :keyword
                state = patterns::KEYWORD_NEW_STATE[match]
                value_expected = true if patterns::KEYWORDS_EXPECTING_VALUE[match]
              end
              value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o)
              encoder.text_token match, kind
              
            elsif method_call_expected &&
               match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo :
                                      /#{patterns::METHOD_AFTER_DOT}/o)
              if method_call_expected == '::' && match[/\A[A-Z]/] && !match?(/\(/)
                encoder.text_token match, :constant
              else
                encoder.text_token match, :ident
              end
              method_call_expected = false
              value_expected = check(/#{patterns::VALUE_FOLLOWS}/o)
              
            # OPERATORS #
            elsif !method_call_expected && match = scan(/ (\.(?!\.)|::) | (?: \.\.\.? | ==?=? | [,\(\[\{] )() | [\)\]\}] /x)
              # Group 1: "." or "::" (a method call follows).
              # Group 2: empty marker, set for operators after which a value
              # is expected.
              method_call_expected = self[1]
              value_expected = !method_call_expected && self[2]
              if inline_block_stack
                case match
                when '{'
                  inline_block_curly_depth += 1
                when '}'
                  inline_block_curly_depth -= 1
                  if inline_block_curly_depth == 0  # closing brace of inline block reached
                    state, inline_block_curly_depth, heredocs = inline_block_stack.pop
                    inline_block_stack = nil if inline_block_stack.empty?
                    heredocs = nil if heredocs && heredocs.empty?
                    encoder.text_token match, :inline_delimiter
                    encoder.end_group :inline
                    next
                  end
                end
              end
              encoder.text_token match, :operator
              
            elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
                                         /#{patterns::SYMBOL}/o)
              case delim = match[1]
              when ?', ?"
                # Quoted symbol (:'...' or :"..."): scan the rest as a string.
                encoder.begin_group :symbol
                encoder.text_token ':', :symbol
                match = delim.chr
                encoder.text_token match, :delimiter
                state = self.class::StringState.new :symbol, delim == ?", match
              else
                encoder.text_token match, :symbol
                value_expected = false
              end
              
            elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx)
              encoder.begin_group :string
              if match.size == 1
                # Only the opening quote matched: the string contains escapes
                # or interpolation (or is unterminated); continue in a
                # StringState.
                encoder.text_token match, :delimiter
                state = self.class::StringState.new :string, match == '"', match  # important for streaming
              else
                # Simple string without escapes or interpolation: emit it
                # completely right away.
                encoder.text_token match[0,1], :delimiter
                encoder.text_token match[1..-2], :content if match.size > 2
                encoder.text_token match[-1,1], :delimiter
                encoder.end_group :string
                value_expected = false
              end
              
            elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
                                         /#{patterns::INSTANCE_VARIABLE}/o)
              value_expected = false
              encoder.text_token match, :instance_variable
              
            elsif value_expected && match = scan(/\//)
              encoder.begin_group :regexp
              encoder.text_token match, :delimiter
              state = self.class::StringState.new :regexp, true, '/'
              
            elsif match = scan(value_expected ? /[-+]?#{patterns::NUMERIC}/o : /#{patterns::NUMERIC}/o)
              if method_call_expected
                encoder.text_token match, :error
                method_call_expected = false
              else
                encoder.text_token match, self[1] ? :float : :integer  # TODO: send :hex/:octal/:binary
              end
              value_expected = false
              
            elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x)
              value_expected = true
              encoder.text_token match, :operator
              
            elsif value_expected && match = scan(/#{patterns::HEREDOC_OPEN}/o)
              quote = self[3]
              delim = self[quote ? 4 : 2]
              kind = patterns::QUOTE_TO_TYPE[quote]
              encoder.begin_group kind
              encoder.text_token match, :delimiter
              encoder.end_group kind
              # The heredoc body starts on the next line; remember it until
              # the newline is reached.
              heredocs ||= []  # create heredocs if empty
              heredocs << self.class::StringState.new(kind, quote != "'", delim,
                self[1] == '-' ? :indented : :linestart)
              value_expected = false
              
            elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o)
              kind = patterns::FANCY_STRING_KIND[self[1]]
              encoder.begin_group kind
              state = self.class::StringState.new kind, patterns::FANCY_STRING_INTERPRETED[self[1]], self[2]
              encoder.text_token match, :delimiter
              
            elsif value_expected && match = scan(/#{patterns::CHARACTER}/o)
              value_expected = false
              encoder.text_token match, :integer
              
            elsif match = scan(/ %=? | <(?:<|=>?)? | \? /x)
              value_expected = true
              encoder.text_token match, :operator
              
            elsif match = scan(/`/)
              encoder.begin_group :shell
              encoder.text_token match, :delimiter
              state = self.class::StringState.new :shell, true, match
              
            elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
                                         /#{patterns::GLOBAL_VARIABLE}/o)
              encoder.text_token match, :global_variable
              value_expected = false
              
            elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
                                         /#{patterns::CLASS_VARIABLE}/o)
              encoder.text_token match, :class_variable
              value_expected = false
              
            elsif match = scan(/\\\z/)
              encoder.text_token match, :space
              
            else
              # Nothing matched. If a method name was expected, drop that
              # expectation and try all the rules again.
              if method_call_expected
                method_call_expected = false
                next
              end
              unless unicode
                # check for unicode
                # $DEBUG is switched off around the check and restored in the
                # ensure clause - presumably so that the exception rescued
                # below is not reported in debug mode. The previous value is
                # kept in a local variable so no extra global is created.
                debug_before, $DEBUG = $DEBUG, false
                begin
                  if check(/./mu).size > 1
                    # seems like we should try again with unicode
                    unicode = true
                  end
                rescue
                  # bad unicode char; use getch
                ensure
                  $DEBUG = debug_before
                end
                next if unicode
              end
              
              encoder.text_token getch, :error
              
            end
            
            # A single token was scanned on behalf of another state; go back.
            if last_state
              state = last_state
              last_state = nil
            end
            
          elsif state == :def_expected
            if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
                                      /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
              encoder.text_token match, :method
              state = :initial
            else
              # No plain method name here: scan one token in :initial, then
              # expect "." or "::" (as in "def obj.meth").
              last_state = :dot_expected
              state = :initial
            end
            
          elsif state == :dot_expected
            if match = scan(/\.|::/)
              # invalid definition
              state = :def_expected
              encoder.text_token match, :operator
            else
              state = :initial
            end
            
          elsif state == :module_expected
            if match = scan(/<</)
              # "class << obj": stay in this state for what follows.
              encoder.text_token match, :operator
            else
              state = :initial
              if match = scan(unicode ? / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /oux :
                                        / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
                encoder.text_token match, :class
              end
            end
            
          elsif state == :undef_expected
            state = :undef_comma_expected
            if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
                                      /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
              encoder.text_token match, :method
            elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
                                         /#{patterns::SYMBOL}/o)
              # Same unicode-aware pattern selection as for symbols in the
              # :initial state.
              case delim = match[1]
              when ?', ?"
                encoder.begin_group :symbol
                encoder.text_token ':', :symbol
                match = delim.chr
                encoder.text_token match, :delimiter
                state = self.class::StringState.new :symbol, delim == ?", match
                # Continue the undef list once the quoted symbol is closed.
                state.next_state = :undef_comma_expected
              else
                encoder.text_token match, :symbol
              end
            else
              state = :initial
            end
            
          elsif state == :undef_comma_expected
            if match = scan(/,/)
              encoder.text_token match, :operator
              state = :undef_expected
            else
              state = :initial
            end
            
          elsif state == :alias_expected
            match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
                                   /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
            
            if match
              # Each of the two names is either a symbol (leading colon) or a
              # bare method name.
              encoder.text_token self[1], (self[1][0] == ?: ? :symbol : :method)
              encoder.text_token self[2], :space
              encoder.text_token self[3], (self[3][0] == ?: ? :symbol : :method)
            end
            state = :initial
            
          else
            #:nocov:
            raise_inspect 'Unknown state: %p' % [state], encoder
            #:nocov:
          end
          
        else  # StringState
          
          # Everything up to the next position of interest for this literal
          # is plain content.
          match = scan_until(state.pattern) || scan_rest
          unless match.empty?
            encoder.text_token match, :content
            break if eos?
          end
          
          if state.heredoc && self[1]  # end of heredoc
            match = getch
            match << scan_until(/$/) unless eos?
            encoder.text_token match, :delimiter unless match.empty?
            encoder.end_group state.type
            state = state.next_state
            next
          end
          
          case match = getch
          
          when state.delim
            if state.paren_depth
              # Nestable delimiters: only the outermost closing delimiter
              # ends the literal.
              state.paren_depth -= 1
              if state.paren_depth > 0
                encoder.text_token match, :content
                next
              end
            end
            encoder.text_token match, :delimiter
            if state.type == :regexp && !eos?
              match = scan(/#{patterns::REGEXP_MODIFIERS}/o)
              encoder.text_token match, :modifier unless match.empty?
            end
            encoder.end_group state.type
            value_expected = false
            state = state.next_state
            
          when '\\'
            if state.interpreted
              if esc = scan(/#{patterns::ESCAPE}/o)
                encoder.text_token match + esc, :char
              else
                encoder.text_token match, :error
              end
            else
              # Non-interpreted literal: only the delimiter and the backslash
              # itself are highlighted as escaped characters.
              case esc = getch
              when nil
                encoder.text_token match, :content
              when state.delim, '\\'
                encoder.text_token match + esc, :char
              else
                encoder.text_token match + esc, :content
              end
            end
            
          when '#'
            case peek(1)
            when '{'
              # Start of #{...}: save the string state and scan the embedded
              # code in :initial until the matching "}" is found.
              inline_block_stack ||= []
              inline_block_stack << [state, inline_block_curly_depth, heredocs]
              value_expected = true
              state = :initial
              inline_block_curly_depth = 1
              encoder.begin_group :inline
              encoder.text_token match + getch, :inline_delimiter
            when '$', '@'
              # "#$var" / "#@var": scan one token in :initial, then return to
              # the string via last_state.
              encoder.text_token match, :escape
              last_state = state
              state = :initial
            else
              #:nocov:
              raise_inspect 'else-case # reached; #%p not handled' % [peek(1)], encoder
              #:nocov:
            end
            
          when state.opening_paren
            state.paren_depth += 1
            encoder.text_token match, :content
            
          else
            #:nocov:
            raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], encoder
            #:nocov:
            
          end
          
        end
        
      end
      
      # cleaning up
      # Close the group of a literal that is still open at the end of input.
      if state.is_a? StringState
        encoder.end_group state.type
      end
      
      if options[:keep_state]
        if state.is_a?(StringState) && state.heredoc
          # An unfinished heredoc is put back in front of the pending list.
          (heredocs ||= []).unshift state
          state = :initial
        elsif heredocs && heredocs.empty?
          heredocs = nil
        end
        @state = state, heredocs
      end
      
      # Close every #{...} block that is still open, together with the
      # literal that contains it.
      if inline_block_stack
        until inline_block_stack.empty?
          state, = *inline_block_stack.pop
          encoder.end_group :inline
          encoder.end_group state.type
        end
      end
      
      encoder
    end
    
  end
  
end
end