annotate vendor/plugins/coderay-0.9.2/lib/coderay/scanners/.svn/text-base/ruby.rb.svn-base @ 36:de76cd3e8c8e cc-branches

* Probably abortive experiments in extracting the branch from Hg
author Chris Cannam <chris.cannam@soundsoftware.ac.uk>
date Wed, 20 Oct 2010 10:07:29 +0100
parents 513646585e45
children
rev   line source
module CodeRay
  module Scanners

    # This scanner is really complex, since Ruby _is_ a complex language!
    #
    # It tries to highlight 100% of all common code,
    # and 90% of strange codes.
    #
    # It is optimized for HTML highlighting, and is not very useful for
    # parsing or pretty printing.
    #
    # For now, I think it's better than the scanners in VIM or Syntax, or
    # any highlighter I was able to find, except Caleb's RubyLexer.
    #
    # I hope it's also better than the rdoc/irb lexer.
    class Ruby < Scanner

      include Streamable

      register_for :ruby
      file_extension 'rb'

      helper :patterns

      # Fallback definition for runtimes that do not provide EncodingError.
      # NOTE(review): the fallback derives from Exception, so a bare `rescue`
      # would not catch it; confirm whether StandardError was intended.
      if not defined? EncodingError
        EncodingError = Class.new Exception
      end

    private

      # Tokenizes the scanner's input as Ruby source code.
      #
      # tokens::  collector that receives the result via <tt><<</tt>. Entries
      #           are either <tt>[text, kind]</tt> pairs or
      #           <tt>[:open, kind]</tt> / <tt>[:close, kind]</tt> markers
      #           that bracket strings, regexps, symbols, heredocs, shell
      #           strings and <tt>#{...}</tt> inline blocks.
      # options:: accepted for interface compatibility; this method does not
      #           read it.
      #
      # Returns +tokens+.
      #
      # The loop is a hand-written state machine. +state+ is either a symbol
      # (:initial, :def_expected, :module_expected, :undef_expected,
      # :undef_comma_expected, :alias_expected) while scanning ordinary code,
      # or a Patterns::StringState instance while inside a string-like
      # literal.
      def scan_tokens tokens, options
        # The two flags below use :set as a "just set by this token" marker.
        # After each regular token they are normalized to true (if :set) or
        # false, so each flag only affects the token that immediately follows.
        last_token_dot = false      # previous token was `.` or `::`
        value_expected = true       # a value (not a binary operator) may start here
        heredocs = nil              # pending heredoc states; their bodies start after the next newline
        last_state = nil            # string state to resume after a single `#$var` / `#@var` token
        state = :initial
        depth = nil                 # brace depth inside the current #{...} inline block
        inline_block_stack = []     # saved [state, depth, heredocs] for each open #{...}
        unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'

        patterns = Patterns  # avoid constant lookup

        until eos?
          match = nil
          kind = nil

          if state.instance_of? patterns::StringState
            # {{{
            # Inside a string-like literal: consume plain content up to the
            # next character of interest (or to the end of input).
            match = scan_until(state.pattern) || scan_until(/\z/)
            tokens << [match, :content] unless match.empty?
            break if eos?

            # NOTE(review): relies on state.pattern setting capture group 1
            # when the heredoc terminator follows; the pattern is defined in
            # Patterns, which is not visible here.
            if state.heredoc and self[1]  # end of heredoc
              match = getch.to_s
              match << scan_until(/$/) unless eos?
              tokens << [match, :delimiter]
              tokens << [:close, state.type]
              state = state.next_state
              next
            end

            case match = getch

            when state.delim
              # A closing delimiter only ends the literal once all nested
              # paired delimiters have been closed.
              if state.paren
                state.paren_depth -= 1
                if state.paren_depth > 0
                  tokens << [match, :nesting_delimiter]
                  next
                end
              end
              tokens << [match, :delimiter]
              if state.type == :regexp and not eos?
                # NOTE(review): assumes REGEXP_MODIFIERS can match the empty
                # string, since the result is used without a nil check.
                modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
                tokens << [modifiers, :modifier] unless modifiers.empty?
              end
              tokens << [:close, state.type]
              value_expected = false
              state = state.next_state

            when '\\'
              if state.interpreted
                if esc = scan(/ #{patterns::ESCAPE} /ox)
                  tokens << [match + esc, :char]
                else
                  tokens << [match, :error]
                end
              else
                # Non-interpreted literal: only the delimiter and the
                # backslash itself are treated as escapes.
                case m = getch
                when state.delim, '\\'
                  tokens << [match + m, :char]
                when nil
                  tokens << [match, :error]
                else
                  tokens << [match + m, :content]
                end
              end

            when '#'
              case peek(1)
              when '{'
                # Start of #{...}: save the string context and scan the
                # embedded code as normal Ruby until the matching brace.
                inline_block_stack << [state, depth, heredocs]
                value_expected = true
                state = :initial
                depth = 1
                tokens << [:open, :inline]
                tokens << [match + getch, :inline_delimiter]
              when '$', '@'
                tokens << [match, :escape]
                last_state = state  # scan one token as normal code, then return here
                state = :initial
              else
                raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
              end

            when state.paren
              state.paren_depth += 1
              tokens << [match, :nesting_delimiter]

            when /#{patterns::REGEXP_SYMBOLS}/ox
              tokens << [match, :function]

            else
              raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens

            end
            next
            # }}}
          else
            # {{{
            if match = scan(/[ \t\f]+/)
              kind = :space
              # Do not swallow the newline while heredocs are pending; the
              # newline branch below needs to see it.
              match << scan(/\s*/) unless eos? || heredocs
              value_expected = true if match.index(?\n)
              tokens << [match, kind]
              next

            elsif match = scan(/\\?\n/)
              kind = :space
              if match == "\n"
                value_expected = true
                state = :initial if state == :undef_comma_expected
              end
              if heredocs
                unscan  # heredoc scanning needs \n at start
                state = heredocs.shift
                tokens << [:open, state.type]
                heredocs = nil if heredocs.empty?
                next
              else
                match << scan(/\s*/) unless eos?
              end
              tokens << [match, kind]
              next

            elsif bol? && match = scan(/\#!.*/)
              tokens << [match, :doctype]
              next

            elsif match = scan(/\#.*/) or
              ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
              kind = :comment
              tokens << [match, kind]
              next

            elsif state == :initial

              # IDENTS #
              if match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
                                        /#{patterns::METHOD_NAME}/o)
                if last_token_dot
                  # After `.` / `::` the name is a method call or a constant,
                  # never a keyword.
                  kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
                else
                  kind = patterns::IDENT_KIND[match]
                  if kind == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
                    kind = :constant
                  elsif kind == :reserved
                    state = patterns::DEF_NEW_STATE[match]
                    value_expected = :set if patterns::KEYWORDS_EXPECTING_VALUE[match]
                  end
                end
                value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)

              elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}|\(/o)
                kind = :ident
                value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)

              # OPERATORS #
              elsif not last_token_dot and match = scan(/ \.\.\.? | (?:\.|::)() | [,\(\)\[\]\{\}] | ==?=? /x)
                if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
                  value_expected = :set
                end
                # The empty capture group is only set by the `.` / `::`
                # alternative of the pattern above.
                last_token_dot = :set if self[1]
                kind = :operator
                unless inline_block_stack.empty?
                  case match
                  when '{'
                    depth += 1
                  when '}'
                    depth -= 1
                    if depth == 0  # closing brace of inline block reached
                      state, depth, heredocs = inline_block_stack.pop
                      heredocs = nil if heredocs && heredocs.empty?
                      tokens << [match, :inline_delimiter]
                      # The shared tail below then emits [:close, :inline].
                      kind = :inline
                      match = :close
                    end
                  end
                end

              elsif match = scan(/ ['"] /mx)
                tokens << [:open, :string]
                kind = :delimiter
                state = patterns::StringState.new :string, match == '"', match  # important for streaming

              elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o)
                kind = :instance_variable

              elsif value_expected and match = scan(/\//)
                tokens << [:open, :regexp]
                kind = :delimiter
                interpreted = true
                state = patterns::StringState.new :regexp, interpreted, match

              # elsif match = scan(/[-+]?#{patterns::NUMERIC}/o)
              elsif match = value_expected ? scan(/[-+]?#{patterns::NUMERIC}/o) : scan(/#{patterns::NUMERIC}/o)
                # NOTE(review): presumably group 1 of NUMERIC captures the
                # part that makes the literal a float; confirm in Patterns.
                kind = self[1] ? :float : :integer

              elsif match = scan(/#{patterns::SYMBOL}/o)
                case delim = match[1]
                when ?', ?"
                  # Quoted symbol (:'...' / :"..."): scan its body as a string.
                  tokens << [:open, :symbol]
                  tokens << [':', :symbol]
                  match = delim.chr
                  kind = :delimiter
                  state = patterns::StringState.new :symbol, delim == ?", match
                else
                  kind = :symbol
                end

              elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x)
                value_expected = :set
                kind = :operator

              elsif value_expected and match = scan(/#{patterns::HEREDOC_OPEN}/o)
                indented = self[1] == '-'
                quote = self[3]
                delim = self[quote ? 4 : 2]
                kind = patterns::QUOTE_TO_TYPE[quote]
                # The opener is emitted as a complete open/delimiter/close
                # group now (the shared tail emits [:close, kind]). The body
                # is queued and scanned after the next newline.
                tokens << [:open, kind]
                tokens << [match, :delimiter]
                match = :close
                heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart )
                heredocs ||= []  # create heredocs if empty
                heredocs << heredoc

              elsif value_expected and match = scan(/#{patterns::FANCY_START_CORRECT}/o)
                kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do
                  # self[1] is the captured type character that has no entry
                  # in FancyStringType.
                  raise_inspect 'Unknown fancy string: %%%p' % self[1], tokens
                end
                tokens << [:open, kind]
                state = patterns::StringState.new kind, interpreted, self[2]
                kind = :delimiter

              elsif value_expected and match = scan(/#{patterns::CHARACTER}/o)
                kind = :integer

              elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
                value_expected = :set
                kind = :operator

              elsif match = scan(/`/)
                if last_token_dot
                  kind = :operator
                else
                  tokens << [:open, :shell]
                  kind = :delimiter
                  state = patterns::StringState.new :shell, true, match
                end

              elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o)
                kind = :global_variable

              elsif match = scan(/#{patterns::CLASS_VARIABLE}/o)
                kind = :class_variable

              else
                # Nothing matched. Before reporting an error, check whether
                # the next character is multi-byte; if so, switch to the
                # unicode patterns and retry this position.
                if !unicode
                  # check for unicode
                  # NOTE(review): $DEBUG is switched off around the probe,
                  # presumably to keep the rescued exception quiet; it is
                  # always restored in the ensure clause.
                  debug, $DEBUG = $DEBUG, false
                  begin
                    if check(/./mu).size > 1
                      # seems like we should try again with unicode
                      unicode = true
                    end
                  rescue
                    # bad unicode char; use getch
                  ensure
                    $DEBUG = debug
                  end
                  next if unicode
                end
                kind = :error
                match = getch

              end

            elsif state == :def_expected
              state = :initial
              if scan(/self\./)
                tokens << ['self', :pre_constant]
                tokens << ['.', :operator]
              end
              if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
                                        /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
                kind = :method
              else
                next
              end

            elsif state == :module_expected
              if match = scan(/<</)
                # `class << ...`: stay in this state for what follows.
                kind = :operator
              else
                state = :initial
                if match = scan(/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
                  kind = :class
                else
                  next
                end
              end

            elsif state == :undef_expected
              state = :undef_comma_expected
              if match = scan(/#{patterns::METHOD_NAME_EX}/o)
                kind = :method
              elsif match = scan(/#{patterns::SYMBOL}/o)
                case delim = match[1]
                when ?', ?"
                  tokens << [:open, :symbol]
                  tokens << [':', :symbol]
                  match = delim.chr
                  kind = :delimiter
                  state = patterns::StringState.new :symbol, delim == ?", match
                  # Resume the undef argument list once the symbol is closed.
                  state.next_state = :undef_comma_expected
                else
                  kind = :symbol
                end
              else
                state = :initial
                next
              end

            elsif state == :alias_expected
              match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
                                     /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)

              if match
                # Both names and the separating whitespace are emitted here;
                # a leading colon marks a symbol, anything else a method name.
                tokens << [self[1], (self[1][0] == ?: ? :symbol : :method)]
                tokens << [self[2], :space]
                tokens << [self[3], (self[3][0] == ?: ? :symbol : :method)]
              end
              state = :initial
              next

            elsif state == :undef_comma_expected
              if match = scan(/,/)
                kind = :operator
                state = :undef_expected
              else
                state = :initial
                next
              end

            end
            # }}}

            # Shared tail for branches that did not `next`: normalize the
            # one-token flags (left untouched after an error token), emit the
            # token, and resume a suspended string state if there is one.
            unless kind == :error
              value_expected = value_expected == :set
              last_token_dot = last_token_dot == :set
            end

            if $CODERAY_DEBUG and not kind
              raise_inspect 'Error token %p in line %d' %
                [[match, kind], line], tokens, state
            end
            raise_inspect 'Empty token', tokens unless match

            tokens << [match, kind]

            if last_state
              state = last_state
              last_state = nil
            end
          end
        end

        # End of input: close whatever is still open so that every :open
        # marker gets a matching :close. Stack entries saved at `#{` have
        # more than one element and also need their :inline group closed.
        inline_block_stack << [state] if state.is_a? patterns::StringState
        until inline_block_stack.empty?
          this_block = inline_block_stack.pop
          tokens << [:close, :inline] if this_block.size > 1
          state = this_block.first
          tokens << [:close, state.type]
        end

        tokens
      end

    end

  end
end
Chris@0 412
Chris@0 413 # vim:fdm=marker