annotate vendor/gems/coderay-1.0.0/lib/coderay/scanners/java.rb @ 1022:f2ec92061fca browsing

Merge from live branch
author Chris Cannam <chris.cannam@soundsoftware.ac.uk>
date Tue, 13 Nov 2012 10:35:40 +0000
parents cbb26bc654de
children
rev   line source
module CodeRay
module Scanners

  # Scanner for Java.
  #
  # Walks over Java source text and reports every token to the encoder with
  # a kind such as :keyword, :type, :comment, :float or :annotation. String
  # and character literals are reported as a :string group containing
  # :delimiter, :content and :char tokens.
  class Java < Scanner

    register_for :java

    autoload :BuiltinTypes, 'coderay/scanners/java/builtin_types'

    # http://java.sun.com/docs/books/tutorial/java/nutsandbolts/_keywords.html
    #
    # Only words from the Java keyword list belong here. The JavaScript-only
    # words "typeof", "debugger" and "export" are ordinary identifiers in
    # Java and are therefore not listed.
    KEYWORDS = %w[
      assert break case catch continue default do else
      finally for if instanceof import new package
      return switch throw try while
    ]  # :nodoc:
    # Reserved by the language but not usable in programs.
    RESERVED = %w[ const goto ]  # :nodoc:
    CONSTANTS = %w[ false null true ]  # :nodoc:
    MAGIC_VARIABLES = %w[ this super ]  # :nodoc:
    TYPES = %w[
      boolean byte char class double enum float int interface long
      short void
    ] << '[]'  # :nodoc: because int[] should be highlighted as a type
    DIRECTIVES = %w[
      abstract extends final implements native private protected public
      static strictfp synchronized throws transient volatile
    ]  # :nodoc:

    # Lookup from a scanned word to its token kind, built on
    # WordList.new(:ident). The lists are registered in the order below:
    # builtin type names ending in "Error" or "Exception" are added as
    # :exception after the complete builtin list was added as
    # :predefined_type.
    # NOTE(review): this relies on a later `add` replacing the earlier entry
    # for the same word - confirm against WordList#add.
    IDENT_KIND = WordList.new(:ident).
      add(KEYWORDS, :keyword).
      add(RESERVED, :reserved).
      add(CONSTANTS, :predefined_constant).
      add(MAGIC_VARIABLES, :local_variable).
      add(TYPES, :type).
      add(BuiltinTypes::List, :predefined_type).
      add(BuiltinTypes::List.select { |builtin| builtin[/(Error|Exception)$/] }, :exception).
      add(DIRECTIVES, :directive)  # :nodoc:

    # What may follow a backslash inside a string to be reported as :char.
    ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x  # :nodoc:
    # Runs of plain string content, keyed by the opening delimiter: anything
    # except a backslash or the delimiter itself. This scanner only opens
    # strings with ' or ", so the '/' entry is not selected by scan_tokens.
    STRING_CONTENT_PATTERN = {
      "'" => /[^\\']+/,
      '"' => /[^\\"]+/,
      '/' => /[^\\\/]+/,
    }  # :nodoc:
    # Java identifiers may contain "$" in any position (for example the
    # compiler-generated name Outer$Inner), so it is accepted next to
    # letters, digits and "_".
    IDENT = /[a-zA-Z_$][A-Za-z_0-9$]*/  # :nodoc:

  protected

    # Scans the whole input and sends the resulting tokens to +encoder+.
    #
    # encoder:: receives text_token, begin_group and end_group calls.
    # options:: not used by this scanner.
    #
    # Returns +encoder+.
    #
    # The scanner has two states: :initial (ordinary code) and :string
    # (inside a string or character literal). raise_inspect is called for an
    # unknown state or when nothing in the :string state matches.
    def scan_tokens encoder, options

      state = :initial
      string_delimiter = nil
      # false, or the token kind (:include / :namespace) to use for the
      # dotted name following "import" / "package".
      package_name_expected = false
      # true right after "class" / "interface": the next identifier is the
      # name being declared.
      class_name_follows = false
      # true when the previous non-space, non-comment token was ".".
      last_token_dot = false

      until eos?

        case state

        when :initial

          # Space and comment tokens use `next`, so they do not reset
          # last_token_dot at the bottom of the loop.
          if match = scan(/ \s+ | \\\n /x)
            encoder.text_token match, :space
            next

          # A line comment ends at the end of its line: Java has no
          # backslash line continuation, so a trailing "\" must not pull the
          # following line into the comment. An unterminated block comment
          # runs to the end of the input.
          elsif match = scan(%r! // [^\n]* | /\* (?: .*? \*/ | .* ) !mx)
            encoder.text_token match, :comment
            next

          elsif package_name_expected && match = scan(/ #{IDENT} (?: \. #{IDENT} )* /ox)
            encoder.text_token match, package_name_expected

          elsif match = scan(/ #{IDENT} | \[\] /ox)
            kind = IDENT_KIND[match]
            if last_token_dot
              # A member name after "." is never a keyword or type
              # (e.g. Foo.class), and must not trigger the modes below.
              kind = :ident
            elsif class_name_follows
              kind = :class
              class_name_follows = false
            else
              case match
              when 'import'
                package_name_expected = :include
              when 'package'
                package_name_expected = :namespace
              when 'class', 'interface'
                class_name_follows = true
              end
            end
            encoder.text_token match, kind

          elsif match = scan(/ \.(?!\d) | [,?:()\[\]}] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? | <<<?=? | >>>?=? /x)
            encoder.text_token match, :operator

          elsif match = scan(/;/)
            # End of statement: an import/package name can no longer follow.
            package_name_expected = false
            encoder.text_token match, :operator

          elsif match = scan(/\{/)
            class_name_follows = false
            encoder.text_token match, :operator

          elsif check(/[\d.]/)
            # Order matters: hex and octal are tried before float, and float
            # before plain integer. Hex and octal literals may carry the
            # long suffix (0xFFL, 017L).
            if match = scan(/0[xX][0-9A-Fa-f]+[lL]?/)
              encoder.text_token match, :hex
            # A leading zero followed by a float marker (".", exponent or a
            # f/F/d/D suffix) is a floating-point literal, not an octal one.
            elsif match = scan(/(?>0[0-7]+)(?![89.eEfFdD])[lL]?/)
              encoder.text_token match, :octal
            elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
              encoder.text_token match, :float
            elsif match = scan(/\d+[lL]?/)
              encoder.text_token match, :integer
            end

          elsif match = scan(/["']/)
            state = :string
            encoder.begin_group state
            string_delimiter = match
            encoder.text_token match, :delimiter

          elsif match = scan(/ @ #{IDENT} /ox)
            encoder.text_token match, :annotation

          else
            # Nothing matched: consume one character so the loop advances.
            encoder.text_token getch, :error

          end

        when :string
          if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
            encoder.text_token match, :content
          elsif match = scan(/["'\/]/)
            # The content pattern above consumes every quote character other
            # than the opening delimiter, so this is the closing delimiter.
            encoder.text_token match, :delimiter
            encoder.end_group state
            state = :initial
            string_delimiter = nil
          elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            # In a '...' literal only \\ and \' are reported as :char; every
            # other escape is reported as plain content.
            if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
              encoder.text_token match, :content
            else
              encoder.text_token match, :char
            end
          elsif match = scan(/\\./m)
            # Unknown escape sequence: keep it as content.
            encoder.text_token match, :content
          elsif match = scan(/ \\ | $ /x)
            # Broken literal: close the group and leave the string state.
            encoder.end_group state
            state = :initial
            # "$" matches with zero width; do not emit an empty error token.
            encoder.text_token match, :error unless match.empty?
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
          end

        else
          raise_inspect 'Unknown state', encoder

        end

        last_token_dot = match == '.'

      end

      # Input ended inside a literal: close the group that is still open.
      if state == :string
        encoder.end_group state
      end

      encoder
    end

  end

end
end