annotate vendor/plugins/coderay-0.9.2/lib/coderay/scanners/java.rb @ 861:b8105f717bf7 bug_182

Close obsolete branch bug_182
author Chris Cannam
date Fri, 10 Jun 2011 16:49:58 +0100
parents 513646585e45
children
rev   line source
module CodeRay
module Scanners

  # Scanner that splits Java source text into [text, kind] tokens.
  #
  # Identifiers are classified through IDENT_KIND; string and character
  # literals are emitted as a group delimited by [:open, :string] and
  # [:close, :string] marker tokens.
  class Java < Scanner

    include Streamable
    register_for :java
    helper :builtin_types

    # http://java.sun.com/docs/books/tutorial/java/nutsandbolts/_keywords.html
    # NOTE(review): typeof, debugger and export are not Java keywords; they
    # look like carry-overs from a JavaScript word list. Confirm before
    # removing, since that changes how such identifiers are highlighted.
    KEYWORDS = %w[
      assert break case catch continue default do else
      finally for if instanceof import new package
      return switch throw try typeof while
      debugger export
    ]
    RESERVED = %w[ const goto ]
    CONSTANTS = %w[ false null true ]
    MAGIC_VARIABLES = %w[ this super ]
    TYPES = %w[
      boolean byte char class double enum float int interface long
      short void
    ] << '[]'  # because int[] should be highlighted as a type
    DIRECTIVES = %w[
      abstract extends final implements native private protected public
      static strictfp synchronized throws transient volatile
    ]

    # Maps an identifier to its token kind; words in none of the lists are
    # plain :ident. The exception entries are added after the general
    # builtin-type entries for the same names.
    IDENT_KIND = WordList.new(:ident).
      add(KEYWORDS, :keyword).
      add(RESERVED, :reserved).
      add(CONSTANTS, :pre_constant).
      add(MAGIC_VARIABLES, :local_variable).
      add(TYPES, :type).
      add(BuiltinTypes::List, :pre_type).
      add(BuiltinTypes::List.select { |builtin| builtin[/(Error|Exception)$/] }, :exception).
      add(DIRECTIVES, :directive)

    # Escape sequence bodies (the part after the backslash).
    ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
    # Per delimiter: a run of literal content, i.e. anything except a
    # backslash or the delimiter itself.
    STRING_CONTENT_PATTERN = {
      "'" => /[^\\']+/,
      '"' => /[^\\"]+/,
      '/' => /[^\\\/]+/,
    }
    IDENT = /[a-zA-Z_][A-Za-z_0-9]*/

    # Scans the remaining input and appends one token per lexeme to +tokens+.
    #
    # tokens::  collection that receives [text, kind] pairs through <<, plus
    #           [:open, :string] / [:close, :string] markers around literals.
    # options:: accepted for interface compatibility; not read here.
    #
    # Returns +tokens+.
    def scan_tokens tokens, options

      state = :initial
      string_delimiter = nil
      # import_clause:      set by the 'import' keyword, cleared by the next ';'.
      # class_name_follows: set by 'class' / 'interface', cleared by the next
      #                     identifier or '{'.
      # last_token_dot:     true when the previous non-space, non-comment
      #                     token was '.'.
      import_clause = class_name_follows = last_token_dot = false

      until eos?

        kind = nil
        match = nil

        case state

        when :initial

          # Whitespace and comments are pushed directly and skip the
          # bookkeeping at the bottom of the loop, so last_token_dot
          # survives across them.
          if match = scan(/ \s+ | \\\n /x)
            tokens << [match, :space]
            next

          elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            tokens << [match, :comment]
            next

          # Inside an import statement a whole dotted name is one token.
          elsif import_clause && scan(/ #{IDENT} (?: \. #{IDENT} )* /ox)
            kind = :include

          elsif match = scan(/ #{IDENT} | \[\] /ox)
            kind = IDENT_KIND[match]
            if last_token_dot
              # A member name after '.' is never treated as a keyword or type.
              kind = :ident
            elsif class_name_follows
              kind = :class
              class_name_follows = false
            else
              import_clause = true if match == 'import'
              class_name_follows = true if match == 'class' || match == 'interface'
            end

          elsif scan(/ \.(?!\d) | [,?:()\[\]}] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? | <<<?=? | >>>?=? /x)
            kind = :operator

          elsif scan(/;/)
            import_clause = false
            kind = :operator

          elsif scan(/\{/)
            class_name_follows = false
            kind = :operator

          # A '.' can only reach this branch when a digit follows it, because
          # the operator pattern above refuses that case.
          elsif check(/[\d.]/)
            if scan(/0[xX][0-9A-Fa-f]+/)
              kind = :hex
            elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
              kind = :oct
            elsif scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
              kind = :float
            elsif scan(/\d+[lL]?/)
              kind = :integer
            end

          elsif match = scan(/["']/)
            tokens << [:open, :string]
            state = :string
            string_delimiter = match
            kind = :delimiter

          elsif scan(/ @ #{IDENT} /ox)
            kind = :annotation

          else
            # Unrecognised character: consume it so the loop always advances.
            getch
            kind = :error

          end

        when :string
          if scan(STRING_CONTENT_PATTERN[string_delimiter])
            kind = :content
          elsif match = scan(/["'\/]/)
            # The content pattern consumes every other quote character, so
            # whatever matches here is the closing delimiter.
            tokens << [match, :delimiter]
            tokens << [:close, state]
            string_delimiter = nil
            state = :initial
            next
          elsif state == :string && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
            # In single-quoted literals only \\ and \' are tagged :char;
            # every other escape is tagged as plain content.
            if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
              kind = :content
            else
              kind = :char
            end
          elsif scan(/\\./m)
            kind = :content
          elsif scan(/ \\ | $ /x)
            # Broken literal: close the group with the kind it was opened
            # with (:string) so open/close markers stay balanced, and reset
            # the delimiter exactly like the regular close path does.
            tokens << [:close, :string]
            kind = :error
            string_delimiter = nil
            state = :initial
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
          end

        else
          raise_inspect 'Unknown state', tokens

        end

        # Branches that did not capture the text themselves fall back to the
        # scanner's most recent match.
        match ||= matched
        if $CODERAY_DEBUG and not kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens unless match

        last_token_dot = match == '.'

        tokens << [match, kind]

      end

      # Input ended inside a literal: close the group that is still open.
      if state == :string
        tokens << [:close, state]
      end

      tokens
    end

  end

end
end