module CodeRay
  module Scanners

    # Scanner for Java.
    #
    # Walks the input with the StringScanner-style primitives used below
    # (scan / check / getch / peek / eos?) and reports each lexeme to the
    # encoder together with a token kind such as :keyword, :type, :operator,
    # :comment, :float, or a :string group made of :delimiter, :content and
    # :char tokens.
    class Java < Scanner

      register_for :java

      autoload :BuiltinTypes, 'coderay/scanners/java/builtin_types'

      # http://java.sun.com/docs/books/tutorial/java/nutsandbolts/_keywords.html
      KEYWORDS = %w[
        assert break case catch continue default do else
        finally for if instanceof import new package
        return switch throw try typeof while
        debugger export
      ]  # :nodoc:
      RESERVED = %w[ const goto ]  # :nodoc:
      CONSTANTS = %w[ false null true ]  # :nodoc:
      MAGIC_VARIABLES = %w[ this super ]  # :nodoc:
      TYPES = %w[
        boolean byte char class double enum float int interface long
        short void
      ] << '[]'  # :nodoc: because int[] should be highlighted as a type
      DIRECTIVES = %w[
        abstract extends final implements native private protected public
        static strictfp synchronized throws transient volatile
      ]  # :nodoc:

      # Maps an identifier to its token kind; unknown words fall back to :ident.
      # NOTE(review): names ending in Error/Exception are added twice (first as
      # :predefined_type, then as :exception); presumably the later add wins -
      # confirm against WordList#add.
      IDENT_KIND = WordList.new(:ident).
        add(KEYWORDS, :keyword).
        add(RESERVED, :reserved).
        add(CONSTANTS, :predefined_constant).
        add(MAGIC_VARIABLES, :local_variable).
        add(TYPES, :type).
        add(BuiltinTypes::List, :predefined_type).
        add(BuiltinTypes::List.select { |builtin| builtin[/(Error|Exception)$/] }, :exception).
        add(DIRECTIVES, :directive)  # :nodoc:

      # Body of a backslash escape (the part after the backslash).
      ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
      UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x  # :nodoc:
      # Per-delimiter pattern for a run of plain string content: everything up
      # to the next backslash or closing delimiter.
      STRING_CONTENT_PATTERN = {
        "'" => /[^\\']+/,
        '"' => /[^\\"]+/,
        '/' => /[^\\\/]+/,
      }  # :nodoc:
      IDENT = /[a-zA-Z_][A-Za-z_0-9]*/  # :nodoc:

      protected

      # Tokenizes the remaining input and sends every token to +encoder+.
      #
      # encoder:: receives text_token(text, kind) for each lexeme and
      #           begin_group / end_group around string and char literals.
      # options:: not used by this scanner; kept for the Scanner interface.
      #
      # Returns +encoder+.
      def scan_tokens(encoder, options)
        state = :initial
        string_delimiter = nil
        # false, or the token kind (:include / :namespace) to use for the
        # dotted name that follows an `import` / `package` keyword.
        package_name_expected = false
        class_name_follows = false
        last_token_dot = false

        until eos?

          case state

          when :initial

            if match = scan(/ \s+ | \\\n /x)
              encoder.text_token match, :space
              # `next` skips the last_token_dot update at the bottom of the
              # loop, so whitespace does not reset the "after a dot" flag.
              next

            # A Java end-of-line comment always stops at the line terminator;
            # a trailing backslash does NOT continue it onto the next line.
            # Block comments may be unterminated (the `.*` alternative).
            elsif match = scan(%r! // [^\n]* | /\* (?: .*? \*/ | .* ) !mx)
              encoder.text_token match, :comment
              next

            elsif package_name_expected && match = scan(/ #{IDENT} (?: \. #{IDENT} )* /ox)
              encoder.text_token match, package_name_expected

            elsif match = scan(/ #{IDENT} | \[\] /ox)
              kind = IDENT_KIND[match]
              if last_token_dot
                # Member access: never highlight as keyword/type.
                kind = :ident
              elsif class_name_follows
                kind = :class
                class_name_follows = false
              else
                case match
                when 'import'
                  package_name_expected = :include
                when 'package'
                  package_name_expected = :namespace
                when 'class', 'interface'
                  class_name_follows = true
                end
              end
              encoder.text_token match, kind

            # Alternation is ordered: the shift operators (<< <<= >> >>= >>>
            # >>>=) must come before the single-character class, otherwise the
            # class matches their first character and they are never reached.
            elsif match = scan(/ \.(?!\d) | [,?:()\[\]}] | -- | \+\+ | && | \|\| | \*\*=? | <<=? | >>>?=? | [-+*\/%^~&|<>=!]=? /x)
              encoder.text_token match, :operator

            elsif match = scan(/;/)
              package_name_expected = false
              encoder.text_token match, :operator

            elsif match = scan(/\{/)
              class_name_follows = false
              encoder.text_token match, :operator

            elsif check(/[\d.]/)
              # One of these always matches here: a leading digit is accepted
              # by the final integer pattern, and a leading '.' only reaches
              # this branch when a digit follows (otherwise the operator
              # branch above consumed it), which the float pattern accepts.
              if match = scan(/0[xX][0-9A-Fa-f]+/)
                encoder.text_token match, :hex
              elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
                encoder.text_token match, :octal
              elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
                encoder.text_token match, :float
              elsif match = scan(/\d+[lL]?/)
                encoder.text_token match, :integer
              end

            elsif match = scan(/["']/)
              state = :string
              encoder.begin_group state
              string_delimiter = match
              encoder.text_token match, :delimiter

            elsif match = scan(/ @ #{IDENT} /ox)
              encoder.text_token match, :annotation

            else
              # Unrecognized character: emit it as an error and keep going.
              encoder.text_token getch, :error

            end

          when :string
            if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
              encoder.text_token match, :content
            elsif match = scan(/["'\/]/)
              # The content pattern above swallows every quote character that
              # is not the active delimiter, so this is the closing delimiter.
              encoder.text_token match, :delimiter
              encoder.end_group state
              state = :initial
              string_delimiter = nil
            elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
              # Inside '...' only \\ and \' are reported as :char; every other
              # escape is reported as plain :content.
              if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
                encoder.text_token match, :content
              else
                encoder.text_token match, :char
              end
            elsif match = scan(/\\./m)
              # Backslash followed by a character that is not a known escape.
              encoder.text_token match, :content
            elsif match = scan(/ \\ | $ /x)
              # Dangling backslash or end of line: close the group and flag it.
              encoder.end_group state
              state = :initial
              encoder.text_token match, :error
            else
              raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
            end

          else
            raise_inspect 'Unknown state', encoder

          end

          last_token_dot = match == '.'

        end

        # Input ended inside a string literal: close the still-open group.
        encoder.end_group state if state == :string

        encoder
      end

    end

  end
end