module CodeRay
module Scanners
  
  load :java
  
  # Scanner for Groovy.
  #
  # Builds on the Java scanner: it extends Java's identifier table with the
  # Groovy-specific keywords and the magic variable +it+, and adds handling for
  # single-, double- and triple-quoted strings, slashy regexps and
  # <tt>$ident</tt> / <tt>${ ... }</tt> interpolation.
  class Groovy < Java
    
    register_for :groovy
    
    # Keywords added on top of the ones known to the Java scanner.
    # TODO: check list of keywords
    GROOVY_KEYWORDS = %w[
      as assert def in
    ]  # :nodoc:
    # Keywords after which a value is expected, so that a following +/+ is
    # scanned as the start of a regexp rather than as the division operator.
    KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
      case instanceof new return throw typeof while as assert in
    ]  # :nodoc:
    # Implicit variables that are highlighted as local variables.
    GROOVY_MAGIC_VARIABLES = %w[ it ]  # :nodoc:
    
    # Identifier => token kind table: a copy of Java's table plus the Groovy
    # additions above.
    IDENT_KIND = Java::IDENT_KIND.dup.
      add(GROOVY_KEYWORDS, :keyword).
      add(GROOVY_MAGIC_VARIABLES, :local_variable)  # :nodoc:
    
    # What may follow a backslash inside a string.
    ESCAPE = / [bfnrtv$\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
    UNICODE_ESCAPE =  / u[a-fA-F0-9]{4} /x  # :nodoc: no 4-byte unicode chars? U[a-fA-F0-9]{8}
    # What may follow a backslash inside a regexp.
    REGEXP_ESCAPE =  / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | \d | [bBdDsSwW\/] /x  # :nodoc:
    
    # Plain (uninterpreted) content of a literal, keyed by its opening
    # delimiter. Only the multi-line forms are allowed to span newlines.
    # TODO: interpretation inside ', ", /
    STRING_CONTENT_PATTERN = {
      "'" => /(?>\\[^\\'\n]+|[^\\'\n]+)+/,
      '"' => /[^\\$"\n]+/,
      "'''" => /(?>[^\\']+|'(?!''))+/,
      '"""' => /(?>[^\\$"]+|"(?!""))+/,
      '/' => /[^\\$\/\n]+/,
    }  # :nodoc:
    
  protected
    
    # Tokenizes the input and feeds the tokens to +encoder+.
    #
    # encoder:: receives +text_token+, +begin_group+ and +end_group+ calls.
    # options:: not used by this method.
    #
    # Returns +encoder+.
    def scan_tokens encoder, options
      
      state = :initial
      # One [state, string_delimiter, inline_block_paren_depth] entry for every
      # "${" interpolation that is currently being scanned.
      inline_block_stack = []
      # Brace nesting depth inside the innermost "${ ... }" block.
      inline_block_paren_depth = nil
      string_delimiter = nil
      import_clause = class_name_follows = last_token = after_def = false
      # Truthy where a "/" has to be read as the start of a regexp.
      value_expected = true
      
      until eos?
        
        # Reset on every pass so that the last_token filter at the bottom of
        # the loop sees the kind of the current token, not a stale one.
        kind = nil
        
        case state
        
        when :initial
          
          if match = scan(/ \s+ | \\\n /x)
            encoder.text_token match, :space
            if match.index ?\n
              import_clause = after_def = false
              value_expected = true unless value_expected
            end
            next
          
          elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            value_expected = true
            after_def = false
            kind = :comment  # keeps last_token untouched
            encoder.text_token match, :comment
          
          elsif bol? && match = scan(/ \#!.* /x)
            kind = :doctype  # keeps last_token untouched
            encoder.text_token match, :doctype
          
          elsif import_clause && match = scan(/ (?!as) #{IDENT} (?: \. #{IDENT} )* (?: \.\* )? /ox)
            after_def = value_expected = false
            encoder.text_token match, :include
          
          elsif match = scan(/ #{IDENT} | \[\] /ox)
            kind = IDENT_KIND[match]
            value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
            if last_token == '.'
              # Member access: never a keyword.
              kind = :ident
            elsif class_name_follows
              kind = :class
              class_name_follows = false
            elsif after_def && check(/\s*[({]/)
              kind = :method
              after_def = false
            elsif kind == :ident && last_token != '?' && check(/:/)
              # "name:" is a map key or named argument, unless the colon
              # belongs to a "? :" ternary.
              kind = :key
            else
              class_name_follows = true if match == 'class' || (import_clause && match == 'as')
              import_clause = match == 'import'
              after_def = true if match == 'def'
            end
            encoder.text_token match, kind
          
          elsif match = scan(/;/)
            import_clause = after_def = false
            value_expected = true
            encoder.text_token match, :operator
          
          elsif match = scan(/\{/)
            class_name_follows = after_def = false
            value_expected = true
            encoder.text_token match, :operator
            if !inline_block_stack.empty?
              inline_block_paren_depth += 1
            end
          
          # The ".", "*.", ".@", ".&", "?", "?:", ",", ":", "(", "[", "--" and
          # "->" alternatives must be present: without them a "." that is not
          # followed by a digit reaches the number branch below, which then
          # consumes nothing and the loop never terminates.
          # TODO: ~'...', ~"..." and ~/.../ style regexps
          elsif match = scan(/ \.\.<? | \*?\.(?!\d)@? | \.& | \?:? | [,?:(\[] | -[->] | \+\+ |
              && | \|\| | \*\*=? | ==?~ | <=?>? | [-+*%^~&|>=!]=? | <<<?=? | >>>?=? /x)
            value_expected = true
            value_expected = :regexp if match == '~'
            after_def = false
            encoder.text_token match, :operator
          
          elsif match = scan(/ [)\]}] /x)
            value_expected = after_def = false
            if !inline_block_stack.empty? && match == '}'
              inline_block_paren_depth -= 1
              if inline_block_paren_depth == 0  # closing brace of inline block reached
                encoder.text_token match, :inline_delimiter
                encoder.end_group :inline
                state, string_delimiter, inline_block_paren_depth = inline_block_stack.pop
                next
              end
            end
            encoder.text_token match, :operator
          
          elsif check(/[\d.]/)
            after_def = value_expected = false
            if match = scan(/0[xX][0-9A-Fa-f]+/)
              encoder.text_token match, :hex
            elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
              encoder.text_token match, :octal
            elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
              encoder.text_token match, :float
            elsif match = scan(/\d+[lLgG]?/)
              encoder.text_token match, :integer
            end
          
          elsif match = scan(/'''|"""/)
            after_def = value_expected = false
            state = :multiline_string
            # Multi-line strings use the :string group; only the scanner state
            # differs.
            encoder.begin_group :string
            string_delimiter = match
            encoder.text_token match, :delimiter
          
          # TODO: record.'name' syntax
          elsif match = scan(/["']/)
            after_def = value_expected = false
            # Slashy regexps are opened by the branch below, so a quote always
            # starts a string.
            state = :string
            encoder.begin_group state
            string_delimiter = match
            encoder.text_token match, :delimiter
          
          elsif value_expected && match = scan(/\//)
            after_def = value_expected = false
            encoder.begin_group :regexp
            state = :regexp
            string_delimiter = '/'
            encoder.text_token match, :delimiter
          
          elsif match = scan(/ @ #{IDENT} /ox)
            after_def = value_expected = false
            encoder.text_token match, :annotation
          
          elsif match = scan(/\//)
            after_def = false
            value_expected = true
            encoder.text_token match, :operator
          
          else
            encoder.text_token getch, :error
          
          end
        
        when :string, :regexp, :multiline_string
          if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
            encoder.text_token match, :content
          
          elsif match = scan(state == :multiline_string ? /'''|"""/ : /["'\/]/)
            encoder.text_token match, :delimiter
            if state == :regexp
              # TODO: regexp modifiers? s, m, x, i?
              modifiers = scan(/[ix]+/)
              encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
            end
            state = :string if state == :multiline_string
            encoder.end_group state
            string_delimiter = nil
            after_def = value_expected = false
            state = :initial
            next
          
          elsif (state == :string || state == :multiline_string) &&
              (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
            # Inside single-quoted strings only \\ and \' are shown as escapes.
            if string_delimiter[0] == ?' && !(match == "\\\\" || match == "\\'")
              encoder.text_token match, :content
            else
              encoder.text_token match, :char
            end
          elsif state == :regexp && match = scan(/ \\ (?: #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            encoder.text_token match, :char
          
          elsif match = scan(/ \$ #{IDENT} /mox)
            encoder.begin_group :inline
            encoder.text_token '$', :inline_delimiter
            match = match[1..-1]
            encoder.text_token match, IDENT_KIND[match]
            encoder.end_group :inline
            next
          elsif match = scan(/ \$ \{ /x)
            encoder.begin_group :inline
            encoder.text_token match, :inline_delimiter
            # Remember where to resume once the matching "}" is found.
            inline_block_stack << [state, string_delimiter, inline_block_paren_depth]
            inline_block_paren_depth = 1
            state = :initial
            next
          
          elsif match = scan(/ \$ /mx)
            encoder.text_token match, :content
          
          elsif match = scan(/ \\. /mx)
            encoder.text_token match, :content  # TODO: Shouldn't this be :error?
          
          elsif match = scan(/ \\ | \n /x)
            # Unterminated literal. The group was opened as :string or
            # :regexp, never as :multiline_string.
            encoder.end_group state == :regexp ? :regexp : :string
            encoder.text_token match, :error
            after_def = value_expected = false
            state = :initial
          
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
          
          end
        
        else
          raise_inspect 'Unknown state', encoder
        
        end
        
        last_token = match unless [:space, :comment, :doctype].include? kind
        
      end
      
      # Input ended inside a literal: close it with the kind it was opened with.
      if [:multiline_string, :string, :regexp].include? state
        encoder.end_group state == :regexp ? :regexp : :string
      end
      
      # Input ended inside one or more "${ ... }" blocks: close each inline
      # group together with the string or regexp group that encloses it.
      until inline_block_stack.empty?
        outer_state, = inline_block_stack.pop
        encoder.end_group :inline
        encoder.end_group outer_state == :regexp ? :regexp : :string
      end
      
      encoder
    end
    
  end
  
end
end