module CodeRay
module Scanners

  load :java

  # Scanner for Groovy source code, registered under the :groovy language id.
  #
  # It subclasses the Java scanner and extends its identifier table with the
  # Groovy-specific keywords and the implicit closure variable +it+. On top of
  # that it tokenizes single-quoted, double-quoted and triple-quoted strings,
  # slashy regexps, and <tt>$ident</tt> / <tt>${...}</tt> interpolation.
  class Groovy < Java

    include Streamable
    register_for :groovy

    # TODO: Check this!
    GROOVY_KEYWORDS = %w[
      as assert def in
    ]
    # Keywords after which a value may follow, so that a following "/" is
    # scanned as the start of a regexp rather than as the division operator.
    KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
      case instanceof new return throw typeof while as assert in
    ]
    GROOVY_MAGIC_VARIABLES = %w[ it ]

    IDENT_KIND = Java::IDENT_KIND.dup.
      add(GROOVY_KEYWORDS, :keyword).
      add(GROOVY_MAGIC_VARIABLES, :local_variable)

    # Escape bodies; the leading backslash is matched by the caller.
    ESCAPE = / [bfnrtv$\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x  # no 4-byte unicode chars? U[a-fA-F0-9]{8}
    REGEXP_ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | \d | [bBdDsSwW\/] /x

    # Plain (non-escape, non-interpolation) content, keyed by opening delimiter.
    # TODO: interpretation inside ', ", /
    STRING_CONTENT_PATTERN = {
      "'" => /(?>\\[^\\'\n]+|[^\\'\n]+)+/,
      '"' => /[^\\$"\n]+/,
      "'''" => /(?>[^\\']+|'(?!''))+/,
      '"""' => /(?>[^\\$"]+|"(?!""))+/,
      '/' => /[^\\$\/\n]+/,
    }

    # Scans the input and appends the resulting tokens to +tokens+.
    #
    # Tokens are appended as <tt>[text, kind]</tt> pairs; strings, regexps and
    # inline interpolations are additionally wrapped in <tt>[:open, group]</tt>
    # and <tt>[:close, group]</tt> markers.
    #
    # tokens::  the token collection to append to (anything responding to <<)
    # options:: not used by this method
    #
    # Returns +tokens+.
    def scan_tokens tokens, options

      state = :initial
      # Saved [state, string_delimiter, paren_depth] for every enclosing ${ ... }.
      inline_block_stack = []
      inline_block_paren_depth = nil
      string_delimiter = nil
      import_clause = class_name_follows = last_token = after_def = false
      # Truthy when the next token may start a value (so "/" opens a regexp).
      value_expected = true

      until eos?

        kind = nil
        match = nil

        case state

        when :initial

          if match = scan(/ \s+ | \\\n /x)
            tokens << [match, :space]
            if match.index ?\n
              # A line break ends an import clause and a pending "def".
              import_clause = after_def = false
              value_expected = true unless value_expected
            end
            next

          elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            value_expected = true
            after_def = false
            kind = :comment

          elsif bol? && scan(/ \#!.* /x)
            kind = :doctype

          elsif import_clause && scan(/ (?!as) #{IDENT} (?: \. #{IDENT} )* (?: \.\* )? /ox)
            after_def = value_expected = false
            kind = :include

          elsif match = scan(/ #{IDENT} | \[\] /ox)
            kind = IDENT_KIND[match]
            value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
            if last_token == '.'
              # Anything after a dot is a member name, even if it looks like a keyword.
              kind = :ident
            elsif class_name_follows
              kind = :class
              class_name_follows = false
            elsif after_def && check(/\s*[({]/)
              kind = :method
              after_def = false
            elsif kind == :ident && last_token != '?' && check(/:/)
              # "name:" is a map key unless it is the middle part of a ternary.
              kind = :key
            else
              class_name_follows = true if match == 'class' || (import_clause && match == 'as')
              import_clause = match == 'import'
              after_def = true if match == 'def'
            end

          elsif scan(/;/)
            import_clause = after_def = false
            value_expected = true
            kind = :operator

          elsif scan(/\{/)
            class_name_follows = after_def = false
            value_expected = true
            kind = :operator
            if !inline_block_stack.empty?
              # Track nesting so the matching "}" of a ${ ... } block can be found.
              inline_block_paren_depth += 1
            end

          # Operators and opening punctuation: ranges (.. ..<), member access
          # (. *. .@ .&), ? and ?:, "," ":" "(" "[", -- and ->, ++, logical,
          # power, match/find, comparison, (compound) assignment and shifts.
          # The "." alternative refuses a following digit so that ".5" is left
          # to the number branch below.
          # TODO: ~'...', ~"..." and ~/.../ style regexps
          elsif match = scan(/ \.\.<? | \*?\.(?!\d)@? | \.& | \?:? | [,?:(\[] | -[->] | \+\+ |
              && | \|\| | \*\*=? | ==?~ | <=?>? | [-+*%^~&|>=!]=? | <<<?=? | >>>?=? /x)
            value_expected = true
            value_expected = :regexp if match == '~'
            after_def = false
            kind = :operator

          elsif match = scan(/ [)\]}] /x)
            value_expected = after_def = false
            if !inline_block_stack.empty? && match == '}'
              inline_block_paren_depth -= 1
              if inline_block_paren_depth == 0  # closing brace of inline block reached
                tokens << [match, :inline_delimiter]
                tokens << [:close, :inline]
                # Resume the string/regexp that contained the ${ ... } block.
                state, string_delimiter, inline_block_paren_depth = inline_block_stack.pop
                next
              end
            end
            kind = :operator

          elsif check(/[\d.]/)
            after_def = value_expected = false
            if scan(/0[xX][0-9A-Fa-f]+/)
              kind = :hex
            elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
              kind = :oct
            elsif scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
              kind = :float
            elsif scan(/\d+[lLgG]?/)
              kind = :integer
            end

          elsif match = scan(/'''|"""/)
            after_def = value_expected = false
            state = :multiline_string
            # Multiline strings use the same :string token group as ordinary ones.
            tokens << [:open, :string]
            string_delimiter = match
            kind = :delimiter

          # TODO: record.'name'
          elsif match = scan(/["']/)
            after_def = value_expected = false
            state = :string
            tokens << [:open, :string]
            string_delimiter = match
            kind = :delimiter

          elsif value_expected && (match = scan(/\//))
            after_def = value_expected = false
            tokens << [:open, :regexp]
            state = :regexp
            string_delimiter = '/'
            kind = :delimiter

          elsif scan(/ @ #{IDENT} /ox)
            after_def = value_expected = false
            kind = :annotation

          elsif scan(/\//)
            # No value expected here, so "/" is the division operator.
            after_def = false
            value_expected = true
            kind = :operator

          else
            getch
            kind = :error

          end

        when :string, :regexp, :multiline_string
          if scan(STRING_CONTENT_PATTERN[string_delimiter])
            kind = :content

          elsif match = scan(state == :multiline_string ? /'''|"""/ : /["'\/]/)
            tokens << [match, :delimiter]
            if state == :regexp
              # TODO: regexp modifiers? s, m, x, i?
              modifiers = scan(/[ix]+/)
              tokens << [modifiers, :modifier] if modifiers && !modifiers.empty?
            end
            state = :string if state == :multiline_string
            tokens << [:close, state]
            string_delimiter = nil
            after_def = value_expected = false
            state = :initial
            next

          elsif (state == :string || state == :multiline_string) &&
              (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
            # In single-quoted strings only \\ and \' are highlighted as escapes.
            if string_delimiter[0] == ?' && !(match == "\\\\" || match == "\\'")
              kind = :content
            else
              kind = :char
            end
          elsif state == :regexp && scan(/ \\ (?: #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            kind = :char

          elsif match = scan(/ \$ #{IDENT} /mox)
            tokens << [:open, :inline]
            tokens << ['$', :inline_delimiter]
            match = match[1..-1]
            tokens << [match, IDENT_KIND[match]]
            tokens << [:close, :inline]
            next
          elsif match = scan(/ \$ \{ /x)
            tokens << [:open, :inline]
            tokens << ['${', :inline_delimiter]
            # Remember where we were and scan the block body as ordinary code.
            inline_block_stack << [state, string_delimiter, inline_block_paren_depth]
            inline_block_paren_depth = 1
            state = :initial
            next

          elsif scan(/ \$ /mx)
            kind = :content

          elsif scan(/ \\. /mx)
            kind = :content

          elsif scan(/ \\ | \n /x)
            # Unterminated literal: close the group under the kind it was opened
            # with (multiline strings are opened as :string) and flag the input.
            tokens << [:close, state == :multiline_string ? :string : state]
            kind = :error
            after_def = value_expected = false
            state = :initial

          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
          end

        else
          raise_inspect 'Unknown state', tokens

        end

        match ||= matched
        if $CODERAY_DEBUG and not kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens unless match

        last_token = match unless [:space, :comment, :doctype].include? kind

        tokens << [match, kind]

      end

      if [:multiline_string, :string, :regexp].include? state
        # Input ended inside a literal; close it under the kind it was opened with.
        tokens << [:close, state == :multiline_string ? :string : state]
      end

      tokens
    end

  end

end
end