module CodeRay
module Scanners

  # Scanner for JavaScript source code.
  #
  # scan_tokens is a small state machine with the states :initial (code),
  # :string, :key (a quoted object-literal key) and :regexp. An XML literal
  # found where a value is expected is handed to a nested :xml scanner.
  class JavaScript < Scanner

    include Streamable

    register_for :java_script
    file_extension 'js'

    # The actual JavaScript keywords.
    KEYWORDS = %w[
      break case catch continue default delete do else
      finally for function if in instanceof new
      return switch throw try typeof var void while with
    ]
    PREDEFINED_CONSTANTS = %w[
      false null true undefined
    ]

    MAGIC_VARIABLES = %w[ this arguments ]  # arguments was introduced in JavaScript 1.4

    # Keywords after which a value follows, so that a subsequent "/" opens a
    # regexp literal instead of being a division operator.
    KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
      case delete in instanceof new return throw typeof with
    ]

    # Reserved for future use.
    # The ECMAScript 3 list of future reserved words; it includes "const".
    RESERVED_WORDS = %w[
      abstract boolean byte char class const debugger double enum export extends
      final float goto implements import int interface long native package
      private protected public short static super synchronized throws transient
      volatile
    ]

    # Maps an identifier to its token kind; unknown words are :ident.
    IDENT_KIND = WordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(PREDEFINED_CONSTANTS, :pre_constant).
      add(MAGIC_VARIABLES, :local_variable).
      add(KEYWORDS, :keyword)

    # What may follow a backslash to form a recognized escape sequence.
    ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
    REGEXP_ESCAPE = / [bBdDsSwW] /x

    # Plain content (no backslash, no closing delimiter), keyed by the
    # delimiter that opened the literal.
    STRING_CONTENT_PATTERN = {
      "'" => /[^\\']+/,
      '"' => /[^\\"]+/,
      '/' => /[^\\\/]+/,
    }

    # Lookahead applied right after an opening quote: matches when the string
    # is terminated and followed by a colon, i.e. it is an object-literal key.
    KEY_CHECK_PATTERN = {
      "'" => / (?> [^\\']* (?: \\. [^\\']* )* ) ' \s* : /mx,
      '"' => / (?> [^\\"]* (?: \\. [^\\"]* )* ) " \s* : /mx,
    }

    # Scans the input until eos? and appends the tokens to +tokens+.
    #
    # Tokens are [text, kind] pairs; string, key and regexp literals are
    # wrapped in [:open, state] / [:close, state] markers.
    #
    # tokens::  the collection the tokens are appended to (with <<).
    # options:: not read by this method.
    #
    # Returns +tokens+.
    def scan_tokens tokens, options

      state = :initial
      string_delimiter = nil
      # true where an expression may start: "/" then opens a regexp and
      # "<tag" an XML literal.
      value_expected = true
      # true right after "{" or ",": an identifier or string followed by ":"
      # is then an object-literal key.
      key_expected = false
      # true right after the "function" keyword: the next identifier is the
      # function's name.
      function_expected = false

      until eos?

        kind = nil
        match = nil

        case state

        when :initial

          if match = scan(/ \s+ | \\\n /x)
            # A line break puts the scanner back into value position.
            value_expected = true if !value_expected && match.index(?\n)
            tokens << [match, :space]
            next

          # A // comment always ends at the next newline: unlike in the C
          # preprocessor, a trailing backslash does not continue it.
          # A /* comment that is never closed runs to the end of the input.
          elsif scan(%r! // [^\n]* | /\* (?: .*? \*/ | .* ) !mx)
            value_expected = true
            kind = :comment

          elsif check(/\.?\d/)
            key_expected = value_expected = false
            if scan(/0[xX][0-9A-Fa-f]+/)
              kind = :hex
            elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
              kind = :oct
            elsif scan(/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
              kind = :float
            elsif scan(/\d+/)
              kind = :integer
            end

          elsif value_expected && match = scan(/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim)
            # FIXME: scan over nested tags
            # The XML scanner appends its tokens itself, so skip the generic
            # token push at the bottom of the loop.
            xml_scanner.tokenize match
            value_expected = false
            next

          elsif match = scan(/ [-+*=<>?:;,!&^|(\[{~%]+ | \.(?!\d) /x)
            value_expected = true
            last_operator = match[-1]
            key_expected = (last_operator == ?{) || (last_operator == ?,)
            function_expected = false
            kind = :operator

          elsif scan(/ [)\]}]+ /x)
            function_expected = key_expected = value_expected = false
            kind = :operator

          elsif match = scan(/ [$a-zA-Z_][A-Za-z_0-9$]* /x)
            kind = IDENT_KIND[match]
            value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
            # TODO: labels
            if kind == :ident
              if match.index(?$)  # $ allowed inside an identifier
                kind = :predefined
              elsif function_expected
                kind = :function
              elsif check(/\s*[=:]\s*function\b/)
                # name = function ... / name: function ...
                kind = :function
              elsif key_expected && check(/\s*:/)
                kind = :key
              end
            end
            function_expected = (kind == :keyword) && (match == 'function')
            key_expected = false

          elsif match = scan(/["']/)
            if key_expected && check(KEY_CHECK_PATTERN[match])
              state = :key
            else
              state = :string
            end
            tokens << [:open, state]
            string_delimiter = match
            kind = :delimiter

          elsif value_expected && (match = scan(/\/(?=\S)/))
            tokens << [:open, :regexp]
            state = :regexp
            string_delimiter = '/'
            kind = :delimiter

          elsif scan(/ \/ /x)
            # Not in value position: "/" is the division operator.
            value_expected = true
            key_expected = false
            kind = :operator

          else
            getch
            kind = :error

          end

        when :string, :regexp, :key
          if scan(STRING_CONTENT_PATTERN[string_delimiter])
            kind = :content
          elsif match = scan(/["'\/]/)
            # The content pattern consumes the other two delimiter characters,
            # so only the delimiter that opened the literal can show up here.
            tokens << [match, :delimiter]
            if state == :regexp
              modifiers = scan(/[gim]+/)
              tokens << [modifiers, :modifier] if modifiers && !modifiers.empty?
            end
            tokens << [:close, state]
            string_delimiter = nil
            key_expected = value_expected = false
            state = :initial
            next
          elsif state != :regexp && scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            # JavaScript interprets escape sequences the same way in single-
            # and double-quoted strings, so both yield :char.
            kind = :char
          elsif state == :regexp && scan(/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            kind = :char
          elsif scan(/\\./m)
            # Backslash followed by a character that is no recognized escape.
            kind = :content
          elsif scan(/ \\ | $ /x)
            # Literal broken off: close the group and flag the rest as error.
            tokens << [:close, state]
            kind = :error
            key_expected = value_expected = false
            state = :initial
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
          end

        else
          raise_inspect 'Unknown state', tokens

        end

        # Branches that did not capture the text themselves use the last match.
        match ||= matched
        if $CODERAY_DEBUG and not kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]

      end

      # Close a literal that was still open when the input ended.
      if [:string, :regexp].include? state
        tokens << [:close, state]
      end

      tokens
    end

  protected

    # Resets this scanner and, if one has been created, the nested XML scanner.
    def reset_instance
      super
      @xml_scanner.reset if defined? @xml_scanner
    end

    # The nested XML scanner, created on first use. It is handed this
    # scanner's @tokens as its :tokens option.
    def xml_scanner
      @xml_scanner ||= CodeRay.scanner :xml, :tokens => @tokens, :keep_tokens => true, :keep_state => false
    end

  end

end
end