module CodeRay
module Scanners
  
  # Scanner for JavaScript.
  # 
  # Aliases: +ecmascript+, +ecma_script+, +javascript+
  #
  # The scanner is a small state machine (see #scan_tokens) with the states
  # +:initial+, +:string+, +:regexp+ and +:key+. Tokens are reported to the
  # encoder via +text_token+, +begin_group+ and +end_group+.
  class JavaScript < Scanner
    
    register_for :java_script
    file_extension 'js'
    
    # The actual JavaScript keywords.
    KEYWORDS = %w[
      break case catch continue default delete do else
      finally for function if in instanceof new
      return switch throw try typeof var void while with
    ]  # :nodoc:
    PREDEFINED_CONSTANTS = %w[
      false null true undefined NaN Infinity
    ]  # :nodoc:
    
    MAGIC_VARIABLES = %w[ this arguments ]  # :nodoc: arguments was introduced in JavaScript 1.4
    
    # Keywords after which an expression (and therefore possibly a regexp
    # literal or an XML literal) may follow.
    KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
      case delete in instanceof new return throw typeof with
    ]  # :nodoc:
    
    # Reserved for future use.
    # +const+ belongs to this list as well; without it the word would be
    # classified as a plain identifier.
    RESERVED_WORDS = %w[
      abstract boolean byte char class const debugger double enum export extends
      final float goto implements import int interface long native package
      private protected public short static super synchronized throws transient
      volatile
    ]  # :nodoc:
    
    # Maps an identifier to its token kind; unknown words are +:ident+.
    IDENT_KIND = WordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(PREDEFINED_CONSTANTS, :predefined_constant).
      add(MAGIC_VARIABLES, :local_variable).
      add(KEYWORDS, :keyword)  # :nodoc:
    
    # The following three patterns describe what may follow a backslash.
    ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x  # :nodoc:
    REGEXP_ESCAPE = / [bBdDsSwW] /x  # :nodoc:
    
    # Plain content (no backslash, no closing delimiter), keyed by the
    # delimiter that opened the string or regexp.
    STRING_CONTENT_PATTERN = {
      "'" => /[^\\']+/,
      '"' => /[^\\"]+/,
      '/' => /[^\\\/]+/,
    }  # :nodoc:
    
    # Lookahead used right after an opening quote: matches when the string is
    # closed and followed by a colon, i.e. when it is an object literal key.
    KEY_CHECK_PATTERN = {
      "'" => / (?> [^\\']* (?: \\. [^\\']* )* ) ' \s* : /mx,
      '"' => / (?> [^\\"]* (?: \\. [^\\"]* )* ) " \s* : /mx,
    }  # :nodoc:
    
  protected
    
    # Tokenizes the input and sends the tokens to +encoder+.
    #
    # encoder:: receives +text_token+, +begin_group+ and +end_group+ calls.
    # options:: accepted for interface compatibility; not read in this method.
    #
    # Returns +encoder+.
    def scan_tokens encoder, options
      
      state = :initial
      string_delimiter = nil
      # true where an expression may start; decides whether "/" opens a
      # regexp and whether "<" opens an XML literal.
      value_expected = true
      # true right after "{" or ","; a following identifier or string that is
      # followed by ":" is reported as a key.
      key_expected = false
      # true right after the +function+ keyword; the next plain identifier is
      # reported as a function name.
      function_expected = false
      
      until eos?
        
        case state
          
        when :initial
          
          if match = scan(/ \s+ | \\\n /x)
            # A line break makes a value possible again.
            value_expected = true if !value_expected && match.index(?\n)
            encoder.text_token match, :space
            
          elsif match = scan(%r! // [^\n]* | /\* (?: .*? \*/ | .* ) !mx)
            # A single-line comment always ends at the line break; a trailing
            # backslash does not continue it onto the next line.
            # An unterminated /* comment runs to the end of the input.
            value_expected = true
            encoder.text_token match, :comment
            
          elsif check(/\.?\d/)
            key_expected = value_expected = false
            if match = scan(/0[xX][0-9A-Fa-f]+/)
              encoder.text_token match, :hex
            elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
              encoder.text_token match, :octal
            elsif match = scan(/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
              encoder.text_token match, :float
            elsif match = scan(/\d+/)
              encoder.text_token match, :integer
            end
            
          elsif value_expected && match = scan(/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim)
            # TODO: scan over nested tags
            # Embedded XML literal: delegate to the XML scanner, which writes
            # into the same encoder.
            xml_scanner.tokenize match, :tokens => encoder
            value_expected = false
            next
            
          elsif match = scan(/ [-+*=<>?:;,!&^|(\[{~%]+ | \.(?!\d) /x)
            value_expected = true
            last_operator = match[-1]
            key_expected = (last_operator == ?{) || (last_operator == ?,)
            function_expected = false
            encoder.text_token match, :operator
            
          elsif match = scan(/ [)\]}]+ /x)
            function_expected = key_expected = value_expected = false
            encoder.text_token match, :operator
            
          elsif match = scan(/ [$a-zA-Z_][A-Za-z_0-9$]* /x)
            kind = IDENT_KIND[match]
            value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
            # TODO: labels
            if kind == :ident
              if match.index(?$)  # $ allowed inside an identifier
                kind = :predefined
              elsif function_expected
                kind = :function
              elsif check(/\s*[=:]\s*function\b/)
                # name = function ...  or  name: function ...
                kind = :function
              elsif key_expected && check(/\s*:/)
                kind = :key
              end
            end
            function_expected = (kind == :keyword) && (match == 'function')
            key_expected = false
            encoder.text_token match, kind
            
          elsif match = scan(/["']/)
            if key_expected && check(KEY_CHECK_PATTERN[match])
              state = :key
            else
              state = :string
            end
            encoder.begin_group state
            string_delimiter = match
            encoder.text_token match, :delimiter
            
          elsif value_expected && (match = scan(/\//))
            # A slash where a value may start opens a regexp literal ...
            encoder.begin_group :regexp
            state = :regexp
            string_delimiter = '/'
            encoder.text_token match, :delimiter
            
          elsif match = scan(/ \/ /x)
            # ... otherwise it is the division operator.
            value_expected = true
            key_expected = false
            encoder.text_token match, :operator
            
          else
            encoder.text_token getch, :error
            
          end
          
        when :string, :regexp, :key
          if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
            encoder.text_token match, :content
          elsif match = scan(/["'\/]/)
            # The content pattern consumes every other quote or slash, so
            # this is the closing delimiter.
            encoder.text_token match, :delimiter
            if state == :regexp
              modifiers = scan(/[gim]+/)
              encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
            end
            encoder.end_group state
            string_delimiter = nil
            key_expected = value_expected = false
            state = :initial
          elsif state != :regexp && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
            # JavaScript interprets escape sequences identically in single-
            # and double-quoted strings, so both yield :char.
            encoder.text_token match, :char
          elsif state == :regexp && match = scan(/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            encoder.text_token match, :char
          elsif match = scan(/\\./m)
            # Backslash followed by a character that is no known escape.
            encoder.text_token match, :content
          elsif match = scan(/ \\ | $ /x)
            # Lone backslash or end of line: give up on this literal.
            encoder.end_group state
            encoder.text_token match, :error
            key_expected = value_expected = false
            state = :initial
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
          end
          
        else
          raise_inspect 'Unknown state', encoder
          
        end
        
      end
      
      # Close a string or regexp group left open at the end of the input.
      if [:string, :regexp].include? state
        encoder.end_group state
      end
      
      encoder
    end
    
  protected
    
    # Resets this scanner and, if one has been created, the XML sub-scanner.
    def reset_instance
      super
      @xml_scanner.reset if defined? @xml_scanner
    end
    
    # Lazily created XML scanner used for embedded XML literals.
    def xml_scanner
      @xml_scanner ||= CodeRay.scanner :xml, :tokens => @tokens, :keep_tokens => true, :keep_state => false
    end
    
  end
  
end
end