annotate vendor/gems/coderay-0.9.7/lib/coderay/scanners/java_script-0.9.6.rb @ 855:7294e8db2515 bug_162

Close obsolete branch bug_162
author Chris Cannam
date Thu, 14 Jul 2011 11:59:19 +0100
parents 0579821a129a
children
rev   line source
Chris@210 1 module CodeRay
Chris@210 2 module Scanners
Chris@210 3
Chris@210 4 class JavaScript < Scanner
Chris@210 5
include Streamable

register_for :java_script
file_extension 'js'

# Words reported with the :keyword kind.
KEYWORDS = %w[
  break case catch continue default delete do else finally for function
  if in instanceof new return switch throw try typeof var void while with
]

# Literal names reported with the :pre_constant kind.
PREDEFINED_CONSTANTS = %w[false null true undefined]

# Implicit variables, reported with the :local_variable kind.
# (arguments was introduced in JavaScript 1.4)
MAGIC_VARIABLES = %w[this arguments]

# Keywords after which the scanner expects a value to follow.
KEYWORDS_EXPECTING_VALUE = WordList.new.add(
  %w[case delete in instanceof new return throw typeof with]
)

# Words reserved for future use, reported with the :reserved kind.
RESERVED_WORDS = %w[
  abstract boolean byte char class debugger double enum export extends final
  float goto implements import int interface long native package private
  protected public short static super synchronized throws transient volatile
]

# Lookup table from a word to its token kind; unknown words map to :ident.
IDENT_KIND = WordList.new(:ident).
  add(RESERVED_WORDS,       :reserved).
  add(PREDEFINED_CONSTANTS, :pre_constant).
  add(MAGIC_VARIABLES,      :local_variable).
  add(KEYWORDS,             :keyword)

# What may follow a backslash in string and regexp literals.
ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
REGEXP_ESCAPE = / [bBdDsSwW] /x

# Plain (non-escape, non-delimiter) content of a literal, keyed by its delimiter.
STRING_CONTENT_PATTERN = {
  "'" => /[^\\']+/,
  '"' => /[^\\"]+/,
  '/' => /[^\\\/]+/
}

# Lookahead applied just after an opening quote: matches when the quoted text
# is followed by a colon, in which case the literal is scanned as a :key.
KEY_CHECK_PATTERN = {
  "'" => / [^\\']* (?: \\.? [^\\']* )* '? \s* : /x,
  '"' => / [^\\"]* (?: \\.? [^\\"]* )* "? \s* : /x
}
Chris@210 53
# Tokenizes the JavaScript source held by this scanner.
#
# A small state machine drives the scan: +:initial+ handles ordinary code,
# while +:string+, +:regexp+ and +:key+ handle the inside of a quoted string,
# a regular expression literal and a quoted object-literal key. Every token
# is appended to +tokens+ as a <tt>[text, kind]</tt> pair, and each literal is
# wrapped in <tt>[:open, state]</tt> / <tt>[:close, state]</tt> markers.
#
# tokens  - token list that is appended to (must respond to +<<+).
# options - accepted for interface compatibility; not read by this method.
#
# Returns +tokens+.
def scan_tokens tokens, options

  state = :initial
  string_delimiter = nil
  # value_expected decides whether "/" starts a regexp literal or is an
  # operator, and whether "<" may start an inline XML literal.
  value_expected = true
  # key_expected is set after "{" or "," so that a following word or quoted
  # literal followed by ":" is reported as an object key.
  key_expected = false
  # function_expected is set right after the "function" keyword so that the
  # next identifier is reported as a function name.
  function_expected = false

  until eos?

    kind = nil
    match = nil

    case state

    when :initial

      if match = scan(/ \s+ | \\\n /x)
        # A line break may start a new statement, so a value can follow.
        value_expected = true if !value_expected && match.index(?\n)
        tokens << [match, :space]
        next

      elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
        value_expected = true
        kind = :comment

      elsif check(/\.?\d/)
        key_expected = value_expected = false
        if scan(/0[xX][0-9A-Fa-f]+/)
          kind = :hex
        elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
          kind = :oct
        elsif scan(/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
          kind = :float
        elsif scan(/\d+/)
          kind = :integer
        end

      elsif value_expected && match = scan(/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim)
        # Inline XML literal: hand the matched text to the XML scanner.
        # FIXME: scan over nested tags
        xml_scanner.tokenize match
        value_expected = false
        next

      elsif match = scan(/ [-+*=<>?:;,!&^|(\[{~%]+ | \.(?!\d) /x)
        value_expected = true
        last_operator = match[-1]
        key_expected = (last_operator == ?{) || (last_operator == ?,)
        function_expected = false
        kind = :operator

      elsif scan(/ [)\]}]+ /x)
        function_expected = key_expected = value_expected = false
        kind = :operator

      elsif match = scan(/ [$a-zA-Z_][A-Za-z_0-9$]* /x)
        kind = IDENT_KIND[match]
        value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
        # TODO: labels
        if kind == :ident
          if match.index(?$)  # $ allowed inside an identifier
            kind = :predefined
          elsif function_expected
            kind = :function
          elsif check(/\s*[=:]\s*function\b/)
            kind = :function
          elsif key_expected && check(/\s*:/)
            kind = :key
          end
        end
        function_expected = (kind == :keyword) && (match == 'function')
        key_expected = false

      elsif match = scan(/["']/)
        if key_expected && check(KEY_CHECK_PATTERN[match])
          state = :key
        else
          state = :string
        end
        tokens << [:open, state]
        string_delimiter = match
        kind = :delimiter

      elsif value_expected && (match = scan(/\/(?=\S)/))
        tokens << [:open, :regexp]
        state = :regexp
        string_delimiter = '/'
        kind = :delimiter

      elsif scan(/ \/ /x)
        value_expected = true
        key_expected = false
        kind = :operator

      else
        getch
        kind = :error

      end

    when :string, :regexp, :key
      if scan(STRING_CONTENT_PATTERN[string_delimiter])
        kind = :content
      elsif match = scan(/["'\/]/)
        # The content pattern consumes every other quote character, so the
        # character seen here is the closing delimiter of this literal.
        tokens << [match, :delimiter]
        if state == :regexp
          modifiers = scan(/[gim]+/)
          tokens << [modifiers, :modifier] if modifiers && !modifiers.empty?
        end
        tokens << [:close, state]
        string_delimiter = nil
        key_expected = value_expected = false
        state = :initial
        next
      elsif state != :regexp && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
        # In single-quoted literals only \\ and \' are reported as :char.
        if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
          kind = :content
        else
          kind = :char
        end
      elsif state == :regexp && scan(/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
        kind = :char
      elsif scan(/\\./m)
        kind = :content
      elsif scan(/ \\ | $ /x)
        tokens << [:close, state]
        kind = :error
        key_expected = value_expected = false
        state = :initial
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
      end

    else
      raise_inspect 'Unknown state', tokens

    end

    # Branches that did not capture the scanned text fall back to the
    # text of the last successful scan.
    match ||= matched
    if $CODERAY_DEBUG and not kind
      raise_inspect 'Error token %p in line %d' %
        [[match, kind], line], tokens
    end
    raise_inspect 'Empty token', tokens unless match

    tokens << [match, kind]

  end

  # A literal still open at end of input is closed here so that every
  # [:open, state] marker has a matching [:close, state]. :key is included
  # because a quoted key can be left unterminated just like a string.
  if [:string, :regexp, :key].include? state
    tokens << [:close, state]
  end

  tokens
end
Chris@210 209
Chris@210 210 protected
Chris@210 211
# Resets this scanner via the superclass, then also resets the nested XML
# scanner, but only if one has already been created.
def reset_instance
  super
  return unless defined? @xml_scanner
  @xml_scanner.reset
end
Chris@210 216
# Returns the scanner used for inline XML literals, creating and memoizing
# it on first use. It is handed this scanner's @tokens.
def xml_scanner
  @xml_scanner ||= CodeRay.scanner(
    :xml,
    :tokens => @tokens,
    :keep_tokens => true,
    :keep_state => false
  )
end
Chris@210 220
Chris@210 221 end
Chris@210 222
Chris@210 223 end
Chris@210 224 end