module CodeRay
  module Scanners

    # Scanner for JavaScript.
    #
    # Aliases: +ecmascript+, +ecma_script+, +javascript+
    #
    # The scanner is a small state machine (see #scan_tokens) with an
    # +:initial+ state for ordinary code and the states +:string+, +:regexp+
    # and +:key+ for the inside of a quoted literal. Tags that appear where a
    # value is expected are handed over to a nested XML scanner.
    class JavaScript < Scanner

      register_for :java_script
      file_extension 'js'

      # The actual JavaScript keywords.
      KEYWORDS = %w[
        break case catch continue default delete do else
        finally for function if in instanceof new
        return switch throw try typeof var void while with
      ] # :nodoc:
      PREDEFINED_CONSTANTS = %w[
        false null true undefined NaN Infinity
      ] # :nodoc:

      MAGIC_VARIABLES = %w[ this arguments ] # :nodoc: arguments was introduced in JavaScript 1.4

      # Keywords after which an expression follows, so that a subsequent "/"
      # opens a regexp literal instead of being a division operator.
      KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
        case delete in instanceof new return throw typeof with
      ] # :nodoc:

      # Reserved for future use.
      RESERVED_WORDS = %w[
        abstract boolean byte char class debugger double enum export extends
        final float goto implements import int interface long native package
        private protected public short static super synchronized throws transient
        volatile
      ] # :nodoc:

      # Maps a word to its token kind; anything not listed is an :ident.
      IDENT_KIND = WordList.new(:ident).
        add(RESERVED_WORDS, :reserved).
        add(PREDEFINED_CONSTANTS, :predefined_constant).
        add(MAGIC_VARIABLES, :local_variable).
        add(KEYWORDS, :keyword) # :nodoc:

      # Escape bodies, i.e. what may follow a backslash inside a literal.
      ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
      UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
      REGEXP_ESCAPE = / [bBdDsSwW] /x # :nodoc:

      # Flags allowed after the closing "/" of a regexp literal:
      # d (hasIndices), g, i, m, s (dotAll), u, v (unicodeSets), y (sticky).
      REGEXP_MODIFIERS = /[dgimsuvy]+/ # :nodoc:

      # Plain content of a literal, keyed by its opening delimiter: everything
      # up to the next backslash or closing delimiter (newlines included).
      STRING_CONTENT_PATTERN = {
        "'" => /[^\\']+/,
        '"' => /[^\\"]+/,
        '/' => /[^\\\/]+/,
      } # :nodoc:

      # Look-ahead used right after an opening quote: matches when the rest of
      # the literal is followed by a colon, i.e. the literal is an object key.
      KEY_CHECK_PATTERN = {
        "'" => / (?> [^\\']* (?: \\. [^\\']* )* ) ' \s* : /mx,
        '"' => / (?> [^\\"]* (?: \\. [^\\"]* )* ) " \s* : /mx,
      } # :nodoc:

      protected

      # Tokenizes the input and reports every token to +encoder+.
      #
      # encoder:: receives +text_token+, +begin_group+ and +end_group+ calls.
      # options:: accepted for interface compatibility; not read here.
      #
      # Returns +encoder+.
      #
      # Three flags steer the +:initial+ state:
      # value_expected::    an expression may start here, so "/" opens a regexp
      #                     and "<tag" is treated as embedded XML.
      # key_expected::      the last operator ended in "{" or ",", so a word or
      #                     string followed by ":" is an object key.
      # function_expected:: the previous token was the +function+ keyword, so
      #                     the next plain identifier is a function name.
      def scan_tokens encoder, options

        state = :initial
        string_delimiter = nil
        value_expected = true
        key_expected = false
        function_expected = false

        until eos?

          case state

          when :initial

            if match = scan(/ \s+ | \\\n /x)
              # A line break may end a statement, so a value can follow again.
              value_expected = true if !value_expected && match.index(?\n)
              encoder.text_token match, :space

            elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
              # Line comment, or block comment (an unterminated block comment
              # runs to the end of the input).
              value_expected = true
              encoder.text_token match, :comment

            elsif check(/\.?\d/)
              key_expected = value_expected = false
              if match = scan(/0[xX][0-9A-Fa-f]+/)
                encoder.text_token match, :hex
              elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
                encoder.text_token match, :octal
              elsif match = scan(/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
                encoder.text_token match, :float
              elsif match = scan(/\d+/)
                encoder.text_token match, :integer
              end

            elsif value_expected && (match = scan(/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim))
              # Embedded XML: delegate the whole element to the XML scanner,
              # which writes its tokens straight into +encoder+.
              # TODO: scan over nested tags
              xml_scanner.tokenize match, :tokens => encoder
              value_expected = false
              next

            elsif match = scan(/ [-+*=<>?:;,!&^|(\[{~%]+ | \.(?!\d) /x)
              value_expected = true
              # Character literals keep this comparison valid whether
              # String#[] returns an Integer or a one-character String.
              last_operator = match[-1]
              key_expected = (last_operator == ?{) || (last_operator == ?,)
              function_expected = false
              encoder.text_token match, :operator

            elsif match = scan(/ [)\]}]+ /x)
              function_expected = key_expected = value_expected = false
              encoder.text_token match, :operator

            elsif match = scan(/ [$a-zA-Z_][A-Za-z_0-9$]* /x)
              kind = IDENT_KIND[match]
              value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
              # TODO: labels
              if kind == :ident
                if match.index(?$) # $ allowed inside an identifier
                  kind = :predefined
                elsif function_expected
                  kind = :function
                elsif check(/\s*[=:]\s*function\b/)
                  # "name = function" / "name: function" also names a function.
                  kind = :function
                elsif key_expected && check(/\s*:/)
                  kind = :key
                end
              end
              function_expected = (kind == :keyword) && (match == 'function')
              key_expected = false
              encoder.text_token match, kind

            elsif match = scan(/["']/)
              if key_expected && check(KEY_CHECK_PATTERN[match])
                state = :key
              else
                state = :string
              end
              encoder.begin_group state
              string_delimiter = match
              encoder.text_token match, :delimiter

            elsif value_expected && (match = scan(/\//))
              encoder.begin_group :regexp
              state = :regexp
              string_delimiter = '/'
              encoder.text_token match, :delimiter

            elsif match = scan(/ \/ /x)
              # No value expected here, so "/" is the division operator.
              value_expected = true
              key_expected = false
              encoder.text_token match, :operator

            else
              encoder.text_token getch, :error

            end

          when :string, :regexp, :key
            if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
              encoder.text_token match, :content
            elsif match = scan(/["'\/]/)
              # The content pattern above consumes every character except a
              # backslash and the literal's own delimiter, so a delimiter
              # character found here closes the literal.
              encoder.text_token match, :delimiter
              if state == :regexp
                modifiers = scan(REGEXP_MODIFIERS)
                encoder.text_token modifiers, :modifier if modifiers
              end
              encoder.end_group state
              string_delimiter = nil
              key_expected = value_expected = false
              state = :initial
            elsif state != :regexp && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
              # NOTE(review): inside '...' only \\ and \' are highlighted as
              # :char, although JavaScript interprets escapes the same way in
              # both quote styles - confirm that this is intentional.
              if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
                encoder.text_token match, :content
              else
                encoder.text_token match, :char
              end
            elsif state == :regexp && (match = scan(/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
              encoder.text_token match, :char
            elsif match = scan(/\\./m)
              # Unknown escape: keep it as ordinary content.
              encoder.text_token match, :content
            elsif match = scan(/ \\ | $ /x)
              # Unterminated literal: close the group and return to code.
              # The "$" alternative can match the empty string; never emit a
              # zero-length token for it.
              encoder.end_group state
              encoder.text_token match, :error unless match.empty?
              key_expected = value_expected = false
              state = :initial
            else
              raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
            end

          else
            raise_inspect 'Unknown state', encoder

          end

        end

        # Close a literal that is still open at the end of the input. A :key
        # group is only opened after KEY_CHECK_PATTERN has seen its closing
        # quote ahead, so it is expected to be closed inside the loop.
        if [:string, :regexp].include? state
          encoder.end_group state
        end

        encoder
      end

      # Resets this scanner and, if one has been created, the nested XML
      # scanner.
      def reset_instance
        super
        @xml_scanner.reset if defined? @xml_scanner
      end

      # Returns the scanner used for embedded XML, creating it on first use.
      def xml_scanner
        @xml_scanner ||= CodeRay.scanner :xml, :tokens => @tokens, :keep_tokens => true, :keep_state => false
      end

    end

  end
end