Chris@210
|
1 # encoding: utf-8
|
Chris@210
|
2 module CodeRay
|
Chris@210
|
3 module Scanners
|
Chris@210
|
4
|
Chris@210
|
5 module Ruby::Patterns # :nodoc:
|
Chris@210
|
6
|
# Words listed here are classified as :reserved by the identifier lookup
# table built from this list. The list includes the BEGIN/END hooks and
# the defined? operator.
RESERVED_WORDS = %w[
  and def end in or unless begin
  defined? ensure module redo super until
  BEGIN break do next rescue then
  when END case else for retry
  while alias class elsif if not return
  undef yield
]
Chris@210
|
15
|
# Keywords that introduce a definition, grouped by what must follow them.
DEF_KEYWORDS = %w[ def ]
UNDEF_KEYWORDS = %w[ undef ]
ALIAS_KEYWORDS = %w[ alias ]
MODULE_KEYWORDS = %w[ class module ]

# Lookup table from a definition keyword to a state symbol
# (:def_expected, :undef_expected, :alias_expected, :module_expected).
# NOTE(review): :initial is passed to WordList.new and is presumably the
# value returned for every other word - confirm against WordList.
DEF_NEW_STATE = WordList.new(:initial).
  add(DEF_KEYWORDS, :def_expected).
  add(UNDEF_KEYWORDS, :undef_expected).
  add(ALIAS_KEYWORDS, :alias_expected).
  add(MODULE_KEYWORDS, :module_expected)
Chris@210
|
25
|
# Identifiers that are classified as :pre_constant rather than as plain
# identifiers.
PREDEFINED_CONSTANTS = %w[
  nil true false self
  DATA ARGV ARGF
  __FILE__ __LINE__ __ENCODING__
]

# Lookup table from an identifier to its token kind: :reserved for the
# reserved words, :pre_constant for the predefined constants.
# NOTE(review): :ident is passed to WordList.new and is presumably the
# kind returned for every other identifier - confirm against WordList.
IDENT_KIND = WordList.new(:ident).
  add(RESERVED_WORDS, :reserved).
  add(PREDEFINED_CONSTANTS, :pre_constant)
Chris@210
|
35
|
# IDENT matches one identifier. Which definition is used depends on the
# regexp engine of the running interpreter: the probe below checks whether
# \w under the /u flag matches a non-ASCII symbol, then whether regexps
# know about encodings.
if /\w/u === '∑'
  # MRI 1.8.6, 1.8.7
  IDENT = /[^\W\d]\w*/
elsif //.respond_to? :encoding
  # MRI 1.9.1, 1.9.2
  IDENT = Regexp.new '[\p{L}\p{M}\p{Pc}\p{Sm}&&[^\x00-\x40\x5b-\x5e\x60\x7b-\x7f]][\p{L}\p{M}\p{N}\p{Pc}\p{Sm}&&[^\x00-\x2f\x3a-\x40\x5b-\x5e\x60\x7b-\x7f]]*'
else
  # JRuby, Rubinius
  IDENT = /[^\x00-\x40\x5b-\x5e\x60\x7b-\x7f][^\x00-\x2f\x3a-\x40\x5b-\x5e\x60\x7b-\x7f]*/
end

# An identifier with an optional trailing ? or !.
METHOD_NAME = / #{IDENT} [?!]? /ox

# Operators that can be used as method names.
METHOD_NAME_OPERATOR = /
  \*\*?           # multiplication and power
  | [-+~]@?       # plus, minus, tilde with and without at sign
  | [\/%&|^`]     # division, modulo or format strings, and, or, xor, system
  | \[\]=?        # array getter and setter
  | << | >>       # append or shift left, shift right
  | <=?>? | >=?   # comparison, rocket operator
  | ===? | =~     # simple equality, case equality, match
  | ![~=@]?       # negation with and without at sign, not-equal and not-match
/ox

# A method name in its widest form: an identifier optionally followed by
# ?, ! or = (but not the = of a => arrow), or an operator method name.
METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox

INSTANCE_VARIABLE = / @ #{IDENT} /ox
CLASS_VARIABLE = / @@ #{IDENT} /ox
# Instance or class variable.
OBJECT_VARIABLE = / @@? #{IDENT} /ox
# $name, numbered ($1, $0...), punctuation ($!, $~, ...) and $-x globals.
GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
# Any variable that starts with a sigil ($, @ or @@).
PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} | #{OBJECT_VARIABLE} /ox
# Any variable: plain identifier, instance, class or global variable.
VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox
Chris@210
|
67
|
# Maps an opening quote character to the kind of literal it starts.
# Any character not listed falls back to :string via the hash default.
QUOTE_TO_TYPE = {
  '`' => :shell,
  '/'=> :regexp,
}
QUOTE_TO_TYPE.default = :string

# Zero or more of the flag letters that may trail a regexp literal.
REGEXP_MODIFIERS = /[mixounse]*/
# A single regexp metacharacter.
REGEXP_SYMBOLS = /[|?*+(){}\[\].^$]/
Chris@210
|
76
|
# Integer literal building blocks; digit groups may be separated by
# single underscores.
DECIMAL = /\d+(?:_\d+)*/
OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
BINARY = /0b[01]+(?:_[01]+)*/

EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
# What turns a decimal into a float: an exponent, or a fraction with an
# optional exponent.
FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox
# The empty group () only participates when a float suffix matched, so
# group 1 is non-nil for floats and nil for integers.
FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox
# Any numeric literal. Prefixed forms are only tried when the literal
# starts with 0; otherwise (or if they fail) it is a decimal or float.
NUMERIC = / (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} ) /ox
Chris@210
|
86
|
# A colon followed by a method name, a sigil-prefixed variable, or an
# opening quote (only the quote itself is matched, not the quoted body).
SYMBOL = /
  :
  (?:
    #{METHOD_NAME_EX}
  | #{PREFIX_VARIABLE}
  | ['"]
  )
/ox
METHOD_NAME_OR_SYMBOL = / #{METHOD_NAME_EX} | #{SYMBOL} /ox
Chris@210
|
96
|
# The part of an escape sequence after the backslash: a named escape
# letter, up to three octal digits, x plus up to two hex digits, or any
# single character (possibly none).
SIMPLE_ESCAPE = /
    [abefnrstv]
  | [0-7]{1,3}
  | x[0-9A-Fa-f]{1,2}
  | .?
/mx

# Control and meta escapes (M-, C-, c), which may be chained with further
# backslash-prefixed M-, C- or c parts and end in an optional character or
# simple escape.
CONTROL_META_ESCAPE = /
  (?: M-|C-|c )
  (?: \\ (?: M-|C-|c ) )*
  (?: [^\\] | \\ #{SIMPLE_ESCAPE} )?
/mox

# Everything that may follow a backslash.
ESCAPE = /
  #{CONTROL_META_ESCAPE} | #{SIMPLE_ESCAPE}
/mox

# A character literal: ? followed by one non-space, non-backslash
# character or by a backslash escape.
CHARACTER = /
  \?
  (?:
    [^\s\\]
  | \\ #{ESCAPE}
  )
/mox
Chris@210
|
121
|
# Matches the opening of a heredoc, either with a bare identifier
# delimiter or with a quoted one.
#
# NOTE: This is not completely correct, but
# nobody needs heredoc delimiters ending with \n.
# Also, delimiters starting with numbers are allowed,
# but they are more often than not a false positive.
HEREDOC_OPEN = /
  << (-)?              # $1 = dash, present for the <<- form
  (?:
    ( #{IDENT} )       # $2 = delim
  |
    ( ["'`\/] )        # $3 = quote, type
    ( [^\n]*? ) \3     # $4 = delim
  )
/mx
Chris@210
|
135
|
# An embedded documentation block: from =begin up to and including the
# rest of the =end line, or to the end of input if =end is missing.
RUBYDOC = /
  =begin (?!\S)
  .*?
  (?: \Z | ^=end (?!\S) [^\n]* )
/mx

# The data section after __END__: runs to the end of input, or stops just
# before a line starting with #CODE.
DATA = /
  __END__$
  .*?
  (?: \Z | (?=^\#CODE) )
/mx
Chris@210
|
147
|
# Checks for a valid value to follow. This enables
# value_expected in method calls without parentheses.
# Requires leading horizontal whitespace, then the start of a % or /
# literal, a heredoc, a signed number or a character literal.
VALUE_FOLLOWS = /
  (?>[ \t\f\v]+)
  (?:
    [%\/][^\s=]
  | <<-?\S
  | [-+] \d
  | #{CHARACTER}
  )
/x

# Word list of keywords after which a value is expected.
# NOTE(review): WordList.new is called without arguments; what lookups
# return for listed and unlisted words depends on WordList - confirm there.
KEYWORDS_EXPECTING_VALUE = WordList.new.add(%w[
  and end in or unless begin
  defined? ensure redo super until
  break do next rescue then
  when case else for retry
  while elsif if not return
  yield
])
Chris@210
|
167
|
# Either an embedded documentation block or the trailing data section.
RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo

# The line-anchored start of either of those two sections.
RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x
Chris@210
|
171
|
# Start of a percent literal. Group 1 is the type letter (empty for the
# bare %-delimiter form), group 2 is the non-alphanumeric delimiter.
FANCY_START_CORRECT = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx

# Maps a percent-literal type letter to a [kind, interpreted] pair.
FancyStringType = {
  'q' => [:string, false],
  'Q' => [:string, true],
  'r' => [:regexp, true],
  's' => [:symbol, false],
  'x' => [:shell, true]
}
# %w shares the %q entry; %W and the bare form share the %Q entry.
FancyStringType['w'] = FancyStringType['q']
FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q']
Chris@210
|
183
|
# State of a string-like literal (string, regexp, shell command, symbol,
# heredoc ...).
#
# Members, as filled in by #initialize:
# type::        the kind passed in
# interpreted:: the interpreted flag passed in (truthy, :regexp_symbols,
#               :words, or falsy)
# delim::       the closing delimiter; nil for heredocs
# heredoc::     the heredoc argument (false by default)
# paren::       the opening bracket when the delimiter is a bracket pair
# paren_depth:: 1 when the delimiter is a bracket pair, nil otherwise
# pattern::     lookahead regexp locating the next position of interest
# next_state::  always :initial
#
# The class is only defined once; a second load leaves the existing
# definition untouched.
class StringState < Struct.new :type, :interpreted, :delim, :heredoc,
  :paren, :paren_depth, :pattern, :next_state

  # Opening bracket => closing bracket.
  CLOSING_PAREN = Hash[ *%w[
    ( )
    [ ]
    < >
    { }
  ] ]

  CLOSING_PAREN.each { |k,v| k.freeze; v.freeze }  # debug, if I try to change it with <<
  # Closing bracket => opening bracket.
  OPENING_PAREN = CLOSING_PAREN.invert

  # Cache of patterns for non-heredoc literals, keyed by
  # [delim, interpreted] and built on first access. Each pattern is a
  # zero-width lookahead for: the delimiter (both brackets for a bracket
  # pair), a backslash (which is simply the delimiter when delim is a
  # backslash), and - when interpreted and the delimiter is not '#' - the
  # start of an interpolation (#{, #$ or #@). :regexp_symbols additionally
  # stops at regexp metacharacters, :words at whitespace.
  STRING_PATTERN = Hash.new do |h, k|
    delim, interpreted = *k
    delim_pattern = Regexp.escape(delim.dup)  # dup: workaround for old Ruby
    if closing_paren = CLOSING_PAREN[delim]
      delim_pattern = delim_pattern[0..-1] if defined? JRUBY_VERSION  # JRuby fix
      delim_pattern << Regexp.escape(closing_paren)
    end
    # The result is used inside a character class; add the backslash to it.
    delim_pattern << '\\\\' unless delim == '\\'

    special_escapes =
      case interpreted
      when :regexp_symbols
        '| ' + REGEXP_SYMBOLS.source
      when :words
        '| \s'
      end

    h[k] =
      if interpreted && delim != '#'
        / (?= [#{delim_pattern}] | \# [{$@] #{special_escapes} ) /mx
      else
        / (?= [#{delim_pattern}] #{special_escapes} ) /mx
      end
  end

  # Cache of patterns for heredoc bodies, keyed by
  # [delim, interpreted, indented]. The terminator is a newline, optional
  # spaces and tabs when indented, and the delimiter up to the end of the
  # line. The delimiter is wrapped in its own Regexp so the surrounding
  # extended mode does not apply to it.
  HEREDOC_PATTERN = Hash.new do |h, k|
    delim, interpreted, indented = *k
    delim_pattern = Regexp.escape(delim.dup)  # dup: workaround for old Ruby
    delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x
    h[k] =
      if interpreted
        / (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx  # $1 set == end of heredoc
      else
        / (?= #{delim_pattern}() | \\ ) /mx
      end
  end

  # kind::        stored as type
  # interpreted:: stored as is and used to select the pattern
  # delim::       opening delimiter, or the heredoc terminator
  # heredoc::     falsy for ordinary literals; :indented selects the
  #               indented terminator pattern, any other truthy value the
  #               non-indented one
  def initialize kind, interpreted, delim, heredoc = false
    # paren and paren_depth are only assigned in the bracket case below;
    # on every other path they are nil when passed to super.
    if heredoc
      pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ]
      delim = nil
    else
      pattern = STRING_PATTERN[ [delim, interpreted] ]
      if paren = CLOSING_PAREN[delim]
        # Store the closing bracket as delim, the opening one as paren.
        delim, paren = paren, delim
        paren_depth = 1
      end
    end
    super kind, interpreted, delim, heredoc, paren, paren_depth, pattern, :initial
  end
end unless defined? StringState
Chris@210
|
248
|
Chris@210
|
249 end
|
Chris@210
|
250
|
Chris@210
|
251 end
|
Chris@210
|
252 end
|