comparison vendor/gems/coderay-0.9.7/lib/coderay/scanners/ruby/patterns.rb @ 524:1248a47e81b3 feature_36

Merge from branch "luisf"
author luisf <luis.figueira@eecs.qmul.ac.uk>
date Mon, 25 Jul 2011 14:39:38 +0100
parents 0579821a129a
children
comparison
equal deleted inserted replaced
519:3be6bc3c2a17 524:1248a47e81b3
1 # encoding: utf-8
2 module CodeRay
3 module Scanners
4
5 module Ruby::Patterns # :nodoc:
6
# Words that IDENT_KIND (below) classifies as :reserved.
RESERVED_WORDS = %w[
  and def end in or unless begin
  defined? ensure module redo super until
  BEGIN break do next rescue then
  when END case else for retry
  while alias class elsif if not return
  undef yield
]

# Keyword groups; each group is registered in DEF_NEW_STATE with its own state.
DEF_KEYWORDS = %w[ def ]
UNDEF_KEYWORDS = %w[ undef ]
ALIAS_KEYWORDS = %w[ alias ]
MODULE_KEYWORDS = %w[ class module ]

# Keyword => state table. WordList is defined elsewhere; presumably the
# :initial passed to its constructor is the value returned for every word
# that was not added -- TODO confirm against WordList.
DEF_NEW_STATE = WordList.new(:initial).
  add(DEF_KEYWORDS, :def_expected).
  add(UNDEF_KEYWORDS, :undef_expected).
  add(ALIAS_KEYWORDS, :alias_expected).
  add(MODULE_KEYWORDS, :module_expected)

# Names that IDENT_KIND classifies as :pre_constant.
PREDEFINED_CONSTANTS = %w[
  nil true false self
  DATA ARGV ARGF
  __FILE__ __LINE__ __ENCODING__
]

# Identifier => token kind table, created with :ident and filled with the
# reserved words and predefined constants above.
IDENT_KIND = WordList.new(:ident).
  add(RESERVED_WORDS, :reserved).
  add(PREDEFINED_CONSTANTS, :pre_constant)
35
# IDENT matches a single identifier. Three variants exist; the one used is
# picked by probing the running regexp engine:
#   1. if /\w/u already matches the non-ASCII character '∑', \w is used as is;
#   2. otherwise, if Regexp objects respond to #encoding, Unicode property
#      classes are intersected with a negated ASCII punctuation/digit set;
#   3. otherwise only the negated ASCII set is used.
# The interpreter names in the branch comments are the ones given by the
# original author.
if /\w/u === '∑'
  # MRI 1.8.6, 1.8.7
  IDENT = /[^\W\d]\w*/
elsif //.respond_to? :encoding
  # MRI 1.9.1, 1.9.2
  IDENT = Regexp.new '[\p{L}\p{M}\p{Pc}\p{Sm}&&[^\x00-\x40\x5b-\x5e\x60\x7b-\x7f]][\p{L}\p{M}\p{N}\p{Pc}\p{Sm}&&[^\x00-\x2f\x3a-\x40\x5b-\x5e\x60\x7b-\x7f]]*'
else
  # JRuby, Rubinius
  IDENT = /[^\x00-\x40\x5b-\x5e\x60\x7b-\x7f][^\x00-\x2f\x3a-\x40\x5b-\x5e\x60\x7b-\x7f]*/
end
48
# An identifier with an optional trailing ? or !.
METHOD_NAME = / #{IDENT} [?!]? /ox

# Operators that can serve as method names.
METHOD_NAME_OPERATOR = /
    \*\*?           # multiplication and power
  | [-+~]@?         # plus, minus, tilde with and without at sign
  | [\/%&|^`]       # division, modulo or format strings, and, or, xor, system
  | \[\]=?          # array getter and setter
  | << | >>         # append or shift left, shift right
  | <=?>? | >=?     # comparison, rocket operator
  | ===? | =~       # simple equality, case equality, match
  | ![~=@]?         # negation with and without at sign, not-equal and not-match
/ox

# Extended method name: identifier with optional ?, ! or a setter "="
# (the lookahead keeps "=>" from being read as a setter), or an operator.
METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox

# Variables introduced by a sigil.
INSTANCE_VARIABLE = / @ #{IDENT} /ox
CLASS_VARIABLE = / @@ #{IDENT} /ox
OBJECT_VARIABLE = / @@? #{IDENT} /ox

# "$" followed by an identifier, a number, one punctuation character, or
# "-" plus one word character.
GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox

# Any variable that carries a prefix ($, @ or @@).
PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} | #{OBJECT_VARIABLE} /ox

# Any variable: identifier with up to two leading "@", or a global.
VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox
67
# Token type selected by a quote character: backtick and slash are listed
# explicitly, every other key falls back to :string via the hash default.
QUOTE_TO_TYPE = {
  '`' => :shell,
  '/'=> :regexp,
}
QUOTE_TO_TYPE.default = :string

# Run of flag letters (possibly empty) accepted as regexp modifiers.
REGEXP_MODIFIERS = /[mixounse]*/
# One character that is treated as a regexp symbol.
REGEXP_SYMBOLS = /[|?*+(){}\[\].^$]/
76
# Numeric literal patterns.
#
# Ruby treats the radix letter of an integer literal case-insensitively and
# also knows the explicit octal prefix 0o and decimal prefix 0d. The patterns
# therefore accept 0x/0X, 0b/0B, 0/0_/0o/0O and (inside NUMERIC) 0d/0D.
# Digit groups may be separated by single underscores.
DECIMAL = /\d+(?:_\d+)*/
OCTAL = /0(?:[oO]|_)?[0-7]+(?:_[0-7]+)*/
HEXADECIMAL = /0[xX][0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
BINARY = /0[bB][01]+(?:_[01]+)*/

EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox
# The empty group () participates only when a float suffix was consumed.
FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox
# Any numeric literal. The empty group from FLOAT_OR_INT is the only capture,
# so group 1 is non-nil exactly when the literal is a float. Only
# non-capturing groups may be added here, to keep that numbering intact.
NUMERIC = / (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} | 0[dD] #{DECIMAL} ) | #{FLOAT_OR_INT} ) /ox
86
# A colon followed by a method name, a prefixed variable, or an opening
# single/double quote (only the quote itself is consumed here).
SYMBOL = /
  :
  (?:
    #{METHOD_NAME_EX}
  | #{PREFIX_VARIABLE}
  | ['"]
  )
/ox
# Either an extended method name or a symbol.
METHOD_NAME_OR_SYMBOL = / #{METHOD_NAME_EX} | #{SYMBOL} /ox
96
# The part of an escape sequence after the backslash: a named escape letter,
# up to three octal digits, "x" plus one or two hex digits, or any single
# character (the final alternative is optional, so it may match nothing).
SIMPLE_ESCAPE = /
    [abefnrstv]
  | [0-7]{1,3}
  | x[0-9A-Fa-f]{1,2}
  | .?
/mx

# Control/meta escape after the backslash: one M-, C- or c prefix, any number
# of further backslash-prefixed ones, then optionally the affected character
# (a plain character or another backslash escape).
CONTROL_META_ESCAPE = /
  (?: M-|C-|c )
  (?: \\ (?: M-|C-|c ) )*
  (?: [^\\] | \\ #{SIMPLE_ESCAPE} )?
/mox

# Any escape body; the control/meta form is tried first.
ESCAPE = /
  #{CONTROL_META_ESCAPE} | #{SIMPLE_ESCAPE}
/mox

# Character literal: "?" followed by one character that is neither whitespace
# nor a backslash, or by a backslash escape.
CHARACTER = /
  \?
  (?:
    [^\s\\]
  | \\ #{ESCAPE}
  )
/mox
121
# Opening of a heredoc: "<<", an optional "-", then either a bare identifier
# or a delimiter enclosed in a matching pair of ", ', ` or / characters.
#
# NOTE: This is not completely correct, but
# nobody needs heredoc delimiters ending with \n.
# Also, delimiters starting with numbers are allowed,
# but they are more often than not a false positive.
#
# NOTE(review): "float" in the $1 comment presumably refers to a terminator
# that may be indented (the "<<-" form) -- confirm against the scanner.
HEREDOC_OPEN = /
  << (-)?              # $1 = float
  (?:
    ( #{IDENT} )       # $2 = delim
  |
    ( ["'`\/] )        # $3 = quote, type
    ( [^\n]*? ) \3     # $4 = delim
  )
/mx
135
# Embedded documentation block: "=begin" not followed by a non-space
# character, then as little text as possible up to the end of input or to a
# line starting with "=end" (the rest of that line is included).
# The pattern body stays indented so that "=begin" never sits in column 0 of
# this file, where Ruby itself would treat it as the start of a comment block.
RUBYDOC = /
  =begin (?!\S)
  .*?
  (?: \Z | ^=end (?!\S) [^\n]* )
/mx

# Data section: "__END__" at the end of a line, then as little text as
# possible up to the end of input or up to (not including) a line that
# starts with "#CODE".
DATA = /
  __END__$
  .*?
  (?: \Z | (?=^\#CODE) )
/mx
147
# Checks for a valid value to follow. This enables
# value_expected in method calls without parentheses.
#
# Requires at least one blank (space, tab, form feed or vertical tab) and
# then one of: "%" or "/" followed by a character that is neither whitespace
# nor "="; "<<" with optional "-" and a non-space character; a sign directly
# followed by a digit; or a character literal.
VALUE_FOLLOWS = /
  (?>[ \t\f\v]+)
  (?:
    [%\/][^\s=]
  | <<-?\S
  | [-+] \d
  | #{CHARACTER}
  )
/x
# Word list of the keywords after which a value is expected (going by the
# constant's name). It is built with WordList's default constructor, so what
# a lookup returns for listed and unlisted words depends on WordList, which
# is defined elsewhere -- TODO confirm.
KEYWORDS_EXPECTING_VALUE = WordList.new.add(%w[
  and end in or unless begin
  defined? ensure redo super until
  break do next rescue then
  when case else for retry
  while elsif if not return
  yield
])
167
# A complete embedded documentation block or a complete data section.
RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo

# Only the opening marker of either construct, anchored at the start of a
# line: "=begin" not followed by a non-space character, or a line that is
# exactly "__END__".
RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x
171
# Start of a %-literal: "%", an optional type letter (group 1; empty when the
# next character is not alphanumeric), and a non-alphanumeric delimiter
# (group 2).
FANCY_START_CORRECT = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx

# Type letter of a %-literal => [token type, flag]. StringState receives the
# second element as its "interpreted" argument -- presumably from the scanner,
# which is not shown here.
FancyStringType = {
  'q' => [:string, false],
  'Q' => [:string, true],
  'r' => [:regexp, true],
  's' => [:symbol, false],
  'x' => [:shell, true]
}
# %w shares the entry of %q; %W and the letterless form share the entry of %Q.
FancyStringType['w'] = FancyStringType['q']
FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q']
183
184 class StringState < Struct.new :type, :interpreted, :delim, :heredoc,
185 :paren, :paren_depth, :pattern, :next_state
186
187 CLOSING_PAREN = Hash[ *%w[
188 ( )
189 [ ]
190 < >
191 { }
192 ] ]
193
194 CLOSING_PAREN.each { |k,v| k.freeze; v.freeze } # debug, if I try to change it with <<
195 OPENING_PAREN = CLOSING_PAREN.invert
196
197 STRING_PATTERN = Hash.new do |h, k|
198 delim, interpreted = *k
199 delim_pattern = Regexp.escape(delim.dup) # dup: workaround for old Ruby
200 if closing_paren = CLOSING_PAREN[delim]
201 delim_pattern = delim_pattern[0..-1] if defined? JRUBY_VERSION # JRuby fix
202 delim_pattern << Regexp.escape(closing_paren)
203 end
204 delim_pattern << '\\\\' unless delim == '\\'
205
206 special_escapes =
207 case interpreted
208 when :regexp_symbols
209 '| ' + REGEXP_SYMBOLS.source
210 when :words
211 '| \s'
212 end
213
214 h[k] =
215 if interpreted and not delim == '#'
216 / (?= [#{delim_pattern}] | \# [{$@] #{special_escapes} ) /mx
217 else
218 / (?= [#{delim_pattern}] #{special_escapes} ) /mx
219 end
220 end
221
222 HEREDOC_PATTERN = Hash.new do |h, k|
223 delim, interpreted, indented = *k
224 delim_pattern = Regexp.escape(delim.dup) # dup: workaround for old Ruby
225 delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x
226 h[k] =
227 if interpreted
228 / (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx # $1 set == end of heredoc
229 else
230 / (?= #{delim_pattern}() | \\ ) /mx
231 end
232 end
233
234 def initialize kind, interpreted, delim, heredoc = false
235 if heredoc
236 pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ]
237 delim = nil
238 else
239 pattern = STRING_PATTERN[ [delim, interpreted] ]
240 if paren = CLOSING_PAREN[delim]
241 delim, paren = paren, delim
242 paren_depth = 1
243 end
244 end
245 super kind, interpreted, delim, heredoc, paren, paren_depth, pattern, :initial
246 end
247 end unless defined? StringState
248
249 end
250
251 end
252 end