Mercurial > hg > soundsoftware-site
comparison vendor/gems/coderay-0.9.7/lib/coderay/scanners/ruby/patterns.rb @ 524:1248a47e81b3 feature_36
Merge from branch "luisf"
author | luisf <luis.figueira@eecs.qmul.ac.uk> |
---|---|
date | Mon, 25 Jul 2011 14:39:38 +0100 |
parents | 0579821a129a |
children |
comparison
equal
deleted
inserted
replaced
519:3be6bc3c2a17 | 524:1248a47e81b3 |
---|---|
1 # encoding: utf-8 | |
2 module CodeRay | |
3 module Scanners | |
4 | |
5 module Ruby::Patterns # :nodoc: | |
6 | |
7 RESERVED_WORDS = %w[ | |
8 and def end in or unless begin | |
9 defined? ensure module redo super until | |
10 BEGIN break do next rescue then | |
11 when END case else for retry | |
12 while alias class elsif if not return | |
13 undef yield | |
14 ] | |
15 | |
16 DEF_KEYWORDS = %w[ def ] | |
17 UNDEF_KEYWORDS = %w[ undef ] | |
18 ALIAS_KEYWORDS = %w[ alias ] | |
19 MODULE_KEYWORDS = %w[ class module ] | |
20 DEF_NEW_STATE = WordList.new(:initial). | |
21 add(DEF_KEYWORDS, :def_expected). | |
22 add(UNDEF_KEYWORDS, :undef_expected). | |
23 add(ALIAS_KEYWORDS, :alias_expected). | |
24 add(MODULE_KEYWORDS, :module_expected) | |
25 | |
26 PREDEFINED_CONSTANTS = %w[ | |
27 nil true false self | |
28 DATA ARGV ARGF | |
29 __FILE__ __LINE__ __ENCODING__ | |
30 ] | |
31 | |
32 IDENT_KIND = WordList.new(:ident). | |
33 add(RESERVED_WORDS, :reserved). | |
34 add(PREDEFINED_CONSTANTS, :pre_constant) | |
35 | |
36 if /\w/u === '∑' | |
37 # MRI 1.8.6, 1.8.7 | |
38 IDENT = /[^\W\d]\w*/ | |
39 else | |
40 if //.respond_to? :encoding | |
41 # MRI 1.9.1, 1.9.2 | |
42 IDENT = Regexp.new '[\p{L}\p{M}\p{Pc}\p{Sm}&&[^\x00-\x40\x5b-\x5e\x60\x7b-\x7f]][\p{L}\p{M}\p{N}\p{Pc}\p{Sm}&&[^\x00-\x2f\x3a-\x40\x5b-\x5e\x60\x7b-\x7f]]*' | |
43 else | |
44 # JRuby, Rubinius | |
45 IDENT = /[^\x00-\x40\x5b-\x5e\x60\x7b-\x7f][^\x00-\x2f\x3a-\x40\x5b-\x5e\x60\x7b-\x7f]*/ | |
46 end | |
47 end | |
48 | |
49 METHOD_NAME = / #{IDENT} [?!]? /ox | |
50 METHOD_NAME_OPERATOR = / | |
51 \*\*? # multiplication and power | |
52 | [-+~]@? # plus, minus, tilde with and without at sign | |
53 | [\/%&|^`] # division, modulo or format strings, and, or, xor, system | |
54 | \[\]=? # array getter and setter | |
55 | << | >> # append or shift left, shift right | |
56 | <=?>? | >=? # comparison, rocket operator | |
57 | ===? | =~ # simple equality, case equality, match | |
58 | ![~=@]? # negation with and without at sign, not-equal and not-match | |
59 /ox | |
60 METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox | |
61 INSTANCE_VARIABLE = / @ #{IDENT} /ox | |
62 CLASS_VARIABLE = / @@ #{IDENT} /ox | |
63 OBJECT_VARIABLE = / @@? #{IDENT} /ox | |
64 GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox | |
65 PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} | #{OBJECT_VARIABLE} /ox | |
66 VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox | |
67 | |
68 QUOTE_TO_TYPE = { | |
69 '`' => :shell, | |
70 '/'=> :regexp, | |
71 } | |
72 QUOTE_TO_TYPE.default = :string | |
73 | |
74 REGEXP_MODIFIERS = /[mixounse]*/ | |
75 REGEXP_SYMBOLS = /[|?*+(){}\[\].^$]/ | |
76 | |
77 DECIMAL = /\d+(?:_\d+)*/ | |
78 OCTAL = /0_?[0-7]+(?:_[0-7]+)*/ | |
79 HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/ | |
80 BINARY = /0b[01]+(?:_[01]+)*/ | |
81 | |
82 EXPONENT = / [eE] [+-]? #{DECIMAL} /ox | |
83 FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox | |
84 FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox | |
85 NUMERIC = / (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} ) /ox | |
86 | |
87 SYMBOL = / | |
88 : | |
89 (?: | |
90 #{METHOD_NAME_EX} | |
91 | #{PREFIX_VARIABLE} | |
92 | ['"] | |
93 ) | |
94 /ox | |
95 METHOD_NAME_OR_SYMBOL = / #{METHOD_NAME_EX} | #{SYMBOL} /ox | |
96 | |
97 SIMPLE_ESCAPE = / | |
98 [abefnrstv] | |
99 | [0-7]{1,3} | |
100 | x[0-9A-Fa-f]{1,2} | |
101 | .? | |
102 /mx | |
103 | |
104 CONTROL_META_ESCAPE = / | |
105 (?: M-|C-|c ) | |
106 (?: \\ (?: M-|C-|c ) )* | |
107 (?: [^\\] | \\ #{SIMPLE_ESCAPE} )? | |
108 /mox | |
109 | |
110 ESCAPE = / | |
111 #{CONTROL_META_ESCAPE} | #{SIMPLE_ESCAPE} | |
112 /mox | |
113 | |
114 CHARACTER = / | |
115 \? | |
116 (?: | |
117 [^\s\\] | |
118 | \\ #{ESCAPE} | |
119 ) | |
120 /mox | |
121 | |
122 # NOTE: This is not completely correct, but | |
123 # nobody needs heredoc delimiters ending with \n. | |
124 # Also, delimiters starting with numbers are allowed, | |
125 # but they are more often than not a false positive. | |
126 HEREDOC_OPEN = / | |
127 << (-)? # $1 = float | |
128 (?: | |
129 ( #{IDENT} ) # $2 = delim | |
130 | | |
131 ( ["'`\/] ) # $3 = quote, type | |
132 ( [^\n]*? ) \3 # $4 = delim | |
133 ) | |
134 /mx | |
135 | |
136 RUBYDOC = / | |
137 =begin (?!\S) | |
138 .*? | |
139 (?: \Z | ^=end (?!\S) [^\n]* ) | |
140 /mx | |
141 | |
142 DATA = / | |
143 __END__$ | |
144 .*? | |
145 (?: \Z | (?=^\#CODE) ) | |
146 /mx | |
147 | |
148 # Checks for a valid value to follow. This enables | |
149 # value_expected in method calls without parentheses. | |
150 VALUE_FOLLOWS = / | |
151 (?>[ \t\f\v]+) | |
152 (?: | |
153 [%\/][^\s=] | |
154 | <<-?\S | |
155 | [-+] \d | |
156 | #{CHARACTER} | |
157 ) | |
158 /x | |
159 KEYWORDS_EXPECTING_VALUE = WordList.new.add(%w[ | |
160 and end in or unless begin | |
161 defined? ensure redo super until | |
162 break do next rescue then | |
163 when case else for retry | |
164 while elsif if not return | |
165 yield | |
166 ]) | |
167 | |
168 RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo | |
169 | |
170 RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x | |
171 | |
172 FANCY_START_CORRECT = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx | |
173 | |
174 FancyStringType = { | |
175 'q' => [:string, false], | |
176 'Q' => [:string, true], | |
177 'r' => [:regexp, true], | |
178 's' => [:symbol, false], | |
179 'x' => [:shell, true] | |
180 } | |
181 FancyStringType['w'] = FancyStringType['q'] | |
182 FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q'] | |
183 | |
184 class StringState < Struct.new :type, :interpreted, :delim, :heredoc, | |
185 :paren, :paren_depth, :pattern, :next_state | |
186 | |
187 CLOSING_PAREN = Hash[ *%w[ | |
188 ( ) | |
189 [ ] | |
190 < > | |
191 { } | |
192 ] ] | |
193 | |
194 CLOSING_PAREN.each { |k,v| k.freeze; v.freeze } # debug, if I try to change it with << | |
195 OPENING_PAREN = CLOSING_PAREN.invert | |
196 | |
197 STRING_PATTERN = Hash.new do |h, k| | |
198 delim, interpreted = *k | |
199 delim_pattern = Regexp.escape(delim.dup) # dup: workaround for old Ruby | |
200 if closing_paren = CLOSING_PAREN[delim] | |
201 delim_pattern = delim_pattern[0..-1] if defined? JRUBY_VERSION # JRuby fix | |
202 delim_pattern << Regexp.escape(closing_paren) | |
203 end | |
204 delim_pattern << '\\\\' unless delim == '\\' | |
205 | |
206 special_escapes = | |
207 case interpreted | |
208 when :regexp_symbols | |
209 '| ' + REGEXP_SYMBOLS.source | |
210 when :words | |
211 '| \s' | |
212 end | |
213 | |
214 h[k] = | |
215 if interpreted and not delim == '#' | |
216 / (?= [#{delim_pattern}] | \# [{$@] #{special_escapes} ) /mx | |
217 else | |
218 / (?= [#{delim_pattern}] #{special_escapes} ) /mx | |
219 end | |
220 end | |
221 | |
222 HEREDOC_PATTERN = Hash.new do |h, k| | |
223 delim, interpreted, indented = *k | |
224 delim_pattern = Regexp.escape(delim.dup) # dup: workaround for old Ruby | |
225 delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x | |
226 h[k] = | |
227 if interpreted | |
228 / (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx # $1 set == end of heredoc | |
229 else | |
230 / (?= #{delim_pattern}() | \\ ) /mx | |
231 end | |
232 end | |
233 | |
234 def initialize kind, interpreted, delim, heredoc = false | |
235 if heredoc | |
236 pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ] | |
237 delim = nil | |
238 else | |
239 pattern = STRING_PATTERN[ [delim, interpreted] ] | |
240 if paren = CLOSING_PAREN[delim] | |
241 delim, paren = paren, delim | |
242 paren_depth = 1 | |
243 end | |
244 end | |
245 super kind, interpreted, delim, heredoc, paren, paren_depth, pattern, :initial | |
246 end | |
247 end unless defined? StringState | |
248 | |
249 end | |
250 | |
251 end | |
252 end |