annotate vendor/gems/coderay-0.9.7/lib/coderay/scanners/ruby/patterns.rb @ 855:7294e8db2515 bug_162

Close obsolete branch bug_162
author Chris Cannam
date Thu, 14 Jul 2011 11:59:19 +0100
parents 0579821a129a
children
rev   line source
Chris@210 1 # encoding: utf-8
Chris@210 2 module CodeRay
Chris@210 3 module Scanners
Chris@210 4
Chris@210 5 module Ruby::Patterns # :nodoc:
Chris@210 6
# Ruby keywords. IDENT_KIND (defined further down in this module) classifies
# every word in this list as :reserved. Note that the entry for `defined?`
# includes its question mark.
Chris@210 7 RESERVED_WORDS = %w[
Chris@210 8 and def end in or unless begin
Chris@210 9 defined? ensure module redo super until
Chris@210 10 BEGIN break do next rescue then
Chris@210 11 when END case else for retry
Chris@210 12 while alias class elsif if not return
Chris@210 13 undef yield
Chris@210 14 ]
Chris@210 15
# Keyword lists, one per state symbol that DEF_NEW_STATE maps them to.
Chris@210 16 DEF_KEYWORDS = %w[ def ]
Chris@210 17 UNDEF_KEYWORDS = %w[ undef ]
Chris@210 18 ALIAS_KEYWORDS = %w[ alias ]
Chris@210 19 MODULE_KEYWORDS = %w[ class module ]
# Maps a keyword to :def_expected, :undef_expected, :alias_expected or
# :module_expected. The :initial argument to WordList.new is presumably the
# value returned for every other word -- WordList is defined outside this
# file; confirm there.
Chris@210 20 DEF_NEW_STATE = WordList.new(:initial).
Chris@210 21 add(DEF_KEYWORDS, :def_expected).
Chris@210 22 add(UNDEF_KEYWORDS, :undef_expected).
Chris@210 23 add(ALIAS_KEYWORDS, :alias_expected).
Chris@210 24 add(MODULE_KEYWORDS, :module_expected)
Chris@210 25
# Words that IDENT_KIND classifies as :pre_constant.
Chris@210 26 PREDEFINED_CONSTANTS = %w[
Chris@210 27 nil true false self
Chris@210 28 DATA ARGV ARGF
Chris@210 29 __FILE__ __LINE__ __ENCODING__
Chris@210 30 ]
Chris@210 31
# Classifies a word as :reserved (RESERVED_WORDS) or :pre_constant
# (PREDEFINED_CONSTANTS). :ident is presumably the result for any other
# word -- see WordList, which is defined outside this file.
Chris@210 32 IDENT_KIND = WordList.new(:ident).
Chris@210 33 add(RESERVED_WORDS, :reserved).
Chris@210 34 add(PREDEFINED_CONSTANTS, :pre_constant)
Chris@210 35
# IDENT matches a single identifier. One of three definitions is chosen at
# load time, depending on what the running interpreter's regexp engine does.
#
# First probe: does \w with the /u flag match the non-ASCII character '∑'?
Chris@210 36 if /\w/u === '∑'
Chris@210 37 # MRI 1.8.6, 1.8.7
# A word character that is not a digit, followed by any word characters.
Chris@210 38 IDENT = /[^\W\d]\w*/
Chris@210 39 else
# Second probe: do Regexp objects have an #encoding method?
Chris@210 40 if //.respond_to? :encoding
Chris@210 41 # MRI 1.9.1, 1.9.2
# Unicode letters, marks, connector punctuation and math symbols, intersected
# (&&) with a class that removes ASCII control characters, punctuation and --
# for the first character only -- the digits \x30-\x39. The pattern is built
# from a String with Regexp.new rather than written as a literal.
# NOTE(review): presumably so that interpreters without \p{..} / && support
# can still parse this file -- confirm.
Chris@210 42 IDENT = Regexp.new '[\p{L}\p{M}\p{Pc}\p{Sm}&&[^\x00-\x40\x5b-\x5e\x60\x7b-\x7f]][\p{L}\p{M}\p{N}\p{Pc}\p{Sm}&&[^\x00-\x2f\x3a-\x40\x5b-\x5e\x60\x7b-\x7f]]*'
Chris@210 43 else
Chris@210 44 # JRuby, Rubinius
# Fallback: any character outside the excluded ASCII ranges starts an
# identifier; following characters may additionally be the digits \x30-\x39.
Chris@210 45 IDENT = /[^\x00-\x40\x5b-\x5e\x60\x7b-\x7f][^\x00-\x2f\x3a-\x40\x5b-\x5e\x60\x7b-\x7f]*/
Chris@210 46 end
Chris@210 47 end
Chris@210 48
# All patterns below use the /x flag (whitespace in the pattern is ignored)
# and the /o flag (the #{...} interpolations are performed only once).

# An identifier with an optional trailing ? or !.
Chris@210 49 METHOD_NAME = / #{IDENT} [?!]? /ox
# Operators that can be used as method names; each alternative carries its
# own inline description.
Chris@210 50 METHOD_NAME_OPERATOR = /
Chris@210 51 \*\*? # multiplication and power
Chris@210 52 | [-+~]@? # plus, minus, tilde with and without at sign
Chris@210 53 | [\/%&|^`] # division, modulo or format strings, and, or, xor, system
Chris@210 54 | \[\]=? # array getter and setter
Chris@210 55 | << | >> # append or shift left, shift right
Chris@210 56 | <=?>? | >=? # comparison, rocket operator
Chris@210 57 | ===? | =~ # simple equality, case equality, match
Chris@210 58 | ![~=@]? # negation with and without at sign, not-equal and not-match
Chris@210 59 /ox
# Extended method name: an identifier optionally followed by ?, ! or = (the =
# is only accepted when it is not followed by >, so "name=>" is not consumed
# as a setter name), or one of the operators above.
Chris@210 60 METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox
# @name, @@name, and either of the two.
Chris@210 61 INSTANCE_VARIABLE = / @ #{IDENT} /ox
Chris@210 62 CLASS_VARIABLE = / @@ #{IDENT} /ox
Chris@210 63 OBJECT_VARIABLE = / @@? #{IDENT} /ox
# $ followed by an identifier, a number not starting with 0, a 0 followed by
# word characters, one of the listed punctuation characters, or a dash plus
# a single letter, digit or underscore.
Chris@210 64 GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
# Any variable that starts with a sigil ($, @ or @@).
Chris@210 65 PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} | #{OBJECT_VARIABLE} /ox
# An identifier preceded by zero, one or two @ characters (so a plain
# identifier matches as well), or a global variable.
Chris@210 66 VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox
Chris@210 67
# Maps an opening quote character to a type symbol: a backtick gives :shell,
# a slash gives :regexp, and every other key falls back to :string through
# the hash default set below.
Chris@210 68 QUOTE_TO_TYPE = {
Chris@210 69 '`' => :shell,
Chris@210 70 '/'=> :regexp,
Chris@210 71 }
Chris@210 72 QUOTE_TO_TYPE.default = :string
Chris@210 73
# Any run (possibly empty) of the letters m, i, x, o, u, n, s, e.
Chris@210 74 REGEXP_MODIFIERS = /[mixounse]*/
# A single regexp metacharacter. Its source is spliced into
# StringState::STRING_PATTERN when the string is :regexp_symbols.
Chris@210 75 REGEXP_SYMBOLS = /[|?*+(){}\[\].^$]/
Chris@210 76
# Integer literals. Digit groups may be separated by single underscores.
Chris@210 77 DECIMAL = /\d+(?:_\d+)*/
# Leading 0, an optional underscore, then octal digits.
Chris@210 78 OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
# Only the lower-case prefixes 0x and 0b are matched.
Chris@210 79 HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
Chris@210 80 BINARY = /0b[01]+(?:_[01]+)*/
Chris@210 81
# e or E, an optional sign, and a decimal number.
Chris@210 82 EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
# What turns an integer into a float: an exponent, or a fractional part with
# an optional exponent.
Chris@210 83 FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox
# The empty group () only participates in the match when a FLOAT_SUFFIX was
# matched, so its capture is non-nil exactly for floats.
# NOTE(review): presumably the scanner tests that capture to choose between
# float and integer tokens -- the scanner is not visible here; confirm.
Chris@210 84 FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox
# The (?=0) lookahead restricts the octal/hexadecimal/binary alternatives to
# input starting with 0; if none of them matches (or the input does not start
# with 0), FLOAT_OR_INT is tried. The empty group of FLOAT_OR_INT is the only
# capturing group in this pattern.
Chris@210 85 NUMERIC = / (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} ) /ox
Chris@210 86
# The start of a symbol literal: a colon followed by an extended method name,
# a sigil-prefixed variable, or an opening single/double quote. For quoted
# symbols only the quote character itself is part of the match.
Chris@210 87 SYMBOL = /
Chris@210 88 :
Chris@210 89 (?:
Chris@210 90 #{METHOD_NAME_EX}
Chris@210 91 | #{PREFIX_VARIABLE}
Chris@210 92 | ['"]
Chris@210 93 )
Chris@210 94 /ox
# Either an extended method name or the start of a symbol literal.
Chris@210 95 METHOD_NAME_OR_SYMBOL = / #{METHOD_NAME_EX} | #{SYMBOL} /ox
Chris@210 96
# The part of an escape sequence that follows the backslash (the backslash
# itself is not included): one of the letters a b e f n r s t v, one to three
# octal digits, x plus one or two hex digits, or -- as a catch-all -- any
# single character or nothing at all. With /m the dot also matches a newline.
Chris@210 97 SIMPLE_ESCAPE = /
Chris@210 98 [abefnrstv]
Chris@210 99 | [0-7]{1,3}
Chris@210 100 | x[0-9A-Fa-f]{1,2}
Chris@210 101 | .?
Chris@210 102 /mx
Chris@210 103
# Control/meta escapes (again without the leading backslash): an M-, C- or c
# prefix, any number of further backslash-introduced prefixes, and then
# optionally either a non-backslash character or a backslash followed by a
# SIMPLE_ESCAPE.
Chris@210 104 CONTROL_META_ESCAPE = /
Chris@210 105 (?: M-|C-|c )
Chris@210 106 (?: \\ (?: M-|C-|c ) )*
Chris@210 107 (?: [^\\] | \\ #{SIMPLE_ESCAPE} )?
Chris@210 108 /mox
Chris@210 109
# Any escape body; control/meta escapes are tried first.
Chris@210 110 ESCAPE = /
Chris@210 111 #{CONTROL_META_ESCAPE} | #{SIMPLE_ESCAPE}
Chris@210 112 /mox
Chris@210 113
# A character literal: a question mark followed either by one character that
# is neither whitespace nor a backslash, or by a backslash and an ESCAPE.
Chris@210 114 CHARACTER = /
Chris@210 115 \?
Chris@210 116 (?:
Chris@210 117 [^\s\\]
Chris@210 118 | \\ #{ESCAPE}
Chris@210 119 )
Chris@210 120 /mox
Chris@210 121
# Matches the opening of a heredoc: << with an optional dash, followed by
# either a bare delimiter (an IDENT) or a delimiter wrapped in a pair of
# identical quote characters (", ', ` or /).
#
# Capture groups: $1 is the dash if present, $2 the bare delimiter, $3 the
# quote character and $4 the quoted delimiter text.
#
Chris@210 122 # NOTE: This is not completely correct, but
Chris@210 123 # nobody needs heredoc delimiters ending with \n.
Chris@210 124 # Also, delimiters starting with numbers are allowed,
Chris@210 125 # but they are more often than not a false positive.
# (A bare delimiter must match IDENT, which cannot start with a digit, so
# such delimiters are not recognized by this pattern.)
Chris@210 126 HEREDOC_OPEN = /
Chris@210 127 << (-)? # $1 = float
Chris@210 128 (?:
Chris@210 129 ( #{IDENT} ) # $2 = delim
Chris@210 130 |
Chris@210 131 ( ["'`\/] ) # $3 = quote, type
Chris@210 132 ( [^\n]*? ) \3 # $4 = delim
Chris@210 133 )
Chris@210 134 /mx
Chris@210 135
# An embedded documentation block: "=begin" (not directly followed by a
# non-space character), then as little text as possible up to either the end
# of the input or a line that starts with "=end" (again not directly followed
# by a non-space character), including the rest of that line.
# This pattern does not itself anchor "=begin" to the start of a line;
# RDOC_DATA_START does.
Chris@210 136 RUBYDOC = /
Chris@210 137 =begin (?!\S)
Chris@210 138 .*?
Chris@210 139 (?: \Z | ^=end (?!\S) [^\n]* )
Chris@210 140 /mx
Chris@210 141
# The data section: "__END__" at the end of a line, then as little text as
# possible up to either the end of the input or the position just before a
# line that starts with "#CODE".
Chris@210 142 DATA = /
Chris@210 143 __END__$
Chris@210 144 .*?
Chris@210 145 (?: \Z | (?=^\#CODE) )
Chris@210 146 /mx
Chris@210 147
Chris@210 148 # Checks for a valid value to follow. This enables
Chris@210 149 # value_expected in method calls without parentheses.
#
# The pattern requires at least one space, tab, form feed or vertical tab
# (matched atomically), followed by one of:
# * % or / and then a character that is neither whitespace nor =
# * << with an optional dash and then a non-space character
# * a sign (+ or -) and a digit
# * a character literal (CHARACTER)
Chris@210 150 VALUE_FOLLOWS = /
Chris@210 151 (?>[ \t\f\v]+)
Chris@210 152 (?:
Chris@210 153 [%\/][^\s=]
Chris@210 154 | <<-?\S
Chris@210 155 | [-+] \d
Chris@210 156 | #{CHARACTER}
Chris@210 157 )
Chris@210 158 /x
# Word list of keywords after which a value is expected (going by the
# constant's name). Compared with RESERVED_WORDS it leaves out def, module,
# class, alias, undef, BEGIN and END. Each listed word is added without an
# explicit value; what a lookup returns is defined by WordList, which is not
# part of this file.
Chris@210 159 KEYWORDS_EXPECTING_VALUE = WordList.new.add(%w[
Chris@210 160 and end in or unless begin
Chris@210 161 defined? ensure redo super until
Chris@210 162 break do next rescue then
Chris@210 163 when case else for retry
Chris@210 164 while elsif if not return
Chris@210 165 yield
Chris@210 166 ])
Chris@210 167
# Either a complete =begin/=end block or a complete __END__ data section.
Chris@210 168 RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo
Chris@210 169
# Only the opening marker of those two constructs: "=begin" at the start of
# a line, or a line consisting of exactly "__END__".
Chris@210 170 RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x
Chris@210 171
# The start of a %-literal. $1 is the type letter (one of q Q w W x s r), or
# the empty string when the % is not followed by a letter or digit; $2 is
# the delimiter, which may be any single non-alphanumeric character.
Chris@210 172 FANCY_START_CORRECT = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx
Chris@210 173
# Maps the type letter of a %-literal to a two-element array. The first
# element is a type symbol; the second is a boolean.
# NOTE(review): the boolean looks like the "interpreted" flag that
# StringState takes -- the call site is not in this file; confirm.
Chris@210 174 FancyStringType = {
Chris@210 175 'q' => [:string, false],
Chris@210 176 'Q' => [:string, true],
Chris@210 177 'r' => [:regexp, true],
Chris@210 178 's' => [:symbol, false],
Chris@210 179 'x' => [:shell, true]
Chris@210 180 }
# 'w' reuses the very same array object as 'q'; 'W' and the empty type
# letter (a bare %) reuse the array object of 'Q'.
Chris@210 181 FancyStringType['w'] = FancyStringType['q']
Chris@210 182 FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q']
Chris@210 183
# Describes a string-like literal (quoted string, %-literal or heredoc) whose
# body is being scanned.
#
# Struct members:
#   type         - the +kind+ given to #initialize
#   interpreted  - the +interpreted+ value given to #initialize
#   delim        - closing delimiter; nil for heredocs
#   heredoc      - the +heredoc+ value given to #initialize
#   paren        - opening bracket when the delimiter is a bracket pair, else nil
#   paren_depth  - 1 when the delimiter is a bracket pair, else nil
#   pattern      - zero-width Regexp matching just before the next character
#                  sequence that needs special handling inside the body
#   next_state   - always initialized to :initial
#
# The class is only defined once (see the +unless defined?+ guard at the end).
class StringState < Struct.new :type, :interpreted, :delim, :heredoc,
  :paren, :paren_depth, :pattern, :next_state

  # Opening bracket => matching closing bracket.
  CLOSING_PAREN = Hash[ *%w[
    ( )
    [ ]
    < >
    { }
  ] ]

  # Freeze the bracket strings so that an accidental in-place modification
  # (for example with <<) raises instead of silently corrupting the table.
  CLOSING_PAREN.each { |k,v| k.freeze; v.freeze }
  # Closing bracket => matching opening bracket.
  OPENING_PAREN = CLOSING_PAREN.invert

  # Upper bound for the number of entries kept in STRING_PATTERN and in
  # HEREDOC_PATTERN. The cache keys are derived from the scanned text (any
  # delimiter character, any heredoc identifier), so without a bound the
  # caches would keep growing for the lifetime of the process.
  MAX_CACHED_PATTERNS = 256

  # Memoizing table: [delim, interpreted] => pattern for a quoted string.
  #
  # The pattern is a lookahead that matches in front of the delimiter, its
  # closing bracket (if the delimiter is an opening bracket) or a backslash.
  # When +interpreted+ is truthy and the delimiter is not '#', it also matches
  # in front of '#{', '#$' and '#@'. Two values of +interpreted+ add further
  # stop characters: :regexp_symbols (the REGEXP_SYMBOLS characters) and
  # :words (whitespace).
  STRING_PATTERN = Hash.new do |h, k|
    delim, interpreted = *k
    delim_pattern = Regexp.escape(delim.dup)  # dup: workaround for old Ruby
    if closing_paren = CLOSING_PAREN[delim]
      # Copy before appending; marked as a JRuby fix in the original code.
      delim_pattern = delim_pattern[0..-1] if defined? JRUBY_VERSION
      delim_pattern << Regexp.escape(closing_paren)
    end
    # Backslashes always need attention, unless the backslash is the
    # delimiter itself (it is part of the character class already).
    delim_pattern << '\\\\' unless delim == '\\'

    # nil (interpolated as an empty string) for any other value.
    special_escapes =
      case interpreted
      when :regexp_symbols
        '| ' + REGEXP_SYMBOLS.source
      when :words
        '| \s'
      end

    pattern =
      if interpreted && delim != '#'
        / (?= [#{delim_pattern}] | \# [{$@] #{special_escapes} ) /mx
      else
        / (?= [#{delim_pattern}] #{special_escapes} ) /mx
      end

    # Keep the cache bounded; evicted patterns are simply rebuilt on demand.
    h.clear if h.size >= MAX_CACHED_PATTERNS
    h[k] = pattern
  end

  # Memoizing table: [delim, interpreted, indented] => pattern for a heredoc.
  #
  # The pattern is a lookahead that matches in front of the terminator line
  # (a newline, optional leading blanks if +indented+, the delimiter, end of
  # line) or in front of a backslash; if +interpreted+ is truthy also in
  # front of '#{', '#$' and '#@'. Capture group 1 is set (to an empty string)
  # only when the terminator line was found.
  HEREDOC_PATTERN = Hash.new do |h, k|
    delim, interpreted, indented = *k
    delim_pattern = Regexp.escape(delim.dup)  # dup: workaround for old Ruby
    # Wrapping the escaped delimiter in its own Regexp keeps it out of
    # extended (/x) mode when it is interpolated below.
    delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x

    pattern =
      if interpreted
        / (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx  # $1 set == end of heredoc
      else
        / (?= #{delim_pattern}() | \\ ) /mx
      end

    # Keep the cache bounded; evicted patterns are simply rebuilt on demand.
    h.clear if h.size >= MAX_CACHED_PATTERNS
    h[k] = pattern
  end

  # kind        - stored as the +type+ member
  # interpreted - false/nil, true, :regexp_symbols or :words; selects the
  #               stop characters of the pattern (see STRING_PATTERN)
  # delim       - opening delimiter of a quoted string, or the terminator
  #               identifier of a heredoc
  # heredoc     - falsy for quoted strings; :indented for heredocs whose
  #               terminator may be preceded by blanks; any other truthy
  #               value for plain heredocs
  def initialize kind, interpreted, delim, heredoc = false
    if heredoc
      pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ]
      delim = nil
    else
      pattern = STRING_PATTERN[ [delim, interpreted] ]
      if paren = CLOSING_PAREN[delim]
        # Bracket delimiters: remember the opener in +paren+, make the closer
        # the delimiter and start counting nesting at depth 1.
        delim, paren = paren, delim
        paren_depth = 1
      end
    end
    # paren and paren_depth are nil unless a bracket delimiter was found.
    super kind, interpreted, delim, heredoc, paren, paren_depth, pattern, :initial
  end
end unless defined? StringState
Chris@210 248
Chris@210 249 end
Chris@210 250
Chris@210 251 end
Chris@210 252 end