annotate vendor/plugins/coderay-0.9.2/lib/coderay/scanners/ruby/patterns.rb @ 866:2cd212a468b6 bug_152

Close obsolete branch bug_152
author Chris Cannam
date Wed, 11 May 2011 10:08:34 +0100
parents 513646585e45
children
rev   line source
Chris@0 1 # encoding: utf-8
Chris@0 2 module CodeRay
Chris@0 3 module Scanners
Chris@0 4
Chris@0 5 module Ruby::Patterns # :nodoc:
Chris@0 6
# Words that are registered as :reserved in IDENT_KIND below.
RESERVED_WORDS = %w[
  and def end in or unless begin
  defined? ensure module redo super until
  BEGIN break do next rescue then
  when END case else for retry
  while alias class elsif if not return
  undef yield
].freeze

# Keywords that are followed by a name rather than by an ordinary
# expression, grouped by the scanner state they are mapped to.
DEF_KEYWORDS = %w[ def ].freeze
UNDEF_KEYWORDS = %w[ undef ].freeze
ALIAS_KEYWORDS = %w[ alias ].freeze
MODULE_KEYWORDS = %w[ class module ].freeze

# Keyword => follow-up state. The list is constructed with :initial.
# NOTE(review): presumably :initial is what WordList returns for words
# that were never added - confirm against WordList.
DEF_NEW_STATE = WordList.new(:initial).
  add(DEF_KEYWORDS, :def_expected).
  add(UNDEF_KEYWORDS, :undef_expected).
  add(ALIAS_KEYWORDS, :alias_expected).
  add(MODULE_KEYWORDS, :module_expected)

# Built-in names that are registered as :pre_constant in IDENT_KIND.
# __ENCODING__ sits next to __FILE__ and __LINE__ so that all three
# pseudo-constants are classified the same way.
PREDEFINED_CONSTANTS = %w[
  nil true false self
  DATA ARGV ARGF __FILE__ __LINE__ __ENCODING__
].freeze

# Identifier => token kind. The list is constructed with :ident.
IDENT_KIND = WordList.new(:ident).
  add(RESERVED_WORDS, :reserved).
  add(PREDEFINED_CONSTANTS, :pre_constant)
Chris@0 34
# An identifier. When this interpreter's regexp engine accepts a non-ASCII
# letter for [[:alpha:]], the POSIX bracket form is used; otherwise the
# pattern falls back to "a word character that is not a digit, then word
# characters".
IDENT = 'ä'[/[[:alpha:]]/] == 'ä' ? /[[:alpha:]_][[:alnum:]_]*/ : /[^\W\d]\w*/

# An identifier with an optional ? or ! suffix.
METHOD_NAME = / #{IDENT} [?!]? /ox

# Operators that can be used as method names.
METHOD_NAME_OPERATOR = /
    \*\*?           # multiplication and power
  | [-+~]@?         # plus, minus, tilde with and without at sign
  | [\/%&|^`]       # division, modulo or format strings, and, or, xor, system
  | \[\]=?          # array getter and setter
  | << | >>         # append or shift left, shift right
  | <=?>? | >=?     # comparison, rocket operator
  | ===? | =~       # simple equality, case equality, match
  | ![~=@]?         # negation with and without at sign, not-equal and not-match
/ox

# Any method name: identifier with optional ?, ! or setter "=", or an
# operator. The "=" only counts as a setter suffix when it does not begin
# another token: it is rejected before "~" and ">" (so "=~" and "=>" stay
# intact) and before "=" (so "==" stays intact), except that "==>" is read
# as a setter followed by "=>".
METHOD_NAME_EX = / #{IDENT} (?: [?!] | = (?! [~>] | = (?!>) ) )? | #{METHOD_NAME_OPERATOR} /ox

# Variables written with a sigil.
INSTANCE_VARIABLE = / @ #{IDENT} /ox
CLASS_VARIABLE = / @@ #{IDENT} /ox
OBJECT_VARIABLE = / @@? #{IDENT} /ox
# "$" followed by an identifier, a number, one punctuation character, or a
# dash plus one word character.
GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} | #{OBJECT_VARIABLE} /ox
# A plain, instance, class or global variable.
VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox
Chris@0 55
# Token kind for a quote character. The backtick and the slash have their
# own kinds; every other key yields the default, :string.
QUOTE_TO_TYPE = Hash.new(:string).update(
  '`' => :shell,
  '/' => :regexp
)

# Any run (possibly empty) of the letters m, i, x, o, u, n, s, e.
REGEXP_MODIFIERS = /[mixounse]*/
# One regexp metacharacter.
REGEXP_SYMBOLS = /[|?*+(){}\[\].^$]/
Chris@0 64
# Integer literals. Underscores are accepted between digit groups.
# The radix letters are accepted in either case (0x/0X, 0b/0B), and octal
# may be written with a bare leading zero, "0_", or an explicit 0o/0O.
DECIMAL = /\d+(?:_\d+)*/
OCTAL = /0(?:[oO]|_?)[0-7]+(?:_[0-7]+)*/
HEXADECIMAL = /0[xX][0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
BINARY = /0[bB][01]+(?:_[01]+)*/

# Float parts: an exponent, or a fraction with an optional exponent.
EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox
# The empty group () only participates when a float suffix matched, so a
# caller can tell floats from integers by checking whether that group is set.
FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox
# Any numeric literal. The prefixed forms (including 0d/0D decimal) are
# only tried when the literal starts with a zero; they add no capture
# groups, so the float marker stays the pattern's only group.
NUMERIC = / (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} | 0[dD]#{DECIMAL} ) | #{FLOAT_OR_INT} ) /ox
Chris@0 74
# The start of a symbol literal: a colon followed by a method name
# (including operators and setters), a sigil-prefixed variable, or an
# opening quote.
SYMBOL = / : (?: #{METHOD_NAME_EX} | #{PREFIX_VARIABLE} | ['"] ) /ox

# Either a bare method name or a symbol.
METHOD_NAME_OR_SYMBOL = /
    #{METHOD_NAME_EX}
  | #{SYMBOL}
/ox
Chris@0 84
# What may follow a backslash in an escape sequence: a single-letter
# escape, up to three octal digits, "x" plus one or two hex digits, a
# Unicode escape (u plus four hex digits, or u{...} holding one or more
# whitespace-separated code points), or any single character. The last
# alternative is optional, so a backslash at the very end of the input
# still matches.
SIMPLE_ESCAPE = /
    [abefnrstv]
  | [0-7]{1,3}
  | x[0-9A-Fa-f]{1,2}
  | u (?: [0-9A-Fa-f]{4} | \{ [0-9A-Fa-f]{1,6} (?: [\ \t]+ [0-9A-Fa-f]{1,6} )* \} )
  | .?
/mx

# Control and meta escapes: one M-, C- or c prefix, further backslashed
# prefixes, then optionally the affected character (itself possibly a
# simple escape).
CONTROL_META_ESCAPE = /
  (?: M-|C-|c )
  (?: \\ (?: M-|C-|c ) )*
  (?: [^\\] | \\ #{SIMPLE_ESCAPE} )?
/mox

# Everything that can follow the backslash of an escape sequence.
ESCAPE = /
  #{CONTROL_META_ESCAPE} | #{SIMPLE_ESCAPE}
/mox

# A character literal: "?" followed by one non-space, non-backslash
# character or by a backslash escape.
CHARACTER = /
  \?
  (?:
      [^\s\\]
    | \\ #{ESCAPE}
  )
/mox
Chris@0 109
# The opening of a heredoc.
# NOTE: This is not completely correct, but nobody needs heredoc
# delimiters ending with \n.
HEREDOC_OPEN = /
  << (-)?                 # $1 = dash, present when the terminator may be indented
  (?:
      ( [A-Za-z_0-9]+ )   # $2 = bare delimiter
    |
      ( ["'`\/] )         # $3 = quote character, selects the string type
      ( [^\n]*? ) \3      # $4 = quoted delimiter
  )
/mx

# A =begin ... =end block. It runs to the end of the =end line, or to the
# end of the input when the block is never closed.
RUBYDOC = /
  =begin (?!\S)
  .*?
  (?: \Z | ^=end (?!\S) [^\n]* )
/mx

# Everything from __END__ at the end of a line up to the end of the input,
# or up to a following line that starts with #CODE.
DATA = /
  __END__$
  .*?
  (?: \Z | (?=^\#CODE) )
/mx
Chris@0 133
# Matches horizontal whitespace followed by something that can only start
# a value: a percent literal or regexp, a heredoc, a signed number or a
# character literal. This enables value_expected in method calls written
# without parentheses.
VALUE_FOLLOWS = /
  (?>[ \t\f\v]+)
  (?:
      [%\/][^\s=]
    | <<-?\S
    | [-+] \d
    | #{CHARACTER}
  )
/x

# Keywords after which a value is expected.
KEYWORDS_EXPECTING_VALUE = WordList.new.add(
  %w[
    and end in or unless begin
    defined? ensure redo super until
    break do next rescue then
    when case else for retry
    while elsif if not return
    yield
  ]
)

# Either a =begin/=end block or a trailing __END__ section.
RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo

# The line-anchored start of either of those two.
RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x
Chris@0 157
# The start of a percent literal: "%", an optional type letter ($1, empty
# when omitted) and a non-alphanumeric delimiter ($2). The symbol-array
# letters i and I are accepted alongside the word-array letters w and W.
FANCY_START_CORRECT = / % ( [iIqQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx

# Type letter => [token kind, second flag].
# NOTE(review): the second element is presumably the "interpreted" flag
# handed to StringState - confirm against the scanner.
FancyStringType = {
  'q' => [:string, false],
  'Q' => [:string, true],
  'r' => [:regexp, true],
  's' => [:symbol, false],
  'x' => [:shell, true]
}
# Array literals share the entries of %q and %Q.
FancyStringType['w'] = FancyStringType['q']
FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q']
FancyStringType['i'] = FancyStringType['q']
FancyStringType['I'] = FancyStringType['Q']
Chris@0 169
# Describes a string-like literal: its token type, whether it is
# interpreted, its closing delimiter, heredoc mode, bracket nesting, and
# the pattern that finds the next position of interest inside it.
class StringState < Struct.new(:type, :interpreted, :delim, :heredoc,
                               :paren, :paren_depth, :pattern, :next_state)

  # Opening bracket => closing bracket, for delimiters that come in pairs.
  CLOSING_PAREN = {
    '(' => ')',
    '[' => ']',
    '<' => '>',
    '{' => '}'
  }

  # Freeze the bracket strings so that changing one with << raises.
  CLOSING_PAREN.each_pair { |opener, closer| opener.freeze; closer.freeze }
  OPENING_PAREN = CLOSING_PAREN.invert

  # Cache of lookahead patterns for ordinary strings, keyed by
  # [delim, interpreted] and filled in on first access. A pattern matches
  # in front of the delimiter, its closing bracket if it has one, or a
  # backslash; for interpreted strings (unless the delimiter is "#") also
  # in front of "#" followed by "{", "$" or "@".
  STRING_PATTERN = Hash.new do |cache, key|
    delim, interpreted = *key
    stop_chars = Regexp.escape(delim.dup)  # dup: workaround for old Ruby
    closer = CLOSING_PAREN[delim]
    if closer
      stop_chars = stop_chars[0..-1] if defined? JRUBY_VERSION  # JRuby fix
      stop_chars << Regexp.escape(closer)
    end
    # Escaping a backslash delimiter already put the backslash in the set.
    stop_chars << '\\\\' unless delim == '\\'

    # Extra alternative for the two special interpretation modes.
    extra =
      if :regexp_symbols == interpreted
        '| ' + REGEXP_SYMBOLS.source
      elsif :words == interpreted
        '| \s'
      end

    cache[key] =
      if interpreted && !(delim == '#')
        / (?= [#{stop_chars}] | \# [{$@] #{extra} ) /mx
      else
        / (?= [#{stop_chars}] #{extra} ) /mx
      end
  end

  # Cache of lookahead patterns for heredocs, keyed by
  # [delim, interpreted, indented]. A pattern matches in front of the
  # terminator line (a newline, optional blanks when indented, the
  # delimiter, end of line) or a backslash; for interpreted heredocs also
  # in front of "#" followed by "{", "$" or "@".
  HEREDOC_PATTERN = Hash.new do |cache, key|
    delim, interpreted, indented = *key
    escaped = Regexp.escape(delim.dup)  # dup: workaround for old Ruby
    terminator = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new escaped } $ /x
    cache[key] =
      if interpreted
        / (?= #{terminator}() | \\ | \# [{$@] ) /mx  # $1 set == end of heredoc
      else
        / (?= #{terminator}() | \\ ) /mx
      end
  end

  # kind::        stored as +type+
  # interpreted:: stored as given; also selects the pattern variant
  # delim::       the opening delimiter, or the heredoc terminator
  # heredoc::     false for ordinary strings; :indented selects the
  #               heredoc pattern that allows leading blanks
  #
  # For heredocs +delim+ is stored as nil. For bracket delimiters +delim+
  # is stored as the closing bracket, +paren+ as the opening one and
  # +paren_depth+ as 1. +next_state+ is always :initial.
  def initialize kind, interpreted, delim, heredoc = false
    paren = paren_depth = nil
    if heredoc
      pattern = HEREDOC_PATTERN[[delim, interpreted, heredoc == :indented]]
      delim = nil
    else
      pattern = STRING_PATTERN[[delim, interpreted]]
      closer = CLOSING_PAREN[delim]
      if closer
        paren = delim
        delim = closer
        paren_depth = 1
      end
    end
    super kind, interpreted, delim, heredoc, paren, paren_depth, pattern, :initial
  end
end unless defined? StringState
Chris@0 234
Chris@0 235 end
Chris@0 236
Chris@0 237 end
Chris@0 238 end