To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / vendor / gems / coderay-0.9.7 / lib / coderay / scanners / ruby / patterns.rb @ 442:753f1380d6bc

History | View | Annotate | Download (7.24 KB)

1
# encoding: utf-8
2
module CodeRay
3
module Scanners
4

    
5
  module Ruby::Patterns  # :nodoc:
6

    
7
    # Ruby's reserved words as a plain list of strings.  It is fed into
    # IDENT_KIND under the :reserved kind.
    RESERVED_WORDS = %w[
      and def end in or unless begin
      defined? ensure module redo super until
      BEGIN break do next rescue then
      when END case else for retry
      while alias class elsif if not return
      undef yield
    ]
15

    
16
    # Keyword groups that DEF_NEW_STATE registers, one group per state symbol.
    DEF_KEYWORDS = %w[ def ]
    UNDEF_KEYWORDS = %w[ undef ]
    ALIAS_KEYWORDS = %w[ alias ]
    MODULE_KEYWORDS = %w[ class module ]
20
    # Lookup from a definition keyword to the symbol registered for it
    # (:def_expected, :undef_expected, :alias_expected, :module_expected).
    # NOTE(review): WordList is defined elsewhere; presumably the argument to
    # +new+ (:initial) is what unlisted words map to -- confirm.
    DEF_NEW_STATE = WordList.new(:initial).
      add(DEF_KEYWORDS, :def_expected).
      add(UNDEF_KEYWORDS, :undef_expected).
      add(ALIAS_KEYWORDS, :alias_expected).
      add(MODULE_KEYWORDS, :module_expected)
25

    
26
    # Words that IDENT_KIND registers under the :pre_constant kind.
    PREDEFINED_CONSTANTS = %w[
      nil true false self
      DATA ARGV ARGF
      __FILE__ __LINE__ __ENCODING__
    ]
31

    
32
    # Lookup from an identifier to its kind: reserved words are registered as
    # :reserved and predefined constants as :pre_constant.
    # NOTE(review): WordList is defined elsewhere; presumably :ident is the
    # value returned for every other word -- confirm.
    IDENT_KIND = WordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(PREDEFINED_CONSTANTS, :pre_constant)
35

    
36
    # Choose the identifier pattern that fits the running regexp engine.
    #
    # The probe asks whether /\w/u accepts a multibyte character.  It has to
    # be a non-empty string: \w always consumes one character, so probing
    # with '' can never succeed and would leave the first branch unreachable.
    # The probe is spelled with byte escapes so it survives tools that drop
    # non-ASCII characters from the source.
    if /\w/u === "\xE2\x88\x91"  # UTF-8 bytes of U+2211
      # MRI 1.8.6, 1.8.7
      IDENT = /[^\W\d]\w*/
    else
      if //.respond_to? :encoding
        # MRI 1.9.1, 1.9.2
        IDENT = Regexp.new '[\p{L}\p{M}\p{Pc}\p{Sm}&&[^\x00-\x40\x5b-\x5e\x60\x7b-\x7f]][\p{L}\p{M}\p{N}\p{Pc}\p{Sm}&&[^\x00-\x2f\x3a-\x40\x5b-\x5e\x60\x7b-\x7f]]*'
      else
        # JRuby, Rubinius
        IDENT = /[^\x00-\x40\x5b-\x5e\x60\x7b-\x7f][^\x00-\x2f\x3a-\x40\x5b-\x5e\x60\x7b-\x7f]*/
      end
    end

    # An identifier with an optional trailing ? or !.
    METHOD_NAME = / #{IDENT} [?!]? /ox
    # Operators that can be used as method names.
    METHOD_NAME_OPERATOR = /
      \*\*?           # multiplication and power
      | [-+~]@?       # plus, minus, tilde with and without at sign
      | [\/%&|^`]     # division, modulo or format strings, and, or, xor, system
      | \[\]=?        # array getter and setter
      | << | >>       # append or shift left, shift right
      | <=?>? | >=?   # comparison, rocket operator
      | ===? | =~     # simple equality, case equality, match
      | ![~=@]?       # negation with and without at sign, not-equal and not-match
    /ox
    # Like METHOD_NAME, but also accepts setter names (a trailing "=" that is
    # not the start of "=>") and operator method names.
    METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox
    INSTANCE_VARIABLE = / @ #{IDENT} /ox
    CLASS_VARIABLE = / @@ #{IDENT} /ox
    # Instance or class variable.
    OBJECT_VARIABLE = / @@? #{IDENT} /ox
    # "$" followed by an identifier, a number, one punctuation character, or
    # a dash plus one word character.
    GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
    # Any variable that carries a sigil ($, @ or @@).
    PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} | #{OBJECT_VARIABLE} /ox
    # Any variable; the @ / @@ sigil is optional here.
    VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox
67

    
68
    # Type symbol selected by a quote character.  Backtick and slash have
    # their own entries; every other key falls back to :string through the
    # hash default.
    QUOTE_TO_TYPE = {
      '`' => :shell,
      '/' => :regexp,
    }
    QUOTE_TO_TYPE.default = :string
73

    
74
    # Flag letters accepted as regexp modifiers, and the characters treated
    # as regexp symbols (StringState adds the latter to its pattern when
    # +interpreted+ is :regexp_symbols).
    REGEXP_MODIFIERS = /[mixounse]*/
    REGEXP_SYMBOLS = /[|?*+(){}\[\].^$]/

    # Integer literals; digit groups may be separated by single underscores.
    DECIMAL = /\d+(?:_\d+)*/
    OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
    HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
    BINARY = /0b[01]+(?:_[01]+)*/

    EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
    FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox
    # The empty capture group participates only when a float suffix matched,
    # so a non-nil group 1 tells the caller the literal is a float.
    FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox
    # Prefixed forms are tried only when the literal starts with 0; anything
    # else (and a 0 that fits none of them) falls through to FLOAT_OR_INT.
    NUMERIC = / (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} ) /ox
86

    
87
    # A colon followed by a method name (including setters and operators),
    # a sigil-prefixed variable, or an opening single/double quote.
    SYMBOL = /
      :
      (?:
        #{METHOD_NAME_EX}
      | #{PREFIX_VARIABLE}
      | ['"]
      )
    /ox
    METHOD_NAME_OR_SYMBOL = / #{METHOD_NAME_EX} | #{SYMBOL} /ox
96

    
97
    # What may follow a backslash in a simple escape: a single letter escape,
    # one to three octal digits, "x" plus one or two hex digits, or -- as a
    # fallback -- any single character (or nothing at all).
    SIMPLE_ESCAPE = /
        [abefnrstv]
      |  [0-7]{1,3}
      | x[0-9A-Fa-f]{1,2}
      | .?
    /mx

    # Control/meta escapes: one M-, C- or c prefix, any number of further
    # backslash-introduced prefixes, then optionally the affected character
    # (given literally or as a backslash plus a simple escape).
    CONTROL_META_ESCAPE = /
      (?: M-|C-|c )
      (?: \\ (?: M-|C-|c ) )*
      (?: [^\\] | \\ #{SIMPLE_ESCAPE} )?
    /mox

    # Everything that may follow the backslash of an escape sequence.
    ESCAPE = /
      #{CONTROL_META_ESCAPE} | #{SIMPLE_ESCAPE}
    /mox

    # A "?x" character literal: a question mark followed by one character
    # that is neither whitespace nor a backslash, or by an escape sequence.
    CHARACTER = /
      \?
      (?:
        [^\s\\]
      | \\ #{ESCAPE}
      )
    /mox
121

    
122
    # Opening of a heredoc: "<<", an optional "-", then either a bare
    # identifier or a delimiter wrapped in one of the quote characters
    # " ' ` / (the closing quote must equal the opening one).
    #
    # NOTE: This is not completely correct, but
    # nobody needs heredoc delimiters ending with \n.
    # Also, delimiters starting with numbers are allowed,
    # but they are more often than not a false positive.
    HEREDOC_OPEN = /
      << (-)?              # $1 = float
      (?:
        ( #{IDENT} )       # $2 = delim
      |
        ( ["'`\/] )        # $3 = quote, type
        ( [^\n]*? ) \3     # $4 = delim
      )
    /mx
135

    
136
    # An "=begin" block: from "=begin" (not followed by a non-space
    # character) up to and including the rest of the first line that starts
    # with "=end", or up to the end of the input if no such line exists.
    RUBYDOC = /
      =begin (?!\S)
      .*?
      (?: \Z | ^=end (?!\S) [^\n]* )
    /mx

    # The data section: from "__END__" at the end of a line up to the end of
    # the input, or up to (not including) a line that starts with "#CODE".
    DATA = /
      __END__$
      .*?
      (?: \Z | (?=^\#CODE) )
    /mx
147
    
148
    # Checks for a valid value to follow. This enables
    # value_expected in method calls without parentheses.
    #
    # Requires at least one space, tab, form feed or vertical tab, followed
    # by one of: "%" or "/" plus a character that is neither whitespace nor
    # "="; "<<" or "<<-" plus a non-space character; a sign plus a digit; or
    # a character literal.
    VALUE_FOLLOWS = /
      (?>[ \t\f\v]+)
      (?:
        [%\/][^\s=]
      | <<-?\S
      | [-+] \d
      | #{CHARACTER}
      )
    /x
    # Keywords collected into a WordList; going by the constant's name these
    # are the keywords after which a value is expected.
    # NOTE(review): WordList is defined elsewhere -- the default value used
    # by +new+ and +add+ without extra arguments is not visible here.
    KEYWORDS_EXPECTING_VALUE = WordList.new.add(%w[
      and end in or unless begin
      defined? ensure redo super until
      break do next rescue then
      when case else for retry
      while elsif if not return
      yield
    ])
167

    
168
    # Either an "=begin" block or the data section.
    RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo

    # Start of either construct; both must begin at the start of a line.
    RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x

    # Start of a "%" literal: "%", an optional type letter from qQwWxsr
    # (group 1, empty when absent) and a non-alphanumeric delimiter (group 2).
    FANCY_START_CORRECT = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx
173

    
174
    # Type letter of a "%" literal => [type symbol, flag].  The flag is
    # handed on by callers outside this file.
    # NOTE(review): presumably the flag means "interpreted" (it lines up with
    # StringState's +interpreted+ argument) -- confirm against the scanner.
    FancyStringType = {
      'q' => [:string, false],
      'Q' => [:string, true],
      'r' => [:regexp, true],
      's' => [:symbol, false],
      'x' => [:shell, true]
    }
    # %w shares the %q entry; %W and the bare "%" form share the %Q entry.
    FancyStringType['w'] = FancyStringType['q']
    FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q']
183

    
184
    # State record for a string-like literal.  In addition to the plain
    # struct members it carries +pattern+, a zero-width (lookahead-only)
    # regexp built from the delimiter; the scanner that consumes it lives
    # outside this file.
    class StringState < Struct.new :type, :interpreted, :delim, :heredoc,
      :paren, :paren_depth, :pattern, :next_state

      # Opening bracket => matching closing bracket.
      CLOSING_PAREN = Hash[ *%w[
        ( )
        [ ]
        < >
        { }
      ] ]

      CLOSING_PAREN.each { |k,v| k.freeze; v.freeze }  # debug, if I try to change it with <<
      # Closing bracket => matching opening bracket.
      OPENING_PAREN = CLOSING_PAREN.invert

      # Memoizing table, keyed by [delim, interpreted].
      #
      # The stored regexp is a lookahead that succeeds in front of the
      # delimiter, its closing counterpart (when the delimiter is an opening
      # bracket) or a backslash.  When +interpreted+ is truthy and the
      # delimiter is not "#", it also succeeds in front of "#" followed by
      # "{", "$" or "@".  The +interpreted+ values :regexp_symbols and :words
      # add REGEXP_SYMBOLS and whitespace respectively.
      STRING_PATTERN = Hash.new do |h, k|
        delim, interpreted = *k
        delim_pattern = Regexp.escape(delim.dup)  # dup: workaround for old Ruby
        if closing_paren = CLOSING_PAREN[delim]
          delim_pattern = delim_pattern[0..-1] if defined? JRUBY_VERSION  # JRuby fix
          delim_pattern << Regexp.escape(closing_paren)
        end
        # delim_pattern ends up inside a character class; add the backslash
        # unless the delimiter itself already is one.
        delim_pattern << '\\\\' unless delim == '\\'

        # nil (interpolated as an empty string) for any other value.
        special_escapes =
          case interpreted
          when :regexp_symbols
            '| ' + REGEXP_SYMBOLS.source
          when :words
            '| \s'
          end

        h[k] =
          if interpreted and not delim == '#'
            / (?= [#{delim_pattern}] | \# [{$@] #{special_escapes} ) /mx
          else
            / (?= [#{delim_pattern}] #{special_escapes} ) /mx
          end
      end

      # Memoizing table, keyed by [delim, interpreted, indented].
      #
      # The stored regexp is a lookahead that succeeds in front of a newline
      # followed by the delimiter at the end of a line (preceded by spaces
      # and tabs when +indented+ is truthy), or in front of a backslash.
      # When +interpreted+ is truthy it also succeeds in front of "#"
      # followed by "{", "$" or "@".  The delimiter is wrapped in its own
      # Regexp so that the surrounding /x flag does not apply to it.
      HEREDOC_PATTERN = Hash.new do |h, k|
        delim, interpreted, indented = *k
        delim_pattern = Regexp.escape(delim.dup)  # dup: workaround for old Ruby
        delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x
        h[k] =
          if interpreted
            / (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx  # $1 set == end of heredoc
          else
            / (?= #{delim_pattern}() | \\ ) /mx
          end
      end

      # kind::        stored in the +type+ member.
      # interpreted:: stored as is and used to select the pattern.
      # delim::       delimiter string.
      # heredoc::     false for ordinary literals.  Any truthy value selects
      #               a heredoc pattern; :indented selects the variant that
      #               allows leading spaces/tabs before the terminator.
      #
      # For heredocs the +delim+ member is set to nil.  For ordinary literals
      # whose delimiter is an opening bracket, +delim+ becomes the closing
      # bracket, +paren+ the opening one and +paren_depth+ starts at 1;
      # otherwise +paren+ and +paren_depth+ stay nil.  +next_state+ always
      # starts as :initial.
      def initialize kind, interpreted, delim, heredoc = false
        if heredoc
          pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ]
          delim = nil
        else
          pattern = STRING_PATTERN[ [delim, interpreted] ]
          if paren = CLOSING_PAREN[delim]
            delim, paren = paren, delim
            paren_depth = 1
          end
        end
        super kind, interpreted, delim, heredoc, paren, paren_depth, pattern, :initial
      end
    end unless defined? StringState
248

    
249
  end
250

    
251
end
252
end