To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / vendor / gems / coderay-0.9.7 / lib / coderay / scanners / python.rb @ 442:753f1380d6bc

History | View | Annotate | Download (8.97 KB)

1
module CodeRay
2
module Scanners
3
  
4
  # Based on Pygments' PythonLexer, see
  # http://dev.pocoo.org/projects/pygments/browser/pygments/lexers/agile.py.
6
  class Python < Scanner
7
    
8
    include Streamable
9
    
10
    register_for :python
11
    file_extension 'py'
12
    
13
    # Reserved words of the Python language.
    KEYWORDS = [
      'and', 'as', 'assert', 'break', 'class', 'continue', 'def',
      'del', 'elif', 'else', 'except', 'finally', 'for',
      'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'not',
      'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield',
      'nonlocal',  # new in Python 3
    ]

    # Words that are statements in Python 2 only.
    OLD_KEYWORDS = [
      'exec', 'print',  # gone in Python 3
    ]

    # Names of built-in functions and types.
    PREDEFINED_METHODS_AND_TYPES = %w[
      __import__ abs all any apply basestring bin bool buffer
      bytearray bytes callable chr classmethod cmp coerce compile
      complex delattr dict dir divmod enumerate eval execfile exit
      file filter float frozenset getattr globals hasattr hash hex id
      input int intern isinstance issubclass iter len list locals
      long map max min next object oct open ord pow property range
      raw_input reduce reload repr reversed round set setattr slice
      sorted staticmethod str sum super tuple type unichr unicode
      vars xrange zip
    ]

    # Names of built-in exception and warning classes.
    #
    # NotImplemented is deliberately absent: it is a built-in constant, not
    # an exception, and is listed in PREDEFINED_VARIABLES_AND_CONSTANTS.
    # Listing it in both places made its classification depend on the order
    # in which the lists are registered.
    PREDEFINED_EXCEPTIONS = %w[
      ArithmeticError AssertionError AttributeError
      BaseException DeprecationWarning EOFError EnvironmentError
      Exception FloatingPointError FutureWarning GeneratorExit IOError
      ImportError ImportWarning IndentationError IndexError KeyError
      KeyboardInterrupt LookupError MemoryError NameError
      NotImplementedError OSError OverflowError
      OverflowWarning PendingDeprecationWarning ReferenceError
      RuntimeError RuntimeWarning StandardError StopIteration
      SyntaxError SyntaxWarning SystemError SystemExit TabError
      TypeError UnboundLocalError UnicodeDecodeError
      UnicodeEncodeError UnicodeError UnicodeTranslateError
      UnicodeWarning UserWarning ValueError Warning ZeroDivisionError
    ]

    # Built-in constants and the conventional instance name +self+.
    PREDEFINED_VARIABLES_AND_CONSTANTS = [
      'False', 'True', 'None', # "keywords" since Python 3
      'self', 'Ellipsis', 'NotImplemented',
    ]
56
    
57
    # Lookup table used by scan_tokens to turn an identifier into a token
    # kind (IDENT_KIND[word]).
    # NOTE(review): presumably WordList.new(:ident) makes :ident the value
    # for unlisted words and a later #add wins for duplicated words --
    # confirm against WordList.
    IDENT_KIND = WordList.new(:ident).
      add(KEYWORDS, :keyword).
      add(OLD_KEYWORDS, :old_keyword).
      add(PREDEFINED_METHODS_AND_TYPES, :predefined).
      add(PREDEFINED_VARIABLES_AND_CONSTANTS, :pre_constant).
      add(PREDEFINED_EXCEPTIONS, :exception)
63
    
64
    # An identifier: a word character that is not a digit, followed by any
    # number of word characters.
    NAME = / [^\W\d] \w* /x
    # What may follow a backslash in a string: a single-character escape,
    # a hex escape (x + 1-2 hex digits) or an octal escape (1-3 digits).
    ESCAPE = / [abfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
    # What may follow a backslash for the u, U and N{name} escape forms.
    UNICODE_ESCAPE =  / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} | N\{[-\w ]+\} /x

    # Operators and delimiters. Longer alternatives come first so that,
    # for example, "**=" is not split into "*" and "*=".
    OPERATOR = /
      \.\.\. |          # ellipsis
      \.(?!\d) |        # dot but not decimal point
      [,;:()\[\]{}] |   # simple delimiters
      \/\/=? | \*\*=? | # special math
      [-+*\/%&|^]=? |   # ordinary math and binary logic
      [~`] |            # binary complement and inspection
      <<=? | >>=? | [<>=]=? | !=  # comparison and assignment
    /x

    # Cache: string delimiter => regexp matching exactly that delimiter.
    # The regexp is built on first access and stored in the hash.
    STRING_DELIMITER_REGEXP = Hash.new do |h, delimiter|
      h[delimiter] = Regexp.union delimiter
    end

    # Cache: string delimiter => regexp matching the shortest non-empty run
    # of characters (no backslash, no newline) that is followed by a
    # backslash, an end of line or the delimiter.
    STRING_CONTENT_REGEXP = Hash.new do |h, delimiter|
      h[delimiter] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delimiter)} ) /x
    end
85
    
86
    # Scanner state that scan_tokens switches to after a keyword
    # (DEF_NEW_STATE[keyword]): a name is expected after +def+ and +class+,
    # a module descriptor after +import+ and +from+.
    # NOTE(review): presumably every other keyword maps to :initial via the
    # WordList default -- confirm against WordList.
    DEF_NEW_STATE = WordList.new(:initial).
      add(%w(def), :def_expected).
      add(%w(import from), :include_expected).
      add(%w(class), :class_expected)
90
    
91
    # What scan_tokens accepts while in the :include_expected state: a
    # dotted name such as +os.path+, or a lone asterisk.
    DESCRIPTOR = /
      #{NAME}
      (?: \. #{NAME} )*
      | \*
    /x
96
    
97
    # Scans the Python source held by this scanner and appends the tokens
    # found to +tokens+.
    #
    # Each token is a two-element array: <tt>[text, kind]</tt> for ordinary
    # tokens, and <tt>[:open, :string]</tt> / <tt>[:close, :string]</tt>
    # around the parts of a string literal.
    #
    # The scanner is a small state machine:
    # [:initial]          ordinary code
    # [:string]           inside a string literal
    # [:def_expected]     just saw +def+; the next name is a method name
    # [:class_expected]   just saw +class+; the next name is a class name
    # [:include_expected] just saw +import+ or +from+; module descriptors follow
    #
    # tokens::  the collection to append to; only <tt><<</tt> is used on it.
    # options:: not used by this method.
    #
    # Returns +tokens+.
    def scan_tokens tokens, options

      state = :initial
      string_delimiter = nil
      string_raw = false
      last_token_dot = false
      unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
      # Keywords seen so far in import statements (:from, :import, :as).
      from_import_state = []

      until eos?

        kind = nil
        match = nil

        if state == :string
          if scan(STRING_DELIMITER_REGEXP[string_delimiter])
            tokens << [matched, :delimiter]
            tokens << [:close, :string]
            state = :initial
            next
          elsif string_delimiter.size == 3 && scan(/\n/)
            # Only triple-quoted strings may span several lines.
            kind = :content
          elsif scan(STRING_CONTENT_REGEXP[string_delimiter])
            kind = :content
          elsif !string_raw && scan(/ \\ #{ESCAPE} /ox)
            kind = :char
          elsif scan(/ \\ #{UNICODE_ESCAPE} /ox)
            kind = :char
          elsif scan(/ \\ . /x)
            kind = :content
          elsif scan(/ \\ | $ /x)
            # Lone backslash or end of line inside a single-line string:
            # close the string and flag the (possibly empty) match.
            tokens << [:close, :string]
            kind = :error
            state = :initial
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens, state
          end

        elsif match = scan(/ [ \t]+ | \\\n /x)
          tokens << [match, :space]
          next

        elsif match = scan(/\n/)
          tokens << [match, :space]
          # An import statement ends at the end of the line.
          state = :initial if state == :include_expected
          next

        elsif match = scan(/ \# [^\n]* /mx)
          tokens << [match, :comment]
          next

        elsif state == :initial

          if scan(/#{OPERATOR}/o)
            kind = :operator

          elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
            tokens << [:open, :string]
            string_delimiter = self[2]
            string_raw = false
            modifiers = self[1]
            unless modifiers.empty?
              # The prefix is matched case-insensitively, so the raw flag
              # has to be detected case-insensitively too (R"..", UR'..').
              string_raw = modifiers.downcase.include?('r')
              tokens << [modifiers, :modifier]
              match = string_delimiter
            end
            state = :string
            kind = :delimiter

          # TODO: backticks

          elsif match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
            kind = IDENT_KIND[match]
            # TODO: keyword arguments
            # A name after a dot is an attribute, whatever it is called.
            kind = :ident if last_token_dot
            if kind == :old_keyword
              # Followed by "(" it is used like a function (Python 3 style).
              kind = check(/\(/) ? :ident : :keyword
            elsif kind == :predefined && check(/ *=/)
              # Followed by "=" the built-in name is treated as an
              # ordinary identifier.
              kind = :ident
            elsif kind == :keyword
              state = DEF_NEW_STATE[match]
              from_import_state << match.to_sym if state == :include_expected
            end

          elsif scan(/@[a-zA-Z0-9_.]+[lL]?/)
            kind = :decorator

          elsif scan(/0[xX][0-9A-Fa-f]+[lL]?/)
            kind = :hex

          elsif scan(/0[bB][01]+[lL]?/)
            kind = :bin

          elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
            kind = :float
            if scan(/[jJ]/)
              match << matched
              kind = :imaginary
            end

          elsif scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
            kind = :oct

          elsif match = scan(/\d+([lL])?/)
            kind = :integer
            # A long suffix and an imaginary suffix exclude each other.
            if self[1] == nil && scan(/[jJ]/)
              match << matched
              kind = :imaginary
            end

          else
            getch
            kind = :error

          end

        elsif state == :def_expected
          state = :initial
          if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
            kind = :method
          else
            next
          end

        elsif state == :class_expected
          state = :initial
          if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
            kind = :class
          else
            next
          end

        elsif state == :include_expected
          if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o)
            kind = :include
            if match == 'as'
              kind = :keyword
              from_import_state << :as
            elsif from_import_state.first == :from && match == 'import'
              kind = :keyword
              from_import_state << :import
            elsif from_import_state.last == :as
              # kind = match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
              kind = :ident
              from_import_state.pop
            elsif IDENT_KIND[match] == :keyword
              # Any other keyword ends the import statement; put it back
              # and rescan it as ordinary code.
              unscan
              match = nil
              state = :initial
              next
            end
          elsif match = scan(/,/)
            from_import_state.pop if from_import_state.last == :as
            kind = :operator
          else
            from_import_state = []
            state = :initial
            next
          end

        else
          raise_inspect 'Unknown state', tokens, state

        end

        # Branches that called scan without keeping its result rely on
        # the scanner's last match.
        match ||= matched
        if $CODERAY_DEBUG && !kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens, state
        end
        raise_inspect 'Empty token', tokens, state unless match

        last_token_dot = match == '.'

        tokens << [match, kind]

      end

      # Close a string left open at the end of the input.
      if state == :string
        tokens << [:close, :string]
      end

      tokens
    end
281
    
282
  end
283
  
284
end
285
end