To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / .svn / pristine / d7 / d79648acb8390e7dfe4f791129d77e634240a8ef.svn-base @ 1297:0a574315af3e

History | View | Annotate | Download (9.83 KB)

1
module CodeRay
module Scanners
  
  # Scanner for Python. Supports Python 3.
  # 
  # Based on pygments' PythonLexer, see
  # http://dev.pocoo.org/projects/pygments/browser/pygments/lexers/agile.py.
  #
  # The scanner is a small state machine (see #scan_tokens) with the states
  # :initial, :string, :def_expected, :class_expected and :include_expected.
  class Python < Scanner
    
    register_for :python
    file_extension 'py'
    
    # Reserved words that are highlighted as :keyword.
    KEYWORDS = [
      'and', 'as', 'assert', 'break', 'class', 'continue', 'def',
      'del', 'elif', 'else', 'except', 'finally', 'for',
      'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'not',
      'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield',
      'nonlocal',  # new in Python 3
    ]  # :nodoc:
    
    # Words that are only treated as keywords when not followed directly by
    # an opening parenthesis (see the :old_keyword handling in #scan_tokens).
    OLD_KEYWORDS = [
      'exec', 'print',  # gone in Python 3
    ]  # :nodoc:
    
    # Builtin functions and types, highlighted as :predefined.
    PREDEFINED_METHODS_AND_TYPES = %w[
      __import__ abs all any apply basestring bin bool buffer
      bytearray bytes callable chr classmethod cmp coerce compile
      complex delattr dict dir divmod enumerate eval execfile exit
      file filter float frozenset getattr globals hasattr hash hex id
      input int intern isinstance issubclass iter len list locals
      long map max min next object oct open ord pow property range
      raw_input reduce reload repr reversed round set setattr slice
      sorted staticmethod str sum super tuple type unichr unicode
      vars xrange zip
    ]  # :nodoc:
    
    # Builtin exception and warning names, highlighted as :exception.
    PREDEFINED_EXCEPTIONS = %w[
      ArithmeticError AssertionError AttributeError
      BaseException DeprecationWarning EOFError EnvironmentError
      Exception FloatingPointError FutureWarning GeneratorExit IOError
      ImportError ImportWarning IndentationError IndexError KeyError
      KeyboardInterrupt LookupError MemoryError NameError
      NotImplemented NotImplementedError OSError OverflowError
      OverflowWarning PendingDeprecationWarning ReferenceError
      RuntimeError RuntimeWarning StandardError StopIteration
      SyntaxError SyntaxWarning SystemError SystemExit TabError
      TypeError UnboundLocalError UnicodeDecodeError
      UnicodeEncodeError UnicodeError UnicodeTranslateError
      UnicodeWarning UserWarning ValueError Warning ZeroDivisionError
    ]  # :nodoc:
    
    # Builtin constants and conventional names, highlighted as
    # :predefined_constant.
    PREDEFINED_VARIABLES_AND_CONSTANTS = [
      'False', 'True', 'None',  # "keywords" since Python 3
      'self', 'Ellipsis', 'NotImplemented',
    ]  # :nodoc:
    
    # Maps an identifier to its token kind; anything not listed is :ident.
    # Lists are added in this order, so a word present in several lists
    # (e.g. NotImplemented) is registered more than once.
    IDENT_KIND = WordList.new(:ident).
      add(KEYWORDS, :keyword).
      add(OLD_KEYWORDS, :old_keyword).
      add(PREDEFINED_METHODS_AND_TYPES, :predefined).
      add(PREDEFINED_VARIABLES_AND_CONSTANTS, :predefined_constant).
      add(PREDEFINED_EXCEPTIONS, :exception)  # :nodoc:
    
    # An identifier: a word character that is not a digit, then word characters.
    NAME = / [^\W\d] \w* /x  # :nodoc:
    # What may follow a backslash in a simple escape sequence.
    ESCAPE = / [abfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
    # What may follow a backslash in a \u, \U or \N{...} escape.
    UNICODE_ESCAPE =  / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} | N\{[-\w ]+\} /x  # :nodoc:
    
    OPERATOR = /
      \.\.\. |          # ellipsis
      \.(?!\d) |        # dot but not decimal point
      [,;:()\[\]{}] |   # simple delimiters
      \/\/=? | \*\*=? | # special math
      [-+*\/%&|^]=? |   # ordinary math and binary logic
      [~`] |            # binary complement and inspection
      <<=? | >>=? | [<>=]=? | !=  # comparison and assignment
    /x  # :nodoc:
    
    # Lazily built cache: string delimiter => regexp matching that delimiter.
    STRING_DELIMITER_REGEXP = Hash.new { |h, delimiter|
      h[delimiter] = Regexp.union delimiter  # :nodoc:
    }
    
    # Lazily built cache: string delimiter => regexp matching the shortest
    # run of characters (no backslash, no newline) that is followed by a
    # backslash, an end of line, or the delimiter.
    STRING_CONTENT_REGEXP = Hash.new { |h, delimiter|
      h[delimiter] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delimiter)} ) /x  # :nodoc:
    }
    
    # Maps a keyword to the scanner state that follows it; default :initial.
    DEF_NEW_STATE = WordList.new(:initial).
      add(%w(def), :def_expected).
      add(%w(import from), :include_expected).
      add(%w(class), :class_expected)  # :nodoc:
    
    # A dotted name, or a lone asterisk, as used in import statements.
    DESCRIPTOR = /
      #{NAME}
      (?: \. #{NAME} )*
      | \*
    /x  # :nodoc:
    
    # Optional indentation and u/r prefix followed by a triple quote.
    DOCSTRING_COMING = /
      [ \t]* u?r? ("""|''')
    /x  # :nodoc:
    
  protected
    
    # Tokenizes the scanner's input and feeds the tokens to +encoder+.
    #
    # encoder:: receives text_token / begin_group / end_group calls.
    # options:: accepted for interface compatibility; not read here.
    #
    # Returns +encoder+.
    #
    # NOTE(review): scan, check, match?, getch, unscan, peek, matched, eos?,
    # string and self[] are presumably inherited from StringScanner through
    # Scanner - confirm against the base class.
    def scan_tokens encoder, options
      
      state = :initial
      string_delimiter = nil
      string_raw = false
      string_type = nil
      docstring_coming = match?(/#{DOCSTRING_COMING}/o)
      last_token_dot = false
      unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
      # Stack of :from / :import / :as markers for the current import statement.
      from_import_state = []
      
      until eos?
        
        if state == :string
          if match = scan(STRING_DELIMITER_REGEXP[string_delimiter])
            encoder.text_token match, :delimiter
            encoder.end_group string_type
            string_type = nil
            state = :initial
            next
          elsif string_delimiter.size == 3 && match = scan(/\n/)
            # Only triple-quoted strings may span lines.
            encoder.text_token match, :content
          elsif match = scan(STRING_CONTENT_REGEXP[string_delimiter])
            encoder.text_token match, :content
          elsif !string_raw && match = scan(/ \\ #{ESCAPE} /ox)
            encoder.text_token match, :char
          elsif match = scan(/ \\ #{UNICODE_ESCAPE} /ox)
            encoder.text_token match, :char
          elsif match = scan(/ \\ . /x)
            encoder.text_token match, :content
          elsif match = scan(/ \\ | $ /x)
            # Dangling backslash or unterminated string: close the group and
            # report an error token (empty when the end of line was matched).
            encoder.end_group string_type
            string_type = nil
            encoder.text_token match, :error
            state = :initial
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), encoder, state
          end
        
        elsif match = scan(/ [ \t]+ | \\?\n /x)
          encoder.text_token match, :space
          if match == "\n"
            if state == :include_expected
              # A bare newline ends the import statement. Drop its markers so
              # the stack does not grow with every import line and a stale
              # first element cannot confuse the next "from ... import".
              state = :initial
              from_import_state = []
            end
            docstring_coming = true if match?(/#{DOCSTRING_COMING}/o)
          end
          next
        
        elsif match = scan(/ \# [^\n]* /mx)
          encoder.text_token match, :comment
          next
        
        elsif state == :initial
          
          if match = scan(/#{OPERATOR}/o)
            encoder.text_token match, :operator
          
          elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
            string_delimiter = self[2]
            string_type = docstring_coming ? :docstring : :string
            docstring_coming = false if docstring_coming
            encoder.begin_group string_type
            string_raw = false
            modifiers = self[1]
            unless modifiers.empty?
              # The prefix is matched case-insensitively, so both r and R
              # must mark the string as raw.
              string_raw = modifiers.downcase.include?('r')
              encoder.text_token modifiers, :modifier
              match = string_delimiter
            end
            state = :string
            encoder.text_token match, :delimiter
          
          # TODO: backticks
          
          elsif match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
            kind = IDENT_KIND[match]
            # TODO: keyword arguments
            kind = :ident if last_token_dot
            if kind == :old_keyword
              kind = check(/\(/) ? :ident : :keyword
            elsif kind == :predefined && check(/ *=(?!=)/)
              # A builtin name followed by a single "=" is being assigned or
              # used as a keyword argument; "==" is a comparison and keeps
              # the :predefined kind.
              kind = :ident
            elsif kind == :keyword
              state = DEF_NEW_STATE[match]
              from_import_state << match.to_sym if state == :include_expected
            end
            encoder.text_token match, kind
          
          elsif match = scan(/@[a-zA-Z0-9_.]+/)
            encoder.text_token match, :decorator
          
          elsif match = scan(/0[xX][0-9A-Fa-f]+[lL]?/)
            encoder.text_token match, :hex
          
          elsif match = scan(/0[bB][01]+[lL]?/)
            encoder.text_token match, :binary
          
          elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
            if scan(/[jJ]/)
              match << matched
              encoder.text_token match, :imaginary
            else
              encoder.text_token match, :float
            end
          
          elsif match = scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
            encoder.text_token match, :octal
          
          elsif match = scan(/\d+([lL])?/)
            # A long suffix and an imaginary suffix are mutually exclusive.
            if self[1] == nil && scan(/[jJ]/)
              match << matched
              encoder.text_token match, :imaginary
            else
              encoder.text_token match, :integer
            end
          
          else
            encoder.text_token getch, :error
          
          end
            
        elsif state == :def_expected
          state = :initial
          if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
            encoder.text_token match, :method
          else
            next
          end
        
        elsif state == :class_expected
          state = :initial
          if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
            encoder.text_token match, :class
          else
            next
          end
          
        elsif state == :include_expected
          if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o)
            if match == 'as'
              encoder.text_token match, :keyword
              from_import_state << :as
            elsif from_import_state.first == :from && match == 'import'
              encoder.text_token match, :keyword
              from_import_state << :import
            elsif from_import_state.last == :as
              # The name after "as" is a local alias, not an imported module.
              # encoder.text_token match, match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
              encoder.text_token match, :ident
              from_import_state.pop
            elsif IDENT_KIND[match] == :keyword
              # Any other keyword ends the import; rescan it in :initial.
              unscan
              match = nil
              state = :initial
              next
            else
              encoder.text_token match, :include
            end
          elsif match = scan(/,/)
            from_import_state.pop if from_import_state.last == :as
            encoder.text_token match, :operator
          else
            from_import_state = []
            state = :initial
            next
          end
          
        else
          raise_inspect 'Unknown state', encoder, state
          
        end
        
        # Remembered so that a name after a dot is always a plain :ident.
        last_token_dot = match == '.'
        
      end
      
      # Close a string group left open by the end of input.
      if state == :string
        encoder.end_group string_type
      end
      
      encoder
    end
    
  end
  
end
end