module CodeRay
module Scanners

  # Scanner that splits Python source text into [text, kind] tokens.
  #
  # Based on pygments' PythonLexer, see
  # http://dev.pocoo.org/projects/pygments/browser/pygments/lexers/agile.py.
  #
  # The word lists below cover both Python 2 and Python 3 vocabulary; names
  # that only exist as keywords in Python 2 (+exec+, +print+) are classified
  # depending on how they are used (see #scan_tokens).
  class Python < Scanner

    include Streamable

    register_for :python
    file_extension 'py'

    # Reserved words that are always highlighted as keywords.
    KEYWORDS = [
      'and', 'as', 'assert', 'break', 'class', 'continue', 'def',
      'del', 'elif', 'else', 'except', 'finally', 'for',
      'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'not',
      'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield',
      'nonlocal',  # new in Python 3
    ]

    # Statements in Python 2 that became plain functions in Python 3.
    OLD_KEYWORDS = [
      'exec', 'print',  # gone in Python 3
    ]

    # Builtin functions and types.
    PREDEFINED_METHODS_AND_TYPES = %w[
      __import__ abs all any apply basestring bin bool buffer
      bytearray bytes callable chr classmethod cmp coerce compile
      complex delattr dict dir divmod enumerate eval execfile exit
      file filter float frozenset getattr globals hasattr hash hex id
      input int intern isinstance issubclass iter len list locals
      long map max min next object oct open ord pow property range
      raw_input reduce reload repr reversed round set setattr slice
      sorted staticmethod str sum super tuple type unichr unicode
      vars xrange zip
    ]

    # Builtin exception and warning classes.
    PREDEFINED_EXCEPTIONS = %w[
      ArithmeticError AssertionError AttributeError
      BaseException DeprecationWarning EOFError EnvironmentError
      Exception FloatingPointError FutureWarning GeneratorExit IOError
      ImportError ImportWarning IndentationError IndexError KeyError
      KeyboardInterrupt LookupError MemoryError NameError
      NotImplemented NotImplementedError OSError OverflowError
      OverflowWarning PendingDeprecationWarning ReferenceError
      RuntimeError RuntimeWarning StandardError StopIteration
      SyntaxError SyntaxWarning SystemError SystemExit TabError
      TypeError UnboundLocalError UnicodeDecodeError
      UnicodeEncodeError UnicodeError UnicodeTranslateError
      UnicodeWarning UserWarning ValueError Warning ZeroDivisionError
    ]

    # Builtin constants and conventional names.
    PREDEFINED_VARIABLES_AND_CONSTANTS = [
      'False', 'True', 'None',  # "keywords" since Python 3
      'self', 'Ellipsis', 'NotImplemented',
    ]

    # Maps an identifier to its token kind; unknown words yield :ident.
    # Later +add+ calls are listed after earlier ones, so a word present in
    # two lists (e.g. NotImplemented) takes the kind of the list added last
    # -- presumably; confirm against WordList#add.
    IDENT_KIND = WordList.new(:ident).
      add(KEYWORDS, :keyword).
      add(OLD_KEYWORDS, :old_keyword).
      add(PREDEFINED_METHODS_AND_TYPES, :predefined).
      add(PREDEFINED_VARIABLES_AND_CONSTANTS, :pre_constant).
      add(PREDEFINED_EXCEPTIONS, :exception)

    # An identifier: a word character that is not a digit, then word characters.
    NAME = / [^\W\d] \w* /x
    # What may follow a backslash in a non-raw string literal.
    ESCAPE = / [abfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
    # What may follow a backslash to form a Unicode escape.
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} | N\{[-\w ]+\} /x

    OPERATOR = /
      \.\.\. |  # ellipsis
      \.(?!\d) |  # dot but not decimal point
      [,;:()\[\]{}] |  # simple delimiters
      \/\/=? | \*\*=? |  # special math
      [-+*\/%&|^]=? |  # ordinary math and binary logic
      [~`] |  # binary complement and inspection
      <<=? | >>=? | [<>=]=? | !=  # comparison and assignment
    /x

    # Memoizing cache: string delimiter => regexp matching that delimiter.
    STRING_DELIMITER_REGEXP = Hash.new do |h, delimiter|
      h[delimiter] = Regexp.union delimiter
    end

    # Memoizing cache: string delimiter => regexp matching the shortest run of
    # plain content that stops before a backslash, a line end or the delimiter.
    STRING_CONTENT_REGEXP = Hash.new do |h, delimiter|
      h[delimiter] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delimiter)} ) /x
    end

    # Scanner state to enter after a given keyword; :initial for all others.
    DEF_NEW_STATE = WordList.new(:initial).
      add(%w(def), :def_expected).
      add(%w(import from), :include_expected).
      add(%w(class), :class_expected)

    # A dotted module path, or the +*+ wildcard of a star import.
    DESCRIPTOR = /
      #{NAME}
      (?: \. #{NAME} )*
      | \*
    /x

    # Tokenizes the scanner's input and appends the result to +tokens+.
    #
    # tokens::  collection receiving [text, kind] pairs via <tt><<</tt>;
    #           string literals are bracketed by [:open, :string] and
    #           [:close, :string].
    # options:: not read by this method.
    #
    # Returns +tokens+.
    #
    # The scanner is a small state machine:
    # :initial::          ordinary code
    # :string::           inside a string literal
    # :def_expected::     right after +def+, the next name is a :method
    # :class_expected::   right after +class+, the next name is a :class
    # :include_expected:: inside an +import+ / +from+ statement
    def scan_tokens tokens, options

      state = :initial
      string_delimiter = nil
      string_raw = false
      import_clause = class_name_follows = last_token_dot = false
      unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
      # Stack of the keywords (:from, :import, :as) seen in the current
      # import statement.
      from_import_state = []

      until eos?

        kind = nil
        match = nil

        if state == :string
          if scan(STRING_DELIMITER_REGEXP[string_delimiter])
            tokens << [matched, :delimiter]
            tokens << [:close, :string]
            state = :initial
            next
          elsif string_delimiter.size == 3 && scan(/\n/)
            # Only triple-quoted strings may span lines.
            kind = :content
          elsif scan(STRING_CONTENT_REGEXP[string_delimiter])
            kind = :content
          elsif !string_raw && scan(/ \\ #{ESCAPE} /ox)
            kind = :char
          elsif scan(/ \\ #{UNICODE_ESCAPE} /ox)
            kind = :char
          elsif scan(/ \\ . /x)
            kind = :content
          elsif scan(/ \\ | $ /x)
            # Dangling backslash or unterminated single-line string.
            tokens << [:close, :string]
            kind = :error
            state = :initial
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens, state
          end

        elsif match = scan(/ [ \t]+ | \\\n /x)
          tokens << [match, :space]
          next

        elsif match = scan(/\n/)
          tokens << [match, :space]
          if state == :include_expected
            # The import statement ends with the line; drop its bookkeeping
            # so it neither accumulates nor leaks into later statements.
            state = :initial
            from_import_state = []
          end
          next

        elsif match = scan(/ \# [^\n]* /mx)
          tokens << [match, :comment]
          next

        elsif state == :initial

          if scan(/#{OPERATOR}/o)
            kind = :operator

          elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
            tokens << [:open, :string]
            string_delimiter = self[2]
            string_raw = false
            modifiers = self[1]
            unless modifiers.empty?
              # The prefix is matched case-insensitively, so the raw flag
              # must accept both "r" and "R".
              string_raw = !!modifiers.index(/r/i)
              tokens << [modifiers, :modifier]
              match = string_delimiter
            end
            state = :string
            kind = :delimiter

          # TODO: backticks

          elsif match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
            kind = IDENT_KIND[match]
            # TODO: keyword arguments
            kind = :ident if last_token_dot
            if kind == :old_keyword
              # print(...) / exec(...) look like Python 3 function calls.
              kind = check(/\(/) ? :ident : :keyword
            elsif kind == :predefined && check(/ *=(?!=)/)
              # A builtin name being assigned to is an ordinary identifier;
              # "==" is a comparison and must not trigger this.
              kind = :ident
            elsif kind == :keyword
              state = DEF_NEW_STATE[match]
              from_import_state << match.to_sym if state == :include_expected
            end

          elsif scan(/@[a-zA-Z0-9_.]+[lL]?/)
            kind = :decorator

          elsif scan(/0[xX][0-9A-Fa-f]+[lL]?/)
            kind = :hex

          elsif scan(/0[bB][01]+[lL]?/)
            kind = :bin

          elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
            kind = :float
            if scan(/[jJ]/)
              match << matched
              kind = :imaginary
            end

          elsif scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
            kind = :oct

          elsif match = scan(/\d+([lL])?/)
            kind = :integer
            # A long suffix and an imaginary suffix are mutually exclusive.
            if self[1].nil? && scan(/[jJ]/)
              match << matched
              kind = :imaginary
            end

          else
            getch
            kind = :error

          end

        elsif state == :def_expected
          state = :initial
          if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
            kind = :method
          else
            next
          end

        elsif state == :class_expected
          state = :initial
          if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
            kind = :class
          else
            next
          end

        elsif state == :include_expected
          if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o)
            kind = :include
            if match == 'as'
              kind = :keyword
              from_import_state << :as
            elsif from_import_state.first == :from && match == 'import'
              kind = :keyword
              from_import_state << :import
            elsif from_import_state.last == :as
              # kind = match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
              kind = :ident
              from_import_state.pop
            elsif IDENT_KIND[match] == :keyword
              # Another keyword ends the import; rescan it as ordinary code.
              unscan
              match = nil
              state = :initial
              next
            end
          elsif match = scan(/,/)
            from_import_state.pop if from_import_state.last == :as
            kind = :operator
          else
            from_import_state = []
            state = :initial
            next
          end

        else
          raise_inspect 'Unknown state', tokens, state

        end

        match ||= matched
        if $CODERAY_DEBUG && !kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens, state
        end
        raise_inspect 'Empty token', tokens, state unless match

        # Remembered so that a name after "." is never treated as a builtin.
        last_token_dot = match == '.'

        tokens << [match, kind]

      end

      # Close a string literal left open at end of input.
      if state == :string
        tokens << [:close, :string]
      end

      tokens
    end

  end

end
end