comparison vendor/plugins/coderay-0.9.2/lib/coderay/scanners/.svn/text-base/python.rb.svn-base @ 0:513646585e45

* Import Redmine trunk SVN rev 3859
author Chris Cannam
date Fri, 23 Jul 2010 15:52:44 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:513646585e45
1 module CodeRay
2 module Scanners
3
4 # Based on pygments' PythonLexer, see
5 # http://dev.pocoo.org/projects/pygments/browser/pygments/lexers/agile.py.
6 class Python < Scanner
7
8 include Streamable
9
10 register_for :python
11 file_extension 'py'
12
# Reserved words of the Python grammar.
KEYWORDS = [
  'and', 'as', 'assert', 'break', 'class', 'continue', 'def',
  'del', 'elif', 'else', 'except', 'finally', 'for',
  'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'not',
  'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield',
  'nonlocal',  # new in Python 3
].freeze

# Words that were statements in Python 2 only.
OLD_KEYWORDS = [
  'exec', 'print',  # gone in Python 3
].freeze

# Names of built-in functions and types.
PREDEFINED_METHODS_AND_TYPES = %w[
  __import__ abs all any apply basestring bin bool buffer
  bytearray bytes callable chr classmethod cmp coerce compile
  complex delattr dict dir divmod enumerate eval execfile exit
  file filter float frozenset getattr globals hasattr hash hex id
  input int intern isinstance issubclass iter len list locals
  long map max min next object oct open ord pow property range
  raw_input reduce reload repr reversed round set setattr slice
  sorted staticmethod str sum super tuple type unichr unicode
  vars xrange zip
].freeze

# Names of built-in exception and warning classes.
#
# NotImplemented is intentionally NOT listed here: it is a built-in constant,
# not an exception class, and is already part of
# PREDEFINED_VARIABLES_AND_CONSTANTS. Listing it in both places let whichever
# list is registered last decide its token kind.
PREDEFINED_EXCEPTIONS = %w[
  ArithmeticError AssertionError AttributeError
  BaseException DeprecationWarning EOFError EnvironmentError
  Exception FloatingPointError FutureWarning GeneratorExit IOError
  ImportError ImportWarning IndentationError IndexError KeyError
  KeyboardInterrupt LookupError MemoryError NameError
  NotImplementedError OSError OverflowError
  OverflowWarning PendingDeprecationWarning ReferenceError
  RuntimeError RuntimeWarning StandardError StopIteration
  SyntaxError SyntaxWarning SystemError SystemExit TabError
  TypeError UnboundLocalError UnicodeDecodeError
  UnicodeEncodeError UnicodeError UnicodeTranslateError
  UnicodeWarning UserWarning ValueError Warning ZeroDivisionError
].freeze

# Built-in constants plus the conventional receiver name +self+.
PREDEFINED_VARIABLES_AND_CONSTANTS = [
  'False', 'True', 'None',  # "keywords" since Python 3
  'self', 'Ellipsis', 'NotImplemented',
].freeze
56
# Word list used by scan_tokens to look up the token kind of an identifier.
# It is built by feeding each (word list, kind) pair to WordList#add in the
# order given, each call being made on the result of the previous one.
# NOTE(review): presumably :ident is the kind returned for unlisted words --
# confirm against WordList.
IDENT_KIND = [
  [KEYWORDS, :keyword],
  [OLD_KEYWORDS, :old_keyword],
  [PREDEFINED_METHODS_AND_TYPES, :predefined],
  [PREDEFINED_VARIABLES_AND_CONSTANTS, :pre_constant],
  [PREDEFINED_EXCEPTIONS, :exception],
].inject(WordList.new(:ident)) { |list, (words, kind)| list.add(words, kind) }
63
# An identifier: one word character that is not a digit, then word characters.
NAME = / [^\W\d] \w* /x
# What may follow a backslash in a string: a single-character escape,
# a hex escape or an octal escape.
ESCAPE = / [abfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
# What may follow a backslash for \uXXXX, \UXXXXXXXX and \N{name} escapes.
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} | N\{[-\w ]+\} /x

# Operators and delimiters; alternatives are tried in the order written.
OPERATOR = /
  \.\.\. |  # ellipsis
  \.(?!\d) |  # dot but not decimal point
  [,;:()\[\]{}] |  # simple delimiters
  \/\/=? | \*\*=? |  # special math
  [-+*\/%&|^]=? |  # ordinary math and binary logic
  [~`] |  # binary complement and inspection
  <<=? | >>=? | [<>=]=? | !=  # comparison and assignment
/x

# Memoizing table: string delimiter => regexp matching exactly that delimiter.
# The regexp is built on first lookup and stored for later lookups.
STRING_DELIMITER_REGEXP = Hash.new { |cache, delim|
  cache[delim] = Regexp.union delim
}

# Memoizing table: string delimiter => regexp matching a run of plain string
# content (no backslash, no newline) that stops in front of a backslash, the
# end of the line, or the delimiter.
STRING_CONTENT_REGEXP = Hash.new { |cache, delim|
  cache[delim] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delim)} ) /x
}
85
# Word list consulted by scan_tokens after a keyword to pick the next scanner
# state. It is built by feeding each (words, state) pair to WordList#add in
# order, each call being made on the result of the previous one.
# NOTE(review): presumably :initial is returned for every other keyword --
# confirm against WordList.
DEF_NEW_STATE = [
  [%w(def), :def_expected],
  [%w(import from), :include_expected],
  [%w(class), :class_expected],
].inject(WordList.new(:initial)) { |states, (words, state)| states.add(words, state) }

# What may be named in an import statement: a dotted name, or a bare "*".
DESCRIPTOR = /
  #{NAME}
  (?: \. #{NAME} )*
  | \*
/x
96
# Tokenizes the Python source held by this scanner and appends the tokens
# to +tokens+.
#
# Text tokens are pushed as [text, kind] pairs; a string literal is wrapped
# in [:open, :string] ... [:close, :string] markers.
#
# The method is a state machine with these states:
#   :initial          - ordinary code
#   :string           - inside a string literal
#   :def_expected     - directly after "def"; the next name is a :method
#   :class_expected   - directly after "class"; the next name is a :class
#   :include_expected - inside an import / from-import statement
#
# tokens  - collector that receives every token via <<
# options - not consulted by this method
#
# Returns +tokens+.
def scan_tokens tokens, options

  state = :initial
  string_delimiter = nil
  string_raw = false
  last_token_dot = false
  unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
  # Keywords seen so far in the current import statement (:from, :import, :as).
  from_import_state = []

  until eos?

    kind = nil
    match = nil

    if state == :string
      if scan(STRING_DELIMITER_REGEXP[string_delimiter])
        tokens << [matched, :delimiter]
        tokens << [:close, :string]
        state = :initial
        next
      elsif string_delimiter.size == 3 && scan(/\n/)
        # only triple-quoted strings may contain a newline
        kind = :content
      elsif scan(STRING_CONTENT_REGEXP[string_delimiter])
        kind = :content
      elsif !string_raw && scan(/ \\ #{ESCAPE} /ox)
        kind = :char
      elsif scan(/ \\ #{UNICODE_ESCAPE} /ox)
        kind = :char
      elsif scan(/ \\ . /x)
        kind = :content
      elsif scan(/ \\ | $ /x)
        # Unterminated string: close the group and resume normal scanning.
        tokens << [:close, :string]
        state = :initial
        # `$` matches with zero width at the end of the line; there is no
        # text to flag then, so do not emit an empty :error token.
        next if matched.empty?
        kind = :error
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), tokens, state
      end

    elsif match = scan(/ [ \t]+ | \\\n /x)
      # blanks and backslash line continuations
      tokens << [match, :space]
      next

    elsif match = scan(/\n/)
      tokens << [match, :space]
      if state == :include_expected
        # A newline ends the import statement; drop its bookkeeping as well
        # so it neither leaks into the next statement nor grows per line.
        state = :initial
        from_import_state = []
      end
      next

    elsif match = scan(/ \# [^\n]* /mx)
      tokens << [match, :comment]
      next

    elsif state == :initial

      if scan(/#{OPERATOR}/o)
        kind = :operator

      elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
        tokens << [:open, :string]
        string_delimiter = self[2]
        string_raw = false
        modifiers = self[1]
        unless modifiers.empty?
          # The prefix is matched case-insensitively, so the raw flag has to
          # be detected case-insensitively too (R'...' is a raw string).
          string_raw = modifiers.downcase.include?('r')
          tokens << [modifiers, :modifier]
          match = string_delimiter
        end
        state = :string
        kind = :delimiter

      # TODO: backticks

      elsif match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
        kind = IDENT_KIND[match]
        # TODO: keyword arguments
        # a name after a dot is an attribute, whatever word it is
        kind = :ident if last_token_dot
        if kind == :old_keyword
          # followed by "(" it is treated as a plain identifier
          kind = check(/\(/) ? :ident : :keyword
        elsif kind == :predefined && check(/ *=/)
          # a predefined name followed by "=" is treated as an ordinary identifier
          kind = :ident
        elsif kind == :keyword
          state = DEF_NEW_STATE[match]
          from_import_state << match.to_sym if state == :include_expected
        end

      elsif scan(/@[a-zA-Z0-9_.]+[lL]?/)
        kind = :decorator

      elsif scan(/0[xX][0-9A-Fa-f]+[lL]?/)
        kind = :hex

      elsif scan(/0[bB][01]+[lL]?/)
        kind = :bin

      elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
        kind = :float
        if scan(/[jJ]/)
          match << matched
          kind = :imaginary
        end

      elsif scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
        kind = :oct

      elsif match = scan(/\d+([lL])?/)
        kind = :integer
        # a long suffix and an imaginary suffix exclude each other
        if self[1] == nil && scan(/[jJ]/)
          match << matched
          kind = :imaginary
        end

      else
        getch
        kind = :error

      end

    elsif state == :def_expected
      state = :initial
      if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
        kind = :method
      else
        next
      end

    elsif state == :class_expected
      state = :initial
      if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
        kind = :class
      else
        next
      end

    elsif state == :include_expected
      if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o)
        kind = :include
        if match == 'as'
          kind = :keyword
          from_import_state << :as
        elsif from_import_state.first == :from && match == 'import'
          kind = :keyword
          from_import_state << :import
        elsif from_import_state.last == :as
          # the alias name that follows "as"
          # kind = match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
          kind = :ident
          from_import_state.pop
        elsif IDENT_KIND[match] == :keyword
          # any other keyword ends the import; rescan it as ordinary code
          unscan
          match = nil
          state = :initial
          next
        end
      elsif match = scan(/,/)
        from_import_state.pop if from_import_state.last == :as
        kind = :operator
      else
        # anything else ends the import statement
        from_import_state = []
        state = :initial
        next
      end

    else
      raise_inspect 'Unknown state', tokens, state

    end

    # branches that did not capture the text themselves used a plain scan
    match ||= matched
    if $CODERAY_DEBUG and not kind
      raise_inspect 'Error token %p in line %d' %
        [[match, kind], line], tokens, state
    end
    raise_inspect 'Empty token', tokens, state unless match

    last_token_dot = match == '.'

    tokens << [match, kind]

  end

  # input ended inside a string literal: close the open group
  if state == :string
    tokens << [:close, :string]
  end

  tokens
end
281
282 end
283
284 end
285 end