comparison vendor/gems/coderay-1.0.0/lib/coderay/scanners/python.rb @ 909:cbb26bc654de redmine-1.3

Update to Redmine 1.3-stable branch (Redmine SVN rev 8964)
author Chris Cannam
date Fri, 24 Feb 2012 19:09:32 +0000
parents
children
comparison
equal deleted inserted replaced
908:c6c2cbd0afee 909:cbb26bc654de
module CodeRay
module Scanners

  # Scanner for Python. Supports Python 3.
  #
  # Based on pygments' PythonLexer, see
  # http://dev.pocoo.org/projects/pygments/browser/pygments/lexers/agile.py.
  #
  # The scanner is a small state machine driven by +scan_tokens+. Its states
  # are :initial, :string, :def_expected, :class_expected and
  # :include_expected; every recognised piece of input is reported to the
  # encoder as a (text, kind) token.
  class Python < Scanner

    register_for :python
    file_extension 'py'

    # Words reported with kind :keyword.
    KEYWORDS = [
      'and', 'as', 'assert', 'break', 'class', 'continue', 'def',
      'del', 'elif', 'else', 'except', 'finally', 'for',
      'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'not',
      'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield',
      'nonlocal',  # new in Python 3
    ]  # :nodoc:

    # Words that were statements before Python 3. They are reported as
    # :keyword unless directly followed by "(", in which case they are
    # reported as :ident (see scan_tokens).
    OLD_KEYWORDS = [
      'exec', 'print',  # gone in Python 3
    ]  # :nodoc:

    # Names reported with kind :predefined (unless they are being assigned to).
    PREDEFINED_METHODS_AND_TYPES = %w[
      __import__ abs all any apply basestring bin bool buffer
      bytearray bytes callable chr classmethod cmp coerce compile
      complex delattr dict dir divmod enumerate eval execfile exit
      file filter float frozenset getattr globals hasattr hash hex id
      input int intern isinstance issubclass iter len list locals
      long map max min next object oct open ord pow property range
      raw_input reduce reload repr reversed round set setattr slice
      sorted staticmethod str sum super tuple type unichr unicode
      vars xrange zip
    ]  # :nodoc:

    # Names reported with kind :exception.
    PREDEFINED_EXCEPTIONS = %w[
      ArithmeticError AssertionError AttributeError
      BaseException DeprecationWarning EOFError EnvironmentError
      Exception FloatingPointError FutureWarning GeneratorExit IOError
      ImportError ImportWarning IndentationError IndexError KeyError
      KeyboardInterrupt LookupError MemoryError NameError
      NotImplemented NotImplementedError OSError OverflowError
      OverflowWarning PendingDeprecationWarning ReferenceError
      RuntimeError RuntimeWarning StandardError StopIteration
      SyntaxError SyntaxWarning SystemError SystemExit TabError
      TypeError UnboundLocalError UnicodeDecodeError
      UnicodeEncodeError UnicodeError UnicodeTranslateError
      UnicodeWarning UserWarning ValueError Warning ZeroDivisionError
    ]  # :nodoc:

    # Names reported with kind :predefined_constant.
    PREDEFINED_VARIABLES_AND_CONSTANTS = [
      'False', 'True', 'None',  # "keywords" since Python 3
      'self', 'Ellipsis', 'NotImplemented',
    ]  # :nodoc:

    # Maps an identifier to its token kind; anything not listed is :ident.
    IDENT_KIND = WordList.new(:ident).
      add(KEYWORDS, :keyword).
      add(OLD_KEYWORDS, :old_keyword).
      add(PREDEFINED_METHODS_AND_TYPES, :predefined).
      add(PREDEFINED_VARIABLES_AND_CONSTANTS, :predefined_constant).
      add(PREDEFINED_EXCEPTIONS, :exception)  # :nodoc:

    # An identifier: a word character that is not a digit, then word characters.
    NAME = / [^\W\d] \w* /x  # :nodoc:
    # What may follow a backslash in a non-raw string to form an escape.
    ESCAPE = / [abfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
    # What may follow a backslash to form a \u, \U or \N{...} escape.
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} | N\{[-\w ]+\} /x  # :nodoc:

    OPERATOR = /
      \.\.\. |          # ellipsis
      \.(?!\d) |        # dot but not decimal point
      [,;:()\[\]{}] |   # simple delimiters
      \/\/=? | \*\*=? | # special math
      [-+*\/%&|^]=? |   # ordinary math and binary logic
      [~`] |            # binary complement and inspection
      <<=? | >>=? | [<>=]=? | !=  # comparison and assignment
    /x  # :nodoc:

    # Per-delimiter cache: the regexp matching the closing delimiter itself.
    # Entries are built on first lookup and stored in the hash.
    STRING_DELIMITER_REGEXP = Hash.new { |h, delimiter|
      h[delimiter] = Regexp.union delimiter  # :nodoc:
    }

    # Per-delimiter cache: the regexp matching a run of plain string content,
    # stopping before a backslash, an end of line, or the delimiter.
    STRING_CONTENT_REGEXP = Hash.new { |h, delimiter|
      h[delimiter] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delimiter)} ) /x  # :nodoc:
    }

    # State to enter after a keyword; :initial for keywords not listed here.
    DEF_NEW_STATE = WordList.new(:initial).
      add(%w(def), :def_expected).
      add(%w(import from), :include_expected).
      add(%w(class), :class_expected)  # :nodoc:

    # A dotted name or a lone "*", as it appears in import statements.
    DESCRIPTOR = /
      #{NAME}
      (?: \. #{NAME} )*
      | \*
    /x  # :nodoc:

    # Optional indentation and u/r prefix followed by a triple quote.
    DOCSTRING_COMING = /
      [ \t]* u?r? ("""|''')
    /x  # :nodoc:

  protected

    # Scans the whole input and reports every token to +encoder+.
    #
    # encoder:: receives text_token(text, kind), begin_group(kind) and
    #           end_group(kind) calls.
    # options:: accepted for interface compatibility; not read here.
    #
    # Returns +encoder+.
    def scan_tokens encoder, options

      state = :initial
      string_delimiter = nil
      string_raw = false
      string_type = nil
      # True when the next string literal should be reported as :docstring.
      docstring_coming = match?(/#{DOCSTRING_COMING}/o)
      last_token_dot = false
      unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
      # Keywords (:from, :import, :as) seen so far in the current import statement.
      from_import_state = []

      until eos?

        if state == :string
          if match = scan(STRING_DELIMITER_REGEXP[string_delimiter])
            encoder.text_token match, :delimiter
            encoder.end_group string_type
            string_type = nil
            state = :initial
            next
          elsif string_delimiter.size == 3 && match = scan(/\n/)
            # Only triple-quoted strings may span lines.
            encoder.text_token match, :content
          elsif match = scan(STRING_CONTENT_REGEXP[string_delimiter])
            encoder.text_token match, :content
          elsif !string_raw && match = scan(/ \\ #{ESCAPE} /ox)
            encoder.text_token match, :char
          elsif match = scan(/ \\ #{UNICODE_ESCAPE} /ox)
            encoder.text_token match, :char
          elsif match = scan(/ \\ . /x)
            encoder.text_token match, :content
          elsif match = scan(/ \\ | $ /x)
            # Unterminated string: close the group and go back to normal code.
            encoder.end_group string_type
            string_type = nil
            # "$" matches zero-width at the end of a line; do not emit an
            # empty :error token in that case.
            encoder.text_token match, :error unless match.empty?
            state = :initial
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), encoder, state
          end

        elsif match = scan(/ [ \t]+ | \\?\n /x)
          encoder.text_token match, :space
          if match == "\n"
            # A plain newline (not a backslash continuation) ends an import
            # statement. Also drop the keywords collected for it, so they do
            # not pile up or leak into the next statement.
            if state == :include_expected
              state = :initial
              from_import_state = []
            end
            docstring_coming = true if match?(/#{DOCSTRING_COMING}/o)
          end
          next

        elsif match = scan(/ \# [^\n]* /mx)
          encoder.text_token match, :comment
          next

        elsif state == :initial

          if match = scan(/#{OPERATOR}/o)
            encoder.text_token match, :operator

          elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
            string_delimiter = self[2]
            string_type = docstring_coming ? :docstring : :string
            docstring_coming = false if docstring_coming
            encoder.begin_group string_type
            string_raw = false
            modifiers = self[1]
            unless modifiers.empty?
              # The prefix is matched case-insensitively above, so "R" must
              # mark the string as raw just like "r" does.
              string_raw = !!(modifiers =~ /r/i)
              encoder.text_token modifiers, :modifier
              match = string_delimiter
            end
            state = :string
            encoder.text_token match, :delimiter

          # TODO: backticks

          elsif match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
            kind = IDENT_KIND[match]
            # TODO: keyword arguments
            # After a dot the name is an attribute, never a keyword or builtin.
            kind = :ident if last_token_dot
            if kind == :old_keyword
              kind = check(/\(/) ? :ident : :keyword
            elsif kind == :predefined && check(/ *=/)
              kind = :ident
            elsif kind == :keyword
              state = DEF_NEW_STATE[match]
              from_import_state << match.to_sym if state == :include_expected
            end
            encoder.text_token match, kind

          elsif match = scan(/@[a-zA-Z0-9_.]+[lL]?/)
            encoder.text_token match, :decorator

          elsif match = scan(/0[xX][0-9A-Fa-f]+[lL]?/)
            encoder.text_token match, :hex

          elsif match = scan(/0[bB][01]+[lL]?/)
            encoder.text_token match, :binary

          elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
            if scan(/[jJ]/)
              match << matched
              encoder.text_token match, :imaginary
            else
              encoder.text_token match, :float
            end

          elsif match = scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
            encoder.text_token match, :octal

          elsif match = scan(/\d+([lL])?/)
            # A "j" suffix is only consumed when there is no l/L suffix.
            if self[1] == nil && scan(/[jJ]/)
              match << matched
              encoder.text_token match, :imaginary
            else
              encoder.text_token match, :integer
            end

          else
            encoder.text_token getch, :error

          end

        elsif state == :def_expected
          state = :initial
          if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
            encoder.text_token match, :method
          else
            next
          end

        elsif state == :class_expected
          state = :initial
          if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
            encoder.text_token match, :class
          else
            next
          end

        elsif state == :include_expected
          if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o)
            if match == 'as'
              encoder.text_token match, :keyword
              from_import_state << :as
            elsif from_import_state.first == :from && match == 'import'
              encoder.text_token match, :keyword
              from_import_state << :import
            elsif from_import_state.last == :as
              # encoder.text_token match, match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
              encoder.text_token match, :ident
              from_import_state.pop
            elsif IDENT_KIND[match] == :keyword
              # Some other keyword: put it back and let the :initial state
              # handle it.
              unscan
              match = nil
              state = :initial
              next
            else
              encoder.text_token match, :include
            end
          elsif match = scan(/,/)
            from_import_state.pop if from_import_state.last == :as
            encoder.text_token match, :operator
          else
            # Anything else ends the import statement.
            from_import_state = []
            state = :initial
            next
          end

        else
          raise_inspect 'Unknown state', encoder, state

        end

        last_token_dot = match == '.'

      end

      # Close a string group that is still open at end of input.
      if state == :string
        encoder.end_group string_type
      end

      encoder
    end

  end

end
end