Mercurial > hg > soundsoftware-site
comparison vendor/gems/coderay-1.0.0/lib/coderay/scanners/python.rb @ 909:cbb26bc654de redmine-1.3
Update to Redmine 1.3-stable branch (Redmine SVN rev 8964)
author | Chris Cannam |
---|---|
date | Fri, 24 Feb 2012 19:09:32 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
908:c6c2cbd0afee | 909:cbb26bc654de |
---|---|
module CodeRay
module Scanners

  # Scanner for Python. Supports Python 3.
  #
  # Based on pygments' PythonLexer, see
  # http://dev.pocoo.org/projects/pygments/browser/pygments/lexers/agile.py.
  #
  # The scanner is a small state machine driven by #scan_tokens. Its states are:
  #
  # :initial::          ordinary code
  # :string::           inside a string or docstring literal
  # :def_expected::     right after the +def+ keyword
  # :class_expected::   right after the +class+ keyword
  # :include_expected:: inside an +import+ / +from ... import+ statement
  class Python < Scanner

    register_for :python
    file_extension 'py'

    KEYWORDS = [
      'and', 'as', 'assert', 'break', 'class', 'continue', 'def',
      'del', 'elif', 'else', 'except', 'finally', 'for',
      'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'not',
      'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield',
      'nonlocal',  # new in Python 3
    ]  # :nodoc:

    OLD_KEYWORDS = [
      'exec', 'print',  # gone in Python 3
    ]  # :nodoc:

    PREDEFINED_METHODS_AND_TYPES = %w[
      __import__ abs all any apply basestring bin bool buffer
      bytearray bytes callable chr classmethod cmp coerce compile
      complex delattr dict dir divmod enumerate eval execfile exit
      file filter float frozenset getattr globals hasattr hash hex id
      input int intern isinstance issubclass iter len list locals
      long map max min next object oct open ord pow property range
      raw_input reduce reload repr reversed round set setattr slice
      sorted staticmethod str sum super tuple type unichr unicode
      vars xrange zip
    ]  # :nodoc:

    PREDEFINED_EXCEPTIONS = %w[
      ArithmeticError AssertionError AttributeError
      BaseException DeprecationWarning EOFError EnvironmentError
      Exception FloatingPointError FutureWarning GeneratorExit IOError
      ImportError ImportWarning IndentationError IndexError KeyError
      KeyboardInterrupt LookupError MemoryError NameError
      NotImplemented NotImplementedError OSError OverflowError
      OverflowWarning PendingDeprecationWarning ReferenceError
      RuntimeError RuntimeWarning StandardError StopIteration
      SyntaxError SyntaxWarning SystemError SystemExit TabError
      TypeError UnboundLocalError UnicodeDecodeError
      UnicodeEncodeError UnicodeError UnicodeTranslateError
      UnicodeWarning UserWarning ValueError Warning ZeroDivisionError
    ]  # :nodoc:

    PREDEFINED_VARIABLES_AND_CONSTANTS = [
      'False', 'True', 'None',  # "keywords" since Python 3
      'self', 'Ellipsis', 'NotImplemented',
    ]  # :nodoc:

    # Maps an identifier to its token kind; unknown words yield :ident.
    IDENT_KIND = WordList.new(:ident).
      add(KEYWORDS, :keyword).
      add(OLD_KEYWORDS, :old_keyword).
      add(PREDEFINED_METHODS_AND_TYPES, :predefined).
      add(PREDEFINED_VARIABLES_AND_CONSTANTS, :predefined_constant).
      add(PREDEFINED_EXCEPTIONS, :exception)  # :nodoc:

    # An identifier: one word character that is not a digit, then word characters.
    NAME = / [^\W\d] \w* /x  # :nodoc:
    # What may follow a backslash in a non-raw string to form an escape.
    ESCAPE = / [abfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
    # What may follow a backslash to form a Unicode escape.
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} | N\{[-\w ]+\} /x  # :nodoc:

    OPERATOR = /
      \.\.\. |          # ellipsis
      \.(?!\d) |        # dot but not decimal point
      [,;:()\[\]{}] |   # simple delimiters
      \/\/=? | \*\*=? | # special math
      [-+*\/%&|^]=? |   # ordinary math and binary logic
      [~`] |            # binary complement and inspection
      <<=? | >>=? | [<>=]=? | !=  # comparison and assignment
    /x  # :nodoc:

    # Memoizes, per delimiter, the regexp that matches the closing delimiter.
    STRING_DELIMITER_REGEXP = Hash.new { |h, delimiter|
      h[delimiter] = Regexp.union delimiter  # :nodoc:
    }

    # Memoizes, per delimiter, the regexp that matches a run of plain string
    # content up to the next backslash, end of line, or delimiter.
    STRING_CONTENT_REGEXP = Hash.new { |h, delimiter|
      h[delimiter] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delimiter)} ) /x  # :nodoc:
    }

    # Maps a keyword to the scanner state that follows it; others yield :initial.
    DEF_NEW_STATE = WordList.new(:initial).
      add(%w(def), :def_expected).
      add(%w(import from), :include_expected).
      add(%w(class), :class_expected)  # :nodoc:

    # A dotted module path, or the +*+ wildcard, inside an import statement.
    DESCRIPTOR = /
      #{NAME}
      (?: \. #{NAME} )*
      | \*
    /x  # :nodoc:

    # Look-ahead for a triple-quoted string at the start of a line.
    # Case-insensitive so that upper-case prefixes (U, R, UR, ...) are
    # recognized, in line with the case-insensitive string opener used in
    # #scan_tokens.
    DOCSTRING_COMING = /
      [ \t]* u?r? ("""|''')
    /xi  # :nodoc:

  protected

    # Tokenizes the scanner's input and feeds the tokens to +encoder+.
    #
    # +encoder+ receives +text_token+, +begin_group+ and +end_group+ calls.
    # +options+ is accepted but not read by this scanner.
    #
    # Returns +encoder+. A string that is still open at end of input gets its
    # group closed before returning.
    def scan_tokens encoder, options

      state = :initial
      string_delimiter = nil
      string_raw = false
      string_type = nil
      docstring_coming = match?(/#{DOCSTRING_COMING}/o)
      last_token_dot = false
      unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
      # Keywords seen so far in the current import statement
      # (:from, :import, :as).
      from_import_state = []

      until eos?

        if state == :string
          if match = scan(STRING_DELIMITER_REGEXP[string_delimiter])
            encoder.text_token match, :delimiter
            encoder.end_group string_type
            string_type = nil
            state = :initial
            next
          elsif string_delimiter.size == 3 && match = scan(/\n/)
            # Only triple-quoted strings may span lines.
            encoder.text_token match, :content
          elsif match = scan(STRING_CONTENT_REGEXP[string_delimiter])
            encoder.text_token match, :content
          elsif !string_raw && match = scan(/ \\ #{ESCAPE} /ox)
            encoder.text_token match, :char
          elsif match = scan(/ \\ #{UNICODE_ESCAPE} /ox)
            encoder.text_token match, :char
          elsif match = scan(/ \\ . /x)
            # Backslash followed by anything else is plain content.
            encoder.text_token match, :content
          elsif match = scan(/ \\ | $ /x)
            # Lone backslash or end of line inside a single-line string:
            # close the group and flag an error.
            encoder.end_group string_type
            string_type = nil
            encoder.text_token match, :error
            state = :initial
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), encoder, state
          end

        elsif match = scan(/ [ \t]+ | \\?\n /x)
          encoder.text_token match, :space
          if match == "\n"
            if state == :include_expected
              # An unescaped newline ends the import statement. Drop the
              # collected keywords too, as the fallback branch of
              # :include_expected does, so they cannot leak into later
              # statements.
              state = :initial
              from_import_state = []
            end
            docstring_coming = true if match?(/#{DOCSTRING_COMING}/o)
          end
          next

        elsif match = scan(/ \# [^\n]* /mx)
          encoder.text_token match, :comment
          next

        elsif state == :initial

          if match = scan(/#{OPERATOR}/o)
            encoder.text_token match, :operator

          elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
            string_delimiter = self[2]
            string_type = docstring_coming ? :docstring : :string
            docstring_coming = false
            encoder.begin_group string_type
            string_raw = false
            modifiers = self[1]
            unless modifiers.empty?
              # The prefix is matched case-insensitively, so an upper-case
              # "R" must mark the string as raw as well.
              string_raw = modifiers.downcase.include?('r')
              encoder.text_token modifiers, :modifier
              match = string_delimiter
            end
            state = :string
            encoder.text_token match, :delimiter

          # TODO: backticks

          elsif match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
            kind = IDENT_KIND[match]
            # TODO: keyword arguments
            kind = :ident if last_token_dot
            if kind == :old_keyword
              # +print(...)+ / +exec(...)+ are ordinary calls in Python 3.
              kind = check(/\(/) ? :ident : :keyword
            elsif kind == :predefined && check(/ *=/)
              # A builtin name followed by "=" is treated as an identifier.
              kind = :ident
            elsif kind == :keyword
              state = DEF_NEW_STATE[match]
              from_import_state << match.to_sym if state == :include_expected
            end
            encoder.text_token match, kind

          elsif match = scan(/@[a-zA-Z0-9_.]+/)
            encoder.text_token match, :decorator

          elsif match = scan(/0[xX][0-9A-Fa-f]+[lL]?/)
            encoder.text_token match, :hex

          elsif match = scan(/0[bB][01]+[lL]?/)
            encoder.text_token match, :binary

          elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
            if scan(/[jJ]/)
              match << matched
              encoder.text_token match, :imaginary
            else
              encoder.text_token match, :float
            end

          elsif match = scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
            encoder.text_token match, :octal

          elsif match = scan(/\d+([lL])?/)
            # A long suffix and an imaginary suffix are mutually exclusive.
            if self[1].nil? && scan(/[jJ]/)
              match << matched
              encoder.text_token match, :imaginary
            else
              encoder.text_token match, :integer
            end

          else
            encoder.text_token getch, :error

          end

        elsif state == :def_expected
          state = :initial
          if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
            encoder.text_token match, :method
          else
            next
          end

        elsif state == :class_expected
          state = :initial
          if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
            encoder.text_token match, :class
          else
            next
          end

        elsif state == :include_expected
          if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o)
            if match == 'as'
              encoder.text_token match, :keyword
              from_import_state << :as
            elsif from_import_state.first == :from && match == 'import'
              encoder.text_token match, :keyword
              from_import_state << :import
            elsif from_import_state.last == :as
              # The alias that follows "as".
              # encoder.text_token match, match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
              encoder.text_token match, :ident
              from_import_state.pop
            elsif IDENT_KIND[match] == :keyword
              # Any other keyword ends the import statement: put it back and
              # let the :initial state handle it.
              unscan
              match = nil
              state = :initial
              next
            else
              encoder.text_token match, :include
            end
          elsif match = scan(/,/)
            from_import_state.pop if from_import_state.last == :as
            encoder.text_token match, :operator
          else
            from_import_state = []
            state = :initial
            next
          end

        else
          raise_inspect 'Unknown state', encoder, state

        end

        # An identifier right after a dot is an attribute, not a builtin.
        last_token_dot = match == '.'

      end

      if state == :string
        encoder.end_group string_type
      end

      encoder
    end

  end

end
end