To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.
root / .svn / pristine / d7 / d79648acb8390e7dfe4f791129d77e634240a8ef.svn-base @ 1297:0a574315af3e
History | View | Annotate | Download (9.83 KB)
| 1 |
module CodeRay |
|---|---|
| 2 |
module Scanners |
| 3 |
|
| 4 |
# Scanner for Python. Supports Python 3. |
| 5 |
# |
| 6 |
# Based on pygments' PythonLexer, see |
| 7 |
# http://dev.pocoo.org/projects/pygments/browser/pygments/lexers/agile.py. |
| 8 |
class Python < Scanner |
| 9 |
|
| 10 |
register_for :python
file_extension 'py'

# Reserved words; identifiers found here are emitted with kind :keyword.
KEYWORDS = [
  'and', 'as', 'assert', 'break', 'class', 'continue', 'def',
  'del', 'elif', 'else', 'except', 'finally', 'for',
  'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'not',
  'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield',
  'nonlocal',  # new in Python 3
]  # :nodoc:

# Words that are only keywords in older Python; the scanner decides per
# occurrence (call syntax => identifier, otherwise keyword).
OLD_KEYWORDS = [
  'exec', 'print',  # gone in Python 3
]  # :nodoc:

# Built-in functions and types, emitted with kind :predefined.
PREDEFINED_METHODS_AND_TYPES = %w[
  __import__ abs all any apply basestring bin bool buffer
  bytearray bytes callable chr classmethod cmp coerce compile
  complex delattr dict dir divmod enumerate eval execfile exit
  file filter float frozenset getattr globals hasattr hash hex id
  input int intern isinstance issubclass iter len list locals
  long map max min next object oct open ord pow property range
  raw_input reduce reload repr reversed round set setattr slice
  sorted staticmethod str sum super tuple type unichr unicode
  vars xrange zip
]  # :nodoc:

# Built-in exception and warning names, emitted with kind :exception.
PREDEFINED_EXCEPTIONS = %w[
  ArithmeticError AssertionError AttributeError
  BaseException DeprecationWarning EOFError EnvironmentError
  Exception FloatingPointError FutureWarning GeneratorExit IOError
  ImportError ImportWarning IndentationError IndexError KeyError
  KeyboardInterrupt LookupError MemoryError NameError
  NotImplemented NotImplementedError OSError OverflowError
  OverflowWarning PendingDeprecationWarning ReferenceError
  RuntimeError RuntimeWarning StandardError StopIteration
  SyntaxError SyntaxWarning SystemError SystemExit TabError
  TypeError UnboundLocalError UnicodeDecodeError
  UnicodeEncodeError UnicodeError UnicodeTranslateError
  UnicodeWarning UserWarning ValueError Warning ZeroDivisionError
]  # :nodoc:

# Well-known names emitted with kind :predefined_constant.
PREDEFINED_VARIABLES_AND_CONSTANTS = [
  'False', 'True', 'None',  # "keywords" since Python 3
  'self', 'Ellipsis', 'NotImplemented',
]  # :nodoc:

# Lookup table from an identifier to its token kind.
# NOTE(review): WordList is defined elsewhere; presumably :ident is the
# kind returned for words that were never added -- confirm.
IDENT_KIND = WordList.new(:ident).
  add(KEYWORDS, :keyword).
  add(OLD_KEYWORDS, :old_keyword).
  add(PREDEFINED_METHODS_AND_TYPES, :predefined).
  add(PREDEFINED_VARIABLES_AND_CONSTANTS, :predefined_constant).
  add(PREDEFINED_EXCEPTIONS, :exception)  # :nodoc:

# An identifier: one word character that is not a digit, then word characters.
NAME = / [^\W\d] \w* /x  # :nodoc:
# What may follow a backslash in a simple, hex or octal escape sequence.
ESCAPE = / [abfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
# What may follow a backslash in a \uXXXX, \UXXXXXXXX or \N{name} escape.
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} | N\{[-\w ]+\} /x  # :nodoc:

OPERATOR = /
  \.\.\. |          # ellipsis
  \.(?!\d) |        # dot but not decimal point
  [,;:()\[\]{}] |   # simple delimiters
  \/\/=? | \*\*=? | # special math
  [-+*\/%&|^]=? |   # ordinary math and binary logic
  [~`] |            # binary complement and inspection
  <<=? | >>=? | [<>=]=? | !=  # comparison and assignment
/x  # :nodoc:

# Per-delimiter cache: the regexp that matches the closing delimiter itself.
STRING_DELIMITER_REGEXP = Hash.new { |h, delimiter|
  h[delimiter] = Regexp.union delimiter  # :nodoc:
}

# Per-delimiter cache: the regexp that matches the shortest run of plain
# string content up to a backslash, the end of the line or the delimiter.
STRING_CONTENT_REGEXP = Hash.new { |h, delimiter|
  h[delimiter] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delimiter)} ) /x  # :nodoc:
}

# Scanner state to enter after a given keyword has been seen.
# NOTE(review): presumably :initial is the state for every other word -- confirm
# against WordList.
DEF_NEW_STATE = WordList.new(:initial).
  add(%w(def), :def_expected).
  add(%w(import from), :include_expected).
  add(%w(class), :class_expected)  # :nodoc:

# A dotted name, or a lone "*", as it appears in an import statement.
DESCRIPTOR = /
  #{NAME}
  (?: \. #{NAME} )*
  | \*
/x  # :nodoc:

# Matches when optional indentation is followed by a triple-quoted string,
# which the scanner then treats as a docstring.
# The u/r prefix is matched case-insensitively, like the string-start
# pattern used while scanning, so that e.g. R"""...""" or UR'''...''' at the
# start of a line is recognized as well.
DOCSTRING_COMING = /
  [ \t]* u?r? ("""|''')
/xi  # :nodoc:
| 100 |
|
| 101 |
protected |
| 102 |
|
| 103 |
# Scans the Python source held by this scanner from the current position to
# the end and reports every token to +encoder+.
#
# encoder:: receives text_token(text, kind), begin_group(kind) and
#           end_group(kind) calls; string literals are wrapped in a
#           :string or :docstring group.
# options:: not used by this method.
#
# Returns +encoder+.
#
# The method is a state machine over +state+:
# :initial::          ordinary code (operators, strings, names, numbers).
# :string::           inside a string literal opened by +string_delimiter+.
# :def_expected::     directly after "def"; the next name is a :method.
# :class_expected::   directly after "class"; the next name is a :class.
# :include_expected:: inside an import statement; names are :include.
def scan_tokens encoder, options

  state = :initial
  string_delimiter = nil
  string_raw = false
  string_type = nil
  docstring_coming = match?(/#{DOCSTRING_COMING}/o)
  last_token_dot = false
  unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
  # Keywords seen so far in the current import statement (:from, :import, :as).
  from_import_state = []

  until eos?

    if state == :string
      if match = scan(STRING_DELIMITER_REGEXP[string_delimiter])
        encoder.text_token match, :delimiter
        encoder.end_group string_type
        string_type = nil
        state = :initial
        next
      elsif string_delimiter.size == 3 && match = scan(/\n/)
        # Only triple-quoted strings may span several lines.
        encoder.text_token match, :content
      elsif match = scan(STRING_CONTENT_REGEXP[string_delimiter])
        encoder.text_token match, :content
      elsif !string_raw && match = scan(/ \\ #{ESCAPE} /ox)
        encoder.text_token match, :char
      elsif match = scan(/ \\ #{UNICODE_ESCAPE} /ox)
        encoder.text_token match, :char
      elsif match = scan(/ \\ . /x)
        # Backslash sequence that is not an escape here (e.g. in a raw string).
        encoder.text_token match, :content
      elsif match = scan(/ \\ | $ /x)
        # Unterminated string: close the group and flag the position.
        encoder.end_group string_type
        string_type = nil
        encoder.text_token match, :error
        state = :initial
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), encoder, state
      end

    elsif match = scan(/ [ \t]+ | \\?\n /x)
      encoder.text_token match, :space
      if match == "\n"
        if state == :include_expected
          # A plain newline ends the import statement. Drop the keywords
          # remembered for it as well, so they neither leak into the next
          # statement nor pile up over the whole file.
          state = :initial
          from_import_state = []
        end
        docstring_coming = true if match?(/#{DOCSTRING_COMING}/o)
      end
      next

    elsif match = scan(/ \# [^\n]* /mx)
      encoder.text_token match, :comment
      next

    elsif state == :initial

      if match = scan(/#{OPERATOR}/o)
        encoder.text_token match, :operator

      elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
        string_delimiter = self[2]
        string_type = docstring_coming ? :docstring : :string
        docstring_coming = false if docstring_coming
        encoder.begin_group string_type
        string_raw = false
        modifiers = self[1]
        unless modifiers.empty?
          # The pattern above is case-insensitive, so the raw prefix may be
          # written as "r" or "R" (also within "UR", "uR", ...).
          string_raw = modifiers.downcase.include?('r')
          encoder.text_token modifiers, :modifier
          match = string_delimiter
        end
        state = :string
        encoder.text_token match, :delimiter

      # TODO: backticks

      elsif match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
        kind = IDENT_KIND[match]
        # TODO: keyword arguments
        kind = :ident if last_token_dot  # attribute access: never special
        if kind == :old_keyword
          # "print(...)" is a function call, "print x" is a statement.
          kind = check(/\(/) ? :ident : :keyword
        elsif kind == :predefined && check(/ *=/)
          # A predefined name followed by "=" is treated as a plain identifier.
          kind = :ident
        elsif kind == :keyword
          state = DEF_NEW_STATE[match]
          from_import_state << match.to_sym if state == :include_expected
        end
        encoder.text_token match, kind

      elsif match = scan(/@[a-zA-Z0-9_.]+[lL]?/)
        encoder.text_token match, :decorator

      elsif match = scan(/0[xX][0-9A-Fa-f]+[lL]?/)
        encoder.text_token match, :hex

      elsif match = scan(/0[bB][01]+[lL]?/)
        encoder.text_token match, :binary

      elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
        if scan(/[jJ]/)
          match << matched
          encoder.text_token match, :imaginary
        else
          encoder.text_token match, :float
        end

      elsif match = scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
        encoder.text_token match, :octal

      elsif match = scan(/\d+([lL])?/)
        # A long suffix and an imaginary suffix exclude each other.
        if self[1] == nil && scan(/[jJ]/)
          match << matched
          encoder.text_token match, :imaginary
        else
          encoder.text_token match, :integer
        end

      else
        encoder.text_token getch, :error

      end

    elsif state == :def_expected
      state = :initial
      if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
        encoder.text_token match, :method
      else
        next
      end

    elsif state == :class_expected
      state = :initial
      if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
        encoder.text_token match, :class
      else
        next
      end

    elsif state == :include_expected
      if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o)
        if match == 'as'
          encoder.text_token match, :keyword
          from_import_state << :as
        elsif from_import_state.first == :from && match == 'import'
          encoder.text_token match, :keyword
          from_import_state << :import
        elsif from_import_state.last == :as
          # encoder.text_token match, match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
          encoder.text_token match, :ident
          from_import_state.pop
        elsif IDENT_KIND[match] == :keyword
          # Some other keyword: give it back and let the :initial state scan it.
          unscan
          match = nil
          state = :initial
          next
        else
          encoder.text_token match, :include
        end
      elsif match = scan(/,/)
        from_import_state.pop if from_import_state.last == :as
        encoder.text_token match, :operator
      else
        from_import_state = []
        state = :initial
        next
      end

    else
      raise_inspect 'Unknown state', encoder, state

    end

    last_token_dot = match == '.'

  end

  # Input ended inside a string literal: close its group.
  if state == :string
    encoder.end_group string_type
  end

  encoder
end
| 283 |
|
| 284 |
end |
| 285 |
|
| 286 |
end |
| 287 |
end |