To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.
root / vendor / gems / coderay-0.9.7 / lib / coderay / scanners / python.rb @ 442:753f1380d6bc
History | View | Annotate | Download (8.97 KB)
module CodeRay
module Scanners

  # Scanner for Python source code.
  #
  # Based on pygments' PythonLexer, see
  # http://dev.pocoo.org/projects/pygments/browser/pygments/lexers/agile.py.
  class Python < Scanner

    include Streamable

    register_for :python
    file_extension 'py'

    # Reserved words; tagged :keyword by IDENT_KIND.
    KEYWORDS = [
      'and', 'as', 'assert', 'break', 'class', 'continue', 'def',
      'del', 'elif', 'else', 'except', 'finally', 'for',
      'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'not',
      'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield',
      'nonlocal',  # new in Python 3
    ]

    # Words that are keywords in Python 2 only. scan_tokens treats them as
    # plain identifiers when they are directly followed by "(".
    OLD_KEYWORDS = [
      'exec', 'print',  # gone in Python 3
    ]

    # Builtin functions and types; tagged :predefined by IDENT_KIND.
    PREDEFINED_METHODS_AND_TYPES = %w[
      __import__ abs all any apply basestring bin bool buffer
      bytearray bytes callable chr classmethod cmp coerce compile
      complex delattr dict dir divmod enumerate eval execfile exit
      file filter float frozenset getattr globals hasattr hash hex id
      input int intern isinstance issubclass iter len list locals
      long map max min next object oct open ord pow property range
      raw_input reduce reload repr reversed round set setattr slice
      sorted staticmethod str sum super tuple type unichr unicode
      vars xrange zip
    ]

    # Builtin exception and warning names; tagged :exception by IDENT_KIND.
    PREDEFINED_EXCEPTIONS = %w[
      ArithmeticError AssertionError AttributeError
      BaseException DeprecationWarning EOFError EnvironmentError
      Exception FloatingPointError FutureWarning GeneratorExit IOError
      ImportError ImportWarning IndentationError IndexError KeyError
      KeyboardInterrupt LookupError MemoryError NameError
      NotImplemented NotImplementedError OSError OverflowError
      OverflowWarning PendingDeprecationWarning ReferenceError
      RuntimeError RuntimeWarning StandardError StopIteration
      SyntaxError SyntaxWarning SystemError SystemExit TabError
      TypeError UnboundLocalError UnicodeDecodeError
      UnicodeEncodeError UnicodeError UnicodeTranslateError
      UnicodeWarning UserWarning ValueError Warning ZeroDivisionError
    ]

    # Well-known names and constants; tagged :pre_constant by IDENT_KIND.
    PREDEFINED_VARIABLES_AND_CONSTANTS = [
      'False', 'True', 'None',  # "keywords" since Python 3
      'self', 'Ellipsis', 'NotImplemented',
    ]

    # Maps an identifier to its token kind; anything not listed is :ident.
    IDENT_KIND = WordList.new(:ident).
      add(KEYWORDS, :keyword).
      add(OLD_KEYWORDS, :old_keyword).
      add(PREDEFINED_METHODS_AND_TYPES, :predefined).
      add(PREDEFINED_VARIABLES_AND_CONSTANTS, :pre_constant).
      add(PREDEFINED_EXCEPTIONS, :exception)

    # An identifier: one word character that is not a digit, then word characters.
    NAME = / [^\W\d] \w* /x
    # What may follow a backslash in a simple escape sequence.
    ESCAPE = / [abfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
    # What may follow a backslash in \uXXXX, \UXXXXXXXX and \N{name} escapes.
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} | N\{[-\w ]+\} /x

    OPERATOR = /
      \.\.\. |          # ellipsis
      \.(?!\d) |        # dot but not decimal point
      [,;:()\[\]{}] |   # simple delimiters
      \/\/=? | \*\*=? | # special math
      [-+*\/%&|^]=? |   # ordinary math and binary logic
      [~`] |            # binary complement and inspection
      <<=? | >>=? | [<>=]=? | !=  # comparison and assignment
    /x

    # Per-delimiter cache of the regexp that matches the closing delimiter.
    STRING_DELIMITER_REGEXP = Hash.new do |h, delimiter|
      h[delimiter] = Regexp.union delimiter
    end

    # Per-delimiter cache of the regexp that matches a run of plain string
    # content: characters other than backslash and newline, taken lazily up
    # to a backslash, the end of the line, or the delimiter.
    STRING_CONTENT_REGEXP = Hash.new do |h, delimiter|
      h[delimiter] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delimiter)} ) /x
    end

    # Scanner state entered after a keyword; :initial for every other word.
    DEF_NEW_STATE = WordList.new(:initial).
      add(%w(def), :def_expected).
      add(%w(import from), :include_expected).
      add(%w(class), :class_expected)

    # A dotted name, or "*", as it appears in an import clause.
    DESCRIPTOR = /
      #{NAME}
      (?: \. #{NAME} )*
      | \*
    /x

    # Tokenizes the scanner's input and appends the tokens to +tokens+.
    #
    # Tokens are [text, kind] pairs; strings are wrapped in [:open, :string]
    # and [:close, :string] markers.
    #
    # States:
    #   :initial          - ordinary code
    #   :string           - inside a string literal
    #   :def_expected     - directly after "def"; the next name is a :method
    #   :class_expected   - directly after "class"; the next name is a :class
    #   :include_expected - inside an import clause, after "import" or "from"
    #
    # +options+ is accepted for interface compatibility and is not read here.
    #
    # Returns +tokens+.
    def scan_tokens tokens, options

      state = :initial
      string_delimiter = nil
      string_raw = false
      last_token_dot = false
      unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
      # Keywords seen so far in the current import clause (:from, :import, :as).
      from_import_state = []

      until eos?

        kind = nil
        match = nil

        if state == :string
          if scan(STRING_DELIMITER_REGEXP[string_delimiter])
            tokens << [matched, :delimiter]
            tokens << [:close, :string]
            state = :initial
            next
          elsif string_delimiter.size == 3 && scan(/\n/)
            # Only triple-quoted strings may span lines.
            kind = :content
          elsif scan(STRING_CONTENT_REGEXP[string_delimiter])
            kind = :content
          elsif !string_raw && scan(/ \\ #{ESCAPE} /ox)
            kind = :char
          elsif scan(/ \\ #{UNICODE_ESCAPE} /ox)
            kind = :char
          elsif scan(/ \\ . /x)
            kind = :content
          elsif scan(/ \\ | $ /x)
            # Lone backslash, or end of line inside a single-line string:
            # close the string and flag an error.
            tokens << [:close, :string]
            kind = :error
            state = :initial
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens, state
          end

        elsif match = scan(/ [ \t]+ | \\\n /x)
          # Blanks and backslash line continuations keep the current state.
          tokens << [match, :space]
          next

        elsif match = scan(/\n/)
          tokens << [match, :space]
          if state == :include_expected
            # A newline ends the import clause. Drop its bookkeeping as well,
            # so entries do not pile up for every import line in the input.
            state = :initial
            from_import_state = []
          end
          next

        elsif match = scan(/ \# [^\n]* /mx)
          tokens << [match, :comment]
          next

        elsif state == :initial

          if scan(/#{OPERATOR}/o)
            kind = :operator

          elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
            tokens << [:open, :string]
            string_delimiter = self[2]
            string_raw = false
            modifiers = self[1]
            unless modifiers.empty?
              # The prefix is matched case-insensitively, so "R" and "UR"
              # must switch on raw mode just like "r" and "ur".
              string_raw = modifiers.downcase.include?('r')
              tokens << [modifiers, :modifier]
              match = string_delimiter
            end
            state = :string
            kind = :delimiter

          # TODO: backticks

          elsif match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
            kind = IDENT_KIND[match]
            # TODO: keyword arguments
            kind = :ident if last_token_dot
            if kind == :old_keyword
              # "print(" / "exec(" is a function call, not a statement.
              kind = check(/\(/) ? :ident : :keyword
            elsif kind == :predefined && check(/ *=/)
              # A builtin name followed by "=" is treated as a plain identifier.
              kind = :ident
            elsif kind == :keyword
              state = DEF_NEW_STATE[match]
              from_import_state << match.to_sym if state == :include_expected
            end

          elsif scan(/@[a-zA-Z0-9_.]+[lL]?/)
            kind = :decorator

          elsif scan(/0[xX][0-9A-Fa-f]+[lL]?/)
            kind = :hex

          elsif scan(/0[bB][01]+[lL]?/)
            kind = :bin

          elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
            kind = :float
            if scan(/[jJ]/)
              match << matched
              kind = :imaginary
            end

          elsif scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
            kind = :oct

          elsif match = scan(/\d+([lL])?/)
            kind = :integer
            # A "j" suffix is only accepted when there is no long suffix.
            if self[1].nil? && scan(/[jJ]/)
              match << matched
              kind = :imaginary
            end

          else
            getch
            kind = :error

          end

        elsif state == :def_expected
          state = :initial
          if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
            kind = :method
          else
            next
          end

        elsif state == :class_expected
          state = :initial
          if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
            kind = :class
          else
            next
          end

        elsif state == :include_expected
          if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o)
            kind = :include
            if match == 'as'
              kind = :keyword
              from_import_state << :as
            elsif from_import_state.first == :from && match == 'import'
              kind = :keyword
              from_import_state << :import
            elsif from_import_state.last == :as
              # kind = match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
              kind = :ident
              from_import_state.pop
            elsif IDENT_KIND[match] == :keyword
              # Any other keyword ends the import clause; put it back and
              # rescan it in the :initial state.
              unscan
              match = nil
              state = :initial
              next
            end
          elsif match = scan(/,/)
            from_import_state.pop if from_import_state.last == :as
            kind = :operator
          else
            # Anything else ends the import clause.
            from_import_state = []
            state = :initial
            next
          end

        else
          raise_inspect 'Unknown state', tokens, state

        end

        match ||= matched
        if $CODERAY_DEBUG and not kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens, state
        end
        raise_inspect 'Empty token', tokens, state unless match

        # Remembered so that a name directly after "." is always an :ident.
        last_token_dot = match == '.'

        tokens << [match, kind]

      end

      # Close a string that is still open at the end of the input.
      if state == :string
        tokens << [:close, :string]
      end

      tokens
    end

  end

end
end