To check out this repository, please run `hg clone` on the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.
root / .svn / pristine / 24 / 24f7f0414301f421feaefeee3ae3bf564c315361.svn-base @ 1297:0a574315af3e
History | View | Annotate | Download (10.3 KB)
# encoding: utf-8
module CodeRay
module Scanners

  # Clojure scanner by Licenser.
  #
  # Tokenizes Clojure source for the encoder passed to #scan_tokens.
  # Identifiers are classified through the word lists below; string and
  # regexp literals are emitted as token groups.
  class Clojure < Scanner

    register_for :clojure
    file_extension 'clj'

    # Special forms; highlighted as :keyword (see IDENT_KIND).
    SPECIAL_FORMS = %w[
      def if do let quote var fn loop recur throw try catch monitor-enter monitor-exit .
      new
    ]  # :nodoc:

    # Core forms; also highlighted as :keyword (see IDENT_KIND).
    CORE_FORMS = %w[
      + - -> ->> .. / * <= < = == >= > accessor aclone add-classpath add-watch
      agent agent-error agent-errors aget alength alias all-ns alter alter-meta!
      alter-var-root amap ancestors and apply areduce array-map aset aset-boolean
      aset-byte aset-char aset-double aset-float aset-int aset-long aset-short
      assert assoc assoc! assoc-in associative? atom await await-for bases bean
      bigdec bigint binding bit-and bit-and-not bit-clear bit-flip bit-not bit-or
      bit-set bit-shift-left bit-shift-right bit-test bit-xor boolean boolean-array
      booleans bound-fn bound-fn* bound? butlast byte byte-array bytes case cast char
      char-array char-escape-string char-name-string char? chars class class?
      clear-agent-errors clojure-version coll? comment commute comp comparator
      compare compare-and-set! compile complement concat cond condp conj conj!
      cons constantly construct-proxy contains? count counted? create-ns
      create-struct cycle dec decimal? declare definline defmacro defmethod defmulti
      defn defn- defonce defprotocol defrecord defstruct deftype delay delay?
      deliver denominator deref derive descendants disj disj! dissoc dissoc!
      distinct distinct? doall doc dorun doseq dosync dotimes doto double
      double-array doubles drop drop-last drop-while empty empty? ensure
      enumeration-seq error-handler error-mode eval even? every? extend
      extend-protocol extend-type extenders extends? false? ffirst file-seq
      filter find find-doc find-ns find-var first float float-array float?
      floats flush fn fn? fnext for force format future future-call future-cancel
      future-cancelled? future-done? future? gen-class gen-interface gensym get
      get-in get-method get-proxy-class get-thread-bindings get-validator hash
      hash-map hash-set identical? identity if-let if-not ifn? import in-ns
      inc init-proxy instance? int int-array integer? interleave intern
      interpose into into-array ints io! isa? iterate iterator-seq juxt key
      keys keyword keyword? last lazy-cat lazy-seq let letfn line-seq list list*
      list? load load-file load-reader load-string loaded-libs locking long
      long-array longs loop macroexpand macroexpand-1 make-array make-hierarchy
      map map? mapcat max max-key memfn memoize merge merge-with meta methods
      min min-key mod name namespace neg? newline next nfirst nil? nnext not
      not-any? not-empty not-every? not= ns ns-aliases ns-imports ns-interns
      ns-map ns-name ns-publics ns-refers ns-resolve ns-unalias ns-unmap nth
      nthnext num number? numerator object-array odd? or parents partial
      partition pcalls peek persistent! pmap pop pop! pop-thread-bindings
      pos? pr pr-str prefer-method prefers print print-namespace-doc
      print-str printf println println-str prn prn-str promise proxy
      proxy-mappings proxy-super push-thread-bindings pvalues quot rand
      rand-int range ratio? rationalize re-find re-groups re-matcher
      re-matches re-pattern re-seq read read-line read-string reduce ref
      ref-history-count ref-max-history ref-min-history ref-set refer
      refer-clojure reify release-pending-sends rem remove remove-all-methods
      remove-method remove-ns remove-watch repeat repeatedly replace replicate
      require reset! reset-meta! resolve rest restart-agent resultset-seq
      reverse reversible? rseq rsubseq satisfies? second select-keys send
      send-off seq seq? seque sequence sequential? set set-error-handler!
      set-error-mode! set-validator! set? short short-array shorts
      shutdown-agents slurp some sort sort-by sorted-map sorted-map-by
      sorted-set sorted-set-by sorted? special-form-anchor special-symbol?
      split-at split-with str string? struct struct-map subs subseq subvec
      supers swap! symbol symbol? sync syntax-symbol-anchor take take-last
      take-nth take-while test the-ns thread-bound? time to-array to-array-2d
      trampoline transient tree-seq true? type unchecked-add unchecked-dec
      unchecked-divide unchecked-inc unchecked-multiply unchecked-negate
      unchecked-remainder unchecked-subtract underive update-in update-proxy
      use val vals var-get var-set var? vary-meta vec vector vector-of vector?
      when when-first when-let when-not while with-bindings with-bindings*
      with-in-str with-local-vars with-meta with-open with-out-str
      with-precision xml-seq zero? zipmap
    ]  # :nodoc:

    # Literals and earmuffed vars highlighted as :predefined_constant.
    PREDEFINED_CONSTANTS = %w[
      true false nil *1 *2 *3 *agent* *clojure-version* *command-line-args*
      *compile-files* *compile-path* *e *err* *file* *flush-on-newline*
      *in* *ns* *out* *print-dup* *print-length* *print-level* *print-meta*
      *print-readably* *read-eval* *warn-on-reflection*
    ]  # :nodoc:

    # Maps an identifier to its token kind; anything not listed is :ident.
    IDENT_KIND = WordList.new(:ident).
      add(SPECIAL_FORMS, :keyword).
      add(CORE_FORMS, :keyword).
      add(PREDEFINED_CONSTANTS, :predefined_constant)

    # For defining keywords, the token kind given to the identifier that
    # directly follows them (nil for every other keyword).
    KEYWORD_NEXT_TOKEN_KIND = WordList.new(nil).
      add(%w[ def defn defn- definline defmacro defmulti defmethod defstruct defonce declare ], :function).
      add(%w[ ns ], :namespace).
      add(%w[ defprotocol defrecord ], :class)

    # One identifier segment, without namespace or member separators.
    BASIC_IDENTIFIER = /[a-zA-Z$%*\/_+!?&<>\-=]=?[a-zA-Z0-9$&*+!\/_?<>\-\#]*/
    # Dotted and/or slash-qualified identifier (not starting with "-<digit>"),
    # or a bare "." / "..".
    IDENTIFIER = /(?!-\d)(?:(?:#{BASIC_IDENTIFIER}\.)*#{BASIC_IDENTIFIER}(?:\/#{BASIC_IDENTIFIER})?\.?)|\.\.?/
    # An identifier prefixed with ":" or "::".
    SYMBOL = /::?#{IDENTIFIER}/o

    # Number grammar, assembled bottom-up from digits to NUM.
    # NOTE(review): the radix (#x #o #b #d) and exactness (#i #e) prefixes
    # look like Scheme numeric syntax rather than Clojure - confirm whether
    # all of these alternatives are intended.
    DIGIT = /\d/
    DIGIT10 = DIGIT
    DIGIT16 = /[0-9a-f]/i
    DIGIT8 = /[0-7]/
    DIGIT2 = /[01]/
    RADIX16 = /\#x/i
    RADIX8 = /\#o/i
    RADIX2 = /\#b/i
    RADIX10 = /\#d/i
    EXACTNESS = /#i|#e/i
    SIGN = /[\+-]?/
    EXP_MARK = /[esfdl]/i
    EXP = /#{EXP_MARK}#{SIGN}#{DIGIT}+/
    SUFFIX = /#{EXP}?/
    PREFIX10 = /#{RADIX10}?#{EXACTNESS}?|#{EXACTNESS}?#{RADIX10}?/
    PREFIX16 = /#{RADIX16}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX16}/
    PREFIX8 = /#{RADIX8}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX8}/
    PREFIX2 = /#{RADIX2}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX2}/
    UINT10 = /#{DIGIT10}+#*/
    UINT16 = /#{DIGIT16}+#*/
    UINT8 = /#{DIGIT8}+#*/
    UINT2 = /#{DIGIT2}+#*/
    DECIMAL = /#{DIGIT10}+#+\.#*#{SUFFIX}|#{DIGIT10}+\.#{DIGIT10}*#*#{SUFFIX}|\.#{DIGIT10}+#*#{SUFFIX}|#{UINT10}#{EXP}/
    UREAL10 = /#{UINT10}\/#{UINT10}|#{DECIMAL}|#{UINT10}/
    UREAL16 = /#{UINT16}\/#{UINT16}|#{UINT16}/
    UREAL8 = /#{UINT8}\/#{UINT8}|#{UINT8}/
    UREAL2 = /#{UINT2}\/#{UINT2}|#{UINT2}/
    REAL10 = /#{SIGN}#{UREAL10}/
    REAL16 = /#{SIGN}#{UREAL16}/
    REAL8 = /#{SIGN}#{UREAL8}/
    REAL2 = /#{SIGN}#{UREAL2}/
    IMAG10 = /i|#{UREAL10}i/
    IMAG16 = /i|#{UREAL16}i/
    IMAG8 = /i|#{UREAL8}i/
    IMAG2 = /i|#{UREAL2}i/
    COMPLEX10 = /#{REAL10}@#{REAL10}|#{REAL10}\+#{IMAG10}|#{REAL10}-#{IMAG10}|\+#{IMAG10}|-#{IMAG10}|#{REAL10}/
    COMPLEX16 = /#{REAL16}@#{REAL16}|#{REAL16}\+#{IMAG16}|#{REAL16}-#{IMAG16}|\+#{IMAG16}|-#{IMAG16}|#{REAL16}/
    COMPLEX8 = /#{REAL8}@#{REAL8}|#{REAL8}\+#{IMAG8}|#{REAL8}-#{IMAG8}|\+#{IMAG8}|-#{IMAG8}|#{REAL8}/
    COMPLEX2 = /#{REAL2}@#{REAL2}|#{REAL2}\+#{IMAG2}|#{REAL2}-#{IMAG2}|\+#{IMAG2}|-#{IMAG2}|#{REAL2}/
    NUM10 = /#{PREFIX10}?#{COMPLEX10}/
    NUM16 = /#{PREFIX16}#{COMPLEX16}/
    NUM8 = /#{PREFIX8}#{COMPLEX8}/
    NUM2 = /#{PREFIX2}#{COMPLEX2}/
    NUM = /#{NUM10}|#{NUM16}|#{NUM8}|#{NUM2}/

  protected

    # Scans the input until the end of the string and sends every token to
    # +encoder+.
    #
    # encoder:: receives text_token / begin_group / end_group calls.
    # options:: accepted for the scanner interface; not read here.
    #
    # Returns +encoder+.
    def scan_tokens encoder, options

      state = :initial
      kind = nil

      until eos?

        case state
        when :initial
          # Whitespace, a backslash-newline pair, and commas are all emitted
          # as :space.
          if match = scan(/ \s+ | \\\n | , /x)
            encoder.text_token match, :space
          elsif match = scan(/['`\(\[\)\]\{\}]|\#[({]|~@?|[@\^]/)
            encoder.text_token match, :operator
          elsif match = scan(/;.*/)
            encoder.text_token match, :comment  # TODO: recognize (comment ...) too
          elsif match = scan(/\#?\\(?:newline|space|.?)/)
            encoder.text_token match, :char
          elsif match = scan(/\#[ft]/)
            encoder.text_token match, :predefined_constant
          elsif match = scan(/#{IDENTIFIER}/o)
            # This branch also consumes a bare "." or "..": IDENTIFIER's last
            # alternative matches them, and both are listed as keywords
            # ("." in SPECIAL_FORMS, ".." in CORE_FORMS). A separate
            # scan(/\./) branch after this one could therefore never fire.
            kind = IDENT_KIND[match]
            encoder.text_token match, kind
            if rest? && kind == :keyword
              # After a defining keyword, the next identifier is highlighted
              # as the thing being defined (function, namespace or class).
              if kind = KEYWORD_NEXT_TOKEN_KIND[match]
                encoder.text_token match, :space if match = scan(/\s+/)
                encoder.text_token match, kind if match = scan(/#{IDENTIFIER}/o)
              end
            end
          elsif match = scan(/#{SYMBOL}/o)
            encoder.text_token match, :symbol
          elsif match = scan(/ \# \^ #{IDENTIFIER} /ox)
            encoder.text_token match, :type
          elsif match = scan(/ (\#)? " /x)
            # A leading "#" (capture group 1) opens a regexp, otherwise a string.
            state = self[1] ? :regexp : :string
            encoder.begin_group state
            encoder.text_token match, :delimiter
          elsif match = scan(/#{NUM}/o) and not matched.empty?
            # Anything containing ".", "e"/"E" or "/" is reported as :float.
            encoder.text_token match, match[/[.e\/]/i] ? :float : :integer
          else
            encoder.text_token getch, :error
          end

        when :string, :regexp
          # Content runs up to the next quote or backslash; a backslash is
          # consumed together with the character after it, if any.
          if match = scan(/[^"\\]+|\\.?/)
            encoder.text_token match, :content
          elsif match = scan(/"/)
            encoder.text_token match, :delimiter
            encoder.end_group state
            state = :initial
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1),
              encoder, state
          end

        else
          raise 'else case reached'

        end

      end

      # Close a string/regexp group left open at end of input.
      if [:string, :regexp].include? state
        encoder.end_group state
      end

      encoder

    end
  end
end
end