To check out this repository, please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / .svn / pristine / 24 / 24f7f0414301f421feaefeee3ae3bf564c315361.svn-base @ 1297:0a574315af3e

History | View | Annotate | Download (10.3 KB)

1
# encoding: utf-8
2
module CodeRay
3
  module Scanners
4
    
5
    # Clojure scanner by Licenser.
6
    class Clojure < Scanner
7
      
8
      register_for :clojure
9
      file_extension 'clj'
10
      
11
      # Clojure special forms, including the `.` and `new` interop forms.
      # Registered as :keyword in IDENT_KIND.
      SPECIAL_FORMS = %w[
        def if do let quote var fn loop recur throw try catch monitor-enter monitor-exit .
        new
      ]  # :nodoc:

      # Names of core functions and macros. Also registered as :keyword in
      # IDENT_KIND. A few names (fn, let, loop) also appear in SPECIAL_FORMS;
      # both lists map to the same kind, so the overlap is harmless.
      CORE_FORMS = %w[
        + - -> ->> .. / * <= < = == >= > accessor aclone add-classpath add-watch
        agent agent-error agent-errors aget alength alias all-ns alter alter-meta!
        alter-var-root amap ancestors and apply areduce array-map aset aset-boolean
        aset-byte aset-char aset-double aset-float aset-int aset-long aset-short
        assert assoc assoc! assoc-in associative? atom await await-for bases bean
        bigdec bigint binding bit-and bit-and-not bit-clear bit-flip bit-not bit-or
        bit-set bit-shift-left bit-shift-right bit-test bit-xor boolean boolean-array
        booleans bound-fn bound-fn* bound? butlast byte byte-array bytes case cast char
        char-array char-escape-string char-name-string char? chars class class?
        clear-agent-errors clojure-version coll? comment commute comp comparator
        compare compare-and-set! compile complement concat cond condp conj conj!
        cons constantly construct-proxy contains? count counted? create-ns
        create-struct cycle dec decimal? declare definline defmacro defmethod defmulti
        defn defn- defonce defprotocol defrecord defstruct deftype delay delay?
        deliver denominator deref derive descendants disj disj! dissoc dissoc!
        distinct distinct? doall doc dorun doseq dosync dotimes doto double
        double-array doubles drop drop-last drop-while empty empty? ensure
        enumeration-seq error-handler error-mode eval even? every? extend
        extend-protocol extend-type extenders extends? false? ffirst file-seq
        filter find find-doc find-ns find-var first float float-array float?
        floats flush fn fn? fnext for force format future future-call future-cancel
        future-cancelled? future-done? future? gen-class gen-interface gensym get
        get-in get-method get-proxy-class get-thread-bindings get-validator hash
        hash-map hash-set identical? identity if-let if-not ifn? import in-ns
        inc init-proxy instance? int int-array integer? interleave intern
        interpose into into-array ints io! isa? iterate iterator-seq juxt key
        keys keyword keyword? last lazy-cat lazy-seq let letfn line-seq list list*
        list? load load-file load-reader load-string loaded-libs locking long
        long-array longs loop macroexpand macroexpand-1 make-array make-hierarchy
        map map? mapcat max max-key memfn memoize merge merge-with meta methods
        min min-key mod name namespace neg? newline next nfirst nil? nnext not
        not-any? not-empty not-every? not= ns ns-aliases ns-imports ns-interns
        ns-map ns-name ns-publics ns-refers ns-resolve ns-unalias ns-unmap nth
        nthnext num number? numerator object-array odd? or parents partial
        partition pcalls peek persistent! pmap pop pop! pop-thread-bindings
        pos? pr pr-str prefer-method prefers print print-namespace-doc
        print-str printf println println-str prn prn-str promise proxy
        proxy-mappings proxy-super push-thread-bindings pvalues quot rand
        rand-int range ratio? rationalize re-find re-groups re-matcher
        re-matches re-pattern re-seq read read-line read-string reduce ref
        ref-history-count ref-max-history ref-min-history ref-set refer
        refer-clojure reify release-pending-sends rem remove remove-all-methods
        remove-method remove-ns remove-watch repeat repeatedly replace replicate
        require reset! reset-meta! resolve rest restart-agent resultset-seq
        reverse reversible? rseq rsubseq satisfies? second select-keys send
        send-off seq seq? seque sequence sequential? set set-error-handler!
        set-error-mode! set-validator! set? short short-array shorts
        shutdown-agents slurp some sort sort-by sorted-map sorted-map-by
        sorted-set sorted-set-by sorted? special-form-anchor special-symbol?
        split-at split-with str string? struct struct-map subs subseq subvec
        supers swap! symbol symbol? sync syntax-symbol-anchor take take-last
        take-nth take-while test the-ns thread-bound? time to-array to-array-2d
        trampoline transient tree-seq true? type unchecked-add unchecked-dec
        unchecked-divide unchecked-inc unchecked-multiply unchecked-negate
        unchecked-remainder unchecked-subtract underive update-in update-proxy
        use val vals var-get var-set var? vary-meta vec vector vector-of vector?
        when when-first when-let when-not while with-bindings with-bindings*
        with-in-str with-local-vars with-meta with-open with-out-str
        with-precision xml-seq zero? zipmap
      ]  # :nodoc:

      # Literal constants and the earmuffed (*name*) vars listed here.
      # Registered as :predefined_constant in IDENT_KIND.
      PREDEFINED_CONSTANTS = %w[
        true false nil *1 *2 *3 *agent* *clojure-version* *command-line-args*
        *compile-files* *compile-path* *e *err* *file* *flush-on-newline*
        *in* *ns* *out* *print-dup* *print-length* *print-level* *print-meta*
        *print-readably* *read-eval* *warn-on-reflection*
      ]  # :nodoc:
84
      
85
      # Lookup table from identifier text to token kind: special and core forms
      # are registered as :keyword, predefined constants as :predefined_constant.
      # NOTE(review): WordList is defined elsewhere in CodeRay; presumably the
      # constructor argument (:ident here) is the kind returned for words that
      # were never added - confirm against WordList.
      IDENT_KIND = WordList.new(:ident).
        add(SPECIAL_FORMS, :keyword).
        add(CORE_FORMS, :keyword).
        add(PREDEFINED_CONSTANTS, :predefined_constant)

      # Kind given to the identifier that directly follows one of these
      # defining keywords (see the identifier branch of scan_tokens): the name
      # after def/defn/... is a :function, after ns a :namespace, and after
      # defprotocol/defrecord a :class. Built with WordList.new(nil), so
      # presumably any other keyword looks up as nil.
      KEYWORD_NEXT_TOKEN_KIND = WordList.new(nil).
        add(%w[ def defn defn- definline defmacro defmulti defmethod defstruct defonce declare ], :function).
        add(%w[ ns ], :namespace).
        add(%w[ defprotocol defrecord ], :class)
94
      
95
      # --- Identifiers and symbols -------------------------------------------

      # One identifier segment: a start character, an optional "=", then any
      # number of continuation characters (digits and "#" allowed only here).
      BASIC_IDENTIFIER = /[a-zA-Z$%*\/_+!?&<>\-=]=?[a-zA-Z0-9$&*+!\/_?<>\-\#]*/
      # Dot-separated segments with an optional "/segment" part and an optional
      # trailing dot, or a bare "." / "..". The (?!-\d) lookahead keeps a minus
      # sign followed by a digit from being read as an identifier.
      IDENTIFIER = /(?!-\d)(?:(?:#{BASIC_IDENTIFIER}\.)*#{BASIC_IDENTIFIER}(?:\/#{BASIC_IDENTIFIER})?\.?)|\.\.?/
      # An identifier prefixed with one or two colons.
      SYMBOL = /::?#{IDENTIFIER}/o

      # --- Numbers -----------------------------------------------------------
      # NOTE(review): the radix/exactness prefixes and complex forms below look
      # modelled on Scheme numeric literals - confirm this is intended for
      # Clojure input.

      # Digits per radix.
      DIGIT = /\d/
      DIGIT10 = DIGIT
      DIGIT16 = /[0-9a-f]/i
      DIGIT8 = /[0-7]/
      DIGIT2 = /[01]/

      # Radix and exactness markers (case-insensitive).
      RADIX16 = /\#x/i
      RADIX8 = /\#o/i
      RADIX2 = /\#b/i
      RADIX10 = /\#d/i
      EXACTNESS = /#i|#e/i

      # Sign, exponent and suffix.
      SIGN = /[\+-]?/
      EXP_MARK = /[esfdl]/i
      EXP = /#{EXP_MARK}#{SIGN}#{DIGIT}+/
      SUFFIX = /#{EXP}?/

      # Prefix combinations; only the radix-10 marker is optional.
      PREFIX10 = /#{RADIX10}?#{EXACTNESS}?|#{EXACTNESS}?#{RADIX10}?/
      PREFIX16 = /#{RADIX16}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX16}/
      PREFIX8 = /#{RADIX8}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX8}/
      PREFIX2 = /#{RADIX2}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX2}/

      # Unsigned integers: digits followed by any number of "#" characters.
      UINT10 = /#{DIGIT10}+#*/
      UINT16 = /#{DIGIT16}+#*/
      UINT8 = /#{DIGIT8}+#*/
      UINT2 = /#{DIGIT2}+#*/

      # Decimal notation (radix 10 only).
      DECIMAL = /#{DIGIT10}+#+\.#*#{SUFFIX}|#{DIGIT10}+\.#{DIGIT10}*#*#{SUFFIX}|\.#{DIGIT10}+#*#{SUFFIX}|#{UINT10}#{EXP}/

      # Unsigned reals: ratio, decimal (radix 10 only) or plain integer.
      UREAL10 = /#{UINT10}\/#{UINT10}|#{DECIMAL}|#{UINT10}/
      UREAL16 = /#{UINT16}\/#{UINT16}|#{UINT16}/
      UREAL8 = /#{UINT8}\/#{UINT8}|#{UINT8}/
      UREAL2 = /#{UINT2}\/#{UINT2}|#{UINT2}/

      # Optionally signed reals.
      REAL10 = /#{SIGN}#{UREAL10}/
      REAL16 = /#{SIGN}#{UREAL16}/
      REAL8 = /#{SIGN}#{UREAL8}/
      REAL2 = /#{SIGN}#{UREAL2}/

      # Imaginary parts: "i" alone or an unsigned real followed by "i".
      IMAG10 = /i|#{UREAL10}i/
      IMAG16 = /i|#{UREAL16}i/
      IMAG8 = /i|#{UREAL8}i/
      IMAG2 = /i|#{UREAL2}i/

      # Complex forms: polar (a@b), rectangular (a+bi, a-bi), pure imaginary,
      # or a plain real.
      COMPLEX10 = /#{REAL10}@#{REAL10}|#{REAL10}\+#{IMAG10}|#{REAL10}-#{IMAG10}|\+#{IMAG10}|-#{IMAG10}|#{REAL10}/
      COMPLEX16 = /#{REAL16}@#{REAL16}|#{REAL16}\+#{IMAG16}|#{REAL16}-#{IMAG16}|\+#{IMAG16}|-#{IMAG16}|#{REAL16}/
      COMPLEX8 = /#{REAL8}@#{REAL8}|#{REAL8}\+#{IMAG8}|#{REAL8}-#{IMAG8}|\+#{IMAG8}|-#{IMAG8}|#{REAL8}/
      COMPLEX2 = /#{REAL2}@#{REAL2}|#{REAL2}\+#{IMAG2}|#{REAL2}-#{IMAG2}|\+#{IMAG2}|-#{IMAG2}|#{REAL2}/

      # Complete literals per radix, and the union used by the scanner.
      NUM10 = /#{PREFIX10}?#{COMPLEX10}/
      NUM16 = /#{PREFIX16}#{COMPLEX16}/
      NUM8 = /#{PREFIX8}#{COMPLEX8}/
      NUM2 = /#{PREFIX2}#{COMPLEX2}/
      NUM = /#{NUM10}|#{NUM16}|#{NUM8}|#{NUM2}/
142
      
143
    protected
144
      
145
      # Tokenizes the remaining input as Clojure source and reports every
      # token to +encoder+.
      #
      # encoder:: receives text_token(text, kind), begin_group(kind) and
      #           end_group(kind) calls.
      # options:: accepted but not used by this method.
      #
      # Returns +encoder+.
      def scan_tokens encoder, options

        state = :initial
        kind = nil

        until eos?

          case state
          when :initial
            # Whitespace, backslash-newline and commas are all reported as space.
            if match = scan(/ \s+ | \\\n | , /x)
              encoder.text_token match, :space
            elsif match = scan(/['`\(\[\)\]\{\}]|\#[({]|~@?|[@\^]/)
              encoder.text_token match, :operator
            elsif match = scan(/;.*/)
              encoder.text_token match, :comment  # TODO: recognize (comment ...) too
            elsif match = scan(/\#?\\(?:newline|space|.?)/)
              encoder.text_token match, :char
            elsif match = scan(/\#[ft]/)
              encoder.text_token match, :predefined_constant
            elsif match = scan(/#{IDENTIFIER}/o)
              kind = IDENT_KIND[match]
              encoder.text_token match, kind
              if rest? && kind == :keyword
                # After a defining keyword (defn, ns, defrecord, ...), report the
                # identifier that follows with the kind that keyword introduces.
                if kind = KEYWORD_NEXT_TOKEN_KIND[match]
                  encoder.text_token match, :space if match = scan(/\s+/o)
                  encoder.text_token match, kind if match = scan(/#{IDENTIFIER}/o)
                end
              end
            elsif match = scan(/#{SYMBOL}/o)
              encoder.text_token match, :symbol
            elsif match = scan(/\./)
              # IDENTIFIER above already accepts a lone ".", so this branch is
              # effectively a fallback.
              encoder.text_token match, :operator
            elsif match = scan(/ \# \^ #{IDENTIFIER} /ox)
              encoder.text_token match, :type
            elsif match = scan(/ (\#)? " /x)
              # A leading "#" (capture group 1) opens a regexp, otherwise a string.
              state = self[1] ? :regexp : :string
              encoder.begin_group state
              encoder.text_token match, :delimiter
            elsif (match = scan(/#{NUM}/o)) && !match.empty?
              # A ".", "e" or "/" anywhere in the literal marks it as a float.
              encoder.text_token match, match[/[.e\/]/i] ? :float : :integer
            else
              # Nothing matched: report one character as an error and move on.
              encoder.text_token getch, :error
            end

          when :string, :regexp
            if match = scan(/[^"\\]+|\\.?/)
              encoder.text_token match, :content
            elsif match = scan(/"/)
              encoder.text_token match, :delimiter
              encoder.end_group state
              state = :initial
            else
              raise_inspect "else case \" reached; %p not handled." % peek(1),
                encoder, state
            end

          else
            raise 'else case reached'

          end

        end

        # Close a string/regexp group that was still open at end of input.
        if [:string, :regexp].include? state
          encoder.end_group state
        end

        encoder

      end
215
    end
216
  end
217
end