module CodeRay
  module Scanners

    # Scheme scanner for CodeRay (by closure).
    # Thanks to murphy for putting CodeRay into public.
    #
    # Splits Scheme source text into [text, kind] token pairs. String
    # literals are additionally wrapped in [:open, :string] /
    # [:close, :string] marker pairs.
    class Scheme < Scanner

      # TODO: function defs
      # TODO: built-in functions

      register_for :scheme
      file_extension 'scm'

      # Identifiers that are reported as :reserved instead of :ident.
      CORE_FORMS = %w[
        lambda let let* letrec syntax-case define-syntax let-syntax
        letrec-syntax begin define quote if or and cond case do delay
        quasiquote set! cons force call-with-current-continuation call/cc
      ]

      # Maps an identifier to its token kind; anything not listed in
      # CORE_FORMS falls back to :ident.
      IDENT_KIND = CaseIgnoringWordList.new(:ident).
        add(CORE_FORMS, :reserved)

      #IDENTIFIER_INITIAL = /[a-z!@\$%&\*\/\:<=>\?~_\^]/i
      #IDENTIFIER_SUBSEQUENT = /#{IDENTIFIER_INITIAL}|\d|\.|\+|-/
      #IDENTIFIER = /#{IDENTIFIER_INITIAL}#{IDENTIFIER_SUBSEQUENT}*|\+|-|\.{3}/
      # Flattened form of the three patterns above: an initial character
      # followed by subsequent characters, or one of the peculiar
      # identifiers "+", "-" and "...".
      IDENTIFIER = /[a-zA-Z!@$%&*\/:<=>?~_^][\w!@$%&*\/:<=>?~^.+\-]*|[+-]|\.\.\./

      # Number syntax, composed bottom-up: digits and radix/exactness
      # prefixes, then unsigned integers, decimals, reals, imaginary parts,
      # complex numbers and finally the prefixed number for each radix.
      DIGIT = /\d/
      DIGIT10 = DIGIT
      DIGIT16 = /[0-9a-f]/i
      DIGIT8 = /[0-7]/
      DIGIT2 = /[01]/
      RADIX16 = /\#x/i
      RADIX8 = /\#o/i
      RADIX2 = /\#b/i
      RADIX10 = /\#d/i
      EXACTNESS = /#i|#e/i
      SIGN = /[\+-]?/
      EXP_MARK = /[esfdl]/i
      EXP = /#{EXP_MARK}#{SIGN}#{DIGIT}+/
      SUFFIX = /#{EXP}?/
      PREFIX10 = /#{RADIX10}?#{EXACTNESS}?|#{EXACTNESS}?#{RADIX10}?/
      PREFIX16 = /#{RADIX16}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX16}/
      PREFIX8 = /#{RADIX8}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX8}/
      PREFIX2 = /#{RADIX2}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX2}/
      UINT10 = /#{DIGIT10}+#*/
      UINT16 = /#{DIGIT16}+#*/
      UINT8 = /#{DIGIT8}+#*/
      UINT2 = /#{DIGIT2}+#*/
      DECIMAL = /#{DIGIT10}+#+\.#*#{SUFFIX}|#{DIGIT10}+\.#{DIGIT10}*#*#{SUFFIX}|\.#{DIGIT10}+#*#{SUFFIX}|#{UINT10}#{EXP}/
      UREAL10 = /#{UINT10}\/#{UINT10}|#{DECIMAL}|#{UINT10}/
      UREAL16 = /#{UINT16}\/#{UINT16}|#{UINT16}/
      UREAL8 = /#{UINT8}\/#{UINT8}|#{UINT8}/
      UREAL2 = /#{UINT2}\/#{UINT2}|#{UINT2}/
      REAL10 = /#{SIGN}#{UREAL10}/
      REAL16 = /#{SIGN}#{UREAL16}/
      REAL8 = /#{SIGN}#{UREAL8}/
      REAL2 = /#{SIGN}#{UREAL2}/
      IMAG10 = /i|#{UREAL10}i/
      IMAG16 = /i|#{UREAL16}i/
      IMAG8 = /i|#{UREAL8}i/
      IMAG2 = /i|#{UREAL2}i/
      COMPLEX10 = /#{REAL10}@#{REAL10}|#{REAL10}\+#{IMAG10}|#{REAL10}-#{IMAG10}|\+#{IMAG10}|-#{IMAG10}|#{REAL10}/
      COMPLEX16 = /#{REAL16}@#{REAL16}|#{REAL16}\+#{IMAG16}|#{REAL16}-#{IMAG16}|\+#{IMAG16}|-#{IMAG16}|#{REAL16}/
      COMPLEX8 = /#{REAL8}@#{REAL8}|#{REAL8}\+#{IMAG8}|#{REAL8}-#{IMAG8}|\+#{IMAG8}|-#{IMAG8}|#{REAL8}/
      COMPLEX2 = /#{REAL2}@#{REAL2}|#{REAL2}\+#{IMAG2}|#{REAL2}-#{IMAG2}|\+#{IMAG2}|-#{IMAG2}|#{REAL2}/
      NUM10 = /#{PREFIX10}?#{COMPLEX10}/
      NUM16 = /#{PREFIX16}#{COMPLEX16}/
      NUM8 = /#{PREFIX8}#{COMPLEX8}/
      NUM2 = /#{PREFIX2}#{COMPLEX2}/
      NUM = /#{NUM10}|#{NUM16}|#{NUM8}|#{NUM2}/

      # Characters that may continue an identifier (the same class as the
      # "subsequent" part of IDENTIFIER).
      IDENTIFIER_TAIL = /[\w!@$%&*\/:<=>?~^.+\-]/

      # A number that begins with a sign or a dot and is not glued to an
      # identifier character. The trailing look-ahead keeps inputs such as
      # "->string" or "-if" on the identifier path.
      SIGNED_NUM = /(?=[-+.])(?:#{NUM})(?!#{IDENTIFIER_TAIL})/

      private

      # Tokenizes the scanner's input from the current position to the end.
      #
      # tokens  - collection that receives the tokens via <<; each entry is
      #           either [text, kind] or an [:open, :string] /
      #           [:close, :string] marker.
      # options - not used by this scanner.
      #
      # Returns tokens.
      def scan_tokens(tokens, options)
        state = :initial
        ident_kind = IDENT_KIND

        until eos?
          kind = match = nil

          case state
          when :initial
            if scan(/ \s+ | \\\n /x)
              kind = :space
            elsif scan(/['\(\[\)\]]|#\(/)
              kind = :operator_fat
            elsif scan(/;.*/)
              kind = :comment
            elsif scan(/#\\(?:newline|space|.?)/)
              kind = :char
            elsif scan(/#[ft]/)
              kind = :pre_constant
            elsif scan(SIGNED_NUM)
              # Has to run before the IDENTIFIER and "." branches: they
              # would otherwise consume the leading sign or dot and split
              # literals such as "-5", "+1.5e3" or ".5" into two tokens.
              kind = :integer
            elsif scan(IDENTIFIER)
              kind = ident_kind[matched]
            elsif scan(/\./)
              kind = :operator
            elsif scan(/"/)
              # The opening quote is emitted here, so skip the generic
              # token emission at the bottom of the loop.
              tokens << [:open, :string]
              state = :string
              tokens << ['"', :delimiter]
              next
            elsif scan(NUM) && !matched.empty?
              # Every numeric literal is reported with kind :integer.
              kind = :integer
            elsif getch
              # Nothing matched: consume one character and flag it.
              kind = :error
            end

          when :string
            if scan(/[^"\\]+/) || scan(/\\.?/)
              kind = :content
            elsif scan(/"/)
              tokens << ['"', :delimiter]
              tokens << [:close, :string]
              state = :initial
              next
            else
              raise_inspect "else case \" reached; %p not handled." % peek(1),
                tokens, state
            end

          else
            raise "else case reached"
          end

          match ||= matched
          if $CODERAY_DEBUG && !kind
            raise_inspect 'Error token %p in line %d' %
              [[match, kind], line], tokens
          end
          raise_inspect 'Empty token', tokens, state unless match

          tokens << [match, kind]

        end # until eos

        # Close a string literal that is still open at end of input.
        if state == :string
          tokens << [:close, :string]
        end

        tokens

      end #scan_tokens
    end #class
  end #module scanners
end #module coderay