module CodeRay
module Scanners

  # Scanner for C source code.
  #
  # Registers itself for the +:c+ language and the +c+ file extension.
  # The scanner is a small state machine with three states:
  #
  # * +:initial+          - ordinary code
  # * +:string+           - inside a double-quoted string literal
  # * +:include_expected+ - directly after a <tt>#include</tt> directive
  class C < Scanner

    include Streamable

    register_for :c
    file_extension 'c'

    # Words classified as +:reserved+.
    RESERVED_WORDS = [
      'asm', 'break', 'case', 'continue', 'default', 'do',
      'else', 'enum', 'for', 'goto', 'if', 'return',
      'sizeof', 'struct', 'switch', 'typedef', 'union', 'while',
      'restrict',  # added in C99
    ]

    # Words classified as +:pre_type+.
    PREDEFINED_TYPES = [
      'int', 'long', 'short', 'char',
      'signed', 'unsigned', 'float', 'double',
      'bool', 'complex',  # added in C99
    ]

    # Words classified as +:pre_constant+.
    PREDEFINED_CONSTANTS = [
      'EOF', 'NULL',
      'true', 'false',  # added in C99
    ]

    # Words classified as +:directive+.
    DIRECTIVES = [
      'auto', 'extern', 'register', 'static', 'void',
      'const', 'volatile',  # added in C89
      'inline',  # added in C99
    ]

    # Lookup from an identifier to its token kind; words that are in none
    # of the lists above are looked up with the +:ident+ default passed to
    # WordList.new.
    IDENT_KIND = WordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(PREDEFINED_TYPES, :pre_type).
      add(DIRECTIVES, :directive).
      add(PREDEFINED_CONSTANTS, :pre_constant)

    # What may follow a backslash in a character or string literal.
    ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
    # \uXXXX and \UXXXXXXXX escapes; only accepted inside string literals.
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

    # Tokenizes the remaining input and appends the result to +tokens+.
    #
    # Every token is appended as a <tt>[text, kind]</tt> pair. String
    # literals are additionally wrapped in <tt>[:open, :string]</tt> and
    # <tt>[:close, :string]</tt> markers.
    #
    # tokens::  collector that receives the tokens through <tt><<</tt>
    # options:: not used by this scanner
    #
    # Returns +tokens+.
    def scan_tokens tokens, options

      state = :initial
      # True while an identifier directly followed by ':' is to be treated
      # as a label (start of input, after ';', '{' or '}', and after the
      # ':' that ends a case/default).
      label_expected = true
      # True after the reserved words 'case' / 'default' until the next
      # operator token has been seen.
      case_expected = false
      # Value of label_expected saved when a preprocessor line starts, so
      # it can be restored once that line ends.
      label_expected_before_preproc_line = nil
      in_preproc_line = false

      until eos?

        kind = nil
        match = nil

        case state

        when :initial

          if match = scan(/ \s+ | \\\n /x)
            # A newline that is not a backslash continuation ends the
            # current preprocessor line.
            if in_preproc_line && match != "\\\n" && match.index(?\n)
              in_preproc_line = false
              label_expected = label_expected_before_preproc_line
            end
            tokens << [match, :space]
            next

          elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            kind = :comment

          elsif match = scan(/ \# \s* if \s* 0 /x)
            # Everything from "#if 0" up to and including the next
            # #elif / #else / #endif line (or the end of input) is
            # emitted as one comment token.
            match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
            kind = :comment

          elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
            label_expected = match =~ /[;\{\}]/
            if case_expected
              label_expected = true if match == ':'
              case_expected = false
            end
            kind = :operator

          elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
            kind = IDENT_KIND[match]
            if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/)
              # The colon becomes part of the label token.
              kind = :label
              match << matched
            else
              label_expected = false
              if kind == :reserved
                case match
                when 'case', 'default'
                  case_expected = true
                end
              end
            end

          elsif scan(/\$/)
            kind = :ident

          elsif match = scan(/L?"/)
            tokens << [:open, :string]
            if match[0] == ?L
              # The wide-string prefix gets its own token in front of the
              # opening delimiter.
              tokens << ['L', :modifier]
              match = '"'
            end
            state = :string
            kind = :delimiter

          elsif scan(/#[ \t]*(\w*)/)
            kind = :preprocessor
            in_preproc_line = true
            label_expected_before_preproc_line = label_expected
            state = :include_expected if self[1] == 'include'

          elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
            label_expected = false
            kind = :char

          elsif scan(/0[xX][0-9A-Fa-f]+/)
            label_expected = false
            kind = :hex

          # The negative lookaheads of the octal and integer patterns
          # include \d so that the regexp engine cannot backtrack into the
          # digit run and accept a prefix of the number (without it, "12.5"
          # produced the integer "1" and "0128" the octal "01").
          elsif scan(/0[0-7]+(?![\d.eEfF])/)
            label_expected = false
            kind = :oct

          elsif scan(/\d+(?![\d.eEfF])L?L?/)
            label_expected = false
            kind = :integer

          # Alternatives are tried left to right, so the fractional and
          # exponent forms must come before the plain digit run; otherwise
          # "3.14" would be split into "3" and ".14", and "1e5" into "1"
          # and the identifier "e5". The last alternative always matches
          # when the input starts with a digit.
          elsif scan(/\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?|\d+[fF]?/)
            label_expected = false
            kind = :float

          else
            getch
            kind = :error

          end

        when :string
          if scan(/[^\\\n"]+/)
            kind = :content
          elsif scan(/"/)
            tokens << ['"', :delimiter]
            tokens << [:close, :string]
            state = :initial
            label_expected = false
            next
          elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            kind = :char
          elsif scan(/ \\ | $ /x)
            # A stray backslash or an unterminated line: close the string
            # and report an error token.
            tokens << [:close, :string]
            kind = :error
            state = :initial
            label_expected = false
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
          end

        when :include_expected
          if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
            kind = :include
            state = :initial

          elsif match = scan(/\s+/)
            kind = :space
            state = :initial if match.index ?\n

          else
            # Not an include target; rescan the same position as ordinary
            # code.
            state = :initial
            next

          end

        else
          raise_inspect 'Unknown state', tokens

        end

        # Branches that did not capture the text themselves fall back to
        # the scanner's last match.
        match ||= matched
        if $CODERAY_DEBUG && !kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]

      end

      # Input ended inside a string literal: close the open string group.
      if state == :string
        tokens << [:close, :string]
      end

      tokens
    end

  end

end
end