module CodeRay
module Scanners

  # Scanner for C++ source code, registered under the language id :cpp.
  #
  # The scanner is a small state machine driven by #scan_tokens. It relies on
  # the scanning primitives (scan, scan_until, matched, getch, peek, eos?)
  # and on raise_inspect, all of which are expected to come from the Scanner
  # superclass (NOTE(review): presumably a StringScanner subclass - confirm).
  class CPlusPlus < Scanner

    include Streamable

    register_for :cpp
    file_extension 'cpp'
    title 'C++'

    # http://www.cppreference.com/wiki/keywords/start
    RESERVED_WORDS = [
      'and', 'and_eq', 'asm', 'bitand', 'bitor', 'break',
      'case', 'catch', 'class', 'compl', 'const_cast',
      'continue', 'default', 'delete', 'do', 'dynamic_cast', 'else',
      'enum', 'export', 'for', 'goto', 'if', 'namespace', 'new',
      'not', 'not_eq', 'or', 'or_eq', 'reinterpret_cast', 'return',
      'sizeof', 'static_cast', 'struct', 'switch', 'template',
      'throw', 'try', 'typedef', 'typeid', 'typename', 'union',
      'while', 'xor', 'xor_eq'
    ]

    # Identifiers reported as :pre_type.
    PREDEFINED_TYPES = [
      'bool', 'char', 'double', 'float', 'int', 'long',
      'short', 'signed', 'unsigned', 'wchar_t', 'string'
    ]

    # Identifiers reported as :pre_constant.
    PREDEFINED_CONSTANTS = [
      'false', 'true',
      'EOF', 'NULL',
    ]

    # Identifiers reported as :local_variable.
    PREDEFINED_VARIABLES = [
      'this'
    ]

    # Identifiers reported as :directive.
    DIRECTIVES = [
      'auto', 'const', 'explicit', 'extern', 'friend', 'inline', 'mutable', 'operator',
      'private', 'protected', 'public', 'register', 'static', 'using', 'virtual', 'void',
      'volatile'
    ]

    # Maps an identifier to its token kind; words that are in none of the
    # lists above fall back to :ident.
    IDENT_KIND = WordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(PREDEFINED_TYPES, :pre_type).
      add(PREDEFINED_VARIABLES, :local_variable).
      add(DIRECTIVES, :directive).
      add(PREDEFINED_CONSTANTS, :pre_constant)

    # What may follow a backslash inside a character or string literal.
    ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
    # \uXXXX and \UXXXXXXXX; only accepted inside string literals.
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

    # Tokenizes the rest of the input and appends the result to +tokens+.
    #
    # Every token is pushed as a two-element array <tt>[text, kind]</tt>.
    # String literals are additionally bracketed by <tt>[:open, :string]</tt>
    # and <tt>[:close, :string]</tt> marker entries.
    #
    # States:
    # :initial::             ordinary code.
    # :string::              inside a double-quoted string literal.
    # :include_expected::    right after an <tt>#include</tt> directive.
    # :class_name_expected:: right after the +class+ keyword.
    #
    # tokens::  collector that receives the tokens through <tt><<</tt>.
    # options:: accepted for interface compatibility; not read here.
    #
    # Returns +tokens+.
    def scan_tokens(tokens, options)

      state = :initial
      # True while an identifier directly followed by a single ':' should be
      # reported as a :label (start of input, after ';', '{', '}' and after the
      # ':' that ends a case/default clause).
      label_expected = true
      # True between a 'case'/'default' keyword and the next operator token.
      case_expected = false
      # label_expected as it was when the current preprocessor line started;
      # restored once that line ends.
      label_expected_before_preproc_line = nil
      in_preproc_line = false

      until eos?

        kind = nil
        match = nil

        case state

        when :initial

          if match = scan(/ \s+ | \\\n /x)
            # A real newline (not a backslash line continuation) ends the
            # current preprocessor line.
            if in_preproc_line && match != "\\\n" && match.index(?\n)
              in_preproc_line = false
              label_expected = label_expected_before_preproc_line
            end
            tokens << [match, :space]
            next

          elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            # Line comment (may be continued with a backslash) or block comment;
            # an unterminated block comment runs to the end of the input.
            kind = :comment

          elsif match = scan(/ \# \s* if \s* 0 /x)
            # "#if 0" sections are shown as comments, up to and including the
            # next #elif/#else/#endif line or the end of the input.
            match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
            kind = :comment

          elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
            label_expected = match =~ /[;\{\}]/
            if case_expected
              label_expected = true if match == ':'
              case_expected = false
            end
            kind = :operator

          elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
            kind = IDENT_KIND[match]
            # "name:" is a label, but "name::" is a scope resolution.
            if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/)
              kind = :label
              match << matched
            else
              label_expected = false
              if kind == :reserved
                case match
                when 'class'
                  state = :class_name_expected
                when 'case', 'default'
                  case_expected = true
                end
              end
            end

          elsif scan(/\$/)
            kind = :ident

          elsif match = scan(/L?"/)
            tokens << [:open, :string]
            # The wide-string prefix becomes its own token; only the quote is
            # emitted as the delimiter.
            if match[0] == ?L
              tokens << ['L', :modifier]
              match = '"'
            end
            state = :string
            kind = :delimiter

          elsif scan(/#[ \t]*(\w*)/)
            kind = :preprocessor
            in_preproc_line = true
            label_expected_before_preproc_line = label_expected
            state = :include_expected if self[1] == 'include'

          elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
            label_expected = false
            kind = :char

          elsif scan(/0[xX][0-9A-Fa-f]+/)
            label_expected = false
            kind = :hex

          # \d is part of the negative lookahead in the next two patterns so
          # that the regexp engine cannot satisfy the lookahead by giving back
          # digits: without it "12.5" would start with an integer token "1".
          elsif scan(/0[0-7]+(?![\d.eEfF])/)
            label_expected = false
            kind = :oct

          elsif scan(/\d+(?![\d.eEfF])L?L?/)
            label_expected = false
            kind = :integer

          # Alternatives are tried left to right, so the fraction and exponent
          # forms must come before the bare "digits plus optional f" form;
          # otherwise "0.5" or "1e5" would be cut after the first digit.
          elsif scan(/\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?|\d+[fF]?/)
            label_expected = false
            kind = :float

          else
            # Unknown character: consume it so that scanning always advances.
            getch
            kind = :error

          end

        when :string
          if scan(/[^\\"]+/)
            kind = :content
          elsif scan(/"/)
            tokens << ['"', :delimiter]
            tokens << [:close, :string]
            state = :initial
            label_expected = false
            next
          elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            kind = :char
          elsif scan(/ \\ | $ /x)
            # Backslash without a valid escape (or end of line): close the
            # string and flag the offending text as an error.
            tokens << [:close, :string]
            kind = :error
            state = :initial
            label_expected = false
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
          end

        when :include_expected
          # <header> or "header"; the closing delimiter is optional so that an
          # incomplete line is still highlighted.
          if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
            kind = :include
            state = :initial

          elsif match = scan(/\s+/)
            kind = :space
            state = :initial if match.index ?\n

          else
            # Anything else is handed back to the :initial state untouched.
            state = :initial
            next

          end

        when :class_name_expected
          if scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
            kind = :class
            state = :initial

          elsif match = scan(/\s+/)
            kind = :space

          else
            getch
            kind = :error
            state = :initial

          end

        else
          raise_inspect 'Unknown state', tokens

        end

        # Branches that did not keep the scan result fall back to the text of
        # the last successful scan.
        match ||= matched
        if $CODERAY_DEBUG && !kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]

      end

      # Input ended inside a string literal: balance the :open marker.
      if state == :string
        tokens << [:close, :string]
      end

      tokens
    end

  end

end
end