module CodeRay
module Scanners

  # Scanner for C++.
  #
  # Aliases: +cplusplus+, c++
  class CPlusPlus < Scanner

    register_for :cpp
    file_extension 'cpp'
    title 'C++'

    #-- http://www.cppreference.com/wiki/keywords/start
    KEYWORDS = [
      'and', 'and_eq', 'asm', 'bitand', 'bitor', 'break',
      'case', 'catch', 'class', 'compl', 'const_cast',
      'continue', 'default', 'delete', 'do', 'dynamic_cast', 'else',
      'enum', 'export', 'for', 'goto', 'if', 'namespace', 'new',
      'not', 'not_eq', 'or', 'or_eq', 'reinterpret_cast', 'return',
      'sizeof', 'static_cast', 'struct', 'switch', 'template',
      'throw', 'try', 'typedef', 'typeid', 'typename', 'union',
      'while', 'xor', 'xor_eq',
    ]  # :nodoc:

    PREDEFINED_TYPES = [
      'bool', 'char', 'double', 'float', 'int', 'long',
      'short', 'signed', 'unsigned', 'wchar_t', 'string',
    ]  # :nodoc:
    PREDEFINED_CONSTANTS = [
      'false', 'true',
      'EOF', 'NULL',
    ]  # :nodoc:
    PREDEFINED_VARIABLES = [
      'this',
    ]  # :nodoc:
    DIRECTIVES = [
      'auto', 'const', 'explicit', 'extern', 'friend', 'inline', 'mutable', 'operator',
      'private', 'protected', 'public', 'register', 'static', 'using', 'virtual', 'void',
      'volatile',
    ]  # :nodoc:

    # Lookup from an identifier to its token kind. The scanner compares the
    # result against :ident to decide whether a word is a plain identifier
    # (presumably :ident is what WordList returns for unlisted words).
    IDENT_KIND = WordList.new(:ident).
      add(KEYWORDS, :keyword).
      add(PREDEFINED_TYPES, :predefined_type).
      add(PREDEFINED_VARIABLES, :local_variable).
      add(DIRECTIVES, :directive).
      add(PREDEFINED_CONSTANTS, :predefined_constant)  # :nodoc:

    # What may follow a backslash in a character or string literal.
    ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
    # \uXXXX and \UXXXXXXXX escapes; only used inside string literals here.
    UNICODE_ESCAPE =  / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x  # :nodoc:

  protected

    # Tokenizes the input and feeds the tokens to +encoder+.
    #
    # The scanner is a small state machine:
    # * <tt>:initial</tt> - ordinary code.
    # * <tt>:string</tt> - inside a double-quoted string literal (the tokens
    #   are wrapped in a :string group).
    # * <tt>:include_expected</tt> - directly after <tt>#include</tt>.
    # * <tt>:class_name_expected</tt> - directly after the +class+ keyword.
    #
    # +options+ is accepted but not used by this method.
    #
    # Returns +encoder+.
    def scan_tokens encoder, options

      state = :initial
      # True while an identifier followed by a single ':' counts as a label.
      label_expected = true
      # True between a 'case'/'default' keyword and the next operator token.
      case_expected = false
      # label_expected is saved here when a preprocessor line starts and
      # restored once that line ends.
      label_expected_before_preproc_line = nil
      in_preproc_line = false

      until eos?

        case state

        when :initial

          if match = scan(/ \s+ | \\\n /x)
            # A newline that is not a line continuation ends the
            # preprocessor line.
            if in_preproc_line && match != "\\\n" && match.index(?\n)
              in_preproc_line = false
              label_expected = label_expected_before_preproc_line
            end
            encoder.text_token match, :space

          elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            encoder.text_token match, :comment

          elsif match = scan(/ \# \s* if \s* 0 /x)
            # Everything up to the next #elif/#else/#endif line (or the end of
            # the input) is emitted as one comment token.
            match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
            encoder.text_token match, :comment

          elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
            # A label may follow once the operator run contains ';', '{' or '}'.
            label_expected = match =~ /[;\{\}]/
            if case_expected
              # The ':' closing "case x" / "default" also allows a label next.
              label_expected = true if match == ':'
              case_expected = false
            end
            encoder.text_token match, :operator

          elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
            kind = IDENT_KIND[match]
            # "name:" is a label, but "name::" (scope resolution) is not.
            if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/)
              kind = :label
              match << matched
            else
              label_expected = false
              if kind == :keyword
                case match
                when 'class'
                  state = :class_name_expected
                when 'case', 'default'
                  case_expected = true
                end
              end
            end
            encoder.text_token match, kind

          elsif match = scan(/\$/)
            encoder.text_token match, :ident

          elsif match = scan(/L?"/)
            encoder.begin_group :string
            if match[0] == ?L
              # Emit the prefix on its own. text_token is called with a text
              # and a kind everywhere else; passing +match+ as an extra first
              # argument here was a bug.
              encoder.text_token 'L', :modifier
              match = '"'
            end
            state = :string
            encoder.text_token match, :delimiter

          elsif match = scan(/#[ \t]*(\w*)/)
            encoder.text_token match, :preprocessor
            in_preproc_line = true
            label_expected_before_preproc_line = label_expected
            # self[1] is the directive name captured by the scan above.
            state = :include_expected if self[1] == 'include'

          elsif match = scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
            label_expected = false
            encoder.text_token match, :char

          elsif match = scan(/0[xX][0-9A-Fa-f]+/)
            label_expected = false
            encoder.text_token match, :hex

          elsif match = scan(/(?:0[0-7]+)(?![89.eEfF])/)
            label_expected = false
            encoder.text_token match, :octal

          elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/)
            label_expected = false
            encoder.text_token match, :integer

          elsif match = scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
            label_expected = false
            encoder.text_token match, :float

          else
            # Nothing matched: emit one character as an error so the loop
            # always advances.
            encoder.text_token getch, :error

          end

        when :string
          if match = scan(/[^\\"]+/)
            encoder.text_token match, :content
          elsif match = scan(/"/)
            encoder.text_token match, :delimiter
            encoder.end_group :string
            state = :initial
            label_expected = false
          elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            encoder.text_token match, :char
          elsif match = scan(/ \\ | $ /x)
            # A backslash without a valid escape (or an end of line): close
            # the string group and report the text as an error.
            encoder.end_group :string
            encoder.text_token match, :error
            state = :initial
            label_expected = false
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
          end

        when :include_expected
          # Accepts <file> or "file"; the closing delimiter is optional.
          if match = scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
            encoder.text_token match, :include
            state = :initial

          elsif match = scan(/\s+/)
            encoder.text_token match, :space
            # The directive ended without naming a file.
            state = :initial if match.index ?\n

          else
            # Anything else is handed back to the :initial state unconsumed.
            state = :initial

          end

        when :class_name_expected
          if match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
            encoder.text_token match, :class
            state = :initial

          elsif match = scan(/\s+/)
            encoder.text_token match, :space

          else
            encoder.text_token getch, :error
            state = :initial

          end

        else
          raise_inspect 'Unknown state', encoder

        end

      end

      # Close the string group if the input ended inside a string literal.
      if state == :string
        encoder.end_group :string
      end

      encoder
    end

  end

end
end