annotate vendor/plugins/coderay-0.9.2/lib/coderay/scanners/cpp.rb @ 863:818ff422eece bug_168

Close obsolete branch bug_168
author Chris Cannam
date Tue, 07 Jun 2011 10:56:57 +0100
parents 513646585e45
children
rev   line source
module CodeRay
module Scanners

  # Scanner for C++ source code.
  #
  # #scan_tokens walks the input with a small state machine and appends
  # [text, kind] pairs to the token list. String literals are additionally
  # bracketed by [:open, :string] and [:close, :string] marker entries.
  class CPlusPlus < Scanner

    include Streamable

    register_for :cpp
    file_extension 'cpp'
    title 'C++'

    # http://www.cppreference.com/wiki/keywords/start
    RESERVED_WORDS = [
      'and', 'and_eq', 'asm', 'bitand', 'bitor', 'break',
      'case', 'catch', 'class', 'compl', 'const_cast',
      'continue', 'default', 'delete', 'do', 'dynamic_cast', 'else',
      'enum', 'export', 'for', 'goto', 'if', 'namespace', 'new',
      'not', 'not_eq', 'or', 'or_eq', 'reinterpret_cast', 'return',
      'sizeof', 'static_cast', 'struct', 'switch', 'template',
      'throw', 'try', 'typedef', 'typeid', 'typename', 'union',
      'while', 'xor', 'xor_eq'
    ]

    PREDEFINED_TYPES = [
      'bool', 'char', 'double', 'float', 'int', 'long',
      'short', 'signed', 'unsigned', 'wchar_t', 'string'
    ]
    PREDEFINED_CONSTANTS = [
      'false', 'true',
      'EOF', 'NULL',
    ]
    PREDEFINED_VARIABLES = [
      'this'
    ]
    DIRECTIVES = [
      'auto', 'const', 'explicit', 'extern', 'friend', 'inline', 'mutable', 'operator',
      'private', 'protected', 'public', 'register', 'static', 'using', 'virtual', 'void',
      'volatile'
    ]

    # Maps a scanned word to its token kind; words in none of the lists
    # above fall back to :ident.
    IDENT_KIND = WordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(PREDEFINED_TYPES, :pre_type).
      add(PREDEFINED_VARIABLES, :local_variable).
      add(DIRECTIVES, :directive).
      add(PREDEFINED_CONSTANTS, :pre_constant)

    # What may follow a backslash inside a character or string literal.
    ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
    # \uXXXX and \UXXXXXXXX escapes; only tried inside string literals.
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

    # Tokenizes the remaining input.
    #
    # tokens::  receiver of the scanned entries; only << is called on it.
    # options:: accepted for interface compatibility; not read here.
    #
    # Returns +tokens+.
    #
    # Calls raise_inspect on an unknown scanner state or an unhandled
    # character inside a string and, when $CODERAY_DEBUG is set, on a token
    # that was given no kind.
    def scan_tokens tokens, options

      state = :initial
      # True while an identifier directly followed by a single ':' should be
      # reported as a label (start of input, after ';', '{', '}' and after
      # the ':' closing a case/default).
      label_expected = true
      # True between a 'case'/'default' keyword and the next operator.
      case_expected = false
      # label_expected is saved here when a preprocessor line starts and
      # restored when that line ends.
      label_expected_before_preproc_line = nil
      in_preproc_line = false

      until eos?

        kind = nil
        match = nil

        case state

        when :initial

          if match = scan(/ \s+ | \\\n /x)
            # A real newline (not a backslash continuation) ends the
            # current preprocessor line.
            if in_preproc_line && match != "\\\n" && match.index(?\n)
              in_preproc_line = false
              label_expected = label_expected_before_preproc_line
            end
            tokens << [match, :space]
            next

          elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            kind = :comment

          # "#if 0" blocks are reported as one comment up to the next
          # #elif/#else/#endif line (or the end of input). The \b keeps
          # conditions such as "#if 0x10" or "#if 01" from being mistaken
          # for a literal zero.
          elsif match = scan(/ \# \s* if \s* 0 \b /x)
            match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
            kind = :comment

          elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
            label_expected = match =~ /[;\{\}]/
            if case_expected
              label_expected = true if match == ':'
              case_expected = false
            end
            kind = :operator

          elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
            kind = IDENT_KIND[match]
            # "name:" is a label, but "name::" (scope resolution) is not.
            if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/)
              kind = :label
              match << matched
            else
              label_expected = false
              if kind == :reserved
                case match
                when 'class'
                  state = :class_name_expected
                when 'case', 'default'
                  case_expected = true
                end
              end
            end

          elsif scan(/\$/)
            kind = :ident

          elsif match = scan(/L?"/)
            tokens << [:open, :string]
            # A wide-string prefix is emitted separately so that the
            # delimiter token is always just the quote.
            if match[0] == ?L
              tokens << ['L', :modifier]
              match = '"'
            end
            state = :string
            kind = :delimiter

          elsif scan(/#[ \t]*(\w*)/)
            kind = :preprocessor
            in_preproc_line = true
            label_expected_before_preproc_line = label_expected
            state = :include_expected if self[1] == 'include'

          elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
            label_expected = false
            kind = :char

          elsif scan(/0[xX][0-9A-Fa-f]+/)
            label_expected = false
            kind = :hex

          # \d in the lookahead keeps the regexp from backtracking to a
          # shorter digit run, so "012.5" is left whole for the float rule
          # and "0178" is left whole for the integer rule.
          elsif scan(/(?:0[0-7]+)(?![\d.eEfF])/)
            label_expected = false
            kind = :oct

          # Same lookahead as above: "12.5" and "12f" must not yield "1" as
          # an integer.
          elsif scan(/(?:\d+)(?![\d.eEfF])L?L?/)
            label_expected = false
            kind = :integer

          # Alternatives are ordered longest form first. Alternation takes
          # the first alternative that matches, so a leading bare-digit
          # alternative would stop after one digit and split "1.5" or "1e5"
          # into several tokens.
          elsif scan(/\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?|\d+[fF]?/)
            label_expected = false
            kind = :float

          else
            getch
            kind = :error

          end

        when :string
          if scan(/[^\\"]+/)
            kind = :content
          elsif scan(/"/)
            tokens << ['"', :delimiter]
            tokens << [:close, :string]
            state = :initial
            label_expected = false
            next
          elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            kind = :char
          elsif scan(/ \\ | $ /x)
            # A backslash that starts no known escape: close the string
            # and report the backslash as an error token.
            tokens << [:close, :string]
            kind = :error
            state = :initial
            label_expected = false
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
          end

        when :include_expected
          if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
            kind = :include
            state = :initial

          elsif match = scan(/\s+/)
            kind = :space
            state = :initial if match.index ?\n

          else
            # Nothing that looks like an include target: re-scan the same
            # position in the :initial state.
            state = :initial
            next

          end

        when :class_name_expected
          if scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
            kind = :class
            state = :initial

          elsif match = scan(/\s+/)
            kind = :space

          else
            getch
            kind = :error
            state = :initial

          end

        else
          raise_inspect 'Unknown state', tokens

        end

        # Branches that did not capture the text themselves rely on the
        # scanner's record of the last match.
        match ||= matched
        if $CODERAY_DEBUG and not kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]

      end

      # Input ended inside a string literal: balance the :open marker.
      if state == :string
        tokens << [:close, :string]
      end

      tokens
    end

  end

end
end