module CodeRay
module Scanners

  # Scanner that splits C++ source text into [text, kind] tokens.
  #
  # The scanner is a small state machine driven by #scan_tokens:
  #
  # * :initial             - ordinary code
  # * :string              - inside a double-quoted string literal
  # * :include_expected    - directly after a "#include" directive
  # * :class_name_expected - directly after the "class" keyword
  class CPlusPlus < Scanner

    include Streamable

    register_for :cpp
    file_extension 'cpp'
    title 'C++'

    # http://www.cppreference.com/wiki/keywords/start
    RESERVED_WORDS = [
      'and', 'and_eq', 'asm', 'bitand', 'bitor', 'break',
      'case', 'catch', 'class', 'compl', 'const_cast',
      'continue', 'default', 'delete', 'do', 'dynamic_cast', 'else',
      'enum', 'export', 'for', 'goto', 'if', 'namespace', 'new',
      'not', 'not_eq', 'or', 'or_eq', 'reinterpret_cast', 'return',
      'sizeof', 'static_cast', 'struct', 'switch', 'template',
      'throw', 'try', 'typedef', 'typeid', 'typename', 'union',
      'while', 'xor', 'xor_eq'
    ]

    PREDEFINED_TYPES = [
      'bool', 'char', 'double', 'float', 'int', 'long',
      'short', 'signed', 'unsigned', 'wchar_t', 'string'
    ]
    PREDEFINED_CONSTANTS = [
      'false', 'true',
      'EOF', 'NULL',
    ]
    PREDEFINED_VARIABLES = [
      'this'
    ]
    DIRECTIVES = [
      'auto', 'const', 'explicit', 'extern', 'friend', 'inline', 'mutable', 'operator',
      'private', 'protected', 'public', 'register', 'static', 'using', 'virtual', 'void',
      'volatile'
    ]

    # Maps an identifier to its token kind; words in none of the lists
    # above fall back to :ident.
    IDENT_KIND = WordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(PREDEFINED_TYPES, :pre_type).
      add(PREDEFINED_VARIABLES, :local_variable).
      add(DIRECTIVES, :directive).
      add(PREDEFINED_CONSTANTS, :pre_constant)

    # What may follow a backslash inside a character or string literal.
    ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

    # Scans the whole input and appends the resulting tokens to +tokens+.
    #
    # tokens  - collection that receives [text, kind] pairs via <<; string
    #           literals are additionally bracketed by [:open, :string] and
    #           [:close, :string] markers.
    # options - accepted for interface compatibility; not read here.
    #
    # Returns +tokens+.
    def scan_tokens tokens, options

      state = :initial
      # True while an identifier followed by a single ':' should be
      # classified as a label (start of input, after ';', '{', '}', or
      # after the ':' that ends a case/default clause).
      label_expected = true
      # True between a 'case'/'default' keyword and the next operator.
      case_expected = false
      # label_expected is saved here when a preprocessor line starts and
      # restored when that line ends.
      label_expected_before_preproc_line = nil
      in_preproc_line = false

      until eos?

        kind = nil
        match = nil

        case state

        when :initial

          if match = scan(/ \s+ | \\\n /x)
            # A real newline (not a backslash continuation) ends the
            # current preprocessor line.
            if in_preproc_line && match != "\\\n" && match.index(?\n)
              in_preproc_line = false
              label_expected = label_expected_before_preproc_line
            end
            tokens << [match, :space]
            next

          elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            kind = :comment

          elsif match = scan(/ \# \s* if \s* 0 /x)
            # Everything up to and including the next #elif/#else/#endif
            # line (or the end of input) is treated as a comment.
            match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
            kind = :comment

          elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
            label_expected = match =~ /[;\{\}]/
            if case_expected
              label_expected = true if match == ':'
              case_expected = false
            end
            kind = :operator

          elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
            kind = IDENT_KIND[match]
            # "name:" is a label, "name::" is a scope resolution.
            if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/)
              kind = :label
              match << matched
            else
              label_expected = false
              if kind == :reserved
                case match
                when 'class'
                  state = :class_name_expected
                when 'case', 'default'
                  case_expected = true
                end
              end
            end

          elsif scan(/\$/)
            kind = :ident

          elsif match = scan(/L?"/)
            tokens << [:open, :string]
            if match[0] == ?L
              # Emit the wide-string prefix as its own token.
              tokens << ['L', :modifier]
              match = '"'
            end
            state = :string
            kind = :delimiter

          elsif scan(/#[ \t]*(\w*)/)
            kind = :preprocessor
            in_preproc_line = true
            label_expected_before_preproc_line = label_expected
            state = :include_expected if self[1] == 'include'

          elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
            label_expected = false
            kind = :char

          elsif scan(/0[xX][0-9A-Fa-f]+/)
            label_expected = false
            kind = :hex

          # The digit runs below are atomic groups: without them a failed
          # lookahead makes the regex give back digits, so "012.5" would
          # yield the octal "01" and "12.5" the integer "1".
          elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
            label_expected = false
            kind = :oct

          elsif scan(/(?>\d+)(?![.eEfF])L?L?/)
            label_expected = false
            kind = :integer

          # Alternatives are ordered longest-first; the bare digit run with
          # an optional f/F suffix is the last resort, so that "3.14" and
          # "1e5" are each scanned as a single float token.
          elsif scan(/\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?|\d+[fF]?/)
            label_expected = false
            kind = :float

          else
            getch
            kind = :error

          end

        when :string
          if scan(/[^\\"]+/)
            kind = :content
          elsif scan(/"/)
            tokens << ['"', :delimiter]
            tokens << [:close, :string]
            state = :initial
            label_expected = false
            next
          elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            kind = :char
          elsif scan(/ \\ | $ /x)
            # Invalid escape or end of line: close the string and flag
            # the offending text as an error.
            tokens << [:close, :string]
            kind = :error
            state = :initial
            label_expected = false
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
          end

        when :include_expected
          if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
            kind = :include
            state = :initial

          elsif match = scan(/\s+/)
            kind = :space
            state = :initial if match.index ?\n

          else
            # Nothing include-like follows; rescan in the normal state.
            state = :initial
            next

          end

        when :class_name_expected
          if scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
            kind = :class
            state = :initial

          elsif match = scan(/\s+/)
            kind = :space

          else
            getch
            kind = :error
            state = :initial

          end

        else
          raise_inspect 'Unknown state', tokens

        end

        # Branches that did not assign +match+ rely on the text of the
        # most recent successful scan.
        match ||= matched
        if $CODERAY_DEBUG and not kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]

      end

      # Close a string literal that was still open at end of input.
      if state == :string
        tokens << [:close, :string]
      end

      tokens
    end

  end

end
end