# encoding: utf-8
module CodeRay
module Scanners

  # Word lists and regular expressions shared by the Ruby scanner.
  #
  # Everything in here is a constant; the regexps are composed from smaller
  # ones via interpolation, so the order of definition matters.
  module Ruby::Patterns  # :nodoc: all

    KEYWORDS = %w[
      and def end in or unless begin
      defined? ensure module redo super until
      BEGIN break do next rescue then
      when END case else for retry
      while alias class elsif if not return
      undef yield
    ]

    # See http://murfy.de/ruby-constants.
    PREDEFINED_CONSTANTS = %w[
      nil true false self
      DATA ARGV ARGF ENV
      FALSE TRUE NIL
      STDERR STDIN STDOUT
      TOPLEVEL_BINDING
      RUBY_COPYRIGHT RUBY_DESCRIPTION RUBY_ENGINE RUBY_PATCHLEVEL
      RUBY_PLATFORM RUBY_RELEASE_DATE RUBY_REVISION RUBY_VERSION
      __FILE__ __LINE__ __ENCODING__
    ]

    # Maps a word to its token kind; anything not listed is an :ident.
    IDENT_KIND = WordList.new(:ident).
      add(KEYWORDS, :keyword).
      add(PREDEFINED_CONSTANTS, :predefined_constant)

    # Maps a keyword to the state that follows it; defaults to :initial.
    # NOTE(review): the state names are presumably consumed by the scanner's
    # state machine - confirm against the scanner.
    KEYWORD_NEW_STATE = WordList.new(:initial).
      add(%w[ def ], :def_expected).
      add(%w[ undef ], :undef_expected).
      add(%w[ alias ], :alias_expected).
      add(%w[ class module ], :module_expected)

    # Use POSIX bracket classes when the running regexp engine lets
    # [[:alpha:]] match a non-ASCII letter; otherwise fall back to a
    # \w-based pattern ("word character that is not a digit").
    IDENT = 'ä'[/[[:alpha:]]/] == 'ä' ? /[[:alpha:]_][[:alnum:]_]*/ : /[^\W\d]\w*/

    METHOD_NAME = / #{IDENT} [?!]? /ox
    METHOD_NAME_OPERATOR = /
      \*\*?           # multiplication and power
      | [-+~]@?       # plus, minus, tilde with and without at sign
      | [\/%&|^`]     # division, modulo or format strings, and, or, xor, system
      | \[\]=?        # array getter and setter
      | << | >>       # append or shift left, shift right
      | <=?>? | >=?   # comparison, rocket operator
      | ===? | =~     # simple equality, case equality, match
      | ![~=@]?       # negation with and without at sign, not-equal and not-match
    /ox
    # A trailing "=" only counts as a setter suffix when it does not start
    # "=~", "=>" or "==" (but "==>" is allowed: setter followed by "=>").
    METHOD_SUFFIX = / (?: [?!] | = (?![~>]|=(?!>)) ) /x
    METHOD_NAME_EX = / #{IDENT} #{METHOD_SUFFIX}? | #{METHOD_NAME_OPERATOR} /ox
    METHOD_AFTER_DOT = / #{IDENT} [?!]? | #{METHOD_NAME_OPERATOR} /ox
    INSTANCE_VARIABLE = / @ #{IDENT} /ox
    CLASS_VARIABLE = / @@ #{IDENT} /ox
    OBJECT_VARIABLE = / @@? #{IDENT} /ox
    GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
    PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} | #{OBJECT_VARIABLE} /ox
    VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox

    # Opening quote character => string kind; any other quote is a :string.
    QUOTE_TO_TYPE = {
      '`' => :shell,
      '/'=> :regexp,
    }
    QUOTE_TO_TYPE.default = :string

    REGEXP_MODIFIERS = /[mousenix]*/

    DECIMAL = /\d+(?:_\d+)*/
    OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
    HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
    BINARY = /0b[01]+(?:_[01]+)*/

    EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
    FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox
    # The empty group () participates only when a float suffix matched.
    # NOTE(review): presumably the scanner checks it to tell floats from
    # integers, so no capturing group may be added before it - confirm.
    FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox
    NUMERIC = / (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} ) /ox

    SYMBOL = /
      :
      (?:
        #{METHOD_NAME_EX}
      | #{PREFIX_VARIABLE}
      | ['"]
      )
    /ox
    METHOD_NAME_OR_SYMBOL = / #{METHOD_NAME_EX} | #{SYMBOL} /ox

    # What may follow a backslash: a letter escape, up to three octal
    # digits, "x" plus one or two hex digits, or any single character.
    SIMPLE_ESCAPE = /
        [abefnrstv]
      | [0-7]{1,3}
      | x[0-9A-Fa-f]{1,2}
      | .
    /mx

    CONTROL_META_ESCAPE = /
      (?: M-|C-|c )
      (?: \\ (?: M-|C-|c ) )*
      (?: [^\\] | \\ #{SIMPLE_ESCAPE} )?
    /mox

    ESCAPE = /
      #{CONTROL_META_ESCAPE} | #{SIMPLE_ESCAPE}
    /mox

    # Character literal: "?" followed by one non-space character or by a
    # backslash escape.
    CHARACTER = /
      \?
      (?:
        [^\s\\]
      | \\ #{ESCAPE}
      )
    /mox

    # NOTE: This is not completely correct, but
    # nobody needs heredoc delimiters ending with \n.
    #
    # Accepts both the "-" and the "~" (squiggly) modifier after "<<".
    # NOTE(review): group 1 is assumed to be used only as a truthy
    # "terminator may be indented" flag - confirm against the scanner.
    HEREDOC_OPEN = /
      << ([-~])?           # $1 = float: dash or tilde modifier
      (?:
        ( [A-Za-z_0-9]+ )  # $2 = delim
      |
        ( ["'`\/] )        # $3 = quote, type
        ( [^\n]*? ) \3     # $4 = delim
      )
    /mx

    # Block comment: from "=begin" up to the "=end" line, or to the end of
    # the input when no "=end" line follows.
    RUBYDOC = /
      =begin (?!\S)
      .*?
      (?: \Z | ^=end (?!\S) [^\n]* )
    /mx

    # Everything after "__END__", up to the end of the input or to a line
    # starting with "#CODE".
    DATA = /
      __END__$
      .*?
      (?: \Z | (?=^\#CODE) )
    /mx

    RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo

    # Checks for a valid value to follow. This enables
    # value_expected in method calls without parentheses.
    VALUE_FOLLOWS = /
      (?>[ \t\f\v]+)
      (?:
        [%\/][^\s=]
      | <<-?\S
      | [-+] \d
      | #{CHARACTER}
      )
    /ox
    KEYWORDS_EXPECTING_VALUE = WordList.new.add(%w[
      and end in or unless begin
      defined? ensure redo super until
      break do next rescue then
      when case else for retry
      while elsif if not return
      yield
    ])

    # Percent literal opener: $1 = optional type letter, $2 = delimiter.
    # "i" and "I" cover the symbol-array literals %i[...] and %I[...].
    FANCY_STRING_START = / % ( [iIqQrswWx] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /x
    # Type letter => token kind; unlisted letters (and no letter) are :string.
    FANCY_STRING_KIND = Hash.new(:string).merge({
      'i' => :symbol,
      'I' => :symbol,
      'r' => :regexp,
      's' => :symbol,
      'x' => :shell,
    })
    # Type letter => whether the literal is interpreted; true unless listed.
    # NOTE(review): "interpreted" presumably means interpolation and escapes
    # are processed - confirm against the scanner.
    FANCY_STRING_INTERPRETED = Hash.new(true).merge({
      'i' => false,
      'q' => false,
      's' => false,
      'w' => false,
    })

  end

end
end