module CodeRay
  module Scanners

    # Scheme scanner for CodeRay (by closure).
    # Thanks to murphy for putting CodeRay into public.
    #
    # Splits Scheme source text into [text, kind] pairs that are appended to
    # the token list handed to #scan_tokens. String literals are additionally
    # bracketed by [:open, :string] / [:close, :string] marker pairs.
    class Scheme < Scanner

      # TODO: function defs
      # TODO: built-in functions

      register_for :scheme
      file_extension 'scm'

      # Words that are reported with the :reserved kind instead of :ident.
      CORE_FORMS = %w[
        lambda let let* letrec syntax-case define-syntax let-syntax
        letrec-syntax begin define quote if or and cond case do delay
        quasiquote set! cons force call-with-current-continuation call/cc
      ].freeze

      # Maps an identifier to its token kind; anything that is not listed in
      # CORE_FORMS falls back to :ident.
      IDENT_KIND = CaseIgnoringWordList.new(:ident).
        add(CORE_FORMS, :reserved)

      # Earlier, decomposed formulation of IDENTIFIER, kept for reference:
      #IDENTIFIER_INITIAL = /[a-z!@\$%&\*\/\:<=>\?~_\^]/i
      #IDENTIFIER_SUBSEQUENT = /#{IDENTIFIER_INITIAL}|\d|\.|\+|-/
      #IDENTIFIER = /#{IDENTIFIER_INITIAL}#{IDENTIFIER_SUBSEQUENT}*|\+|-|\.{3}/

      # An initial character followed by any number of subsequent characters,
      # or one of the stand-alone identifiers "+", "-" and "...".
      IDENTIFIER = /[a-zA-Z!@$%&*\/:<=>?~_^][\w!@$%&*\/:<=>?~^.+\-]*|[+-]|\.\.\./

      # Building blocks for numeric literals in radix 10, 16, 8 and 2.
      # NOTE(review): the layering looks modelled on the R5RS <number>
      # grammar -- confirm against the report before relying on that.
      DIGIT = /\d/
      DIGIT10 = DIGIT
      DIGIT16 = /[0-9a-f]/i
      DIGIT8 = /[0-7]/
      DIGIT2 = /[01]/

      # Radix and exactness markers (all case-insensitive).
      RADIX16 = /\#x/i
      RADIX8 = /\#o/i
      RADIX2 = /\#b/i
      RADIX10 = /\#d/i
      EXACTNESS = /#i|#e/i

      # Optional sign and optional exponent part.
      SIGN = /[\+-]?/
      EXP_MARK = /[esfdl]/i
      EXP = /#{EXP_MARK}#{SIGN}#{DIGIT}+/
      SUFFIX = /#{EXP}?/

      # Radix and exactness may appear in either order. Both parts are
      # optional for radix 10; the radix marker is required for 16, 8 and 2.
      PREFIX10 = /#{RADIX10}?#{EXACTNESS}?|#{EXACTNESS}?#{RADIX10}?/
      PREFIX16 = /#{RADIX16}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX16}/
      PREFIX8 = /#{RADIX8}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX8}/
      PREFIX2 = /#{RADIX2}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX2}/

      # Unsigned integers: one or more digits followed by any number of
      # literal "#" characters.
      UINT10 = /#{DIGIT10}+#*/
      UINT16 = /#{DIGIT16}+#*/
      UINT8 = /#{DIGIT8}+#*/
      UINT2 = /#{DIGIT2}+#*/

      # Decimal notation exists for radix 10 only.
      DECIMAL = /#{DIGIT10}+#+\.#*#{SUFFIX}|#{DIGIT10}+\.#{DIGIT10}*#*#{SUFFIX}|\.#{DIGIT10}+#*#{SUFFIX}|#{UINT10}#{EXP}/

      # Unsigned reals: a ratio, a decimal (radix 10 only) or an integer.
      UREAL10 = /#{UINT10}\/#{UINT10}|#{DECIMAL}|#{UINT10}/
      UREAL16 = /#{UINT16}\/#{UINT16}|#{UINT16}/
      UREAL8 = /#{UINT8}\/#{UINT8}|#{UINT8}/
      UREAL2 = /#{UINT2}\/#{UINT2}|#{UINT2}/

      # Optionally signed reals.
      REAL10 = /#{SIGN}#{UREAL10}/
      REAL16 = /#{SIGN}#{UREAL16}/
      REAL8 = /#{SIGN}#{UREAL8}/
      REAL2 = /#{SIGN}#{UREAL2}/

      # Imaginary parts: a bare "i" or an unsigned real followed by "i".
      IMAG10 = /i|#{UREAL10}i/
      IMAG16 = /i|#{UREAL16}i/
      IMAG8 = /i|#{UREAL8}i/
      IMAG2 = /i|#{UREAL2}i/

      # Complex numbers: real@real, real+imag, real-imag, a signed imaginary
      # part on its own, or a plain real.
      COMPLEX10 = /#{REAL10}@#{REAL10}|#{REAL10}\+#{IMAG10}|#{REAL10}-#{IMAG10}|\+#{IMAG10}|-#{IMAG10}|#{REAL10}/
      COMPLEX16 = /#{REAL16}@#{REAL16}|#{REAL16}\+#{IMAG16}|#{REAL16}-#{IMAG16}|\+#{IMAG16}|-#{IMAG16}|#{REAL16}/
      COMPLEX8 = /#{REAL8}@#{REAL8}|#{REAL8}\+#{IMAG8}|#{REAL8}-#{IMAG8}|\+#{IMAG8}|-#{IMAG8}|#{REAL8}/
      COMPLEX2 = /#{REAL2}@#{REAL2}|#{REAL2}\+#{IMAG2}|#{REAL2}-#{IMAG2}|\+#{IMAG2}|-#{IMAG2}|#{REAL2}/

      # Complete numeric literals per radix, and the union of all of them.
      NUM10 = /#{PREFIX10}?#{COMPLEX10}/
      NUM16 = /#{PREFIX16}#{COMPLEX16}/
      NUM8 = /#{PREFIX8}#{COMPLEX8}/
      NUM2 = /#{PREFIX2}#{COMPLEX2}/
      NUM = /#{NUM10}|#{NUM16}|#{NUM8}|#{NUM2}/

    private

      # Tokenizes the remaining input with a two-state machine.
      #
      # In :initial the branches are tried in this order: whitespace, list
      # and quote punctuation, comment, character literal, #f / #t,
      # identifier, dot, string opener, number; any other character becomes
      # an :error token. In :string, content and backslash escapes are
      # collected until the closing double quote.
      #
      # NOTE(review): IDENTIFIER (through its "+" / "-" alternative) and the
      # dot branch are tried before NUM, so a leading sign or a leading "."
      # is emitted as its own token and only the digits that follow reach
      # NUM. The order is left as it was because changing it would alter the
      # emitted tokens; confirm whether this is intended.
      #
      # tokens  - token list that receives the [text, kind] pairs via <<.
      # options - not used by this scanner.
      #
      # Returns tokens.
      def scan_tokens(tokens, options)
        state = :initial

        until eos?
          kind = nil

          case state
          when :initial
            if scan(/ \s+ | \\\n /x)
              kind = :space
            elsif scan(/['\(\[\)\]]|#\(/)
              kind = :operator_fat
            elsif scan(/;.*/)
              kind = :comment
            elsif scan(/#\\(?:newline|space|.?)/)
              kind = :char
            elsif scan(/#[ft]/)
              kind = :pre_constant
            elsif scan(IDENTIFIER)
              kind = IDENT_KIND[matched]
            elsif scan(/\./)
              kind = :operator
            elsif scan(/"/)
              # Open the string group first, then emit the delimiter inside it.
              tokens << [:open, :string]
              state = :string
              tokens << ['"', :delimiter]
              next
            elsif scan(NUM) && !matched.empty?
              kind = :integer
            elsif getch
              kind = :error
            end

          when :string
            if scan(/[^"\\]+/) || scan(/\\.?/)
              kind = :content
            elsif scan(/"/)
              tokens << ['"', :delimiter]
              tokens << [:close, :string]
              state = :initial
              next
            else
              raise_inspect "else case \" reached; %p not handled." % peek(1),
                tokens, state
            end

          else
            raise "else case reached"
          end

          # Every branch that falls through to here consumed input via scan
          # or getch, so the text of the token is the scanner's last match.
          match = matched
          if $CODERAY_DEBUG && !kind
            raise_inspect 'Error token %p in line %d' %
              [[match, kind], line], tokens
          end
          raise_inspect 'Empty token', tokens, state unless match

          tokens << [match, kind]

        end # until eos

        # Close a string literal that was still open at the end of input.
        tokens << [:close, :string] if state == :string

        tokens

      end #scan_tokens
    end #class
  end #module scanners
end #module coderay