module CodeRay module Scanners

  # Scanner for SQL source text.
  #
  # Identifiers are classified through IDENT_KIND; quoted strings (single
  # quote, double quote or backtick, optionally with a prefix such as x, n,
  # b or _charset) are emitted as a :string group delimited by
  # [:open, :string] and [:close, :string] entries.
  #
  # by Josh Goebel
  class SQL < Scanner

    register_for :sql

    # NOTE(review): "avg" is listed both here and in PREDEFINED_FUNCTIONS;
    # which kind wins depends on how the word list handles repeated adds.
    RESERVED_WORDS = %w(
      create database table index trigger drop primary key set select
      insert update delete replace into
      on from values before and or if exists case when
      then else as group order by avg where
      join inner outer union engine not
      like end using collate show columns begin
    )

    PREDEFINED_TYPES = %w(
      char varchar enum binary text tinytext mediumtext
      longtext blob tinyblob mediumblob longblob timestamp
      date time datetime year double decimal float int
      integer tinyint mediumint bigint smallint unsigned bit
      bool boolean hex bin oct
    )

    PREDEFINED_FUNCTIONS = %w( sum cast abs pi count min max avg )

    DIRECTIVES = %w( auto_increment unique default charset )

    PREDEFINED_CONSTANTS = %w( null true false )

    # Maps a word to its token kind; words not listed fall back to :ident.
    IDENT_KIND = CaseIgnoringWordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(PREDEFINED_TYPES, :pre_type).
      add(PREDEFINED_CONSTANTS, :pre_constant).
      add(PREDEFINED_FUNCTIONS, :predefined).
      add(DIRECTIVES, :directive)

    # What may follow a backslash inside a string literal.
    ESCAPE = / [rbfntv\n\\\/'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | . /mx
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

    # Optional modifier directly in front of an opening quote.
    STRING_PREFIXES = /[xnb]|_\w+/i

    # Scans the input and appends the resulting tokens to +tokens+.
    #
    # Plain tokens are appended as [text, kind] pairs. A string literal is
    # wrapped in [:open, :string] ... [:close, :string], with its pieces
    # emitted as :modifier, :delimiter, :content, :char and :error tokens.
    #
    # tokens::  the collection that receives the tokens (anything that
    #           responds to <<).
    # options:: not used by this method.
    #
    # Returns +tokens+.
    def scan_tokens(tokens, options)

      state = :initial
      string_type = nil     # the quote character that opened the current string
      string_content = ''   # literal text buffered since the last emitted token

      until eos?

        kind = nil
        match = nil

        if state == :initial

          if scan(/ \s+ | \\\n /x)
            kind = :space

          # Not anchored to the start of a line, so that trailing comments
          # ("SELECT 1 -- note") are recognized too. No optional \s after
          # "--": it could swallow a newline and drag the following line
          # into the comment; same-line whitespace is covered by ".*".
          elsif scan(/(?:--|#).*/)
            kind = :comment

          # Block comment; an unterminated one runs to the end of input.
          elsif scan(%r! /\* (?: .*? \*/ | .* ) !mx)
            kind = :comment

          elsif scan(/ [-+*\/=<>;,!&^|()\[\]{}~%] | \.(?!\d) /x)
            kind = :operator

          elsif scan(/(#{STRING_PREFIXES})?([`"'])/o)
            prefix = self[1]
            string_type = self[2]
            tokens << [:open, :string]
            tokens << [prefix, :modifier] if prefix
            # Only the quote itself becomes the delimiter token below.
            match = string_type
            state = :string
            kind = :delimiter

          elsif match = scan(/ @? [A-Za-z_][A-Za-z_0-9]* /x)
            # ?@ compares correctly whether String#[] yields an Integer
            # (Ruby 1.8) or a one-character String (Ruby 1.9+).
            kind = match[0] == ?@ ? :variable : IDENT_KIND[match.downcase]

          elsif scan(/0[xX][0-9A-Fa-f]+/)
            kind = :hex

          elsif scan(/0[0-7]+(?![89.eEfF])/)
            kind = :oct

          elsif scan(/(?>\d+)(?![.eEfF])/)
            kind = :integer

          elsif scan(/\d[fF]|\d*\.\d+(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
            kind = :float

          else
            getch
            kind = :error

          end

        elsif state == :string
          if match = scan(/[^\\"'`]+/)
            string_content << match
            next

          elsif match = scan(/["'`]/)
            if string_type == match
              if peek(1) == string_type  # doubling means escape
                string_content << string_type << getch
                next
              end
              string_content = flush_string_content(tokens, string_content)
              tokens << [matched, :delimiter]
              tokens << [:close, :string]
              state = :initial
              string_type = nil
            else
              # A quote of a different type is ordinary content.
              string_content << match
            end
            next

          elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            string_content = flush_string_content(tokens, string_content)
            kind = :char

          elsif match = scan(/ \\ . /mox)
            string_content << match
            next

          elsif scan(/ \\ | $ /x)
            # A dangling backslash (or an empty end-of-line match): report
            # it and leave the string, closing the group so that every
            # :open has a matching :close.
            string_content = flush_string_content(tokens, string_content)
            tokens << [matched, :error] unless matched.empty?
            tokens << [:close, :string]
            state = :initial
            string_type = nil
            next

          else
            # raise_inspect rather than raise: Kernel#raise rejects a
            # String message combined with a second argument.
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens

          end

        else
          raise_inspect 'else-case reached', tokens

        end

        match ||= matched
        unless kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens, state
        end
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]

      end

      # Input ended inside a string literal: emit what was buffered and
      # close the group instead of silently dropping the text.
      if state == :string
        flush_string_content(tokens, string_content)
        tokens << [:close, :string]
      end

      tokens

    end

  private

    # Appends +content+ to +tokens+ as a :content token unless it is empty.
    # Returns the buffer to continue with: a fresh empty string after a
    # flush (the flushed string now belongs to +tokens+ and must not be
    # appended to any more), or +content+ itself if nothing was emitted.
    def flush_string_content(tokens, content)
      return content if content.empty?
      tokens << [content, :content]
      ''
    end

  end

end end