module CodeRay module Scanners

  # Scanner that splits SQL source text into highlighting tokens.
  #
  # by Josh Goebel
  class SQL < Scanner

    register_for :sql

    # Words reported as :reserved.
    # NOTE(review): "avg" is also listed in PREDEFINED_FUNCTIONS; which kind
    # wins depends on how the word list resolves duplicate additions - confirm.
    RESERVED_WORDS = %w(
      create database table index trigger drop primary key set select
      insert update delete replace into
      on from values before and or if exists case when
      then else as group order by avg where
      join inner outer union engine not
      like end using collate show columns begin
    )

    # Words reported as :pre_type.
    PREDEFINED_TYPES = %w(
      char varchar enum binary text tinytext mediumtext
      longtext blob tinyblob mediumblob longblob timestamp
      date time datetime year double decimal float int
      integer tinyint mediumint bigint smallint unsigned bit
      bool boolean hex bin oct
    )

    # Words reported as :predefined.
    PREDEFINED_FUNCTIONS = %w( sum cast abs pi count min max avg )

    # Words reported as :directive.
    DIRECTIVES = %w( auto_increment unique default charset )

    # Words reported as :pre_constant.
    PREDEFINED_CONSTANTS = %w( null true false )

    # Maps a word to its token kind; words in none of the lists are :ident.
    IDENT_KIND = CaseIgnoringWordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(PREDEFINED_TYPES, :pre_type).
      add(PREDEFINED_CONSTANTS, :pre_constant).
      add(PREDEFINED_FUNCTIONS, :predefined).
      add(DIRECTIVES, :directive)

    # What may follow a backslash inside a string. The trailing "." makes
    # this pattern accept any single character.
    ESCAPE = / [rbfntv\n\\\/'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | . /mx
    # NOTE(review): wherever ESCAPE is tried first (as in scan_tokens), its
    # catch-all "." consumes the "u"/"U", so this pattern never gets a chance.
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

    # Optional modifier directly in front of a string's opening quote.
    STRING_PREFIXES = /[xnb]|_\w+/i

    # Tokenizes the scanner's input and appends the result to +tokens+.
    #
    # tokens  - collection receiving [text, kind] pairs; a quoted string is
    #           wrapped in [:open, :string] ... [:close, :string] markers.
    # options - not used by this scanner.
    #
    # Returns +tokens+.
    def scan_tokens tokens, options

      state = :initial
      string_type = nil    # quote character that opened the current string
      string_content = ''  # string text buffered until the next flush

      until eos?

        kind = nil
        match = nil

        if state == :initial

          if scan(/ \s+ | \\\n /x)
            kind = :space

          elsif scan(/(?:--\s?|#).*/)
            # "--" or "#" comment running to the end of the line
            kind = :comment

          elsif scan(%r! /\* (?: .*? \*/ | .* ) !mx)
            # block comment; if unterminated it runs to the end of the input
            kind = :comment

          elsif scan(/ [-+*\/=<>;,!&^|()\[\]{}~%] | \.(?!\d) /x)
            kind = :operator

          elsif scan(/(#{STRING_PREFIXES})?([`"'])/o)
            prefix = self[1]
            string_type = self[2]
            tokens << [:open, :string]
            tokens << [prefix, :modifier] if prefix
            match = string_type
            state = :string
            kind = :delimiter

          elsif match = scan(/ @? [A-Za-z_][A-Za-z_0-9]* /x)
            # match[0] and ?@ are both Integers on Ruby 1.8 and both Strings
            # on 1.9+, so the comparison works on either.
            kind = match[0] == ?@ ? :variable : IDENT_KIND[match.downcase]

          elsif scan(/0[xX][0-9A-Fa-f]+/)
            kind = :hex

          elsif scan(/0[0-7]+(?![89.eEfF])/)
            kind = :oct

          elsif scan(/(?>\d+)(?![.eEfF])/)
            kind = :integer

          elsif scan(/\d[fF]|\d*\.\d+(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
            kind = :float

          else
            # Unknown character: consume it so the loop always advances.
            getch
            kind = :error

          end

        elsif state == :string
          if match = scan(/[^\\"'`]+/)
            string_content << match
            next
          elsif match = scan(/["'`]/)
            if string_type == match
              if peek(1) == string_type  # doubling means escape
                string_content << string_type << getch
                next
              end
              unless string_content.empty?
                tokens << [string_content, :content]
                string_content = ''
              end
              tokens << [matched, :delimiter]
              tokens << [:close, :string]
              state = :initial
              string_type = nil
              next
            else
              # A quote of a different type is ordinary string text.
              string_content << match
            end
            next
          elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            unless string_content.empty?
              tokens << [string_content, :content]
              string_content = ''
            end
            kind = :char
          elsif match = scan(/ \\ . /mox)
            string_content << match
            next
          elsif match = scan(/ \\ | $ /x)
            # Dangling backslash (or end of line): report it and close the
            # string group so that open/close markers stay balanced.
            unless string_content.empty?
              tokens << [string_content, :content]
              string_content = ''
            end
            tokens << [match, :error] unless match.empty?
            tokens << [:close, :string]
            state = :initial
            string_type = nil
            next
          else
            # raise_inspect, not raise: "raise String, obj" is a TypeError.
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens, state
          end

        else
          raise_inspect 'else-case reached', tokens, state

        end

        match ||= matched
        unless kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens, state
        end
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]

      end

      # Input ended inside a string literal: emit the buffered text instead
      # of dropping it, and close the group that was opened for the string.
      if state == :string
        tokens << [string_content, :content] unless string_content.empty?
        tokens << [:close, :string]
      end

      tokens

    end

  end

end end