module CodeRay
  module Scanners

    # Scanner for SQL source text.
    #
    # Splits the input into tokens (whitespace, comments, operators, strings,
    # identifiers, numbers) and reports each one to the given encoder.
    # Identifiers are classified case-insensitively through IDENT_KIND.
    #
    # by Josh Goebel
    class SQL < Scanner

      register_for :sql

      # NOTE(review): `begin` also appears in COMMANDS and `foreign` in
      # DIRECTIVES; which kind wins presumably depends on the order of the
      # `add` calls in IDENT_KIND below -- confirm against WordList.
      KEYWORDS = %w(
        all and any as before begin between by case check collate
        each else end exists
        for foreign from full group having if in inner is join
        like not of on or order outer over references
        then to union using values when where
        left right distinct
      )

      OBJECTS = %w(
        database databases table tables column columns fields index constraint
        constraints transaction function procedure row key view trigger
      )

      COMMANDS = %w(
        add alter comment create delete drop grant insert into select update set
        show prompt begin commit rollback replace truncate
      )

      PREDEFINED_TYPES = %w(
        char varchar varchar2 enum binary text tinytext mediumtext
        longtext blob tinyblob mediumblob longblob timestamp
        date time datetime year double decimal float int
        integer tinyint mediumint bigint smallint unsigned bit
        bool boolean hex bin oct
      )

      PREDEFINED_FUNCTIONS = %w( sum cast substring abs pi count min max avg now )

      DIRECTIVES = %w(
        auto_increment unique default charset initially deferred
        deferrable cascade immediate read write asc desc after
        primary foreign return engine
      )

      PREDEFINED_CONSTANTS = %w( null true false )

      # Maps a word to its token kind, ignoring case; words that are not
      # listed fall back to :ident.
      IDENT_KIND = WordList::CaseIgnoring.new(:ident).
        add(KEYWORDS, :keyword).
        add(OBJECTS, :type).
        add(COMMANDS, :class).
        add(PREDEFINED_TYPES, :predefined_type).
        add(PREDEFINED_CONSTANTS, :predefined_constant).
        add(PREDEFINED_FUNCTIONS, :predefined).
        add(DIRECTIVES, :directive)

      # What may follow a backslash inside a string. The trailing `.` (with
      # the /m flag) makes any single character, including newline, an escape.
      ESCAPE = / [rbfntv\n\\\/'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | . /mx
      UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

      # Optional modifier directly in front of an opening quote: one of the
      # letters x, n, b (any case) or an underscore followed by word characters.
      STRING_PREFIXES = /[xnb]|_\w+/i

      # Tokenizes the remaining input and feeds the tokens to +encoder+.
      #
      # encoder - object receiving text_token / begin_group / end_group calls.
      # options - not used by this scanner.
      #
      # Returns the encoder.
      # Raises RuntimeError if the state machine reaches an unhandled case.
      def scan_tokens encoder, options

        state = :initial
        string_type = nil      # quote character that opened the current string
        string_content = ''    # plain string text buffered until the next flush
        name_expected = false  # true right after a `.` that precedes a name

        until eos?

          if state == :initial

            if match = scan(/ \s+ | \\\n /x)
              encoder.text_token match, :space

            elsif match = scan(/(?:--\s?|#).*/)
              encoder.text_token match, :comment

            elsif match = scan(%r( /\* (!)? (?: .*? \*/ | .* ) )mx)
              # A block comment opened with `/*!` is reported as a directive.
              encoder.text_token match, self[1] ? :directive : :comment

            elsif match = scan(/ [*\/=<>:;,!&^|()\[\]{}~%] | [-+\.](?!\d) /x)
              # After `table.` the following word is a plain identifier even
              # if it happens to be listed in IDENT_KIND.
              name_expected = true if match == '.' && check(/[A-Za-z_]/)
              encoder.text_token match, :operator

            elsif match = scan(/(#{STRING_PREFIXES})?([`"'])/o)
              prefix = self[1]
              string_type = self[2]
              encoder.begin_group :string
              encoder.text_token prefix, :modifier if prefix
              match = string_type
              state = :string
              encoder.text_token match, :delimiter

            elsif match = scan(/ @? [A-Za-z_][A-Za-z_0-9]* /x)
              encoder.text_token match, name_expected ? :ident : (match[0] == ?@ ? :variable : IDENT_KIND[match])
              name_expected = false

            elsif match = scan(/0[xX][0-9A-Fa-f]+/)
              encoder.text_token match, :hex

            elsif match = scan(/0[0-7]+(?![89.eEfF])/)
              encoder.text_token match, :octal

            elsif match = scan(/[-+]?(?>\d+)(?![.eEfF])/)
              encoder.text_token match, :integer

            elsif match = scan(/[-+]?(?:\d[fF]|\d*\.\d+(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+)/)
              encoder.text_token match, :float

            elsif match = scan(/\\N/)
              encoder.text_token match, :predefined_constant

            else
              encoder.text_token getch, :error

            end

          elsif state == :string
            if match = scan(/[^\\"'`]+/)
              string_content << match
              next
            elsif match = scan(/["'`]/)
              if string_type == match
                if peek(1) == string_type  # doubling means escape
                  string_content << string_type << getch
                  next
                end
                unless string_content.empty?
                  encoder.text_token string_content, :content
                  string_content = ''
                end
                encoder.text_token match, :delimiter
                encoder.end_group :string
                state = :initial
                string_type = nil
              else
                # A quote of a different kind is ordinary string text.
                string_content << match
              end
            elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
              unless string_content.empty?
                encoder.text_token string_content, :content
                string_content = ''
              end
              encoder.text_token match, :char
            elsif match = scan(/ \\ . /mox)
              string_content << match
              next
            elsif match = scan(/ \\ | $ /x)
              unless string_content.empty?
                encoder.text_token string_content, :content
                string_content = ''
              end
              # `$` can match the empty string; never emit an empty token.
              encoder.text_token match, :error unless match.empty?
              # The string is abandoned here, so its group has to be closed
              # now: the end-of-input cleanup only runs while state is :string.
              encoder.end_group :string
              state = :initial
            else
              raise "else case \" reached; %p not handled." % peek(1), encoder
            end

          else
            raise 'else-case reached', encoder

          end

        end

        if state == :string
          # Input ended inside an unterminated string: emit the text that is
          # still buffered so no source text is lost, then close the group.
          encoder.text_token string_content, :content unless string_content.empty?
          encoder.end_group state
        end

        encoder

      end

    end

  end
end