module CodeRay
module Scanners

  # Scanner that splits JSON text into CodeRay tokens.
  #
  # Strings are emitted as token groups: an [:open, kind] marker, the
  # delimiter/content/escape tokens, then a matching [:close, kind] marker,
  # where kind is :key for object member names and :string for every other
  # string.
  class JSON < Scanner

    include Streamable

    register_for :json
    file_extension 'json'

    # NOTE(review): presumably the token kinds that are ignored when counting
    # lines of code; the constant is not read in this file - confirm against
    # the Scanner base class.
    KINDS_NOT_LOC = [
      :float, :char, :content, :delimiter,
      :error, :integer, :operator, :value,
    ]

    CONSTANTS = %w( true false null )
    # Maps the JSON literals to :value; any other word falls back to :key.
    IDENT_KIND = WordList.new(:key).add(CONSTANTS, :value)

    # Character that may follow a backslash in a simple escape sequence.
    ESCAPE = / [bfnrt\\"\/] /x
    # Body of a \uXXXX escape sequence (without the leading backslash).
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x

    # Scans the input until the end and appends the resulting tokens.
    #
    # tokens  - token collection that receives [text, kind] pairs and
    #           [:open, kind] / [:close, kind] group markers via <<.
    # options - accepted for interface compatibility; not read here.
    #
    # Returns the tokens object that was passed in.
    def scan_tokens tokens, options

      state = :initial
      stack = []             # enclosing containers, innermost last (:object / :array)
      string_delimiter = nil
      key_expected = false   # true when the next string is an object member name

      until eos?

        kind = nil
        match = nil

        case state

        when :initial
          if match = scan(/ \s+ | \\\n /x)
            tokens << [match, :space]
            next
          elsif match = scan(/ [:,\[{\]}] /x)
            kind = :operator
            case match
            when '{' then stack << :object; key_expected = true
            when '[' then stack << :array
            when ':' then key_expected = false
            # Re-evaluate on every comma instead of only ever setting the flag:
            # after an empty object inside an array ("[{}, "x"]") the flag was
            # left true by '{' and the next array element was tagged as a key.
            when ',' then key_expected = (stack.last == :object)
            when '}', ']' then stack.pop  # no error recovery, but works for valid JSON
            end
          elsif match = scan(/ true | false | null /x)
            kind = IDENT_KIND[match]
          elsif match = scan(/-?(?:0|[1-9]\d*)/)
            kind = :integer
            # A fraction and/or exponent turns the integer into a float.
            if scan(/\.\d+(?:[eE][-+]?\d+)?|[eE][-+]?\d+/)
              match << matched
              kind = :float
            end
          elsif match = scan(/"/)
            state = key_expected ? :key : :string
            tokens << [:open, state]
            kind = :delimiter
          else
            getch
            kind = :error
          end

        when :string, :key
          if scan(/[^\\"]+/)
            kind = :content
          elsif scan(/"/)
            tokens << ['"', :delimiter]
            tokens << [:close, state]
            state = :initial
            next
          elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            kind = :char
          elsif scan(/\\./m)
            # Backslash followed by a character that is not a known escape.
            kind = :content
          elsif scan(/ \\ | $ /x)
            # Close the group with the kind it was opened with (:key or
            # :string) so that open/close markers stay balanced.
            tokens << [:close, state]
            kind = :error
            state = :initial
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
          end

        else
          raise_inspect 'Unknown state', tokens

        end

        # Branches that did not capture the text themselves use the last match.
        match ||= matched
        if $CODERAY_DEBUG and not kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]

      end

      # Input ended inside a string: close the group that is still open.
      if [:string, :key].include? state
        tokens << [:close, state]
      end

      tokens
    end

  end

end
end