module CodeRay
module Scanners

  # Scanner for JSON (JavaScript Object Notation).
  #
  # Splits the input into whitespace, structural operators, the literals
  # true/false/null, integers, floats and strings. Strings that appear where
  # an object member name is expected are emitted as :key groups, all other
  # strings as :string groups.
  class JSON < Scanner

    include Streamable

    register_for :json
    file_extension 'json'

    # NOTE(review): presumably the token kinds that the Scanner base class
    # ignores when counting lines of code -- confirm against Scanner.
    KINDS_NOT_LOC = [
      :float, :char, :content, :delimiter,
      :error, :integer, :operator, :value,
    ]

    # Character allowed after a backslash in a simple escape sequence.
    ESCAPE = / [bfnrt\\"\/] /x
    # Body of a \uXXXX escape (without the leading backslash).
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x

    # Tokenizes the scanner's input and appends the result to +tokens+.
    #
    # tokens::  collector that receives entries via <<. Text tokens are
    #           appended as [text, kind]; a string or key is wrapped in
    #           [:open, state] ... [:close, state] markers.
    # options:: not used by this scanner.
    #
    # Returns +tokens+.
    def scan_tokens tokens, options

      state = :initial
      stack = []            # currently open containers (:object / :array)
      key_expected = false  # true when the next string is an object key

      until eos?

        kind = nil
        match = nil

        case state

        when :initial
          if match = scan(/ \s+ | \\\n /x)
            tokens << [match, :space]
            next
          elsif match = scan(/ [:,\[{\]}] /x)
            kind = :operator
            case match
            when '{'
              stack << :object
              key_expected = true
            when '['
              stack << :array
              # Array elements are never keys.
              key_expected = false
            when ':'
              key_expected = false
            when ','
              # A key follows a comma only directly inside an object; inside
              # an array (or at top level) the flag must be cleared so that a
              # stale value left by a nested object does not leak through.
              key_expected = (stack.last == :object)
            when '}', ']'
              # No error recovery, but works for valid JSON. A value has just
              # ended, so whatever follows is not a key until the next
              # ',' or '{' says so (matters for empty objects: [{}, "x"]).
              stack.pop
              key_expected = false
            end
          elsif match = scan(/ true | false | null /x)
            kind = :value
          elsif match = scan(/-?(?:0|[1-9]\d*)/)
            kind = :integer
            # A fraction and/or exponent turns the integer into a float.
            if scan(/\.\d+(?:[eE][-+]?\d+)?|[eE][-+]?\d+/)
              match << matched
              kind = :float
            end
          elsif match = scan(/"/)
            state = key_expected ? :key : :string
            tokens << [:open, state]
            kind = :delimiter
          else
            # Anything else is not valid here; consume one character.
            getch
            kind = :error
          end

        when :string, :key
          if scan(/[^\\"]+/)
            kind = :content
          elsif scan(/"/)
            tokens << ['"', :delimiter]
            tokens << [:close, state]
            state = :initial
            next
          elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            kind = :char
          elsif scan(/\\./m)
            # Backslash followed by a character that is not a known escape.
            kind = :content
          elsif scan(/ \\ | $ /x)
            # A backslash with nothing after it (or an end of line): close
            # the string and flag the error.
            tokens << [:close, state]
            kind = :error
            state = :initial
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
          end

        else
          raise_inspect 'Unknown state', tokens

        end

        # Branches that did not capture the text rely on the last match.
        match ||= matched
        if $CODERAY_DEBUG && !kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]

      end

      # Close a string that was still open when the input ended.
      if [:string, :key].include? state
        tokens << [:close, state]
      end

      tokens
    end

  end

end
end