module CodeRay
module Scanners

  # Scanner for JSON (JavaScript Object Notation).
  #
  # Splits the input into whitespace, operators, literal values, numbers and
  # strings. Strings are emitted as token groups; a string that appears where
  # an object member name is expected is emitted as a :key group, any other
  # string as a :string group.
  class JSON < Scanner

    register_for :json
    file_extension 'json'

    KINDS_NOT_LOC = [
      :float, :char, :content, :delimiter,
      :error, :integer, :operator, :value,
    ]  # :nodoc:

    # Character that may follow a backslash in a simple escape sequence.
    ESCAPE = / [bfnrt\\"\/] /x  # :nodoc:
    # The part of a \uXXXX escape that follows the backslash.
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x  # :nodoc:

  protected

    # See http://json.org/ for a definition of the JSON lexic/grammar.
    #
    # Scans the remaining input and sends text tokens and begin/end group
    # events to +encoder+. +options+ is not used by this scanner.
    #
    # Returns +encoder+.
    def scan_tokens encoder, options

      state = :initial
      stack = []            # currently open containers, :object or :array
      key_expected = false  # true when the next string is an object key

      until eos?

        case state

        when :initial
          if match = scan(/ \s+ /x)
            encoder.text_token match, :space
          elsif match = scan(/"/)
            state = key_expected ? :key : :string
            encoder.begin_group state
            encoder.text_token match, :delimiter
          elsif match = scan(/ [:,\[{\]}] /x)
            encoder.text_token match, :operator
            case match
            when ':' then key_expected = false
            # After a comma, a key follows only directly inside an object.
            when ',' then key_expected = (stack.last == :object)
            when '{' then stack << :object; key_expected = true
            when '[' then stack << :array; key_expected = false
            # Closing a container must clear the flag; otherwise an empty
            # object inside an array ("[{}, "b"]") would leave it set and
            # the next string would be tokenized as a key.
            # No error recovery, but works for valid JSON.
            when '}', ']' then stack.pop; key_expected = false
            end
          elsif match = scan(/ true | false | null /x)
            encoder.text_token match, :value
          elsif match = scan(/ -? (?: 0 | [1-9]\d* ) /x)
            # An integer part followed by a fraction and/or exponent is a float.
            if scan(/ \.\d+ (?:[eE][-+]?\d+)? | [eE][-+]? \d+ /x)
              match << matched
              encoder.text_token match, :float
            else
              encoder.text_token match, :integer
            end
          else
            encoder.text_token getch, :error
          end

        when :string, :key
          if match = scan(/[^\\"]+/)
            encoder.text_token match, :content
          elsif match = scan(/"/)
            encoder.text_token match, :delimiter
            encoder.end_group state
            state = :initial
          elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            encoder.text_token match, :char
          elsif match = scan(/\\./m)
            # Backslash followed by a character that is not a known escape.
            encoder.text_token match, :content
          elsif match = scan(/ \\ | $ /x)
            # A backslash with nothing after it (or an end-of-line position):
            # close the group and flag the error.
            encoder.end_group state
            encoder.text_token match, :error
            state = :initial
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
          end

        else
          raise_inspect 'Unknown state: %p' % [state], encoder

        end
      end

      # Input ended inside a string or key: close the group that is still open.
      if [:string, :key].include? state
        encoder.end_group state
      end

      encoder
    end

  end

end
end