Chris@909: module CodeRay Chris@909: module Scanners Chris@909: Chris@909: # Scanner for YAML. Chris@909: # Chris@909: # Based on the YAML scanner from Syntax by Jamis Buck. Chris@909: class YAML < Scanner Chris@909: Chris@909: register_for :yaml Chris@909: file_extension 'yml' Chris@909: Chris@909: KINDS_NOT_LOC = :all Chris@909: Chris@909: protected Chris@909: Chris@909: def scan_tokens encoder, options Chris@909: Chris@909: state = :initial Chris@909: key_indent = string_indent = 0 Chris@909: Chris@909: until eos? Chris@909: Chris@909: key_indent = nil if bol? Chris@909: Chris@909: if match = scan(/ +[\t ]*/) Chris@909: encoder.text_token match, :space Chris@909: Chris@909: elsif match = scan(/\n+/) Chris@909: encoder.text_token match, :space Chris@909: state = :initial if match.index(?\n) Chris@909: Chris@909: elsif match = scan(/#.*/) Chris@909: encoder.text_token match, :comment Chris@909: Chris@909: elsif bol? and case Chris@909: when match = scan(/---|\.\.\./) Chris@909: encoder.begin_group :head Chris@909: encoder.text_token match, :head Chris@909: encoder.end_group :head Chris@909: next Chris@909: when match = scan(/%.*/) Chris@909: encoder.text_token match, :doctype Chris@909: next Chris@909: end Chris@909: Chris@909: elsif state == :value and case Chris@909: when !check(/(?:"[^"]*")(?=: |:$)/) && match = scan(/"/) Chris@909: encoder.begin_group :string Chris@909: encoder.text_token match, :delimiter Chris@909: encoder.text_token match, :content if match = scan(/ [^"\\]* (?: \\. [^"\\]* )* /mx) Chris@909: encoder.text_token match, :delimiter if match = scan(/"/) Chris@909: encoder.end_group :string Chris@909: next Chris@909: when match = scan(/[|>][-+]?/) Chris@909: encoder.begin_group :string Chris@909: encoder.text_token match, :delimiter Chris@909: string_indent = key_indent || column(pos - match.size) - 1 Chris@909: encoder.text_token matched, :content if scan(/(?:\n+ {#{string_indent + 1}}.*)+/) Chris@909: encoder.end_group :string Chris@909: next Chris@909: when match = scan(/(?![!"*&]).+?(?=$|\s+#)/) Chris@909: encoder.begin_group :string Chris@909: encoder.text_token match, :content Chris@909: string_indent = key_indent || column(pos - match.size) - 1 Chris@909: encoder.text_token matched, :content if scan(/(?:\n+ {#{string_indent + 1}}.*)+/) Chris@909: encoder.end_group :string Chris@909: next Chris@909: end Chris@909: Chris@909: elsif case Chris@909: when match = scan(/[-:](?= |$)/) Chris@909: state = :value if state == :colon && (match == ':' || match == '-') Chris@909: state = :value if state == :initial && match == '-' Chris@909: encoder.text_token match, :operator Chris@909: next Chris@909: when match = scan(/[,{}\[\]]/) Chris@909: encoder.text_token match, :operator Chris@909: next Chris@909: when state == :initial && match = scan(/[\w.() ]*\S(?= *:(?: |$))/) Chris@909: encoder.text_token match, :key Chris@909: key_indent = column(pos - match.size) - 1 Chris@909: state = :colon Chris@909: next Chris@909: when match = scan(/(?:"[^"\n]*"|'[^'\n]*')(?= *:(?: |$))/) Chris@909: encoder.begin_group :key Chris@909: encoder.text_token match[0,1], :delimiter Chris@909: encoder.text_token match[1..-2], :content Chris@909: encoder.text_token match[-1,1], :delimiter Chris@909: encoder.end_group :key Chris@909: key_indent = column(pos - match.size) - 1 Chris@909: state = :colon Chris@909: next Chris@909: when match = scan(/(![\w\/]+)(:([\w:]+))?/) Chris@909: encoder.text_token self[1], :type Chris@909: if self[2] Chris@909: encoder.text_token ':', :operator Chris@909: encoder.text_token self[3], :class Chris@909: end Chris@909: next Chris@909: when match = scan(/&\S+/) Chris@909: encoder.text_token match, :variable Chris@909: next Chris@909: when match = scan(/\*\w+/) Chris@909: encoder.text_token match, :global_variable Chris@909: next Chris@909: when match = scan(/<