annotate vendor/gems/coderay-0.9.7/lib/coderay/scanners/html.rb @ 855:7294e8db2515 bug_162

Close obsolete branch bug_162
author Chris Cannam
date Thu, 14 Jul 2011 11:59:19 +0100
parents 0579821a129a
children
rev   line source
module CodeRay
module Scanners

  # HTML Scanner
  #
  # A small state machine that splits HTML (and XML-like) markup into
  # [text, kind] token pairs. The states are:
  #
  # :initial::                outside of any tag (text, comments, entities)
  # :attribute::              inside a tag, expecting an attribute name
  # :attribute_equal::        after an attribute name, expecting "="
  # :attribute_value::        after "=", expecting a value
  # :attribute_value_string:: inside a quoted attribute value
  #
  # Quoted attribute values are wrapped in a [:open, :string] /
  # [:close, :string] token pair.
  class HTML < Scanner

    include Streamable
    register_for :html

    # NOTE(review): presumably the token kinds that should not be counted
    # as lines of code; it is not used in this file -- confirm against
    # the Scanner base class.
    KINDS_NOT_LOC = [
      :comment, :doctype, :preprocessor,
      :tag, :attribute_name, :operator,
      :attribute_value, :delimiter, :content,
      :plain, :entity, :error
    ]

    ATTR_NAME = /[\w.:-]+/
    # Unquoted attribute values accept the same characters as names.
    ATTR_VALUE_UNQUOTED = ATTR_NAME
    TAG_END = /\/?>/
    HEX = /[0-9a-fA-F]/
    # Named (&amp;), decimal (&#38;) and hexadecimal (&#x26;) entities.
    ENTITY = /
      &
      (?:
        \w+
      |
        \#
        (?:
          \d+
        |
          x#{HEX}+
        )
      )
      ;
    /ox

    # Maps the opening quote character to the pattern matching plain
    # content inside a string delimited by that quote.
    PLAIN_STRING_CONTENT = {
      "'" => /[^&'>\n]+/,
      '"' => /[^&">\n]+/,
    }

    # Resets the scanner to its initial state.
    #
    # Both pieces of persistent state are cleared, mirroring #setup, so
    # that no stale string-content pattern survives a reset.
    def reset
      super
      @state = :initial
      @plain_string_content = nil
    end

  private

    # Initialises the state that may be carried between scan_tokens calls.
    def setup
      @state = :initial
      @plain_string_content = nil
    end

    # Scans the remaining input and appends [text, kind] pairs to +tokens+.
    #
    # tokens::  the collection tokens are appended to with <<
    # options:: only :keep_state is read here; when it is set, the final
    #           state is stored so that a later call can continue from it
    #
    # Returns +tokens+.
    def scan_tokens tokens, options

      state = @state
      plain_string_content = @plain_string_content

      until eos?

        kind = nil
        match = nil

        # Whitespace is checked first in every state, so it becomes a
        # :space token even inside quoted attribute values.
        if scan(/\s+/m)
          kind = :space

        else

          case state

          when :initial
            if scan(/<!--.*?-->/m)
              kind = :comment
            elsif scan(/<!DOCTYPE.*?>/m)
              kind = :doctype
            elsif scan(/<\?xml.*?\?>/m)
              kind = :preprocessor
            elsif scan(/<\?.*?\?>|<%.*?%>/m)
              # Other processing instructions and <% ... %> blocks are
              # tagged as comments.
              kind = :comment
            elsif scan(/<\/[-\w.:]*>/m)
              kind = :tag
            elsif match = scan(/<[-\w.:]+>?/m)
              kind = :tag
              # No closing ">" yet: attributes may follow.
              state = :attribute unless match[-1] == ?>
            elsif scan(/[^<>&]+/)
              kind = :plain
            elsif scan(/#{ENTITY}/ox)
              kind = :entity
            elsif scan(/[<>&]/)
              kind = :error
            else
              raise_inspect '[BUG] else-case reached with state %p' % [state], tokens
            end

          when :attribute
            if scan(/#{TAG_END}/o)
              kind = :tag
              state = :initial
            elsif scan(/#{ATTR_NAME}/o)
              kind = :attribute_name
              state = :attribute_equal
            else
              # Consume one character so the scanner always advances.
              kind = :error
              getch
            end

          when :attribute_equal
            if scan(/=/)
              kind = :operator
              state = :attribute_value
            elsif scan(/#{ATTR_NAME}/o)
              # Previous attribute had no value; this is the next name.
              kind = :attribute_name
            elsif scan(/#{TAG_END}/o)
              kind = :tag
              state = :initial
            elsif scan(/./)
              kind = :error
              state = :attribute
            end

          when :attribute_value
            if scan(/#{ATTR_VALUE_UNQUOTED}/o)
              kind = :attribute_value
              state = :attribute
            elsif match = scan(/["']/)
              tokens << [:open, :string]
              state = :attribute_value_string
              # Remember which quote opened the string.
              plain_string_content = PLAIN_STRING_CONTENT[match]
              kind = :delimiter
            elsif scan(/#{TAG_END}/o)
              kind = :tag
              state = :initial
            else
              kind = :error
              getch
            end

          when :attribute_value_string
            if scan(plain_string_content)
              kind = :content
            elsif scan(/['"]/)
              # The other quote character is matched as content above,
              # so this is the closing delimiter. Both tokens are pushed
              # here, hence the +next+.
              tokens << [matched, :delimiter]
              tokens << [:close, :string]
              state = :attribute
              next
            elsif scan(/#{ENTITY}/ox)
              kind = :entity
            elsif scan(/&/)
              # A bare "&" that does not start an entity.
              kind = :content
            elsif scan(/[\n>]/)
              # The string was not terminated before the tag ended:
              # close the group and report the character as an error.
              tokens << [:close, :string]
              kind = :error
              state = :initial
            end

          else
            raise_inspect 'Unknown state: %p' % [state], tokens

          end

        end

        # Branches that did not capture the text themselves fall back to
        # the scanner's last match.
        match ||= matched
        if $CODERAY_DEBUG and not kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens, state
        end
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]
      end

      if options[:keep_state]
        @state = state
        @plain_string_content = plain_string_content
      elsif state == :attribute_value_string
        # Input ended inside a quoted attribute value and no later chunk
        # will continue it: close the group opened with [:open, :string]
        # so the token stream stays balanced.
        tokens << [:close, :string]
      end

      tokens
    end

  end

end
end