annotate vendor/gems/coderay-1.0.0/lib/coderay/scanners/html.rb @ 1171:b4558bc5837f bug_505

Close obsolete branch bug_505
author Chris Cannam
date Fri, 03 Aug 2012 19:40:23 +0100
parents cbb26bc654de
children
rev   line source
Chris@909 1 module CodeRay
Chris@909 2 module Scanners
Chris@909 3
Chris@909 4 # HTML Scanner
Chris@909 5 #
Chris@909 6 # Alias: +xhtml+
Chris@909 7 #
Chris@909 8 # See also: Scanners::XML
Chris@909 9 class HTML < Scanner
Chris@909 10
# Registers this scanner under the :html language key.
register_for(:html)
Chris@909 12
# Token kinds listed for this scanner.
# NOTE(review): judging by the name, these are the kinds that do not count
# as lines of code; the consumer of this list is outside this file -- confirm.
KINDS_NOT_LOC = [
  :comment,
  :doctype,
  :preprocessor,
  :tag,
  :attribute_name,
  :operator,
  :attribute_value,
  :string,
  :plain,
  :entity,
  :error,
] # :nodoc:
Chris@909 19
# Names of the event handler attributes (all start with "on"), in
# alphabetical order, one initial letter per group. IN_ATTRIBUTE maps each of
# them to :script, which makes scan_tokens treat a quoted value of such an
# attribute as JavaScript.
EVENT_ATTRIBUTES = %w[
  onabort onafterprint
  onbeforeprint onbeforeunload onblur
  oncanplay oncanplaythrough onchange onclick oncontextmenu oncuechange
  ondblclick ondrag ondragdrop ondragend ondragenter ondragleave ondragover
  ondragstart ondrop ondurationchange
  onemptied onended onerror
  onfocus onformchange onforminput
  onhashchange
  oninput oninvalid
  onkeydown onkeypress onkeyup
  onload onloadeddata onloadedmetadata onloadstart
  onmessage onmousedown onmousemove onmouseout onmouseover onmouseup
  onmousewheel onmove
  onoffline ononline
  onpagehide onpageshow onpause onplay onplaying onpopstate onprogress
  onratechange onreadystatechange onredo onreset onresize
  onscroll onseeked onseeking onselect onshow onstalled onstorage onsubmit
  onsuspend
  ontimeupdate
  onundo onunload
  onvolumechange
  onwaiting
]
Chris@909 34
# Lookup used by scan_tokens to decide how an attribute's value is scanned:
# every name in EVENT_ATTRIBUTES is registered with the value :script.
# NOTE(review): presumably the lookup ignores case and yields nil (the
# constructor argument) for any other name -- confirm against WordList.
IN_ATTRIBUTE = WordList::CaseIgnoring.new(nil).add(EVENT_ATTRIBUTES, :script)
Chris@909 37
# One or more word characters, dots, colons or hyphens. scan_tokens uses it
# for attribute names and for unquoted attribute values.
ATTR_NAME = /[\w.:-]+/ # :nodoc:

# The end of a tag: ">" with an optional "/" in front of it.
TAG_END = /\/?>/ # :nodoc:

# A single hexadecimal digit, either case.
HEX = /[0-9a-fA-F]/ # :nodoc:

# A character reference: "&", then a name, a decimal number ("#" + digits)
# or a hexadecimal number ("#x" + hex digits), then ";".
# Written in extended mode, so the whitespace in the pattern is not matched.
ENTITY = /
  &
  (?: \w+ | \# (?: \d+ | x#{HEX}+ ) )
  ;
/ox # :nodoc:
Chris@909 55
# Keyed by the quote character that opened an attribute value; each pattern
# matches a run of ordinary characters inside such a value. A run stops
# before "&", the same quote character, ">" and a newline, all of which
# scan_tokens handles in separate branches.
PLAIN_STRING_CONTENT = {
  "'" => /[^&'>\n]+/,
  '"' => /[^&">\n]+/,
} # :nodoc:
Chris@909 60
# Resets the scanner: runs the superclass reset first, then puts the HTML
# state machine back into :initial and drops the pattern remembered for an
# open quoted attribute value.
def reset
  super
  @state = :initial
  @plain_string_content = nil
end
Chris@909 66
Chris@909 67 protected
Chris@909 68
# Puts the state machine into its starting configuration: state :initial and
# no remembered pattern for quoted attribute content.
# NOTE(review): presumably called by the Scanner base class when the scanner
# is created -- confirm.
def setup
  @state = :initial
  @plain_string_content = nil
end
Chris@909 73
# Tokenizes +code+ as JavaScript and sends the tokens to +encoder+.
#
# Does nothing (and returns nil) when +code+ is nil, false or empty.
# Otherwise a JavaScript scanner is created on first use, kept in
# @java_script_scanner, and reused for every later call; the result of its
# +tokenize+ call is returned.
#
# No :inline group is opened here; the call sites in scan_tokens emit the
# begin_group/end_group pair themselves.
def scan_java_script encoder, code
  return if !code || code.empty?

  @java_script_scanner ||= Scanners::JavaScript.new('', :keep_tokens => true)
  @java_script_scanner.tokenize(code, :tokens => encoder)
end
Chris@909 82
# Tokenizes the rest of the input as HTML and sends every token to +encoder+.
#
# encoder:: receives +text_token+, +begin_group+ and +end_group+ calls.
# options:: +:state+ selects the state to start in (default: the state stored
#           on the scanner). A truthy +:keep_state+ stores the final state and
#           the current string-content pattern back on the scanner.
#
# Returns +encoder+.
#
# The scanner is a state machine:
# :initial::                text between tags, comments, doctype, tag starts
# :attribute::              inside a tag, before an attribute name or the tag end
# :attribute_equal::        right after an attribute name
# :attribute_value::        right after "="
# :attribute_value_string:: inside a quoted attribute value
# :in_special_tag::         body of a <script> element
#
# Reports unexpected states through +raise_inspect+ and raises a RuntimeError
# for an unknown special tag.
def scan_tokens encoder, options
  state = options[:state] || @state
  plain_string_content = @plain_string_content
  in_tag = in_attribute = nil

  # When resuming inside a quoted value, reopen the :string group; it is
  # closed again at the end of this method if the value is still open.
  encoder.begin_group :string if state == :attribute_value_string

  until eos?

    # Whitespace is a :space token everywhere except inside a script body,
    # where it belongs to the embedded code.
    if state != :in_special_tag && match = scan(/\s+/m)
      encoder.text_token match, :space

    else

      case state

      when :initial
        if match = scan(/<!--(?:.*?-->|.*)/m)
          encoder.text_token match, :comment
        # The doctype keyword is matched case-insensitively, so that
        # "<!doctype html>" is not broken up into :error tokens.
        elsif match = scan(/<!DOCTYPE(?:.*?>|.*)/mi)
          encoder.text_token match, :doctype
        elsif match = scan(/<\?xml(?:.*?\?>|.*)/m)
          encoder.text_token match, :preprocessor
        elsif match = scan(/<\?(?:.*?\?>|.*)/m)
          encoder.text_token match, :comment
        elsif match = scan(/<\/[-\w.:]*>?/m)
          in_tag = nil
          encoder.text_token match, :tag
        # Group 1 is set only for a script tag. The lookahead keeps longer
        # names that merely start with "script" (e.g. <scripty>) from being
        # treated as script elements. Group 2 is the ">" of an attribute-less
        # tag.
        elsif match = scan(/<(?:(script)(?![-\w.:])|[-\w.:]+)(>)?/m)
          encoder.text_token match, :tag
          in_tag = self[1]
          if self[2]
            state = :in_special_tag if in_tag
          else
            state = :attribute
          end
        elsif match = scan(/[^<>&]+/)
          encoder.text_token match, :plain
        elsif match = scan(/#{ENTITY}/ox)
          encoder.text_token match, :entity
        elsif match = scan(/[<>&]/)
          in_tag = nil
          encoder.text_token match, :error
        else
          raise_inspect '[BUG] else-case reached with state %p' % [state], encoder
        end

      when :attribute
        if match = scan(/#{TAG_END}/o)
          encoder.text_token match, :tag
          in_attribute = nil
          if in_tag
            state = :in_special_tag
          else
            state = :initial
          end
        elsif match = scan(/#{ATTR_NAME}/o)
          # Remember whether this attribute's value has to be scanned as script.
          in_attribute = IN_ATTRIBUTE[match]
          encoder.text_token match, :attribute_name
          state = :attribute_equal
        else
          in_tag = nil
          encoder.text_token getch, :error
        end

      when :attribute_equal
        if match = scan(/=/)
          encoder.text_token match, :operator
          state = :attribute_value
        # Attribute without a value: only peek at what follows. The next
        # attribute name or the tag end must stay in the input so that the
        # :attribute state emits a token for it; consuming it here would
        # silently drop that text from the output.
        elsif check(/#{ATTR_NAME}/o) || check(/#{TAG_END}/o)
          state = :attribute
          next
        else
          encoder.text_token getch, :error
          state = :attribute
        end

      when :attribute_value
        if match = scan(/#{ATTR_NAME}/o)
          encoder.text_token match, :attribute_value
          state = :attribute
        elsif match = scan(/["']/)
          if in_attribute == :script
            # Quoted value of an event attribute: everything up to the
            # matching quote (or the end of input) is scanned as JavaScript.
            encoder.begin_group :inline
            encoder.text_token match, :inline_delimiter
            if scan(/javascript:[ \t]*/)
              encoder.text_token matched, :comment
            end
            code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/)
            scan_java_script encoder, code
            match = scan(/["']/)
            encoder.text_token match, :inline_delimiter if match
            encoder.end_group :inline
            state = :attribute
            in_attribute = nil
          else
            encoder.begin_group :string
            state = :attribute_value_string
            # The content pattern depends on which quote opened the value.
            plain_string_content = PLAIN_STRING_CONTENT[match]
            encoder.text_token match, :delimiter
          end
        elsif match = scan(/#{TAG_END}/o)
          encoder.text_token match, :tag
          state = :initial
        else
          encoder.text_token getch, :error
        end

      when :attribute_value_string
        if match = scan(plain_string_content)
          encoder.text_token match, :content
        elsif match = scan(/['"]/)
          encoder.text_token match, :delimiter
          encoder.end_group :string
          state = :attribute
        elsif match = scan(/#{ENTITY}/ox)
          encoder.text_token match, :entity
        elsif match = scan(/&/)
          encoder.text_token match, :content
        elsif match = scan(/[\n>]/)
          # Unterminated value: close the group and flag the character.
          encoder.end_group :string
          state = :initial
          encoder.text_token match, :error
        end

      when :in_special_tag
        case in_tag
        when 'script'
          encoder.text_token match, :space if match = scan(/[ \t]*\n/)
          if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/m)
            # Script body wrapped in an HTML comment: the comment markers
            # become :comment tokens, the text between them is the code.
            code = self[2] || self[4]
            closing = self[3]
            encoder.text_token self[1], :comment
          else
            # Everything up to the closing tag (or the end of input).
            code = scan_until(/(?=(?:\n\s*)?<\/script>)|\z/)
            closing = false
          end
          unless code.empty?
            encoder.begin_group :inline
            scan_java_script encoder, code
            encoder.end_group :inline
          end
          encoder.text_token closing, :comment if closing
          state = :initial
        else
          raise 'unknown special tag: %p' % [in_tag]
        end

      else
        raise_inspect 'Unknown state: %p' % [state], encoder

      end

    end

  end

  if options[:keep_state]
    @state = state
    @plain_string_content = plain_string_content
  end

  encoder.end_group :string if state == :attribute_value_string

  encoder
end
Chris@909 249
Chris@909 250 end
Chris@909 251
Chris@909 252 end
Chris@909 253 end