annotate vendor/plugins/coderay-0.9.2/lib/coderay/scanners/html.rb @ 882:9e27d0747c77 bug_97

Close obsolete branch bug_97
author Chris Cannam
date Fri, 25 Mar 2011 13:58:03 +0000
parents 513646585e45
children
rev   line source
module CodeRay
module Scanners

  # HTML Scanner
  #
  # A small state machine that walks the input and emits [text, kind]
  # pairs into the token collection it is given. Quoted attribute values
  # are wrapped in a [:open, :string] ... [:close, :string] group.
  #
  # States used by #scan_tokens:
  #   :initial                 - outside of any tag
  #   :attribute               - inside a start tag, expecting an attribute
  #                              name or the end of the tag
  #   :attribute_equal         - just read an attribute name
  #   :attribute_value         - just read "=", expecting a value
  #   :attribute_value_string  - inside a quoted attribute value
  class HTML < Scanner

    include Streamable
    register_for :html

    # NOTE(review): presumably the token kinds that are not counted as
    # lines of code by the Scanner base class - confirm against Scanner.
    KINDS_NOT_LOC = [
      :comment, :doctype, :preprocessor,
      :tag, :attribute_name, :operator,
      :attribute_value, :delimiter, :content,
      :plain, :entity, :error
    ]

    ATTR_NAME = /[\w.:-]+/
    # Unquoted attribute values are matched with the same pattern as names.
    ATTR_VALUE_UNQUOTED = ATTR_NAME
    # Matches both ">" and the self-closing "/>".
    TAG_END = /\/?>/
    HEX = /[0-9a-fA-F]/
    # Named (&amp;), decimal (&#38;) and hexadecimal (&#x26;) entities.
    ENTITY = /
      &
      (?:
        \w+
      |
        \#
        (?:
          \d+
        |
          x#{HEX}+
        )
      )
      ;
    /ox

    # Maps an opening quote character to the pattern for plain text inside
    # a string delimited by that quote. Entities ("&"), the closing quote,
    # ">" and newlines always end a run of plain content.
    PLAIN_STRING_CONTENT = {
      "'" => /[^&'>\n]+/,
      '"' => /[^&">\n]+/,
    }

    # Resets the scanner and puts the state machine back into its start
    # state, discarding any state kept from a previous :keep_state run.
    def reset
      super
      @state = :initial
      @plain_string_content = nil
    end

  private

    # Initializes the state that may be carried between #scan_tokens calls.
    def setup
      @state = :initial
      @plain_string_content = nil
    end

    # Scans the remaining input and appends the resulting tokens to
    # +tokens+.
    #
    # tokens  - collection responding to <<; receives [text, kind] pairs
    #           and [:open, :string] / [:close, :string] group markers.
    # options - Hash; if options[:keep_state] is set, the final state is
    #           stored so that a following call resumes where this one
    #           stopped.
    #
    # Returns +tokens+.
    # Calls raise_inspect when an unknown state or an empty token is
    # encountered.
    def scan_tokens tokens, options

      state = @state
      plain_string_content = @plain_string_content

      # A previous call that stopped inside a quoted attribute value closed
      # its :string group before returning (see the end of this method), so
      # re-open it here to keep every call's output balanced.
      tokens << [:open, :string] if state == :attribute_value_string

      until eos?

        kind = nil
        match = nil

        # Whitespace is consumed before the state dispatch, so it becomes a
        # :space token in every state (including inside attribute strings).
        if scan(/\s+/m)
          kind = :space

        else

          case state

          when :initial
            if scan(/<!--.*?-->/m)
              kind = :comment
            elsif scan(/<!DOCTYPE.*?>/m)
              kind = :doctype
            elsif scan(/<\?xml.*?\?>/m)
              kind = :preprocessor
            elsif scan(/<\?.*?\?>|<%.*?%>/m)
              # Other processing instructions and <% ... %> blocks are
              # emitted as comments.
              kind = :comment
            elsif scan(/<\/[-\w.:]*>/m)
              kind = :tag
            elsif match = scan(/<[-\w.:]+>?/m)
              kind = :tag
              # "<name>" is complete; "<name" is followed by attributes.
              # (?> compares correctly whether String#[] yields an Integer
              # or a one-character String.)
              state = :attribute unless match[-1] == ?>
            elsif scan(/[^<>&]+/)
              kind = :plain
            elsif scan(ENTITY)
              kind = :entity
            elsif scan(/[<>&]/)
              kind = :error
            else
              raise_inspect '[BUG] else-case reached with state %p' % [state], tokens
            end

          when :attribute
            if scan(TAG_END)
              kind = :tag
              state = :initial
            elsif scan(ATTR_NAME)
              kind = :attribute_name
              state = :attribute_equal
            else
              # Skip a single unexpected character.
              kind = :error
              getch
            end

          when :attribute_equal
            if scan(/=/)
              kind = :operator
              state = :attribute_value
            elsif scan(ATTR_NAME)
              # Attribute without a value, directly followed by another name.
              kind = :attribute_name
            elsif scan(TAG_END)
              kind = :tag
              state = :initial
            elsif scan(/./)
              kind = :error
              state = :attribute
            end

          when :attribute_value
            if scan(ATTR_VALUE_UNQUOTED)
              kind = :attribute_value
              state = :attribute
            elsif match = scan(/["']/)
              tokens << [:open, :string]
              state = :attribute_value_string
              # Remember which quote opened the string so that the other
              # quote character counts as plain content.
              plain_string_content = PLAIN_STRING_CONTENT[match]
              kind = :delimiter
            elsif scan(TAG_END)
              kind = :tag
              state = :initial
            else
              kind = :error
              getch
            end

          when :attribute_value_string
            if scan(plain_string_content)
              kind = :content
            elsif scan(/['"]/)
              # Closing quote: emit delimiter and group end here, then skip
              # the generic token push at the bottom of the loop.
              tokens << [matched, :delimiter]
              tokens << [:close, :string]
              state = :attribute
              next
            elsif scan(ENTITY)
              kind = :entity
            elsif scan(/&/)
              # A lone "&" that does not start an entity.
              kind = :content
            elsif scan(/[\n>]/)
              # Unterminated string: close the group and flag the character.
              tokens << [:close, :string]
              kind = :error
              state = :initial
            end

          else
            raise_inspect 'Unknown state: %p' % [state], tokens

          end

        end

        match ||= matched
        if $CODERAY_DEBUG && !kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens, state
        end
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]
      end

      if options[:keep_state]
        @state = state
        @plain_string_content = plain_string_content
      end

      # Input ended inside a quoted attribute value: close the :string
      # group so the emitted tokens are always balanced. When state is
      # kept, the next call re-opens the group at its start.
      tokens << [:close, :string] if state == :attribute_value_string

      tokens
    end

  end

end
end