comparison vendor/gems/coderay-1.0.0/lib/coderay/scanners/html.rb @ 909:cbb26bc654de redmine-1.3

Update to Redmine 1.3-stable branch (Redmine SVN rev 8964)
author Chris Cannam
date Fri, 24 Feb 2012 19:09:32 +0000
parents
children
comparison
equal deleted inserted replaced
908:c6c2cbd0afee 909:cbb26bc654de
1 module CodeRay
2 module Scanners
3
4 # HTML Scanner
5 #
6 # Alias: +xhtml+
7 #
8 # See also: Scanners::XML
9 class HTML < Scanner
10
11 register_for :html
12
# Token kinds listed for this scanner under the name KINDS_NOT_LOC.
# NOTE(review): presumably the kinds the Scanner base class skips when
# counting lines of code - confirm against Scanner.
KINDS_NOT_LOC = %w(
  comment doctype preprocessor
  tag attribute_name operator
  attribute_value string
  plain entity error
).map { |kind| kind.to_sym } # :nodoc:
19
# Names of the event handler attributes, in alphabetical order (one row
# per initial letter). They are registered in IN_ATTRIBUTE with the value
# :script, which makes the scanner treat their quoted values as JavaScript.
EVENT_ATTRIBUTES = %w[
  onabort onafterprint
  onbeforeprint onbeforeunload onblur
  oncanplay oncanplaythrough onchange onclick oncontextmenu oncuechange
  ondblclick ondrag ondragdrop ondragend ondragenter ondragleave ondragover
  ondragstart ondrop ondurationchange
  onemptied onended onerror
  onfocus onformchange onforminput
  onhashchange
  oninput oninvalid
  onkeydown onkeypress onkeyup
  onload onloadeddata onloadedmetadata onloadstart
  onmessage onmousedown onmousemove onmouseout onmouseover onmouseup
  onmousewheel onmove
  onoffline ononline
  onpagehide onpageshow onpause onplay onplaying onpopstate onprogress
  onratechange onreadystatechange onredo onreset onresize
  onscroll onseeked onseeking onselect onshow onstalled onstorage onsubmit
  onsuspend
  ontimeupdate
  onundo onunload
  onvolumechange
  onwaiting
]
34
# Lookup table from attribute name to the kind of content its value holds;
# every name in EVENT_ATTRIBUTES is registered with the value :script.
# NOTE(review): presumably lookups ignore case and unknown names yield nil
# (CaseIgnoring word list created with a nil default) - confirm in WordList.
IN_ATTRIBUTE = WordList::CaseIgnoring.new(nil).add(EVENT_ATTRIBUTES, :script)
37
# An attribute name (also used for unquoted attribute values): word
# characters, dots, colons and hyphens.
ATTR_NAME = /[\w.:-]+/ # :nodoc:

# The end of a start tag: ">" or the self-closing "/>".
TAG_END = /\/?>/ # :nodoc:

# A single hexadecimal digit.
HEX = /[0-9a-fA-F]/ # :nodoc:

# A character reference: named (&amp;), decimal (&#169;) or hexadecimal
# (&#xA9;). Written in extended mode, so the whitespace in the pattern is
# not part of what is matched.
ENTITY = /
  &
  (?: \w+ | \# (?: \d+ | x#{HEX}+ ) )
  ;
/ox # :nodoc:
55
# Maps an opening quote character to the pattern for a run of plain
# characters inside an attribute value opened with that quote: anything
# except "&", the quote itself, ">" and a newline.
PLAIN_STRING_CONTENT = Hash[
  %w(' ").map { |quote| [quote, /[^&#{quote}>\n]+/] }
] # :nodoc:
60
# Resets the scanner through the superclass, then puts the HTML state
# machine back into its starting condition: state :initial and no active
# string content pattern. Returns nil.
def reset
  super
  @state, @plain_string_content = :initial, nil
  nil
end
66
67 protected
68
# Puts the HTML state machine into its starting condition: no active
# string content pattern and state :initial. Returns nil.
def setup
  @plain_string_content = nil
  @state = :initial
  nil
end
73
# Tokenizes +code+ as JavaScript and sends the tokens to +encoder+.
#
# Does nothing and returns nil when +code+ is nil or empty. The JavaScript
# scanner is created on first use and kept in @java_script_scanner for
# later calls. No token group is opened here; the callers in scan_tokens
# wrap the call in an :inline group themselves.
def scan_java_script encoder, code
  return if !code || code.empty?

  @java_script_scanner ||= Scanners::JavaScript.new '', :keep_tokens => true
  @java_script_scanner.tokenize code, :tokens => encoder
end
82
# Scans the rest of the input as HTML, sends every piece of it to +encoder+
# as a token and returns +encoder+.
#
# Options:
#   :state      - state to start in; defaults to the stored @state.
#   :keep_state - when true, the final state and the active string content
#                 pattern are stored in @state / @plain_string_content.
#
# The scanner is a state machine:
#   :initial                - text between tags
#   :attribute              - inside a start tag, before an attribute name
#                             or the end of the tag
#   :attribute_equal        - after an attribute name
#   :attribute_value        - after the "=" of an attribute
#   :attribute_value_string - inside a quoted attribute value
#   :in_special_tag         - right after a <script ...> start tag; the
#                             content is scanned as JavaScript
#
# Whitespace is emitted as :space in every state except :in_special_tag.
def scan_tokens encoder, options
  state = options[:state] || @state
  plain_string_content = @plain_string_content
  in_tag = in_attribute = nil

  # When starting inside a quoted attribute value, open the :string group
  # that the closing quote (or the end of this method) will close.
  encoder.begin_group :string if state == :attribute_value_string

  until eos?

    if state != :in_special_tag && match = scan(/\s+/m)
      encoder.text_token match, :space

    else

      case state

      when :initial
        if match = scan(/<!--(?:.*?-->|.*)/m)
          encoder.text_token match, :comment
        elsif match = scan(/<!DOCTYPE(?:.*?>|.*)/m)
          encoder.text_token match, :doctype
        elsif match = scan(/<\?xml(?:.*?\?>|.*)/m)
          encoder.text_token match, :preprocessor
        elsif match = scan(/<\?(?:.*?\?>|.*)/m)
          encoder.text_token match, :comment
        elsif match = scan(/<\/[-\w.:]*>?/m)
          in_tag = nil
          encoder.text_token match, :tag
        # The lookahead keeps "script" from matching a mere prefix of a
        # longer tag name: <scripting> or <script-x> must be scanned as one
        # ordinary tag, not as "<script" followed by a bogus attribute.
        elsif match = scan(/<(?:(script)(?![-\w.:])|[-\w.:]+)(>)?/m)
          encoder.text_token match, :tag
          in_tag = self[1]  # 'script' for a script tag, nil for any other
          if self[2]
            # The tag was closed right away ("<tag>").
            state = :in_special_tag if in_tag
          else
            state = :attribute
          end
        elsif match = scan(/[^<>&]+/)
          encoder.text_token match, :plain
        elsif match = scan(/#{ENTITY}/ox)
          encoder.text_token match, :entity
        elsif match = scan(/[<>&]/)
          in_tag = nil
          encoder.text_token match, :error
        else
          raise_inspect '[BUG] else-case reached with state %p' % [state], encoder
        end

      when :attribute
        if match = scan(/#{TAG_END}/o)
          encoder.text_token match, :tag
          in_attribute = nil
          if in_tag
            state = :in_special_tag
          else
            state = :initial
          end
        elsif match = scan(/#{ATTR_NAME}/o)
          # Remember whether the value of this attribute is script code.
          in_attribute = IN_ATTRIBUTE[match]
          encoder.text_token match, :attribute_name
          state = :attribute_equal
        else
          in_tag = nil
          encoder.text_token getch, :error
        end

      when :attribute_equal
        if match = scan(/=/)
          encoder.text_token match, :operator
          state = :attribute_value
        elsif check(/#{ATTR_NAME}/o) || check(/#{TAG_END}/o)
          # The previous attribute has no value. Only look ahead here: the
          # next attribute name or the tag end has to stay in the input so
          # that the :attribute state can emit it. Consuming it at this
          # point would drop that text from the output.
          state = :attribute
          next
        else
          encoder.text_token getch, :error
          state = :attribute
        end

      when :attribute_value
        if match = scan(/#{ATTR_NAME}/o)
          # Unquoted value.
          encoder.text_token match, :attribute_value
          state = :attribute
        elsif match = scan(/["']/)
          if in_attribute == :script
            # Event handler: the quoted value is JavaScript.
            encoder.begin_group :inline
            encoder.text_token match, :inline_delimiter
            if scan(/javascript:[ \t]*/)
              encoder.text_token matched, :comment
            end
            # Everything up to the matching quote (or the end of input).
            code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/)
            scan_java_script encoder, code
            match = scan(/["']/)
            encoder.text_token match, :inline_delimiter if match
            encoder.end_group :inline
            state = :attribute
            in_attribute = nil
          else
            encoder.begin_group :string
            state = :attribute_value_string
            plain_string_content = PLAIN_STRING_CONTENT[match]
            encoder.text_token match, :delimiter
          end
        elsif match = scan(/#{TAG_END}/o)
          encoder.text_token match, :tag
          state = :initial
        else
          encoder.text_token getch, :error
        end

      when :attribute_value_string
        if match = scan(plain_string_content)
          encoder.text_token match, :content
        elsif match = scan(/['"]/)
          encoder.text_token match, :delimiter
          encoder.end_group :string
          state = :attribute
        elsif match = scan(/#{ENTITY}/ox)
          encoder.text_token match, :entity
        elsif match = scan(/&/)
          # An ampersand that does not start an entity.
          encoder.text_token match, :content
        elsif match = scan(/[\n>]/)
          # Unterminated string: close the group and leave the tag.
          encoder.end_group :string
          state = :initial
          encoder.text_token match, :error
        end

      when :in_special_tag
        case in_tag
        when 'script'
          if match = scan(/[ \t]*\n/)
            encoder.text_token match, :space
          end
          if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/m)
            # Script wrapped in an HTML comment: the comment markers are
            # emitted as comments, the text between them as code.
            code = self[2] || self[4]
            closing = self[3]
            encoder.text_token self[1], :comment
          else
            # Plain script: everything up to </script> or the end of input.
            code = scan_until(/(?=(?:\n\s*)?<\/script>)|\z/)
            closing = false
          end
          unless code.empty?
            encoder.begin_group :inline
            scan_java_script encoder, code
            encoder.end_group :inline
          end
          encoder.text_token closing, :comment if closing
          state = :initial
        else
          raise 'unknown special tag: %p' % [in_tag]
        end

      else
        raise_inspect 'Unknown state: %p' % [state], encoder

      end

    end

  end

  if options[:keep_state]
    @state = state
    @plain_string_content = plain_string_content
  end

  # Close a string group that is still open at the end of the input.
  encoder.end_group :string if state == :attribute_value_string

  encoder
end
249
250 end
251
252 end
253 end