Mercurial > hg > soundsoftware-site
comparison vendor/gems/coderay-1.0.0/lib/coderay/scanners/html.rb @ 909:cbb26bc654de redmine-1.3
Update to Redmine 1.3-stable branch (Redmine SVN rev 8964)
author | Chris Cannam |
---|---|
date | Fri, 24 Feb 2012 19:09:32 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
908:c6c2cbd0afee | 909:cbb26bc654de |
---|---|
1 module CodeRay | |
2 module Scanners | |
3 | |
4 # HTML Scanner | |
5 # | |
6 # Alias: +xhtml+ | |
7 # | |
8 # See also: Scanners::XML | |
9 class HTML < Scanner | |
10 | |
11 register_for :html | |
12 | |
# Token kinds listed for this scanner under the name KINDS_NOT_LOC.
# NOTE(review): presumably these are the kinds that are ignored when lines
# of code are counted -- confirm against the code that reads this constant.
# Frozen so the shared list cannot be modified by accident at runtime.
KINDS_NOT_LOC = [
  :comment, :doctype, :preprocessor,
  :tag, :attribute_name, :operator,
  :attribute_value, :string,
  :plain, :entity, :error,
].freeze  # :nodoc:
19 | |
# Names of the HTML event handler attributes (all lowercase). They are
# registered in IN_ATTRIBUTE with the value :script, which makes the scanner
# hand their quoted values to the JavaScript scanner.
# Frozen so the shared list cannot be modified by accident at runtime.
EVENT_ATTRIBUTES = %w(
  onabort onafterprint onbeforeprint onbeforeunload onblur oncanplay
  oncanplaythrough onchange onclick oncontextmenu oncuechange ondblclick
  ondrag ondragdrop ondragend ondragenter ondragleave ondragover
  ondragstart ondrop ondurationchange onemptied onended onerror onfocus
  onformchange onforminput onhashchange oninput oninvalid onkeydown
  onkeypress onkeyup onload onloadeddata onloadedmetadata onloadstart
  onmessage onmousedown onmousemove onmouseout onmouseover onmouseup
  onmousewheel onmove onoffline ononline onpagehide onpageshow onpause
  onplay onplaying onpopstate onprogress onratechange onreadystatechange
  onredo onreset onresize onscroll onseeked onseeking onselect onshow
  onstalled onstorage onsubmit onsuspend ontimeupdate onundo onunload
  onvolumechange onwaiting
).freeze
34 | |
# Lookup table from attribute name to the kind of content embedded in its
# value. Every name in EVENT_ATTRIBUTES maps to :script.
# NOTE(review): WordList::CaseIgnoring presumably ignores the case of the key
# and returns the constructor argument (nil) for unknown names -- confirm
# against the WordList implementation.
IN_ATTRIBUTE = WordList::CaseIgnoring.new(nil).add(EVENT_ATTRIBUTES, :script)
37 | |
# An attribute name: word characters plus ".", ":" and "-". The scanner also
# uses this pattern for unquoted attribute values.
ATTR_NAME = /[\w.:-]+/  # :nodoc:
# The end of a tag: ">" or the self-closing "/>".
TAG_END = /\/?>/  # :nodoc:
# A single hexadecimal digit.
HEX = /[0-9a-fA-F]/  # :nodoc:
# A character reference: named (&amp;), decimal (&#38;) or hexadecimal
# (&#x26;). The terminating ";" is required.
ENTITY = /
  &
  (?:
    \w+
  |
    \#
    (?:
      \d+
    |
      x#{HEX}+
    )
  )
  ;
/ox  # :nodoc:

# Maps the opening quote character of an attribute value to the pattern that
# matches a run of ordinary characters inside that value: everything except
# "&", the same quote character, ">" and newline.
# Frozen so the shared table cannot be modified by accident at runtime.
PLAIN_STRING_CONTENT = {
  "'" => /[^&'>\n]+/,
  '"' => /[^&">\n]+/,
}.freeze  # :nodoc:
60 | |
# Resets the scanner: runs the inherited reset first, then forgets any
# quoted-string pattern and puts the HTML state machine back into its
# :initial state. Returns nil.
def reset
  super
  @plain_string_content = nil
  @state = :initial
  nil
end
66 | |
67 protected | |
68 | |
# Puts the HTML state machine into its starting configuration: state
# :initial and no quoted-string pattern selected. Returns nil.
def setup
  @plain_string_content = nil
  @state = :initial
  nil
end
73 | |
# Tokenizes +code+ as JavaScript and sends the tokens to +encoder+.
#
# Does nothing and returns nil when +code+ is nil, false or empty. The
# JavaScript scanner is created on first use and cached in
# @java_script_scanner. This method opens no token group of its own; the
# call sites in scan_tokens wrap it in an :inline group.
#
# Returns whatever the JavaScript scanner's +tokenize+ returns.
def scan_java_script encoder, code
  return if !code || code.empty?

  @java_script_scanner ||= Scanners::JavaScript.new '', :keep_tokens => true
  @java_script_scanner.tokenize code, :tokens => encoder
end
82 | |
# Scans the remaining input as HTML and sends the resulting tokens to
# +encoder+.
#
# The scanner is a small state machine. The local +state+ is one of:
#
# :initial::                outside of any tag
# :attribute::              inside a tag, expecting an attribute name or the
#                           end of the tag
# :attribute_equal::        after an attribute name, expecting "="
# :attribute_value::        after "=", expecting a value
# :attribute_value_string:: inside a quoted attribute value
# :in_special_tag::         directly after an opening <script> tag
#
# Options read by this method:
#
# :state::      state to start in; defaults to the stored @state
# :keep_state:: when truthy, the final state and the active quoted-string
#               pattern are stored back into @state and
#               @plain_string_content
#
# Returns +encoder+.
def scan_tokens encoder, options
  state = options[:state] || @state
  plain_string_content = @plain_string_content
  in_tag = in_attribute = nil

  # When resuming inside a quoted value, re-open the group the value lives in.
  encoder.begin_group :string if state == :attribute_value_string

  until eos?

    # Whitespace is emitted as :space everywhere except at the start of a
    # script body, which handles its own leading whitespace.
    if state != :in_special_tag && match = scan(/\s+/m)
      encoder.text_token match, :space

    else

      case state

      when :initial
        if match = scan(/<!--(?:.*?-->|.*)/m)
          encoder.text_token match, :comment
        elsif match = scan(/<!DOCTYPE(?:.*?>|.*)/m)
          encoder.text_token match, :doctype
        elsif match = scan(/<\?xml(?:.*?\?>|.*)/m)
          encoder.text_token match, :preprocessor
        elsif match = scan(/<\?(?:.*?\?>|.*)/m)
          encoder.text_token match, :comment
        elsif match = scan(/<\/[-\w.:]*>?/m)
          in_tag = nil
          encoder.text_token match, :tag
        # The lookahead keeps names that merely start with "script" (such as
        # <scripts> or <script-x>) from being treated as a script element.
        elsif match = scan(/<(?:(script)(?![-\w.:])|[-\w.:]+)(>)?/m)
          encoder.text_token match, :tag
          in_tag = self[1]
          if self[2]
            # The tag is already closed; only <script> needs special handling.
            state = :in_special_tag if in_tag
          else
            state = :attribute
          end
        elsif match = scan(/[^<>&]+/)
          encoder.text_token match, :plain
        elsif match = scan(/#{ENTITY}/ox)
          encoder.text_token match, :entity
        elsif match = scan(/[<>&]/)
          in_tag = nil
          encoder.text_token match, :error
        else
          raise_inspect '[BUG] else-case reached with state %p' % [state], encoder
        end

      when :attribute
        if match = scan(/#{TAG_END}/o)
          encoder.text_token match, :tag
          in_attribute = nil
          if in_tag
            state = :in_special_tag
          else
            state = :initial
          end
        elsif match = scan(/#{ATTR_NAME}/o)
          in_attribute = IN_ATTRIBUTE[match]
          encoder.text_token match, :attribute_name
          state = :attribute_equal
        else
          in_tag = nil
          encoder.text_token getch, :error
        end

      when :attribute_equal
        if match = scan(/=/)
          encoder.text_token match, :operator
          state = :attribute_value
        # Peek only (check, not scan): the previous attribute had no value,
        # and the next attribute name or the tag end must be left in the
        # input so that the :attribute state emits it as a token. Consuming
        # it here would silently drop source text.
        elsif check(/#{ATTR_NAME}/o) || check(/#{TAG_END}/o)
          state = :attribute
          next
        else
          encoder.text_token getch, :error
          state = :attribute
        end

      when :attribute_value
        if match = scan(/#{ATTR_NAME}/o)
          # Unquoted value.
          encoder.text_token match, :attribute_value
          state = :attribute
        elsif match = scan(/["']/)
          if in_attribute == :script
            # Event handler attribute: the whole quoted value is scanned as
            # JavaScript inside an :inline group.
            encoder.begin_group :inline
            encoder.text_token match, :inline_delimiter
            if scan(/javascript:[ \t]*/)
              encoder.text_token matched, :comment
            end
            # Take everything up to the matching quote (or the end of input).
            code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/)
            scan_java_script encoder, code
            match = scan(/["']/)
            encoder.text_token match, :inline_delimiter if match
            encoder.end_group :inline
            state = :attribute
            in_attribute = nil
          else
            encoder.begin_group :string
            state = :attribute_value_string
            plain_string_content = PLAIN_STRING_CONTENT[match]
            encoder.text_token match, :delimiter
          end
        elsif match = scan(/#{TAG_END}/o)
          encoder.text_token match, :tag
          state = :initial
        else
          encoder.text_token getch, :error
        end

      when :attribute_value_string
        if match = scan(plain_string_content)
          encoder.text_token match, :content
        elsif match = scan(/['"]/)
          encoder.text_token match, :delimiter
          encoder.end_group :string
          state = :attribute
        elsif match = scan(/#{ENTITY}/ox)
          encoder.text_token match, :entity
        elsif match = scan(/&/)
          # A lone ampersand that does not start an entity is plain content.
          encoder.text_token match, :content
        elsif match = scan(/[\n>]/)
          # The value was never closed: leave the string and flag the character.
          encoder.end_group :string
          state = :initial
          encoder.text_token match, :error
        end

      when :in_special_tag
        case in_tag
        when 'script'
          encoder.text_token match, :space if match = scan(/[ \t]*\n/)
          if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/m)
            # Script body wrapped in an HTML comment: the comment markers are
            # emitted as :comment, the text between them is JavaScript.
            code = self[2] || self[4]
            closing = self[3]
            encoder.text_token self[1], :comment
          else
            # Everything up to the closing tag (or the end of input).
            code = scan_until(/(?=(?:\n\s*)?<\/script>)|\z/)
            closing = false
          end
          unless code.empty?
            encoder.begin_group :inline
            scan_java_script encoder, code
            encoder.end_group :inline
          end
          encoder.text_token closing, :comment if closing
          state = :initial
        else
          raise 'unknown special tag: %p' % [in_tag]
        end

      else
        raise_inspect 'Unknown state: %p' % [state], encoder

      end

    end

  end

  if options[:keep_state]
    @state = state
    @plain_string_content = plain_string_content
  end

  # Close the group that is still open when the input ends inside a quoted value.
  encoder.end_group :string if state == :attribute_value_string

  encoder
end
249 | |
250 end | |
251 | |
252 end | |
253 end |