comparison vendor/plugins/coderay-0.9.2/lib/coderay/scanners/html.rb @ 0:513646585e45

* Import Redmine trunk SVN rev 3859
author Chris Cannam
date Fri, 23 Jul 2010 15:52:44 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:513646585e45
module CodeRay
module Scanners

  # HTML Scanner
  #
  # Splits HTML/XML-like markup into [text, kind] token pairs using a small
  # state machine:
  #
  # * :initial                - outside of any tag
  # * :attribute              - inside a tag, expecting an attribute name or
  #                             the end of the tag
  # * :attribute_equal        - just after an attribute name
  # * :attribute_value        - just after an "=" sign
  # * :attribute_value_string - inside a quoted attribute value
  #
  # Quoted attribute values are wrapped in a [:open, :string] /
  # [:close, :string] token group.
  class HTML < Scanner

    include Streamable
    register_for :html

    # NOTE(review): presumably the token kinds that do not count as lines of
    # code; the constant is not used inside this class - confirm against the
    # Scanner base class.
    KINDS_NOT_LOC = [
      :comment, :doctype, :preprocessor,
      :tag, :attribute_name, :operator,
      :attribute_value, :delimiter, :content,
      :plain, :entity, :error
    ]

    ATTR_NAME = /[\w.:-]+/
    ATTR_VALUE_UNQUOTED = ATTR_NAME
    TAG_END = /\/?>/
    HEX = /[0-9a-fA-F]/
    # Named (&amp;), decimal (&#38;) and hexadecimal (&#x26;) entities.
    ENTITY = /
      &
      (?:
        \w+
      |
        \#
        (?:
          \d+
        |
          x#{HEX}+
        )
      )
      ;
    /ox

    # Maps the opening quote character to the pattern matching plain content
    # of a string opened with that quote.
    PLAIN_STRING_CONTENT = {
      "'" => /[^&'>\n]+/,
      '"' => /[^&">\n]+/,
    }

    # Puts the scanner back into its starting state. Both pieces of saved
    # state are cleared so that +reset+ agrees with +setup+.
    def reset
      super
      @state = :initial
      @plain_string_content = nil
    end

  private

    # Initializes the state that can be carried over between scans.
    # NOTE(review): presumably called by Scanner during initialization -
    # confirm against the base class.
    def setup
      @state = :initial
      @plain_string_content = nil
    end

    # Scans the remaining input and appends the resulting tokens to +tokens+.
    #
    # tokens::  collection that receives [text, kind] pairs as well as
    #           [:open, :string] / [:close, :string] group markers via <<.
    # options:: hash; when options[:keep_state] is truthy, the final state is
    #           stored in instance variables so that a following scan resumes
    #           where this one stopped.
    #
    # Returns +tokens+.
    def scan_tokens(tokens, options)

      state = @state
      plain_string_content = @plain_string_content

      until eos?

        kind = nil
        match = nil

        # Whitespace is tokenized the same way in every state, including
        # inside quoted attribute values.
        if scan(/\s+/m)
          kind = :space

        else

          case state

          when :initial
            if scan(/<!--.*?-->/m)
              kind = :comment
            elsif scan(/<!DOCTYPE.*?>/m)
              kind = :doctype
            elsif scan(/<\?xml.*?\?>/m)
              kind = :preprocessor
            elsif scan(/<\?.*?\?>|<%.*?%>/m)
              kind = :comment
            elsif scan(/<\/[-\w.:]*>/m)
              kind = :tag
            elsif match = scan(/<[-\w.:]+>?/m)
              kind = :tag
              # A tag that was not closed right away may carry attributes.
              # Comparing a one-character substring works the same on every
              # Ruby version.
              state = :attribute unless match[-1, 1] == '>'
            elsif scan(/[^<>&]+/)
              kind = :plain
            elsif scan(ENTITY)
              kind = :entity
            elsif scan(/[<>&]/)
              kind = :error
            else
              raise_inspect '[BUG] else-case reached with state %p' % [state], tokens
            end

          when :attribute
            if scan(TAG_END)
              kind = :tag
              state = :initial
            elsif scan(ATTR_NAME)
              kind = :attribute_name
              state = :attribute_equal
            else
              kind = :error
              getch
            end

          when :attribute_equal
            if scan(/=/)
              kind = :operator
              state = :attribute_value
            elsif scan(ATTR_NAME)
              # Previous attribute had no value; this is the next attribute.
              kind = :attribute_name
            elsif scan(TAG_END)
              kind = :tag
              state = :initial
            elsif scan(/./)
              kind = :error
              state = :attribute
            end

          when :attribute_value
            if scan(ATTR_VALUE_UNQUOTED)
              kind = :attribute_value
              state = :attribute
            elsif match = scan(/["']/)
              tokens << [:open, :string]
              state = :attribute_value_string
              plain_string_content = PLAIN_STRING_CONTENT[match]
              kind = :delimiter
            elsif scan(TAG_END)
              kind = :tag
              state = :initial
            else
              kind = :error
              getch
            end

          when :attribute_value_string
            if scan(plain_string_content)
              kind = :content
            elsif scan(/['"]/)
              # The content pattern consumes the "other" quote character, so
              # only the matching closing quote can show up here.
              tokens << [matched, :delimiter]
              tokens << [:close, :string]
              state = :attribute
              next
            elsif scan(ENTITY)
              kind = :entity
            elsif scan(/&/)
              # A lone ampersand that does not start an entity.
              kind = :content
            elsif scan(/[\n>]/)
              # The string was cut off by the end of the tag: close the
              # group and report the offending character as an error.
              # (Newlines are already consumed as :space above.)
              tokens << [:close, :string]
              kind = :error
              state = :initial
            end

          else
            raise_inspect 'Unknown state: %p' % [state], tokens

          end

        end

        match ||= matched
        if $CODERAY_DEBUG && !kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens, state
        end
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]
      end

      if options[:keep_state]
        @state = state
        @plain_string_content = plain_string_content
      elsif state == :attribute_value_string
        # The input ended inside a quoted attribute value and no following
        # scan will continue it: close the :string group opened earlier so
        # that the token stream stays balanced.
        tokens << [:close, :string]
      end

      tokens
    end

  end

end
end