To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / vendor / gems / coderay-0.9.7 / lib / coderay / scanners / html.rb @ 442:753f1380d6bc

History | View | Annotate | Download (4.15 KB)

1
module CodeRay
module Scanners

  # HTML Scanner
  #
  # Tokenizes HTML source with a small state machine. Every token is
  # appended to the token list as a [text, kind] pair; quoted attribute
  # values are additionally wrapped in [:open, :string] / [:close, :string]
  # marker pairs.
  #
  # States used by scan_tokens:
  #   :initial                - outside of a tag
  #   :attribute              - inside a tag, expecting an attribute name
  #   :attribute_equal        - after an attribute name, expecting "="
  #   :attribute_value        - after "=", expecting a value
  #   :attribute_value_string - inside a quoted attribute value
  class HTML < Scanner

    include Streamable
    register_for :html

    # Token kinds listed for this scanner.
    # NOTE(review): presumably read by the Scanner base class when counting
    # lines of code - confirm against scanner.rb.
    KINDS_NOT_LOC = [
      :comment, :doctype, :preprocessor,
      :tag, :attribute_name, :operator,
      :attribute_value, :delimiter, :content,
      :plain, :entity, :error
    ]

    # Attribute name: word characters, ".", ":" and "-".
    ATTR_NAME = /[\w.:-]+/
    # Unquoted attribute values accept the same characters as names.
    ATTR_VALUE_UNQUOTED = ATTR_NAME
    # End of a tag: ">" or "/>".
    TAG_END = /\/?>/
    HEX = /[0-9a-fA-F]/
    # Character reference: "&name;", "&#123;" or "&#x1F;".
    ENTITY = /
      &
      (?:
        \w+
      |
        \#
        (?:
          \d+
        |
          x#{HEX}+
        )
      )
      ;
    /ox

    # Maps an opening quote character to the pattern that matches a run of
    # ordinary characters inside a string opened with that quote: anything
    # except "&", the quote itself, ">" and newline.
    PLAIN_STRING_CONTENT = {
      "'" => /[^&'>\n]+/,
      '"' => /[^&">\n]+/,
    }

    # Resets the scanner (via super) and puts the state machine back into
    # its starting state. The string-content pattern is cleared as well so
    # that reset leaves the same state behind as setup does.
    # Returns :initial, as before.
    def reset
      super
      @plain_string_content = nil
      @state = :initial
    end

  private

    # Initializes the state that can be carried over between scan_tokens
    # calls when the :keep_state option is used.
    def setup
      @state = :initial
      @plain_string_content = nil
    end

    # Scans the remaining input and appends the resulting tokens to +tokens+.
    #
    # tokens  - token collection; only << is called on it here.
    # options - only options[:keep_state] is read here: when truthy, the
    #           final state is stored so a later call continues from it.
    #
    # Returns +tokens+.
    def scan_tokens(tokens, options)

      state = @state
      plain_string_content = @plain_string_content

      until eos?

        kind = nil
        match = nil

        # Whitespace is checked before the state dispatch, so it becomes a
        # :space token in every state, including inside quoted values.
        if scan(/\s+/m)
          kind = :space

        else

          case state

          when :initial
            if scan(/<!--.*?-->/m)
              kind = :comment
            elsif scan(/<!DOCTYPE.*?>/m)
              kind = :doctype
            elsif scan(/<\?xml.*?\?>/m)
              kind = :preprocessor
            elsif scan(/<\?.*?\?>|<%.*?%>/m)
              kind = :comment
            elsif scan(/<\/[-\w.:]*>/m)
              kind = :tag
            elsif match = scan(/<[-\w.:]+>?/m)
              kind = :tag
              # "<name" without the closing ">" means attributes may follow.
              state = :attribute unless match[-1] == ?>
            elsif scan(/[^<>&]+/)
              kind = :plain
            elsif scan(ENTITY)
              kind = :entity
            elsif scan(/[<>&]/)
              # A stray markup character that started none of the above.
              kind = :error
            else
              raise_inspect '[BUG] else-case reached with state %p' % [state], tokens
            end

          when :attribute
            if scan(TAG_END)
              kind = :tag
              state = :initial
            elsif scan(ATTR_NAME)
              kind = :attribute_name
              state = :attribute_equal
            else
              # Consume one character so the loop always makes progress.
              kind = :error
              getch
            end

          when :attribute_equal
            if scan(/=/)
              kind = :operator
              state = :attribute_value
            elsif scan(ATTR_NAME)
              # Previous attribute had no value; this is the next name.
              kind = :attribute_name
            elsif scan(TAG_END)
              kind = :tag
              state = :initial
            elsif scan(/./)
              kind = :error
              state = :attribute
            end

          when :attribute_value
            if scan(ATTR_VALUE_UNQUOTED)
              kind = :attribute_value
              state = :attribute
            elsif match = scan(/["']/)
              tokens << [:open, :string]
              state = :attribute_value_string
              # Remember which pattern matches content for this quote type.
              plain_string_content = PLAIN_STRING_CONTENT[match]
              kind = :delimiter
            elsif scan(TAG_END)
              kind = :tag
              state = :initial
            else
              # Consume one character so the loop always makes progress.
              kind = :error
              getch
            end

          when :attribute_value_string
            if scan(plain_string_content)
              kind = :content
            elsif scan(/['"]/)
              # The other quote type is part of plain_string_content and was
              # consumed above, so this can only be the closing quote.
              tokens << [matched, :delimiter]
              tokens << [:close, :string]
              state = :attribute
              # Both tokens are already emitted; skip the generic push below.
              next
            elsif scan(ENTITY)
              kind = :entity
            elsif scan(/&/)
              # A bare "&" that is not a valid entity is ordinary content.
              kind = :content
            elsif scan(/[\n>]/)
              # The string was not terminated inside the tag: close it and
              # flag the character. (A newline is normally consumed by the
              # :space rule above before this branch is reached.)
              tokens << [:close, :string]
              kind = :error
              state = :initial
            end

          else
            raise_inspect 'Unknown state: %p' % [state], tokens

          end

        end

        match ||= matched
        if $CODERAY_DEBUG && !kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens, state
        end
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]
      end

      # NOTE(review): if the input ends while state is
      # :attribute_value_string, no [:close, :string] is emitted here -
      # confirm whether consumers expect balanced :open/:close tokens.
      if options[:keep_state]
        @state = state
        @plain_string_content = plain_string_content
      end

      tokens
    end

  end

end
end