diff .svn/pristine/c4/c434d791a7d10db078bf19f213b250982f17b1e9.svn-base @ 909:cbb26bc654de redmine-1.3

Update to Redmine 1.3-stable branch (Redmine SVN rev 8964)
author Chris Cannam
date Fri, 24 Feb 2012 19:09:32 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.svn/pristine/c4/c434d791a7d10db078bf19f213b250982f17b1e9.svn-base	Fri Feb 24 19:09:32 2012 +0000
@@ -0,0 +1,253 @@
+module CodeRay
+module Scanners
+
+  # HTML Scanner
+  # 
+  # Alias: +xhtml+
+  # 
+  # See also: Scanners::XML
+  class HTML < Scanner
+
+    register_for :html
+    
+    KINDS_NOT_LOC = [
+      :comment, :doctype, :preprocessor,
+      :tag, :attribute_name, :operator,
+      :attribute_value, :string,
+      :plain, :entity, :error,
+    ]  # :nodoc:
+    
+    EVENT_ATTRIBUTES = %w(
+      onabort onafterprint onbeforeprint onbeforeunload onblur oncanplay
+      oncanplaythrough onchange onclick oncontextmenu oncuechange ondblclick
+      ondrag ondragdrop ondragend ondragenter ondragleave ondragover
+      ondragstart ondrop ondurationchange onemptied onended onerror onfocus
+      onformchange onforminput onhashchange oninput oninvalid onkeydown
+      onkeypress onkeyup onload onloadeddata onloadedmetadata onloadstart
+      onmessage onmousedown onmousemove onmouseout onmouseover onmouseup
+      onmousewheel onmove onoffline ononline onpagehide onpageshow onpause
+      onplay onplaying onpopstate onprogress onratechange onreadystatechange
+      onredo onreset onresize onscroll onseeked onseeking onselect onshow
+      onstalled onstorage onsubmit onsuspend ontimeupdate onundo onunload
+      onvolumechange onwaiting
+    )
+    
+    IN_ATTRIBUTE = WordList::CaseIgnoring.new(nil).
+      add(EVENT_ATTRIBUTES, :script)
+    
+    ATTR_NAME = /[\w.:-]+/  # :nodoc:
+    TAG_END = /\/?>/  # :nodoc:
+    HEX = /[0-9a-fA-F]/  # :nodoc:
+    ENTITY = /
+      &
+      (?:
+        \w+
+      |
+        \#
+        (?:
+          \d+
+        |
+          x#{HEX}+
+        )
+      )
+      ;
+    /ox  # :nodoc:
+    
+    PLAIN_STRING_CONTENT = {
+      "'" => /[^&'>\n]+/,
+      '"' => /[^&">\n]+/,
+    }  # :nodoc:
+    
+    def reset
+      super
+      @state = :initial
+      @plain_string_content = nil
+    end
+    
+  protected
+    
+    def setup
+      @state = :initial
+      @plain_string_content = nil
+    end
+    
+    def scan_java_script encoder, code
+      if code && !code.empty?
+        @java_script_scanner ||= Scanners::JavaScript.new '', :keep_tokens => true
+        # encoder.begin_group :inline
+        @java_script_scanner.tokenize code, :tokens => encoder
+        # encoder.end_group :inline
+      end
+    end
+    
+    def scan_tokens encoder, options
+      state = options[:state] || @state
+      plain_string_content = @plain_string_content
+      in_tag = in_attribute = nil
+      
+      encoder.begin_group :string if state == :attribute_value_string
+      
+      until eos?
+        
+        if state != :in_special_tag && match = scan(/\s+/m)
+          encoder.text_token match, :space
+          
+        else
+          
+          case state
+          
+          when :initial
+            if match = scan(/<!--(?:.*?-->|.*)/m)
+              encoder.text_token match, :comment
+            elsif match = scan(/<!DOCTYPE(?:.*?>|.*)/m)
+              encoder.text_token match, :doctype
+            elsif match = scan(/<\?xml(?:.*?\?>|.*)/m)
+              encoder.text_token match, :preprocessor
+            elsif match = scan(/<\?(?:.*?\?>|.*)/m)
+              encoder.text_token match, :comment
+            elsif match = scan(/<\/[-\w.:]*>?/m)
+              in_tag = nil
+              encoder.text_token match, :tag
+            elsif match = scan(/<(?:(script)|[-\w.:]+)(>)?/m)
+              encoder.text_token match, :tag
+              in_tag = self[1]
+              if self[2]
+                state = :in_special_tag if in_tag
+              else
+                state = :attribute
+              end
+            elsif match = scan(/[^<>&]+/)
+              encoder.text_token match, :plain
+            elsif match = scan(/#{ENTITY}/ox)
+              encoder.text_token match, :entity
+            elsif match = scan(/[<>&]/)
+              in_tag = nil
+              encoder.text_token match, :error
+            else
+              raise_inspect '[BUG] else-case reached with state %p' % [state], encoder
+            end
+            
+          when :attribute
+            if match = scan(/#{TAG_END}/o)
+              encoder.text_token match, :tag
+              in_attribute = nil
+              if in_tag
+                state = :in_special_tag
+              else
+                state = :initial
+              end
+            elsif match = scan(/#{ATTR_NAME}/o)
+              in_attribute = IN_ATTRIBUTE[match]
+              encoder.text_token match, :attribute_name
+              state = :attribute_equal
+            else
+              in_tag = nil
+              encoder.text_token getch, :error
+            end
+            
+          when :attribute_equal
+            if match = scan(/=/)  #/
+              encoder.text_token match, :operator
+              state = :attribute_value
+            elsif scan(/#{ATTR_NAME}/o) || scan(/#{TAG_END}/o)
+              state = :attribute
+              next
+            else
+              encoder.text_token getch, :error
+              state = :attribute
+            end
+            
+          when :attribute_value
+            if match = scan(/#{ATTR_NAME}/o)
+              encoder.text_token match, :attribute_value
+              state = :attribute
+            elsif match = scan(/["']/)
+              if in_attribute == :script
+                encoder.begin_group :inline
+                encoder.text_token match, :inline_delimiter
+                if scan(/javascript:[ \t]*/)
+                  encoder.text_token matched, :comment
+                end
+                code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/)
+                scan_java_script encoder, code
+                match = scan(/["']/)
+                encoder.text_token match, :inline_delimiter if match
+                encoder.end_group :inline
+                state = :attribute
+                in_attribute = nil
+              else
+                encoder.begin_group :string
+                state = :attribute_value_string
+                plain_string_content = PLAIN_STRING_CONTENT[match]
+                encoder.text_token match, :delimiter
+              end
+            elsif match = scan(/#{TAG_END}/o)
+              encoder.text_token match, :tag
+              state = :initial
+            else
+              encoder.text_token getch, :error
+            end
+            
+          when :attribute_value_string
+            if match = scan(plain_string_content)
+              encoder.text_token match, :content
+            elsif match = scan(/['"]/)
+              encoder.text_token match, :delimiter
+              encoder.end_group :string
+              state = :attribute
+            elsif match = scan(/#{ENTITY}/ox)
+              encoder.text_token match, :entity
+            elsif match = scan(/&/)
+              encoder.text_token match, :content
+            elsif match = scan(/[\n>]/)
+              encoder.end_group :string
+              state = :initial
+              encoder.text_token match, :error
+            end
+            
+          when :in_special_tag
+            case in_tag
+            when 'script'
+              encoder.text_token match, :space if match = scan(/[ \t]*\n/)
+              if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/m)
+                code = self[2] || self[4]
+                closing = self[3]
+                encoder.text_token self[1], :comment
+              else
+                code = scan_until(/(?=(?:\n\s*)?<\/script>)|\z/)
+                closing = false
+              end
+              unless code.empty?
+                encoder.begin_group :inline
+                scan_java_script encoder, code
+                encoder.end_group :inline
+              end
+              encoder.text_token closing, :comment if closing
+              state = :initial
+            else
+              raise 'unknown special tag: %p' % [in_tag]
+            end
+            
+          else
+            raise_inspect 'Unknown state: %p' % [state], encoder
+            
+          end
+          
+        end
+        
+      end
+      
+      if options[:keep_state]
+        @state = state
+        @plain_string_content = plain_string_content
+      end
+      
+      encoder.end_group :string if state == :attribute_value_string
+      
+      encoder
+    end
+    
+  end
+  
+end
+end