Mercurial > hg > soundsoftware-site
diff .svn/pristine/c4/c434d791a7d10db078bf19f213b250982f17b1e9.svn-base @ 909:cbb26bc654de redmine-1.3
Update to Redmine 1.3-stable branch (Redmine SVN rev 8964)
author | Chris Cannam |
---|---|
date | Fri, 24 Feb 2012 19:09:32 +0000 |
parents | |
children |
line wrap: on
line diff
module CodeRay
module Scanners

  # HTML Scanner
  #
  # Alias: +xhtml+
  #
  # See also: Scanners::XML
  #
  # Tokenizes markup with a hand-written state machine (see #scan_tokens).
  # The states used are :initial, :attribute, :attribute_equal,
  # :attribute_value, :attribute_value_string and :in_special_tag.
  # The content of <script> elements and the values of event-handler
  # attributes (see EVENT_ATTRIBUTES) are delegated to Scanners::JavaScript.
  class HTML < Scanner

    register_for :html

    KINDS_NOT_LOC = [
      :comment, :doctype, :preprocessor,
      :tag, :attribute_name, :operator,
      :attribute_value, :string,
      :plain, :entity, :error,
    ]  # :nodoc:

    # Attribute names whose quoted values are scanned as JavaScript.
    EVENT_ATTRIBUTES = %w(
      onabort onafterprint onbeforeprint onbeforeunload onblur oncanplay
      oncanplaythrough onchange onclick oncontextmenu oncuechange ondblclick
      ondrag ondragdrop ondragend ondragenter ondragleave ondragover
      ondragstart ondrop ondurationchange onemptied onended onerror onfocus
      onformchange onforminput onhashchange oninput oninvalid onkeydown
      onkeypress onkeyup onload onloadeddata onloadedmetadata onloadstart
      onmessage onmousedown onmousemove onmouseout onmouseover onmouseup
      onmousewheel onmove onoffline ononline onpagehide onpageshow onpause
      onplay onplaying onpopstate onprogress onratechange onreadystatechange
      onredo onreset onresize onscroll onseeked onseeking onselect onshow
      onstalled onstorage onsubmit onsuspend ontimeupdate onundo onunload
      onvolumechange onwaiting
    )

    # Word list built with a nil default in which every name from
    # EVENT_ATTRIBUTES is registered with the value :script. It is queried
    # with each attribute name to decide how the attribute's value is scanned.
    IN_ATTRIBUTE = WordList::CaseIgnoring.new(nil).
      add(EVENT_ATTRIBUTES, :script)

    ATTR_NAME = /[\w.:-]+/  # :nodoc:
    TAG_END = /\/?>/  # :nodoc:
    HEX = /[0-9a-fA-F]/  # :nodoc:
    ENTITY = /
      &
      (?:
        \w+
      |
        \#
        (?:
          \d+
        |
          x#{HEX}+
        )
      )
      ;
    /ox  # :nodoc:

    # For each string delimiter, the pattern matching a run of ordinary
    # string content: anything except "&", the delimiter itself, ">" and
    # a newline.
    PLAIN_STRING_CONTENT = {
      "'" => /[^&'>\n]+/,
      '"' => /[^&">\n]+/,
    }  # :nodoc:

    # Resets the scanner (via +super+) and returns the state machine to
    # its starting state.
    def reset
      super
      @state = :initial
      @plain_string_content = nil
    end

  protected

    # Puts the state machine into its starting state.
    def setup
      @state = :initial
      @plain_string_content = nil
    end

    # Tokenizes +code+ as JavaScript, sending the tokens to +encoder+.
    # Does nothing when +code+ is nil or empty. The JavaScript scanner
    # is created on first use and reused afterwards.
    def scan_java_script encoder, code
      if code && !code.empty?
        @java_script_scanner ||= Scanners::JavaScript.new '', :keep_tokens => true
        # encoder.begin_group :inline
        @java_script_scanner.tokenize code, :tokens => encoder
        # encoder.end_group :inline
      end
    end

    # Scans the input until the end of the string and reports every token
    # to +encoder+ through +text_token+, +begin_group+ and +end_group+.
    #
    # encoder:: receiver of the token events; it is also the return value.
    # options:: +:state+ overrides the state to start in (defaults to the
    #           state stored on the scanner); when +:keep_state+ is truthy
    #           the final state is stored back on the scanner.
    #
    # Raises (through +raise_inspect+) when an unknown state is reached,
    # and raises a RuntimeError for a special tag other than 'script'.
    def scan_tokens encoder, options
      state = options[:state] || @state
      plain_string_content = @plain_string_content
      in_tag = in_attribute = nil

      # When starting inside a quoted attribute value, reopen its group.
      encoder.begin_group :string if state == :attribute_value_string

      until eos?

        # Whitespace is emitted as :space in every state except inside a
        # special tag, where it belongs to the embedded code.
        if state != :in_special_tag && match = scan(/\s+/m)
          encoder.text_token match, :space

        else

          case state

          when :initial
            if match = scan(/<!--(?:.*?-->|.*)/m)
              encoder.text_token match, :comment
            elsif match = scan(/<!DOCTYPE(?:.*?>|.*)/m)
              encoder.text_token match, :doctype
            elsif match = scan(/<\?xml(?:.*?\?>|.*)/m)
              encoder.text_token match, :preprocessor
            elsif match = scan(/<\?(?:.*?\?>|.*)/m)
              encoder.text_token match, :comment
            elsif match = scan(/<\/[-\w.:]*>?/m)
              in_tag = nil
              encoder.text_token match, :tag
            # The negative lookahead makes sure that only a tag named exactly
            # "script" is captured in group 1. Without it, "<scripts>" would
            # match as "<script" followed by an attribute "s", and the
            # element's content would be handed to the JavaScript scanner.
            elsif match = scan(/<(?:(script)(?![-\w.:])|[-\w.:]+)(>)?/m)
              encoder.text_token match, :tag
              in_tag = self[1]
              if self[2]
                # The tag was closed right away: no attributes to scan.
                state = :in_special_tag if in_tag
              else
                state = :attribute
              end
            elsif match = scan(/[^<>&]+/)
              encoder.text_token match, :plain
            elsif match = scan(/#{ENTITY}/ox)
              encoder.text_token match, :entity
            elsif match = scan(/[<>&]/)
              in_tag = nil
              encoder.text_token match, :error
            else
              raise_inspect '[BUG] else-case reached with state %p' % [state], encoder
            end

          when :attribute
            if match = scan(/#{TAG_END}/o)
              encoder.text_token match, :tag
              in_attribute = nil
              if in_tag
                state = :in_special_tag
              else
                state = :initial
              end
            elsif match = scan(/#{ATTR_NAME}/o)
              in_attribute = IN_ATTRIBUTE[match]
              encoder.text_token match, :attribute_name
              state = :attribute_equal
            else
              in_tag = nil
              encoder.text_token getch, :error
            end

          when :attribute_equal
            if match = scan(/=/)  #/
              encoder.text_token match, :operator
              state = :attribute_value
            elsif check(/#{ATTR_NAME}/o) || check(/#{TAG_END}/o)
              # Attribute without a value: the next attribute name or the end
              # of the tag follows. Only peek at it (check does not advance),
              # so that the :attribute state scans and emits it. Consuming it
              # here would silently drop the text from the token stream.
              state = :attribute
              next
            else
              encoder.text_token getch, :error
              state = :attribute
            end

          when :attribute_value
            if match = scan(/#{ATTR_NAME}/o)
              # Unquoted value.
              encoder.text_token match, :attribute_value
              state = :attribute
            elsif match = scan(/["']/)
              if in_attribute == :script
                # Quoted value of an event-handler attribute: scan the part
                # up to the matching quote (or the end of input) as JavaScript.
                encoder.begin_group :inline
                encoder.text_token match, :inline_delimiter
                if scan(/javascript:[ \t]*/)
                  encoder.text_token matched, :comment
                end
                code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/)
                scan_java_script encoder, code
                match = scan(/["']/)
                encoder.text_token match, :inline_delimiter if match
                encoder.end_group :inline
                state = :attribute
                in_attribute = nil
              else
                encoder.begin_group :string
                state = :attribute_value_string
                plain_string_content = PLAIN_STRING_CONTENT[match]
                encoder.text_token match, :delimiter
              end
            elsif match = scan(/#{TAG_END}/o)
              encoder.text_token match, :tag
              state = :initial
            else
              encoder.text_token getch, :error
            end

          when :attribute_value_string
            if match = scan(plain_string_content)
              encoder.text_token match, :content
            elsif match = scan(/['"]/)
              encoder.text_token match, :delimiter
              encoder.end_group :string
              state = :attribute
            elsif match = scan(/#{ENTITY}/ox)
              encoder.text_token match, :entity
            elsif match = scan(/&/)
              # An ampersand that does not start an entity is plain content.
              encoder.text_token match, :content
            elsif match = scan(/[\n>]/)
              # The string was not terminated: close the group and report
              # the offending character as an error.
              encoder.end_group :string
              state = :initial
              encoder.text_token match, :error
            end

          when :in_special_tag
            case in_tag
            when 'script'
              encoder.text_token match, :space if match = scan(/[ \t]*\n/)
              if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/m)
                # Script body wrapped in an HTML comment: the comment markers
                # are emitted as :comment, the text in between is the code.
                code = self[2] || self[4]
                closing = self[3]
                encoder.text_token self[1], :comment
              else
                # Everything up to the closing tag, or to the end of input.
                code = scan_until(/(?=(?:\n\s*)?<\/script>)|\z/)
                closing = false
              end
              unless code.empty?
                encoder.begin_group :inline
                scan_java_script encoder, code
                encoder.end_group :inline
              end
              encoder.text_token closing, :comment if closing
              state = :initial
            else
              raise 'unknown special tag: %p' % [in_tag]
            end

          else
            raise_inspect 'Unknown state: %p' % [state], encoder

          end

        end

      end

      if options[:keep_state]
        @state = state
        @plain_string_content = plain_string_content
      end

      # Close the group of a quoted value that is still open at the end.
      encoder.end_group :string if state == :attribute_value_string

      encoder
    end

  end

end
end