annotate lib/redcloth3.rb @ 8:0c83d98252d9 yuya

* Add custom repo prefix and proper auth realm, remove auth cache (seems like an unwise feature), pass DB handle around, various other bits of tidying
author Chris Cannam
date Thu, 12 Aug 2010 15:31:37 +0100
parents 513646585e45
children 94944d00e43c
rev   line source
Chris@0 1 # vim:ts=4:sw=4:
Chris@0 2 # = RedCloth - Textile and Markdown Hybrid for Ruby
Chris@0 3 #
Chris@0 4 # Homepage:: http://whytheluckystiff.net/ruby/redcloth/
Chris@0 5 # Author:: why the lucky stiff (http://whytheluckystiff.net/)
Chris@0 6 # Copyright:: (cc) 2004 why the lucky stiff (and his puppet organizations.)
Chris@0 7 # License:: BSD
Chris@0 8 #
Chris@0 9 # (see http://hobix.com/textile/ for a Textile Reference.)
Chris@0 10 #
Chris@0 11 # Based on (and also inspired by) both:
Chris@0 12 #
Chris@0 13 # PyTextile: http://diveintomark.org/projects/textile/textile.py.txt
Chris@0 14 # Textism for PHP: http://www.textism.com/tools/textile/
Chris@0 15 #
Chris@0 16 #
Chris@0 17
Chris@0 18 # = RedCloth
Chris@0 19 #
Chris@0 20 # RedCloth is a Ruby library for converting Textile and/or Markdown
Chris@0 21 # into HTML. You can use either format, intermingled or separately.
Chris@0 22 # You can also extend RedCloth to honor your own custom text stylings.
Chris@0 23 #
Chris@0 24 # RedCloth users are encouraged to use Textile if they are generating
Chris@0 25 # HTML and to use Markdown if others will be viewing the plain text.
Chris@0 26 #
Chris@0 27 # == What is Textile?
Chris@0 28 #
Chris@0 29 # Textile is a simple formatting style for text
Chris@0 30 # documents, loosely based on some HTML conventions.
Chris@0 31 #
Chris@0 32 # == Sample Textile Text
Chris@0 33 #
Chris@0 34 # h2. This is a title
Chris@0 35 #
Chris@0 36 # h3. This is a subhead
Chris@0 37 #
Chris@0 38 # This is a bit of paragraph.
Chris@0 39 #
Chris@0 40 # bq. This is a blockquote.
Chris@0 41 #
Chris@0 42 # = Writing Textile
Chris@0 43 #
Chris@0 44 # A Textile document consists of paragraphs. Paragraphs
Chris@0 45 # can be specially formatted by adding a small instruction
Chris@0 46 # to the beginning of the paragraph.
Chris@0 47 #
Chris@0 48 # h[n]. Header of size [n].
Chris@0 49 # bq. Blockquote.
Chris@0 50 # # Numeric list.
Chris@0 51 # * Bulleted list.
Chris@0 52 #
Chris@0 53 # == Quick Phrase Modifiers
Chris@0 54 #
Chris@0 55 # Quick phrase modifiers are also included, to allow formatting
Chris@0 56 # of small portions of text within a paragraph.
Chris@0 57 #
Chris@0 58 # \_emphasis\_
Chris@0 59 # \_\_italicized\_\_
Chris@0 60 # \*strong\*
Chris@0 61 # \*\*bold\*\*
Chris@0 62 # ??citation??
Chris@0 63 # -deleted text-
Chris@0 64 # +inserted text+
Chris@0 65 # ^superscript^
Chris@0 66 # ~subscript~
Chris@0 67 # @code@
Chris@0 68 # %(classname)span%
Chris@0 69 #
Chris@0 70 # ==notextile== (leave text alone)
Chris@0 71 #
Chris@0 72 # == Links
Chris@0 73 #
Chris@0 74 # To make a hypertext link, put the link text in "quotation
Chris@0 75 # marks" followed immediately by a colon and the URL of the link.
Chris@0 76 #
Chris@0 77 # Optional: text in (parentheses) following the link text,
Chris@0 78 # but before the closing quotation mark, will become a Title
Chris@0 79 # attribute for the link, visible as a tool tip when a cursor is above it.
Chris@0 80 #
Chris@0 81 # Example:
Chris@0 82 #
Chris@0 83 # "This is a link (This is a title) ":http://www.textism.com
Chris@0 84 #
Chris@0 85 # Will become:
Chris@0 86 #
Chris@0 87 # <a href="http://www.textism.com" title="This is a title">This is a link</a>
Chris@0 88 #
Chris@0 89 # == Images
Chris@0 90 #
Chris@0 91 # To insert an image, put the URL for the image inside exclamation marks.
Chris@0 92 #
Chris@0 93 # Optional: text that immediately follows the URL in (parentheses) will
Chris@0 94 # be used as the Alt text for the image. Images on the web should always
Chris@0 95 # have descriptive Alt text for the benefit of readers using non-graphical
Chris@0 96 # browsers.
Chris@0 97 #
Chris@0 98 # Optional: place a colon followed by a URL immediately after the
Chris@0 99 # closing ! to make the image into a link.
Chris@0 100 #
Chris@0 101 # Example:
Chris@0 102 #
Chris@0 103 # !http://www.textism.com/common/textist.gif(Textist)!
Chris@0 104 #
Chris@0 105 # Will become:
Chris@0 106 #
Chris@0 107 # <img src="http://www.textism.com/common/textist.gif" alt="Textist" />
Chris@0 108 #
Chris@0 109 # With a link:
Chris@0 110 #
Chris@0 111 # !/common/textist.gif(Textist)!:http://textism.com
Chris@0 112 #
Chris@0 113 # Will become:
Chris@0 114 #
Chris@0 115 # <a href="http://textism.com"><img src="/common/textist.gif" alt="Textist" /></a>
Chris@0 116 #
Chris@0 117 # == Defining Acronyms
Chris@0 118 #
Chris@0 119 # HTML allows authors to define acronyms via the <acronym> tag. The definition appears as a
Chris@0 120 # tool tip when a cursor hovers over the acronym. A crucial aid to clear writing,
Chris@0 121 # this should be used at least once for each acronym in documents where they appear.
Chris@0 122 #
Chris@0 123 # To quickly define an acronym in Textile, place the full text in (parentheses)
Chris@0 124 # immediately following the acronym.
Chris@0 125 #
Chris@0 126 # Example:
Chris@0 127 #
Chris@0 128 # ACLU(American Civil Liberties Union)
Chris@0 129 #
Chris@0 130 # Will become:
Chris@0 131 #
Chris@0 132 # <acronym title="American Civil Liberties Union">ACLU</acronym>
Chris@0 133 #
Chris@0 134 # == Adding Tables
Chris@0 135 #
Chris@0 136 # In Textile, simple tables can be added by separating each column by
Chris@0 137 # a pipe.
Chris@0 138 #
Chris@0 139 # |a|simple|table|row|
Chris@0 140 # |And|Another|table|row|
Chris@0 141 #
Chris@0 142 # Attributes are defined by style definitions in parentheses.
Chris@0 143 #
Chris@0 144 # table(border:1px solid black).
Chris@0 145 # (background:#ddd;color:red). |{}| | | |
Chris@0 146 #
Chris@0 147 # == Using RedCloth
Chris@0 148 #
Chris@0 149 # RedCloth is simply an extension of the String class, which can handle
Chris@0 150 # Textile formatting. Use it like a String and output HTML with its
Chris@0 151 # RedCloth#to_html method.
Chris@0 152 #
Chris@0 153 # doc = RedCloth.new "
Chris@0 154 #
Chris@0 155 # h2. Test document
Chris@0 156 #
Chris@0 157 # Just a simple test."
Chris@0 158 #
Chris@0 159 # puts doc.to_html
Chris@0 160 #
Chris@0 161 # By default, RedCloth uses both Textile and Markdown formatting, with
Chris@0 162 # Textile formatting taking precedence. If you want to turn off Markdown
Chris@0 163 # formatting, to boost speed and limit the processor:
Chris@0 164 #
Chris@0 165 # class RedCloth::Textile.new( str )
Chris@0 166
Chris@0 167 class RedCloth3 < String
Chris@0 168
Chris@0 169 VERSION = '3.0.4'
Chris@0 170 DEFAULT_RULES = [:textile, :markdown]
Chris@0 171
Chris@0 172 #
Chris@0 173 # Two accessors for setting security restrictions.
Chris@0 174 #
Chris@0 175 # This is a nice thing if you're using RedCloth for
Chris@0 176 # formatting in public places (e.g. Wikis) where you
Chris@0 177 # don't want users to abuse HTML for bad things.
Chris@0 178 #
Chris@0 179 # If +:filter_html+ is set, HTML which wasn't
Chris@0 180 # created by the Textile processor will be escaped.
Chris@0 181 #
Chris@0 182 # If +:filter_styles+ is set, it will also disable
Chris@0 183 # the style markup specifier. ('{color: red}')
Chris@0 184 #
Chris@0 185 attr_accessor :filter_html, :filter_styles
Chris@0 186
Chris@0 187 #
Chris@0 188 # Accessor for toggling hard breaks.
Chris@0 189 #
Chris@0 190 # If +:hard_breaks+ is set, single newlines will
Chris@0 191 # be converted to HTML break tags. This is the
Chris@0 192 # default behavior for traditional RedCloth.
Chris@0 193 #
Chris@0 194 attr_accessor :hard_breaks
Chris@0 195
Chris@0 196 # Accessor for toggling lite mode.
Chris@0 197 #
Chris@0 198 # In lite mode, block-level rules are ignored. This means
Chris@0 199 # that tables, paragraphs, lists, and such aren't available.
Chris@0 200 # Only the inline markup for bold, italics, entities and so on.
Chris@0 201 #
Chris@0 202 # r = RedCloth.new( "And then? She *fell*!", [:lite_mode] )
Chris@0 203 # r.to_html
Chris@0 204 # #=> "And then? She <strong>fell</strong>!"
Chris@0 205 #
Chris@0 206 attr_accessor :lite_mode
Chris@0 207
Chris@0 208 #
Chris@0 209 # Accessor for toggling span caps.
Chris@0 210 #
Chris@0 211 # Textile places `span' tags around capitalized
Chris@0 212 # words by default, but this wreaks havoc on Wikis.
Chris@0 213 # If +:no_span_caps+ is set, this will be
Chris@0 214 # suppressed.
Chris@0 215 #
Chris@0 216 attr_accessor :no_span_caps
Chris@0 217
Chris@0 218 #
Chris@0 219 # Establishes the markup precedence. Available rules include:
Chris@0 220 #
Chris@0 221 # == Textile Rules
Chris@0 222 #
Chris@0 223 # The following textile rules can be set individually. Or add the complete
Chris@0 224 # set of rules with the single :textile rule, which supplies the rule set in
Chris@0 225 # the following precedence:
Chris@0 226 #
Chris@0 227 # refs_textile:: Textile references (i.e. [hobix]http://hobix.com/)
Chris@0 228 # block_textile_table:: Textile table block structures
Chris@0 229 # block_textile_lists:: Textile list structures
Chris@0 230 # block_textile_prefix:: Textile blocks with prefixes (i.e. bq., h2., etc.)
Chris@0 231 # inline_textile_image:: Textile inline images
Chris@0 232 # inline_textile_link:: Textile inline links
Chris@0 233 # inline_textile_span:: Textile inline spans
Chris@0 234 # glyphs_textile:: Textile entities (such as em-dashes and smart quotes)
Chris@0 235 #
Chris@0 236 # == Markdown
Chris@0 237 #
Chris@0 238 # refs_markdown:: Markdown references (for example: [hobix]: http://hobix.com/)
Chris@0 239 # block_markdown_setext:: Markdown setext headers
Chris@0 240 # block_markdown_atx:: Markdown atx headers
Chris@0 241 # block_markdown_rule:: Markdown horizontal rules
Chris@0 242 # block_markdown_bq:: Markdown blockquotes
Chris@0 243 # block_markdown_lists:: Markdown lists
Chris@0 244 # inline_markdown_link:: Markdown links
Chris@0 245 attr_accessor :rules
Chris@0 246
Chris@0 247 # Returns a new RedCloth object, based on _string_ and
Chris@0 248 # enforcing all the included _restrictions_.
Chris@0 249 #
Chris@0 250 # r = RedCloth.new( "h1. A <b>bold</b> man", [:filter_html] )
Chris@0 251 # r.to_html
Chris@0 252 # #=>"<h1>A &lt;b&gt;bold&lt;/b&gt; man</h1>"
Chris@0 253 #
def initialize( string, restrictions = [] )
    # Each restriction names an accessor (e.g. :filter_html); switch it on
    # by calling the matching writer with +true+.
    restrictions.each do |restriction|
        writer = method( "#{ restriction }=" )
        writer.call( true )
    end
    # The String part of the object holds the raw markup.
    super( string )
end
Chris@0 258
Chris@0 259 #
Chris@0 260 # Generates HTML from the Textile contents.
Chris@0 261 #
Chris@0 262 # r = RedCloth.new( "And then? She *fell*!" )
Chris@0 263 # r.to_html( true )
Chris@0 264 # #=>"And then? She <strong>fell</strong>!"
Chris@0 265 #
def to_html( *rules )
    # No explicit rules: use the class-wide defaults.
    rules = DEFAULT_RULES if rules.empty?

    # Work on a copy so the receiver itself is never modified.
    text = self.dup

    @urlrefs = {}
    @shelf = []

    # Expansions for the two shorthand rule names. Note that :refs_textile
    # is not part of the :textile expansion below.
    rule_sets = {
        :textile => [:block_textile_table, :block_textile_lists,
            :block_textile_prefix, :inline_textile_image, :inline_textile_link,
            :inline_textile_code, :inline_textile_span, :glyphs_textile],
        :markdown => [:refs_markdown, :block_markdown_setext, :block_markdown_atx, :block_markdown_rule,
            :block_markdown_bq, :block_markdown_lists,
            :inline_markdown_reflink, :inline_markdown_link]
    }
    @rules = rules.collect { |rule| rule_sets.fetch( rule, rule ) }.flatten

    # standard clean up
    incoming_entities text
    clean_white_space text

    # start processor
    @pre_list = []
    rip_offtags text
    no_textile text
    escape_html_tags text
    hard_break text
    unless @lite_mode
        refs text
        # quotes have to be converted before #blocks splits the text
        block_textile_quotes text
        blocks text
    end
    inline text
    smooth_offtags text

    retrieve text

    # final clean up of processing markers
    text.gsub!( /<\/?notextile>/, '' )
    text.gsub!( /x%x%/, '&#38;' )
    clean_html text if filter_html
    text.strip!
    text
end
Chris@0 318
Chris@0 319 #######
Chris@0 320 private
Chris@0 321 #######
Chris@0 322 #
Chris@0 323 # Mapping of 8-bit ASCII codes to HTML numerical entity equivalents.
Chris@0 324 # (from PyTextile)
Chris@0 325 #
# Pairs of [character, replacement] for the byte codes 128-159. The
# replacement is the HTML numeric character reference for the code point in
# the second column, or an empty string where the table lists 0.
TEXTILE_TAGS =
    [[128, 8364], [129, 0], [130, 8218], [131, 402], [132, 8222], [133, 8230],
     [134, 8224], [135, 8225], [136, 710], [137, 8240], [138, 352], [139, 8249],
     [140, 338], [141, 0], [142, 0], [143, 0], [144, 0], [145, 8216], [146, 8217],
     [147, 8220], [148, 8221], [149, 8226], [150, 8211], [151, 8212], [152, 732],
     [153, 8482], [154, 353], [155, 8250], [156, 339], [157, 0], [158, 0], [159, 376]].
    collect! do |code, codepoint|
        # "&##{ ... };" is required here: a lone "&#{ ... };" is ordinary
        # string interpolation and silently drops the '#' of the entity.
        [code.chr, codepoint.zero? ? "" : "&##{ codepoint };"]
    end
Chris@0 337
Chris@0 338 #
Chris@0 339 # Regular expressions to convert to HTML.
Chris@0 340 #
# Building blocks for the Textile patterns used throughout this class:
#   A_HLGN / A_VLGN / A  - horizontal / vertical alignment markers
#   C_CLAS / C_LNGE / C_STYL / C  - (class), [language] and {style} groups
#   S_CSPN / S_RSPN / S  - table cell column / row span markers
A_HLGN = /(?:(?:<>|<|>|\=|[()]+)+)/
A_VLGN = /[\-^~]/
C_CLAS = '(?:\([^)]+\))'
C_LNGE = '(?:\[[^\[\]]+\])'
C_STYL = '(?:\{[^}]+\})'
S_CSPN = '(?:\\\\\d+)'
S_RSPN = '(?:/\d+)'
A = "(?:#{A_HLGN}?#{A_VLGN}?|#{A_VLGN}?#{A_HLGN}?)"
S = "(?:#{S_CSPN}?#{S_RSPN}|#{S_RSPN}?#{S_CSPN}?)"
C = "(?:#{C_CLAS}?#{C_STYL}?#{C_LNGE}?|#{C_STYL}?#{C_LNGE}?#{C_CLAS}?|#{C_LNGE}?#{C_STYL}?#{C_CLAS}?)"

# Punctuation sets, escaped so they can be dropped into a character class.
# Former, wider definition kept for reference:
# PUNCT = Regexp::quote( '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~' )
PUNCT = Regexp.escape( '!"#$%&\'*+,-./:;=?@\\^_`|~' )
PUNCT_NOQ = Regexp.escape( '!"#$&\',./:;=?@\\`|' )
PUNCT_Q = Regexp.escape( '*-_+^~%' )

# Generic URL matcher: the URL itself, then optional trailing punctuation.
HYPERLINK = '(\S+?)([^\w\s/;=\?]*?)(?=\s|<|$)'
Chris@0 356
Chris@0 357 # Text markup tags, don't conflict with block tags
# Inline (text-level) HTML tags. A block that starts with one of these is
# still processed by #blocks; blocks starting with any other tag are treated
# as complex HTML and passed through untouched.
# ('br' used to be listed twice; the duplicate carried no meaning.)
SIMPLE_HTML_TAGS = [
    'tt', 'b', 'i', 'big', 'small', 'em', 'strong', 'dfn', 'code',
    'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'a', 'img', 'br',
    'map', 'q', 'sub', 'sup', 'span', 'bdo'
]
Chris@0 363
# Quick phrase modifiers as [textile marker, HTML tag, matching mode].
QTAGS = [
    ['**', 'b', :limit],
    ['*', 'strong', :limit],
    ['??', 'cite', :limit],
    ['-', 'del', :limit],
    ['__', 'i', :limit],
    ['_', 'em', :limit],
    ['%', 'span', :limit],
    ['+', 'ins', :limit],
    ['^', 'sup', :limit],
    ['~', 'sub', :limit]
]

# Alternation of all escaped markers; lets a modifier be recognised when it
# sits directly inside another one.
QTAGS_JOIN = QTAGS.map { |marker, _tag, _mode| Regexp.escape( marker ) }.join( '|' )

# Turn every entry into [marker, tag, compiled pattern, mode].
QTAGS.collect! do |marker, tag, mode|
    quoted = Regexp.escape( marker )
    pattern =
        if mode == :limit
            /(^|[>\s\(]) # sta
            (?!\-\-)
            (#{QTAGS_JOIN}|) # oqs
            (#{quoted}) # qtag
            (\w|[^\s].*?[^\s]) # content
            (?!\-\-)
            #{quoted}
            (#{QTAGS_JOIN}|) # oqa
            (?=[[:punct:]]|<|\s|\)|$)/x
        else
            /(#{quoted})
            (#{C})
            (?::(\S+))?
            (\w|[^\s\-].*?[^\s\-])
            #{quoted}/xm
        end
    [marker, tag, pattern, mode]
end
Chris@0 401
Chris@0 402 # Elements to handle
# Glyph substitution table. Every rule is commented out, so the table is
# empty; the disabled rules are kept below for reference.
GLYPHS = [
    # [ /([^\s\[{(>])?\'([dmst]\b|ll\b|ve\b|\s|:|$)/, '\1&#8217;\2' ], # single closing
    # [ /([^\s\[{(>#{PUNCT_Q}][#{PUNCT_Q}]*)\'/, '\1&#8217;' ], # single closing
    # [ /\'(?=[#{PUNCT_Q}]*(s\b|[\s#{PUNCT_NOQ}]))/, '&#8217;' ], # single closing
    # [ /\'/, '&#8216;' ], # single opening
    # [ /</, '&lt;' ], # less-than
    # [ />/, '&gt;' ], # greater-than
    # [ /([^\s\[{(])?"(\s|:|$)/, '\1&#8221;\2' ], # double closing
    # [ /([^\s\[{(>#{PUNCT_Q}][#{PUNCT_Q}]*)"/, '\1&#8221;' ], # double closing
    # [ /"(?=[#{PUNCT_Q}]*[\s#{PUNCT_NOQ}])/, '&#8221;' ], # double closing
    # [ /"/, '&#8220;' ], # double opening
    # [ /\b( )?\.{3}/, '\1&#8230;' ], # ellipsis
    # [ /\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])/, '<acronym title="\2">\1</acronym>' ], # 3+ uppercase acronym
    # [ /(^|[^"][>\s])([A-Z][A-Z0-9 ]+[A-Z0-9])([^<A-Za-z0-9]|$)/, '\1<span class="caps">\2</span>\3', :no_span_caps ], # 3+ uppercase caps
    # [ /(\.\s)?\s?--\s?/, '\1&#8212;' ], # em dash
    # [ /\s->\s/, ' &rarr; ' ], # right arrow
    # [ /\s-\s/, ' &#8211; ' ], # en dash
    # [ /(\d+) ?x ?(\d+)/, '\1&#215;\2' ], # dimension sign
    # [ /\b ?[(\[]TM[\])]/i, '&#8482;' ], # trademark
    # [ /\b ?[(\[]R[\])]/i, '&#174;' ], # registered
    # [ /\b ?[(\[]C[\])]/i, '&#169;' ] # copyright
]
Chris@0 425
# Textile horizontal alignment marker => CSS text-align value.
H_ALGN_VALS = { '<' => 'left', '=' => 'center', '>' => 'right', '<>' => 'justify' }

# Textile vertical alignment marker => CSS vertical-align value.
V_ALGN_VALS = { '^' => 'top', '-' => 'middle', '~' => 'bottom' }
Chris@0 438
Chris@0 439 #
Chris@0 440 # Flexible HTML escaping
Chris@0 441 #
def htmlesc( str, mode=:Quotes )
    # nil (or false) is handed back untouched
    return str unless str

    # NOTE: +str+ is modified in place and then returned.
    # '&' has to go first so the entities added below are not escaped again.
    str.gsub!( '&', '&amp;' )
    str.gsub!( '"', '&quot;' ) unless mode == :NoQuotes
    str.gsub!( "'", '&#039;' ) if mode == :Quotes
    str.gsub!( '<', '&lt;')
    str.gsub!( '>', '&gt;')
    str
end
Chris@0 452
Chris@0 453 # Search and replace for Textile glyphs (quotes, dashes, other symbols)
def pgl( text )
    # The generic GLYPHS loop is disabled; only the acronym rule is applied:
    # three or more capitals/digits followed by "(definition)".
    #GLYPHS.each do |re, resub, tog|
    # next if tog and method( tog ).call
    # text.gsub! re, resub
    #end
    acronym_re = /\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])/
    text.gsub!( acronym_re ) do
        found = Regexp.last_match
        title = htmlesc( found[2] )
        "<acronym title=\"#{ title }\">#{ found[1] }</acronym>"
    end
end
Chris@0 463
Chris@0 464 # Parses Textile attribute lists and builds an HTML attribute string
# Turns a Textile attribute specification (text_in) into a string of HTML
# attributes, each prefixed with a space. Returns '' when text_in is nil.
# With element == 'td' the column/row span and vertical alignment markers
# are honoured as well.
#
# User supplied class, id and lang values are HTML-escaped (as the style
# value already was) so they cannot break out of the quoted attribute.
def pba( text_in, element = "" )

    return '' unless text_in

    style = []
    text = text_in.dup
    if element == 'td'
        colspan = $1 if text =~ /\\(\d+)/
        rowspan = $1 if text =~ /\/(\d+)/
        style << "vertical-align:#{ v_align( $& ) };" if text =~ A_VLGN
    end

    # {style} - dropped entirely when filter_styles is set
    style << "#{ htmlesc $1 };" if text.sub!( /\{([^}]*)\}/, '' ) && !filter_styles

    # [lang]
    lang = $1 if text.sub!( /\[([^)]+?)\]/, '' )

    # (class) or (class#id)
    cls = $1 if text.sub!( /\(([^()]+?)\)/, '' )

    # any parentheses still left over are padding markers
    style << "padding-left:#{ $1.length }em;" if text.sub!( /([(]+)/, '' )
    style << "padding-right:#{ $1.length }em;" if text.sub!( /([)]+)/, '' )

    style << "text-align:#{ h_align( $& ) };" if text =~ A_HLGN

    cls, id = $1, $2 if cls =~ /^(.*?)#(.*)$/

    # htmlesc passes nil through, so absent values stay absent
    cls = htmlesc( cls )
    lang = htmlesc( lang )
    id = htmlesc( id )

    atts = ''
    atts << " style=\"#{ style.join }\"" unless style.empty?
    atts << " class=\"#{ cls }\"" unless cls.to_s.empty?
    atts << " lang=\"#{ lang }\"" if lang
    atts << " id=\"#{ id }\"" if id
    atts << " colspan=\"#{ colspan }\"" if colspan
    atts << " rowspan=\"#{ rowspan }\"" if rowspan

    atts
end
Chris@0 504
# An optional "table(atts)." line followed by rows delimited with pipes,
# ended by a blank line or the end of the text.
TABLE_RE = /^(?:table(_?#{S}#{A}#{C})\. ?\n)?^(#{A}#{C}\.? ?\|.*?\|)(\n\n|\Z)/m

# Converts every Textile table found in +text+ (in place) into HTML.
def block_textile_table( text )
    text.gsub!( TABLE_RE ) do
        table_atts, body = $~[1..2]
        table_atts = pba( table_atts, 'table' )
        table_atts = shelve( table_atts ) if table_atts
        html_rows = []

        body.each_line do |line|
            # optional row attributes in front of the first pipe
            row_atts = nil
            if line =~ /^(#{A}#{C}\. )(.*)/m
                row_atts, line = pba( $1, 'tr' ), $2
            end

            html_cells = []
            # split on pipes (the separators are kept, then skipped below)
            line.split( /(\|)(?![^\[\|]*\]\])/ )[1..-2].each do |cell|
                next if cell == '|'
                # a leading underscore marks a header cell
                kind = cell =~ /^_/ ? 'h' : 'd'

                cell_atts = ''
                if cell =~ /^(_?#{S}#{A}#{C}\. ?)(.*)/
                    cell_atts, cell = pba( $1, 'td' ), $2
                end

                cell_atts = shelve( cell_atts ) if cell_atts
                html_cells << "\t\t\t<t#{ kind }#{ cell_atts }>#{ cell }</t#{ kind }>"
            end
            row_atts = shelve( row_atts ) if row_atts
            html_rows << "\t\t<tr#{ row_atts }>\n#{ html_cells.join( "\n" ) }\n\t\t</tr>"
        end
        "\t<table#{ table_atts }>\n#{ html_rows.join( "\n" ) }\n\t</table>\n\n"
    end
end
Chris@0 536
# A run of lines starting with '#' or '*' markers.
LISTS_RE = /^([#*]+?#{C} .*?)$(?![^#*])/m
# One list line: markers, attributes, content.
LISTS_CONTENT_RE = /^([#*]+)(#{A}#{C}) (.*)$/m

# Converts Textile lists in +text+ (in place) into nested <ol>/<ul> markup.
def block_textile_lists( text )
    text.gsub!( LISTS_RE ) do |list_src|
        lines = list_src.split( /\n/ )
        last_seen = -1
        # marker strings of the lists that are currently open, outermost first
        open_lists = []
        lines.each_with_index do |line, idx|
            if line =~ LISTS_CONTENT_RE
                marker, atts, content = $~[1..3]
                if open_lists.last
                    if open_lists.last.length > marker.length
                        # moving back out: close the deeper lists on the previous line
                        (open_lists.length - 1).downto(0) do |i|
                            break if open_lists[i].length == marker.length
                            lines[idx - 1] << "</li>\n\t</#{ lT( open_lists[i] ) }l>\n\t"
                            open_lists.pop
                        end
                    end
                    if open_lists.last && open_lists.last.length == marker.length
                        lines[idx - 1] << '</li>'
                    end
                end
                if open_lists.last == marker
                    lines[idx] = "\t\t<li>#{ content }"
                else
                    # a new (possibly nested) list starts here
                    open_lists << marker
                    atts = pba( atts )
                    atts = shelve( atts ) if atts
                    lines[idx] = "\t<#{ lT(marker) }l#{ atts }>\n\t<li>#{ content }"
                end
            end
            # recorded for list items and plain lines alike
            last_seen = idx

            if idx - last_seen > 1 || idx == lines.length - 1
                # the appended string is truthy, so this closes and removes
                # every list that is still open
                open_lists.delete_if do |open_marker|
                    lines[last_seen] << "</li>\n\t</#{ lT( open_marker ) }l>"
                end
            end
        end
        lines.join( "\n" )
    end
end
Chris@0 583
# One or more consecutive lines starting with '>'.
QUOTES_RE = /(^>+([^\n]*?)(\n|$))+/m
# Leading '>' markers (and spaces), then the quoted text.
QUOTES_CONTENT_RE = /^([> ]+)(.*)$/m

# Rewrites e-mail style '>' quoting in +text+ (in place) as nested
# <blockquote> elements; the nesting depth is the number of '>' on a line.
def block_textile_quotes( text )
    text.gsub!( QUOTES_RE ) do |quoted|
        html = ''
        depth = 0
        quoted.split( /\n/ ).each do |line|
            line =~ QUOTES_CONTENT_RE
            markers, body = $1, $2
            level = markers.count('>')
            if level > depth
                html << ("\n\n" + '<blockquote>' * (level - depth) + "\n\n")
                depth = level
            elsif level < depth
                html << ("\n\n" + '</blockquote>' * (depth - level) + "\n\n")
                depth = level
            end
            html << (body + "\n")
        end
        # close whatever is still open
        html << ("\n" + '</blockquote>' * depth + "\n\n")
        html
    end
end
Chris@0 606
# @code@ or @|lang|code@, delimited by non-word characters.
CODE_RE = /(\W)
@
(?:\|(\w+?)\|)?
(.+?)
@
(?=\W)/x

# Wraps @...@ spans in <code> tags (with an optional lang attribute) and
# hands the result to rip_offtags.
def inline_textile_code( text )
    text.gsub!( CODE_RE ) do
        # CODE_RE has three capture groups; the trailing character is only
        # looked at, never consumed, so nothing follows the closing tag here.
        lead, lang, code = $~[1..3]
        lang_att = lang ? " lang=\"#{ lang }\"" : nil
        rip_offtags( "#{ lead }<code#{ lang_att }>#{ code }</code>", false )
    end
end
Chris@0 621
# List type letter for a marker string: 'o' (ordered) when the marker ends
# in '#', otherwise 'u' (unordered).
def lT( text )
    if text =~ /\#$/
        'o'
    else
        'u'
    end
end
Chris@0 625
# When hard_breaks is enabled, turns single newlines into <br /> tags
# (in place), except at the very end and in front of list, header or
# table markers.
def hard_break( text )
    return unless hard_breaks
    text.gsub!( /(.)\n(?!\Z| *([#*=]+(\s|$)|[{|]))/, "\\1<br />" )
end
Chris@0 629
# Blocks are separated by two or more newlines not followed by a space.
BLOCKS_GROUP_RE = /\n{2,}(?! )/m

# Splits +text+ into blocks, runs every block_* rule from @rules over each
# block and wraps blocks no rule claimed in <p> (or <pre><code> when
# +deep_code+ is set). +text+ is replaced with the result.
def blocks( text, deep_code = false )
    rendered = text.split( BLOCKS_GROUP_RE ).collect do |blk|
        plain = blk !~ /\A[#*> ]/

        # blocks that start with a non-inline HTML tag are left alone
        next blk if blk =~ /^<\/?(\w+).*>/ && !SIMPLE_HTML_TAGS.include?( $1 )

        blk.strip!
        next blk if blk.empty?

        # indented sub-blocks are processed recursively
        code_blk = nil
        blk.gsub!( /((?:\n(?:\n^ +[^\n]*)+)+)/m ) do |iblk|
            flush_left iblk
            blocks iblk, plain
            # NOTE: non-destructive gsub, its result is discarded
            iblk.gsub( /^(\S)/, "\t\\1" )
            if plain
                # pulled out of the block and appended after it below
                code_blk = iblk
                ""
            else
                iblk
            end
        end

        # every block rule is tried; a truthy return counts as "applied"
        applied = @rules.count do |rule_name|
            rule_name.to_s.match( /^block_/ ) and method( rule_name ).call( blk )
        end
        if applied.zero?
            blk = deep_code ? "\t<pre><code>#{ blk }</code></pre>" : "\t<p>#{ blk }</p>"
        end
        # hard_break blk
        blk + "\n#{ code_blk }"
    end

    text.replace( rendered.join( "\n\n" ) )
end
Chris@0 675
# Handler for the "bq." prefix: a blockquote holding one paragraph.
# +cite+ (optional) may be a reference name or a URL; it is resolved through
# check_refs and HTML-escaped before it is written into the cite attribute.
# +atts+ is a ready-made attribute string for the inner <p>.
def textile_bq( tag, atts, cite, content )
    cite_url, _cite_title = check_refs( cite )
    # escape a copy: check_refs may hand back the string stored in @urlrefs
    cite_att = cite_url ? " cite=\"#{ htmlesc( cite_url.dup ) }\"" : nil
    atts = shelve( atts ) if atts
    "\t<blockquote#{ cite_att }>\n\t\t<p#{ atts }>#{ content }</p>\n\t</blockquote>"
end
Chris@0 682
# Generic prefix handler: wraps +content+ in <tag>, using the (shelved)
# attribute string +atts+. +cite+ is accepted but not used.
def textile_p( tag, atts, cite, content )
    shelved = atts ? shelve( atts ) : atts
    "\t<#{ tag }#{ shelved }>#{ content }</#{ tag }>"
end

# Headers are rendered exactly like paragraphs, only the tag differs.
alias textile_h1 textile_p
alias textile_h2 textile_p
alias textile_h3 textile_p
alias textile_h4 textile_p
alias textile_h5 textile_p
alias textile_h6 textile_p
Chris@0 694
# Handler for "fnN." footnotes: a paragraph with id "fnN" and class
# "footnote", its text preceded by the superscripted number.
# NOTE: the id/class attributes are appended to +atts+ in place.
def textile_fn_( tag, num, atts, cite, content )
    atts << %( id="fn#{ num }" class="footnote")
    shelved = atts ? shelve( atts ) : atts
    "\t<p#{ shelved }><sup>#{ num }</sup> #{ content }</p>"
end
Chris@0 701
# Prefixed block, e.g. "h2(cls). Title" or "bq.:http://cite Text".
# Captures: 1 full tag, 2 its letters, 3 its digits, 4 attributes,
# 5 cite target (optional), 6 content.
BLOCK_RE = /^(([a-z]+)(\d*))(#{A}#{C})\.(?::(\S+))? (.*)$/m
Chris@0 703
# Handles blocks that start with a Textile prefix ("p.", "h2.", "bq.",
# "fn1.", ...). The matched block is replaced (in place) by the output of
# the handler method textile_<tag>, or textile_<letters>_ for numbered
# prefixes such as footnotes.
#
# Returns +text+ when a handler produced a replacement, nil otherwise.
def block_textile_prefix( text )
    return unless text =~ BLOCK_RE

    # remember the match before any further regexp work
    matched = $&
    tag, tagpre, num, atts, cite, content = $~[1..6]
    atts = pba( atts )

    # pass to prefix handler
    replacement =
        if respond_to?( "textile_#{ tag }", true )
            method( "textile_#{ tag }" ).call( tag, atts, cite, content )
        elsif respond_to?( "textile_#{ tagpre }_", true )
            method( "textile_#{ tagpre }_" ).call( tagpre, num, atts, cite, content )
        end

    # Block form on purpose: a replacement *string* would have sequences
    # such as \1, \0 or \\ in the user's content interpreted by gsub!.
    text.gsub!( matched ) { replacement } if replacement
end
Chris@0 717
# Leading text underlined with a row of '=' or '-'.
SETEXT_RE = /\A(.+?)\n([=-])[=-]* *$/m

# Markdown setext headers: '=' underlining gives <h1>, '-' gives <h2>.
# Whatever follows the header is processed as further blocks.
def block_markdown_setext( text )
    return unless text =~ SETEXT_RE

    title, underline, rest = $1, $2, $'
    tag = underline == "=" ? "h1" : "h2"
    blocks rest
    text.replace( "<#{ tag }>#{ title }</#{ tag }>" + rest )
end
Chris@0 727
ATX_RE = /\A(\#{1,6}) # $1 = string of #'s
[ ]*
(.+?) # $2 = Header text
[ ]*
\#* # optional closing #'s (not counted)
$/x

# Markdown atx headers: the number of leading '#' gives the header level.
# Whatever follows the header line is processed as further blocks.
def block_markdown_atx( text )
    return unless text =~ ATX_RE

    hashes, title, rest = $1, $2, $'
    tag = "h#{ hashes.length }"
    blocks rest
    text.replace( "<#{ tag }>#{ title }</#{ tag }>\n\n" + rest )
end
Chris@0 742
# Leading lines quoted with '>' (plus their continuation lines).
MARKDOWN_BQ_RE = /\A(^ *> ?.+$(.+\n)*\n*)+/m

# Markdown blockquotes: strips the '>' markers, processes the quoted text
# as blocks of its own and wraps the tab-indented result in <blockquote>.
def block_markdown_bq( text )
    text.gsub!( MARKDOWN_BQ_RE ) do |quoted|
        quoted.gsub!( /^ *> ?/, '' )
        flush_left quoted
        blocks quoted
        quoted.gsub!( /^(\S)/, "\t\\1" )
        "<blockquote>\n#{ quoted }\n</blockquote>\n\n"
    end
end
Chris@0 754
# A line made of three or more '*', '-' or '_', optionally space separated.
MARKDOWN_RULE_RE = Regexp.new(
    "^(" + %w[* - _].map { |ch| " ?(#{ Regexp.escape( ch ) } ?){3,}" }.join( '|' ) + ")$"
)

# Markdown horizontal rules: every matching line becomes <hr />.
def block_markdown_rule( text )
    text.gsub!( MARKDOWN_RULE_RE ) { "<hr />" }
end
Chris@0 764
Chris@0 765 # XXX TODO XXX
# Placeholder: Markdown lists are not implemented. Leaves +text+ untouched
# and returns nil, so #blocks never counts this rule as applied.
def block_markdown_lists( text )
    nil
end
Chris@0 768
# Applies every quick phrase modifier from QTAGS to +text+ (in place),
# wrapping the marked content in the corresponding HTML tag.
def inline_textile_span( text )
    QTAGS.each do |_marker, html_tag, pattern, kind|
        text.gsub!( pattern ) do
            if kind == :limit
                lead, before, _qtag, body, after = $~[1..6]
                # attributes, if any, sit at the very start of the content
                atts = nil
                if body =~ /^(#{C})(.+)$/
                    atts, body = $~[1..2]
                end
            else
                _qtag, atts, _cite, body = $~[1..4]
                lead = ''
            end
            atts = pba( atts )
            atts = shelve( atts ) if atts

            "#{ lead }#{ before }<#{ html_tag }#{ atts }>#{ body }</#{ html_tag }>#{ after }"
        end
    end
end
Chris@0 792
# Textile link: "text (title)":url
# Capture 1 is the whole link; captures 2-9 are named in the comments below.
LINK_RE = /
(
([\s\[{(]|[#{PUNCT}])? # $pre
" # start
(#{C}) # $atts
([^"\n]+?) # $text
\s?
(?:\(([^)]+?)\)(?="))? # $title
":
( # $url
(\/|[a-zA-Z]+:\/\/|www\.|mailto:) # $proto
[\w\/]\S+?
)
(\/)? # $slash
([^\w\=\/;\(\)]*?) # $post
)
(?=<|\s|$)
/x
#" (closes the quote above for editors with naive syntax highlighting)
# Converts Textile links ("text (title)":url) in +text+ (in place) into
# <a> elements. The url may be a reference name known to check_refs.
# Links whose text contains a <br /> are left as they are; http(s) links
# get class="external".
def inline_textile_link( text )
    text.gsub!( LINK_RE ) do |m|
        # own name for the link text so the method argument is not clobbered
        all, pre, atts, link_text, title, url, proto, slash, post = $~[1..9]
        if link_text.include?('<br />')
            all
        else
            url, url_title = check_refs( url )
            title ||= url_title

            # A URL that ends in ')' and has more closing than opening
            # parentheses was most likely written inside parentheses: hand
            # the last ')' back to the surrounding text.
            if ( url[-1]==?) and ((url.count("(") - url.count(")")) < 0 ) )
                url=url[0..-2] # discard closing parenthesis from url
                post = ")"+post # add closing parenthesis to post
            end
            atts = pba( atts )
            # htmlesc works in place; escape copies so that strings stored
            # in @urlrefs are not escaped again each time they are used
            atts = " href=\"#{ htmlesc url.dup }#{ slash }\"#{ atts }"
            atts << " title=\"#{ htmlesc title.dup }\"" if title
            atts = shelve( atts ) if atts

            external = (url =~ /^https?:\/\//) ? ' class="external"' : ''

            "#{ pre }<a#{ atts }#{ external }>#{ link_text }</a>#{ post }"
        end
    end
end
Chris@0 838
MARKDOWN_REFLINK_RE = /
\[([^\[\]]+)\] # $text
[ ]? # opt. space
(?:\n[ ]*)? # one optional newline followed by spaces
\[(.*?)\] # $id
/x

# Converts Markdown reference links ([text][id], or [text][] where the text
# doubles as the id) in +text+ (in place) into <a> elements. The target is
# looked up with check_refs; url and title are HTML-escaped before they are
# written into the attributes.
def inline_markdown_reflink( text )
    text.gsub!( MARKDOWN_REFLINK_RE ) do |m|
        # own names so the method argument is not clobbered
        label, ref_id = $~[1..2]

        url, title = check_refs( ref_id.empty? ? label : ref_id )

        # htmlesc works in place; escape copies so the strings stored in
        # @urlrefs stay as they are
        atts = " href=\"#{ htmlesc( url.dup ) }\""
        atts << " title=\"#{ htmlesc( title.dup ) }\"" if title
        atts = shelve( atts )

        "<a#{ atts }>#{ label }</a>"
    end
end
Chris@0 863
MARKDOWN_LINK_RE = /
\[([^\[\]]+)\] # $text
\( # open paren
[ \t]* # opt space
<?(.+?)>? # $href
[ \t]* # opt space
(?: # whole title
(['"]) # $quote
(.*?) # $title
\3 # matching quote
)? # title is optional
\)
/x

# Converts inline Markdown links ([text](url "title")) in +text+ (in place)
# into <a> elements. url and title come straight from the document, so both
# are HTML-escaped before they are written into the attributes.
def inline_markdown_link( text )
    text.gsub!( MARKDOWN_LINK_RE ) do |m|
        # own name for the link text so the method argument is not clobbered
        label, url, _quote, title = $~[1..4]

        atts = " href=\"#{ htmlesc( url ) }\""
        atts << " title=\"#{ htmlesc( title ) }\"" if title
        atts = shelve( atts )

        "<a#{ atts }>#{ label }</a>"
    end
end
Chris@0 889
# Textile link alias at the start of a line:  [alias]url
# Captures: 1 = leading spaces, 2 = alias, 3 = the whole HYPERLINK match.
TEXTILE_REFS_RE =  /(^ *)\[([^\[\n]+?)\](#{HYPERLINK})(?=\s|$)/

# Markdown link definition at the start of a line:  [id]: <url> "title"
# Captures: 1 = leading spaces, 2 = id, 3 = the whole HYPERLINK match;
# the quoted title is the last capture group.
MARKDOWN_REFS_RE = /(^ *)\[([^\n]+?)\]:\s+<?(#{HYPERLINK})>?(?:\s+"((?:[^"]|\\")+)")?(?=\s|$)/m
Chris@0 892
# Runs every rule in @rules whose name starts with "refs_" against +text+,
# in the order the rules are listed.  Returns @rules.
def refs( text )
    @rules.each do |rule_name|
        next unless rule_name.to_s =~ /^refs_/
        method( rule_name ).call( text )
    end
end
Chris@0 898
# Collects Textile link aliases ([alias]url) from +text+ into @urlrefs,
# keyed by the lower-cased alias, and removes each definition from +text+.
# Returns the result of String#gsub! (nil when nothing matched).
def refs_textile( text )
    text.gsub!( TEXTILE_REFS_RE ) do
        found = $~
        @urlrefs[found[2].downcase] = [found[3], nil]
        nil     # the definition itself is replaced by nothing
    end
end
Chris@0 906
# Collects Markdown link definitions ([id]: url "title") from +text+ into
# @urlrefs, keyed by the lower-cased id, and removes each definition from
# +text+.  Returns the result of String#gsub! (nil when nothing matched).
def refs_markdown( text )
    text.gsub!( MARKDOWN_REFS_RE ) do
        found = $~
        # Group 6 is read as the title, as the original did; that index
        # depends on how many groups HYPERLINK itself contributes.
        @urlrefs[found[2].downcase] = [found[3], found[6]]
        nil     # the definition itself is replaced by nothing
    end
end
Chris@0 915
# Looks +text+ up (lower-cased) in @urlrefs.
# Returns the stored [url, title] pair, or [text, nil] when +text+ is nil
# or has no entry.
def check_refs( text )
    known = text ? @urlrefs[text.downcase] : nil
    known || [text, nil]
end
Chris@0 920
# Textile image:  !>(class)src(title)!:href
IMAGE_RE = /
        (>|\s|^)            # what precedes the image: '>', whitespace or line start
        \!                  # opening
        (\<|\=|\>)?         # optional alignment atts
        (#{C})              # optional style,class atts
        (?:\. )?            # optional dot-space
        ([^\s(!]+?)         # presume this is the src
        \s?                 # optional space
        (?:\(((?:[^\(\)]|\([^\)]+\))+?)\))?     # optional title
        \!                  # closing
        (?::#{ HYPERLINK })?    # optional href
    /x
Chris@0 933
# Converts Textile images (!src(title)!:href) in +text+ to <img> tags, in
# place, optionally wrapped in a link and in a floating container when an
# alignment marker is present.  Attribute strings are parked via #shelve.
#
# Returns the result of String#gsub! (nil when nothing matched).
def inline_textile_image( text )
    text.gsub!( IMAGE_RE ) do |m|
        stln, algn, atts, url, title, href, href_a1, href_a2 = $~[1..8]
        # Called for its effect on +title+; the return value is not used.
        htmlesc title
        atts = pba( atts )
        # Escape the src so a quote in the URL cannot close the attribute.
        atts = " src=\"#{ htmlesc( url.dup ) }\"#{ atts }"
        atts << " title=\"#{ title }\"" if title
        atts << " alt=\"#{ title }\""
        # size = @getimagesize($url);
        # if($size) $atts.= " $size[3]";

        if href
            href, _alt_title = check_refs( href )
            # Escape a copy: the String may be shared with @urlrefs.
            href = htmlesc( href.dup )
        end

        # The link attributes are shelved before the image attributes,
        # which keeps the placeholder numbering of the original code.
        link_open = href ? "<a#{ shelve( " href=\"#{ href }\"" ) }>" : ''
        out = "#{ link_open }<img#{ shelve( atts ) } />"
        out << "</a>#{ href_a1 }#{ href_a2 }" if href

        if algn
            algn = h_align( algn )
            # NOTE(review): IMAGE_RE's first group only matches '>',
            # whitespace or line start, so the "<p>" case looks unreachable.
            if stln == "<p>"
                out = "<p style=\"float:#{ algn }\">#{ out }"
            else
                out = "#{ stln }<div style=\"float:#{ algn }\">#{ out }</div>"
            end
        else
            out = stln + out
        end

        out
    end
end
Chris@0 967
# Parks +val+ on @shelf and returns the placeholder that stands in for it
# until #retrieve puts it back.  Placeholders are numbered from 1.
def shelve( val )
    @shelf.push( val )
    " :redsh#" + @shelf.length.to_s + ":"
end
Chris@0 972
# Puts every value parked by #shelve back into +text+, in place, replacing
# the " :redsh#N:" placeholders in shelf order.  Returns @shelf.
def retrieve( text )
    @shelf.each_with_index do |r, i|
        # Block form on purpose: a replacement *string* would have its
        # backslash sequences (\0, \1, \\ ...) interpreted by gsub!, which
        # corrupts shelved values that contain backslashes.
        text.gsub!( " :redsh##{ i + 1 }:" ) { r }
    end
end
Chris@0 978
# Replaces, in place, every ampersand in +text+ that does not start an HTML
# entity with the dummy marker "x%x%".  An ampersand counts as an entity
# start when it is followed by letters, digits or '#' and then a semicolon.
# Returns the result of String#gsub! (nil when nothing was replaced).
def incoming_entities( text )
    bare_ampersand = /&(?![#a-z0-9]+;)/i
    text.gsub!( bare_ampersand, "x%x%" )
end
Chris@0 986
# Wraps ==...== spans of +text+ in <notextile> tags, in place.
#
# The first pass handles spans preceded by whitespace or a line start.
# The second pass lets '.' cross newlines to catch spans at the start of a
# line that the first pass could not match.
#
# Returns the result of the second String#gsub! (nil when it changed nothing).
def no_textile( text )
    text.gsub!( /(^|\s)==([^=]+.*?)==(\s|$)?/,
        '\1<notextile>\2</notextile>\3' )
    # This pattern has a single capture group (the span content), so the
    # replacement must wrap \1; referring to \2 and \3 here left the content
    # outside an empty <notextile></notextile> pair.
    text.gsub!( /^ *==([^=]+.*?)==/m,
        '<notextile>\1</notextile>' )
end
Chris@0 993
# Normalizes whitespace in +text+, in place, then hands it to #flush_left.
# Returns whatever #flush_left returns.
def clean_white_space( text )
    [
        [ /\r\n/,    "\n"   ],  # normalize line breaks
        [ /\r/,      "\n"   ],
        [ /\t/,      ' '    ],  # tabs become spaces
        [ /^ +$/,    ''     ],  # empty out lines holding only spaces
        [ /\n{3,}/,  "\n\n" ],  # squeeze runs of blank lines
        [ /"$/,      "\" "  ]   # pad a quote that ends a line
    ].each do |pattern, replacement|
        text.gsub!( pattern, replacement )
    end

    # if entire document is indented, flush
    # to the left side
    flush_left text
end
Chris@0 1007
# Removes the indentation shared by all of +text+, in place.
#
# The amount removed is the smallest number of leading spaces found on any
# line whose first non-space character is not whitespace.  When some such
# line is not indented at all, or no such line exists, +text+ is left alone.
#
# Returns the result of String#gsub! when indentation was stripped,
# nil otherwise.
def flush_left( text )
    return unless text =~ /^ /

    # Measure every candidate line once instead of probing widths 0, 1, 2...
    # until one matches: that probing never terminated when no line
    # qualified (for example a line made of a space followed by a tab).
    indt = text.scan( /^( *)\S/ ).map { |lead| lead[0].length }.min
    text.gsub!( /^ {#{indt}}/, '' ) if indt && indt.nonzero?
end
Chris@0 1019
# Turns footnote markers such as word[1] into superscript links to #fn1,
# in place.  The marker must directly follow a word character; an optional
# whitespace character after it is kept.
# Returns the result of String#gsub! (nil when nothing matched).
def footnote_ref( text )
    text.gsub!( /\b\[([0-9]+?)\](\s)?/ ) do
        number, trailing = $1, $2
        "<sup><a href=\"#fn#{ number }\">#{ number }</a></sup>#{ trailing }"
    end
end
Chris@0 1024
# Tags whose content is exempt from Textile processing.
OFFTAGS = /(code|pre|kbd|notextile)/

# A closing (group 1) or opening (group 3) off-tag, followed by the text
# (group 5) that runs up to the next off-tag or the end of the string.
OFFTAG_MATCH = /(?:(<\/#{ OFFTAGS }>)|(<#{ OFFTAGS }[^>]*>))(.*?)(?=<\/?#{ OFFTAGS }\W|\Z)/mi

# Start of an opening off-tag / of an opening or closing off-tag.
OFFTAG_OPEN = /<#{ OFFTAGS }/
OFFTAG_CLOSE = /<\/?#{ OFFTAGS }/

# Any single-line tag (HASTAG_MATCH), and either such a tag (group 1) or
# the text leading up to the next tag or line end (ALLTAG_MATCH).
HASTAG_MATCH = /(<\/?\w[^\n]*?>)/m
ALLTAG_MATCH = /(<\/?\w[^\n]*?>)|.*?(?=<\/?\w[^\n]*?>|$)/m
Chris@0 1031
# Applies glyph replacement (#pgl) and footnote references (#footnote_ref)
# to +text+, in place, while leaving tags alone and skipping the content of
# off-tags (<code>, <pre>, ...), which is passed to #htmlesc instead.
#
# +level+ is the recursion depth; it is only passed along.
def glyphs_textile( text, level = 0 )
    if text =~ HASTAG_MATCH
        depth = 0   # how many off-tags are currently open
        text.gsub!( ALLTAG_MATCH ) do |chunk|
            # Group 1 is set when this chunk is a tag rather than text.
            # Read it before any other match overwrites the globals.
            tag = $~[1]
            if tag
                ## matches are off if we're between <code>, <pre> etc.
                if chunk =~ OFFTAG_OPEN
                    depth += 1
                elsif chunk =~ OFFTAG_CLOSE
                    depth = [depth - 1, 0].max
                end
            elsif depth.zero?
                glyphs_textile( chunk, level + 1 )
            else
                htmlesc( chunk, :NoQuotes )
            end

            chunk
        end
    else
        pgl text
        footnote_ref text
    end
end
Chris@0 1058
# Pulls the content of off-tags (<code>, <pre>, ...) out of +text+, in
# place, storing it in @pre_list and leaving a <redpreN> marker behind so
# the content is shielded from Textile processing; #smooth_offtags puts it
# back.  Off-tags nested inside another off-tag are escaped and appended to
# the enclosing entry.
#
# When +escape_aftertag+ is true the extracted content is passed through
# #htmlesc, except after a <code class="..."> tag.
#
# Returns +text+.
def rip_offtags( text, escape_aftertag=true )
    if text =~ /<.*>/
        ## strip and encode <pre> content
        depth, seen = 0, {}
        text.gsub!( OFFTAG_MATCH ) do |line|
            # Keep the MatchData in a local: the matches further down would
            # otherwise overwrite the capture globals.
            found = $~
            closing, opening = found[1], found[3]
            if opening
                offtag, aftertag = found[4], found[5]
                depth += 1
                seen[offtag] = true
                if depth - seen.length > 0
                    # nested off-tag: it belongs to the enclosing entry
                    htmlesc( line, :NoQuotes )
                    @pre_list.last << line
                    line = ""
                else
                    ### htmlesc is disabled between CODE tags which will be parsed with highlighter
                    ### Regexp in formatter.rb is : /<code\s+class="(\w+)">\s?(.+)/m
                    htmlesc( aftertag, :NoQuotes ) if aftertag && escape_aftertag && !opening.match(/<code\s+class="(\w+)">/)
                    line = "<redpre##{ @pre_list.length }>"
                    # rebuild the tag, keeping only its class attribute
                    parts = opening.match(/<#{ OFFTAGS }([^>]*)>/)
                    tag = parts[1]
                    klass = parts[2].to_s.match(/(class\=\S+)/i)
                    tag << " #{ klass[1] }" if klass
                    @pre_list << "<#{ tag }>#{ aftertag }"
                end
            elsif closing and depth > 0
                if depth - seen.length > 0
                    htmlesc( line, :NoQuotes )
                    @pre_list.last << line
                    line = ""
                end
                depth -= 1 unless depth.zero?
                seen = {} if depth.zero?
            end
            line
        end
    end
    text
end
Chris@0 1099
# Puts the content held in @pre_list back into +text+, in place, replacing
# each <redpreN> marker with entry N.  Does nothing when @pre_list is empty.
# Returns the result of String#gsub!, or nil.
def smooth_offtags( text )
    return if @pre_list.empty?

    ## replace <pre> content
    text.gsub!( /<redpre#(\d+)>/ ) { |marker| @pre_list[$~[1].to_i] }
end
Chris@0 1106
# Runs the inline rules against +text+: first every rule in @rules whose
# name starts with "inline_", then every rule whose name starts with
# "glyphs_", each group in the order the rules are listed.
def inline( text )
    [/^inline_/, /^glyphs_/].each do |pattern|
        @rules.each do |rule_name|
            next unless rule_name.to_s =~ pattern
            method( rule_name ).call( text )
        end
    end
end
Chris@0 1114
# Looks up the H_ALGN_VALS entry for the alignment marker +text+.
def h_align( text )
    H_ALGN_VALS[ text ]
end
Chris@0 1118
# Looks up the V_ALGN_VALS entry for the alignment marker +text+.
def v_align( text )
    V_ALGN_VALS[ text ]
end
Chris@0 1122
# Builds the HTML for a help link that opens the Textile reference in a
# popup window of +windowW+ x +windowH+.
#
# +name+ is the link text.  +helpvar+ is the anchor within the reference
# page; the method used to read an undefined +helpvar+ and so raised
# NameError on every call.  It is now an optional trailing argument.
# NOTE(review): defaulting the anchor to +name+ is an assumption - confirm
# against the anchors the reference page actually provides.
#
# The sizes are interpolated, so Integers work as well as Strings.
def textile_popup_help( name, windowW, windowH, helpvar = name )
    " <a target=\"_blank\" href=\"http://hobix.com/textile/##{ helpvar }\" onclick=\"window.open(this.href, 'popupwindow', 'width=#{ windowW },height=#{ windowH },scrollbars,resizable'); return false;\">#{ name }</a><br />"
end
Chris@0 1126
Chris@0 1127 # HTML cleansing stuff
# Default whitelist for #clean_html: tag name => attributes to keep.
# nil and [] both mean "keep the tag, drop all of its attributes".
BASIC_TAGS = {
    # links, images and line breaks
    'a'          => ['href', 'title'],
    'img'        => ['src', 'alt', 'title'],
    'br'         => [],

    # inline formatting
    'i'          => nil,
    'u'          => nil,
    'b'          => nil,
    'pre'        => nil,
    'kbd'        => nil,
    'code'       => ['lang'],
    'cite'       => nil,
    'strong'     => nil,
    'em'         => nil,
    'ins'        => nil,
    'sup'        => nil,
    'sub'        => nil,
    'del'        => nil,

    # tables
    'table'      => nil,
    'tr'         => nil,
    'td'         => ['colspan', 'rowspan'],
    'th'         => nil,

    # lists
    'ol'         => nil,
    'ul'         => nil,
    'li'         => nil,

    # block-level text
    'p'          => nil,
    'h1'         => nil,
    'h2'         => nil,
    'h3'         => nil,
    'h4'         => nil,
    'h5'         => nil,
    'h6'         => nil,
    'blockquote' => ['cite']
}
Chris@0 1161
# Strips, in place, every tag of +text+ that is not a key of +tags+, and
# every attribute that the tag's entry in +tags+ does not list.
#
# text:: the HTML to clean (modified in place)
# tags:: Hash of tag name => Array of attribute names to keep (nil or []
#        keeps the tag and drops all attributes)
#
# Disallowed tags are replaced by a space.  A +src+ whose value starts with
# a scheme other than http(s) is dropped.
#
# Returns the result of the final String#gsub! (nil when no tag was found).
def clean_html( text, tags = BASIC_TAGS )
    text.gsub!( /<!\[CDATA\[/, '' )
    text.gsub!( /<(\/*)(\w+)([^>]*)>/ ) do
        raw = $~
        tag = raw[2].downcase
        if tags.has_key? tag
            pcs = [tag]
            ( tags[tag] || [] ).each do |prop|
                # try double-quoted, single-quoted, then unquoted values
                ['"', "'", ''].each do |q|
                    q2 = ( q != '' ? q : '\s' )
                    next unless raw[3] =~ /#{prop}\s*=\s*#{q}([^#{q2}]+)#{q}/i
                    attrv = $1
                    # A rejected src is dropped altogether; moving on to the
                    # next quote style would re-match the same value through
                    # the unquoted pattern.
                    break if prop == 'src' and attrv =~ %r{^(?!http)\w+:}
                    # Use attrv, not $1: the scheme test above resets the
                    # match globals, which made $1 nil for any http src.
                    # Quotes become &quot; - a backslash does not escape a
                    # quote inside an HTML attribute.
                    pcs << "#{prop}=\"#{attrv.gsub('"', '&quot;')}\""
                    break
                end
            end
            "<#{raw[1]}#{pcs.join " "}>"
        else
            " "
        end
    end
end
Chris@0 1186
# Tags that #escape_html_tags lets through unescaped.
ALLOWED_TAGS = %w(redpre pre code notextile)

# Escapes, in place, the angle brackets of every tag in +text+ whose name is
# not listed in ALLOWED_TAGS.  A tag that is never closed only has its '<'
# escaped.  Returns the result of String#gsub! (nil when nothing matched).
def escape_html_tags(text)
    text.gsub!(%r{<(\/?([!\w]+)[^<>\n]*)(>?)}) do |m|
        inner, tag_name, closer = $1, $2, $3
        if ALLOWED_TAGS.include?(tag_name)
            "<#{inner}#{closer}"
        else
            # closer is always a String here ('' or '>'), so the core
            # String#empty? does the job without needing the
            # String#blank? extension.
            "&lt;#{inner}#{closer.empty? ? '' : '&gt;'}"
        end
    end
end
Chris@0 1192 end
Chris@0 1193