annotate www/p/lib/js/showdown.js @ 101:52e44ee1c791 tip master

enabled all scores in autostart script
author Rob Canning <rc@kiben.net>
date Tue, 21 Apr 2015 16:20:57 +0100
parents 49c94f63b8b0
children
rev   line source
rc-web@42 1 //
rc-web@42 2 // showdown.js -- A javascript port of Markdown.
rc-web@42 3 //
rc-web@42 4 // Copyright (c) 2007 John Fraser.
rc-web@42 5 //
rc-web@42 6 // Original Markdown Copyright (c) 2004-2005 John Gruber
rc-web@42 7 // <http://daringfireball.net/projects/markdown/>
rc-web@42 8 //
rc-web@42 9 // Redistributable under a BSD-style open source license.
rc-web@42 10 // See license.txt for more information.
rc-web@42 11 //
rc-web@42 12 // The full source distribution is at:
rc-web@42 13 //
rc-web@42 14 // A A L
rc-web@42 15 // T C A
rc-web@42 16 // T K B
rc-web@42 17 //
rc-web@42 18 // <http://www.attacklab.net/>
rc-web@42 19 //
rc-web@42 20
rc-web@42 21 //
rc-web@42 22 // Wherever possible, Showdown is a straight, line-by-line port
rc-web@42 23 // of the Perl version of Markdown.
rc-web@42 24 //
rc-web@42 25 // This is not a normal parser design; it's basically just a
rc-web@42 26 // series of string substitutions. It's hard to read and
rc-web@42 27 // maintain this way, but keeping Showdown close to the original
rc-web@42 28 // design makes it easier to port new features.
rc-web@42 29 //
rc-web@42 30 // More importantly, Showdown behaves like markdown.pl in most
rc-web@42 31 // edge cases. So web applications can do client-side preview
rc-web@42 32 // in Javascript, and then build identical HTML on the server.
rc-web@42 33 //
rc-web@42 34 // This port needs the new RegExp functionality of ECMA 262,
rc-web@42 35 // 3rd Edition (i.e. Javascript 1.5). Most modern web browsers
rc-web@42 36 // should do fine. Even with the new regular expression features,
rc-web@42 37 // We do a lot of work to emulate Perl's regex functionality.
rc-web@42 38 // The tricky changes in this file mostly have the "attacklab:"
rc-web@42 39 // label. Major or self-explanatory changes don't.
rc-web@42 40 //
rc-web@42 41 // Smart diff tools like Araxis Merge will be able to match up
rc-web@42 42 // this file with markdown.pl in a useful way. A little tweaking
rc-web@42 43 // helps: in a copy of markdown.pl, replace "#" with "//" and
rc-web@42 44 // replace "$text" with "text". Be sure to ignore whitespace
rc-web@42 45 // and line endings.
rc-web@42 46 //
rc-web@42 47
rc-web@42 48
rc-web@42 49 //
rc-web@42 50 // Showdown usage:
rc-web@42 51 //
rc-web@42 52 // var text = "Markdown *rocks*.";
rc-web@42 53 //
rc-web@42 54 // var converter = new Showdown.converter();
rc-web@42 55 // var html = converter.makeHtml(text);
rc-web@42 56 //
rc-web@42 57 // alert(html);
rc-web@42 58 //
rc-web@42 59 // Note: move the sample code to the bottom of this
rc-web@42 60 // file before uncommenting it.
rc-web@42 61 //
rc-web@42 62
rc-web@42 63
rc-web@42 64 //
rc-web@42 65 // Showdown namespace
rc-web@42 66 //
rc-web@42 67 var Showdown = {};
rc-web@42 68
rc-web@42 69 //
rc-web@42 70 // converter
rc-web@42 71 //
rc-web@42 72 // Wraps all "globals" so that the only thing
rc-web@42 73 // exposed is makeHtml().
rc-web@42 74 //
rc-web@42 75 Showdown.converter = function() {
rc-web@42 76
rc-web@42 77 //
rc-web@42 78 // Globals:
rc-web@42 79 //
rc-web@42 80
// Per-conversion lookup tables shared by the helper routines below.
// makeHtml() assigns fresh containers to all three before converting.
var g_urls, g_titles, g_html_blocks;

// List nesting depth: incremented on entry to _ProcessListItems() and
// decremented on exit, so a non-zero value means "inside a list"
// (see _ProcessListItems() for why that matters).
var g_list_level = 0;
rc-web@42 89
rc-web@42 90
this.makeHtml = function(text) {
	//
	// Main function: converts the Markdown source in `text` to HTML and
	// returns the resulting string.
	//
	// The order in which other subs are called here is essential. Link and
	// image substitutions need to happen before
	// _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in
	// the <a> and <img> tags get encoded.
	//

	// Clear the global hashes. If we don't clear these, you get conflicts
	// from other articles when generating a page which contains more than
	// one article (e.g. an index page that shows the N most recent
	// articles).
	//
	// g_urls and g_titles are keyed by link id (a string), so they are
	// plain objects rather than Arrays: an Array has own properties such as
	// "length" that would be mistaken for stored link definitions.
	g_urls = {};
	g_titles = {};
	g_html_blocks = [];

	// Start every conversion outside of any list. _ProcessListItems() only
	// restores the level on normal completion, so a previous conversion
	// that threw must not leak its nesting depth into this one.
	g_list_level = 0;

	// attacklab: Replace ~ with ~T
	// This lets us use tilde as an escape char to avoid md5 hashes
	// The choice of character is arbitrary; anything that isn't
	// magic in Markdown will work.
	text = text.replace(/~/g,"~T");

	// attacklab: Replace $ with ~D
	// RegExp interprets $ as a special character
	// when it's in a replacement string
	text = text.replace(/\$/g,"~D");

	// Standardize line endings
	text = text.replace(/\r\n/g,"\n"); // DOS to Unix
	text = text.replace(/\r/g,"\n"); // Mac to Unix

	// Make sure text begins and ends with a couple of newlines:
	text = "\n\n" + text + "\n\n";

	// Convert all tabs to spaces.
	text = _Detab(text);

	// Strip any lines consisting only of spaces and tabs.
	// This makes subsequent regexen easier to write, because we can
	// match consecutive blank lines with /\n+/ instead of something
	// contorted like /[ \t]*\n+/ .
	text = text.replace(/^[ \t]+$/mg,"");

	// Handle github codeblocks prior to running HashHTML so that
	// HTML contained within the codeblock gets escaped properly
	text = _DoGithubCodeBlocks(text);

	// Turn block-level HTML blocks into hash entries
	text = _HashHTMLBlocks(text);

	// Strip link definitions, store in hashes.
	text = _StripLinkDefinitions(text);

	text = _RunBlockGamut(text);

	text = _UnescapeSpecialChars(text);

	// attacklab: Restore dollar signs
	// ("$$" in a replacement string produces a single literal "$")
	text = text.replace(/~D/g,"$$");

	// attacklab: Restore tildes
	text = text.replace(/~T/g,"~");

	return text;
};
rc-web@42 156
rc-web@42 157
var _StripLinkDefinitions = function(text) {
	//
	// Strips link definitions from text, stores the URLs and titles in
	// the g_urls / g_titles hashes (keyed by lower-cased link id), and
	// returns the text with the definitions removed.
	//
	// Link defs are in the form: ^[id]: url "optional title"
	//
	// The pattern, piece by piece:
	//
	//   ^[ ]{0,3}\[(.+)\]:    id = $1 (attacklab: g_tab_width - 1)
	//   [ \t]*
	//   \n?                   maybe *one* newline
	//   [ \t]*
	//   <?(\S+?)>?            url = $2
	//   [ \t]*
	//   \n?                   maybe one newline
	//   [ \t]*
	//   (?:
	//     (\n*)               any lines skipped = $3 (attacklab: lookbehind removed)
	//     ["(]
	//     (.+?)               title = $4
	//     [")]
	//     [ \t]*
	//   )?                    title is optional
	//   (?:\n+|$)             end of the definition
	//
	// The terminator must be "$", not Perl's "\Z": JavaScript has no \Z
	// anchor and treats it as a literal "Z", which cut every URL short at
	// its first capital Z. Because "\n+" is tried first, "$" (even with
	// the /m flag) only ever decides the match at the very end of input.
	//
	text = text.replace(/^[ ]{0,3}\[(.+)\]:[ \t]*\n?[ \t]*<?(\S+?)>?[ \t]*\n?[ \t]*(?:(\n*)["(](.+?)[")][ \t]*)?(?:\n+|$)/gm,
		function (wholeMatch,m1,m2,m3,m4) {
			// Link IDs are case-insensitive
			m1 = m1.toLowerCase();
			g_urls[m1] = _EncodeAmpsAndAngles(m2);
			if (m3) {
				// Oops, found blank lines, so it's not a title.
				// Put back the parenthetical statement we stole.
				return m3+m4;
			} else if (m4) {
				g_titles[m1] = m4.replace(/"/g,"&quot;");
			}

			// Completely remove the definition from the text
			return "";
		}
	);

	return text;
};
rc-web@42 206
rc-web@42 207
var _HashHTMLBlocks = function(text) {
	//
	// Replaces block-level raw HTML in `text` with hash markers (via
	// hashElement) so that later passes leave it alone. Returns the text
	// with the markers in place.
	//

	// attacklab: Double up blank lines to reduce lookaround
	text = text.replace(/\n/g,"\n\n");

	// Hashify HTML blocks:
	// We only want to do this for block-level HTML tags, such as headers,
	// lists, and tables. That's because we still want to wrap <p>s around
	// "paragraphs" that are wrapped in non-block-level tags, such as anchors,
	// phrase emphasis, and spans. The list of tags we're looking for is
	// hard-coded. Both patterns below are built from these lists so the
	// lists and the patterns cannot drift apart (the nested-block pattern
	// used to be a literal that lacked the HTML5 tags):
	var block_tags_a = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del|style|section|header|footer|nav|article|aside";
	var block_tags_b = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|style|section|header|footer|nav|article|aside";

	// First, look for nested blocks, e.g.:
	//   <div>
	//     <div>
	//     tags for inner block must be indented.
	//     </div>
	//   </div>
	//
	// The outermost tags must start at the left margin for this to match, and
	// the inner nested divs must be indented.
	// We need to do this before the next, more liberal match, because the next
	// match will start at the first `<div>` and stop at the first `</div>`.
	//
	// attacklab: This regex can be expensive when it fails.
	//
	//   (                   save in $1
	//     ^                 start of line (with /m)
	//     <($block_tags_a)  start tag = $2
	//     \b                word break
	//                       attacklab: hack around khtml/pcre bug...
	//     [^\r]*?\n         any number of lines, minimally matching
	//     </\2>             the matching end tag
	//     [ \t]*            trailing spaces/tabs
	//     (?=\n+)           followed by a newline
	//   )                   attacklab: there are sentinel newlines at end of document
	var nested_block = new RegExp("^(<(" + block_tags_a + ")\\b[^\\r]*?\\n<\\/\\2>[ \\t]*(?=\\n+))", "gm");
	text = text.replace(nested_block,hashElement);

	//
	// Now match more liberally, simply from `\n<tag>` to `</tag>\n`
	//
	//   (                   save in $1
	//     ^                 start of line (with /m)
	//     <($block_tags_b)  start tag = $2
	//     \b                word break
	//                       attacklab: hack around khtml/pcre bug...
	//     [^\r]*?           any number of lines, minimally matching
	//     .*</\2>           the matching end tag
	//     [ \t]*            trailing spaces/tabs
	//     (?=\n+)\n         followed by a newline, which is consumed
	//   )                   attacklab: there are sentinel newlines at end of document
	var liberal_block = new RegExp("^(<(" + block_tags_b + ")\\b[^\\r]*?.*<\\/\\2>[ \\t]*(?=\\n+)\\n)", "gm");
	text = text.replace(liberal_block,hashElement);

	// Special case just for <hr />. It was easier to make a special case than
	// to make the other regex more complicated.
	//
	//   (                   save in $1
	//     \n                start after a newline
	//     [ ]{0,3}
	//     (<(hr)            start tag = $2
	//     \b                word break
	//     ([^<>])*?
	//     \/?>)             the matching end tag
	//     [ \t]*
	//     (?=\n{2,})        followed by a blank line
	//   )
	text = text.replace(/(\n[ ]{0,3}(<(hr)\b([^<>])*?\/?>)[ \t]*(?=\n{2,}))/g,hashElement);

	// Special case for standalone HTML comments:
	//
	//   (                   save in $1
	//     \n\n              starting after a blank line
	//     [ ]{0,3}          attacklab: g_tab_width - 1
	//     <!
	//     (--[^\r]*?--\s*)+
	//     >
	//     [ \t]*
	//     (?=\n{2,})        followed by a blank line
	//   )
	text = text.replace(/(\n\n[ ]{0,3}<!(--[^\r]*?--\s*)+>[ \t]*(?=\n{2,}))/g,hashElement);

	// PHP and ASP-style processor instructions (<?...?> and <%...%>)
	//
	//   (?:\n\n)            starting after a blank line
	//   (                   save in $1
	//     [ ]{0,3}          attacklab: g_tab_width - 1
	//     (?:
	//       <([?%])         $2
	//       [^\r]*?
	//       \2>
	//     )
	//     [ \t]*
	//     (?=\n{2,})        followed by a blank line
	//   )
	text = text.replace(/(?:\n\n)([ ]{0,3}(?:<([?%])[^\r]*?\2>)[ \t]*(?=\n{2,}))/g,hashElement);

	// attacklab: Undo double lines (see comment at top of this function)
	text = text.replace(/\n\n/g,"\n");
	return text;
};
rc-web@42 331
var hashElement = function(wholeMatch,m1) {
	//
	// String.replace() callback: stashes the captured block ($1) in
	// g_html_blocks and returns the marker that stands in for it.
	//
	var stored = m1
		.replace(/\n\n/g,"\n")   // undo double lines
		.replace(/^\n/,"")       // drop one leading newline
		.replace(/\n+$/g,"");    // strip trailing blank lines

	// push() returns the new length, so the block's key is length - 1.
	var key = g_html_blocks.push(stored) - 1;

	// The marker is "~KxK" (x = key), padded with blank lines.
	return "\n\n~K" + key + "K\n\n";
};
rc-web@42 347
var _RunBlockGamut = function(text) {
	//
	// Applies, in order, every transformation that produces block-level
	// markup: headers, horizontal rules, lists, code blocks, block quotes
	// and finally paragraphs.
	//
	text = _DoHeaders(text);

	// Horizontal rules: a line of three or more *, - or _ characters,
	// optionally separated by single spaces. All three variants are
	// replaced by the same hashed "<hr />" block.
	var hr_key = hashBlock("<hr />");
	var hr_patterns = [
		/^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$/gm,
		/^[ ]{0,2}([ ]?\-[ ]?){3,}[ \t]*$/gm,
		/^[ ]{0,2}([ ]?\_[ ]?){3,}[ \t]*$/gm
	];
	for (var i = 0; i < hr_patterns.length; i++) {
		text = text.replace(hr_patterns[i],hr_key);
	}

	text = _DoBlockQuotes(_DoCodeBlocks(_DoLists(text)));

	// makeHtml() already ran _HashHTMLBlocks() once, to protect raw HTML
	// in the original Markdown source. This second run protects the markup
	// generated above, so that _FormParagraphs() does not wrap <p> tags
	// around block-level tags.
	return _FormParagraphs(_HashHTMLBlocks(text));
};
rc-web@42 374
rc-web@42 375
var _RunSpanGamut = function(text) {
	//
	// Applies, in order, every transformation that happens *inside*
	// block-level elements such as paragraphs, headers and list items.
	//

	// Code spans and escaping come first.
	text = _EncodeBackslashEscapes(
		_EscapeSpecialCharsWithinTagAttributes(
			_DoCodeSpans(text)));

	// Images before anchors, because ![foo][f] looks like an anchor.
	text = _DoAnchors(_DoImages(text));

	// Autolinks such as `<http://example.com/>` must be handled after
	// _DoAnchors(), because < and > may delimit the URL of an inline link
	// like [this](<url>).
	text = _DoItalicsAndBold(
		_EncodeAmpsAndAngles(
			_DoAutoLinks(text)));

	// Hard breaks: trailing space(s) before a newline become <br />.
	// NOTE(review): as written this matches ONE or more trailing spaces;
	// Markdown's rule is two or more. Confirm the pattern against the
	// upstream source in case whitespace was collapsed in this copy.
	return text.replace(/ +\n/g," <br />\n");
};
rc-web@42 403
var _EscapeSpecialCharsWithinTagAttributes = function(text) {
	//
	// Inside HTML tags and comments -- i.e. between < and > -- encode the
	// characters \ ` * _ so that they are not later mistaken for Markdown
	// code, italics or strong markers.
	//

	// Matches an HTML tag or an HTML comment. See Friedl's
	// "Mastering Regular Expressions", 2nd Ed., pp. 200-201.
	var tag_or_comment = /(<[a-z\/!$]("[^"]*"|'[^']*'|[^'">])*>|<!(--.*?--\s*)+>)/gi;

	return text.replace(tag_or_comment, function(wholeMatch) {
		// A <code> or </code> found in the middle of the match is turned
		// into a backtick before the escaping step.
		var withBackticks = wholeMatch.replace(/(.)<\/?code>(?=.)/g,"$1`");
		return escapeCharacters(withBackticks,"\\`*_");
	});
};
rc-web@42 422
var _DoAnchors = function(text) {
	//
	// Turn Markdown link shortcuts into XHTML <a> tags.
	//
	// Three link syntaxes are rewritten, in this order, each through
	// writeAnchorTag. Every pattern exposes the same seven capture groups
	// (padded with empty groups where needed) so one callback serves all.
	//
	var link_patterns = [
		//
		// 1. Reference-style links: [link text] [id]
		//
		//   (                        wrap whole match in $1
		//     \[
		//     (                      link text = $2
		//       (?:
		//         \[[^\]]*\]         allow brackets nested one level
		//         |
		//         [^\[\]]            or anything else
		//       )*
		//     )
		//     \]
		//     [ ]?                   one optional space
		//     (?:\n[ ]*)?            one optional newline followed by spaces
		//     \[
		//     (.*?)                  id = $3
		//     \]
		//   )()()()()                pad remaining backreferences
		//
		/(\[((?:\[[^\]]*\]|[^\[\]])*)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g,

		//
		// 2. Inline-style links: [link text](url "optional title")
		//
		//   (                        wrap whole match in $1
		//     \[
		//     (                      link text = $2
		//       (?:
		//         \[[^\]]*\]         allow brackets nested one level
		//         |
		//         [^\[\]]            or anything else
		//       )*
		//     )
		//     \]
		//     \(                     literal paren
		//     [ \t]*
		//     ()                     no id, so leave $3 empty
		//     <?(.*?)>?              href = $4
		//     [ \t]*
		//     (                      $5
		//       (['"])               quote char = $6
		//       (.*?)                title = $7
		//       \6                   matching quote
		//       [ \t]*               ignore any spaces/tabs between closing quote and )
		//     )?                     title is optional
		//     \)
		//   )
		//
		/(\[((?:\[[^\]]*\]|[^\[\]])*)\]\([ \t]*()<?(.*?)>?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g,

		//
		// 3. Reference-style shortcuts: [link text]
		//    These must come last in case you've also got [link test][1]
		//    or [link test](/foo)
		//
		//   (                        wrap whole match in $1
		//     \[
		//     ([^\[\]]+)             link text = $2; can't contain '[' or ']'
		//     \]
		//   )()()()()()              pad rest of backreferences
		//
		/(\[([^\[\]]+)\])()()()()()/g
	];

	for (var i = 0; i < link_patterns.length; i++) {
		text = text.replace(link_patterns[i],writeAnchorTag);
	}

	return text;
};
rc-web@42 507
var writeAnchorTag = function(wholeMatch,m1,m2,m3,m4,m5,m6,m7) {
	//
	// String.replace() callback shared by the patterns in _DoAnchors().
	//
	//   m1 = the whole matched link markup
	//   m2 = link text
	//   m3 = reference id (empty for inline links and shortcuts)
	//   m4 = inline url   (empty for reference links and shortcuts)
	//   m7 = inline title (may be undefined when the title group is absent)
	//
	// Returns the <a> tag, or the original markup unchanged when a
	// reference link has no matching definition.
	//
	if (m7 == undefined) m7 = "";
	var whole_match = m1;
	var link_text = m2;
	var link_id = m3.toLowerCase();
	var url = m4;
	var title = m7;

	if (url == "") {
		if (link_id == "") {
			// lower-case and turn embedded newlines into spaces
			link_id = link_text.toLowerCase().replace(/ ?\n/g," ");
		}

		// Only accept a stored definition if it really is a string. A bare
		// "!= undefined" test also succeeds for properties inherited from
		// the container (e.g. "constructor", or "length" on an Array), and
		// then a function or number ends up being treated as the url.
		var known_url = g_urls[link_id];
		if (typeof known_url == "string") {
			url = known_url;
			var known_title = g_titles[link_id];
			if (typeof known_title == "string") {
				title = known_title;
			}
		}
		else if (whole_match.search(/\(\s*\)$/m)>-1) {
			// Special case for explicit empty url
			url = "";
		}
		else {
			// Undefined reference: leave the markup as it was written.
			return whole_match;
		}
	}

	url = escapeCharacters(url,"*_");
	var result = "<a href=\"" + url + "\"";

	if (title != "") {
		title = title.replace(/"/g,"&quot;");
		title = escapeCharacters(title,"*_");
		result += " title=\"" + title + "\"";
	}

	result += ">" + link_text + "</a>";

	return result;
};
rc-web@42 552
rc-web@42 553
var _DoImages = function(text) {
	//
	// Turn Markdown image shortcuts into <img> tags.
	//
	// Both syntaxes are rewritten through writeImageTag; the patterns
	// expose the same seven capture groups so one callback serves both.
	//

	//
	// 1. Reference-style labeled images: ![alt text][id]
	//
	//   (                   wrap whole match in $1
	//     !\[
	//     (.*?)             alt text = $2
	//     \]
	//     [ ]?              one optional space
	//     (?:\n[ ]*)?       one optional newline followed by spaces
	//     \[
	//     (.*?)             id = $3
	//     \]
	//   )()()()()           pad rest of backreferences
	//
	var reference_image = /(!\[(.*?)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g;

	//
	// 2. Inline images: ![alt text](url "optional title")
	//    Don't forget: encode * and _
	//
	//   (                   wrap whole match in $1
	//     !\[
	//     (.*?)             alt text = $2
	//     \]
	//     \s?               one optional whitespace character
	//     \(                literal paren
	//     [ \t]*
	//     ()                no id, so leave $3 empty
	//     <?(\S+?)>?        src url = $4
	//     [ \t]*
	//     (                 $5
	//       (['"])          quote char = $6
	//       (.*?)           title = $7
	//       \6              matching quote
	//       [ \t]*
	//     )?                title is optional
	//     \)
	//   )
	//
	var inline_image = /(!\[(.*?)\]\s?\([ \t]*()<?(\S+?)>?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g;

	return text
		.replace(reference_image,writeImageTag)
		.replace(inline_image,writeImageTag);
};
rc-web@42 611
var writeImageTag = function(wholeMatch,m1,m2,m3,m4,m5,m6,m7) {
	//
	// String.replace() callback shared by the patterns in _DoImages().
	//
	//   m1 = the whole matched image markup
	//   m2 = alt text
	//   m3 = reference id (empty for inline images)
	//   m4 = inline url   (empty for reference images)
	//   m7 = inline title (may be undefined when the title group is absent)
	//
	// Returns the <img /> tag, or the original markup unchanged when a
	// reference image has no matching definition.
	//
	var whole_match = m1;
	var alt_text = m2;
	var link_id = m3.toLowerCase();
	var url = m4;
	var title = m7;

	if (!title) title = "";

	if (url == "") {
		if (link_id == "") {
			// lower-case and turn embedded newlines into spaces
			link_id = alt_text.toLowerCase().replace(/ ?\n/g," ");
		}

		// Only accept a stored definition if it really is a string. A bare
		// "!= undefined" test also succeeds for properties inherited from
		// the container (e.g. "constructor", or "length" on an Array), and
		// then a function or number ends up being treated as the url.
		var known_url = g_urls[link_id];
		if (typeof known_url != "string") {
			// Undefined reference: leave the markup as it was written.
			return whole_match;
		}
		url = known_url;

		var known_title = g_titles[link_id];
		if (typeof known_title == "string") {
			title = known_title;
		}
	}

	alt_text = alt_text.replace(/"/g,"&quot;");
	url = escapeCharacters(url,"*_");
	var result = "<img src=\"" + url + "\" alt=\"" + alt_text + "\"";

	// attacklab: Markdown.pl adds empty title attributes to images.
	// Replicate this bug: the title attribute is always written, even
	// when the title is empty.
	title = title.replace(/"/g,"&quot;");
	title = escapeCharacters(title,"*_");
	result += " title=\"" + title + "\"";

	result += " />";

	return result;
};
rc-web@42 656
rc-web@42 657
var _DoHeaders = function(text) {
	//
	// Converts Setext-style and atx-style headers into hashed <hN> blocks.
	// Every header gets an id attribute derived from its raw text.
	//

	// The id is the header text stripped of every non-word character,
	// lower-cased.
	var headerId = function(m) {
		return m.replace(/[^\w]/g, '').toLowerCase();
	};

	// Builds and hashes one header element of the given level.
	var makeHeader = function(level, raw) {
		return hashBlock("<h" + level + ' id="' + headerId(raw) + '">' + _RunSpanGamut(raw) + "</h" + level + ">");
	};

	// Setext-style headers:
	//   Header 1
	//   ========
	//
	//   Header 2
	//   --------
	//
	text = text.replace(/^(.+)[ \t]*\n=+[ \t]*\n+/gm, function(wholeMatch,title) {
		return makeHeader(1, title);
	});
	text = text.replace(/^(.+)[ \t]*\n-+[ \t]*\n+/gm, function(wholeMatch,title) {
		return makeHeader(2, title);
	});

	// atx-style headers:
	//   # Header 1
	//   ## Header 2
	//   ## Header 2 with closing hashes ##
	//   ...
	//   ###### Header 6
	//
	//   ^(\#{1,6})   $1 = string of #'s
	//   [ \t]*
	//   (.+?)        $2 = Header text
	//   [ \t]*
	//   \#*          optional closing #'s (not counted)
	//   \n+
	//
	text = text.replace(/^(\#{1,6})[ \t]*(.+?)[ \t]*\#*\n+/gm, function(wholeMatch,hashes,title) {
		// The number of leading #'s is the header level.
		return makeHeader(hashes.length, title);
	});

	return text;
};
rc-web@42 703
rc-web@42 704 // This declaration keeps Dojo compressor from outputting garbage:
rc-web@42 705 var _ProcessListItems;
rc-web@42 706
var _DoLists = function(text) {
	//
	// Form HTML ordered (numbered) and unordered (bulleted) lists.
	//

	// attacklab: add sentinel to hack around khtml/safari bug:
	// http://bugs.webkit.org/show_bug.cgi?id=11231
	text += "~0";

	// "ul" when the opening list marker contains a bullet (*, + or -),
	// otherwise "ol".
	var listTagFor = function(marker_run) {
		return (marker_run.search(/[*+-]/g)>-1) ? "ul" : "ol";
	};

	// Turn double returns into triple returns, so that we can make a
	// paragraph for the last item in a list, if necessary; then split the
	// list into <li> items.
	var renderItems = function(list) {
		return _ProcessListItems(list.replace(/\n{2,}/g,"\n\n\n"));
	};

	// Re-usable pattern to match any entire ul or ol list:
	//
	//   (                         $1 = whole list
	//     (                       $2
	//       [ ]{0,3}              attacklab: g_tab_width - 1
	//       ([*+-]|\d+[.])        $3 = first list item marker
	//       [ \t]+
	//     )
	//     [^\r]+?
	//     (                       $4
	//       ~0                    sentinel for workaround; should be $
	//       |
	//       \n{2,}
	//       (?=\S)
	//       (?!                   Negative lookahead for another list item marker
	//         [ \t]*
	//         (?:[*+-]|\d+[.])[ \t]+
	//       )
	//     )
	//   )
	//
	// Inside a list the pattern is anchored to a line start; at top level
	// it additionally requires (and captures) the run-up before the list.
	var nested_list = /^(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/gm;
	var top_level_list = /(\n\n|^\n?)(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/g;

	if (g_list_level) {
		text = text.replace(nested_list,function(wholeMatch,list,marker_run) {
			var tag = listTagFor(marker_run);

			// Trim any trailing whitespace, to put the closing `</$list_type>`
			// up on the preceding line, to get it past the current stupid
			// HTML block parser. This is a hack to work around the terrible
			// hack that is the HTML block parser.
			var items = renderItems(list).replace(/\s+$/,"");

			return "<"+tag+">" + items + "</"+tag+">\n";
		});
	} else {
		text = text.replace(top_level_list,function(wholeMatch,runup,list,marker_run) {
			var tag = listTagFor(marker_run);
			return runup + "<"+tag+">\n" + renderItems(list) + "</"+tag+">\n";
		});
	}

	// attacklab: strip sentinel
	return text.replace(/~0/,"");
};
rc-web@42 780
_ProcessListItems = function(list_str) {
	//
	// Process the contents of a single ordered or unordered list, splitting it
	// into individual list items. Returns the items as "<li>...</li>\n" markup.
	//
	// The g_list_level global keeps track of when we're inside a list.
	// Each time we enter a list, we increment it; when we leave a list,
	// we decrement. If it's zero, we're not in a list anymore.
	//
	// We do this because when we're not inside a list, we want to treat
	// something like this:
	//
	//    I recommend upgrading to version
	//    8. Oops, now this line is treated
	//    as a sub-list.
	//
	// As a single paragraph, despite the fact that the second line starts
	// with a digit-period-space sequence.
	//
	// Whereas when we're inside a list (or sub-list), that line will be
	// treated as the start of a sub-list. What a kludge, huh? This is
	// an aspect of Markdown's syntax that's hard to parse perfectly
	// without resorting to mind-reading. Perhaps the solution is to
	// change the syntax rules such that sub-lists must start with a
	// starting cardinal number; e.g. "1." or "a.".
	//

	g_list_level++;

	// The try/finally guarantees the level is restored even when one of
	// the nested gamut calls throws; otherwise the converter would stay
	// "inside a list" for every later conversion.
	try {
		// trim trailing blank lines:
		list_str = list_str.replace(/\n{2,}$/,"\n");

		// attacklab: add sentinel to emulate \z
		list_str += "~0";

		//
		//   (\n)?                               leading line = $1
		//   (^[ \t]*)                           leading whitespace = $2
		//   ([*+-]|\d+[.]) [ \t]+               list marker = $3
		//   ([^\r]+?                            list item text = $4
		//   (\n{1,2}))
		//   (?= \n* (~0 | \2 ([*+-]|\d+[.]) [ \t]+))
		//
		list_str = list_str.replace(/(\n)?(^[ \t]*)([*+-]|\d+[.])[ \t]+([^\r]+?(\n{1,2}))(?=\n*(~0|\2([*+-]|\d+[.])[ \t]+))/gm,
			function(wholeMatch,m1,m2,m3,m4){
				var item = m4;
				var leading_line = m1;

				if (leading_line || (item.search(/\n{2,}/)>-1)) {
					// Preceded by a blank line, or containing one: the
					// item holds block-level content.
					item = _RunBlockGamut(_Outdent(item));
				}
				else {
					// Recursion for sub-lists:
					item = _DoLists(_Outdent(item));
					item = item.replace(/\n$/,""); // chomp(item)
					item = _RunSpanGamut(item);
				}

				return "<li>" + item + "</li>\n";
			}
		);

		// attacklab: strip sentinel
		list_str = list_str.replace(/~0/g,"");
	} finally {
		g_list_level--;
	}

	return list_str;
};
rc-web@42 851
rc-web@42 852
var _DoCodeBlocks = function(text) {
  //
  // Process Markdown `<pre><code>` blocks.
  //
  // Param:   text - the document text.
  // Returns: the text with every indented code block replaced by the
  //          placeholder that hashBlock() hands back.
  //
  // The pattern, spelled out:
  //
  //   (?:\n\n|^)
  //   (                        $1 = the code block -- one or more lines,
  //     (?:                         each starting with a tab or four spaces
  //       (?:[ ]{4}|\t)             (attacklab: g_tab_width)
  //       .*\n+
  //     )+
  //   )
  //   (\n*[ ]{0,3}[^ \t\n]|(?=~0))   $2 = whatever follows the block
  //
  var indentedBlock = /(?:\n\n|^)((?:(?:[ ]{4}|\t).*\n+)+)(\n*[ ]{0,3}[^ \t\n]|(?=~0))/g;

  // Turns one matched block into hashed <pre><code> markup; the text that
  // followed the block ($2) is handed back untouched after the placeholder.
  function renderBlock(wholeMatch, rawLines, follower) {
    var body = _Detab(_EncodeCode(_Outdent(rawLines)))
      .replace(/^\n+/g,"")    // trim leading newlines
      .replace(/\n+$/g,"");   // trim trailing newlines

    return hashBlock("<pre><code>" + body + "\n</code></pre>") + follower;
  }

  // attacklab: sentinel workarounds for lack of \A and \Z, safari\khtml bug.
  // The "~0" sentinel is appended first and stripped again at the end.
  return (text + "~0").replace(indentedBlock, renderBlock).replace(/~0/,"");
};
rc-web@42 895
var _DoGithubCodeBlocks = function(text) {
  //
  // Process Github-style code blocks
  // Example:
  // ```ruby
  // def hello_world(x)
  //   puts "Hello, #{x}"
  // end
  // ```
  //
  // Param:   text - the document text.
  // Returns: the text with every fenced block replaced by the placeholder
  //          that hashBlock() hands back.
  //
  // Whatever follows the opening fence on the same line becomes the class
  // attribute of the <code> element. It is trimmed and attribute-escaped
  // first, so a stray quote or angle bracket cannot break out of class="...".
  //

  // attacklab: sentinel workarounds for lack of \A and \Z, safari\khtml bug
  text += "~0";

  text = text.replace(/(?:^|\n)```(.*)\n([\s\S]*?)\n```/g,
    function(wholeMatch,m1,m2) {
      var language = m1.replace(/^\s+|\s+$/g,"")
        .replace(/&/g,"&amp;")
        .replace(/"/g,"&quot;")
        .replace(/</g,"&lt;")
        .replace(/>/g,"&gt;");
      var codeblock = m2;

      codeblock = _EncodeCode(codeblock);
      codeblock = _Detab(codeblock);
      codeblock = codeblock.replace(/^\n+/g,""); // trim leading newlines
      codeblock = codeblock.replace(/\n+$/g,""); // trim trailing newlines

      codeblock = "<pre><code" + (language ? " class=\"" + language + '"' : "") + ">" + codeblock + "\n</code></pre>";

      return hashBlock(codeblock);
    }
  );

  // attacklab: strip sentinel
  text = text.replace(/~0/,"");

  return text;
};
rc-web@42 932
var hashBlock = function(text) {
  //
  // Store a finished block in g_html_blocks and return a placeholder for it.
  //
  // Param:   text - the block markup to store.
  // Returns: "~K<n>K" wrapped in blank lines, where <n> is the index the
  //          block received in g_html_blocks.
  //
  var trimmed = text.replace(/(^\n+|\n+$)/g,""); // drop leading/trailing newlines
  var index = g_html_blocks.push(trimmed) - 1;   // push() returns the new length
  return "\n\n~K" + index + "K\n\n";
};
rc-web@42 937
var _DoCodeSpans = function(text) {
  //
  // Convert backtick-quoted runs into <code></code> spans.
  //
  //   * Multiple backticks may be used as the delimiters when the code
  //     itself has to contain literal backticks. So, this input:
  //
  //         Just type ``foo `bar` baz`` at the prompt.
  //
  //     translates to:
  //
  //         <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
  //
  //     There's no arbitrary limit to the number of backticks you can use
  //     as delimiters. If you need three consecutive backticks in your
  //     code, use four for delimiters, etc.
  //
  //   * Spaces can be used to get literal backticks at the edges:
  //
  //         ... type `` `bar` `` ...
  //
  //     turns to:
  //
  //         ... type <code>`bar`</code> ...
  //
  // The pattern, spelled out:
  //
  //   (^|[^\\])      $1 = character before the opening ` (can't be a backslash)
  //   (`+)           $2 = opening run of `
  //   (              $3 = the code itself
  //     [^\r]*?
  //     [^`]              attacklab: work around lack of lookbehind
  //   )
  //   \2             matching closer
  //   (?!`)
  //
  var codeSpan = /(^|[^\\])(`+)([^\r]*?[^`])\2(?!`)/gm;

  return text.replace(codeSpan, function(wholeMatch, before, ticks, inner) {
    var code = inner
      .replace(/^([ \t]*)/g,"")   // leading whitespace
      .replace(/[ \t]*$/g,"");    // trailing whitespace

    // $1 was consumed by the match, so it has to be put back in front.
    return before + "<code>" + _EncodeCode(code) + "</code>";
  });
};
rc-web@42 988
var _EncodeCode = function(text) {
  //
  // Encode/escape certain characters inside Markdown code runs.
  // Inside code these characters are literals and must lose their
  // special Markdown (and HTML) meanings.
  //
  // Param:   text - the raw content of a code span or code block.
  // Returns: the encoded text.
  //
  var encoded = text
    // Every ampersand is encoded: HTML entities are not entities
    // within a Markdown code span.
    .replace(/&/g,"&amp;")
    // The angle bracket song and dance:
    .replace(/</g,"&lt;")
    .replace(/>/g,"&gt;");

  // Finally, escape the characters that are magic in Markdown.
  //
  // NOTE (carried over from the original author, "jj"): this escaping is
  // reported to break the following input:
  //
  //     * Item
  //
  //         1. Subitem
  //
  //                 special char: *
  //
  return escapeCharacters(encoded,"\*_{}[]\\",false);
};
rc-web@42 1018
rc-web@42 1019
var _DoItalicsAndBold = function(text) {
  //
  // Convert **bold** / __bold__ and *italic* / _italic_ runs into
  // <strong> and <em> elements.
  //
  // Param:   text - the span-level text.
  // Returns: the text with the emphasis markers replaced by tags.
  //
  var strongRun = /(\*\*|__)(?=\S)([^\r]*?\S[*_]*)\1/g;
  var emRun = /(\*|_)(?=\S)([^\r]*?\S)\1/g;

  // <strong> must go first, otherwise the doubled markers would be
  // consumed as two single ones.
  return text
    .replace(strongRun, "<strong>$2</strong>")
    .replace(emRun, "<em>$2</em>");
};
rc-web@42 1031
rc-web@42 1032
var _DoBlockQuotes = function(text) {
  //
  // Convert runs of '>'-quoted lines into <blockquote> elements.
  //
  // Param:   text - the document text.
  // Returns: the text with every quoted run replaced by the placeholder
  //          that hashBlock() hands back.
  //
  // The pattern, spelled out:
  //
  //   (                        wrap whole match in $1
  //     (
  //       ^[ \t]*>[ \t]?       '>' at the start of a line
  //       .+\n                 rest of the first line
  //       (.+\n)*              subsequent consecutive lines
  //       \n*                  blanks
  //     )+
  //   )
  //
  var quotedRun = /((^[ \t]*>[ \t]?.+\n(.+\n)*\n*)+)/gm;

  // Takes the indent that renderQuote() adds back out of <pre> content.
  function unindentPre(wholeMatch, preBlock) {
    // attacklab: hack around Konqueror 3.5.4 bug -- substitute a sentinel
    // first and strip the sentinel afterwards.
    return preBlock.replace(/^ /mg,"~0").replace(/~0/g,"");
  }

  // Renders one quoted run.
  function renderQuote(wholeMatch, quoted) {
    // attacklab: hack around Konqueror 3.5.4 bug:
    // "----------bug".replace(/^-/g,"") == "bug"
    // hence the detour through the "~0" sentinel.
    var inner = quoted
      .replace(/^[ \t]*>[ \t]?/gm,"~0")   // trim one level of quoting
      .replace(/~0/g,"")                  // attacklab: clean up hack
      .replace(/^[ \t]+$/gm,"");          // trim whitespace-only lines

    inner = _RunBlockGamut(inner);        // recurse

    // Indent the rendered content...
    inner = inner.replace(/(^|\n)/g,"$1 ");
    // ...but these leading spaces screw with <pre> content, so fix that:
    inner = inner.replace(/(\s*<pre>[^\r]+?<\/pre>)/gm, unindentPre);

    return hashBlock("<blockquote>\n" + inner + "\n</blockquote>");
  }

  return text.replace(quotedRun, renderQuote);
};
rc-web@42 1079
rc-web@42 1080
var _FormParagraphs = function(text) {
  //
  // Wrap the remaining plain-text chunks in <p> tags and swap the hashed
  // blocks back in.
  //
  // Param:   text - string to process with html <p> tags
  // Returns: the paragraphs, joined by blank lines.
  //

  // Strip leading and trailing lines:
  text = text.replace(/^\n+/g,"");
  text = text.replace(/\n+$/g,"");

  var grafs = text.split(/\n{2,}/g);
  var grafsOut = [];
  var blockMarker = /~K(\d+)K/;
  var i;

  //
  // Wrap <p> tags.
  //
  for (i = 0; i < grafs.length; i++) {
    var str = grafs[i];

    // if this is an HTML marker, copy it
    if (str.search(blockMarker) >= 0) {
      grafsOut.push(str);
    }
    else if (str.search(/\S/) >= 0) {
      str = _RunSpanGamut(str);
      str = str.replace(/^([ \t]*)/g,"<p>");
      str += "</p>";
      grafsOut.push(str);
    }
    // chunks without any non-whitespace character are dropped
  }

  //
  // Unhashify HTML blocks
  //
  // A replacer function is used instead of a replacement string so that
  // dollar signs in the stored block are inserted literally, and so that
  // the block number comes from the match itself rather than from the
  // deprecated RegExp.$1 static.
  var insertBlock = function(wholeMatch, blockId) {
    return g_html_blocks[blockId];
  };

  for (i = 0; i < grafsOut.length; i++) {
    // Keep going while a marker is left: a stored block may itself
    // contain markers for other blocks.
    while (blockMarker.test(grafsOut[i])) {
      grafsOut[i] = grafsOut[i].replace(blockMarker, insertBlock);
    }
  }

  return grafsOut.join("\n\n");
};
rc-web@42 1129
rc-web@42 1130
var _EncodeAmpsAndAngles = function(text) {
  //
  // Smart processing for ampersands and angle brackets that need to be
  // encoded.
  //
  // Param:   text - the text to process.
  // Returns: the text with bare '&' and '<' turned into entities.
  //

  // An ampersand that does not start something entity-shaped.
  // Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
  //   http://bumppo.net/projects/amputator/
  var nakedAmpersand = /&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/g;

  // A '<' that is not followed by a letter, '/', '?', '$' or '!'.
  var nakedLessThan = /<(?![a-z\/?\$!])/gi;

  return text
    .replace(nakedAmpersand,"&amp;")
    .replace(nakedLessThan,"&lt;");
};
rc-web@42 1143
rc-web@42 1144
var _EncodeBackslashEscapes = function(text) {
  //
  // Parameter:  String.
  // Returns:    The string, after processing the following backslash
  //             escape sequences:  \\  \`  \*  \_  \{  \}  \[  \]  \(  \)
  //             \>  \#  \+  \-  \.  \!
  //             Each sequence is handed to escapeCharacters_callback.
  //

  // attacklab: The polite way to do this is with the new
  // escapeCharacters() function:
  //
  //     text = escapeCharacters(text,"\\",true);
  //     text = escapeCharacters(text,"`*_{}[]()>#+-.!",true);
  //
  // ...but we're sidestepping its use of the (slow) RegExp constructor
  // as an optimization for Firefox.  This function gets called a LOT.

  text = text.replace(/\\(\\)/g,escapeCharacters_callback);

  // The '-' is written as '\-': an unescaped "+-." inside a character class
  // is a range that also covers ',', which would swallow the backslash of
  // "\," even though a comma is not an escapable character.
  text = text.replace(/\\([`*_{}\[\]()>#+\-.!])/g,escapeCharacters_callback);
  return text;
};
rc-web@42 1165
rc-web@42 1166
var _DoAutoLinks = function(text) {
  //
  // Turn <http://...> style URLs and <address@domain.foo> style e-mail
  // addresses into links.
  //
  // Param:   text - the span-level text.
  // Returns: the text with the automatic links expanded.
  //

  // <scheme:...> where the scheme is http, https, ftp or dict.
  var urlLink = /<((https?|ftp|dict):[^'">\s]+)>/gi;

  // Email addresses: <address@domain.foo>
  //
  //   <
  //   (?:mailto:)?
  //   (
  //     [-.\w]+
  //     \@
  //     [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
  //   )
  //   >
  //
  var emailLink = /<(?:mailto:)?([-.\w]+\@[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+)>/gi;

  var withUrls = text.replace(urlLink,"<a href=\"$1\">$1</a>");

  return withUrls.replace(emailLink, function(wholeMatch, address) {
    // The address is unescaped first and then passed to the e-mail encoder.
    return _EncodeEmailAddress(_UnescapeSpecialChars(address));
  });
};
rc-web@42 1193
rc-web@42 1194
var _EncodeEmailAddress = function(addr) {
  //
  //  Input: an email address, e.g. "foo@example.com"
  //
  //  Output: the email address as a mailto link, with each character
  //    of the address encoded as either a decimal or hex entity, in
  //    the hopes of foiling most address harvesting spam bots. E.g.:
  //
  //    <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
  //       x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
  //       &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
  //
  //  The choice of encoding per character is random (Math.random), so the
  //  exact output differs from call to call.
  //
  //  Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
  //  mailing list: <http://tinyurl.com/yu7ue>
  //

  // Upper-case hex for the character's code unit, at least two digits wide.
  // Number.prototype.toString(16) handles codes above 0xFF, which a
  // two-nibble table lookup cannot.
  function char2hex(ch) {
    var hex = ch.charCodeAt(0).toString(16).toUpperCase();
    return (hex.length < 2 ? "0" : "") + hex;
  }

  var encode = [
    function(ch){return "&#"+ch.charCodeAt(0)+";";},   // decimal entity
    function(ch){return "&#x"+char2hex(ch)+";";},      // hex entity
    function(ch){return ch;}                           // raw
  ];

  addr = "mailto:" + addr;

  addr = addr.replace(/./g, function(ch) {
    if (ch == "@") {
      // this *must* be encoded. I insist.
      ch = encode[Math.floor(Math.random()*2)](ch);
    } else if (ch !=":") {
      // leave ':' alone (to spot mailto: later)
      var r = Math.random();
      // roughly 10% raw, 45% hex, 45% dec
      ch = (
        r > .9  ? encode[2](ch) :
        r > .45 ? encode[1](ch) :
                  encode[0](ch)
      );
    }
    return ch;
  });

  addr = "<a href=\"" + addr + "\">" + addr + "</a>";
  addr = addr.replace(/">.+:/g,"\">"); // strip the mailto: from the visible part

  return addr;
};
rc-web@42 1248
rc-web@42 1249
var _UnescapeSpecialChars = function(text) {
  //
  // Swap back in all the special characters we've hidden.
  //
  // Param:   text - text containing "~E<charcode>E" placeholders.
  // Returns: the text with every placeholder replaced by the character
  //          whose (decimal) char code it carries.
  //
  text = text.replace(/~E(\d+)E/g,
    function(wholeMatch,m1) {
      // Always parse as base 10; without the radix a leading zero may be
      // read as octal by older engines.
      var charCodeToReplace = parseInt(m1, 10);
      return String.fromCharCode(charCodeToReplace);
    }
  );
  return text;
};
rc-web@42 1262
rc-web@42 1263
var _Outdent = function(text) {
  //
  // Remove one level of line-leading tabs or spaces: a single tab or up to
  // four spaces (attacklab: g_tab_width) at the start of every line.
  //
  // Param:   text - the text to outdent.
  // Returns: the outdented text.
  //

  // attacklab: hack around Konqueror 3.5.4 bug:
  // "----------bug".replace(/^-/g,"") == "bug"
  // so the indent is first swapped for a "~0" sentinel...
  var marked = text.replace(/^(\t|[ ]{1,4})/gm,"~0");

  // ...and the sentinel is removed in a second pass.
  return marked.replace(/~0/g,"");
};
rc-web@42 1279
var _Detab = function(text) {
  //
  // Replace tab characters with spaces, using a tab width of four.
  //
  // Param:   text - the text to detab.
  // Returns: the text without tab characters.
  //
  // attacklab: Detab's completely rewritten for speed.
  // In perl we could fix it by anchoring the regexp with \G.
  // In javascript we're less fortunate.

  var tabWidth = 4; // attacklab: g_tab_width
  var fullTab = new Array(tabWidth + 1).join(" "); // tabWidth spaces

  // expand first n-1 tabs: a tab directly followed by another tab always
  // spans a full tab width
  text = text.replace(/\t(?=\t)/g, fullTab);

  // replace the nth with two sentinels
  text = text.replace(/\t/g,"~A~B");

  // use the sentinel to anchor our regex so it doesn't explode
  text = text.replace(/~B(.+?)~A/g,
    function(wholeMatch, leadingText) {
      // pad the text between two tabs up to the next multiple of tabWidth
      var numSpaces = tabWidth - leadingText.length % tabWidth;
      return leadingText + new Array(numSpaces + 1).join(" ");
    }
  );

  // clean up sentinels: any ~A still present is expanded to a full tab width
  text = text.replace(/~A/g, fullTab);
  text = text.replace(/~B/g,"");

  return text;
};
rc-web@42 1310
rc-web@42 1311
rc-web@42 1312 //
rc-web@42 1313 // attacklab: Utility functions
rc-web@42 1314 //
rc-web@42 1315
rc-web@42 1316
var escapeCharacters = function(text, charsToEscape, afterBackslash) {
  //
  // Hand every occurrence of one of the given characters to
  // escapeCharacters_callback and substitute its result.
  //
  // Params:
  //   text           - the text to process
  //   charsToEscape  - string listing the characters to escape
  //   afterBackslash - when truthy, only characters that directly follow a
  //                    backslash are matched (the backslash is part of the
  //                    match and is therefore replaced as well)
  // Returns: the processed text.
  //

  // First we have to escape the characters that are special inside a
  // character class so that we can build one out of the list: '[', ']' and
  // '\', plus '-' (which would otherwise form a range such as "+-.") and
  // '^' (which would negate the class when it comes first).
  var regexString = "([" + charsToEscape.replace(/([\[\]\\^-])/g,"\\$1") + "])";

  if (afterBackslash) {
    regexString = "\\\\" + regexString;
  }

  var regex = new RegExp(regexString,"g");
  text = text.replace(regex,escapeCharacters_callback);

  return text;
};
rc-web@42 1331
rc-web@42 1332
var escapeCharacters_callback = function(wholeMatch,m1) {
  //
  // Replacement callback: builds the "~E<charcode>E" placeholder for the
  // first character of the captured group.
  //
  // Params:
  //   wholeMatch - the complete match (not used)
  //   m1         - the captured character
  // Returns: the placeholder string.
  //
  return "~E" + m1.charCodeAt(0) + "E";
};
rc-web@42 1337
rc-web@42 1338 } // end of Showdown.converter
rc-web@42 1339
// export: publish Showdown through module.exports when a `module` binding
// exists; otherwise do nothing.
if (typeof module !== 'undefined') {
  module.exports = Showdown;
}