File x2engine/framework/vendors/markdown/markdown.php

   1: <?php
   2: #
   3: # Markdown Extra  -  A text-to-HTML conversion tool for web writers
   4: #
   5: # PHP Markdown & Extra
   6: # Copyright (c) 2004-2012 Michel Fortin  
   7: # <http://michelf.com/projects/php-markdown/>
   8: #
   9: # Original Markdown
  10: # Copyright (c) 2004-2006 John Gruber  
  11: # <http://daringfireball.net/projects/markdown/>
  12: #
  13: 
  14: #
  15: # Markdown Parser Class
  16: #
  17: 
  18: class Markdown_Parser {
  19: 
  20:     # Regex to match balanced [brackets].
    # Needed to insert a maximum bracket depth while converting to PHP.
  22:     public $nested_brackets_depth = 6;
  23:     public $nested_brackets_re;
  24: 
  25:     public $nested_url_parenthesis_depth = 4;
  26:     public $nested_url_parenthesis_re;
  27: 
  28:     # Table of hash values for escaped characters:
  29:     public $escape_chars = '\`*_{}[]()>#+-.!';
  30:     public $escape_chars_re;
  31: 
  32:     # Change to ">" for HTML output.
  33:     public $empty_element_suffix = ' />';
  34:     public $tab_width = 4;
  35: 
  36:     # Change to `true` to disallow markup or entities.
  37:     public $no_markup = false;
  38:     public $no_entities = false;
  39: 
  40:     # Predefined urls and titles for reference links and images.
  41:     public $predef_urls = array();
  42:     public $predef_titles = array();
  43: 
  44:     public function __construct() {
  45:     #
  46:     # Constructor function. Initialize appropriate member variables.
  47:     #
  48:         $this->_initDetab();
  49:         $this->prepareItalicsAndBold();
  50: 
  51:         $this->nested_brackets_re =
  52:             str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
  53:             str_repeat('\])*', $this->nested_brackets_depth);
  54: 
  55:         $this->nested_url_parenthesis_re =
  56:             str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
  57:             str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
  58: 
  59:         $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';
  60: 
  61:         # Sort document, block, and span gamut in ascendent priority order.
  62:         asort($this->document_gamut);
  63:         asort($this->block_gamut);
  64:         asort($this->span_gamut);
  65:     }
  66: 
  67: 
  68:     # Internal hashes used during transformation.
  69:     public $urls = array();
  70:     public $titles = array();
  71:     public $html_hashes = array();
  72: 
  73:     # Status flag to avoid invalid nesting.
  74:     public $in_anchor = false;
  75: 
  76: 
  77:     public function setup() {
  78:     #
  79:     # Called before the transformation process starts to setup parser
  80:     # states.
  81:     #
  82:         # Clear global hashes.
  83:         $this->urls = $this->predef_urls;
  84:         $this->titles = $this->predef_titles;
  85:         $this->html_hashes = array();
  86: 
  87:         $in_anchor = false;
  88:     }
  89: 
  90:     public function teardown() {
  91:     #
  92:     # Called after the transformation process to clear any variable
  93:     # which may be taking up memory unnecessarly.
  94:     #
  95:         $this->urls = array();
  96:         $this->titles = array();
  97:         $this->html_hashes = array();
  98:     }
  99: 
 100: 
 101:     public function transform($text) {
 102:     #
 103:     # Main function. Performs some preprocessing on the input text
 104:     # and pass it through the document gamut.
 105:     #
 106:         $this->setup();
 107: 
 108:         # Remove UTF-8 BOM and marker character in input, if present.
 109:         $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
 110: 
 111:         # Standardize line endings:
 112:         #   DOS to Unix and Mac to Unix
 113:         $text = preg_replace('{\r\n?}', "\n", $text);
 114: 
 115:         # Make sure $text ends with a couple of newlines:
 116:         $text .= "\n\n";
 117: 
 118:         # Convert all tabs to spaces.
 119:         $text = $this->detab($text);
 120: 
 121:         # Turn block-level HTML blocks into hash entries
 122:         $text = $this->hashHTMLBlocks($text);
 123: 
 124:         # Strip any lines consisting only of spaces and tabs.
 125:         # This makes subsequent regexen easier to write, because we can
 126:         # match consecutive blank lines with /\n+/ instead of something
 127:         # contorted like /[ ]*\n+/ .
 128:         $text = preg_replace('/^[ ]+$/m', '', $text);
 129: 
 130:         # Run document gamut methods.
 131:         foreach ($this->document_gamut as $method => $priority) {
 132:             $text = $this->$method($text);
 133:         }
 134: 
 135:         $this->teardown();
 136: 
 137:         return $text . "\n";
 138:     }
 139: 
 140:     public $document_gamut = array(
 141:         # Strip link definitions, store in hashes.
 142:         "stripLinkDefinitions" => 20,
 143: 
 144:         "runBasicBlockGamut"   => 30,
 145:         );
 146: 
 147: 
 148:     public function stripLinkDefinitions($text) {
 149:     #
 150:     # Strips link definitions from text, stores the URLs and titles in
 151:     # hash references.
 152:     #
 153:         $less_than_tab = $this->tab_width - 1;
 154: 
 155:         # Link defs are in the form: ^[id]: url "optional title"
 156:         $text = preg_replace_callback('{
 157:                             ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
 158:                               [ ]*
 159:                               \n?               # maybe *one* newline
 160:                               [ ]*
 161:                             (?:
 162:                               <(.+?)>           # url = $2
 163:                             |
 164:                               (\S+?)            # url = $3
 165:                             )
 166:                               [ ]*
 167:                               \n?               # maybe one newline
 168:                               [ ]*
 169:                             (?:
 170:                                 (?<=\s)         # lookbehind for whitespace
 171:                                 ["(]
 172:                                 (.*?)           # title = $4
 173:                                 [")]
 174:                                 [ ]*
 175:                             )?  # title is optional
 176:                             (?:\n+|\Z)
 177:             }xm',
 178:             array(&$this, '_stripLinkDefinitions_callback'),
 179:             $text);
 180:         return $text;
 181:     }
 182:     public function _stripLinkDefinitions_callback($matches) {
 183:         $link_id = strtolower($matches[1]);
 184:         $url = $matches[2] == '' ? $matches[3] : $matches[2];
 185:         $this->urls[$link_id] = $url;
 186:         $this->titles[$link_id] =& $matches[4];
 187:         return ''; # String that will replace the block
 188:     }
 189: 
 190: 
 191:     public function hashHTMLBlocks($text) {
 192:         if ($this->no_markup)  return $text;
 193: 
 194:         $less_than_tab = $this->tab_width - 1;
 195: 
 196:         # Hashify HTML blocks:
 197:         # We only want to do this for block-level HTML tags, such as headers,
 198:         # lists, and tables. That's because we still want to wrap <p>s around
 199:         # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 200:         # phrase emphasis, and spans. The list of tags we're looking for is
 201:         # hard-coded:
 202:         #
 203:         # *  List "a" is made of tags which can be both inline or block-level.
 204:         #    These will be treated block-level when the start tag is alone on
 205:         #    its line, otherwise they're not matched here and will be taken as
 206:         #    inline later.
 207:         # *  List "b" is made of tags which are always block-level;
 208:         #
 209:         $block_tags_a_re = 'ins|del';
 210:         $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
 211:                            'script|noscript|form|fieldset|iframe|math';
 212: 
 213:         # Regular expression for the content of a block tag.
 214:         $nested_tags_level = 4;
 215:         $attr = '
 216:             (?>             # optional tag attributes
 217:               \s            # starts with whitespace
 218:               (?>
 219:                 [^>"/]+     # text outside quotes
 220:               |
 221:                 /+(?!>)     # slash not followed by ">"
 222:               |
 223:                 "[^"]*"     # text inside double quotes (tolerate ">")
 224:               |
 225:                 \'[^\']*\'  # text inside single quotes (tolerate ">")
 226:               )*
 227:             )?
 228:             ';
 229:         $content =
 230:             str_repeat('
 231:                 (?>
 232:                   [^<]+         # content without tag
 233:                 |
 234:                   <\2           # nested opening tag
 235:                     '.$attr.'   # attributes
 236:                     (?>
 237:                       />
 238:                     |
 239:                       >', $nested_tags_level).  # end of opening tag
 240:                       '.*?'.                    # last level nested tag content
 241:             str_repeat('
 242:                       </\2\s*>  # closing nested tag
 243:                     )
 244:                   |
 245:                     <(?!/\2\s*> # other tags with a different name
 246:                   )
 247:                 )*',
 248:                 $nested_tags_level);
 249:         $content2 = str_replace('\2', '\3', $content);
 250: 
 251:         # First, look for nested blocks, e.g.:
 252:         #   <div>
 253:         #       <div>
 254:         #       tags for inner block must be indented.
 255:         #       </div>
 256:         #   </div>
 257:         #
 258:         # The outermost tags must start at the left margin for this to match, and
 259:         # the inner nested divs must be indented.
 260:         # We need to do this before the next, more liberal match, because the next
 261:         # match will start at the first `<div>` and stop at the first `</div>`.
 262:         $text = preg_replace_callback('{(?>
 263:             (?>
 264:                 (?<=\n\n)       # Starting after a blank line
 265:                 |               # or
 266:                 \A\n?           # the beginning of the doc
 267:             )
 268:             (                       # save in $1
 269: 
 270:               # Match from `\n<tag>` to `</tag>\n`, handling nested tags
 271:               # in between.
 272: 
 273:                         [ ]{0,'.$less_than_tab.'}
 274:                         <('.$block_tags_b_re.')# start tag = $2
 275:                         '.$attr.'>          # attributes followed by > and \n
 276:                         '.$content.'        # content, support nesting
 277:                         </\2>               # the matching end tag
 278:                         [ ]*                # trailing spaces/tabs
 279:                         (?=\n+|\Z)  # followed by a newline or end of document
 280: 
 281:             | # Special version for tags of group a.
 282: 
 283:                         [ ]{0,'.$less_than_tab.'}
 284:                         <('.$block_tags_a_re.')# start tag = $3
 285:                         '.$attr.'>[ ]*\n    # attributes followed by >
 286:                         '.$content2.'       # content, support nesting
 287:                         </\3>               # the matching end tag
 288:                         [ ]*                # trailing spaces/tabs
 289:                         (?=\n+|\Z)  # followed by a newline or end of document
 290: 
 291:             | # Special case just for <hr />. It was easier to make a special
 292:               # case than to make the other regex more complicated.
 293: 
 294:                         [ ]{0,'.$less_than_tab.'}
 295:                         <(hr)               # start tag = $2
 296:                         '.$attr.'           # attributes
 297:                         /?>                 # the matching end tag
 298:                         [ ]*
 299:                         (?=\n{2,}|\Z)       # followed by a blank line or end of document
 300: 
 301:             | # Special case for standalone HTML comments:
 302: 
 303:                     [ ]{0,'.$less_than_tab.'}
 304:                     (?s:
 305:                         <!-- .*? -->
 306:                     )
 307:                     [ ]*
 308:                     (?=\n{2,}|\Z)       # followed by a blank line or end of document
 309: 
 310:             | # PHP and ASP-style processor instructions (<? and <%)
 311: 
 312:                     [ ]{0,'.$less_than_tab.'}
 313:                     (?s:
 314:                         <([?%])         # $2
 315:                         .*?
 316:                         \2>
 317:                     )
 318:                     [ ]*
 319:                     (?=\n{2,}|\Z)       # followed by a blank line or end of document
 320: 
 321:             )
 322:             )}Sxmi',
 323:             array(&$this, '_hashHTMLBlocks_callback'),
 324:             $text);
 325: 
 326:         return $text;
 327:     }
 328:     public function _hashHTMLBlocks_callback($matches) {
 329:         $text = $matches[1];
 330:         $key  = $this->hashBlock($text);
 331:         return "\n\n$key\n\n";
 332:     }
 333: 
 334: 
 335:     public function hashPart($text, $boundary = 'X') {
 336:     #
 337:     # Called whenever a tag must be hashed when a function insert an atomic
 338:     # element in the text stream. Passing $text to through this function gives
 339:     # a unique text-token which will be reverted back when calling unhash.
 340:     #
 341:     # The $boundary argument specify what character should be used to surround
 342:     # the token. By convension, "B" is used for block elements that needs not
 343:     # to be wrapped into paragraph tags at the end, ":" is used for elements
 344:     # that are word separators and "X" is used in the general case.
 345:     #
 346:         # Swap back any tag hash found in $text so we do not have to `unhash`
 347:         # multiple times at the end.
 348:         $text = $this->unhash($text);
 349: 
 350:         # Then hash the block.
 351:         static $i = 0;
 352:         $key = "$boundary\x1A" . ++$i . $boundary;
 353:         $this->html_hashes[$key] = $text;
 354:         return $key; # String that will replace the tag.
 355:     }
 356: 
 357: 
 358:     public function hashBlock($text) {
 359:     #
 360:     # Shortcut function for hashPart with block-level boundaries.
 361:     #
 362:         return $this->hashPart($text, 'B');
 363:     }
 364: 
 365: 
 366:     public $block_gamut = array(
 367:     #
 368:     # These are all the transformations that form block-level
 369:     # tags like paragraphs, headers, and list items.
 370:     #
 371:         "doHeaders"         => 10,
 372:         "doHorizontalRules" => 20,
 373: 
 374:         "doLists"           => 40,
 375:         "doCodeBlocks"      => 50,
 376:         "doBlockQuotes"     => 60,
 377:         );
 378: 
 379:     public function runBlockGamut($text) {
 380:     #
 381:     # Run block gamut tranformations.
 382:     #
 383:         # We need to escape raw HTML in Markdown source before doing anything
 384:         # else. This need to be done for each block, and not only at the
 385:         # begining in the Markdown function since hashed blocks can be part of
 386:         # list items and could have been indented. Indented blocks would have
 387:         # been seen as a code block in a previous pass of hashHTMLBlocks.
 388:         $text = $this->hashHTMLBlocks($text);
 389: 
 390:         return $this->runBasicBlockGamut($text);
 391:     }
 392: 
 393:     public function runBasicBlockGamut($text) {
 394:     #
 395:     # Run block gamut tranformations, without hashing HTML blocks. This is
 396:     # useful when HTML blocks are known to be already hashed, like in the first
 397:     # whole-document pass.
 398:     #
 399:         foreach ($this->block_gamut as $method => $priority) {
 400:             $text = $this->$method($text);
 401:         }
 402: 
 403:         # Finally form paragraph and restore hashed blocks.
 404:         $text = $this->formParagraphs($text);
 405: 
 406:         return $text;
 407:     }
 408: 
 409: 
 410:     public function doHorizontalRules($text) {
 411:         # Do Horizontal Rules:
 412:         return preg_replace(
 413:             '{
 414:                 ^[ ]{0,3}   # Leading space
 415:                 ([-*_])     # $1: First marker
 416:                 (?>         # Repeated marker group
 417:                     [ ]{0,2}    # Zero, one, or two spaces.
 418:                     \1          # Marker character
 419:                 ){2,}       # Group repeated at least twice
 420:                 [ ]*        # Tailing spaces
 421:                 $           # End of line.
 422:             }mx',
 423:             "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
 424:             $text);
 425:     }
 426: 
 427: 
 428:     public $span_gamut = array(
 429:     #
 430:     # These are all the transformations that occur *within* block-level
 431:     # tags like paragraphs, headers, and list items.
 432:     #
 433:         # Process character escapes, code spans, and inline HTML
 434:         # in one shot.
 435:         "parseSpan"           => -30,
 436: 
 437:         # Process anchor and image tags. Images must come first,
 438:         # because ![foo][f] looks like an anchor.
 439:         "doImages"            =>  10,
 440:         "doAnchors"           =>  20,
 441: 
 442:         # Make links out of things like `<http://example.com/>`
 443:         # Must come after doAnchors, because you can use < and >
 444:         # delimiters in inline links like [this](<url>).
 445:         "doAutoLinks"         =>  30,
 446:         "encodeAmpsAndAngles" =>  40,
 447: 
 448:         "doItalicsAndBold"    =>  50,
 449:         "doHardBreaks"        =>  60,
 450:         );
 451: 
 452:     public function runSpanGamut($text) {
 453:     #
 454:     # Run span gamut tranformations.
 455:     #
 456:         foreach ($this->span_gamut as $method => $priority) {
 457:             $text = $this->$method($text);
 458:         }
 459: 
 460:         return $text;
 461:     }
 462: 
 463: 
 464:     public function doHardBreaks($text) {
 465:         # Do hard breaks:
 466:         return preg_replace_callback('/ {2,}\n/',
 467:             array(&$this, '_doHardBreaks_callback'), $text);
 468:     }
 469:     public function _doHardBreaks_callback($matches) {
 470:         return $this->hashPart("<br$this->empty_element_suffix\n");
 471:     }
 472: 
 473: 
 474:     public function doAnchors($text) {
 475:     #
 476:     # Turn Markdown link shortcuts into XHTML <a> tags.
 477:     #
 478:         if ($this->in_anchor) return $text;
 479:         $this->in_anchor = true;
 480: 
 481:         #
 482:         # First, handle reference-style links: [link text] [id]
 483:         #
 484:         $text = preg_replace_callback('{
 485:             (                   # wrap whole match in $1
 486:               \[
 487:                 ('.$this->nested_brackets_re.') # link text = $2
 488:               \]
 489: 
 490:               [ ]?              # one optional space
 491:               (?:\n[ ]*)?       # one optional newline followed by spaces
 492: 
 493:               \[
 494:                 (.*?)       # id = $3
 495:               \]
 496:             )
 497:             }xs',
 498:             array(&$this, '_doAnchors_reference_callback'), $text);
 499: 
 500:         #
 501:         # Next, inline-style links: [link text](url "optional title")
 502:         #
 503:         $text = preg_replace_callback('{
 504:             (               # wrap whole match in $1
 505:               \[
 506:                 ('.$this->nested_brackets_re.') # link text = $2
 507:               \]
 508:               \(            # literal paren
 509:                 [ \n]*
 510:                 (?:
 511:                     <(.+?)> # href = $3
 512:                 |
 513:                     ('.$this->nested_url_parenthesis_re.')  # href = $4
 514:                 )
 515:                 [ \n]*
 516:                 (           # $5
 517:                   ([\'"])   # quote char = $6
 518:                   (.*?)     # Title = $7
 519:                   \6        # matching quote
 520:                   [ \n]*    # ignore any spaces/tabs between closing quote and )
 521:                 )?          # title is optional
 522:               \)
 523:             )
 524:             }xs',
 525:             array(&$this, '_doAnchors_inline_callback'), $text);
 526: 
 527:         #
 528:         # Last, handle reference-style shortcuts: [link text]
 529:         # These must come last in case you've also got [link text][1]
 530:         # or [link text](/foo)
 531:         #
 532:         $text = preg_replace_callback('{
 533:             (                   # wrap whole match in $1
 534:               \[
 535:                 ([^\[\]]+)      # link text = $2; can\'t contain [ or ]
 536:               \]
 537:             )
 538:             }xs',
 539:             array(&$this, '_doAnchors_reference_callback'), $text);
 540: 
 541:         $this->in_anchor = false;
 542:         return $text;
 543:     }
 544:     public function _doAnchors_reference_callback($matches) {
 545:         $whole_match =  $matches[1];
 546:         $link_text   =  $matches[2];
 547:         $link_id     =& $matches[3];
 548: 
 549:         if ($link_id == "") {
 550:             # for shortcut links like [this][] or [this].
 551:             $link_id = $link_text;
 552:         }
 553: 
 554:         # lower-case and turn embedded newlines into spaces
 555:         $link_id = strtolower($link_id);
 556:         $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
 557: 
 558:         if (isset($this->urls[$link_id])) {
 559:             $url = $this->urls[$link_id];
 560:             $url = $this->encodeAttribute($url);
 561: 
 562:             $result = "<a href=\"$url\"";
 563:             if ( isset( $this->titles[$link_id] ) ) {
 564:                 $title = $this->titles[$link_id];
 565:                 $title = $this->encodeAttribute($title);
 566:                 $result .=  " title=\"$title\"";
 567:             }
 568: 
 569:             $link_text = $this->runSpanGamut($link_text);
 570:             $result .= ">$link_text</a>";
 571:             $result = $this->hashPart($result);
 572:         }
 573:         else {
 574:             $result = $whole_match;
 575:         }
 576:         return $result;
 577:     }
 578:     public function _doAnchors_inline_callback($matches) {
 579:         $whole_match    =  $matches[1];
 580:         $link_text      =  $this->runSpanGamut($matches[2]);
 581:         $url            =  $matches[3] == '' ? $matches[4] : $matches[3];
 582:         $title          =& $matches[7];
 583: 
 584:         $url = $this->encodeAttribute($url);
 585: 
 586:         $result = "<a href=\"$url\"";
 587:         if (isset($title)) {
 588:             $title = $this->encodeAttribute($title);
 589:             $result .=  " title=\"$title\"";
 590:         }
 591: 
 592:         $link_text = $this->runSpanGamut($link_text);
 593:         $result .= ">$link_text</a>";
 594: 
 595:         return $this->hashPart($result);
 596:     }
 597: 
 598: 
 599:     public function doImages($text) {
 600:     #
 601:     # Turn Markdown image shortcuts into <img> tags.
 602:     #
 603:         #
 604:         # First, handle reference-style labeled images: ![alt text][id]
 605:         #
 606:         $text = preg_replace_callback('{
 607:             (               # wrap whole match in $1
 608:               !\[
 609:                 ('.$this->nested_brackets_re.')     # alt text = $2
 610:               \]
 611: 
 612:               [ ]?              # one optional space
 613:               (?:\n[ ]*)?       # one optional newline followed by spaces
 614: 
 615:               \[
 616:                 (.*?)       # id = $3
 617:               \]
 618: 
 619:             )
 620:             }xs',
 621:             array(&$this, '_doImages_reference_callback'), $text);
 622: 
 623:         #
 624:         # Next, handle inline images:  ![alt text](url "optional title")
 625:         # Don't forget: encode * and _
 626:         #
 627:         $text = preg_replace_callback('{
 628:             (               # wrap whole match in $1
 629:               !\[
 630:                 ('.$this->nested_brackets_re.')     # alt text = $2
 631:               \]
 632:               \s?           # One optional whitespace character
 633:               \(            # literal paren
 634:                 [ \n]*
 635:                 (?:
 636:                     <(\S*)> # src url = $3
 637:                 |
 638:                     ('.$this->nested_url_parenthesis_re.')  # src url = $4
 639:                 )
 640:                 [ \n]*
 641:                 (           # $5
 642:                   ([\'"])   # quote char = $6
 643:                   (.*?)     # title = $7
 644:                   \6        # matching quote
 645:                   [ \n]*
 646:                 )?          # title is optional
 647:               \)
 648:             )
 649:             }xs',
 650:             array(&$this, '_doImages_inline_callback'), $text);
 651: 
 652:         return $text;
 653:     }
 654:     public function _doImages_reference_callback($matches) {
 655:         $whole_match = $matches[1];
 656:         $alt_text    = $matches[2];
 657:         $link_id     = strtolower($matches[3]);
 658: 
 659:         if ($link_id == "") {
 660:             $link_id = strtolower($alt_text); # for shortcut links like ![this][].
 661:         }
 662: 
 663:         $alt_text = $this->encodeAttribute($alt_text);
 664:         if (isset($this->urls[$link_id])) {
 665:             $url = $this->encodeAttribute($this->urls[$link_id]);
 666:             $result = "<img src=\"$url\" alt=\"$alt_text\"";
 667:             if (isset($this->titles[$link_id])) {
 668:                 $title = $this->titles[$link_id];
 669:                 $title = $this->encodeAttribute($title);
 670:                 $result .=  " title=\"$title\"";
 671:             }
 672:             $result .= $this->empty_element_suffix;
 673:             $result = $this->hashPart($result);
 674:         }
 675:         else {
 676:             # If there's no such link ID, leave intact:
 677:             $result = $whole_match;
 678:         }
 679: 
 680:         return $result;
 681:     }
 682:     public function _doImages_inline_callback($matches) {
 683:         $whole_match    = $matches[1];
 684:         $alt_text       = $matches[2];
 685:         $url            = $matches[3] == '' ? $matches[4] : $matches[3];
 686:         $title          =& $matches[7];
 687: 
 688:         $alt_text = $this->encodeAttribute($alt_text);
 689:         $url = $this->encodeAttribute($url);
 690:         $result = "<img src=\"$url\" alt=\"$alt_text\"";
 691:         if (isset($title)) {
 692:             $title = $this->encodeAttribute($title);
 693:             $result .=  " title=\"$title\""; # $title already quoted
 694:         }
 695:         $result .= $this->empty_element_suffix;
 696: 
 697:         return $this->hashPart($result);
 698:     }
 699: 
 700: 
 701:     public function doHeaders($text) {
 702:         # Setext-style headers:
 703:         #     Header 1
 704:         #     ========
 705:         #
 706:         #     Header 2
 707:         #     --------
 708:         #
 709:         $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
 710:             array(&$this, '_doHeaders_callback_setext'), $text);
 711: 
 712:         # atx-style headers:
 713:         #   # Header 1
 714:         #   ## Header 2
 715:         #   ## Header 2 with closing hashes ##
 716:         #   ...
 717:         #   ###### Header 6
 718:         #
 719:         $text = preg_replace_callback('{
 720:                 ^(\#{1,6})  # $1 = string of #\'s
 721:                 [ ]*
 722:                 (.+?)       # $2 = Header text
 723:                 [ ]*
 724:                 \#*         # optional closing #\'s (not counted)
 725:                 \n+
 726:             }xm',
 727:             array(&$this, '_doHeaders_callback_atx'), $text);
 728: 
 729:         return $text;
 730:     }
 731:     public function _doHeaders_callback_setext($matches) {
 732:         # Terrible hack to check we haven't found an empty list item.
 733:         if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
 734:             return $matches[0];
 735: 
 736:         $level = $matches[2]{0} == '=' ? 1 : 2;
 737:         $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
 738:         return "\n" . $this->hashBlock($block) . "\n\n";
 739:     }
 740:     public function _doHeaders_callback_atx($matches) {
 741:         $level = strlen($matches[1]);
 742:         $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>";
 743:         return "\n" . $this->hashBlock($block) . "\n\n";
 744:     }
 745: 
 746: 
 747:     public function doLists($text) {
 748:     #
 749:     # Form HTML ordered (numbered) and unordered (bulleted) lists.
 750:     #
 751:         $less_than_tab = $this->tab_width - 1;
 752: 
 753:         # Re-usable patterns to match list item bullets and number markers:
 754:         $marker_ul_re  = '[*+-]';
 755:         $marker_ol_re  = '\d+[\.]';
 756:         $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
 757: 
 758:         $markers_relist = array(
 759:             $marker_ul_re => $marker_ol_re,
 760:             $marker_ol_re => $marker_ul_re,
 761:             );
 762: 
 763:         foreach ($markers_relist as $marker_re => $other_marker_re) {
 764:             # Re-usable pattern to match any entirel ul or ol list:
 765:             $whole_list_re = '
 766:                 (                               # $1 = whole list
 767:                   (                             # $2
 768:                     ([ ]{0,'.$less_than_tab.'}) # $3 = number of spaces
 769:                     ('.$marker_re.')            # $4 = first list item marker
 770:                     [ ]+
 771:                   )
 772:                   (?s:.+?)
 773:                   (                             # $5
 774:                       \z
 775:                     |
 776:                       \n{2,}
 777:                       (?=\S)
 778:                       (?!                       # Negative lookahead for another list item marker
 779:                         [ ]*
 780:                         '.$marker_re.'[ ]+
 781:                       )
 782:                     |
 783:                       (?=                       # Lookahead for another kind of list
 784:                         \n
 785:                         \3                      # Must have the same indentation
 786:                         '.$other_marker_re.'[ ]+
 787:                       )
 788:                   )
 789:                 )
 790:             '; // mx
 791: 
 792:             # We use a different prefix before nested lists than top-level lists.
 793:             # See extended comment in _ProcessListItems().
 794: 
 795:             if ($this->list_level) {
 796:                 $text = preg_replace_callback('{
 797:                         ^
 798:                         '.$whole_list_re.'
 799:                     }mx',
 800:                     array(&$this, '_doLists_callback'), $text);
 801:             }
 802:             else {
 803:                 $text = preg_replace_callback('{
 804:                         (?:(?<=\n)\n|\A\n?) # Must eat the newline
 805:                         '.$whole_list_re.'
 806:                     }mx',
 807:                     array(&$this, '_doLists_callback'), $text);
 808:             }
 809:         }
 810: 
 811:         return $text;
 812:     }
 813:     
 814:     public function _doLists_callback($matches) {
 815:         # Re-usable patterns to match list item bullets and number markers:
 816:         $marker_ul_re  = '[*+-]';
 817:         $marker_ol_re  = '\d+[\.]';
 818:         $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
 819:         
 820:         $list = $matches[1];
 821:         $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";
 822:         
 823:         $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
 824:         
 825:         $list .= "\n";
 826:         $result = $this->processListItems($list, $marker_any_re);
 827:         
 828:         $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
 829:         return "\n". $result ."\n\n";
 830:     }
 831: 
 832:     var $list_level = 0;
 833: 
 834:     public function processListItems($list_str, $marker_any_re) {
 835:     #
 836:     #   Process the contents of a single ordered or unordered list, splitting it
 837:     #   into individual list items.
 838:     #
 839:         # The $this->list_level global keeps track of when we're inside a list.
 840:         # Each time we enter a list, we increment it; when we leave a list,
 841:         # we decrement. If it's zero, we're not in a list anymore.
 842:         #
 843:         # We do this because when we're not inside a list, we want to treat
 844:         # something like this:
 845:         #
 846:         #       I recommend upgrading to version
 847:         #       8. Oops, now this line is treated
 848:         #       as a sub-list.
 849:         #
 850:         # As a single paragraph, despite the fact that the second line starts
 851:         # with a digit-period-space sequence.
 852:         #
 853:         # Whereas when we're inside a list (or sub-list), that line will be
 854:         # treated as the start of a sub-list. What a kludge, huh? This is
 855:         # an aspect of Markdown's syntax that's hard to parse perfectly
 856:         # without resorting to mind-reading. Perhaps the solution is to
 857:         # change the syntax rules such that sub-lists must start with a
 858:         # starting cardinal number; e.g. "1." or "a.".
 859: 
 860:         $this->list_level++;
 861: 
 862:         # trim trailing blank lines:
 863:         $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
 864: 
 865:         $list_str = preg_replace_callback('{
 866:             (\n)?                           # leading line = $1
 867:             (^[ ]*)                         # leading whitespace = $2
 868:             ('.$marker_any_re.'             # list marker and space = $3
 869:                 (?:[ ]+|(?=\n)) # space only required if item is not empty
 870:             )
 871:             ((?s:.*?))                      # list item text   = $4
 872:             (?:(\n+(?=\n))|\n)              # tailing blank line = $5
 873:             (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
 874:             }xm',
 875:             array(&$this, '_processListItems_callback'), $list_str);
 876: 
 877:         $this->list_level--;
 878:         return $list_str;
 879:     }
 880:     public function _processListItems_callback($matches) {
 881:         $item = $matches[4];
 882:         $leading_line =& $matches[1];
 883:         $leading_space =& $matches[2];
 884:         $marker_space = $matches[3];
 885:         $tailing_blank_line =& $matches[5];
 886: 
 887:         if ($leading_line || $tailing_blank_line ||
 888:             preg_match('/\n{2,}/', $item))
 889:         {
 890:             # Replace marker with the appropriate whitespace indentation
 891:             $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
 892:             $item = $this->runBlockGamut($this->outdent($item)."\n");
 893:         }
 894:         else {
 895:             # Recursion for sub-lists:
 896:             $item = $this->doLists($this->outdent($item));
 897:             $item = preg_replace('/\n+$/', '', $item);
 898:             $item = $this->runSpanGamut($item);
 899:         }
 900: 
 901:         return "<li>" . $item . "</li>\n";
 902:     }
 903: 
 904: 
 905:     public function doCodeBlocks($text) {
 906:     #
 907:     #   Process Markdown `<pre><code>` blocks.
 908:     #
 909:         $text = preg_replace_callback('{
 910:                 (?:\n\n|\A\n?)
 911:                 (               # $1 = the code block -- one or more lines, starting with a space/tab
 912:                   (?>
 913:                     [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
 914:                     .*\n+
 915:                   )+
 916:                 )
 917:                 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
 918:             }xm',
 919:             array(&$this, '_doCodeBlocks_callback'), $text);
 920: 
 921:         return $text;
 922:     }
 923:     public function _doCodeBlocks_callback($matches) {
 924:         $codeblock = $matches[1];
 925: 
 926:         $codeblock = $this->outdent($codeblock);
 927:         $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
 928: 
 929:         # trim leading newlines and trailing newlines
 930:         $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
 931: 
 932:         $codeblock = "<pre><code>$codeblock\n</code></pre>";
 933:         return "\n\n".$this->hashBlock($codeblock)."\n\n";
 934:     }
 935: 
 936: 
 937:     public function makeCodeSpan($code) {
 938:     #
 939:     # Create a code span markup for $code. Called from handleSpanToken.
 940:     #
 941:         $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
 942:         return $this->hashPart("<code>$code</code>");
 943:     }
 944: 
 945: 
    # Token patterns for emphasis, keyed by the emphasis marker that is
    # currently open. The '' entry is used when no emphasis is open and
    # matches an opening marker: a lone `*` or `_` followed by a
    # non-whitespace character, but not by punctuation and whitespace.
    # The '*' and '_' entries match the corresponding closing marker, which
    # must be preceded by a non-whitespace character.
    public $em_relist = array(
        ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S|$)(?![\.,:;]\s)',
        '*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
        '_' => '(?<=\S|^)(?<!_)_(?!_)',
        );
    # Same layout for the two-character strong markers (`**` and `__`).
    public $strong_relist = array(
        ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S|$)(?![\.,:;]\s)',
        '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
        '__' => '(?<=\S|^)(?<!_)__(?!_)',
        );
    # Same layout for the three-character markers (`***` and `___`) that
    # open or close emphasis and strong at once.
    public $em_strong_relist = array(
        ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S|$)(?![\.,:;]\s)',
        '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
        '___' => '(?<=\S|^)(?<!_)___(?!_)',
        );
    # Filled by prepareItalicsAndBold(): one combined pattern for every
    # "$em$strong" combination of open markers.
    public $em_strong_prepared_relist;
 962:     
 963:     public function prepareItalicsAndBold() {
 964:     #
 965:     # Prepare regular expressions for searching emphasis tokens in any
 966:     # context.
 967:     #
 968:         foreach ($this->em_relist as $em => $em_re) {
 969:             foreach ($this->strong_relist as $strong => $strong_re) {
 970:                 # Construct list of allowed token expressions.
 971:                 $token_relist = array();
 972:                 if (isset($this->em_strong_relist["$em$strong"])) {
 973:                     $token_relist[] = $this->em_strong_relist["$em$strong"];
 974:                 }
 975:                 $token_relist[] = $em_re;
 976:                 $token_relist[] = $strong_re;
 977:                 
 978:                 # Construct master expression from list.
 979:                 $token_re = '{('. implode('|', $token_relist) .')}';
 980:                 $this->em_strong_prepared_relist["$em$strong"] = $token_re;
 981:             }
 982:         }
 983:     }
 984:     
 985:     public function doItalicsAndBold($text) {
 986:         $token_stack = array('');
 987:         $text_stack = array('');
 988:         $em = '';
 989:         $strong = '';
 990:         $tree_char_em = false;
 991:         
 992:         while (1) {
 993:             #
 994:             # Get prepared regular expression for seraching emphasis tokens
 995:             # in current context.
 996:             #
 997:             $token_re = $this->em_strong_prepared_relist["$em$strong"];
 998:             
 999:             #
1000:             # Each loop iteration search for the next emphasis token. 
1001:             # Each token is then passed to handleSpanToken.
1002:             #
1003:             $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
1004:             $text_stack[0] .= $parts[0];
1005:             $token =& $parts[1];
1006:             $text =& $parts[2];
1007:             
1008:             if (empty($token)) {
1009:                 # Reached end of text span: empty stack without emitting.
1010:                 # any more emphasis.
1011:                 while ($token_stack[0]) {
1012:                     $text_stack[1] .= array_shift($token_stack);
1013:                     $text_stack[0] .= array_shift($text_stack);
1014:                 }
1015:                 break;
1016:             }
1017: 
1018:             $token_len = strlen($token);
1019:             if ($tree_char_em) {
1020:                 # Reached closing marker while inside a three-char emphasis.
1021:                 if ($token_len == 3) {
1022:                     # Three-char closing marker, close em and strong.
1023:                     array_shift($token_stack);
1024:                     $span = array_shift($text_stack);
1025:                     $span = $this->runSpanGamut($span);
1026:                     $span = "<strong><em>$span</em></strong>";
1027:                     $text_stack[0] .= $this->hashPart($span);
1028:                     $em = '';
1029:                     $strong = '';
1030:                 } else {
1031:                     # Other closing marker: close one em or strong and
1032:                     # change current token state to match the other
1033:                     $token_stack[0] = str_repeat($token{0}, 3-$token_len);
1034:                     $tag = $token_len == 2 ? "strong" : "em";
1035:                     $span = $text_stack[0];
1036:                     $span = $this->runSpanGamut($span);
1037:                     $span = "<$tag>$span</$tag>";
1038:                     $text_stack[0] = $this->hashPart($span);
1039:                     $$tag = ''; # $$tag stands for $em or $strong
1040:                 }
1041:                 $tree_char_em = false;
1042:             } else if ($token_len == 3) {
1043:                 if ($em) {
1044:                     # Reached closing marker for both em and strong.
1045:                     # Closing strong marker:
1046:                     for ($i = 0; $i < 2; ++$i) {
1047:                         $shifted_token = array_shift($token_stack);
1048:                         $tag = strlen($shifted_token) == 2 ? "strong" : "em";
1049:                         $span = array_shift($text_stack);
1050:                         $span = $this->runSpanGamut($span);
1051:                         $span = "<$tag>$span</$tag>";
1052:                         $text_stack[0] .= $this->hashPart($span);
1053:                         $$tag = ''; # $$tag stands for $em or $strong
1054:                     }
1055:                 } else {
1056:                     # Reached opening three-char emphasis marker. Push on token
1057:                     # stack; will be handled by the special condition above.
1058:                     $em = $token{0};
1059:                     $strong = "$em$em";
1060:                     array_unshift($token_stack, $token);
1061:                     array_unshift($text_stack, '');
1062:                     $tree_char_em = true;
1063:                 }
1064:             } else if ($token_len == 2) {
1065:                 if ($strong) {
1066:                     # Unwind any dangling emphasis marker:
1067:                     if (strlen($token_stack[0]) == 1) {
1068:                         $text_stack[1] .= array_shift($token_stack);
1069:                         $text_stack[0] .= array_shift($text_stack);
1070:                     }
1071:                     # Closing strong marker:
1072:                     array_shift($token_stack);
1073:                     $span = array_shift($text_stack);
1074:                     $span = $this->runSpanGamut($span);
1075:                     $span = "<strong>$span</strong>";
1076:                     $text_stack[0] .= $this->hashPart($span);
1077:                     $strong = '';
1078:                 } else {
1079:                     array_unshift($token_stack, $token);
1080:                     array_unshift($text_stack, '');
1081:                     $strong = $token;
1082:                 }
1083:             } else {
1084:                 # Here $token_len == 1
1085:                 if ($em) {
1086:                     if (strlen($token_stack[0]) == 1) {
1087:                         # Closing emphasis marker:
1088:                         array_shift($token_stack);
1089:                         $span = array_shift($text_stack);
1090:                         $span = $this->runSpanGamut($span);
1091:                         $span = "<em>$span</em>";
1092:                         $text_stack[0] .= $this->hashPart($span);
1093:                         $em = '';
1094:                     } else {
1095:                         $text_stack[0] .= $token;
1096:                     }
1097:                 } else {
1098:                     array_unshift($token_stack, $token);
1099:                     array_unshift($text_stack, '');
1100:                     $em = $token;
1101:                 }
1102:             }
1103:         }
1104:         return $text_stack[0];
1105:     }
1106: 
1107: 
1108:     public function doBlockQuotes($text) {
1109:         $text = preg_replace_callback('/
1110:               (                             # Wrap whole match in $1
1111:                 (?>
1112:                   ^[ ]*>[ ]?            # ">" at the start of a line
1113:                     .+\n                    # rest of the first line
1114:                   (.+\n)*                   # subsequent consecutive lines
1115:                   \n*                       # blanks
1116:                 )+
1117:               )
1118:             /xm',
1119:             array(&$this, '_doBlockQuotes_callback'), $text);
1120: 
1121:         return $text;
1122:     }
1123:     
1124:     public function _doBlockQuotes_callback($matches) {
1125:         $bq = $matches[1];
1126:         # trim one level of quoting - trim whitespace-only lines
1127:         $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
1128:         $bq = $this->runBlockGamut($bq);        # recurse
1129: 
1130:         $bq = preg_replace('/^/m', "  ", $bq);
1131:         # These leading spaces cause problem with <pre> content, 
1132:         # so we need to fix that:
1133:         $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx', 
1134:             array(&$this, '_doBlockQuotes_callback2'), $bq);
1135: 
1136:         return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
1137:     }
1138:     
1139:     public function _doBlockQuotes_callback2($matches) {
1140:         $pre = $matches[1];
1141:         $pre = preg_replace('/^  /m', '', $pre);
1142:         return $pre;
1143:     }
1144: 
1145: 
1146:     public function formParagraphs($text) {
1147:     #
1148:     #   Params:
1149:     #       $text - string to process with html <p> tags
1150:     #
1151:         # Strip leading and trailing lines:
1152:         $text = preg_replace('/\A\n+|\n+\z/', '', $text);
1153: 
1154:         $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1155: 
1156:         #
1157:         # Wrap <p> tags and unhashify HTML blocks
1158:         #
1159:         foreach ($grafs as $key => $value) {
1160:             if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
1161:                 # Is a paragraph.
1162:                 $value = $this->runSpanGamut($value);
1163:                 $value = preg_replace('/^([ ]*)/', "<p>", $value);
1164:                 $value .= "</p>";
1165:                 $grafs[$key] = $this->unhash($value);
1166:             }
1167:             else {
1168:                 # Is a block.
1169:                 # Modify elements of @grafs in-place...
1170:                 $graf = $value;
1171:                 $block = $this->html_hashes[$graf];
1172:                 $graf = $block;
1173: //              if (preg_match('{
1174: //                  \A
1175: //                  (                           # $1 = <div> tag
1176: //                    <div  \s+
1177: //                    [^>]*
1178: //                    \b
1179: //                    markdown\s*=\s*  ([\'"])  #   $2 = attr quote char
1180: //                    1
1181: //                    \2
1182: //                    [^>]*
1183: //                    >
1184: //                  )
1185: //                  (                           # $3 = contents
1186: //                  .*
1187: //                  )
1188: //                  (</div>)                    # $4 = closing tag
1189: //                  \z
1190: //                  }xs', $block, $matches))
1191: //              {
1192: //                  list(, $div_open, , $div_content, $div_close) = $matches;
1193: //
1194: //                  # We can't call Markdown(), because that resets the hash;
1195: //                  # that initialization code should be pulled into its own sub, though.
1196: //                  $div_content = $this->hashHTMLBlocks($div_content);
1197: //
1198: //                  # Run document gamut methods on the content.
1199: //                  foreach ($this->document_gamut as $method => $priority) {
1200: //                      $div_content = $this->$method($div_content);
1201: //                  }
1202: //
1203: //                  $div_open = preg_replace(
1204: //                      '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
1205: //
1206: //                  $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
1207: //              }
1208:                 $grafs[$key] = $graf;
1209:             }
1210:         }
1211: 
1212:         return implode("\n\n", $grafs);
1213:     }
1214: 
1215: 
1216:     public function encodeAttribute($text) {
1217:     #
1218:     # Encode text for a double-quoted HTML attribute. This function
1219:     # is *not* suitable for attributes enclosed in single quotes.
1220:     #
1221:         $text = $this->encodeAmpsAndAngles($text);
1222:         $text = str_replace('"', '&quot;', $text);
1223:         return $text;
1224:     }
1225: 
1226: 
1227:     public function encodeAmpsAndAngles($text) {
1228:     #
1229:     # Smart processing for ampersands and angle brackets that need to
1230:     # be encoded. Valid character entities are left alone unless the
1231:     # no-entities mode is set.
1232:     #
1233:         if ($this->no_entities) {
1234:             $text = str_replace('&', '&amp;', $text);
1235:         } else {
1236:             # Ampersand-encoding based entirely on Nat Irons's Amputator
1237:             # MT plugin: <http://bumppo.net/projects/amputator/>
1238:             $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
1239:                                 '&amp;', $text);;
1240:         }
1241:         # Encode remaining <'s
1242:         $text = str_replace('<', '&lt;', $text);
1243: 
1244:         return $text;
1245:     }
1246: 
1247: 
1248:     public function doAutoLinks($text) {
1249:         $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i',
1250:             array(&$this, '_doAutoLinks_url_callback'), $text);
1251: 
1252:         # Email addresses: <address@domain.foo>
1253:         $text = preg_replace_callback('{
1254:             <
1255:             (?:mailto:)?
1256:             (
1257:                 (?:
1258:                     [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
1259:                 |
1260:                     ".*?"
1261:                 )
1262:                 \@
1263:                 (?:
1264:                     [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
1265:                 |
1266:                     \[[\d.a-fA-F:]+\]   # IPv4 & IPv6
1267:                 )
1268:             )
1269:             >
1270:             }xi',
1271:             array(&$this, '_doAutoLinks_email_callback'), $text);
1272: 
1273:         return $text;
1274:     }
1275:     public function _doAutoLinks_url_callback($matches) {
1276:         $url = $this->encodeAttribute($matches[1]);
1277:         $link = "<a href=\"$url\">$url</a>";
1278:         return $this->hashPart($link);
1279:     }
1280:     public function _doAutoLinks_email_callback($matches) {
1281:         $address = $matches[1];
1282:         $link = $this->encodeEmailAddress($address);
1283:         return $this->hashPart($link);
1284:     }
1285: 
1286: 
1287:     public function encodeEmailAddress($addr) {
1288:     #
1289:     #   Input: an email address, e.g. "foo@example.com"
1290:     #
1291:     #   Output: the email address as a mailto link, with each character
1292:     #       of the address encoded as either a decimal or hex entity, in
1293:     #       the hopes of foiling most address harvesting spam bots. E.g.:
1294:     #
1295:     #     <p><a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
1296:     #        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
1297:     #        &#x6d;">&#x66;o&#111;&#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;
1298:     #        &#101;&#46;&#x63;&#111;&#x6d;</a></p>
1299:     #
1300:     #   Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
1301:     #   With some optimizations by Milian Wolff.
1302:     #
1303:         $addr = "mailto:" . $addr;
1304:         $chars = preg_split('/(?<!^)(?!$)/', $addr);
1305:         $seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.
1306: 
1307:         foreach ($chars as $key => $char) {
1308:             $ord = ord($char);
1309:             # Ignore non-ascii chars.
1310:             if ($ord < 128) {
1311:                 $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
1312:                 # roughly 10% raw, 45% hex, 45% dec
1313:                 # '@' *must* be encoded. I insist.
1314:                 if ($r > 90 && $char != '@') /* do nothing */;
1315:                 else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
1316:                 else              $chars[$key] = '&#'.$ord.';';
1317:             }
1318:         }
1319: 
1320:         $addr = implode('', $chars);
1321:         $text = implode('', array_slice($chars, 7)); # text without `mailto:`
1322:         $addr = "<a href=\"$addr\">$text</a>";
1323: 
1324:         return $addr;
1325:     }
1326: 
1327: 
1328:     public function parseSpan($str) {
1329:     #
1330:     # Take the string $str and parse it into tokens, hashing embeded HTML,
1331:     # escaped characters and handling code spans.
1332:     #
1333:         $output = '';
1334: 
1335:         $span_re = '{
1336:                 (
1337:                     \\\\'.$this->escape_chars_re.'
1338:                 |
1339:                     (?<![`\\\\])
1340:                     `+                      # code span marker
1341:             '.( $this->no_markup ? '' : '
1342:                 |
1343:                     <!--    .*?     -->     # comment
1344:                 |
1345:                     <\?.*?\?> | <%.*?%>     # processing instruction
1346:                 |
1347:                     <[/!$]?[-a-zA-Z0-9:_]+  # regular tags
1348:                     (?>
1349:                         \s
1350:                         (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
1351:                     )?
1352:                     >
1353:             ').'
1354:                 )
1355:                 }xs';
1356: 
1357:         while (1) {
1358:             #
1359:             # Each loop iteration seach for either the next tag, the next
1360:             # openning code span marker, or the next escaped character.
1361:             # Each token is then passed to handleSpanToken.
1362:             #
1363:             $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
1364: 
1365:             # Create token from text preceding tag.
1366:             if ($parts[0] != "") {
1367:                 $output .= $parts[0];
1368:             }
1369: 
1370:             # Check if we reach the end.
1371:             if (isset($parts[1])) {
1372:                 $output .= $this->handleSpanToken($parts[1], $parts[2]);
1373:                 $str = $parts[2];
1374:             }
1375:             else {
1376:                 break;
1377:             }
1378:         }
1379: 
1380:         return $output;
1381:     }
1382: 
1383: 
1384:     public function handleSpanToken($token, &$str) {
1385:     #
1386:     # Handle $token provided by parseSpan by determining its nature and
1387:     # returning the corresponding value that should replace it.
1388:     #
1389:         switch ($token{0}) {
1390:             case "\\":
1391:                 return $this->hashPart("&#". ord($token{1}). ";");
1392:             case "`":
1393:                 # Search for end marker in remaining text.
1394:                 if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
1395:                     $str, $matches))
1396:                 {
1397:                     $str = $matches[2];
1398:                     $codespan = $this->makeCodeSpan($matches[1]);
1399:                     return $this->hashPart($codespan);
1400:                 }
1401:                 return $token; // return as text since no ending marker found.
1402:             default:
1403:                 return $this->hashPart($token);
1404:         }
1405:     }
1406: 
1407: 
1408:     public function outdent($text) {
1409:     #
1410:     # Remove one level of line-leading tabs or spaces
1411:     #
1412:         return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text);
1413:     }
1414: 
1415: 
    # String length function for detab. `_initDetab` will create a function to
    # handle UTF-8 if the default function does not exist.
1418:     public $utf8_strlen = 'mb_strlen';
1419: 
1420:     public function detab($text) {
1421:     #
1422:     # Replace tabs with the appropriate amount of space.
1423:     #
1424:         # For each line we separate the line in blocks delemited by
1425:         # tab characters. Then we reconstruct every line by adding the
1426:         # appropriate number of space between each blocks.
1427: 
1428:         $text = preg_replace_callback('/^.*\t.*$/m',
1429:             array(&$this, '_detab_callback'), $text);
1430: 
1431:         return $text;
1432:     }
1433:     public function _detab_callback($matches) {
1434:         $line = $matches[0];
1435:         $strlen = $this->utf8_strlen; # strlen function for UTF-8.
1436: 
1437:         # Split in blocks.
1438:         $blocks = explode("\t", $line);
1439:         # Add each blocks to the line.
1440:         $line = $blocks[0];
1441:         unset($blocks[0]); # Do not add first block twice.
1442:         foreach ($blocks as $block) {
1443:             # Calculate amount of space, insert spaces, insert block.
1444:             $amount = $this->tab_width -
1445:                 $strlen($line, 'UTF-8') % $this->tab_width;
1446:             $line .= str_repeat(" ", $amount) . $block;
1447:         }
1448:         return $line;
1449:     }
1450:     public function _initDetab() {
1451:     #
1452:     # Check for the availability of the function in the `utf8_strlen` property
1453:     # (initially `mb_strlen`). If the function is not available, create a
1454:     # function that will loosely count the number of UTF-8 characters with a
1455:     # regular expression.
1456:     #
1457:         if (function_exists($this->utf8_strlen)) return;
1458:         $this->utf8_strlen = create_function('$text', 'return preg_match_all(
1459:             "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/",
1460:             $text, $m);');
1461:     }
1462: 
1463: 
1464:     public function unhash($text) {
1465:     #
1466:     # Swap back in all the tags hashed by _HashHTMLBlocks.
1467:     #
1468:         return preg_replace_callback('/(.)\x1A[0-9]+\1/',
1469:             array(&$this, '_unhash_callback'), $text);
1470:     }
1471:     public function _unhash_callback($matches) {
1472:         return $this->html_hashes[$matches[0]];
1473:     }
1474: 
1475: }
1476: 
1477: 
1478: #
1479: # Markdown Extra Parser Class
1480: #
1481: 
1482: class MarkdownExtra_Parser extends Markdown_Parser {
1483: 
1484:     # Prefix for footnote ids.
1485:     public $fn_id_prefix = "";
1486: 
1487:     # Optional title attribute for footnote links and backlinks.
1488:     public $fn_link_title = '';
1489:     public $fn_backlink_title = '';
1490: 
1491:     # Optional class attribute for footnote links and backlinks.
1492:     public $fn_link_class = '';
1493:     public $fn_backlink_class = '';
1494: 
1495:     # Predefined abbreviations.
1496:     public $predef_abbr = array();
1497: 
1498: 
1499:     public function __construct() {
1500:     #
1501:     # Constructor function. Initialize the parser object.
1502:     #
1503:         # Add extra escapable characters before parent constructor
1504:         # initialize the table.
1505:         $this->escape_chars .= ':|';
1506: 
1507:         # Insert extra document, block, and span transformations.
1508:         # Parent constructor will do the sorting.
1509:         $this->document_gamut += array(
1510:             "doFencedCodeBlocks" => 5,
1511:             "stripFootnotes"     => 15,
1512:             "stripAbbreviations" => 25,
1513:             "appendFootnotes"    => 50,
1514:             );
1515:         $this->block_gamut += array(
1516:             "doFencedCodeBlocks" => 5,
1517:             "doTables"           => 15,
1518:             "doDefLists"         => 45,
1519:             );
1520:         $this->span_gamut += array(
1521:             "doFootnotes"        => 5,
1522:             "doAbbreviations"    => 70,
1523:             );
1524: 
1525:         parent::__construct();
1526:     }
1527: 
1528: 
1529:     # Extra variables used during extra transformations.
1530:     public $footnotes = array();
1531:     public $footnotes_ordered = array();
1532:     public $abbr_desciptions = array();
1533:     public $abbr_word_re = '';
1534: 
1535:     # Give the current footnote number.
1536:     public $footnote_counter = 1;
1537: 
1538: 
1539:     public function setup() {
1540:     #
1541:     # Setting up Extra-specific variables.
1542:     #
1543:         parent::setup();
1544: 
1545:         $this->footnotes = array();
1546:         $this->footnotes_ordered = array();
1547:         $this->abbr_desciptions = array();
1548:         $this->abbr_word_re = '';
1549:         $this->footnote_counter = 1;
1550: 
1551:         foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
1552:             if ($this->abbr_word_re)
1553:                 $this->abbr_word_re .= '|';
1554:             $this->abbr_word_re .= preg_quote($abbr_word);
1555:             $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
1556:         }
1557:     }
1558: 
1559:     public function teardown() {
1560:     #
1561:     # Clearing Extra-specific variables.
1562:     #
1563:         $this->footnotes = array();
1564:         $this->footnotes_ordered = array();
1565:         $this->abbr_desciptions = array();
1566:         $this->abbr_word_re = '';
1567: 
1568:         parent::teardown();
1569:     }
1570: 
1571: 
1572:     ### HTML Block Parser ###
1573: 
    # The values below are regex alternation fragments; they are spliced
    # into the patterns built by the _hashHTMLBlocks_* methods.

    # Tags that are always treated as block tags:
    public $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';

    # Tags treated as block tags only if the opening tag is alone on its line:
    public $context_block_tags_re = 'script|noscript|math|ins|del';

    # Tags where markdown="1" defaults to span mode:
    public $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';

    # Tags which must not have their contents modified, no matter where
    # they appear:
    public $clean_tags_re = 'script|math';

    # Tags that do not need to be closed.
    public $auto_close_tags_re = 'hr|img';
1589: 
1590: 
1591:     public function hashHTMLBlocks($text) {
1592:     #
1593:     # Hashify HTML Blocks and "clean tags".
1594:     #
1595:     # We only want to do this for block-level HTML tags, such as headers,
1596:     # lists, and tables. That's because we still want to wrap <p>s around
1597:     # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
1598:     # phrase emphasis, and spans. The list of tags we're looking for is
1599:     # hard-coded.
1600:     #
1601:     # This works by calling _HashHTMLBlocks_InMarkdown, which then calls
1602:     # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1"
1603:     # attribute is found whitin a tag, _HashHTMLBlocks_InHTML calls back
1604:     #  _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
1605:     # These two functions are calling each other. It's recursive!
1606:     #
1607:         #
1608:         # Call the HTML-in-Markdown hasher.
1609:         #
1610:         list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text);
1611: 
1612:         return $text;
1613:     }
1614:     public function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
1615:                                         $enclosing_tag_re = '', $span = false)
1616:     {
1617:     #
1618:     # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
1619:     #
1620:     # *   $indent is the number of space to be ignored when checking for code
1621:     #     blocks. This is important because if we don't take the indent into
1622:     #     account, something like this (which looks right) won't work as expected:
1623:     #
1624:     #     <div>
1625:     #         <div markdown="1">
1626:     #         Hello World.  <-- Is this a Markdown code block or text?
1627:     #         </div>  <-- Is this a Markdown code block or a real tag?
1628:     #     <div>
1629:     #
1630:     #     If you don't like this, just don't indent the tag on which
1631:     #     you apply the markdown="1" attribute.
1632:     #
1633:     # *   If $enclosing_tag_re is not empty, stops at the first unmatched closing
1634:     #     tag with that name. Nested tags supported.
1635:     #
1636:     # *   If $span is true, text inside must treated as span. So any double
1637:     #     newline will be replaced by a single newline so that it does not create
1638:     #     paragraphs.
1639:     #
1640:     # Returns an array of that form: ( processed text , remaining text )
1641:     #
1642:         if ($text === '') return array('', '');
1643: 
1644:         # Regex to check for the presense of newlines around a block tag.
1645:         $newline_before_re = '/(?:^\n?|\n\n)*$/';
1646:         $newline_after_re =
1647:             '{
1648:                 ^                       # Start of text following the tag.
1649:                 (?>[ ]*<!--.*?-->)?     # Optional comment.
1650:                 [ ]*\n                  # Must be followed by newline.
1651:             }xs';
1652: 
1653:         # Regex to match any tag.
1654:         $block_tag_re =
1655:             '{
1656:                 (                   # $2: Capture hole tag.
1657:                     </?                 # Any opening or closing tag.
1658:                         (?>             # Tag name.
1659:                             '.$this->block_tags_re.'            |
1660:                             '.$this->context_block_tags_re.'    |
1661:                             '.$this->clean_tags_re.'            |
1662:                             (?!\s)'.$enclosing_tag_re.'
1663:                         )
1664:                         (?:
1665:                             (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
1666:                             (?>
1667:                                 ".*?"       |   # Double quotes (can contain `>`)
1668:                                 \'.*?\'     |   # Single quotes (can contain `>`)
1669:                                 .+?             # Anything but quotes and `>`.
1670:                             )*?
1671:                         )?
1672:                     >                   # End of tag.
1673:                 |
1674:                     <!--    .*?     --> # HTML Comment
1675:                 |
1676:                     <\?.*?\?> | <%.*?%> # Processing instruction
1677:                 |
1678:                     <!\[CDATA\[.*?\]\]> # CData Block
1679:                 |
1680:                     # Code span marker
1681:                     `+
1682:                 '. ( !$span ? ' # If not in span.
1683:                 |
1684:                     # Indented code block
1685:                     (?: ^[ ]*\n | ^ | \n[ ]*\n )
1686:                     [ ]{'.($indent+4).'}[^\n]* \n
1687:                     (?>
1688:                         (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
1689:                     )*
1690:                 |
1691:                     # Fenced code block marker
1692:                     (?> ^ | \n )
1693:                     [ ]{0,'.($indent).'}~~~+[ ]*\n
1694:                 ' : '' ). ' # End (if not is span).
1695:                 )
1696:             }xs';
1697: 
1698: 
1699:         $depth = 0;     # Current depth inside the tag tree.
1700:         $parsed = "";   # Parsed text that will be returned.
1701: 
1702:         #
1703:         # Loop through every tag until we find the closing tag of the parent
1704:         # or loop until reaching the end of text if no parent tag specified.
1705:         #
1706:         do {
1707:             #
1708:             # Split the text using the first $tag_match pattern found.
1709:             # Text before  pattern will be first in the array, text after
1710:             # pattern will be at the end, and between will be any catches made
1711:             # by the pattern.
1712:             #
1713:             $parts = preg_split($block_tag_re, $text, 2,
1714:                                 PREG_SPLIT_DELIM_CAPTURE);
1715: 
1716:             # If in Markdown span mode, add a empty-string span-level hash
1717:             # after each newline to prevent triggering any block element.
1718:             if ($span) {
1719:                 $void = $this->hashPart("", ':');
1720:                 $newline = "$void\n";
1721:                 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
1722:             }
1723: 
1724:             $parsed .= $parts[0]; # Text before current tag.
1725: 
1726:             # If end of $text has been reached. Stop loop.
1727:             if (count($parts) < 3) {
1728:                 $text = "";
1729:                 break;
1730:             }
1731: 
1732:             $tag  = $parts[1]; # Tag to handle.
1733:             $text = $parts[2]; # Remaining text after current tag.
1734:             $tag_re = preg_quote($tag); # For use in a regular expression.
1735: 
1736:             #
1737:             # Check for: Code span marker
1738:             #
1739:             if ($tag{0} == "`") {
1740:                 # Find corresponding end marker.
1741:                 $tag_re = preg_quote($tag);
1742:                 if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
1743:                     $text, $matches))
1744:                 {
1745:                     # End marker found: pass text unchanged until marker.
1746:                     $parsed .= $tag . $matches[0];
1747:                     $text = substr($text, strlen($matches[0]));
1748:                 }
1749:                 else {
1750:                     # Unmatched marker: just skip it.
1751:                     $parsed .= $tag;
1752:                 }
1753:             }
1754:             #
1755:             # Check for: Fenced code block marker.
1756:             #
1757:             else if (preg_match('{^\n?[ ]{0,'.($indent+3).'}~}', $tag)) {
1758:                 # Fenced code block marker: find matching end marker.
1759:                 $tag_re = preg_quote(trim($tag));
1760:                 if (preg_match('{^(?>.*\n)+?[ ]{0,'.($indent).'}'.$tag_re.'[ ]*\n}', $text, 
1761:                     $matches)) 
1762:                 {
1763:                     # End marker found: pass text unchanged until marker.
1764:                     $parsed .= $tag . $matches[0];
1765:                     $text = substr($text, strlen($matches[0]));
1766:                 }
1767:                 else {
1768:                     # No end marker: just skip it.
1769:                     $parsed .= $tag;
1770:                 }
1771:             }
1772:             #
1773:             # Check for: Indented code block.
1774:             #
1775:             else if ($tag{0} == "\n" || $tag{0} == " ") {
1776:                 # Indented code block: pass it unchanged, will be handled 
1777:                 # later.
1778:                 $parsed .= $tag;
1779:             }
1780:             #
1781:             # Check for: Opening Block level tag or
1782:             #            Opening Context Block tag (like ins and del)
1783:             #               used as a block tag (tag is alone on it's line).
1784:             #
1785:             else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
1786:                 (   preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
1787:                     preg_match($newline_before_re, $parsed) &&
1788:                     preg_match($newline_after_re, $text)    )
1789:                 )
1790:             {
1791:                 # Need to parse tag and following text using the HTML parser.
1792:                 list($block_text, $text) =
1793:                     $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
1794: 
1795:                 # Make sure it stays outside of any paragraph by adding newlines.
1796:                 $parsed .= "\n\n$block_text\n\n";
1797:             }
1798:             #
1799:             # Check for: Clean tag (like script, math)
1800:             #            HTML Comments, processing instructions.
1801:             #
1802:             else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
1803:                 $tag{1} == '!' || $tag{1} == '?')
1804:             {
1805:                 # Need to parse tag and following text using the HTML parser.
1806:                 # (don't check for markdown attribute)
1807:                 list($block_text, $text) =
1808:                     $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
1809: 
1810:                 $parsed .= $block_text;
1811:             }
1812:             #
1813:             # Check for: Tag with same name as enclosing tag.
1814:             #
1815:             else if ($enclosing_tag_re !== '' &&
1816:                 # Same name as enclosing tag.
1817:                 preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
1818:             {
1819:                 #
1820:                 # Increase/decrease nested tag count.
1821:                 #
1822:                 if ($tag{1} == '/')                     $depth--;
1823:                 else if ($tag{strlen($tag)-2} != '/')   $depth++;
1824: 
1825:                 if ($depth < 0) {
1826:                     #
1827:                     # Going out of parent element. Clean up and break so we
1828:                     # return to the calling function.
1829:                     #
1830:                     $text = $tag . $text;
1831:                     break;
1832:                 }
1833: 
1834:                 $parsed .= $tag;
1835:             }
1836:             else {
1837:                 $parsed .= $tag;
1838:             }
1839:         } while ($depth >= 0);
1840: 
1841:         return array($parsed, $text);
1842:     }
1843:     public function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
1844:     #
1845:     # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
1846:     #
1847:     # *   Calls $hash_method to convert any blocks.
1848:     # *   Stops when the first opening tag closes.
1849:     # *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
1850:     #     (it is not inside clean tags)
1851:     #
1852:     # Returns an array of that form: ( processed text , remaining text )
1853:     #
1854:         if ($text === '') return array('', '');
1855: 
1856:         # Regex to match `markdown` attribute inside of a tag.
1857:         $markdown_attr_re = '
1858:             {
1859:                 \s*         # Eat whitespace before the `markdown` attribute
1860:                 markdown
1861:                 \s*=\s*
1862:                 (?>
1863:                     (["\'])     # $1: quote delimiter
1864:                     (.*?)       # $2: attribute value
1865:                     \1          # matching delimiter
1866:                 |
1867:                     ([^\s>]*)   # $3: unquoted attribute value
1868:                 )
1869:                 ()              # $4: make $3 always defined (avoid warnings)
1870:             }xs';
1871: 
1872:         # Regex to match any tag.
1873:         $tag_re = '{
1874:                 (                   # $2: Capture hole tag.
1875:                     </?                 # Any opening or closing tag.
1876:                         [\w:$]+         # Tag name.
1877:                         (?:
1878:                             (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
1879:                             (?>
1880:                                 ".*?"       |   # Double quotes (can contain `>`)
1881:                                 \'.*?\'     |   # Single quotes (can contain `>`)
1882:                                 .+?             # Anything but quotes and `>`.
1883:                             )*?
1884:                         )?
1885:                     >                   # End of tag.
1886:                 |
1887:                     <!--    .*?     --> # HTML Comment
1888:                 |
1889:                     <\?.*?\?> | <%.*?%> # Processing instruction
1890:                 |
1891:                     <!\[CDATA\[.*?\]\]> # CData Block
1892:                 )
1893:             }xs';
1894: 
1895:         $original_text = $text;     # Save original text in case of faliure.
1896: 
1897:         $depth      = 0;    # Current depth inside the tag tree.
1898:         $block_text = "";   # Temporary text holder for current text.
1899:         $parsed     = "";   # Parsed text that will be returned.
1900: 
1901:         #
1902:         # Get the name of the starting tag.
1903:         # (This pattern makes $base_tag_name_re safe without quoting.)
1904:         #
1905:         if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
1906:             $base_tag_name_re = $matches[1];
1907: 
1908:         #
1909:         # Loop through every tag until we find the corresponding closing tag.
1910:         #
1911:         do {
1912:             #
1913:             # Split the text using the first $tag_match pattern found.
1914:             # Text before  pattern will be first in the array, text after
1915:             # pattern will be at the end, and between will be any catches made
1916:             # by the pattern.
1917:             #
1918:             $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
1919: 
1920:             if (count($parts) < 3) {
1921:                 #
1922:                 # End of $text reached with unbalenced tag(s).
1923:                 # In that case, we return original text unchanged and pass the
1924:                 # first character as filtered to prevent an infinite loop in the
1925:                 # parent function.
1926:                 #
1927:                 return array($original_text{0}, substr($original_text, 1));
1928:             }
1929: 
1930:             $block_text .= $parts[0]; # Text before current tag.
1931:             $tag         = $parts[1]; # Tag to handle.
1932:             $text        = $parts[2]; # Remaining text after current tag.
1933: 
1934:             #
1935:             # Check for: Auto-close tag (like <hr/>)
1936:             #            Comments and Processing Instructions.
1937:             #
1938:             if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
1939:                 $tag{1} == '!' || $tag{1} == '?')
1940:             {
1941:                 # Just add the tag to the block as if it was text.
1942:                 $block_text .= $tag;
1943:             }
1944:             else {
1945:                 #
1946:                 # Increase/decrease nested tag count. Only do so if
1947:                 # the tag's name match base tag's.
1948:                 #
1949:                 if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
1950:                     if ($tag{1} == '/')                     $depth--;
1951:                     else if ($tag{strlen($tag)-2} != '/')   $depth++;
1952:                 }
1953: 
1954:                 #
1955:                 # Check for `markdown="1"` attribute and handle it.
1956:                 #
1957:                 if ($md_attr &&
1958:                     preg_match($markdown_attr_re, $tag, $attr_m) &&
1959:                     preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))
1960:                 {
1961:                     # Remove `markdown` attribute from opening tag.
1962:                     $tag = preg_replace($markdown_attr_re, '', $tag);
1963: 
1964:                     # Check if text inside this tag must be parsed in span mode.
1965:                     $this->mode = $attr_m[2] . $attr_m[3];
1966:                     $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
1967:                         preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);
1968: 
1969:                     # Calculate indent before tag.
1970:                     if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
1971:                         $strlen = $this->utf8_strlen;
1972:                         $indent = $strlen($matches[1], 'UTF-8');
1973:                     } else {
1974:                         $indent = 0;
1975:                     }
1976: 
1977:                     # End preceding block with this tag.
1978:                     $block_text .= $tag;
1979:                     $parsed .= $this->$hash_method($block_text);
1980: 
1981:                     # Get enclosing tag name for the ParseMarkdown function.
1982:                     # (This pattern makes $tag_name_re safe without quoting.)
1983:                     preg_match('/^<([\w:$]*)\b/', $tag, $matches);
1984:                     $tag_name_re = $matches[1];
1985: 
1986:                     # Parse the content using the HTML-in-Markdown parser.
1987:                     list ($block_text, $text)
1988:                         = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
1989:                             $tag_name_re, $span_mode);
1990: 
1991:                     # Outdent markdown text.
1992:                     if ($indent > 0) {
1993:                         $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
1994:                                                     $block_text);
1995:                     }
1996: 
1997:                     # Append tag content to parsed text.
1998:                     if (!$span_mode)    $parsed .= "\n\n$block_text\n\n";
1999:                     else                $parsed .= "$block_text";
2000: 
2001:                     # Start over a new block.
2002:                     $block_text = "";
2003:                 }
2004:                 else $block_text .= $tag;
2005:             }
2006: 
2007:         } while ($depth > 0);
2008: 
2009:         #
2010:         # Hash last block text that wasn't processed inside the loop.
2011:         #
2012:         $parsed .= $this->$hash_method($block_text);
2013: 
2014:         return array($parsed, $text);
2015:     }
2016: 
2017: 
2018:     public function hashClean($text) {
2019:     #
2020:     # Called whenever a tag must be hashed when a function insert a "clean" tag
2021:     # in $text, it pass through this function and is automaticaly escaped,
2022:     # blocking invalid nested overlap.
2023:     #
2024:         return $this->hashPart($text, 'C');
2025:     }
2026: 
2027: 
2028:     public function doHeaders($text) {
2029:     #
2030:     # Redefined to add id attribute support.
2031:     #
2032:         # Setext-style headers:
2033:         #     Header 1  {#header1}
2034:         #     ========
2035:         #
2036:         #     Header 2  {#header2}
2037:         #     --------
2038:         #
2039:         $text = preg_replace_callback(
2040:             '{
2041:                 (^.+?)                              # $1: Header text
2042:                 (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})?    # $2: Id attribute
2043:                 [ ]*\n(=+|-+)[ ]*\n+                # $3: Header footer
2044:             }mx',
2045:             array(&$this, '_doHeaders_callback_setext'), $text);
2046: 
2047:         # atx-style headers:
2048:         #   # Header 1        {#header1}
2049:         #   ## Header 2       {#header2}
2050:         #   ## Header 2 with closing hashes ##  {#header3}
2051:         #   ...
2052:         #   ###### Header 6   {#header2}
2053:         #
2054:         $text = preg_replace_callback('{
2055:                 ^(\#{1,6})  # $1 = string of #\'s
2056:                 [ ]*
2057:                 (.+?)       # $2 = Header text
2058:                 [ ]*
2059:                 \#*         # optional closing #\'s (not counted)
2060:                 (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute
2061:                 [ ]*
2062:                 \n+
2063:             }xm',
2064:             array(&$this, '_doHeaders_callback_atx'), $text);
2065: 
2066:         return $text;
2067:     }
2068:     public function _doHeaders_attr($attr) {
2069:         if (empty($attr))  return "";
2070:         return " id=\"$attr\"";
2071:     }
2072:     public function _doHeaders_callback_setext($matches) {
2073:         if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
2074:             return $matches[0];
2075:         $level = $matches[3]{0} == '=' ? 1 : 2;
2076:         $attr  = $this->_doHeaders_attr($id =& $matches[2]);
2077:         $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
2078:         return "\n" . $this->hashBlock($block) . "\n\n";
2079:     }
2080:     public function _doHeaders_callback_atx($matches) {
2081:         $level = strlen($matches[1]);
2082:         $attr  = $this->_doHeaders_attr($id =& $matches[3]);
2083:         $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>";
2084:         return "\n" . $this->hashBlock($block) . "\n\n";
2085:     }
2086: 
2087: 
2088:     public function doTables($text) {
2089:     #
2090:     # Form HTML tables.
2091:     #
2092:         $less_than_tab = $this->tab_width - 1;
2093:         #
2094:         # Find tables with leading pipe.
2095:         #
2096:         #   | Header 1 | Header 2
2097:         #   | -------- | --------
2098:         #   | Cell 1   | Cell 2
2099:         #   | Cell 3   | Cell 4
2100:         #
2101:         $text = preg_replace_callback('
2102:             {
2103:                 ^                           # Start of a line
2104:                 [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
2105:                 [|]                         # Optional leading pipe (present)
2106:                 (.+) \n                     # $1: Header row (at least one pipe)
2107: 
2108:                 [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
2109:                 [|] ([ ]*[-:]+[-| :]*) \n   # $2: Header underline
2110: 
2111:                 (                           # $3: Cells
2112:                     (?>
2113:                         [ ]*                # Allowed whitespace.
2114:                         [|] .* \n           # Row content.
2115:                     )*
2116:                 )
2117:                 (?=\n|\Z)                   # Stop at final double newline.
2118:             }xm',
2119:             array(&$this, '_doTable_leadingPipe_callback'), $text);
2120: 
2121:         #
2122:         # Find tables without leading pipe.
2123:         #
2124:         #   Header 1 | Header 2
2125:         #   -------- | --------
2126:         #   Cell 1   | Cell 2
2127:         #   Cell 3   | Cell 4
2128:         #
2129:         $text = preg_replace_callback('
2130:             {
2131:                 ^                           # Start of a line
2132:                 [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
2133:                 (\S.*[|].*) \n              # $1: Header row (at least one pipe)
2134: 
2135:                 [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
2136:                 ([-:]+[ ]*[|][-| :]*) \n    # $2: Header underline
2137: 
2138:                 (                           # $3: Cells
2139:                     (?>
2140:                         .* [|] .* \n        # Row content
2141:                     )*
2142:                 )
2143:                 (?=\n|\Z)                   # Stop at final double newline.
2144:             }xm',
2145:             array(&$this, '_DoTable_callback'), $text);
2146: 
2147:         return $text;
2148:     }
2149:     public function _doTable_leadingPipe_callback($matches) {
2150:         $head       = $matches[1];
2151:         $underline  = $matches[2];
2152:         $content    = $matches[3];
2153: 
2154:         # Remove leading pipe for each row.
2155:         $content    = preg_replace('/^ *[|]/m', '', $content);
2156: 
2157:         return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
2158:     }
2159:     public function _doTable_callback($matches) {
2160:         $head       = $matches[1];
2161:         $underline  = $matches[2];
2162:         $content    = $matches[3];
2163: 
2164:         # Remove any tailing pipes for each line.
2165:         $head       = preg_replace('/[|] *$/m', '', $head);
2166:         $underline  = preg_replace('/[|] *$/m', '', $underline);
2167:         $content    = preg_replace('/[|] *$/m', '', $content);
2168: 
2169:         # Reading alignement from header underline.
2170:         $separators = preg_split('/ *[|] */', $underline);
2171:         foreach ($separators as $n => $s) {
2172:             if (preg_match('/^ *-+: *$/', $s))      $attr[$n] = ' align="right"';
2173:             else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"';
2174:             else if (preg_match('/^ *:-+ *$/', $s)) $attr[$n] = ' align="left"';
2175:             else                                    $attr[$n] = '';
2176:         }
2177: 
2178:         # Parsing span elements, including code spans, character escapes,
2179:         # and inline HTML tags, so that pipes inside those gets ignored.
2180:         $head       = $this->parseSpan($head);
2181:         $headers    = preg_split('/ *[|] */', $head);
2182:         $col_count  = count($headers);
2183: 
2184:         # Write column headers.
2185:         $text = "<table>\n";
2186:         $text .= "<thead>\n";
2187:         $text .= "<tr>\n";
2188:         foreach ($headers as $n => $header)
2189:             $text .= "  <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
2190:         $text .= "</tr>\n";
2191:         $text .= "</thead>\n";
2192: 
2193:         # Split content by row.
2194:         $rows = explode("\n", trim($content, "\n"));
2195: 
2196:         $text .= "<tbody>\n";
2197:         foreach ($rows as $row) {
2198:             # Parsing span elements, including code spans, character escapes,
2199:             # and inline HTML tags, so that pipes inside those gets ignored.
2200:             $row = $this->parseSpan($row);
2201: 
2202:             # Split row by cell.
2203:             $row_cells = preg_split('/ *[|] */', $row, $col_count);
2204:             $row_cells = array_pad($row_cells, $col_count, '');
2205: 
2206:             $text .= "<tr>\n";
2207:             foreach ($row_cells as $n => $cell)
2208:                 $text .= "  <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
2209:             $text .= "</tr>\n";
2210:         }
2211:         $text .= "</tbody>\n";
2212:         $text .= "</table>";
2213: 
2214:         return $this->hashBlock($text) . "\n";
2215:     }
2216: 
2217: 
2218:     public function doDefLists($text) {
2219:     #
2220:     # Form HTML definition lists.
2221:     #
2222:         $less_than_tab = $this->tab_width - 1;
2223: 
2224:         # Re-usable pattern to match any entire dl list:
2225:         $whole_list_re = '(?>
2226:             (                               # $1 = whole list
2227:               (                             # $2
2228:                 [ ]{0,'.$less_than_tab.'}
2229:                 ((?>.*\S.*\n)+)             # $3 = defined term
2230:                 \n?
2231:                 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2232:               )
2233:               (?s:.+?)
2234:               (                             # $4
2235:                   \z
2236:                 |
2237:                   \n{2,}
2238:                   (?=\S)
2239:                   (?!                       # Negative lookahead for another term
2240:                     [ ]{0,'.$less_than_tab.'}
2241:                     (?: \S.*\n )+?          # defined term
2242:                     \n?
2243:                     [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2244:                   )
2245:                   (?!                       # Negative lookahead for another definition
2246:                     [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2247:                   )
2248:               )
2249:             )
2250:         )'; // mx
2251: 
2252:         $text = preg_replace_callback('{
2253:                 (?>\A\n?|(?<=\n\n))
2254:                 '.$whole_list_re.'
2255:             }mx',
2256:             array(&$this, '_doDefLists_callback'), $text);
2257: 
2258:         return $text;
2259:     }
2260:     public function _doDefLists_callback($matches) {
2261:         # Re-usable patterns to match list item bullets and number markers:
2262:         $list = $matches[1];
2263: 
2264:         # Turn double returns into triple returns, so that we can make a
2265:         # paragraph for the last item in a list, if necessary:
2266:         $result = trim($this->processDefListItems($list));
2267:         $result = "<dl>\n" . $result . "\n</dl>";
2268:         return $this->hashBlock($result) . "\n\n";
2269:     }
2270: 
2271: 
2272:     public function processDefListItems($list_str) {
2273:     #
2274:     #   Process the contents of a single definition list, splitting it
2275:     #   into individual term and definition list items.
2276:     #
2277:         $less_than_tab = $this->tab_width - 1;
2278: 
2279:         # trim trailing blank lines:
2280:         $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
2281: 
2282:         # Process definition terms.
2283:         $list_str = preg_replace_callback('{
2284:             (?>\A\n?|\n\n+)                 # leading line
2285:             (                               # definition terms = $1
2286:                 [ ]{0,'.$less_than_tab.'}   # leading whitespace
2287:                 (?![:][ ]|[ ])              # negative lookahead for a definition
2288:                                             #   mark (colon) or more whitespace.
2289:                 (?> \S.* \n)+?              # actual term (not whitespace).
2290:             )
2291:             (?=\n?[ ]{0,3}:[ ])             # lookahead for following line feed
2292:                                             #   with a definition mark.
2293:             }xm',
2294:             array(&$this, '_processDefListItems_callback_dt'), $list_str);
2295: 
2296:         # Process actual definitions.
2297:         $list_str = preg_replace_callback('{
2298:             \n(\n+)?                        # leading line = $1
2299:             (                               # marker space = $2
2300:                 [ ]{0,'.$less_than_tab.'}   # whitespace before colon
2301:                 [:][ ]+                     # definition mark (colon)
2302:             )
2303:             ((?s:.+?))                      # definition text = $3
2304:             (?= \n+                         # stop at next definition mark,
2305:                 (?:                         # next term or end of text
2306:                     [ ]{0,'.$less_than_tab.'} [:][ ]    |
2307:                     <dt> | \z
2308:                 )
2309:             )
2310:             }xm',
2311:             array(&$this, '_processDefListItems_callback_dd'), $list_str);
2312: 
2313:         return $list_str;
2314:     }
2315:     public function _processDefListItems_callback_dt($matches) {
2316:         $terms = explode("\n", trim($matches[1]));
2317:         $text = '';
2318:         foreach ($terms as $term) {
2319:             $term = $this->runSpanGamut(trim($term));
2320:             $text .= "\n<dt>" . $term . "</dt>";
2321:         }
2322:         return $text . "\n";
2323:     }
2324:     public function _processDefListItems_callback_dd($matches) {
2325:         $leading_line   = $matches[1];
2326:         $marker_space   = $matches[2];
2327:         $def            = $matches[3];
2328: 
2329:         if ($leading_line || preg_match('/\n{2,}/', $def)) {
2330:             # Replace marker with the appropriate whitespace indentation
2331:             $def = str_repeat(' ', strlen($marker_space)) . $def;
2332:             $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
2333:             $def = "\n". $def ."\n";
2334:         }
2335:         else {
2336:             $def = rtrim($def);
2337:             $def = $this->runSpanGamut($this->outdent($def));
2338:         }
2339: 
2340:         return "\n<dd>" . $def . "</dd>\n";
2341:     }
2342: 
2343: 
2344:     public function doFencedCodeBlocks($text) {
2345:     #
2346:     # Adding the fenced code block syntax to regular Markdown:
2347:     #
2348:     # ~~~
2349:     # Code block
2350:     # ~~~
2351:     #
2352:         $less_than_tab = $this->tab_width;
2353: 
2354:         $text = preg_replace_callback('{
2355:                 (?:\n|\A)
2356:                 # 1: Opening marker
2357:                 (
2358:                     ~{3,} # Marker: three tilde or more.
2359:                 )
2360:                 [ ]* \n # Whitespace and newline following marker.
2361: 
2362:                 # 2: Content
2363:                 (
2364:                     (?>
2365:                         (?!\1 [ ]* \n)  # Not a closing marker.
2366:                         .*\n+
2367:                     )+
2368:                 )
2369: 
2370:                 # Closing marker.
2371:                 \1 [ ]* \n
2372:             }xm',
2373:             array(&$this, '_doFencedCodeBlocks_callback'), $text);
2374: 
2375:         return $text;
2376:     }
2377:     public function _doFencedCodeBlocks_callback($matches) {
2378:         $codeblock = $matches[2];
2379:         $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
2380:         $codeblock = preg_replace_callback('/^\n+/',
2381:             array(&$this, '_doFencedCodeBlocks_newlines'), $codeblock);
2382:         $codeblock = "<pre><code>$codeblock</code></pre>";
2383:         return "\n\n".$this->hashBlock($codeblock)."\n\n";
2384:     }
2385:     public function _doFencedCodeBlocks_newlines($matches) {
2386:         return str_repeat("<br$this->empty_element_suffix",
2387:             strlen($matches[0]));
2388:     }
2389: 
2390: 
    #
    # Redefining emphasis markers so that emphasis by underscore does not
    # work in the middle of a word.
    #
    # Each table maps a key to a regex fragment for one emphasis strength:
    # one marker character (em), two (strong) or three (em + strong).
    # NOTE(review): the keys look like the emphasis token that is currently
    # open ('' meaning none), presumably consumed by prepareItalicsAndBold()
    # which the constructor calls -- confirm there.
    #
    # In the '' entries an underscore marker must not be preceded by a
    # letter, digit or underscore, and any marker must be followed by a
    # non-space character (or end of line) that is not punctuation followed
    # by whitespace. In the '_', '__' and '___' entries the marker must be
    # preceded by a non-space character and must not be followed by a
    # letter, digit or underscore. Asterisk markers have no such
    # word-character restriction.
    #
    public $em_relist = array(
        ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?=\S|$)(?![\.,:;]\s)',
        '*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
        '_' => '(?<=\S|^)(?<!_)_(?![a-zA-Z0-9_])',
        );
    public $strong_relist = array(
        ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?=\S|$)(?![\.,:;]\s)',
        '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
        '__' => '(?<=\S|^)(?<!_)__(?![a-zA-Z0-9_])',
        );
    public $em_strong_relist = array(
        ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?=\S|$)(?![\.,:;]\s)',
        '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
        '___' => '(?<=\S|^)(?<!_)___(?![a-zA-Z0-9_])',
        );
2410: 
2411: 
2412:     public function formParagraphs($text) {
2413:     #
2414:     #   Params:
2415:     #       $text - string to process with html <p> tags
2416:     #
2417:         # Strip leading and trailing lines:
2418:         $text = preg_replace('/\A\n+|\n+\z/', '', $text);
2419:         
2420:         $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
2421: 
2422:         #
2423:         # Wrap <p> tags and unhashify HTML blocks
2424:         #
2425:         foreach ($grafs as $key => $value) {
2426:             $value = trim($this->runSpanGamut($value));
2427: 
2428:             # Check if this should be enclosed in a paragraph.
2429:             # Clean tag hashes & block tag hashes are left alone.
2430:             $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);
2431: 
2432:             if ($is_p) {
2433:                 $value = "<p>$value</p>";
2434:             }
2435:             $grafs[$key] = $value;
2436:         }
2437: 
2438:         # Join grafs in one text, then unhash HTML tags.
2439:         $text = implode("\n\n", $grafs);
2440: 
2441:         # Finish by removing any tag hashes still present in $text.
2442:         $text = $this->unhash($text);
2443: 
2444:         return $text;
2445:     }
2446: 
2447: 
2448:     ### Footnotes
2449: 
2450:     public function stripFootnotes($text) {
2451:     #
2452:     # Strips link definitions from text, stores the URLs and titles in
2453:     # hash references.
2454:     #
2455:         $less_than_tab = $this->tab_width - 1;
2456: 
2457:         # Link defs are in the form: [^id]: url "optional title"
2458:         $text = preg_replace_callback('{
2459:             ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:  # note_id = $1
2460:               [ ]*
2461:               \n?                   # maybe *one* newline
2462:             (                       # text = $2 (no blank lines allowed)
2463:                 (?:
2464:                     .+              # actual text
2465:                 |
2466:                     \n              # newlines but
2467:                     (?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
2468:                     (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
2469:                                     # by non-indented content
2470:                 )*
2471:             )
2472:             }xm',
2473:             array(&$this, '_stripFootnotes_callback'),
2474:             $text);
2475:         return $text;
2476:     }
2477:     public function _stripFootnotes_callback($matches) {
2478:         $note_id = $this->fn_id_prefix . $matches[1];
2479:         $this->footnotes[$note_id] = $this->outdent($matches[2]);
2480:         return ''; # String that will replace the block
2481:     }
2482: 
2483: 
2484:     public function doFootnotes($text) {
2485:     #
2486:     # Replace footnote references in $text [^id] with a special text-token
2487:     # which will be replaced by the actual footnote marker in appendFootnotes.
2488:     #
2489:         if (!$this->in_anchor) {
2490:             $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
2491:         }
2492:         return $text;
2493:     }
2494: 
2495:     
2496:     public function appendFootnotes($text) {
2497:     #
2498:     # Append footnote list to text.
2499:     #
2500:         $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 
2501:             array(&$this, '_appendFootnotes_callback'), $text);
2502:     
2503:         if (!empty($this->footnotes_ordered)) {
2504:             $text .= "\n\n";
2505:             $text .= "<div class=\"footnotes\">\n";
2506:             $text .= "<hr". $this->empty_element_suffix ."\n";
2507:             $text .= "<ol>\n\n";
2508:             
2509:             $attr = " rev=\"footnote\"";
2510:             if ($this->fn_backlink_class != "") {
2511:                 $class = $this->fn_backlink_class;
2512:                 $class = $this->encodeAttribute($class);
2513:                 $attr .= " class=\"$class\"";
2514:             }
2515:             if ($this->fn_backlink_title != "") {
2516:                 $title = $this->fn_backlink_title;
2517:                 $title = $this->encodeAttribute($title);
2518:                 $attr .= " title=\"$title\"";
2519:             }
2520:             $num = 0;
2521: 
2522:             while (!empty($this->footnotes_ordered)) {
2523:                 $footnote = reset($this->footnotes_ordered);
2524:                 $note_id = key($this->footnotes_ordered);
2525:                 unset($this->footnotes_ordered[$note_id]);
2526: 
2527:                 $footnote .= "\n"; # Need to append newline before parsing.
2528:                 $footnote = $this->runBlockGamut("$footnote\n");
2529:                 $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
2530:                     array(&$this, '_appendFootnotes_callback'), $footnote);
2531: 
2532:                 $attr = str_replace("%%", ++$num, $attr);
2533:                 $note_id = $this->encodeAttribute($note_id);
2534: 
2535:                 # Add backlink to last paragraph; create new paragraph if needed.
2536:                 $backlink = "<a href=\"#fnref:$note_id\"$attr>&#8617;</a>";
2537:                 if (preg_match('{</p>$}', $footnote)) {
2538:                     $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
2539:                 } else {
2540:                     $footnote .= "\n\n<p>$backlink</p>";
2541:                 }
2542: 
2543:                 $text .= "<li id=\"fn:$note_id\">\n";
2544:                 $text .= $footnote . "\n";
2545:                 $text .= "</li>\n\n";
2546:             }
2547: 
2548:             $text .= "</ol>\n";
2549:             $text .= "</div>";
2550:         }
2551:         return $text;
2552:     }
2553:     public function _appendFootnotes_callback($matches) {
2554:         $node_id = $this->fn_id_prefix . $matches[1];
2555: 
2556:         # Create footnote marker only if it has a corresponding footnote *and*
2557:         # the footnote hasn't been used by another marker.
2558:         if (isset($this->footnotes[$node_id])) {
2559:             # Transfert footnote content to the ordered list.
2560:             $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
2561:             unset($this->footnotes[$node_id]);
2562: 
2563:             $num = $this->footnote_counter++;
2564:             $attr = " rel=\"footnote\"";
2565:             if ($this->fn_link_class != "") {
2566:                 $class = $this->fn_link_class;
2567:                 $class = $this->encodeAttribute($class);
2568:                 $attr .= " class=\"$class\"";
2569:             }
2570:             if ($this->fn_link_title != "") {
2571:                 $title = $this->fn_link_title;
2572:                 $title = $this->encodeAttribute($title);
2573:                 $attr .= " title=\"$title\"";
2574:             }
2575: 
2576:             $attr = str_replace("%%", $num, $attr);
2577:             $node_id = $this->encodeAttribute($node_id);
2578: 
2579:             return
2580:                 "<sup id=\"fnref:$node_id\">".
2581:                 "<a href=\"#fn:$node_id\"$attr>$num</a>".
2582:                 "</sup>";
2583:         }
2584: 
2585:         return "[^".$matches[1]."]";
2586:     }
2587: 
2588: 
2589:     ### Abbreviations ###
2590: 
2591:     public function stripAbbreviations($text) {
2592:     #
2593:     # Strips abbreviations from text, stores titles in hash references.
2594:     #
2595:         $less_than_tab = $this->tab_width - 1;
2596: 
2597:         # Link defs are in the form: [id]*: url "optional title"
2598:         $text = preg_replace_callback('{
2599:             ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?:  # abbr_id = $1
2600:             (.*)                    # text = $2 (no blank lines allowed)
2601:             }xm',
2602:             array(&$this, '_stripAbbreviations_callback'),
2603:             $text);
2604:         return $text;
2605:     }
2606:     public function _stripAbbreviations_callback($matches) {
2607:         $abbr_word = $matches[1];
2608:         $abbr_desc = $matches[2];
2609:         if ($this->abbr_word_re)
2610:             $this->abbr_word_re .= '|';
2611:         $this->abbr_word_re .= preg_quote($abbr_word);
2612:         $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
2613:         return ''; # String that will replace the block
2614:     }
2615: 
2616: 
2617:     public function doAbbreviations($text) {
2618:     #
2619:     # Find defined abbreviations in text and wrap them in <abbr> elements.
2620:     #
2621:         if ($this->abbr_word_re) {
2622:             // cannot use the /x modifier because abbr_word_re may
2623:             // contain significant spaces:
2624:             $text = preg_replace_callback('{'.
2625:                 '(?<![\w\x1A])'.
2626:                 '(?:'.$this->abbr_word_re.')'.
2627:                 '(?![\w\x1A])'.
2628:                 '}',
2629:                 array(&$this, '_doAbbreviations_callback'), $text);
2630:         }
2631:         return $text;
2632:     }
2633:     public function _doAbbreviations_callback($matches) {
2634:         $abbr = $matches[0];
2635:         if (isset($this->abbr_desciptions[$abbr])) {
2636:             $desc = $this->abbr_desciptions[$abbr];
2637:             if (empty($desc)) {
2638:                 return $this->hashPart("<abbr>$abbr</abbr>");
2639:             } else {
2640:                 $desc = $this->encodeAttribute($desc);
2641:                 return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
2642:             }
2643:         } else {
2644:             return $matches[0];
2645:         }
2646:     }
2647: 
2648: }
2649:
Packages

Classes

Interfaces

Exceptions

Functions