/**
 * Build the link tag for one matched e-mail address.
 *
 * The address is passed to `Kernel::addConfig()` under the `urls` key before
 * the tag is built.
 *
 * @param   array   $matches    A set of results of the `transform` function (index 1 is the address)
 * @return  string  The value returned by `hashPart()` for the built `link` tag
 */
protected function _email_callback($matches)
{
    $email = $matches[1];
    Kernel::addConfig('urls', $email);

    $link = Kernel::get('OutputFormatBag')
        ->buildTag('link', $email, array('email' => $email));

    return parent::hashPart($link);
}
/**
 * Process one fenced code block.
 *
 * The code content (index 3) is escaped, its leading newlines are handed to
 * the `_newlines()` callback, and the result is wrapped in a `preformatted`
 * tag. The language (index 2) is only added as an attribute when not empty.
 *
 * @param   array   $matches    Results from the `transform()` function
 * @return  string  The hashed block surrounded by two newlines on each side
 */
protected function _callback($matches)
{
    $lang = $matches[2];

    $code = preg_replace_callback(
        '/^\\n+/',
        array($this, '_newlines'),
        Helper::escapeCodeContent($matches[3])
    );

    $attrs = empty($lang) ? array() : array('language' => $lang);

    $tag = Kernel::get('OutputFormatBag')
        ->buildTag('preformatted', $code, $attrs);

    return "\n\n" . parent::hashBlock($tag) . "\n\n";
}
/**
 * Replace every horizontal-rule line of the text by a hashed `horizontal_rule` tag.
 *
 * A rule line is up to three leading spaces followed by the same marker
 * (`-`, `*` or `_`) at least three times, each optionally separated by up to
 * two spaces.
 *
 * @param   string  $text
 * @return  string
 */
public function transform($text)
{
    // The replacement is the same for every match, so it is built once.
    $rule = Kernel::get('OutputFormatBag')->buildTag('horizontal_rule');
    $replacement = "\n" . parent::hashBlock($rule) . "\n";

    $pattern = '{
            ^[ ]{0,3}       # Leading space
            ([-*_])         # $1: First marker
            (?>             # Repeated marker group
                [ ]{0,2}    # Zero, one, or two spaces.
                \\1         # Marker character
            ){2,}           # Group repeated at least twice
            [ ]*            # Tailing spaces
            $               # End of line.
        }mx';

    return preg_replace($pattern, $replacement, $text);
}
/**
 * Build one blockquote block.
 *
 * @param   array   $matches    A set of results of the `transform()` function:
 *                              index 1 is the quoted text, index 2 (optional) the citation
 * @return  string  The hashed block preceded by one newline and followed by two
 */
protected function _callback($matches)
{
    $quote = $matches[1];
    $cite = null;
    if (isset($matches[2])) {
        $cite = $matches[2];
    }

    // Remove one level of quoting and empty the whitespace-only lines.
    $quote = preg_replace('/^[ ]*>[ ]?(\\((.+?)\\))?|^[ ]+$/m', '', $quote);

    // Recurse: the quoted content is itself run through the block gamut.
    $quote = Lexer::runGamut('html_block_gamut', $quote);

    // Indent every line of the result...
    $quote = preg_replace('/^/m', " ", $quote);

    // ...but that indentation causes problems inside <pre> content,
    // so those parts are handed to `_callback_spaces()`.
    $quote = preg_replace_callback(
        '{(\\s*<pre>.+?</pre>)}sx',
        array($this, '_callback_spaces'),
        $quote
    );

    $attrs = empty($cite) ? array() : array('cite' => $cite);

    $tag = Kernel::get('OutputFormatBag')
        ->buildTag('blockquote', $quote, $attrs);

    return "\n" . parent::hashBlock($tag) . "\n\n";
}
/**
 * Build one maths span.
 *
 * @param   string  $texblock   The maths content; it is trimmed before use
 * @return  string  The value returned by `hashPart()` for the built `maths_span` tag
 */
public function span($texblock)
{
    $tag = Kernel::get('OutputFormatBag')
        ->buildTag('maths_span', trim($texblock), array());

    return parent::hashPart($tag);
}
/**
 * Process one ATX-style header.
 *
 * The header level is the length of index 1 plus the rebased header level.
 * The DOM id is index 3 when not empty, otherwise a label derived from the
 * raw title (index 2). The title is registered in the `menu` configuration
 * entry and as content title.
 *
 * @param   array   $matches    The results from the `transform()` function
 * @return  string  The hashed block preceded by one newline and followed by two
 */
protected function _atx_callback($matches)
{
    $raw_title = $matches[2];
    $level = strlen($matches[1]) + $this->_getRebasedHeaderLevel();

    if (!empty($matches[3])) {
        $id = $matches[3];
    } else {
        $id = Helper::header2Label($raw_title);
    }
    $id = Kernel::get('DomId')->set($id);

    $title = Lexer::runGamut('span_gamut', $raw_title);

    $menu_entry = array(
        'level' => $level,
        'text'  => parent::unhash($title),
    );
    Kernel::addConfig('menu', $menu_entry, $id);

    $tag = Kernel::get('OutputFormatBag')
        ->buildTag('title', $title, array('level' => $level, 'id' => $id));

    $this->_setContentTitle($title);

    return "\n" . parent::hashBlock($tag) . "\n\n";
}
/**
 * Parse markdown text, calling `_hashBlocks_inHTML()` for block tags.
 *
 * -   `$indent` is the number of spaces to be ignored when checking for code
 *     blocks. This is important because if we don't take the indent into
 *     account, something like this (which looks right) won't work as expected:
 *
 *         <div>
 *             <div markdown="1">
 *             Hello World.  <-- Is this a Markdown code block or text?
 *             </div>  <-- Is this a Markdown code block or a real tag?
 *         <div>
 *
 *     If you don't like this, just don't indent the tag on which
 *     you apply the markdown="1" attribute.
 *
 * -   If `$enclosing_tag_re` is not empty, parsing stops at the first
 *     unmatched closing tag with that name. Nested tags are supported.
 *
 * -   If `$span` is true, the text inside must be treated as span: an
 *     empty span-level hash is added around each newline so that no block
 *     element (such as a paragraph) gets triggered.
 *
 * @param   string  $text               The text to be parsed
 * @param   int     $indent             The indentation to use
 * @param   string  $enclosing_tag_re   The closing tag to use
 * @param   bool    $span               Are we in a span element (false by default)
 * @return  array   ( processed text , remaining text )
 */
protected function _hashBlocks_inMarkdown($text, $indent = 0, $enclosing_tag_re = '', $span = false)
{
    if ($text === '') {
        return array('', '');
    }

    // Regex to check for the presence of newlines around a block tag.
    $newline_before_re = '/(?:^\\n?|\\n\\n)*$/';
    $newline_after_re = '{
            ^                       # Start of text following the tag.
            (?>[ ]*<!--.*?-->)?     # Optional comment.
            [ ]*\\n                 # Must be followed by newline.
        }xs';

    // Regex to match any tag.
    $block_tag_re = '{
            (                   # $2: Capture hole tag.
                </?                 # Any opening or closing tag.
                    (?>             # Tag name.
                        ' . $this->block_tags_re . ' |
                        ' . $this->blocks_tags_re . ' |
                        ' . $this->clean_tags_re . ' |
                        (?!\\s)' . $enclosing_tag_re . '
                    )
                    (?:
                        (?=[\\s"\'/a-zA-Z0-9])  # Allowed characters after tag name.
                        (?>
                            ".*?" |     # Double quotes (can contain `>`)
                            \'.*?\' |   # Single quotes (can contain `>`)
                            .+?         # Anything but quotes and `>`.
                        )*?
                    )?
                >                   # End of tag.
            |
                <!-- .*? -->        # HTML Comment
            |
                <\\?.*?\\?> | <%.*?%>   # Processing instruction
            |
                <!\\[CDATA\\[.*?\\]\\]> # CData Block
            |
                # Code span marker
                `+
            ' . (!$span ? ' # If not in span.
            |
                # Indented code block
                (?: ^[ ]*\\n | ^ | \\n[ ]*\\n )
                [ ]{' . ($indent + 4) . '}[^\\n]* \\n
                (?>
                    (?: [ ]{' . ($indent + 4) . '}[^\\n]* | [ ]* ) \\n
                )*
            |
                # Fenced code block marker
                (?> ^ | \\n )
                [ ]{0,' . $indent . '}~~~+[ ]*\\n
            ' : '') . ' # End (if not is span).
            )
        }xs';

    $depth  = 0;    // Current depth inside the tag tree.
    $parsed = "";   // Parsed text that will be returned.

    // Loop through every tag until we find the closing tag of the parent
    // or loop until reaching the end of text if no parent tag specified.
    do {
        // Split the text using the first $block_tag_re match found.
        // Text before the match will be first in the array, text after
        // the match will be at the end, and between will be any catches made
        // by the pattern.
        $parts = preg_split($block_tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

        // If in Markdown span mode, add an empty-string span-level hash
        // after each newline to prevent triggering any block element.
        if ($span) {
            $void = parent::hashPart("", ':');
            $newline = "{$void}\n";
            $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
        }

        $parsed .= $parts[0]; // Text before current tag.

        // If end of $text has been reached. Stop loop.
        if (count($parts) < 3) {
            $text = "";
            break;
        }

        $tag  = $parts[1]; // Tag to handle.
        $text = $parts[2]; // Remaining text after current tag.

        if ($tag[0] == "`") {
            // Code span marker: find corresponding end marker.
            $tag_re = preg_quote($tag);
            if (preg_match('{^(?>.+?|\\n(?!\\n))*?(?<!`)' . $tag_re . '(?!`)}', $text, $matches)) {
                // End marker found: pass text unchanged until marker.
                $parsed .= $tag . $matches[0];
                $text = substr($text, strlen($matches[0]));
            } else {
                // Unmatched marker: just skip it.
                $parsed .= $tag;
            }
        } elseif (preg_match('{^\\n?[ ]{0,' . ($indent + 3) . '}~}', $tag)) {
            // Fenced code block marker: find matching end marker.
            $tag_re = preg_quote(trim($tag));
            if (preg_match('{^(?>.*\\n)+?[ ]{0,' . $indent . '}' . $tag_re . '[ ]*\\n}', $text, $matches)) {
                // End marker found: pass text unchanged until marker.
                $parsed .= $tag . $matches[0];
                $text = substr($text, strlen($matches[0]));
            } else {
                // No end marker: just skip it.
                $parsed .= $tag;
            }
        } elseif ($tag[0] == "\n" || $tag[0] == " ") {
            // Indented code block: pass it unchanged, will be handled later.
            $parsed .= $tag;
        } elseif (
            preg_match('{^<(?:' . $this->block_tags_re . ')\\b}', $tag)
            || (
                // The second tag family only counts as a block when it is
                // surrounded by newlines. Parentheses make explicit the
                // grouping that operator precedence already implied.
                preg_match('{^<(?:' . $this->blocks_tags_re . ')\\b}', $tag)
                && preg_match($newline_before_re, $parsed)
                && preg_match($newline_after_re, $text)
            )
        ) {
            // Need to parse tag and following text using the HTML parser.
            // Called through `$this` (like the call below) so that an
            // overriding implementation is honoured on both paths.
            list($block_text, $text) = $this->_hashBlocks_inHTML($tag . $text, "hashBlock", true);

            // Make sure it stays outside of any paragraph by adding newlines.
            $parsed .= "\n\n{$block_text}\n\n";
        } elseif (
            preg_match('{^<(?:' . $this->clean_tags_re . ')\\b}', $tag)
            || $tag[1] == '!'
            || $tag[1] == '?'
        ) {
            // Need to parse tag and following text using the HTML parser.
            // (don't check for markdown attribute)
            list($block_text, $text) = $this->_hashBlocks_inHTML($tag . $text, "hashClean", false);
            $parsed .= $block_text;
        } elseif (
            $enclosing_tag_re !== ''
            && preg_match('{^</?(?:' . $enclosing_tag_re . ')\\b}', $tag)
        ) {
            // Increase/decrease nested tag count. A tag ending with `/>`
            // is self-closing and leaves the depth untouched.
            if ($tag[1] == '/') {
                $depth--;
            } elseif ($tag[strlen($tag) - 2] != '/') {
                $depth++;
            }

            if ($depth < 0) {
                // Going out of parent element. Clean up and break so we
                // return to the calling function.
                $text = $tag . $text;
                break;
            }

            $parsed .= $tag;
        } else {
            $parsed .= $tag;
        }
    } while ($depth >= 0);

    return array($parsed, $text);
}
/**
 * Build the image tag for one inline image match.
 *
 * The URL is index 3, or index 4 when index 3 is an empty string. The
 * alternative text (index 2), the URL and the optional title (index 7) are
 * each passed through the `EncodeAttribute` tool before being used as the
 * `alt`, `src` and `title` attributes.
 *
 * @param   array   $matches    A set of results of the `transform` function
 * @return  string  The value returned by `hashPart()` for the built `image` tag
 */
protected function _inline_callback($matches)
{
    $encoder = GamutLoader::TOOL_ALIAS . ':EncodeAttribute';

    $alt = $matches[2];
    $src = $matches[3];
    if ($src == '') {
        $src = $matches[4];
    }
    // The title group is optional and may be missing from the matches.
    $title = isset($matches[7]) ? $matches[7] : null;

    $attrs = array(
        'alt' => Lexer::runGamut($encoder, $alt),
        'src' => Lexer::runGamut($encoder, $src),
    );
    if (!empty($title)) {
        $attrs['title'] = Lexer::runGamut($encoder, $title);
    }

    $tag = Kernel::get('OutputFormatBag')->buildTag('image', null, $attrs);

    return parent::hashPart($tag);
}
/**
 * Build one definition list block.
 *
 * The list content (index 1) is processed by `transformItems()`, trimmed,
 * stripped of its `<!--dt-->` markers and wrapped in a `definition_list` tag.
 *
 * @param   array   $matches    The results from the `transform()` method
 * @return  string  The hashed block followed by two newlines
 */
protected function _callback($matches)
{
    $items = trim(self::transformItems($matches[1]));
    $items = str_replace('<!--dt-->', '', $items);

    $list = Kernel::get('OutputFormatBag')
        ->buildTag('definition_list', $items);

    return parent::hashBlock($list) . "\n\n";
}
/**
 * Replace one match by a hashed `new_line` tag followed by a newline.
 *
 * @param   array   $matches    A set of results of the `transform()` function (not used)
 * @return  string
 */
protected function _callback($matches)
{
    $break = Kernel::get('OutputFormatBag')->buildTag('new_line');

    return parent::hashPart($break . "\n");
}
/**
 * Build one ordered or unordered list block.
 *
 * The list is unordered when index 4 of the matches matches
 * `self::$marker_ul_re`, ordered otherwise; the items are then transformed
 * with the marker pattern of that list type only.
 *
 * @param   array   $matches    A set of results of the `transform` function
 * @return  string  The hashed block preceded by one newline and followed by two
 */
protected function _callback($matches)
{
    $is_unordered = (bool) preg_match('/' . self::$marker_ul_re . '/', $matches[4]);

    // The original first built a combined "any marker" pattern and then
    // immediately overwrote it; only the type-specific pattern is ever used.
    $list_type = $is_unordered ? "unordered" : "ordered";
    $marker_re = $is_unordered ? self::$marker_ul_re : self::$marker_ol_re;

    $list = self::transformItems($matches[1] . "\n", $marker_re);

    $block = Kernel::get('OutputFormatBag')
        ->buildTag($list_type . '_list', $list);

    return "\n" . parent::hashBlock($block) . "\n\n";
}
/**
 * Form HTML tables: parses table contents
 *
 * When `$matches` holds more than three entries, index 1 is taken as the
 * caption and indexes 2, 3 and 4 as header, underline and content; otherwise
 * indexes 1, 2 and 3 are header, underline and content.
 *
 * The underline row gives each column its alignment (`-:` right, `:-:`
 * center, `:-` left). An empty cell extends the `colspan` of the cell before
 * it.
 *
 * A header or body cell whose column has no entry in the underline row now
 * gets an empty attribute set instead of reading an undefined array key.
 *
 * @param   array   $matches
 * @return  string  The hashed table block followed by one newline
 */
protected function _callback($matches)
{
    $attributes = array();

    $has_caption = count($matches) > 3;

    // The head and content lines may begin with a pipe: strip it.
    $caption    = $has_caption ? $matches[1] : null;
    $head       = preg_replace('/^ *[|]/m', '', $has_caption ? $matches[2] : $matches[1]);
    $underline  = $has_caption ? $matches[3] : $matches[2];
    $content    = preg_replace('/^ *[|]/m', '', $has_caption ? $matches[4] : $matches[3]);

    // Remove any tailing pipes for each line.
    $underline  = preg_replace('/[|] *$/m', '', $underline);
    $content    = preg_replace('/[|] *$/m', '', $content);

    // Reading alignment from header underline.
    $separators = preg_split('/ *[|] */', $underline);
    foreach ($separators as $n => $s) {
        $attributes[$n] = array();
        if (preg_match('/^ *-+: *$/', $s)) {
            $attributes[$n]['style'] = 'text-align:right;';
        } elseif (preg_match('/^ *:-+: *$/', $s)) {
            $attributes[$n]['style'] = 'text-align:center;';
        } elseif (preg_match('/^ *:-+ *$/', $s)) {
            $attributes[$n]['style'] = 'text-align:left;';
        }
    }

    // Split header by row.
    $headers = explode("\n", trim($head, "\n"));

    $text = '';
    if (!empty($caption)) {
        $this->table_id = Helper::header2Label($caption);
        $text .= preg_replace_callback(
            '/\\[(.*)\\]/',
            array($this, '_doCaption'),
            Lexer::runGamut('span_gamut', $caption)
        );
    }

    $lines = '';
    foreach ($headers as $_header) {
        $line = '';

        // Parsing span elements, including code spans, character escapes,
        // and inline HTML tags, so that pipes inside those gets ignored.
        $_header = Lexer::runGamut('filter:Span', $_header);

        // Split row by cell.
        $_header    = preg_replace('/[|] *$/m', '', $_header);
        $_headers   = preg_split('/[|]/', $_header);
        // Kept after the loop: the body rows are split on the column count
        // of the last header row.
        $col_count  = count($_headers);

        // Write column headers.
        // we first loop for colspans
        $headspans = array();
        foreach ($_headers as $_i => $_cell) {
            if ($_cell == '') {
                if ($_i == 0) {
                    $headspans[1] = 2;
                } elseif (isset($headspans[$_i - 1])) {
                    $headspans[$_i - 1]++;
                } else {
                    $headspans[$_i - 1] = 2;
                }
            }
        }

        foreach ($_headers as $n => $__header) {
            if ($__header != '') {
                // A header row may hold more cells than the underline row.
                $cell_attributes = isset($attributes[$n]) ? $attributes[$n] : array();
                if (isset($headspans[$n])) {
                    $cell_attributes['colspan'] = $headspans[$n];
                }
                $line .= Kernel::get('OutputFormatBag')
                    ->buildTag(
                        'table_cell_head',
                        Lexer::runGamut('span_gamut', trim($__header)),
                        $cell_attributes
                    ) . "\n";
            }
        }

        $lines .= Kernel::get('OutputFormatBag')->buildTag('table_line', $line) . "\n";
    }
    $text .= Kernel::get('OutputFormatBag')->buildTag('table_header', $lines);

    // Split content by row.
    $rows = explode("\n", trim($content, "\n"));

    $lines = '';
    foreach ($rows as $row) {
        $line = '';

        // Parsing span elements, including code spans, character escapes,
        // and inline HTML tags, so that pipes inside those gets ignored.
        $row = Lexer::runGamut('filter:Span', $row);

        // Split row by cell.
        $row_cells = preg_split('/ *[|] */', $row, $col_count);
        $row_cells = array_pad($row_cells, $col_count, '');

        // we first loop for colspans
        $colspans = array();
        foreach ($row_cells as $_i => $_cell) {
            if ($_cell == '') {
                if ($_i == 0) {
                    $colspans[1] = 2;
                } elseif (isset($colspans[$_i - 1])) {
                    $colspans[$_i - 1]++;
                } else {
                    $colspans[$_i - 1] = 2;
                }
            }
        }

        foreach ($row_cells as $n => $cell) {
            if ($cell != '') {
                // A body row may hold more cells than the underline row.
                $cell_attributes = isset($attributes[$n]) ? $attributes[$n] : array();
                if (isset($colspans[$n])) {
                    $cell_attributes['colspan'] = $colspans[$n];
                }
                $line .= Kernel::get('OutputFormatBag')
                    ->buildTag(
                        'table_cell',
                        Lexer::runGamut('span_gamut', trim($cell)),
                        $cell_attributes
                    ) . "\n";
            }
        }

        $lines .= Kernel::get('OutputFormatBag')->buildTag('table_line', $line) . "\n";
    }
    $text .= Kernel::get('OutputFormatBag')->buildTag('table_body', $lines);

    $table = Kernel::get('OutputFormatBag')->buildTag('table', $text);

    return parent::hashBlock($table) . "\n";
}
/**
 * Create a code span markup for $code. Called from handleSpanToken.
 *
 * The code is trimmed and escaped before being wrapped in a `code` tag.
 *
 * @param   string  $code
 * @return  string  The value returned by `hashPart()` for the built tag
 */
public function span($code)
{
    $escaped = Helper::escapeCodeContent(trim($code));
    $tag = Kernel::get('OutputFormatBag')->buildTag('code', $escaped);

    return parent::hashPart($tag);
}
/**
 * Handle $token provided by parseSpan by determining its nature and
 * returning the corresponding value that should replace it.
 *
 * -   `\(` opens a maths span: when a closing `\)` is found in `$str`, the
 *     text up to it is run through the `filter:Maths:span` gamut and both
 *     the text and the closing marker are removed from `$str`.
 * -   any other backslash token (including a `\(` with no closing `\)`) is
 *     replaced by the numeric entity of the escaped character.
 * -   a backtick token opens a code span: when a matching end marker is
 *     found, the text up to it is run through the `filter:CodeBlock:span`
 *     gamut and removed from `$str`; otherwise the token is returned as text.
 * -   any other token is returned hashed.
 *
 * @param   string  $token  The token to handle
 * @param   string  $str    The text following the token; the consumed part is removed from it
 * @return  string
 */
public function handleSpanToken($token, &$str)
{
    switch ($token[0]) {
        case "\\":
            if ($token[1] == "(") {
                $texend = strpos($str, '\\)');
                // Strict comparison: a closing marker right at the start of
                // `$str` is at offset 0, which must not be read as "not found".
                if ($texend !== false) {
                    $eqn = substr($str, 0, $texend);
                    $str = substr($str, $texend + 2);
                    $texspan = Lexer::runGamut('filter:Maths:span', $eqn);
                    return parent::hashPart($texspan);
                }
                // No closing marker: the original returned `$str` itself as
                // the replacement without consuming it, dropping the escaped
                // parenthesis. Treat it as a plain escaped character instead.
            }
            return parent::hashPart("&#" . ord($token[1]) . ";");

        case "`":
            // Search for end marker in remaining text.
            if (preg_match('/^(.*?[^`])' . preg_quote($token) . '(?!`)(.*)$/sm', $str, $matches)) {
                $str = $matches[2];
                $codespan = Lexer::runGamut('filter:CodeBlock:span', $matches[1], true);
                return parent::hashPart($codespan);
            }
            // Return as text since no ending marker found.
            return $token;

        default:
            return parent::hashPart($token);
    }
}
/**
 * Transform the emphasis markers of a text into `italic` and `bold` tags.
 *
 * The text is split on one emphasis token at a time, using the prepared
 * pattern of `self::$em_strong_prepared` that matches the current state
 * (`$italic` and `$strong` hold the marker of the currently open emphasis,
 * or an empty string). Open markers and the text collected since they were
 * opened are kept on two parallel stacks whose index 0 is the innermost level.
 *
 * @param   string  $text
 * @return  string
 */
public function transform($text)
{
    $token_stack    = array('');
    $text_stack     = array('');
    $italic         = '';
    $strong         = '';
    $tree_char_em   = false;

    while (1) {
        // Get prepared regular expression for searching emphasis tokens in current context.
        $token_re = self::$em_strong_prepared["{$italic}{$strong}"];

        // Each loop iteration searches for the next emphasis token.
        $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
        $text_stack[0] .= $parts[0];
        // Bound by reference so that a missing index (no more token) yields
        // null instead of an undefined-index notice.
        $token  =& $parts[1];
        $text   =& $parts[2];

        if (empty($token)) {
            // Reached end of text span: empty stack without emitting any more emphasis.
            while ($token_stack[0]) {
                $text_stack[1] .= array_shift($token_stack);
                $text_stack[0] .= array_shift($text_stack);
            }
            break;
        }

        $token_len = strlen($token);
        if ($tree_char_em) {
            // Reached closing marker while inside a three-char emphasis.
            if ($token_len == 3) {
                // Three-char closing marker, close em and strong.
                array_shift($token_stack);
                $span = Lexer::runGamut('span_gamut', array_shift($text_stack));
                $span = Kernel::get('OutputFormatBag')->buildTag('italic', $span);
                $span = Kernel::get('OutputFormatBag')->buildTag('bold', $span);
                $text_stack[0] .= parent::hashPart($span);
                $italic = '';
                $strong = '';
            } else {
                // Other closing marker: close one em or strong and
                // change current token state to match the other
                $token_stack[0] = str_repeat($token[0], 3 - $token_len);
                $tag = $token_len == 2 ? "bold" : "italic";
                $span = Lexer::runGamut('span_gamut', $text_stack[0]);
                $span = Kernel::get('OutputFormatBag')->buildTag($tag, $span);
                $text_stack[0] = parent::hashPart($span);
                // Reset the state of the emphasis just closed. The tag name
                // "bold" maps to the `$strong` state variable, so a variable
                // variable on the tag name would clear the wrong variable.
                if ($tag === "bold") {
                    $strong = '';
                } else {
                    $italic = '';
                }
            }
            $tree_char_em = false;
        } elseif ($token_len == 3) {
            if ($italic) {
                // Reached closing marker for both em and strong:
                // close the two innermost levels, whatever their order.
                for ($i = 0; $i < 2; ++$i) {
                    $shifted_token = array_shift($token_stack);
                    $tag = strlen($shifted_token) == 2 ? "bold" : "italic";
                    $span = Lexer::runGamut('span_gamut', array_shift($text_stack));
                    $span = Kernel::get('OutputFormatBag')->buildTag($tag, $span);
                    $text_stack[0] .= parent::hashPart($span);
                    // Same mapping as above: "bold" is tracked by `$strong`.
                    if ($tag === "bold") {
                        $strong = '';
                    } else {
                        $italic = '';
                    }
                }
            } else {
                // Reached opening three-char emphasis marker. Push on token
                // stack; will be handled by the special condition above.
                $italic = $token[0];
                $strong = "{$italic}{$italic}";
                array_unshift($token_stack, $token);
                array_unshift($text_stack, '');
                $tree_char_em = true;
            }
        } elseif ($token_len == 2) {
            if ($strong) {
                // Unwind any dangling emphasis marker:
                if (strlen($token_stack[0]) == 1) {
                    $text_stack[1] .= array_shift($token_stack);
                    $text_stack[0] .= array_shift($text_stack);
                }
                // Closing strong marker:
                array_shift($token_stack);
                $span = Lexer::runGamut('span_gamut', array_shift($text_stack));
                $span = Kernel::get('OutputFormatBag')->buildTag('bold', $span);
                $text_stack[0] .= parent::hashPart($span);
                $strong = '';
            } else {
                // Opening strong marker.
                array_unshift($token_stack, $token);
                array_unshift($text_stack, '');
                $strong = $token;
            }
        } else {
            // Here $token_len == 1
            if ($italic) {
                if (strlen($token_stack[0]) == 1) {
                    // Closing emphasis marker:
                    array_shift($token_stack);
                    $span = Lexer::runGamut('span_gamut', array_shift($text_stack));
                    $span = Kernel::get('OutputFormatBag')->buildTag('italic', $span);
                    $text_stack[0] .= parent::hashPart($span);
                    $italic = '';
                } else {
                    // The innermost open marker is not an emphasis one:
                    // keep the token as plain text.
                    $text_stack[0] .= $token;
                }
            } else {
                // Opening emphasis marker.
                array_unshift($token_stack, $token);
                array_unshift($text_stack, '');
                $italic = $token;
            }
        }
    }

    return $text_stack[0];
}
/**
 * Process one abbreviation.
 *
 * The matched term is looked up in the `abbr_desciptions` configuration
 * entry. A term with no entry is returned untouched; otherwise it is wrapped
 * in an `abbreviation` tag, with its trimmed description as encoded `title`
 * attribute when that description is not empty.
 *
 * @param   array   $matches    One set of results from the `transform()` function
 * @return  string
 */
protected function _callback($matches)
{
    $term = $matches[0];
    $descriptions = Kernel::getConfig('abbr_desciptions');

    if (!isset($descriptions[$term])) {
        return $term;
    }

    $attrs = array();
    $description = trim($descriptions[$term]);
    if (!empty($description)) {
        $attrs['title'] = Lexer::runGamut(GamutLoader::TOOL_ALIAS . ':EncodeAttribute', $description);
    }

    $tag = Kernel::get('OutputFormatBag')
        ->buildTag('abbreviation', $term, $attrs);

    // NOTE(review): other inline callbacks (links, images, code spans) hash
    // their tag with `hashPart()`; confirm `hashBlock()` is intended here.
    return parent::hashBlock($tag);
}