/**
 * Returns the content of a template file, using the instance cache when possible.
 *
 * Passing exactly `true` selects the `default_template` configured for the current
 * `output_format`; when that entry is empty, the `template_options.inline_template`
 * configuration value is returned directly.
 *
 * A path that does not exist as given is resolved through `Kernel::getResourcePath()`
 * with the `Kernel::RESOURCE_TEMPLATE` type.
 *
 * @param string|bool $template_path A template path, or `true` for the configured default
 *
 * @return mixed|string
 *
 * @throws \MarkdownExtended\Exception\FileSystemException if the template can not be found or is not readable
 */
public function getTemplate($template_path)
{
    if (true === $template_path) {
        $format = Kernel::getConfig('output_format');
        $template_path = Kernel::getConfig('output_format_options.' . $format . '.default_template');
        if (empty($template_path)) {
            return Kernel::getConfig('template_options.inline_template');
        }
    }

    if (!file_exists($template_path)) {
        $resolved = Kernel::getResourcePath($template_path, Kernel::RESOURCE_TEMPLATE);
        $found = !empty($resolved) && file_exists($resolved);
        if (!$found) {
            throw new FileSystemException(sprintf('Template "%s" not found', $template_path));
        }
        $template_path = $resolved;
    }

    // Cache hit: nothing to read from disk.
    if ($this->cache->isCached($template_path)) {
        return $this->cache->getCache($template_path);
    }

    if (!is_readable($template_path)) {
        throw new FileSystemException(sprintf('Template "%s" is not readable', $template_path));
    }

    $content = Helper::readFile($template_path);
    $this->cache->setCache($template_path, $content);

    return $content;
}
/**
 * Replaces one inclusion match by the parsed content of the referenced file.
 *
 * When the file name does not exist as given, each `base_path` configuration entry
 * is tried as a parent directory and the first existing candidate is used.
 * A parsing exception is not propagated: it is rendered as a `comment` tag instead.
 * The `ContentCollection` position read before parsing is restored afterwards.
 *
 * @param array $matches One set of results from the `transform()` function
 * @return string The parsed content of the included file, or an error comment
 */
protected function _callback($matches)
{
    $filename = $matches[1];

    if (!file_exists($filename)) {
        $search_dirs = Kernel::getConfig('base_path');
        if (!is_array($search_dirs)) {
            $search_dirs = array($search_dirs);
        }
        foreach ($search_dirs as $dir) {
            $candidate = rtrim($dir, DIRECTORY_SEPARATOR) . DIRECTORY_SEPARATOR . $filename;
            if (file_exists($candidate)) {
                $filename = $candidate;
                break;
            }
        }
    }

    // Remember where the collection cursor is so it can be put back after parsing.
    $position = Kernel::get('ContentCollection')->key();

    try {
        $result = Kernel::get('Parser')->transformSource($filename, false);
    } catch (\Exception $e) {
        $result = Kernel::get('OutputFormatBag')->buildTag('comment', "ERROR while parsing {$filename} : '{$e->getMessage()}'");
    }

    Kernel::get('ContentCollection')->seek($position);

    return $result;
}
/**
 * Form HTML tables.
 *
 * Find tables with leading pipe:
 *
 *     | Header 1 | Header 2
 *     | -------- | --------
 *     | Cell 1   | Cell 2
 *     | Cell 3   | Cell 4
 *
 * Or without:
 *
 *     Header 1 | Header 2
 *     -------- | --------
 *     Cell 1   | Cell 2
 *     Cell 3   | Cell 4
 *
 * Both patterns are compiled in extended (`x`) mode and contain `#` comments, which
 * run to the end of the physical line: each commented fragment must therefore stay
 * on its own line or the comment swallows the rest of the pattern.
 *
 * Note that the optional caption is the first capturing group of each pattern, so the
 * `$1`/`$2`/`$3` labels in the inline pattern comments do not count it.
 *
 * @param string $text
 * @return string
 */
public function transform($text)
{
    $less_than_tab = Kernel::getConfig('less_than_tab');

    // Find tables with leading pipe.
    $text = preg_replace_callback('
        {
            ^                                       # Start of a line
            (                                       # A caption between brackets (optional)
                [ ]{0,' . $less_than_tab . '}
                \\[.*?\\][ \\t]*\\n
            )?
            [ ]{0,' . $less_than_tab . '}           # Allowed whitespace.
            (
                (?>
                    [ ]{0,' . $less_than_tab . '}   # Allowed whitespace.
                    [|]                             # Optional leading pipe (present)
                    .* [|] .* \\n
                )*
            )                                       # $1: Header rows (at least one pipe)
            [ ]{0,' . $less_than_tab . '}           # Allowed whitespace.
            [|] ([ ]*[-:]+[-| :]*) \\n              # $2: Header underline
            (                                       # $3: Cells
                (?>
                    [ ]{0,' . $less_than_tab . '}   # Allowed whitespace.
                    [|] .* \\n                      # Row content
                )*
            )
            (?=\\n|\\Z)                             # Stop at final double newline.
        }xm', array($this, '_callback'), $text);

    // Find tables without leading pipe.
    $text = preg_replace_callback('
        {
            ^                                       # Start of a line
            (                                       # A caption between brackets (optional)
                [ ]{0,' . $less_than_tab . '}
                \\[.*?\\][ \\t]*\\n
            )?
            [ ]{0,' . $less_than_tab . '}           # Allowed whitespace.
            (
                (?>
                    [ ]{0,' . $less_than_tab . '}   # Allowed whitespace.
                    \\S .* [|] .* \\n
                )*
            )                                       # $1: Header rows (at least one pipe)
            ^[ ]{0,' . $less_than_tab . '}          # Allowed whitespace at the beginning
            ([-:]+[ ]*[|][-| :]*) \\n               # $2: Header underline
            (                                       # $3: Cells
                (?>
                    [ ]{0,' . $less_than_tab . '}   # Allowed whitespace.
                    .* [|] .* \\n                   # Row content
                )*
            )
            (?=\\n|\\Z)                             # Stop at final double newline.
        }xm', array($this, '_callback'), $text);

    return $text;
}
/**
 * Fetches the gamut stack registered in configuration under `$name`.
 *
 * The name is validated first with `isGamutStackName()`; the stack is then read
 * with `Kernel::getConfig()` and must not be empty.
 *
 * @param string $name
 *
 * @return array The non-empty configuration value stored under `$name`
 *
 * @throws \MarkdownExtended\Exception\UnexpectedValueException if `$name` seems malformed or can not be found
 */
public function getGamutStack($name)
{
    $well_formed = $this->isGamutStackName($name);
    if (!$well_formed) {
        $message = sprintf('A gamut stack name must follow a form like "%%_gamut", "%s" given', $name);
        throw new UnexpectedValueException($message);
    }

    $gamuts = Kernel::getConfig($name);
    if (!empty($gamuts)) {
        return $gamuts;
    }

    throw new UnexpectedValueException(sprintf('Unknown gamut stack "%s"', $name));
}
/**
 * Process Markdown `<pre><code>` blocks.
 *
 * Every run of lines indented by `tab_width` spaces, preceded by a blank line (or the
 * start of the text), is handed to `_callback()`.
 *
 * The pattern is compiled in extended (`x`) mode and contains `#` comments, which run
 * to the end of the physical line; the pattern must therefore keep its line breaks.
 *
 * @param string $text
 * @return string
 */
public function transform($text)
{
    // Read once: the same width bounds both the block indent and the closing lookahead.
    $tab_width = Kernel::getConfig('tab_width');

    return preg_replace_callback('{
            (?:\\n\\n|\\A\\n?)
            (                                   # $1 = the code block -- one or more lines, starting with a space/tab
                (?>
                    [ ]{' . $tab_width . '}     # Lines must start with a tab or a tab-width of spaces
                    .*\\n+
                )+
            )
            ((?=^[ ]{0,' . $tab_width . '}\\S)|\\Z) # Lookahead for non-space at line-start, or end of doc
        }xm', array($this, '_callback'), $text);
}
/**
 * Form HTML ordered (numbered) and unordered (bulleted) lists.
 *
 * Two passes are made, one per marker kind; in each pass the other marker kind is
 * used to detect where a list of a different type begins at the same indentation.
 *
 * The patterns are compiled in extended (`x`) mode and contain `#` comments, which run
 * to the end of the physical line; they must therefore keep their line breaks.
 *
 * @param string $text
 * @return string
 */
public function transform($text)
{
    $less_than_tab = Kernel::getConfig('less_than_tab');

    // marker pattern => pattern of the "other" kind of list
    $markers_relist = array(
        self::$marker_ul_re => self::$marker_ol_re,
        self::$marker_ol_re => self::$marker_ul_re,
    );

    foreach ($markers_relist as $marker_re => $other_marker_re) {
        // Re-usable pattern to match any entire ul or ol list:
        $whole_list_re = '
            (                                       # $1 = whole list
                (                                   # $2
                    ([ ]{0,' . $less_than_tab . '}) # $3 = number of spaces
                    (' . $marker_re . ')            # $4 = first list item marker
                    [ ]+
                )
                (?s:.+?)
                (                                   # $5
                    \\z
                |
                    \\n{2,}
                    (?=\\S)
                    (?!                             # Negative lookahead for another list item marker
                        [ ]*
                        ' . $marker_re . '[ ]+
                    )
                |
                    (?=                             # Lookahead for another kind of list
                        \\n
                        \\3                         # Must have the same indentation
                        ' . $other_marker_re . '[ ]+
                    )
                )
            )
        '; // mx

        // We use a different prefix before nested lists than top-level lists.
        // See extended comment in `self::transformItems()`.
        if (self::$list_level) {
            $text = preg_replace_callback('{
                    ^
                    ' . $whole_list_re . '
                }mx', array($this, '_callback'), $text);
        } else {
            $text = preg_replace_callback('{
                    (?:(?<=\\n)\\n|\\A\\n?)         # Must eat the newline
                    ' . $whole_list_re . '
                }mx', array($this, '_callback'), $text);
        }
    }

    return $text;
}
/**
 * Take the string $str and parse it into tokens, hashing embedded HTML,
 * escaped characters and handling code and maths spans.
 *
 * The text is consumed token by token: everything before a token is copied to the
 * output unchanged, the token itself is passed to `handleSpanToken()` together with
 * the text that follows it, and the loop resumes on that following text.
 *
 * The pattern is compiled in extended (`x`) mode and contains `#` comments, which run
 * to the end of the physical line; it must therefore keep its line breaks.
 *
 * @param string $str
 * @return string
 */
public function transform($str)
{
    $output = '';

    // NOTE(review): in extended mode `\ ` is a literal space, so the "inline math"
    // alternative below matches a space followed by `(` - confirm this is intended.
    $span_re = '{
            (
                \\\\' . Kernel::getConfig('escaped_characters_re') . '
            |
                (?<![`\\\\])
                `+                          # code span marker
            |
                \\ \\(                      # inline math
        ' . (Kernel::getConfig('no_markup') === true ? '' : '
            |
                <!--    .*?     -->         # comment
            |
                <\\?.*?\\?> | <%.*?%>       # processing instruction
            |
                <[/!$]?[-a-zA-Z0-9:_]+      # regular tags
                (?>
                    \\s
                    (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
                )?
                >
        ') . '
            )
        }xs';

    while (true) {
        // Each loop iteration search for either the next tag, the next
        // opening code span marker, or the next escaped character.
        // Each token is then passed to handleSpanToken.
        $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

        // On a PCRE failure keep the unparsed remainder rather than dropping it.
        if ($parts === false) {
            $output .= $str;
            break;
        }

        // Create token from text preceding tag.
        if ($parts[0] !== '') {
            $output .= $parts[0];
        }

        // Check if we reach the end.
        if (!isset($parts[1])) {
            break;
        }

        // The handler receives the remaining text as well; the loop then continues
        // on `$parts[2]` as it stands after the call.
        $output .= self::handleSpanToken($parts[1], $parts[2]);
        $str = $parts[2];
    }

    return $output;
}
/**
 * Process tabs replacement
 *
 * Rebuilds one line by replacing every tab with the number of spaces needed to reach
 * the next multiple of the `tab_width` configuration value. The current line length
 * is measured with the callable stored in `$this->utf8_strlen`.
 *
 * @param array $matches A set of results of the `detab()` function
 * @return string The line rebuilt
 */
protected function _callback($matches)
{
    $strlen = $this->utf8_strlen; // strlen function for UTF-8.

    // Loop-invariant: read the tab width once instead of twice per tab.
    $tab_width = Kernel::getConfig('tab_width');

    // Split in blocks; the first one starts the line and is never preceded by padding.
    $blocks = explode("\t", $matches[0]);
    $line = array_shift($blocks);

    foreach ($blocks as $block) {
        // Calculate amount of space, insert spaces, insert block.
        $amount = $tab_width - $strlen($line, 'UTF-8') % $tab_width;
        $line .= str_repeat(" ", $amount) . $block;
    }

    return $line;
}
/**
 * Strips link definitions from text, stores the URLs and titles in hash references
 *
 * Each definition matched is passed to `_strip_callback()`, whose return value
 * replaces it in the text.
 *
 * The pattern is compiled in extended (`x`) mode and contains `#` comments, which run
 * to the end of the physical line; it must therefore keep its line breaks.
 *
 * @param string $text
 * @return string
 * @todo Manage attributes (not working for now)
 */
public function strip($text)
{
    return preg_replace_callback('{
            ^[ ]{0,' . Kernel::getConfig('less_than_tab') . '}\\[(.+)\\][ ]?: # id = $1
              [ ]*
              \\n?                  # maybe *one* newline
              [ ]*
            (?:
              <(.+?)>               # url = $2
            |
              (\\S+?)               # url = $3
            )
              [ ]*
              \\n?                  # maybe one newline
              [ ]*
            (?:
                (?<=\\s)            # lookbehind for whitespace
                ["(]
                (.*?)               # title = $4
                [")]
                [ ]*
            )?                      # title is optional
              [ ]*
              \\n?                  # maybe one newline
              [ ]*
            (?:                     # Attributes = $5
                (?<=\\s)            # lookbehind for whitespace
                (
                    ([ ]*\\n)?
                    ((?:\\S+?=\\S+?)|(?:.+?=.+?)|(?:.+?=".*?")|(?:\\S+?=".*?"))
                )
              [ ]*
            )?                      # attributes are optional
            (\\n+|\\Z)
        }xm', array($this, '_strip_callback'), $text);
}
/**
 * Builds the in-text reference link for one note marker.
 *
 * The marker id is looked up, in this order, among the configured footnotes,
 * glossary notes and bibliography notes, each time under its own configured id prefix.
 * Every kind that defines the note copies its content into `self::$notes_ordered` and
 * draws a number; when several kinds match, the last one decides the link that is built.
 *
 * A note that already has an entry in `self::$written_notes` re-uses that number,
 * otherwise `self::$footnote_counter` is consumed.
 *
 * @param array $matches
 * @return string The link tag, or the original `[^id]` marker when no note matches
 */
protected function _append_callback($matches)
{
    $note_id = $matches[1];
    $note_num = $note_ref = $type_info = null;

    // prefix config key, notes config key, note type - processed in this order
    $note_kinds = array(
        array('footnote_id_prefix', 'footnotes', self::FOOTNOTE_DEFAULT),
        array('glossarynote_id_prefix', 'glossaries', self::FOOTNOTE_GLOSSARY),
        array('bibliographynote_id_prefix', 'bibliographies', self::FOOTNOTE_BIBLIOGRAPHY),
    );

    foreach ($note_kinds as $kind) {
        list($prefix_key, $notes_key, $type) = $kind;

        $node_id = Kernel::getConfig($prefix_key) . $note_id;
        $notes = Kernel::getConfig($notes_key);

        // Create a marker only if it has a corresponding note.
        if (!isset($notes[$node_id])) {
            continue;
        }

        $type_info = $this->getTypeInfo($type);

        // Transfer note content to the ordered list.
        self::$notes_ordered[$node_id] = $notes[$node_id];

        // The number already given to a note is looked up by the same prefixed id the
        // note is stored under in `$notes_ordered`. The glossary and bibliography
        // branches used to look up the bare marker id instead, unlike the footnote one.
        // NOTE(review): presumes `$written_notes` shares its keys with `$notes_ordered` - confirm.
        $note_num = array_key_exists($node_id, self::$written_notes)
            ? self::$written_notes[$node_id]
            : self::$footnote_counter++;
        $note_ref = $node_id;
    }

    if (isset($type_info) && !empty($note_id) && !empty($note_num) && !empty($note_ref)) {
        $dom_ids = Kernel::get('DomId');
        $backlink_id = $dom_ids->get($type_info['prefix'] . 'ref:' . $note_ref);
        $footlink_id = Kernel::get('DomId')->get($type_info['prefix'] . ':' . $note_ref);

        $attributes = array(
            'rel' => $type_info['name'],
            'href' => '#' . $footlink_id,
            'counter' => $note_num,
            'backlink_id' => $backlink_id,
        );

        return Kernel::get('OutputFormatBag')->buildTag($type_info['outputformat_methods']['link'], $note_num, $attributes);
    }

    // No usable note: leave the marker as it was written.
    return '[^' . $matches[1] . ']';
}
/**
 * Builds a link tag for a reference-style link (`[text][id]`, `[text][]` or `[text]`).
 *
 * The id is lower-cased, and embedded newlines turned into spaces, before it is looked
 * up in the `urls` configuration entry. When no URL is registered for it the whole
 * match is returned unchanged.
 *
 * @param array $matches A set of results of the `transform` function
 * @return string
 */
protected function _reference_callback($matches)
{
    $whole_match = $matches[1];
    $link_text = $matches[2];

    // The id group may be missing from the match, hence the isset().
    $link_id = isset($matches[3]) ? $matches[3] : '';

    // for shortcut links like [this][] or [this]
    // Compared against the empty string explicitly: `empty()` would also discard
    // the perfectly valid id "0".
    if ($link_id === '') {
        $link_id = $link_text;
    }

    // lower-case and turn embedded newlines into spaces
    $link_id = preg_replace('{[ ]?\\n}', ' ', strtolower($link_id));

    $urls = Kernel::getConfig('urls');
    $titles = Kernel::getConfig('titles');
    $predef_attributes = Kernel::getConfig('attributes');

    // Unknown reference: leave the text intact.
    if (!isset($urls[$link_id])) {
        return $whole_match;
    }

    $attributes = array();
    $attributes['href'] = Lexer::runGamut(GamutLoader::TOOL_ALIAS . ':EncodeAttribute', $urls[$link_id]);
    if (!empty($titles[$link_id])) {
        $attributes['title'] = Lexer::runGamut(GamutLoader::TOOL_ALIAS . ':EncodeAttribute', $titles[$link_id]);
    }
    if (!empty($predef_attributes[$link_id])) {
        // `href`/`title` computed above take precedence over predefined attributes.
        $attributes = array_merge(
            Lexer::runGamut(GamutLoader::TOOL_ALIAS . ':ExtractAttributes', $predef_attributes[$link_id]),
            $attributes
        );
    }

    $block = Kernel::get('OutputFormatBag')->buildTag('link', Lexer::runGamut('span_gamut', $link_text), $attributes);

    return parent::hashPart($block);
}
/**
 * Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
 *
 * *   Calls $hash_method to convert any blocks.
 * *   Stops when the first opening tag closes.
 * *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
 *     (it is not inside clean tags)
 *
 * Returns an array of that form: ( processed text , remaining text )
 *
 * Both tag patterns are compiled in extended (`x`) mode and contain `#` comments, which
 * run to the end of the physical line; they must therefore keep their line breaks.
 *
 * @param string $text          The text to be parsed
 * @param string $hash_method   Name of the method of this object used to hash a block
 * @param bool   $md_attr       Whether the `markdown` attribute is honoured
 * @return array ( processed text , remaining text )
 */
protected function _hashBlocks_inHTML($text, $hash_method, $md_attr)
{
    if ($text === '') {
        return array('', '');
    }

    // Regex to match `markdown` attribute inside of a tag.
    $markdown_attr_re = '
        {
            \\s*                # Eat whitespace before the `markdown` attribute
            markdown
            \\s*=\\s*
            (?>
                (["\'])         # $1: quote delimiter
                (.*?)           # $2: attribute value
                \\1             # matching delimiter
            |
                ([^\\s>]*)      # $3: unquoted attribute value
            )
            ()                  # $4: make $3 always defined (avoid warnings)
        }xs';

    // Regex to match any tag.
    $tag_re = '{
            (                               # $2: Capture hole tag.
                </?                         # Any opening or closing tag.
                    [\\w:$]+                # Tag name.
                    (?:
                        (?=[\\s"\'/a-zA-Z0-9])  # Allowed characters after tag name.
                        (?>
                            ".*?"       |   # Double quotes (can contain `>`)
                            \'.*?\'     |   # Single quotes (can contain `>`)
                            .+?             # Anything but quotes and `>`.
                        )*?
                    )?
                >                           # End of tag.
            |
                <!--    .*?     -->         # HTML Comment
            |
                <\\?.*?\\?> | <%.*?%>       # Processing instruction
            |
                <!\\[CDATA\\[.*?\\]\\]>     # CData Block
            )
        }xs';

    $original_text = $text; // Save original text in case of failure.
    $depth = 0;             // Current depth inside the tag tree.
    $block_text = "";       // Temporary text holder for current text.
    $parsed = "";           // Parsed text that will be returned.

    // Get the name of the starting tag.
    // (This pattern makes $base_tag_name_re safe without quoting.)
    $base_tag_name_re = '';
    if (preg_match('/^<([\\w:$]*)\\b/', $text, $matches)) {
        $base_tag_name_re = $matches[1];
    }

    // Loop through every tag until we find the corresponding closing tag.
    do {
        // Split the text using the first $tag_match pattern found.
        // Text before pattern will be first in the array, text after
        // pattern will be at the end, and between will be any catches made
        // by the pattern.
        $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

        if ($parts === false || count($parts) < 3) {
            // End of $text reached with unbalanced tag(s) (or the split failed).
            // In that case, we return original text unchanged and pass the
            // first character as filtered to prevent an infinite loop in the
            // parent function.
            return array($original_text[0], substr($original_text, 1));
        }

        $block_text .= $parts[0];   // Text before current tag.
        $tag = $parts[1];           // Tag to handle.
        $text = $parts[2];          // Remaining text after current tag.

        // Check for: Auto-close tag (like <hr/>) Comments and Processing Instructions.
        if (preg_match('{^</?(?:' . $this->auto_close_tags_re . ')\\b}', $tag) || $tag[1] == '!' || $tag[1] == '?') {
            // Just add the tag to the block as if it was text.
            $block_text .= $tag;
        } else {
            // Increase/decrease nested tag count. Only do so if
            // the tag's name match base tag's.
            if (preg_match('{^</?' . $base_tag_name_re . '\\b}', $tag)) {
                if ($tag[1] == '/') {
                    $depth--;
                } elseif ($tag[strlen($tag) - 2] != '/') {
                    $depth++;
                }
            }

            // Check for `markdown="1"` attribute and handle it.
            // The alternatives are grouped so that the anchors apply to each of them:
            // ungrouped, `^1|block|span$` also accepted values such as "10" or "xblock".
            if ($md_attr
                && preg_match($markdown_attr_re, $tag, $attr_m)
                && preg_match('/^(?:1|block|span)$/', $attr_m[2] . $attr_m[3])
            ) {
                // Remove `markdown` attribute from opening tag.
                $tag = preg_replace($markdown_attr_re, '', $tag);

                // Check if text inside this tag must be parsed in span mode.
                $mode = $attr_m[2] . $attr_m[3];
                $span_mode = $mode == 'span'
                    || $mode != 'block' && preg_match('{^<(?:' . $this->contain_span_tags_re . ')\\b}', $tag);

                // Calculate indent before tag.
                if (preg_match('/(?:^|\\n)( *?)(?! ).*?$/', $block_text, $matches)) {
                    /* @var callable $strlen */
                    $strlen = Kernel::getConfig('utf8_strlen');
                    $indent = $strlen($matches[1], 'UTF-8');
                } else {
                    $indent = 0;
                }

                // End preceding block with this tag.
                $block_text .= $tag;
                $parsed .= $this->{$hash_method}($block_text);

                // Get enclosing tag name for the ParseMarkdown function.
                // (This pattern makes $tag_name_re safe without quoting.)
                preg_match('/^<([\\w:$]*)\\b/', $tag, $matches);
                $tag_name_re = $matches[1];

                // Parse the content using the HTML-in-Markdown parser.
                list($block_text, $text) = self::_hashBlocks_inMarkdown($text, $indent, $tag_name_re, $span_mode);

                // Outdent markdown text.
                if ($indent > 0) {
                    $block_text = preg_replace("/^[ ]{1,{$indent}}/m", "", $block_text);
                }

                // Append tag content to parsed text.
                if (!$span_mode) {
                    $parsed .= "\n\n{$block_text}\n\n";
                } else {
                    $parsed .= "{$block_text}";
                }

                // Start over a new block.
                $block_text = "";
            } else {
                $block_text .= $tag;
            }
        }
    } while ($depth > 0);

    // Hash last block text that wasn't processed inside the loop.
    $parsed .= $this->{$hash_method}($block_text);

    return array($parsed, $text);
}
/**
 * Loads the `output_format_options.html` configuration into this object, then reads
 * its `html_empty_element_suffix` entry through `getConfig()`.
 */
public function __construct()
{
    $html_options = Kernel::getConfig('output_format_options.html');
    $this->config = $html_options;

    // Must come after `$this->config` is set: the value is read through this object's getter.
    $suffix = $this->getConfig('html_empty_element_suffix');
    $this->empty_element_suffix = $suffix;
}
/**
 * Builds an image tag for a reference-style image (`![alt][id]` or `![alt][]`).
 *
 * The lower-cased id is looked up in the `urls` configuration entry; when the id is
 * empty the lower-cased alternative text is used instead. When no URL is registered
 * for the id, the whole match is returned unchanged.
 *
 * @param array $matches A set of results of the `transform` function
 * @return string
 */
protected function _reference_callback($matches)
{
    $whole_match = $matches[1];
    $alt_text = $matches[2];

    $link_id = strtolower($matches[3]);
    if ($link_id == "") {
        // for shortcut links like ![this][].
        $link_id = strtolower($alt_text);
    }

    $urls = Kernel::getConfig('urls');
    $titles = Kernel::getConfig('titles');
    $predef_attributes = Kernel::getConfig('attributes');

    $encoder = GamutLoader::TOOL_ALIAS . ':EncodeAttribute';
    $alt_text = Lexer::runGamut($encoder, $alt_text);

    // If there's no such link ID, leave intact
    if (!isset($urls[$link_id])) {
        return $whole_match;
    }

    $attributes = array(
        'alt' => $alt_text,
        'id' => Helper::header2Label($link_id),
        'src' => Lexer::runGamut($encoder, $urls[$link_id]),
    );
    if (!empty($titles[$link_id])) {
        $attributes['title'] = Lexer::runGamut($encoder, $titles[$link_id]);
    }
    if (!empty($predef_attributes[$link_id])) {
        // Attributes computed above win over the predefined ones.
        $extracted = Lexer::runGamut(GamutLoader::TOOL_ALIAS . ':ExtractAttributes', $predef_attributes[$link_id]);
        $attributes = array_merge($extracted, $attributes);
    }

    $block = Kernel::get('OutputFormatBag')->buildTag('image', null, $attributes);

    return parent::hashPart($block);
}
/**
 * Process the contents of a single definition list, splitting it
 * into individual term and definition list items.
 *
 * Terms are handled first by `_item_callback_dt()`, then definitions by
 * `_item_callback_dd()`; the second pattern stops a definition at a `<!--dt-->`
 * marker, at the next definition mark, or at the end of the text.
 *
 * Both patterns are compiled in extended (`x`) mode and contain `#` comments, which
 * run to the end of the physical line; they must therefore keep their line breaks.
 *
 * @param string $list_str The result string from the `_callback()` function
 * @return string
 */
public function transformItems($list_str)
{
    $less_than_tab = Kernel::getConfig('less_than_tab');

    // trim trailing blank lines:
    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    // Process definition terms.
    // The closing lookahead uses the configured indentation bound like every other
    // part of these patterns, instead of a hard-coded `{0,3}`.
    $list_str = preg_replace_callback('{
            (?>\\A\\n?|\\n\\n+)                     # leading line
            (                                       # definition term = $1
                [ ]{0,' . $less_than_tab . '}       # leading whitespace
                (?![:][ ]|[ ])                      # negative lookahead for a definition mark (colon) or more whitespace.
                (?> \\S.* \\n)+?                    # actual term (not whitespace).
            )
            (?=\\n?[ ]{0,' . $less_than_tab . '}:[ ])   # lookahead for following line feed with a definition mark.
        }xm', array($this, '_item_callback_dt'), $list_str);

    // Process actual definitions.
    $list_str = preg_replace_callback('{
            \\n(\\n+)?                              # leading line = $1
            (                                       # marker space = $2
                [ ]{0,' . $less_than_tab . '}       # whitespace before colon
                [:][ ]+                             # definition mark (colon)
            )
            ((?s:.+?))                              # definition text = $3
            (?= \\n+                                # stop at next definition mark,
                (?:                                 # next term or end of text
                    [ ]{0,' . $less_than_tab . '} [:][ ]    |
                    <!--dt--> | \\z
                )
            )
        }xm', array($this, '_item_callback_dd'), $list_str);

    return $list_str;
}
/**
 * Gets the metadata list as string
 *
 * Every entry whose name is not listed in the `special_metadata` configuration value
 * is rendered with `buildTag('meta_data', ...)`; the tags are joined with `PHP_EOL`.
 *
 * @param array $metadata Metadata values indexed by name
 * @param \MarkdownExtended\API\ContentInterface $content Not used by this implementation
 *
 * @return string
 */
public function getMetadataToString(array $metadata, ContentInterface $content)
{
    $specials = Kernel::getConfig('special_metadata');
    if (!is_array($specials)) {
        // Nothing configured: no name is special.
        $specials = array();
    }

    $data = array();
    foreach ($metadata as $var => $val) {
        // PHP stores integer-like array keys as integers. Compare names as strings and
        // strictly: with a loose comparison, key 0 equals any non-numeric name on
        // PHP < 8 and the entry would be silently dropped.
        // NOTE(review): assumes special names are configured as strings - confirm.
        if (in_array((string) $var, $specials, true)) {
            continue;
        }
        $data[] = $this->buildTag('meta_data', null, array('name' => $var, 'content' => $val));
    }

    return implode(PHP_EOL, $data);
}
/**
 * Dispatches the parsed `metadata` configuration entries.
 *
 * A name listed in `$this->special_metadata` is written back as a configuration value
 * of the same name; otherwise a `title` entry is passed to the current content
 * object's `setTitle()`. Any other entry is ignored here.
 *
 * @param string $text
 * @return string The text, unchanged
 */
public function append($text)
{
    $metadata = Kernel::getConfig('metadata');
    if (empty($metadata)) {
        return $text;
    }

    foreach ($metadata as $name => $value) {
        // Only non-empty string names are considered.
        if (empty($name) || !is_string($name)) {
            continue;
        }

        if (in_array($name, $this->special_metadata)) {
            Kernel::setConfig($name, $value);
            continue;
        }

        if ($name == 'title') {
            Kernel::get(Kernel::TYPE_CONTENT)->setTitle($value);
        }
    }

    return $text;
}
/**
 * Resets the per-document registries kept in configuration.
 *
 * `html_hashes` and `cross_references` restart empty; `urls`, `titles` and
 * `attributes` restart from their `predefined_*` counterparts (an empty array when
 * none is configured).
 */
private function _clearHashes()
{
    foreach (array('html_hashes', 'cross_references') as $registry) {
        Kernel::setConfig($registry, array());
    }

    // target entry => entry providing its initial value
    // NOTE(review): the last pair writes `predefined_abbr` back onto itself, which only
    // defaults it to an empty array - confirm no other target entry was intended.
    $seeded = array(
        'urls' => 'predefined_urls',
        'titles' => 'predefined_titles',
        'attributes' => 'predefined_attributes',
        'predefined_abbr' => 'predefined_abbr',
    );
    foreach ($seeded as $target => $source) {
        Kernel::setConfig($target, Kernel::getConfig($source, array()));
    }
}
/**
 * Strips one indentation level from the beginning of every line.
 *
 * One level is either a single tab or between one and `tab_width` spaces, where
 * `tab_width` is read from configuration.
 *
 * @param string $text The text to be parsed
 *
 * @return string The text parsed
 */
public function Outdent($text)
{
    $tab_width = Kernel::getConfig('tab_width');
    $one_level_re = '/^(\\t|[ ]{1,' . $tab_width . '})/m';

    return preg_replace($one_level_re, '', $text);
}
/**
 * Computes the offset derived from the `baseheaderlevel` config value.
 *
 * @return int|float The configured base level minus one, or 0 when the value is empty
 */
protected function _getRebasedHeaderLevel()
{
    $base_level = Kernel::getConfig('baseheaderlevel');

    if (empty($base_level)) {
        return 0;
    }

    return $base_level - 1;
}
/**
 * Registers one abbreviation definition and removes it from the text.
 *
 * The quoted abbreviation is added to the `abbr_word_re` configuration entry, preceded
 * by `|` when that entry already holds a value, and its trimmed description is added
 * to the `abbr_desciptions` entry under the abbreviation itself.
 *
 * @param array $matches Results from the `strip()` function
 * @return string The replacement text (always empty)
 */
protected function _strip_callback($matches)
{
    $abbreviation = $matches[1];

    // Alternatives are only separated once the entry is non-empty.
    $separator = Kernel::getConfig('abbr_word_re') ? '|' : '';
    Kernel::addConfig('abbr_word_re', $separator . preg_quote($abbreviation));

    $description = trim($matches[2]);
    Kernel::addConfig('abbr_desciptions', array($abbreviation => $description));

    return '';
}