/**
 * Returns the shared helper instance, building it the first time it is requested.
 *
 * @return CommonMark_Util_RegexHelper
 */
public static function getInstance()
{
    // Fast path: the instance has already been created by an earlier call.
    if (self::$instance !== null) {
        return self::$instance;
    }

    self::$instance = new CommonMark_Util_RegexHelper();

    return self::$instance;
}
/**
 * Analyze one line of input and fold it into the block tree.
 *
 * The method works in three phases:
 *  1. Walk down the chain of open blocks starting at the document, checking
 *     whether this line continues each of them.
 *  2. Starting from the deepest block that matched, try to open new
 *     container/leaf blocks (block quote, header, fenced code, list, ...).
 *  3. Add whatever text remains to the appropriate block, handling lazy
 *     paragraph continuation and closing fences.
 *
 * @param string $ln         The raw line (tabs are expanded via detabLine())
 * @param int    $lineNumber Line number passed through to created/closed blocks
 */
protected function incorporateLine($ln, $lineNumber)
{
    $allMatched = true;
    $offset = 0;
    $blank = false;
    $container = $this->doc;
    $oldTip = $this->tip;

    // Convert tabs to spaces:
    $ln = self::detabLine($ln);

    // For each containing block, try to parse the associated line start.
    // Bail out on failure: container will point to the last matching block.
    // Set all_matched to false if not all containers match.
    while ($container->hasChildren()) {
        /** @var BlockElement $lastChild */
        $lastChild = $container->getChildren()->last();
        if (!$lastChild->getIsOpen()) {
            break;
        }

        $container = $lastChild;

        $match = CommonMark_Util_RegexHelper::matchAt('/[^ ]/', $ln, $offset);
        if ($match === null) {
            $firstNonSpace = strlen($ln);
            $blank = true;
        } else {
            $firstNonSpace = $match;
            $blank = false;
        }

        $indent = $firstNonSpace - $offset;

        switch ($container->getType()) {
            case CommonMark_Element_BlockElement::TYPE_BLOCK_QUOTE:
                $matched = $indent <= 3 && isset($ln[$firstNonSpace]) && $ln[$firstNonSpace] === '>';
                if ($matched) {
                    $offset = $firstNonSpace + 1;
                    // optional space following the '>'
                    if (isset($ln[$offset]) && $ln[$offset] === ' ') {
                        $offset++;
                    }
                } else {
                    $allMatched = false;
                }
                break;

            case CommonMark_Element_BlockElement::TYPE_LIST_ITEM:
                $listData = $container->getExtra('list_data');
                $increment = $listData['marker_offset'] + $listData['padding'];
                if ($indent >= $increment) {
                    $offset += $increment;
                } elseif ($blank) {
                    $offset = $firstNonSpace;
                } else {
                    $allMatched = false;
                }
                break;

            case CommonMark_Element_BlockElement::TYPE_INDENTED_CODE:
                if ($indent >= self::CODE_INDENT) {
                    $offset += self::CODE_INDENT;
                } elseif ($blank) {
                    $offset = $firstNonSpace;
                } else {
                    $allMatched = false;
                }
                break;

            case CommonMark_Element_BlockElement::TYPE_ATX_HEADER:
            case CommonMark_Element_BlockElement::TYPE_SETEXT_HEADER:
            case CommonMark_Element_BlockElement::TYPE_HORIZONTAL_RULE:
                // a header can never contain > 1 line, so fail to match:
                $allMatched = false;
                break;

            case CommonMark_Element_BlockElement::TYPE_FENCED_CODE:
                // skip optional spaces of fence offset
                $i = $container->getExtra('fence_offset');
                // isset() guards against reading past the end of the line
                // (e.g. an empty line inside an indented fenced block), which
                // would otherwise raise an "Uninitialized string offset" error.
                while ($i > 0 && isset($ln[$offset]) && $ln[$offset] === ' ') {
                    $offset++;
                    $i--;
                }
                break;

            case CommonMark_Element_BlockElement::TYPE_HTML_BLOCK:
                if ($blank) {
                    $allMatched = false;
                }
                break;

            case CommonMark_Element_BlockElement::TYPE_PARAGRAPH:
                if ($blank) {
                    $container->setIsLastLineBlank(true);
                    $allMatched = false;
                }
                break;

            default:
                // Nothing
        }

        if (!$allMatched) {
            $container = $container->getParent(); // back up to the last matching block
            break;
        }
    }

    $lastMatchedContainer = $container;

    // Unmatched blocks are finalized and closed via closeUnmatchedBlocks().
    // We aren't ready to do this now, because we might have a lazy paragraph
    // continuation, in which case we don't want to close unmatched blocks.
    // This flag is handed to every closeUnmatchedBlocks() call below
    // (presumably by reference so the work happens only once - confirm
    // against closeUnmatchedBlocks()).
    $closeUnmatchedBlocksAlreadyDone = false;

    // Check to see if we've hit 2nd blank line; if so break out of list:
    if ($blank && $container->getIsLastLineBlank()) {
        $this->breakOutOfLists($container, $lineNumber);
    }

    // Unless last matched container is a code block, try new container starts,
    // adding children to the last matched container:
    while ($container->getType() != CommonMark_Element_BlockElement::TYPE_FENCED_CODE &&
        $container->getType() != CommonMark_Element_BlockElement::TYPE_INDENTED_CODE &&
        $container->getType() != CommonMark_Element_BlockElement::TYPE_HTML_BLOCK &&
        CommonMark_Util_RegexHelper::matchAt('/^[ #`~*+_=<>0-9-]/', $ln, $offset) !== null
    ) {
        $match = CommonMark_Util_RegexHelper::matchAt('/[^ ]/', $ln, $offset);
        if ($match === null) {
            $firstNonSpace = strlen($ln);
            $blank = true;
        } else {
            $firstNonSpace = $match;
            $blank = false;
        }

        $indent = $firstNonSpace - $offset;

        if ($indent >= self::CODE_INDENT) {
            // indented code
            if ($this->tip->getType() != CommonMark_Element_BlockElement::TYPE_PARAGRAPH && !$blank) {
                $offset += self::CODE_INDENT;
                $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone);
                $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_INDENTED_CODE, $lineNumber, $offset);
            } else {
                // ident > 4 in a lazy paragraph continuation
                break;
            }
        } elseif (!$blank && $ln[$firstNonSpace] === '>') {
            // blockquote
            $offset = $firstNonSpace + 1;
            // optional following space
            if (isset($ln[$offset]) && $ln[$offset] === ' ') {
                $offset++;
            }
            $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone);
            $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_BLOCK_QUOTE, $lineNumber, $offset);
        } elseif ($match = CommonMark_Util_RegexHelper::matchAll('/^#{1,6}(?: +|$)/', $ln, $firstNonSpace)) {
            // ATX header
            $offset = $firstNonSpace + strlen($match[0]);
            $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone);
            $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_ATX_HEADER, $lineNumber, $firstNonSpace);
            $container->setExtra('level', strlen(trim($match[0]))); // number of #s
            // remove trailing ###s
            $container->getStrings()->add(preg_replace('/(?:(\\\\#) *#*| *#+) *$/', '$1', substr($ln, $offset)));
            break;
        } elseif ($match = CommonMark_Util_RegexHelper::matchAll('/^`{3,}(?!.*`)|^~{3,}(?!.*~)/', $ln, $firstNonSpace)) {
            // fenced code block
            $fenceLength = strlen($match[0]);
            $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone);
            $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_FENCED_CODE, $lineNumber, $firstNonSpace);
            $container->setExtra('fence_length', $fenceLength);
            $container->setExtra('fence_char', $match[0][0]);
            $container->setExtra('fence_offset', $firstNonSpace - $offset);
            $offset = $firstNonSpace + $fenceLength;
            break;
        } elseif (CommonMark_Util_RegexHelper::matchAt(CommonMark_Util_RegexHelper::getInstance()->getHtmlBlockOpenRegex(), $ln, $firstNonSpace) !== null) {
            // html block
            $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone);
            $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_HTML_BLOCK, $lineNumber, $firstNonSpace);
            // note, we don't adjust offset because the tag is part of the text
            break;
        } elseif ($container->getType() === CommonMark_Element_BlockElement::TYPE_PARAGRAPH &&
            $container->getStrings()->count() === 1 &&
            ($match = CommonMark_Util_RegexHelper::matchAll('/^(?:=+|-+) *$/', $ln, $firstNonSpace))
        ) {
            // setext header line
            $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone);
            $container->setType(CommonMark_Element_BlockElement::TYPE_SETEXT_HEADER);
            $container->setExtra('level', $match[0][0] === '=' ? 1 : 2);
            $offset = strlen($ln);
        } elseif (CommonMark_Util_RegexHelper::matchAt(CommonMark_Util_RegexHelper::getInstance()->getHRuleRegex(), $ln, $firstNonSpace) !== null) {
            // hrule
            $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone);
            $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_HORIZONTAL_RULE, $lineNumber, $firstNonSpace);
            $offset = strlen($ln) - 1;
            break;
        } elseif ($data = $this->parseListMarker($ln, $firstNonSpace)) {
            // list item
            $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone);
            $data['marker_offset'] = $indent;
            $offset = $firstNonSpace + $data['padding'];

            // add the list if needed
            if ($container->getType() !== CommonMark_Element_BlockElement::TYPE_LIST ||
                !$this->listsMatch($container->getExtra('list_data'), $data)
            ) {
                $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_LIST, $lineNumber, $firstNonSpace);
                $container->setExtra('list_data', $data);
            }

            // add the list item
            $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_LIST_ITEM, $lineNumber, $firstNonSpace);
            $container->setExtra('list_data', $data);
        } else {
            break;
        }

        if ($container->acceptsLines()) {
            // if it's a line container, it can't contain other containers
            break;
        }
    }

    // What remains at the offset is a text line. Add the text to the appropriate container.
    $match = CommonMark_Util_RegexHelper::matchAt('/[^ ]/', $ln, $offset);
    if ($match === null) {
        $firstNonSpace = strlen($ln);
        $blank = true;
    } else {
        $firstNonSpace = $match;
        $blank = false;
    }

    $indent = $firstNonSpace - $offset;

    // First check for a lazy paragraph continuation:
    if ($this->tip !== $lastMatchedContainer &&
        !$blank &&
        $this->tip->getType() == CommonMark_Element_BlockElement::TYPE_PARAGRAPH &&
        $this->tip->getStrings()->count() > 0
    ) {
        // lazy paragraph continuation
        $this->lastLineBlank = false; // TODO: really? (see line 1152)
        $this->addLine($ln, $offset);
    } else { // not a lazy continuation
        //finalize any blocks not matched
        $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone);

        // Block quote lines are never blank as they start with >
        // and we don't count blanks in fenced code for purposes of tight/loose
        // lists or breaking out of lists. We also don't set last_line_blank
        // on an empty list item.
        $container->setIsLastLineBlank(
            $blank &&
            !($container->getType() == CommonMark_Element_BlockElement::TYPE_BLOCK_QUOTE ||
                $container->getType() == CommonMark_Element_BlockElement::TYPE_FENCED_CODE ||
                $container->getType() == CommonMark_Element_BlockElement::TYPE_LIST_ITEM &&
                $container->getChildren()->count() === 0 &&
                $container->getStartLine() == $lineNumber)
        );

        // A line was just added below this container, so none of its
        // ancestors end with a blank line any more.
        $cont = $container;
        while ($cont->getParent()) {
            $cont->getParent()->setIsLastLineBlank(false);
            $cont = $cont->getParent();
        }

        switch ($container->getType()) {
            case CommonMark_Element_BlockElement::TYPE_INDENTED_CODE:
            case CommonMark_Element_BlockElement::TYPE_HTML_BLOCK:
                $this->addLine($ln, $offset);
                break;

            case CommonMark_Element_BlockElement::TYPE_FENCED_CODE:
                // check for closing code fence
                $test = $indent <= 3 &&
                    isset($ln[$firstNonSpace]) &&
                    $ln[$firstNonSpace] == $container->getExtra('fence_char') &&
                    ($match = CommonMark_Util_RegexHelper::matchAll('/^(?:`{3,}|~{3,})(?= *$)/', $ln, $firstNonSpace));
                if ($test && strlen($match[0]) >= $container->getExtra('fence_length')) {
                    // don't add closing fence to container; instead, close it:
                    $this->finalize($container, $lineNumber);
                } else {
                    $this->addLine($ln, $offset);
                }
                break;

            case CommonMark_Element_BlockElement::TYPE_ATX_HEADER:
            case CommonMark_Element_BlockElement::TYPE_SETEXT_HEADER:
            case CommonMark_Element_BlockElement::TYPE_HORIZONTAL_RULE:
                // nothing to do; we already added the contents.
                break;

            default:
                if ($container->acceptsLines()) {
                    $this->addLine($ln, $firstNonSpace);
                } elseif ($blank) {
                    // do nothing
                } elseif ($container->getType() != CommonMark_Element_BlockElement::TYPE_HORIZONTAL_RULE &&
                    $container->getType() != CommonMark_Element_BlockElement::TYPE_SETEXT_HEADER
                ) {
                    // create paragraph container for line
                    $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_PARAGRAPH, $lineNumber, $firstNonSpace);
                    $this->addLine($ln, $firstNonSpace);
                } else {
                    // TODO: throw exception?
                }
        }
    }
}
/**
 * Finalize the block; mark it closed for modification.
 *
 * Does nothing if the block is already closed. Otherwise records the end
 * line and derives the final string content / extra data from the collected
 * lines, depending on the block type:
 *  - paragraph: strips leading spaces from each line and consumes leading
 *    link reference definitions (the block becomes TYPE_REFERENCE_DEF if
 *    nothing but definitions was present);
 *  - headers / HTML block: joins the lines with newlines;
 *  - indented code: joins the lines, drops trailing blank lines and ends
 *    the content with exactly one newline;
 *  - fenced code: first line becomes the 'info' extra, the rest the content;
 *  - list: computes the 'tight' extra.
 *
 * @param int                               $lineNumber   Line at which the block is being closed
 * @param CommonMark_InlineParser           $inlineParser Used to parse link reference definitions
 * @param CommonMark_Reference_ReferenceMap $refMap       Receives parsed reference definitions
 */
public function finalize($lineNumber, CommonMark_InlineParser $inlineParser, CommonMark_Reference_ReferenceMap $refMap)
{
    if (!$this->open) {
        return;
    }

    $this->open = false;
    if ($lineNumber > $this->startLine) {
        $this->endLine = $lineNumber - 1;
    } else {
        $this->endLine = $lineNumber;
    }

    switch ($this->getType()) {
        case self::TYPE_PARAGRAPH:
            $this->stringContent = preg_replace('/^ */m', '', implode("\n", $this->strings->toArray()));

            // Try parsing the beginning as link reference definitions.
            // isset() guards the offset read so empty content cannot trigger
            // an "Uninitialized string offset" error.
            while (isset($this->stringContent[0]) &&
                $this->stringContent[0] === '[' &&
                ($pos = $inlineParser->parseReference($this->stringContent, $refMap))
            ) {
                $this->stringContent = substr($this->stringContent, $pos);
                if ($this->isStringContentBlank()) {
                    $this->type = self::TYPE_REFERENCE_DEF;
                    break;
                }
            }
            break;

        case self::TYPE_ATX_HEADER:
        case self::TYPE_SETEXT_HEADER:
        case self::TYPE_HTML_BLOCK:
            $this->stringContent = implode("\n", $this->strings->toArray());
            break;

        case self::TYPE_INDENTED_CODE:
            // Trailing blank lines are not part of an indented code block.
            // A "blank" line may still hold leftover spaces (a blank line
            // indented beyond the code indent keeps its extra spaces), so
            // strip every trailing "newline + optional spaces" run from the
            // joined text rather than only lines that are exactly empty.
            $joined = implode("\n", $this->strings->toArray());
            $this->stringContent = preg_replace('/(?:\n *)+\z/', '', $joined) . "\n";
            break;

        case self::TYPE_FENCED_CODE:
            // first line becomes info string
            $this->setExtra('info', CommonMark_Util_RegexHelper::unescape(trim($this->strings->first())));
            if ($this->strings->count() == 1) {
                $this->stringContent = '';
            } else {
                $this->stringContent = implode("\n", $this->strings->slice(1)) . "\n";
            }
            break;

        case self::TYPE_LIST:
            $this->setExtra('tight', true); // tight by default

            $numItems = $this->children->count();
            $i = 0;
            while ($i < $numItems) {
                /** @var BlockElement $item */
                $item = $this->children->get($i);

                // check for non-final list item ending with blank line:
                $lastItem = $i == $numItems - 1;
                if ($item->endsWithBlankLine() && !$lastItem) {
                    $this->setExtra('tight', false);
                    break;
                }

                // Recurse into children of list item, to see if there are
                // spaces between any of them:
                $numSubItems = $item->getChildren()->count();
                $j = 0;
                while ($j < $numSubItems) {
                    $subItem = $item->getChildren()->get($j);
                    $lastSubItem = $j == $numSubItems - 1;
                    if ($subItem->endsWithBlankLine() && !($lastItem && $lastSubItem)) {
                        $this->setExtra('tight', false);
                        break;
                    }

                    $j++;
                }

                $i++;
            }
            break;

        default:
            break;
    }
}
/**
 * Parse a run of ordinary characters, or a single character with
 * a special meaning in markdown, as a plain string, adding to inlines.
 *
 * @param CommonMark_Util_ArrayCollection $inlines Collection the created string inline is added to
 *
 * @return int Length of the matched text, or 0 when nothing matched
 */
protected function parseString(CommonMark_Util_ArrayCollection $inlines)
{
    $m = $this->match(CommonMark_Util_RegexHelper::getInstance()->getMainRegex());

    // A plain truthiness test would discard a matched run consisting solely
    // of "0", because PHP treats the string "0" as false. Accept it
    // explicitly while still treating any other falsy result as "no match"
    // (the exact failure value of match() is not visible here).
    if ($m || $m === '0') {
        $inlines->add(CommonMark_Element_InlineCreator::createString($m));

        return strlen($m);
    }

    return 0;
}