/** * @param string $ln * @param int $lineNumber */ protected function incorporateLine($ln, $lineNumber) { $allMatched = true; $offset = 0; $blank = false; $container = $this->doc; $oldTip = $this->tip; // Convert tabs to spaces: $ln = self::detabLine($ln); // For each containing block, try to parse the associated line start. // Bail out on failure: container will point to the last matching block. // Set all_matched to false if not all containers match. while ($container->hasChildren()) { /** @var BlockElement $lastChild */ $lastChild = $container->getChildren()->last(); if (!$lastChild->getIsOpen()) { break; } $container = $lastChild; $match = CommonMark_Util_RegexHelper::matchAt('/[^ ]/', $ln, $offset); if ($match === null) { $firstNonSpace = strlen($ln); $blank = true; } else { $firstNonSpace = $match; $blank = false; } $indent = $firstNonSpace - $offset; switch ($container->getType()) { case CommonMark_Element_BlockElement::TYPE_BLOCK_QUOTE: $matched = $indent <= 3 && isset($ln[$firstNonSpace]) && $ln[$firstNonSpace] === '>'; if ($matched) { $offset = $firstNonSpace + 1; if (isset($ln[$offset]) && $ln[$offset] === ' ') { $offset++; } } else { $allMatched = false; } break; case CommonMark_Element_BlockElement::TYPE_LIST_ITEM: $listData = $container->getExtra('list_data'); $increment = $listData['marker_offset'] + $listData['padding']; if ($indent >= $increment) { $offset += $increment; } elseif ($blank) { $offset = $firstNonSpace; } else { $allMatched = false; } break; case CommonMark_Element_BlockElement::TYPE_INDENTED_CODE: if ($indent >= self::CODE_INDENT) { $offset += self::CODE_INDENT; } elseif ($blank) { $offset = $firstNonSpace; } else { $allMatched = false; } break; case CommonMark_Element_BlockElement::TYPE_ATX_HEADER: case CommonMark_Element_BlockElement::TYPE_SETEXT_HEADER: case CommonMark_Element_BlockElement::TYPE_HORIZONTAL_RULE: // a header can never contain > 1 line, so fail to match: $allMatched = false; break; case CommonMark_Element_BlockElement::TYPE_FENCED_CODE: // skip optional spaces of fence offset $i = $container->getExtra('fence_offset'); while ($i > 0 && $ln[$offset] === ' ') { $offset++; $i--; } break; case CommonMark_Element_BlockElement::TYPE_HTML_BLOCK: if ($blank) { $allMatched = false; } break; case CommonMark_Element_BlockElement::TYPE_PARAGRAPH: if ($blank) { $container->setIsLastLineBlank(true); $allMatched = false; } break; default: // Nothing } if (!$allMatched) { $container = $container->getParent(); // back up to the last matching block break; } } $lastMatchedContainer = $container; // This function is used to finalize and close any unmatched // blocks. We aren't ready to do this now, because we might // have a lazy paragraph continuation, in which case we don't // want to close unmatched blocks. So we store this closure for // use later, when we have more information. $closeUnmatchedBlocksAlreadyDone = false; // Check to see if we've hit 2nd blank line; if so break out of list: if ($blank && $container->getIsLastLineBlank()) { $this->breakOutOfLists($container, $lineNumber); } // Unless last matched container is a code block, try new container starts, // adding children to the last matched container: while ($container->getType() != CommonMark_Element_BlockElement::TYPE_FENCED_CODE && $container->getType() != CommonMark_Element_BlockElement::TYPE_INDENTED_CODE && $container->getType() != CommonMark_Element_BlockElement::TYPE_HTML_BLOCK && CommonMark_Util_RegexHelper::matchAt('/^[ #`~*+_=<>0-9-]/', $ln, $offset) !== null) { $match = CommonMark_Util_RegexHelper::matchAt('/[^ ]/', $ln, $offset); if ($match === null) { $firstNonSpace = strlen($ln); $blank = true; } else { $firstNonSpace = $match; $blank = false; } $indent = $firstNonSpace - $offset; if ($indent >= self::CODE_INDENT) { // indented code if ($this->tip->getType() != CommonMark_Element_BlockElement::TYPE_PARAGRAPH && !$blank) { $offset += self::CODE_INDENT; $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone); $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_INDENTED_CODE, $lineNumber, $offset); } else { // ident > 4 in a lazy paragraph continuation break; } } elseif (!$blank && $ln[$firstNonSpace] === '>') { // blockquote $offset = $firstNonSpace + 1; // optional following space if (isset($ln[$offset]) && $ln[$offset] === ' ') { $offset++; } $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone); $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_BLOCK_QUOTE, $lineNumber, $offset); } elseif ($match = CommonMark_Util_RegexHelper::matchAll('/^#{1,6}(?: +|$)/', $ln, $firstNonSpace)) { // ATX header $offset = $firstNonSpace + strlen($match[0]); $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone); $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_ATX_HEADER, $lineNumber, $firstNonSpace); $container->setExtra('level', strlen(trim($match[0]))); // number of #s // remove trailing ###s $container->getStrings()->add(preg_replace('/(?:(\\\\#) *#*| *#+) *$/', '$1', substr($ln, $offset))); break; } elseif ($match = CommonMark_Util_RegexHelper::matchAll('/^`{3,}(?!.*`)|^~{3,}(?!.*~)/', $ln, $firstNonSpace)) { // fenced code block $fenceLength = strlen($match[0]); $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone); $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_FENCED_CODE, $lineNumber, $firstNonSpace); $container->setExtra('fence_length', $fenceLength); $container->setExtra('fence_char', $match[0][0]); $container->setExtra('fence_offset', $firstNonSpace - $offset); $offset = $firstNonSpace + $fenceLength; break; } elseif (CommonMark_Util_RegexHelper::matchAt(CommonMark_Util_RegexHelper::getInstance()->getHtmlBlockOpenRegex(), $ln, $firstNonSpace) !== null) { // html block $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone); $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_HTML_BLOCK, $lineNumber, $firstNonSpace); // note, we don't adjust offset because the tag is part of the text break; } elseif ($container->getType() === CommonMark_Element_BlockElement::TYPE_PARAGRAPH && $container->getStrings()->count() === 1 && ($match = CommonMark_Util_RegexHelper::matchAll('/^(?:=+|-+) *$/', $ln, $firstNonSpace))) { // setext header line $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone); $container->setType(CommonMark_Element_BlockElement::TYPE_SETEXT_HEADER); $container->setExtra('level', $match[0][0] === '=' ? 1 : 2); $offset = strlen($ln); } elseif (CommonMark_Util_RegexHelper::matchAt(CommonMark_Util_RegexHelper::getInstance()->getHRuleRegex(), $ln, $firstNonSpace) !== null) { // hrule $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone); $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_HORIZONTAL_RULE, $lineNumber, $firstNonSpace); $offset = strlen($ln) - 1; break; } elseif ($data = $this->parseListMarker($ln, $firstNonSpace)) { // list item $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone); $data['marker_offset'] = $indent; $offset = $firstNonSpace + $data['padding']; // add the list if needed if ($container->getType() !== CommonMark_Element_BlockElement::TYPE_LIST || !$this->listsMatch($container->getExtra('list_data'), $data)) { $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_LIST, $lineNumber, $firstNonSpace); $container->setExtra('list_data', $data); } // add the list item $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_LIST_ITEM, $lineNumber, $firstNonSpace); $container->setExtra('list_data', $data); } else { break; } if ($container->acceptsLines()) { // if it's a line container, it can't contain other containers break; } } // What remains at the offset is a text line. Add the text to the appropriate container. $match = CommonMark_Util_RegexHelper::matchAt('/[^ ]/', $ln, $offset); if ($match === null) { $firstNonSpace = strlen($ln); $blank = true; } else { $firstNonSpace = $match; $blank = false; } $indent = $firstNonSpace - $offset; // First check for a lazy paragraph continuation: if ($this->tip !== $lastMatchedContainer && !$blank && $this->tip->getType() == CommonMark_Element_BlockElement::TYPE_PARAGRAPH && $this->tip->getStrings()->count() > 0) { // lazy paragraph continuation $this->lastLineBlank = false; // TODO: really? (see line 1152) $this->addLine($ln, $offset); } else { // not a lazy continuation //finalize any blocks not matched $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone); // Block quote lines are never blank as they start with > // and we don't count blanks in fenced code for purposes of tight/loose // lists or breaking out of lists. We also don't set last_line_blank // on an empty list item. $container->setIsLastLineBlank($blank && !($container->getType() == CommonMark_Element_BlockElement::TYPE_BLOCK_QUOTE || $container->getType() == CommonMark_Element_BlockElement::TYPE_FENCED_CODE || $container->getType() == CommonMark_Element_BlockElement::TYPE_LIST_ITEM && $container->getChildren()->count() === 0 && $container->getStartLine() == $lineNumber)); $cont = $container; while ($cont->getParent()) { $cont->getParent()->setIsLastLineBlank(false); $cont = $cont->getParent(); } switch ($container->getType()) { case CommonMark_Element_BlockElement::TYPE_INDENTED_CODE: case CommonMark_Element_BlockElement::TYPE_HTML_BLOCK: $this->addLine($ln, $offset); break; case CommonMark_Element_BlockElement::TYPE_FENCED_CODE: // check for closing code fence $test = $indent <= 3 && isset($ln[$firstNonSpace]) && $ln[$firstNonSpace] == $container->getExtra('fence_char') && ($match = CommonMark_Util_RegexHelper::matchAll('/^(?:`{3,}|~{3,})(?= *$)/', $ln, $firstNonSpace)); if ($test && strlen($match[0]) >= $container->getExtra('fence_length')) { // don't add closing fence to container; instead, close it: $this->finalize($container, $lineNumber); } else { $this->addLine($ln, $offset); } break; case CommonMark_Element_BlockElement::TYPE_ATX_HEADER: case CommonMark_Element_BlockElement::TYPE_SETEXT_HEADER: case CommonMark_Element_BlockElement::TYPE_HORIZONTAL_RULE: // nothing to do; we already added the contents. break; default: if ($container->acceptsLines()) { $this->addLine($ln, $firstNonSpace); } elseif ($blank) { // do nothing } elseif ($container->getType() != CommonMark_Element_BlockElement::TYPE_HORIZONTAL_RULE && $container->getType() != CommonMark_Element_BlockElement::TYPE_SETEXT_HEADER) { // create paragraph container for line $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_PARAGRAPH, $lineNumber, $firstNonSpace); $this->addLine($ln, $firstNonSpace); } else { // TODO: throw exception? } } } }
/** * Parse a run of ordinary characters, or a single character with * a special meaning in markdown, as a plain string, adding to inlines. * * @param \ColinODell\CommonMark\Util\ArrayCollection $inlines * * @return int */ protected function parseString(CommonMark_Util_ArrayCollection $inlines) { if ($m = $this->match(CommonMark_Util_RegexHelper::getInstance()->getMainRegex())) { $inlines->add(CommonMark_Element_InlineCreator::createString($m)); return strlen($m); } return 0; }