/**
  * Return the shared helper instance, creating it on first use.
  *
  * The instance is cached in self::$instance, so every later call
  * returns the same object.
  *
  * @return CommonMark_Util_RegexHelper
  */
 public static function getInstance()
 {
     // Fast path: the instance has already been built.
     if (self::$instance !== null) {
         return self::$instance;
     }

     self::$instance = new CommonMark_Util_RegexHelper();

     return self::$instance;
 }
Example #2
0
 /**
  * Analyze a single input line and incorporate it into the block tree.
  *
  * The line is processed in three phases:
  *  1. Descend through the currently open blocks, checking that the line
  *     satisfies each block's continuation condition.
  *  2. Starting from the last matched container, try to open new blocks
  *     (indented code, block quote, ATX/setext header, fenced code,
  *     HTML block, horizontal rule, list item).
  *  3. Add whatever text remains at the offset to the appropriate
  *     container, or treat the line as a lazy paragraph continuation.
  *
  * @param string $ln         The line to incorporate (tabs are converted to spaces first)
  * @param int    $lineNumber Line number handed to the blocks that get created or closed
  */
 protected function incorporateLine($ln, $lineNumber)
 {
     $allMatched = true;
     $offset = 0;
     $blank = false;
     $container = $this->doc;
     $oldTip = $this->tip;
     // Convert tabs to spaces:
     $ln = self::detabLine($ln);

     // Phase 1: for each containing block, try to parse the associated line start.
     // Bail out on failure: container will point to the last matching block.
     // $allMatched is set to false if not all containers match.
     while ($container->hasChildren()) {
         /** @var BlockElement $lastChild */
         $lastChild = $container->getChildren()->last();
         if (!$lastChild->getIsOpen()) {
             break;
         }
         $container = $lastChild;
         // Locate the first non-space character at or after $offset.
         $match = CommonMark_Util_RegexHelper::matchAt('/[^ ]/', $ln, $offset);
         if ($match === null) {
             $firstNonSpace = strlen($ln);
             $blank = true;
         } else {
             $firstNonSpace = $match;
             $blank = false;
         }
         $indent = $firstNonSpace - $offset;
         switch ($container->getType()) {
             case CommonMark_Element_BlockElement::TYPE_BLOCK_QUOTE:
                 $matched = $indent <= 3 && isset($ln[$firstNonSpace]) && $ln[$firstNonSpace] === '>';
                 if ($matched) {
                     $offset = $firstNonSpace + 1;
                     // optional space following the '>'
                     if (isset($ln[$offset]) && $ln[$offset] === ' ') {
                         $offset++;
                     }
                 } else {
                     $allMatched = false;
                 }
                 break;
             case CommonMark_Element_BlockElement::TYPE_LIST_ITEM:
                 $listData = $container->getExtra('list_data');
                 $increment = $listData['marker_offset'] + $listData['padding'];
                 if ($indent >= $increment) {
                     $offset += $increment;
                 } elseif ($blank) {
                     $offset = $firstNonSpace;
                 } else {
                     $allMatched = false;
                 }
                 break;
             case CommonMark_Element_BlockElement::TYPE_INDENTED_CODE:
                 if ($indent >= self::CODE_INDENT) {
                     $offset += self::CODE_INDENT;
                 } elseif ($blank) {
                     $offset = $firstNonSpace;
                 } else {
                     $allMatched = false;
                 }
                 break;
             case CommonMark_Element_BlockElement::TYPE_ATX_HEADER:
             case CommonMark_Element_BlockElement::TYPE_SETEXT_HEADER:
             case CommonMark_Element_BlockElement::TYPE_HORIZONTAL_RULE:
                 // a header can never contain > 1 line, so fail to match:
                 $allMatched = false;
                 break;
             case CommonMark_Element_BlockElement::TYPE_FENCED_CODE:
                 // skip optional spaces of fence offset
                 $i = $container->getExtra('fence_offset');
                 // isset() guards against reading past the end of a line that is
                 // shorter than the fence indentation (e.g. an empty line).
                 while ($i > 0 && isset($ln[$offset]) && $ln[$offset] === ' ') {
                     $offset++;
                     $i--;
                 }
                 break;
             case CommonMark_Element_BlockElement::TYPE_HTML_BLOCK:
                 if ($blank) {
                     $allMatched = false;
                 }
                 break;
             case CommonMark_Element_BlockElement::TYPE_PARAGRAPH:
                 if ($blank) {
                     $container->setIsLastLineBlank(true);
                     $allMatched = false;
                 }
                 break;
             default:
                 // Nothing
         }
         if (!$allMatched) {
             $container = $container->getParent();
             // back up to the last matching block
             break;
         }
     }
     $lastMatchedContainer = $container;
     // closeUnmatchedBlocks() is used to finalize and close any unmatched
     // blocks.  We aren't ready to do this now, because we might
     // have a lazy paragraph continuation, in which case we don't
     // want to close unmatched blocks.  So the call is deferred until we
     // have more information; this flag is handed to every call
     // (presumably so the work happens only once -- confirm against
     // closeUnmatchedBlocks()).
     $closeUnmatchedBlocksAlreadyDone = false;
     // Check to see if we've hit 2nd blank line; if so break out of list:
     if ($blank && $container->getIsLastLineBlank()) {
         $this->breakOutOfLists($container, $lineNumber);
     }

     // Phase 2: unless last matched container is a code block, try new container starts,
     // adding children to the last matched container:
     while ($container->getType() != CommonMark_Element_BlockElement::TYPE_FENCED_CODE
         && $container->getType() != CommonMark_Element_BlockElement::TYPE_INDENTED_CODE
         && $container->getType() != CommonMark_Element_BlockElement::TYPE_HTML_BLOCK
         && CommonMark_Util_RegexHelper::matchAt('/^[ #`~*+_=<>0-9-]/', $ln, $offset) !== null
     ) {
         $match = CommonMark_Util_RegexHelper::matchAt('/[^ ]/', $ln, $offset);
         if ($match === null) {
             $firstNonSpace = strlen($ln);
             $blank = true;
         } else {
             $firstNonSpace = $match;
             $blank = false;
         }
         $indent = $firstNonSpace - $offset;
         if ($indent >= self::CODE_INDENT) {
             // indented code
             if ($this->tip->getType() != CommonMark_Element_BlockElement::TYPE_PARAGRAPH && !$blank) {
                 $offset += self::CODE_INDENT;
                 $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone);
                 $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_INDENTED_CODE, $lineNumber, $offset);
             } else {
                 // indent > 4 in a lazy paragraph continuation
                 break;
             }
         } elseif (!$blank && $ln[$firstNonSpace] === '>') {
             // blockquote
             $offset = $firstNonSpace + 1;
             // optional following space
             if (isset($ln[$offset]) && $ln[$offset] === ' ') {
                 $offset++;
             }
             $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone);
             $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_BLOCK_QUOTE, $lineNumber, $offset);
         } elseif ($match = CommonMark_Util_RegexHelper::matchAll('/^#{1,6}(?: +|$)/', $ln, $firstNonSpace)) {
             // ATX header
             $offset = $firstNonSpace + strlen($match[0]);
             $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone);
             $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_ATX_HEADER, $lineNumber, $firstNonSpace);
             // level = number of #s
             $container->setExtra('level', strlen(trim($match[0])));
             // remove trailing ###s
             $container->getStrings()->add(preg_replace('/(?:(\\\\#) *#*| *#+) *$/', '$1', substr($ln, $offset)));
             break;
         } elseif ($match = CommonMark_Util_RegexHelper::matchAll('/^`{3,}(?!.*`)|^~{3,}(?!.*~)/', $ln, $firstNonSpace)) {
             // fenced code block
             $fenceLength = strlen($match[0]);
             $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone);
             $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_FENCED_CODE, $lineNumber, $firstNonSpace);
             $container->setExtra('fence_length', $fenceLength);
             $container->setExtra('fence_char', $match[0][0]);
             // remember how far the opening fence was indented so the same
             // number of spaces can be skipped on continuation lines
             $container->setExtra('fence_offset', $firstNonSpace - $offset);
             $offset = $firstNonSpace + $fenceLength;
             break;
         } elseif (CommonMark_Util_RegexHelper::matchAt(CommonMark_Util_RegexHelper::getInstance()->getHtmlBlockOpenRegex(), $ln, $firstNonSpace) !== null) {
             // html block
             $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone);
             $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_HTML_BLOCK, $lineNumber, $firstNonSpace);
             // note, we don't adjust offset because the tag is part of the text
             break;
         } elseif ($container->getType() === CommonMark_Element_BlockElement::TYPE_PARAGRAPH && $container->getStrings()->count() === 1 && ($match = CommonMark_Util_RegexHelper::matchAll('/^(?:=+|-+) *$/', $ln, $firstNonSpace))) {
             // setext header line: the single-line paragraph becomes the header
             $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone);
             $container->setType(CommonMark_Element_BlockElement::TYPE_SETEXT_HEADER);
             $container->setExtra('level', $match[0][0] === '=' ? 1 : 2);
             $offset = strlen($ln);
         } elseif (CommonMark_Util_RegexHelper::matchAt(CommonMark_Util_RegexHelper::getInstance()->getHRuleRegex(), $ln, $firstNonSpace) !== null) {
             // hrule
             $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone);
             $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_HORIZONTAL_RULE, $lineNumber, $firstNonSpace);
             $offset = strlen($ln) - 1;
             break;
         } elseif ($data = $this->parseListMarker($ln, $firstNonSpace)) {
             // list item
             $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone);
             $data['marker_offset'] = $indent;
             $offset = $firstNonSpace + $data['padding'];
             // add the list if needed
             if ($container->getType() !== CommonMark_Element_BlockElement::TYPE_LIST || !$this->listsMatch($container->getExtra('list_data'), $data)) {
                 $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_LIST, $lineNumber, $firstNonSpace);
                 $container->setExtra('list_data', $data);
             }
             // add the list item
             $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_LIST_ITEM, $lineNumber, $firstNonSpace);
             $container->setExtra('list_data', $data);
         } else {
             break;
         }
         if ($container->acceptsLines()) {
             // if it's a line container, it can't contain other containers
             break;
         }
     }

     // Phase 3: what remains at the offset is a text line.  Add the text to the appropriate container.
     $match = CommonMark_Util_RegexHelper::matchAt('/[^ ]/', $ln, $offset);
     if ($match === null) {
         $firstNonSpace = strlen($ln);
         $blank = true;
     } else {
         $firstNonSpace = $match;
         $blank = false;
     }
     $indent = $firstNonSpace - $offset;
     // First check for a lazy paragraph continuation:
     if ($this->tip !== $lastMatchedContainer && !$blank && $this->tip->getType() == CommonMark_Element_BlockElement::TYPE_PARAGRAPH && $this->tip->getStrings()->count() > 0) {
         // lazy paragraph continuation
         $this->lastLineBlank = false;
         // TODO: really? (see line 1152)
         $this->addLine($ln, $offset);
     } else {
         // not a lazy continuation
         // finalize any blocks not matched
         $this->closeUnmatchedBlocks($this, $oldTip, $lastMatchedContainer, $lineNumber, $closeUnmatchedBlocksAlreadyDone);
         // Block quote lines are never blank as they start with >
         // and we don't count blanks in fenced code for purposes of tight/loose
         // lists or breaking out of lists.  We also don't set last_line_blank
         // on an empty list item.
         $container->setIsLastLineBlank($blank && !($container->getType() == CommonMark_Element_BlockElement::TYPE_BLOCK_QUOTE || $container->getType() == CommonMark_Element_BlockElement::TYPE_FENCED_CODE || $container->getType() == CommonMark_Element_BlockElement::TYPE_LIST_ITEM && $container->getChildren()->count() === 0 && $container->getStartLine() == $lineNumber));
         // Clear the blank-line flag on every ancestor of the container.
         $cont = $container;
         while ($cont->getParent()) {
             $cont->getParent()->setIsLastLineBlank(false);
             $cont = $cont->getParent();
         }
         switch ($container->getType()) {
             case CommonMark_Element_BlockElement::TYPE_INDENTED_CODE:
             case CommonMark_Element_BlockElement::TYPE_HTML_BLOCK:
                 $this->addLine($ln, $offset);
                 break;
             case CommonMark_Element_BlockElement::TYPE_FENCED_CODE:
                 // check for closing code fence
                 $test = $indent <= 3 && isset($ln[$firstNonSpace]) && $ln[$firstNonSpace] == $container->getExtra('fence_char') && ($match = CommonMark_Util_RegexHelper::matchAll('/^(?:`{3,}|~{3,})(?= *$)/', $ln, $firstNonSpace));
                 if ($test && strlen($match[0]) >= $container->getExtra('fence_length')) {
                     // don't add closing fence to container; instead, close it:
                     $this->finalize($container, $lineNumber);
                 } else {
                     $this->addLine($ln, $offset);
                 }
                 break;
             case CommonMark_Element_BlockElement::TYPE_ATX_HEADER:
             case CommonMark_Element_BlockElement::TYPE_SETEXT_HEADER:
             case CommonMark_Element_BlockElement::TYPE_HORIZONTAL_RULE:
                 // nothing to do; we already added the contents.
                 break;
             default:
                 if ($container->acceptsLines()) {
                     $this->addLine($ln, $firstNonSpace);
                 } elseif ($blank) {
                     // do nothing
                 } elseif ($container->getType() != CommonMark_Element_BlockElement::TYPE_HORIZONTAL_RULE && $container->getType() != CommonMark_Element_BlockElement::TYPE_SETEXT_HEADER) {
                     // create paragraph container for line
                     $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_PARAGRAPH, $lineNumber, $firstNonSpace);
                     $this->addLine($ln, $firstNonSpace);
                 } else {
                     // TODO: throw exception?
                 }
         }
     }
 }
 /**
  * Finalize the block; mark it closed for modification
  *
  * Does nothing if the block is already closed. Otherwise it records the
  * end line and builds the block's final content according to its type:
  * paragraphs have leading spaces stripped and leading link reference
  * definitions consumed, code blocks get their string content assembled,
  * and lists are classified as tight or loose.
  *
  * @param int                               $lineNumber   Line at which the block is being closed
  * @param CommonMark_InlineParser           $inlineParser Used to parse link reference definitions
  * @param CommonMark_Reference_ReferenceMap $refMap       Passed through to parseReference()
  */
 public function finalize($lineNumber, CommonMark_InlineParser $inlineParser, CommonMark_Reference_ReferenceMap $refMap)
 {
     if (!$this->open) {
         return;
     }
     $this->open = false;
     // The end line is the line before $lineNumber, unless that would
     // fall before the block's own start line.
     if ($lineNumber > $this->startLine) {
         $this->endLine = $lineNumber - 1;
     } else {
         $this->endLine = $lineNumber;
     }
     switch ($this->getType()) {
         case self::TYPE_PARAGRAPH:
             // Join the lines and strip the leading spaces of each one.
             $this->stringContent = preg_replace('/^  */m', '', implode("\n", $this->strings->toArray()));
             // Try parsing the beginning as link reference definitions.
             // isset() guards the offset read so that empty (or non-string)
             // content ends the loop instead of raising an
             // "Uninitialized string offset" notice/warning.
             // parseReference() appears to return the number of characters
             // it consumed (falsy when nothing was parsed) -- confirm.
             while (isset($this->stringContent[0]) && $this->stringContent[0] === '[' && ($pos = $inlineParser->parseReference($this->stringContent, $refMap))) {
                 $this->stringContent = substr($this->stringContent, $pos);
                 if ($this->isStringContentBlank()) {
                     // Nothing but reference definitions: retype the block.
                     $this->type = self::TYPE_REFERENCE_DEF;
                     break;
                 }
             }
             break;
         case self::TYPE_ATX_HEADER:
         case self::TYPE_SETEXT_HEADER:
         case self::TYPE_HTML_BLOCK:
             $this->stringContent = implode("\n", $this->strings->toArray());
             break;
         case self::TYPE_INDENTED_CODE:
             // Drop trailing empty lines (keys are preserved while walking
             // backwards so unset() removes the right entries).
             $reversed = array_reverse($this->strings->toArray(), true);
             foreach ($reversed as $index => $line) {
                 if ($line == '' || $line === "\n" || preg_match('/^(\\n *)$/', $line)) {
                     unset($reversed[$index]);
                 } else {
                     break;
                 }
             }
             $fixed = array_reverse($reversed);
             $tmp = implode("\n", $fixed);
             // Ensure the content ends with exactly one added newline if missing.
             if (substr($tmp, -1) !== "\n") {
                 $tmp .= "\n";
             }
             $this->stringContent = $tmp;
             break;
         case self::TYPE_FENCED_CODE:
             // first line becomes info string
             $this->setExtra('info', CommonMark_Util_RegexHelper::unescape(trim($this->strings->first())));
             if ($this->strings->count() == 1) {
                 $this->stringContent = '';
             } else {
                 // remaining lines are the code itself
                 $this->stringContent = implode("\n", $this->strings->slice(1)) . "\n";
             }
             break;
         case self::TYPE_LIST:
             // tight by default
             $this->setExtra('tight', true);
             $numItems = $this->children->count();
             $i = 0;
             while ($i < $numItems) {
                 /** @var BlockElement $item */
                 $item = $this->children->get($i);
                 // check for non-final list item ending with blank line:
                 $lastItem = $i == $numItems - 1;
                 if ($item->endsWithBlankLine() && !$lastItem) {
                     $this->setExtra('tight', false);
                     break;
                 }
                 // Recurse into children of list item, to see if there are
                 // spaces between any of them:
                 $numSubItems = $item->getChildren()->count();
                 $j = 0;
                 while ($j < $numSubItems) {
                     $subItem = $item->getChildren()->get($j);
                     $lastSubItem = $j == $numSubItems - 1;
                     if ($subItem->endsWithBlankLine() && !($lastItem && $lastSubItem)) {
                         $this->setExtra('tight', false);
                         // leaves only the inner loop; the outer loop moves on
                         // to the next item
                         break;
                     }
                     $j++;
                 }
                 $i++;
             }
             break;
         default:
             break;
     }
 }
 /**
  * Parse a run of ordinary characters, or a single character with
  * a special meaning in markdown, as a plain string, adding to inlines.
  *
  * @param CommonMark_Util_ArrayCollection $inlines Collection the created string inline is added to
  *
  * @return int Length (per strlen()) of the matched text, or 0 if nothing matched
  */
 protected function parseString(CommonMark_Util_ArrayCollection $inlines)
 {
     $m = $this->match(CommonMark_Util_RegexHelper::getInstance()->getMainRegex());

     // Test the result explicitly instead of relying on truthiness: the
     // matched text "0" is a valid string but converts to false in PHP,
     // which would silently drop it. A non-string result (presumably
     // null/false from match() on failure -- confirm) or an empty string
     // still counts as "no match", as before.
     if (is_string($m) && $m !== '') {
         $inlines->add(CommonMark_Element_InlineCreator::createString($m));
         return strlen($m);
     }

     return 0;
 }