/**
 * Detect valid formatting elements within a paragraph.
 *
 * Walks the direct children of the given element and, for every non-text
 * child whose tag name is listed in $this->config['allowedFormattingTags'],
 * builds a format descriptor of the form:
 *
 *     ['type' => ..., 'from' => ..., 'to' => ...]
 *
 * where 'type' is looked up in $this->config['formattingTags'] and
 * 'from'/'to' are the two values returned by getTextPosition() for the
 * child's text inside the element's text. Descriptors whose type is 'a'
 * additionally carry 'attrs' => ['href' => ...].
 *
 * @param  \Candybanana\HtmlToCarbonJson\Element $element
 * @return array|null List of format descriptors, or null when the element
 *                    has no children or none of them yields a format
 */
protected function detectFormatting(Element $element)
{
    if (!$element->hasChildren()) {
        return null;
    }

    $formats = [];

    foreach ($element->getChildren() as $node) {
        if ($node->isText()) {
            continue;
        }

        $tagName = $node->getTagName();

        // strict comparison so tag names are never matched via type juggling
        if (!in_array($tagName, $this->config['allowedFormattingTags'], true)) {
            continue;
        }

        // Skip formatting tags without any text. Compared explicitly rather
        // than with empty(), because empty('0') is true and would silently
        // drop formatting applied to the text "0".
        $value = $node->getValue();
        if ($value === null || $value === '') {
            continue;
        }

        $type = $this->config['formattingTags'][$tagName];

        list($from, $to) = $this->getTextPosition($element->getValue(), $value);

        $format = ['type' => $type, 'from' => $from, 'to' => $to];

        // if <a> tag, add attributes
        if ($type === 'a') {
            $format['attrs'] = ['href' => $node->getAttribute('href')];
        }

        $formats[] = $format;
    }

    return !empty($formats) ? $formats : null;
}
/**
 * Extract the equivalent of 'Sections' in Carbon from the document.
 *
 * Descends depth-first through the element's children. When a block
 * element that has children (and is not an <li>) is reached and it has a
 * parent, that parent is appended to $this->sectionsHtml and removed from
 * the DOM.
 *
 * @param  \Candybanana\HtmlToCarbonJson\Element $element
 * @return boolean|null true when this element's parent was recorded as a
 *                      section; false when nothing was recorded for this
 *                      node; null when one of this element's descendants
 *                      triggered the extraction (callers that test for
 *                      truthiness treat that the same as false)
 */
protected function extractSections(Element $element)
{
    // recurse first so the innermost candidates are examined before this node
    $children = $element->hasChildren() ? $element->getChildren() : [];

    foreach ($children as $child) {
        if ($this->extractSections($child)) {
            // a section was found below; stop processing this node.
            // NOTE(review): this yields null rather than a boolean - kept
            // as-is because callers may rely on the falsy result.
            return null;
        }
    }

    // 30/10/16: hacky fix here for LIs containing children.
    // @todo: Proper fix = custom isBlock() function.
    $isBlockWithChildren = $element->getTagName() !== 'li'
        && $element->isBlock()
        && $element->hasChildren();

    if (!$isBlockWithChildren) {
        // we're done with this node
        return false;
    }

    // the section is the parent of this block; without a parent there is none
    $parent = $element->getParent();

    if (!$parent) {
        return false;
    }

    $this->sectionsHtml[] = $parent;

    // remove it from the DOM
    // @todo: check if this is the root element - if so error out
    $parent->remove();

    return true;
}