/**
 * Collects the paragraph content that a sibling node contributes.
 *
 * When the sibling itself matches 'p, strong' and has non-blank text, it is returned as the
 * only result. Otherwise every descendant matching 'p, strong' is inspected, and each one
 * whose stop-word count is greater than
 * $baselineScoreForSiblingParagraphs * self::$SIBLING_BASE_LINE_SCORE is returned as a newly
 * created <p> element holding that node's trimmed text.
 *
 * @param Element $currentSibling Sibling node to inspect
 * @param int $baselineScoreForSiblingParagraphs Baseline score, scaled by self::$SIBLING_BASE_LINE_SCORE to form the threshold
 *
 * @return Element[] Qualifying elements (empty array when nothing qualifies)
 */
private function getSiblingContent(Element $currentSibling, $baselineScoreForSiblingParagraphs)
{
    // Compare with '' instead of using empty(): empty('0') is true, which would
    // wrongly treat a paragraph whose whole text is "0" as blank.
    if ($currentSibling->is('p, strong') && trim($currentSibling->text()) !== '') {
        return [$currentSibling];
    }

    // The threshold does not depend on the node being inspected, so compute it once.
    $threshold = $baselineScoreForSiblingParagraphs * self::$SIBLING_BASE_LINE_SCORE;
    $results = [];

    foreach ($currentSibling->find('p, strong') as $node) {
        $text = trim($node->text());

        if ($text === '') {
            continue;
        }

        $wordStats = $this->config()->getStopWords()->getStopwordCount($text);

        if ($threshold < $wordStats->getStopWordCount()) {
            // Do not pass $text as createElement()'s value argument: a bare '&' there is
            // parsed as the start of an entity reference, which raises a warning and
            // truncates the text. A text node stores the string verbatim.
            $document = $node->document();
            $paragraph = $document->createElement('p');
            $paragraph->appendChild($document->createTextNode($text));

            $results[] = $paragraph;
        }
    }

    return $results;
}
/**
 * Generate <p> element replacements for supplied elements child nodes as required.
 *
 * Iterates over the direct children of $node:
 *  - A non-text child first flushes any buffered replacement nodes through
 *    getFlushedBuffer(), and is then passed through unchanged.
 *  - A non-empty text child is buffered together with its previous and next sibling <a>
 *    nodes. The anchors are cloned into the buffer and marked with grv-usedalready="yes"
 *    so that each anchor is consumed only once.
 *
 * Every text child and every consumed anchor is removed from the document before returning.
 *
 * @param Element $node
 *
 * @return \DOMWrap\NodeList $nodesToReturn Replacement elements
 */
private function getReplacementNodes(Element $node)
{
    $nodesToReturn = $node->newNodeList();
    $nodesToRemove = $node->newNodeList();
    $replacementNodes = $node->newNodeList();

    // Stop condition for the sibling walks: halt at the first node that is not an <a>
    // element (text nodes included).
    $fnCompareSiblingNodes = function ($node) {
        return $node->is(':not(a)') || $node->nodeType === XML_TEXT_NODE;
    };

    foreach ($node->contents() as $child) {
        if ($child->nodeType !== XML_TEXT_NODE) {
            // A non-text child ends the current run of text/anchor nodes: emit the
            // buffered run first so the output keeps document order.
            if ($replacementNodes->count()) {
                $nodesToReturn[] = $this->getFlushedBuffer($replacementNodes);
                $replacementNodes->fromArray([]);
            }

            $nodesToReturn[] = $child;

            continue;
        }

        $replaceText = (string) $child->text();

        // Compare with '' instead of using empty(): empty('0') is true, so a text node
        // containing exactly "0" would be removed below without ever being copied into
        // a replacement paragraph.
        if ($replaceText !== '') {
            // Get all previous sibling <a> nodes, the current text node, and all next sibling <a> nodes.
            $siblings = $child->precedingUntil($fnCompareSiblingNodes, 'a')
                ->merge([$child])
                ->merge($child->followingUntil($fnCompareSiblingNodes, 'a'));

            foreach ($siblings as $sibling) {
                if ($sibling->isSameNode($child)) {
                    // Place current nodes textual contents in-between previous and next nodes.
                    $replacementNodes[] = new Text($replaceText);
                } elseif ($sibling->getAttribute('grv-usedalready') !== 'yes') {
                    // Grab the contents of any unprocessed <a> siblings and flag them for removal.
                    $sibling->setAttribute('grv-usedalready', 'yes');

                    $replacementNodes[] = $sibling->cloneNode(true);
                    $nodesToRemove[] = $sibling;
                }
            }
        }

        // The text node itself is always removed from the document.
        $nodesToRemove[] = $child;
    }

    // Flush any remaining replacementNodes left over from text nodes.
    if ($replacementNodes->count()) {
        $nodesToReturn[] = $this->getFlushedBuffer($replacementNodes);
    }

    // Remove potential duplicate <a> tags: an anchor that was passed through as a regular
    // child may later have been consumed (cloned) by a following text node.
    foreach ($nodesToReturn as $key => $return) {
        if ($nodesToRemove->exists($return)) {
            unset($nodesToReturn[$key]);
        }
    }

    $nodesToRemove->remove();

    return $nodesToReturn;
}