Example #1
0
 /**
  * Collects content-bearing paragraphs from a sibling node.
  *
  * If the sibling itself matches `p, strong` and has non-blank text it is
  * returned as-is. Otherwise every `p, strong` descendant is scored by its
  * stop-word count, and each one whose count exceeds
  * `$baselineScoreForSiblingParagraphs * self::$SIBLING_BASE_LINE_SCORE`
  * is returned as a newly created <p> element holding the trimmed text.
  *
  * @param Element $currentSibling Sibling node to inspect
  * @param int $baselineScoreForSiblingParagraphs Baseline score, scaled by self::$SIBLING_BASE_LINE_SCORE to form the threshold
  *
  * @return Element[] The sibling itself, or new <p> elements for qualifying descendants (possibly empty)
  */
 private function getSiblingContent(Element $currentSibling, $baselineScoreForSiblingParagraphs)
 {
     $text = trim($currentSibling->text());
     // Compare against '' instead of using empty(): empty('0') is true, which
     // would wrongly treat a paragraph whose text is "0" as blank.
     if ($currentSibling->is('p, strong') && $text !== '') {
         return [$currentSibling];
     }
     $results = [];
     // The threshold does not change per node, so compute it once.
     $threshold = $baselineScoreForSiblingParagraphs * self::$SIBLING_BASE_LINE_SCORE;
     foreach ($currentSibling->find('p, strong') as $node) {
         $text = trim($node->text());
         if ($text === '') {
             continue;
         }
         $wordStats = $this->config()->getStopWords()->getStopwordCount($text);
         if ($threshold < $wordStats->getStopWordCount()) {
             // Do not pass $text as the second argument of createElement():
             // that value is not escaped for '&', so text such as "AT&T"
             // raises an "unterminated entity reference" warning and is
             // truncated. A text node stores the string verbatim.
             $document = $node->document();
             $paragraph = $document->createElement('p');
             $paragraph->appendChild($document->createTextNode($text));
             $results[] = $paragraph;
         }
     }
     return $results;
 }
Example #2
0
 /**
  * Generate <p> element replacements for supplied elements child nodes as required.
  *
  * Walks the direct contents of $node. Non-text children are passed through
  * unchanged. Text nodes are buffered together with their adjacent <a>
  * siblings, and the buffer is handed to getFlushedBuffer() whenever a
  * non-text child is reached or the loop ends. The processed text nodes and
  * the <a> siblings that were copied into a buffer are removed via
  * NodeList::remove() before returning.
  *
  * @param Element $node Element whose child nodes are examined
  *
  * @return \DOMWrap\NodeList $nodesToReturn Replacement elements
  */
 private function getReplacementNodes(Element $node)
 {
     $nodesToReturn = $node->newNodeList();
     $nodesToRemove = $node->newNodeList();
     // Buffer of text/<a> nodes waiting to be flushed into a single replacement.
     $replacementNodes = $node->newNodeList();
     // Boundary test handed to precedingUntil()/followingUntil(): yields true for
     // anything that is not an <a> element (or is a text node), and null otherwise.
     $fnCompareSiblingNodes = function ($node) {
         if ($node->is(':not(a)') || $node->nodeType === XML_TEXT_NODE) {
             return true;
         }
     };
     foreach ($node->contents() as $child) {
         if ($child->is('p') && $replacementNodes->count()) {
             // A <p> ends the current run of buffered inline content.
             $nodesToReturn[] = $this->getFlushedBuffer($replacementNodes);
             $replacementNodes->fromArray([]);
             $nodesToReturn[] = $child;
         } else {
             if ($child->nodeType === XML_TEXT_NODE) {
                 $replaceText = $child->text();
                 // Explicit empty-string check instead of empty(): empty('0') is
                 // true, so a text node containing just "0" would be skipped here
                 // yet still removed below, silently dropping its content.
                 if ((string) $replaceText !== '') {
                     // Get all previous sibling <a> nodes, the current text node, and all next sibling <a> nodes.
                     $siblings = $child->precedingUntil($fnCompareSiblingNodes, 'a')->merge([$child])->merge($child->followingUntil($fnCompareSiblingNodes, 'a'));
                     foreach ($siblings as $sibling) {
                         // Place current nodes textual contents in-between previous and next nodes.
                         if ($sibling->isSameNode($child)) {
                             $replacementNodes[] = new Text($replaceText);
                         } else {
                             // Grab the contents of any unprocessed <a> siblings and flag them for removal.
                             // The marker attribute keeps an <a> shared by two neighbouring
                             // text nodes from being copied into the buffer twice.
                             if ($sibling->getAttribute('grv-usedalready') !== 'yes') {
                                 $sibling->setAttribute('grv-usedalready', 'yes');
                                 $replacementNodes[] = $sibling->cloneNode(true);
                                 $nodesToRemove[] = $sibling;
                             }
                         }
                     }
                 }
                 // The original text node is always dropped; its content (if any)
                 // now lives in the buffer.
                 $nodesToRemove[] = $child;
             } else {
                 if ($replacementNodes->count()) {
                     $nodesToReturn[] = $this->getFlushedBuffer($replacementNodes);
                     $replacementNodes->fromArray([]);
                 }
                 $nodesToReturn[] = $child;
             }
         }
     }
     // Flush any remaining replacementNodes left over from text nodes.
     if ($replacementNodes->count()) {
         $nodesToReturn[] = $this->getFlushedBuffer($replacementNodes);
     }
     // Remove potential duplicate <a> tags.
     // (An <a> that was passed through as a regular child before a later text
     // node claimed it would otherwise appear both standalone and in a buffer.)
     foreach ($nodesToReturn as $key => $return) {
         if ($nodesToRemove->exists($return)) {
             unset($nodesToReturn[$key]);
         }
     }
     $nodesToRemove->remove();
     return $nodesToReturn;
 }