/**
 * Flags the current block as content or non-content using fixed thresholds
 * on link density and word count of the block and its two neighbours.
 *
 * The rules are phrased here as "when is the block rejected"; the content
 * flag is the negation of that.
 *
 * @param TextBlock $prev Block preceding the one being classified.
 * @param TextBlock $curr Block being classified; receives the flag.
 * @param TextBlock $next Block following the one being classified.
 *
 * @return mixed Whatever $curr->setIsContent() returns.
 */
protected function classify(TextBlock $prev, TextBlock $curr, TextBlock $next)
{
    // Rejected unless the current block's link density passes the gate below.
    $rejected = true;

    if ($curr->getLinkDensity() <= 0.333) {
        if ($prev->getLinkDensity() <= 0.555) {
            // Rejected only when the current and next blocks are both short
            // and the previous block does not exceed four words.
            $rejected = $curr->getWordCount() <= 16
                && $next->getWordCount() <= 15
                && !($prev->getWordCount() > 4);
        } else {
            // Previous block is link-heavy: rejected when the current block
            // is at most 40 words and the next does not exceed 17 words.
            $rejected = $curr->getWordCount() <= 40
                && !($next->getWordCount() > 17);
        }
    }

    return $curr->setIsContent(!$rejected);
}
/**
 * Returns the block's word count, or 0 when its text density is below the
 * given minimum.
 *
 * @param TextBlock $block          Block whose words are counted.
 * @param mixed     $minTextDensity Density threshold (default 9); a block
 *                                  whose density is strictly less than this
 *                                  counts as zero words.
 *
 * @return mixed 0, or the value of $block->getWordCount().
 */
protected function getFullTextWordCount(TextBlock $block, $minTextDensity = 9)
{
    return $block->getTextDensity() < $minTextDensity
        ? 0
        : $block->getWordCount();
}