示例#1
0
文件: lib.php 项目: Nixes/PageScraper
/**
 * Walks down the DOM, always following the child reported as having the most
 * paragraphs, until the best paragraph count drops below half of the previous
 * level's best; at that point the current node is handed to processContent().
 *
 * Per level:
 *  - countParagraphs($rootDOM, $rootXpath) supplies the counts;
 *  - if the largest count is below 0.5 * $lastHighest, $rootDOM is processed;
 *  - otherwise the child at findHighestIndex() is cleaned with removeJunk()
 *    and, if it has child nodes, inspected recursively.
 *
 * When $GLOBALS["debug"] is set and loosely equals 1, the counts are echoed as HTML.
 *
 * @param mixed $rootDOM     Node to inspect; must expose ->childNodes
 *                           (presumably a DOMNode - confirm against callers).
 * @param mixed $rootXpath   Passed through unchanged to countParagraphs() and recursive calls.
 * @param mixed $lastHighest Highest paragraph count found one level up.
 *
 * @return void
 */
function checkNode($rootDOM, $rootXpath, $lastHighest)
{
    $paragraphCounts = countParagraphs($rootDOM, $rootXpath);

    // max() on an empty array throws ValueError on PHP 8 (warning + false before),
    // and with no counts there is nothing to compare or descend into. This mirrors
    // the "no child nodes -> stop" guard used before recursing below.
    if (empty($paragraphCounts)) {
        return;
    }

    $highestCount = max($paragraphCounts);

    // More than a 50% drop in paragraphs: the current node is the content container.
    if ($highestCount < 0.5 * $lastHighest) {
        processContent($rootDOM);
        return;
    }

    $indexHighest = findHighestIndex($paragraphCounts);

    if (isset($GLOBALS["debug"]) && $GLOBALS["debug"] == 1) {
        echo "<p>From Above. The highest no of p were found in index no:" . ($indexHighest + 1) . ". With a total of " . $highestCount . " paragraphs.</p>";
        echo "<p>Paragraph counts: ";
        foreach ($paragraphCounts as $pCount) {
            echo $pCount, ', ';
        }
        echo "</p><br></br>";
    }

    $children = $rootDOM->childNodes;

    // item() yields null for an index outside the list; bail out instead of
    // dereferencing null further down.
    $target = $children->item($indexHighest);
    if ($target === null) {
        return;
    }

    removeJunk($target);

    // NOTE(review): the node is deliberately fetched again, as the original code did.
    // If childNodes is a live DOMNodeList and removeJunk() alters the tree, the node
    // at this index may no longer be $target - confirm against removeJunk().
    $next = $children->item($indexHighest);
    if ($next !== null && $next->hasChildNodes()) {
        checkNode($next, $rootXpath, $highestCount);
    }
}
示例#2
0
 /**
  * findHighestIndex() must report the zero-based position of the largest
  * value; in this ascending fixture that is the final slot (index 6).
  */
 public function test_findHighestIndex()
 {
     $ascendingCounts = [0, 2, 5, 7, 9, 10, 50];
     $indexOfLargest = 6;

     $actualIndex = findHighestIndex($ascendingCounts);

     $this->assertEquals($indexOfLargest, $actualIndex);
 }