    } else {
      $lb = "<br />";
    }

    // Print the URL and the HTTP status code
    echo "Page requested: " . $DocInfo->url . " (" . $DocInfo->http_status_code . ")" . $lb;

    flush();
  }
}

$crawler = new MyCrawler();
$crawler->setURL("www.php.net");

// Only receive documents of content-type "text/html"
$crawler->addContentTypeReceiveRule("#text/html#");

// Ignore links to images (don't even request them)
$crawler->addURLFilterRule("#\\.(jpg|jpeg|gif|png)\$# i");

$crawler->setPageLimit(50); // Set the page-limit to 50 for testing

// Important for resumable scripts/processes!
$crawler->enableResumption();

// At the first start of the script, retrieve the crawler-ID and store it
// (in a temporary file in this example).
if (!file_exists("/tmp/mycrawlerid_for_php.net.tmp")) {
  $crawler_ID = $crawler->getCrawlerId();
  file_put_contents("/tmp/mycrawlerid_for_php.net.tmp", $crawler_ID);
}
// Otherwise (the script was restarted after an abort): read the stored
// crawler-ID and resume the crawler.
else {
  $crawler_ID = file_get_contents("/tmp/mycrawlerid_for_php.net.tmp");
  $crawler->resume($crawler_ID);
}

// Start crawling with 5 processes
$crawler->goMultiProcessed(5);

// Delete the stored crawler-ID after the process has finished completely and successfully.
unlink("/tmp/mycrawlerid_for_php.net.tmp");

$report = $crawler->getProcessReport();

if (PHP_SAPI == "cli") {