Example #1
 *  crawl method
 *  Create the crawler class object and set the options for crawling
 * @param type $u URL
function crawl($u)
    $C = new MyCrawler();
    /* Only receive HTML pages */
    $C->addURLFilterRule("#(jpg|gif|png|pdf|jpeg|svg|css|js)\$# i");
    /* We don't want to crawl non HTML pages */
    $C->setTrafficLimit(2000 * 1024);
    /* Should we follow robots.txt? */
Example #2
            echo "Content not received" . $lb;
        // Now you should do something with the content of the actual
        // received page or file ($DocInfo->source), we skip it in this example
        echo $lb;
// Now, create an instance of your class, define the behaviour
// of the crawler (see class-reference for more options and details)
// and start the crawling-process.
// NOTE(review): several comments below ("URL to crawl", the content-type
// rule, cookie handling, "here we go") have no statement following them in
// this excerpt; the corresponding calls appear to be missing -- confirm
// against the full script.
$crawler = new MyCrawler();
// URL to crawl
// Only receive content of files with content-type "text/html"
// Ignore links to pictures, don't even request pictures
// (the rule matches URLs ending in .jpg, .jpeg, .gif, .png, .css or .js,
// optionally followed by a "?query" part)
$crawler->addURLFilterRule("#\\.(jpg|jpeg|gif|png|css|js)([?].*)?\$# i");
// Store and send cookie-data like a browser does
// Set the traffic-limit to roughly 1 MB (1000 * 1024 bytes;
// for testing we don't want to "suck" the whole site)
$crawler->setTrafficLimit(1000 * 1024);
// That's enough, now here we go
// At the end, after the process is finished, we print a short
// report (see method getProcessReport() for more information)
$report = $crawler->getProcessReport();
Example #3
        // Final re-order
        $this->links = array_values($this->links);
        return $this->links;
// Now, create an instance of your class, define the behaviour
// of the crawler (see class-reference for more options and details)
// and start the crawling-process.
$crawler = new MyCrawler($_SESSION['crawler']['domain']);
$crawler->addURLFilterRule("#\\.(jpg|jpeg|gif|png)\$# i");
if ($_SESSION['crawler']['respect_robots_txt'] == true) {
    $crawler->obeyRobotsTxt(true, $_SESSION['crawler']['domain'] . '/robots.txt');
$crawler->addLinkSearchContentType("#text/html# i");
// no data on page yet
if ($_SESSION['crawler']['auth'] == true) {
    $crawler->set_url_test_auth($_SESSION['crawler']['user'], $_SESSION['crawler']['pass']);
    // Build the URL regex handed to addBasicAuthentication(). '#' is used as the
    // delimiter (like the other rules in this script) because the pattern itself
    // contains "//": with '/' as delimiter PCRE ends the pattern right after
    // "https?:" and rejects the remainder as an unknown modifier, so the regex
    // could never match. preg_quote() escapes every regex metacharacter in the
    // configured 'silo' value (and the '#' delimiter), not only dots.
    $pattern = '#https?://' . preg_quote($_SESSION['crawler']['silo'], '#') . '#is';
    $crawler->addBasicAuthentication($pattern, $_SESSION['crawler']['user'], $_SESSION['crawler']['pass']);
// That's enough, now here we go