Example #1
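The snippet below configures an already-created crawler, so a few lines of setup are implied. A minimal sketch of that setup, assuming the stock PHPCrawl layout (the class name SiteCrawler, the include path, and the handler body are illustrative):

<?php
require_once 'libs/PHPCrawler.class.php';

class SiteCrawler extends PHPCrawler
{
    // Called once for every document the crawler receives.
    public function handleDocumentInfo(PHPCrawlerDocumentInfo $DocInfo)
    {
        // Store or index $DocInfo->url / $DocInfo->content here.
    }
}

session_start();
$crawler = new SiteCrawler();
$crawler->setURL($_SESSION['crawler']['domain']);

// Keep binary images out of the crawl: skip any URL ending in a common image extension.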
$crawler->addURLFilterRule("#\\.(jpg|jpeg|gif|png)\$# i");
$crawler->enableCookieHandling(true);
if ($_SESSION['crawler']['respect_robots_txt'] == true) {
    $crawler->obeyRobotsTxt(true, $_SESSION['crawler']['domain'] . '/robots.txt');
    $crawler->obeyNoFollowTags(true);
}
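// Conservative link extraction: no aggressive searching, ignore links found in
// special document sections (script blocks, HTML comments, etc.), and only look
// at href attributes inside documents served as text/html.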
$crawler->enableAggressiveLinkSearch(false);
$crawler->excludeLinkSearchDocumentSections(PHPCrawlerLinkSearchDocumentSections::ALL_SPECIAL_SECTIONS);
$crawler->addLinkSearchContentType("#text/html# i");
$crawler->setLinkExtractionTags(array('href'));
$crawler->setUserAgentString('Crawl_Scrape_Solr_Index/1.0');
// no data on page yet
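// If the target site sits behind HTTP basic auth, register the credentials
// for every URL on the protected host.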
if ($_SESSION['crawler']['auth'] == true) {
    $crawler->set_url_test_auth($_SESSION['crawler']['user'], $_SESSION['crawler']['pass']);
    $pattern = "/https?://" . str_replace('.', '\\.', $_SESSION['crawler']['silo']) . "/is";
    $crawler->addBasicAuthentication($pattern, $_SESSION['crawler']['user'], $_SESSION['crawler']['pass']);
}
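// For illustration (hypothetical value): with $_SESSION['crawler']['silo'] set to
// 'intranet.example.com', the pattern built above is '#https?://intranet\.example\.com#is',
// so the credentials apply to every URL on that host.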
// That's enough, now here we go
$crawler->go();
// At the end, after the process has finished, we fetch a short
// report (see method getProcessReport() for more information)
$report = $crawler->getProcessReport();
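// processLinks() is presumably a method of the custom crawler subclass (it is
// not part of the stock PHPCrawl API); it returns the links collected during
// the crawl, optionally filtered against robots.txt.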
$links = $crawler->processLinks($_SESSION['crawler']['domain'], $_SESSION['crawler']['respect_robots_txt']);
//$lb     = "<br />";
//echo "Summary:" . $lb;
//echo "Links followed: " . $report->links_followed . $lb;
//echo "Links extracted: " . count($links) . $lb;
//echo "Documents received: " . $report->files_received . $lb;
//echo "Bytes received: " . $report->bytes_received . " bytes." . $lb;
//echo "Spider Process runtime: " . round($report->process_runtime, 2) . " seconds." . $lb . $lb;
if (count($links) > 0) {