示例#1
0
 /**
  * Adds a rule to the list of rules that decide in what kind of documents the crawler
  * should search for links in (regarding their content-type)
  *
  * By default the crawler ONLY searches for links in documents of type "text/html".
  * Use this method to add one or more other content-types the crawler should check for links.
  *
  * Example:
  * <code>
  * $crawler->addLinkSearchContentType("#text/css# i");
  * $crawler->addLinkSearchContentType("#text/xml# i");
  * </code>
  * These rules let the crawler search for links in HTML-, CSS- and XML-documents.
  *
  * <b>Please note:</b> It is NOT recommended to let the crawler check for links in EVERY document-
  * type! This could slow down the crawling-process dramatically (e.g. if the crawler receives large
  * binary-files like images and tries to find links in them).
  *
  * @param string $regex Regular-expression defining the rule
  * @return bool         TRUE if the rule was successfully added
  */
 function addLinkSearchContentType($regex)
 {
     // initCrawler() runs first; presumably it prepares $this->pageRequest (verify in initCrawler).
     $this->initCrawler();

     $isValid = PHPCrawlerUtils::checkExpressionPattern($regex);

     // Guard clause: a rejected pattern is not registered, and the
     // checker's own result is handed back unchanged.
     if (!$isValid) {
         return $isValid;
     }

     // The rule is stored with surrounding whitespace stripped.
     $this->pageRequest->linksearch_content_types[] = trim($regex);

     return $isValid;
 }
示例#2
0
 /**
  * Registers a set of basic-authentication credentials together with the
  * regular expression that selects where they apply.
  *
  * The expression is validated with PHPCrawlerUtils::checkExpressionPattern();
  * when it is accepted, one entry with the keys "match", "username" and
  * "password" is appended to $this->pageRequest->basic_authentications.
  * NOTE(review): the expression is presumably matched against request URLs -
  * confirm against the code that reads basic_authentications.
  *
  * @param string $expression Regular-expression stored under the "match" key
  * @param string $username   Username stored for matching requests
  * @param string $password   Password stored for matching requests
  * @return bool              TRUE if the expression was accepted and the entry added, otherwise FALSE
  */
 function addBasicAuthentication($expression, $username, $password)
 {
     $this->initCrawler();

     // Reject invalid patterns up front; nothing is stored in that case.
     if (!PHPCrawlerUtils::checkExpressionPattern($expression)) {
         return false;
     }

     // Append the entry as a whole instead of writing three fields at index
     // count($array): that index is only free while the array is a gap-free
     // 0-based list, and would otherwise overwrite an existing entry.
     $this->pageRequest->basic_authentications[] = array(
         "match" => $expression,
         "username" => $username,
         "password" => $password,
     );

     return true;
 }