/**
  * Adds a basic-authentication (username and password) to the list of authentications that will be sent
  * with requests.
  *
  * @param string $url_regex Regular expression defining the URL(s) the authentication should be sent to.
  * @param string $username The username
  * @param string $password The password
  *
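  * @return bool TRUE if $url_regex passed the regex-pattern check and the authentication was added, otherwise FALSE.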
  */
 public function addBasicAuthentication($url_regex, $username, $password)
 {
     // Nothing is stored when the URL pattern does not pass the regex check.
     if (!PHPCrawlerUtils::checkRegexPattern($url_regex)) {
         return false;
     }

     // Append the credentials together with the pattern of the URLs they belong to.
     $this->basic_authentications[] = array(
         "url_regex" => $url_regex,
         "username" => $username,
         "password" => $password,
     );

     return true;
 }
Exemple #2
0
 /**
  * Adds a regular expression together with a priority-level to the list of rules that decide what links should be preferred.
  *
  * Links/URLs that match an expression with a high priority-level will be followed before links with a lower level.
  * All links that don't match with any of the given rules will get the level 0 (lowest level) automatically.
  *
  * The level can be any positive integer.
  *
  * <b>Example:</b>
  *
  * Telling the crawler to follow links that contain the string "forum" before links that contain ".gif" before all other found links.
  * <code>
  * $crawler->addLinkPriority("/forum/", 10);
  * $crawler->addLinkPriority("/\.gif/", 5);
  * </code>
  *
  * @param string $regex Regular expression defining the rule
  * @param int $level The priority-level
  *
  * @return bool  TRUE if a valid preg-pattern is given as argument and was successfully added, otherwise it returns FALSE.
  * @section 10 Other settings
  */
 public function addLinkPriority($regex, $level)
 {
     // Validate the link pattern with the shared regex checker.
     $pattern_valid = PHPCrawlerUtils::checkRegexPattern($regex);

     // Normalise the level once, so the value that gets validated is exactly
     // the value that gets stored.
     $level_string = trim((string) $level);

     // The level has to consist of at least one digit. The previous pattern
     // ("*" quantifier, "$" without the D modifier) also matched an empty or
     // newline-only level and stored "" as the priority.
     if (!$pattern_valid || preg_match('/^[0-9]+$/D', $level_string) !== 1) {
         return false;
     }

     // Append instead of indexing by count(), so an existing entry can never
     // be overwritten if the list is not densely indexed.
     $this->link_priority_array[] = array(
         "match" => trim($regex),
         "level" => $level_string,
     );

     return true;
 }
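 /**
  * Adds a rule to the list of rules that decide which URLs found on a page should be ignored by the crawler.
  *
  * @param string $regex Regular expression defining the rule
  * @return bool         TRUE if the rule passed the regex-pattern check and was added, otherwise FALSE
  */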
 public function addURLFilterRule($regex)
 {
     // A rejected pattern is never stored; the checker's own result is handed back unchanged.
     $pattern_ok = PHPCrawlerUtils::checkRegexPattern($regex);
     if (!$pattern_ok) {
         return $pattern_ok;
     }

     // Keep the whitespace-trimmed expression as an additional filter rule.
     $this->url_filter_rules[] = trim($regex);

     return $pattern_ok;
 }
 /**
  * Adds a rule to the list of rules that decide what kind of documents should get
  * checked for links in (regarding their content-type)
  *
  * @param string $regex Regular-expression defining the rule
  * @return bool         TRUE if the rule was successfully added
  */
 public function addLinkSearchContentType($regex)
 {
     $accepted = PHPCrawlerUtils::checkRegexPattern($regex);

     // Only a pattern that passed the check is registered, with surrounding whitespace removed.
     if ($accepted) {
         $content_type_rule = trim($regex);
         $this->linksearch_content_types[] = $content_type_rule;
     }

     // The caller receives the result of the pattern check itself.
     return $accepted;
 }