/**
  * @link https://github.com/t1gor/Robots.txt-Parser-Class/issues/22
  */
 public function testAllowWildcard()
 {
     // robots.txt fixture: one wildcard user-agent group whose only rule is "Allow: /".
     $robotsTxt = "\n\t\t\tUser-agent: *\n\t\t\tAllow: /\n\t\t";
     $wildcardParser = new RobotsTxtParser($robotsTxt);
     $paths = array("/index", "/");

     // Neither path may be reported as disallowed ...
     foreach ($paths as $path) {
         $this->assertFalse($wildcardParser->isDisallowed($path));
     }
     // ... and both must be reported as allowed.
     foreach ($paths as $path) {
         $this->assertTrue($wildcardParser->isAllowed($path));
     }
 }
示例#2
0
 /**
  * Returns content of URL
  *
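  * @param string $url Any valid URL
  * @param string $actionType "GET", "POST", any other...
  * @param bool $listenRobotsDotTxt When true and robots.txt content is stored on this object, URLs it disallows yield null
  * @return null|\Symfony\Component\DomCrawler\Crawler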
  */
 protected function getContentOfUrl($url, $actionType = 'GET', $listenRobotsDotTxt = true)
 {
     // Nothing to fetch for an empty/falsy URL.
     if (!$url) {
         return null;
     }

     // robots.txt is consulted only when the caller asks for it and content is stored on this object.
     $robotsApply = $listenRobotsDotTxt && $this->robotsTxtContent;
     if ($robotsApply) {
         $robots = new \RobotsTxtParser($this->robotsTxtContent);
         // No user agent is set on the parser here; an earlier
         // setUserAgent('VeiktDotComBot') call was left disabled.
         if ($robots->isDisallowed($url)) {
             return null;
         }
     }

     $browser = new GoutteClient();

     // cURL timeouts are taken from this class's own constants.
     $curlOptions = array(
         CURLOPT_TIMEOUT => $this::CURLOPT_TIMEOUT,
         CURLOPT_CONNECTTIMEOUT => $this::CURLOPT_CONNECTTIMEOUT,
     );
     $browser->setClient(new GuzzleClient(array('curl' => $curlOptions)));

     $crawler = $browser->request($actionType, $url);

     // Any falsy result is normalised to null; otherwise the result is returned as-is.
     return $crawler ?: null;
 }
 /**
  * @covers RobotsTxtParser::checkRule
  * @link https://github.com/t1gor/Robots.txt-Parser-Class/issues/23
  */
 public function testEmptyRulesAllow()
 {
     // Parser built from an empty robots.txt body, i.e. no rules at all.
     $path = '/foo';
     $emptyParser = new RobotsTxtParser('');

     // With no rules the path must be allowed and must not be disallowed.
     $this->assertTrue($emptyParser->isAllowed($path));
     $this->assertFalse($emptyParser->isDisallowed($path));
 }