/**
 * A blanket "Allow: /" rule for every user agent must leave all paths allowed.
 *
 * @link https://github.com/t1gor/Robots.txt-Parser-Class/issues/22
 */
public function testAllowWildcard()
{
    // Fixture: wildcard user agent with a single allow-everything rule.
    $robotsTxt = "\n\t\t\tUser-agent: *\n\t\t\tAllow: /\n\t\t";
    $parser = new RobotsTxtParser($robotsTxt);

    $paths = array("/index", "/");

    // No path may be reported as disallowed...
    foreach ($paths as $path) {
        $this->assertFalse($parser->isDisallowed($path));
    }

    // ...and every path must be reported as allowed.
    foreach ($paths as $path) {
        $this->assertTrue($parser->isAllowed($path));
    }
}
/**
 * Requests a URL and returns the resulting crawler, optionally honouring robots.txt.
 *
 * @param string $url                Any valid URL
 * @param string $actionType         Request method: "GET", "POST", any other...
 * @param bool   $listenRobotsDotTxt When truthy and robots.txt content is available on this
 *                                   object, URLs disallowed by that content are not requested
 * @return null|\Symfony\Component\DomCrawler\Crawler Null when the URL is empty, the URL is
 *                                                    disallowed, or the request result is falsy
 */
protected function getContentOfUrl($url, $actionType = 'GET', $listenRobotsDotTxt = true)
{
    // Nothing to fetch.
    if (!$url) {
        return null;
    }

    // Consult robots.txt only when asked to and when its content is present.
    $shouldCheckRobots = $listenRobotsDotTxt && $this->robotsTxtContent;
    if ($shouldCheckRobots) {
        // NOTE(review): no user agent is set on the parser; the original author left
        // setUserAgent('VeiktDotComBot') as an open question.
        $robots = new \RobotsTxtParser($this->robotsTxtContent);
        if ($robots->isDisallowed($url)) {
            return null;
        }
    }

    // Apply the class-level cURL timeouts to the underlying HTTP client.
    $curlOptions = array(
        CURLOPT_TIMEOUT => $this::CURLOPT_TIMEOUT,
        CURLOPT_CONNECTTIMEOUT => $this::CURLOPT_CONNECTTIMEOUT,
    );
    $httpClient = new GuzzleClient(array('curl' => $curlOptions));

    $browser = new GoutteClient();
    $browser->setClient($httpClient);

    $crawler = $browser->request($actionType, $url);

    // A falsy result is normalised to null.
    return $crawler ?: null;
}
/**
 * With empty robots.txt content the parser must report a path as allowed
 * and not as disallowed.
 *
 * @covers RobotsTxtParser::checkRule
 * @link https://github.com/t1gor/Robots.txt-Parser-Class/issues/23
 */
public function testEmptyRulesAllow()
{
    // Empty content: no rules are supplied to the parser.
    $parser = new RobotsTxtParser('');

    $this->assertTrue($parser->isAllowed('/foo'));
    $this->assertFalse($parser->isDisallowed('/foo'));
}