/**
 * Ensures a bare "Allow: /" rule under the wildcard user-agent permits every path.
 *
 * @link https://github.com/t1gor/Robots.txt-Parser-Class/issues/22
 */
public function testAllowWildcard()
{
    $robotsTxt = "\n\t\t\tUser-agent: *\n\t\t\tAllow: /\n\t\t";
    $parser = new RobotsTxtParser($robotsTxt);

    // Both the root and an arbitrary path must be allowed (and not disallowed).
    foreach (['/index', '/'] as $path) {
        $this->assertTrue($parser->isAllowed($path));
        $this->assertFalse($parser->isDisallowed($path));
    }
}
/**
 * Fetches the site's robots.txt and echoes one HTML table row reporting
 * whether GoogleBot is allowed to crawl the site root ('/').
 *
 * Side effects: performs an HTTP(S) fetch of robots.txt and echoes HTML.
 * Emits nothing when the RobotsTxtParser class is unavailable.
 *
 * @return void
 */
function get_robots_tester()
{
    $root = (!empty($_SERVER['HTTPS']) ? 'https' : 'http')
        . '://' . $_SERVER['HTTP_HOST'] . '/';

    if (!class_exists('\\RobotsTxtParser')) {
        return;
    }

    // Guard against a failed fetch: treat an unreachable robots.txt as empty
    // (an empty rule set allows everything) instead of passing `false` onward.
    $robotsTxt = file_get_contents($root . 'robots.txt');
    if ($robotsTxt === false) {
        $robotsTxt = '';
    }

    $parser = new RobotsTxtParser($robotsTxt);
    $parser->setUserAgent('GoogleBot');

    // Evaluate the rule once instead of three separate isAllowed() calls.
    $isAllowed = $parser->isAllowed('/');
    $status = $isAllowed ? 'Enable' : 'Blocked';

    // HTTP_HOST is attacker-influenceable; escape before echoing into HTML.
    $safeRoot = htmlspecialchars($root, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

    $output = '<tr>';
    $output .= '<td>' . 'Robots Checking ' . $safeRoot . 'robots.txt' . '</td><td>' . ' Enable ' . '</td>';
    $output .= '<td style="color:' . ($isAllowed ? 'green' : 'red') . ';">' . $status . '</td>';
    $output .= '<td>' . ($isAllowed ? 'Passed' : 'FAILED') . '</td>';
    $output .= '</tr>';
    echo $output;
}
/**
 * Decides whether crawling the given URL is permitted by its robots.txt.
 *
 * Fetches the robots file via robot_parser::getRobotFile() and delegates
 * the allow/deny decision to RobotsTxtParser::isAllowed().
 *
 * @param string $url the URL to check
 * @return bool true when crawling is allowed
 */
public function allowCrawl($url)
{
    $robotsContent = robot_parser::getRobotFile($url);

    return (new RobotsTxtParser($robotsContent))->isAllowed($url);
}
/**
 * An empty robots.txt must allow everything: with no rules present,
 * every path is allowed and nothing is disallowed.
 *
 * Note: fixed the docblock annotation typo `@covder` -> `@covers` so
 * PHPUnit's code-coverage tooling recognizes it.
 *
 * @covers RobotsTxtParser::checkRule
 * @link https://github.com/t1gor/Robots.txt-Parser-Class/issues/23
 */
public function testEmptyRulesAllow()
{
    $parser = new RobotsTxtParser('');

    $this->assertTrue($parser->isAllowed('/foo'));
    $this->assertFalse($parser->isDisallowed('/foo'));
}