/**
 * Reader::url() must append "/robots.txt" to a bare site URL, preserve an
 * explicit port, return an already-complete robots.txt URL unchanged,
 * replace any other path component, and reject scheme-less input with a
 * RuntimeException.
 */
public function testUrl()
{
    $robotsUrl = 'http://www.google.com/robots.txt';

    // Bare host: "/robots.txt" is appended.
    $this->assert->string(Robots\Reader::url('http://www.google.com'))->isEqualTo($robotsUrl);

    // Already pointing at robots.txt: returned unchanged.
    $this->assert->string(Robots\Reader::url('http://www.google.com/robots.txt'))->isEqualTo($robotsUrl);

    // An explicit port is preserved in the result.
    $this->assert->string(Robots\Reader::url('http://www.google.com:80'))->isEqualTo('http://www.google.com:80/robots.txt');

    // Input without a scheme is rejected.
    $this->assert->exception(function () {
        Robots\Reader::url('www.google.com');
    })->isInstanceOf('\\RuntimeException');

    // Any other path is replaced by /robots.txt.
    $this->assert->string(Robots\Reader::url('http://www.google.com/search'))->isEqualTo($robotsUrl);
}
<?php
/**
 * Manual demo/scratch script for Robots\Reader: fetches a live robots.txt,
 * parses it, and dumps the result.
 *
 * @package   Robots
 * @author    Axel Etcheverry <*****@*****.**>
 * @copyright Copyright (c) 2011 Axel Etcheverry (http://www.axel-etcheverry.com)
 * Displays <a href="http://creativecommons.org/licenses/MIT/deed.fr">MIT</a>
 * @license   http://creativecommons.org/licenses/MIT/deed.fr MIT
 */

/**
 * @namespace
 */
namespace Application;

require_once __DIR__ . '/../src/Robots/Reader.php';

use Robots;

$r = new Robots\Reader();

// Fetch the live robots.txt and dump the parsed structure.
// FIX: file_get_contents() returns false on failure; previously that false
// value was passed straight into parse(). Fail loudly instead.
$content = file_get_contents(Robots\Reader::url('http://www.robotstxt.org'));
if ($content === false) {
    throw new \RuntimeException('Unable to fetch robots.txt from http://www.robotstxt.org');
}
print_r($r->parse($content));
exit;

// NOTE(review): everything below is unreachable — the exit above
// short-circuits this offline fixture demo. Remove the exit (or this block)
// deliberately; kept as-is to preserve the script's current behavior.
$robots = "# Disallow all crawlers access to certain pages.\n\nUser-agent: * # test comment\nDisallow: /test/1\nAllow: /test/2*\n\nUser-Agent: Googlebot\nDisallow: /test/3 #comment\nAllow: /test/4*\n\n\n# Sitemap files\nSitemap: http://www.amazon.com/sitemap-manual-index.xml\nSitemap: http://www.amazon.com/sitemap_dp_index.xml\n";

print_r($r->parse($robots));