Esempio n. 1
0
 public function testUrl()
 {
     // Canonical robots.txt location for the google.com host, shared by
     // the assertions that expect the default (port-less) rewrite.
     $robotsUrl = 'http://www.google.com/robots.txt';

     // A bare host URL is resolved to its /robots.txt path.
     $this->assert->string(Robots\Reader::url('http://www.google.com'))->isEqualTo($robotsUrl);

     // An already-complete robots.txt URL comes back unchanged.
     $this->assert->string(Robots\Reader::url('http://www.google.com/robots.txt'))->isEqualTo($robotsUrl);

     // An explicit port number is preserved in the rewritten URL.
     $this->assert->string(Robots\Reader::url('http://www.google.com:80'))->isEqualTo('http://www.google.com:80/robots.txt');

     // A URL with no scheme is rejected with a RuntimeException.
     $this->assert->exception(function () {
         Robots\Reader::url('www.google.com');
     })->isInstanceOf('\\RuntimeException');

     // Any existing path component is replaced by /robots.txt.
     $this->assert->string(Robots\Reader::url('http://www.google.com/search'))->isEqualTo($robotsUrl);
 }
Esempio n. 2
0
<?php

/**
 * @package     Robots
 * @author      Axel Etcheverry <*****@*****.**>
 * @copyright   Copyright (c) 2011 Axel Etcheverry (http://www.axel-etcheverry.com)
 * Displays     <a href="http://creativecommons.org/licenses/MIT/deed.fr">MIT</a>
 * @license     http://creativecommons.org/licenses/MIT/deed.fr    MIT
 */
/**
 * @namespace
 */
namespace Application;

require_once __DIR__ . '/../src/Robots/Reader.php';
use Robots;
// Smoke-test the Robots reader: fetch a live robots.txt and dump the parse.
$reader = new Robots\Reader();
$remoteRobots = file_get_contents(Robots\Reader::url('http://www.robotstxt.org'));
print_r($reader->parse($remoteRobots));
exit;
// NOTE(review): everything below is unreachable because of the exit above —
// it looks like a local fixture kept for parsing without a network round-trip.
$robots = "# Disallow all crawlers access to certain pages.\n\nUser-agent: * # test comment\nDisallow: /test/1\nAllow: /test/2*\n\nUser-Agent: Googlebot\nDisallow: /test/3 #comment\nAllow: /test/4*\n\n\n# Sitemap files\nSitemap: http://www.amazon.com/sitemap-manual-index.xml\nSitemap: http://www.amazon.com/sitemap_dp_index.xml\n";
print_r($reader->parse($robots));