示例#1
0
 /**
  * Add a sitemap URI to the list.
  *
  * The line is passed through UriParser::encode(); the result is stored
  * unless the parser rejects it or the same URI is already present.
  *
  * @param string $line
  * @return bool True if the URI was appended, false if it was invalid or a duplicate
  */
 public function add($line)
 {
     $uriParser = new UriParser($line);
     // encode() is invoked before validate(), preserving the original call order
     $uri = $uriParser->encode();
     if (!$uriParser->validate()) {
         return false;
     }
     // Strict comparison avoids PHP's loose type juggling when looking for duplicates
     if (in_array($uri, $this->sitemaps, true)) {
         return false;
     }
     $this->sitemaps[] = $uri;
     return true;
 }
 /**
  * Parse a line into a normalized "scheme + host + port" string.
  *
  * The line is rejected when the parser reports it as an IP address, when
  * the host does not validate, or when a scheme is present but does not
  * validate.
  *
  * @param string $line
  * @return string|false Concatenated parts from getParts(), or false if rejected
  */
 private function parse($line)
 {
     $parser = new UriParser($line);
     $encoded = $parser->encode();
     if ($parser->validateIP()) {
         return false;
     }
     if (!$parser->validateHost()) {
         return false;
     }
     // The scheme is optional; it is only validated when one is present
     $hasScheme = parse_url($encoded, PHP_URL_SCHEME) !== null;
     if ($hasScheme && !$parser->validateScheme()) {
         return false;
     }
     $pieces = $this->getParts($encoded);
     return $pieces['scheme'] . $pieces['host'] . $pieces['port'];
 }
 /**
  * Check whether a directive applies to the given URI.
  *
  * The URI is first converted to a full URI relative to $this->base. An
  * override result from checkOverride() takes precedence; otherwise the
  * decision is delegated to checkPath().
  *
  * @param string $directive
  * @param string $uri
  * @return bool
  * @throws ClientException When the URI's base differs from $this->base
  */
 private function check($directive, $uri)
 {
     $parser = new UriParser($uri);
     $fullUri = $parser->convertToFull($this->base);
     if ($this->base !== $parser->base()) {
         throw new ClientException('URI belongs to a different robots.txt');
     }
     $override = $this->checkOverride($fullUri);
     if ($override === false) {
         // No override matched: fall back to the path check
         return $this->checkPath($directive, $fullUri);
     }
     return $directive === $override;
 }
 /**
  * TxtClient constructor.
  *
  * Sets the multibyte internal encoding, reduces both URIs to their base
  * form, creates the root directive handler and parses the given content.
  *
  * @param string $baseUri
  * @param string $content
  * @param string|null $effectiveUri Falls back to the base of $baseUri when null
  */
 public function __construct($baseUri, $content, $effectiveUri = null)
 {
     mb_internal_encoding(self::ENCODING);
     $baseParser = new UriParser($baseUri);
     $base = $baseParser->base();
     if ($effectiveUri === null) {
         // No effective URI supplied: reuse the base URI
         $effectiveBase = $base;
     } else {
         $effectiveParser = new UriParser($effectiveUri);
         $effectiveBase = $effectiveParser->base();
     }
     $this->handler = new RootDirectiveHandler($base, $effectiveBase);
     $this->parseTxt($content);
 }
 /**
  * Is listed?
  *
  * Builds the candidate spellings of the URI's host (bare host, host:port,
  * scheme://host and scheme://host:port) and reports whether any entry of
  * $this->host equals one of them.
  *
  * Candidates are only built from components that are actually available,
  * so a URI without a scheme or a resolvable port no longer yields
  * malformed candidates such as "host:" or "://host".
  *
  * @param string $uri
  * @return bool
  */
 public function isListed($uri)
 {
     $uriParser = new UriParser($uri);
     $uri = $uriParser->encode();
     $scheme = parse_url($uri, PHP_URL_SCHEME);
     $host = parse_url($uri, PHP_URL_HOST);
     if (!is_string($host) || $host === '') {
         // Without a host there is nothing meaningful to compare against
         return false;
     }
     $hasScheme = is_string($scheme) && $scheme !== '';
     $port = parse_url($uri, PHP_URL_PORT);
     if (!is_int($port)) {
         // Fall back to the default port of the scheme; getservbyname()
         // returns false for unknown services and must not receive null
         $port = $hasScheme ? getservbyname($scheme, 'tcp') : false;
     }
     $cases = [$host];
     if ($port !== false) {
         $cases[] = $host . ':' . $port;
     }
     if ($hasScheme) {
         $cases[] = $scheme . '://' . $host;
         if ($port !== false) {
             $cases[] = $scheme . '://' . $host . ':' . $port;
         }
     }
     foreach ($this->host as $listed) {
         if (in_array($listed, $cases, true)) {
             return true;
         }
     }
     return false;
 }
 /**
  * Add
  *
  * Splits the line on the first run of whitespace into an "&"-separated
  * parameter list and an optional path prefix, then records the path under
  * every parameter name in $this->cleanParam. The path defaults to "/".
  *
  * @param string $line
  * @return bool Always true
  */
 public function add($line)
 {
     // Split into parameter list and (optional) path prefix
     $array = array_map('trim', mb_split('\\s+', $line, 2));
     $path = '/';
     if (isset($array[1])) {
         // Strip any characters outside A-Za-z0-9 . / * _ - from the path prefix.
         // The hyphen is placed last in the class so it is a literal; the
         // previous pattern "\.-\/" formed a range and silently removed hyphens.
         $uriParser = new UriParser(preg_replace('/[^A-Za-z0-9.\\/*_-]/', '', $array[1]));
         $encoded = rtrim($uriParser->encode(), '*');
         if (!empty($encoded)) {
             $path = $encoded;
         }
     }
     $param = array_map('trim', explode('&', $array[0]));
     foreach ($param as $key) {
         $this->cleanParam[$key][] = $path;
     }
     return true;
 }
示例#7
0
 /**
  * Invalidate cache
  *
  * Reduces the given URI to its base and forwards the invalidation to the
  * handler.
  *
  * @param string $baseUri
  * @return bool Result reported by the handler
  * @throws ClientException
  */
 public function invalidate($baseUri)
 {
     $uriParser = new UriParser($baseUri);
     $base = $uriParser->base();
     return $this->handler->invalidate($base);
 }
示例#8
0
 /**
  * cURL request
  *
  * Fetches $this->base . self::PATH. Default options are applied first,
  * then the caller's custom options, and finally a fixed set of options
  * that callers are not allowed to override. On success the raw contents,
  * status code, effective base URI, charset and max-age are stored on the
  * instance.
  *
  * @param array $options Custom cURL options (CURLOPT_* => value)
  * @return bool True on success, false if the transfer failed
  */
 private function request($options = [])
 {
     $curl = curl_init();
     // Set default cURL options
     curl_setopt_array($curl, [
         CURLOPT_AUTOREFERER => true,
         CURLOPT_CAINFO => CaBundle::getSystemCaRootBundlePath(),
         CURLOPT_CONNECTTIMEOUT => 30,
         CURLOPT_ENCODING => 'identity',
         CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_NONE,
         CURLOPT_IPRESOLVE => CURL_IPRESOLVE_WHATEVER,
         CURLOPT_SSL_VERIFYHOST => 2,
         CURLOPT_SSL_VERIFYPEER => true,
         CURLOPT_TIMEOUT => 120,
         CURLOPT_USERAGENT => self::CURL_USER_AGENT,
     ]);
     // Apply custom cURL options
     curl_setopt_array($curl, $options);
     $this->headerParser = new Parser\HeaderParser($curl);
     // Make sure these cURL options stay untouched (applied last so they win)
     $protocols = CURLPROTO_FTP | CURLPROTO_FTPS | CURLPROTO_HTTP | CURLPROTO_HTTPS | CURLPROTO_SFTP;
     curl_setopt_array($curl, [
         CURLOPT_FAILONERROR => false,
         CURLOPT_FOLLOWLOCATION => true,
         CURLOPT_FTPSSLAUTH => CURLFTPAUTH_DEFAULT,
         CURLOPT_HEADER => false,
         CURLOPT_HEADERFUNCTION => [$this->headerParser, 'curlCallback'],
         CURLOPT_HTTPAUTH => CURLAUTH_BASIC,
         CURLOPT_MAXREDIRS => self::MAX_REDIRECTS,
         CURLOPT_NOBODY => false,
         CURLOPT_PROTOCOLS => $protocols,
         CURLOPT_REDIR_PROTOCOLS => $protocols,
         CURLOPT_RETURNTRANSFER => true,
         CURLOPT_URL => $this->base . self::PATH,
         CURLOPT_USERPWD => 'anonymous:anonymous@',
     ]);
     // Execute cURL request
     if (($this->rawContents = curl_exec($curl)) === false) {
         // Request failed: release the handle before bailing out, the
         // header callback keeps a reference to it otherwise
         curl_close($curl);
         return false;
     }
     $this->time = time();
     // also works with FTP status codes
     $this->rawStatusCode = curl_getinfo($curl, CURLINFO_HTTP_CODE);
     $uriParser = new UriParser(curl_getinfo($curl, CURLINFO_EFFECTIVE_URL));
     $this->effective = $uriParser->base();
     curl_close($curl);
     $this->rawEncoding = $this->headerParser->getCharset();
     $this->rawMaxAge = $this->headerParser->getMaxAge();
     return true;
 }
示例#9
0
 /**
  * Get the RAW data
  *
  * Reduces the given URI to its base and returns the delay manager's debug
  * output for it.
  *
  * @param string $baseUri
  * @return array
  */
 public function debug($baseUri)
 {
     $uriParser = new UriParser($baseUri);
     $base = $uriParser->base();
     $delayManager = $this->switch->delayManager();
     return $delayManager->debug($base);
 }
示例#10
0
 /**
  * Get path and query
  *
  * Input that already starts with "/" is treated as a path and returned
  * (without its fragment). Otherwise the URI must validate, and its path
  * (defaulting to "/") plus optional "?query" is returned.
  *
  * @param string $uri
  * @return string
  * @throws ClientException When the URI is not a path and fails validation
  */
 private function getPath($uri)
 {
     $parser = new UriParser($uri);
     // Return value is discarded, as in the original; presumably encode()
     // updates the parser's internal URI - TODO confirm against UriParser
     $parser->encode();
     $stripped = $parser->stripFragment();
     if (mb_strpos($stripped, '/') === 0) {
         // Already a path, nothing to extract
         return $stripped;
     }
     if (!$parser->validate()) {
         throw new ClientException('Invalid URI');
     }
     $path = parse_url($stripped, PHP_URL_PATH);
     if ($path === null) {
         $path = '/';
     }
     $query = parse_url($stripped, PHP_URL_QUERY);
     $suffix = $query === null ? '' : '?' . $query;
     return $path . $suffix;
 }