/**
 * Check whether a directive matches the given URI
 *
 * The URI is first run through UriParser::convertToFull() using this
 * instance's base. An override result, when present, takes precedence over
 * the path check.
 *
 * @param string $directive Directive to compare the outcome against
 * @param string $uri URI to check
 * @return bool
 * @throws ClientException When the base of the URI differs from this instance's base
 */
private function check($directive, $uri)
{
    $parser = new UriParser($uri);
    $fullUri = $parser->convertToFull($this->base);

    // Refuse URIs that are governed by another robots.txt
    if ($parser->base() !== $this->base) {
        throw new ClientException('URI belongs to a different robots.txt');
    }

    $override = $this->checkOverride($fullUri);
    if ($override === false) {
        // No override applies, fall back to the path check
        return $this->checkPath($directive, $fullUri);
    }

    return $directive === $override;
}
/**
 * TxtClient constructor.
 *
 * Sets the multibyte internal encoding to self::ENCODING, builds the root
 * directive handler from the base of $baseUri and the effective base, and
 * then parses the supplied content.
 *
 * @param string $baseUri URI whose base identifies this robots.txt
 * @param string $content Content handed to parseTxt()
 * @param string|null $effectiveUri Effective URI; when null, the base of $baseUri is used as the effective base
 */
public function __construct($baseUri, $content, $effectiveUri = null)
{
    mb_internal_encoding(self::ENCODING);

    $base = (new UriParser($baseUri))->base();

    // Without an explicit effective URI, the effective base equals the base
    $effectiveBase = ($effectiveUri === null)
        ? $base
        : (new UriParser($effectiveUri))->base();

    $this->handler = new RootDirectiveHandler($base, $effectiveBase);
    $this->parseTxt($content);
}
/**
 * Invalidate cache
 *
 * Reduces the given URI to its base and forwards that to the handler's
 * invalidate() method.
 *
 * @param string $baseUri URI whose base should be invalidated
 * @return bool Result returned by the handler
 * @throws ClientException
 */
public function invalidate($baseUri)
{
    $uriParser = new UriParser($baseUri);
    $handler = $this->handler;

    return $handler->invalidate($uriParser->base());
}
/**
 * cURL request
 *
 * Requests `$this->base . self::PATH` and stores the outcome on the instance:
 * raw contents, request time, status code, effective base, and the charset
 * and max-age reported by the header parser.
 *
 * Options are applied in three layers: defaults, then the caller's $options,
 * then a fixed set which is applied last and therefore cannot be overridden.
 *
 * @param array $options Custom cURL options, applied on top of the defaults
 * @return bool False when curl_exec() fails, true otherwise
 */
private function request($options = [])
{
    $curl = curl_init();
    // Set default cURL options
    curl_setopt_array($curl, [
        CURLOPT_AUTOREFERER => true,
        CURLOPT_CAINFO => CaBundle::getSystemCaRootBundlePath(),
        CURLOPT_CONNECTTIMEOUT => 30,
        CURLOPT_ENCODING => 'identity',
        CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_NONE,
        CURLOPT_IPRESOLVE => CURL_IPRESOLVE_WHATEVER,
        CURLOPT_SSL_VERIFYHOST => 2,
        CURLOPT_SSL_VERIFYPEER => true,
        CURLOPT_TIMEOUT => 120,
        CURLOPT_USERAGENT => self::CURL_USER_AGENT,
    ]);
    // Apply custom cURL options
    curl_setopt_array($curl, $options);
    $this->headerParser = new Parser\HeaderParser($curl);
    // Make sure these cURL options stay untouched
    curl_setopt_array($curl, [
        CURLOPT_FAILONERROR => false,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_FTPSSLAUTH => CURLFTPAUTH_DEFAULT,
        CURLOPT_HEADER => false,
        CURLOPT_HEADERFUNCTION => [$this->headerParser, 'curlCallback'],
        CURLOPT_HTTPAUTH => CURLAUTH_BASIC,
        CURLOPT_MAXREDIRS => self::MAX_REDIRECTS,
        CURLOPT_NOBODY => false,
        CURLOPT_PROTOCOLS => CURLPROTO_FTP | CURLPROTO_FTPS | CURLPROTO_HTTP | CURLPROTO_HTTPS | CURLPROTO_SFTP,
        CURLOPT_REDIR_PROTOCOLS => CURLPROTO_FTP | CURLPROTO_FTPS | CURLPROTO_HTTP | CURLPROTO_HTTPS | CURLPROTO_SFTP,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_URL => $this->base . self::PATH,
        CURLOPT_USERPWD => 'anonymous:anonymous@',
    ]);
    // Execute cURL request
    if (($this->rawContents = curl_exec($curl)) === false) {
        // Request failed; release the handle before bailing out so it does not leak
        curl_close($curl);
        return false;
    }
    $this->time = time();
    $this->rawStatusCode = curl_getinfo($curl, CURLINFO_HTTP_CODE); // also works with FTP status codes
    // Redirects are followed, so the effective URL may differ from the requested one
    $uriParser = new UriParser(curl_getinfo($curl, CURLINFO_EFFECTIVE_URL));
    $this->effective = $uriParser->base();
    curl_close($curl);
    $this->rawEncoding = $this->headerParser->getCharset();
    $this->rawMaxAge = $this->headerParser->getMaxAge();
    return true;
}
/**
 * Get the RAW data
 *
 * Reduces the given URI to its base and returns whatever the delay
 * manager's debug() method reports for it.
 *
 * @param string $baseUri URI whose base is looked up
 * @return array
 */
public function debug($baseUri)
{
    $uriParser = new UriParser($baseUri);
    $delayManager = $this->switch->delayManager();

    return $delayManager->debug($uriParser->base());
}