/**
 * Register a sitemap URI.
 *
 * The line is encoded through UriParser. It is rejected when the parser does
 * not validate it, or when the encoded URI is already present in the list.
 *
 * @param string $line
 * @return bool True when the URI was appended, false when it was rejected
 */
public function add($line)
{
    $parser = new UriParser($line);
    $encoded = $parser->encode();

    if (!$parser->validate()) {
        return false;
    }

    // Skip duplicates
    if (in_array($encoded, $this->sitemaps)) {
        return false;
    }

    $this->sitemaps[] = $encoded;

    return true;
}
/**
 * Parse a host line into its concatenated scheme/host/port form.
 *
 * @param string $line
 * @return string|false The joined parts from getParts(), or false when the line is rejected
 */
private function parse($line)
{
    $parser = new UriParser($line);
    $encoded = $parser->encode();

    // Lines for which validateIP() reports true are rejected
    if ($parser->validateIP()) {
        return false;
    }

    if (!$parser->validateHost()) {
        return false;
    }

    // A scheme is optional, but when parse_url() reports one it has to validate
    $hasScheme = parse_url($encoded, PHP_URL_SCHEME) !== null;
    if ($hasScheme && !$parser->validateScheme()) {
        return false;
    }

    $parts = $this->getParts($encoded);

    return $parts['scheme'] . $parts['host'] . $parts['port'];
}
/**
 * Check whether the given directive applies to a URI.
 *
 * The URI is first converted to its full form relative to the current base.
 * An override result (anything other than false from checkOverride()) takes
 * precedence over the regular path check.
 *
 * @param string $directive
 * @param string $uri
 * @return bool
 * @throws ClientException When the URI's base differs from this instance's base
 */
private function check($directive, $uri)
{
    $parser = new UriParser($uri);
    $fullUri = $parser->convertToFull($this->base);

    if ($this->base !== $parser->base()) {
        throw new ClientException('URI belongs to a different robots.txt');
    }

    $override = $this->checkOverride($fullUri);
    if ($override === false) {
        // No override applies, fall back to the path check
        return $this->checkPath($directive, $fullUri);
    }

    return $directive === $override;
}
/**
 * TxtClient constructor.
 *
 * Sets the multibyte internal encoding, reduces the given URIs to their base
 * form, creates the root directive handler and parses the supplied content.
 *
 * @param string $baseUri
 * @param string $content
 * @param string|null $effectiveUri When null, the base URI's base is used as the effective base
 */
public function __construct($baseUri, $content, $effectiveUri = null)
{
    mb_internal_encoding(self::ENCODING);

    $base = (new UriParser($baseUri))->base();
    $effective = $effectiveUri === null
        ? $base
        : (new UriParser($effectiveUri))->base();

    $this->handler = new RootDirectiveHandler($base, $effective);
    $this->parseTxt($content);
}
/**
 * Is listed?
 *
 * Builds the notations under which the URI's host may appear (host,
 * host:port, scheme://host, scheme://host:port) and reports whether any
 * stored host entry is identical to one of them.
 *
 * Notations are only built from components that are actually available, and
 * the comparison is strict. This prevents an absent host or scheme from
 * producing null/empty candidates that would loosely match an empty entry.
 *
 * @param string $uri
 * @return bool True when one of the stored hosts matches the URI's host
 */
public function isListed($uri)
{
    $uriParser = new UriParser($uri);
    $uri = $uriParser->encode();

    $host = parse_url($uri, PHP_URL_HOST);
    if (!is_string($host) || $host === '') {
        // Without a host there is nothing that could be listed
        return false;
    }

    $scheme = parse_url($uri, PHP_URL_SCHEME);
    $port = parse_url($uri, PHP_URL_PORT);
    if (!is_int($port)) {
        // No explicit port: look up the scheme's registered TCP port, if a scheme is known
        $port = is_string($scheme) ? getservbyname($scheme, 'tcp') : false;
    }

    $cases = [$host];
    if ($port !== false) {
        $cases[] = $host . ':' . $port;
    }
    if (is_string($scheme)) {
        $cases[] = $scheme . '://' . $host;
        if ($port !== false) {
            $cases[] = $scheme . '://' . $host . ':' . $port;
        }
    }

    foreach ($this->host as $listed) {
        if (in_array($listed, $cases, true)) {
            return true;
        }
    }

    return false;
}
/**
 * Add
 *
 * Splits the line on the first run of whitespace into an "&"-separated
 * parameter list and an optional path prefix, then stores the path under
 * every listed parameter.
 *
 * @param string $line
 * @return bool Always true
 */
public function add($line)
{
    // split into parameter and path
    $array = array_map('trim', mb_split('\\s+', $line, 2));

    $path = '/';
    if (isset($array[1])) {
        // Strip any invalid characters from the path prefix. The hyphen sits
        // last in the character class so it is taken literally; placed
        // between "." and "/" it would form a range and hyphens would be
        // removed from the prefix.
        $uriParser = new UriParser(preg_replace('/[^A-Za-z0-9.\\/*_-]/', '', $array[1]));
        $prefix = rtrim($uriParser->encode(), '*');
        if (!empty($prefix)) {
            $path = $prefix;
        }
    }

    $param = array_map('trim', explode('&', $array[0]));
    foreach ($param as $key) {
        $this->cleanParam[$key][] = $path;
    }

    return true;
}
/**
 * Invalidate cache
 *
 * Reduces the URI to its base and forwards the request to the handler.
 *
 * @param string $baseUri
 * @return bool Result reported by the handler
 * @throws ClientException
 */
public function invalidate($baseUri)
{
    $uriParser = new UriParser($baseUri);
    $base = $uriParser->base();

    return $this->handler->invalidate($base);
}
/**
 * cURL request
 *
 * Fetches `$this->base . self::PATH` and records the raw body, request time,
 * status code, effective base URI, charset and max-age on this instance.
 * Options are applied in three layers: defaults, then the caller's options,
 * then a fixed set that the caller cannot override.
 *
 * The cURL handle is closed on the failure path as well as on success.
 *
 * @param array $options Custom cURL options, applied on top of the defaults
 * @return bool False when curl_exec() fails, true otherwise
 */
private function request($options = [])
{
    $curl = curl_init();
    // Set default cURL options
    curl_setopt_array($curl, [
        CURLOPT_AUTOREFERER => true,
        CURLOPT_CAINFO => CaBundle::getSystemCaRootBundlePath(),
        CURLOPT_CONNECTTIMEOUT => 30,
        CURLOPT_ENCODING => 'identity',
        CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_NONE,
        CURLOPT_IPRESOLVE => CURL_IPRESOLVE_WHATEVER,
        CURLOPT_SSL_VERIFYHOST => 2,
        CURLOPT_SSL_VERIFYPEER => true,
        CURLOPT_TIMEOUT => 120,
        CURLOPT_USERAGENT => self::CURL_USER_AGENT,
    ]);
    // Apply custom cURL options
    curl_setopt_array($curl, $options);
    $this->headerParser = new Parser\HeaderParser($curl);
    // Make sure these cURL options stays untouched
    curl_setopt_array($curl, [
        CURLOPT_FAILONERROR => false,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_FTPSSLAUTH => CURLFTPAUTH_DEFAULT,
        CURLOPT_HEADER => false,
        CURLOPT_HEADERFUNCTION => [$this->headerParser, 'curlCallback'],
        CURLOPT_HTTPAUTH => CURLAUTH_BASIC,
        CURLOPT_MAXREDIRS => self::MAX_REDIRECTS,
        CURLOPT_NOBODY => false,
        CURLOPT_PROTOCOLS => CURLPROTO_FTP | CURLPROTO_FTPS | CURLPROTO_HTTP | CURLPROTO_HTTPS | CURLPROTO_SFTP,
        CURLOPT_REDIR_PROTOCOLS => CURLPROTO_FTP | CURLPROTO_FTPS | CURLPROTO_HTTP | CURLPROTO_HTTPS | CURLPROTO_SFTP,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_URL => $this->base . self::PATH,
        CURLOPT_USERPWD => 'anonymous:anonymous@',
    ]);
    // Execute cURL request
    if (($this->rawContents = curl_exec($curl)) === false) {
        // Request failed; release the handle before bailing out
        curl_close($curl);
        return false;
    }
    $this->time = time();
    $this->rawStatusCode = curl_getinfo($curl, CURLINFO_HTTP_CODE); // also works with FTP status codes
    // The effective URL may differ from the requested one because redirects are followed
    $uriParser = new UriParser(curl_getinfo($curl, CURLINFO_EFFECTIVE_URL));
    $this->effective = $uriParser->base();
    curl_close($curl);
    $this->rawEncoding = $this->headerParser->getCharset();
    $this->rawMaxAge = $this->headerParser->getMaxAge();
    return true;
}
/**
 * Get the RAW data
 *
 * Reduces the URI to its base and returns what the delay manager's debug()
 * reports for it.
 *
 * @param string $baseUri
 * @return array
 */
public function debug($baseUri)
{
    $uriParser = new UriParser($baseUri);
    $base = $uriParser->base();

    return $this->switch->delayManager()->debug($base);
}
/**
 * Get path and query
 *
 * A value that already starts with "/" is returned unchanged (after fragment
 * stripping). Anything else must validate as a URI, from which the path
 * (default "/") and the optional "?query" are extracted.
 *
 * @param string $uri
 * @return string
 * @throws ClientException When the value is neither a path nor a valid URI
 */
private function getPath($uri)
{
    $parser = new UriParser($uri);

    // Prepare uri; encode()'s return value is not used here, presumably it
    // updates the parser's internal state before the fragment is stripped
    $parser->encode();
    $stripped = $parser->stripFragment();

    if (mb_strpos($stripped, '/') === 0) {
        // URI is already a path
        return $stripped;
    }

    if (!$parser->validate()) {
        throw new ClientException('Invalid URI');
    }

    $path = parse_url($stripped, PHP_URL_PATH);
    if ($path === null) {
        $path = '/';
    }

    $query = parse_url($stripped, PHP_URL_QUERY);
    if ($query === null) {
        $suffix = '';
    } else {
        $suffix = '?' . $query;
    }

    return $path . $suffix;
}