Example #1
0
 /**
  * Set the website URL used as the starting point for crawling.
  *
  * When the given value does not begin with an explicit "http://" or
  * "https://" scheme, "http://" is prepended, so bare host names such as
  * "example.com" are accepted. The result is passed through Uri\normalize()
  * before it is stored in $this->baseUrl.
  *
  * @param string $baseUrl Website URL, with or without a scheme.
  *
  * @return void
  */
 public function setBaseUrl($baseUrl)
 {
     // Test for a real scheme prefix instead of for "http" anywhere in the
     // string: hosts or paths that merely contain "http" (for example
     // "httpbin.org" or "example.com/http-guide") still need the default
     // scheme. The match is case-insensitive so "HTTP://..." is not doubled.
     if (preg_match('#^https?://#i', $baseUrl) !== 1) {
         $baseUrl = 'http://' . $baseUrl;
     }

     $this->baseUrl = Uri\normalize($baseUrl);
 }
Example #2
0
    /**
     * Calculates the uri for a request, making sure that the base uri is stripped out.
     *
     * Absolute urls (anything containing '://' that does not start with a
     * slash) are first reduced to their path component. Runs of slashes are
     * then collapsed and both the uri and the base uri are normalized before
     * they are compared.
     *
     * A request for the base uri without its trailing slash is accepted and
     * yields an empty string.
     *
     * @param string $uri
     * @throws Exception\Forbidden A permission denied exception is thrown whenever there was an attempt to supply a uri outside of the base uri
     * @return string The decoded path relative to the base uri, without leading or trailing slashes
     */
    function calculateUri($uri) {

        $uri = (string) $uri;

        // Guard against the empty string before looking at the first
        // character; indexing into '' raises an uninitialized-offset error.
        if ($uri !== '' && $uri[0] !== '/' && strpos($uri, '://') > 0) {

            // parse_url() returns null when the url has no path and false
            // when it is malformed; treat both as an empty path.
            $uri = (string) parse_url($uri, PHP_URL_PATH);

        }

        // Collapse every run of slashes. A single str_replace('//', '/') pass
        // would turn '///' into '//', which then looks like a network-path
        // reference to the normalizer.
        $uri = Uri\normalize(preg_replace('#/+#', '/', $uri));
        $baseUri = Uri\normalize($this->getBaseUri());

        if (strpos($uri, $baseUri) === 0) {

            return trim(URLUtil::decodePath(substr($uri, strlen($baseUri))), '/');

        // A special case, if the baseUri was accessed without a trailing
        // slash, we'll accept it as well.
        } elseif ($uri . '/' === $baseUri) {

            return '';

        } else {

            throw new Exception\Forbidden('Requested uri (' . $uri . ') is out of base uri (' . $this->getBaseUri() . ')');

        }

    }
Example #3
0
 /**
  * Returns the relative path.
  *
  * This is being calculated using the base url. This path will not start
  * with a slash, so it will always return something like
  * 'example/path.html'. The query part (everything after the ?) is never
  * included.
  *
  * If the full path is equal to the base url, this method will return an
  * empty string. The same is true when the base url is requested without
  * its trailing slash.
  *
  * The path is decoded through URLUtil::decodePath(), which is documented
  * to urldecode it and to convert ISO-8859-1 encoded urls to UTF-8.
  *
  * If the path is outside of the base url, a LogicException will be thrown.
  *
  * @throws \LogicException When the requested url is not below the base url
  * @return string
  */
 function getPath()
 {
     // We're not interested in the query part (everything after the ?).
     // Dropping it before any comparison lets a request such as
     // '/base?x=1' still match the base url '/base/'.
     list($uri) = explode('?', $this->getUrl());

     // Removing duplicated slashes. A regex is used so that runs of three
     // or more slashes collapse to one as well.
     // NOTE(review): this also collapses the '://' of an absolute url, as
     // the previous str_replace() did - confirm getUrl() only yields a path.
     $uri = Uri\normalize(preg_replace('#/+#', '/', $uri));
     $baseUri = Uri\normalize($this->getBaseUrl());

     if (strpos($uri, $baseUri) === 0) {
         return trim(URLUtil::decodePath(substr($uri, strlen($baseUri))), '/');
     }

     // A special case: the base url was accessed without a trailing slash.
     if ($uri . '/' === $baseUri) {
         return '';
     }

     throw new \LogicException('Requested uri (' . $this->getUrl() . ') is out of base uri (' . $this->getBaseUrl() . ')');
 }