示例#1
0
/**
 * Given a URL extracted from a page, return an absolute URL
 *
 * Takes a URL (e.g. test) extracted from a page (e.g. http://foo.com/bar/) and
 * returns an absolute URL (e.g. http://foo.com/bar/test). URLs that already
 * carry a host or a scheme are returned as-is.
 *
 * Protocol-less URLs (//host/path) are handled before parsing: when they point
 * at the origin host, the origin scheme is prepended; otherwise they are
 * returned unchanged (still protocol-less).
 *
 * A null value is returned in the event that the extracted_url is blank or it's
 * unable to be parsed.
 *
 * @param  string       $extracted_url   Relative or absolute URL extracted from page
 * @param  string       $page_url        URL of page
 * @return string|null                   Absolute URL, or null
 */
function sist_relative_to_absolute_url($extracted_url, $page_url)
{
    $extracted_url = trim($extracted_url);
    if ($extracted_url === '') {
        return null;
    }

    // check for a protocol-less URL
    // (Note: there's a bug in PHP <= 5.4.7 where parsed URLs starting with //
    // are treated as a path. So we're doing this check upfront.)
    // http://php.net/manual/en/function.parse-url.php#example-4617
    if (strpos($extracted_url, '//') === 0) {
        $local_prefix = '//' . sist_origin_host();

        if (stripos($extracted_url, $local_prefix) === 0) {
            // A bare prefix match is not enough: "//foo.com.evil.com" starts
            // with "//foo.com" but is a different host. Only treat the URL as
            // local when the origin host is followed by the end of the string
            // or by a delimiter (path, port, query or fragment).
            $boundary = strlen($local_prefix);
            $is_local = strlen($extracted_url) === $boundary
                || strpos('/:?#', $extracted_url[$boundary]) !== false;

            if ($is_local) {
                // swap the leading "//" for "<scheme>://"
                $extracted_url = substr_replace($extracted_url, sist_origin_scheme() . '://', 0, 2);
            }
        }

        return $extracted_url;
    }

    $parsed_extracted_url = parse_url($extracted_url);

    // parse_url returns false on seriously malformed URLs; bail if it does
    if ($parsed_extracted_url === false) {
        return null;
    }

    // Anything with a host is already absolute. Anything with a scheme but no
    // host (e.g. java:, data:) cannot be resolved against the page either.
    if (isset($parsed_extracted_url['host']) || isset($parsed_extracted_url['scheme'])) {
        return $extracted_url;
    }

    // no host on extracted page (might be relative url)
    $path = isset($parsed_extracted_url['path']) ? $parsed_extracted_url['path'] : '';
    $query = isset($parsed_extracted_url['query']) ? '?' . $parsed_extracted_url['query'] : '';
    $fragment = isset($parsed_extracted_url['fragment']) ? '#' . $parsed_extracted_url['fragment'] : '';

    // turn our relative url into an absolute url
    return phpUri::parse($page_url)->join($path . $query . $fragment);
}
 /**
  * Replaces the origin URL with the destination URL in the response body
  *
  * Only HTML and CSS responses are rewritten; any other response is left
  * untouched. Two passes are made over the body: one for plain occurrences of
  * the origin host (prefixed with https://, http:// or //) and one for the
  * origin URL with its slashes backslash-escaped.
  *
  * @param string $destination_url URL to put in place of the origin URL
  * @return void
  */
 public function replace_urls($destination_url)
 {
     /* TODO: Might want to eventually rope this into extract_urls_from_html/
        extract_urls_from_css so that we're only doing preg_replace/
        str_replace once. Only reason I'm not doing that now is because of
        the fix for wp_json_encode.
     */
     if (! $this->is_html() && ! $this->is_css()) {
         return;
     }

     // replace any instance of the origin url, whether it starts with https://, http://, or //
     // preg_quote escapes every regex metacharacter in the host (notably "."),
     // as well as the "/" delimiter, so only the literal host can match.
     $pattern = '/(https?:)?\\/\\/' . preg_quote(sist_origin_host(), '/') . '/i';

     // Escape "\" and "$" so preg_replace inserts the destination URL verbatim
     // instead of interpreting sequences such as "$1" or "\1" as backreferences.
     $replacement = addcslashes($destination_url, '\\$');

     $response_body = preg_replace($pattern, $replacement, $this->body);
     if ($response_body === null) {
         // preg_replace signals a PCRE error with null; keep the original body
         // rather than wiping it out.
         $response_body = $this->body;
     }

     // also replace wp_json_encode'd urls, as used by WP's `concatemoji`
     $response_body = str_replace(addcslashes(sist_origin_url(), '/'), addcslashes($destination_url, '/'), $response_body);

     $this->body = $response_body;
 }