/**
 * Given a URL extracted from a page, return an absolute URL
 *
 * Takes a URL (e.g. test) extracted from a page (e.g. http://foo.com/bar/)
 * and returns an absolute URL (e.g. http://foo.com/bar/test). URLs that
 * already have a host or a scheme are returned as-is (after trimming).
 *
 * Protocol-less URLs (//host/path) that point at the origin host get the
 * origin scheme prepended; any other protocol-less URL is returned unchanged.
 *
 * A null value is returned in the event that the extracted_url is blank or
 * it's unable to be parsed.
 *
 * @param string $extracted_url Relative or absolute URL extracted from page
 * @param string $page_url      URL of page
 * @return string|null          Absolute URL, or null
 */
function sist_relative_to_absolute_url($extracted_url, $page_url) {
    $extracted_url = trim($extracted_url);

    if ($extracted_url === '') {
        return null;
    }

    // check for a protocol-less URL
    // (Note: there's a bug in PHP <= 5.4.7 where parsed URLs starting with
    // two slashes are treated as a path. So we're doing this check upfront.)
    // http://php.net/manual/en/function.parse-url.php#example-4617
    if (strpos($extracted_url, '//') === 0) {
        $local_prefix = '//' . sist_origin_host();

        if (stripos($extracted_url, $local_prefix) === 0) {
            // A bare prefix match is not enough: with origin host "foo.com",
            // "//foo.com.evil.net" also starts with "//foo.com". Only treat
            // the URL as local when the host is followed by the end of the
            // string or by a path, query, fragment or port delimiter.
            // (The cast covers PHP < 8, where substr() returns false when
            // the offset equals the string length.)
            $next_char = (string) substr($extracted_url, strlen($local_prefix), 1);

            if ($next_char === '' || strpos('/?#:', $next_char) !== false) {
                // local URL: swap the leading "//" for "<scheme>://"
                $extracted_url = substr_replace($extracted_url, sist_origin_scheme() . '://', 0, 2);
            }
        }

        return $extracted_url;
    }

    $parsed_extracted_url = parse_url($extracted_url);

    // parse_url can sometimes return false; bail if it does
    if ($parsed_extracted_url === false) {
        return null;
    }

    // A URL with a host is already absolute. A URL with a scheme but no host
    // (e.g. java:, data:) is not joined with the page URL either. Both are
    // returned untouched.
    if (isset($parsed_extracted_url['host']) || isset($parsed_extracted_url['scheme'])) {
        return $extracted_url;
    }

    // no host on extracted url (might be relative url)
    $path = isset($parsed_extracted_url['path']) ? $parsed_extracted_url['path'] : '';
    $query = isset($parsed_extracted_url['query']) ? '?' . $parsed_extracted_url['query'] : '';
    $fragment = isset($parsed_extracted_url['fragment']) ? '#' . $parsed_extracted_url['fragment'] : '';

    // turn our relative url into an absolute url
    return phpUri::parse($page_url)->join($path . $query . $fragment);
}
/**
 * Replaces the origin URL with the destination URL in the response body
 *
 * Only acts when is_html() or is_css() reports true; otherwise the body is
 * left untouched. Two passes are made over the body:
 *  1. every occurrence of the origin host prefixed with https://, http://
 *     or a bare // is replaced by the destination URL;
 *  2. every occurrence of the origin URL with its slashes backslash-escaped
 *     (the form produced by wp_json_encode) is replaced by the destination
 *     URL escaped the same way.
 *
 * @param string $destination_url URL that replaces the origin URL
 * @return void
 */
public function replace_urls($destination_url) {
    /*
    TODO:
    Might want to eventually rope this into extract_urls_from_html/
    extract_urls_from_css so that we're only doing preg_replace/
    str_replace once. Only reason I'm not doing that now is because of
    the fix for wp_json_encode.
    */
    if ($this->is_html() || $this->is_css()) {
        // replace any instance of the origin url, whether it starts with
        // https://, http://, or //
        // preg_quote() escapes the dots (and any other regex metacharacter)
        // in the host so that "example.com" cannot match "exampleXcom".
        $origin_pattern = '/(https?:)?\\/\\/' . preg_quote(sist_origin_host(), '/') . '/i';

        // A callback is used so the destination URL is inserted literally;
        // as a plain preg_replace() replacement string, sequences such as
        // "$1" or "\1" inside it would be read as backreferences.
        $response_body = preg_replace_callback(
            $origin_pattern,
            function () use ($destination_url) {
                return $destination_url;
            },
            $this->body
        );

        // preg_replace_callback() returns null on a PCRE error; keep the
        // original body in that case rather than wiping it.
        if ($response_body === null) {
            $response_body = $this->body;
        }

        // also replace wp_json_encode'd urls, as used by WP's `concatemoji`
        $response_body = str_replace(
            addcslashes(sist_origin_url(), '/'),
            addcslashes($destination_url, '/'),
            $response_body
        );

        $this->body = $response_body;
    }
}