/**
 * Return an absolute form of the given URL.
 *
 * A URL that already begins with "http://" or "https://" (compared
 * case-insensitively) is returned untouched. Anything else is resolved
 * against the URL stored in $this->url via phpUri.
 *
 * @param string $url Absolute or relative URL.
 *
 * @return string The unchanged input, or the value produced by phpUri's join().
 */
function ensureAbsoluteUrl($url)
{
    // Require the complete scheme prefix. A bare "starts with http" test
    // would wrongly treat relative paths such as "httpdocs/page.html" as
    // absolute and would miss upper-case schemes such as "HTTP://".
    if (preg_match('#^https?://#i', $url) === 1) {
        return $url;
    }

    return phpUri::parse($this->url)->join($url);
}
/**
 * Join with a relative url
 *
 * Parses $relative as a phpUri and fills in whatever it is missing from
 * this (base) URI:
 *  - a reference with its own scheme or authority keeps its path as-is;
 *  - a reference without a path inherits the base path, and the base query
 *    too when it has no query of its own;
 *  - a path starting with "/" is kept as-is;
 *  - any other path is appended to the base path with its last segment
 *    removed.
 * Finally the scheme (and, when the scheme was missing, the authority) is
 * inherited from the base where the reference lacks it.
 *
 * @param string $relative the relative url to join
 *
 * @return string the result of to_str() on the merged URI
 */
public function join($relative)
{
    $uri = new phpUri($relative);

    // A component is "blank" only when it is really absent. empty() is
    // deliberately not used here: it also reports the valid component "0"
    // as missing, so a reference such as "0" or "?0" would be discarded.
    $isBlank = function ($component) {
        return $component === null || $component === false || $component === '';
    };

    // Path merging only applies to references that carry neither their own
    // scheme nor their own authority.
    if ($isBlank($uri->scheme) && $isBlank($uri->authority)) {
        if ($isBlank($uri->path)) {
            // No path in the reference: reuse the base path, and the base
            // query as well unless the reference supplies one.
            $uri->path = $this->path;
            if ($isBlank($uri->query)) {
                $uri->query = $this->query;
            }
        } elseif (strpos($uri->path, '/') !== 0) {
            // Path does not start with "/": merge it with the base path.
            $base_path = $this->path;
            if (strpos($base_path, '/') === false) {
                $base_path = '';
            } else {
                // Drop the last path segment, keeping the trailing slash.
                $base_path = preg_replace('/\\/[^\\/]+$/', '/', $base_path);
            }
            if ($isBlank($base_path) && $isBlank($this->authority)) {
                $base_path = '/';
            }
            $uri->path = $base_path . $uri->path;
        }
    }

    if ($isBlank($uri->scheme)) {
        $uri->scheme = $this->scheme;
        if ($isBlank($uri->authority)) {
            $uri->authority = $this->authority;
        }
    }

    return $uri->to_str();
}
/**
 * Turn a URL found on a page into an absolute URL.
 *
 * A URL such as /test found on a page such as http://foo.com/bar/ is
 * resolved against that page URL. URLs that already carry a host or a
 * scheme are handed back unchanged.
 *
 * Null is returned when the extracted URL is blank (after trimming) or when
 * parse_url() cannot parse it.
 *
 * @param string $extracted_url Relative or absolute URL extracted from page
 * @param string $page_url URL of page
 * @return string|null Absolute URL, or null
 */
function sist_relative_to_absolute_url($extracted_url, $page_url) {
    $extracted_url = trim($extracted_url);
    if ($extracted_url === '') {
        return null;
    }

    // Protocol-less URLs ("//host/path") are handled before parse_url():
    // PHP <= 5.4.7 has a bug where URLs starting with // are parsed as a path.
    // http://php.net/manual/en/function.parse-url.php#example-4617
    if (strpos($extracted_url, '//') === 0) {
        $points_at_origin = stripos($extracted_url, '//' . sist_origin_host()) === 0;
        if ($points_at_origin) {
            // Local URL: swap the leading "//" for "<scheme>://".
            $extracted_url = substr_replace($extracted_url, sist_origin_scheme() . '://', 0, 2);
        }
        return $extracted_url;
    }

    $parts = parse_url($extracted_url);
    if ($parts === false) {
        // parse_url gives up on seriously malformed URLs
        return null;
    }

    // Anything with a host, or with a scheme but no host (java:, data:),
    // is returned untouched.
    if (isset($parts['host']) || isset($parts['scheme'])) {
        return $extracted_url;
    }

    // Neither host nor scheme: rebuild the reference from its parts and
    // resolve it against the page URL.
    $reference = isset($parts['path']) ? $parts['path'] : '';
    if (isset($parts['query'])) {
        $reference .= '?' . $parts['query'];
    }
    if (isset($parts['fragment'])) {
        $reference .= '#' . $parts['fragment'];
    }

    return phpUri::parse($page_url)->join($reference);
}
/**
 * Rewrite the relative URLs held in one attribute across a whole document.
 *
 * Every element with a non-empty $attribute is visited. Values that already
 * begin with "http://" or "https://" (case-insensitive), and values for
 * which self::isIgnored() returns true, are left alone. All other values
 * are resolved against $rootUrl with phpUri and written back in place.
 *
 * @param DOMDocument $domDocument Document whose nodes are modified in place.
 * @param string      $attribute   Attribute name to rewrite; it is inserted
 *                                 into the XPath expression verbatim.
 * @param string      $rootUrl     Base URL that relative values are joined to.
 *
 * @return void
 */
private static function convertRelativeToAbsoluteUrl($domDocument, $attribute, $rootUrl)
{
    $xpath = new DOMXPath($domDocument);

    // Select on "non-empty" only. Filtering on starts-with(..., "http") in
    // XPath would also skip relative paths such as "httpdocs/logo.png".
    $nodes = $xpath->query('//*[@' . $attribute . ' != ""]');
    if ($nodes === false) {
        // The expression could not be evaluated; nothing to rewrite.
        return;
    }

    // The base URL is the same for every node, so parse it once.
    $base = phpUri::parse($rootUrl);

    foreach ($nodes as $node) {
        $value = $node->getAttribute($attribute);

        // Already an absolute http(s) URL: leave untouched.
        if (preg_match('#^https?://#i', $value) === 1) {
            continue;
        }

        if (!self::isIgnored($value)) {
            $node->setAttribute($attribute, $base->join($value));
        }
    }
}
} }
// NOTE(review): $start for this first measurement is assigned above this
// excerpt, so the expression is left as it was. If that assignment uses
// microtime() without `true`, switch both sides to microtime(true) together.
$elapsed = microtime() - $start;
echo "rel2abs: successes -> {$successes}, failures => {$failures}, elapsed time: {$elapsed}\n";

# url_to_absolute
// microtime(true) returns a float number of seconds. Without the argument it
// returns a "msec sec" string, and subtracting two of those does not give an
// elapsed time.
$start = microtime(true);
$base = 'http://a/b/c/d;p?q';
list($successes, $failures) = array(0, 0);
foreach ($tests as $test) {
    if (($r = url_to_absolute($base, $test['rel'])) == $test['result']) {
        $successes++;
    } else {
        $failures++;
    }
}
$elapsed = microtime(true) - $start;
echo "url_to_absolute: successes -> {$successes}, failures => {$failures}, elapsed time: {$elapsed}\n";

# phpuri
$start = microtime(true);
// The base is parsed once and reused for every join in this section.
$base = phpUri::parse('http://a/b/c/d;p?q');
list($successes, $failures) = array(0, 0);
foreach ($tests as $test) {
    if (($r = $base->join($test['rel'])) == $test['result']) {
        $successes++;
    } else {
        $failures++;
        // Report each mismatch so failing cases can be identified.
        echo "failure: {$r} instead of " . $test['result'] . " \n";
    }
}
$elapsed = microtime(true) - $start;
echo "phpuri: successes -> {$successes}, failures => {$failures}, elapsed time: {$elapsed}\n";
/**
 * Resolve a relative URL against a base URL using the bundled phpUri library.
 *
 * @param string $relative URL to resolve.
 * @param string $url      Base URL that $relative is joined to.
 *
 * @return mixed Whatever phpUri's join() returns for the pair.
 */
private function make_absolute($relative, $url)
{
    // Load the library on first use; require_once makes repeat calls cheap.
    require_once DPSFA_DIR . '/libs/phpuri/phpuri.php';

    $base = \phpUri::parse($url);

    return $base->join($relative);
}
/**
 * Turn a URL found on a page into an absolute URL.
 *
 * A URL such as /test found on a page such as http://foo.com/bar/ is
 * resolved against that page URL. URLs that already carry a host or a
 * scheme are handed back unchanged.
 *
 * Null is returned when the extracted URL is blank (after trimming) or when
 * parse_url() cannot parse it.
 *
 * @param string $extracted_url Relative or absolute URL extracted from page
 * @param string $page_url URL of page
 * @return string|null Absolute URL, or null
 */
function sist_relative_to_absolute_url($extracted_url, $page_url) {
    $extracted_url = trim($extracted_url);
    if ($extracted_url === '') {
        return null;
    }

    $parts = parse_url($extracted_url);
    if ($parts === false) {
        // parse_url gives up on seriously malformed URLs
        return null;
    }

    // Anything with a host, or with a scheme but no host (java:, data:),
    // is returned untouched.
    if (isset($parts['host']) || isset($parts['scheme'])) {
        return $extracted_url;
    }

    // No host on the extracted URL (it might be a relative URL).
    $path = isset($parts['path']) ? $parts['path'] : '';

    // PHP <= 5.4.7 has a bug where URLs starting with '//' are reported
    // as a path; such URLs are returned as they are.
    // http://php.net/manual/en/function.parse-url.php#example-4617
    if (strncmp($path, '//', 2) === 0) {
        return $extracted_url;
    }

    // Re-attach query and fragment, then resolve against the page URL.
    $suffix = '';
    if (isset($parts['query'])) {
        $suffix .= '?' . $parts['query'];
    }
    if (isset($parts['fragment'])) {
        $suffix .= '#' . $parts['fragment'];
    }

    return phpUri::parse($page_url)->join($path . $suffix);
}
list($successes, $failures) = array(0, 0); foreach ($tests as $test) { if (($r = url_to_absolute($test['base'], $test['rel'])) == $test['result']) { $successes++; } else { $failures++; echo "url_to_absolute failure: {$r} instead of " . $test['result'] . " \n"; } } $elapsed = microtime() - $start; echo "url_to_absolute: successes -> {$successes}, failures => {$failures}, elapsed time: {$elapsed}\n"; # phpuri $start = microtime(); list($successes, $failures) = array(0, 0); foreach ($tests as $test) { $base = phpUri::parse($test['base']); if (($r = $base->join($test['rel'])) == $test['result']) { $successes++; } else { $failures++; echo "phpuri failure: {$r} instead of " . $test['result'] . " \n"; } } $elapsed = microtime() - $start; echo "phpuri: successes -> {$successes}, failures => {$failures}, elapsed time: {$elapsed}\n"; # net_url2 $start = microtime(); list($successes, $failures) = array(0, 0); foreach ($tests as $test) { $base = new Net_URL2($test['base']); if (($r = $base->resolve($test['rel'])) == $test['result']) {