/**
 * Mirror a dev tip's assets locally and point the tip at the local copies.
 *
 * For every asset URL of the tip that does not contain "/sponsor/":
 *  - the URL is resolved against https://umaar.com/dev-tips/,
 *  - occurrences of the original URL in the tip's 'content' are replaced by
 *    the public path /web/updates/images/<asset dir>/<basename>,
 *  - the file is downloaded into $updates_dir . 'images/<asset dir>/' unless a
 *    file of that name already exists there.
 *
 * The first non-sponsor asset becomes the tip's 'featured-image'.
 *
 * Reads the global $updates_dir (NOTE(review): it is concatenated directly
 * with 'images/', so it presumably ends with a directory separator).
 *
 * @param DevTip $tip tip to process; its 'content' and 'featured-image' are
 *                    updated in place
 *
 * @return void
 */
function devtips_extract(DevTip $tip) {
    global $updates_dir;

    $assetPath = getFileName($tip->get('date'), $tip->get('title'));
    $assetPath = str_replace(".markdown", "", $assetPath);

    # create new asset directory based on new filename
    $assetDir = $updates_dir . 'images/' . $assetPath;
    if (!file_exists($assetDir)) {
        mkdir($assetDir);
        chmod($assetDir, 0777);
    }

    // Path under which the mirrored assets are referenced from the content.
    $publicDir = '/web/updates/images/' . $assetPath;

    // resolve() is called repeatedly on one base object, so build it once.
    $base = new Net_URL2('https://umaar.com/dev-tips/');

    # Download and store each asset
    $assets = $tip->get('assets');
    $featured = false;

    foreach ($assets as $url) {
        if (strpos($url, "/sponsor/") !== false) {
            continue;
        }

        $abs = $base->resolve($url);
        $basename = pathinfo($url, PATHINFO_BASENAME);
        $dest = $assetDir . '/' . $basename;
        $publicUrl = $publicDir . '/' . $basename;

        $content = $tip->get('content');
        $tip->set('content', str_replace($url, $publicUrl, $content));

        if (!$featured) {
            $tip->set('featured-image', $publicUrl);
            // Remember that a featured image was chosen so later assets do
            // not overwrite it.
            $featured = true;
        }

        if (file_exists($dest)) {
            continue;
        }

        set_time_limit(0);

        // This is the file where we save the information
        $fp = fopen($dest, 'w+');
        if ($fp === false) {
            // Destination not writable; nothing can be stored for this asset.
            continue;
        }

        // Here is the file we are downloading, replace spaces with %20
        $ch = curl_init(str_replace(" ", "%20", (string) $abs));
        curl_setopt($ch, CURLOPT_TIMEOUT, 50);
        curl_setopt($ch, CURLOPT_FILE, $fp); // write curl response to file
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        $downloaded = curl_exec($ch);
        curl_close($ch);
        fclose($fp);

        if ($downloaded === false) {
            // Remove the empty/partial file, otherwise the file_exists()
            // check above would treat the failed download as done forever.
            unlink($dest);
            continue;
        }

        // set proper chmod
        chmod($dest, 0777);
    }
}
/**
 * Collect the rel="alternate" links of every <entry> in an XML response.
 *
 * Link hrefs are resolved against the response's effective URL. Each
 * resolved URL is considered only once: the first <link> carrying a given
 * URL decides whether it is reported, and later links with the same URL are
 * skipped regardless of their rel attribute.
 *
 * @param \HTTP_Request2_Response $res response whose body is parsed as XML
 *
 * @return LinkInfo[] one item per unique alternate link; empty when the body
 *                    is not well-formed XML or contains no such links
 */
public function extract(\HTTP_Request2_Response $res)
{
    $url  = $res->getEffectiveUrl();
    $base = new \Net_URL2($url);

    $sx = simplexml_load_string($res->getBody());
    if ($sx === false) {
        // Unparseable body: there is nothing to iterate over.
        return array();
    }

    $linkInfos   = array();
    $alreadySeen = array();

    foreach ($sx->entry as $entry) {
        $linkTitle = (string) $entry->title;

        foreach ($entry->link as $xlink) {
            $linkUrl = (string) $base->resolve((string) $xlink['href']);
            if (isset($alreadySeen[$linkUrl])) {
                continue;
            }

            if ($xlink['rel'] == 'alternate') {
                $linkInfos[] = new LinkInfo($linkUrl, $linkTitle, $url);
            }

            // Marked as seen even when it was not an alternate link.
            $alreadySeen[$linkUrl] = true;
        }
    }

    return $linkInfos;
}
/**
 * Resolve a relative URL against the absolute URL it was found on.
 *
 * @param string $relativeUrl the relative URL
 * @param string $absoluteUrl the absolute URL with at least scheme and host
 *
 * @return mixed the value returned by \Net_URL2::resolve() for the given
 *               pair. NOTE(review): the result is handed back as-is and is
 *               not cast to string here; cast at the call site if a plain
 *               string is required.
 */
protected function relativeToAbsoluteUrl($relativeUrl, $absoluteUrl)
{
    $resolver = new \Net_URL2($absoluteUrl);
    $resolved = $resolver->resolve($relativeUrl);

    return $resolved;
}
/**
 * Resolve a relative URL against a base URL.
 *
 * @param string $relative_url URL to resolve
 * @param string $base_url     URL the relative one is resolved against
 *
 * @return mixed the getURL() result of the resolved Net_URL2 object
 */
public static function resolveUrl($relative_url, $base_url)
{
    $resolver = new Net_URL2($base_url);
    $resolved = $resolver->resolve($relative_url);

    return $resolved->getURL();
}
/**
 * Turn a possibly relative URL into an absolute one.
 *
 * An already absolute $rel is returned untouched. If resolution throws, a
 * warning is logged and $rel is returned as given.
 *
 * @param string $rel  relative or absolute URL
 * @param string $base absolute URL
 *
 * @return string absolute URL, or original URL if could not be resolved.
 */
function resolveURI($rel, $base)
{
    require_once "Net/URL2.php";

    try {
        $candidate = new Net_URL2($rel);
        if (!$candidate->isAbsolute()) {
            $resolver = new Net_URL2($base);
            return $resolver->resolve($candidate)->getURL();
        }
    } catch (Exception $e) {
        $message = 'Unable to resolve relative link "' . $rel . '" against base "' . $base . '": ' . $e->getMessage();
        common_log(LOG_WARNING, $message);
    }

    // Either $rel was absolute to begin with, or resolving it failed.
    return $rel;
}
Port.......: <?php /* echo is required: a bare expression statement prints nothing */ echo $url->port; ?> File/path..: <?php echo $url->path; ?> Querystring: <?php print_r($url->querystring); ?> Anchor.....: <?php echo $url->anchor; ?> Full URL...: <?php echo $url->getUrl(); ?> Resolve path (.././/foo/bar/joe/./././../jabba): <b><?php echo $url->resolve('.././/foo/bar/joe/./././../jabba'); ?> </b> </pre> </body> </html>
/**
 * Given a user's WikiHow profile URL, find their avatar.
 *
 * The profile page is fetched and parsed as HTML; the avatar is taken from
 * the src attribute of the element with id "avatarULimg" and resolved
 * against the profile URL.
 *
 * @param string $profileUrl user page on the wiki
 *
 * @return array of data; possible members:
 *               'avatar' => full URL to avatar image
 *               (absent when the page has no avatar element or its src is
 *               empty)
 *
 * @throws Exception on various low-level failures
 *
 * @todo pull location, web site, and about sections -- they aren't currently marked up cleanly.
 */
private function fetchProfile($profileUrl)
{
    $client = HTTPClient::start();
    $response = $client->get($profileUrl);
    if (!$response->isOk()) {
        // HTTP error response already logged.
        throw new Exception("WikiHow profile page fetch failed.");
    }

    // Suppress warnings during HTML parsing; non-well-formed bits will
    // spew horrible warning everywhere even though it works fine.
    $old = error_reporting();
    error_reporting($old & ~E_WARNING);

    $dom = new DOMDocument();
    $ok = $dom->loadHTML($response->getBody());

    error_reporting($old);

    if (!$ok) {
        throw new Exception("HTML parse failure during check for WikiHow avatar.");
    }

    $data = array();

    $avatar = $dom->getElementById('avatarULimg');
    if ($avatar) {
        $src = $avatar->getAttribute('src');

        // An empty reference would be resolved relative to the profile page
        // itself, which is not an image; report no avatar in that case.
        if ($src !== '') {
            $base = new Net_URL2($profileUrl);
            $absolute = $base->resolve($src);
            $avatarUrl = strval($absolute);

            common_log(LOG_DEBUG, "WikiHow avatar found for {$profileUrl} - {$avatarUrl}");
            $data['avatar'] = $avatarUrl;
        }
    }

    return $data;
}
/**
 * Guess if I can find the most significant image in a page.
 *
 * The page is downloaded and every <img> is tested against a list of
 * heuristics; the last image in document order that satisfies any of them is
 * chosen and its src is resolved against the page URL.
 *
 * @param string $url   address of the page to scan
 * @param string $title by reference; set to the trimmed text of the page's
 *                      <title> element (the last one, if there are several)
 *
 * @return mixed the Net_URL2::resolve() result for the chosen image's src,
 *               or NULL when the page could not be read or no image matched
 */
function find_image_in_gallery_page($url, &$title = "") {
    $page_source = file_get_contents($url);
    if ($page_source === false || $page_source === '') {
        // Nothing was downloaded, so there is nothing to parse.
        return NULL;
    }

    $base_url = new Net_URL2($url);

    // Real-world markup is rarely well-formed: collect libxml's complaints
    // internally instead of emitting warnings, then restore the old setting.
    $doc = new DOMDocument();
    $previous_setting = libxml_use_internal_errors(true);
    $doc->loadHTML($page_source);
    libxml_clear_errors();
    libxml_use_internal_errors($previous_setting);

    $tags = $doc->getElementsByTagName('title');
    foreach ($tags as $tag) {
        $title = trim($tag->textContent);
    }

    $imgs = $doc->getElementsByTagName('img');
    $favorite_image = NULL;

    foreach ($imgs as $img) {
        // A bunch of heuristics here.
        $parentNode = $img->parentNode;
        $parentNodeName = $parentNode->nodeName;
        $img_url = $img->getAttribute('src');
        $img_class = $img->getAttribute('class');
        $img_id = $img->getAttribute('id');

        // parse_url() yields no 'host' for relative references and false for
        // seriously malformed ones; treat both as "no host".
        $img_url_parts = parse_url($img_url);
        $img_host = (is_array($img_url_parts) && isset($img_url_parts['host']))
            ? $img_url_parts['host']
            : '';

        // http://g.e-hentai.org/
        $matches = $img_id == 'img'
            || $img_id == 'image'
            || $img_id == 'cursor_lupa'
            || ($img_class == 'pic' && preg_match('/imagetwist\.com/', $img_host))
            || (intval($img->getAttribute('height')) >= 350 && intval($img->getAttribute('width')) >= 350)
            || ($parentNodeName == 'a' && $parentNode->getAttribute('href') == $img_url);
        // Other patterns for scraping?

        if ($matches) {
            $favorite_image = $img;
        }
    }

    if ($favorite_image) {
        return $base_url->resolve($favorite_image->getAttribute('src'));
    }

    return NULL;
}
/**
 * Regression test for Bug #19176 (reported 2011-12-31 02:07 UTC):
 * resolve() must merge the reference into the path even when the
 * base URL has an empty path.
 *
 * @return void
 */
public function test19176()
{
    $base     = new Net_URL2('http://www.example.com');
    $resolved = $base->resolve('test.html');

    $this->assertEquals('http://www.example.com/test.html', $resolved->getURL());
}
/**
 * Test the resolve() function.
 *
 * Every reference in the table is resolved against the same base URL and
 * compared with its expected absolute form. Afterwards "http:g" is resolved
 * once more with OPTION_STRICT switched off.
 */
public function testResolve()
{
    $baseURL = 'http://a/b/c/d;p?q';

    // Examples from RFC 3986, section 5.4.
    // relative URL => absolute URL
    $expectations = array(
        ""              => "http://a/b/c/d;p?q",
        "g:h"           => "g:h",
        "g"             => "http://a/b/c/g",
        "./g"           => "http://a/b/c/g",
        "g/"            => "http://a/b/c/g/",
        "/g"            => "http://a/g",
        "//g"           => "http://g",
        "?y"            => "http://a/b/c/d;p?y",
        "g?y"           => "http://a/b/c/g?y",
        "#s"            => "http://a/b/c/d;p?q#s",
        "g#s"           => "http://a/b/c/g#s",
        "g?y#s"         => "http://a/b/c/g?y#s",
        ";x"            => "http://a/b/c/;x",
        "g;x"           => "http://a/b/c/g;x",
        "g;x?y#s"       => "http://a/b/c/g;x?y#s",
        // Same key and value as the first row; PHP keeps a single entry.
        ""              => "http://a/b/c/d;p?q",
        "."             => "http://a/b/c/",
        "./"            => "http://a/b/c/",
        ".."            => "http://a/b/",
        "../"           => "http://a/b/",
        "../g"          => "http://a/b/g",
        "../.."         => "http://a/",
        "../../"        => "http://a/",
        "../../g"       => "http://a/g",
        "../../../g"    => "http://a/g",
        "../../../../g" => "http://a/g",
        "/./g"          => "http://a/g",
        "/../g"         => "http://a/g",
        "g."            => "http://a/b/c/g.",
        ".g"            => "http://a/b/c/.g",
        "g.."           => "http://a/b/c/g..",
        "..g"           => "http://a/b/c/..g",
        "./../g"        => "http://a/b/g",
        "./g/."         => "http://a/b/c/g/",
        "g/./h"         => "http://a/b/c/g/h",
        "g/../h"        => "http://a/b/c/h",
        "g;x=1/./y"     => "http://a/b/c/g;x=1/y",
        "g;x=1/../y"    => "http://a/b/c/y",
        "g?y/./x"       => "http://a/b/c/g?y/./x",
        "g?y/../x"      => "http://a/b/c/g?y/../x",
        "g#s/./x"       => "http://a/b/c/g#s/./x",
        "g#s/../x"      => "http://a/b/c/g#s/../x",
        "http:g"        => "http:g"
    );

    $strictResolver = new Net_URL2($baseURL);
    foreach ($expectations as $reference => $expected) {
        $actual = $strictResolver->resolve($reference)->getURL();
        $this->assertEquals($expected, $actual);
    }

    // With strict mode disabled the same reference is expected to resolve
    // like a plain relative path.
    $lenientResolver = new Net_URL2($baseURL, array(Net_URL2::OPTION_STRICT => false));
    $this->assertEquals('http://a/b/c/g', $lenientResolver->resolve('http:g')->getURL());
}
/**
 * Resolve a discovered reference against its base URL and queue the result.
 *
 * @param string $base_url URL the reference is resolved against
 * @param mixed  $relative value of the src/href attribute; any falsy value
 *                         is ignored
 *
 * @return void
 */
private function reportFoundUrl($base_url, $relative)
{
    // only act when the src/href attribute is actually present
    if ($relative) {
        $base  = new \Net_URL2($base_url);
        $found = $base->resolve($relative);

        $this->queueUrl((string) $found);
    }
}
}
// NOTE(review): $start for this first timing is assigned above this excerpt,
// so the statement is left as it was. Both ends of that measurement should
// use microtime(true) as well, see the sections below.
$elapsed = microtime() - $start;
echo "url_to_absolute: successes -> {$successes}, failures => {$failures}, elapsed time: {$elapsed}\n";

# phpuri
// microtime(true) returns a float number of seconds; without the argument it
// returns a "msec sec" string, which cannot be subtracted meaningfully.
$start = microtime(true);
list($successes, $failures) = array(0, 0);
foreach ($tests as $test) {
    $base = phpUri::parse($test['base']);
    if (($r = $base->join($test['rel'])) == $test['result']) {
        $successes++;
    } else {
        $failures++;
        echo "phpuri failure: {$r} instead of " . $test['result'] . " \n";
    }
}
$elapsed = microtime(true) - $start;
echo "phpuri: successes -> {$successes}, failures => {$failures}, elapsed time: {$elapsed}\n";

# net_url2
$start = microtime(true);
list($successes, $failures) = array(0, 0);
foreach ($tests as $test) {
    $base = new Net_URL2($test['base']);
    if (($r = $base->resolve($test['rel'])) == $test['result']) {
        $successes++;
    } else {
        $failures++;
        echo "net_url2 failure: {$r} instead of " . $test['result'] . " \n";
    }
}
$elapsed = microtime(true) - $start;
echo "net_url2: successes -> {$successes}, failures => {$failures}, elapsed time: {$elapsed}\n";
/**
 * Regression test for Bug #20158 (reported 2013-12-28 14:49 UTC):
 * a fragment-only reference must be resolvable against a base URI that is
 * not absolute.
 *
 * @covers Net_URL2::resolve
 * @covers Net_URL2::_isFragmentOnly
 * @return void
 */
public function test20158()
{
    $relativeBase = new Net_URL2('myfile.html');
    $withFragment = (string) $relativeBase->resolve('#world');

    $this->assertSame('myfile.html#world', $withFragment);
}