예제 #1
0
/**
 * Mirror a dev tip's remote assets locally and point the tip at the copies.
 *
 * For every asset URL of the tip (URLs containing "/sponsor/" are skipped),
 * occurrences of the URL in the tip content are replaced by the local path
 * /web/updates/images/<assetPath>/<basename>, and the file is downloaded into
 * $updates_dir . 'images/<assetPath>/' unless it already exists there.
 * The first non-sponsor asset becomes the tip's 'featured-image'.
 *
 * @param DevTip $tip tip to process; its 'content' and 'featured-image'
 *                    fields are updated through $tip->set()
 *
 * @return void
 */
function devtips_extract(DevTip $tip)
{
    global $updates_dir;
    $assetPath = getFileName($tip->get('date'), $tip->get('title'));
    $assetPath = str_replace(".markdown", "", $assetPath);
    $assetDir = $updates_dir . 'images/' . $assetPath;
    $publicDir = '/web/updates/images/' . $assetPath . '/';
    # create new asset directory based on new filename
    if (!file_exists($assetDir)) {
        mkdir($assetDir);
        chmod($assetDir, 0777);
    }
    # Download and store each asset
    // The base URL is loop-invariant, so build the resolver once.
    $base = new Net_URL2('https://umaar.com/dev-tips/');
    $featured = false;
    foreach ($tip->get('assets') as $url) {
        if (strpos($url, "/sponsor/") !== false) {
            continue;
        }
        $abs = $base->resolve($url);
        $basename = pathinfo($url)['basename'];
        $dest = $assetDir . '/' . $basename;
        $localUrl = $publicDir . $basename;
        $tip->set('content', str_replace($url, $localUrl, $tip->get('content')));
        if (!$featured) {
            $tip->set('featured-image', $localUrl);
            // Remember that a featured image was chosen; without this flag
            // every later asset would overwrite it.
            $featured = true;
        }
        if (file_exists($dest)) {
            // Already downloaded on a previous run.
            continue;
        }
        set_time_limit(0);
        // This is the file where we save the downloaded data
        $fp = fopen($dest, 'w+');
        if ($fp === false) {
            // Destination not writable: skip this asset instead of handing
            // an invalid handle to cURL.
            continue;
        }
        // Here is the file we are downloading, replace spaces with %20
        $ch = curl_init(str_replace(" ", "%20", (string) $abs));
        if ($ch === false) {
            fclose($fp);
            unlink($dest);
            continue;
        }
        curl_setopt($ch, CURLOPT_TIMEOUT, 50);
        // write curl response to file
        curl_setopt($ch, CURLOPT_FILE, $fp);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        $ok = curl_exec($ch);
        curl_close($ch);
        fclose($fp);
        if ($ok === false) {
            // Remove the partial file so the file_exists() check above does
            // not suppress a retry on the next run.
            unlink($dest);
            continue;
        }
        // set proper chmod
        chmod($dest, 0777);
    }
}
예제 #2
0
파일: Atom.php 프로젝트: cweiske/phinde
 /**
  * Collect the rel="alternate" links of all entries in an Atom feed response.
  *
  * Each link href is resolved against the effective URL of the response.
  * A resolved URL is handled only once: any link (alternate or not) marks
  * its URL as seen, and later links with the same URL are skipped.
  *
  * @param \HTTP_Request2_Response $res response whose body holds the feed XML
  *
  * @return array list of LinkInfo objects; empty when the body is not
  *               parseable XML or contains no alternate links
  */
 public function extract(\HTTP_Request2_Response $res)
 {
     $url = $res->getEffectiveUrl();
     $base = new \Net_URL2($url);
     $linkInfos = array();
     $sx = simplexml_load_string($res->getBody());
     if ($sx === false) {
         // Unparseable body: there is nothing to iterate, so report no links
         // instead of dereferencing false.
         return $linkInfos;
     }
     $alreadySeen = array();
     foreach ($sx->entry as $entry) {
         $linkTitle = (string) $entry->title;
         foreach ($entry->link as $xlink) {
             $linkUrl = (string) $base->resolve((string) $xlink['href']);
             if (isset($alreadySeen[$linkUrl])) {
                 continue;
             }
             if ((string) $xlink['rel'] === 'alternate') {
                 $linkInfos[] = new LinkInfo($linkUrl, $linkTitle, $url);
             }
             $alreadySeen[$linkUrl] = true;
         }
     }
     return $linkInfos;
 }
예제 #3
0
 /**
  * Resolve a relative URL against the absolute URL it belongs to.
  *
  * @param string $relativeUrl the relative URL to resolve
  * @param string $absoluteUrl the absolute URL (at least scheme and host)
  *                            used as the base
  * @return mixed the result of Net_URL2::resolve() for the pair.
  *               NOTE(review): previously documented as string, but the
  *               value is returned without a string cast -- confirm
  *               against callers.
  */
 protected function relativeToAbsoluteUrl($relativeUrl, $absoluteUrl)
 {
     return (new \Net_URL2($absoluteUrl))->resolve($relativeUrl);
 }
 /**
  * Resolve a URL reference against a base URL.
  *
  * @param string $relative_url reference to resolve
  * @param string $base_url     URL the reference is resolved against
  *
  * @return string value of getURL() on the resolved Net_URL2 object
  */
 public static function resolveUrl($relative_url, $base_url)
 {
     $resolver = new Net_URL2($base_url);
     $resolved = $resolver->resolve($relative_url);

     return $resolved->getURL();
 }
예제 #5
0
 /**
  * Turn a possibly relative URL into an absolute one using a base URL.
  *
  * An already absolute $rel is handed back untouched. If anything throws
  * while parsing or resolving, a warning is logged and $rel is returned
  * as given.
  *
  * @param string $rel  relative or absolute URL
  * @param string $base absolute URL to resolve against
  * @return string absolute URL, or the original URL if it could not be resolved
  */
 function resolveURI($rel, $base)
 {
     require_once "Net/URL2.php";
     try {
         $candidate = new Net_URL2($rel);
         if (!$candidate->isAbsolute()) {
             $resolver = new Net_URL2($base);
             return $resolver->resolve($candidate)->getURL();
         }
         return $rel;
     } catch (Exception $e) {
         $message = 'Unable to resolve relative link "' . $rel . '" against base "' . $base . '": ' . $e->getMessage();
         common_log(LOG_WARNING, $message);
         return $rel;
     }
 }
예제 #6
0
Port.......: <?php 
// Tail of a template that prints the parts of $url; every field must be
// echoed explicitly -- a bare property access produces no output.
echo $url->port;
?>

File/path..: <?php 
echo $url->path;
?>

Querystring: <?php 
print_r($url->querystring);
?>

Anchor.....: <?php 
echo $url->anchor;
?>

Full URL...: <?php 
echo $url->getUrl();
?>


Resolve path (.././/foo/bar/joe/./././../jabba): <b><?php 
echo $url->resolve('.././/foo/bar/joe/./././../jabba');
?>
</b>
</pre>

</body>
</html>
 /**
  * Given a user's WikiHow profile URL, find their avatar.
  *
  * Fetches the profile page, parses it as HTML and looks up the element
  * with id "avatarULimg"; its src attribute is resolved against the
  * profile URL.
  *
  * @param string $profileUrl user page on the wiki
  *
  * @return array of data; possible members:
  *               'avatar' => full URL to avatar image
  *               (empty array when no avatar element is present)
  *
  * @throws Exception when the page fetch is not OK or the HTML cannot be parsed
  *
  * @todo pull location, web site, and about sections -- they aren't currently marked up cleanly.
  */
 private function fetchProfile($profileUrl)
 {
     $client = HTTPClient::start();
     $response = $client->get($profileUrl);
     if (!$response->isOk()) {
         // HTTP error response already logged.
         throw new Exception("WikiHow profile page fetch failed.");
     }
     // Suppress warnings during HTML parsing; non-well-formed bits will
     // spew horrible warning everywhere even though it works fine.
     $old = error_reporting();
     error_reporting($old & ~E_WARNING);
     $dom = new DOMDocument();
     $ok = $dom->loadHTML($response->getBody());
     // Restore the caller's reporting level before acting on the result.
     error_reporting($old);
     if (!$ok) {
         throw new Exception("HTML parse failure during check for WikiHow avatar.");
     }
     $data = array();
     $avatar = $dom->getElementById('avatarULimg');
     if ($avatar) {
         $src = $avatar->getAttribute('src');
         // The src may be relative; resolve it against the profile URL.
         $base = new Net_URL2($profileUrl);
         $absolute = $base->resolve($src);
         $avatarUrl = strval($absolute);
         common_log(LOG_DEBUG, "WikiHow avatar found for {$profileUrl} - {$avatarUrl}");
         $data['avatar'] = $avatarUrl;
     }
     return $data;
 }
예제 #8
0
/**
 * Guess if I can find the most significant image in a page.
 *
 * The page is fetched and parsed as HTML, then every <img> is run through a
 * set of heuristics (well-known ids, an imagetwist "pic" class, a minimum
 * 350x350 declared size, or being wrapped in a link to itself). The LAST
 * image that matches any heuristic wins.
 *
 * @param string $url   address of the gallery page
 * @param string $title by reference; set to the trimmed text of the page's
 *                      <title> element (the last one if there are several),
 *                      left untouched when the page has none or cannot be
 *                      fetched
 *
 * @return mixed the chosen image's src resolved against $url via
 *               Net_URL2::resolve(), or null when the page could not be
 *               fetched or no image matched
 */
function find_image_in_gallery_page($url, &$title = "")
{
    $page_source = file_get_contents($url);
    if ($page_source === false || $page_source === '') {
        // Nothing to parse: there cannot be a matching image.
        return null;
    }
    $base_url = new Net_URL2($url);
    $doc = new DOMDocument();
    // Real-world gallery markup is rarely well-formed; parser warnings are
    // deliberately silenced.
    @$doc->loadHTML($page_source);
    foreach ($doc->getElementsByTagName('title') as $tag) {
        $title = trim($tag->textContent);
    }
    $known_ids = array('img', 'image', 'cursor_lupa');
    $favorite_image = null;
    foreach ($doc->getElementsByTagName('img') as $img) {
        // A bunch of heuristics here.
        $parentNode = $img->parentNode;
        $img_url = $img->getAttribute('src');
        $img_url_parts = parse_url($img_url);
        // Relative or malformed src values have no host component.
        $img_host = isset($img_url_parts['host']) ? $img_url_parts['host'] : '';
        if (in_array($img->getAttribute('id'), $known_ids, true)) {
            // ids used by known gallery sites, e.g. http://g.e-hentai.org/
            $favorite_image = $img;
        } elseif ($img->getAttribute('class') == 'pic' && preg_match('/imagetwist\.com/', $img_host)) {
            $favorite_image = $img;
        } elseif (intval($img->getAttribute('height')) >= 350 && intval($img->getAttribute('width')) >= 350) {
            $favorite_image = $img;
        } elseif ($parentNode->nodeName == 'a' && $parentNode->getAttribute('href') == $img_url) {
            // Image wrapped in a link that points at the image itself.
            $favorite_image = $img;
        }
        // Other patterns for scraping?
    }
    if ($favorite_image) {
        return $base_url->resolve($favorite_image->getAttribute('src'));
    }
    return null;
}
예제 #9
0
 /**
  * Regression test for Bug #19176 (reported 2011-12-31 02:07 UTC):
  * resolve() must merge the relative path even when the base URL
  * has an empty path.
  *
  * @return void
  */
 public function test19176()
 {
     $base = new Net_URL2('http://www.example.com');
     $resolved = $base->resolve('test.html');

     $this->assertEquals('http://www.example.com/test.html', $resolved->getURL());
 }
예제 #10
0
 /**
  * Test the resolve() function.
  *
  * Runs a table of reference => expected-URL pairs against the base
  * 'http://a/b/c/d;p?q', then checks that 'http:g' resolves differently
  * once the strict option is switched off.
  */
 public function testResolve()
 {
     $baseURL = 'http://a/b/c/d;p?q';

     // Examples from RFC 3986, section 5.4.
     // relative URL => absolute URL
     $expectations = array(
         "" => "http://a/b/c/d;p?q",
         "g:h" => "g:h",
         "g" => "http://a/b/c/g",
         "./g" => "http://a/b/c/g",
         "g/" => "http://a/b/c/g/",
         "/g" => "http://a/g",
         "//g" => "http://g",
         "?y" => "http://a/b/c/d;p?y",
         "g?y" => "http://a/b/c/g?y",
         "#s" => "http://a/b/c/d;p?q#s",
         "g#s" => "http://a/b/c/g#s",
         "g?y#s" => "http://a/b/c/g?y#s",
         ";x" => "http://a/b/c/;x",
         "g;x" => "http://a/b/c/g;x",
         "g;x?y#s" => "http://a/b/c/g;x?y#s",
         // repeated entry: same key and value as the first row
         "" => "http://a/b/c/d;p?q",
         "." => "http://a/b/c/",
         "./" => "http://a/b/c/",
         ".." => "http://a/b/",
         "../" => "http://a/b/",
         "../g" => "http://a/b/g",
         "../.." => "http://a/",
         "../../" => "http://a/",
         "../../g" => "http://a/g",
         "../../../g" => "http://a/g",
         "../../../../g" => "http://a/g",
         "/./g" => "http://a/g",
         "/../g" => "http://a/g",
         "g." => "http://a/b/c/g.",
         ".g" => "http://a/b/c/.g",
         "g.." => "http://a/b/c/g..",
         "..g" => "http://a/b/c/..g",
         "./../g" => "http://a/b/g",
         "./g/." => "http://a/b/c/g/",
         "g/./h" => "http://a/b/c/g/h",
         "g/../h" => "http://a/b/c/h",
         "g;x=1/./y" => "http://a/b/c/g;x=1/y",
         "g;x=1/../y" => "http://a/b/c/y",
         "g?y/./x" => "http://a/b/c/g?y/./x",
         "g?y/../x" => "http://a/b/c/g?y/../x",
         "g#s/./x" => "http://a/b/c/g#s/./x",
         "g#s/../x" => "http://a/b/c/g#s/../x",
         "http:g" => "http:g",
     );

     $strictBase = new Net_URL2($baseURL);
     foreach ($expectations as $reference => $expected) {
         $actual = $strictBase->resolve($reference)->getURL();
         $this->assertEquals($expected, $actual);
     }

     // With strict mode disabled, the same-scheme reference is merged
     // into the base path instead of being returned as-is.
     $lenientBase = new Net_URL2($baseURL, array(Net_URL2::OPTION_STRICT => false));
     $actual = $lenientBase->resolve('http:g')->getURL();
     $this->assertEquals('http://a/b/c/g', $actual);
 }
 /**
  * Queue a URL found in a document after making it absolute.
  *
  * @param string $base_url URL the reference is resolved against
  * @param string $relative value of the src/href attribute; a falsy value
  *                         (missing attribute) is ignored
  *
  * @return void
  */
 private function reportFoundUrl($base_url, $relative)
 {
     // only act when the src/href attribute is actually present
     if ($relative) {
         $base = new \Net_URL2($base_url);
         $this->queueUrl((string) $base->resolve($relative));
     }
 }
예제 #12
0
파일: test.php 프로젝트: qdinar/phpuri
}
$elapsed = microtime() - $start;
echo "url_to_absolute: successes -> {$successes}, failures => {$failures}, elapsed time: {$elapsed}\n";
# phpuri
// Run every test case through phpUri and count matches/mismatches.
// microtime(true) returns a float number of seconds; the argument-less form
// returns a "msec sec" string, which cannot be subtracted meaningfully.
$start = microtime(true);
list($successes, $failures) = array(0, 0);
foreach ($tests as $test) {
    $base = phpUri::parse($test['base']);
    if (($r = $base->join($test['rel'])) == $test['result']) {
        $successes++;
    } else {
        $failures++;
        echo "phpuri failure: {$r} instead of " . $test['result'] . " \n";
    }
}
$elapsed = microtime(true) - $start;
echo "phpuri:          successes -> {$successes}, failures => {$failures}, elapsed time: {$elapsed}\n";
# net_url2
// Same run against Net_URL2::resolve(); the loose == compares the resolved
// value with the expected string.
$start = microtime(true);
list($successes, $failures) = array(0, 0);
foreach ($tests as $test) {
    $base = new Net_URL2($test['base']);
    if (($r = $base->resolve($test['rel'])) == $test['result']) {
        $successes++;
    } else {
        $failures++;
        echo "net_url2 failure: {$r} instead of " . $test['result'] . " \n";
    }
}
$elapsed = microtime(true) - $start;
echo "net_url2:          successes -> {$successes}, failures => {$failures}, elapsed time: {$elapsed}\n";
예제 #13
0
 /**
  * Regression test for Bug #20158 (reported 2013-12-28 14:49 UTC):
  * a fragment-only reference must be resolvable against a base URI
  * that is itself not absolute.
  *
  * @covers Net_URL2::resolve
  * @covers Net_URL2::_isFragmentOnly
  * @return void
  */
 public function test20158()
 {
     $relativeBase = new Net_URL2('myfile.html');

     $this->assertSame('myfile.html#world', (string) $relativeBase->resolve('#world'));
 }