Beispiel #1
0
/**
 * Fetch the content of a remote URL, going through two caches first.
 *
 * Placeholders of the form [key] in $url are replaced (case-insensitively)
 * with the URL-encoded value of the matching $userdata entry. The resulting
 * URL is looked up in the in-memory cache ($GLOBALS['urlcache']) and then in
 * the database page cache. Only when neither holds a copy younger than
 * REMOTE_URL_REFETCH_TIMEOUT seconds is the page requested over HTTP with
 * PEAR HTTP_Request: a HEAD request first, followed by a GET request.
 *
 * @param string $url      URL to fetch, may contain [key] placeholders
 * @param array  $userdata map of placeholder name => replacement value
 *
 * @return mixed the page content; 0 when a request failed or the HEAD
 *               request was answered with a response code other than 200
 */
function fetchUrl($url, $userdata = array())
{
    # The key is matched literally, so characters that would be special in a
    # regular expression (".", "+", "(" ...) need no escaping.
    foreach ($userdata as $key => $val) {
        $url = str_ireplace("[{$key}]", urlencode($val), $url);
    }

    if (!isset($GLOBALS['urlcache'])) {
        $GLOBALS['urlcache'] = array();
    }

    # keep in memory cache in case we send a page to many emails
    if (isset($GLOBALS['urlcache'][$url])
        && is_array($GLOBALS['urlcache'][$url])
        && time() - $GLOBALS['urlcache'][$url]['fetched'] < REMOTE_URL_REFETCH_TIMEOUT
    ) {
        return $GLOBALS['urlcache'][$url]['content'];
    }

    # a database copy that is still recent enough is returned as it is
    $dbcache_lastmodified = getPageCacheLastModified($url);
    if (time() - $dbcache_lastmodified < REMOTE_URL_REFETCH_TIMEOUT) {
        return getPageCache($url);
    }

    # the HTTP client is only needed once both caches have missed
    require_once 'HTTP/Request.php';

    # add a small timeout, although the biggest timeout will exist in doing the DNS lookup,
    # so it won't make too much of a difference
    $request_parameters = array('timeout' => 10, 'allowRedirects' => 1, 'method' => 'HEAD');
    $userAgent = 'phplist v' . VERSION . ' (http://www.phplist.com)';

    # probe the page with a HEAD request before downloading the body
    $headreq = new HTTP_Request($url, $request_parameters);
    $headreq->addHeader('User-Agent', $userAgent);
    if (PEAR::isError($headreq->sendRequest(false))) {
        logEvent('Fetching ' . $url . ' failed');
        return 0;
    }

    $code = $headreq->getResponseCode();
    if ($code != 200) {
        logEvent('Fetching ' . $url . ' failed, error code ' . $code);
        return 0;
    }

    ## relying on the last modified header doesn't work for many pages
    ## use current time instead
    ## see http://mantis.phplist.com/view.php?id=7684
    $lastmodified = time();

    $content = getPageCache($url, $lastmodified);
    if ($content) {
        logEvent($url . ' was cached in database');
    } else {
        $request_parameters['method'] = 'GET';
        $req = new HTTP_Request($url, $request_parameters);
        $req->addHeader('User-Agent', $userAgent);
        logEvent('Fetching ' . $url);
        if (PEAR::isError($req->sendRequest(true))) {
            logEvent('Fetching ' . $url . ' failed');
            return 0;
        }
        $content = addAbsoluteResources($req->getResponseBody(), $url);
        logEvent('Fetching ' . $url . ' success');
        setPageCache($url, $lastmodified, $content);
    }

    # remember the result for further calls within this run
    $GLOBALS['urlcache'][$url] = array('fetched' => time(), 'content' => $content);

    return $content;
}
Beispiel #2
0
/**
 * Fetch the content of a remote URL, going through two caches first.
 *
 * "&amp;" in $url is turned back into "&", and placeholders of the form
 * [key] are replaced (case-insensitively) with the URL-encoded value of the
 * matching $userdata entry; the "password" entry is never substituted. The
 * URL is then passed through expandUrl() and looked up in the in-memory
 * cache ($GLOBALS['urlcache']) and in the database page cache. When neither
 * holds a copy younger than REMOTE_URL_REFETCH_TIMEOUT seconds the page is
 * fetched with fetchUrlCurl() if the cURL extension is loaded, otherwise
 * with fetchUrlPear() if $GLOBALS['has_pear_http_request'] is set.
 *
 * @param string $url      URL to fetch, may contain [key] placeholders
 * @param array  $userdata map of placeholder name => replacement value
 *
 * @return mixed the page content; the empty result of the fetch helper when
 *               nothing was retrieved; false when neither cURL nor PEAR
 *               HTTP_Request is available
 */
function fetchUrl($url, $userdata = array())
{
    $content = '';

    ## fix the Editor replacing & with &amp;
    $url = str_ireplace('&amp;', '&', $url);

    # The placeholders are replaced on the raw bytes of the URL. Converting
    # the URL to ISO-8859-1 and back around the replacement would turn every
    # character outside that charset into "?".
    foreach ($userdata as $key => $val) {
        if ($key !== 'password') {
            $url = str_ireplace("[{$key}]", urlencode($val), $url);
        }
    }

    if (!isset($GLOBALS['urlcache'])) {
        $GLOBALS['urlcache'] = array();
    }

    $url = expandUrl($url);

    # keep in memory cache in case we send a page to many emails
    if (isset($GLOBALS['urlcache'][$url])
        && is_array($GLOBALS['urlcache'][$url])
        && time() - $GLOBALS['urlcache'][$url]['fetched'] < REMOTE_URL_REFETCH_TIMEOUT
    ) {
        if (VERBOSE && function_exists('output')) {
            output('From memory cache: ' . $url);
        }
        return $GLOBALS['urlcache'][$url]['content'];
    }

    # a database copy that is still recent enough is returned as it is
    $dbcache_lastmodified = getPageCacheLastModified($url);
    if (time() - $dbcache_lastmodified < REMOTE_URL_REFETCH_TIMEOUT) {
        if (VERBOSE && function_exists('output')) {
            output('From database cache: ' . $url);
        }
        return getPageCache($url);
    }

    $request_parameters = array('timeout' => 600, 'allowRedirects' => 1, 'method' => 'HEAD');

    ## relying on the last modified header doesn't work for many pages
    ## use current time instead
    ## see http://mantis.phplist.com/view.php?id=7684
    $lastmodified = time();

    $cache = getPageCache($url, $lastmodified);
    if ($cache) {
        if (VERBOSE) {
            logEvent($url . ' was cached in database');
        }
        $content = $cache;
    } elseif (function_exists('curl_init')) {
        $content = fetchUrlCurl($url, $request_parameters);
    } elseif (!empty($GLOBALS['has_pear_http_request'])) {
        ## @#TODO, make it work with HTTP/Request2.php when has_pear_http_request is 2
        # NOTE(review): the error suppression is kept as it was; confirm
        # whether a missing PEAR package should be reported instead
        @(require_once 'HTTP/Request.php');
        $content = fetchUrlPear($url, $request_parameters);
    } else {
        # no HTTP client available
        return false;
    }

    if (!empty($content)) {
        $content = addAbsoluteResources($content, $url);
        logEvent('Fetching ' . $url . ' success');
        setPageCache($url, $lastmodified, $content);
        # remember the result for further calls within this run
        $GLOBALS['urlcache'][$url] = array('fetched' => time(), 'content' => $content);
    }

    return $content;
}