/**
 * Fetch the content of a remote URL, consulting the in-memory cache and the
 * database page cache before going out to the network.
 *
 * Placeholders of the form [key] in the URL are replaced (case-insensitively)
 * by the URL-encoded value of the matching $userdata entry. The "password"
 * entry is never substituted into the URL.
 *
 * @param string $url      URL to fetch, may contain [key] placeholders
 * @param array  $userdata map of placeholder name => value
 *
 * @return mixed the page content (passed through addAbsoluteResources() when
 *               freshly fetched or read via the timestamped cache lookup),
 *               the raw getPageCache() result when the database cache is
 *               still fresh, the empty result of the transport helper when
 *               nothing was retrieved, or false when neither curl nor PEAR
 *               HTTP_Request is available
 */
function fetchUrl($url, $userdata = array())
{
    $content = '';

    ## undo the editor turning "&" into "&amp;" inside the URL
    $url = str_ireplace('&amp;', '&', $url);

    foreach ($userdata as $key => $val) {
        # strict comparison: a loose "!=" treats integer key 0 as equal to
        # 'password' on PHP < 8 and would silently skip that placeholder
        if ($key !== 'password') {
            # NOTE(review): utf8_encode()/utf8_decode() are deprecated as of
            # PHP 8.2; the round trip is kept so existing URLs resolve the same
            $url = utf8_encode(str_ireplace("[{$key}]", urlencode($val), utf8_decode($url)));
        }
    }

    if (!isset($GLOBALS['urlcache'])) {
        $GLOBALS['urlcache'] = array();
    }

    $url = expandUrl($url);

    # keep in memory cache in case we send a page to many emails
    if (isset($GLOBALS['urlcache'][$url])
        && is_array($GLOBALS['urlcache'][$url])
        && time() - $GLOBALS['urlcache'][$url]['fetched'] < REMOTE_URL_REFETCH_TIMEOUT
    ) {
        if (VERBOSE && function_exists('output')) {
            output('From memory cache: ' . $url);
        }

        return $GLOBALS['urlcache'][$url]['content'];
    }

    # next, fall back to the database cache while it is still fresh enough
    $dbcache_lastmodified = getPageCacheLastModified($url);
    $timeout = time() - $dbcache_lastmodified;
    if ($timeout < REMOTE_URL_REFETCH_TIMEOUT) {
        if (VERBOSE && function_exists('output')) {
            output('From database cache: ' . $url);
        }

        return getPageCache($url);
    }

    $request_parameters = array('timeout' => 600, 'allowRedirects' => 1, 'method' => 'HEAD');

    ## relying on the last modified header doesn't work for many pages
    ## use current time instead
    ## see http://mantis.phplist.com/view.php?id=7684
    $lastmodified = time();

    $cache = getPageCache($url, $lastmodified);
    if (!$cache) {
        if (function_exists('curl_init')) {
            $content = fetchUrlCurl($url, $request_parameters);
        } elseif (!empty($GLOBALS['has_pear_http_request'])) {
            ## @#TODO, make it work with HTTP/Request2.php as well
            @(require_once 'HTTP/Request.php');
            $content = fetchUrlPear($url, $request_parameters);
        } else {
            # no HTTP transport available at all
            return false;
        }
    } else {
        if (VERBOSE) {
            logEvent($url . ' was cached in database');
        }
        $content = $cache;
    }

    # only cache non-empty results, so a failed fetch is retried next time
    if (!empty($content)) {
        $content = addAbsoluteResources($content, $url);
        logEvent('Fetching ' . $url . ' success');
        setPageCache($url, $lastmodified, $content);
        $GLOBALS['urlcache'][$url] = array('fetched' => time(), 'content' => $content);
    }

    return $content;
}
/**
 * Fetch the content of a remote URL through PEAR HTTP_Request, consulting
 * the in-memory cache and the database page cache first.
 *
 * Placeholders of the form [key] in the URL are replaced (case-insensitively)
 * by the URL-encoded value of the matching $userdata entry.
 *
 * A HEAD request is sent first; the body is only downloaded with a GET when
 * the HEAD succeeded with status 200 and the database cache has no entry for
 * the current timestamp.
 *
 * @param string $url      URL to fetch, may contain [key] placeholders
 * @param array  $userdata map of placeholder name => value
 *
 * @return mixed the page content, or 0 when a request failed or the HEAD
 *               request answered with anything other than 200
 */
function fetchUrl($url, $userdata = array())
{
    require_once "HTTP/Request.php";

    # eregi_replace() no longer exists as of PHP 7; the placeholder is a
    # literal token, so a case-insensitive plain replace does the same job
    foreach ($userdata as $key => $val) {
        $url = str_ireplace("[{$key}]", urlencode($val), $url);
    }

    if (!isset($GLOBALS['urlcache'])) {
        $GLOBALS['urlcache'] = array();
    }

    # keep in memory cache in case we send a page to many emails
    if (isset($GLOBALS['urlcache'][$url])
        && is_array($GLOBALS['urlcache'][$url])
        && time() - $GLOBALS['urlcache'][$url]['fetched'] < REMOTE_URL_REFETCH_TIMEOUT
    ) {
        return $GLOBALS['urlcache'][$url]['content'];
    }

    # next, fall back to the database cache while it is still fresh enough
    $dbcache_lastmodified = getPageCacheLastModified($url);
    $timeout = time() - $dbcache_lastmodified;
    if ($timeout < REMOTE_URL_REFETCH_TIMEOUT) {
        return getPageCache($url);
    }

    # add a small timeout, although the biggest timeout will exist in doing the DNS lookup,
    # so it won't make too much of a difference
    $request_parameters = array('timeout' => 10, 'allowRedirects' => 1, 'method' => 'HEAD');

    # "=& new" is a parse error on PHP 7+; objects are handles already
    $headreq = new HTTP_Request($url, $request_parameters);
    $headreq->addHeader('User-Agent', 'phplist v' . VERSION . ' (http://www.phplist.com)');
    if (PEAR::isError($headreq->sendRequest(false))) {
        logEvent('Fetching ' . $url . ' failed');

        return 0;
    }

    $code = $headreq->getResponseCode();
    if ($code != 200) {
        logEvent('Fetching ' . $url . ' failed, error code ' . $code);

        return 0;
    }

    ## relying on the last modified header doesn't work for many pages
    ## use current time instead
    ## see http://mantis.phplist.com/view.php?id=7684
    $lastmodified = time();

    $cache = getPageCache($url, $lastmodified);
    if ($cache) {
        logEvent($url . ' was cached in database');
        $content = $cache;
    } else {
        $request_parameters['method'] = 'GET';
        $req = new HTTP_Request($url, $request_parameters);
        $req->addHeader('User-Agent', 'phplist v' . VERSION . ' (http://www.phplist.com)');
        logEvent('Fetching ' . $url);
        if (PEAR::isError($req->sendRequest(true))) {
            logEvent('Fetching ' . $url . ' failed');

            return 0;
        }

        $content = $req->getResponseBody();
        $content = addAbsoluteResources($content, $url);
        logEvent('Fetching ' . $url . ' success');
        setPageCache($url, $lastmodified, $content);
    }

    $GLOBALS['urlcache'][$url] = array('fetched' => time(), 'content' => $content);

    return $content;
}
/**
 * Download the page behind a URL and mail it out as an HTML message.
 *
 * The HTML is passed through addAbsoluteResources() with the current
 * request's host as base, and a plain-text body is derived from it with
 * HTML2Text() before both are handed to htmlEmail().
 *
 * @param string $sTo      forwarded to htmlEmail() as the recipient argument
 * @param string $sFrom    forwarded to htmlEmail() twice (2nd and 6th argument)
 * @param string $sSubject forwarded to htmlEmail() as the subject argument
 * @param string $sUrl     location of the page to read
 * @return boolean whatever htmlEmail() returns, or false when the URL
 *                 produced no content
 */
function mailURL($sTo, $sFrom, $sSubject, $sUrl)
{
    $sHtmlBody = file_get_contents($sUrl);

    # nothing usable came back: report it and bail out
    if (!$sHtmlBody) {
        Error("URL {$sUrl} could not be opened");

        return false;
    }

    $sBaseUrl = sprintf('http://%s/', $_SERVER['HTTP_HOST']);
    $sHtmlBody = addAbsoluteResources($sHtmlBody, $sBaseUrl);
    $sTextBody = HTML2Text($sHtmlBody);

    return htmlEmail($sTo, $sFrom, $sSubject, $sTextBody, '', $sFrom, $sHtmlBody);
}