/**
 * Collect the feed URIs that can be discovered from a URI.
 *
 * When the fetched document is itself a feed, its own URI is the only
 * discovered candidate. Otherwise candidates come from <link rel> autodiscovery
 * plus feed-looking anchors, falling back to a broader anchor search when
 * nothing was found. URL-manipulation guesses are always appended.
 *
 * @param string|null $uri URI to inspect; forwarded to data() and is_feed()
 * @return array list of absolute feed URIs (re-indexed from 0)
 */
function find($uri = NULL)
{
    // Nothing could be loaded for this URI: no candidates at all.
    if (is_null($this->data($uri))) {
        return array();
    }

    if ($this->is_feed($uri)) {
        $candidates = array($this->uri);
    } else {
        // Assume that we have HTML or XHTML (even if we don't, who's it gonna hurt?)
        // Autodiscovery is the preferred method, but we'll also take the
        // little orange buttons.
        $candidates = array_merge($this->_link_rel_feeds(), $this->_a_href_feeds(TRUE));

        // If all that failed, look harder.
        if (empty($candidates)) {
            $candidates = $this->_a_href_feeds(FALSE);
        }

        // Our search may turn up duplicate URIs. We only need to do any given
        // URI once. Props to Camilo.
        $candidates = array_unique($candidates);
    }

    // Try some clever little URL tricks before we go.
    $candidates = array_unique(array_merge($candidates, $this->_url_manipulation_feeds()));

    // Verify feeds and resolve relative URIs.
    $found = array();
    foreach ($candidates as $candidate) {
        $resolved = SimplePie_Misc::absolutize_url($candidate, $this->uri);

        // The document's own URI is never re-fetched for verification.
        $needsCheck = $this->verify && $candidate != $this->uri && $resolved != $this->uri;
        if (!$needsCheck) {
            $found[] = $resolved;
            continue;
        }

        $probe = new FeedFinder($resolved);
        if ($probe->is_feed()) {
            $found[] = $resolved;
        }
        unset($probe);
    }

    return array_values($found);
}
/**
 * Tries to get a favicon from a page.
 *
 * Fetches the page through getFile(), parses the body as HTML and takes the
 * href of the first <link> element whose rel attribute contains "icon",
 * resolved against the page URL.
 *
 * @param string $url the url to the page
 * @return string|null the absolute url of the favicon, or null when the page
 *                     is empty, cannot be parsed, or declares no usable icon
 */
protected function extractFromPage($url)
{
    if (!$url) {
        return null;
    }

    $file = $this->getFile($url);
    if ($file->body === '') {
        return null;
    }

    $document = new \DOMDocument();
    // Real-world markup triggers libxml warnings, hence the suppression; the
    // boolean result is what tells us whether parsing actually worked (the
    // DOMDocument object itself is always truthy).
    if (!@$document->loadHTML($file->body)) {
        return null;
    }

    $xpath = new \DOMXpath($document);
    $elements = $xpath->query("//link[contains(@rel, 'icon')]");
    if ($elements === false || $elements->length === 0) {
        return null;
    }

    $iconPath = trim($elements->item(0)->getAttribute('href'));
    if ($iconPath === '') {
        // An icon link without a href would otherwise resolve to the page URL itself.
        return null;
    }

    return \SimplePie_Misc::absolutize_url($iconPath, $url);
}
/**
 * Record the result for the current file, then follow its rel="next" link.
 *
 * parent::result() is only invoked when the current file has a URL. The first
 * <link> whose rel tokens include "next" and whose resolved href has not been
 * followed before becomes the new $this->data['url']; run() is then called
 * whenever $this->data['url'] is truthy.
 */
function result()
{
    // URLs already followed, remembered across calls.
    static $done = array();

    $file = $this->data['file'];
    if ($file->url != '') {
        parent::result();
    }

    foreach (SimplePie_Misc::get_element('link', $file->body) as $link) {
        if (empty($link['attribs']['href']['data']) || empty($link['attribs']['rel']['data'])) {
            continue;
        }

        $tokens = SimplePie_Misc::space_seperated_tokens(strtolower($link['attribs']['rel']['data']));
        $rel = array_unique($tokens);
        $href = SimplePie_Misc::absolutize_url(trim($link['attribs']['href']['data']), $this->data['file']->url);

        if (in_array($href, $done) || !in_array('next', $rel)) {
            continue;
        }

        $this->data['url'] = $href;
        $done[] = $href;
        break;
    }

    if ($this->data['url']) {
        $this->run();
    }
}
/**
 * Collect the feed URIs that can be discovered from a URI, optionally
 * updating the HTTP credentials used for the lookup.
 *
 * @param string|null $uri    URI to inspect
 * @param array       $params optional "authentication", "username" and
 *                            "password" keys; an authentication value of -1
 *                            (the default) leaves $this->credentials untouched,
 *                            '-' clears authentication, username and password
 * @return array list of absolute feed URIs; when is_401() reports true a
 *               WP_Error is placed in front of them
 */
function find($uri = NULL, $params = array())
{
    $params = wp_parse_args($params, array(
        "authentication" => -1,
        "username" => NULL,
        "password" => NULL,
    ));

    // Equivalents
    if ($params['authentication'] == '-') {
        $params['authentication'] = NULL;
        $params['username'] = NULL;
        $params['password'] = NULL;
    }

    // Set/reset
    if ($params['authentication'] != -1) {
        $this->credentials = array(
            "authentication" => $params['authentication'],
            "username" => $params['username'],
            "password" => $params['password'],
        );
    }

    $found = array();

    if (!is_null($this->data($uri))) {
        if (!$this->is_opml($uri)) {
            if ($this->is_feed($uri)) {
                $candidates = array($this->uri);
            } else {
                // Assume that we have HTML or XHTML (even if we don't, who's
                // it gonna hurt?) Autodiscovery is the preferred method.
                $candidates = $this->_link_rel_feeds();

                // ... but we'll also take the little orange buttons
                if ($this->fallbacks > 0) {
                    $candidates = array_merge($candidates, $this->_a_href_feeds(TRUE));
                }

                // If all that failed, look harder
                if ($this->fallbacks > 1 && count($candidates) == 0) {
                    $candidates = $this->_a_href_feeds(FALSE);
                }

                // Our search may turn up duplicate URIs. We only need to do
                // any given URI once. Props to Camilo.
                $candidates = array_unique($candidates);
            }

            // Try some clever little URL tricks before we go
            if ($this->fallbacks > 2) {
                $candidates = array_merge($candidates, $this->_url_manipulation_feeds());
            }
        } else {
            $candidates = $this->_opml_rss_uris();
        }

        $candidates = array_unique($candidates);

        // Verify feeds and resolve relative URIs
        foreach ($candidates as $candidate) {
            $resolved = SimplePie_Misc::absolutize_url($candidate, $this->uri);

            // The document's own URI is never re-fetched for verification.
            $needsCheck = $this->verify && $candidate != $this->uri && $resolved != $this->uri;
            if (!$needsCheck) {
                $found[] = $resolved;
                continue;
            }

            $probe = new FeedFinder($resolved, $this->credentials);
            if ($probe->is_feed()) {
                $found[] = $resolved;
            }
            unset($probe);
        }
    }

    if ($this->is_401($uri)) {
        $unauthorized = new WP_Error(
            'http_request_failed',
            '401 Not authorized',
            array("uri" => $this->uri, "status" => 401)
        );
        array_unshift($found, $unauthorized);
    }

    return array_values($found);
}
/**
 * Sort the anchors of the fetched document into same-authority and
 * other-authority link lists.
 *
 * Every <a href> whose scheme is empty, http, https or feed is made absolute
 * and appended to $this->local (no authority, or the same authority as the
 * fetched file) or to $this->elsewhere. Both lists are de-duplicated.
 *
 * @return true|null true when at least one link was collected, null otherwise
 */
public function get_links()
{
    $links = SimplePie_Misc::get_element('a', $this->file->body);

    // The document's own URL does not change while iterating; parse it once.
    $current = SimplePie_Misc::parse_url($this->file->url);

    foreach ($links as $link) {
        if (!isset($link['attribs']['href']['data'])) {
            continue;
        }

        $href = trim($link['attribs']['href']['data']);
        $parsed = SimplePie_Misc::parse_url($href);

        // The previous pattern /^(http(s)|feed)?$/ required the "s" and therefore
        // rejected every plain http:// link.
        if ($parsed['scheme'] !== '' && !preg_match('/^(https?|feed)?$/i', $parsed['scheme'])) {
            continue;
        }

        // Anchors located after $this->base_location resolve against $this->base,
        // earlier ones against $this->http_base.
        $base = ($this->base_location < $link['offset']) ? $this->base : $this->http_base;
        $href = SimplePie_Misc::absolutize_url($href, $base);

        if ($parsed['authority'] === '' || $parsed['authority'] === $current['authority']) {
            $this->local[] = $href;
        } else {
            $this->elsewhere[] = $href;
        }
    }

    $this->local = array_unique($this->local);
    $this->elsewhere = array_unique($this->elsewhere);

    if (!empty($this->local) || !empty($this->elsewhere)) {
        return true;
    }
    return null;
}
/**
 * Simple HTTP response parser
 *
 * Splits the raw response into status line, headers and body, decodes the
 * body when transfer-encoding / content-encoding headers are present, and
 * follows redirects (at most 10) by re-issuing the request.
 *
 * @param string $headers Full response text including headers and body
 * @param string $url Original request URL
 * @param array $req_headers Original $headers array passed to {@link request()}, in case we need to follow redirects
 * @param array $req_data Original $data array passed to {@link request()}, in case we need to follow redirects
 * @param array $req_type Original $type constant passed to {@link request()}, in case we need to follow redirects
 * @return stdClass Contains "body" string, "headers" array, "status code" integer, "success" boolean, "redirects" integer as properties
 * @throws Exception when the status line cannot be parsed or a required decoder function is missing
 */
protected function parse_response($headers, $url, $req_headers, $req_data, $req_type)
{
    $redirects = 10;

    $headers = explode("\r\n\r\n", $headers, 2);
    $return = new stdClass();
    $return->body = array_pop($headers);
    $headers = $headers[0];

    // Pretend CRLF = LF for compatibility (RFC 2616, section 19.3)
    $headers = str_replace("\r\n", "\n", $headers);
    // Unfold headers (replace [CRLF] 1*( SP | HT ) with SP) as per RFC 2616 (section 2.2)
    $headers = preg_replace('/\\n[ \\t]/', ' ', $headers);
    $headers = explode("\n", $headers);

    preg_match('#^HTTP/1\\.\\d[ \\t]+(\\d+)#i', array_shift($headers), $matches);
    if (empty($matches)) {
        throw new Exception(_r('Response could not be parsed'));
    }

    $return->status_code = (int) $matches[1];
    $return->success = ($return->status_code >= 200 && $return->status_code < 300);

    $return->headers = array();
    foreach ($headers as $header) {
        // A line without a colon is not a header field; skipping it avoids an
        // undefined-offset notice from list().
        if (strpos($header, ':') === false) {
            continue;
        }

        list($key, $value) = explode(':', $header, 2);
        // Collapse internal whitespace runs; the result used to be thrown away.
        $value = preg_replace('#(\\s+)#i', ' ', trim($value));
        $key = strtolower($key);

        if (isset($return->headers[$key])) {
            // Repeated header: keep every value, in order of appearance.
            if (!is_array($return->headers[$key])) {
                $return->headers[$key] = array($return->headers[$key]);
            }
            $return->headers[$key][] = $value;
        } else {
            $return->headers[$key] = $value;
        }
    }

    if (isset($return->headers['transfer-encoding'])) {
        $return->body = HTTPRequest::decode_chunked($return->body);
    }

    if (isset($return->headers['content-encoding'])) {
        switch ($return->headers['content-encoding']) {
            case 'gzip':
                if (function_exists('gzdecode')) {
                    $return->body = gzdecode($return->body);
                } else {
                    throw new Exception(_r('gzdecode is missing'));
                }
                break;
            case 'deflate':
                if (function_exists('gzinflate')) {
                    $return->body = gzinflate($return->body);
                } else {
                    throw new Exception(_r('gzinflate is missing'));
                }
                break;
        }
    }

    //fsockopen and cURL compatibility
    if (isset($return->headers['connection'])) {
        unset($return->headers['connection']);
    }

    $is_redirect = in_array($return->status_code, array(300, 301, 302, 303, 307))
        || ($return->status_code > 307 && $return->status_code < 400);
    if ($is_redirect && isset($return->headers['location']) && $this->redirects < $redirects) {
        $this->redirects++;
        $location = SimplePie_Misc::absolutize_url($return->headers['location'], $url);
        return $this->request($location, $req_headers, $req_data, $req_type);
    }

    $return->redirects = $this->redirects;
    return $return;
}
/**
 * Find the feed's icon and store it in the cache.
 *
 * The icon URL is the feed's Atom 1.0 <icon> when present, otherwise
 * /favicon.ico on the host of the feed's link (http/https links only). The
 * icon is downloaded and, when it sniffs as an image, saved serialized under
 * "<id>.ico".
 *
 * @param SimplePie $feed SimplePie object to retrieve logo for
 * @param string $id Identifier used to build the cache file name
 * @return mixed false when no icon could be found, fetched or identified as
 *               an image; otherwise whatever DataHandler::save() returns
 */
protected static function discover_favicon($feed, $id)
{
    if ($return = $feed->get_channel_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'icon')) {
        $favicon = SimplePie_Misc::absolutize_url($return[0]['data'], $feed->get_base($return[0]));
    } elseif (($url = $feed->get_link()) !== null && preg_match('/^http(s)?:\\/\\//i', $url)) {
        $favicon = SimplePie_Misc::absolutize_url('/favicon.ico', $url);
    } else {
        return false;
    }

    // Needed by both branches above; it used to be set only in the
    // /favicon.ico branch, leaving it undefined for Atom icons.
    $filename = $id . '.ico';

    $cache = new DataHandler(get_option('cachedir'));
    $request = new HTTPRequest();

    // REMOTE_ADDR is absent outside a web request (e.g. CLI/cron).
    $request_headers = array();
    if (isset($_SERVER['REMOTE_ADDR'])) {
        $request_headers['X-Forwarded-For'] = $_SERVER['REMOTE_ADDR'];
    }
    $file = $request->get($favicon, $request_headers);

    if (!$file->success || strlen($file->body) === 0) {
        return false;
    }

    $sniffer = new $feed->content_type_sniffer_class($file);
    $type = $sniffer->get_type();
    if (substr($type, 0, 6) !== 'image/') {
        return false;
    }

    $body = array('type' => $type, 'body' => $file->body);
    return $cache->save($filename, serialize($body));
}
/**
 * Make one URL attribute absolute on every occurrence of a tag.
 *
 * @param string $data      markup to rewrite
 * @param string $tag       element name to look for
 * @param string $attribute attribute holding the URL
 * @return string the markup with each matching attribute resolved against $this->base
 */
function replace_urls($data, $tag, $attribute)
{
    foreach (SimplePie_Misc::get_element($tag, $data) as $node) {
        if (!isset($node['attribs'][$attribute]['data'])) {
            continue;
        }

        $resolved = SimplePie_Misc::absolutize_url($node['attribs'][$attribute]['data'], $this->base);
        $node['attribs'][$attribute]['data'] = $resolved;

        // Swap the element's original source text for the re-serialized element.
        $rebuilt = SimplePie_Misc::element_implode($node);
        $data = str_replace($node['full'], $rebuilt, $data);
    }

    return $data;
}
/**
 * Fetch a URL or local file and populate headers, body and status.
 *
 * http(s) URLs are fetched with cURL when available (unless $force_fsockopen
 * is set), otherwise through a hand-written HTTP/1.1 request over fsockopen.
 * Anything else is read with file_get_contents(). Redirect responses are
 * followed by re-running the constructor, up to $redirects times. Failures
 * are reported through $this->error and $this->success = false.
 *
 * @param string      $url             URL or path to read
 * @param int         $timeout         connect/transfer timeout in seconds
 * @param int         $redirects       maximum number of redirects to follow
 * @param array|null  $headers         extra request headers as name => value
 * @param string|null $useragent       User-Agent; defaults to the user_agent ini setting for http(s)
 * @param bool        $force_fsockopen skip cURL even when it is available
 */
public function __construct($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false)
{
    if (class_exists('idna_convert')) {
        // Re-encode the authority so internationalized host names can be requested.
        $idn = new idna_convert();
        $parsed = SimplePie_Misc::parse_url($url);
        $url = SimplePie_Misc::compress_parse_url($parsed['scheme'], $idn->encode($parsed['authority']), $parsed['path'], $parsed['query'], $parsed['fragment']);
    }
    $this->url = $url;
    $this->useragent = $useragent;

    if (preg_match('/^http(s)?:\\/\\//i', $url)) {
        if ($useragent === null) {
            $useragent = ini_get('user_agent');
            $this->useragent = $useragent;
        }
        if (!is_array($headers)) {
            $headers = array();
        }

        if (!$force_fsockopen && function_exists('curl_exec')) {
            $this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_CURL;
            $fp = curl_init();

            $headers2 = array();
            foreach ($headers as $key => $value) {
                $headers2[] = "{$key}: {$value}";
            }

            if (version_compare(SimplePie_Misc::get_curl_version(), '7.10.5', '>=')) {
                curl_setopt($fp, CURLOPT_ENCODING, '');
            }
            curl_setopt($fp, CURLOPT_URL, $url);
            curl_setopt($fp, CURLOPT_HEADER, 1);
            curl_setopt($fp, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($fp, CURLOPT_TIMEOUT, $timeout);
            curl_setopt($fp, CURLOPT_CONNECTTIMEOUT, $timeout);
            curl_setopt($fp, CURLOPT_REFERER, $url);
            curl_setopt($fp, CURLOPT_USERAGENT, $useragent);
            curl_setopt($fp, CURLOPT_HTTPHEADER, $headers2);
            if (!ini_get('open_basedir') && !ini_get('safe_mode') && version_compare(SimplePie_Misc::get_curl_version(), '7.15.2', '>=')) {
                curl_setopt($fp, CURLOPT_FOLLOWLOCATION, 1);
                curl_setopt($fp, CURLOPT_MAXREDIRS, $redirects);
            }

            $this->headers = curl_exec($fp);
            if (curl_errno($fp) === 23 || curl_errno($fp) === 61) {
                // Retry once with content decoding switched off.
                curl_setopt($fp, CURLOPT_ENCODING, 'none');
                $this->headers = curl_exec($fp);
            }

            if (curl_errno($fp)) {
                $this->error = 'cURL error ' . curl_errno($fp) . ': ' . curl_error($fp);
                $this->success = false;
                // Release the handle on the failure path too.
                curl_close($fp);
            } else {
                $info = curl_getinfo($fp);
                curl_close($fp);

                // With CURLOPT_HEADER every followed hop contributes a header
                // block; keep only the final response.
                $this->headers = explode("\r\n\r\n", $this->headers, $info['redirect_count'] + 1);
                $this->headers = array_pop($this->headers);

                $parser = new SimplePie_HTTP_Parser($this->headers);
                if ($parser->parse()) {
                    $this->headers = $parser->headers;
                    $this->body = $parser->body;
                    $this->status_code = $parser->status_code;
                    if ((in_array($this->status_code, array(300, 301, 302, 303, 307)) || $this->status_code > 307 && $this->status_code < 400) && isset($this->headers['location']) && $this->redirects < $redirects) {
                        $this->redirects++;
                        $location = SimplePie_Misc::absolutize_url($this->headers['location'], $url);
                        return $this->__construct($location, $timeout, $redirects, $headers, $useragent, $force_fsockopen);
                    }
                }
            }
        } else {
            $this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_FSOCKOPEN;
            $url_parts = parse_url($url);
            $socket_host = $url_parts['host'];
            if (isset($url_parts['scheme']) && strtolower($url_parts['scheme']) === 'https') {
                $socket_host = "ssl://{$url_parts['host']}";
                // 443 is only the default: an explicit port in the URL wins.
                if (!isset($url_parts['port'])) {
                    $url_parts['port'] = 443;
                }
            }
            if (!isset($url_parts['port'])) {
                $url_parts['port'] = 80;
            }

            $fp = @fsockopen($socket_host, $url_parts['port'], $errno, $errstr, $timeout);
            if (!$fp) {
                $this->error = 'fsockopen error: ' . $errstr;
                $this->success = false;
            } else {
                stream_set_timeout($fp, $timeout);

                if (isset($url_parts['path'])) {
                    if (isset($url_parts['query'])) {
                        $get = "{$url_parts['path']}?{$url_parts['query']}";
                    } else {
                        $get = $url_parts['path'];
                    }
                } else {
                    $get = '/';
                }

                $out = "GET {$get} HTTP/1.1\r\n";
                $out .= "Host: {$url_parts['host']}\r\n";
                $out .= "User-Agent: {$useragent}\r\n";
                if (extension_loaded('zlib')) {
                    $out .= "Accept-Encoding: x-gzip,gzip,deflate\r\n";
                }
                if (isset($url_parts['user']) && isset($url_parts['pass'])) {
                    $out .= "Authorization: Basic " . base64_encode("{$url_parts['user']}:{$url_parts['pass']}") . "\r\n";
                }
                foreach ($headers as $key => $value) {
                    $out .= "{$key}: {$value}\r\n";
                }
                $out .= "Connection: Close\r\n\r\n";
                fwrite($fp, $out);

                // Read until the peer closes the connection or the stream times out.
                $info = stream_get_meta_data($fp);
                $this->headers = '';
                while (!$info['eof'] && !$info['timed_out']) {
                    $this->headers .= fread($fp, 1160);
                    $info = stream_get_meta_data($fp);
                }

                if (!$info['timed_out']) {
                    $parser = new SimplePie_HTTP_Parser($this->headers);
                    if ($parser->parse()) {
                        $this->headers = $parser->headers;
                        $this->body = $parser->body;
                        $this->status_code = $parser->status_code;
                        if ((in_array($this->status_code, array(300, 301, 302, 303, 307)) || $this->status_code > 307 && $this->status_code < 400) && isset($this->headers['location']) && $this->redirects < $redirects) {
                            $this->redirects++;
                            $location = SimplePie_Misc::absolutize_url($this->headers['location'], $url);
                            // Close this socket before recursing; the early return
                            // below would otherwise skip the fclose() at the end.
                            fclose($fp);
                            return $this->__construct($location, $timeout, $redirects, $headers, $useragent, $force_fsockopen);
                        }
                        if (isset($this->headers['content-encoding'])) {
                            // Hey, we act dumb elsewhere, so let's do that here too
                            switch (strtolower(trim($this->headers['content-encoding'], "\t\n\r "))) {
                                case 'gzip':
                                case 'x-gzip':
                                    $decoder = new SimplePie_gzdecode($this->body);
                                    if (!$decoder->parse()) {
                                        $this->error = 'Unable to decode HTTP "gzip" stream';
                                        $this->success = false;
                                    } else {
                                        $this->body = $decoder->data;
                                    }
                                    break;

                                case 'deflate':
                                    // Try each decoder in turn until one accepts the body.
                                    if (($decompressed = gzinflate($this->body)) !== false) {
                                        $this->body = $decompressed;
                                    } else {
                                        if (($decompressed = gzuncompress($this->body)) !== false) {
                                            $this->body = $decompressed;
                                        } else {
                                            if (function_exists('gzdecode') && ($decompressed = gzdecode($this->body)) !== false) {
                                                $this->body = $decompressed;
                                            } else {
                                                $this->error = 'Unable to decode HTTP "deflate" stream';
                                                $this->success = false;
                                            }
                                        }
                                    }
                                    break;

                                default:
                                    $this->error = 'Unknown content coding';
                                    $this->success = false;
                            }
                        }
                    }
                } else {
                    $this->error = 'fsocket timed out';
                    $this->success = false;
                }
                fclose($fp);
            }
        }
    } else {
        $this->method = SIMPLEPIE_FILE_SOURCE_LOCAL | SIMPLEPIE_FILE_SOURCE_FILE_GET_CONTENTS;
        // Note: an empty (or "0") file is reported as a read failure as well.
        if (!($this->body = file_get_contents($url))) {
            $this->error = 'file_get_contents could not read the file';
            $this->success = false;
        }
    }
}
/**
 * Resolve the fixture's relative URL against its base URL and store the
 * outcome in $this->result.
 */
function test()
{
    $relative = $this->data['relative'];
    $base = $this->data['base'];

    $this->result = SimplePie_Misc::absolutize_url($relative, $base);
}
/**
 * Fetch a set of URLs once, recording responses in $this->requests and any
 * valid redirect targets in $this->redirectQueue.
 *
 * Depending on $this->method the work is done in parallel through
 * HttpRequestPool, in parallel through RollingCurl, or sequentially through
 * file_get_contents(). URLs already present in $this->requests are not
 * fetched again unless this is a redirect pass.
 *
 * @param array $urls       URLs to fetch; on a redirect pass the array keys are
 *                          the original URLs and the values the redirect targets
 * @param bool  $isRedirect whether this call processes queued redirects
 * @return false|null false when an HttpException is caught in the
 *                    HttpRequestPool branch; otherwise nothing is returned
 */
public function fetchAllOnce(array $urls, $isRedirect = false)
{
    if (!$isRedirect) {
        $urls = array_unique($urls);
    }
    if (empty($urls)) {
        return;
    }

    //////////////////////////////////////////////////////
    // parallel (HttpRequestPool)
    if ($this->method == self::METHOD_REQUEST_POOL) {
        $this->debug('Starting parallel fetch (HttpRequestPool)');
        try {
            while (count($urls) > 0) {
                $this->debug('Processing set of ' . min($this->maxParallelRequests, count($urls)));
                $subset = array_splice($urls, 0, $this->maxParallelRequests);
                $pool = new HttpRequestPool();
                foreach ($subset as $orig => $url) {
                    if (!$isRedirect) {
                        $orig = $url;
                    }
                    unset($this->redirectQueue[$orig]);
                    $this->debug("...{$url}");
                    if (!$isRedirect && isset($this->requests[$url])) {
                        $this->debug(" memory");
                    /*
                    } elseif ($this->isCached($url)) {
                        $this->debug(" cached");
                        if (!$this->minimiseMemoryUse) {
                            $this->requests[$url] = $this->getCached($url);
                        }
                    */
                    } else {
                        $this->debug("......adding to pool");
                        $req_url = $this->rewriteHashbangFragment ? $this->rewriteHashbangFragment($url) : $url;
                        $req_url = $this->removeFragment($req_url);
                        $httpRequest = new HttpRequest($req_url, HttpRequest::METH_GET, $this->requestOptions);
                        // send cookies, if we have any
                        if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
                            $this->debug("......sending cookies: {$cookies}");
                            $httpRequest->addHeaders(array('Cookie' => $cookies));
                        }
                        $this->requests[$orig] = array('headers' => null, 'body' => null, 'httpRequest' => $httpRequest);
                        $this->requests[$orig]['original_url'] = $orig;
                        $pool->attach($httpRequest);
                    }
                }

                // did we get anything into the pool?
                if (count($pool) > 0) {
                    $this->debug('Sending request...');
                    try {
                        $pool->send();
                    } catch (HttpRequestPoolException $e) {
                        // do nothing
                    }
                    $this->debug('Received responses');
                    foreach ($subset as $orig => $url) {
                        if (!$isRedirect) {
                            $orig = $url;
                        }
                        // Entries served from memory had no request attached in this
                        // round (it is unset once processed), so there is nothing to
                        // read back; dereferencing it would be a fatal error.
                        if (!isset($this->requests[$orig]['httpRequest'])) {
                            continue;
                        }
                        //if (!isset($this->requests[$url]['fromCache'])) {
                        $request = $this->requests[$orig]['httpRequest'];
                        //$this->requests[$orig]['headers'] = $this->headersToString($request->getResponseHeader());
                        // getResponseHeader() doesn't return status line, so, for consistency...
                        $this->requests[$orig]['headers'] = substr($request->getRawResponseMessage(), 0, $request->getResponseInfo('header_size'));
                        $this->requests[$orig]['body'] = $request->getResponseBody();
                        $this->requests[$orig]['effective_url'] = $request->getResponseInfo('effective_url');
                        $this->requests[$orig]['status_code'] = $status_code = $request->getResponseCode();
                        // is redirect?
                        if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && $request->getResponseHeader('location')) {
                            $redirectURL = $request->getResponseHeader('location');
                            if (!preg_match('!^https?://!i', $redirectURL)) {
                                $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
                            }
                            if ($this->validateURL($redirectURL)) {
                                $this->debug('Redirect detected. Valid URL: ' . $redirectURL);
                                // store any cookies
                                $cookies = $request->getResponseHeader('set-cookie');
                                if ($cookies && !is_array($cookies)) {
                                    $cookies = array($cookies);
                                }
                                if ($cookies) {
                                    $this->cookieJar->storeCookies($url, $cookies);
                                }
                                $this->redirectQueue[$orig] = $redirectURL;
                            } else {
                                $this->debug('Redirect detected. Invalid URL: ' . $redirectURL);
                            }
                        }
                        //die($url.' -multi- '.$request->getResponseInfo('effective_url'));
                        $pool->detach($request);
                        unset($this->requests[$orig]['httpRequest'], $request);
                        /*
                        if ($this->minimiseMemoryUse) {
                            if ($this->cache($url)) {
                                unset($this->requests[$url]);
                            }
                        }
                        */
                        //}
                    }
                }
            }
        } catch (HttpException $e) {
            $this->debug($e);
            return false;
        }
    } elseif ($this->method == self::METHOD_CURL_MULTI) {
        $this->debug('Starting parallel fetch (curl_multi_*)');
        while (count($urls) > 0) {
            $this->debug('Processing set of ' . min($this->maxParallelRequests, count($urls)));
            $subset = array_splice($urls, 0, $this->maxParallelRequests);
            $pool = new RollingCurl(array($this, 'handleCurlResponse'));
            $pool->window_size = count($subset);

            foreach ($subset as $orig => $url) {
                if (!$isRedirect) {
                    $orig = $url;
                }
                unset($this->redirectQueue[$orig]);
                $this->debug("...{$url}");
                if (!$isRedirect && isset($this->requests[$url])) {
                    $this->debug(" memory");
                /*
                } elseif ($this->isCached($url)) {
                    $this->debug(" cached");
                    if (!$this->minimiseMemoryUse) {
                        $this->requests[$url] = $this->getCached($url);
                    }
                */
                } else {
                    $this->debug("......adding to pool");
                    $req_url = $this->rewriteHashbangFragment ? $this->rewriteHashbangFragment($url) : $url;
                    $req_url = $this->removeFragment($req_url);
                    $headers = array();
                    // send cookies, if we have any
                    if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
                        $this->debug("......sending cookies: {$cookies}");
                        $headers[] = 'Cookie: ' . $cookies;
                    }
                    $httpRequest = new RollingCurlRequest($req_url, 'GET', null, $headers, array(CURLOPT_CONNECTTIMEOUT => $this->requestOptions['timeout'], CURLOPT_TIMEOUT => $this->requestOptions['timeout']));
                    $httpRequest->set_original_url($orig);
                    $this->requests[$orig] = array('headers' => null, 'body' => null, 'httpRequest' => $httpRequest);
                    $this->requests[$orig]['original_url'] = $orig; // TODO: is this needed anymore?
                    $pool->add($httpRequest);
                }
            }

            // did we get anything into the pool?
            if (count($pool) > 0) {
                $this->debug('Sending request...');
                $pool->execute(); // this will call handleCurlResponse() and populate $this->requests[$orig]
                $this->debug('Received responses');
                foreach ($subset as $orig => $url) {
                    if (!$isRedirect) {
                        $orig = $url;
                    }
                    // $this->requests[$orig]['headers']
                    // $this->requests[$orig]['body']
                    // $this->requests[$orig]['effective_url']
                    $status_code = $this->requests[$orig]['status_code'];
                    if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {
                        $redirectURL = $this->requests[$orig]['location'];
                        if (!preg_match('!^https?://!i', $redirectURL)) {
                            $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
                        }
                        if ($this->validateURL($redirectURL)) {
                            $this->debug('Redirect detected. Valid URL: ' . $redirectURL);
                            // store any cookies
                            $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
                            if (!empty($cookies)) {
                                $this->cookieJar->storeCookies($url, $cookies);
                            }
                            $this->redirectQueue[$orig] = $redirectURL;
                        } else {
                            $this->debug('Redirect detected. Invalid URL: ' . $redirectURL);
                        }
                    }
                    // die($url.' -multi- '.$request->getResponseInfo('effective_url'));
                    unset($this->requests[$orig]['httpRequest']);
                }
            }
        }
    } else {
        $this->debug('Starting sequential fetch (file_get_contents)');
        $this->debug('Processing set of ' . count($urls));
        foreach ($urls as $orig => $url) {
            if (!$isRedirect) {
                $orig = $url;
            }
            unset($this->redirectQueue[$orig]);
            $this->debug("...{$url}");
            if (!$isRedirect && isset($this->requests[$url])) {
                $this->debug(" memory");
            /*
            } elseif ($this->isCached($url)) {
                $this->debug(" cached");
                if (!$this->minimiseMemoryUse) {
                    $this->requests[$url] = $this->getCached($url);
                }
            */
            } else {
                $this->debug("Sending request for {$url}");
                $this->requests[$orig]['original_url'] = $orig;
                $req_url = $this->rewriteHashbangFragment ? $this->rewriteHashbangFragment($url) : $url;
                $req_url = $this->removeFragment($req_url);
                // send cookies, if we have any
                $httpContext = $this->httpContext;
                if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
                    $this->debug("......sending cookies: {$cookies}");
                    $httpContext['http']['header'] .= 'Cookie: ' . $cookies . "\r\n";
                }
                if (false !== ($html = @file_get_contents($req_url, false, stream_context_create($httpContext)))) {
                    $this->debug('Received response');
                    // get status code ($http_response_header is filled in by the http stream wrapper)
                    if (!isset($http_response_header[0]) || !preg_match('!^HTTP/\\d+\\.\\d+\\s+(\\d+)!', trim($http_response_header[0]), $match)) {
                        $this->debug('Error: no status code found');
                        // TODO: handle error - no status code
                    } else {
                        $this->requests[$orig]['headers'] = $this->headersToString($http_response_header, false);
                        $this->requests[$orig]['body'] = $html;
                        $this->requests[$orig]['effective_url'] = $req_url;
                        $this->requests[$orig]['status_code'] = $status_code = (int) $match[1];
                        unset($match);
                        // handle redirect
                        if (preg_match('/^Location:(.*?)$/m', $this->requests[$orig]['headers'], $match)) {
                            $this->requests[$orig]['location'] = trim($match[1]);
                        }
                        if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {
                            $redirectURL = $this->requests[$orig]['location'];
                            if (!preg_match('!^https?://!i', $redirectURL)) {
                                $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
                            }
                            if ($this->validateURL($redirectURL)) {
                                $this->debug('Redirect detected. Valid URL: ' . $redirectURL);
                                // store any cookies
                                $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
                                if (!empty($cookies)) {
                                    $this->cookieJar->storeCookies($url, $cookies);
                                }
                                $this->redirectQueue[$orig] = $redirectURL;
                            } else {
                                $this->debug('Redirect detected. Invalid URL: ' . $redirectURL);
                            }
                        }
                    }
                } else {
                    $this->debug('Error retrieving URL');
                    //print_r($req_url);
                    //print_r($http_response_header);
                    //print_r($html);
                    // TODO: handle error - failed to retrieve URL
                }
            }
        }
    }
}
/**
 * Start-element handler: pushes the element onto the namespace / element /
 * xml:base / xml:lang stacks and either extends the XHTML construct being
 * serialized or opens a new child node in the data tree.
 *
 * @param mixed  $parser     parser passed by the caller (not used here)
 * @param string $tag        element name, split by split_ns() into namespace and local name
 * @param array  $attributes attribute name => value pairs, names split the same way
 */
public function tag_open($parser, $tag, $attributes)
{
    $name_parts = $this->split_ns($tag);
    $namespace = $name_parts[0];
    $element = $name_parts[1];
    $this->namespace[] = $namespace;
    $this->element[] = $element;

    // Group attributes as $attribs[namespace][local name] = value.
    $attribs = array();
    foreach ($attributes as $name => $value) {
        $attr_parts = $this->split_ns($name);
        $attribs[$attr_parts[0]][$attr_parts[1]] = $value;
    }

    // xml:base is resolved against the enclosing base; without one the
    // enclosing base and its "explicit" flag are inherited unchanged.
    $parent_base = end($this->xml_base);
    if (isset($attribs[SIMPLEPIE_NAMESPACE_XML]['base'])) {
        $base = SimplePie_Misc::absolutize_url($attribs[SIMPLEPIE_NAMESPACE_XML]['base'], $parent_base);
        $base_explicit = true;
    } else {
        $base = $parent_base;
        $base_explicit = end($this->xml_base_explicit);
    }
    $this->xml_base[] = $base;
    $this->xml_base_explicit[] = $base_explicit;

    // xml:lang: own value if declared, otherwise inherited.
    $lang = isset($attribs[SIMPLEPIE_NAMESPACE_XML]['lang'])
        ? $attribs[SIMPLEPIE_NAMESPACE_XML]['lang']
        : end($this->xml_lang);
    $this->xml_lang[] = $lang;

    if ($this->current_xhtml_construct >= 0) {
        // Inside an XHTML construct: track depth and re-serialize XHTML elements as text.
        $this->current_xhtml_construct++;
        if ($namespace === SIMPLEPIE_NAMESPACE_XHTML) {
            $markup = '<' . $element;
            if (isset($attribs[''])) {
                foreach ($attribs[''] as $name => $value) {
                    $markup .= ' ' . $name . '="' . htmlspecialchars($value, ENT_COMPAT, $this->encoding) . '"';
                }
            }
            $this->data['data'] .= $markup . '>';
        }
        return;
    }

    // Remember the current node, then re-point $this->data at a fresh child
    // slot. These are reference bindings and must stay in this order.
    $this->datas[] =& $this->data;
    $this->data =& $this->data['child'][$namespace][$element][];
    $this->data = array(
        'data' => '',
        'attribs' => $attribs,
        'xml_base' => $base,
        'xml_base_explicit' => $base_explicit,
        'xml_lang' => $lang,
    );

    $atom03_xml = $namespace === SIMPLEPIE_NAMESPACE_ATOM_03
        && in_array($element, array('title', 'tagline', 'copyright', 'info', 'summary', 'content'))
        && isset($attribs['']['mode'])
        && $attribs['']['mode'] === 'xml';
    $atom10_xhtml = $namespace === SIMPLEPIE_NAMESPACE_ATOM_10
        && in_array($element, array('rights', 'subtitle', 'summary', 'info', 'title', 'content'))
        && isset($attribs['']['type'])
        && $attribs['']['type'] === 'xhtml';

    if ($atom03_xml || $atom10_xhtml) {
        $this->current_xhtml_construct = 0;
    }
}
/**
 * The constructor is a copy of the stock SimplePie File class which has
 * been modified to use the Moodle curl class rather than the PHP curl
 * functions.
 *
 * @param string     $url             URL to fetch
 * @param int        $timeout         forwarded when a redirect is followed (not applied to the request here)
 * @param int        $redirects       maximum number of redirects to follow
 * @param array|null $headers         forwarded when a redirect is followed (not applied to the request here)
 * @param string     $useragent       unused
 * @param bool       $force_fsockopen unused
 * @return mixed false when the Moodle curl call throws; otherwise the result
 *               of the redirected call, or nothing
 */
function moodle_simplepie_file($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false)
{
    $this->url = $url;
    $this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_CURL;

    $curl = new curl();
    $curl->setopt(array('CURLOPT_HEADER' => true));

    try {
        $this->headers = $curl->get($url);
    } catch (moodle_exception $e) {
        $this->error = 'cURL Error: ' . $curl->error;
        $this->success = false;
        return false;
    }

    // Plain assignment: "=& new" is deprecated since PHP 5.3 and a parse error from PHP 7.
    $parser = new SimplePie_HTTP_Parser($this->headers);
    if ($parser->parse()) {
        $this->headers = $parser->headers;
        $this->body = $parser->body;
        $this->status_code = $parser->status_code;

        if (($this->status_code == 300 || $this->status_code == 301 || $this->status_code == 302 || $this->status_code == 303 || $this->status_code == 307 || $this->status_code > 307 && $this->status_code < 400) && isset($this->headers['location']) && $this->redirects < $redirects) {
            $this->redirects++;
            $location = SimplePie_Misc::absolutize_url($this->headers['location'], $url);
            // Follow the redirect through this constructor so the Moodle curl
            // class keeps being used, instead of the stock SimplePie_File one.
            return $this->moodle_simplepie_file($location, $timeout, $redirects, $headers);
        }
    }
}
/**
 * Rebuild one matched attribute with its URL resolved against $this->_base.
 *
 * @param mixed $refs match data, handed to FeedWordPressHTML::attributeMatch()
 * @return string the match's prefix, the absolutized value and the suffix, concatenated
 */
function resolve_single_relative_uri($refs)
{
    $match = FeedWordPressHTML::attributeMatch($refs);
    $absolute = SimplePie_Misc::absolutize_url($match['value'], $this->_base);

    return "{$match['prefix']}{$absolute}{$match['suffix']}";
}
/**
 * Resolve URLs against the effective xml:base.
 *
 * The base is the element's own xml:base attribute (either spelling),
 * resolved against the item base or, failing that, the feed base. Without an
 * element-level base the item base, then the feed base, is used directly.
 *
 * @param string|array $data    a URL string when $raw_url is true; otherwise a
 *                              match array whose element 0 is the tag markup
 * @param bool         $raw_url treat $data as a bare URL
 * @return string the absolutized URL, or the tag markup with its URL attributes absolutized
 */
function replace_urls($data, $raw_url = false)
{
    $xmlbase = null;
    if (!empty($this->attribs['XML:BASE'])) {
        // Previously read from an undefined local $attribs, so this form of
        // the attribute was never honoured.
        $xmlbase = $this->attribs['XML:BASE'];
    } elseif (!empty($this->attribs['HTTP://WWW.W3.ORG/XML/1998/NAMESPACE:BASE'])) {
        $xmlbase = $this->attribs['HTTP://WWW.W3.ORG/XML/1998/NAMESPACE:BASE'];
    }

    // The item-level base takes precedence over the feed-level one.
    $parent_base = !empty($this->item_xmlbase) ? $this->item_xmlbase : $this->feed_xmlbase;
    if (!empty($xmlbase)) {
        $xmlbase = SimplePie_Misc::absolutize_url($xmlbase, $parent_base);
    } else {
        $xmlbase = $parent_base;
    }

    if ($raw_url) {
        return SimplePie_Misc::absolutize_url($data, $xmlbase);
    }

    $attributes = array('background', 'href', 'src', 'longdesc', 'usemap', 'codebase', 'data', 'classid', 'cite', 'action', 'profile', 'for');
    foreach ($attributes as $attribute) {
        // Single-quoted, double-quoted, then unquoted attribute values.
        if (preg_match("/{$attribute}='(.*)'/siU", $data[0], $attrib)
            || preg_match("/{$attribute}=\"(.*)\"/siU", $data[0], $attrib)
            || preg_match("/{$attribute}=(.*)[ |\\/|>]/siU", $data[0], $attrib)
        ) {
            $new_tag = str_replace($attrib[1], SimplePie_Misc::absolutize_url($attrib[1], $xmlbase), $attrib[0]);
            $data[0] = str_replace($attrib[0], $new_tag, $data[0]);
        }
    }

    return $data[0];
}
/**
 * Make URL attributes absolute on every occurrence of a tag, unless the tag
 * is listed in $this->strip_htmltags.
 *
 * @param string       $data       markup to rewrite
 * @param string       $tag        element name to look for
 * @param string|array $attributes one attribute name, or a list of them
 * @return string the markup with matching attributes resolved against $this->base
 */
function replace_urls($data, $tag, $attributes)
{
    // Tags listed for stripping are left untouched.
    if (is_array($this->strip_htmltags) && in_array($tag, $this->strip_htmltags)) {
        return $data;
    }

    // A single attribute name is handled as a one-item list.
    $names = is_array($attributes) ? $attributes : array($attributes);

    foreach (SimplePie_Misc::get_element($tag, $data) as $node) {
        foreach ($names as $name) {
            if (!isset($node['attribs'][$name]['data'])) {
                continue;
            }

            $node['attribs'][$name]['data'] = SimplePie_Misc::absolutize_url($node['attribs'][$name]['data'], $this->base);
            $rebuilt = SimplePie_Misc::element_implode($node);
            $data = str_replace($node['full'], $rebuilt, $data);

            // Later attributes of this element must search for the updated markup.
            $node['full'] = $rebuilt;
        }
    }

    return $data;
}
/**
 * Find a favicon for the site a URL belongs to.
 *
 * First tries /favicon.ico at the web root. Failing that, downloads the root
 * page and looks for a <link rel="icon"> / <link rel="shortcut icon"> tag.
 * Every candidate must pass self::checkFavicon().
 *
 * @param string $url any URL on the site
 * @return string|null the favicon URL, or null when none was found
 */
public static function discoverFavicon($url)
{
    //try webroot favicon
    $favicon = \SimplePie_Misc::absolutize_url('/favicon.ico', $url);
    if (self::checkFavicon($favicon)) {
        return $favicon;
    }

    //try to extract favicon from web page
    $absoluteUrl = \SimplePie_Misc::absolutize_url('/', $url);
    $handle = curl_init();
    curl_setopt($handle, CURLOPT_URL, $absoluteUrl);
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($handle, CURLOPT_FOLLOWLOCATION, TRUE);
    curl_setopt($handle, CURLOPT_MAXREDIRS, 10);
    $page = curl_exec($handle);
    // Redirects may have moved us; relative icon paths belong to the final URL.
    $effectiveUrl = curl_getinfo($handle, CURLINFO_EFFECTIVE_URL);
    curl_close($handle);

    if (FALSE === $page) {
        return null;
    }

    if (!preg_match('/<[^>]*link[^>]*(rel=["\']icon["\']|rel=["\']shortcut icon["\']) .*href=["\']([^>]*)["\'].*>/iU', $page, $match)) {
        return null;
    }

    // the specified uri might be an url, an absolute or a relative path
    // we have to turn it into an url to be able to display it out of context
    $favicon = htmlspecialchars_decode($match[2]);
    if (!parse_url($favicon, PHP_URL_SCHEME)) {
        // No scheme: resolve the path against the page it was found on.
        $base = $effectiveUrl ? $effectiveUrl : $absoluteUrl;
        $favicon = \SimplePie_Misc::absolutize_url($favicon, $base);
    }

    if ($favicon && self::checkFavicon($favicon)) {
        return $favicon;
    }

    return null;
}
/**
 * Copy of the stock SimplePie File constructor, changed to fetch through the
 * Moodle curl class instead of the PHP curl functions.
 *
 * @param string     $url             URL to fetch
 * @param int        $timeout         transfer and connect timeout handed to curl
 * @param int        $redirects       maximum number of redirects to follow
 * @param array|null $headers         extra request headers as name => value
 * @param string     $useragent       unused
 * @param bool       $force_fsockopen unused
 * @return mixed false when curl reports an error; otherwise the result of the
 *               redirected call, or nothing
 */
function moodle_simplepie_file($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false)
{
    $this->url = $url;
    $this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_CURL;

    $options = array(
        'CURLOPT_HEADER' => true,
        'CURLOPT_TIMEOUT' => $timeout,
        'CURLOPT_CONNECTTIMEOUT' => $timeout,
    );
    $curl = new curl();
    $curl->setopt($options);

    if ($headers !== null) {
        // translate simplepie headers to those class curl expects
        foreach ($headers as $name => $value) {
            $curl->setHeader("{$name}: {$value}");
        }
    }

    $this->headers = $curl->get($url);

    if ($curl->error) {
        $this->error = 'cURL Error: ' . $curl->error;
        $this->success = false;
        return false;
    }

    $parser = new SimplePie_HTTP_Parser($this->headers);
    if (!$parser->parse()) {
        return;
    }

    $this->headers = $parser->headers;
    $this->body = $parser->body;
    $this->status_code = $parser->status_code;

    // 300-303 and 307 explicitly, plus everything from 308 up to 399.
    $redirected = in_array($this->status_code, array(300, 301, 302, 303, 307))
        || ($this->status_code > 307 && $this->status_code < 400);

    if ($redirected && isset($this->headers['location']) && $this->redirects < $redirects) {
        $this->redirects++;
        $target = SimplePie_Misc::absolutize_url($this->headers['location'], $url);
        return $this->moodle_simplepie_file($target, $timeout, $redirects, $headers);
    }
}
/**
 * Simple HTTP response parser
 *
 * Splits the raw response into status line, headers and body and follows
 * redirects (at most 10) by re-issuing the request.
 *
 * @param string $headers Full response text including headers and body
 * @param array $req_headers Original $headers array passed to {@link request()}, in case we need to follow redirects
 * @param array $req_data Original $data array passed to {@link request()}, in case we need to follow redirects
 * @param array $req_type Original $type constant passed to {@link request()}, in case we need to follow redirects
 * @param string|null $url Optional original request URL; when given, a relative Location header is resolved against it
 * @return stdClass Contains "body" string, "headers" array, "status code" integer, "success" boolean as properties
 * @throws Exception when the status line cannot be parsed, or for encoded bodies on the fsockopen transport
 */
protected function parse_response($headers, $req_headers, $req_data, $req_type, $url = null)
{
    // Upper bound on followed redirects; this variable was previously undefined.
    $redirects = 10;

    $headers = explode("\r\n\r\n", $headers, 2);
    // Must be created explicitly: assigning a property on an undefined
    // variable is an Error on PHP 8.
    $return = new stdClass();
    $return->body = array_pop($headers);
    $headers = $headers[0];
    $headers = explode("\r\n", $headers);

    preg_match('#^HTTP/1\\.\\d (\\d+)#i', array_shift($headers), $matches);
    if (empty($matches)) {
        throw new Exception('Response could not be parsed');
    }

    $return->status_code = (int) $matches[1];
    // Any 2xx status is a success (the old upper bound of 200 could never match).
    $return->success = ($return->status_code >= 200 && $return->status_code < 300);

    $return->headers = array();
    foreach ($headers as $header) {
        // A line without a colon is not a header field.
        if (strpos($header, ':') === false) {
            continue;
        }
        list($key, $value) = explode(':', $header, 2);
        // Collapse internal whitespace runs; the result used to be thrown away.
        $value = preg_replace('#(\\s+)#i', ' ', trim($value));
        $key = strtolower($key);
        $return->headers[$key] = trim($value);
    }

    if (isset($return->headers['content-encoding']) && $this->transport == 'HTTPRequest_fsockopen') {
        // Bail. We'll handle this at some later date.
        throw new Exception('Encoded feeds are not currently handled');
    }

    //fsockopen and cURL compatibility
    if (isset($return->headers['connection'])) {
        unset($return->headers['connection']);
    }

    $is_redirect = in_array($return->status_code, array(300, 301, 302, 303, 307))
        || ($return->status_code > 307 && $return->status_code < 400);
    if ($is_redirect && isset($return->headers['location']) && $this->redirects < $redirects) {
        $this->redirects++;
        $location = $return->headers['location'];
        if ($url !== null) {
            $location = SimplePie_Misc::absolutize_url($location, $url);
        }
        // $req_data was misspelled as $eq_data, dropping the request data on redirect.
        return $this->request($location, $req_headers, $req_data, $req_type);
    }

    return $return;
}