/**
 * Parse the OPML document stored in $this->content
 *
 * The content is trimmed, loaded through XmlParser::getSimpleXml() and
 * rejected unless the root element is <opml> and it has a <body> child.
 * The body is then handed to parseEntries().
 *
 * @access public
 * @return array|false   $this->items after parsing, or false when the document is not usable
 */
public function execute()
{
    $class = get_called_class();

    Logging::setMessage($class . ': start importation');

    $xml = XmlParser::getSimpleXml(trim($this->content));

    // The three checks are evaluated left to right, so $xml is never dereferenced when it is false
    $is_opml = $xml !== false && $xml->getName() === 'opml' && isset($xml->body);

    if (!$is_opml) {
        Logging::setMessage($class . ': OPML tag not found or malformed XML document');

        return false;
    }

    $this->parseEntries($xml->body);

    $total = count($this->items);
    Logging::setMessage($class . ': ' . $total . ' subscriptions found');

    return $this->items;
}
/**
 * Do the HTTP request through PHP's HTTP stream wrapper
 *
 * Sends a GET request with optional conditional headers (ETag and
 * Last-Modified) and optional proxy settings, then reads at most
 * max_body_size bytes of the response.
 *
 * @access public
 * @return array|false   HTTP response ['body' => ..., 'status' => ..., 'headers' => ...],
 *                       or false when the connection fails, the body is larger than
 *                       max_body_size, or a gzip body cannot be decoded
 */
public function doRequest()
{
    // Prepare HTTP headers for the request
    $request_headers = array(
        'Connection: close',
        'User-Agent: ' . $this->user_agent,
    );

    // Only advertise gzip when this PHP build is able to decode it
    if (function_exists('gzdecode')) {
        $request_headers[] = 'Accept-Encoding: gzip';
    }

    if ($this->etag) {
        $request_headers[] = 'If-None-Match: ' . $this->etag;
    }

    if ($this->last_modified) {
        $request_headers[] = 'If-Modified-Since: ' . $this->last_modified;
    }

    // Create context
    $context_options = array(
        'http' => array(
            'method' => 'GET',
            'protocol_version' => 1.1,
            'timeout' => $this->timeout,
            'max_redirects' => $this->max_redirects,
            'header' => implode("\r\n", $request_headers),
        ),
    );

    if ($this->proxy_hostname) {

        Logging::setMessage(get_called_class() . ' Proxy: ' . $this->proxy_hostname . ':' . $this->proxy_port);

        $context_options['http']['proxy'] = 'tcp://' . $this->proxy_hostname . ':' . $this->proxy_port;
        $context_options['http']['request_fulluri'] = true;

        if ($this->proxy_username) {
            Logging::setMessage(get_called_class() . ' Proxy credentials: Yes');

            // The header string was already built above, so it has to be rebuilt with the extra line
            $request_headers[] = 'Proxy-Authorization: Basic ' . base64_encode($this->proxy_username . ':' . $this->proxy_password);
            $context_options['http']['header'] = implode("\r\n", $request_headers);
        } else {
            Logging::setMessage(get_called_class() . ' Proxy credentials: No');
        }
    }

    $context = stream_context_create($context_options);

    // Make HTTP request
    $stream = @fopen($this->url, 'r', false, $context);

    if (!is_resource($stream)) {
        return false;
    }

    // Read one byte more than allowed so that an oversized body can be detected
    $body = stream_get_contents($stream, $this->max_body_size + 1);

    // Fetch the response metadata, then release the stream on every path (including the abort below)
    $metadata = stream_get_meta_data($stream);
    fclose($stream);

    // If the body size is too large abort everything
    if (strlen($body) > $this->max_body_size) {
        return false;
    }

    // Get HTTP headers response
    list($status, $headers) = $this->parseHeaders($metadata['wrapper_data']);

    if (isset($headers['Transfer-Encoding']) && $headers['Transfer-Encoding'] === 'chunked') {
        $body = $this->decodeChunked($body);
    }

    if (isset($headers['Content-Encoding']) && $headers['Content-Encoding'] === 'gzip') {

        // A server may send gzip even though it was not requested; calling a missing
        // function would be a fatal error that "@" cannot silence
        if (!function_exists('gzdecode')) {
            Logging::setMessage(get_called_class() . ' Unable to decode gzip content: gzdecode() is not available');

            return false;
        }

        $body = @gzdecode($body);
    }

    return array(
        'status' => $status,
        'body' => $body,
        'headers' => $headers,
    );
}
/**
 * Do the HTTP request with cURL
 *
 * readBody() and readHeaders() are registered as cURL callbacks; once the
 * transfer is finished the method reads the result from $this->body and
 * $this->headers.
 *
 * When open_basedir is set, cURL is not asked to follow redirections, so
 * 301 and 302 responses are followed here by calling this method again
 * with $follow_location = false.
 *
 * @access public
 * @param  bool    $follow_location    Flag used when there is an open_basedir restriction
 * @return array|false                 HTTP response ['body' => ..., 'status' => ..., 'headers' => ...],
 *                                     or false on cURL error, missing Location header or too many redirections
 */
public function doRequest($follow_location = true)
{
    $request_headers = array('Connection: close');

    if ($this->etag) {
        $request_headers[] = 'If-None-Match: ' . $this->etag;
    }

    if ($this->last_modified) {
        $request_headers[] = 'If-Modified-Since: ' . $this->last_modified;
    }

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $this->url);
    curl_setopt($ch, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $this->timeout);
    curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
    curl_setopt($ch, CURLOPT_USERAGENT, $this->user_agent);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $request_headers);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, ini_get('open_basedir') === '');
    curl_setopt($ch, CURLOPT_MAXREDIRS, $this->max_redirects);
    curl_setopt($ch, CURLOPT_ENCODING, '');

    // NOTE(review): peer verification is disabled to accept self-signed certificates,
    // which also disables protection against man-in-the-middle attacks. Left unchanged
    // because callers may depend on it; consider making this configurable.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);

    curl_setopt($ch, CURLOPT_WRITEFUNCTION, array($this, 'readBody'));
    curl_setopt($ch, CURLOPT_HEADERFUNCTION, array($this, 'readHeaders'));
    curl_setopt($ch, CURLOPT_COOKIEJAR, 'php://memory');
    curl_setopt($ch, CURLOPT_COOKIEFILE, 'php://memory');

    if ($this->proxy_hostname) {

        Logging::setMessage(get_called_class() . ' Proxy: ' . $this->proxy_hostname . ':' . $this->proxy_port);

        curl_setopt($ch, CURLOPT_PROXYPORT, $this->proxy_port);

        // CURLOPT_PROXYTYPE expects one of the CURLPROXY_* integer constants, not a string
        curl_setopt($ch, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
        curl_setopt($ch, CURLOPT_PROXY, $this->proxy_hostname);

        if ($this->proxy_username) {
            Logging::setMessage(get_called_class() . ' Proxy credentials: Yes');
            curl_setopt($ch, CURLOPT_PROXYUSERPWD, $this->proxy_username . ':' . $this->proxy_password);
        } else {
            Logging::setMessage(get_called_class() . ' Proxy credentials: No');
        }
    }

    curl_exec($ch);

    Logging::setMessage(get_called_class() . ' cURL total time: ' . curl_getinfo($ch, CURLINFO_TOTAL_TIME));
    Logging::setMessage(get_called_class() . ' cURL dns lookup time: ' . curl_getinfo($ch, CURLINFO_NAMELOOKUP_TIME));
    Logging::setMessage(get_called_class() . ' cURL connect time: ' . curl_getinfo($ch, CURLINFO_CONNECT_TIME));
    Logging::setMessage(get_called_class() . ' cURL speed download: ' . curl_getinfo($ch, CURLINFO_SPEED_DOWNLOAD));
    Logging::setMessage(get_called_class() . ' cURL effective url: ' . curl_getinfo($ch, CURLINFO_EFFECTIVE_URL));

    if (curl_errno($ch)) {
        Logging::setMessage(get_called_class() . ' cURL error: ' . curl_error($ch));
        curl_close($ch);

        return false;
    }

    curl_close($ch);

    // Only the last header block is parsed; guard against a transfer that produced none
    $last_block = $this->headers_counter - 1;
    $raw_headers = isset($this->headers[$last_block]) ? $this->headers[$last_block] : '';

    list($status, $headers) = $this->parseHeaders(explode("\r\n", $raw_headers));

    $manual_redirect = $follow_location && ini_get('open_basedir') !== '' && ($status == 301 || $status == 302);

    if (!$manual_redirect) {
        return array(
            'status' => $status,
            'body' => $this->body,
            'headers' => $headers,
        );
    }

    // cURL did not follow the redirection, so do it by hand
    $nb_redirects = 0;
    $redirect_headers = $headers;

    while (true) {

        // A redirection without a target cannot be followed
        if (empty($redirect_headers['Location'])) {
            Logging::setMessage(get_called_class() . ' Redirection without Location header');

            return false;
        }

        // Point the client at the new URL and forget the previous response
        $this->url = $redirect_headers['Location'];
        $this->body = '';
        $this->body_length = 0;
        $this->headers = array();
        $this->headers_counter = 0;

        $nb_redirects++;

        if ($nb_redirects >= $this->max_redirects) {
            return false;
        }

        $result = $this->doRequest(false);

        // The nested request failed; do not index into a boolean
        if ($result === false) {
            return false;
        }

        if ($result['status'] != 301 && $result['status'] != 302) {
            return $result;
        }

        $redirect_headers = $result['headers'];
    }
}
/**
 * Parse HTTP headers
 *
 * Status lines ("HTTP/1.1 200 OK", "HTTP/2 200") update the status code,
 * every other line containing a colon is split into a name and a value.
 * When the same header name appears several times the last value wins,
 * and when several status lines are present the last one wins.
 *
 * @access public
 * @param  array   $lines   List of headers
 * @return array            array($status, $headers): integer status code (200 when no
 *                          status line is found) and a map of header name => value
 */
public function parseHeaders(array $lines)
{
    $status = 200;
    $headers = array();

    foreach ($lines as $line) {

        // Match the version generically: a fixed offset breaks on "HTTP/2 200"
        if (preg_match('#^HTTP/\d+(?:\.\d+)?\s+(\d{3})#', $line, $matches)) {
            $status = (int) $matches[1];
            continue;
        }

        if (strpos($line, ':') === false) {
            continue;
        }

        // Split on the first colon only, values such as URLs or dates contain colons too
        list($name, $value) = explode(':', $line, 2);

        $name = trim($name);
        $value = trim($value);

        // Compare with the empty string so that a value of "0" is kept
        if ($name !== '' && $value !== '') {
            $headers[$name] = $value;
        }
    }

    Logging::setMessage(get_called_class() . ' HTTP status code: ' . $status);

    foreach ($headers as $name => $value) {
        Logging::setMessage(get_called_class() . ' HTTP header: ' . $name . ' => ' . $value);
    }

    return array($status, $headers);
}
/**
 * Write every message returned by Logging::getMessages() to the output,
 * each one on its own line and wrapped in <error> tags
 *
 * @access protected
 * @param  OutputInterface   $output   Output to write to
 */
protected function writeErrors(OutputInterface $output)
{
    $messages = Logging::getMessages();

    foreach ($messages as $entry) {
        $output->writeln(sprintf('<error>%s</error>', $entry));
    }
}
/**
 * Remove unwanted elements from $this->content
 *
 * Elements whose tag name is listed in $this->stripTags are removed first,
 * then elements whose class or id attribute contains one of the values
 * listed in $this->stripAttributes. The content is left untouched when it
 * cannot be loaded as a DOM document.
 *
 * @access public
 */
public function stripGarbage()
{
    $dom = XmlParser::getDomDocument($this->content);

    if ($dom === false) {
        return;
    }

    $xpath = new DOMXPath($dom);

    // Each rule is array(XPath query, log message suffix); tags come before attributes
    $rules = array();

    foreach ($this->stripTags as $tag) {
        $rules[] = array(
            '//' . $tag,
            ' Strip tag: "' . $tag . '"',
        );
    }

    foreach ($this->stripAttributes as $attribute) {
        $rules[] = array(
            '//*[contains(@class, "' . $attribute . '") or contains(@id, "' . $attribute . '")]',
            ' Strip attribute: "' . $attribute . '"',
        );
    }

    foreach ($rules as $rule) {

        $matches = $xpath->query($rule[0]);

        if ($matches === false || $matches->length === 0) {
            continue;
        }

        Logging::setMessage(get_called_class() . $rule[1]);

        foreach ($matches as $element) {
            $element->parentNode->removeChild($element);
        }
    }

    $this->content = $dom->saveXML($dom->documentElement);
}
/**
 * Discover the feed url inside a HTML document and download the feed
 *
 * Looks for the first <link> element of type application/rss+xml, then
 * application/atom+xml, and passes its href to download(). Links that are
 * not absolute http(s) URLs are resolved against $this->url:
 *
 * - "//host/feed"  reuses the scheme of the page
 * - "/feed"        is resolved against the scheme, host and port of the page
 * - "feed"         is appended to the page URL
 *
 * $this->url itself is not modified by the resolution.
 *
 * @access public
 * @return boolean   True when a link was found and handed to download()
 */
public function discover()
{
    if (!$this->content) {
        return false;
    }

    Logging::setMessage(get_called_class() . ': Try to discover a subscription');

    $dom = XmlParser::getHtmlDocument($this->content);
    $xpath = new DOMXPath($dom);

    $queries = array(
        '//link[@type="application/rss+xml"]',
        '//link[@type="application/atom+xml"]',
    );

    foreach ($queries as $query) {

        $nodes = $xpath->query($query);

        if ($nodes === false || $nodes->length === 0) {
            continue;
        }

        $link = trim($nodes->item(0)->getAttribute('href'));

        if (empty($link)) {
            continue;
        }

        // Relative links
        if (stripos($link, 'http://') !== 0 && stripos($link, 'https://') !== 0) {

            // parse_url() returns false on badly malformed URLs, isset()/empty() below cope with that
            $base = parse_url($this->url);
            $scheme = isset($base['scheme']) ? $base['scheme'] : 'http';

            if (strpos($link, '//') === 0) {

                // Protocol-relative link: only the scheme is missing
                $link = $scheme . ':' . $link;
            } elseif ($link[0] === '/' && !empty($base['host'])) {

                // Root-relative link: belongs to the site root, not to the current page path
                $authority = $base['host'];

                if (isset($base['port'])) {
                    $authority .= ':' . $base['port'];
                }

                $link = $scheme . '://' . $authority . $link;
            } else {

                // Anything else is appended to the page URL with exactly one slash in between
                $prefix = $this->url;

                if (substr($prefix, -1) !== '/') {
                    $prefix .= '/';
                }

                if ($link[0] === '/') {
                    $link = substr($link, 1);
                }

                $link = $prefix . $link;
            }
        }

        Logging::setMessage(get_called_class() . ': Find subscription link: ' . $link);
        $this->download($link);

        return true;
    }

    return false;
}