/**
 * Import the subscriptions described by the OPML content.
 *
 * The raw content is trimmed and parsed as XML. The document is accepted
 * only when its root element is `opml` and it contains a `body` element;
 * the body is then handed to parseEntries().
 *
 * @return array|false The collected items ($this->items), or false when
 *                     the XML cannot be parsed or is not an OPML document
 */
public function execute()
{
    $caller = get_called_class();
    Logger::setMessage($caller . ': start importation');

    $document = XmlParser::getSimpleXml(trim($this->content));

    // Evaluated left to right, so nothing is called on a failed parse.
    $isOpml = $document !== false
        && $document->getName() === 'opml'
        && isset($document->body);

    if (!$isOpml) {
        Logger::setMessage($caller . ': OPML tag not found or malformed XML document');

        return false;
    }

    $this->parseEntries($document->body);
    Logger::setMessage($caller . ': ' . count($this->items) . ' subscriptions found');

    return $this->items;
}
/**
 * Build the options array for the HTTP stream context.
 *
 * Always sets the method, protocol version, timeout and redirect limit.
 * When a proxy hostname is configured, the proxy address and the
 * `request_fulluri` flag are added and the presence of proxy credentials
 * is logged. The request headers from prepareHeaders() are joined with
 * CRLF and stored last.
 *
 * @return array Options nested under the 'http' key
 */
private function prepareContext()
{
    $http = array(
        'method' => 'GET',
        'protocol_version' => 1.1,
        'timeout' => $this->timeout,
        'max_redirects' => $this->max_redirects,
    );

    if ($this->proxy_hostname) {
        Logger::setMessage(get_called_class() . ' Proxy: ' . $this->proxy_hostname . ':' . $this->proxy_port);

        $http['proxy'] = 'tcp://' . $this->proxy_hostname . ':' . $this->proxy_port;
        $http['request_fulluri'] = true;

        // Only whether credentials exist is logged, never their value.
        $credentials = $this->proxy_username
            ? ' Proxy credentials: Yes'
            : ' Proxy credentials: No';
        Logger::setMessage(get_called_class() . $credentials);
    }

    $http['header'] = implode("\r\n", $this->prepareHeaders());

    return array('http' => $http);
}
/**
 * Parse raw HTTP response header lines.
 *
 * Every line starting with "HTTP/1" is treated as a status line: the
 * status code is read from it and the headers collected so far are
 * discarded, so only the headers following the last status line are kept.
 * Other lines containing ": " are split into a name and a value; headers
 * with an empty value are skipped. The status code and each kept header
 * are logged.
 *
 * @static
 *
 * @param array $lines List of headers
 *
 * @return array Status code (int) at index 0, an instance of this class
 *               built from the name => value header map at index 1
 */
public static function parse(array $lines)
{
    $status = 0;
    $headers = array();

    foreach ($lines as $line) {
        if (strpos($line, 'HTTP/1') === 0) {
            // New status line: start over with an empty header map.
            $headers = array();
            // In "HTTP/1.x NNN ..." the three-digit code sits at offset 9.
            $status = (int) substr($line, 9, 3);
        } elseif (strpos($line, ': ') !== false) {
            // Split on the first separator only, so a value that itself
            // contains ": " is not truncated.
            list($name, $value) = explode(': ', $line, 2);
            $value = trim($value);

            // Compare with '' rather than relying on truthiness, so a
            // legitimate "0" value (e.g. "Content-Length: 0") is kept.
            if ($value !== '') {
                $headers[trim($name)] = $value;
            }
        }
    }

    Logger::setMessage(get_called_class() . ' HTTP status code: ' . $status);

    foreach ($headers as $name => $value) {
        Logger::setMessage(get_called_class() . ' HTTP header: ' . $name . ' => ' . $value);
    }

    return array($status, new self($headers));
}
/**
 * Discover feed URLs advertised by a HTML document.
 *
 * Looks for `<link>` elements typed as RSS, then as Atom, and collects
 * their non-empty `href` values. A relative href is made absolute using
 * the base URL of the website; any other href is resolved with an empty
 * base. The resulting list is logged.
 *
 * @param string $url  Website url
 * @param string $html HTML content
 *
 * @return array List of feed links
 */
public function find($url, $html)
{
    Logger::setMessage(get_called_class() . ': Try to discover subscriptions');

    $xpath = new DOMXPath(XmlParser::getHtmlDocument($html));
    $feeds = array();

    $expressions = array(
        '//link[@type="application/rss+xml"]',
        '//link[@type="application/atom+xml"]',
    );

    foreach ($expressions as $expression) {
        foreach ($xpath->query($expression) as $element) {
            $href = $element->getAttribute('href');

            if (empty($href)) {
                continue;
            }

            $feedUrl = new Url($href);
            $siteUrl = new Url($url);
            $base = $feedUrl->isRelativeUrl() ? $siteUrl->getBaseUrl() : '';

            $feeds[] = $feedUrl->getAbsoluteUrl($base);
        }
    }

    Logger::setMessage(get_called_class() . ': ' . implode(', ', $feeds));

    return $feeds;
}
/**
 * Run the HTML filter over an item's content.
 *
 * When filtering is enabled, the item content is passed through
 * Filter::html() (with the feed's site URL and this object's config) and
 * the result replaces `$item->content`. Otherwise the item is left
 * untouched and a message is logged.
 *
 * @param Feed $feed Feed object
 * @param Item $item Item object
 */
public function filterItemContent(Feed $feed, Item $item)
{
    if (!$this->isFilteringEnabled()) {
        Logger::setMessage(get_called_class() . ': Content filtering disabled');

        return;
    }

    $htmlFilter = Filter::html($item->getContent(), $feed->getSiteUrl());
    $htmlFilter->setConfig($this->config);

    $item->content = $htmlFilter->execute();
}
/**
 * Execute curl context.
 *
 * Runs the handle returned by prepareContext(), logs the transfer timings
 * and the effective URL, then either reports the cURL error through
 * handleError() or stores the effective URL in `$this->url`. The handle is
 * closed exactly once on both paths.
 */
private function executeContext()
{
    $ch = $this->prepareContext();
    curl_exec($ch);

    Logger::setMessage(get_called_class() . ' cURL total time: ' . curl_getinfo($ch, CURLINFO_TOTAL_TIME));
    Logger::setMessage(get_called_class() . ' cURL dns lookup time: ' . curl_getinfo($ch, CURLINFO_NAMELOOKUP_TIME));
    Logger::setMessage(get_called_class() . ' cURL connect time: ' . curl_getinfo($ch, CURLINFO_CONNECT_TIME));
    Logger::setMessage(get_called_class() . ' cURL speed download: ' . curl_getinfo($ch, CURLINFO_SPEED_DOWNLOAD));
    Logger::setMessage(get_called_class() . ' cURL effective url: ' . curl_getinfo($ch, CURLINFO_EFFECTIVE_URL));

    $curl_errno = curl_errno($ch);

    if ($curl_errno) {
        Logger::setMessage(get_called_class() . ' cURL error: ' . curl_error($ch));
        curl_close($ch);

        // NOTE(review): handleError() presumably throws; it is not visible here.
        $this->handleError($curl_errno);

        // If handleError() returns, the handle is already closed: stop here
        // instead of querying and closing it a second time.
        return;
    }

    // Update the url if there were redirects
    $this->url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);

    curl_close($ch);
}
/**
 * Convert the HTML to a normalized encoding and strip its head tags.
 *
 * The encoding declared in the document's meta tag is preferred; when none
 * is found, `$this->encoding` is used instead (the log message labels it
 * the HTTP encoding). Both encodings are logged afterwards.
 */
public function prepareHtml()
{
    $metaEncoding = XmlParser::getEncodingFromMetaTag($this->html);

    // Any falsy meta-tag result falls back to the object's own encoding.
    $sourceEncoding = $metaEncoding ?: $this->encoding;

    $this->html = Encoding::convert($this->html, $sourceEncoding);
    $this->html = Filter::stripHeadTags($this->html);

    Logger::setMessage(
        get_called_class() . ': HTTP Encoding "' . $this->encoding . '" ; HTML Encoding "' . $metaEncoding . '"'
    );
}
/**
 * Fetch a URL with a configured HTTP client.
 *
 * A ClientException raised while fetching is logged and swallowed, so the
 * client is returned whether or not the download succeeded.
 *
 * @param string $url URL
 *
 * @return \AsteFeed\Client Client instance
 */
public function download($url)
{
    $http = Client::getInstance();
    $http->setConfig($this->config);

    Logger::setMessage(get_called_class() . ' Download => ' . $url);

    try {
        $http->execute($url);
    } catch (ClientException $exception) {
        // Best effort: record the failure and still hand the client back.
        Logger::setMessage(get_called_class() . ' Download Failed => ' . $exception->getMessage());
    }

    return $http;
}
/**
 * Load the first existing rule file among a list of candidates.
 *
 * Each candidate name is looked up as `<folder>/<name>.php`, in the order
 * given. The first file that exists is included and whatever it returns
 * is passed back to the caller.
 *
 * @param string $folder Rule directory
 * @param array  $files  List of possible file names
 *
 * @return array Value returned by the included file, or an empty array
 *               when no candidate exists
 */
public function loadRuleFile($folder, array $files)
{
    foreach ($files as $file) {
        $filename = $folder . '/' . $file . '.php';

        if (!file_exists($filename)) {
            continue;
        }

        Logger::setMessage(get_called_class() . ' Load rule: ' . $file);

        // The included file runs in this method's scope, so it can see
        // $folder, $files, $file and $filename.
        return include $filename;
    }

    return array();
}
/**
 * Tell whether a node may be removed from the document.
 *
 * A node is protected when its text accounts for 90% or more of the whole
 * document's text, measured in bytes with strlen(). A document with no
 * text at all never protects a node.
 *
 * @param DomDocument $dom
 * @param DomNode     $node
 *
 * @return bool False if the node should be kept, true otherwise
 */
public function shouldRemove(DomDocument $dom, $node)
{
    $totalBytes = strlen($dom->textContent);
    $nodeBytes = strlen($node->textContent);

    // Also avoids a division by zero below.
    if ($totalBytes === 0) {
        return true;
    }

    $percentage = $nodeBytes * 100 / $totalBytes;

    if ($percentage < 90) {
        return true;
    }

    Logger::setMessage(get_called_class() . ': Should not remove this node (' . $node->nodeName . ') ratio: ' . $percentage . '%');

    return false;
}
/**
 * Update the modification state from a client response.
 *
 * - Status 304: the resource is flagged as not modified.
 * - Status 200: hasBeenModified() is evaluated against the currently
 *   stored ETag and Last-Modified values, which are then replaced by the
 *   ones found in the response headers.
 * - Any other status leaves the state untouched.
 *
 * A message is logged whenever the resource ends up flagged as not
 * modified.
 *
 * @param array $response Client response
 */
public function handleNotModifiedResponse(array $response)
{
    // switch compares loosely, so numeric strings match as well.
    switch ($response['status']) {
        case 304:
            $this->is_modified = false;
            break;

        case 200:
            // Compare against the old validators before overwriting them.
            $this->is_modified = $this->hasBeenModified($response, $this->etag, $this->last_modified);
            $this->etag = $this->getHeader($response, 'ETag');
            $this->last_modified = $this->getHeader($response, 'Last-Modified');
            break;
    }

    if ($this->is_modified === false) {
        Logger::setMessage(get_called_class() . ' Resource not modified');
    }
}