/**
 * Build the parser class matching the given name and hand it the current configuration
 *
 * @access  public
 * @param   string  $name   Parser name
 * @return  \PicoFeed\Parser
 */
public function getParserInstance($name)
{
    // Parser files are loaded by hand from the Parsers directory next to this file;
    // only the file name is capitalised, the class name uses $name as given.
    require_once __DIR__ . '/Parsers/' . ucfirst($name) . '.php';

    $class = '\\PicoFeed\\Parsers\\' . $name;
    $config = $this->config;

    $instance = new $class($this->content, $this->encoding);
    $instance->setHashAlgo($config->getParserHashAlgo());
    $instance->setTimezone($config->getTimezone());
    $instance->setConfig($config);

    return $instance;
}
/**
 * Download the page at $this->url and remember its content and encoding
 *
 * @access  public
 * @return  string  HTML content (whatever the client's getContent() yields)
 */
public function download()
{
    $http = Client::getInstance();
    $config = $this->config;

    // Client settings are only overridden when a config object is available
    if ($config !== null) {
        $http
            ->setTimeout($config->getGrabberTimeout())
            ->setUserAgent($config->getGrabberUserAgent())
            ->setMaxRedirections($config->getMaxRedirections())
            ->setMaxBodySize($config->getMaxBodySize())
            ->setProxyHostname($config->getProxyHostname())
            ->setProxyPort($config->getProxyPort())
            ->setProxyUsername($config->getProxyUsername())
            ->setProxyPassword($config->getProxyPassword());
    }

    $http->execute($this->url);

    $this->html = $http->getContent();
    $this->encoding = $http->getEncoding();

    return $this->html;
}
/**
 * Download a feed, update its stored metadata and store every item as a post
 *
 * For each item the linked page is fetched and reduced to its main content;
 * when that is not possible the content shipped in the feed is stored instead.
 *
 * @param OutputInterface $output Console output used for progress and error messages
 * @param mixed           $id     Feed primary key (passed to updateByPk() and stored as feedId)
 * @param string          $url    Feed URL
 * @return mixed Result of writeErrors() when the feed cannot be read or parsed, nothing otherwise
 */
protected function absorb(OutputInterface $output, $id, $url)
{
    $config = new Config();
    $config->setClientUserAgent('Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:11.0) Gecko/20100101 Firefox/11.0');

    $reader = new Reader($config);
    $reader->download($url);

    // NOTE: the feed is not disabled on failure (the disableFeed($id) call is switched off);
    // only the errors are reported.
    $parser = $reader->getParser();
    if ($parser === false) {
        return $this->writeErrors($output);
    }

    $feed = $parser->execute();
    if ($feed === false) {
        return $this->writeErrors($output);
    }

    $data = [
        'lang' => $feed->getLanguage(),
        'title' => $feed->getTitle(),
        'lastUpdate' => $this->formatDateForMySQL($feed->getDate()),
    ];
    $this->feedRepository->updateByPk($data, $id);

    foreach ($feed->items as $item) {
        // Separate variable so the feed URL parameter is not overwritten
        $itemUrl = $item->getUrl();
        $output->writeln('+ ' . $item->title);

        $content = $this->fetchReadableContent($itemUrl);
        if ($content === null) {
            $output->writeln('unable to get full content');
            $content = $item->getContent();
        }

        $data = [
            'feedId' => $id,
            'remoteId' => $item->getId(),
            'title' => $item->getTitle(),
            'url' => $itemUrl,
            'pubDate' => $this->formatDateForMySQL($item->getDate()),
            'content' => $content,
            'author' => $item->getAuthor(),
        ];
        $this->postRepository->add($data, true);
    }
}

/**
 * Fetch an article page and extract its main content as tidied HTML
 *
 * @param string $url Article URL
 * @return string|null Cleaned article HTML, or null when the page cannot be
 *                     downloaded, parsed by tidy, or reduced by Readability
 */
private function fetchReadableContent($url)
{
    // file_get_contents() returns false (and raises a warning) on failure
    $page = file_get_contents($url);
    if ($page === false) {
        return null;
    }

    // Repair the markup before handing it to Readability
    $tidy = tidy_parse_string($page, array(), 'UTF8');
    if ($tidy === false) {
        return null;
    }
    $tidy->cleanRepair();

    $readability = new \Readability($tidy->value, $url);
    if (!$readability->init()) {
        return null;
    }

    // Second tidy pass: indent the extracted fragment and keep only the body
    $extracted = $readability->getContent()->innerHTML;
    $tidy = tidy_parse_string($extracted, array('indent' => true, 'show-body-only' => true), 'UTF8');
    if ($tidy === false) {
        return null;
    }
    $tidy->cleanRepair();

    return $tidy->value;
}
/**
 * Store the config object and, when one is given, load every filter setting from it
 *
 * @access  public
 * @param   \PicoFeed\Config|null  $config   Config instance
 * @return  $this
 */
public function setConfig($config)
{
    $this->config = $config;

    // Without a config object the current filter settings are left untouched
    if ($config === null) {
        return $this;
    }

    // Each config getter receives an empty array (presumably its default value)
    $this->setIframeWhitelist($config->getFilterIframeWhitelist(array()));
    $this->setIntegerAttributes($config->getFilterIntegerAttributes(array()));
    $this->setAttributeOverrides($config->getFilterAttributeOverrides(array()));
    $this->setRequiredAttributes($config->getFilterRequiredAttributes(array()));
    $this->setMediaBlacklist($config->getFilterMediaBlacklist(array()));
    $this->setMediaAttributes($config->getFilterMediaAttributes(array()));
    $this->setSchemeWhitelist($config->getFilterSchemeWhitelist(array()));
    $this->setBlacklistedTags($config->getFilterBlacklistedTags(array()));
    $this->setWhitelistedTags($config->getFilterWhitelistedTags(array()));

    return $this;
}
/**
 * Apply the timeout, user agent, redirect, body-size and proxy settings of a config object
 *
 * @access  public
 * @param   \PicoFeed\Config|null  $config   Config instance; null leaves the client unchanged
 * @return  \PicoFeed\Client
 */
public function setConfig($config)
{
    // A missing config is valid: keep the current settings instead of
    // failing on a method call against null
    if ($config === null) {
        return $this;
    }

    $this->setTimeout($config->getGrabberTimeout());
    $this->setUserAgent($config->getGrabberUserAgent());
    $this->setMaxRedirections($config->getMaxRedirections());
    $this->setMaxBodySize($config->getMaxBodySize());
    $this->setProxyHostname($config->getProxyHostname());
    $this->setProxyPort($config->getProxyPort());
    $this->setProxyUsername($config->getProxyUsername());
    $this->setProxyPassword($config->getProxyPassword());

    return $this;
}