Example #1
0
 /**
  * Build a parser of the requested kind and apply this object's settings to it.
  *
  * The parser source file is loaded from the local "Parsers" directory, the
  * class is instantiated with the current content and encoding, and then the
  * hash algorithm, timezone and config object are pushed into the new instance.
  *
  * @access public
  * @param  string  $name  Parser name
  * @return \PicoFeed\Parser
  */
 public function getParserInstance($name)
 {
     // The file name is capitalised; the class name uses $name as given.
     $parserFile = __DIR__ . '/Parsers/' . ucfirst($name) . '.php';
     require_once $parserFile;

     $parserClass = '\\PicoFeed\\Parsers\\' . $name;
     $instance = new $parserClass($this->content, $this->encoding);

     $instance->setHashAlgo($this->config->getParserHashAlgo());
     $instance->setTimezone($this->config->getTimezone());
     $instance->setConfig($this->config);

     return $instance;
 }
Example #2
0
 /**
  * Fetch the document at the stored URL through the shared client instance.
  *
  * When a config object is set, its timeout, user agent, redirection limit,
  * body size limit and proxy settings are applied to the client first. The
  * fetched content and its encoding are stored on this object.
  *
  * @access public
  * @return mixed  Content reported by the client (expected to be the HTML body)
  */
 public function download()
 {
     $httpClient = Client::getInstance();

     if ($this->config !== null) {
         $settings = $this->config;

         $httpClient
             ->setTimeout($settings->getGrabberTimeout())
             ->setUserAgent($settings->getGrabberUserAgent())
             ->setMaxRedirections($settings->getMaxRedirections())
             ->setMaxBodySize($settings->getMaxBodySize())
             ->setProxyHostname($settings->getProxyHostname())
             ->setProxyPort($settings->getProxyPort())
             ->setProxyUsername($settings->getProxyUsername())
             ->setProxyPassword($settings->getProxyPassword());
     }

     $httpClient->execute($this->url);

     $this->html = $httpClient->getContent();
     $this->encoding = $httpClient->getEncoding();

     return $this->html;
 }
Example #3
0
 /**
  * Download a feed, refresh its stored metadata and store each of its items.
  *
  * For every item the linked page is fetched and reduced to its main content;
  * when that is not possible the content shipped in the feed is stored instead.
  *
  * @param  OutputInterface  $output  Console output used for progress and error messages
  * @param  mixed            $id      Primary key of the feed row to update
  * @param  string           $url     Feed URL
  * @return mixed  Result of writeErrors() when the feed cannot be read or parsed, nothing otherwise
  */
 protected function absorb(OutputInterface $output, $id, $url)
 {
     $config = new Config();
     $config->setClientUserAgent('Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:11.0) Gecko/20100101 Firefox/11.0');
     $reader = new Reader($config);
     $reader->download($url);
     $parser = $reader->getParser();
     if ($parser === false) {
         //$this->feedRepository->disableFeed($id);
         return $this->writeErrors($output);
     }
     $feed = $parser->execute();
     if ($feed === false) {
         //$this->feedRepository->disableFeed($id);
         return $this->writeErrors($output);
     }
     $feedData = ['lang' => $feed->getLanguage(), 'title' => $feed->getTitle(), 'lastUpdate' => $this->formatDateForMySQL($feed->getDate())];
     $this->feedRepository->updateByPk($feedData, $id);
     foreach ($feed->items as $item) {
         // Use a dedicated variable so the feed URL parameter is not overwritten.
         $itemUrl = $item->getUrl();
         $output->writeln('+ ' . $item->title);
         $content = $this->extractFullContent($itemUrl);
         if ($content === null) {
             // Fetching or extraction failed: keep the content provided by the feed.
             $output->writeln('unable to get full content');
             $content = $item->getContent();
         }
         $postData = ['feedId' => $id, 'remoteId' => $item->getId(), 'title' => $item->getTitle(), 'url' => $itemUrl, 'pubDate' => $this->formatDateForMySQL($item->getDate()), 'content' => $content, 'author' => $item->getAuthor()];
         $this->postRepository->add($postData, true);
     }
 }

 /**
  * Fetch a page and reduce it to its main content as tidied HTML.
  *
  * @param  string  $url  Page URL
  * @return string|null   Extracted HTML fragment, or null when any step fails
  */
 private function extractFullContent($url)
 {
     // An empty path makes file_get_contents() throw on PHP 8, so bail out early.
     if (!is_string($url) || $url === '') {
         return null;
     }
     $page = file_get_contents($url);
     if ($page === false) {
         return null;
     }
     $tidy = tidy_parse_string($page, array(), 'UTF8');
     if ($tidy === false) {
         return null;
     }
     $tidy->cleanRepair();
     $readability = new \Readability($tidy->value, $url);
     if (!$readability->init()) {
         return null;
     }
     $extracted = $readability->getContent()->innerHTML;
     $tidy = tidy_parse_string($extracted, array('indent' => true, 'show-body-only' => true), 'UTF8');
     if ($tidy === false) {
         return null;
     }
     $tidy->cleanRepair();
     return $tidy->value;
 }
Example #4
0
 /**
  * Store the config object and, when one is given, load the filter lists from it.
  *
  * Each filter setter receives the value of the matching config getter, which
  * is called with an empty array (presumably its fallback value).
  *
  * @access public
  * @param  \PicoFeed\Config  $config   Config instance
  * @return $this
  */
 public function setConfig($config)
 {
     $this->config = $config;

     // Without a config object there is nothing to load.
     if ($config === null) {
         return $this;
     }

     $this->setIframeWhitelist($config->getFilterIframeWhitelist(array()));
     $this->setIntegerAttributes($config->getFilterIntegerAttributes(array()));
     $this->setAttributeOverrides($config->getFilterAttributeOverrides(array()));
     $this->setRequiredAttributes($config->getFilterRequiredAttributes(array()));
     $this->setMediaBlacklist($config->getFilterMediaBlacklist(array()));
     $this->setMediaAttributes($config->getFilterMediaAttributes(array()));
     $this->setSchemeWhitelist($config->getFilterSchemeWhitelist(array()));
     $this->setBlacklistedTags($config->getFilterBlacklistedTags(array()));
     $this->setWhitelistedTags($config->getFilterWhitelistedTags(array()));

     return $this;
 }
Example #5
0
 /**
  * Apply the settings of a config object to this client.
  *
  * Copies the timeout, user agent, redirection limit, body size limit and
  * proxy settings from the config. A null config leaves the client untouched.
  *
  * @access public
  * @param  \PicoFeed\Config|null  $config   Config instance
  * @return \PicoFeed\Client
  */
 public function setConfig($config)
 {
     // A missing config is a valid state: keep the current settings instead of failing.
     if ($config === null) {
         return $this;
     }

     $this->setTimeout($config->getGrabberTimeout());
     $this->setUserAgent($config->getGrabberUserAgent());
     $this->setMaxRedirections($config->getMaxRedirections());
     $this->setMaxBodySize($config->getMaxBodySize());
     $this->setProxyHostname($config->getProxyHostname());
     $this->setProxyPort($config->getProxyPort());
     $this->setProxyUsername($config->getProxyUsername());
     $this->setProxyPassword($config->getProxyPassword());

     return $this;
 }