Example #1
0
 /**
  * Downloads a live RSS feed with the content grabber enabled and checks
  * that the first item's content contains an image tag.
  *
  * Note: this test performs a real HTTP request.
  */
 public function testRssGrabContent()
 {
     $reader = new Reader();
     $reader->download('http://www.egscomics.com/rss.php');

     $parser = $reader->getParser();
     $this->assertTrue($parser !== false);

     $parser->grabber = true;
     $feed = $parser->execute();

     $this->assertTrue(is_array($feed->items));
     // Guard the [0] access below so an empty feed fails with a clear message.
     $this->assertNotEmpty($feed->items);

     // strpos() returns false when the needle is missing, and `false >= 0`
     // is true in PHP, so the comparison must be strict to be meaningful.
     $this->assertTrue(strpos($feed->items[0]->content, '<img') !== false);
 }
Example #2
0
 /**
  * Each fixture listed here must be detected as an RSS 2.0 feed, i.e.
  * the reader must hand back a PicoFeed\Parsers\Rss20 parser for it.
  */
 public function testFeedsReportedAsNotWorking()
 {
     $fixtures = array(
         'tests/fixtures/cercle.psy.xml',
         'tests/fixtures/ezrss.it',
         'tests/fixtures/grotte_barbu.xml',
     );

     foreach ($fixtures as $fixture) {
         // A fresh reader per fixture, so no state leaks between feeds.
         $reader = new Reader();
         $reader->setContent(file_get_contents($fixture));

         $this->assertInstanceOf('PicoFeed\\Parsers\\Rss20', $reader->getParser());
     }
 }
Example #3
0
 /**
  * Downloads and parses the feed at $url, updates the feed record and
  * stores one post per feed item.
  *
  * For every item the linked page is fetched and run through tidy and
  * Readability to extract the full article. When the page cannot be
  * fetched, cleaned or extracted, the content shipped in the feed itself
  * is stored instead.
  *
  * @param OutputInterface $output Console output used for progress and error messages.
  * @param mixed           $id     Primary key of the feed; passed to updateByPk() and stored as 'feedId'.
  * @param string          $url    URL of the feed to download.
  *
  * @return mixed Result of writeErrors() when the feed cannot be parsed, otherwise null.
  */
 protected function absorb(OutputInterface $output, $id, $url)
 {
     $config = new Config();
     $config->setClientUserAgent('Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:11.0) Gecko/20100101 Firefox/11.0');
     $reader = new Reader($config);
     $reader->download($url);
     $parser = $reader->getParser();
     if ($parser === false) {
         //$this->feedRepository->disableFeed($id);
         return $this->writeErrors($output);
     }
     $feed = $parser->execute();
     if ($feed === false) {
         //$this->feedRepository->disableFeed($id);
         return $this->writeErrors($output);
     }
     $data = ['lang' => $feed->getLanguage(), 'title' => $feed->getTitle(), 'lastUpdate' => $this->formatDateForMySQL($feed->getDate())];
     $this->feedRepository->updateByPk($data, $id);
     foreach ($feed->items as $item) {
         // Separate local so the $url parameter (the feed URL) is not overwritten.
         $itemUrl = $item->getUrl();
         $output->writeln('+ ' . $item->title);

         // Stays null until the full article has been extracted successfully.
         $content = null;

         // file_get_contents() returns false on failure; never hand that to tidy.
         $fullContent = file_get_contents($itemUrl);
         if ($fullContent !== false) {
             $tidy = tidy_parse_string($fullContent, array(), 'UTF8');
             if ($tidy !== false) {
                 $tidy->cleanRepair();
                 $html = $tidy->value;
                 $readability = new \Readability($html, $itemUrl);
                 if ($readability->init()) {
                     $extracted = $readability->getContent()->innerHTML;
                     $tidy = tidy_parse_string($extracted, array('indent' => true, 'show-body-only' => true), 'UTF8');
                     if ($tidy !== false) {
                         $tidy->cleanRepair();
                         $content = $tidy->value;
                     }
                 }
             }
         }

         if ($content === null) {
             // Fall back to whatever content the feed itself provided.
             $output->writeln('unable to get full content');
             $content = $item->getContent();
         }

         $data = ['feedId' => $id, 'remoteId' => $item->getId(), 'title' => $item->getTitle(), 'url' => $itemUrl, 'pubDate' => $this->formatDateForMySQL($item->getDate()), 'content' => $content, 'author' => $item->getAuthor()];
         $this->postRepository->add($data, true);
     }
 }