/**
 * Downloads a live RSS feed with the content grabber enabled and checks
 * that the first item's content contains an image tag.
 *
 * Note: this test performs a real HTTP request to egscomics.com.
 */
public function testRssGrabContent()
{
    $reader = new Reader();
    $reader->download('http://www.egscomics.com/rss.php');

    $parser = $reader->getParser();
    $this->assertTrue($parser !== false);

    // Ask the parser to grab item content while parsing.
    $parser->grabber = true;
    $feed = $parser->execute();

    $this->assertTrue(is_array($feed->items));
    // Guard the index access below: an empty feed must fail the test
    // cleanly instead of raising an undefined-offset notice.
    $this->assertNotEmpty($feed->items);

    // strpos() returns false when the needle is absent, and `false >= 0`
    // is true in PHP, so the result must be compared strictly against false.
    $this->assertTrue(strpos($feed->items[0]->content, '<img') !== false);
}
/**
 * Regression check for fixture feeds: each one must be detected as RSS 2.0
 * when its raw content is handed to a fresh Reader.
 */
public function testFeedsReportedAsNotWorking()
{
    $fixtures = [
        'tests/fixtures/cercle.psy.xml',
        'tests/fixtures/ezrss.it',
        'tests/fixtures/grotte_barbu.xml',
    ];

    foreach ($fixtures as $fixture) {
        // A new Reader per fixture, exactly one parser lookup each.
        $reader = new Reader();
        $reader->setContent(file_get_contents($fixture));

        $this->assertInstanceOf('PicoFeed\\Parsers\\Rss20', $reader->getParser());
    }
}
/**
 * Downloads and parses the feed at $url, updates the stored feed record and
 * stores one post per feed item.
 *
 * For every item the linked page is fetched and run through Tidy and
 * Readability to extract the full article body. When any step of that
 * extraction fails, the content shipped in the feed item is stored instead.
 *
 * @param OutputInterface $output Console output used for progress and error messages.
 * @param mixed           $id     Primary key of the feed record being refreshed.
 * @param string          $url    URL of the feed to download.
 *
 * @return mixed The result of writeErrors() when the feed cannot be parsed; nothing otherwise.
 */
protected function absorb(OutputInterface $output, $id, $url)
{
    $config = new Config();
    $config->setClientUserAgent('Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:11.0) Gecko/20100101 Firefox/11.0');

    $reader = new Reader($config);
    $reader->download($url);

    $parser = $reader->getParser();
    if ($parser === false) {
        // Disabling the feed on failure was commented out in the original:
        //$this->feedRepository->disableFeed($id);
        return $this->writeErrors($output);
    }

    $feed = $parser->execute();
    if ($feed === false) {
        // Disabling the feed on failure was commented out in the original:
        //$this->feedRepository->disableFeed($id);
        return $this->writeErrors($output);
    }

    $this->feedRepository->updateByPk(
        [
            'lang' => $feed->getLanguage(),
            'title' => $feed->getTitle(),
            'lastUpdate' => $this->formatDateForMySQL($feed->getDate()),
        ],
        $id
    );

    foreach ($feed->items as $item) {
        // Separate variable so the $url parameter (the feed URL) is not overwritten.
        $itemUrl = $item->getUrl();
        $output->writeln('+ ' . $item->title);

        // Stays null until the full article body has been extracted.
        $content = null;

        // file_get_contents() and tidy_parse_string() both return false on
        // failure; check them instead of passing false further down.
        $fullContent = file_get_contents($itemUrl);
        $tidy = $fullContent === false ? false : tidy_parse_string($fullContent, [], 'UTF8');

        if ($tidy !== false) {
            $tidy->cleanRepair();

            $readability = new \Readability($tidy->value, $itemUrl);
            if ($readability->init()) {
                $extracted = $readability->getContent()->innerHTML;

                // Second Tidy pass: indent the extracted fragment and keep only the body.
                $cleaned = tidy_parse_string($extracted, ['indent' => true, 'show-body-only' => true], 'UTF8');
                if ($cleaned !== false) {
                    $cleaned->cleanRepair();
                    $content = $cleaned->value;
                } else {
                    // Formatting failed; keep the extracted markup as-is.
                    $content = $extracted;
                }
            }
        }

        if ($content === null) {
            $output->writeln('unable to get full content');
            $content = $item->getContent();
        }

        $this->postRepository->add(
            [
                'feedId' => $id,
                'remoteId' => $item->getId(),
                'title' => $item->getTitle(),
                'url' => $itemUrl,
                'pubDate' => $this->formatDateForMySQL($item->getDate()),
                'content' => $content,
                'author' => $item->getAuthor(),
            ],
            true
        );
    }
}