/**
 * Feed links found in an HTML page must be resolved against the page URI when
 * that URI has a path: '../test.rss' and '/test.atom' discovered on
 * 'http://foo/bar' are expected as 'http://foo/test.rss' and
 * 'http://foo/test.atom'.
 *
 * @group ZF-8330
 */
public function testGetsFeedLinksAndNormalisesRelativeUrlsOnUriWithPath()
{
    // The test is skipped here; everything below only runs once this marker is removed.
    $this->markTestIncomplete('Pending fix to \\Zend\\URI\\URL::validate()');

    $currClient = null;
    try {
        // Remember the reader's current HTTP client so it can be put back afterwards.
        $currClient = Reader\Reader::getHttpClient();

        // Serve a canned HTML page with one relative and one root-relative feed link.
        $testAdapter = new \Zend\HTTP\Client\Adapter\Test();
        $testAdapter->setResponse(new \Zend\HTTP\Response\Response(200, array(), '<!DOCTYPE html><html><head><link rel="alternate" type="application/rss+xml" href="../test.rss"><link rel="alternate" type="application/atom+xml" href="/test.atom"></head><body></body></html>'));
        Reader\Reader::setHttpClient(new \Zend\HTTP\Client(null, array('adapter' => $testAdapter)));

        $links = Reader\Reader::findFeedLinks('http://foo/bar');
        Reader\Reader::setHttpClient($currClient);
    } catch (\Exception $e) {
        // Restore the original client on the failure path too; otherwise the
        // test adapter stays installed on the Reader class for later tests.
        if ($currClient !== null) {
            Reader\Reader::setHttpClient($currClient);
        }
        $this->fail($e->getMessage());
    }

    $this->assertEquals('http://foo/test.rss', $links->rss);
    $this->assertEquals('http://foo/test.atom', $links->atom);
}
/**
 * Feed links found in an HTML page must be resolved against the page URI when
 * that URI has a path: '../test.rss' and '/test.atom' discovered on
 * 'http://foo/bar' are expected as 'http://foo/test.rss' and
 * 'http://foo/test.atom'.
 *
 * @group ZF-8330
 */
public function testGetsFeedLinksAndNormalisesRelativeUrlsOnUriWithPath()
{
    $currClient = null;
    try {
        // Remember the reader's current HTTP client so it can be put back afterwards.
        $currClient = Reader\Reader::getHttpClient();

        // Canned 200 response: an HTML page with one relative and one root-relative feed link.
        $response = new \Zend\Http\Response();
        $response->setContent('<!DOCTYPE html><html><head><link rel="alternate" type="application/rss+xml" href="../test.rss"><link rel="alternate" type="application/atom+xml" href="/test.atom"></head><body></body></html>');
        $response->setStatusCode(200);

        $testAdapter = new \Zend\Http\Client\Adapter\Test();
        $testAdapter->setResponse($response);
        Reader\Reader::setHttpClient(new \Zend\Http\Client(null, array('adapter' => $testAdapter)));

        $links = Reader\Reader::findFeedLinks('http://foo/bar');
        Reader\Reader::setHttpClient($currClient);
    } catch (\Exception $e) {
        // Restore the original client on the failure path too; otherwise the
        // test adapter stays installed on the Reader class for later tests.
        if ($currClient !== null) {
            Reader\Reader::setHttpClient($currClient);
        }
        $this->fail($e->getMessage());
    }

    $this->assertEquals('http://foo/test.rss', $links->rss);
    $this->assertEquals('http://foo/test.atom', $links->atom);
}
/**
 * Import a feed by providing a URI.
 *
 * The feed is first fetched through \Zend\Feed\Reader\Reader, with a filesystem
 * cache and HTTP conditional GET enabled. If that throws, the URI is fetched
 * again by running casperjs and the captured content is parsed instead.
 *
 * @param string $uri
 *            The URI to the feed
 * @param string $format
 *            The output format. "xml" returns the corrected feed data produced by
 *            fix_links() (and fix_encoding() when enabled); any other value
 *            (default "json") returns the parsed feed as a PHP array.
 * @param bool $fixencoding
 *            Whether to run the feed data through fix_encoding() (default true)
 * @return array|false Array with the keys 'feed', 'statuscode', 'headers',
 *         'encoding' and 'data', or false when the URI is invalid or the feed
 *         cannot be fetched or parsed
 */
public function feedRead($uri, $format = "json", $fixencoding = true)
{
    $zuri = \Zend\Uri\UriFactory::factory($uri);
    if (!$zuri->isValid()) {
        $this->logger->logWarn("Invalid Uri ({$uri})");
        return false;
    }

    // Filesystem cache next to this file, entries kept for one hour.
    $cache = \Zend\Cache\StorageFactory::factory(array(
        'adapter' => array(
            'name' => 'filesystem',
            'options' => array(
                'cache_dir' => __DIR__ . DIRECTORY_SEPARATOR . 'cache',
                'ttl' => 3600
            )
        ),
        'plugins' => array(
            array('name' => 'serializer', 'options' => array())
        )
    ));
    \Zend\Feed\Reader\Reader::setCache($cache);
    \Zend\Feed\Reader\Reader::useHttpConditionalGet();

    // Change to firefox agent
    $httpClient = \Zend\Feed\Reader\Reader::getHttpClient();
    $httpClient->setOptions(array(
        'useragent' => 'Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0',
        'timeout' => 25
    ));
    \Zend\Feed\Reader\Reader::setHttpClient($httpClient);

    // Import feed
    try {
        $feed = \Zend\Feed\Reader\Reader::import($uri);
        $httpClient = \Zend\Feed\Reader\Reader::getHttpClient();
        $httpClientResponse = $httpClient->getResponse();

        $feedencoding = $feed->getEncoding();
        if (!$feedencoding) {
            $feedencoding = 'utf-8';
        }

        $feedResponse = array(
            'feed' => $uri,
            'statuscode' => $httpClientResponse->getStatusCode(),
            'headers' => $httpClientResponse->getHeaders()->toArray(),
            'encoding' => $feedencoding
        );
    } catch (\Exception $e) {
        $this->logger->logWarn("Zend feed reader cannot fetch: ({$uri}) because : " . $e->getMessage() . " trying casperjs");

        // Quote the script path as well as the URI, so an install directory
        // containing spaces or shell metacharacters cannot break the command.
        $command = 'casperjs ' . escapeshellarg(__DIR__ . '/html.js') . ' --cookies-file=cookies.txt --url=' . escapeshellarg($uri);
        exec($command, $response, $return_var);

        // NOTE(review): strip_json() presumably turns the captured output lines into an
        // array with 'content' and 'http' keys - confirm against its definition.
        $response = $this->strip_json($response);

        // empty() already covers the "key not set" case.
        if ($return_var !== 0 || empty($response['content'])) {
            $this->logger->logWarn("Cannot fetch: ({$uri}) exit code {$return_var}", array('command' => $command, 'response' => $response));
            $failed = new \Rss\Feed\Reader\ReadFailures($this->postgresql);
            $failed->update_failure($uri, json_encode(array('zend' => $e->getMessage(), 'casper' => $response)));
            return false;
        }

        try {
            $feed = \Zend\Feed\Reader\Reader::importString($response['content']);

            $feedencoding = $feed->getEncoding();
            if (!$feedencoding) {
                $feedencoding = 'utf-8';
            }

            // The HTTP details may be absent from the casperjs output; fall back to
            // null instead of reading undefined indexes.
            $feedResponse = array(
                'feed' => $uri,
                'statuscode' => isset($response['http']['status']) ? $response['http']['status'] : null,
                'headers' => isset($response['http']) ? $response['http'] : null,
                'encoding' => $feedencoding
            );
        } catch (\Exception $e) {
            // Unlike the fetch failure above, a parse failure is only logged; it is
            // not recorded through ReadFailures.
            $this->logger->logWarn("Cannot parse feed content from ({$uri}) because " . $e->getMessage());
            return false;
        }
    }

    // Fix relative links
    $newdata = $this->fix_links($feed, $uri);

    // Fix encoding errors
    if ($fixencoding) {
        $newdata = $this->fix_encoding($newdata);
    }

    // Return XML
    if ($format === "xml") {
        $feedResponse['data'] = $newdata;
        return $feedResponse;
    }

    // Reload fixed data
    try {
        $feed = \Zend\Feed\Reader\Reader::importString($newdata);
    } catch (\Exception $e) {
        $this->logger->logWarn("Cannot parse corrected feed content from ({$uri}) because " . $e->getMessage());
        return false;
    }

    $data = array(
        'title' => $feed->getTitle(),
        'link' => $feed->getLink(),
        'date' => $this->get_feed_date($feed),
        'description' => $feed->getDescription(),
        'language' => $feed->getLanguage(),
        'entries' => array()
    );

    foreach ($feed as $entry) {
        if (is_object($entry)) {
            // Entry dates fall back to the feed date, and finally to "now".
            $DateCreated = $entry->getDateCreated();
            if (is_object($DateCreated)) {
                $DateCreated = $DateCreated->getTimestamp();
            } else {
                $DateCreated = $data['date'];
            }

            $DateModified = $entry->getDateModified();
            if (is_object($DateModified)) {
                $DateModified = $DateModified->getTimestamp();
            } else {
                $DateModified = $data['date'];
            }

            if (empty($DateModified)) {
                $DateModified = time();
            }
            if (empty($DateCreated)) {
                $DateCreated = time();
            }

            // Description and content stand in for each other when one is missing.
            $description = $entry->getDescription();
            $content = $entry->getContent();
            if (empty($description)) {
                $description = $content;
            }
            if (empty($content)) {
                $content = $description;
            }

            $edata = array(
                'title' => $entry->getTitle(),
                'description' => $description,
                'dateCreated' => $DateCreated,
                'dateModified' => $DateModified,
                'link' => $entry->getLink(),
                'content' => $content,
                'feed' => $uri
            );

            // Entries rejected by check_missing() are logged and left out of the result.
            if ($this->check_missing($edata)) {
                $data['entries'][] = $edata;
            } else {
                $this->logger->logTrace("Missing data from feed {$uri} " . json_encode($edata, JSON_UNESCAPED_UNICODE));
            }
        }
    }

    // Return array
    $feedResponse['data'] = $data;
    return $feedResponse;
}