/**
 * Verifies that extractLink() returns the link with HTML special characters decoded.
 *
 * The link extract pattern is stubbed to capture the whole input, so the only difference
 * between the input and the expected value is the entity-encoded ampersand.
 *
 * NOTE(review): the input uses "&amp;" so that the decoding named by this test is actually
 * exercised - confirm extractLink() decodes HTML entities (e.g. via htmlspecialchars_decode()).
 */
public function testLinkHtmlSpecialCharsAreDecodedByExtractLink()
 {
     $this->aggregatorConfiguration
         ->expects($this->atLeastOnce())
         ->method('getLinkExtractPattern')
         ->willReturn('/^(.*?)$/');

     // Encoded on input, decoded on output; identical strings would make this test pass vacuously.
     $testString = 'http://example.com/file.bin?x=abc&amp;y=123';
     $expectedResult = 'http://example.com/file.bin?x=abc&y=123';

     $this->assertSame($expectedResult, $this->subjectUnderTest->extractLink($testString));
 }
 // Example no. 2
 // 0
 /**
  * Parses given RSS XML and returns a map of extracted item names to processed links.
  *
  * Each <item> found under <channel> is handled in the following way:
  *  - the configured name tag and link tag are read from the item,
  *  - the configured extract patterns are applied to them (the first capture group is used),
  *  - if a link transform is configured, it is applied to the extracted link as
  *    preg_replace(pattern = [0], replacement = [1]).
  * Items with a missing tag, a non-matching pattern, a failed transform or a name which was
  * already collected are skipped and a warning is logged.
  *
  * @param string $feed Valid RSS XML.
  *
  * @return array|false Key contains title, value contains processed link. Bool false is returned when
  *                     the XML cannot be parsed or contains no channel items.
  *
  * @todo Split this method into smaller steps.
  */
 public function parseFeedXml($feed)
 {
     // libxml_get_errors() only returns something when internal error handling is enabled; without it
     // malformed XML raises PHP warnings and the logged context is always empty.
     $previousErrorMode = libxml_use_internal_errors(true);
     libxml_clear_errors(); // make sure only errors of this parse are reported
     $xml = simplexml_load_string($feed);
     $parseErrors = libxml_get_errors();
     libxml_clear_errors();
     libxml_use_internal_errors($previousErrorMode); // restore caller's libxml configuration

     if ($xml === false) {
         $this->logger->error('RSS XML parsing failed in ' . $this->getName(), $parseErrors);
         return false;
     }

     if (!isset($xml->channel->item)) {
         $this->logger->error($this->getName() . ' failed to extract items from XML');
         return false;
     }

     $nameTag = $this->configuration->getNameTagName();
     $linkTag = $this->configuration->getLinkTagName();
     $nameExtract = $this->configuration->getNameExtractPattern();
     $linkExtract = $this->configuration->getLinkExtractPattern();
     $linkTransform = $this->configuration->getLinkTransformPattern();

     $result = [];
     $itemIndex = -1;
     foreach ($xml->channel->item as $item) {
         // Advance for every item (also skipped ones), so "item#" in logs is the zero-based
         // position of the item inside the feed.
         $itemIndex++;

         if (!isset($item->{$nameTag})) {
             $this->logger->warning('Unable to locate name tag <' . $nameTag . '> in ' . $this->getName() . ' for item#' . $itemIndex . '. Skipping item.');
             continue;
         }

         if (!isset($item->{$linkTag})) {
             $this->logger->warning('Unable to locate link tag <' . $linkTag . '> in ' . $this->getName() . ' for item#' . $itemIndex . '. Skipping item.');
             continue;
         }

         // SimpleXMLElement nodes are cast explicitly - preg_match() expects a string subject.
         $rawName = (string)$item->{$nameTag};
         $rawLink = (string)$item->{$linkTag};

         $nameMatches = [];
         if (preg_match($nameExtract, $rawName, $nameMatches) !== 1 || !isset($nameMatches[1])) {
             $this->logger->warning('Extracting name from "' . $rawName . '" failed in ' . $this->getName() . ' for item#' . $itemIndex . '. Skipping item.');
             continue;
         }

         if (isset($result[$nameMatches[1]])) {
             $this->logger->warning('Duplicate name "' . $rawName . '" in ' . $this->getName() . ' for item#' . $itemIndex . '. Skipping item.');
             continue;
         }

         $linkMatches = [];
         if (preg_match($linkExtract, $rawLink, $linkMatches) !== 1 || !isset($linkMatches[1])) {
             $this->logger->warning('Extracting link from "' . $rawLink . '" failed in ' . $this->getName() . ' for item#' . $itemIndex . '. Skipping item.');
             continue;
         }

         $link = $linkMatches[1];
         if ($linkTransform !== null) {
             $link = preg_replace($linkTransform[0], $linkTransform[1], $link);

             // preg_replace() returns null on error - never store that as a link.
             if ($link === null) {
                 $this->logger->warning('Transforming link "' . $linkMatches[1] . '" failed in ' . $this->getName() . ' for item#' . $itemIndex . '. Skipping item.');
                 continue;
             }
         }

         $result[$nameMatches[1]] = $link;
     }

     return $result;
 }
 /**
  * Sets a URL on the tested configuration and expects isValid() to report true afterwards.
  */
 public function testConfigurationIsConsideredValidAfterSettingUrl()
 {
     $configuration = $this->subjectUnderTest;
     $configuration->setUrl('http://example.org/feed.rss');

     $isValid = $configuration->isValid();

     $this->assertTrue($isValid);
 }