Inheritance: implements FastFeed\Processor\ProcessorInterface
 /**
  * Seeds the first fixture item's intro and content with the same raw value,
  * applies one allowed-tag list to both fields of the processor, runs the
  * processor over the fixture items and checks both fields against $expected.
  *
  * @dataProvider dataProvider
  *
  * @param mixed $expected    Value both intro and content must equal after processing
  * @param mixed $actual      Raw value written to intro and content before processing
  * @param mixed $allowedTags Allowed-tag setting handed to the processor for both fields
  */
 public function testProcess($expected, $actual, $allowedTags)
 {
     // Arrange: the same input goes into both fields of the first item.
     $subject = $this->items[0];
     $subject->setIntro($actual);
     $subject->setContent($actual);

     $processor = $this->processor;
     $processor->setAllowedTagsForContent($allowedTags);
     $processor->setAllowedTagsForIntro($allowedTags);

     // Act: the processor's return value replaces the fixture list.
     $this->items = $processor->process($this->items);

     // Assert: re-read the first item from the returned list.
     $processed = $this->items[0];
     $this->assertEquals($expected, $processed->getIntro());
     $this->assertEquals($expected, $processed->getContent());
 }
Example #2
0
 /**
  * Scrape the RSS feed
  *
  * Downloads the document at $this->feed, parses it as XML and stores one
  * associative array per channel item in $this->articles. Each array has the
  * keys "title", "date", "source", "blurb", "body" and "topic".
  *
  * @since Version 3.9
  * @return \Railpage\News\Scraper
  * @throws Exception if $this->feed is not a string. SimpleXMLElement and
  *                   DateTime also throw when given unparseable XML / dates.
  */
 public function fetch()
 {
     if (!is_string($this->feed)) {
         throw new Exception("Cannot fetch news articles from RSS feed because no RSS feed was provided");
     }

     $articles = [];

     // Every article date is converted to this zone; build it once.
     $timezone = new DateTimeZone("Australia/Melbourne");

     /**
      * Zend HTTP config
      */
     $config = [
         'adapter' => 'Zend\\Http\\Client\\Adapter\\Curl',
         'curloptions' => [CURLOPT_FOLLOWLOCATION => true],
     ];
     $client = new Client($this->feed, $config);

     /**
      * Fetch the RSS feed and load it into a SimpleXML object
      */
     $response = $client->send();
     $xml = new SimpleXMLElement($response->getBody());

     /**
      * Load the namespaces; the check for a "content" namespace is the same
      * for every item, so it is evaluated once here
      */
     $ns = $xml->getNamespaces(true);
     $hasContentNs = !empty($ns['content']);

     /**
      * Loop through each RSS item and build an associative array of the data we need
      */
     foreach ($xml->channel->item as $item) {
         if ($hasContentNs) {
             // Use the <encoded> child from the "content" namespace.
             $content = strval($item->children($ns['content'])->encoded);
         } else {
             // strip_tags() takes its whitelist as "<tag><tag>"; a comma-separated
             // list of bare names matches nothing and would strip every tag.
             $content = strip_tags($item->description->__toString(), "<img><a>");
         }

         // Flatten the category node(s) to plain PHP values; fall back to the
         // feed URL when the item has no category.
         $topic = json_decode(json_encode($item->category), true);
         if (empty($topic)) {
             $topic = $this->feed;
         }

         // Blurb: the tag-free first line, cut after its first "?", "!" or ".".
         $line = explode("\n", $content);
         $firstline = preg_replace('/([^?!.]*.).*/', '\\1', strip_tags($line[0]));

         // Body: the content with the blurb text removed.
         $body = trim(str_replace($firstline, "", $content));

         /**
          * Add this article to the list of news articles found in this scrape
          */
         $articles[] = [
             "title" => strval($item->title),
             "date" => (new DateTime(strval($item->pubDate)))->setTimezone($timezone),
             "source" => strval($item->link),
             "blurb" => $firstline,
             "body" => $body,
             "topic" => News::guessTopic($topic),
         ];
     }

     $this->articles = $articles;

     return $this;
 }