/**
 * Import the most recent items of one feed as content records.
 *
 * Downloads and parses `$feed['feed']`, keeps the first
 * `$this->config['itemAmount']` items and, for each of them, creates or
 * updates a record of the configured contenttype. Existing records are
 * looked up by the slug of the item title. Progress is echoed as HTML
 * fragments while the import runs.
 *
 * @param mixed $author Stored unchanged in the record's 'author' value.
 * @param array $feed   Feed definition; the keys 'feed', 'url' and 'title' are read.
 *
 * @return void
 */
private function parseFeed($author, $feed)
 {
     $contenttype = $this->config['contenttype'];

     // Download the feed and let the reader choose the parser for its format.
     $reader = new Reader();
     $resource = $reader->download($feed['feed']);
     $parser = $reader->getParser($resource->getUrl(), $resource->getContent(), $resource->getEncoding());
     $feeditems = $parser->execute();

     // Only the first 'itemAmount' items are imported.
     $items = array_slice($feeditems->items, 0, $this->config['itemAmount']);

     foreach ($items as $item) {
         $title = $item->getTitle();

         // Try to get an existing record for this item, matched on the title slug.
         $record = $this->app['storage']->getContent($contenttype, array('slug' => \Bolt\Helpers\String::slug($title), 'returnsingle' => true));
         if (!$record) {
             // New one.
             $record = $this->app['storage']->getContentObject($contenttype);
             echo "\n<b>[NEW]</b> ";
         } else {
             echo "\n<b>[UPD]</b> : " . $record->values['id'];
         }

         // new \DateTime() either returns an instance or throws, so the
         // formatted date is always available from here on.
         $date = new \DateTime("@" . $item->getDate());
         $formattedDate = $date->format('Y-m-d H:i:s');

         // Prefer the full content and fall back to the intro.
         if ($item->getContent() != false) {
             $raw = $item->getContent();
         } else {
             $raw = $item->getIntro();
         }

         // Sanitize/clean the HTML.
         // NOTE(review): 'dh' is not an HTML element; 'dt' may have been intended - confirm.
         $maid = new \Maid\Maid(array('output-format' => 'html', 'allowed-tags' => array('p', 'br', 'hr', 's', 'u', 'strong', 'em', 'i', 'b', 'li', 'ul', 'ol', 'menu', 'blockquote', 'pre', 'code', 'tt', 'h2', 'h3', 'h4', 'h5', 'h6', 'dd', 'dl', 'dh', 'table', 'tbody', 'thead', 'tfoot', 'th', 'td', 'tr', 'a', 'img'), 'allowed-attribs' => array('id', 'class', 'name', 'value', 'href', 'src')));
         $content = $maid->clean($raw);

         $image = $this->findImage($content, $feed['url']);

         $values = array(
             'itemid' => $item->getId(),
             'title' => "" . $item->getTitle(),
             'raw' => "" . $raw,
             'body' => "" . $content,
             'author' => $author,
             'image' => $image,
             'status' => 'published',
             'sitetitle' => $feed['title'],
             'sitesource' => $feed['url'],
         );

         // Both new and updated records get their dates set from the feed item.
         echo "[1]";
         $values['datecreated'] = $formattedDate;
         $values['datepublish'] = $formattedDate;

         $record->setValues($values);
         $this->app['storage']->saveContent($record);

         echo " - " . $values['datecreated'] . " / " . $values['title'];
         flush();
     }
 }
 /**
  * With the content grabber enabled, the first item's content must contain an image tag.
  *
  * @group online
  */
 public function testRssGrabContent()
 {
     $reader = new Reader();
     $client = $reader->download('http://www.egscomics.com/rss.php');
     $parser = $reader->getParser($client->getUrl(), $client->getContent(), $client->getEncoding());
     $parser->enableContentGrabber();
     $feed = $parser->execute();
     $this->assertTrue(is_array($feed->items));
     // Fail with a clear assertion instead of an undefined-offset error below.
     $this->assertNotEmpty($feed->items);
     // strpos() returns false when the needle is missing and `false >= 0`
     // is true in PHP, so the result has to be compared strictly.
     $this->assertTrue(strpos($feed->items[0]->content, '<img') !== false);
 }
Esempio n. 3
0
 /**
  * A second download that sends back the Last-Modified and ETag values of
  * the first one must be reported as not modified.
  *
  * @group online
  */
 public function testDownload_withCache()
 {
     $url = 'http://linuxfr.org/robots.txt';

     // First request: nothing cached yet, so the resource counts as modified.
     $firstReader = new Reader();
     $firstResponse = $firstReader->download($url);
     $this->assertTrue($firstResponse->isModified());

     $cachedLastModified = $firstResponse->getLastModified();
     $cachedEtag = $firstResponse->getEtag();

     // Second request with a fresh reader, replaying the cache validators.
     $secondReader = new Reader();
     $secondResponse = $secondReader->download($url, $cachedLastModified, $cachedEtag);
     $this->assertFalse($secondResponse->isModified());
 }
Esempio n. 4
0
 /**
  * Update feed items.
  *
  * Downloads the feed at `$this->url` using the stored Last-Modified and
  * ETag values. When the resource was modified, every item whose guid is
  * not yet stored for this feed is saved as a new FeedItem, and the cache
  * validators on this model are refreshed.
  *
  * @return boolean true when the feed was unchanged or updated, false when
  *                 a PicoFeedException was raised (the message is logged).
  */
 public function updateFeed()
 {
     try {
         $reader = new Reader();
         $lastModified = $this->last_modified_at === null ? null : $this->last_modified_at->toRfc2822String();
         $resource = $reader->download($this->url, $lastModified, $this->etag);
         if (!$resource->isModified()) {
             return true;
         }
         $parser = $reader->getParser($resource->getUrl(), $resource->getContent(), $resource->getEncoding());
         $feed = $parser->execute();
         foreach ($feed->getItems() as $item) {
             // Prefer the guid tag and fall back to the item id.
             $guid = $item->getTag('guid');
             if ($guid === false) {
                 $guid = $item->getId();
             } elseif (is_array($guid)) {
                 // An empty tag list would otherwise yield a null guid.
                 $guid = isset($guid[0]) ? $guid[0] : $item->getId();
             }
             $feedItem = FeedItem::where('feed_id', '=', $this->id)->where('guid', '=', $guid)->first();
             if ($feedItem === null) {
                 $feedItem = new FeedItem();
                 $feedItem->feed_id = $this->id;
                 $feedItem->guid = $guid;
                 $feedItem->title = $item->getTitle();
                 $feedItem->url = $item->getUrl();
                 $feedItem->save();
             }
         }
         // Remember the validators so the next run can skip an unchanged feed.
         $this->last_modified_at = new Carbon($resource->getLastModified());
         $this->etag = $resource->getEtag();
         $this->save();
     } catch (PicoFeedException $e) {
         // Log only: the exception is no longer echoed or dumped to the output.
         Log::error($e->getMessage());
         return false;
     }
     return true;
 }
Esempio n. 5
0
 /**
  * Collect item links from an RSS feed until a given publication date is reached.
  *
  * Items are read in feed order. Collection stops at the first item whose
  * publication date (formatted as 'Y-m-d') equals $date; that item is not
  * included. Without $date every item link is returned.
  *
  * @param  String $url  RSS Feed URL
  * @param  String $date Date in 'Y-m-d' format
  * @return Array        'urls' => list of url-decoded item links,
  *                      'lastCrawled' => 'Y-m-d' date of the first item, or null for an empty feed
  */
 public static function getFeed($url, $date = null)
 {
     // Download the feed and parse it with the parser matching its format.
     $reader = new Reader();
     $resource = $reader->download($url);
     $feed = $reader
         ->getParser($resource->getUrl(), $resource->getContent(), $resource->getEncoding())
         ->execute();

     $links = [];
     $firstSeen = null;

     foreach ($feed->items as $entry) {
         $day = date_format($entry->publishedDate, 'Y-m-d');

         // Remember the date of the first item we come across.
         if (!$firstSeen) {
             $firstSeen = $day;
         }

         // Stop once the requested date is reached.
         if ($date && $day == $date) {
             break;
         }

         $links[] = urldecode($entry->url);
     }

     return ['urls' => $links, 'lastCrawled' => $firstSeen];
 }
Esempio n. 6
0
 /**
  * Get the parsed feed, re-using the stored copy when possible.
  *
  * The feed is downloaded with the ETag and Last-Modified values kept in
  * the Dashboard config. A modified feed is parsed and stored together
  * with its new validators; an unmodified feed, or any PicoFeed error,
  * yields the previously stored content instead.
  *
  * @param  string $feed The feed URL
  * @return mixed        Result of the parser, or whatever is stored under "content"
  */
 private function getFeed($feed)
 {
     $reader = new Reader();
     $dashboard = \FreePBX::Dashboard();

     $storedEtag = $dashboard->getConfig($feed, "etag");
     $storedLastModified = $dashboard->getConfig($feed, "last_modified");

     try {
         $resource = $reader->download($feed, $storedLastModified, $storedEtag);

         // Nothing changed upstream: serve the stored copy.
         if (!$resource->isModified()) {
             return $dashboard->getConfig($feed, "content");
         }

         $parser = $reader->getParser($resource->getUrl(), $resource->getContent(), $resource->getEncoding());
         $parsed = $parser->execute();
         $freshEtag = $resource->getEtag();
         $freshLastModified = $resource->getLastModified();

         // Store the parsed feed and its validators for the next call.
         $dashboard->setConfig($feed, $parsed, "content");
         $dashboard->setConfig($feed, $freshEtag, "etag");
         $dashboard->setConfig($feed, $freshLastModified, "last_modified");

         return $parsed;
     } catch (\PicoFeed\PicoFeedException $e) {
         // Download or parsing failed: fall back to the stored copy.
         return $dashboard->getConfig($feed, "content");
     }
 }
Esempio n. 7
0
/**
 * Refresh one feed: download it, store its items and record the outcome.
 *
 * @param mixed $feed_id Identifier passed to get() and the update_* helpers
 *
 * @return bool true when the feed was processed (modified or not), false when
 *              the feed is unknown or a PicoFeedException was raised
 */
function refresh($feed_id)
{
    try {
        $feed = get($feed_id);

        if (empty($feed)) {
            return false;
        }

        $reader = new Reader(Config\get_reader_config());
        $resource = $reader->download($feed['feed_url'], $feed['last_modified'], $feed['etag']);

        // The `last_checked` column is updated on every run, HTTP cache hit or not
        update_last_checked($feed_id);

        if ($resource->isModified()) {
            $parser = $reader->getParser($resource->getUrl(), $resource->getContent(), $resource->getEncoding());

            if ($feed['download_content']) {
                $parser->enableContentGrabber();

                // Only grab content for new items, not for the ones already stored
                $known_urls = Database::getInstance('db')->table('items')->eq('feed_id', $feed_id)->findAllByColumn('url');
                $parser->setGrabberIgnoreUrls($known_urls);
            }

            $parsed_feed = $parser->execute();

            update_cache($feed_id, $resource->getLastModified(), $resource->getEtag());
            Item\update_all($feed_id, $parsed_feed->getItems());
            fetch_favicon($feed_id, $parsed_feed->getSiteUrl(), $parsed_feed->getIcon());
        }

        // Success: clear the parsing error flag
        update_parsing_error($feed_id, 0);
        Config\write_debug();

        return true;
    } catch (PicoFeedException $e) {
        // Failure: raise the parsing error flag
        update_parsing_error($feed_id, 1);
        Config\write_debug();

        return false;
    }
}
//
// Connect to the database using the serialized DB_CONFIG constant
//
$dbcfg = unserialize(DB_CONFIG);
$db = new Connection($dbcfg['host'], $dbcfg['user'], $dbcfg['password'], $dbcfg['database'], $dbcfg['fetchMode'], $dbcfg['charset'], array('port' => $dbcfg['port']));
//
// Loop through every source (source id => feed URL) listed in ARTICLE_FEEDS
//
foreach (unserialize(ARTICLE_FEEDS) as $source_id => $source_url) {
    echo sprintf("Source #%d (%s). ---- Go:<br>", $source_id, $source_url);
    //
    // Download and parse the RSS feed; any failure aborts the whole script
    //
    try {
        $reader = new Reader();
        $resource = $reader->download($source_url);
        $parser = $reader->getParser($resource->getUrl(), $resource->getContent(), $resource->getEncoding());
        $feed = $parser->execute();
    } catch (Exception $e) {
        echo "RSS Fetch Error: " . $e->getMessage();
        exit;
    }
    //
    // New feed items go to our database.
    // First count the articles already stored for this source.
    //
    $n_articles = (int) $db->fetchColumn("\n        SELECT \n            count(id)\n        FROM\n            jobarticles \n        WHERE \n            source_id = :source_id\n    ", array('source_id' => $source_id));
    if (!$n_articles) {
        // No articles from this source yet:
        // add every article to our database
        echo sprintf("No articles from there resource in our DB. Adding %d articles<br>", count($feed->items));
        // The latest known date is a very early date
Esempio n. 9
0
<?php

// Podiobooks Channel Server
// =============================================================================
//
// * Author: [Craig Davis](craig@there4development.com)
// * Since: 9/21/2015
//
// -----------------------------------------------------------------------------
//
require_once __DIR__ . '/../vendor/autoload.php';
use PicoFeed\Reader\Reader;
use PicoFeed\Config\Config;
$reader = new Reader();
// Download and parse the feed that lists all titles
$resource = $reader->download('http://podiobooks.com/rss/feeds/titles/');
$parser = $reader->getParser($resource->getUrl(), $resource->getContent(), $resource->getEncoding());
$feed = $parser->execute();
// Collect the URL of every title in the feed
$titleUrls = [];
foreach ($feed->getItems() as $item) {
    $titleUrls[] = $item->getUrl();
}
//$titleUrls = array_slice($titleUrls, 0, 100);
// Output structures; folder ids are handed out sequentially, the root folder gets id 1
$folders = [];
$media = [];
$folderIdCount = 1;
$folders['root'] = ['id' => $folderIdCount++, 'title' => 'root', 'imgURL' => '', 'description' => '', 'contents' => []];
// Fetch all the details for each title URL, printing progress as we go
foreach ($titleUrls as $url) {
    print " Parsing {$url}\n";
    flush();
Esempio n. 10
0
 /**
  * Build the RSS widgets for the home page.
  *
  * Feed URLs come from the UCPRSSFEEDS setting, falling back to RSSFEEDS,
  * one URL per line. Each feed is downloaded with the stored ETag and
  * Last-Modified values; on "not modified" or on a PicoFeed error the
  * stored content is used. Feeds without any content are skipped.
  *
  * Feed-supplied titles and URLs are remote data, so they are HTML-escaped
  * before being placed into the widget markup.
  *
  * @param string|null $feed Optional key ('feed-N') restricting the result to one feed
  *
  * @return array List of widgets with the keys 'id', 'title', 'content' and 'size'
  */
 public function getHomeWidgets($feed = null)
 {
     $fpbxfeeds = $this->UCP->FreePBX->Config->get('UCPRSSFEEDS');
     $fpbxfeeds = !empty($fpbxfeeds) ? $fpbxfeeds : $this->UCP->FreePBX->Config->get('RSSFEEDS');
     $fpbxfeeds = trim($fpbxfeeds);
     if (empty($fpbxfeeds)) {
         return array();
     }
     $feeds = array();
     $fpbxfeeds = str_replace("\r", "", $fpbxfeeds);
     foreach (explode("\n", $fpbxfeeds) as $k => $f) {
         $feeds['feed-' . $k] = $f;
     }
     if (!empty($feed) && !empty($feeds[$feed])) {
         // NOTE(review): this re-indexes the selection, so the widget id
         // becomes 0 instead of 'feed-N' - confirm callers expect that.
         $feeds = array($feeds[$feed]);
     }
     $out = array();
     $reader = new Reader();
     //Check if dashboard is installed and enabled,
     //if so then we will use the same cache engine dashboard uses
     if ($this->UCP->FreePBX->Modules->moduleHasMethod("dashboard", "getConfig")) {
         $storage = $this->UCP->FreePBX->Dashboard;
     } else {
         $storage = $this->UCP->FreePBX->Ucp;
     }
     foreach ($feeds as $k => $feed) {
         $etag = $storage->getConfig($feed, "etag");
         $last_modified = $storage->getConfig($feed, "last_modified");
         $content = '';
         try {
             $resource = $reader->download($feed, $last_modified, $etag);
             if ($resource->isModified()) {
                 $parser = $reader->getParser($resource->getUrl(), $resource->getContent(), $resource->getEncoding());
                 $content = $parser->execute();
                 $etag = $resource->getEtag();
                 $last_modified = $resource->getLastModified();
                 $storage->setConfig($feed, $content, "content");
                 $storage->setConfig($feed, $etag, "etag");
                 $storage->setConfig($feed, $last_modified, "last_modified");
             } else {
                 $content = $storage->getConfig($feed, "content");
             }
         } catch (\PicoFeed\PicoFeedException $e) {
             // Download or parsing failed: fall back to the stored copy.
             $content = $storage->getConfig($feed, "content");
         }
         if (empty($content)) {
             continue;
         }
         // Show at most five items per feed.
         $htmlcontent = '<ul>';
         $i = 1;
         foreach ($content->items as $item) {
             if ($i > 5) {
                 break;
             }
             $itemUrl = htmlspecialchars($item->url, ENT_QUOTES, 'UTF-8');
             $itemTitle = htmlspecialchars($item->title, ENT_QUOTES, 'UTF-8');
             $htmlcontent .= '<li><a href="' . $itemUrl . '" target="_blank">' . $itemTitle . '</a></li>';
             $i++;
         }
         $htmlcontent .= '</ul>';
         $siteUrl = htmlspecialchars($content->site_url, ENT_QUOTES, 'UTF-8');
         $siteTitle = htmlspecialchars($content->title, ENT_QUOTES, 'UTF-8');
         $out[] = array("id" => $k, "title" => '<a href="' . $siteUrl . '" target="_blank">' . $siteTitle . '</a>', "content" => $htmlcontent, "size" => '33.33%');
     }
     return $out;
 }
Esempio n. 11
0
    if (preg_match_all(IMAGE_URL_RX, $desc, $matches)) {
        return $matches[1];
    }
    return array();
}
/**
 * Return the last path segment of a URL.
 *
 * @param string $url URL to inspect
 *
 * @return string|false basename of the URL path, or false when the URL has
 *                      no usable path (missing, empty, "0" or unparsable URL)
 */
function url_to_filename($url)
{
    // Asking for the path component directly yields null when it is absent
    // and false for an unparsable URL, so no array offset is read blindly.
    $path = parse_url($url, PHP_URL_PATH);
    if (!$path) {
        return false;
    }
    return basename($path);
}
// Feed to convert, taken from the `url` query parameter.
// NOTE(review): PHP already URL-decodes $_GET values, so this decodes a second time - confirm intended.
// NOTE(review): the URL is downloaded without any validation - confirm this endpoint is not exposed to untrusted callers.
$feed_url = urldecode($_GET['url']);
// Download and parse the source feed
$reader = new Reader();
$resource = $reader->download($feed_url);
$parser = $reader->getParser($resource->getUrl(), $resource->getContent(), $resource->getEncoding());
$feed = $parser->execute();
// Build one output item per image URL found in each source item's content
$image_items = array();
foreach ($feed->items as $item) {
    $image_urls = extract_image_urls($item->content);
    foreach ($image_urls as $url) {
        // NOTE(review): $url is placed into the <img> markup unescaped - verify what extract_image_urls() can return.
        $image_items[] = array('title' => url_to_filename($url), 'updated' => $item->date->getTimestamp(), 'url' => $url, 'summary' => "<img src=\"{$url}\">");
    }
}
// Describe the generated RSS 2.0 feed; its own URL points back at this script with the source feed as parameter
$writer = new Rss20();
$writer->title = $feed->title;
$writer->site_url = $feed->site_url;
$writer->feed_url = "http://{$_SERVER['HTTP_HOST']}{$_SERVER['SCRIPT_NAME']}/?url=" . urlencode($feed_url);
$writer->description = "{$feed->description}\nModified by Splitch (http://{$_SERVER['HTTP_HOST']})";
$writer->items = $image_items;