Beispiel #1
0
 /**
  * Test that I can load the ATOM page and that it returns valid XML.
  * The XMLLoader will take care of the validation, since it should be a valid document anyway.
  */
 public function testATOMPage()
 {
     // The feed address is derived from the blog's rewrite URL, so that must be populated first.
     $baseUrl = $this->blog->get('rewriteurl');
     $this->assertNotEmpty($baseUrl);

     // Request the ".atom" variant of the blog page; the resulting view must report code 200.
     $atomRequest = new PageRequest($baseUrl . '.atom');
     $atomRequest->execute();
     $atomView = $atomRequest->getView();
     $this->assertEquals(200, $atomView->error);

     // The rendered feed must not be blank.
     $feedBody = $atomView->fetch();
     $this->assertNotEmpty($feedBody);

     // Parse the output as a document rooted at <feed>.  Per the original author's note, loading
     // invalid markup is expected to raise an error, which phpunit then reports as a failure.
     $loader = new XMLLoader();
     $loader->setRootName('feed');
     $loader->loadFromString($feedBody);

     // A successfully parsed feed must serialise back to non-empty XML.
     $this->assertNotEmpty($loader->asMinifiedXML());
 }
Beispiel #2
0
 /**
  * Helper utility to import a given remote blog.
  *
  * @param bool $verbose Set to true to enable real-time verbose output of the operation.
  * @return array
  *
  * @throws Exception
  */
 public function importFeed($verbose = false)
 {
     // Preconditions: the blog must be saved, must be of type "remote", and its feed must be reachable.
     $blogid = $this->get('id');
     if (!$this->exists()) {
         throw new Exception('Unable to import a blog that does not exist!');
     }
     if ($this->get('type') != 'remote') {
         throw new Exception('Cannot import a blog that is not remote!');
     }
     $file = \Core\Filestore\Factory::File($this->get('remote_url'));
     if (!$file->exists()) {
         throw new Exception($this->get('remote_url') . ' does not appear to exist');
     }

     // Emits one line of real-time progress.  ob_flush() raises a notice when no output buffer
     // is active, so it is only called when a buffer actually exists.
     $emit = function ($line) {
         echo $line;
         if (ob_get_level() > 0) {
             ob_flush();
         }
         flush();
     };

     // Values applied only to pages that do not exist yet.
     $defaults = ['parenturl' => $this->get('baseurl'), 'site' => $this->get('site'), 'component' => 'blog'];
     $newpagedat = array_merge($defaults, ['selectable' => '0']);

     // Counters and human-readable log returned to the caller.
     $changes = ['added' => 0, 'updated' => 0, 'skipped' => 0, 'deleted' => 0];
     $changelog = '';

     // Every standardized record starts from this template so that later reads never hit an
     // undefined key, even when the feed omits an element such as <title>.
     $blank = ['guid' => '', 'link' => '', 'title' => '', 'thumbnail' => '', 'published' => '', 'updated' => '', 'description' => ''];

     // I need a list of current articles in this feed.  This is because remote deletions won't be coming in on the feed.
     // NOTE(review): this guid => id map is built but never read below, and $changes['deleted'] is
     // never incremented; deletion handling appears to be unimplemented -- confirm before relying on it.
     $map = array();
     $articles = BlogArticleModel::FindRaw(['blogid = ' . $blogid]);
     foreach ($articles as $a) {
         $map[$a['guid']] = $a['id'];
     }

     // I can't trust that remote files list what they actually are because many frameworks,
     // (WP in specific), do not correctly use content-types :/
     $contents = $file->getContents();
     // Sniff the feed type from the first 400 bytes of the document.
     $header = substr($contents, 0, 400);

     // All the standardized records
     $records = array();
     if (strpos($header, '<rss ') !== false) {
         if ($verbose) {
             $emit('Found an RSS feed with the URL of ' . $file->getURL() . '!<br/>' . "\n");
         }
         $xml = new XMLLoader();
         $xml->setRootName('rss');
         $xml->loadFromString($contents);
         foreach ($xml->getElements('channel/item') as $item) {
             $dat = $blank;
             foreach ($item->childNodes as $child) {
                 if ($child->nodeName == '#text') {
                     continue;
                 }
                 switch ($child->nodeName) {
                     case 'media:thumbnail':
                         $dat['thumbnail'] = $child->getAttribute('url');
                         break;
                     case 'pubDate':
                         $dat['published'] = $child->nodeValue;
                         break;
                     default:
                         // Everything else (title, link, guid, description, ...) is stored under its own tag name.
                         $dat[$child->nodeName] = $child->nodeValue;
                 }
             }
             $records[] = $dat;
         }
     } elseif (strpos($header, 'http://www.w3.org/2005/Atom') !== false) {
         if ($verbose) {
             $emit('Found an ATOM feed with the URL of ' . $file->getURL() . '!<br/>' . "\n");
         }
         $xml = new XMLLoader();
         $xml->setRootName('feed');
         $xml->loadFromString($contents);
         foreach ($xml->getRootDOM()->childNodes as $item) {
             if ($item->nodeName != 'entry') {
                 continue;
             }
             $dat = $blank;
             // Height of the largest im:image seen so far for this entry.
             $imgheight = 0;
             foreach ($item->childNodes as $child) {
                 if ($child->nodeName == '#text') {
                     continue;
                 }
                 switch ($child->nodeName) {
                     case 'id':
                         $dat['guid'] = $child->nodeValue;
                         break;
                     case 'link':
                         // Only the HTML "alternate" link is used; prefer its text, else its href.
                         if ($child->getAttribute('rel') == 'alternate' && $child->getAttribute('type') == 'text/html') {
                             if ($child->nodeValue) {
                                 $dat['link'] = $child->nodeValue;
                             } else {
                                 $dat['link'] = $child->getAttribute('href');
                             }
                         }
                         break;
                     case 'im:image':
                         // Keep the tallest image as the thumbnail; compare heights as integers.
                         $height = (int) $child->getAttribute('height');
                         if ($height > $imgheight) {
                             $dat['thumbnail'] = $child->nodeValue;
                             $imgheight = $height;
                         }
                         break;
                     case 'updated':
                         $dat['updated'] = strtotime($child->nodeValue);
                         break;
                     case 'summary':
                         // The summary is only a fallback: use it when no description has been set yet,
                         // and never let it replace a body already taken from <content>.
                         if ($dat['description'] === '') {
                             $dat['description'] = $child->nodeValue;
                         }
                         break;
                     case 'content':
                         // Full content always wins over a summary, whichever order they appear in.
                         $dat['description'] = $child->nodeValue;
                         break;
                     default:
                         $dat[$child->nodeName] = $child->nodeValue;
                 }
             }
             if (!$dat['published'] && $dat['updated']) {
                 // make sure that there's a published date.
                 $dat['published'] = $dat['updated'];
             }
             $records[] = $dat;
         }
     } else {
         throw new Exception('Invalid remote file found, please ensure it is either an RSS or Atom feed!');
     }

     // Now that they're standardized...
     foreach ($records as $dat) {
         /** @var PageModel $page */
         $page = PageModel::Construct($dat['link']);

         // Dates may arrive as date strings or as numeric timestamps; strings go through strtotime().
         // A blank "updated" falls back to the published value.
         $published = ($dat['published'] == '' || is_numeric($dat['published']))
             ? $dat['published']
             : strtotime($dat['published']);
         if ($dat['updated'] != '') {
             $updated = is_numeric($dat['updated']) ? $dat['updated'] : strtotime($dat['updated']);
         } else {
             $updated = $published;
         }

         $page->setFromArray([
             'published' => $published,
             'title' => $dat['title'],
             'body' => $dat['description'],
             'updated' => $updated,
         ]);
         if (!$page->exists()) {
             // Add the "new" dat only if the page doesn't exist before.
             $page->setFromArray($newpagedat);
         }
         if ($dat['thumbnail']) {
             // Copy the remote thumbnail into public/blog/ and reference the copy from the page.
             $remote = \Core\Filestore\Factory::File($dat['thumbnail']);
             $new = $remote->copyTo('public/blog/');
             $page->setMeta('image', $new->getFilename(false));
         }
         $page->setMeta('guid', $dat['guid']);

         // Decide the label before saving, while exists() still reflects the pre-save state.
         $thischange = $page->exists() ? 'updated' : 'added';
         if ($page->changed()) {
             $page->save();
             $changes[$thischange]++;
             $changelog .= $thischange . ' ' . $dat['title'] . "<br/>\n";
             if ($verbose) {
                 $emit($thischange . ' ' . $dat['title'] . "<br/>\n");
             }
         } else {
             $changes['skipped']++;
             if ($verbose) {
                 $emit('No changes to ' . $dat['title'] . "<br/>\n");
             }
         }
     }

     return [
         'status' => 1,
         'message' => 'Import feed successfully!',
         'added' => $changes['added'],
         'updated' => $changes['updated'],
         'deleted' => $changes['deleted'],
         'skipped' => $changes['skipped'],
         'changelog' => $changelog,
     ];
 }