/**
 * Demonstrates SmartDOMDocument on a small HTML fragment.
 *
 * Loads the fragment, detaches the first <img> element, then prints the
 * remaining markup and the extracted image markup separately (both passed
 * through htmlspecialchars). The fragment contains Cyrillic text so the
 * output also shows whether non-ASCII characters survive the round trip.
 *
 * @return void
 */
public static function testHTML()
{
    // The closing heredoc marker stays at column 0 so this also parses on PHP < 7.3.
    $content = <<<CONTENT
<div class='class1'>
  <img src='http://www.google.com/favicon.ico' />
  Some Text
  <p>русский</p>
</div>
CONTENT;

    print "Before removing the image, the content is: " . htmlspecialchars($content) . "<br>";

    // Defined up front: the final print reads it even when the fragment has
    // no <img> or the extraction throws.
    $image = '';

    $content_doc = new Dom_SmartDOMDocument();
    $content_doc->loadHTML($content);

    try {
        $first_image = $content_doc->getElementsByTagName("img")->item(0);

        if ($first_image) {
            // Detach the image from the main document and re-serialize what is left.
            $first_image->parentNode->removeChild($first_image);
            $content = $content_doc->saveHTMLExact();

            // Import the detached node into a fresh document to serialize it on its own.
            $image_doc = new Dom_SmartDOMDocument();
            $image_doc->appendChild($image_doc->importNode($first_image, true));
            $image = $image_doc->saveHTMLExact();
        }
    } catch (Exception $e) {
        // Keep the demo running, but make the failure visible instead of hiding it.
        print "Could not extract the image: " . htmlspecialchars($e->getMessage()) . "<br>";
    }

    print "After removing the image, the content is: " . htmlspecialchars($content) . "<br>";
    print "The image is: " . htmlspecialchars($image);
}
/**
 * Imports the feed at $this->getLink() and stores it in $this->_news.
 *
 * Feed-level metadata goes into a Core_Model_Default; each entry becomes its
 * own Core_Model_Default. An entry's picture is the enclosure URL when one is
 * present, overridden by the src of the first <img> found in the entry's
 * description when that src is non-empty.
 *
 * @return $this
 */
protected function _parse()
{
    $feed = Zend_Feed_Reader::import($this->getLink());

    $this->_news = new Core_Model_Default(array(
        'title' => $feed->getTitle(),
        'link' => $feed->getLink(),
        'dateModified' => $feed->getDateModified(),
        'description' => $feed->getDescription(),
        'language' => $feed->getLanguage(),
        'entries' => array()
    ));

    $data = array();

    foreach ($feed as $entry) {
        $picture = null;

        $enclosure = $entry->getEnclosure();
        if ($enclosure && $enclosure->url) {
            $picture = $enclosure->url;
        }

        $raw_description = $entry->getDescription();
        if ($raw_description) {
            $content = new Dom_SmartDOMDocument();
            $content->loadHTML($raw_description);
            $content->encoding = 'utf-8';

            // documentElement is null when the document has no root element.
            $root = $content->documentElement;
            if ($root) {
                $img = $root->getElementsByTagName('img')->item(0);
                if ($img && $img->getAttribute('src')) {
                    $picture = $img->getAttribute('src');
                }
            }
        }

        // Entries without a creation date must not abort the whole import.
        $created_at = $entry->getDateCreated();
        $timestamp = $created_at ? $created_at->getTimestamp() : null;

        $data[] = new Core_Model_Default(array(
            'title' => $entry->getTitle(),
            'description' => strip_tags((string) $raw_description),
            'dateModified' => $entry->getDateModified(),
            'authors' => $entry->getAuthors(),
            'link' => $entry->getLink(),
            'content' => strip_tags((string) $entry->getContent()),
            'enclosure' => $enclosure,
            'timestamp' => $timestamp,
            'picture' => $picture
        ));
    }

    $this->_news->setEntries($data);

    return $this;
}
/**
 * Returns the path of the cached HTML page for this object's text,
 * generating the file first when it does not exist yet.
 *
 * Every <a> in the text gets its target attribute set: "_blank" when the
 * session's isOverview flag is truthy, "_top" otherwise. The markup is then
 * wrapped in a fixed <html><head>...</head> shell.
 *
 * NOTE(review): the existence check and the write use
 * getCacheDirectory(true) while the returned path uses getCacheDirectory();
 * presumably these are two views of the same directory - confirm.
 *
 * @return string
 */
public function getHtmlFilePath()
{
    $cache_file = Core_Model_Directory::getCacheDirectory(true) . '/' . $this->_getFilename();

    if (!file_exists($cache_file)) {
        // 'HTML-ENTITIES' is deprecated as an mbstring encoding since PHP 8.2;
        // kept as-is to preserve the produced markup.
        $html_code = mb_convert_encoding(html_entity_decode($this->getText()), 'HTML-ENTITIES', 'UTF-8');

        $html_a_target = $this->getSession()->isOverview ? "_blank" : "_top";

        // Add or change the target of every <a>.
        $doc = new Dom_SmartDOMDocument();
        $doc->loadHTML($html_code);
        foreach ($doc->getElementsByTagName('a') as $link) {
            $link->setAttribute('target', $html_a_target);
        }

        $html_code = html_entity_decode($doc->saveHTML(), ENT_QUOTES, "UTF-8");

        // NOTE(review): the markup is entity-decoded a second time below,
        // exactly as before; confirm the double decode is intended.
        $html = '<html><head> '
            . '<meta content="text/html; charset=utf-8" http-equiv="Content-Type" /> '
            . '<meta content="width=device-width, height=device-height, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0, user-scalable=0" name="viewport" /> '
            . '<meta content="black" name="apple-mobile-web-app-status-bar-style" /> '
            . '<meta content="IE=8" http-equiv="X-UA-Compatible" /> '
            . '<style type="text/css"> '
            . 'html, body { margin:0; padding:0; border:none; } '
            . 'html { overflow: scroll; } '
            . 'body { font-size: 15px; width: 100%; height: 100%; overflow: auto; -webkit-user-select : none; -webkit-text-size-adjust : none; -webkit-touch-callout: none; line-height:1; background-color:white; } '
            . '</style> '
            . '</head>'
            . html_entity_decode($html_code)
            . '</html>';

        // Write only once the content is complete, so a failure while building
        // it can no longer leave an empty file that later calls would treat as
        // a valid cache entry. file_put_contents emits its own warning on failure.
        file_put_contents($cache_file, $html, LOCK_EX);
    }

    return Core_Model_Directory::getCacheDirectory() . '/' . $this->_getFilename();
}
/**
 * Imports the feed at $this->getLink() and stores it in $this->_news.
 *
 * For each entry the content HTML is post-processed:
 *  - the first <img> becomes the entry picture and is removed from the
 *    markup, unless its src is empty or contains ".gif";
 *  - width/height attributes are stripped from the images left in the markup;
 *  - every <a> gets target="_self".
 * The resulting markup is stored as both 'description' and 'content';
 * 'short_description' is the same markup with tags stripped.
 *
 * @return $this
 */
protected function _parse()
{
    $feed = Zend_Feed_Reader::import($this->getLink());

    $this->_news = new Core_Model_Default(array(
        'title' => $feed->getTitle(),
        'link' => $feed->getLink(),
        'dateModified' => $feed->getDateModified(),
        'description' => $feed->getDescription(),
        'language' => $feed->getLanguage(),
        'entries' => array()
    ));

    $data = array();

    foreach ($feed as $entry) {
        $picture = null;

        $enclosure = $entry->getEnclosure();
        if ($enclosure && $enclosure->url) {
            $picture = $enclosure->url;
        }

        $description = "";
        $raw_content = $entry->getContent();

        if ($raw_content) {
            $content = new Dom_SmartDOMDocument();
            $content->loadHTML($raw_content);
            $content->encoding = 'utf-8';

            // documentElement is null when the document has no root element.
            $root = $content->documentElement;

            if ($root) {
                // Iterate over a snapshot: the node list is live, and removing
                // a node while iterating it directly makes the iteration skip
                // the nodes that follow.
                foreach (iterator_to_array($root->getElementsByTagName('img')) as $k => $img) {
                    if ($k == 0) {
                        $src = $img->getAttribute('src');
                        if ($src and stripos($src, ".gif") === false) {
                            $picture = $src;
                            $img->parentNode->removeChild($img);
                            // The node is detached; nothing left to clean up on it.
                            continue;
                        }
                    }

                    $img->removeAttribute('width');
                    $img->removeAttribute('height');
                }

                foreach ($root->getElementsByTagName('a') as $a) {
                    $a->setAttribute('target', '_self');
                }

                $description = $content->saveHTMLExact();
            }
        }

        $created_at = $entry->getDateCreated();
        $timestamp = $created_at ? $created_at->getTimestamp() : null;

        $updated_at = null;
        if ($timestamp) {
            $updated_at = $this->_getUpdatedAt($timestamp);
        }

        $data[] = new Core_Model_Default(array(
            'entry_id' => $entry->getId(),
            'title' => $entry->getTitle(),
            'description' => $description,
            'short_description' => strip_tags($description),
            'dateModified' => $entry->getDateModified(),
            'authors' => $entry->getAuthors(),
            'link' => $entry->getLink(),
            'content' => $description,
            'enclosure' => $enclosure,
            'timestamp' => $timestamp,
            'updated_at' => $updated_at,
            'picture' => $picture
        ));
    }

    $this->_news->setEntries($data);

    return $this;
}
/**
 * Fetches posts from the remote WordPress endpoint and returns the first 20,
 * ordered by descending 'date' key.
 *
 * Each post description is rewritten: the first <img> with a src is removed
 * from the markup and exposed as the post picture (640x400 crop URL), every
 * other <img> with a src is pointed at a 240x180 crop URL, iframes are set
 * to width="100%" with no height, and all tags except
 * <div><p><a><img><iframe> are stripped.
 *
 * The cache is only written here (when $useCache is true), never read.
 * Posts are keyed by their 'date' value, so two posts sharing the same
 * 'date' overwrite each other.
 *
 * @param bool   $showAll  When false, the request is restricted to this object's category ids.
 * @param string $url      Endpoint to query; falls back to $this->getData('url') when empty.
 * @param bool   $useCache When true, the full post list is saved to the cache.
 * @return array Up to 20 Wordpress_Model_Wordpress_Category_Post objects.
 */
public function getRemotePosts($showAll = false, $url = '', $useCache = false)
{
    $cache = Zend_Registry::get('cache');
    $cacheId = 'wordpress_cache_' . sha1($this->getId());

    $this->_remote_posts = array();

    if ($this->getData('url') or !empty($url)) {
        $category_ids = $this->getCategoryIds();

        $params = array('object' => 'posts');
        if (!$showAll) {
            $params['cat_ids'] = $category_ids;
        }

        // Send the request.
        $datas = $this->_sendRequest(!empty($url) ? $url : $this->getData('url'), $params);

        // Only process a well-formed, successful response.
        if (isset($datas['status'], $datas['posts']) and $datas['status'] == '1' and is_array($datas['posts'])) {
            foreach ($datas['posts'] as $post_datas) {
                $first_picture = '';
                $first_picture_src = '';

                if (!empty($post_datas['description'])) {
                    $content = new Dom_SmartDOMDocument();
                    $content->loadHTML($post_datas['description']);
                    $content->encoding = 'utf-8';

                    // documentElement is null when the document has no root element.
                    $description = $content->documentElement;

                    if ($description) {
                        // Image processing.
                        foreach ($description->getElementsByTagName('img') as $img) {
                            if (!$img->getAttribute('src')) {
                                continue;
                            }

                            if (empty($first_picture)) {
                                $first_picture = $img;
                                $first_picture_src = $this->getUrl('Front/image/crop', array(
                                    'image' => base64_encode($img->getAttribute('src')),
                                    'width' => 640,
                                    'height' => 400
                                ));
                            } else {
                                $img->setAttribute('onload', 'javascript:setImageSize($(this), true);');
                                $img->setAttribute('src', $this->getUrl('Front/image/crop', array(
                                    'image' => base64_encode($img->getAttribute('src')),
                                    'width' => 240,
                                    'height' => 180
                                )));
                            }
                        }

                        // Detach the first picture only after the loop, so the
                        // live node list is not modified while it is iterated.
                        if (!empty($first_picture)) {
                            $first_picture->parentNode->removeChild($first_picture);
                        }

                        // Iframe processing.
                        foreach ($description->getElementsByTagName('iframe') as $iframe) {
                            $iframe->setAttribute('width', '100%');
                            $iframe->removeAttribute('height');
                        }

                        $post_datas['description'] = strip_tags(
                            $content->saveHTMLExact(),
                            '<div><p><a><img><iframe>'
                        );
                    }
                }

                $post_datas['picture'] = $first_picture_src;

                $this->_remote_posts[$post_datas['date']] = new Wordpress_Model_Wordpress_Category_Post($post_datas);
            }
        }

        krsort($this->_remote_posts);

        if ($useCache) {
            $cache->save($this->_remote_posts, $cacheId);
        }
    }

    // array_slice, not array_splice: splice would also remove the returned
    // posts from $this->_remote_posts.
    return array_slice($this->_remote_posts, 0, 20);
}
/**
 * Returns one page of posts from the remote WordPress endpoint, ordered by
 * descending 'date' key.
 *
 * The post list is fetched when $this->_remote_posts is empty and either
 * $useCache is false or the cache has no entry; the fetched list is then
 * saved to the cache regardless of $useCache.
 *
 * A post's picture is its 'featured_image' when set; otherwise the first
 * <img> with a src in the description (removed from the markup, exposed as a
 * 640x400 crop URL); otherwise $this->getNoImage(). Remaining images lose
 * their height attribute, iframes are set to width="100%" with no height,
 * and all tags except <div><p><a><img><iframe> are stripped.
 *
 * Posts are keyed by their 'date' value, so two posts sharing the same
 * 'date' overwrite each other.
 *
 * @param bool   $showAll  Only contributes to the cache id; see the review note in the body.
 * @param string $url      Endpoint to query; falls back to $this->getData('url') when empty.
 * @param bool   $useCache When true, an existing cache entry is used instead of fetching.
 * @param int    $offset   Index of the first post to return.
 * @return array Up to self::DISPLAYED_PER_PAGE Wordpress_Model_Wordpress_Category_Post objects.
 */
public function getRemotePosts($showAll = false, $url = '', $useCache = false, $offset = 0)
{
    $cache = Zend_Registry::get('cache');
    $cacheId = 'wordpress_cache_' . sha1($this->getId()) . $showAll;

    // NOTE(review): the caller's $showAll is overridden right after it was
    // used for the cache id, so the request is always category-restricted and
    // the 'is_hidden' branch below never runs. Kept as-is because removing it
    // changes which posts are requested - confirm whether this is intentional.
    $showAll = false;

    if (!$this->_remote_posts and (!$useCache or ($this->_remote_posts = $cache->load($cacheId)) === false)) {
        $this->_remote_posts = array();

        if ($this->getData('url') or !empty($url)) {
            $category_ids = $this->getCategoryIds();

            $params = array('object' => 'posts');
            if (!$showAll) {
                $params['cat_ids'] = $category_ids;
            }

            // Send the request.
            $datas = $this->_sendRequest(!empty($url) ? $url : $this->getData('url'), $params);

            // Only process a well-formed, successful response.
            if (isset($datas['status'], $datas['posts']) and $datas['status'] == '1' and is_array($datas['posts'])) {
                foreach ($datas['posts'] as $post_datas) {
                    $post_datas['picture'] = !empty($post_datas["featured_image"]) ? $post_datas["featured_image"] : null;

                    if ($showAll and count(array_intersect($category_ids, $post_datas['category_ids'])) == 0) {
                        $post_datas['is_hidden'] = true;
                    }

                    $first_picture = "";
                    $first_picture_src = "";

                    if (!empty($post_datas['description'])) {
                        $content = new Dom_SmartDOMDocument();
                        $content->loadHTML($post_datas['description']);
                        $content->encoding = 'utf-8';

                        // documentElement is null when the document has no root element.
                        $description = $content->documentElement;

                        if ($description) {
                            // Image processing.
                            foreach ($description->getElementsByTagName('img') as $img) {
                                if (!$img->getAttribute('src')) {
                                    continue;
                                }

                                if (empty($post_datas['picture']) and empty($first_picture)) {
                                    $first_picture = $img;
                                    $first_picture_src = $this->getUrl('Front/image/crop', array(
                                        'image' => base64_encode($img->getAttribute('src')),
                                        'width' => 640,
                                        'height' => 400
                                    ));
                                } else {
                                    $img->removeAttribute("height");
                                }
                            }

                            // Detach the first picture only after the loop, so
                            // the live node list is not modified while iterated.
                            if (!empty($first_picture)) {
                                $first_picture->parentNode->removeChild($first_picture);
                                $post_datas['picture'] = $first_picture_src;
                            }

                            // Iframe processing.
                            foreach ($description->getElementsByTagName('iframe') as $iframe) {
                                $iframe->setAttribute('width', '100%');
                                $iframe->removeAttribute('height');
                            }

                            $post_datas['description'] = strip_tags(
                                $content->saveHTMLExact(),
                                '<div><p><a><img><iframe>'
                            );
                        }
                    }

                    // Applied to every post, including those without a
                    // description, so no post ends up with an empty picture.
                    if (empty($post_datas['picture'])) {
                        $post_datas['picture'] = $this->getNoImage();
                    }

                    $this->_remote_posts[$post_datas['date']] = new Wordpress_Model_Wordpress_Category_Post($post_datas);
                }
            }

            krsort($this->_remote_posts);
            $cache->save($this->_remote_posts, $cacheId);
        }
    }

    return array_slice($this->_remote_posts, $offset, self::DISPLAYED_PER_PAGE);
}