Exemplo n.º 1
0
 /**
  * Rewrites local links to already-imported HTML files so they point at the permalink of the matching post.
  *
  * Every <a href> found in the post content is inspected. A link is rewritten only when it has no
  * URI scheme, ends in .htm/.html, can be resolved through WebPage::getFullPath() and the resolved
  * path is a key of $html_post_lookup. Nothing happens when $html_post_lookup is null.
  *
  * @param WebPage          $webPage          page used to resolve each href to a full path
  * @param HTMLImportStages $stagesSettings   not used by this stage
  * @param WPMetaConfigs    $meta             supplies the post content and receives the rewritten content
  * @param array|null       $html_post_lookup map of resolved file path => post id
  *
  * @return void
  */
 protected function performStage(WebPage $webPage, HTMLImportStages $stagesSettings, WPMetaConfigs &$meta, &$html_post_lookup = null)
 {
     $body = $meta->getPostContent();
     if (is_null($html_post_lookup)) {
         return;
     }

     // Content that cannot be turned into an XML object is treated as containing no links.
     $bodyXML = XMLHelper::getXMLObjectFromString($body);
     $all_links = is_object($bodyXML) ? $bodyXML->xpath('//a[@href]') : array();

     // TODO: encapsulate this in a function use XMLHelper::getAllHRefsFromHTML as a start
     $link_table = array();
     if ($all_links) {
         foreach ($all_links as $link) {
             // iterate the link's attributes to find the HREF value (attribute name compared case-insensitively)
             foreach ($link->attributes() as $attribute => $value) {
                 if (0 !== strcasecmp('href', $attribute)) {
                     continue;
                 }
                 $path = (string) $value;
                 // TODO: handle foo.html#rar
                 // anything that starts with a scheme (http:, mailto:, ...) is not a local link
                 if (preg_match('/^[a-zA-Z].*:.*/', $path)) {
                     continue;
                 }
                 // only handle files that end in .html or .htm
                 if (!preg_match('/\\.([hH][tT][mM][lL]?)$/', $path)) {
                     continue;
                 }
                 $fullpath = $webPage->getFullPath($path);
                 if (!$fullpath) {
                     echo '<span>***could not update link ' . $path . '</span><br>';
                     continue;
                 }
                 // only files that were imported already exist in the lookup; other links are left alone
                 if (array_key_exists($fullpath, $html_post_lookup)) {
                     $link_table[$path] = $fullpath;
                 }
             }
         }
     }

     // with the full list of links known, update the html as a whole to catch all references
     foreach ($link_table as $link => $full_link) {
         $post_link = get_permalink($html_post_lookup[$full_link]);
         if (!$post_link) {
             // no permalink could be determined for this post id, keep the original link
             continue;
         }
         echo 'Updating ' . $link . ' with ' . $post_link . '<br>';
         $search_str = '/(\\b[hH][rR][eE][fF]\\s*=\\s*")([\\b\\.\\/]*' . preg_quote((string) $link, '/') . '\\b)(")/';
         // A callback inserts the permalink verbatim: preg_quote() is meant for patterns and would
         // litter the URL with backslashes, and a plain '$1' . $url replacement would be misread
         // when the URL begins with a digit.
         $updated = preg_replace_callback(
             $search_str,
             function ($match) use ($post_link) {
                 return $match[1] . $post_link . $match[3];
             },
             $body
         );
         // null signals a PCRE failure; keep the previous content in that case
         if (!is_null($updated)) {
             $body = $updated;
         }
     }
     $meta->setPostContent($body);
 }
Exemplo n.º 2
0
 /**
  * Collects the link destinations found in the content of this webpage.
  *
  * The content is converted with XMLHelper::getXMLObjectFromString() and the result is handed to
  * XMLHelper::getAllHRefsFromHTML(), whose return value is passed back unchanged.
  *
  * @return array
  */
 public function getAllLinks()
 {
     return XMLHelper::getAllHRefsFromHTML(
         XMLHelper::getXMLObjectFromString($this->getContent())
     );
 }
Exemplo n.º 3
0
 /**
  * Builds meta data based on a loaded WebPage, and HtmlImportSettings from the plugin.
  *
  * When $post_id is given the existing values are loaded from that post first. Content and title are
  * taken from the page when its content can be turned into an XML object. Categories come from the
  * page's own settings and fall back to the global settings when the page provides none.
  *
  * @param admin\HtmlImportSettings $globalSettings supplies the fallback categories and the page template
  * @param WebPage                  $webPage        page providing content, settings and order position
  * @param mixed|null               $post_id        existing post to load from, or null
  * @param mixed|null               $parent_page_id parent post to set, or null to leave the parent untouched
  *
  * @return void
  */
 public function buildConfig(admin\HtmlImportSettings $globalSettings, WebPage $webPage, $post_id = null, $parent_page_id = null)
 {
     if (!is_null($post_id)) {
         $this->loadFromPostID($post_id);
     }

     // $webPage is a non-nullable typed parameter, so it needs no null check before use.
     $file_as_xml_obj = XMLHelper::getXMLObjectFromString($webPage->getContent());
     if (is_object($file_as_xml_obj)) {
         $this->setPostContent($file_as_xml_obj->body->asXML());
         $this->setPostTitle($this->getTitleFromTag($file_as_xml_obj));
     }

     $this->setPostName($this->getPostTitle());
     $this->setPostStatus('publish');
     $this->setPostType('page');
     $this->setCommentStatus('closed');
     $this->setPingStatus('closed');

     $overrideSettings = $webPage->getSettings();
     $categoryIDs = is_null($overrideSettings) ? null : $overrideSettings->getCategoryIds();

     // TODO: need to determine if index can override by providing no categories, and what that means
     if (empty($categoryIDs)) {
         // no page-level categories: translate the global category names into category ids
         $categoryIDs = null;
         $category = $globalSettings->getCategories()->getValuesArray();
         if (is_array($category)) {
             foreach ($category as $index => $cat) {
                 $categoryIDs[$index] = intval(get_cat_ID(trim($cat)));
             }
         }
     }
     $this->setPostCategory($categoryIDs);

     // TODO need a way to track the date and time of the original file
     $this->setPostDate(null);

     if (!is_null($parent_page_id)) {
         $this->setPostParent($parent_page_id);
     }

     $order = $webPage->getOrderPosition();
     if (!is_null($order)) {
         $this->setMenuOrder($order);
     }

     $this->setPostAuthor(wp_get_current_user()->ID);
     // TODO: should be in the settings object
     $this->setPageTemplate($globalSettings->getTemplate()->getValue());
 }
Exemplo n.º 4
0
 /**
  * Performs the stage action of uploading media files and updating the WebPage accordingly.
  *
  * Local <img src> targets and local <a href> targets with a known media extension are attached to
  * the post (re-using the attachment id when the resolved path is already in $media_lookup), after
  * which the matching src/href values in the post content are replaced by the attachment URL.
  * No media is imported when $media_lookup is null.
  *
  * @param WebPage          $webPage        page used to resolve paths and read the linked file contents
  * @param HTMLImportStages $stagesSettings not used by this stage
  * @param WPMetaConfigs    $meta           supplies post id and content, receives the rewritten content
  * @param array|null       $media_lookup   map of resolved file path => attachment id, extended with new uploads
  *
  * @return void
  */
 protected function performStage(WebPage $webPage, HTMLImportStages $stagesSettings, WPMetaConfigs &$meta, &$media_lookup = null)
 {
     $post_id = $meta->getPostId();
     $body = $meta->getPostContent();
     if (is_null($body) || '' === (string) $body) {
         echo '** the body for post ' . $post_id . ' was empty, no media to import.';
         return;
     }

     // maps each path as written in the html to its resolved full path
     $media_table = array();
     $file_as_xml_obj = XMLHelper::getXMLObjectFromString($body);

     // Without a lookup nothing is imported; content that yields no XML object has nothing to scan.
     if (!is_null($media_lookup) && is_object($file_as_xml_obj)) {
         // import img srcs
         foreach ($this->findLocalAttributeValues($file_as_xml_obj, '//img[@src]', 'src') as $path) {
             $this->importMediaFile($webPage, $path, $post_id, $media_lookup, $media_table);
         }
         // linked media
         foreach ($this->findLocalAttributeValues($file_as_xml_obj, '//a[@href]', 'href') as $path) {
             // media png,bmp,jpg,jpeg,gif,pdf,doc,docx,mp3,ogg,wav
             if (preg_match('/\\.(png|bmp|jpg|jpeg|gif|pdf|doc|docx|mp3|ogg|wav)$/', strtolower($path))) {
                 $this->importMediaFile($webPage, $path, $post_id, $media_lookup, $media_table);
             }
         }
     }

     foreach ($media_table as $media_item => $full_media_path) {
         $media_url = wp_get_attachment_url($media_lookup[$full_media_path]);
         if (!$media_url) {
             // no URL available for this attachment, keep the original reference
             continue;
         }
         $quoted_item = preg_quote((string) $media_item, '/');
         $patterns = array(
             // img src
             '/(\\b[iI][mM][gG]\\s*[^>]*\\s+[sS][rR][cC]\\s*=\\s*")([\\b\\/\\.]*' . $quoted_item . ')(")/',
             // a href: no \b anchors, the surrounding quotes already delimit the value and a \b
             // would stop paths starting with "." or "/" from ever matching
             '/(\\b[hH][rR][eE][fF]\\s*=\\s*")(' . $quoted_item . ')(")/',
         );
         // A callback inserts the URL verbatim: preg_quote() is meant for patterns and would
         // litter the URL with backslashes when used on a replacement string.
         $updated = preg_replace_callback(
             $patterns,
             function ($match) use ($media_url) {
                 return $match[1] . $media_url . $match[3];
             },
             $body
         );
         // null signals a PCRE failure; keep the previous content in that case
         if (!is_null($updated)) {
             $body = $updated;
         }
     }
     $meta->setPostContent($body);
     echo '<li>Post ' . $post_id . ' updated with correct image links.</li>';
 }

 /**
  * Returns the values of the named attribute on all nodes matched by the xpath, skipping values that start with a URI scheme.
  *
  * @param object $xml           XML object exposing xpath(), as returned by XMLHelper::getXMLObjectFromString()
  * @param string $xpath         expression selecting the nodes to inspect
  * @param string $attributeName attribute to read, compared case-insensitively
  *
  * @return string[]
  */
 private function findLocalAttributeValues($xml, $xpath, $attributeName)
 {
     $values = array();
     $nodes = $xml->xpath($xpath);
     if (!$nodes) {
         return $values;
     }
     foreach ($nodes as $node) {
         foreach ($node->attributes() as $attribute => $value) {
             if (0 !== strcasecmp($attributeName, $attribute)) {
                 continue;
             }
             $path = (string) $value;
             // a leading scheme (http:, data:, ...) means the target is not a local file
             if (!preg_match('/^[a-zA-Z].*:.*/', $path)) {
                 $values[] = $path;
             }
         }
     }
     return $values;
 }

 /**
  * Makes sure the file behind a local path is available as an attachment and records it in $media_table.
  *
  * @param WebPage $webPage      page used to resolve the path and read the file contents
  * @param string  $path         path exactly as written in the html
  * @param mixed   $post_id      post the new attachment is attached to
  * @param array   $media_lookup map of resolved file path => attachment id, extended on upload
  * @param array   $media_table  map of html path => resolved file path, extended on success
  *
  * @return void
  */
 private function importMediaFile(WebPage $webPage, $path, $post_id, &$media_lookup, array &$media_table)
 {
     if (array_key_exists($path, $media_table)) {
         // this path was handled earlier in the same run
         return;
     }
     $fullpath = $webPage->getFullPath($path);
     if (!$fullpath) {
         echo '<li>***Unable to upload media file ' . basename($path) . '</li>';
         return;
     }

     require_once ABSPATH . 'wp-admin/includes/image.php';

     if (array_key_exists($fullpath, $media_lookup)) {
         // already uploaded: re-save the stored attachment metadata and re-use the attachment
         $attach_id = $media_lookup[$fullpath];
         $attach_data = wp_get_attachment_metadata($attach_id);
         wp_update_attachment_metadata($attach_id, $attach_data);
         $media_table[$path] = $fullpath;
         return;
     }

     $filename = basename($fullpath);
     $upload = wp_upload_bits($filename, null, $webPage->getLinkContents($path));
     if ($upload['error']) {
         echo '<li>***Unable to upload media file ' . $filename . '</li>';
         return;
     }
     echo '<li>' . $filename . ' media file uploaded.</li>';

     $wp_filetype = wp_check_filetype(basename($upload['file']), null);
     $attachment = array(
         'guid' => $upload['file'],
         'post_mime_type' => $wp_filetype['type'],
         // the title is the uploaded file name without its extension
         'post_title' => preg_replace('/\\.[^.]+$/', '', basename($upload['file'])),
         'post_content' => '',
         'post_status' => 'inherit',
     );
     $attach_id = wp_insert_attachment($attachment, $upload['file'], $post_id);
     $attach_data = wp_generate_attachment_metadata($attach_id, $upload['file']);
     wp_update_attachment_metadata($attach_id, $attach_data);

     $media_lookup[$fullpath] = $attach_id;
     $media_table[$path] = $fullpath;
     echo '<li>' . $filename . ' attached to post ' . $post_id . '</li>';
 }