/**
 * Rewrites links to locally imported HTML files so that they point at the
 * permalink of the post each file was imported as.
 *
 * The stage is a no-op when no lookup table is supplied. Only links whose
 * target ends in .htm/.html, does not look like an absolute URL, and resolves
 * (via WebPage::getFullPath()) to a key of the lookup table are rewritten;
 * every other link is left untouched.
 *
 * @param WebPage          $webPage          page being processed; used to resolve link paths
 * @param HTMLImportStages $stagesSettings   not used by this stage
 * @param WPMetaConfigs    $meta             post meta whose content is read and written back
 * @param array|null       $html_post_lookup map of full source path => post id of the imported page
 *
 * @return null
 */
protected function performStage(WebPage $webPage, HTMLImportStages $stagesSettings, WPMetaConfigs &$meta, &$html_post_lookup = null) {
    // Without a lookup of already-imported files there is nothing to resolve links against.
    if (is_null($html_post_lookup)) {
        return;
    }

    $body    = $meta->getPostContent();
    $bodyXML = XMLHelper::getXMLObjectFromString($body);

    // Guard against content the helper could not turn into an XML object.
    // is_object() is used on purpose: an element without children casts to boolean false.
    $all_links = is_object($bodyXML) ? $bodyXML->xpath('//a[@href]') : array();

    // Raw href value (as written in the document) => resolved full path of the linked file.
    $link_table = array();

    // TODO: encapsulate this in a function, use XMLHelper::getAllHRefsFromHTML as a start
    if ($all_links) {
        foreach ($all_links as $link) {
            // Iterate the attributes so the name comparison can be case-insensitive.
            foreach ($link->attributes() as $attribute => $value) {
                if (0 !== strcasecmp('href', $attribute)) {
                    continue;
                }

                $path = (string) $value;

                // TODO: handle foo.html#rar
                // Skip anything that starts with a letter and contains a colon (http://..., mailto:...).
                if (preg_match('/^[a-zA-Z].*:.*/', $path)) {
                    continue;
                }

                // TODO: need to handle foo.html without handling http://...
                // Only handle files that end in .html or .htm.
                if (!preg_match('/\\.([hH][tT][mM][lL]?)$/', $path)) {
                    continue;
                }

                $fullpath = $webPage->getFullPath($path);
                if (!$fullpath) {
                    echo '<span>***could not update link ' . $path . '</span><br>';
                    continue;
                }

                // Only files that were already imported appear in the lookup; leave other links alone.
                if (array_key_exists($fullpath, $html_post_lookup)) {
                    $link_table[$path] = $fullpath;
                }
            }
        }
    }

    // With the full list known, rewrite the raw HTML in one pass per link so every reference is caught.
    foreach ($link_table as $link => $full_link) {
        $post_id   = $html_post_lookup[$full_link];
        $post_link = get_permalink($post_id);

        // A missing permalink would blank the href; keep the original link instead.
        if (!$post_link) {
            echo '<span>***could not update link ' . $link . '</span><br>';
            continue;
        }

        echo 'Updating ' . $link . ' with ' . $post_link . '<br>';

        // Group 2 allows leading '.', '/' (and, inside a character class, \b = backspace) before the link.
        $search_str = '/(\\b[hH][rR][eE][fF]\\s*=\\s*")([\\b\\.\\/]*' . preg_quote($link, '/') . '\\b)(")/';

        // A callback inserts the permalink verbatim. preg_quote() is only valid for patterns;
        // in a replacement string it leaves literal backslashes in the URL.
        $updated = preg_replace_callback(
            $search_str,
            function ($matches) use ($post_link) {
                return $matches[1] . $post_link . $matches[3];
            },
            $body
        );

        // preg_replace_callback() yields null on a PCRE error; keep the previous body in that case.
        if (!is_null($updated)) {
            $body = $updated;
        }
    }

    $meta->setPostContent($body);
}
/**
 * Collects the link destinations found in this webpage's content.
 *
 * The content is converted to an XML object and handed to
 * XMLHelper::getAllHRefsFromHTML(), whose result is returned unchanged.
 *
 * @return array
 */
public function getAllLinks() {
    return XMLHelper::getAllHRefsFromHTML(
        XMLHelper::getXMLObjectFromString($this->getContent())
    );
}
/**
 * Populates this meta object from a loaded WebPage and the plugin-wide settings.
 *
 * Content and title come from the page's markup when it can be parsed. The
 * post is always configured as a published page with comments and pings
 * closed. Categories come from the page's own settings when it has any,
 * otherwise from the global settings.
 *
 * @param admin\HtmlImportSettings $globalSettings plugin-wide settings (categories, template)
 * @param WebPage                  $webPage        page to build the meta data from
 * @param null                     $post_id        when given, existing values are loaded from this post first
 * @param null                     $parent_page_id when given, stored as the post parent
 */
public function buildConfig(admin\HtmlImportSettings $globalSettings, WebPage $webPage, $post_id = null, $parent_page_id = null) {
    if ($post_id !== null) {
        $this->loadFromPostID($post_id);
    }

    // NOTE(review): the type hint already rejects a null $webPage, and the page is
    // dereferenced unconditionally further down, so this check looks redundant.
    if ($webPage !== null) {
        $pageXml = XMLHelper::getXMLObjectFromString($webPage->getContent());
        if ($pageXml !== null) {
            $this->setPostContent($pageXml->body->asXML());
            $this->setPostTitle($this->getTitleFromTag($pageXml));
        }
    }

    // Fixed defaults for every imported page.
    $this->setPostName($this->getPostTitle());
    $this->setPostStatus('publish');
    $this->setPostType('page');
    $this->setCommentStatus('closed');
    $this->setPingStatus('closed');

    // Page-level category override, if the page carries its own settings.
    $pageSettings = $webPage->getSettings();
    $categoryIDs  = ($pageSettings !== null) ? $pageSettings->getCategoryIds() : null;

    // TODO: need to determine if index can override by providing no categories, and what that means
    if ($categoryIDs === null || count($categoryIDs) <= 0) {
        // No usable override: translate the globally configured category names into ids.
        $categoryIDs     = null;
        $globalCategories = $globalSettings->getCategories()->getValuesArray();
        if (is_array($globalCategories)) {
            foreach ($globalCategories as $index => $categoryName) {
                $categoryIDs[$index] = (int) get_cat_ID(trim($categoryName));
            }
        }
    }
    $this->setPostCategory($categoryIDs);

    // TODO need a way to track the date and time of the original file
    // (e.g. date('Y-m-d H:i:s', filemtime($source_file)) once a source file is available)
    $this->setPostDate(null);

    if ($parent_page_id !== null) {
        $this->setPostParent($parent_page_id);
    }

    $menuPosition = $webPage->getOrderPosition();
    if ($menuPosition !== null) {
        $this->setMenuOrder($menuPosition);
    }

    $currentUser = wp_get_current_user();
    $this->setPostAuthor($currentUser->ID); // TODO: should be in the settings object

    $this->setPageTemplate($globalSettings->getTemplate()->getValue());
}
/**
 * Uploads the local media referenced by a post and rewrites the post content
 * to point at the resulting attachment URLs.
 *
 * Two kinds of references are handled: the src of every img element, and the
 * href of every anchor whose target ends in a known media extension. Files
 * already present in $media_lookup are reused; new files are uploaded,
 * attached to the post and added to $media_lookup. Nothing is imported when
 * $media_lookup is null.
 *
 * @param WebPage          $webPage        page being processed; resolves paths and supplies file contents
 * @param HTMLImportStages $stagesSettings not used by this stage
 * @param WPMetaConfigs    $meta           post meta whose content is read and written back
 * @param array|null       $media_lookup   map of full source path => attachment id, extended in place
 *
 * @return null
 */
protected function performStage(WebPage $webPage, HTMLImportStages $stagesSettings, WPMetaConfigs &$meta, &$media_lookup = null) {
    $post_id = $meta->getPostId();
    $body    = $meta->getPostContent();

    if (is_null($body) || strcmp('', $body) == 0) {
        echo '** the body for post ' . $post_id . ' was empty, no media to import.';

        return;
    }

    // Raw attribute value (as written in the document) => resolved full path of the media file.
    $media_table     = array();
    $file_as_xml_obj = XMLHelper::getXMLObjectFromString($body);

    // is_object() guards against content the helper could not turn into an XML object
    // (an element without children casts to boolean false, so a truthiness test is not enough).
    if (!is_null($media_lookup) && is_object($file_as_xml_obj)) {
        // import img srcs
        foreach ($this->findLocalAttributeValues($file_as_xml_obj, '//img[@src]', 'src') as $path) {
            // An image used several times on the page only needs to be handled once.
            if (!array_key_exists($path, $media_table)) {
                $this->importLocalMediaFile($webPage, $path, $post_id, $media_lookup, $media_table);
            }
        }

        // linked media: png,bmp,jpg,jpeg,gif,pdf,doc,docx,mp3,ogg,wav
        foreach ($this->findLocalAttributeValues($file_as_xml_obj, '//a[@href]', 'href') as $path) {
            if (preg_match('/\\.(png|bmp|jpg|jpeg|gif|pdf|doc|docx|mp3|ogg|wav)$/', strtolower($path))) {
                $this->importLocalMediaFile($webPage, $path, $post_id, $media_lookup, $media_table);
            }
        }
    }

    foreach ($media_table as $media_item => $full_media_path) {
        $media_url = wp_get_attachment_url($media_lookup[$full_media_path]);

        // Without a URL the attribute would be blanked; keep the original reference instead.
        if (!$media_url) {
            continue;
        }

        // Array keys that look like integers come back as int, hence the cast.
        $quoted_item = preg_quote((string) $media_item, '/');

        $patterns = array(
            // img src, optionally preceded by '.', '/' (or, inside the class, \b = backspace)
            '/(\\b[iI][mM][gG]\\s*[^>]*\\s+[sS][rR][cC]\\s*=\\s*")([\\b\\/\\.]*' . $quoted_item . '\\b)(")/',
            // a href; no \b after the opening quote, because it can never match when the
            // path starts with '.' or '/', which left relative links such as ../a.pdf untouched
            '/(\\b[hH][rR][eE][fF]\\s*=\\s*")(' . $quoted_item . '\\b)(")/',
        );

        // A callback inserts the URL verbatim. preg_quote() is only valid for patterns;
        // in a replacement string it leaves literal backslashes in the URL.
        $updated = preg_replace_callback(
            $patterns,
            function ($matches) use ($media_url) {
                return $matches[1] . $media_url . $matches[3];
            },
            $body
        );

        // preg_replace_callback() yields null on a PCRE error; keep the previous body in that case.
        if (!is_null($updated)) {
            $body = $updated;
        }
    }

    $meta->setPostContent($body);
    echo '<li>Post ' . $post_id . ' updated with correct image links.</li>';
}

/**
 * Returns the values of one attribute on all nodes matched by an XPath query,
 * skipping values that look like absolute URLs.
 *
 * @param object $xml           parsed document exposing xpath()
 * @param string $xpath         query selecting the elements of interest
 * @param string $attributeName attribute to read, compared case-insensitively
 *
 * @return array attribute values in document order
 */
private function findLocalAttributeValues($xml, $xpath, $attributeName) {
    $values = array();
    $nodes  = $xml->xpath($xpath);

    if (!$nodes) {
        return $values;
    }

    foreach ($nodes as $node) {
        foreach ($node->attributes() as $attribute => $value) {
            if (0 !== strcasecmp($attributeName, $attribute)) {
                continue;
            }

            $path = (string) $value;

            // Anything starting with a letter and containing a colon is treated as non-local.
            if (!preg_match('/^[a-zA-Z].*:.*/', $path)) {
                $values[] = $path;
            }
        }
    }

    return $values;
}

/**
 * Makes sure one local media file exists as an attachment and records it.
 *
 * A file already in $media_lookup is reused (its metadata is re-saved); any
 * other file is uploaded, attached to the post and added to $media_lookup.
 * On success the raw path is recorded in $media_table for later rewriting.
 *
 * @param WebPage $webPage      page used to resolve the path and read the file contents
 * @param string  $path         attribute value as written in the document
 * @param mixed   $post_id      post the attachment is created under
 * @param array   $media_lookup map of full source path => attachment id, extended in place
 * @param array   $media_table  map of raw path => full source path, extended in place
 *
 * @return null
 */
private function importLocalMediaFile(WebPage $webPage, $path, $post_id, &$media_lookup, &$media_table) {
    $fullpath = $webPage->getFullPath($path);

    // NOTE(review): assumes getFullPath() returns a falsy value when the path cannot be
    // resolved; in that case there is no file name to upload under.
    if (!$fullpath) {
        echo '<li>***Unable to upload media file ' . $path . '</li>';

        return;
    }

    require_once ABSPATH . 'wp-admin/includes/image.php';

    if (array_key_exists($fullpath, $media_lookup)) {
        $attach_id   = $media_lookup[$fullpath];
        $attach_data = wp_get_attachment_metadata($attach_id);
        wp_update_attachment_metadata($attach_id, $attach_data);
        $media_table[$path] = $fullpath;

        return;
    }

    $filename = basename($fullpath);
    $upload   = wp_upload_bits($filename, null, $webPage->getLinkContents($path));

    if ($upload['error']) {
        echo '<li>***Unable to upload media file ' . $filename . '</li>';

        return;
    }

    echo '<li>' . $filename . ' media file uploaded.</li>';

    $wp_filetype = wp_check_filetype(basename($upload['file']), null);
    $attachment  = array(
        'guid'           => $upload['file'],
        'post_mime_type' => $wp_filetype['type'],
        // Title is the uploaded file name without its extension.
        'post_title'     => preg_replace('/\\.[^.]+$/', '', basename($upload['file'])),
        'post_content'   => '',
        'post_status'    => 'inherit',
    );

    $attach_id   = wp_insert_attachment($attachment, $upload['file'], $post_id);
    $attach_data = wp_generate_attachment_metadata($attach_id, $upload['file']);
    wp_update_attachment_metadata($attach_id, $attach_data);

    $media_lookup[$fullpath] = $attach_id;
    $media_table[$path]      = $fullpath;

    echo '<li>' . $filename . ' attached to post ' . $post_id . '</li>';
}