/**
 * Parse an export document into plain arrays ready for importing.
 *
 * Reads authors, categories, tags, terms, posts (with attachments, terms,
 * post meta and comments), options, widgets and sidebars from the XML.
 *
 * @param string|SimpleXMLElement $file      Path of the export file, or an already loaded document.
 * @param int                     $num_posts Segment size used to split the posts; 0 reads every post.
 * @param int                     $page      Index of the segment to read when $num_posts is not 0.
 * @return array|WP_Error Array with the keys 'authors', 'posts', 'categories', 'tags', 'terms',
 *                        'options', 'widgets', 'sidebars', 'skins_data', 'base_url' and 'version'.
 *                        'version' is the raw XPath result because the WXR version validation
 *                        (the only place that produced a WP_Error here) is disabled.
 */
public function parse($file, $num_posts = 0, $page = 0)
{
    $authors = $posts = $categories = $tags = $terms = $options = array();

    // Derive the import locations from the file path unless they were configured already.
    if (is_string($file)) {
        $paths = array();
        if (!$this->parser->data('import_path')) {
            $paths['import_path'] = rtrim(HW_URL::get_path_url($file, true), '\\/') . '/';
        }
        if (!$this->parser->data('import_dir')) {
            $paths['import_dir'] = dirname($file);
        }
        $this->parser->update_variables($paths);
    }

    $document = self::read_simplexml_object($file);
    $namespaces = $document->namespaces;
    $xml = $document->xml;

    // The WXR version is not validated; the raw XPath result is handed back as 'version'.
    $wxr_version = $xml->xpath('posts/wp:wxr_version'); #/rss/posts/wp:wxr_version

    $base_url = $xml->xpath('posts/wp:base_site_url'); #/rss/posts/wp:base_site_url
    $base_url = isset($base_url[0]) ? trim((string) $base_url[0]) : '';

    //store namespace
    $this->namespaces = $namespaces;

    //gather skins instances
    $this->gather_skins_data($xml, $namespaces);

    //grab widgets data
    $widgets = $this->grab_widgets($xml, $namespaces);
    $sidebars = $this->grab_sidebars($xml, $namespaces);

    // grab authors (each XPath query is evaluated once and reused for the loop)
    $author_nodes = $xml->xpath('posts/wp:author'); #/rss/posts/wp:author
    if ($author_nodes) {
        foreach ($author_nodes as $author_arr) {
            $a = $author_arr->children($namespaces['wp']);
            $login = (string) $a->author_login;
            $authors[$login] = array(
                'author_id' => (int) $a->author_id,
                'author_login' => $login,
                'author_email' => (string) $a->author_email,
                'author_display_name' => (string) $a->author_display_name,
                'author_first_name' => (string) $a->author_first_name,
                'author_last_name' => (string) $a->author_last_name,
            );
        }
    }

    // grab cats, tags and terms
    $category_nodes = $xml->xpath('posts/wp:category'); #/rss/posts/wp:category
    if ($category_nodes) {
        foreach ($category_nodes as $term_arr) {
            $atts = $term_arr->attributes();
            $t = $term_arr->children($namespaces['wp']);
            // An explicit _id attribute wins over the nicename as import key.
            $key = isset($atts['_id']) ? (string) $atts['_id'] : (string) $t->category_nicename;

            $args = array(
                '_id' => $key,
                'term_id' => (int) $t->term_id,
                'category_nicename' => (string) $t->category_nicename,
                'category_parent' => (string) $t->category_parent,
                'cat_name' => (string) $t->cat_name,
                'category_description' => $this->parser->pre_shortcode_tags((string) $t->category_description),
            );

            // xpath() may return false/null, so test with empty() instead of count().
            $parent_params = isset($t->category_parent) ? $t->category_parent[0]->xpath('hw:params') : array();
            if (!empty($parent_params)) {
                $parent = $this->recursive_option_data($parent_params); //purpose of import result by hoangweb
                if (isset($parent->option[0])) {
                    $args['category_parent'] = $parent->option[0];
                }
            }
            $categories[] = $args;
        }
    }

    $tag_nodes = $xml->xpath('posts/wp:tag'); #/rss/posts/wp:tag
    if ($tag_nodes) {
        foreach ($tag_nodes as $term_arr) {
            $atts = $term_arr->attributes();
            $t = $term_arr->children($namespaces['wp']);
            $key = isset($atts['_id']) ? (string) $atts['_id'] : (string) $t->tag_slug;

            $tags[] = array(
                '_id' => $key,
                'term_id' => (int) $t->term_id,
                'tag_slug' => (string) $t->tag_slug,
                'tag_name' => (string) $t->tag_name,
                'tag_description' => $this->parser->pre_shortcode_tags((string) $t->tag_description),
            );
        }
    }

    $term_nodes = $xml->xpath('posts/wp:term'); #/rss/posts/wp:term
    if ($term_nodes) {
        foreach ($term_nodes as $term_arr) {
            $atts = $term_arr->attributes();
            $t = $term_arr->children($namespaces['wp']);
            $key = isset($atts['_id']) ? (string) $atts['_id'] : (string) $t->term_slug;

            $args = array(
                '_id' => $key,
                'term_id' => (int) $t->term_id,
                'term_taxonomy' => (string) $t->term_taxonomy,
                'slug' => (string) $t->term_slug,
                'term_parent' => (string) $t->term_parent,
                'term_name' => (string) $t->term_name,
                'term_description' => $this->parser->pre_shortcode_tags((string) $t->term_description),
            );

            $parent_params = isset($t->term_parent) ? $t->term_parent[0]->xpath('hw:params') : array();
            if (!empty($parent_params)) {
                $parent = $this->recursive_option_data($parent_params); //purpose of import result by hoangweb
                if (!empty($parent->option[0])) {
                    $args['term_parent'] = $parent->option[0];
                }
            }

            //for nav menu
            if ($args['term_taxonomy'] === 'nav_menu') {
                $args['menu_location'] = (string) $t->menu_location;
            }
            $terms[] = $args;
        }
    }

    // Work out which slice of the posts has to be read.
    $has_items = isset($xml->posts->item);
    $start = $end = 0;
    if ($num_posts != 0 && $has_items) {
        $segments = hwArray::split_loop_segments($num_posts, $xml->posts->item->count());
        // NOTE(review): when $page matches no segment, $start and $end stay 0 and the
        // loop below still emits the first item - confirm whether that is intended.
        if (is_numeric($page) && isset($segments[$page])) {
            $range = explode('-', $segments[$page]);
            $start = $range[0];
            $end = $range[1];
        }
    } elseif ($has_items) {
        $end = $xml->posts->item->count();
    }

    // grab posts
    $count = 0;
    if ($has_items) {
        foreach ($xml->posts->item as $item) {
            // $count is the 1-based position of the current item.
            if ($start > ++$count) {
                continue;
            }
            if ($end + 1 < $count) {
                break;
            }

            //by hoang: attributes of the <item> element as a plain array
            $atts = (array) $item->attributes();
            $atts = isset($atts['@attributes']) ? $atts['@attributes'] : array();

            $post = array(
                'post_title' => (string) $item->title,
                'guid' => (string) $item->guid,
                'hw_attributes' => $atts,
            );

            $dc = $item->children($namespaces['dc']);
            $post['post_author'] = (string) $dc->creator;

            $content = $item->children($namespaces['content']);
            $excerpt = $item->children($namespaces['excerpt']);

            // The content is either plain text or an hw:params structure (import result).
            $content_params = isset($content->encoded) ? $content->encoded[0]->xpath('hw:params') : array();
            if (!empty($content_params)) {
                $content_encoded = $this->recursive_option_data($content_params); //purpose of import result by hoangweb
                if (!empty($content_encoded->option[0])) {
                    $content_encoded = $content_encoded->option[0];
                }
            } else {
                $content_encoded = (string) $content->encoded;
            }

            // Objects (import results) are stored untouched; everything else has its shortcodes expanded.
            if (is_object($content_encoded)) {
                $post['post_content'] = $content_encoded;
            } else {
                $post['post_content'] = $this->parser->pre_shortcode_tags($content_encoded);
            }
            $post['post_excerpt'] = (string) $excerpt->encoded;

            $wp = $item->children($namespaces['wp']);
            $hw = $item->children($namespaces['hw']);

            //import id or save post id
            $post['post_id'] = (int) $wp->post_id;
            if (!empty($atts['_id'])) {
                $post['_id'] = $atts['_id'];
            } elseif (!empty($hw->_id)) {
                $post['_id'] = (string) $hw->_id;
            } elseif ($post['post_title'] !== '') {
                // Fall back to the title; the array key is 'post_title', a 'title' key is never set.
                $post['_id'] = sanitize_title($post['post_title']);
            }

            $post['post_date'] = (string) $wp->post_date;
            $post['post_date_gmt'] = (string) $wp->post_date_gmt;
            $post['comment_status'] = (string) $wp->comment_status;
            $post['ping_status'] = (string) $wp->ping_status;
            $post['post_name'] = (string) $wp->post_name;
            $post['status'] = !empty($wp->status) ? (string) $wp->status : 'publish';
            $post['post_parent'] = (int) $wp->post_parent;
            $post['menu_order'] = (int) $wp->menu_order;
            $post['post_type'] = (string) $wp->post_type;
            $post['post_password'] = (string) $wp->post_password;
            $post['is_sticky'] = (int) $wp->is_sticky;

            //for attachment of post type
            if (isset($wp->attachment_url)) {
                $post['attachment_url'] = (string) $wp->attachment_url;
            }

            //fetch attachments in other post type
            foreach ($hw->attachment as $a) {
                $url = (string) $a->url;
                $demo = $this->parser->data('demo');
                if ($demo) {
                    $url = $demo->get_file_url($url);
                } elseif (!HW_URL::valid_url($url)) {
                    // import_path is stored with a trailing slash; trim it to avoid '//' in the result.
                    $url = rtrim((string) $this->parser->data('import_path'), '\\/') . '/' . $url;
                }

                if ($post['post_type'] !== 'attachment') {
                    // Each attachment becomes its own post, cloned from the parent post data.
                    $_post = hwArray::cloneArray($post);
                    $_post['post_content'] = HW_String::limit($_post['post_content'], 50);
                    $_post['post_excerpt'] = HW_String::limit($_post['post_excerpt'], 50);
                    $_post['attachment_url'] = $url;
                    $_post['post_type'] = 'attachment';
                    $_post['_id'] = (string) $a->_id;
                    $posts[] = $_post;

                    if ((string) $a->thumbnail) {
                        $post['hw_thumbnail_id'] = (string) $a->_id;
                    }
                }
            }

            foreach ($item->category as $c) {
                $att = $c->attributes();
                if (isset($att['nicename'])) {
                    $post['terms'][] = array(
                        'name' => (string) $c,
                        'slug' => (string) $att['nicename'],
                        'domain' => (string) $att['domain'],
                    );
                }
            }

            //post meta
            foreach ($wp->postmeta as $meta) {
                //by hoangweb
                $hw_params = $meta->meta_value->xpath('hw:params');
                if (!empty($hw_params)) {
                    $meta_value = $this->recursive_option_data($hw_params);
                    if (!empty($meta_value->attributes['export']) && is_array($meta_value->option[0])) {
                        $export_path = (string) $meta_value->attributes['export'];
                        $export_path = hwArray::drwOfPath(explode('/', $export_path));
                        // SECURITY(review): $export_path comes from the import file and is executed
                        // through eval(); a crafted file can run arbitrary PHP unless
                        // hwArray::drwOfPath() strictly sanitizes it. Replace with an explicit
                        // array walk once the format produced by drwOfPath() is confirmed.
                        eval('$meta_value = $meta_value->option[0]' . $export_path . ';');
                    } elseif (!empty($meta_value->option[0])) {
                        $meta_value = $meta_value->option[0];
                    } else {
                        $meta_value = array();
                    }

                    // Strings and objects are kept as they are; anything else is serialized.
                    if (!is_string($meta_value) && !is_object($meta_value)) {
                        $meta_value = serialize($meta_value);
                    }
                } else {
                    $meta_value = (string) $meta->meta_value;
                }

                $post['postmeta'][] = array(
                    'key' => (string) $meta->meta_key,
                    'value' => $meta_value,
                );
            }

            //for comment
            foreach ($wp->comment as $comment) {
                $meta = array();
                if (isset($comment->commentmeta)) {
                    foreach ($comment->commentmeta as $m) {
                        $meta[] = array(
                            'key' => (string) $m->meta_key,
                            'value' => (string) $m->meta_value,
                        );
                    }
                }

                $post['comments'][] = array(
                    'comment_id' => (int) $comment->comment_id,
                    'comment_author' => (string) $comment->comment_author,
                    'comment_author_email' => (string) $comment->comment_author_email,
                    'comment_author_IP' => (string) $comment->comment_author_IP,
                    'comment_author_url' => (string) $comment->comment_author_url,
                    'comment_date' => (string) $comment->comment_date,
                    'comment_date_gmt' => (string) $comment->comment_date_gmt,
                    'comment_content' => (string) $comment->comment_content,
                    'comment_approved' => (string) $comment->comment_approved,
                    'comment_type' => (string) $comment->comment_type,
                    'comment_parent' => (string) $comment->comment_parent,
                    'comment_user_id' => (int) $comment->comment_user_id,
                    'commentmeta' => $meta,
                );
            }

            $posts[] = $post;
        }
    }

    //grab options
    if (isset($xml->options->option)) {
        $options = $this->grab_options($xml->options->option, $namespaces);
    }

    return array(
        'authors' => $authors,
        'posts' => $posts,
        'categories' => $categories,
        'tags' => $tags,
        'terms' => $terms,
        'options' => $options,
        'widgets' => $widgets,
        'sidebars' => $sidebars,
        'skins_data' => $this->skins_data,
        'base_url' => $base_url,
        'version' => $wxr_version,
    );
}