/**
 * Validate an article's body and abstract XML when the article is saved.
 *
 * The filtered post content and the excerpt are each prepared and validated against the
 * partial Kipling RNG schema shipped with the theme. If either validation reports an
 * error while the post is being published, the post is saved back as a draft and a
 * filter is registered on the post-edit redirect location.
 *
 * @param int    $post_id ID of the post being saved.
 * @param object $post    Post object being saved. Its post_status property is set to
 *                        'draft' when validation fails on publish.
 * @return void
 */
function anno_validate_on_save($post_id, $post) {
	// Unhook this handler before doing anything else; wp_update_post() is called below.
	remove_action('save_post_article', 'anno_validate_on_save', 999, 2);

	$schema = trailingslashit(get_template_directory()) . 'functions/schema/kipling-jp3-partial.rng';

	// Both parts are always validated, body first, then abstract.
	$body_content = anno_validation_prep_body($post->post_content_filtered, $post_id);
	$body_validation = anno_validate($body_content, $schema);

	$abstract_content = anno_validation_prep_abstract($post->post_excerpt);
	$abstract_validation = anno_validate($abstract_content, $schema);

	$has_error = false;
	foreach (array($body_validation, $abstract_validation) as $validation) {
		if (isset($validation['status']) && $validation['status'] === 'error') {
			$has_error = true;
		}
	}

	// Only a failed validation on a post that is being published needs intervention.
	if (!$has_error || $post->post_status !== 'publish') {
		return;
	}

	$post->post_status = 'draft';

	if (anno_workflow_enabled()) {
		// NOTE(review): with the workflow enabled the post is still saved with status
		// 'draft' below while _post_state becomes 'approved' - confirm that 'draft'
		// (rather than 'pending') is the intended post status for this case.
		update_post_meta($post_id, '_post_state', 'approved');
	}

	add_filter('redirect_post_location', 'anno_validation_redirect_post_location_message');

	// Detach the hooks that would otherwise run during the status-only update below.
	remove_action('post_updated', 'annowf_transistion_state', 10, 3);
	remove_action('add_post_meta', 'anno_save_appendices_xml_as_html', 10, 3);
	remove_filter('wp_insert_post_data', 'anno_insert_post_data', null, 2);
	remove_filter('wp_insert_post_data', 'annowf_insert_post_data', 10, 2);

	wp_update_post(array('ID' => $post_id, 'post_status' => 'draft'));
}
/**
 * Parse a Kipling DTD XML file into the array structure handed to the importer.
 *
 * The file is read through the global $wp_filesystem, validated against the Kipling RNG
 * schema, and then every <article> element is converted into one post entry followed by
 * the attachment entries found in its body (inline graphics and media).
 *
 * @param string $file Path of the XML file to read.
 * @return array|WP_Error On success, an array with the keys 'authors', 'authors_meta',
 *                        'posts', 'categories', 'tags', 'terms', 'base_url' and 'version'.
 *                        When schema validation fails, the validation result array with
 *                        the raw file content added under 'content'. A WP_Error when the
 *                        file cannot be read or contains no <article> element.
 */
function parse($file) {
	global $wp_filesystem;

	$authors = $authors_meta = $posts = array();

	$file_content = $wp_filesystem->get_contents($file);
	if (!$file_content) {
		return new WP_Error('xml_parse_error', __('There was an error when reading this Kipling DTD file', 'anno'));
	}

	$schema = trailingslashit(get_template_directory()) . 'functions/schema/kipling-jp3.rng';
	$validation_results = anno_validate($file_content, $schema);
	if (isset($validation_results['status']) && $validation_results['status'] === 'error') {
		// Hand the raw content back with the validation result.
		$validation_results['content'] = $file_content;
		return $validation_results;
	}

	if (!class_exists('phpQueryObject')) {
		require trailingslashit(TEMPLATEPATH) . 'functions/phpQuery/phpQuery.php';
	}
	phpQuery::newDocumentXML($file_content);

	// Let's make sure we have article tags.
	$articles = pq('article');
	$num_articles = $articles->length();
	if (empty($num_articles)) {
		return new WP_Error('xml_parse_article_error', __('This does not appear to be a Kipling DTD file, no articles could be found', 'anno'));
	}

	// Made-up post IDs, used only to express the post/attachment parent relationship.
	$post_id = 1;
	// Fallback author IDs keep counting across articles so they do not collide.
	$default_author_id = 1;

	// Process articles; this covers all category, tag, author, term etc. processing.
	foreach ($articles as $article_node) {
		// Scope every query below to the article currently being iterated.
		$article = pq($article_node);
		$article_meta = pq('article-meta', $article);
		$article_back = pq('back', $article);

		// Per-article accumulators; nothing may leak from a previous article.
		$post = array();
		$attachments = array();
		$author_snapshots = array();

		$post['post_type'] = 'article';
		$post['post_content_filtered'] = trim(pq('> body', $article)->html());
		$post['post_title'] = trim(pq('article-title', $article_meta)->text());
		$post['postmeta'][] = array('key' => '_anno_subtitle', 'value' => trim(pq('subtitle', $article_meta)->text()));
		// Auto generated
		$post['guid'] = '';

		$abstract = pq('abstract', $article_meta);
		// We don't want the title of the abstract.
		pq('title', $abstract)->remove();
		// Just the text, wpautop is run on it later (excerpt).
		$post['post_excerpt'] = trim($abstract->text());

		// Post content gets generated by the Annotum theme from the XML on wp_insert_post.
		// We can leave it empty for now.
		$post['post_content'] = '';
		$post['post_id'] = $post_id;
		// Generated from post title on insert
		$post['post_name'] = '';

		$pub_date = $this->parse_date(pq('pub-date', $article_meta));
		$post['post_date'] = (string) $pub_date;
		$post['post_date_gmt'] = (string) $pub_date;

		$post['status'] = 'draft';
		// Reflect in post_state meta as well.
		$post['postmeta'][] = array('key' => '_post_state', 'value' => 'draft');

		// Not used in Kipling DTD, but set for data structure integrity required by the importer.
		$post['post_parent'] = 0;
		$post['menu_order'] = 0;
		$post['post_password'] = '';
		$post['is_sticky'] = 0;
		$post['ping_status'] = '';
		$post['comment_status'] = '';

		// Grab the category(ies). Annotum DTD should contain only one, Kipling DTD does
		// not contain this requirement. Categories are not defined outside of an article.
		foreach (pq('subject', $article_meta) as $category) {
			$cat_name = trim(pq($category)->text());
			if (!empty($cat_name)) {
				$post['terms'][] = array('name' => $cat_name, 'slug' => sanitize_title($cat_name), 'domain' => 'article_category');
			}
		}

		// Grab the tags. Tags are not defined outside of an article either.
		foreach (pq('kwd', $article_meta) as $tag) {
			$tag_name = trim(pq($tag)->text());
			if (!empty($tag_name)) {
				$post['terms'][] = array('name' => $tag_name, 'slug' => sanitize_title($tag_name), 'domain' => 'article_tag');
			}
		}

		// First author is the primary author, possible @todo - look for primary-author contrib type.
		$first_author_check = true;
		// Used for attachment assignment; stays at the fallback ID if there are no contributors.
		$first_author_id = $default_author_id;

		// Grab the author(s).
		foreach (pq('contrib', $article_meta) as $contributor) {
			$author_arr = $this->parse_author(pq($contributor));
			$author = $author_arr['author'];
			$author_meta = $author_arr['author_meta'];

			// Check for author_id existence, if not, assign one.
			if (empty($author['author_id'])) {
				$author['author_id'] = $default_author_id;
			}

			// Keyed consistently by author_id so meta can be matched on import of the user.
			$authors[$author['author_id']] = $author;
			$authors_meta[$author['author_id']] = $author_meta;

			if ($first_author_check) {
				$post['post_author'] = $author['author_id'];
				$first_author_id = $author['author_id'];
				$first_author_check = false;
			}

			$author_snapshots[] = $this->author_snapshot($author, $author_meta);

			// We'll convert this in the import process.
			$post['postmeta'][] = array('key' => '_anno_author_' . $author['author_id'], 'value' => $author['author_id']);

			$default_author_id++;
		}

		// Acknowledgements
		$ack = trim(pq('ack p', $article_back)->text());
		if (!empty($ack)) {
			$post['postmeta'][] = array('key' => '_anno_acknowledgements', 'value' => $ack);
		}

		// Funding
		$funding = trim(pq('funding-statement', $article_meta)->text());
		if (!empty($funding)) {
			$post['postmeta'][] = array('key' => '_anno_funding', 'value' => $funding);
		}

		// Appendices
		$appendix_array = array();
		foreach (pq('app', $article_back) as $appendix) {
			$appendix = trim(pq($appendix)->html());
			if (!empty($appendix)) {
				$appendix_array[] = $appendix;
			}
		}
		if (!empty($appendix_array)) {
			// Process to HTML on import.
			$post['postmeta'][] = array('key' => '_anno_appendices', 'value' => serialize($appendix_array));
		}

		// References
		$ref_array = array();
		// Every stored reference starts from this template so all keys are always present.
		$single_ref = array('doi' => '', 'text' => '', 'pmid' => '', 'figures' => '', 'url' => '');
		foreach (pq('ref', $article_back) as $reference) {
			$reference = pq($reference);

			// IDs look like "ref3"; only a positive numeric suffix is used, as a zero-based index.
			$ref_id = str_replace('ref', '', (string) $reference->attr('id'));
			if (is_numeric($ref_id) && intval($ref_id) >= 1) {
				$ref_id = intval($ref_id) - 1;
			}
			else {
				$ref_id = null;
			}

			// For now, just support mixed-citations as text.
			$ref_data = $single_ref;
			$ref_data['text'] = trim(pq('mixed-citation', $reference)->text());

			// Compare against null explicitly: index 0 is a valid position.
			if ($ref_id === null) {
				$ref_array[] = $ref_data;
			}
			else if (isset($ref_array[$ref_id])) {
				// This key was already filled by an appended reference: take the slot
				// and move the old reference to the end.
				$old_ref = $ref_array[$ref_id];
				$ref_array[$ref_id] = $ref_data;
				$ref_array[] = $old_ref;
			}
			else {
				$ref_array[$ref_id] = $ref_data;
			}
		}
		if (!empty($ref_array)) {
			$post['postmeta'][] = array('key' => '_anno_references', 'value' => serialize($ref_array));
		}

		// Attachments
		// Suffix appended to the parent post_id to build a distinct ID per attachment.
		$attachment_id_mod = 0;
		// $pub_date is the date gathered from the post data.
		$attachment_template = array(
			'upload_date' => (string) $pub_date,
			'post_date' => (string) $pub_date,
			'post_date_gmt' => (string) $pub_date,
			'post_author' => $first_author_id,
			'post_type' => 'attachment',
			'post_parent' => $post_id,
			'post_id' => '',
			'post_content' => '',
			'post_content_filtered' => '',
			'postmeta' => '',
			'guid' => '',
			'attachment_url' => '',
			'status' => 'inherit',
			'post_title' => '',
			'ping_status' => '',
			'menu_order' => '',
			'post_password' => '',
			'terms' => '',
			'comment_status' => '',
			'is_sticky' => '',
			'post_excerpt' => '',
			'post_name' => '',
		);

		foreach (pq('> body inline-graphic', $article) as $img) {
			$img = pq($img);
			$img_url = $img->attr('xlink:href');

			// Don't save chart API images (most likely formulas).
			if (!empty($img_url) && strpos($img_url, 'googleapis.com/chart') === false) {
				$post_meta = array();
				$alt_text = pq('alt-text', $img)->html();
				if (!empty($alt_text)) {
					$post_meta[] = array('key' => '_wp_attachment_image_alt', 'value' => $alt_text);
					$attachment_title = $alt_text;
				}
				else {
					// end() needs a real variable; fall back to the last URL path segment.
					$url_parts = explode('/', $img_url);
					$attachment_title = end($url_parts);
				}

				// NOTE(review): 'post_title' carries the URL while the readable title is
				// stored under 'title' - confirm which key the importer reads.
				$attachments[] = array_merge($attachment_template, array(
					'post_id' => $post_id . '.' . $attachment_id_mod,
					'guid' => $img_url,
					'attachment_url' => $img_url,
					'post_parent' => $post_id,
					'title' => trim($attachment_title),
					'postmeta' => $post_meta,
					'post_title' => $img_url,
				));
				$attachment_id_mod++;
			}
		}

		// Find media and save as attachment.
		foreach (pq('> body media', $article) as $media_image) {
			$media_image = pq($media_image);

			// parse_media() is expected to return an array with:
			// attachment_url, guid, post_title, post_content, postmeta
			$media_array = $this->parse_media($media_image);
			if (is_array($media_array) && !empty($media_array['attachment_url'])) {
				// Start clean so meta from a previous image or media element is not inherited.
				$post_meta = array();

				// Check if this is a figure image.
				$figure = $media_image->parent('fig');
				$figure_html = trim($figure->html());
				if (!empty($figure_html)) {
					$post_meta[] = array('key' => '_anno_attachment_image_label', 'value' => pq('label', $figure)->html());
				}

				$media_meta = (isset($media_array['postmeta']) && is_array($media_array['postmeta'])) ? $media_array['postmeta'] : array();

				$attachment = array_merge($media_array, array(
					'post_id' => $post_id . '.' . $attachment_id_mod,
					'post_parent' => $post_id,
					'postmeta' => array_merge($post_meta, $media_meta),
				));
				$attachments[] = array_merge($attachment_template, $attachment);
				$attachment_id_mod++;
			}
		}

		// Responses inside this article become its comments.
		foreach (pq('response', $article) as $comment) {
			$comment = pq($comment);
			$comment_content = pq('body', $comment)->html();
			$comment_date = $this->parse_date(pq('pub-date', $comment));
			$comment_author_arr = $this->parse_author(pq('contrib', $comment));
			$comment_author = $comment_author_arr['author'];

			$post['comments'][] = array(
				'comment_id' => '',
				'comment_author' => (string) $comment_author['author_display_name'],
				'comment_author_email' => (string) $comment_author['author_email'],
				'comment_author_IP' => '',
				'comment_author_url' => (string) $comment_author['author_url'],
				'comment_date' => (string) $comment_date,
				'comment_date_gmt' => '',
				'comment_content' => $comment_content,
				'comment_approved' => 1,
				'comment_type' => '',
				'comment_parent' => '',
				'comment_user_id' => 0,
				'commentmeta' => array(),
			);
		}

		// Save our author snapshots.
		$post['postmeta'][] = array('key' => '_anno_author_snapshot', 'value' => serialize($author_snapshots));

		// The article first, then its attachments; both lists are numerically indexed.
		$posts[] = $post;
		$posts = array_merge($posts, $attachments);

		$post_id++;
	}

	return array(
		'authors' => $authors,
		'authors_meta' => $authors_meta,
		'posts' => $posts,
		'categories' => array(),
		'tags' => array(),
		'terms' => array(),
		'base_url' => '',
		'version' => 1.1,
	);
}