/**
 * Print an XML sitemap conforming to the Sitemaps.org protocol.
 *
 * Outputs an XML list of up to the 1000 most recently modified published
 * posts (post types `post` and `page` unless filtered), followed by one entry
 * for the site home.
 *
 * A sitemap found in the `jetpack_sitemap` transient is echoed as-is; a freshly
 * built one is stored in that transient with a DAY_IN_SECONDS expiry.
 *
 * The function sends its own headers, echoes the XML and always ends the
 * request with `die`, so it never returns to the caller.
 *
 * @module sitemaps
 *
 * @link http://sitemaps.org/protocol.php Sitemaps.org protocol.
 */
function jetpack_print_sitemap() {
    // $post is deliberately the global: the loop below assigns each row to it
    // before calling setup_postdata(), and wp_reset_postdata() restores it.
    global $wpdb, $post;

    // Serve the cached copy when there is one.
    $xml = get_transient('jetpack_sitemap');
    if ($xml) {
        header('Content-Type: ' . jetpack_sitemap_content_type(), true);
        echo $xml;
        die;
    }

    // Compatibility with PHP 5.3 and older
    if (!defined('ENT_XML1')) {
        define('ENT_XML1', 16);
    }

    /**
     * Filter the post types that will be included in sitemap.
     *
     * @module sitemaps
     *
     * @since 3.9.0
     *
     * @param array $post_types Array of post types.
     */
    $post_types = apply_filters('jetpack_sitemap_post_types', array('post', 'page'));

    $post_types_in = array();
    foreach ((array) $post_types as $post_type) {
        $post_types_in[] = $wpdb->prepare('%s', $post_type);
    }

    // A direct query is used because get_posts() was too heavy for our needs.
    // When the filter leaves no post types at all, skip the query instead of
    // sending the invalid SQL "IN ()" to the database.
    $posts = array();
    if (!empty($post_types_in)) {
        $post_types_in = join(",", $post_types_in);

        // The cast keeps the loop below safe should get_results() hand back
        // null instead of a list of rows.
        $posts = (array) $wpdb->get_results("SELECT ID, post_type, post_modified_gmt, comment_count FROM {$wpdb->posts} WHERE post_status='publish' AND post_type IN ({$post_types_in}) ORDER BY post_modified_gmt DESC LIMIT 1000");
    }

    if (empty($posts)) {
        status_header(404);
    }

    header('Content-Type: ' . jetpack_sitemap_content_type());

    $initstr = jetpack_sitemap_initstr(get_bloginfo('charset'));
    $tree = simplexml_load_string($initstr);

    // If we did not get a valid string, force UTF-8 and try again.
    if (false === $tree) {
        $initstr = jetpack_sitemap_initstr('UTF-8');
        $tree = simplexml_load_string($initstr);
    }
    unset($initstr);

    // The home URL does not change while the loop runs, so read it once.
    $home_url = untrailingslashit(get_option('home'));

    // Newest modification date seen so far (MySQL datetime string); used for
    // the home entry and the Last-Modified header.
    $latest_mod = '';

    foreach ($posts as $post) {
        setup_postdata($post);

        /**
         * Filter condition to allow skipping specific posts in sitemap.
         *
         * @module sitemaps
         *
         * @since 3.9.0
         *
         * @param bool   $skip Current boolean. False by default, so no post is skipped.
         * @param object $post Current post row (ID, post_type, post_modified_gmt, comment_count).
         */
        if (apply_filters('jetpack_sitemap_skip_post', false, $post)) {
            continue;
        }

        // Resolve the permalink once; it is needed for the home check and for <loc>.
        $permalink = get_permalink($post->ID);

        // If this post is configured to be the site home, skip since it's added separately later
        if (untrailingslashit($permalink) === $home_url) {
            continue;
        }

        // Keys are added in the order the child nodes should appear in the XML.
        $url = array('loc' => esc_url($permalink));

        // Mobile node specified in http://support.google.com/webmasters/bin/answer.py?hl=en&answer=34648
        $url['mobile:mobile'] = '';

        // Image node specified in http://support.google.com/webmasters/bin/answer.py?hl=en&answer=178636
        // Images are gathered through Jetpack_PostImages, one source at a time.
        if (!post_password_required($post->ID)) {
            $media = array();
            $methods = array(
                'from_thumbnail' => false,
                'from_slideshow' => false,
                'from_gallery' => false,
                'from_attachment' => false,
                'from_html' => false,
            );

            foreach (array_keys($methods) as $method) {
                // Switch on exactly one source per call so the results of
                // every source are collected, then switch it off again.
                $methods[$method] = true;
                $images_collected = Jetpack_PostImages::get_images($post->ID, $methods);
                if (is_array($images_collected)) {
                    $media = array_merge($media, $images_collected);
                }
                $methods[$method] = false;
            }

            $images = array();
            foreach ($media as $item) {
                if (!isset($item['type']) || 'image' !== $item['type']) {
                    continue;
                }

                // An image entry is only meaningful with a location; without
                // this guard an empty image node would be written.
                if (empty($item['src'])) {
                    continue;
                }

                $images[] = array(
                    'image:loc' => esc_url($item['src']),
                    // The title is derived from the file name of the image.
                    'image:title' => sanitize_title_with_dashes(pathinfo($item['src'], PATHINFO_FILENAME)),
                );
            }

            if (!empty($images)) {
                $url['image:image'] = $images;
            }
        }

        // Last modification: the post's own date, or a newer approved comment.
        $post_latest_mod = null;
        if ($post->post_modified_gmt && $post->post_modified_gmt !== '0000-00-00 00:00:00') {
            $post_latest_mod = $post->post_modified_gmt;
        }

        if ($post->comment_count > 0) {
            // last modified based on last comment
            $latest_comment_datetime = jetpack_get_approved_comments_max_datetime($post->ID);
            if (
                !empty($latest_comment_datetime)
                && (is_null($post_latest_mod) || $latest_comment_datetime > $post_latest_mod)
            ) {
                $post_latest_mod = $latest_comment_datetime;
            }
        }

        if (!empty($post_latest_mod)) {
            $latest_mod = max($latest_mod, $post_latest_mod);
            $url['lastmod'] = jetpack_w3cdate_from_mysql($post_latest_mod);
        }

        if ($post->post_type === 'page') {
            $url['changefreq'] = 'weekly';
            $url['priority'] = '0.6'; // set page priority above default priority of 0.5
        } else {
            $url['changefreq'] = 'monthly';
        }

        /**
         * Filter associative array with data to build <url> node and its descendants for current post.
         *
         * @module sitemaps
         *
         * @since 3.9.0
         *
         * @param array $url     Data to build parent and children nodes for current post.
         * @param int   $post_id Current post ID.
         */
        $url_node = apply_filters('jetpack_sitemap_url', $url, $post->ID);
        jetpack_sitemap_array_to_simplexml(array('url' => $url_node), $tree);
    }
    wp_reset_postdata();

    // The site home gets its own entry, dated with the newest change found above.
    $blog_home = array(
        'loc' => esc_url(get_option('home')),
        'changefreq' => 'daily',
        'priority' => '1.0',
    );
    if (!empty($latest_mod)) {
        $blog_home['lastmod'] = jetpack_w3cdate_from_mysql($latest_mod);
        header('Last-Modified:' . mysql2date('D, d M Y H:i:s', $latest_mod, 0) . ' GMT');
    }

    /**
     * Filter associative array with data to build <url> node and its descendants for site home.
     *
     * @module sitemaps
     *
     * @since 3.9.0
     *
     * @param array $blog_home Data to build parent and children nodes for site home.
     */
    $url_node = apply_filters('jetpack_sitemap_url_home', $blog_home);
    jetpack_sitemap_array_to_simplexml(array('url' => $url_node), $tree);
    unset($blog_home);

    /**
     * Filter data before rendering it as XML.
     *
     * @module sitemaps
     *
     * @since 3.9.0
     *
     * @param SimpleXMLElement $tree       Data tree for sitemap.
     * @param string           $latest_mod Date of last modification.
     */
    $tree = apply_filters('jetpack_print_sitemap', $tree, $latest_mod);

    $xml = $tree->asXML();
    unset($tree);

    // Cache and print only when serialization produced something.
    if (!empty($xml)) {
        set_transient('jetpack_sitemap', $xml, DAY_IN_SECONDS);
        echo $xml;
    }

    die;
}
/**
 * Print an XML sitemap conforming to the Sitemaps.org protocol.
 *
 * Outputs an XML list of up to the 1000 most recently modified published
 * posts (post types `post` and `page` unless filtered), followed by one entry
 * for the site home. JPEG/PNG attachments of those posts are looked up with
 * batched SQL so that an image node can be added to a post's entry.
 *
 * A sitemap found in the `jetpack_sitemap` transient is echoed as-is; a freshly
 * built one is stored in that transient with a DAY_IN_SECONDS expiry.
 *
 * The function sends its own headers, echoes the XML and always ends the
 * request with `die`, so it never returns to the caller.
 *
 * @module sitemaps
 *
 * @link http://sitemaps.org/protocol.php Sitemaps.org protocol.
 */
function jetpack_print_sitemap() {
    global $wpdb;

    // Serve the cached copy when there is one.
    $xml = get_transient('jetpack_sitemap');
    if ($xml) {
        header('Content-Type: ' . jetpack_sitemap_content_type(), true);
        echo $xml;
        die;
    }

    // Compatibility with PHP 5.3 and older
    if (!defined('ENT_XML1')) {
        define('ENT_XML1', 16);
    }

    /**
     * Filter the post types that will be included in sitemap.
     *
     * @module sitemaps
     *
     * @since 3.9.0
     *
     * @param array $post_types Array of post types.
     */
    $post_types = apply_filters('jetpack_sitemap_post_types', array('post', 'page'));

    $post_types_in = array();
    foreach ((array) $post_types as $post_type) {
        $post_types_in[] = $wpdb->prepare('%s', $post_type);
    }

    // A direct query is used because get_posts() was too heavy for our needs.
    // When the filter leaves no post types at all, skip the query instead of
    // sending the invalid SQL "IN ()" to the database.
    $posts = array();
    if (!empty($post_types_in)) {
        $post_types_in = join(",", $post_types_in);

        // The cast keeps the loops below safe should get_results() hand back
        // null instead of a list of rows.
        $posts = (array) $wpdb->get_results("SELECT ID, post_type, post_modified_gmt, comment_count FROM {$wpdb->posts} WHERE post_status='publish' AND post_type IN ({$post_types_in}) ORDER BY post_modified_gmt DESC LIMIT 1000");
    }

    if (empty($posts)) {
        status_header(404);
    }

    header('Content-Type: ' . jetpack_sitemap_content_type());

    $initstr = jetpack_sitemap_initstr(get_bloginfo('charset'));
    $tree = simplexml_load_string($initstr);

    // If we did not get a valid string, force UTF-8 and try again.
    if (false === $tree) {
        $initstr = jetpack_sitemap_initstr('UTF-8');
        $tree = simplexml_load_string($initstr);
    }
    unset($initstr);

    // Acquire necessary attachment data for all of the posts in a performant manner.
    // Rows are indexed by parent post ID so each post finds its attachments
    // with one array lookup instead of a scan over every collected row.
    $attachment_parents = wp_list_pluck($posts, 'ID');
    $attachments_by_parent = array();

    // Work through the parent IDs in batches of 100.
    while ($sub_posts = array_splice($attachment_parents, 0, 100)) {
        $post_parents = implode(',', array_map('intval', $sub_posts));

        // Get the attachment IDs for all posts. We need to see how many
        // attachments each post parent has and limit it to 5.
        $query = "SELECT ID, post_parent FROM {$wpdb->posts} WHERE post_parent IN ({$post_parents}) AND post_type='attachment' AND ( post_mime_type='image/jpeg' OR post_mime_type='image/png' ) LIMIT 0,1000;";
        $all_attachments = (array) $wpdb->get_results($query);

        $selected_attachments = array();
        $attachment_count = array();
        foreach ($all_attachments as $attachment) {
            if (!isset($attachment_count[$attachment->post_parent])) {
                $attachment_count[$attachment->post_parent] = 0;
            }

            // Skip this particular attachment if we already have 5 for the post
            if ($attachment_count[$attachment->post_parent] >= 5) {
                continue;
            }

            $selected_attachments[] = $attachment->ID;
            $attachment_count[$attachment->post_parent]++;
        }

        // bail if there weren't any attachments to avoid an extra query
        if (empty($selected_attachments)) {
            continue;
        }

        // Get more of the attachment object for the attachments we actually care about
        $attachment_ids = implode(',', array_map('intval', $selected_attachments));
        $query = "SELECT p.ID, p.post_parent, p.post_title, p.post_excerpt, p.guid FROM {$wpdb->posts} as p WHERE p.ID IN ({$attachment_ids}) AND p.post_type='attachment' AND ( p.post_mime_type='image/jpeg' OR p.post_mime_type='image/png' ) LIMIT 500;";

        foreach ((array) $wpdb->get_results($query) as $attachment) {
            $attachments_by_parent[$attachment->post_parent][] = $attachment;
        }
    }

    // The home URL does not change while the loop runs, so read it once.
    $home_url = untrailingslashit(get_option('home'));

    // Newest modification date seen so far (MySQL datetime string); used for
    // the home entry and the Last-Modified header.
    $latest_mod = '';

    foreach ($posts as $post) {
        /**
         * Filter condition to allow skipping specific posts in sitemap.
         *
         * @module sitemaps
         *
         * @since 3.9.0
         *
         * @param bool   $skip Current boolean. False by default, so no post is skipped.
         * @param object $post Current post row (ID, post_type, post_modified_gmt, comment_count).
         */
        if (apply_filters('jetpack_sitemap_skip_post', false, $post)) {
            continue;
        }

        // Resolve the permalink once; it is needed for the home check and for <loc>.
        $permalink = get_permalink($post->ID);

        // If this post is configured to be the site home, skip since it's added separately later
        if (untrailingslashit($permalink) === $home_url) {
            continue;
        }

        // Keys are added in the order the child nodes should appear in the XML.
        $url = array('loc' => esc_url($permalink));

        // Mobile node specified in http://support.google.com/webmasters/bin/answer.py?hl=en&answer=34648
        $url['mobile:mobile'] = '';

        // Image node specified in http://support.google.com/webmasters/bin/answer.py?hl=en&answer=178636
        // These attachments were produced with batch SQL earlier in the script
        if (!post_password_required($post->ID) && !empty($attachments_by_parent[$post->ID])) {
            // The associative loc/title/caption shape describes a single image,
            // so each candidate is built as one complete unit. This keeps the
            // URL of one attachment from being combined with the title or
            // caption of another, and guarantees the node has a location.
            // As before, the last usable attachment is the one that is kept.
            // NOTE(review): listing every image would need list support in
            // jetpack_sitemap_array_to_simplexml(), which is not visible here.
            $image_node = null;

            foreach ($attachments_by_parent[$post->ID] as $attachment) {
                $attachment_url = wp_get_attachment_url($attachment->ID);
                if (!$attachment_url) {
                    continue;
                }

                $candidate = array('loc' => esc_url($attachment_url));

                // Only include title if not empty.
                /** This filter is documented in wp-includes/feed.php */
                $attachment_title = apply_filters('the_title_rss', $attachment->post_title);
                if ($attachment_title) {
                    $candidate['title'] = html_entity_decode(esc_html($attachment_title), ENT_XML1);
                }

                // Only include caption if not empty.
                /** This filter is documented in wp-includes/feed.php */
                $attachment_caption = apply_filters('the_excerpt_rss', $attachment->post_excerpt);
                if ($attachment_caption) {
                    $candidate['caption'] = html_entity_decode(esc_html($attachment_caption), ENT_XML1);
                }

                $image_node = $candidate;
            }

            if (null !== $image_node) {
                $url['image:image'] = $image_node;
            }
        }

        // Last modification: the post's own date, or a newer approved comment.
        $post_latest_mod = null;
        if ($post->post_modified_gmt && $post->post_modified_gmt !== '0000-00-00 00:00:00') {
            $post_latest_mod = $post->post_modified_gmt;
        }

        if ($post->comment_count > 0) {
            // last modified based on last comment
            $latest_comment_datetime = jetpack_get_approved_comments_max_datetime($post->ID);
            if (
                !empty($latest_comment_datetime)
                && (is_null($post_latest_mod) || $latest_comment_datetime > $post_latest_mod)
            ) {
                $post_latest_mod = $latest_comment_datetime;
            }
        }

        if (!empty($post_latest_mod)) {
            $latest_mod = max($latest_mod, $post_latest_mod);
            $url['lastmod'] = jetpack_w3cdate_from_mysql($post_latest_mod);
        }

        if ($post->post_type === 'page') {
            $url['changefreq'] = 'weekly';
            $url['priority'] = '0.6'; // set page priority above default priority of 0.5
        } else {
            $url['changefreq'] = 'monthly';
        }

        /**
         * Filter associative array with data to build <url> node and its descendants for current post.
         *
         * @module sitemaps
         *
         * @since 3.9.0
         *
         * @param array $url     Data to build parent and children nodes for current post.
         * @param int   $post_id Current post ID.
         */
        $url_node = apply_filters('jetpack_sitemap_url', $url, $post->ID);
        jetpack_sitemap_array_to_simplexml(array('url' => $url_node), $tree);
    }

    // The site home gets its own entry, dated with the newest change found above.
    $blog_home = array(
        'loc' => esc_url(get_option('home')),
        'changefreq' => 'daily',
        'priority' => '1.0',
    );
    if (!empty($latest_mod)) {
        $blog_home['lastmod'] = jetpack_w3cdate_from_mysql($latest_mod);
        header('Last-Modified:' . mysql2date('D, d M Y H:i:s', $latest_mod, 0) . ' GMT');
    }

    /**
     * Filter associative array with data to build <url> node and its descendants for site home.
     *
     * @module sitemaps
     *
     * @since 3.9.0
     *
     * @param array $blog_home Data to build parent and children nodes for site home.
     */
    $url_node = apply_filters('jetpack_sitemap_url_home', $blog_home);
    jetpack_sitemap_array_to_simplexml(array('url' => $url_node), $tree);
    unset($blog_home);

    /**
     * Filter data before rendering it as XML.
     *
     * @module sitemaps
     *
     * @since 3.9.0
     *
     * @param SimpleXMLElement $tree       Data tree for sitemap.
     * @param string           $latest_mod Date of last modification.
     */
    $tree = apply_filters('jetpack_print_sitemap', $tree, $latest_mod);

    $xml = $tree->asXML();
    unset($tree);

    // Cache and print only when serialization produced something.
    if (!empty($xml)) {
        set_transient('jetpack_sitemap', $xml, DAY_IN_SECONDS);
        echo $xml;
    }

    die;
}