Exemplo n.º 1
0
/**
 * Print an XML sitemap conforming to the Sitemaps.org protocol.
 * Outputs an XML list of up to the latest 1000 posts.
 *
 * A copy cached in the 'jetpack_sitemap' transient is served when present;
 * otherwise the sitemap is built from the database, cached for one day and
 * echoed. The function always ends the request with die.
 *
 * @module sitemaps
 *
 * @link http://sitemaps.org/protocol.php Sitemaps.org protocol.
 */
function jetpack_print_sitemap()
{
    global $wpdb, $post;
    // Serve the cached sitemap when one exists.
    $xml = get_transient('jetpack_sitemap');
    if ($xml) {
        header('Content-Type: ' . jetpack_sitemap_content_type(), true);
        echo $xml;
        die;
    }
    // Compatibility with PHP 5.3 and older
    if (!defined('ENT_XML1')) {
        define('ENT_XML1', 16);
    }
    /**
     * Filter the post types that will be included in sitemap.
     *
     * @module sitemaps
     *
     * @since 3.9.0
     *
     * @param array $post_types Array of post types.
     */
    $post_types = apply_filters('jetpack_sitemap_post_types', array('post', 'page'));
    // Quote each post type through $wpdb->prepare() before it is placed in the IN () list.
    $post_types_in = array();
    foreach ((array) $post_types as $post_type) {
        $post_types_in[] = $wpdb->prepare('%s', $post_type);
    }
    $post_types_in = join(",", $post_types_in);
    // Use a direct query because get_posts was acting too heavy for our needs.
    $posts = $wpdb->get_results("SELECT ID, post_type, post_modified_gmt, comment_count FROM {$wpdb->posts} WHERE post_status='publish' AND post_type IN ({$post_types_in}) ORDER BY post_modified_gmt DESC LIMIT 1000");
    if (empty($posts)) {
        status_header(404);
    }
    header('Content-Type: ' . jetpack_sitemap_content_type());
    $initstr = jetpack_sitemap_initstr(get_bloginfo('charset'));
    $tree = simplexml_load_string($initstr);
    // If we did not get a valid string, force UTF-8 and try again.
    if (false === $tree) {
        $initstr = jetpack_sitemap_initstr('UTF-8');
        $tree = simplexml_load_string($initstr);
    }
    unset($initstr);
    // The home URL does not change while the loop runs, so normalise it once.
    $home_url = untrailingslashit(get_option('home'));
    $latest_mod = '';
    foreach ($posts as $post) {
        setup_postdata($post);
        /**
         * Filter condition to allow skipping specific posts in sitemap.
         *
         * @module sitemaps
         *
         * @since 3.9.0
         *
         * @param bool $skip Current boolean. False by default, so no post is skipped.
         * @param WP_POST $post Current post object.
         */
        if (apply_filters('jetpack_sitemap_skip_post', false, $post)) {
            continue;
        }
        $post_latest_mod = null;
        $permalink = get_permalink($post->ID);
        // If this post is configured to be the site home, skip since it's added separately later
        if (untrailingslashit($permalink) === $home_url) {
            continue;
        }
        $url = array('loc' => esc_url($permalink));
        // Mobile node specified in http://support.google.com/webmasters/bin/answer.py?hl=en&answer=34648
        $url['mobile:mobile'] = '';
        // Image node specified in http://support.google.com/webmasters/bin/answer.py?hl=en&answer=178636
        // Images are gathered through Jetpack_PostImages, one extraction method at a time.
        if (!post_password_required($post->ID)) {
            $media = array();
            $methods = array('from_thumbnail' => false, 'from_slideshow' => false, 'from_gallery' => false, 'from_attachment' => false, 'from_html' => false);
            foreach ($methods as $method => $value) {
                // Enable exactly one method per call, then switch it off again.
                $methods[$method] = true;
                $images_collected = Jetpack_PostImages::get_images($post->ID, $methods);
                if (is_array($images_collected)) {
                    $media = array_merge($media, $images_collected);
                }
                $methods[$method] = false;
            }
            // Keyed by escaped URL so an image found by several methods is listed only once.
            $images = array();
            foreach ($media as $item) {
                // An image node is useless without a location, so items lacking 'src' are dropped.
                if (!isset($item['type'], $item['src']) || 'image' !== $item['type']) {
                    continue;
                }
                $image_loc = esc_url($item['src']);
                if ('' === $image_loc || isset($images[$image_loc])) {
                    continue;
                }
                $images[$image_loc] = array(
                    'image:loc' => $image_loc,
                    // The title is derived from the file name of the image.
                    'image:title' => sanitize_title_with_dashes(pathinfo($item['src'], PATHINFO_FILENAME)),
                );
            }
            if (!empty($images)) {
                $url['image:image'] = array_values($images);
            }
        }
        if ($post->post_modified_gmt && $post->post_modified_gmt != '0000-00-00 00:00:00') {
            $post_latest_mod = $post->post_modified_gmt;
        }
        if ($post->comment_count > 0) {
            // last modified based on last comment
            $latest_comment_datetime = jetpack_get_approved_comments_max_datetime($post->ID);
            if (!empty($latest_comment_datetime)) {
                if (is_null($post_latest_mod) || $latest_comment_datetime > $post_latest_mod) {
                    $post_latest_mod = $latest_comment_datetime;
                }
            }
            unset($latest_comment_datetime);
        }
        if (!empty($post_latest_mod)) {
            // Track the newest modification across all posts for the home entry.
            $latest_mod = max($latest_mod, $post_latest_mod);
            $url['lastmod'] = jetpack_w3cdate_from_mysql($post_latest_mod);
        }
        unset($post_latest_mod);
        if ($post->post_type == 'page') {
            $url['changefreq'] = 'weekly';
            $url['priority'] = '0.6';
            // set page priority above default priority of 0.5
        } else {
            $url['changefreq'] = 'monthly';
        }
        /**
         * Filter associative array with data to build <url> node and its descendants for current post.
         *
         * @module sitemaps
         *
         * @since 3.9.0
         *
         * @param array $url Data to build parent and children nodes for current post.
         * @param int $post_id Current post ID.
         */
        $url_node = apply_filters('jetpack_sitemap_url', $url, $post->ID);
        jetpack_sitemap_array_to_simplexml(array('url' => $url_node), $tree);
        unset($url);
    }
    // Restore the global $post that setup_postdata() changed inside the loop.
    wp_reset_postdata();
    $blog_home = array('loc' => esc_url(get_option('home')), 'changefreq' => 'daily', 'priority' => '1.0');
    if (!empty($latest_mod)) {
        $blog_home['lastmod'] = jetpack_w3cdate_from_mysql($latest_mod);
        header('Last-Modified:' . mysql2date('D, d M Y H:i:s', $latest_mod, 0) . ' GMT');
    }
    /**
     * Filter associative array with data to build <url> node and its descendants for site home.
     *
     * @module sitemaps
     *
     * @since 3.9.0
     *
     * @param array $blog_home Data to build parent and children nodes for site home.
     */
    $url_node = apply_filters('jetpack_sitemap_url_home', $blog_home);
    jetpack_sitemap_array_to_simplexml(array('url' => $url_node), $tree);
    unset($blog_home);
    /**
     * Filter data before rendering it as XML.
     *
     * @module sitemaps
     *
     * @since 3.9.0
     *
     * @param SimpleXMLElement $tree Data tree for sitemap.
     * @param string $latest_mod Date of last modification.
     */
    $tree = apply_filters('jetpack_print_sitemap', $tree, $latest_mod);
    $xml = $tree->asXML();
    unset($tree);
    if (!empty($xml)) {
        // Cache the rendered sitemap for one day before sending it.
        set_transient('jetpack_sitemap', $xml, DAY_IN_SECONDS);
        echo $xml;
    }
    die;
}
Exemplo n.º 2
0
/**
 * Print an XML sitemap conforming to the Sitemaps.org protocol.
 * Outputs an XML list of up to the latest 1000 posts.
 *
 * A copy cached in the 'jetpack_sitemap' transient is served when present;
 * otherwise the sitemap is built from the database, cached for one day and
 * echoed. The function always ends the request with die.
 *
 * @module sitemaps
 *
 * @link http://sitemaps.org/protocol.php Sitemaps.org protocol.
 */
function jetpack_print_sitemap()
{
    global $wpdb;
    // Serve the cached sitemap when one exists.
    $xml = get_transient('jetpack_sitemap');
    if ($xml) {
        header('Content-Type: ' . jetpack_sitemap_content_type(), true);
        echo $xml;
        die;
    }
    // Compatibility with PHP 5.3 and older
    if (!defined('ENT_XML1')) {
        define('ENT_XML1', 16);
    }
    /**
     * Filter the post types that will be included in sitemap.
     *
     * @module sitemaps
     *
     * @since 3.9.0
     *
     * @param array $post_types Array of post types.
     */
    $post_types = apply_filters('jetpack_sitemap_post_types', array('post', 'page'));
    // Quote each post type through $wpdb->prepare() before it is placed in the IN () list.
    $post_types_in = array();
    foreach ((array) $post_types as $post_type) {
        $post_types_in[] = $wpdb->prepare('%s', $post_type);
    }
    $post_types_in = join(",", $post_types_in);
    // Use a direct query because get_posts was acting too heavy for our needs.
    $posts = $wpdb->get_results("SELECT ID, post_type, post_modified_gmt, comment_count FROM {$wpdb->posts} WHERE post_status='publish' AND post_type IN ({$post_types_in}) ORDER BY post_modified_gmt DESC LIMIT 1000");
    if (empty($posts)) {
        status_header(404);
    }
    header('Content-Type: ' . jetpack_sitemap_content_type());
    $initstr = jetpack_sitemap_initstr(get_bloginfo('charset'));
    $tree = simplexml_load_string($initstr);
    // If we did not get a valid string, force UTF-8 and try again.
    if (false === $tree) {
        $initstr = jetpack_sitemap_initstr('UTF-8');
        $tree = simplexml_load_string($initstr);
    }
    // Acquire necessary attachment data for all of the posts in a performant manner
    $attachment_parents = wp_list_pluck($posts, 'ID');
    // Attachment rows grouped by parent post ID, so each post finds its own
    // attachments with one lookup instead of rescanning the whole list.
    $post_attachments = array();
    // Work through the parent IDs in batches of 100 to keep the IN () lists short.
    while ($sub_posts = array_splice($attachment_parents, 0, 100)) {
        $post_parents = implode(',', array_map('intval', $sub_posts));
        // Get the attachment IDs for all posts. We need to see how many
        // attachments each post parent has and limit it to 5.
        $query = "SELECT ID, post_parent FROM {$wpdb->posts} WHERE post_parent IN ({$post_parents}) AND post_type='attachment' AND ( post_mime_type='image/jpeg' OR post_mime_type='image/png' ) LIMIT 0,1000;";
        $all_attachments = $wpdb->get_results($query);
        $selected_attachments = array();
        $attachment_count = array();
        foreach ($all_attachments as $attachment) {
            if (!isset($attachment_count[$attachment->post_parent])) {
                $attachment_count[$attachment->post_parent] = 0;
            }
            // Skip this particular attachment if we already have 5 for the post
            if ($attachment_count[$attachment->post_parent] >= 5) {
                continue;
            }
            $selected_attachments[] = $attachment->ID;
            $attachment_count[$attachment->post_parent]++;
        }
        // bail if there weren't any attachments to avoid an extra query
        if (empty($selected_attachments)) {
            continue;
        }
        // Get more of the attachment object for the attachments we actually care about
        $attachment_ids = implode(',', array_map('intval', $selected_attachments));
        $query = "SELECT p.ID, p.post_parent, p.post_title, p.post_excerpt, p.guid FROM {$wpdb->posts} as p WHERE p.ID IN ({$attachment_ids}) AND p.post_type='attachment' AND ( p.post_mime_type='image/jpeg' OR p.post_mime_type='image/png' ) LIMIT 500;";
        $attachments = $wpdb->get_results($query);
        foreach ($attachments as $attachment) {
            $post_attachments[$attachment->post_parent][] = $attachment;
        }
    }
    unset($initstr);
    $latest_mod = '';
    foreach ($posts as $post) {
        /**
         * Filter condition to allow skipping specific posts in sitemap.
         *
         * @module sitemaps
         *
         * @since 3.9.0
         *
         * @param bool $skip Current boolean. False by default, so no post is skipped.
         * @param WP_POST $post Current post object.
         */
        if (apply_filters('jetpack_sitemap_skip_post', false, $post)) {
            continue;
        }
        $post_latest_mod = null;
        $url = array('loc' => esc_url(get_permalink($post->ID)));
        // If this post is configured to be the site home, skip since it's added separately later
        if (untrailingslashit(get_permalink($post->ID)) == untrailingslashit(get_option('home'))) {
            continue;
        }
        // Mobile node specified in http://support.google.com/webmasters/bin/answer.py?hl=en&answer=34648
        $url['mobile:mobile'] = '';
        // Image node specified in http://support.google.com/webmasters/bin/answer.py?hl=en&answer=178636
        // These attachments were produced with batch SQL earlier in the script
        if (!post_password_required($post->ID) && isset($post_attachments[$post->ID])) {
            foreach ($post_attachments[$post->ID] as $attachment) {
                $attachment_url = wp_get_attachment_url($attachment->ID);
                // An image node without a location is meaningless, so skip attachments with no URL.
                if (!$attachment_url) {
                    continue;
                }
                // Build each image from scratch so its title and caption can never
                // be left over from a different attachment.
                $image = array('loc' => esc_url($attachment_url));
                // Only include title if not empty.
                /** This filter is documented in wp-includes/feed.php */
                if ($attachment_title = apply_filters('the_title_rss', $attachment->post_title)) {
                    $image['title'] = html_entity_decode(esc_html($attachment_title), ENT_XML1);
                }
                // Only include caption if not empty.
                /** This filter is documented in wp-includes/feed.php */
                if ($attachment_caption = apply_filters('the_excerpt_rss', $attachment->post_excerpt)) {
                    $image['caption'] = html_entity_decode(esc_html($attachment_caption), ENT_XML1);
                }
                // NOTE(review): this node shape holds a single image, so the last attachment
                // with a URL wins. Listing several images per post would need the array-to-XML
                // helper to accept a list of image arrays; confirm before changing the shape.
                $url['image:image'] = $image;
            }
        }
        if ($post->post_modified_gmt && $post->post_modified_gmt != '0000-00-00 00:00:00') {
            $post_latest_mod = $post->post_modified_gmt;
        }
        if ($post->comment_count > 0) {
            // last modified based on last comment
            $latest_comment_datetime = jetpack_get_approved_comments_max_datetime($post->ID);
            if (!empty($latest_comment_datetime)) {
                if (is_null($post_latest_mod) || $latest_comment_datetime > $post_latest_mod) {
                    $post_latest_mod = $latest_comment_datetime;
                }
            }
            unset($latest_comment_datetime);
        }
        if (!empty($post_latest_mod)) {
            // Track the newest modification across all posts for the home entry.
            $latest_mod = max($latest_mod, $post_latest_mod);
            $url['lastmod'] = jetpack_w3cdate_from_mysql($post_latest_mod);
        }
        unset($post_latest_mod);
        if ($post->post_type == 'page') {
            $url['changefreq'] = 'weekly';
            $url['priority'] = '0.6';
            // set page priority above default priority of 0.5
        } else {
            $url['changefreq'] = 'monthly';
        }
        /**
         * Filter associative array with data to build <url> node and its descendants for current post.
         *
         * @module sitemaps
         *
         * @since 3.9.0
         *
         * @param array $url Data to build parent and children nodes for current post.
         * @param int $post_id Current post ID.
         */
        $url_node = apply_filters('jetpack_sitemap_url', $url, $post->ID);
        jetpack_sitemap_array_to_simplexml(array('url' => $url_node), $tree);
        unset($url);
    }
    $blog_home = array('loc' => esc_url(get_option('home')), 'changefreq' => 'daily', 'priority' => '1.0');
    if (!empty($latest_mod)) {
        $blog_home['lastmod'] = jetpack_w3cdate_from_mysql($latest_mod);
        header('Last-Modified:' . mysql2date('D, d M Y H:i:s', $latest_mod, 0) . ' GMT');
    }
    /**
     * Filter associative array with data to build <url> node and its descendants for site home.
     *
     * @module sitemaps
     *
     * @since 3.9.0
     *
     * @param array $blog_home Data to build parent and children nodes for site home.
     */
    $url_node = apply_filters('jetpack_sitemap_url_home', $blog_home);
    jetpack_sitemap_array_to_simplexml(array('url' => $url_node), $tree);
    unset($blog_home);
    /**
     * Filter data before rendering it as XML.
     *
     * @module sitemaps
     *
     * @since 3.9.0
     *
     * @param SimpleXMLElement $tree Data tree for sitemap.
     * @param string $latest_mod Date of last modification.
     */
    $tree = apply_filters('jetpack_print_sitemap', $tree, $latest_mod);
    $xml = $tree->asXML();
    unset($tree);
    if (!empty($xml)) {
        // Cache the rendered sitemap for one day before sending it.
        set_transient('jetpack_sitemap', $xml, DAY_IN_SECONDS);
        echo $xml;
    }
    die;
}