/**
  * Count the number of anchors and group them by type.
  *
  * @param object $xpath An XPATH object of the current document.
  *
  * @return array
  */
 function get_anchor_count(&$xpath)
 {
     // Result skeleton: overall total plus follow / nofollow tallies per link type.
     $count = array(
         'total'    => 0,
         'internal' => array('nofollow' => 0, 'dofollow' => 0),
         'external' => array('nofollow' => 0, 'dofollow' => 0),
         'other'    => array('nofollow' => 0, 'dofollow' => 0),
     );

     // Match both lower- and upper-case anchor element names.
     $dom_objects = $xpath->query('//a|//A');
     if (!($dom_objects instanceof DOMNodeList) || $dom_objects->length < 1) {
         // The query failed or the document holds no anchors: return all zeros.
         return $count;
     }

     // The site URL is identical for every anchor, so fetch it (and its length) once
     // instead of on every loop iteration.
     $wpurl = get_bloginfo('url');
     $wpurl_length = strlen($wpurl);

     foreach ($dom_objects as $dom_object) {
         $count['total']++;

         $href_node = $dom_object->attributes->getNamedItem('href');
         if (!$href_node) {
             // Anchors without an href only contribute to the total.
             continue;
         }
         $href = $href_node->textContent;

         // Relative URLs and URLs starting with the site URL are internal; any other
         // URL starting with "http" is external; the remainder is "other".
         if (wpseo_is_url_relative($href) === true || substr($href, 0, $wpurl_length) === $wpurl) {
             $type = 'internal';
         } elseif (substr($href, 0, 4) === 'http') {
             $type = 'external';
         } else {
             $type = 'other';
         }

         // A link is nofollow only when its rel attribute contains "nofollow" (case-insensitive).
         $rel_node = $dom_object->attributes->getNamedItem('rel');
         if ($rel_node && stripos($rel_node->textContent, 'nofollow') !== false) {
             $count[$type]['nofollow']++;
         } else {
             $count[$type]['dofollow']++;
         }
     }

     return $count;
 }
 /**
  * Display an OpenGraph image tag
  *
  * @param string $img Source URL to the image
  *
  * @return bool
  */
 function image_output($img)
 {
     /**
      * Filter: 'wpseo_opengraph_image' - lets callbacks replace the OpenGraph image URL.
      *
      * @api string $img Image URL string
      */
     $filtered = apply_filters('wpseo_opengraph_image', $img);
     $img = trim($filtered);

     // Nothing to output when the (filtered) URL is empty.
     if (empty($img)) {
         return false;
     }

     if (true === wpseo_is_url_relative($img)) {
         // Only root-relative URLs (starting with a slash) can be made absolute here.
         if ('/' != $img[0]) {
             return false;
         }

         // Root-relative means relative to the domain rather than to the WordPress install,
         // so rebuild the URL from the scheme and host of the home URL.
         $home_parts = parse_url(home_url());
         $img = $home_parts['scheme'] . '://' . $home_parts['host'] . $img;
     }

     // Each image is emitted at most once.
     $already_shown = in_array($img, $this->shown_images);
     if ($already_shown) {
         return false;
     }

     array_push($this->shown_images, $img);
     $this->og_tag('og:image', esc_url($img));

     return true;
 }
Example #3
0
 /**
  * This function normally outputs the canonical but is also used in other places to retrieve
  * the canonical URL for the current page.
  *
  * @param bool $echo        Whether or not to output the canonical element.
  * @param bool $un_paged    Whether or not to return the canonical with or without pagination added to the URL.
  * @param bool $no_override Whether or not to return a manually overridden canonical
  *
  * @return string $canonical
  */
 public function canonical($echo = true, $un_paged = false, $no_override = false)
 {
     global $wp_rewrite;

     $canonical = false;
     $skip_pagination = false;

     // Step 1: determine the base canonical for the kind of page being viewed.
     if (is_singular()) {
         $manual = WPSEO_Meta::get_value('canonical');
         if ($no_override !== false || $manual === '') {
             // No usable manual canonical: fall back to the permalink of the queried object.
             $obj = get_queried_object();
             $canonical = get_permalink($obj->ID);

             // Append the page number only when the post content really has that many pages.
             $page = get_query_var('page');
             if ($page > 1) {
                 $numpages = substr_count($obj->post_content, '<!--nextpage-->') + 1;
                 if ($numpages && $page <= $numpages) {
                     if ($wp_rewrite->using_permalinks()) {
                         $canonical = user_trailingslashit(trailingslashit($canonical) . $page);
                     } else {
                         $canonical = add_query_arg('page', $page, $canonical);
                     }
                 }
             }
         } else {
             // A manually set canonical is used verbatim and never gets pagination appended.
             $canonical = $manual;
             $skip_pagination = true;
         }
     } elseif (is_search()) {
         $canonical = get_search_link();
     } elseif (is_front_page()) {
         $canonical = home_url();
     } elseif ($this->is_posts_page()) {
         $canonical = get_permalink(get_option('page_for_posts'));
     } elseif (is_tax() || is_tag() || is_category()) {
         $term = get_queried_object();
         $has_term_canonical = false;
         if ($no_override === false) {
             $canonical = WPSEO_Taxonomy_Meta::get_term_meta($term, $term->taxonomy, 'canonical');
             $has_term_canonical = is_string($canonical) && $canonical !== '';
         }
         if ($has_term_canonical) {
             $skip_pagination = true;
         } else {
             $canonical = get_term_link($term, $term->taxonomy);
         }
     } elseif (is_post_type_archive()) {
         $post_type = get_query_var('post_type');
         if (is_array($post_type)) {
             // Several post types were queried: use the first one.
             $post_type = reset($post_type);
         }
         $canonical = get_post_type_archive_link($post_type);
     } elseif (is_author()) {
         $canonical = get_author_posts_url(get_query_var('author'), get_query_var('author_name'));
     } elseif (is_archive() && is_date()) {
         if (is_day()) {
             $canonical = get_day_link(get_query_var('year'), get_query_var('monthnum'), get_query_var('day'));
         } elseif (is_month()) {
             $canonical = get_month_link(get_query_var('year'), get_query_var('monthnum'));
         } elseif (is_year()) {
             $canonical = get_year_link(get_query_var('year'));
         }
     }

     // Callers asking for the un-paged canonical get it before pagination, transport and filtering.
     if ($canonical && $un_paged) {
         return $canonical;
     }

     // Step 2: add archive pagination unless a manual canonical was used.
     if ($canonical && !$skip_pagination) {
         $paged = get_query_var('paged');
         if ($paged > 1) {
             if ($wp_rewrite->using_permalinks()) {
                 if (is_front_page()) {
                     $base = $wp_rewrite->using_index_permalinks() ? 'index.php/' : '/';
                     $canonical = home_url($base);
                 }
                 $canonical = user_trailingslashit(trailingslashit($canonical) . trailingslashit($wp_rewrite->pagination_base) . $paged);
             } else {
                 $canonical = add_query_arg('paged', $paged, $canonical);
             }
         }
     }

     // Step 3: swap the leading http / https for the configured transport, if one is forced.
     if ($canonical && 'default' !== $this->options['force_transport']) {
         $canonical = preg_replace('`^http[s]?`', $this->options['force_transport'], $canonical);
     }

     /**
      * Filter: 'wpseo_canonical' - lets callbacks change the canonical URL.
      *
      * @api string $canonical The canonical URL
      */
     $canonical = apply_filters('wpseo_canonical', $canonical);

     if (!is_string($canonical) || $canonical === '') {
         return false;
     }

     // Canonical links are always made absolute.
     if (wpseo_is_url_relative($canonical) === true) {
         $canonical = home_url($canonical);
     }

     if ($echo === false) {
         return $canonical;
     }

     // When echoing, the element is printed and nothing is returned.
     echo '<link rel="canonical" href="' . esc_url($canonical, null, 'other') . '" />' . "\n";
 }
 /**
  * Fallback function for WP SEO functionality, is_url_relative
  *
  * @param $url
  *
  * @return mixed
  */
 public static function yoast_wpseo_video_is_url_relative($url)
 {
     // Use WPSEO_Utils::is_url_relative() when that method exists; otherwise
     // delegate to the global wpseo_is_url_relative() function.
     $has_utils_method = method_exists('WPSEO_Utils', 'is_url_relative');

     return $has_utils_method
         ? WPSEO_Utils::is_url_relative($url)
         : wpseo_is_url_relative($url);
 }
Example #5
0
 /**
  * Build a sub-sitemap for a specific post type -- example.com/post_type-sitemap.xml
  *
  * @param string $post_type Registered post type's slug
  */
 function build_post_type_map($post_type)
 {
     global $wpdb;

     // Bail out for post types excluded through the options, for the 'revision' and
     // 'nav_menu_item' types, and for anything the 'wpseo_sitemap_exclude_post_type' filter rejects.
     if (isset($this->options['post_types-' . $post_type . '-not_in_sitemap']) && $this->options['post_types-' . $post_type . '-not_in_sitemap'] === true || in_array($post_type, array('revision', 'nav_menu_item')) || apply_filters('wpseo_sitemap_exclude_post_type', false, $post_type)) {
         $this->bad_sitemap = true;
         return;
     }

     $output = '';

     // Posts are fetched in batches of at most 100 rows; $n is the sitemap page being built,
     // and $offset / $total delimit the slice of posts that belongs to that page.
     $steps = 100 > $this->max_entries ? $this->max_entries : 100;
     $n = (int) $this->n;
     $offset = $n > 1 ? ($n - 1) * $this->max_entries : 0;
     $total = $offset + $this->max_entries;

     // Count the eligible posts so $total never runs past the last one.
     $join_filter = apply_filters('wpseo_typecount_join', '', $post_type);
     $where_filter = apply_filters('wpseo_typecount_where', '', $post_type);
     $query = $wpdb->prepare("SELECT COUNT(ID) FROM {$wpdb->posts} {$join_filter} WHERE post_status IN ('publish','inherit') AND post_password = '' AND post_author != 0 AND post_date != '0000-00-00 00:00:00' AND post_type = %s " . $where_filter, $post_type);
     $typecount = $wpdb->get_var($query);
     if ($total > $typecount) {
         $total = $typecount;
     }

     // Only the first sitemap page lists the homepage / posts page and the post type archive.
     if ($n === 1) {
         $front_id = get_option('page_on_front');
         if (!$front_id && ($post_type == 'post' || $post_type == 'page')) {
             $output .= $this->sitemap_url(array('loc' => $this->home_url, 'pri' => 1, 'chf' => $this->filter_frequency('homepage', 'daily', $this->home_url)));
         } elseif ($front_id && $post_type == 'post') {
             $page_for_posts = get_option('page_for_posts');
             if ($page_for_posts) {
                 $page_for_posts_url = get_permalink($page_for_posts);
                 $output .= $this->sitemap_url(array('loc' => $page_for_posts_url, 'pri' => 1, 'chf' => $this->filter_frequency('blogpage', 'daily', $page_for_posts_url)));
             }
         }
         $archive = get_post_type_archive_link($post_type);
         if ($archive) {
             /**
              * Filter: 'wpseo_xml_post_type_archive_priority' - Allow changing the priority of the URL WordPress SEO uses in the XML sitemap.
              *
              * @api float $priority The priority for this URL, ranging from 0 to 1
              *
              * @param string $post_type The post type this archive is for
              */
             $output .= $this->sitemap_url(array('loc' => $archive, 'pri' => apply_filters('wpseo_xml_post_type_archive_priority', 0.8, $post_type), 'chf' => $this->filter_frequency($post_type . '_archive', 'weekly', $archive), 'mod' => $this->get_last_modified($post_type)));
         }
     }

     // Neither posts nor an archive URL: there is nothing to put in this sitemap.
     // ($archive is only set on the first page; empty() tolerates it being undefined.)
     if ($typecount == 0 && empty($archive)) {
         $this->bad_sitemap = true;
         return;
     }

     $stackedurls = array();

     // Make sure you're wpdb->preparing everything you throw into this!!
     $join_filter = apply_filters('wpseo_posts_join', false, $post_type);
     $where_filter = apply_filters('wpseo_posts_where', false, $post_type);
     $status = $post_type == 'attachment' ? 'inherit' : 'publish';

     // Host (without "www.") and scheme of the home URL, used below to vet image URLs.
     $parsed_home = parse_url($this->home_url);
     $host = '';
     $scheme = 'http';
     if (isset($parsed_home['host']) && !empty($parsed_home['host'])) {
         $host = str_replace('www.', '', $parsed_home['host']);
     }
     if (isset($parsed_home['scheme']) && !empty($parsed_home['scheme'])) {
         $scheme = $parsed_home['scheme'];
     }

     /**
      * We grab post_date, post_name, post_author and post_status too so we can throw these objects
      * into get_permalink, which saves a get_post call for each permalink.
      */
     while ($total > $offset) {
         // Optimized query per this thread: http://wordpress.org/support/topic/plugin-wordpress-seo-by-yoast-performance-suggestion
         // Also see http://explainextended.com/2009/10/23/mysql-order-by-limit-performance-late-row-lookups/
         $query = $wpdb->prepare("SELECT l.ID, post_title, post_content, post_name, post_author, post_parent, post_modified_gmt, post_date, post_date_gmt FROM ( SELECT ID FROM {$wpdb->posts} {$join_filter} WHERE post_status = '%s' AND post_password = '' AND post_type = '%s' AND post_author != 0 AND post_date != '0000-00-00 00:00:00' {$where_filter} ORDER BY post_modified ASC LIMIT %d OFFSET %d ) o JOIN {$wpdb->posts} l ON l.ID = o.ID ORDER BY l.ID", $status, $post_type, $steps, $offset);
         $posts = $wpdb->get_results($query);

         // Advance to the next batch before anything below can skip the rest of this iteration.
         $offset = $offset + $steps;

         // Guard before iterating: the result is only usable when it is a non-empty array.
         if (!is_array($posts) || $posts === array()) {
             continue;
         }

         // Prime the meta cache and the attachment / thumbnail caches for the whole batch at once.
         $post_ids = array();
         foreach ($posts as $p) {
             $post_ids[] = $p->ID;
         }
         update_meta_cache('post', $post_ids);
         // Separator first: the legacy (array, separator) argument order is rejected by PHP 8.
         $imploded_post_ids = implode(',', $post_ids);
         $attachments = $this->get_attachments($imploded_post_ids);
         $thumbnails = $this->get_thumbnails($imploded_post_ids);
         $this->do_attachment_ids_caching($attachments, $thumbnails);

         foreach ($posts as $p) {
             // Fill in the fields the query did not select so $p can be passed to get_permalink().
             $p->post_type = $post_type;
             $p->post_status = 'publish';
             $p->filter = 'sample';

             // Skip noindexed posts (unless forced in), posts forced out, and redirected posts.
             if (WPSEO_Meta::get_value('meta-robots-noindex', $p->ID) === '1' && WPSEO_Meta::get_value('sitemap-include', $p->ID) !== 'always') {
                 continue;
             }
             if (WPSEO_Meta::get_value('sitemap-include', $p->ID) === 'never') {
                 continue;
             }
             if (WPSEO_Meta::get_value('redirect', $p->ID) !== '') {
                 continue;
             }

             $url = array();

             // Last-modified: the GMT modification date when it is set and later than the
             // GMT publish date, otherwise the GMT publish date, otherwise the local publish date.
             if (isset($p->post_modified_gmt) && $p->post_modified_gmt != '0000-00-00 00:00:00' && $p->post_modified_gmt > $p->post_date_gmt) {
                 $url['mod'] = $p->post_modified_gmt;
             } else {
                 if ('0000-00-00 00:00:00' != $p->post_date_gmt) {
                     $url['mod'] = $p->post_date_gmt;
                 } else {
                     $url['mod'] = $p->post_date;
                 }
             }

             $url['loc'] = get_permalink($p);
             /**
              * Filter: 'wpseo_xml_sitemap_post_url' - Allow changing the URL WordPress SEO uses in the XML sitemap.
              *
              * Note that only absolute local URLs are allowed as the check after this removes external URLs.
              *
              * @api string $url URL to use in the XML sitemap
              *
              * @param object $p Post object for the URL
              */
             $url['loc'] = apply_filters('wpseo_xml_sitemap_post_url', $url['loc'], $p);
             $url['chf'] = $this->filter_frequency($post_type . '_single', 'weekly', $url['loc']);

             /**
              * Do not include external URLs.
              * @see https://wordpress.org/plugins/page-links-to/ can rewrite permalinks to external URLs.
              */
             if (false === strpos($url['loc'], $this->home_url)) {
                 continue;
             }

             $canonical = WPSEO_Meta::get_value('canonical', $p->ID);
             if ($canonical !== '' && $canonical !== $url['loc']) {
                 /* Let's assume that if a canonical is set for this page and it's different from
                    the URL of this post, that page is either already in the XML sitemap OR is on
                    an external site, either way, we shouldn't include it here. */
                 continue;
             } else {
                 if ($this->options['trailingslash'] === true && $p->post_type != 'post') {
                     $url['loc'] = trailingslashit($url['loc']);
                 }
             }

             $url['pri'] = $this->calculate_priority($p);
             $url['images'] = array();

             // Prepend the featured image as an <img> tag so it is picked up by the scan below.
             $content = $p->post_content;
             $content = '<p><img src="' . $this->image_url(get_post_thumbnail_id($p->ID)) . '" alt="' . $p->post_title . '" /></p>' . $content;

             // Image URLs already added for this post. $url['images'] is a plain list, so
             // duplicates have to be tracked separately, keyed by URL.
             $seen_srcs = array();

             if (preg_match_all('`<img [^>]+>`', $content, $matches)) {
                 foreach ($matches[0] as $img) {
                     if (preg_match('`src=["\']([^"\']+)["\']`', $img, $match)) {
                         $src = $match[1];
                         if (wpseo_is_url_relative($src) === true) {
                             if ($src[0] !== '/') {
                                 continue;
                             } else {
                                 // The URL is relative, we'll have to make it absolute
                                 $src = $this->home_url . $src;
                             }
                         } elseif (strpos($src, 'http') !== 0) {
                             // Protocol relative url, we add the scheme as the standard requires a protocol
                             $src = $scheme . ':' . $src;
                         }

                         // Only keep images whose URL contains this site's host.
                         if (strpos($src, $host) === false) {
                             continue;
                         }
                         // Drop URLs that esc_url() would have to alter.
                         if ($src != esc_url($src)) {
                             continue;
                         }
                         // Each image URL is listed once per post.
                         if (isset($seen_srcs[$src])) {
                             continue;
                         }
                         $seen_srcs[$src] = true;

                         $image = array('src' => apply_filters('wpseo_xml_sitemap_img_src', $src, $p));
                         if (preg_match('`title=["\']([^"\']+)["\']`', $img, $match)) {
                             $image['title'] = str_replace(array('-', '_'), ' ', $match[1]);
                         }
                         if (preg_match('`alt=["\']([^"\']+)["\']`', $img, $match)) {
                             $image['alt'] = str_replace(array('-', '_'), ' ', $match[1]);
                         }
                         $image = apply_filters('wpseo_xml_sitemap_img', $image, $p);
                         $url['images'][] = $image;
                     }
                 }
             }

             // Posts containing a [gallery shortcode take their image list from the attachments instead.
             if (strpos($p->post_content, '[gallery') !== false) {
                 if (is_array($attachments) && $attachments !== array()) {
                     $url['images'] = $this->parse_attachments($attachments, $p);
                 }
             }

             $url['images'] = apply_filters('wpseo_sitemap_urlimages', $url['images'], $p->ID);

             // Never emit the same location twice within one sitemap.
             if (!in_array($url['loc'], $stackedurls)) {
                 // Use this filter to adjust the entry before it gets added to the sitemap
                 $url = apply_filters('wpseo_sitemap_entry', $url, 'post', $p);
                 if (is_array($url) && $url !== array()) {
                     $output .= $this->sitemap_url($url);
                     $stackedurls[] = $url['loc'];
                 }
             }
             // Clear the post_meta and the term cache for the post, as we no longer need it now.
             // wp_cache_delete( $p->ID, 'post_meta' );
             // clean_object_term_cache( $p->ID, $post_type );
         }
     }

     if (empty($output)) {
         $this->bad_sitemap = true;
         return;
     }

     // Wrap the collected entries in the <urlset> root element.
     $this->sitemap = '<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:image="http://www.google.com/schemas/sitemap-image/1.1" ';
     $this->sitemap .= 'xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd" ';
     $this->sitemap .= 'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";
     $this->sitemap .= $output;
     // Filter to allow adding extra URLs, only do this on the first XML sitemap, not on all.
     if ($n === 1) {
         $this->sitemap .= apply_filters('wpseo_sitemap_' . $post_type . '_content', '');
     }
     $this->sitemap .= '</urlset>';
 }