/**
 * Build an Analytics object that is primed for a page view hit.
 *
 * The document path defaults to the current request URL when `$url` is empty.
 * Absolute URLs are reduced to their path (plus query string, if any) before
 * being handed to the Analytics object.
 *
 * @param string $url   URL or path of the viewed page; empty means "use the current request URL"
 * @param string $title Document title to attach to the page view
 * @return mixed The object returned by `$this->analytics()` with the document path and
 *               title set, or null when `$this->analytics()` returns a falsy value
 */
public function pageViewAnalytics($url = "", $title = "")
{
    $pageView = $this->analytics();
    if (!$pageView) {
        return null;
    }

    $path = ($url == "") ? craft()->request->url : $url;

    /* -- Only a path (and optional query string) should be sent for page views */
    if (UrlHelper::isAbsoluteUrl($path)) {
        $parsed = parse_url($path);
        $path = isset($parsed['path']) ? $parsed['path'] : "/";
        if (isset($parsed['query'])) {
            $path .= "?" . $parsed['query'];
        }
    }

    /* -- Protocol-relative URLs lose their first character */
    /* -- NOTE(review): this drops a single slash, so `//host/path` becomes `/host/path`; confirm that is intended */
    if (UrlHelper::isProtocolRelativeUrl($path)) {
        $path = substr($path, 1);
    }

    /* -- Honor the plugin-wide `stripQueryString` setting */
    $pluginSettings = craft()->plugins->getPlugin('instantanalytics')->getSettings();
    $stripQuery = isset($pluginSettings, $pluginSettings['stripQueryString']) && $pluginSettings['stripQueryString'];
    if ($stripQuery) {
        $path = UrlHelper::stripQueryString($path);
    }

    /* -- Prime the Analytics object; the caller is the one that actually sends it */
    $pageView->setDocumentPath($path)->setDocumentTitle($title);
    InstantAnalyticsPlugin::log("Created sendPageView for `" . $path . "` - `" . $title . "`", LogLevel::Info, false);

    return $pageView;
}
/**
 * Render the SEO metrics report for the page named by the `url` request param.
 *
 * Loads and parses the page, probes robots.txt / sitemap.xml / HTTPS, queries
 * the W3C validator and Google PageSpeed, computes heading, focus-keyword and
 * readability statistics, and renders `_seo_metrics.twig` with the results.
 * The `_error` template is rendered instead when the URL is not an absolute,
 * parseable URL or when the page cannot be loaded into a DOM.
 *
 * Request params read: `url` (page to analyze) and `keywords` (comma-separated
 * focus keywords).
 *
 * NOTE(review): the remote probes run with SSL peer verification disabled and
 * without explicit timeouts, as in the original implementation.
 *
 * @return void
 */
public function actionRenderMetrics()
{
    if (!$this->parsingDom) {
        $this->parsingDom = true;

        /* -- Temporarily point the template engine at the SEOmatic plugin templates, using whichever service exposes the templates path */
        $oldPath = method_exists(craft()->templates, 'getTemplatesPath') ? craft()->templates->getTemplatesPath() : craft()->path->getTemplatesPath();
        $newPath = craft()->path->getPluginsPath() . 'seomatic/templates';
        method_exists(craft()->templates, 'setTemplatesPath') ? craft()->templates->setTemplatesPath($newPath) : craft()->path->setTemplatesPath($newPath);

        /* -- Render the SEOmatic display preview template */
        $url = urldecode(craft()->request->getParam('url'));

        /* -- Only continue for absolute URLs that parse_url() can split into at least a host */
        $urlParts = UrlHelper::isAbsoluteUrl($url) ? parse_url($url) : false;
        if (is_array($urlParts) && isset($urlParts['host'])) {
            $scheme = isset($urlParts['scheme']) ? $urlParts['scheme'] : "http";
            /* -- The port needs its ":" separator, otherwise it is glued onto the host name */
            $portSuffix = isset($urlParts['port']) ? ":" . $urlParts['port'] : "";
            $rootUrl = $scheme . "://" . $urlParts['host'] . $portSuffix . "/";

            $keywordsParam = urldecode(craft()->request->getParam('keywords'));
            $keywordsKeys = explode(",", $keywordsParam);
            $keywords = array();

            $userAgent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13';

            /* -- Silly work-around for what appears to be a file_get_contents bug with https -> http://stackoverflow.com/questions/10524748/why-im-getting-500-error-when-using-file-get-contents-but-works-in-a-browser */
            $opts = array('http' => array('header' => "User-Agent:Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13\r\n"));
            $context = stream_context_create($opts);
            $dom = HtmlDomParser::file_get_html($url, false, $context);
            if ($dom) {
                $textStatistics = new TS\TextStatistics();

                /* -- See if robots.txt exists, and whether it advertises a sitemap */
                $hasRobotsTxt = false;
                $hasSitemap = false;
                $sitemapUrl = rtrim($rootUrl, '/') . "/sitemap.xml";
                $foundSitemapUrl = "";
                $robotsUrl = rtrim($rootUrl, '/') . "/robots.txt";
                /* -- Best-effort probe: a missing robots.txt is an expected outcome, so the warning is suppressed */
                $robots = @file_get_contents($robotsUrl, false, $context);
                if ($robots !== false) {
                    $hasRobotsTxt = true;
                    $foundSitemapUrl = $this->findSitemapUrlInRobotsTxt($robots);
                }

                /* -- Check to see if a sitemap exists: first the one from robots.txt, then the conventional location */
                $sitemapCandidates = array_unique(array_filter(array($foundSitemapUrl, $sitemapUrl)));
                foreach ($sitemapCandidates as $candidateUrl) {
                    /* -- Reading a single byte is enough to prove the resource can be fetched */
                    if (@file_get_contents($candidateUrl, false, $context, 0, 1) !== false) {
                        $hasSitemap = true;
                        break;
                    }
                }

                /* -- See if the site is https */
                /* -- NOTE(review): the port of the analyzed URL is reused for the https probe; confirm that is the intent */
                $sslUrl = "https" . "://" . $urlParts['host'] . $portSuffix . '/';
                $ch = curl_init($sslUrl);
                curl_setopt($ch, CURLOPT_NOBODY, true);
                curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
                /* -- Redirects are only followed when open_basedir is not set */
                $open_basedir = ini_get('open_basedir');
                if (empty($open_basedir)) {
                    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
                }
                curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
                curl_exec($ch);
                $sslReturnCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
                curl_close($ch);

                /* -- Check to see if the page is valid; the results are reported in the X-W3C-Validator-* response headers */
                $validatorUrl = "https://validator.w3.org/check?uri=" . urlencode($url) . "&output=json";
                $validatorResult = $this->fetchWithCurl($validatorUrl, $userAgent, true);
                $validatorStatus = $validatorErrors = $validatorWarnings = "";
                if ($validatorResult) {
                    $validatorStatus = $this->extractResponseHeader($validatorResult, "X-W3C-Validator-Status");
                    $validatorErrors = $this->extractResponseHeader($validatorResult, "X-W3C-Validator-Errors");
                    $validatorWarnings = $this->extractResponseHeader($validatorResult, "X-W3C-Validator-Warnings");
                }
                /* -- The template gets the human-readable validator URL, not the JSON one */
                $validatorUrl = "https://validator.w3.org/check?uri=" . urlencode($url);

                /* -- Check Google Pagespeed insights for desktop */
                $pageSpeedPageStats = array();
                $pagespeedJson = $this->fetchPagespeedReport($url, "desktop", $userAgent);
                if ($pagespeedJson && !empty($pagespeedJson['pageStats'])) {
                    $pageSpeedPageStats = $pagespeedJson['pageStats'];
                    $totalResponseBytes = 0;
                    $byteKeys = array('htmlResponseBytes', 'cssResponseBytes', 'imageResponseBytes', 'javascriptResponseBytes', 'otherResponseBytes');
                    foreach ($byteKeys as $byteKey) {
                        /* -- Missing categories count as zero bytes */
                        if (empty($pageSpeedPageStats[$byteKey])) {
                            $pageSpeedPageStats[$byteKey] = 0;
                        }
                        $totalResponseBytes += $pageSpeedPageStats[$byteKey];
                    }
                    $pageSpeedPageStats['totalResponseBytes'] = $totalResponseBytes;
                }
                $pagespeedDesktopScore = $this->pagespeedScore($pagespeedJson, 'SPEED');
                $pagespeedDesktopUrl = "https://developers.google.com/speed/pagespeed/insights/?url=" . urlencode($url) . "&tab=desktop";

                /* -- Check Google Pagespeed insights for mobile */
                $pagespeedJson = $this->fetchPagespeedReport($url, "mobile", $userAgent);
                $pagespeedMobileScore = $this->pagespeedScore($pagespeedJson, 'SPEED');
                $pagespeedMobileUsability = $this->pagespeedScore($pagespeedJson, 'USABILITY');
                $pagespeedMobileUrl = "https://developers.google.com/speed/pagespeed/insights/?url=" . urlencode($url) . "&tab=mobile";

                /* -- Scrape for JSON-LD before we remove the <script> tags */
                $jsonLdTypes = array();
                foreach ($dom->find('script[type=application/ld+json]') as $elem) {
                    $jsonArray = json_decode($elem->innertext, true);
                    if (isset($jsonArray['@type'])) {
                        array_push($jsonLdTypes, $jsonArray['@type']);
                    }
                }
                $jsonLdTypes = array_unique($jsonLdTypes);

                /* -- Remove inline <script> and <style> tags, and then strip the DOM down */
                foreach ($dom->find('style') as $element) {
                    $element->outertext = '';
                }
                foreach ($dom->find('script') as $element) {
                    $element->outertext = '';
                }
                $strippedDom = html_entity_decode($dom->plaintext);
                $strippedDom = stripslashes($strippedDom);
                $htmlDom = html_entity_decode($dom->outertext);

                /* -- SEO statistics */
                /* -- A page without a <title> yields an empty title rather than a null dereference */
                $titleElem = $dom->find('title', 0);
                $titleTag = $titleElem ? html_entity_decode($titleElem->plaintext) : "";
                $titleLength = strlen($titleTag);

                $metaDescriptionTag = "";
                $metaDescriptionLength = 0;
                $elem = $dom->find('meta[name=description]', 0);
                if ($elem) {
                    $metaDescriptionTag = html_entity_decode($elem->content);
                    $metaDescriptionLength = strlen($metaDescriptionTag);
                }

                $metaTwitterTag = "";
                $elem = $dom->find('meta[name=twitter:card],meta[property=twitter:card]', 0);
                if ($elem) {
                    $metaTwitterTag = html_entity_decode($elem->content);
                }

                $metaOpenGraphTag = "";
                $elem = $dom->find('meta[property=og:type],meta[property=og:url],meta[property=og:title]', 0);
                if ($elem) {
                    $metaOpenGraphTag = html_entity_decode($elem->content);
                }

                $hasRelPublisherTag = false;
                $elem = $dom->find('link[rel=publisher]', 0);
                if ($elem) {
                    $hasRelPublisherTag = true;
                }

                $emptyImageAlts = count($dom->find('img[!alt]'));

                $h1Tags = count($dom->find('h1'));
                $h2Tags = count($dom->find('h2'));
                $h3Tags = count($dom->find('h3'));
                $h4Tags = count($dom->find('h4'));
                $h5Tags = count($dom->find('h5'));
                $totalHTags = $h1Tags + $h2Tags + $h3Tags + $h4Tags + $h5Tags;

                /* -- Headings are "effective" with exactly one h1, at least three headings overall, and no skipped levels */
                $effectiveHTags = ($h1Tags == 1)
                    && ($totalHTags >= 3)
                    && !($h2Tags == 0 && ($h3Tags || $h4Tags || $h5Tags))
                    && !($h3Tags == 0 && ($h4Tags || $h5Tags))
                    && !($h4Tags == 0 && $h5Tags);

                /* -- Text length relative to the remaining markup length; guarded so a page with no markup cannot divide by zero */
                $markupLength = strlen($htmlDom) - strlen($strippedDom);
                $textToHtmlRatio = ($markupLength !== 0) ? strlen($strippedDom) / $markupLength * 100 : 0;

                $strippedDom = preg_replace('/\\s+/', ' ', $strippedDom);

                /* -- Extract the page keywords, and clean them up a bit */
                $pageKeywords = craft()->seomatic->extractKeywords($strippedDom);
                $pageKeywords = str_replace(",,", ",", $pageKeywords);
                $pageKeywords = str_replace(" ,", ",", $pageKeywords);
                $pageKeywords = str_replace(" .", ".", $pageKeywords);
                $pageKeywords = preg_replace('/\\.+/', '.', $pageKeywords);
                $pageKeywords = preg_replace('/,+/', ',', $pageKeywords);
                $pageKeywords = str_replace(",.,", ",", $pageKeywords);
                $pageKeywords = html_entity_decode($pageKeywords, ENT_COMPAT, 'UTF-8');

                /* -- Focus keywords: count case-insensitive occurrences in the key places on the page */
                foreach ($keywordsKeys as $keywordsKey) {
                    $keywordsKey = trim($keywordsKey);
                    if (strlen($keywordsKey)) {
                        $needle = strtolower($keywordsKey);

                        /* -- Both h1 and h2 headings feed the `appearsInH1Tag` count */
                        $appearsInH1Tag = 0;
                        foreach ($dom->find('h1') as $element) {
                            $appearsInH1Tag += substr_count(strtolower($element->plaintext), $needle);
                        }
                        foreach ($dom->find('h2') as $element) {
                            $appearsInH1Tag += substr_count(strtolower($element->plaintext), $needle);
                        }

                        $appearsInImgTag = 0;
                        foreach ($dom->find('img') as $element) {
                            $appearsInImgTag += substr_count(strtolower($element->alt), $needle);
                        }

                        $appearsInAhrefTag = 0;
                        foreach ($dom->find('a') as $element) {
                            $appearsInAhrefTag += substr_count(strtolower($element->plaintext), $needle);
                        }

                        $keywords[$keywordsKey] = array(
                            'appearsInTitleTag' => substr_count(strtolower($titleTag), $needle),
                            'appearsInUrl' => substr_count(strtolower($url), $needle),
                            'appearsInMetaDescriptionTag' => substr_count(strtolower($metaDescriptionTag), $needle),
                            'appearsInH1Tag' => $appearsInH1Tag,
                            'appearsInAhrefTag' => $appearsInAhrefTag,
                            'appearsInImgTag' => $appearsInImgTag,
                            'appearsInPageKeywords' => substr_count(strtolower($pageKeywords), $needle),
                            'appearsOnWebPage' => substr_count(strtolower($strippedDom), $needle),
                        );
                    }
                }

                /* -- Text statistics */
                $wordCount = $textStatistics->wordCount($strippedDom);
                /* -- 200 words per minute, never less than one minute; floor() returns a float, so it is cast before clamping */
                $readingTime = max(1, (int) floor($wordCount / 200));
                $fleschKincaidReadingEase = $textStatistics->fleschKincaidReadingEase($strippedDom);
                $fleschKincaidGradeLevel = $textStatistics->fleschKincaidGradeLevel($strippedDom);
                $gunningFogScore = $textStatistics->gunningFogScore($strippedDom);
                $colemanLiauIndex = $textStatistics->colemanLiauIndex($strippedDom);
                $smogIndex = $textStatistics->smogIndex($strippedDom);
                $automatedReadabilityIndex = $textStatistics->automatedReadabilityIndex($strippedDom);

                $vars = array(
                    'titleTag' => $titleTag,
                    'titleLength' => $titleLength,
                    'metaDescriptionTag' => $metaDescriptionTag,
                    'metaDescriptionLength' => $metaDescriptionLength,
                    'metaTwitterTag' => $metaTwitterTag,
                    'metaOpenGraphTag' => $metaOpenGraphTag,
                    'hasRelPublisherTag' => $hasRelPublisherTag,
                    'jsonLdTypes' => $jsonLdTypes,
                    'hasRobotsTxt' => $hasRobotsTxt,
                    'hasSitemap' => $hasSitemap,
                    'emptyImageAlts' => $emptyImageAlts,
                    'validatorUrl' => $validatorUrl,
                    'validatorStatus' => $validatorStatus,
                    'validatorErrors' => $validatorErrors,
                    'validatorWarnings' => $validatorWarnings,
                    'pageSpeedPageStats' => $pageSpeedPageStats,
                    'pagespeedDesktopScore' => $pagespeedDesktopScore,
                    'pagespeedDesktopUrl' => $pagespeedDesktopUrl,
                    'pagespeedMobileScore' => $pagespeedMobileScore,
                    'pagespeedMobileUsability' => $pagespeedMobileUsability,
                    'pagespeedMobileUrl' => $pagespeedMobileUrl,
                    'sslReturnCode' => $sslReturnCode,
                    'h1Tags' => $h1Tags,
                    'h2Tags' => $h2Tags,
                    'h3Tags' => $h3Tags,
                    'h4Tags' => $h4Tags,
                    'h5Tags' => $h5Tags,
                    'effectiveHTags' => $effectiveHTags,
                    'textToHtmlRatio' => $textToHtmlRatio,
                    'wordCount' => $wordCount,
                    'readingTime' => $readingTime,
                    'pageKeywords' => $pageKeywords,
                    'keywords' => $keywords,
                    'fleschKincaidReadingEase' => $fleschKincaidReadingEase,
                    'fleschKincaidGradeLevel' => $fleschKincaidGradeLevel,
                    'gunningFogScore' => $gunningFogScore,
                    'colemanLiauIndex' => $colemanLiauIndex,
                    'smogIndex' => $smogIndex,
                    'automatedReadabilityIndex' => $automatedReadabilityIndex,
                );
                $this->renderTemplate('_seo_metrics.twig', $vars);
            } else {
                $this->renderTemplate('_error', array('errorMessage' => "Error parsing the DOM. Is this a valid, publicly accessible URL?"));
            }
        } else {
            $this->renderTemplate('_error', array('errorMessage' => "Error loading the webpage. Is this a valid, publicly accessible URL?"));
        }

        /* -- Put the templates path back the way we found it */
        method_exists(craft()->templates, 'setTemplatesPath') ? craft()->templates->setTemplatesPath($oldPath) : craft()->path->setTemplatesPath($oldPath);
    }
    $this->parsingDom = false;
}

/**
 * Find the sitemap URL advertised by a robots.txt body.
 *
 * Only lines that start with `Sitemap` (case-insensitive) are considered, and
 * only http(s) URLs are accepted so the value is safe to pass to
 * file_get_contents(). When several lines match, the last one wins.
 *
 * @param string $robots Raw robots.txt contents
 * @return string The advertised sitemap URL, or "" when none is found
 */
private function findSitemapUrlInRobotsTxt($robots)
{
    $directive = 'Sitemap';
    $found = "";
    foreach (explode("\n", $robots) as $line) {
        $line = ltrim($line);
        if (stripos($line, $directive) === 0) {
            /* -- Drop the directive name, the ":" separator and surrounding whitespace (including a trailing "\r") */
            $candidate = trim(ltrim(trim(substr($line, strlen($directive))), ':'));
            if (preg_match('#^https?://#i', $candidate)) {
                $found = $candidate;
            }
        }
    }

    return $found;
}

/**
 * GET a URL with cURL and return the response as a string.
 *
 * @param string $url            URL to request
 * @param string $userAgent      User-Agent header value to send
 * @param bool   $includeHeaders Whether the response headers are included in the returned string
 * @return string|bool The response, or false when the request fails
 */
private function fetchWithCurl($url, $userAgent, $includeHeaders = false)
{
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    if ($includeHeaders) {
        curl_setopt($ch, CURLOPT_HEADER, true);
    }
    curl_setopt($ch, CURLOPT_URL, $url);
    $response = curl_exec($ch);
    curl_close($ch);

    return $response;
}

/**
 * Extract the value of a header from a raw HTTP response string.
 *
 * The header name is matched case-insensitively; the value runs to the end of
 * the line and is trimmed, so both "\n" and "\r\n" line endings are handled.
 *
 * @param string $response   Raw response text that includes the headers
 * @param string $headerName Header name without the trailing colon
 * @return string The header value, or "" when the header is absent
 */
private function extractResponseHeader($response, $headerName)
{
    $needle = $headerName . ": ";
    $pos = stripos($response, $needle);
    if ($pos === false) {
        return "";
    }
    $pos += strlen($needle);
    $end = strpos($response, "\n", $pos);
    $value = ($end === false) ? substr($response, $pos) : substr($response, $pos, $end - $pos);

    return trim((string) $value);
}

/**
 * Run Google PageSpeed for a URL and return the decoded JSON report.
 *
 * NOTE(review): this targets the v2 PageSpeed endpoint; confirm it is still served.
 *
 * @param string $url       Page to analyze
 * @param string $strategy  PageSpeed strategy, "desktop" or "mobile"
 * @param string $userAgent User-Agent header value to send
 * @return array|null The decoded report, or null when the request fails or the body does not decode to an array
 */
private function fetchPagespeedReport($url, $strategy, $userAgent)
{
    $apiUrl = "https://www.googleapis.com/pagespeedonline/v2/runPagespeed?url=" . urlencode($url) . "&strategy=" . $strategy;
    $response = $this->fetchWithCurl($apiUrl, $userAgent);
    if (!$response) {
        return null;
    }
    $report = json_decode($response, true);

    return (is_array($report) && $report) ? $report : null;
}

/**
 * Read a rule group score out of a decoded PageSpeed report.
 *
 * The score is only used when the report's `responseCode` is 200, 301 or 302.
 *
 * @param array|null $report    Decoded PageSpeed report, or null when none was fetched
 * @param string     $ruleGroup Rule group key, e.g. "SPEED" or "USABILITY"
 * @return int|string The integer score, or "" when it is unavailable
 */
private function pagespeedScore($report, $ruleGroup)
{
    if (!is_array($report) || !isset($report['responseCode'])) {
        return "";
    }
    $code = $report['responseCode'];
    $usable = ($code == "200" || $code == "301" || $code == "302");
    if ($usable && isset($report['ruleGroups'][$ruleGroup]['score'])) {
        return intval($report['ruleGroups'][$ruleGroup]['score']);
    }

    return "";
}
/**
 * Turn a site-relative URL into a fully qualified one.
 *
 * Empty values, absolute URLs and protocol-relative URLs are returned
 * unchanged. Anything else is prefixed with the scheme, host and port of the
 * `siteUrlOverride` SEOmatic config setting when it is set, or of
 * `craft()->getSiteUrl()` otherwise. Any path component of that site URL is
 * not carried over; when it has no scheme and host the prefix is just "/".
 *
 * @param string $url The URL or path to qualify
 * @return string The fully qualified URL, or `$url` itself when it is empty or already qualified
 */
public function getFullyQualifiedUrl($url)
{
    $result = $url;
    if (!isset($result) || $result == "") {
        return $result;
    }

    /* -- The URL is already a fully qualified URL, do nothing */
    if (UrlHelper::isAbsoluteUrl($url) || UrlHelper::isProtocolRelativeUrl($url)) {
        return $result;
    }

    $siteUrlOverride = craft()->config->get("siteUrlOverride", "seomatic");
    $siteUrl = $siteUrlOverride ? $siteUrlOverride : craft()->getSiteUrl();

    /* -- Rebuild the site URL from scheme, host and port only */
    $urlParts = parse_url($siteUrl);
    if (isset($urlParts['scheme']) && isset($urlParts['host'])) {
        $port = isset($urlParts['port']) ? ":" . $urlParts['port'] : "";
        $siteUrl = $urlParts['scheme'] . "://" . $urlParts['host'] . $port . "/";
    } else {
        $siteUrl = "/";
    }

    /* -- Avoid a doubled slash where the site URL and the path meet */
    if ($siteUrl[strlen($siteUrl) - 1] == '/' && $result[0] == '/') {
        $siteUrl = rtrim($siteUrl, '/');
    }

    return $siteUrl . $result;
}