コード例 #1
0
ファイル: wsw-calc.php プロジェクト: Kishor900/scrapboard
 /**
  * Compute how dense a keyword is within a text, as a percentage of its words.
  *
  * The keyword is matched case-insensitively as a whole word (word boundaries
  * on both sides) and the number of matches is divided by the word count
  * reported by TextStatistics::wordCount().
  *
  * @param string $text    Text to analyse.
  * @param string $keyword Keyword or phrase to look for.
  *
  * @return float Keyword density in percent, rounded to two decimals. Returns
  *               0.0 when the keyword is empty, the text has no words, or the
  *               pattern match fails.
  */
 static function calc_post_density($text, $keyword)
 {
     /* An empty keyword would build the pattern `\b\b`, which matches at every
        word boundary and yields a meaningless density. */
     if ((string) $keyword === '') {
         return 0.0;
     }
     $textStatistics = new TS\TextStatistics();
     $wordCount = $textStatistics->wordCount($text);
     /* Avoid a division by zero for empty / word-less text. */
     if ($wordCount <= 0) {
         return 0.0;
     }
     /* utf8_encode() treats its input as ISO-8859-1. Applying it to text that is
        already valid UTF-8 double-encodes every non-ASCII character, so such
        keywords could never match. Only convert when the text is NOT valid
        UTF-8 (an empty pattern with the `u` flag matches only valid UTF-8). */
     $subject = preg_match('//u', $text) === 1 ? $text : utf8_encode($text);
     $pattern = '`\\b' . preg_quote($keyword, '`') . '\\b`miu';
     $keywordCount = preg_match_all($pattern, $subject, $matches);
     /* preg_match_all() returns false on error (e.g. malformed UTF-8 keyword). */
     if ($keywordCount === false) {
         return 0.0;
     }
     return round($keywordCount / $wordCount * 100, 2);
 }
コード例 #2
0
ファイル: test.php プロジェクト: kediamanav/Quoranalysis
<?php

include './Text-Statistics/src/DaveChild/TextStatistics/TextStatistics.php';
include './Text-Statistics/src/DaveChild/TextStatistics/Text.php';
include './Text-Statistics/src/DaveChild/TextStatistics/Syllables.php';
include './Text-Statistics/src/DaveChild/TextStatistics/Resource.php';
include './Text-Statistics/src/DaveChild/TextStatistics/Pluralise.php';
include './Text-Statistics/src/DaveChild/TextStatistics/Maths.php';
use DaveChild\TextStatistics as TS;
// Score the readability of ./query.txt with the Flesch-Kincaid Reading Ease
// formula and write the resulting number to ./readability.txt.
$textStatistics = new TS\TextStatistics();
$filename = './query.txt';

// file_get_contents() copes with empty files (fread() rejects a zero length)
// and lets us detect a missing/unreadable file instead of continuing with
// an invalid handle.
$contents = file_get_contents($filename);
if ($contents === false) {
    throw new \RuntimeException('Unable to read ' . $filename);
}

// The same text was previously scored twice and averaged with itself, which
// always equals the single score; compute it once.
$final = $textStatistics->fleschKincaidReadingEase($contents);

// Write the score as text; fail loudly if the output file cannot be written.
if (file_put_contents('./readability.txt', (string) $final) === false) {
    throw new \RuntimeException('Unable to write ./readability.txt');
}
コード例 #3
0
 /**
  * Render the SEO metrics report for the page named by the `url` request parameter.
  *
  * Steps performed:
  *  - temporarily points the template path at `seomatic/templates`;
  *  - loads and parses the page, then probes robots.txt, sitemap.xml and HTTPS;
  *  - queries the W3C validator and Google PageSpeed (desktop and mobile);
  *  - collects tag, heading, focus-keyword and readability statistics;
  *  - renders `_seo_metrics.twig` with the results, or `_error` when the URL is
  *    not absolute or the page cannot be parsed.
  *
  * Request parameters read: `url` and `keywords` (comma-separated focus keywords).
  *
  * NOTE(review): this fetches a caller-supplied URL server-side and disables TLS
  * peer verification on its cURL requests — confirm the action is only reachable
  * by trusted users.
  *
  * @return void
  */
 public function actionRenderMetrics()
 {
     if (!$this->parsingDom) {
         $this->parsingDom = true;
         /* -- Swap in the plugin's template path, remembering the previous one so it can be restored */
         $oldPath = method_exists(craft()->templates, 'getTemplatesPath') ? craft()->templates->getTemplatesPath() : craft()->path->getTemplatesPath();
         $newPath = craft()->path->getPluginsPath() . 'seomatic/templates';
         method_exists(craft()->templates, 'setTemplatesPath') ? craft()->templates->setTemplatesPath($newPath) : craft()->path->setTemplatesPath($newPath);
         /* -- Render the SEOmatic display preview template */
         $url = urldecode(craft()->request->getParam('url'));
         if (UrlHelper::isAbsoluteUrl($url)) {
             $urlParts = parse_url($url);
             /* -- Build "scheme://host[:port]/"; the port needs its ":" separator */
             $scheme = isset($urlParts['scheme']) ? $urlParts['scheme'] : "http";
             $portSuffix = isset($urlParts['port']) ? ":" . $urlParts['port'] : "";
             $rootUrl = $scheme . "://" . $urlParts['host'] . $portSuffix . "/";
             $keywordsParam = urldecode(craft()->request->getParam('keywords'));
             $keywordsKeys = explode(",", $keywordsParam);
             $keywords = array();
             $userAgent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13';
             /* -- Silly work-around for what appears to be a file_get_contents bug with https -> http://stackoverflow.com/questions/10524748/why-im-getting-500-error-when-using-file-get-contents-but-works-in-a-browser */
             $opts = array('http' => array('header' => "User-Agent:" . $userAgent . "\r\n"));
             $context = stream_context_create($opts);
             $dom = HtmlDomParser::file_get_html($url, false, $context);
             if ($dom) {
                 $textStatistics = new TS\TextStatistics();
                 /* -- Shared cURL GET used for the validator and PageSpeed requests; returns the body or false */
                 $fetchWithCurl = function ($target) use ($userAgent) {
                     $ch = curl_init();
                     curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
                     curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
                     curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
                     curl_setopt($ch, CURLOPT_URL, $target);
                     $result = curl_exec($ch);
                     curl_close($ch);
                     return $result;
                 };
                 /* -- See if robots.txt exists */
                 $hasRobotsTxt = false;
                 $hasSitemap = false;
                 $sitemapUrl = rtrim($rootUrl, '/') . "/sitemap.xml";
                 $foundSitemapUrl = "";
                 $robotsUrl = rtrim($rootUrl, '/') . "/robots.txt";
                 $robots = @file_get_contents($robotsUrl, false, $context);
                 if ($robots !== false) {
                     $hasRobotsTxt = true;
                     $searchStr = 'Sitemap';
                     foreach (explode("\n", $robots) as $line) {
                         $line = ltrim($line);
                         $pos = strpos($line, $searchStr);
                         if ($pos !== false) {
                             /* -- Keep what follows "Sitemap", minus the ":" separator and surrounding whitespace */
                             $candidate = substr($line, $pos + strlen($searchStr));
                             $candidate = trim(ltrim($candidate, ": \t"));
                             /* -- Only accept http(s) URLs so a hostile robots.txt cannot point us at local files or other stream wrappers */
                             if (preg_match('`^https?://`i', $candidate)) {
                                 $foundSitemapUrl = $candidate;
                             }
                         }
                     }
                 }
                 /* -- Check to see if a sitemap exists: first the one advertised in robots.txt, then the conventional location.
                       Only the first byte is requested; we just need to know the resource is there. */
                 if ($foundSitemapUrl) {
                     $siteMapContents = @file_get_contents($foundSitemapUrl, false, $context, 0, 1);
                     if ($siteMapContents !== false) {
                         $hasSitemap = true;
                     }
                 }
                 if (!$hasSitemap) {
                     $siteMapContents = @file_get_contents($sitemapUrl, false, $context, 0, 1);
                     if ($siteMapContents !== false) {
                         $hasSitemap = true;
                     }
                 }
                 /* -- See if the site is https */
                 $sslReturnCode = 0;
                 $sslUrl = "https" . "://" . $urlParts['host'] . $portSuffix . '/';
                 $ch = curl_init($sslUrl);
                 curl_setopt($ch, CURLOPT_NOBODY, true);
                 curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
                 /* -- CURLOPT_FOLLOWLOCATION is only enabled when open_basedir is not set */
                 $open_basedir = ini_get('open_basedir');
                 if (empty($open_basedir)) {
                     curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
                 }
                 curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
                 curl_exec($ch);
                 $sslReturnCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
                 curl_close($ch);
                 /* -- Check to see if the page is valid */
                 $validatorUrl = "https://validator.w3.org/check?uri=" . urlencode($url) . "&output=json";
                 $validatorResult = $fetchWithCurl($validatorUrl);
                 /* -- NOTE(review): the values below are searched for in the returned text, but CURLOPT_HEADER is not
                       enabled, so only the response body is returned — confirm the validator echoes them there. */
                 $validatorFields = array('Status' => "", 'Errors' => "", 'Warnings' => "");
                 if ($validatorResult) {
                     foreach (array_keys($validatorFields) as $field) {
                         $searchStr = "X-W3C-Validator-" . $field . ": ";
                         $pos = strpos($validatorResult, $searchStr);
                         if ($pos === false) {
                             continue;
                         }
                         $pos += strlen($searchStr);
                         /* -- Take the rest of the line; if there is no line terminator, take the rest of the text */
                         $end = strpos($validatorResult, "\n", $pos);
                         $value = $end === false ? substr($validatorResult, $pos) : substr($validatorResult, $pos, $end - $pos);
                         $validatorFields[$field] = trim($value);
                     }
                 }
                 $validatorStatus = $validatorFields['Status'];
                 $validatorErrors = $validatorFields['Errors'];
                 $validatorWarnings = $validatorFields['Warnings'];
                 /* -- From here on $validatorUrl is the human-readable report link passed to the template */
                 $validatorUrl = "https://validator.w3.org/check?uri=" . urlencode($url);
                 /* -- Check Google Pagespeed insights for desktop */
                 $pagespeedDesktopScore = "";
                 $pagespeedDesktopUrl = "https://www.googleapis.com/pagespeedonline/v2/runPagespeed?url=" . urlencode($url) . "&strategy=desktop";
                 $pagespeedDesktopResult = $fetchWithCurl($pagespeedDesktopUrl);
                 $pageSpeedPageStats = array();
                 if ($pagespeedDesktopResult) {
                     $pagespeedJson = json_decode($pagespeedDesktopResult, true);
                     if ($pagespeedJson) {
                         if (!empty($pagespeedJson['pageStats'])) {
                             $pageSpeedPageStats = $pagespeedJson['pageStats'];
                             /* -- Default any missing byte counters to 0 and add them up */
                             $totalResponseBytes = 0;
                             $byteKeys = array('htmlResponseBytes', 'cssResponseBytes', 'imageResponseBytes', 'javascriptResponseBytes', 'otherResponseBytes');
                             foreach ($byteKeys as $byteKey) {
                                 if (empty($pageSpeedPageStats[$byteKey])) {
                                     $pageSpeedPageStats[$byteKey] = 0;
                                 }
                                 $totalResponseBytes += $pageSpeedPageStats[$byteKey];
                             }
                             $pageSpeedPageStats['totalResponseBytes'] = $totalResponseBytes;
                         }
                         if (isset($pagespeedJson['responseCode']) && ($pagespeedJson['responseCode'] == "200" || $pagespeedJson['responseCode'] == "301" || $pagespeedJson['responseCode'] == "302")) {
                             if (isset($pagespeedJson['ruleGroups']['SPEED']['score'])) {
                                 $pagespeedDesktopScore = intval($pagespeedJson['ruleGroups']['SPEED']['score']);
                             }
                         }
                     }
                 }
                 /* -- Replace the API URL with the human-readable report link passed to the template */
                 $pagespeedDesktopUrl = "https://developers.google.com/speed/pagespeed/insights/?url=" . urlencode($url) . "&tab=desktop";
                 /* -- Check Google Pagespeed insights for mobile */
                 $pagespeedMobileScore = "";
                 $pagespeedMobileUsability = "";
                 $pagespeedMobileUrl = "https://www.googleapis.com/pagespeedonline/v2/runPagespeed?url=" . urlencode($url) . "&strategy=mobile";
                 $pagespeedMobileResult = $fetchWithCurl($pagespeedMobileUrl);
                 if ($pagespeedMobileResult) {
                     $pagespeedJson = json_decode($pagespeedMobileResult, true);
                     if ($pagespeedJson) {
                         if (isset($pagespeedJson['responseCode']) && ($pagespeedJson['responseCode'] == "200" || $pagespeedJson['responseCode'] == "301" || $pagespeedJson['responseCode'] == "302")) {
                             if (isset($pagespeedJson['ruleGroups']['SPEED']['score'])) {
                                 $pagespeedMobileScore = intval($pagespeedJson['ruleGroups']['SPEED']['score']);
                             }
                             if (isset($pagespeedJson['ruleGroups']['USABILITY']['score'])) {
                                 $pagespeedMobileUsability = intval($pagespeedJson['ruleGroups']['USABILITY']['score']);
                             }
                         }
                     }
                 }
                 $pagespeedMobileUrl = "https://developers.google.com/speed/pagespeed/insights/?url=" . urlencode($url) . "&tab=mobile";
                 /* -- Scrape for JSON-LD before we remove the <script> tags */
                 $jsonLdTypes = array();
                 foreach ($dom->find('script[type=application/ld+json]') as $elem) {
                     $jsonArray = json_decode($elem->innertext, true);
                     if (isset($jsonArray['@type'])) {
                         array_push($jsonLdTypes, $jsonArray['@type']);
                     }
                 }
                 $jsonLdTypes = array_unique($jsonLdTypes);
                 /* -- Remove inline <script> and <style> tags, and then strip the DOM down */
                 foreach ($dom->find('style') as $element) {
                     $element->outertext = '';
                 }
                 foreach ($dom->find('script') as $element) {
                     $element->outertext = '';
                 }
                 $strippedDom = html_entity_decode($dom->plaintext);
                 $strippedDom = stripslashes($strippedDom);
                 $htmlDom = html_entity_decode($dom->outertext);
                 /* -- SEO statistics */
                 /* -- A page without a <title> yields no element; treat that as an empty title */
                 $titleElem = $dom->find('title', 0);
                 $titleTag = $titleElem ? html_entity_decode($titleElem->plaintext) : "";
                 $titleLength = strlen($titleTag);
                 $metaDescriptionTag = "";
                 $metaDescriptionLength = 0;
                 $elem = $dom->find('meta[name=description]', 0);
                 if ($elem) {
                     $metaDescriptionTag = html_entity_decode($elem->content);
                     $metaDescriptionLength = strlen($metaDescriptionTag);
                 }
                 $metaTwitterTag = "";
                 $elem = $dom->find('meta[name=twitter:card],meta[property=twitter:card]', 0);
                 if ($elem) {
                     $metaTwitterTag = html_entity_decode($elem->content);
                 }
                 $metaOpenGraphTag = "";
                 $elem = $dom->find('meta[property=og:type],meta[property=og:url],meta[property=og:title]', 0);
                 if ($elem) {
                     $metaOpenGraphTag = html_entity_decode($elem->content);
                 }
                 $hasRelPublisherTag = false;
                 $elem = $dom->find('link[rel=publisher]', 0);
                 if ($elem) {
                     $hasRelPublisherTag = true;
                 }
                 $emptyImageAlts = count($dom->find('img[!alt]'));
                 $h1Tags = count($dom->find('h1'));
                 $h2Tags = count($dom->find('h2'));
                 $h3Tags = count($dom->find('h3'));
                 $h4Tags = count($dom->find('h4'));
                 $h5Tags = count($dom->find('h5'));
                 $totalHTags = $h1Tags + $h2Tags + $h3Tags + $h4Tags + $h5Tags;
                 /* -- Headings are "effective" with exactly one h1, at least three headings overall, and no skipped levels */
                 $effectiveHTags = true;
                 if ($h1Tags != 1) {
                     $effectiveHTags = false;
                 }
                 if ($totalHTags < 3) {
                     $effectiveHTags = false;
                 }
                 if ($h2Tags == 0 && ($h3Tags || $h4Tags || $h5Tags)) {
                     $effectiveHTags = false;
                 }
                 if ($h3Tags == 0 && ($h4Tags || $h5Tags)) {
                     $effectiveHTags = false;
                 }
                 if ($h4Tags == 0 && $h5Tags) {
                     $effectiveHTags = false;
                 }
                 /* -- Text bytes relative to markup bytes, in percent; guard against a page with no markup beyond its text */
                 $markupLength = strlen($htmlDom) - strlen($strippedDom);
                 $textToHtmlRatio = $markupLength > 0 ? strlen($strippedDom) / $markupLength * 100 : 0;
                 $strippedDom = preg_replace('/\\s+/', ' ', $strippedDom);
                 /* -- Extract the page keywords, and clean them up a bit */
                 $pageKeywords = craft()->seomatic->extractKeywords($strippedDom);
                 $pageKeywords = str_replace(",,", ",", $pageKeywords);
                 $pageKeywords = str_replace(" ,", ",", $pageKeywords);
                 $pageKeywords = str_replace(" .", ".", $pageKeywords);
                 $pageKeywords = preg_replace('/\\.+/', '.', $pageKeywords);
                 $pageKeywords = preg_replace('/,+/', ',', $pageKeywords);
                 $pageKeywords = str_replace(",.,", ",", $pageKeywords);
                 $pageKeywords = html_entity_decode($pageKeywords, ENT_COMPAT, 'UTF-8');
                 /* -- Focus keywords */
                 foreach ($keywordsKeys as $keywordsKey) {
                     $keywordsKey = trim($keywordsKey);
                     if (strlen($keywordsKey)) {
                         /* -- Lower-case the keyword once; all comparisons below are case-insensitive */
                         $needle = strtolower($keywordsKey);
                         /* -- Occurrences in h1 and h2 headings are both tallied under 'appearsInH1Tag' */
                         $appearsInH1Tag = 0;
                         foreach ($dom->find('h1') as $element) {
                             $appearsInH1Tag += substr_count(strtolower($element->plaintext), $needle);
                         }
                         foreach ($dom->find('h2') as $element) {
                             $appearsInH1Tag += substr_count(strtolower($element->plaintext), $needle);
                         }
                         $appearsInImgTag = 0;
                         foreach ($dom->find('img') as $element) {
                             $appearsInImgTag += substr_count(strtolower($element->alt), $needle);
                         }
                         $appearsInAhrefTag = 0;
                         foreach ($dom->find('a') as $element) {
                             $appearsInAhrefTag += substr_count(strtolower($element->plaintext), $needle);
                         }
                         $keywords[$keywordsKey] = array(
                             'appearsInTitleTag' => substr_count(strtolower($titleTag), $needle),
                             'appearsInUrl' => substr_count(strtolower($url), $needle),
                             'appearsInMetaDescriptionTag' => substr_count(strtolower($metaDescriptionTag), $needle),
                             'appearsInH1Tag' => $appearsInH1Tag,
                             'appearsInAhrefTag' => $appearsInAhrefTag,
                             'appearsInImgTag' => $appearsInImgTag,
                             'appearsInPageKeywords' => substr_count(strtolower($pageKeywords), $needle),
                             'appearsOnWebPage' => substr_count(strtolower($strippedDom), $needle),
                         );
                     }
                 }
                 /* -- Text statistics */
                 $wordCount = $textStatistics->wordCount($strippedDom);
                 /* -- Reading time in minutes at 200 words per minute, never less than 1.
                       floor() returns a float, so a strict comparison with int 0 would never apply the minimum. */
                 $readingTime = max(1, (int) floor($wordCount / 200));
                 $fleschKincaidReadingEase = $textStatistics->fleschKincaidReadingEase($strippedDom);
                 $fleschKincaidGradeLevel = $textStatistics->fleschKincaidGradeLevel($strippedDom);
                 $gunningFogScore = $textStatistics->gunningFogScore($strippedDom);
                 $colemanLiauIndex = $textStatistics->colemanLiauIndex($strippedDom);
                 $smogIndex = $textStatistics->smogIndex($strippedDom);
                 $automatedReadabilityIndex = $textStatistics->automatedReadabilityIndex($strippedDom);
                 $vars = array(
                     'titleTag' => $titleTag,
                     'titleLength' => $titleLength,
                     'metaDescriptionTag' => $metaDescriptionTag,
                     'metaDescriptionLength' => $metaDescriptionLength,
                     'metaTwitterTag' => $metaTwitterTag,
                     'metaOpenGraphTag' => $metaOpenGraphTag,
                     'hasRelPublisherTag' => $hasRelPublisherTag,
                     'jsonLdTypes' => $jsonLdTypes,
                     'hasRobotsTxt' => $hasRobotsTxt,
                     'hasSitemap' => $hasSitemap,
                     'emptyImageAlts' => $emptyImageAlts,
                     'validatorUrl' => $validatorUrl,
                     'validatorStatus' => $validatorStatus,
                     'validatorErrors' => $validatorErrors,
                     'validatorWarnings' => $validatorWarnings,
                     'pageSpeedPageStats' => $pageSpeedPageStats,
                     'pagespeedDesktopScore' => $pagespeedDesktopScore,
                     'pagespeedDesktopUrl' => $pagespeedDesktopUrl,
                     'pagespeedMobileScore' => $pagespeedMobileScore,
                     'pagespeedMobileUsability' => $pagespeedMobileUsability,
                     'pagespeedMobileUrl' => $pagespeedMobileUrl,
                     'sslReturnCode' => $sslReturnCode,
                     'h1Tags' => $h1Tags,
                     'h2Tags' => $h2Tags,
                     'h3Tags' => $h3Tags,
                     'h4Tags' => $h4Tags,
                     'h5Tags' => $h5Tags,
                     'effectiveHTags' => $effectiveHTags,
                     'textToHtmlRatio' => $textToHtmlRatio,
                     'wordCount' => $wordCount,
                     'readingTime' => $readingTime,
                     'pageKeywords' => $pageKeywords,
                     'keywords' => $keywords,
                     'fleschKincaidReadingEase' => $fleschKincaidReadingEase,
                     'fleschKincaidGradeLevel' => $fleschKincaidGradeLevel,
                     'gunningFogScore' => $gunningFogScore,
                     'colemanLiauIndex' => $colemanLiauIndex,
                     'smogIndex' => $smogIndex,
                     'automatedReadabilityIndex' => $automatedReadabilityIndex,
                 );
                 $this->renderTemplate('_seo_metrics.twig', $vars);
             } else {
                 $this->renderTemplate('_error', array('errorMessage' => "Error parsing the DOM.  Is this a valid, publicly accessible URL?"));
             }
         } else {
             $this->renderTemplate('_error', array('errorMessage' => "Error loading the webpage. Is this a valid, publicly accessible URL?"));
         }
         /* -- Restore the template path that was active before this action ran */
         method_exists(craft()->templates, 'setTemplatesPath') ? craft()->templates->setTemplatesPath($oldPath) : craft()->path->setTemplatesPath($oldPath);
     }
     $this->parsingDom = false;
 }