//-------------------------------------------------------------------------------------------------------------------------------|
//-----if you start getting a bunch of 0's or NA's, remember that there are pretty low limits to Alexa and Google calls...-------|
//-------------------------------------------------------------------------------------------------------------------------------|

// Fetch every metric exactly once and reuse the values for both the HTML
// report below and the (optional) MySQL insert. Calling the services a second
// time inside the echo statements doubles the request count against the
// rate-limited Alexa/Google endpoints mentioned above.
// $row[0] is used as the bare domain: it is printed as the heading and passed
// (with and without a "www." prefix) to the backlink lookups.
$alexa = SEOstats\Alexa::getGlobalRank();
$pagerank = SEOstats\Google::getPageRank();
$g_links = SEOstats\Google::getBacklinksTotal("www." . $row[0]);
$g_links_no_www = SEOstats\Google::getBacklinksTotal($row[0]);
$g_plus_count = SEOstats\Social::getGooglePlusShares();
$twitter_shares = SEOstats\Social::getTwitterShares();
$linkedin_shares = SEOstats\Social::getLinkedInShares();

// Outer row / heading.
echo "<div class='row'><div class='col-md-12'><h3>" . $row[0] . "</h3>";

// Column 1: Alexa data.
echo "<div class='row'><div class='col-md-7'>Alexa rank = " . $alexa . "<br />";
echo "Daily traffic graph: " . SEOstats\Alexa::getTrafficGraph(1) . "<br /></div>";

// Column 2: Google and social metrics (each metric is printed once).
echo "<div class='col-md-3'>PageRank = " . $pagerank . "<br />";
echo "Google links = " . $g_links . "<br />";
echo "Google links (no www) = " . $g_links_no_www . "<br />";
echo "Plus ones: " . $g_plus_count . "<br />";
echo "Twitter shares: " . $twitter_shares . "<br />";
echo "Linkedin shares: " . $linkedin_shares . "<br /><br /></div>";

// Column 3: SEMRush graph, then close column, inner row, col-md-12 and outer row.
echo "<div class='col-md-3'>SEMRush Search Engine Traffic Graph: " . SEOstats\SemRush::getDomainGraph(1) . "<br /></div></div></div></div>";

//----------------------------------------------------------------------------------------|
//--------------------uncomment this stuff when you want to insert into MySQL!!-----------|
//----------------------------------------------------------------------------------------|
// NOTE(review): the statement needs one placeholder per column (9 columns => 9 "?"),
// matching the 9 type characters in "siiiiiiii".
// NOTE(review): the bind below references $url, $link_num, $linked_pages and $g_plus_ones,
// none of which are assigned in this fragment (the plus-one count is held in
// $g_plus_count here) - confirm they are set elsewhere before uncommenting.
/* Prepared statement, stage 1: prepare */
//if (!($stmt = $mysqli->prepare("INSERT INTO seo (url, alexa, pagerank, link_num, linked_pages, g_links, g_links_no_www, g_plus_ones, linkedin_shares) VALUES (?,?,?,?,?,?,?,?,?)"))) {
//    echo "Prepare failed: (" . $mysqli->errno . ") " . $mysqli->error;
//}
//if (!$stmt->bind_param("siiiiiiii", $url, $alexa, $pagerank, $link_num, $linked_pages, $g_links, $g_links_no_www, $g_plus_ones, $linkedin_shares)) {
//    echo "Binding parameters failed: (" . $stmt->errno . ") " . $stmt->error;
//}
/**
 * Collects SEO metrics for every SiteUrl record and saves one Data row per site.
 *
 * For each site the method gathers the Google PageRank, the "site:" and "link:"
 * result counts (web search first, SEOstats API as fallback), Facebook/Twitter/
 * LinkedIn share counts, per-filetype result counts, and the site information
 * returned by get_url_info() plus robots.txt / sitemap.xml existence checks.
 * Progress and validation errors are written to standard output. Random short
 * pauses are inserted between the remote requests.
 *
 * @param mixed $taskID Value stored in the TaskID attribute of every saved row.
 * @return void
 */
private function GetSEOState($taskID)
{
    $dataProvider = new CActiveDataProvider(SiteUrl::model(), array('pagination' => false));
    echo '資料庫共' . $dataProvider->totalItemCount . '筆資料' . "\r\n";

    // File types whose "site:<domain> filetype:<type>" result counts are collected.
    $fileTypeList = array('pdf', 'doc', 'docx', 'ppt', 'pptx', 'ps', 'eps');

    // Number of rows saved so far; also used to vary the delay pattern below.
    $i = 0;
    foreach ($dataProvider->getData() as $record) {
        $site = $record->site;
        $id = $record->SiteID;

        // Strip a leading http:// or https:// scheme, if present.
        $site = preg_replace('#^https?://#', '', $site);

        $now = new DateTime();

        // PageRank: anything non-numeric, or a failed lookup, is stored as null.
        $pagerank = null;
        try {
            $rank = \SEOstats\Services\Google::getPageRank($site);
            if (is_numeric($rank)) {
                $pagerank = $rank;
            }
        } catch (Exception $e) {
            // Best effort: keep $pagerank as null.
        }
        usleep(rand(1000, 3000));

        // Indexed-page count: taken from the web search result; the SEOstats
        // Google API is only used when the web search yields null.
        if ($i % 2 == 1) {
            // Extra pause on every other saved row.
            usleep(rand(500, 1000));
        }
        $googleIds = $this->GetGoogleSearch("site:{$site}");
        sleep(3);
        if (is_null($googleIds)) {
            // Retry through the other channel.
            usleep(rand(5000, 10000));
            echo '[log]' . $site . "使用重抓索引資料;\r\n";
            $googleIds = \SEOstats\Services\Google::getSiteindexTotal($site);
        }

        // Backlink count: same strategy as above.
        if ($i % 2 == 1) {
            usleep(rand(500, 1000));
        }
        $googleLinks = $this->GetGoogleSearch("link:{$site}");
        if (is_null($googleLinks)) {
            // No data from the web search: retry through the API.
            usleep(rand(5000, 10000));
            echo '[log]' . $site . "使用API重抓頁面數資料;\r\n";
            $googleLinks = \SEOstats\Services\Google::getBacklinksTotal($site);
        }
        usleep(rand(1010, 15020));

        // Social share data.
        // NOTE(review): setUrl() presumably configures the URL used by the static
        // Social::* calls below - confirm against the SEOstats library.
        $seostats = new \SEOstats\SEOstats();
        $seostats->setUrl("http://{$site}");
        $fb = Social::getFacebookShares();
        usleep(rand(4000, 10000));

        // Per-filetype result counts.
        $fileCount = array();
        $error_statues = 0;
        foreach ($fileTypeList as $value) {
            usleep(rand(5000, 10000));

            // Once a lookup has failed on both channels the web search is skipped
            // for the remaining file types and only the API is tried.
            $count = null;
            if ($error_statues == 0) {
                $count = $this->GetGoogleSearch("site:{$site}" . ' filetype:' . $value);
            }
            if (is_null($count)) {
                $count = \SEOstats\Services\Google::getSiteFileTypeTotal($site, $value);
                if (is_null($count)) {
                    // Both channels failed: this indicates Google has blocked us.
                    $count = 0;
                    $error_statues++;
                }
            }
            $fileCount[$value] = $count;
        }
        usleep(rand(4000, 10000));

        // Site information, including some of the site's configuration details.
        $info = $this->get_url_info($site);

        // -1 is treated as "no file time available" (presumably the sentinel
        // returned by get_url_info(); verify against that helper).
        $fileTime = null;
        if (isset($info['filetime']) && $info['filetime'] != -1) {
            $fileTime = date("Y-m-d H:i:s", $info['filetime']);
        }

        $model = new Data();
        $model->attributes = array(
            'SiteID' => $id,
            'GoogleData' => $googleIds,
            'google_backlink' => $googleLinks,
            'filetime' => $fileTime,
            'robot' => $this->remoteFileExists("{$site}/robots.txt"),
            'sitemap' => $this->remoteFileExists("{$site}/sitemap.xml"),
            'Time' => $now->format('Y-m-d H:i:s'),
            'Facebook' => $fb['total_count'],
            'FB_share_count' => $fb['share_count'],
            'FB_like_count' => $fb['like_count'],
            'FB_comment_count' => $fb['comment_count'],
            'FB_commentsbox_count' => $fb['commentsbox_count'],
            'FB_click_count' => $fb['click_count'],
            'TwitterShares' => Social::getTwitterShares(),
            'LinkedInShares' => Social::getLinkedInShares(),
            'pdf' => $fileCount['pdf'],
            'doc' => $fileCount['doc'],
            'docx' => $fileCount['docx'],
            'ppt' => $fileCount['ppt'],
            'pptx' => $fileCount['pptx'],
            'ps' => $fileCount['ps'],
            'eps' => $fileCount['eps'],
            'YY' => $now->format('Y'),
            'MM' => $now->format('m'),
            'DD' => $now->format('d'),
            'TaskID' => $taskID,
            'google_page_rank' => $pagerank,
        );

        if ($model->save()) {
            $i++;
        } else {
            print "網址: {$site} 出現錯誤";
            print_r($model->getErrors());
        }
        usleep(rand(100, 400));
    }

    echo '執行完畢,共儲存' . $i . '筆資料';
}