/**
 * Old-style constructor of the keyword API.
 *
 * Loads the keyword and report controller files, creates one instance of
 * each controller and caches all search engine records in $this->seList,
 * indexed by the 'id' field of every record.
 *
 * NOTE(review): a method named after its class is no longer used as the
 * constructor from PHP 8.0 on - confirm the PHP version this runs under.
 */
function KeywordAPI() {
    include_once SP_CTRLPATH . "/keyword.ctrl.php";
    include_once SP_CTRLPATH . "/report.ctrl.php";

    $this->ctrler = new KeywordController();
    $this->reportCtrler = new ReportController();

    $engineCtrler = new SearchEngineController();
    $engineRows = $engineCtrler->__getAllSearchEngines();

    // re-key the search engine rows by their id
    $this->seList = array();
    foreach ($engineRows as $engineRow) {
        $engineId = $engineRow['id'];
        $this->seList[$engineId] = $engineRow;
    }
}
/**
 * Old-style constructor of the website API.
 *
 * Creates the website controller, caches all search engine records in
 * $this->seList indexed by their 'id' field, and finally loads the
 * saturation checker, rank, backlink, directory and keyword controller files.
 *
 * NOTE(review): a method named after its class is no longer used as the
 * constructor from PHP 8.0 on - confirm the PHP version this runs under.
 */
function WebsiteAPI() {
    $this->ctrler = new WebsiteController();

    $engineCtrler = new SearchEngineController();
    $engineRows = $engineCtrler->__getAllSearchEngines();

    // re-key the search engine rows by their id
    $this->seList = array();
    foreach ($engineRows as $engineRow) {
        $engineId = $engineRow['id'];
        $this->seList[$engineId] = $engineRow;
    }

    // controller files needed by the other API methods
    include_once SP_CTRLPATH . "/saturationchecker.ctrl.php";
    include_once SP_CTRLPATH . "/rank.ctrl.php";
    include_once SP_CTRLPATH . "/backlink.ctrl.php";
    include_once SP_CTRLPATH . "/directory.ctrl.php";
    include_once SP_CTRLPATH . "/keyword.ctrl.php";
}
/**
 * Cron task: crawl the search engine positions of a website's active keywords.
 *
 * Keywords that already have a keywordcrontracker row for today's midnight
 * timestamp are skipped. For every remaining keyword the crawl result of each
 * search engine is stored through ReportController::saveMatchedKeywordInfo().
 * When SP_NUMBER_KEYWORDS_CRON is greater than zero the script terminates
 * (die) once that many keywords have been crawled in this run.
 *
 * @param int $websiteId Id of the website whose keywords are checked.
 */
function keywordPositionCheckerCron($websiteId) {
    include_once SP_CTRLPATH . "/searchengine.ctrl.php";
    include_once SP_CTRLPATH . "/report.ctrl.php";

    // the id is interpolated into the SQL below, so force it to an integer
    $websiteId = intval($websiteId);

    $reportController = new ReportController();
    $seController = new SearchEngineController();
    $reportController->seList = $seController->__getAllCrawlFormatedSearchEngines();

    // get keywords not to be checked
    $time = mktime(0, 0, 0, date('m'), date('d'), date('Y'));
    $sql = "select distinct(keyword_id) from keywordcrontracker kc, keywords k where k.id=kc.keyword_id and k.website_id={$websiteId} and time={$time}";
    $keyList = $this->db->select($sql);

    // the leading 0 keeps the "not in(...)" list valid when nothing is excluded
    $excludeKeyList = array(0);
    if (is_array($keyList)) {
        foreach ($keyList as $info) {
            $excludeKeyList[] = intval($info['keyword_id']);
        }
    }

    // get keywords needs to be checked
    $sql = "select k.*,w.url from keywords k,websites w where k.website_id=w.id and w.id={$websiteId} and k.status=1";
    $sql .= " and k.id not in(" . implode(",", $excludeKeyList) . ") order by k.name";
    $keywordList = $reportController->db->select($sql);
    if (!is_array($keywordList)) {
        $keywordList = array();
    }

    $this->debugMsg("Starting keyword position checker cron for website: {$this->websiteInfo['name']}....<br>\n");

    // loop through each keyword
    foreach ($keywordList as $keywordInfo) {
        $reportController->seFound = 0;
        $crawlResult = $reportController->crawlKeyword($keywordInfo, '', true);

        foreach ($crawlResult as $sengineId => $matchList) {
            $seDomain = $reportController->seList[$sengineId]['domain'];

            if ($matchList['status']) {
                foreach ($matchList['matched'] as $i => $matchInfo) {
                    // only the first match of an engine is saved with the remove flag set
                    $remove = ($i == 0);
                    $matchInfo['se_id'] = $sengineId;
                    $matchInfo['keyword_id'] = $keywordInfo['id'];
                    $repCtrler = new ReportController();
                    $repCtrler->saveMatchedKeywordInfo($matchInfo, $remove);
                }

                $this->debugMsg("Successfully crawled keyword <b>{$keywordInfo['name']}</b> results from " . $seDomain . ".....<br>\n");
            } else {
                // the keyword name is wrapped in a properly opened <b> tag
                $this->debugMsg("Crawling keyword <b>{$keywordInfo['name']}</b> results from " . $seDomain . " failed......<br>\n");
            }
        }

        // to implement split cron execution feature
        if (SP_NUMBER_KEYWORDS_CRON > 0 && !empty($crawlResult)) {
            $this->checkedKeywords++;

            // ">=" so the limit still triggers if the counter started beyond it
            if ($this->checkedKeywords >= SP_NUMBER_KEYWORDS_CRON) {
                die("Reached total number of allowed keywords(" . SP_NUMBER_KEYWORDS_CRON . ") in each cron job");
            }
        }

        if (empty($reportController->seFound)) {
            $this->debugMsg("Keyword <b>{$keywordInfo['name']}</b> not assigned to required search engines........\n");
        }

        // pause between keywords
        sleep(SP_CRAWL_DELAY);
    }
}
/**
 * Shows the keyword edit form, or the keyword list when no id is given.
 *
 * The website, language, country and search engine lists are always handed
 * to the view first. With a non-empty $keywordId the 'keyword/edit' view is
 * rendered and the script exits; otherwise listKeywords() is called.
 *
 * @param mixed $keywordId Id of the keyword to edit.
 * @param mixed $listInfo  Values to pre-fill the form with; when empty they
 *                         are loaded through __getKeywordInfo().
 */
function editKeyword($keywordId, $listInfo = '') {
    $userId = isLoggedIn();

    // data for the select boxes of the form
    $siteCtrler = new WebsiteController();
    $siteList = $siteCtrler->__getAllWebsites($userId, true);
    $this->set('websiteList', $siteList);

    $languageCtrler = new LanguageController();
    $languageList = $languageCtrler->__getAllLanguages();
    $this->set('langList', $languageList);
    $this->set('langNull', true);

    $ctryCtrler = new CountryController();
    $ctryList = $ctryCtrler->__getAllCountries();
    $this->set('countryList', $ctryList);
    $this->set('countryNull', true);

    $engineCtrler = new SearchEngineController();
    $engineList = $engineCtrler->__getAllSearchEngines();
    $this->set('seList', $engineList);

    // nothing to edit: fall back to the keyword list
    if (empty($keywordId)) {
        $this->listKeywords();
        return;
    }

    // no submitted values: load the stored keyword
    if (empty($listInfo)) {
        $listInfo = $this->__getKeywordInfo($keywordId);
        $listInfo['oldName'] = $listInfo['name'];
        // stored as a ':' separated string, the view gets a list
        $listInfo['searchengines'] = explode(':', $listInfo['searchengines']);
    }

    $this->set('post', $listInfo);
    $this->render('keyword/edit');
    exit;
}
/**
 * Builds the "Overall Report Summary" (keyword positions and website statistics).
 *
 * Depending on $searchInfo['doc_type'] the report is exported as CSV
 * ("export"), rendered as print version ("print") or rendered normally.
 * When $cronUserId is given the rendered content is returned instead of
 * being sent to the output.
 *
 * Keys read from $searchInfo: doc_type, website_id, report_type
 * ('keyword-position' or 'website-stats', empty means both), from_time,
 * to_time, order_col, order_val.
 *
 * @param array|string $searchInfo Search options; anything that is not an
 *                                 array is treated as "no options".
 * @param mixed        $cronUserId User id to build the report for when
 *                                 called from cron, false otherwise.
 * @return mixed The view content when $cronUserId is not empty, nothing otherwise.
 */
function showOverallReportSummary($searchInfo = '', $cronUserId = false) {
    // the default argument is an empty string, normalise it so that the
    // key lookups below are safe
    if (!is_array($searchInfo)) {
        $searchInfo = array();
    }

    $docType = isset($searchInfo['doc_type']) ? $searchInfo['doc_type'] : '';
    $reportType = isset($searchInfo['report_type']) ? $searchInfo['report_type'] : '';

    $spTextHome = $this->getLanguageTexts('home', $_SESSION['lang_code']);
    $this->set('spTextHome', $spTextHome);
    $this->set('cronUserId', $cronUserId);

    $exportVersion = false;
    $exportContent = "";
    switch ($docType) {
        case "export":
            $exportVersion = true;
            break;

        case "print":
            $this->set('printVersion', true);
            break;
    }

    $this->set('sectionHead', 'Overall Report Summary');
    $userId = empty($cronUserId) ? isLoggedIn() : $cronUserId;
    $isAdmin = isAdmin();

    $websiteCtrler = new WebsiteController();
    $websiteList = $websiteCtrler->__getAllWebsites($userId, true);
    if (!is_array($websiteList)) {
        $websiteList = array();
    }
    $this->set('siteList', $websiteList);

    // default to the first website of the user; 0 when there is none
    if (isset($searchInfo['website_id'])) {
        $websiteId = $searchInfo['website_id'];
    } else {
        $websiteId = isset($websiteList[0]['id']) ? $websiteList[0]['id'] : 0;
    }
    $websiteId = intval($websiteId);
    $this->set('websiteId', $websiteId);
    $urlarg = "website_id={$websiteId}";

    $websiteUrl = "";
    foreach ($websiteList as $websiteInfo) {
        if ($websiteInfo['id'] == $websiteId) {
            $websiteUrl = $websiteInfo['url'];
            break;
        }
    }
    $this->set('websiteUrl', $websiteUrl);

    $reportTypes = array(
        'keyword-position' => $this->spTextTools["Keyword Position Summary"],
        'website-stats' => $spTextHome["Website Statistics"],
    );
    $this->set('reportTypes', $reportTypes);
    $urlarg .= "&report_type=" . $reportType;

    // report period: defaults to yesterday .. today
    if (!empty($searchInfo['from_time'])) {
        $fromTime = strtotime($searchInfo['from_time'] . ' 00:00:00');
    } else {
        $fromTime = mktime(0, 0, 0, date('m'), date('d') - 1, date('Y'));
    }

    if (!empty($searchInfo['to_time'])) {
        $toTime = strtotime($searchInfo['to_time'] . ' 00:00:00');
    } else {
        $toTime = mktime(0, 0, 0, date('m'), date('d'), date('Y'));
    }

    $fromTimeShort = date('Y-m-d', $fromTime);
    $this->set('fromTime', $fromTimeShort);
    $toTimeShort = date('Y-m-d', $toTime);
    $this->set('toTime', $toTimeShort);
    $urlarg .= "&from_time={$fromTimeShort}&to_time={$toTimeShort}";

    $seController = new SearchEngineController();
    $this->seLIst = $seController->__getAllSearchEngines();
    if (!is_array($this->seLIst)) {
        $this->seLIst = array();
    }
    $this->set('seList', $this->seLIst);

    $this->set('isAdmin', $isAdmin);
    $this->set('urlarg', $urlarg);

    # keyword position report section
    if (empty($reportType) || $reportType == 'keyword-position') {

        // to find order col
        if (!empty($searchInfo['order_col'])) {
            $orderCol = $searchInfo['order_col'];
            // only the two valid sort directions are passed on
            $orderVal = (isset($searchInfo['order_val']) && $searchInfo['order_val'] == 'DESC') ? 'DESC' : 'ASC';
        } else {
            $orderCol = isset($this->seLIst[0]['id']) ? $this->seLIst[0]['id'] : null;
            $orderVal = 'ASC';
        }

        $this->set('orderCol', $orderCol);
        $this->set('orderVal', $orderVal);

        $keywordController = new KeywordController();
        $list = $keywordController->__getAllKeywords($userId, $websiteId, true, true, $orderVal);
        if (!is_array($list)) {
            $list = array();
        }

        $indexList = array();
        // initialised so that the view/export get an empty list when no keyword exists
        $keywordList = array();
        foreach ($list as $keywordInfo) {
            $positionInfo = $this->__getKeywordSearchReport($keywordInfo['id'], $fromTime, $toTime);

            // check whether the sorting search engine is there; keywords
            // without a rank for it get 10000 as sort value
            $indexList[$keywordInfo['id']] = empty($positionInfo[$orderCol]) ? 10000 : $positionInfo[$orderCol]['rank'];

            $keywordInfo['position_info'] = $positionInfo;
            $keywordList[$keywordInfo['id']] = $keywordInfo;
        }

        // sort array according the value
        if ($orderCol != 'keyword') {
            if ($orderVal == 'DESC') {
                arsort($indexList);
            } else {
                asort($indexList);
            }
        }

        $this->set('indexList', $indexList);

        if ($exportVersion) {
            $spText = $_SESSION['text'];
            $reportHeading = $this->spTextTools['Keyword Position Summary'] . "({$fromTimeShort} - {$toTimeShort})";
            $exportContent .= createExportContent(array('', $reportHeading, ''));
            $exportContent .= createExportContent(array());

            $headList = array($spText['common']['Website'], $spText['common']['Keyword']);
            foreach ($this->seLIst as $seInfo) {
                $headList[] = $seInfo['domain'];
            }
            $exportContent .= createExportContent($headList);

            // one row per keyword, in the sorted order of $indexList
            foreach ($indexList as $keywordId => $rankValue) {
                $listInfo = $keywordList[$keywordId];
                $positionInfo = $listInfo['position_info'];

                $valueList = array($listInfo['weburl'], $listInfo['name']);
                foreach ($this->seLIst as $seInfo) {
                    $rank = empty($positionInfo[$seInfo['id']]['rank']) ? '-' : $positionInfo[$seInfo['id']]['rank'];
                    $rankDiff = empty($positionInfo[$seInfo['id']]['rank_diff']) ? '' : $positionInfo[$seInfo['id']]['rank_diff'];
                    $valueList[] = $rank . strip_tags($rankDiff);
                }

                $exportContent .= createExportContent($valueList);
            }
        } else {
            $this->set('list', $keywordList);
            $this->set('keywordPos', true);
        }
    }

    # website report section
    if (empty($reportType) || $reportType == 'website-stats') {
        include_once SP_CTRLPATH . "/saturationchecker.ctrl.php";
        include_once SP_CTRLPATH . "/rank.ctrl.php";
        include_once SP_CTRLPATH . "/backlink.ctrl.php";
        include_once SP_CTRLPATH . "/directory.ctrl.php";

        $rankCtrler = new RankController();
        $backlinlCtrler = new BacklinkController();
        $saturationCtrler = new SaturationCheckerController();
        $dirCtrler = new DirectoryController();

        $websiteRankList = array();
        foreach ($websiteList as $listInfo) {

            // if only needs to show one website selected
            if (!empty($websiteId) && $listInfo['id'] != $websiteId) {
                continue;
            }

            # rank reports
            $report = $rankCtrler->__getWebsiteRankReport($listInfo['id'], $fromTime, $toTime);
            $report = empty($report[0]) ? array() : $report[0];
            $listInfo['alexarank'] = empty($report['alexa_rank']) ? "-" : $report['alexa_rank'] . " " . $report['rank_diff_alexa'];
            $listInfo['googlerank'] = empty($report['google_pagerank']) ? "-" : $report['google_pagerank'] . " " . $report['rank_diff_google'];

            # back links reports
            $report = $backlinlCtrler->__getWebsitebacklinkReport($listInfo['id'], $fromTime, $toTime);
            $report = empty($report[0]) ? array() : $report[0];
            $listInfo['google']['backlinks'] = empty($report['google']) ? "-" : $report['google'] . " " . $report['rank_diff_google'];
            $listInfo['alexa']['backlinks'] = empty($report['alexa']) ? "-" : $report['alexa'] . " " . $report['rank_diff_alexa'];
            $listInfo['msn']['backlinks'] = empty($report['msn']) ? "-" : $report['msn'] . " " . $report['rank_diff_msn'];

            # saturation reports
            $report = $saturationCtrler->__getWebsiteSaturationReport($listInfo['id'], $fromTime, $toTime);
            $report = empty($report[0]) ? array() : $report[0];
            $listInfo['google']['indexed'] = empty($report['google']) ? "-" : $report['google'] . " " . $report['rank_diff_google'];
            $listInfo['msn']['indexed'] = empty($report['msn']) ? "-" : $report['msn'] . " " . $report['rank_diff_msn'];

            # directory submission totals
            $listInfo['dirsub']['total'] = $dirCtrler->__getTotalSubmitInfo($listInfo['id']);
            $listInfo['dirsub']['active'] = $dirCtrler->__getTotalSubmitInfo($listInfo['id'], true);
            $websiteRankList[] = $listInfo;
        }

        // if export function called
        if ($exportVersion) {
            $exportContent .= createExportContent(array());
            $exportContent .= createExportContent(array());
            $exportContent .= createExportContent(array('', $spTextHome['Website Statistics'] . "({$fromTimeShort} - {$toTimeShort})", ''));
            $exportContent .= createExportContent(array());

            $headList = array(
                $_SESSION['text']['common']['Id'],
                $_SESSION['text']['common']['Website'],
                'Google Pagerank',
                'Alexa Rank',
                'Google ' . $spTextHome['Backlinks'],
                'alexa ' . $spTextHome['Backlinks'],
                'Bing ' . $spTextHome['Backlinks'],
                'Google ' . $spTextHome['Indexed'],
                'Bing ' . $spTextHome['Indexed'],
                $_SESSION['text']['common']['Total'] . ' Submission',
                $_SESSION['text']['common']['Active'] . ' Submission',
            );
            $exportContent .= createExportContent($headList);

            foreach ($websiteRankList as $websiteInfo) {
                $valueList = array(
                    $websiteInfo['id'],
                    $websiteInfo['url'],
                    strip_tags($websiteInfo['googlerank']),
                    strip_tags($websiteInfo['alexarank']),
                    strip_tags($websiteInfo['google']['backlinks']),
                    strip_tags($websiteInfo['alexa']['backlinks']),
                    strip_tags($websiteInfo['msn']['backlinks']),
                    strip_tags($websiteInfo['google']['indexed']),
                    strip_tags($websiteInfo['msn']['indexed']),
                    $websiteInfo['dirsub']['total'],
                    $websiteInfo['dirsub']['active'],
                );
                $exportContent .= createExportContent($valueList);
            }
        } else {
            $this->set('websiteRankList', $websiteRankList);
            $this->set('websiteStats', true);
        }
    }

    if ($exportVersion) {
        exportToCsv('archived_report', $exportContent);
    } else {
        $this->set('searchInfo', $searchInfo);

        // if execution through cron job then just return the content to send through mail
        if (!empty($cronUserId)) {
            return $this->getViewContent('report/archive');
        } else {
            $this->render('report/archive');
        }
    }
}
/**
 * Fetches the backlink count of $this->url from one search engine.
 *
 * The result page of the engine is downloaded through $this->spider and the
 * count is taken from the first regular expression that matches. When no
 * expression matches, the crawl is logged as failed (captcha or regex
 * error). The crawl log entry is updated in every case.
 *
 * @param string $engine 'google', 'msn' or 'alexa'. For any other value
 *                       nothing is crawled and 0 is returned.
 * @return mixed 0 in demo mode or when no count was found; otherwise the
 *               count with thousands separators removed (a numeric string
 *               for google/msn, an integer for alexa).
 */
function __getBacklinks($engine) {
    if (SP_DEMO && !empty($_SERVER['REQUEST_METHOD'])) {
        return 0;
    }

    $backlinkCount = 0;
    $crawlInfo = array();
    $v = array();

    // per engine: the url to crawl and the patterns to try, in order
    $url = null;
    $patternList = array();
    $castToInt = false;
    switch ($engine) {

        // google
        case 'google':
            $url = $this->backUrlList[$engine] . urlencode($this->url);
            $patternList = array(
                '/about ([0-9\\,]+) result/si',
                '/<div id=resultStats>([0-9\\,]+) result/si',
                '/([0-9\\,]+) result/si',
                '/about <b>([0-9\\,]+)<\\/b> linking/si',
            );
            break;

        // msn
        case 'msn':
            $url = formatUrl($this->url, false);
            $url = $this->backUrlList[$engine] . urlencode(addHttpToUrl($url));
            $patternList = array(
                '/([0-9\\,]+) results/si',
                '/id="count".*?>.*?\\(([0-9\\,]+).*?\\)/si',
                '/id="count".*?>.*?([0-9\\,]+).*?/si',
                '/class="sb_count".*?>.*?([0-9\\,]+).*?<\\/span>/si',
            );
            break;

        // alexa
        case 'alexa':
            $url = $this->backUrlList[$engine] . urlencode($this->url);
            $patternList = array(
                '/id="linksin-panel-content".*>([0-9,]+)<\\/span>/si',
            );
            // the alexa count has always been returned as an integer
            $castToInt = true;
            break;
    }

    // crawl only when the engine is a known one
    if ($url !== null) {
        $v = $this->spider->getContent($url);
        $pageContent = empty($v['page']) ? '' : $v['page'];

        // the first matching pattern wins
        $r = array();
        $matched = false;
        foreach ($patternList as $pattern) {
            if (preg_match($pattern, $pageContent, $r)) {
                $matched = true;
                break;
            }
        }

        if (!$matched) {
            $crawlInfo['crawl_status'] = 0;
            $crawlInfo['log_message'] = SearchEngineController::isCaptchInSearchResults($pageContent) ? "<font class=error>Captcha found</font> in search result page" : "Regex not matched error occured while parsing search results!";
        }

        if (!empty($r[1])) {
            $backlinkCount = str_replace(',', '', $r[1]);
            if ($castToInt) {
                $backlinkCount = intval($backlinkCount);
            }
        }
    }

    // update crawl log
    $crawlLogCtrl = new CrawlLogController();
    $crawlInfo['crawl_type'] = 'backlink';
    $crawlInfo['ref_id'] = $this->url;
    $crawlInfo['subject'] = $engine;
    $logId = isset($v['log_id']) ? $v['log_id'] : null;
    $crawlLogCtrl->updateCrawlLog($logId, $crawlInfo);

    return $backlinkCount;
}
} // if from popup if ($fromPopUp) { $logLink = scriptAJAXLinkHref('log.php', 'content', "sec=crawl_log_details&id=" . $listInfo['id'], $listInfo['id']); } else { $logLink = scriptAJAXLinkHrefDialog('log.php', 'content', "sec=crawl_log_details&id=" . $listInfo['id'], $listInfo['id']); } // crawl log is for keyword if ($listInfo['crawl_type'] == 'keyword') { // if ref is is integer find keyword name if (!empty($listInfo['keyword'])) { $listInfo['ref_id'] = $listInfo['keyword']; } // find search engine info if (preg_match("/^\\d+\$/", $listInfo['subject'])) { $seCtrler = new SearchEngineController(); $seInfo = $seCtrler->__getsearchEngineInfo($listInfo['subject']); $listInfo['subject'] = $seInfo['domain']; } } ?> <tr class="<?php echo $class; ?> "> <td class="<?php echo $leftBotClass; ?> "><input type="checkbox" name="ids[]" value="<?php echo $listInfo['id']; ?>
/**
 * Crawls one keyword on one search engine and renders the matched results.
 *
 * Keys read from $keywordInfo here: 'se_id' (the search engine to crawl,
 * also copied to 'searchengines') and 'show_all' (stored in $this->showAll).
 * The complete array is passed on to crawlKeyword().
 *
 * @param array|string $keywordInfo Keyword details; anything that is not an
 *                                  array is treated as an empty array.
 */
function showQuickRankChecker($keywordInfo = '') {
    // the default argument is an empty string, which can not be used as an array
    if (!is_array($keywordInfo)) {
        $keywordInfo = array();
    }

    $seId = isset($keywordInfo['se_id']) ? $keywordInfo['se_id'] : '';
    $keywordInfo['searchengines'] = $seId;
    $this->showAll = isset($keywordInfo['show_all']) ? $keywordInfo['show_all'] : null;

    $seController = new SearchEngineController();
    $this->seList = $seController->__getAllCrawlFormatedSearchEngines();

    $crawlResult = $this->crawlKeyword($keywordInfo);

    // the matches are only shown when the crawl of that engine succeeded
    $resultList = array();
    if (!empty($crawlResult[$seId]['status'])) {
        $resultList = $crawlResult[$seId]['matched'];
    }

    $this->set('list', $resultList);
    $this->render('report/showquickrankchecker');
}
/**
 * Cron task: crawl the search engine positions of all active keywords of a website.
 *
 * Every keyword with status 1 of the website is crawled and the matches of
 * each search engine are stored through saveMatchedKeywordInfo(). Progress
 * is reported through debugMsg() and the loop sleeps SP_CRAWL_DELAY seconds
 * after each keyword.
 *
 * @param int $websiteId Id of the website whose keywords are checked.
 */
function keywordPositionCheckerCron($websiteId) {
    include_once SP_CTRLPATH . "/searchengine.ctrl.php";
    include_once SP_CTRLPATH . "/report.ctrl.php";

    // the id is interpolated into the SQL below, so force it to an integer
    $websiteId = intval($websiteId);

    $reportController = new ReportController();
    $seController = new SearchEngineController();
    $reportController->seList = $seController->__getAllCrawlFormatedSearchEngines();

    $sql = "select k.*,w.url from keywords k,websites w where k.website_id=w.id and w.id={$websiteId} and k.status=1";
    $sql .= " order by k.name";
    $keywordList = $reportController->db->select($sql);
    if (!is_array($keywordList)) {
        $keywordList = array();
    }

    $this->debugMsg("Starting keyword position checker cron for website: {$this->websiteInfo['name']}....<br>\n");

    # loop through each keyword
    foreach ($keywordList as $keywordInfo) {
        $reportController->seFound = 0;
        $crawlResult = $reportController->crawlKeyword($keywordInfo);

        foreach ($crawlResult as $sengineId => $matchList) {
            $seDomain = $reportController->seList[$sengineId]['domain'];

            if ($matchList['status']) {
                foreach ($matchList['matched'] as $i => $matchInfo) {
                    // only the first match of an engine is saved with the remove flag set
                    $remove = ($i == 0);
                    $matchInfo['se_id'] = $sengineId;
                    $matchInfo['keyword_id'] = $keywordInfo['id'];
                    $reportController->saveMatchedKeywordInfo($matchInfo, $remove);
                }

                $this->debugMsg("Successfully crawled keyword <b>{$keywordInfo['name']}</b> results from " . $seDomain . ".....<br>\n");
            } else {
                // the keyword name is wrapped in a properly opened <b> tag
                $this->debugMsg("Crawling keyword <b>{$keywordInfo['name']}</b> results from " . $seDomain . " failed......<br>\n");
            }
        }

        if (empty($reportController->seFound)) {
            $this->debugMsg("Keyword <b>{$keywordInfo['name']}</b> not assigned to required search engines........\n");
        }

        // pause between keywords
        sleep(SP_CRAWL_DELAY);
    }
}
/**
 * Function to display crawl log details.
 *
 * Builds the filtered, paginated crawl log query from the search details
 * and renders the 'log/crawlloglist' view.
 *
 * Keys read from $info: status ('success' or 'fail'), keyword, crawl_type,
 * proxy_id, se_id, from_time, to_time, pageno, fromPopUp. Without
 * from_time/to_time the last 30 days up to today are listed.
 *
 * @param Array $info Contains all search details; anything that is not an
 *                    array is treated as "no search details".
 */
function listCrawlLog($info = '') {
    // the default argument is an empty string, which can not be used as an array
    if (!is_array($info)) {
        $info = array();
    }

    // NOTE(review): the returned id is not used here; the call is kept in
    // case isLoggedIn() does more than return a value - confirm.
    isLoggedIn();

    $sql = "select t.*, k.name keyword from {$this->tablName} t left join keywords k on t.ref_id=k.id where 1=1";
    $conditions = "";
    $urlParams = "";

    // crawl status filter
    if (isset($info['status'])) {
        if ($info['status'] == 'success' || $info['status'] == 'fail') {
            $statVal = $info['status'] == 'success' ? 1 : 0;
            $conditions .= " and crawl_status={$statVal}";
            $urlParams .= "&status=" . $info['status'];
        }
    } else {
        $info['status'] = '';
    }
    $this->set('statVal', $info['status']);

    // free text filter over all textual columns of the log
    if (empty($info['keyword'])) {
        $info['keyword'] = '';
    } else {
        $info['keyword'] = urldecode($info['keyword']);
        $searchKeyword = addslashes($info['keyword']);
        $conditions .= " and (ref_id like '%{$searchKeyword}%' or subject like '%{$searchKeyword}%' or crawl_referer like '%{$searchKeyword}%'\r\n\t\t\tor log_message like '%{$searchKeyword}%' or k.name like '%{$searchKeyword}%' or crawl_link like '%{$searchKeyword}%'\r\n\t\t\tor crawl_cookie like '%{$searchKeyword}%' or crawl_post_fields like '%{$searchKeyword}%' or crawl_useragent like '%{$searchKeyword}%')";
        $urlParams .= "&keyword=" . urlencode($info['keyword']);
    }
    $this->set('keyword', $info['keyword']);

    // crawl type filter
    $crawlType = "";
    if (!empty($info['crawl_type'])) {
        $crawlType = $info['crawl_type'];
        $conditions .= " and crawl_type='" . addslashes($crawlType) . "'";
        $urlParams .= "&crawl_type=" . urlencode($crawlType);
    }

    // find different crawl types
    $crawlTypeSql = "select distinct crawl_type from {$this->tablName}";
    $crawlTypeList = $this->db->select($crawlTypeSql);
    $this->set('crawlTypeList', $crawlTypeList);
    $this->set('crawlType', $crawlType);

    // proxy filter
    $proxyId = "";
    if (!empty($info['proxy_id'])) {
        $proxyId = intval($info['proxy_id']);
        $conditions .= " and proxy_id='" . $proxyId . "'";
        $urlParams .= "&proxy_id=" . $proxyId;
    }

    // find different proxy used
    $proxySql = "select distinct proxy_id, proxy, port from {$this->tablName} t, proxylist pl \r\n\t\twhere pl.id=t.proxy_id and t.proxy_id!=0";
    $proxyList = $this->db->select($proxySql);
    $this->set('proxyList', $proxyList);
    $this->set('proxyId', $proxyId);

    // search engine filter
    $seId = "";
    $seController = new SearchEngineController();
    $seList = $seController->__getAllSearchEngines();
    if (!empty($info['se_id'])) {
        $seId = intval($info['se_id']);

        // the subject column is matched against the engine id and against
        // the domain of that engine; every value is quoted on its own
        $subjectList = array("'{$seId}'");
        if (is_array($seList)) {
            foreach ($seList as $seInfo) {
                if ($seInfo['id'] == $seId) {
                    $subjectList[] = "'" . addslashes($seInfo['domain']) . "'";
                }
            }
        }

        $conditions .= " and subject in (" . implode(",", $subjectList) . ")";
        $urlParams .= "&se_id=" . $seId;
    }
    $this->set('seList', $seList);
    $this->set('seId', $seId);

    // period filter: defaults to the last 30 days
    if (!empty($info['from_time'])) {
        $fromTime = strtotime($info['from_time'] . ' 00:00:00');
    } else {
        $fromTime = mktime(0, 0, 0, date('m'), date('d') - 30, date('Y'));
    }

    if (!empty($info['to_time'])) {
        $toTime = strtotime($info['to_time'] . ' 00:00:00');
    } else {
        $toTime = mktime(0, 0, 0, date('m'), date('d'), date('Y'));
    }

    $fromTimeLabel = date('Y-m-d', $fromTime);
    $toTimeLabel = date('Y-m-d', $toTime);
    $this->set('fromTime', $fromTimeLabel);
    $this->set('toTime', $toTimeLabel);
    $urlParams .= "&from_time={$fromTimeLabel}&to_time={$toTimeLabel}";

    // sql created using param
    $sql .= " {$conditions} and crawl_time >='{$fromTimeLabel} 00:00:00' and crawl_time<='{$toTimeLabel} 23:59:59' order by crawl_time DESC";

    // pagination setup
    $this->db->query($sql, true);
    $this->paging->setDivClass('pagingdiv');
    $this->paging->loadPaging($this->db->noRows, SP_PAGINGNO);
    $pagingDiv = $this->paging->printPages('log.php', '', 'scriptDoLoad', 'content', $urlParams);
    $this->set('pagingDiv', $pagingDiv);
    $sql .= " limit " . $this->paging->start . "," . $this->paging->per_page;

    $logList = $this->db->select($sql);
    $this->set('pageNo', isset($info['pageno']) ? $info['pageno'] : null);
    $this->set('list', $logList);
    $this->set('urlParams', $urlParams);
    $this->set('fromPopUp', isset($info['fromPopUp']) ? $info['fromPopUp'] : null);
    $this->render('log/crawlloglist');
}
/**
 * Reads the Alexa rank of a url from the data.alexa.com service.
 *
 * The response is fetched through $this->spider and the rank is taken from
 * the TEXT attribute of the popularity element. A non-empty response without
 * that element is logged as failed crawl (captcha or regex error). The crawl
 * log entry is updated in every case.
 *
 * @param string $url The website url to look up.
 * @return mixed 0 in demo mode or when no rank was found, otherwise the
 *               matched rank digits.
 */
function __getAlexaRank($url) {
    $demoRequest = SP_DEMO && !empty($_SERVER['REQUEST_METHOD']);
    if ($demoRequest) {
        return 0;
    }

    $lookupUrl = 'http://data.alexa.com/data?cli=10&dat=snbamz&url=' . urlencode($url);
    $ret = $this->spider->getContent($lookupUrl);

    $rank = 0;
    $logData = array();

    // parse rank from the page
    if (!empty($ret['page'])) {
        $found = preg_match('/\\<popularity url\\="(.*?)" TEXT\\="([0-9]+)"/si', $ret['page'], $parts);

        if ($found) {
            if (!empty($parts[2])) {
                $rank = $parts[2];
            }
        } else {
            $logData['crawl_status'] = 0;
            if (SearchEngineController::isCaptchInSearchResults($ret['page'])) {
                $logData['log_message'] = "<font class=error>Captcha found</font> in search result page";
            } else {
                $logData['log_message'] = "Regex not matched error occured while parsing search results!";
            }
        }
    }

    // update crawl log
    $logData['crawl_type'] = 'rank';
    $logData['ref_id'] = $url;
    $logData['subject'] = "alexa";
    $logCtrler = new CrawlLogController();
    $logCtrler->updateCrawlLog($ret['log_id'], $logData);

    return $rank;
}
* (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ include_once "includes/sp-load.php"; checkAdminLoggedIn(); include_once SP_CTRLPATH . "/searchengine.ctrl.php"; $controller = new SearchEngineController(); $controller->view->menu = 'se-manager'; $controller->layout = 'ajax'; $controller->spTextPanel = $controller->getLanguageTexts('panel', $_SESSION['lang_code']); $controller->set('spTextPanel', $controller->spTextPanel); $controller->spTextUser = $controller->getLanguageTexts('searchengine', $_SESSION['lang_code']); $controller->set('spTextSE', $controller->spTextUser); if ($_SERVER['REQUEST_METHOD'] == 'POST') { switch ($_POST['sec']) { case "activateall": if (!empty($_POST['ids'])) { foreach ($_POST['ids'] as $id) { $controller->__changeStatus($id, 1); } } $controller->listSE($_POST);
/**
 * Fetches the number of indexed pages (saturation) of $this->url from one search engine.
 *
 * The result page of the engine is downloaded through $this->spider and the
 * count is taken from the first regular expression that matches. When no
 * expression matches, the crawl is logged as failed (captcha or regex
 * error). The crawl log entry is updated in every case.
 *
 * @param string $engine 'google' or 'msn'. For any other value nothing is
 *                       crawled and 0 is returned.
 * @return mixed 0 in demo mode or when no count was found, otherwise the
 *               count as numeric string with thousands separators removed.
 */
function __getSaturationRank($engine) {
    if (SP_DEMO && !empty($_SERVER['REQUEST_METHOD'])) {
        return 0;
    }

    $saturationCount = 0;
    $crawlInfo = array();
    $v = array();

    // per engine: the url to crawl and the patterns to try, in order
    $url = null;
    $patternList = array();
    switch ($engine) {

        // google
        case 'google':
            $url = $this->saturationUrlList[$engine] . urlencode($this->url);
            $patternList = array(
                '/about ([0-9\\,]+) result/si',
                '/<div id=resultStats>([0-9\\,]+) result/si',
                '/([0-9\\,]+) result/si',
                '/about <b>([0-9\\,]+)<\\/b> from/si',
                '/of <b>([0-9\\,]+)<\\/b>/si',
            );
            break;

        // msn
        case 'msn':
            $url = $this->saturationUrlList[$engine] . urlencode(addHttpToUrl($this->url));
            $patternList = array(
                '/([0-9\\,]+) results/si',
                '/id="count".*?>.*?\\(([0-9\\,]+).*?\\)/si',
                '/id="count".*?>.*?([0-9\\,]+).*?/si',
                '/class="sb_count".*?>.*?([0-9\\,]+).*?<\\/span>/si',
            );
            break;
    }

    // crawl only when the engine is a known one
    if ($url !== null) {
        $v = $this->spider->getContent($url);
        $pageContent = empty($v['page']) ? '' : $v['page'];

        // the first matching pattern wins
        $r = array();
        $matched = false;
        foreach ($patternList as $pattern) {
            if (preg_match($pattern, $pageContent, $r)) {
                $matched = true;
                break;
            }
        }

        if (!$matched) {
            $crawlInfo['crawl_status'] = 0;
            $crawlInfo['log_message'] = SearchEngineController::isCaptchInSearchResults($pageContent) ? "<font class=error>Captcha found</font> in search result page" : "Regex not matched error occured while parsing search results!";
        }

        $saturationCount = !empty($r[1]) ? str_replace(',', '', $r[1]) : 0;
    }

    // update crawl log
    $crawlLogCtrl = new CrawlLogController();
    $crawlInfo['crawl_type'] = 'saturation';
    $crawlInfo['ref_id'] = $this->url;
    $crawlInfo['subject'] = $engine;
    $logId = isset($v['log_id']) ? $v['log_id'] : null;
    $crawlLogCtrl->updateCrawlLog($logId, $crawlInfo);

    return $saturationCount;
}