Beispiel #1
0
 /**
  * Constructor of the keyword API.
  *
  * Loads the keyword and report controller files, creates one instance of
  * each, and stores every search engine row in $this->seList keyed by the
  * row's 'id'.
  */
 function KeywordAPI()
 {
     include_once SP_CTRLPATH . "/keyword.ctrl.php";
     include_once SP_CTRLPATH . "/report.ctrl.php";

     $this->ctrler = new KeywordController();
     $this->reportCtrler = new ReportController();

     // index the search engine rows by their id for direct lookup
     $searchEngineCtrler = new SearchEngineController();
     $enginesById = array();
     foreach ($searchEngineCtrler->__getAllSearchEngines() as $engineRow) {
         $enginesById[$engineRow['id']] = $engineRow;
     }
     $this->seList = $enginesById;
 }
Beispiel #2
0
 /**
  * Constructor of the website API.
  *
  * Creates the website controller, stores every search engine row in
  * $this->seList keyed by the row's 'id', and then loads the controller
  * files for saturation, rank, backlink, directory and keyword handling.
  */
 function WebsiteAPI()
 {
     $this->ctrler = new WebsiteController();

     // index the search engine rows by their id for direct lookup
     $searchEngineCtrler = new SearchEngineController();
     $enginesById = array();
     foreach ($searchEngineCtrler->__getAllSearchEngines() as $engineRow) {
         $enginesById[$engineRow['id']] = $engineRow;
     }
     $this->seList = $enginesById;

     // controller files needed by the API, loaded in the original order
     $ctrlFiles = array('saturationchecker', 'rank', 'backlink', 'directory', 'keyword');
     foreach ($ctrlFiles as $ctrlFile) {
         include_once SP_CTRLPATH . "/" . $ctrlFile . ".ctrl.php";
     }
 }
Beispiel #3
0
 /**
  * Cron task: crawl search engine positions for the active keywords of a website.
  *
  * Keywords that already have a keywordcrontracker row for today's midnight
  * timestamp are skipped. For every remaining keyword the crawl result of
  * each search engine is saved through ReportController::saveMatchedKeywordInfo().
  * When SP_NUMBER_KEYWORDS_CRON is greater than zero the script is terminated
  * with die() once that many keywords have produced a non-empty crawl result.
  *
  * @param int $websiteId Id of the website whose keywords are checked
  */
 function keywordPositionCheckerCron($websiteId)
 {
     include_once SP_CTRLPATH . "/searchengine.ctrl.php";
     include_once SP_CTRLPATH . "/report.ctrl.php";

     // the id is interpolated into the SQL below, so force it to an integer first
     $websiteId = intval($websiteId);

     $reportController = new ReportController();
     $seController = new SearchEngineController();
     $reportController->seList = $seController->__getAllCrawlFormatedSearchEngines();

     // get keywords not to be checked (already tracked for today)
     $time = mktime(0, 0, 0, date('m'), date('d'), date('Y'));
     $sql = "select distinct(keyword_id) from keywordcrontracker kc, keywords k where k.id=kc.keyword_id and k.website_id={$websiteId} and time={$time}";
     $keyList = $this->db->select($sql);
     // 0 keeps the "not in (...)" list syntactically valid when nothing is excluded
     $excludeKeyList = array(0);
     foreach ($keyList as $info) {
         $excludeKeyList[] = $info['keyword_id'];
     }

     // get keywords needs to be checked
     $sql = "select k.*,w.url from keywords k,websites w where k.website_id=w.id and w.id={$websiteId} and k.status=1";
     $sql .= " and k.id not in(" . implode(",", $excludeKeyList) . ") order by k.name";
     $keywordList = $reportController->db->select($sql);
     $this->debugMsg("Starting keyword position checker cron for website: {$this->websiteInfo['name']}....<br>\n");

     // loop through each keyword
     foreach ($keywordList as $keywordInfo) {
         $reportController->seFound = 0;
         $crawlResult = $reportController->crawlKeyword($keywordInfo, '', true);
         foreach ($crawlResult as $sengineId => $matchList) {
             if ($matchList['status']) {
                 foreach ($matchList['matched'] as $i => $matchInfo) {
                     // only the first match of a search engine is saved with the remove flag set
                     $remove = ($i == 0);
                     $matchInfo['se_id'] = $sengineId;
                     $matchInfo['keyword_id'] = $keywordInfo['id'];
                     $repCtrler = new ReportController();
                     $repCtrler->saveMatchedKeywordInfo($matchInfo, $remove);
                 }
                 $this->debugMsg("Successfully crawled keyword <b>{$keywordInfo['name']}</b> results from " . $reportController->seList[$sengineId]['domain'] . ".....<br>\n");
             } else {
                 // fixed: the bold tag was opened with "</b>", producing broken markup
                 $this->debugMsg("Crawling keyword <b>{$keywordInfo['name']}</b> results from " . $reportController->seList[$sengineId]['domain'] . " failed......<br>\n");
             }
         }

         // to implement split cron execution feature
         if (SP_NUMBER_KEYWORDS_CRON > 0 && !empty($crawlResult)) {
             $this->checkedKeywords++;
             if ($this->checkedKeywords == SP_NUMBER_KEYWORDS_CRON) {
                 die("Reached total number of allowed keywords(" . SP_NUMBER_KEYWORDS_CRON . ") in each cron job");
             }
         }

         if (empty($reportController->seFound)) {
             $this->debugMsg("Keyword <b>{$keywordInfo['name']}</b> not assigned to required search engines........\n");
         }

         // pause between keywords
         sleep(SP_CRAWL_DELAY);
     }
 }
 /**
  * Show the keyword edit form, or the keyword list when no id is given.
  *
  * Website, language, country and search engine lists are always pushed to
  * the view. With a non-empty $keywordId the 'keyword/edit' view is rendered
  * and the script exits; otherwise listKeywords() is called.
  *
  * @param mixed $keywordId Id of the keyword to edit
  * @param mixed $listInfo  Form values to show; when empty they are loaded
  *                         through __getKeywordInfo()
  */
 function editKeyword($keywordId, $listInfo = '')
 {
     $userId = isLoggedIn();

     $websiteCtrler = new WebsiteController();
     $this->set('websiteList', $websiteCtrler->__getAllWebsites($userId, true));

     $languageCtrler = new LanguageController();
     $this->set('langList', $languageCtrler->__getAllLanguages());
     $this->set('langNull', true);

     $countryCtrler = new CountryController();
     $this->set('countryList', $countryCtrler->__getAllCountries());
     $this->set('countryNull', true);

     $searchEngineCtrler = new SearchEngineController();
     $this->set('seList', $searchEngineCtrler->__getAllSearchEngines());

     // nothing to edit: fall back to the keyword list
     if (empty($keywordId)) {
         $this->listKeywords();
         return;
     }

     // no submitted values: load the stored keyword
     if (empty($listInfo)) {
         $listInfo = $this->__getKeywordInfo($keywordId);
         $listInfo['oldName'] = $listInfo['name'];
         // search engine ids are stored as a ':' separated string
         $listInfo['searchengines'] = explode(':', $listInfo['searchengines']);
     }

     $this->set('post', $listInfo);
     $this->render('keyword/edit');
     exit;
 }
Beispiel #5
0
 /**
  * Build the overall report summary (keyword positions and website statistics).
  *
  * Depending on $searchInfo['doc_type'] the result is exported as CSV
  * ("export"), rendered with the print flag set ("print") or rendered
  * normally. When $cronUserId is non-empty the view content is returned
  * instead of being rendered.
  *
  * Keys read from $searchInfo (all optional): doc_type, website_id,
  * report_type, from_time, to_time, order_col, order_val.
  *
  * @param mixed $searchInfo Search/filter values, normally an array
  * @param mixed $cronUserId User id to report for when called from cron
  * @return mixed View content when $cronUserId is non-empty, otherwise nothing
  */
 function showOverallReportSummary($searchInfo = '', $cronUserId = false)
 {
     $spTextHome = $this->getLanguageTexts('home', $_SESSION['lang_code']);
     $this->set('spTextHome', $spTextHome);
     $this->set('cronUserId', $cronUserId);

     // read the optional keys through isset(): $searchInfo defaults to a string
     // and the keys are not guaranteed to be present
     $docType = isset($searchInfo['doc_type']) ? $searchInfo['doc_type'] : '';
     $reportType = isset($searchInfo['report_type']) ? $searchInfo['report_type'] : '';

     $exportVersion = false;
     $exportContent = "";
     switch ($docType) {
         case "export":
             $exportVersion = true;
             break;
         case "print":
             $this->set('printVersion', true);
             break;
     }

     $this->set('sectionHead', 'Overall Report Summary');
     $userId = empty($cronUserId) ? isLoggedIn() : $cronUserId;
     $isAdmin = isAdmin();

     $websiteCtrler = new WebsiteController();
     $websiteList = $websiteCtrler->__getAllWebsites($userId, true);
     $this->set('siteList', $websiteList);

     // default to the first website of the user; 0 when the user has none
     if (isset($searchInfo['website_id'])) {
         $websiteId = $searchInfo['website_id'];
     } else {
         $websiteId = isset($websiteList[0]['id']) ? $websiteList[0]['id'] : 0;
     }
     $websiteId = intval($websiteId);
     $this->set('websiteId', $websiteId);
     $urlarg = "website_id={$websiteId}";

     $websiteUrl = "";
     foreach ($websiteList as $websiteInfo) {
         if ($websiteInfo['id'] == $websiteId) {
             $websiteUrl = $websiteInfo['url'];
             break;
         }
     }
     $this->set('websiteUrl', $websiteUrl);

     $reportTypes = array('keyword-position' => $this->spTextTools["Keyword Position Summary"], 'website-stats' => $spTextHome["Website Statistics"]);
     $this->set('reportTypes', $reportTypes);
     $urlarg .= "&report_type=" . $reportType;

     // report period: defaults to yesterday .. today
     if (!empty($searchInfo['from_time'])) {
         $fromTime = strtotime($searchInfo['from_time'] . ' 00:00:00');
     } else {
         $fromTime = mktime(0, 0, 0, date('m'), date('d') - 1, date('Y'));
     }
     if (!empty($searchInfo['to_time'])) {
         $toTime = strtotime($searchInfo['to_time'] . ' 00:00:00');
     } else {
         $toTime = mktime(0, 0, 0, date('m'), date('d'), date('Y'));
     }
     $fromTimeShort = date('Y-m-d', $fromTime);
     $this->set('fromTime', $fromTimeShort);
     $toTimeShort = date('Y-m-d', $toTime);
     $this->set('toTime', $toTimeShort);
     $urlarg .= "&from_time={$fromTimeShort}&to_time={$toTimeShort}";

     $seController = new SearchEngineController();
     // NOTE(review): the property name "seLIst" is kept as-is because other
     // methods of this class may read it - confirm before renaming
     $this->seLIst = $seController->__getAllSearchEngines();
     $this->set('seList', $this->seLIst);
     $this->set('isAdmin', $isAdmin);
     $this->set('urlarg', $urlarg);

     # keyword position report section
     if (empty($reportType) || $reportType == 'keyword-position') {
         // to find order col
         if (!empty($searchInfo['order_col'])) {
             $orderCol = $searchInfo['order_col'];
             $orderVal = $searchInfo['order_val'];
         } else {
             $orderCol = $this->seLIst[0]['id'];
             $orderVal = 'ASC';
         }
         $this->set('orderCol', $orderCol);
         $this->set('orderVal', $orderVal);

         $keywordController = new KeywordController();
         $list = $keywordController->__getAllKeywords($userId, $websiteId, true, true, $orderVal);
         $indexList = array();
         // initialised so the view/export below get an array even without keywords
         $keywordList = array();
         foreach ($list as $keywordInfo) {
             $positionInfo = $this->__getKeywordSearchReport($keywordInfo['id'], $fromTime, $toTime);
             // check whether the sorting search engine is there;
             // keywords without a rank for it get 10000 so they sort last ascending
             $indexList[$keywordInfo['id']] = empty($positionInfo[$orderCol]) ? 10000 : $positionInfo[$orderCol]['rank'];
             $keywordInfo['position_info'] = $positionInfo;
             $keywordList[$keywordInfo['id']] = $keywordInfo;
         }

         // sort array according the value
         if ($orderCol != 'keyword') {
             if ($orderVal == 'DESC') {
                 arsort($indexList);
             } else {
                 asort($indexList);
             }
         }
         $this->set('indexList', $indexList);

         if ($exportVersion) {
             $spText = $_SESSION['text'];
             $reportHeading = $this->spTextTools['Keyword Position Summary'] . "({$fromTimeShort} - {$toTimeShort})";
             $exportContent .= createExportContent(array('', $reportHeading, ''));
             $exportContent .= createExportContent(array());
             $headList = array($spText['common']['Website'], $spText['common']['Keyword']);
             foreach ($this->seLIst as $seInfo) {
                 $headList[] = $seInfo['domain'];
             }
             $exportContent .= createExportContent($headList);
             foreach ($indexList as $keywordId => $rankValue) {
                 $listInfo = $keywordList[$keywordId];
                 $positionInfo = $listInfo['position_info'];
                 $valueList = array($listInfo['weburl'], $listInfo['name']);
                 foreach ($this->seLIst as $index => $seInfo) {
                     $rank = empty($positionInfo[$seInfo['id']]['rank']) ? '-' : $positionInfo[$seInfo['id']]['rank'];
                     $rankDiff = empty($positionInfo[$seInfo['id']]['rank_diff']) ? '' : $positionInfo[$seInfo['id']]['rank_diff'];
                     $valueList[] = $rank . strip_tags($rankDiff);
                 }
                 $exportContent .= createExportContent($valueList);
             }
         } else {
             $this->set('list', $keywordList);
             $this->set('keywordPos', true);
         }
     }

     # website report section
     if (empty($reportType) || $reportType == 'website-stats') {
         include_once SP_CTRLPATH . "/saturationchecker.ctrl.php";
         include_once SP_CTRLPATH . "/rank.ctrl.php";
         include_once SP_CTRLPATH . "/backlink.ctrl.php";
         include_once SP_CTRLPATH . "/directory.ctrl.php";
         $rankCtrler = new RankController();
         $backlinlCtrler = new BacklinkController();
         $saturationCtrler = new SaturationCheckerController();
         $dirCtrler = new DirectoryController();
         $websiteRankList = array();
         foreach ($websiteList as $listInfo) {
             // if only needs to show onewebsite selected
             if (!empty($websiteId) && $listInfo['id'] != $websiteId) {
                 continue;
             }

             # rank reports
             $report = $rankCtrler->__getWebsiteRankReport($listInfo['id'], $fromTime, $toTime);
             // an empty report list falls through to the "-" placeholders
             $report = isset($report[0]) ? $report[0] : array();
             $listInfo['alexarank'] = empty($report['alexa_rank']) ? "-" : $report['alexa_rank'] . " " . $report['rank_diff_alexa'];
             $listInfo['googlerank'] = empty($report['google_pagerank']) ? "-" : $report['google_pagerank'] . " " . $report['rank_diff_google'];

             # back links reports
             $report = $backlinlCtrler->__getWebsitebacklinkReport($listInfo['id'], $fromTime, $toTime);
             $report = isset($report[0]) ? $report[0] : array();
             $listInfo['google']['backlinks'] = empty($report['google']) ? "-" : $report['google'] . " " . $report['rank_diff_google'];
             $listInfo['alexa']['backlinks'] = empty($report['alexa']) ? "-" : $report['alexa'] . " " . $report['rank_diff_alexa'];
             $listInfo['msn']['backlinks'] = empty($report['msn']) ? "-" : $report['msn'] . " " . $report['rank_diff_msn'];

             # saturation (indexed pages) reports
             $report = $saturationCtrler->__getWebsiteSaturationReport($listInfo['id'], $fromTime, $toTime);
             $report = isset($report[0]) ? $report[0] : array();
             $listInfo['google']['indexed'] = empty($report['google']) ? "-" : $report['google'] . " " . $report['rank_diff_google'];
             $listInfo['msn']['indexed'] = empty($report['msn']) ? "-" : $report['msn'] . " " . $report['rank_diff_msn'];

             # directory submission totals
             $listInfo['dirsub']['total'] = $dirCtrler->__getTotalSubmitInfo($listInfo['id']);
             $listInfo['dirsub']['active'] = $dirCtrler->__getTotalSubmitInfo($listInfo['id'], true);
             $websiteRankList[] = $listInfo;
         }

         // if export function called
         if ($exportVersion) {
             $exportContent .= createExportContent(array());
             $exportContent .= createExportContent(array());
             $exportContent .= createExportContent(array('', $spTextHome['Website Statistics'] . "({$fromTimeShort} - {$toTimeShort})", ''));
             // NOTE(review): a "user" export row used to be guarded here by
             // $webUserId and $userCtrler, neither of which is defined in this
             // method, so the branch could never run; it has been removed
             $exportContent .= createExportContent(array());
             $headList = array($_SESSION['text']['common']['Id'], $_SESSION['text']['common']['Website'], 'Google Pagerank', 'Alexa Rank', 'Google ' . $spTextHome['Backlinks'], 'alexa ' . $spTextHome['Backlinks'], 'Bing ' . $spTextHome['Backlinks'], 'Google ' . $spTextHome['Indexed'], 'Bing ' . $spTextHome['Indexed'], $_SESSION['text']['common']['Total'] . ' Submission', $_SESSION['text']['common']['Active'] . ' Submission');
             $exportContent .= createExportContent($headList);
             foreach ($websiteRankList as $websiteInfo) {
                 $valueList = array($websiteInfo['id'], $websiteInfo['url'], strip_tags($websiteInfo['googlerank']), strip_tags($websiteInfo['alexarank']), strip_tags($websiteInfo['google']['backlinks']), strip_tags($websiteInfo['alexa']['backlinks']), strip_tags($websiteInfo['msn']['backlinks']), strip_tags($websiteInfo['google']['indexed']), strip_tags($websiteInfo['msn']['indexed']), $websiteInfo['dirsub']['total'], $websiteInfo['dirsub']['active']);
                 $exportContent .= createExportContent($valueList);
             }
         } else {
             $this->set('websiteRankList', $websiteRankList);
             $this->set('websiteStats', true);
         }
     }

     if ($exportVersion) {
         exportToCsv('archived_report', $exportContent);
     } else {
         $this->set('searchInfo', $searchInfo);
         // if execution through cron job then just return the content to send through mail
         if (!empty($cronUserId)) {
             return $this->getViewContent('report/archive');
         } else {
             $this->render('report/archive');
         }
     }
 }
 /**
  * Fetch the backlink count of $this->url from one search engine.
  *
  * The result page is requested through $this->spider and the count is
  * parsed with the first matching pattern of the engine. When no pattern
  * matches, the crawl log entry is marked as failed with a message telling
  * whether a captcha was detected. Supported engines: 'google', 'msn', 'alexa'.
  *
  * @param string $engine Engine key, also used as index into $this->backUrlList
  * @return mixed 0 in demo mode, for an unsupported engine or when nothing
  *               was parsed; otherwise the count with commas removed
  *               (a string for google/msn, an integer for alexa)
  */
 function __getBacklinks($engine)
 {
     if (SP_DEMO && !empty($_SERVER['REQUEST_METHOD'])) {
         return 0;
     }

     $backlinkCount = 0;
     $crawlInfo = array();
     $castToInt = false;

     // pick the request url and the result patterns (tried in order) per engine
     switch ($engine) {
         #google
         case 'google':
             $url = $this->backUrlList[$engine] . urlencode($this->url);
             $patternList = array(
                 '/about ([0-9\\,]+) result/si',
                 '/<div id=resultStats>([0-9\\,]+) result/si',
                 '/([0-9\\,]+) result/si',
                 '/about <b>([0-9\\,]+)<\\/b> linking/si',
             );
             break;

         #msn
         case 'msn':
             $url = formatUrl($this->url, false);
             $url = $this->backUrlList[$engine] . urlencode(addHttpToUrl($url));
             $patternList = array(
                 '/([0-9\\,]+) results/si',
                 '/id="count".*?>.*?\\(([0-9\\,]+).*?\\)/si',
                 '/id="count".*?>.*?([0-9\\,]+).*?/si',
                 '/class="sb_count".*?>.*?([0-9\\,]+).*?<\\/span>/si',
             );
             break;

         # alexa
         case 'alexa':
             $url = $this->backUrlList[$engine] . urlencode($this->url);
             $patternList = array(
                 '/id="linksin-panel-content".*>([0-9,]+)<\\/span>/si',
             );
             // the alexa count has always been returned as an integer
             $castToInt = true;
             break;

         default:
             // unsupported engine: no page was requested, so there is no
             // crawl log entry to update (previously $v was undefined here)
             return $backlinkCount;
     }

     $v = $this->spider->getContent($url);
     $pageContent = empty($v['page']) ? '' : $v['page'];

     // first matching pattern wins
     $r = array();
     $matched = false;
     foreach ($patternList as $pattern) {
         if (preg_match($pattern, $pageContent, $r)) {
             $matched = true;
             break;
         }
     }

     if (!$matched) {
         $crawlInfo['crawl_status'] = 0;
         $crawlInfo['log_message'] = SearchEngineController::isCaptchInSearchResults($pageContent) ? "<font class=error>Captcha found</font> in search result page" : "Regex not matched error occured while parsing search results!";
     }

     if (!empty($r[1])) {
         $backlinkCount = str_replace(',', '', $r[1]);
         if ($castToInt) {
             $backlinkCount = intval($backlinkCount);
         }
     }

     // update crawl log
     $crawlLogCtrl = new CrawlLogController();
     $crawlInfo['crawl_type'] = 'backlink';
     $crawlInfo['ref_id'] = $this->url;
     $crawlInfo['subject'] = $engine;
     $crawlLogCtrl->updateCrawlLog($v['log_id'], $crawlInfo);

     return $backlinkCount;
 }
        }
        // if from popup
        if ($fromPopUp) {
            $logLink = scriptAJAXLinkHref('log.php', 'content', "sec=crawl_log_details&id=" . $listInfo['id'], $listInfo['id']);
        } else {
            $logLink = scriptAJAXLinkHrefDialog('log.php', 'content', "sec=crawl_log_details&id=" . $listInfo['id'], $listInfo['id']);
        }
        // crawl log is for keyword
        if ($listInfo['crawl_type'] == 'keyword') {
            // if ref is is integer find keyword name
            if (!empty($listInfo['keyword'])) {
                $listInfo['ref_id'] = $listInfo['keyword'];
            }
            // find search engine info
            if (preg_match("/^\\d+\$/", $listInfo['subject'])) {
                $seCtrler = new SearchEngineController();
                $seInfo = $seCtrler->__getsearchEngineInfo($listInfo['subject']);
                $listInfo['subject'] = $seInfo['domain'];
            }
        }
        ?>
			<tr class="<?php 
        echo $class;
        ?>
">
				<td class="<?php 
        echo $leftBotClass;
        ?>
"><input type="checkbox" name="ids[]" value="<?php 
        echo $listInfo['id'];
        ?>
Beispiel #8
0
 /**
  * Crawl one keyword on one search engine and render the matched results.
  *
  * Keys read from $keywordInfo: 'se_id' (search engine to crawl) and
  * 'show_all' (stored in $this->showAll). The array, with 'searchengines'
  * set to the value of 'se_id', is passed on to crawlKeyword().
  *
  * @param mixed $keywordInfo Keyword details, normally an array
  */
 function showQuickRankChecker($keywordInfo = '')
 {
     // the default value is a string; writing a key into it would fail,
     // so start from an empty array instead
     if (!is_array($keywordInfo)) {
         $keywordInfo = array();
     }

     $seId = isset($keywordInfo['se_id']) ? $keywordInfo['se_id'] : '';
     $keywordInfo['searchengines'] = $seId;
     $this->showAll = isset($keywordInfo['show_all']) ? $keywordInfo['show_all'] : null;

     $seController = new SearchEngineController();
     $this->seList = $seController->__getAllCrawlFormatedSearchEngines();
     $crawlResult = $this->crawlKeyword($keywordInfo);

     // only a successful crawl of the requested engine yields results
     $resultList = array();
     if (!empty($crawlResult[$seId]['status'])) {
         $resultList = $crawlResult[$seId]['matched'];
     }

     $this->set('list', $resultList);
     $this->render('report/showquickrankchecker');
 }
Beispiel #9
0
 /**
  * Cron task: crawl search engine positions for all active keywords of a website.
  *
  * Every keyword with status 1 of the website is crawled, ordered by name,
  * and each match is saved through ReportController::saveMatchedKeywordInfo().
  * The script sleeps SP_CRAWL_DELAY seconds after each keyword.
  *
  * @param int $websiteId Id of the website whose keywords are checked
  */
 function keywordPositionCheckerCron($websiteId)
 {
     include_once SP_CTRLPATH . "/searchengine.ctrl.php";
     include_once SP_CTRLPATH . "/report.ctrl.php";

     // the id is interpolated into the SQL below, so force it to an integer first
     $websiteId = intval($websiteId);

     $reportController = new ReportController();
     $seController = new SearchEngineController();
     $reportController->seList = $seController->__getAllCrawlFormatedSearchEngines();

     $sql = "select k.*,w.url from keywords k,websites w where k.website_id=w.id and w.id={$websiteId} and k.status=1";
     $sql .= " order by k.name";
     $keywordList = $reportController->db->select($sql);
     $this->debugMsg("Starting keyword position checker cron for website: {$this->websiteInfo['name']}....<br>\n");

     # loop through each keyword
     foreach ($keywordList as $keywordInfo) {
         $reportController->seFound = 0;
         $crawlResult = $reportController->crawlKeyword($keywordInfo);
         foreach ($crawlResult as $sengineId => $matchList) {
             if ($matchList['status']) {
                 foreach ($matchList['matched'] as $i => $matchInfo) {
                     // only the first match of a search engine is saved with the remove flag set
                     $remove = ($i == 0);
                     $matchInfo['se_id'] = $sengineId;
                     $matchInfo['keyword_id'] = $keywordInfo['id'];
                     $reportController->saveMatchedKeywordInfo($matchInfo, $remove);
                 }
                 $this->debugMsg("Successfully crawled keyword <b>{$keywordInfo['name']}</b> results from " . $reportController->seList[$sengineId]['domain'] . ".....<br>\n");
             } else {
                 // fixed: the bold tag was opened with "</b>", producing broken markup
                 $this->debugMsg("Crawling keyword <b>{$keywordInfo['name']}</b> results from " . $reportController->seList[$sengineId]['domain'] . " failed......<br>\n");
             }
         }

         if (empty($reportController->seFound)) {
             $this->debugMsg("Keyword <b>{$keywordInfo['name']}</b> not assigned to required search engines........\n");
         }

         // pause between keywords
         sleep(SP_CRAWL_DELAY);
     }
 }
 /**
  * Function to display crawl log details.
  *
  * Builds a filtered, paginated query on the crawl log table and renders
  * the 'log/crawlloglist' view. Every active filter is also appended to the
  * url parameters handed to the paging links.
  *
  * Keys read from $info (all optional): status ('success' or 'fail'),
  * keyword, crawl_type, proxy_id, se_id, from_time, to_time, pageno,
  * fromPopUp. Without from_time the period starts 30 days ago.
  *
  * @param Array $info	Contains all search details
  */
 function listCrawlLog($info = '')
 {
     // NOTE(review): the return value is not used below; the call is kept
     // in case isLoggedIn() has side effects - confirm
     $userId = isLoggedIn();

     // the default value is a string; keys are written into $info below
     if (!is_array($info)) {
         $info = array();
     }

     $sql = "select t.*, k.name keyword from {$this->tablName} t left join keywords k on t.ref_id=k.id where 1=1";
     $conditions = "";
     // was appended to without ever being initialised
     $urlParams = "";

     if (isset($info['status'])) {
         if ($info['status'] == 'success' || $info['status'] == 'fail') {
             $statVal = $info['status'] == 'success' ? 1 : 0;
             $conditions .= " and crawl_status={$statVal}";
             $urlParams .= "&status=" . $info['status'];
         }
     } else {
         $info['status'] = '';
     }
     $this->set('statVal', $info['status']);

     if (empty($info['keyword'])) {
         $info['keyword'] = '';
     } else {
         $info['keyword'] = urldecode($info['keyword']);
         $searchKeyword = addslashes($info['keyword']);
         $conditions .= " and (ref_id like '%{$searchKeyword}%' or subject like '%{$searchKeyword}%' or crawl_referer like '%{$searchKeyword}%'\r\n\t\t\tor log_message like '%{$searchKeyword}%' or k.name like '%{$searchKeyword}%' or crawl_link like '%{$searchKeyword}%'\r\n\t\t\tor crawl_cookie like '%{$searchKeyword}%' or crawl_post_fields like '%{$searchKeyword}%' or crawl_useragent like '%{$searchKeyword}%')";
         $urlParams .= "&keyword=" . urlencode($info['keyword']);
     }
     $this->set('keyword', $info['keyword']);

     $crawlType = "";
     if (!empty($info['crawl_type'])) {
         $crawlType = $info['crawl_type'];
         $conditions .= " and crawl_type='" . addslashes($crawlType) . "'";
         // request value: encode it before it goes into the paging links
         $urlParams .= "&crawl_type=" . urlencode($crawlType);
     }

     // find different crawl types
     $crawlTypeSql = "select distinct crawl_type from {$this->tablName}";
     $crawlTypeList = $this->db->select($crawlTypeSql);
     $this->set('crawlTypeList', $crawlTypeList);
     $this->set('crawlType', $crawlType);

     $proxyId = "";
     if (!empty($info['proxy_id'])) {
         $proxyId = $info['proxy_id'];
         $conditions .= " and proxy_id='" . intval($proxyId) . "'";
         // same integer cast as in the condition above
         $urlParams .= "&proxy_id=" . intval($proxyId);
     }

     // find different proxy used
     $proxySql = "select distinct proxy_id, proxy, port from {$this->tablName} t, proxylist pl \r\n\t\twhere pl.id=t.proxy_id and t.proxy_id!=0";
     $proxyList = $this->db->select($proxySql);
     $this->set('proxyList', $proxyList);
     $this->set('proxyId', $proxyId);

     $seId = "";
     $seController = new SearchEngineController();
     $seList = $seController->__getAllSearchEngines();
     if (!empty($info['se_id'])) {
         $seId = intval($info['se_id']);
         // The subject is matched against the engine id and against the
         // domain of that engine. The previous code joined the domains of
         // all engines into one quoted literal ('a,b,c'), which could never
         // equal a single domain.
         $subjectList = array("'{$seId}'");
         foreach ($seList as $seInfo) {
             if ($seInfo['id'] == $seId) {
                 $subjectList[] = "'" . addslashes($seInfo['domain']) . "'";
                 break;
             }
         }
         $conditions .= " and subject in (" . implode(",", $subjectList) . ")";
         $urlParams .= "&se_id=" . $seId;
     }
     $this->set('seList', $seList);
     $this->set('seId', $seId);

     // report period: defaults to the last 30 days
     if (!empty($info['from_time'])) {
         $fromTime = strtotime($info['from_time'] . ' 00:00:00');
     } else {
         $fromTime = mktime(0, 0, 0, date('m'), date('d') - 30, date('Y'));
     }
     if (!empty($info['to_time'])) {
         $toTime = strtotime($info['to_time'] . ' 00:00:00');
     } else {
         $toTime = mktime(0, 0, 0, date('m'), date('d'), date('Y'));
     }
     $fromTimeLabel = date('Y-m-d', $fromTime);
     $toTimeLabel = date('Y-m-d', $toTime);
     $this->set('fromTime', $fromTimeLabel);
     $this->set('toTime', $toTimeLabel);
     $urlParams .= "&from_time={$fromTimeLabel}&to_time={$toTimeLabel}";

     // sql created using param
     $sql .= " {$conditions} and crawl_time >='{$fromTimeLabel} 00:00:00' and crawl_time<='{$toTimeLabel} 23:59:59' order by crawl_time DESC";

     // pagination setup: run the unlimited query first to get the row count
     $this->db->query($sql, true);
     $this->paging->setDivClass('pagingdiv');
     $this->paging->loadPaging($this->db->noRows, SP_PAGINGNO);
     $pagingDiv = $this->paging->printPages('log.php', '', 'scriptDoLoad', 'content', $urlParams);
     $this->set('pagingDiv', $pagingDiv);
     $sql .= " limit " . $this->paging->start . "," . $this->paging->per_page;

     $logList = $this->db->select($sql);
     $this->set('pageNo', isset($info['pageno']) ? $info['pageno'] : null);
     $this->set('list', $logList);
     $this->set('urlParams', $urlParams);
     $this->set('fromPopUp', isset($info['fromPopUp']) ? $info['fromPopUp'] : null);
     $this->render('log/crawlloglist');
 }
Beispiel #11
0
 /**
  * Fetch the Alexa rank of a url.
  *
  * Requests the Alexa data url through $this->spider, reads the rank from
  * the TEXT attribute of the popularity element and updates the crawl log
  * entry of the request. A non-empty page that does not match the pattern
  * marks the log entry as failed.
  *
  * @param string $url Website url to look up
  * @return mixed 0 in demo mode or when no rank was parsed, otherwise the
  *               matched rank digits
  */
 function __getAlexaRank($url)
 {
     if (SP_DEMO && !empty($_SERVER['REQUEST_METHOD'])) {
         return 0;
     }

     $requestUrl = 'http://data.alexa.com/data?cli=10&dat=snbamz&url=' . urlencode($url);
     $response = $this->spider->getContent($requestUrl);

     $alexaRank = 0;
     $logInfo = array();

     // parse rank from the page
     if (!empty($response['page'])) {
         $found = preg_match('/\\<popularity url\\="(.*?)" TEXT\\="([0-9]+)"/si', $response['page'], $parts);
         if ($found) {
             if (!empty($parts[2])) {
                 $alexaRank = $parts[2];
             }
         } else {
             $logInfo['crawl_status'] = 0;
             if (SearchEngineController::isCaptchInSearchResults($response['page'])) {
                 $logInfo['log_message'] = "<font class=error>Captcha found</font> in search result page";
             } else {
                 $logInfo['log_message'] = "Regex not matched error occured while parsing search results!";
             }
         }
     }

     // update crawl log; the common fields are appended after any failure fields
     $logCtrler = new CrawlLogController();
     $logInfo += array(
         'crawl_type' => 'rank',
         'ref_id' => $url,
         'subject' => "alexa",
     );
     $logCtrler->updateCrawlLog($response['log_id'], $logInfo);

     return $alexaRank;
 }
Beispiel #12
0
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/
include_once "includes/sp-load.php";
checkAdminLoggedIn();
include_once SP_CTRLPATH . "/searchengine.ctrl.php";
$controller = new SearchEngineController();
$controller->view->menu = 'se-manager';
$controller->layout = 'ajax';
$controller->spTextPanel = $controller->getLanguageTexts('panel', $_SESSION['lang_code']);
$controller->set('spTextPanel', $controller->spTextPanel);
$controller->spTextUser = $controller->getLanguageTexts('searchengine', $_SESSION['lang_code']);
$controller->set('spTextSE', $controller->spTextUser);
if ($_SERVER['REQUEST_METHOD'] == 'POST') {
    switch ($_POST['sec']) {
        case "activateall":
            if (!empty($_POST['ids'])) {
                foreach ($_POST['ids'] as $id) {
                    $controller->__changeStatus($id, 1);
                }
            }
            $controller->listSE($_POST);
 /**
  * Fetch the saturation (result count) of $this->url from one search engine.
  *
  * The result page is requested through $this->spider and the count is
  * parsed with the first matching pattern of the engine. When no pattern
  * matches, the crawl log entry is marked as failed with a message telling
  * whether a captcha was detected. Supported engines: 'google', 'msn'.
  *
  * @param string $engine Engine key, also used as index into $this->saturationUrlList
  * @return mixed 0 in demo mode, for an unsupported engine or when nothing
  *               was parsed; otherwise the count string with commas removed
  */
 function __getSaturationRank($engine)
 {
     if (SP_DEMO && !empty($_SERVER['REQUEST_METHOD'])) {
         return 0;
     }

     $saturationCount = 0;
     $crawlInfo = array();

     // pick the request url and the result patterns (tried in order) per engine
     switch ($engine) {
         #google
         case 'google':
             $url = $this->saturationUrlList[$engine] . urlencode($this->url);
             $patternList = array(
                 '/about ([0-9\\,]+) result/si',
                 '/<div id=resultStats>([0-9\\,]+) result/si',
                 '/([0-9\\,]+) result/si',
                 '/about <b>([0-9\\,]+)<\\/b> from/si',
                 '/of <b>([0-9\\,]+)<\\/b>/si',
             );
             break;

         #msn
         case 'msn':
             $url = $this->saturationUrlList[$engine] . urlencode(addHttpToUrl($this->url));
             $patternList = array(
                 '/([0-9\\,]+) results/si',
                 '/id="count".*?>.*?\\(([0-9\\,]+).*?\\)/si',
                 '/id="count".*?>.*?([0-9\\,]+).*?/si',
                 '/class="sb_count".*?>.*?([0-9\\,]+).*?<\\/span>/si',
             );
             break;

         default:
             // unsupported engine: no page was requested, so there is no
             // crawl log entry to update (previously $v was undefined here)
             return $saturationCount;
     }

     $v = $this->spider->getContent($url);
     $pageContent = empty($v['page']) ? '' : $v['page'];

     // first matching pattern wins
     $r = array();
     $matched = false;
     foreach ($patternList as $pattern) {
         if (preg_match($pattern, $pageContent, $r)) {
             $matched = true;
             break;
         }
     }

     if (!$matched) {
         $crawlInfo['crawl_status'] = 0;
         $crawlInfo['log_message'] = SearchEngineController::isCaptchInSearchResults($pageContent) ? "<font class=error>Captcha found</font> in search result page" : "Regex not matched error occured while parsing search results!";
     }

     if (!empty($r[1])) {
         $saturationCount = str_replace(',', '', $r[1]);
     }

     // update crawl log
     $crawlLogCtrl = new CrawlLogController();
     $crawlInfo['crawl_type'] = 'saturation';
     $crawlInfo['ref_id'] = $this->url;
     $crawlInfo['subject'] = $engine;
     $crawlLogCtrl->updateCrawlLog($v['log_id'], $crawlInfo);

     return $saturationCount;
 }