/**
 * Import a comma separated list of page links into a site auditor project.
 *
 * The submitted list is validated (not blank, project link limit not reached,
 * accepted by checkExcludeLinks()). Each link is then normalised with
 * Spider::formatUrl(), links matching the project's exclude list are skipped,
 * and the import is aborted if a link already has a report row in the project
 * or if the project's 'max_links' limit would be exceeded.
 *
 * On success every accepted link is saved through the auditor component, the
 * project list is rendered and the script exits. On any error the import form
 * is rendered again with the error message set in 'errMsg'.
 *
 * @param array $listInfo Submitted form data; the keys 'project_id' and
 *                        'links' (comma separated URLs) are read.
 */
function importLinks($listInfo) {
    // The return value is not needed here; the call itself is kept in case it
    // has side effects this method relies on (NOTE(review): confirm).
    isLoggedIn();

    $listInfo['project_id'] = intval($listInfo['project_id']);
    $this->set('post', $listInfo);

    $errMsg = array();
    $errMsg['links'] = formatErrorMsg($this->validate->checkBlank($listInfo['links']));

    if (!$this->validate->flagErr) {
        $totalLinks = $this->getCountcrawledLinks($listInfo['project_id']);
        $projectInfo = $this->__getProjectInfo($listInfo['project_id']);

        if ($totalLinks >= $projectInfo['max_links']) {
            // the project has already reached its link limit
            $errMsg['links'] = formatErrorMsg($this->spTextSA['totallinksgreaterallowed'] . " - {$projectInfo['max_links']}");
        } else {
            // check whether links are pages of website
            $linkInfo = $this->checkExcludeLinks($listInfo['links'], $projectInfo['url'], false);

            if (!empty($linkInfo['err_msg'])) {
                $errMsg['links'] = formatErrorMsg($linkInfo['err_msg']);
            } else {
                $auditorComp = $this->createComponent('AuditorComponent');
                $error = false;
                $linkList = array();

                foreach (explode(",", $listInfo['links']) as $link) {
                    $link = Spider::formatUrl(trim($link));

                    if (empty($link)) {
                        continue;
                    }

                    // A link repeated in the same submission is stored only once,
                    // so it must not be counted against the limit a second time.
                    if (isset($linkList[$link])) {
                        continue;
                    }

                    if ($auditorComp->isExcludeLink($link, $projectInfo['exclude_links'])) {
                        continue;
                    }

                    // check whether url exists or not
                    if ($auditorComp->getReportInfo(" and project_id={$projectInfo['id']} and page_url='" . addslashes($link) . "'")) {
                        // The link is user input: escape it before embedding it in HTML.
                        $safeLink = htmlspecialchars($link, ENT_QUOTES, 'UTF-8');
                        $errMsg['links'] = formatErrorMsg($this->spTextSA['Page Link'] . " '<b>{$safeLink}</b>' " . $_SESSION['text']['label']['already exist']);
                        $error = true;
                        break;
                    }

                    // if total links greater than max links of a project
                    $totalLinks++;
                    if ($totalLinks > $projectInfo['max_links']) {
                        $errMsg['links'] = formatErrorMsg($this->spTextSA['totallinksgreaterallowed'] . " - {$projectInfo['max_links']}");
                        $error = true;
                        break;
                    }

                    $linkList[$link] = 1;
                }

                // to save the pages if no error occurs
                if (!$error) {
                    foreach (array_keys($linkList) as $link) {
                        $reportInfo = array(
                            'page_url' => $link,
                            'project_id' => $projectInfo['id'],
                        );
                        $auditorComp->saveReportInfo($reportInfo);
                    }

                    $this->showAuditorProjects();
                    exit;
                }
            }
        }
    }

    $this->set('errMsg', $errMsg);
    $this->showImportProjectLinks();
}
/**
 * Crawl one page of an auditor project and store the results.
 *
 * Fetches page details through Spider::getPageInfo(). If a report row exists
 * for the URL in this project, the row is updated with the page meta data and,
 * depending on the project's check_* flags, pagerank, backlink, indexed-page
 * and broken-link results. The site links found on the page are then:
 *  - stored as page links when 'store_links_in_page' is set, and
 *  - added as new report rows of the project while the number of links is
 *    below 'max_links', skipping non-HTML file types, blank URLs, excluded
 *    links and URLs that already have a report row.
 * External links are stored as page links when 'store_links_in_page' is set.
 * Finally the page score and the project score are recalculated.
 *
 * Nothing is done when no report row exists for the URL.
 *
 * @param string $reportUrl   URL of the page to crawl.
 * @param array  $projectInfo Project settings; reads 'id', 'url', 'max_links',
 *                            'exclude_links', 'store_links_in_page',
 *                            'check_pr', 'check_backlinks', 'check_indexed'
 *                            and 'check_brocken'.
 * @param int    $totalLinks  Number of links already stored for the project.
 *                            Passed by value: increments made here are not
 *                            visible to the caller.
 */
function runReport($reportUrl, $projectInfo, $totalLinks) {
    $spider = new Spider();
    $pageInfo = $spider->getPageInfo($reportUrl, $projectInfo['url'], true);

    // Escape the URL before embedding it in the SQL condition, matching the
    // lookup done in importLinks().
    $rInfo = $this->getReportInfo(" and project_id={$projectInfo['id']} and page_url='" . addslashes($reportUrl) . "'");
    if (!$rInfo) {
        return;
    }

    $reportInfo = array();
    $reportInfo['id'] = $rInfo['id'];
    $reportInfo['page_title'] = addslashes($pageInfo['page_title']);
    $reportInfo['page_description'] = addslashes($pageInfo['page_description']);
    $reportInfo['page_keywords'] = addslashes($pageInfo['page_keywords']);
    $reportInfo['total_links'] = $pageInfo['total_links'];
    $reportInfo['external_links'] = $pageInfo['external'];
    $reportInfo['crawled'] = 1;

    // google pagerank check
    if ($projectInfo['check_pr']) {
        $rankCtrler = $this->createController('Rank');
        $reportInfo['pagerank'] = $rankCtrler->__getGooglePageRank(Spider::addTrailingSlash($reportUrl));
    }

    // backlinks page check
    if ($projectInfo['check_backlinks']) {
        $backlinkCtrler = $this->createController('Backlink');
        $backlinkCtrler->url = Spider::addTrailingSlash($reportUrl);
        $reportInfo['bing_backlinks'] = $backlinkCtrler->__getBacklinks('msn');
        $reportInfo['google_backlinks'] = $backlinkCtrler->__getBacklinks('google');
    }

    // indexed page check
    if ($projectInfo['check_indexed']) {
        $saturationCtrler = $this->createController('SaturationChecker');
        $saturationCtrler->url = Spider::addTrailingSlash($reportUrl);
        $reportInfo['bing_indexed'] = $saturationCtrler->__getSaturationRank('msn');
        $reportInfo['google_indexed'] = $saturationCtrler->__getSaturationRank('google');
    }

    // broken link check of the crawled page itself; no link of the page has
    // been iterated yet at this point, so the page URL is the value to test
    if ($projectInfo['check_brocken']) {
        $reportInfo['brocken'] = Spider::isLInkBrocken($reportUrl);
    }

    $this->saveReportInfo($reportInfo, 'update');

    // Counts the page links stored so far; only the first stored link is
    // passed to storePagelLinks() with $delete set to true.
    $i = 0;

    // to store sitelinks in page and links reports
    // (!empty() also covers a missing or null list, which count() rejects on PHP 8)
    if (!empty($pageInfo['site_links'])) {
        foreach ($pageInfo['site_links'] as $linkInfo) {

            // if store links
            if ($projectInfo['store_links_in_page']) {
                $delete = ($i === 0);
                $i++;
                $linkInfo['report_id'] = $rInfo['id'];
                $this->storePagelLinks($linkInfo, $delete);
            }

            // new report rows are only added while the project is below its link limit
            if ($totalLinks >= $projectInfo['max_links']) {
                continue;
            }

            // check whether valid html serving link
            if (preg_match('/\\.zip$|\\.gz$|\\.tar$|\\.png$|\\.jpg$|\\.jpeg$|\\.gif$|\\.mp3$|\\.flv$|\\.pdf$|\\.m4a$|#$/i', $linkInfo['link_url'])) {
                continue;
            }

            // skip links that contain no non-whitespace character after formatting
            $linkInfo['link_url'] = Spider::formatUrl($linkInfo['link_url']);
            if (!preg_match('/\\S+/', $linkInfo['link_url'])) {
                continue;
            }

            // check whether url needs to be excluded
            if ($this->isExcludeLink($linkInfo['link_url'], $projectInfo['exclude_links'])) {
                continue;
            }

            // save links for the project report; the crawled URL is untrusted
            // and is escaped before being embedded in the SQL condition
            if (!$this->getReportInfo(" and project_id={$projectInfo['id']} and page_url='" . addslashes($linkInfo['link_url']) . "'")) {
                $repInfo = array(
                    'page_url' => $linkInfo['link_url'],
                    'project_id' => $projectInfo['id'],
                );
                $this->saveReportInfo($repInfo);
                $totalLinks++;
            }
        }
    }

    // to store external links in page
    if ($projectInfo['store_links_in_page'] && !empty($pageInfo['external_links'])) {
        foreach ($pageInfo['external_links'] as $linkInfo) {
            $delete = ($i === 0);
            $i++;
            $linkInfo['report_id'] = $rInfo['id'];
            $linkInfo['extrenal'] = 1;
            $this->storePagelLinks($linkInfo, $delete);
        }
    }

    // calculate score of the page and update it
    $this->updateReportPageScore($rInfo['id']);

    // calculate score of the project and update it
    $this->updateProjectPageScore($projectInfo['id']);
}