/**
 * Request handler: looks up links for a URL/phrase via YandexSitesLinksGrabber
 * and prints the outcome encoded with Services_JSON.
 *
 * Reads from $_REQUEST:
 *  - "url", "phrase"        always (both are trimmed in place);
 *  - "delay", "delayValue"  optional pause before the lookup;
 *  - "filter"               when truthy, the filter branch runs and additionally
 *                           reads "srcUrl", "useEqual" and "num".
 *
 * Output:
 *  - filter branch: array(srcUrl, url => host, text, num) when the grabber
 *    returned non-blank text, otherwise an empty string;
 *  - default branch: array(url, links); links stays empty when
 *    parseForDomen() returns a PEAR error.
 *
 * Terminates the script with a plain "Error: ..." message when no proxy data
 * can be obtained in the default branch.
 */
function HandlePhraseLinks()
{
    header("Content-type: text/html; charset=windows-1251");
    require_once "YandexSitesLinksGrabber.php";

    $json = new Services_JSON();

    // Optional throttling requested by the caller.
    if (isset($_REQUEST["delay"]) && $_REQUEST["delay"] == "true") {
        if (isset($_REQUEST["delayValue"])) {
            sleep((int) $_REQUEST["delayValue"]);
        } else {
            sleep(DELAY_BEFORE_REQUEST);
        }
    }

    $_REQUEST["url"] = trim($_REQUEST["url"]);
    $_REQUEST["phrase"] = trim($_REQUEST["phrase"]);

    // Plain assignment: "=& new" was deprecated in PHP 5.3 and is a parse
    // error since PHP 7.0; objects are handled by reference-like handles anyway.
    $parser = new YandexSitesLinksGrabber();
    $parser->showInfo = false;
    $links = array();

    if (isset($_REQUEST["filter"]) && $_REQUEST["filter"]) {
        $result = "";

        // The phrase arrives URL-encoded UTF-8; the grabber is fed windows-1251.
        $_REQUEST["phrase"] = iconv("UTF-8", "windows-1251", urldecode($_REQUEST["phrase"]));

        if (strlen($_REQUEST["phrase"]) == 0) {
            $text = $parser->findUrl($_REQUEST["url"], $_REQUEST["srcUrl"]);
        } else {
            $text = $parser->find($_REQUEST["url"], $_REQUEST["phrase"], $_REQUEST["useEqual"] === "true");
        }

        if (strlen(trim($text)) > 0) {
            // Convert back to UTF-8 before encoding the response.
            $text = iconv("windows-1251", "UTF-8", $text);
            $u = parse_url($_REQUEST["url"]);
            $result = array(
                "srcUrl" => $_REQUEST["srcUrl"],
                "url" => $u["host"],
                "text" => $text,
                "num" => $_REQUEST["num"]
            );
        }
    } else {
        // Specify the service name and the IP of the server the script runs from.
        $proxyService = new ProxiesService("yandex", SERVER_IP);
        $res = $proxyService->getProxyData();
        if (PEAR::isError($res)) {
            print "Error: " . $res->getMessage();
            exit;
        }

        $parser->setProxyData($res);
        $parser->proxyService = $proxyService;

        $res = $parser->parseForDomen($_REQUEST["url"], $_REQUEST["phrase"]);
        if (!PEAR::isError($res)) {
            $links = $res;
        }

        $result = array("url" => $_REQUEST["url"], "links" => $links);
    }

    print $json->encode($result);
}
/**
 * Request handler: fetches the Yandex TIC ("CY") value for a domain, both
 * without and with the "www." prefix, and prints the larger one encoded
 * with Services_JSON.
 *
 * Reads from $_REQUEST: "rowNum", "url", and optionally "delay"/"delayValue".
 *
 * Output: array(rowNum, url, num) on success, or array(rowNum, url, error)
 * when proxy data cannot be obtained or the resulting value is negative.
 * The "url" echoed back is the value as received, before trimming.
 */
function HandleCheckTIC()
{
    header("Content-type: text/html; charset=windows-1251");
    require_once "YandexTICGrabber.php";

    $json = new Services_JSON();
    $result = array("rowNum" => (int) $_REQUEST["rowNum"], "url" => $_REQUEST["url"]);

    $proxyService = new ProxiesService("yandex", SERVER_IP);
    $res = $proxyService->getProxyData();
    if (PEAR::isError($res)) {
        $result["error"] = $res->getMessage();
        print $json->encode($result);
        exit;
    }
    // NOTE(review): the proxy data in $res is not handed to the grabber here
    // (no setProxyData() call) - confirm that is intended.

    // Optional throttling requested by the caller.
    if (isset($_REQUEST["delay"]) && $_REQUEST["delay"] == "true") {
        if (isset($_REQUEST["delayValue"])) {
            sleep((int) $_REQUEST["delayValue"]);
        } else {
            sleep(DELAY_BEFORE_REQUEST);
        }
    }

    $_REQUEST["url"] = trim($_REQUEST["url"]);

    // Plain assignment: "=& new" is a parse error since PHP 7.0.
    $parser = new YandexTICGrabber();
    $parser->showInfo = false;

    $bareDomain = removeWWW($_REQUEST["url"]);

    $cyBare = $parser->parseForDomen($bareDomain);
    $proxyService->increase();

    $cyWww = $parser->parseForDomen("www." . $bareDomain);
    $proxyService->increase();

    // Previously the second result was stored as $cyNoWww but read back as
    // $cyNoWWW; variable names are case-sensitive, so the "www." lookup was
    // never taken into account. Both results now take part in the comparison.
    $cy = max($cyBare["num"], $cyWww["num"]);

    if ($cy < 0) {
        $result["error"] = "Wrong CY";
        print $json->encode($result);
        exit;
    }

    $result["num"] = $cy;
    print $json->encode($result);
}
} StateUtils::setBusy(CALC_REALNUMS_STAT); register_shutdown_function("onShutdown"); // ƒл¤ YellowPagesContentGrabber запускать скрипт без параметров или 0 0 // ƒл¤ YellowPages1ContentGrabber запускать скрипт c параметрами 1 1 $parserId = StateUtils::getProcessNum() + 1; if ($parserId == 1) { $Parser =& new YellowPagesContentGrabber(); } else { $Parser =& new YellowPages1ContentGrabber(); } $Parser->showInfo = false; $Parser->waitBeforeRequest = 3; $rubrics =& new RubricsData(); $Parser->rubricsData =& $rubrics; $ProxyService = new ProxiesService(); $start = (int) FileUtils::loadContents(CALC_START_ID); $subRubrics = $rubrics->getSubRubricsForParser($parserId, $start, 1); if (PEAR::isError($subRubrics)) { print $subRubrics->getMessage(); exit; } if ($subRubrics->numRows() == 0) { // —брос счетчика последнего обработаной записи FileUtils::saveContents(CALC_START_ID, "0"); exit; } while ($row =& $subRubrics->fetchRow(DB_FETCHMODE_OBJECT)) { $res = $ProxyService->getRandomProxyData(); $Parser->setProxyData($res); $res = $Parser->getRealNum($row->rurl);
/**
 * Walks the rubric tree: for every top-level rubric it loads the first-level
 * sub-rubrics (getSiteRubrics1) and, for each of those, triggers the
 * second-level load (getSiteRubrics2). Progress is printed as it goes, and a
 * fresh random proxy is set before each top-level rubric.
 *
 * @param array $prevRubrics Rubrics left over from a previous call; only used
 *                           when $usePrev is true and the array is not empty.
 * @param int   $parserNum   Parser id that goes with $prevRubrics.
 * @param bool  $usePrev     Incremental mode: stop after the first rubric whose
 *                           sub-rubric list loaded without error, then return
 *                           the rubric list with its first element removed.
 *
 * @return array|null In incremental mode the remaining rubrics (an empty array
 *                    when the main rubric list could not be loaded); otherwise
 *                    nothing is returned.
 */
function getAllRubrics($prevRubrics = array(), $parserNum = 0, $usePrev = false)
{
    require_once "ProxiesService.php";

    $proxies = new ProxiesService();
    $this->setProxyData($proxies->getRandomProxyData());

    $resumeFromPrevious = $usePrev && sizeof($prevRubrics) != 0;

    if ($resumeFromPrevious) {
        print "Use previous\n";
        $rubrics = $prevRubrics;
        $parserId = $parserNum;
    } else {
        if ($usePrev) {
            print "Use main\n";
        }
        list($rubrics, $parserId) = $this->getMainSiteRubrics();
        if (PEAR::isError($rubrics)) {
            // Incremental callers get an empty list, full runs get nothing.
            return $usePrev ? array() : null;
        }
    }

    foreach ($rubrics as $topRubric) {
        print $topRubric["id"] . ", " . $topRubric["url"] . "\n";

        // Rotate the proxy for every top-level rubric.
        $this->setProxyData($proxies->getRandomProxyData());

        $children = $this->getSiteRubrics1($topRubric["url"], $topRubric["id"], $parserId);
        if (PEAR::isError($children)) {
            continue;
        }

        foreach ($children as $child) {
            print $child["id"] . ", ";
            // Called for its effect only; the return value is not used.
            $this->getSiteRubrics2($child["url"], $child["id"], $parserId);
        }

        if ($usePrev) {
            break;
        }
        print "\n";
    }

    if ($usePrev) {
        array_shift($rubrics);
        return $rubrics;
    }
}
/**
 * Request handler: gathers the Yandex TIC ("CY") value, the page rank and the
 * link counts for a URL, derives price values from them and prints everything
 * encoded with Services_JSON.
 *
 * Reads from $_REQUEST: "url", "rowNum", "linktype", and optionally
 * "delay"/"delayValue".
 *
 * Output on success: array(url, rowNum, linksCountOut, pr, cy) plus "num1" and
 * "num" when "linktype" is 1 (get_face_value) or 2 (get_inner_value); for any
 * other "linktype" those two keys are absent. On failure the array carries an
 * "error" message instead and the script exits.
 * The "url" echoed back is the value as received, before trimming.
 */
function HandleCheckPriceLink()
{
    header("Content-type: text/html; charset=windows-1251");
    require_once "YandexTICGrabber.php";
    require_once "CheckPageRankGrabber.php";

    $json = new Services_JSON();
    $result = array("url" => $_REQUEST["url"], "rowNum" => (int) $_REQUEST["rowNum"]);

    $proxyService = new ProxiesService("yandex", SERVER_IP);
    $res = $proxyService->getProxyData();
    if (PEAR::isError($res)) {
        $result["error"] = $res->getMessage();
        print $json->encode($result);
        exit;
    }

    // Optional throttling requested by the caller.
    if (isset($_REQUEST["delay"]) && $_REQUEST["delay"] == "true") {
        if (isset($_REQUEST["delayValue"])) {
            sleep((int) $_REQUEST["delayValue"]);
        } else {
            sleep(DELAY_BEFORE_REQUEST);
        }
    }

    $_REQUEST["url"] = trim($_REQUEST["url"]);

    // --- TIC: take the larger of the bare-domain and "www." lookups. ---
    // Plain assignment: "=& new" is a parse error since PHP 7.0.
    $parserTic = new YandexTICGrabber();
    $parserTic->showInfo = false;

    $bareDomain = removeWWW($_REQUEST["url"]);

    $ticBare = $parserTic->parseForDomen($bareDomain);
    $proxyService->increase();
    $ticWww = $parserTic->parseForDomen("www." . $bareDomain);

    $resultTic = max($ticBare["num"], $ticWww["num"]);
    if ($resultTic < 0) {
        $result["error"] = "Wrong CY";
        print $json->encode($result);
        exit;
    }
    $proxyService->increase();

    // --- Page rank: larger of the "www" and "nowww" values. ---
    $parserRank = new CheckPageRankGrabber();
    $parserRank->showInfo = false;

    $resultRank = $parserRank->parseForDomen($_REQUEST["url"]);
    $pageRank = max($resultRank["www"], $resultRank["nowww"]);
    if ($pageRank < 0) {
        $result["error"] = "Wrong Page Rank";
        print $json->encode($result);
        exit;
    }

    // Prepend the scheme unless the URL already starts with "http://".
    if (strpos($_REQUEST["url"], "http://") !== 0) {
        $_REQUEST["url"] = "http://" . $_REQUEST["url"];
    }

    $res = getLinksForUrlDOM($_REQUEST["url"]);
    if (PEAR::isError($res)) {
        $result["error"] = $res->getMessage();
        print $json->encode($result);
        exit;
    }

    // Only the last two of the five returned counters are used here.
    list(, , , $linksCountOut, $linkCountOur) = $res;
    $lCount = $linksCountOut + $linkCountOur;

    // "num1" is computed with -1 in place of the link count.
    // NOTE(review): the meaning of -1 is defined by get_face_value() /
    // get_inner_value(), which are not visible here.
    switch ((int) $_REQUEST["linktype"]) {
        case 1:
            $result["num1"] = get_face_value($pageRank, $resultTic, -1);
            $result["num"] = get_face_value($pageRank, $resultTic, $lCount);
            break;
        case 2:
            $result["num1"] = get_inner_value($pageRank, $resultTic, -1);
            $result["num"] = get_inner_value($pageRank, $resultTic, $lCount);
            break;
    }

    $result["linksCountOut"] = $lCount;
    $result["pr"] = $pageRank;
    $result["cy"] = $resultTic;

    print $json->encode($result);
}
<?php
// Maintenance script: records the proxy counters, resets them, clears the
// busy flag and then runs ExportData.php.

// Switch to the project directory before anything is loaded.
chdir("/***/ResearchGrabber");

require_once "prepend.php";
require_once "ProxiesService.php";
require_once "FileUtils.php";
require_once "StateUtils.php";

// Name of the busy-flag file handed to StateUtils below.
define("BUSY_FILENAME", "data/isBusy.dat");

$proxyService = new ProxiesService();

// Hand the current counters state to FileUtils::saveData under
// "countersStat.log" before the counters are reset.
FileUtils::saveData("countersStat.log", $proxyService->getCountersState());
$proxyService->resetAll();

// Clear the busy flag.
StateUtils::resetBusy(BUSY_FILENAME);

// ExportData.php runs in this script's scope, so it sees the variables and
// constants defined above.
include "ExportData.php";
/**
 * Request handler: loads the page at $_REQUEST["url"] and prints an HTML
 * document that shows its highlighted markup (via getHighlitedHtml()).
 *
 * @param bool $useProxy When true, random proxy data from ProxiesService is
 *                       passed to UrlUtils::loadPage() as "proxyData".
 *
 * @return mixed The PEAR error from getRandomProxyData() when proxy data
 *               cannot be obtained; otherwise nothing is returned.
 */
function HandleHighlighting($useProxy = true)
{
    $params = array();
    if ($useProxy) {
        $proxyService = new ProxiesService();
        $res = $proxyService->getRandomProxyData();
        if (PEAR::isError($res)) {
            return $res;
        }
        $params["proxyData"] = $res;
    }

    $content = UrlUtils::loadPage($_REQUEST["url"], $params);

    // The URL comes straight from the request, so it must be escaped before it
    // is written into the page (it used to be interpolated raw, which allowed
    // markup injection through the "url" parameter). A single-byte charset is
    // named explicitly so that only the ASCII special characters are replaced
    // and the input is never rejected, whatever encoding the request used.
    $safeUrl = htmlspecialchars($_REQUEST["url"], ENT_QUOTES, "ISO-8859-1");

    print <<<HTML
<html>
<title>Подсветка {$safeUrl}</title>
<head>
<style>
em { color: red; font-size: 12pt; font-weight: bold; }
pre { font-size: 9pt; }
b { color: blue; }
b b { color: red; }
</style>
</head>
<body>
<pre>
HTML;
    print getHighlitedHtml($content);
    print <<<HTML
</pre>
</body>
</html>
HTML;
}
// Проверка наличия новых ссылок на страницы if ($parser->hasUrls()) { $parser->items = array(); $parser->pageContent = ""; StateUtils::save(STATE_FILENAME, $parser); } } if (StateUtils::isBusy(BUSY_FILENAME)) { print "Is busy\n"; exit; } StateUtils::setBusy(BUSY_FILENAME); register_shutdown_function("onShutdown"); $baseUrl = "http://direct.yandex.ru/stat/wordsstat.pl"; // указываем имя сервиса и ip-сервера откуда скрипт запускается $proxyService = new ProxiesService("yandex", "194.186.45.242"); $res = $proxyService->getProxyData(); if (PEAR::isError($res)) { print $res->getMessage(); exit; } $queries = null; $parser = StateUtils::load(STATE_FILENAME); var_dump($parser); if ($parser == null) { // Получить запрос $queries = $db->getAll(CMD_SEL_QUERIES, array(), DB_FETCHMODE_ASSOC); if (PEAR::isError($queries)) { print $queries->getMessage(); exit; }
/**
 * Request handler: runs the Yandex results grabbers for a domain and prints
 * the collected results encoded with Services_JSON.
 *
 * Reads from $_REQUEST: "rowNum", "linkType", "url", and optionally
 * "withoutOur", "mainAndRest", "delay", "delayValue".
 *
 * "linkType" selects which host variants are queried:
 *  - 0: both the URL as given and with "www." prepended (res1/res2, and
 *       resAdd1/resAdd2 when the additional grabber is requested);
 *  - 1: only with "www." prepended (res, resAdd);
 *  - 2: only the URL as given (res, resAdd).
 * The additional grabber runs when "mainAndRest" or "withoutOur" is "true".
 *
 * On failure the output array carries an "error" message and the script exits.
 */
function HandleGetSearchResults()
{
    set_time_limit(0);
    header("Content-type: text/html; charset=windows-1251");
    // NOTE: without this, for some reason nothing gets output.
    print " ";

    require_once "YandexResultsGrabber.php";
    require_once "YandexResultsAddGrabber.php";

    $json = new Services_JSON();
    $result = array("rowNum" => (int) $_REQUEST["rowNum"], "linkType" => (int) $_REQUEST["linkType"]);

    // Plain assignment: "=& new" is a parse error since PHP 7.0.
    $parser = new YandexResultsGrabber();
    $parser->showInfo = false;
    $parserAdd = new YandexResultsAddGrabber();
    $parserAdd->showInfo = false;

    $withoutOur = isset($_REQUEST["withoutOur"]) && $_REQUEST["withoutOur"] == "true";
    $parserAdd->withoutOur = $withoutOur;

    if (isset($_REQUEST["delay"]) && $_REQUEST["delay"] == "true") {
        $delay = isset($_REQUEST["delayValue"]) ? (int) $_REQUEST["delayValue"] : DELAY_BEFORE_REQUEST;
        // NOTE(review): the property is spelled "waitBeforRequest" here;
        // confirm it matches the property the grabber classes actually read.
        $parser->waitBeforRequest = $delay;
        $parserAdd->waitBeforRequest = $delay;
    }

    // Specify the service name and the IP of the server the script runs from.
    $proxyService = new ProxiesService("yandex", SERVER_IP);
    $res = $proxyService->getProxyData();
    if (PEAR::isError($res)) {
        $result["error"] = $res->getMessage();
        print $json->encode($result);
        exit;
    }

    $parser->setProxyData($res);
    $parserAdd->proxyService = $proxyService;
    $parserAdd->setProxyData($res);

    $_REQUEST["url"] = trim($_REQUEST["url"]);

    // The additional grabber is wanted when either flag is "true".
    $needAdd = (isset($_REQUEST["mainAndRest"]) && $_REQUEST["mainAndRest"] == "true") || $withoutOur;

    switch ((int) $_REQUEST["linkType"]) {
        // All
        case 0:
            $res1 = $parser->parseForDomen($_REQUEST["url"]);
            $proxyService->increase();
            $res2 = $parser->parseForDomen("www." . $_REQUEST["url"]);
            $proxyService->increase();
            $result["res1"] = $res1;
            $result["res2"] = $res2;

            if ($needAdd) {
                $resAdd1 = $parserAdd->parseForDomen($_REQUEST["url"]);
                $resAdd2 = $parserAdd->parseForDomen("www." . $_REQUEST["url"]);
                // Only give up when both variants failed.
                if (PEAR::isError($resAdd1) && PEAR::isError($resAdd2)) {
                    $result["error"] = $resAdd1->getMessage();
                    print $json->encode($result);
                    exit;
                }
                $result["resAdd1"] = $resAdd1;
                $result["resAdd2"] = $resAdd2;
            }
            break;

        // With www
        case 1:
            $res = $parser->parseForDomen("www." . $_REQUEST["url"]);
            $proxyService->increase();
            $result["res"] = $res;

            if ($needAdd) {
                $resAdd = $parserAdd->parseForDomen("www." . $_REQUEST["url"]);
                if (PEAR::isError($resAdd)) {
                    $result["error"] = $resAdd->getMessage();
                    print $json->encode($result);
                    exit;
                }
                $result["resAdd"] = $resAdd;
            }
            break;

        // Without www
        case 2:
            $res = $parser->parseForDomen($_REQUEST["url"]);
            $proxyService->increase();
            $result["res"] = $res;

            if ($needAdd) {
                $resAdd = $parserAdd->parseForDomen($_REQUEST["url"]);
                if (PEAR::isError($resAdd)) {
                    $result["error"] = $resAdd->getMessage();
                    print $json->encode($result);
                    exit;
                }
                $result["resAdd"] = $resAdd;
            }
            break;
    }

    print $json->encode($result);
}