/**
 * Request handler: runs YandexSitesLinksGrabber for the requested URL/phrase
 * and prints the outcome JSON-encoded.
 *
 * Reads from $_REQUEST: "url", "phrase", and optionally "delay"/"delayValue",
 * "filter", "srcUrl", "useEqual", "num". The trimmed "url" and "phrase" values
 * are written back into $_REQUEST.
 *
 * Two modes:
 *  - "filter" set and truthy: calls findUrl() (empty phrase) or find() on the
 *    parser and prints either an array (srcUrl, url host, text, num) or an
 *    empty string when nothing was found.
 *  - otherwise: obtains proxy data, calls parseForDomen() and prints
 *    array("url" => ..., "links" => ...). On a proxy error it prints a plain
 *    "Error: ..." message (not JSON) and terminates the script.
 *
 * @return void
 */
function HandlePhraseLinks()
{
    header("Content-type: text/html; charset=windows-1251");
    require_once "YandexSitesLinksGrabber.php";
    $json = new Services_JSON();

    // Optional throttling requested by the caller.
    if (isset($_REQUEST["delay"]) && $_REQUEST["delay"] == "true") {
        if (isset($_REQUEST["delayValue"])) {
            sleep((int) $_REQUEST["delayValue"]);
        } else {
            sleep(DELAY_BEFORE_REQUEST);
        }
    }

    $_REQUEST["url"] = trim($_REQUEST["url"]);
    $_REQUEST["phrase"] = trim($_REQUEST["phrase"]);

    // Plain assignment: "=& new" is deprecated since PHP 5.3 and a parse error in PHP 7+.
    $parser = new YandexSitesLinksGrabber();
    $parser->showInfo = false;

    if (isset($_REQUEST["filter"]) && $_REQUEST["filter"]) {
        // Stays an empty string when the parser finds nothing.
        $result = "";
        // The phrase arrives URL-encoded in UTF-8; the parser is given windows-1251.
        $_REQUEST["phrase"] = iconv("UTF-8", "windows-1251", urldecode($_REQUEST["phrase"]));
        if (strlen($_REQUEST["phrase"]) == 0) {
            $text = $parser->findUrl($_REQUEST["url"], $_REQUEST["srcUrl"]);
        } else {
            $text = $parser->find($_REQUEST["url"], $_REQUEST["phrase"], $_REQUEST["useEqual"] === "true");
        }
        if (strlen(trim($text)) > 0) {
            // Convert back to UTF-8 before JSON-encoding.
            $text = iconv("windows-1251", "UTF-8", $text);
            $u = parse_url($_REQUEST["url"]);
            $result = array("srcUrl" => $_REQUEST["srcUrl"], "url" => $u["host"], "text" => $text, "num" => $_REQUEST["num"]);
        }
    } else {
        // Specify the service name and the IP of the server the script runs from.
        $proxyService = new ProxiesService("yandex", SERVER_IP);
        $res = $proxyService->getProxyData();
        if (PEAR::isError($res)) {
            print "Error: " . $res->getMessage();
            exit;
        }
        $parser->setProxyData($res);
        $parser->proxyService = $proxyService;

        // A parser error is reported to the client as an empty link list.
        $links = array();
        $res = $parser->parseForDomen($_REQUEST["url"], $_REQUEST["phrase"]);
        if (!PEAR::isError($res)) {
            $links = $res;
        }
        $result = array("url" => $_REQUEST["url"], "links" => $links);
    }
    print $json->encode($result);
}
Ejemplo n.º 2
0
/**
 * Request handler: queries YandexTICGrabber for the requested domain both
 * without and with the "www." prefix and prints the larger "num" value as JSON.
 *
 * Reads from $_REQUEST: "url", "rowNum", and optionally "delay"/"delayValue".
 * The trimmed "url" is written back into $_REQUEST.
 *
 * Output is a JSON object with "rowNum", "url" and either "num" or "error"
 * (proxy failure message, or "Wrong CY" when the best value is negative).
 * Error paths terminate the script via exit.
 *
 * @return void
 */
function HandleCheckTIC()
{
    header("Content-type: text/html; charset=windows-1251");
    require_once "YandexTICGrabber.php";
    $json = new Services_JSON();
    $result = array("rowNum" => (int) $_REQUEST["rowNum"], "url" => $_REQUEST["url"]);

    $proxyService = new ProxiesService("yandex", SERVER_IP);
    // NOTE(review): the proxy data is only checked for errors here and never
    // handed to the parser via setProxyData() - confirm this is intended.
    $res = $proxyService->getProxyData();
    if (PEAR::isError($res)) {
        $result["error"] = $res->getMessage();
        print $json->encode($result);
        exit;
    }

    // Optional throttling requested by the caller.
    if (isset($_REQUEST["delay"]) && $_REQUEST["delay"] == "true") {
        if (isset($_REQUEST["delayValue"])) {
            sleep((int) $_REQUEST["delayValue"]);
        } else {
            sleep(DELAY_BEFORE_REQUEST);
        }
    }

    $_REQUEST["url"] = trim($_REQUEST["url"]);

    // Plain assignment: "=& new" is deprecated since PHP 5.3 and a parse error in PHP 7+.
    $parser = new YandexTICGrabber();
    $parser->showInfo = false;

    $cyNoWww = $parser->parseForDomen(removeWWW($_REQUEST["url"]));
    $proxyService->increase();
    $cyWww = $parser->parseForDomen("www." . removeWWW($_REQUEST["url"]));
    $proxyService->increase();

    // Take the better of the two lookups. The original compared against a
    // misspelled (case-differing) variable, so the "www." result was ignored.
    $cy = max($cyNoWww["num"], $cyWww["num"]);
    if ($cy < 0) {
        $result["error"] = "Wrong CY";
        print $json->encode($result);
        exit;
    }
    $result["num"] = $cy;
    print $json->encode($result);
}
}
// Set the busy state for CALC_REALNUMS_STAT and register the "onShutdown"
// callback to run when the script terminates.
StateUtils::setBusy(CALC_REALNUMS_STAT);
register_shutdown_function("onShutdown");
// For YellowPagesContentGrabber run the script without parameters or with 0 0
// For YellowPages1ContentGrabber run the script with parameters 1 1
$parserId = StateUtils::getProcessNum() + 1;
// Parser id 1 uses YellowPagesContentGrabber, any other id YellowPages1ContentGrabber.
if ($parserId == 1) {
    $Parser =& new YellowPagesContentGrabber();
} else {
    $Parser =& new YellowPages1ContentGrabber();
}
$Parser->showInfo = false;
$Parser->waitBeforeRequest = 3;
$rubrics =& new RubricsData();
$Parser->rubricsData =& $rubrics;
$ProxyService = new ProxiesService();
// Starting id is persisted in the CALC_START_ID file between runs.
$start = (int) FileUtils::loadContents(CALC_START_ID);
$subRubrics = $rubrics->getSubRubricsForParser($parserId, $start, 1);
if (PEAR::isError($subRubrics)) {
    print $subRubrics->getMessage();
    exit;
}
if ($subRubrics->numRows() == 0) {
    // Reset the counter of the last processed record
    FileUtils::saveContents(CALC_START_ID, "0");
    exit;
}
// Each row is processed through a freshly picked random proxy.
while ($row =& $subRubrics->fetchRow(DB_FETCHMODE_OBJECT)) {
    $res = $ProxyService->getRandomProxyData();
    $Parser->setProxyData($res);
    $res = $Parser->getRealNum($row->rurl);
 /**
  * Walks the site's rubric tree: for every top-level rubric it fetches the
  * first-level sub-rubrics (getSiteRubrics1) and, for each of those, calls
  * getSiteRubrics2. Progress is printed along the way.
  *
  * When $usePrev is true only ONE top-level rubric is processed per call and
  * the remaining list (first element removed) is returned so that the caller
  * can pass it back in as $prevRubrics together with $parserNum.
  *
  * @param array $prevRubrics Rubrics left over from a previous call (used only with $usePrev).
  * @param int   $parserNum   Parser id to use together with $prevRubrics.
  * @param bool  $usePrev     Process one rubric per call and return the remainder.
  * @return array|null Remaining rubrics (or an empty array on error) when
  *                    $usePrev is true; nothing otherwise.
  */
 function getAllRubrics($prevRubrics = array(), $parserNum = 0, $usePrev = false)
 {
     require_once "ProxiesService.php";
     $proxies = new ProxiesService();
     $this->setProxyData($proxies->getRandomProxyData());

     if ($usePrev && sizeof($prevRubrics) != 0) {
         // Continue with what the previous call left behind.
         print "Use previous\n";
         $rubrics = $prevRubrics;
         $parserId = $parserNum;
     } else {
         if ($usePrev) {
             print "Use main\n";
         }
         list($rubrics, $parserId) = $this->getMainSiteRubrics();
         if (PEAR::isError($rubrics)) {
             if ($usePrev) {
                 return array();
             }
             return;
         }
     }

     foreach ($rubrics as $top) {
         print $top["id"] . ", " . $top["url"] . "\n";
         // Use a fresh random proxy for every top-level rubric.
         $this->setProxyData($proxies->getRandomProxyData());
         $children = $this->getSiteRubrics1($top["url"], $top["id"], $parserId);
         if (!PEAR::isError($children)) {
             foreach ($children as $child) {
                 print $child["id"] . ", ";
                 $this->getSiteRubrics2($child["url"], $child["id"], $parserId);
             }
             if ($usePrev) {
                 // One successfully fetched rubric per call in this mode.
                 break;
             }
             print "\n";
         }
     }

     if ($usePrev) {
         array_shift($rubrics);
         return $rubrics;
     }
 }
/**
 * Request handler: collects the Yandex CY value (YandexTICGrabber), the page
 * rank (CheckPageRankGrabber) and the link counts (getLinksForUrlDOM) for the
 * requested URL and prints them, together with the computed link values, as JSON.
 *
 * Reads from $_REQUEST: "url", "rowNum", "linktype" (1 uses get_face_value(),
 * 2 uses get_inner_value(); any other value leaves "num"/"num1" unset), and
 * optionally "delay"/"delayValue". "url" is trimmed and, if it does not start
 * with "http://", prefixed with it; the result is written back into $_REQUEST.
 *
 * Output is a JSON object with "url", "rowNum" and either "error" or
 * "num1", "num", "linksCountOut", "pr", "cy". Error paths terminate the
 * script via exit.
 *
 * @return void
 */
function HandleCheckPriceLink()
{
    header("Content-type: text/html; charset=windows-1251");
    require_once "YandexTICGrabber.php";
    require_once "CheckPageRankGrabber.php";
    $json = new Services_JSON();
    $result = array("url" => $_REQUEST["url"], "rowNum" => (int) $_REQUEST["rowNum"]);

    $proxyService = new ProxiesService("yandex", SERVER_IP);
    $res = $proxyService->getProxyData();
    if (PEAR::isError($res)) {
        $result["error"] = $res->getMessage();
        print $json->encode($result);
        exit;
    }

    // Optional throttling requested by the caller.
    if (isset($_REQUEST["delay"]) && $_REQUEST["delay"] == "true") {
        if (isset($_REQUEST["delayValue"])) {
            sleep((int) $_REQUEST["delayValue"]);
        } else {
            sleep(DELAY_BEFORE_REQUEST);
        }
    }

    $_REQUEST["url"] = trim($_REQUEST["url"]);

    // Plain assignment: "=& new" is deprecated since PHP 5.3 and a parse error in PHP 7+.
    $parserTic = new YandexTICGrabber();
    $parserTic->showInfo = false;
    $ticNoWww = $parserTic->parseForDomen(removeWWW($_REQUEST["url"]));
    $proxyService->increase();
    $ticWww = $parserTic->parseForDomen("www." . removeWWW($_REQUEST["url"]));
    // Best of the "www." and bare-host lookups.
    $resultTic = max($ticNoWww["num"], $ticWww["num"]);
    if ($resultTic < 0) {
        $result["error"] = "Wrong CY";
        print $json->encode($result);
        exit;
    }
    $proxyService->increase();

    $parserRank = new CheckPageRankGrabber();
    $parserRank->showInfo = false;
    $resultRank = $parserRank->parseForDomen($_REQUEST["url"]);
    $pageRank = max($resultRank["www"], $resultRank["nowww"]);
    if ($pageRank < 0) {
        $result["error"] = "Wrong Page Rank";
        print $json->encode($result);
        exit;
    }

    // Make sure the URL starts with the "http://" scheme before loading it.
    if (strpos($_REQUEST["url"], "http://") !== 0) {
        $_REQUEST["url"] = "http://" . $_REQUEST["url"];
    }
    $res = getLinksForUrlDOM($_REQUEST["url"]);
    if (PEAR::isError($res)) {
        $result["error"] = $res->getMessage();
        print $json->encode($result);
        exit;
    }

    // Only the 4th and 5th elements of the returned list are used here.
    list(, , , $linksCountOut, $linkCountOur) = $res;
    $lCount = $linksCountOut + $linkCountOur;

    // "num1" is computed with -1 in place of the link count, "num" with the real count.
    switch ((int) $_REQUEST["linktype"]) {
        case 1:
            $result["num1"] = get_face_value($pageRank, $resultTic, -1);
            $result["num"] = get_face_value($pageRank, $resultTic, $lCount);
            break;
        case 2:
            $result["num1"] = get_inner_value($pageRank, $resultTic, -1);
            $result["num"] = get_inner_value($pageRank, $resultTic, $lCount);
            break;
    }
    $result["linksCountOut"] = $lCount;
    $result["pr"] = $pageRank;
    $result["cy"] = $resultTic;
    print $json->encode($result);
}
<?php

// Maintenance script: logs the proxy counters, resets them, clears the busy
// flag and then runs ExportData.php.
// All relative paths below are resolved against this working directory.
chdir("/***/ResearchGrabber");
require_once "prepend.php";
require_once "ProxiesService.php";
require_once "FileUtils.php";
require_once "StateUtils.php";
define("BUSY_FILENAME", "data/isBusy.dat");
$proxyService = new ProxiesService();
// Save the current counters state to the log BEFORE resetting it.
FileUtils::saveData("countersStat.log", $proxyService->getCountersState());
$proxyService->resetAll();
// Clear the busy flag stored in BUSY_FILENAME.
StateUtils::resetBusy(BUSY_FILENAME);
include "ExportData.php";
/**
 * Request handler: loads the page at $_REQUEST["url"] (optionally through a
 * random proxy) and prints it as an HTML document whose body is the output of
 * getHighlitedHtml() wrapped in <pre>.
 *
 * @param bool $useProxy When true, random proxy data from ProxiesService is
 *                       passed to UrlUtils::loadPage() as "proxyData".
 * @return mixed The PEAR error returned by the proxy service when obtaining
 *               proxy data fails (nothing is printed in that case);
 *               otherwise nothing.
 */
function HandleHighlighting($useProxy = true)
{
    $params = array();
    if ($useProxy) {
        $proxyService = new ProxiesService();
        $res = $proxyService->getRandomProxyData();
        if (PEAR::isError($res)) {
            return $res;
        }
        $params["proxyData"] = $res;
    }
    $content = UrlUtils::loadPage($_REQUEST["url"], $params);
    // The URL comes straight from the request: escape it before echoing it
    // into the markup, otherwise it is a reflected XSS vector. A single-byte
    // charset is used so that the value is never rejected as invalid input.
    $titleUrl = htmlspecialchars($_REQUEST["url"], ENT_QUOTES, "cp1251");
    print <<<HTML
<html>
<title>Подсветка {$titleUrl}</title>
<head>
<style>
em { color: red; font-size: 12pt; font-weight: bold; }
pre { font-size: 9pt; }
b { color: blue; }
b b { color: red; }
</style>
</head>
<body>
<pre>
HTML;
    print getHighlitedHtml($content);
    print <<<HTML
</pre>
</body>
</html>
HTML;
}
    // Проверка наличия новых ссылок на страницы
    if ($parser->hasUrls()) {
        $parser->items = array();
        $parser->pageContent = "";
        StateUtils::save(STATE_FILENAME, $parser);
    }
}
// Refuse to start while the busy flag in BUSY_FILENAME is set.
if (StateUtils::isBusy(BUSY_FILENAME)) {
    print "Is busy\n";
    exit;
}
StateUtils::setBusy(BUSY_FILENAME);
register_shutdown_function("onShutdown");
$baseUrl = "http://direct.yandex.ru/stat/wordsstat.pl";
// Specify the service name and the IP of the server the script runs from
$proxyService = new ProxiesService("yandex", "194.186.45.242");
$res = $proxyService->getProxyData();
if (PEAR::isError($res)) {
    print $res->getMessage();
    exit;
}
$queries = null;
// Try to load a previously saved parser state from STATE_FILENAME.
$parser = StateUtils::load(STATE_FILENAME);
// NOTE(review): looks like leftover debug output - confirm it is still wanted.
var_dump($parser);
if ($parser == null) {
    // Fetch the queries
    $queries = $db->getAll(CMD_SEL_QUERIES, array(), DB_FETCHMODE_ASSOC);
    if (PEAR::isError($queries)) {
        print $queries->getMessage();
        exit;
    }
/**
 * Request handler: runs YandexResultsGrabber (and, on request,
 * YandexResultsAddGrabber) for the requested domain and prints the results
 * JSON-encoded.
 *
 * Reads from $_REQUEST: "url", "rowNum", "linkType" and optionally
 * "withoutOur", "mainAndRest", "delay"/"delayValue". The trimmed "url" is
 * written back into $_REQUEST.
 *
 * "linkType" selects what is queried:
 *   0 - both the bare domain and "www." + domain ("res1"/"res2", plus
 *       "resAdd1"/"resAdd2" when the additional parser is requested);
 *   1 - only "www." + domain ("res", plus "resAdd");
 *   2 - only the bare domain ("res", plus "resAdd").
 * The additional parser runs when "mainAndRest" or "withoutOur" equals "true".
 *
 * Output is a JSON object with "rowNum", "linkType" and the result keys above,
 * or with "error" set; error paths terminate the script via exit.
 *
 * @return void
 */
function HandleGetSearchResults()
{
    set_time_limit(0);
    header("Content-type: text/html; charset=windows-1251");
    // NOTE: for some reason nothing gets output without this.
    print " ";
    require_once "YandexResultsGrabber.php";
    require_once "YandexResultsAddGrabber.php";
    $json = new Services_JSON();
    $linkType = (int) $_REQUEST["linkType"];
    $result = array("rowNum" => (int) $_REQUEST["rowNum"], "linkType" => $linkType);

    // Plain assignment: "=& new" is deprecated since PHP 5.3 and a parse error in PHP 7+.
    $parser = new YandexResultsGrabber();
    $parser->showInfo = false;
    $parserAdd = new YandexResultsAddGrabber();
    $parserAdd->showInfo = false;

    $withoutOur = isset($_REQUEST["withoutOur"]) && $_REQUEST["withoutOur"] == "true";
    $parserAdd->withoutOur = $withoutOur;
    // The additional parser is needed for either of the two options.
    $useAddParser = (isset($_REQUEST["mainAndRest"]) && $_REQUEST["mainAndRest"] == "true") || $withoutOur;

    if (isset($_REQUEST["delay"]) && $_REQUEST["delay"] == "true") {
        $delay = isset($_REQUEST["delayValue"]) ? (int) $_REQUEST["delayValue"] : DELAY_BEFORE_REQUEST;
        // The property was previously misspelled "waitBeforRequest", which only
        // created an unused dynamic property.
        // NOTE(review): assumes these grabbers expose "waitBeforeRequest" like
        // the other grabbers in this project - confirm against the class.
        $parser->waitBeforeRequest = $delay;
        $parserAdd->waitBeforeRequest = $delay;
    }

    // Specify the service name and the IP of the server the script runs from.
    $proxyService = new ProxiesService("yandex", SERVER_IP);
    $res = $proxyService->getProxyData();
    if (PEAR::isError($res)) {
        $result["error"] = $res->getMessage();
        print $json->encode($result);
        exit;
    }
    $parser->setProxyData($res);
    $parserAdd->proxyService = $proxyService;
    $parserAdd->setProxyData($res);

    $_REQUEST["url"] = trim($_REQUEST["url"]);
    $url = $_REQUEST["url"];

    switch ($linkType) {
        // All: both the bare domain and the "www." variant.
        case 0:
            $res1 = $parser->parseForDomen($url);
            $proxyService->increase();
            $res2 = $parser->parseForDomen("www." . $url);
            $proxyService->increase();
            $result["res1"] = $res1;
            $result["res2"] = $res2;
            if ($useAddParser) {
                $resAdd1 = $parserAdd->parseForDomen($url);
                $resAdd2 = $parserAdd->parseForDomen("www." . $url);
                // Only fail when BOTH additional lookups failed.
                if (PEAR::isError($resAdd1) && PEAR::isError($resAdd2)) {
                    $result["error"] = $resAdd1->getMessage();
                    print $json->encode($result);
                    exit;
                }
                $result["resAdd1"] = $resAdd1;
                $result["resAdd2"] = $resAdd2;
            }
            break;

        // 1: with "www." only; 2: without "www." only.
        case 1:
        case 2:
            $domain = ($linkType == 1) ? "www." . $url : $url;
            $res = $parser->parseForDomen($domain);
            $proxyService->increase();
            $result["res"] = $res;
            if ($useAddParser) {
                $resAdd = $parserAdd->parseForDomen($domain);
                if (PEAR::isError($resAdd)) {
                    $result["error"] = $resAdd->getMessage();
                    print $json->encode($result);
                    exit;
                }
                $result["resAdd"] = $resAdd;
            }
            break;
    }
    print $json->encode($result);
}