コード例 #1
0
// Batch script: fetches the next sub-rubric for this parser process, asks the
// grabber for its "real number" through a random proxy, stores the result and
// remembers the id of the last processed record in CALC_START_ID.
//
// For YellowPages1ContentGrabber run the script with parameters 1 1.
$parserId = StateUtils::getProcessNum() + 1;
// Objects are created with plain "=": "=& new" is a parse error since PHP 7.
if ($parserId == 1) {
    $Parser = new YellowPagesContentGrabber();
} else {
    $Parser = new YellowPages1ContentGrabber();
}
$Parser->showInfo = false;
$Parser->waitBeforeRequest = 3;
$rubrics = new RubricsData();
$Parser->rubricsData =& $rubrics;
$ProxyService = new ProxiesService();
// Resume after the id saved by the previous run.
$start = (int) FileUtils::loadContents(CALC_START_ID);
$subRubrics = $rubrics->getSubRubricsForParser($parserId, $start, 1);
if (PEAR::isError($subRubrics)) {
    print $subRubrics->getMessage();
    exit;
}
if ($subRubrics->numRows() == 0) {
    // Nothing left: reset the counter of the last processed record.
    FileUtils::saveContents(CALC_START_ID, "0");
    exit;
}
while ($row =& $subRubrics->fetchRow(DB_FETCHMODE_OBJECT)) {
    // A fresh random proxy is picked for every request.
    $res = $ProxyService->getRandomProxyData();
    $Parser->setProxyData($res);
    $res = $Parser->getRealNum($row->rurl);
    $rubrics->setRealNum($res, $row->rid);
    print $row->rid . ", " . $row->rurl . ": " . $res . "\n";
    // Persist progress after each row so an interrupted run can resume.
    FileUtils::saveContents(CALC_START_ID, $row->rid);
}
コード例 #2
0
 /**
  * Downloads the e-mail image at $url, runs the external "./recogn" tool on it
  * and stores the recognised text for record $id via rubricsData->setEmailText().
  *
  * Nothing is stored when the URL is empty, when the page cannot be loaded
  * (an error is logged instead) or when createTxtFromPng() yields no "file".
  * When the tool's output contains no "[[[...]]]" marker an empty string is stored.
  *
  * @param string $url URL of the image; its extension is reused for the saved file
  * @param mixed  $id  record id; used in the saved file name and passed to setEmailText()
  * @return void
  */
 function getEmailData($url, $id)
 {
     if (strlen($url) == 0) {
         return;
     }
     $logger =& Log::singleton("null", "results.log", "ident");
     $content = $this->loadPage($url);
     if (strlen($content) == 0) {
         $logger->log("Error on load " . $url);
         return;
     }
     $p = pathinfo($url);
     // pathinfo() omits the "extension" key when the URL has none.
     $ext = isset($p["extension"]) ? $p["extension"] : "";
     $fName = EMAIL_IMGS_DIR . $id . "." . $ext;
     FileUtils::saveContents($fName, $content);
     $res = $this->createTxtFromPng($fName);
     if (!isset($res["file"])) {
         return;
     }
     // The file name derives from a remote URL, so every argument is escaped
     // before it reaches the shell.
     $cmd = "./recogn " . escapeshellarg((string) $res["file"])
         . " " . escapeshellarg((string) $res["width"])
         . " " . escapeshellarg((string) $res["height"]);
     // system() returns the last output line, or false on failure.
     $email = (string) system($cmd);
     if (preg_match("{\\[\\[\\[(.+)\\]\\]\\]}si", $email, $matches)) {
         $email = $matches[1];
     } else {
         $email = "";
     }
     $this->rubricsData->setEmailText($id, $email);
 }
コード例 #3
0
ファイル: Misc.php プロジェクト: rivetweb/old-auto-catalog
/**
 * Downloads an image and saves it under $destName plus an extension taken
 * from the response's Content-Type subtype (e.g. "image/png" -> ".png").
 *
 * @param string $url      image URL
 * @param string $destName destination file name without extension
 * @return string|PEAR_Error name of the saved file, or a PEAR_Error when the
 *                           download fails, the body is empty or the response
 *                           is not an image
 */
function loadImage($url, $destName)
{
    $hConn = curl_init();
    curl_setopt($hConn, CURLOPT_USERAGENT, USER_AGENT);
    curl_setopt($hConn, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($hConn, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($hConn, CURLOPT_URL, $url);
    $content = curl_exec($hConn);
    $failed = $content === false || curl_errno($hConn) || strlen($content) == 0;
    // Read everything needed from the handle, then release it exactly once.
    $contentType = $failed ? "" : (string) curl_getinfo($hConn, CURLINFO_CONTENT_TYPE);
    curl_close($hConn);
    if ($failed) {
        return new PEAR_Error("Can't load image from " . $url . ".");
    }
    // Keep only the media type: drop parameters such as "; charset=binary"
    // and compare case-insensitively.
    $parts = explode(";", $contentType);
    $mediaType = strtolower(trim($parts[0]));
    // "image/" must be the prefix, because the extension is cut from offset 6.
    if (strpos($mediaType, "image/") !== 0) {
        return new PEAR_Error("File is not image.");
    }
    $ext = (string) substr($mediaType, strlen("image/"));
    // The subtype comes from the remote server and ends up in a file path:
    // reject anything that is not a plain token (no slashes, no empty value).
    if (!preg_match('/^[a-z0-9][a-z0-9.+-]*$/', $ext)) {
        return new PEAR_Error("File is not image.");
    }
    $fName = $destName . "." . $ext;
    FileUtils::saveContents($fName, $content);
    return $fName;
}
コード例 #4
0
/**
 * Renders the popup HTML page for one item and saves it under
 * OUTPUT_DIR/popup/<art>.html (spaces and "/" in "art" become "-").
 *
 * The item row is loaded with CMD_SEL_ITEM. The list of fields to show comes
 * from getViewFields(): without arguments for a new item (item_id == 0) or
 * when the category lookup fails, otherwise for the item's category.
 * Navigation links are produced by ExtendedPager1 positioned at $idx + 1.
 *
 * @param mixed $itemId    value bound to the CMD_SEL_ITEM query
 * @param mixed $path      not used by this function
 * @param int   $idx       zero-based position of the item; pager page is $idx + 1
 * @param array $itemsData passed to the pager as "itemData"
 * @return void
 */
function generateItemFile($itemId, $path, $idx, $itemsData)
{
    global $db;
    $row =& $db->getRow(CMD_SEL_ITEM, array($itemId), DB_FETCHMODE_ASSOC);
    if (PEAR::isError($row)) {
        print $row->getMessage() . "\n";
        return;
    }
    // getRow() yields no array when the query matches nothing; without this
    // guard an empty page would be written to "popup/.html".
    if (!is_array($row)) {
        print "Item {$itemId} not found\n";
        return;
    }
    print "Generate item page... {$itemId} \n";
    if ($row["item_id"] == 0) {
        // New item: default field set, image keyed by "new" + row id.
        $fields = getViewFields();
        $imgUrl = IMAGES_BASE . getNormImagePath("new" . $row["id"]);
    } else {
        $categId =& $db->getOne("SELECT category_id FROM items WHERE id = ?", array($row["item_id"]));
        if (PEAR::isError($categId)) {
            // Fall back to the default field set when the category is unavailable.
            $fields = getViewFields();
        } else {
            $fields = getViewFields($categId);
        }
        $imgUrl = IMAGES_BASE . getNormImagePath($row["item_id"]);
    }
    $fields = removeFields($fields[0], $fields[1]);
    $fields = array_unique($fields);
    // Everything printed from here on is captured and becomes the page body.
    ob_start();
    $tpl = new HTML_Template_IT("");
    $tpl->loadTemplatefile(ITEM_TPL_FILE, true, true);
    $params = array("itemData" => $itemsData, "perPage" => 1, "delta" => 3, "append" => false, "expanded" => true, "fileName" => "?popup=[%d]", "mode" => "Sliding", "path" => "", "prevImg" => "<prev", "nextImg" => "next>");
    $pager = new ExtendedPager1($params);
    $pager->ext = BASE_INDEX_URL_POPUP;
    $pager->infoStr = NAV_INFO_STR;
    $pager->prev = NAV_PREV;
    $pager->prevDis = NAV_PREV_DIS;
    $pager->next = NAV_NEXT;
    $pager->nextDis = NAV_NEXT_DIS;
    $pager->prevPages = NAV_PREV_PAGES;
    $pager->prevPagesDis = NAV_PREV_PAGES_DIS;
    $pager->nextPages = NAV_NEXT_PAGES;
    $pager->nextPagesDis = NAV_NEXT_PAGES_DIS;
    $pager->currentPage = $idx + 1;
    $navigation = $pager->getLinks();
    $navigation = $pager->replaceLinks($navigation);
    // Strip the stray "/" or "." that ends up in front of absolute URLs.
    $navigation = str_replace("/http://", "http://", $navigation);
    $navigation = str_replace(".http://", "http://", $navigation);
    $tpl->setVariable("NAVIGATION", $navigation);
    $row["name"] = stripslashes($row["name"]);
    $tpl->setVariable("name1", $row["name"]);
    $tpl->setVariable("HTMLTITLE", $row["name"] . START_PATH_TITLE);
    foreach ($fields as $field) {
        // Skip fields that are absent from the row or blank.
        if (!isset($row[$field])) {
            continue;
        }
        if (strlen(trim($row[$field])) == 0) {
            continue;
        }
        $row[$field] = stripslashes($row[$field]);
        $tpl->setCurrentBlock($field);
        $tpl->setVariable($field, $row[$field]);
        $tpl->parseCurrentBlock();
    }
    $tpl->setCurrentBlock("image");
    $tpl->setVariable("image", $imgUrl);
    $tpl->parseCurrentBlock();
    $tpl->show();
    $contents = ob_get_clean();
    // Build a file-system friendly name from the item's "art" value.
    $fName = str_replace(" ", "-", trim($row['art']));
    $fName = str_replace("/", "-", $fName);
    FileUtils::saveContents(OUTPUT_DIR . "/popup/" . $fName . ".html", $contents);
}
コード例 #5
0
);
// Export script: writes one "name;count" line to results.csv for every source
// row whose CMD_SEL_NOTPROCESSED value equals zero. Rows whose counters cannot
// be read are skipped silently.
$db->setFetchMode(DB_FETCHMODE_OBJECT);
$res = $db->query(CMD_SEL_SRCDATA);
if (PEAR::isError($res)) {
    echo $res->getMessage(), "\n";
    exit;
}
// Lines are echoed into an output buffer and saved in one go afterwards.
ob_start();
while ($row =& $res->fetchRow()) {
    $count = $db->getOne(CMD_SEL_QUERIESNUM, array($row->id));
    if (PEAR::isError($count)) {
        continue;
    }
    // No value for this row counts as zero.
    $count = is_null($count) ? 0 : $count;
    $notProcessed = $db->getOne(CMD_SEL_NOTPROCESSED, array($row->id));
    if (PEAR::isError($notProcessed) || $notProcessed != 0) {
        continue;
    }
    echo "{$row->name};{$count}\n";
}
$contents = ob_get_clean();
FileUtils::saveContents("results.csv", $contents);
コード例 #6
0
<?php

require_once "prepend.php";
require_once "ProxiesService.php";
require_once "FileUtils.php";
// Maintenance script: snapshots the proxy counters, resets them, rewrites
// isBusy.dat and then runs the export.
// Switch the working directory first so the relative file names below
// resolve inside it (the return value of chdir() is not checked).
chdir("/***/ResearchGrabber");
$proxyService = new ProxiesService();
// Save the current counter state before it is reset.
FileUtils::saveData("countersStat.log", $proxyService->getCountersState());
$proxyService->resetAll();
// NOTE(review): presumably one "free" flag per parser process — confirm with the reader of isBusy.dat.
FileUtils::saveContents("isBusy.dat", "free free free");
// The export runs in this script's scope, so it sees $proxyService.
include "ExportData.php";