// For YellowPages1ContentGrabber run this script with the parameters "1 1".
//
// Picks a content grabber by process number, fetches the sub-rubrics selected
// by RubricsData::getSubRubricsForParser() starting from the id stored in
// CALC_START_ID, asks the grabber for each sub-rubric's "real number" through
// a random proxy, stores it, and remembers the last processed id.

$parserId = StateUtils::getProcessNum() + 1;

// Objects are created with plain assignment: "=& new" is deprecated since
// PHP 5.3 and is a parse error on PHP 7+.
if ($parserId == 1) {
    $Parser = new YellowPagesContentGrabber();
} else {
    $Parser = new YellowPages1ContentGrabber();
}
$Parser->showInfo = false;
$Parser->waitBeforeRequest = 3;

$rubrics = new RubricsData();
$Parser->rubricsData =& $rubrics;

$ProxyService = new ProxiesService();

// Continue from the id saved by the previous run.
$start = (int) FileUtils::loadContents(CALC_START_ID);
$subRubrics = $rubrics->getSubRubricsForParser($parserId, $start, 1);
if (PEAR::isError($subRubrics)) {
    print $subRubrics->getMessage();
    exit;
}

if ($subRubrics->numRows() == 0) {
    // Reset the counter of the last processed record.
    FileUtils::saveContents(CALC_START_ID, "0");
    exit;
}

while ($row = $subRubrics->fetchRow(DB_FETCHMODE_OBJECT)) {
    // A fresh random proxy is used for every sub-rubric.
    $res = $ProxyService->getRandomProxyData();
    $Parser->setProxyData($res);

    $res = $Parser->getRealNum($row->rurl);
    $rubrics->setRealNum($res, $row->rid);
    print $row->rid . ", " . $row->rurl . ": " . $res . "\n";

    // Persist progress after each record.
    FileUtils::saveContents(CALC_START_ID, $row->rid);
}
/**
 * Downloads an e-mail image, runs the external recogniser on it and stores
 * the recognised text.
 *
 * The content loaded from $url is saved as EMAIL_IMGS_DIR . $id . "." plus the
 * extension found in the URL, converted with createTxtFromPng(), and the
 * resulting file is passed to "./recogn" together with its width and height.
 * The text between "[[[" and "]]]" in the value returned by system() is saved
 * through rubricsData->setEmailText(); an empty string is saved when the
 * marker is absent. Load failures are written to the logger.
 *
 * @param string $url URL of the image; nothing happens when it is empty
 * @param mixed  $id  identifier used in the file name and passed to setEmailText()
 * @return void
 */
function getEmailData($url, $id)
{
    if (strlen($url) == 0) {
        return;
    }

    $logger = Log::singleton("null", "results.log", "ident");

    $content = $this->loadPage($url);
    if (strlen($content) == 0) {
        $logger->log("Error on load " . $url);
        return;
    }

    // pathinfo() omits the "extension" key when the URL has no extension.
    $p = pathinfo($url);
    $ext = isset($p["extension"]) ? $p["extension"] : "";
    $fName = EMAIL_IMGS_DIR . $id . "." . $ext;
    FileUtils::saveContents($fName, $content);

    $res = $this->createTxtFromPng($fName);
    if (!isset($res["file"])) {
        return;
    }

    // The file name is partly derived from the remote URL, so every argument
    // is escaped before it reaches the shell.
    $cmd = "./recogn "
        . escapeshellarg((string) $res["file"]) . " "
        . escapeshellarg((string) $res["width"]) . " "
        . escapeshellarg((string) $res["height"]);

    // system() passes the program output through and returns its last line.
    $email = system($cmd);
    if (preg_match("{\\[\\[\\[(.+)\\]\\]\\]}si", $email, $matches)) {
        $email = $matches[1];
    } else {
        $email = "";
    }

    $this->rubricsData->setEmailText($id, $email);
}
/**
 * Downloads an image and saves it under $destName with an extension taken
 * from the subtype of the response Content-Type.
 *
 * @param string $url      URL of the image
 * @param string $destName file name without extension for the downloaded image
 * @return string|PEAR_Error name of the saved file, or a PEAR_Error when the
 *                           download fails or the response is not an image
 */
function loadImage($url, $destName)
{
    $hConn = curl_init();
    curl_setopt($hConn, CURLOPT_USERAGENT, USER_AGENT);
    curl_setopt($hConn, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($hConn, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($hConn, CURLOPT_URL, $url);

    $content = curl_exec($hConn);
    $failed = curl_errno($hConn) || strlen($content) == 0;
    $contentType = (string) curl_getinfo($hConn, CURLINFO_CONTENT_TYPE);
    // Everything needed from the handle has been read; close it exactly once.
    curl_close($hConn);

    if ($failed) {
        return new PEAR_Error("Can't load image from " . $url . ".");
    }

    // Drop header parameters such as "; charset=binary" so they do not end up
    // in the file extension.
    $semicolon = strpos($contentType, ";");
    $mime = trim($semicolon === false ? $contentType : substr($contentType, 0, $semicolon));

    // The type must start with "image/"; a match elsewhere in the header
    // would make the fixed-offset cut below return garbage.
    if (strpos(strtolower($mime), "image/") !== 0) {
        return new PEAR_Error("File is not image.");
    }

    // The subtype comes from the remote server and becomes part of a file
    // path, so only plain token characters are accepted (no "/" or "\").
    $ext = substr($mime, strlen("image/"));
    if (!preg_match('/^[A-Za-z0-9][A-Za-z0-9.+-]*$/D', $ext)) {
        return new PEAR_Error("File is not image.");
    }

    $fName = $destName . "." . $ext;
    FileUtils::saveContents($fName, $content);

    return $fName;
}
/**
 * Renders the popup HTML page of one item from ITEM_TPL_FILE and saves it as
 * OUTPUT_DIR . "/popup/" . <art> . ".html", where <art> is the row's "art"
 * value with spaces and slashes replaced by dashes.
 *
 * @param mixed $itemId    value bound to the CMD_SEL_ITEM query
 * @param mixed $path      not used by this function
 * @param int   $idx       position of the item; the pager's current page is set to $idx + 1
 * @param array $itemsData passed to the pager as "itemData"
 * @return void
 */
function generateItemFile($itemId, $path, $idx, $itemsData)
{
    global $db;

    $row = $db->getRow(CMD_SEL_ITEM, array($itemId), DB_FETCHMODE_ASSOC);
    if (PEAR::isError($row)) {
        print $row->getMessage() . "\n";
        return;
    }
    // getRow() yields no array when the query matches nothing; without this
    // guard an empty page would be written to "/popup/.html".
    if (empty($row)) {
        print "Item {$itemId} not found\n";
        return;
    }

    print "Generate item page... {$itemId} \n";

    if ($row["item_id"] == 0) {
        // New object
        $fields = getViewFields();
        $imgUrl = IMAGES_BASE . getNormImagePath("new" . $row["id"]);
    } else {
        $categId = $db->getOne("SELECT category_id FROM items WHERE id = ?", array($row["item_id"]));
        // Fall back to the default field set when the category lookup fails.
        $fields = PEAR::isError($categId) ? getViewFields() : getViewFields($categId);
        $imgUrl = IMAGES_BASE . getNormImagePath($row["item_id"]);
    }
    $fields = removeFields($fields[0], $fields[1]);
    $fields = array_unique($fields);

    // The template prints its result; capture it so it can be saved to a file.
    ob_start();

    $tpl = new HTML_Template_IT("");
    $tpl->loadTemplatefile(ITEM_TPL_FILE, true, true);

    $params = array(
        "itemData" => $itemsData,
        "perPage"  => 1,
        "delta"    => 3,
        "append"   => false,
        "expanded" => true,
        "fileName" => "?popup=[%d]",
        "mode"     => "Sliding",
        "path"     => "",
        "prevImg"  => "<prev",
        "nextImg"  => "next>"
    );
    $pager = new ExtendedPager1($params);
    $pager->ext = BASE_INDEX_URL_POPUP;
    $pager->infoStr = NAV_INFO_STR;
    $pager->prev = NAV_PREV;
    $pager->prevDis = NAV_PREV_DIS;
    $pager->next = NAV_NEXT;
    $pager->nextDis = NAV_NEXT_DIS;
    $pager->prevPages = NAV_PREV_PAGES;
    $pager->prevPagesDis = NAV_PREV_PAGES_DIS;
    $pager->nextPages = NAV_NEXT_PAGES;
    $pager->nextPagesDis = NAV_NEXT_PAGES_DIS;
    $pager->currentPage = $idx + 1;

    $navigation = $pager->getLinks();
    $navigation = $pager->replaceLinks($navigation);
    // Strip the stray "/" or "." that ends up in front of absolute URLs.
    $navigation = str_replace(array("/http://", ".http://"), "http://", $navigation);
    $tpl->setVariable("NAVIGATION", $navigation);

    $row["name"] = stripslashes($row["name"]);
    $tpl->setVariable("name1", $row["name"]);
    $tpl->setVariable("HTMLTITLE", $row["name"] . START_PATH_TITLE);

    // Each non-blank field fills the template block of the same name.
    foreach ($fields as $field) {
        if (!isset($row[$field]) || strlen(trim($row[$field])) == 0) {
            continue;
        }
        $row[$field] = stripslashes($row[$field]);
        $tpl->setCurrentBlock($field);
        $tpl->setVariable($field, $row[$field]);
        $tpl->parseCurrentBlock();
    }

    $tpl->setCurrentBlock("image");
    $tpl->setVariable("image", $imgUrl);
    $tpl->parseCurrentBlock();
    $tpl->show();

    $contents = ob_get_clean();

    // Spaces and slashes in the "art" value are replaced to get a flat file name.
    $fName = str_replace(array(" ", "/"), "-", trim($row['art']));
    FileUtils::saveContents(OUTPUT_DIR . "/popup/" . $fName . ".html", $contents);
}
);

// Writes "results.csv": one "name;count" line for every row returned by
// CMD_SEL_SRCDATA whose CMD_SEL_NOTPROCESSED value equals 0.
$db->setFetchMode(DB_FETCHMODE_OBJECT);
$res = $db->query(CMD_SEL_SRCDATA);
if (PEAR::isError($res)) {
    print $res->getMessage() . "\n";
    exit;
}

// Everything printed inside the loop is captured in the output buffer and
// saved to the file afterwards instead of being sent to the output.
ob_start();
while ($row =& $res->fetchRow()) {
    $count = $db->getOne(CMD_SEL_QUERIESNUM, array($row->id));
    if (PEAR::isError($count)) {
        // Rows whose count query fails are skipped silently.
        //print $count->getMessage() . "\n";
        continue;
    }
    // A query that returned no value counts as zero.
    if (!isset($count)) {
        $count = 0;
    }
    $notProcessed = $db->getOne(CMD_SEL_NOTPROCESSED, array($row->id));
    if (PEAR::isError($notProcessed)) {
        // Rows whose "not processed" query fails are skipped silently.
        //print $count->getMessage() . "\n";
        continue;
    }
    // Rows with a non-zero "not processed" value are left out of the file.
    if ($notProcessed == 0) {
        print $row->name . ";" . $count . "\n"; // $row->num
    }
}
$contents = ob_get_contents();
ob_end_clean();
FileUtils::saveContents("results.csv", $contents);
<?php
// Saves the proxy counters state to a log file, resets the proxies service,
// rewrites the busy-flags file and then runs ExportData.php.
require_once "prepend.php";
require_once "ProxiesService.php";
require_once "FileUtils.php";

// The relative file names used below are resolved from this directory.
chdir("/***/ResearchGrabber");

$proxyService = new ProxiesService();
// Store the counters state before resetAll() is called.
FileUtils::saveData("countersStat.log", $proxyService->getCountersState());
$proxyService->resetAll();
// NOTE(review): presumably one "free" flag per parser process - confirm
// against the code that reads isBusy.dat.
FileUtils::saveContents("isBusy.dat", "free free free");

include "ExportData.php";