/**
 * Performs an HTTP request through $this->http_client and wraps the result
 * in a nc_search_indexer_crawler_response object.
 *
 * The request summary (method, URL, response code and, for codes below 400,
 * content type and length information) is passed to nc_search::log() with
 * the LOG_CRAWLER_REQUEST type.
 *
 * @param string $method name of the http_client method to call with $url
 * @param string $url
 * @return nc_search_indexer_crawler_response
 */
protected function request($method, $url) {
    try {
        $this->http_client->{$method}($url);
        $raw_response = $this->http_client->currentResponse();
    } catch (nc_search_indexer_crawler_exception $exception) {
        // the listener did not like the headers; use the response carried by the exception
        $raw_response = $exception->get_response();
    }

    $raw_response["url"] = $url;
    $result = new nc_search_indexer_crawler_response($raw_response);

    // NOTE: the nc_search::will_log(nc_search::LOG_CRAWLER_REQUEST) guard is
    // currently disabled here, so the message below is always built.
    $declared_length = $result->get_content_length();

    $message = strtoupper($method) . " {$url}\n" . "Response: {$result->get_code()}\n";
    if ($result->get_code() < 400) {
        // content details are only reported for response codes below 400
        $length_text = is_null($declared_length) ? "no" : $declared_length;
        $message .= "Content-Type: " . $result->get_content_type() . "\n" .
                    "Content-Length: " . $length_text .
                    ", received: " . $result->get_body_length() . " bytes";
    }
    nc_search::log(nc_search::LOG_CRAWLER_REQUEST, $message);

    return $result;
}
/**
 * Runs the first task from the scheduler queue, i.e. the intent with the
 * earliest `StartTime` that is not later than the current time.
 *
 * @param int $indexer_strategy passed through to the provider's index_area()
 * @return bool|null false when nothing was started (indexing is already in
 *   progress, or there is no intent to process now); no explicit value (null)
 *   after index_area() has been invoked
 */
public static function run($indexer_strategy = nc_search::INDEXING_NC_CRON) {
    $provider = nc_search::get_provider();
    if ($provider->is_reindexing()) {
        nc_search::log(nc_search::LOG_SCHEDULER_START, "Scheduler: indexing in progress");
        return false;
    }

    $intent = nc_search::load('nc_search_scheduler_intent',
                    'SELECT * FROM `%t%`' .
                    ' WHERE `StartTime` <= ' . time() .
                    ' ORDER BY `StartTime` ASC LIMIT 1')->first();

    if (!$intent) {
        nc_search::log(nc_search::LOG_SCHEDULER_START, "Scheduler: no scheduler intents to process now");
        return false;
    }

    // Read everything needed from the intent before it is deleted below, so
    // that nothing depends on the state of the object after delete().
    $area_string = $intent->get('area_string');

    if (nc_search::will_log(nc_search::LOG_SCHEDULER_START)) {
        // date() is used instead of strftime(), which is deprecated as of PHP 8.1;
        // for this purely numeric format both produce the same text.
        nc_search::log(nc_search::LOG_SCHEDULER_START,
                "Scheduler started (planned start time: " .
                date("Y-m-d H:i:s", (int) $intent->get('start_time')) .
                "; area: '" .
                preg_replace("/\\s+/u", " ", $area_string) .
                "')");
    }

    // the intent has been taken into account and is no longer needed
    $intent->delete();

    // start indexing
    $provider->index_area($area_string, $indexer_strategy);
}
/**
 * Is reindexing currently running?
 *
 * Loads all nc_search_indexer_task records and returns the first one. When
 * $remove_hung_tasks is true and the task's last activity is older than the
 * "IndexerRemoveIdleTasksAfter" setting allows, the task is deleted, the
 * `Search_Link` and `Search_LinkReferrer` tables are truncated, an error is
 * logged and false is returned.
 *
 * @param bool $remove_hung_tasks whether an idle (hung) task should be removed
 * @return false|nc_search_indexer_task false when there is no task (or the
 *   hung task has just been removed)
 */
public static function get_current_task($remove_hung_tasks = true) {
    $tasks = nc_search::load_all('nc_search_indexer_task', true);
    if (!count($tasks)) {
        return false;
    }

    $task = $tasks->first();
    if (!$remove_hung_tasks) {
        return $task;
    }

    // has the task hung?
    // The timestamp is read once, before the task is deleted, so the log
    // message below does not depend on the object's state after delete().
    $last_activity = (int) $task->get('last_activity');
    $idle_limit = nc_search::get_setting("IndexerRemoveIdleTasksAfter");

    if (time() > $last_activity + $idle_limit) {
        $task->delete();

        $db = nc_Core::get_object()->db;
        $db->query("TRUNCATE TABLE `Search_Link`");
        $db->query("TRUNCATE TABLE `Search_LinkReferrer`");

        // date() is used instead of strftime(), which is deprecated as of PHP 8.1;
        // for this purely numeric format both produce the same text.
        nc_search::log(nc_search::LOG_ERROR,
                "Indexer task was last active at " .
                date("Y-m-d H:i:s", $last_activity) .
                ". Task removed.");
        return false;
    }

    return $task;
}
/**
 * Returns the ID of the link with the specified URL; creates (and saves) a
 * nc_search_indexer_link object if no such link is stored yet.
 *
 * URLs with and without a leading "www." in the host are treated as synonyms
 * when looking up an existing link. A new link is stored with the resolved
 * URL as is.
 *
 * @param string $url
 * @param string $referrer passed to resolve_link() together with $url
 * @return integer Link ID
 */
public function add_link($url, $referrer = null) {
    $full_url = $this->resolve_link($url, $referrer);

    // Treat URLs with and without "www." as synonyms (simply dropping "www."
    // is unfortunately not possible: not every server is configured properly).
    //
    // Only the host part directly after the FIRST "://" is inspected and
    // modified. Searching or replacing across the whole string would also hit
    // URLs embedded in the path or query (e.g. "?back=http://www.example.org/")
    // and produce wrong lookup variants.
    $full_url_with_www = $full_url;
    $full_url_without_www = $full_url;

    $scheme_separator = strpos($full_url, "://");
    if ($scheme_separator !== false) {
        $host_offset = $scheme_separator + 3; // strlen("://")
        if (substr($full_url, $host_offset, 4) === "www.") {
            // cut the 4 characters of "www." out of the host
            $full_url_without_www = substr_replace($full_url, "", $host_offset, 4);
        } else {
            // insert "www." at the start of the host
            $full_url_with_www = substr_replace($full_url, "www.", $host_offset, 0);
        }
    }

    $link = new nc_search_indexer_link();
    // search for a link with that URL in the database, create a new one if it's not there
    if (!$link->load_by_url(array($full_url_with_www, $full_url_without_www))) {
        // it's a brand new link
        $link->set("url", $full_url);
        $link->save();

        if (nc_search::will_log(nc_search::LOG_PARSER_DOCUMENT_LINKS)) {
            nc_search::log(nc_search::LOG_PARSER_DOCUMENT_LINKS,
                    "Added link to the queue: " . nc_search_util::decode_url($full_url));
        }
    }

    return $link->get_id();
}
<?php

/* $Id: netcat_cron.php 8456 2012-11-23 10:42:55Z aix $ */

/**
 * Entry point for running the search scheduler from the NetCat "cron".
 *
 * The request must carry a "secret_key" GET parameter equal to the
 * 'IndexerSecretKey' search setting; otherwise the attempt is logged and the
 * script terminates with "Access denied.".
 */

$NETCAT_FOLDER = realpath(dirname(__FILE__) . "/../../../../");
require_once "{$NETCAT_FOLDER}/vars.inc.php";
require_once $ROOT_FOLDER . "connect_io.php";

$nc_core = nc_Core::get_object();
$nc_core->modules->load_env('ru');
$lang = $nc_core->lang->detect_lang();
require_once $ADMIN_FOLDER . "lang/" . $lang . ".php";

error_reporting(E_PARSE | E_ERROR | E_WARNING | E_USER_ERROR | E_USER_WARNING);

// flush and close every active output buffer
while (@ob_end_flush()) {
}

$secret_key = nc_Core::get_object()->input->fetch_get("secret_key");

// Compare the keys as strings. A loose "!=" comparison treats numeric-looking
// strings such as "1e3" and "1000" as equal, which would let a wrong key pass.
// Non-scalar input (e.g. "secret_key[]=...") never matches.
// NOTE: as before, an empty configured key matches an empty/missing supplied key.
$is_plain_value = is_null($secret_key) || is_scalar($secret_key);
$supplied_key = $is_plain_value ? (string) $secret_key : '';
$expected_key = (string) nc_search::get_setting('IndexerSecretKey');

if (!$is_plain_value) {
    $key_matches = false;
} elseif (function_exists('hash_equals')) {
    // constant-time comparison where the PHP version provides it
    $key_matches = hash_equals($expected_key, $supplied_key);
} else {
    $key_matches = ($expected_key === $supplied_key);
}

if (!$key_matches) {
    $file = __FILE__;
    // REMOTE_ADDR may be absent (e.g. when the script is not run by a web server)
    $remote_addr = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';
    nc_search::log(nc_search::LOG_ERROR,
            "Attempt to access '{$file}' with a wrong secret key '{$supplied_key}' from {$remote_addr}");
    die("Access denied.");
}

nc_search::register_logger(new nc_search_logger_plaintext());
nc_search_scheduler::run(nc_search::INDEXING_NC_CRON);