예제 #1
0
파일: crawler.php 프로젝트: Blu2z/implsk
 /**
  * Performs a request through $this->http_client and wraps the outcome in a
  * crawler response object, logging a short summary of the exchange.
  *
  * @param string $method name of the http_client method to invoke (called dynamically)
  * @param string $url    URL passed to that method and stored in the response under "url"
  * @return nc_search_indexer_crawler_response
  */
 protected function request($method, $url)
 {
     try {
         $this->http_client->{$method}($url);
         $raw = $this->http_client->currentResponse();
     } catch (nc_search_indexer_crawler_exception $exception) {
         // the listener did not like the headers: use the response carried by the exception
         $raw = $exception->get_response();
     }

     $raw["url"] = $url;
     $result = new nc_search_indexer_crawler_response($raw);

     // Note: the message is built unconditionally; there is no
     // nc_search::will_log() guard around this logging call.
     $declared_length = $result->get_content_length();
     $message = strtoupper($method) . " {$url}\n" . "Response: {$result->get_code()}\n";
     if ($result->get_code() < 400) {
         // content details are only reported for response codes below 400
         $message .= "Content-Type: " . $result->get_content_type() . "\n"
             . "Content-Length: " . (is_null($declared_length) ? "no" : $declared_length)
             . ", received: " . $result->get_body_length() . " bytes";
     }
     nc_search::log(nc_search::LOG_CRAWLER_REQUEST, $message);

     return $result;
 }
예제 #2
0
파일: scheduler.php 프로젝트: Blu2z/implsk
 /**
  * Run the first due task from the scheduler queue.
  *
  * Nothing is started when the provider reports that re-indexing is already
  * in progress, or when no intent with StartTime <= now exists. Otherwise the
  * earliest due intent is deleted and its area is passed to the provider's
  * index_area().
  *
  * @param int $indexer_strategy strategy forwarded to index_area()
  * @return bool|null false when nothing was started; null (no explicit
  *                   return value) after index_area() has been called
  */
 public static function run($indexer_strategy = nc_search::INDEXING_NC_CRON)
 {
     $provider = nc_search::get_provider();
     if ($provider->is_reindexing()) {
         nc_search::log(nc_search::LOG_SCHEDULER_START, "Scheduler: indexing in progress");
         return false;
     }
     $intent = nc_search::load('nc_search_scheduler_intent', 'SELECT * FROM `%t%`' . ' WHERE `StartTime` <= ' . time() . ' ORDER BY `StartTime` ASC LIMIT 1')->first();
     if (!$intent) {
         nc_search::log(nc_search::LOG_SCHEDULER_START, "Scheduler: no scheduler intents to process now");
         return false;
     }
     // Read the area before the intent is deleted so that nothing below
     // depends on the record object still holding its data after delete().
     $area = $intent->get('area_string');
     if (nc_search::will_log(nc_search::LOG_SCHEDULER_START)) {
         // date() replaces strftime(), which is deprecated as of PHP 8.1;
         // "Y-m-d H:i:s" produces the same output as "%Y-%m-%d %H:%M:%S".
         nc_search::log(nc_search::LOG_SCHEDULER_START, "Scheduler started (planned start time: " . date("Y-m-d H:i:s", $intent->get('start_time')) . "; area: '" . preg_replace("/\\s+/u", " ", $area) . "')");
     }
     // the intent has been taken into account and is no longer needed
     $intent->delete();
     // start indexing
     $provider->index_area($area, $indexer_strategy);
 }
예제 #3
0
파일: indexer.php 프로젝트: Blu2z/implsk
 /**
  * Is re-indexing currently running?
  *
  * Loads the indexer tasks and returns the first one. When $remove_hung_tasks
  * is true and that task's last activity is older than the
  * "IndexerRemoveIdleTasksAfter" setting, the task is deleted, the
  * `Search_Link` and `Search_LinkReferrer` tables are truncated, an error is
  * logged and false is returned.
  *
  * @param bool $remove_hung_tasks whether an idle (hung) task should be removed
  * @return false|nc_search_indexer_task the current task, or false if there is none
  */
 public static function get_current_task($remove_hung_tasks = true)
 {
     $tasks = nc_search::load_all('nc_search_indexer_task', true);
     if (!count($tasks)) {
         return false;
     }
     // have we hung?
     $task = $tasks->first();
     if ($remove_hung_tasks && time() > $task->get('last_activity') + nc_search::get_setting("IndexerRemoveIdleTasksAfter")) {
         // Capture the timestamp before deleting the task so the log message
         // does not rely on the deleted object still holding its data.
         $last_activity = (int) $task->get('last_activity');
         $task->delete();
         $db = nc_Core::get_object()->db;
         $db->query("TRUNCATE TABLE `Search_Link`");
         $db->query("TRUNCATE TABLE `Search_LinkReferrer`");
         // date() replaces strftime(), which is deprecated as of PHP 8.1;
         // "Y-m-d H:i:s" produces the same output as "%Y-%m-%d %H:%M:%S".
         nc_search::log(nc_search::LOG_ERROR, "Indexer task was last active at " . date("Y-m-d H:i:s", $last_activity) . ". Task removed.");
         return false;
     }
     return $task;
 }
예제 #4
0
파일: manager.php 프로젝트: Blu2z/implsk
 /**
  * Returns the ID of the link with the given URL; creates and saves a new
  * nc_search_indexer_link object if no such link is found.
  *
  * URLs that differ only by a leading "www." in the host are treated as the
  * same link when looking it up.
  *
  * @param string $url
  * @param string $referrer passed to resolve_link() together with $url
  * @return integer Link ID
  */
 public function add_link($url, $referrer = null)
 {
     $full_url = $this->resolve_link($url, $referrer);
     // Treat URLs with and without "www." as synonyms (unfortunately "www."
     // cannot simply be dropped: not everyone has their server configured
     // correctly).
     //
     // Only the scheme separator at the very start of the URL is examined:
     // a "://" or "://www." occurring later (e.g. a URL embedded in the query
     // string) must neither be rewritten nor mistaken for the host prefix.
     $full_url_with_www = $full_url;
     $full_url_without_www = $full_url;
     if (preg_match('~^[a-z][a-z0-9+.\-]*://~i', $full_url, $scheme_match)) {
         $scheme = $scheme_match[0];
         $rest = (string) substr($full_url, strlen($scheme));
         if (strncmp($rest, "www.", 4) === 0) {
             $full_url_without_www = $scheme . substr($rest, 4);
         } else {
             $full_url_with_www = $scheme . "www." . $rest;
         }
     }
     $link = new nc_search_indexer_link();
     // search for link with that URL in the database, create new if it's not there
     if (!$link->load_by_url(array($full_url_with_www, $full_url_without_www))) {
         // it's a brand new link
         $link->set("url", $full_url);
         $link->save();
         if (nc_search::will_log(nc_search::LOG_PARSER_DOCUMENT_LINKS)) {
             nc_search::log(nc_search::LOG_PARSER_DOCUMENT_LINKS, "Added link to the queue: " . nc_search_util::decode_url($full_url));
         }
     }
     return $link->get_id();
 }
예제 #5
0
<?php

/* $Id: netcat_cron.php 8456 2012-11-23 10:42:55Z aix $ */
/**
 * Entry point invoked from the NetCat "cron".
 *
 * Boots the NetCat environment, verifies the "secret_key" GET parameter
 * against the 'IndexerSecretKey' search setting and, on success, runs the
 * search scheduler with the INDEXING_NC_CRON strategy.
 */
$NETCAT_FOLDER = realpath(dirname(__FILE__) . "/../../../../");
// NOTE(review): $ROOT_FOLDER and $ADMIN_FOLDER are presumably defined by
// vars.inc.php / connect_io.php - they are not set in this file.
require_once "{$NETCAT_FOLDER}/vars.inc.php";
require_once $ROOT_FOLDER . "connect_io.php";
$nc_core = nc_Core::get_object();
$nc_core->modules->load_env('ru');
$lang = $nc_core->lang->detect_lang();
require_once $ADMIN_FOLDER . "lang/" . $lang . ".php";
error_reporting(E_PARSE | E_ERROR | E_WARNING | E_USER_ERROR | E_USER_WARNING);
// Flush and close every active output buffer; the loop ends as soon as
// ob_end_flush() fails (its notice is suppressed on purpose).
while (@ob_end_flush()) {
}
$secret_key = nc_Core::get_object()->input->fetch_get("secret_key");
$expected_key = (string) nc_search::get_setting('IndexerSecretKey');
// Compare the keys as strings, strictly. A loose "!=" compares numeric-looking
// strings by value ("1e3" == "1000", "0e1" == "0e2"), which would accept keys
// that are not identical to the configured one. hash_equals() is used when
// available so that the comparison is also timing-safe.
$key_is_valid = false;
if (is_null($secret_key) || is_scalar($secret_key)) {
    $provided_key = (string) $secret_key;
    $key_is_valid = function_exists('hash_equals')
        ? hash_equals($expected_key, $provided_key)
        : $expected_key === $provided_key;
}
if (!$key_is_valid) {
    $file = __FILE__;
    nc_search::log(nc_search::LOG_ERROR, "Attempt to access '{$file}' with a wrong secret key '{$secret_key}' from {$_SERVER['REMOTE_ADDR']}");
    die("Access denied.");
}
nc_search::register_logger(new nc_search_logger_plaintext());
nc_search_scheduler::run(nc_search::INDEXING_NC_CRON);