/**
 * Creates the logger and fixes the set of message types it records.
 *
 * @param int|null $level Bit mask of message types. When empty, the
 *   'LogLevel' setting is used and the error, crawler request and
 *   indexing begin/end flags are always switched on in addition to it.
 */
public function __construct($level = null) {
    if ($level) {
        $this->level = $level;
        return;
    }

    $this->level = nc_search::get_setting('LogLevel')
        | nc_search::LOG_ERROR
        | nc_search::LOG_CRAWLER_REQUEST
        | nc_search::LOG_INDEXING_BEGIN_END;
}
/**
 * Converts the case of every term according to the 'FilterStringCase' setting.
 * Terms are treated as UTF-8 strings.
 *
 * @param array $terms
 * @return array the same terms (keys preserved) with the case converted
 */
public function filter(array $terms) {
    // The setting is loop-invariant: read it once instead of once per term
    $case_mode = nc_search::get_setting('FilterStringCase');

    // foreach also handles arrays whose keys are not a gap-free 0..n-1 sequence
    foreach ($terms as $key => $term) {
        $terms[$key] = mb_convert_case($term, $case_mode, 'UTF-8');
    }

    return $terms;
}
/**
 * Runs the indexer step by step until the task is finished, saving the task
 * periodically and sleeping between completed steps when 'CrawlerDelay' is set.
 *
 * @param nc_search_indexer $indexer
 * @throws nc_search_exception when $indexer->next() returns an unknown value
 * @return boolean true when task is finished
 */
public function loop(nc_search_indexer $indexer) {
    $delay = (int) nc_search::get_setting('CrawlerDelay');

    // Read once. An empty or non-positive value would make the modulo below
    // fail with a division by zero, so fall back to saving on every cycle.
    $save_every = (int) nc_search::get_setting('IndexerSaveTaskEveryNthCycle');
    if ($save_every < 1) {
        $save_every = 1;
    }

    $cycle_number = 0;
    while (true) {
        // save the task every N cycles
        if ($cycle_number % $save_every == 0) {
            $indexer->save_task();
        }

        switch ($indexer->next()) {
            case nc_search_indexer::TASK_FINISHED:
                return true; // we're done

            case nc_search_indexer::TASK_STEP_FINISHED:
                if ($delay) {
                    sleep($delay);
                }
                break;

            case nc_search_indexer::TASK_STEP_SKIPPED:
                break;

            default:
                throw new nc_search_exception("Incorrect return value from nc_search_indexer::next()");
        }

        $cycle_number++;
    }
}
/**
 * Tells whether the document may be indexed.
 *
 * A <meta> tag whose name is "robots" or equals the 'CrawlerUserAgent'
 * setting, and whose content contains "noindex", forbids indexing
 * (note: attribute values are compared case-sensitively).
 * The check is skipped entirely when 'ObeyMetaNoindex' is off.
 *
 * @return boolean
 */
public function should_index() {
    if (nc_search::should('ObeyMetaNoindex')) {
        $bot_name = nc_search::get_setting('CrawlerUserAgent');
        $noindex_tags = $this->xpath(
            '//meta[((@name="robots") or (@name="' . $bot_name . '")) and (contains(@content, "noindex"))]'
        );

        return $noindex_tags->length == 0;
    }

    return true;
}
/**
 * Configures the tokenizer from the module settings:
 * 'IgnoreNumbers' selects the letters-only or the alphanumeric regexp,
 * a positive 'MaxTermsPerField' sets the term and chunk limits.
 */
public function __construct() {
    // skip numbers?
    $this->ignore_numbers = nc_search::should('IgnoreNumbers');
    if ($this->ignore_numbers) {
        $this->tokenizer_regexp = $this->regexp_alpha;
    } else {
        $this->tokenizer_regexp = $this->regexp_alnum;
    }

    // limits are only set when a positive maximum is configured
    $term_limit = nc_search::get_setting('MaxTermsPerField');
    if ($term_limit > 0) {
        $this->max_terms = $term_limit + 2;
        $this->max_chunks = $term_limit * 2 + 2;
    }
}
/**
 * Drops terms shorter than the 'MinWordLength' setting.
 * Length is measured in UTF-8 characters. When the setting is below 2,
 * the terms are returned unchanged.
 *
 * @param array $terms
 * @return array remaining terms, re-indexed from 0
 */
public function filter(array $terms) {
    $min_length = nc_search::get_setting('MinWordLength');
    if ($min_length < 2) {
        return $terms;
    }

    $result = array();
    // foreach does not depend on the keys being a gap-free 0..n-1 sequence
    foreach ($terms as $term) {
        if (mb_strlen($term, 'UTF-8') >= $min_length) {
            $result[] = $term;
        }
    }

    return $result;
}
/**
 * @param string $default_operator "AND"|"OR" (case-sensitive); any other
 *   value is replaced with the 'DefaultBooleanOperator' setting.
 * @param bool $ignore_numbers null means "use the 'IgnoreNumbers' setting"
 */
public function __construct($default_operator = null, $ignore_numbers = null) {
    $is_known_operator = $default_operator
        && ($default_operator == "AND" || $default_operator == "OR");
    if (!$is_known_operator) {
        $default_operator = nc_search::get_setting("DefaultBooleanOperator");
    }
    $this->default_operator = $default_operator;

    if ($ignore_numbers === null) {
        $ignore_numbers = nc_search::should('IgnoreNumbers');
    }
    $this->ignore_numbers = $ignore_numbers;

    // without numbers, terms consist of letters only
    if ($ignore_numbers) {
        $this->term_chars = "\\pL";
    }
}
/**
 * Schedules reindexing of an area or a rule at the given time.
 *
 * If the same area string is already queued to start no later than
 * $timestamp + 'MinScheduleInterval', no second entry is created; the
 * existing entry is moved to $timestamp when it would start later than that.
 *
 * @param string $area_string
 * @param integer $timestamp
 */
public static function schedule_indexing($area_string, $timestamp) {
    $latest_start = $timestamp + nc_search::get_setting('MinScheduleInterval');
    $escaped_area = nc_search_util::db_escape($area_string);

    // the intent type is not taken into account
    $sql = "SELECT * FROM `%t%` WHERE `StartTime` <= {$latest_start} AND `AreaString` = '{$escaped_area}'";
    $intent = nc_search::load('nc_search_scheduler_intent', $sql)->first();

    if (!$intent) {
        $intent = new nc_search_scheduler_intent(array(
            'start_time' => $timestamp,
            'type' => nc_search_scheduler_intent::ON_REQUEST,
            'area_string' => $area_string,
        ));
    } elseif ($intent->get('start_time') > $timestamp) {
        // already scheduled, but later than requested: run it sooner
        $intent->set('start_time', $timestamp);
    }

    $intent->save();
}
/**
 * Prepares the HTTP client used by the crawler: sets the User-Agent header
 * and the redirect limit from the settings, switches request history off and
 * attaches a listener that receives the maximum document size and the list
 * of content types that have a registered document parser.
 */
public function __construct() {
    require_once 'HTTP/Client.php'; // /netcat/require/lib
    require_once 'HTTP/Request/Listener.php';

    $client = new HTTP_Client(null, array('User-Agent' => nc_search::get_setting('CrawlerUserAgent')));
    $client->enableHistory(false);
    $client->setMaxRedirects(nc_search::get_setting('CrawlerMaxRedirects'));
    $this->http_client = $client;

    $size_limit = nc_search::get_setting('CrawlerMaxDocumentSize');

    // Content types are read straight from the table. The previous author left
    // a data-layer alternative here (load all 'nc_search_extension_rule' records,
    // filter by extension_interface, collect content_type, array_unique).
    $parsable_types = nc_Core::get_object()->db->get_col("SELECT DISTINCT `ContentType`\n FROM `Search_Extension`\n WHERE `ExtensionInterface` = 'nc_search_document_parser'");

    $this->http_client->attach(
        new nc_search_indexer_crawler_listener($size_limit, $parsable_types),
        true
    );
}
/**
 * Saves the synonym list; the words are normalised before saving.
 *
 * Empty words are dropped. Unless the 'dont_filter' property is set, the list
 * is run through the language filters that precede the synonyms filter;
 * otherwise only the case is converted according to 'FilterStringCase'.
 *
 * @throws nc_search_data_exception when fewer than two words remain
 */
public function save() {
    $mb_case = nc_search::get_setting('FilterStringCase');
    $apply_filter = !$this->get('dont_filter');

    $list = array();
    foreach ($this->get('words') as $word) {
        $word = trim($word);
        if (strlen($word)) { // skip empty values
            // Convert the case here only if the filters below are not going to run.
            // The encoding is passed explicitly, as the case filter does, so the
            // result does not depend on mb_internal_encoding().
            $list[] = $apply_filter ? $word : mb_convert_case($word, $mb_case, 'UTF-8');
        }
    }

    if ($apply_filter) {
        $context = new nc_search_context(array('language' => $this->get('language')));
        $list = nc_search_extension_manager::get('nc_search_language_filter', $context)
            ->until_first('nc_search_language_filter_synonyms')
            ->apply('filter', $list);
    }

    if (sizeof($list) < 2) {
        throw new nc_search_data_exception(NETCAT_MODULE_SEARCH_ADMIN_SYNONYM_LIST_MUST_HAVE_AT_LEAST_TWO_WORDS);
    }

    $this->set('words', $list);
    parent::save();
}
/**
 * Fills the 'context' property of a result document (fragments of the text
 * that matched the query) and highlights its title.
 * This 'context' is unrelated to nc_search_context.
 *
 * Nothing is done when the document already has a context (an external search
 * service could, in theory, have set it) or when highlighting is switched off.
 *
 * @param nc_search_result_document $doc
 * @return nc_search_result_document the same document
 */
protected function document_set_context(nc_search_result_document $doc) {
    if ($doc->get('context') || !$this->should_highlight()) {
        return $doc;
    }

    $language = $this->get_query()->get('language');

    $context = $this->highlight(
        $doc->get('content'),
        $language,
        nc_search::get_setting('ResultContextMaxNumberOfWords')
    );
    $doc->set('context', $context);

    $title = $this->highlight(
        $doc->get('title'),
        $language,
        nc_search::get_setting('ResultTitleMaxNumberOfWords')
    );
    $doc->set('title', $title);

    return $doc;
}
<?php
/**
 * Prints title suggestions for a partially typed query as JSON.
 *
 * Incoming GET parameters:
 *  - term
 *  - language
 *
 * Prints "[]" when suggestions are disabled, when 'SuggestMode' is not
 * 'titles', or when the term is shorter than 'SuggestionsMinInputLength'.
 *
 * @global $catalogue
 */
$NETCAT_FOLDER = realpath("../../../../");
require_once "{$NETCAT_FOLDER}/vars.inc.php";
require $INCLUDE_FOLDER . "index.php";

if (!nc_search::should('EnableQuerySuggest') || nc_search::get_setting('SuggestMode') != 'titles') {
    die("[]");
}

// read the parameters
$input = trim((string) $nc_core->input->fetch_get('term'));
$input = $nc_core->utf8->conv($nc_core->NC_CHARSET, 'utf-8', $input);

// Measure the length after the conversion and with an explicit encoding, so
// the check counts characters regardless of mbstring's internal encoding
if (mb_strlen($input, 'UTF-8') < nc_search::get_setting('SuggestionsMinInputLength')) {
    die("[]");
}

$language = $nc_core->input->fetch_get('language');
if (!$language) {
    $language = $nc_core->lang->detect_lang(1);
}

// the lookup of matching titles is provider-dependent
$suggestions = nc_search::get_provider()->suggest_titles($input, $language, $catalogue);

if (!$nc_core->NC_UNICODE) {
    $suggestions = $nc_core->utf8->array_utf2win($suggestions);
}

print nc_array_json($suggestions);
<?php
/* $Id: index.php 8366 2012-11-07 16:30:14Z aix $ */
/**
 * Starts reindexing "in real time" (from the browser).
 *
 * Bootstraps the system, optionally installs a tick handler that slows the
 * script down, then hands the request to the search admin controller with the
 * 'indexing_on_request' view.
 */
$NETCAT_FOLDER = realpath("../../../../");
require_once "{$NETCAT_FOLDER}/vars.inc.php";
$use_gzip_compression = false;
require_once "{$ADMIN_FOLDER}/function.inc.php";
require_once "../function.inc.php";

// slow the work down if required
$delay = trim(nc_search::get_setting('IndexerInBrowserSlowdownDelay')); // seconds
if ($delay) {
    define('NC_SEARCH_INDEXER_DELAY_VALUE', (int) ($delay * 1000000)); // microseconds

    // Tick handler: pauses and prints a space.
    // NOTE(review): the printed space presumably keeps the connection alive - confirm.
    function nc_search_indexer_delay() {
        usleep(NC_SEARCH_INDEXER_DELAY_VALUE);
        print " ";
    }

    register_tick_function('nc_search_indexer_delay');
    declare (ticks=10000);
}

$input = nc_Core::get_object()->input;
// request parameters passed on to the controller
$request = array('.page_title' => NETCAT_MODULE_SEARCH_ADMIN_INDEXING_TITLE, 'view' => 'indexing_on_request', 'rule_id' => $input->fetch_get('rule_id'), 'token' => $input->fetch_get('token'), 'continue' => $input->fetch_get('continue'));
nc_search_admin_controller::process_request($request);
/**
 * Splits text into tokens on every run of non-letter characters (or
 * non-letter, non-digit characters unless 'IgnoreNumbers' is on).
 * At most 'MaxTermsPerField' tokens are returned when that setting is positive.
 *
 * @param string $string Text to tokenize
 * @return array
 */
protected function tokenize_text($string) {
    // split words containing numbers into number+string parts
    $string = preg_replace("/(\\pL)(\\d)/u", "\$1 \$2", $string);
    $string = preg_replace("/(\\d)(\\pL)/u", "\$1 \$2", $string);

    $delimiter = nc_search::should('IgnoreNumbers') ? '/[^\\pL]+/u' : '/[^\\pL\\d]+/u';
    $max_terms = (int) nc_search::get_setting('MaxTermsPerField');

    if ($max_terms < 1) {
        // no limit configured
        $tokens = preg_split($delimiter, $string);
        return is_array($tokens) ? $tokens : array();
    }

    // When the limit is reached, preg_split() puts the whole unsplit remainder
    // of the text into the last element. Ask for one extra chunk and drop it,
    // so that every returned element is a real token.
    $tokens = preg_split($delimiter, $string, $max_terms + 1);
    if (!is_array($tokens)) {
        return array();
    }

    return array_slice($tokens, 0, $max_terms);
}
// Refuse to run when a remote address is present in the request environment
if (isset($_SERVER['REMOTE_ADDR'])) {
    die("Access denied.");
}

$NETCAT_FOLDER = realpath(dirname(__FILE__) . "/../../../../");
// Set the environment variables a web request would normally provide
// NOTE(review): presumably read by the includes below - confirm.
putenv("DOCUMENT_ROOT={$NETCAT_FOLDER}");
putenv("HTTP_HOST=localhost");
putenv("REQUEST_URI=/");
require_once "{$NETCAT_FOLDER}/vars.inc.php";
require_once $ROOT_FOLDER . "connect_io.php";
$nc_core = nc_Core::get_object();
$nc_core->modules->load_env('ru');
$lang = $nc_core->lang->detect_lang();
require_once $ADMIN_FOLDER . "lang/" . $lang . ".php";
error_reporting(E_PARSE | E_ERROR | E_WARNING | E_USER_ERROR | E_USER_WARNING);

// slow the work down if required
$delay = trim(nc_search::get_setting('IndexerConsoleSlowdownDelay')); // seconds
if ($delay) {
    define('NC_SEARCH_INDEXER_DELAY_VALUE', (int) ($delay * 1000000)); // microseconds

    // Tick handler: pauses the script for the configured time
    function nc_search_indexer_delay() {
        usleep(NC_SEARCH_INDEXER_DELAY_VALUE);
    }

    register_tick_function('nc_search_indexer_delay');
    declare (ticks=10000);
}

// flush and close every active output buffer
while (@ob_end_flush()) {
}
// Change nc_search::LOG_CONSOLE to another value if you want to get
// more or less information about the reindexing
require_once $ROOT_FOLDER . "connect_io.php";
$nc_core = nc_Core::get_object();
$nc_core->modules->load_env('ru');
$lang = $nc_core->lang->detect_lang();
require_once $ADMIN_FOLDER . "lang/" . $lang . ".php";
error_reporting(E_PARSE | E_ERROR | E_WARNING | E_USER_ERROR | E_USER_WARNING);

// slow the work down if required
$delay = trim(nc_search::get_setting('IndexerConsoleSlowdownDelay')); // seconds
if ($delay) {
    define('NC_SEARCH_INDEXER_DELAY_VALUE', (int) ($delay * 1000000)); // microseconds

    // Tick handler: pauses the script for the configured time
    function nc_search_indexer_delay() {
        usleep(NC_SEARCH_INDEXER_DELAY_VALUE);
    }

    register_tick_function('nc_search_indexer_delay');
    declare (ticks=10000);
}

// flush and close every active output buffer
while (@ob_end_flush()) {
}

nc_search::register_logger(new nc_search_logger_plaintext(nc_search::LOG_CONSOLE));

// Hung tasks are removed only when they are not supposed to be restarted
$remove_hung_tasks = !nc_search::should('IndexerConsoleRestartHungTasks');
$current_task = nc_search_indexer::get_current_task($remove_hung_tasks);

// Resume the current task when it is a console batch task that is either idle
// or (if restarting hung tasks is enabled) inactive for longer than
// 'IndexerRemoveIdleTasksAfter'
$continue = $current_task instanceof nc_search_indexer_task && $current_task->get('runner_type') == nc_search::INDEXING_CONSOLE_BATCH && ($current_task->get('is_idle') || nc_search::should('IndexerConsoleRestartHungTasks') && time() > $current_task->get('last_activity') + nc_search::get_setting("IndexerRemoveIdleTasksAfter"));

if ($continue) {
    $indexer = new nc_search_indexer();
    $indexer->resume($current_task, new nc_search_indexer_runner_batch());
} else {
    // nothing to resume: let the scheduler decide what to run
    nc_search_scheduler::run(nc_search::INDEXING_CONSOLE_BATCH);
}
} else { ?> <fieldset> <legend><?php echo NETCAT_MODULE_SEARCH_ADMIN_INTERFACE_FORM_TEMPLATES; ?> </legend> <textarea class="code" name="s[SearchFormTemplate]"><?php echo htmlspecialchars(nc_search::get_setting('SearchFormTemplate')); ?> </textarea> </fieldset> <fieldset> <legend><?php echo NETCAT_MODULE_SEARCH_ADMIN_INTERFACE_ADVANCED_FORM_TEMPLATE; ?> </legend> <textarea class="code" name="s[AdvancedSearchFormTemplate]"><?php echo htmlspecialchars(nc_search::get_setting('AdvancedSearchFormTemplate')); ?> </textarea> </fieldset> <?php } ?> </form>
} nc_search::save_setting($k, $v); } // check SearchProvider if ($search_provider_changed) { $new_provider = $input['SearchProvider']; if (@class_exists($new_provider)) { try { $provider = new $new_provider(); if ($provider instanceof nc_search_provider) { $provider->first_run(); } else { nc_print_status(NETCAT_MODULE_SEARCH_ADMIN_SETTINGS_INCORRECT_PROVIDER_CLASS, 'error', array($new_provider)); } } catch (Exception $e) { nc_print_status(NETCAT_MODULE_SEARCH_ADMIN_SETTINGS_PROVIDER_CLASS_INITIALIZATION_ERROR, 'error', array($new_provider, $e->getMessage())); } } else { nc_print_status(NETCAT_MODULE_SEARCH_ADMIN_SETTINGS_PROVIDER_CLASS_NOT_FOUND, 'error', array($new_provider)); } } // done saving nc_print_status(NETCAT_MODULE_SEARCH_ADMIN_SETTINGS_SAVED, 'ok'); } $settings = array('ComponentID', 'SearchProvider', 'IndexerSecretKey', 'IndexerNormalizeLinks', 'IndexerSaveTaskEveryNthCycle', 'IndexerRemoveIdleTasksAfter', 'IndexerTimeThreshold', 'IndexerMemoryThreshold', 'IndexerConsoleMemoryThreshold', 'IndexerConsoleTimeThreshold', 'IndexerConsoleDocumentsPerSession', 'IndexerConsoleSlowdownDelay', 'IndexerConsoleRestartHungTasks', 'IndexerInBrowserSlowdownDelay', 'MinScheduleInterval', 'CrawlerMaxRedirects', 'NumberOfEntriesPerSitemap', 'MaxTermsPerQuery', 'MaxTermsPerField', 'ZendSearchLucene_MaxBufferedDocs', 'ZendSearchLucene_MaxMergeDocs', 'ZendSearchLucene_MergeFactor', 'PhpMorphy_LoadDictsDuringIndexing', 'DatabaseIndex_LoadAllCodesForIndexing', 'DatabaseIndex_MaxSimilarityCandidates', 'DatabaseIndex_MaxRewriteTerms', 'DatabaseIndex_UseUtf8Levenshtein', 'DatabaseIndex_MaxProximityTerms', 'DatabaseIndex_MaxProximityDistance', 'DatabaseIndex_AlwaysGetTotalCount', 'DatabaseIndex_OptimizationFrequency'); $form_description = array(); foreach ($settings as $s) { $form_description[$s] = array('type' => 'string', 'caption' => $s, 'value' => nc_search::get_setting($s)); } $form = new nc_a2f($form_description, "settings"); echo "<form class='settings system_settings' 
method='POST'>", "<input type='hidden' name='view' value='systemsettings' />", $form->render("<div>", "", "</div>", ""), "</form>";
<legend><?php echo NETCAT_MODULE_SEARCH_ADMIN_SETTING_QUERY_FEATURES; ?> </legend> <div class="setting"> <?php echo NETCAT_MODULE_SEARCH_ADMIN_SETTING_QUERY_DEFAULT_OPERATOR; ?> : <select name="s[DefaultBooleanOperator]"> <option value="AND"><?php echo NETCAT_MODULE_SEARCH_ADMIN_SETTING_QUERY_DEFAULT_OPERATOR_AND; ?> </option> <option value="OR"<?php echo nc_search::get_setting('DefaultBooleanOperator') == 'OR' ? ' selected' : ''; ?> > <?php echo NETCAT_MODULE_SEARCH_ADMIN_SETTING_QUERY_DEFAULT_OPERATOR_OR; ?> </option> </select> </div> <?php echo $this->setting_cb('AllowTermBoost', NETCAT_MODULE_SEARCH_ADMIN_SETTING_QUERY_TERM_BOOST); ?> <?php echo $this->setting_cb('AllowProximitySearch', NETCAT_MODULE_SEARCH_ADMIN_SETTING_QUERY_PROXIMITY_SEARCH); ?> <?php
/**
 * A logger may have its own level of messages to record.
 *
 * @param int $level when empty, the 'LogLevel' setting is used
 */
public function __construct($level = null) {
    if (!$level) {
        $level = nc_search::get_setting('LogLevel');
    }

    $this->level = $level;
}
/**
 * Collects the disallowed areas from two sources:
 *  (1) robots.txt, when 'CrawlerObeyRobotsTxt' is on;
 *  (2) the 'ExcludeUrlRegexps' setting, one regular expression per line.
 *
 * @return nc_search_area
 */
protected function get_disallowed_areas() {
    // (1) robots.txt
    $parts = nc_search::should('CrawlerObeyRobotsTxt')
        ? $this->get_robots_txt_area_parts()
        : array();

    // (2) Settings (ExcludeUrlRegexps): empty lines are skipped
    $lines = preg_split("/\\s*\n/u", nc_search::get_setting('ExcludeUrlRegexps'), -1, PREG_SPLIT_NO_EMPTY);
    foreach ($lines as $line) {
        // "@" is the delimiter, so it is backslash-escaped inside the expression
        $pattern = "@" . addcslashes($line, "@") . "@u";
        $parts[] = new nc_search_area_regexp(array('regexp' => $pattern));
    }

    return new nc_search_area($parts);
}
/**
 * Returns the 'IndexerConsoleDocumentsPerSession' setting as an integer.
 *
 * @return int
 */
protected function get_max_cycles_number() {
    $documents_per_session = nc_search::get_setting('IndexerConsoleDocumentsPerSession');

    return (int) $documents_per_session;
}
// Sitemap generator: prints either a sitemap index (when the site has more
// URLs than fit into one file) or loads one page of sitemap entries.
header("Content-type: text/xml");

// Path four levels above this file, built without realpath()
$NETCAT_FOLDER = join(strstr(__FILE__, "/") ? "/" : "\\", array_slice(preg_split("/[\\/\\\\]+/", __FILE__), 0, -4)) . (strstr(__FILE__, "/") ? "/" : "\\");
require_once "{$NETCAT_FOLDER}/vars.inc.php";
require $ROOT_FOLDER . "connect_io.php";
$nc_core->modules->load_env();

print '<?xml version="1.0" encoding="UTF-8"?>'; // bark before the cat tries to meow

$scheme = isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] != 'off' ? 'https' : 'http';
$url_prefix = "{$scheme}://{$_SERVER['HTTP_HOST']}";

$site = $nc_core->catalogue->get_by_host_name($_SERVER['HTTP_HOST']); // never trust a cat
// Cast to int: the value goes straight into SQL, and an unknown host must
// produce an empty result instead of a broken query
$site_id = (int) $site['Catalogue_ID'];

$start = $nc_core->input->fetch_get("start");

// The limit is used as a divisor and inside LIMIT, so it has to be a positive
// integer; fall back to the sitemap protocol maximum of 50000 URLs per file
$max_num_urls = (int) nc_search::get_setting('NumberOfEntriesPerSitemap');
if ($max_num_urls < 1) {
    $max_num_urls = 50000;
}

if (!strlen((string) $start)) {
    // if there are too many results, print a sitemapindex
    $num_urls = $db->get_var("SELECT COUNT(*)\n FROM `Search_Document` \n WHERE `Catalogue_ID` = {$site_id} AND `IncludeInSitemap`=1");
    if ($num_urls > $max_num_urls) {
        // host and request URI come from the client: escape them for XML
        $url = htmlspecialchars("{$url_prefix}{$_SERVER['REQUEST_URI']}?start=", ENT_QUOTES, 'UTF-8');
        print '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";
        for ($i = 0, $last = ceil($num_urls / $max_num_urls); $i < $last; $i++) {
            print "<sitemap><loc>" . $url . $i * $max_num_urls . "</loc></sitemap>\n";
        }
        print "</sitemapindex>\n";
        die;
    }
}

// a negative offset is not valid SQL
$start = max(0, (int) $start);

$entries = $db->get_results("SELECT `Path`, \n `SitemapChangefreq`, \n `SitemapPriority`,\n DATE_FORMAT(`LastModified`, '%Y-%m-%dT%T') AS `LastModified`\n FROM `Search_Document`\n WHERE `Catalogue_ID` = {$site_id} AND `IncludeInSitemap`=1\n LIMIT {$max_num_urls} OFFSET {$start}", ARRAY_A);
<?php
/* $Id: netcat_cron.php 8456 2012-11-23 10:42:55Z aix $ */
/**
 * Entry point for NetCat's own "cron".
 *
 * The request must carry a `secret_key` GET parameter equal to the
 * 'IndexerSecretKey' setting; otherwise the attempt is logged and rejected.
 */
$NETCAT_FOLDER = realpath(dirname(__FILE__) . "/../../../../");
require_once "{$NETCAT_FOLDER}/vars.inc.php";
require_once $ROOT_FOLDER . "connect_io.php";
$nc_core = nc_Core::get_object();
$nc_core->modules->load_env('ru');
$lang = $nc_core->lang->detect_lang();
require_once $ADMIN_FOLDER . "lang/" . $lang . ".php";
error_reporting(E_PARSE | E_ERROR | E_WARNING | E_USER_ERROR | E_USER_WARNING);

// flush and close every active output buffer
while (@ob_end_flush()) {
}

$secret_key = nc_Core::get_object()->input->fetch_get("secret_key");
// non-scalar input (e.g. secret_key[]=...) is treated as an empty key
$secret_key = is_scalar($secret_key) ? (string) $secret_key : '';
$expected_key = (string) nc_search::get_setting('IndexerSecretKey');

// Compare as strings with a strict operator: loose comparison lets
// numeric-looking strings such as "1e3" and "1000" match each other.
// NOTE(review): an empty configured key still matches a missing parameter,
// as before - consider rejecting requests when the key is not configured.
if ($secret_key !== $expected_key) {
    $file = __FILE__;
    nc_search::log(nc_search::LOG_ERROR, "Attempt to access '{$file}' with a wrong secret key '{$secret_key}' from {$_SERVER['REMOTE_ADDR']}");
    die("Access denied.");
}

nc_search::register_logger(new nc_search_logger_plaintext());
nc_search_scheduler::run(nc_search::INDEXING_NC_CRON);
/**
 * Runs the indexer step by step until the task is finished, the connection
 * is lost, or the runner decides to interrupt the task.
 *
 * @param nc_search_indexer $indexer
 * @throws nc_search_exception when $indexer->next() returns an unknown value
 * @return boolean true when the task is finished or was cancelled because the
 *   connection check failed; false when the task was interrupted
 */
public function loop(nc_search_indexer $indexer) {
    // An empty or non-positive value would make the modulo below fail with a
    // division by zero, so fall back to saving on every cycle
    $save_cycles = (int) nc_search::get_setting('IndexerSaveTaskEveryNthCycle');
    if ($save_cycles < 1) {
        $save_cycles = 1;
    }

    $cycle_number = 0;
    while (true) {
        // stop prematurely:
        if (!$this->check_connection()) {
            $indexer->cancel();
            return true; // nobody listens anyway
        }
        if ($this->interrupt_if_needed($indexer, $cycle_number)) {
            return false;
        }

        // save the task every N cycles
        if ($cycle_number % $save_cycles == 0) {
            $indexer->save_task();
        }

        switch ($indexer->next()) {
            case nc_search_indexer::TASK_FINISHED:
                return true; // we're done

            case nc_search_indexer::TASK_STEP_FINISHED:
                if ($this->delay) {
                    // check again before sleeping
                    if ($this->interrupt_if_needed($indexer, $cycle_number)) {
                        return false;
                    }
                    sleep($this->delay);
                }
                break;

            case nc_search_indexer::TASK_STEP_SKIPPED:
                break;

            default:
                throw new nc_search_exception("Incorrect return value from nc_search_indexer::next()");
        }

        $cycle_number++;
    }
}
/**
 * Finds codes of indexed terms that are similar to the given term.
 *
 * Candidates are pre-selected by length in the database and then compared in
 * PHP by edit distance; similarity is 1 - distance / (length of the shorter term).
 *
 * @param string $term1
 * @param float $min_similarity
 * @return array of similar term codes (array("____") if no similar terms were found)
 */
protected function get_similar_terms($term1, $min_similarity) {
    $max_candidates = (int) nc_search::get_setting("DatabaseIndex_MaxSimilarityCandidates");
    $max_results = (int) nc_search::get_setting("DatabaseIndex_MaxRewriteTerms");
    $use_utf_levenshtein = (bool) nc_search::get_setting("DatabaseIndex_UseUtf8Levenshtein");

    $term_length = mb_strlen($term1, 'UTF-8');
    if ($term_length == 0) {
        // nothing can be similar to an empty term, and the similarity
        // formula below would divide by zero
        return array("____");
    }

    $max_distance = intval((1 - $min_similarity) * $term_length); // == floor()
    $min_length = $term_length - $max_distance;
    $max_length = $term_length + $max_distance;

    // checking the match in PHP is up to 10 times faster than doing it with a
    // stored function in MySQL
    $query = "SELECT `Term`, `Code`\n FROM `{$this->term_table_name}`\n WHERE `Length` BETWEEN {$min_length} AND {$max_length}\n LIMIT {$max_candidates}";
    $terms = $this->get_db()->get_results($query, ARRAY_A);

    $similar = array();
    if ($terms) {
        foreach ($terms as $row) {
            $candidate_length = mb_strlen($row['Term'], 'UTF-8');

            // a zero-length candidate cannot be compared (division by zero)
            if ($candidate_length > 0) {
                // levenshtein() is not UTF-8-aware and gives wrong results when a
                // single-byte letter is replaced with a multi-byte one, e.g.
                // levenshtein("Z", "Я") == 2, not 1. It is still used by default
                // because it is more than twice as fast as the PHP code, and this
                // loop may process a large number
                // ($this->max_similarity_candidates) of terms
                $distance = $use_utf_levenshtein
                    ? $this->levenshtein_utf8($term1, $row['Term'])
                    : levenshtein($term1, $row['Term']);

                $terms_similarity = 1 - $distance / min($term_length, $candidate_length);
                if ($terms_similarity >= $min_similarity) {
                    $similar[] = $row['Code'];
                }
            }

            if (sizeof($similar) >= $max_results) {
                break;
            }
        }
    }

    if (!sizeof($similar)) {
        $similar[] = "____"; // haven't found any similar terms!
    }

    return $similar;
}
/**
 * Returns the path to the search section of the site with the given ID.
 *
 * Only results computed with $with_host = true are cached: the cache is read
 * for such calls only, and storing a host-less path under the same key would
 * later be returned to callers that asked for a full URL.
 *
 * @global nc_db $db
 * @throws Exception @see nc_catalogue::get_by_id()
 * @param integer $site_id
 * @param boolean $with_host prepend "http://<domain>" when the site has a domain
 * @return string|false false when the site has no section with the search component
 */
public function get_search_url($site_id, $with_host = true) {
    if ($with_host && isset($this->paths[$site_id])) {
        return $this->paths[$site_id];
    }

    global $db, $nc_core;

    $folder_data = $db->get_row("SELECT sub.`Hidden_URL` AS `path`, sub.`Subdivision_ID` AS `id`\n FROM `Subdivision` AS `sub`, `Sub_Class` AS `c`\n WHERE c.`Class_ID` = " . (int) nc_search::get_setting("ComponentID") . "\n AND c.`Subdivision_ID` = sub.`Subdivision_ID`\n AND sub.`Catalogue_ID` = " . (int) $site_id . "\n LIMIT 1", ARRAY_A);

    $path = false;
    if ($folder_data) {
        if (nc_module_check_by_keyword('routing')) {
            $path = nc_routing::get_folder_path($folder_data['id']);
        } else {
            $path = $nc_core->SUB_FOLDER . $folder_data['path'];
        }

        if ($with_host) {
            $host = $nc_core->catalogue->get_by_id($site_id, 'Domain');
            if ($host) {
                $path = "http://{$host}{$path}";
            }
        }
    }

    if ($with_host) {
        $this->paths[$site_id] = $path;
    }

    return $path;
}
/**
 * Returns a property value. 'saved_content' is special: when the
 * 'MaxDocumentPreviewTextLengthInKbytes' setting is non-empty, it is the
 * 'intact_content' property cut to at most that many kilobytes, preferably
 * at the last space before the limit.
 *
 * @param string $option
 * @return mixed
 */
public function get($option) {
    if ($option == 'saved_content' && ($max_length = nc_search::get_setting('MaxDocumentPreviewTextLengthInKbytes'))) {
        $max_length = (int) ($max_length * 1024);
        $content = $this->properties['intact_content'];

        if (strlen($content) > $max_length) {
            // a negative offset makes strrpos() look for the last space
            // located at or before $max_length
            $cut_at = strrpos($content, " ", $max_length - strlen($content));

            if ($cut_at) {
                $content = substr($content, 0, $cut_at);
            } else {
                // No usable space (e.g. one very long word): previously this
                // produced an empty string. Cut by bytes instead, without
                // splitting a multi-byte character.
                // NOTE(review): assumes the content is UTF-8 - confirm.
                $content = mb_strcut($content, 0, $max_length, 'UTF-8');
            }
        }

        return $content;
    }

    return parent::get($option);
}
/**
 * Is reindexing running at the moment?
 *
 * When $remove_hung_tasks is true and the first task has been inactive for
 * longer than 'IndexerRemoveIdleTasksAfter', the task is deleted, the
 * `Search_Link` and `Search_LinkReferrer` tables are truncated, an error is
 * logged and false is returned.
 *
 * @param bool $remove_hung_tasks
 * @return false|nc_search_indexer_task
 */
public static function get_current_task($remove_hung_tasks = true) {
    $tasks = nc_search::load_all('nc_search_indexer_task', true);
    if (!sizeof($tasks)) {
        return false;
    }

    // are we hung?
    $task = $tasks->first();
    if ($remove_hung_tasks && time() > $task->get('last_activity') + nc_search::get_setting("IndexerRemoveIdleTasksAfter")) {
        $task->delete();

        $db = nc_Core::get_object()->db;
        $db->query("TRUNCATE TABLE `Search_Link`");
        $db->query("TRUNCATE TABLE `Search_LinkReferrer`");

        // date() instead of strftime(), which is deprecated as of PHP 8.1;
        // the output format is the same
        nc_search::log(nc_search::LOG_ERROR, "Indexer task was last active at " . date("Y-m-d H:i:s", (int) $task->get('last_activity')) . ". Task removed.");

        return false;
    }

    return $task;
}
/**
 * Returns title suggestions for a partially typed query.
 *
 * Titles that start with the input (looked up in the documents table) come
 * first; when 'SearchTitleBaseformsForSuggestions' is on, they are followed by
 * titles found through the index. At most 'NumberOfSuggestions' entries are
 * returned.
 *
 * @param string $input
 * @param string $language
 * @param integer $site_id
 * @return array list of array("label" => title, "url" => url)
 */
public function suggest_titles($input, $language, $site_id) {
    $suggestions = array(); // the suggestions themselves
    $titles = array();

    // both values are interpolated into queries below, so force integers
    $limit = (int) nc_search::get_setting('NumberOfSuggestions');
    $site_id = (int) $site_id;

    // search in the index (i.e. the variants are matched after the filters have
    // been applied - base forms)
    if (nc_search::should('SearchTitleBaseformsForSuggestions')) {
        $last_space = strrpos($input, " ");
        $as_phrase = nc_search::should('SearchTitleAsPhraseForSuggestions');
        $b1 = $as_phrase ? '"' : '(';
        $b2 = $as_phrase ? '"' : ')';

        /* @todo check that the last word is correct/complete? */
        // NOTE(review): $input is placed into the query string as is; query
        // syntax characters typed by the user are not escaped here.
        $query_string = "(title:{$b1}{$input}{$b2}" . ($last_space ? " OR title:{$b1}" . trim(substr($input, 0, $last_space)) . $b2 : '') . ") AND site_id:{$site_id}";

        $query = new nc_search_query($query_string);
        $query->set('limit', $limit)->set('options_to_fetch', array('title', 'site_id', 'path'))->set('language', $language);

        $documents = $this->find($query, false);
        foreach ($documents as $doc) {
            $suggestions[] = array("label" => $doc->get('title'), "url" => $doc->get('url'));
            $titles[] = '"' . nc_search_util::db_escape($doc->get('title')) . '"';
        }
        $titles = array_unique($titles);
    }

    // Exact prefix match in the documents table.
    // Ideally the database would be queried first and the index second, but the
    // index query cannot exclude already matched titles as easily.
    //
    // "%", "_" and "\" are special inside LIKE, so they are escaped before the
    // regular string escaping; otherwise user input acts as a wildcard pattern.
    // NOTE(review): assumes db_escape() backslash-escapes like
    // mysql_real_escape_string - confirm.
    $like_prefix = nc_search_util::db_escape(
        strtr($input, array('\\' => '\\\\', '%' => '\\%', '_' => '\\_'))
    );

    $query = "SELECT `Catalogue_ID`, `Path`, `Title` FROM `%t%` " .
        ' WHERE `Title` LIKE "' . $like_prefix . '%" ' .
        ($titles ? " AND `Title` NOT IN (" . join(", ", $titles) . ") " : "") .
        " ORDER BY `Title` " .
        " LIMIT {$limit}";

    $documents = new nc_search_result();
    $documents->select_from_database($query);
    foreach ($documents as $doc) {
        // database matches go in front of the index matches
        array_unshift($suggestions, array("label" => $doc->get('title'), "url" => $doc->get('url')));
    }

    $suggestions = array_slice($suggestions, 0, $limit);

    return $suggestions;
}