/**
 * Returns the phpMorphy instance for the language of the current context.
 *
 * Instances are cached per language in self::$instances, so the dictionaries
 * are opened at most once per language. The doubled language code is also
 * stored in $this->language as a side effect.
 *
 * @throws nc_search_exception when phpMorphy cannot be instantiated
 * @return phpMorphy
 */
protected function get_morphy() {
    // phpMorphy requires "ru_ru", "en_en"
    $code = $this->context->get('language');
    $morphy_language = $code . "_" . $code;
    $this->language = $morphy_language;

    if (isset(self::$instances[$morphy_language])) {
        return self::$instances[$morphy_language];
    }

    // the library must be loaded before its PHPMORPHY_* constants are referenced
    if (!class_exists('phpMorphy', false)) {
        nc_search::load_3rdparty_script("phpmorphy/src/common.php");
    }

    $use_memory_storage = nc_search::should('PhpMorphy_LoadDictsDuringIndexing')
        && $this->context->get('action') == 'indexing';

    $options = array(
        'storage' => $use_memory_storage ? PHPMORPHY_STORAGE_MEM : PHPMORPHY_STORAGE_FILE,
        'predict_by_suffix' => true,
        'predict_by_db' => true
    );

    // directory where the dictionaries are located
    $dict_path = nc_search::get_3rdparty_path() . '/phpmorphy/dicts';

    try {
        self::$instances[$morphy_language] = new phpMorphy($dict_path, $morphy_language, $options);
    } catch (phpMorphy_Exception $e) {
        throw new nc_search_exception("Error occurred while creating phpMorphy instance: " . $e->getMessage());
    }

    return self::$instances[$morphy_language];
}
/**
 * Tells whether the document may be indexed.
 *
 * When the 'ObeyMetaNoindex' setting is on, the document is not indexed if it
 * has a <meta> tag named "robots" or named after the crawler user agent
 * (the 'CrawlerUserAgent' setting) whose content contains "noindex".
 * Note: the attribute comparison is case-sensitive.
 *
 * @return boolean
 */
public function should_index() {
    if (nc_search::should('ObeyMetaNoindex')) {
        $bot_name = nc_search::get_setting('CrawlerUserAgent');
        $noindex_tags = $this->xpath(
            '//meta[((@name="robots") or (@name="' . $bot_name . '")) and (contains(@content, "noindex"))]'
        );
        return $noindex_tags->length == 0;
    }

    return true;
}
/**
 * Neutralises query syntax belonging to disabled search features.
 *
 * $this->escape_patterns maps a setting name to a regular expression; for
 * every setting that is switched off, each match of its pattern is replaced
 * with the pattern's first capture group.
 *
 * @param string $query
 * @return string
 */
protected function escape_special_characters($query) {
    foreach ($this->escape_patterns as $feature => $regexp) {
        if (nc_search::should($feature)) {
            continue; // feature is allowed, leave its syntax untouched
        }
        $query = preg_replace($regexp, '$1', $query);
    }

    return $query;
}
/**
 * Configures the tokenizer from the module settings:
 *  - 'IgnoreNumbers' selects the letters-only or the alphanumeric regexp;
 *  - a positive 'MaxTermsPerField' sets the term and chunk limits.
 */
public function __construct() {
    // skip numbers?
    $skip_numbers = nc_search::should('IgnoreNumbers');
    $this->ignore_numbers = $skip_numbers;
    if ($skip_numbers) {
        $this->tokenizer_regexp = $this->regexp_alpha;
    } else {
        $this->tokenizer_regexp = $this->regexp_alnum;
    }

    // max terms
    $limit = nc_search::get_setting('MaxTermsPerField');
    if ($limit > 0) {
        $this->max_terms = $limit + 2;
        $this->max_chunks = $limit * 2 + 2;
    }
}
/**
 * Tries to remove the quotes from the query.
 *
 * Applies only when the 'RemovePhrasesOnEmptyResult' setting is on, the query
 * contains a double quote and has no quoted run without whitespace ("word").
 * Distance modifiers ("~2") are dropped; in boolean queries (or queries with
 * "+" / "-") quoted phrases become parenthesised groups, otherwise the quotes
 * are simply removed.
 *
 * @param nc_search_language_corrector_phrase $phrase
 * @return boolean TRUE when the phrase was rewritten
 */
public function correct(nc_search_language_corrector_phrase $phrase) {
    if (!nc_search::should('RemovePhrasesOnEmptyResult')) {
        return false;
    }

    $before = $phrase->to_string();

    if (strpos($before, '"') === false) {
        return false; // nothing is quoted
    }
    if (preg_match('/"\\S+"/u', $before)) {
        return false; // a single quoted word: leave it as it is
    }

    // remove distance search
    $after = preg_replace('/"~[\\d\\.]+/', '"', $before);

    $needs_grouping = nc_search_util::is_boolean_query($after) || preg_match('/[-+]/', $after);
    if ($needs_grouping) {
        // there is a phrase with several words: turn "a b" into (a b)
        $after = preg_replace('/"(\\S)/u', "(\$1", $after);
        $after = str_replace('"', ")", $after);
    } else {
        $after = str_replace('"', "", $after);
    }

    $phrase->set_phrase(
        $after,
        sprintf(NETCAT_MODULE_SEARCH_CORRECTION_QUOTES, $before, $after)
    );

    return true;
}
/**
 * Returns the terms of the phrase that are found neither in the index nor in
 * the dictionary.
 *
 * Every returned term gets 'is_incorrect' set; when the analyzer lookup yields
 * a string, that string is stored as the term's 'corrected_term'.
 *
 * @param nc_search_language_corrector_phrase $phrase
 * @return array|false FALSE when the phrase has no terms left to correct
 */
protected function get_unknown_terms(nc_search_language_corrector_phrase $phrase) {
    $candidates = $phrase->get_not_corrected_terms();
    if (!sizeof($candidates)) {
        return false;
    }

    $stopwords = nc_search::should('RemoveStopwords')
        ? new nc_search_language_filter_stopwords($this->context)
        : false;

    $unknown = array();
    foreach ($candidates as $term) {
        // the string must be in the proper case for the analyzer to process it correctly
        $word = $term->get('term');

        // throw out the stop words first
        if ($stopwords && !$stopwords->filter(array($word))) {
            continue;
        }

        // index lookup first; the dictionary is consulted only for indexed words
        $verdict = $this->provider_lookup($word) ? $this->analyzer_lookup($word) : false;
        if ($verdict === true) {
            continue; // the word is known
        }

        // FALSE (unknown) or STRING (unknown, with a suggested replacement)
        $unknown[] = $term;
        $term->set('is_incorrect', true);
        if (is_string($verdict)) {
            $term->set('corrected_term', $verdict);
        }
    }

    return $unknown;
}
/**
 * Removes stop words of the context language from the list of terms.
 *
 * A term is either a string or an array of alternative forms. Stop words are
 * dropped from the alternatives; if exactly one form survives it is returned
 * as a plain string, if none survives the term is dropped altogether.
 *
 * The stop list is loaded once per language and cached in self::$lists.
 * Nothing is filtered when the 'RemoveStopwords' setting is off or the list
 * is empty.
 *
 * @param array $terms
 * @return array
 */
public function filter(array $terms) {
    if (!nc_search::should('RemoveStopwords')) {
        return $terms;
    }

    $language = $this->context->get('language');
    if (!isset(self::$lists[$language])) {
        $query = "SELECT * FROM `%t%` WHERE `Language`='" . nc_search_util::db_escape($language) . "'";
        self::$lists[$language] = nc_search::load('nc_search_language_stopword', $query, 'word');
    }

    $stop_list = self::$lists[$language];
    if (!count($stop_list)) {
        return $terms;
    }

    $result = array();
    foreach ($terms as $term) {
        if (is_array($term)) { // alternative forms
            // Collect the survivors into a fresh list instead of unset()-ing
            // entries: unset() leaves gaps in the keys, so reading $term[0]
            // afterwards failed whenever the first form was the stop word.
            $forms_left = array();
            foreach ($term as $form) {
                if (!$stop_list->has_key($form)) {
                    $forms_left[] = $form;
                }
            }

            $forms_count = count($forms_left);
            if ($forms_count == 1) {
                $result[] = $forms_left[0];
            } elseif ($forms_count > 1) {
                $result[] = $forms_left;
            }
        } elseif (!$stop_list->has_key($term)) { // ordinary term
            $result[] = $term;
        }
    }

    return $result;
}
/**
 * Runs a search and returns its results.
 *
 * If the search yields nothing, the query may be rewritten by the language
 * correctors and retried. The query is stored in the query log unless logging
 * is disabled.
 *
 * @param string $query_string
 * @param string|array $area
 * @param string $params Parameters, ampersand-separated:
 *   - field - field to search in. Allowed values: 'title'
 *   - interval - non-empty value if filtering by date is on
 *   - intervalvalue - interval value
 *   - intervalunit - interval type (hour, day, week, month)
 *   - sortby - sorting. Empty value means sorting by relevance.
 *     Allowed values: last_updated or the name of a field that allows sorting
 *   - sortdirection - desc (default), asc
 *   - language - language of the results, detected automatically by default
 *   - curPos - current position (number of the first result)
 *   - recNum - number of results per page, 10 by default (taken from the
 *     component settings in the subdivision)
 *   - correct - try to correct queries that gave no results (defaults to the
 *     corresponding module setting)
 *   - nologging - do not store the query in the query log (used when viewing
 *     the results from the admin panel, so that statistics are not skewed)
 * @return nc_search_result
 */
public function get_results($query_string, $area = "", $params = "") {
    if (!nc_search::should('EnableSearch')) {
        return new nc_search_result(); // return empty collection
    }

    global $nc_core;

    $start_time = microtime(true);
    $query_string = (string) $query_string;

    parse_str($params, $params);

    if (isset($params["field"]) && $params["field"] && nc_search::should('AllowFieldSearch')) {
        $query_string = "{$params['field']}:({$query_string})";
    }

    $query = new nc_search_query($query_string);

    $has_interval = isset($params["interval"]) && isset($params["intervalvalue"]) &&
                    isset($params["intervalunit"]) && $params["interval"] &&
                    $params["intervalvalue"] && $params["intervalunit"];

    if ($has_interval) {
        $timestamp = strtotime("-{$params['intervalvalue']} {$params['intervalunit']}");
        // strtotime() returns FALSE for an unparsable interval; skip the date
        // filter then instead of filtering by the Unix epoch.
        if ($timestamp !== false) {
            // date("YmdHis") gives the same string strftime("%Y%m%d%H%M%S") did;
            // strftime() is deprecated as of PHP 8.1.
            $query->set('modified_after', date("YmdHis", $timestamp));
        }
    }

    // NOTE(review): sorting is gated by 'AllowFieldSearch' — confirm this is the intended setting
    $allow_sort = isset($params["sortby"]) && $params["sortby"] &&
                  nc_search::should('AllowFieldSearch');

    if ($allow_sort) {
        $query->set('sort_by', $params["sortby"]);
        if (isset($params["sortdirection"]) && strtoupper($params["sortdirection"]) == 'ASC') {
            $query->set('sort_direction', SORT_ASC);
        }
    }

    if (isset($params["curPos"]) && $params["curPos"]) {
        $query->set('offset', (int) $params["curPos"]);
    }

    if (isset($params["recNum"]) && $params["recNum"]) {
        $query->set('limit', (int) $params["recNum"]);
    }

    if ($area) {
        if (is_array($area)) {
            $area = join(" ", $area);
        }
        $query->set('area', $area);
    }

    $language = isset($params["language"]) && $params["language"]
        ? $params["language"]
        : $nc_core->lang->detect_lang(1);
    $query->set('language', $language);

    $shutdown_page_path = nc_folder_path($nc_core->subdivision->get_current('Subdivision_ID'));
    register_shutdown_function('nc_search_shutdown', $shutdown_page_path, $query_string);

    $query_error = false;
    try {
        $results = nc_search::find($query);
    } catch (Exception $e) {
        $query_error = true;
        $results = new nc_search_result();
        $results->set_query($query)->set_error_message($e->getMessage());
    }

    $results->set_output_encoding(nc_core('NC_CHARSET'));

    // shall we try to correct the query if there were no results?
    $try_to_correct = $results->get_total_count() == 0 &&
                      !$query_error &&
                      (isset($params["correct"]) && $params["correct"] || nc_search::should('TryToCorrectQueries')) &&
                      preg_match_all('/[\\pL\\pN\\?\\*]+/u', $query_string, $tmp) <= nc_search::get_setting('MaxQueryLengthForCorrection');

    if ($try_to_correct) {
        $context = new nc_search_context(array("language" => $language, "action" => "searching"));
        $correctors = nc_search_extension_manager::get('nc_search_language_corrector', $context)->get_all();

        if (count($correctors)) {
            $phrase = new nc_search_language_corrector_phrase($query_string);
            $rewritten_query = clone $query;

            foreach ($correctors as $corrector) {
                if (!$corrector->correct($phrase)) {
                    continue; // this corrector changed nothing
                }

                // something was corrected, let's try searching again
                $rewritten_query->set('query_string', $phrase->to_string());
                try {
                    $corrected_results = nc_search::find($rewritten_query);
                    if (count($corrected_results)) {
                        $results = $corrected_results;
                        $results->set_correction_suggestion($phrase->get_suggestion());
                        $results->set_output_encoding(nc_core('NC_CHARSET'));
                        break; // exit "foreach corrector"
                    }
                } catch (Exception $e) {
                    // deliberately ignored: the search may fail, for example, when
                    // the changed word has several base forms; try the next corrector
                }
            } // of "foreach corrector"
        } // end of "has correctors"
    } // end of "if ($try_to_correct)"

    $will_log = true;
    if (isset($params['nologging']) && $params['nologging'] && strlen($query_string)) {
        // only users with module administration rights may leave no trace
        if (isset($GLOBALS['AUTH_USER_ID']) && isset($GLOBALS['perm']) && $GLOBALS["perm"]->isAccess(NC_PERM_MODULE)) {
            $will_log = false;
        }
    }

    if ($will_log && nc_search::should('SaveQueryHistory') && $query->get('offset') == 0) {
        // REMOTE_ADDR is absent e.g. when run from the command line
        $remote_addr = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';
        $ip = ip2long($remote_addr); // warning: does not work with IPv6 (yields FALSE)
        if ($ip > 0x7fffffff) {
            $ip -= 0x100000000; // produce a signed 4-byte integer on 64-bit systems
        }

        $user_id = isset($GLOBALS['AUTH_USER_ID']) ? $GLOBALS['AUTH_USER_ID'] : null;
        $site_id = isset($GLOBALS['catalogue']) ? $GLOBALS['catalogue'] : null;

        $query->set('results_count', $results->get_total_count())
              ->set('user_ip', $ip)
              ->set('user_id', $user_id)
              ->set('site_id', $site_id)
              ->save();
    }

    $results->set_search_time(microtime(true) - $start_time);

    return $results;
}
/**
 * Processes the next link from the queue.
 *
 * Links inside the indexed area are fetched with GET and processed; other
 * links are only checked with HEAD when link checking is enabled
 * ('CrawlerCheckLinks', and 'CrawlerCheckOutsideLinks' for external links).
 * Disallowed URLs are not requested at all.
 *
 * @return integer
 *    nc_search_indexer::TASK_STEP_SKIPPED (nothing was done),
 *    nc_search_indexer::TASK_STEP_FINISHED (a request was made and processed),
 *    or the result of finalize() when there are no links left
 */
public function next() {
    $link = $this->task->get_next_link();
    if (!$link) {
        return $this->finalize(); // NO MORE LINKS
    }

    $url = $link->get('url');

    // $made_request tells whether a pause may be needed after this step
    // (in accordance with the settings)
    $response = false;
    $made_request = false;

    if (!$this->task->is_url_disallowed($url)) {
        if ($this->get_area()->includes($url)) {
            $response = $this->crawler->get($url);
            $made_request = true;
        } elseif (nc_search::should("CrawlerCheckLinks") &&
                  ($this->is_internal_link($url) || nc_search::should("CrawlerCheckOutsideLinks"))) {
            // the link only has to be checked
            $response = $this->crawler->head($url);
            $made_request = true;
        }
    }

    if ($response) {
        $code = $response->get_code(); // 0 if nothing was received (e.g. the domain does not resolve)
        $max_doc_size = nc_search::get_setting("CrawlerMaxDocumentSize");

        // 401 (Authorization required) is not treated as a broken link
        $is_broken = !$code || $code == 400 || $code >= 402;

        if ($is_broken) {
            $link->set('is_broken', true);
            $this->task->increment('total_not_found');
        } elseif ($response->has_body() &&
                  (!$max_doc_size || $response->get_body_length() <= $max_doc_size)) {
            // there is a response and it is not too long for us
            $this->process_response($response, $link);
            $this->task->increment('total_processed');
        } else {
            $this->task->increment('total_checked');
        }
    }

    $link->set('is_processed', true);

    if ($link->get_id()) {
        // save the link status (broken, processed)
        $link->save();

        // set ToDelete for the broken links from this page
        $sql = "UPDATE `Search_BrokenLink`\n SET `ToDelete` = 1\n WHERE `Referrer_URL` = '" .
               nc_search_util::db_escape($link->get('url')) .
               "'";
        try {
            $this->query_db($sql);
        } catch (Exception $e) {
            trigger_error($e->getMessage(), E_USER_WARNING);
        }
    }

    if ($made_request) {
        return self::TASK_STEP_FINISHED;
    }

    return self::TASK_STEP_SKIPPED;
}
/**
 * Builds an absolute URL from a link and the URL of the page it was found on.
 *
 * Besides resolving relative links the method:
 *  - removes "./" and "../" path segments;
 *  - sorts query parameters when the 'IndexerNormalizeLinks' setting is on;
 *  - encodes the host and the path (IDN, non-latin paths) and lower-cases
 *    the scheme and the host.
 * The fragment ("#...") is not included in the result.
 *
 * @param string $href
 * @param string $referrer
 * @return string|false FALSE when a host-less link comes with the degenerate
 *    referrer "http://" or "http:///"
 * @throws nc_search_exception when a host-less link cannot be resolved
 *    because the referrer has no host
 */
protected function resolve_link($href, $referrer = null) {
    $referrer_parts = $this->parse_utf8_url($referrer);

    // A protocol-relative absolute link is handled incorrectly
    // by parse_url() prior to PHP 5.4.7
    if (substr($href, 0, 2) == "//") {
        $scheme = isset($referrer_parts['scheme']) ? $referrer_parts['scheme'] : 'http';
        $href = "{$scheme}:{$href}";
    }

    $href_parts = $this->parse_utf8_url($href);
    if (!is_array($href_parts)) {
        $href_parts = array(); // $href == "#"
    }

    $result_parts = $href_parts;

    if (!isset($href_parts["host"])) { // path with no host name
        if ($referrer == 'http:///' || $referrer == 'http://') {
            return false;
        }

        if (!$referrer_parts || !isset($referrer_parts["host"])) {
            throw new nc_search_exception("Cannot resolve full URL: '{$href}' (no referrer)");
        }

        foreach (array("scheme", "host", "port", "path") as $p) {
            if (isset($referrer_parts[$p]) && !isset($href_parts[$p])) {
                $result_parts[$p] = $referrer_parts[$p];
            }
        }

        // relative path (a missing path is set to "/" further below)
        if (isset($result_parts["path"]) && substr($result_parts["path"], 0, 1) != "/") {
            $referrer_path = isset($referrer_parts["path"]) ? $referrer_parts["path"] : "/";
            if (substr($referrer_path, -1) == '/') {
                $referrer_dir = $referrer_path;
            } else {
                // dirname("/page.html") is "/" ("\" on Windows): strip the
                // separator before appending our own, otherwise the result
                // would start with "//"
                $referrer_dir = rtrim(dirname($referrer_path), "/\\") . "/";
            }
            $result_parts["path"] = $referrer_dir . $result_parts["path"];
        }
    } // end of "path with no host name"

    // "http://mysite.org" → "http://mysite.org/"
    if (!isset($result_parts["path"])) {
        $result_parts["path"] = "/";
    }

    // get rid of "./", "../"
    if (strpos($result_parts["path"], "./") !== false) {
        $path_fragments = array();
        foreach (explode("/", $result_parts["path"]) as $part) {
            if ($part == '.' || $part == '') {
                continue;
            }
            if ($part == '..') {
                array_pop($path_fragments);
            } else {
                $path_fragments[] = $part;
            }
        }

        $path = "/" . join("/", $path_fragments);
        // keep the trailing slash of the resolved path; it is checked on the
        // result because $href_parts may have no "path" at all
        if ($path != "/" && substr($result_parts["path"], -1) == '/') {
            $path .= "/";
        }

        $result_parts["path"] = $path;
    }

    // Query parameters are sorted in order not to request a page twice when
    // links to it list the parameters in a different order, for example:
    // /sub/?tag=22&curPos=10 and /sub/?curPos=10&tag=22 are the same page.
    // Module setting: IndexerNormalizeLinks
    if (isset($result_parts["query"]) && strpos($result_parts["query"], "&") &&
        nc_search::should('IndexerNormalizeLinks')) {
        $params = explode("&", $result_parts["query"]);
        sort($params);
        $result_parts["query"] = join("&", $params);
    }

    // IDN & non-latin paths
    $result_parts["host"] = nc_search_util::encode_host($result_parts["host"]);
    $result_parts["path"] = nc_search_util::encode_path($result_parts["path"]);

    // MySite.ORG == mysite.org
    $result_parts["host"] = strtolower($result_parts["host"]);

    $full_url = strtolower($result_parts["scheme"]) . "://" .
                $result_parts["host"] .
                (isset($result_parts["port"]) ? ":{$result_parts['port']}" : "") .
                $result_parts["path"] .
                (isset($result_parts["query"]) ? "?{$result_parts['query']}" : "");

    return $full_url;
}
/**
 * Splits text into tokens.
 *
 * Words that mix letters and digits are first separated into letter and
 * digit parts ("usd50" gives "usd", "50"). The text is then split on every
 * run of characters that are not letters (and not digits, unless the
 * 'IgnoreNumbers' setting is on). When 'MaxTermsPerField' is positive, at
 * most that many tokens are returned and the rest of the text is discarded.
 *
 * @param string $string Text to tokenize
 * @return array
 */
protected function tokenize_text($string) {
    // split words containing numbers into number+string parts
    $string = preg_replace("/(\\pL)(\\d)/u", "\$1 \$2", $string);
    $string = preg_replace("/(\\d)(\\pL)/u", "\$1 \$2", $string);

    $delimiter = nc_search::should('IgnoreNumbers') ? '/[^\\pL]+/u' : '/[^\\pL\\d]+/u';
    $max_terms = (int) nc_search::get_setting('MaxTermsPerField');

    if ($max_terms <= 0) {
        // no limit
        return preg_split($delimiter, $string);
    }

    // With a limit, preg_split() puts the whole unsplit remainder of the text
    // into the last element. Ask for one extra piece and drop it, so that
    // every returned element is a real token.
    $tokens = preg_split($delimiter, $string, $max_terms + 1);

    // preg_split() returns FALSE on failure; pass that through unchanged
    return is_array($tokens) ? array_slice($tokens, 0, $max_terms) : $tokens;
}
/**
 * Builds the area describing the URLs that must not be requested.
 *
 * It is composed of
 *  (1) the robots.txt parts, when the 'CrawlerObeyRobotsTxt' setting is on;
 *  (2) one regexp part per non-empty line of the 'ExcludeUrlRegexps' setting.
 *
 * @return nc_search_area
 */
protected function get_disallowed_areas() {
    // (1) robots.txt
    $parts = nc_search::should('CrawlerObeyRobotsTxt')
        ? $this->get_robots_txt_area_parts()
        : array();

    // (2) Settings (ExcludeUrlRegexps), one expression per line
    $lines = preg_split("/\\s*\n/u", nc_search::get_setting('ExcludeUrlRegexps'), -1, PREG_SPLIT_NO_EMPTY);
    foreach ($lines as $line) {
        // "@" is the delimiter, so it is escaped inside the expression
        $pattern = "@" . addcslashes($line, "@") . "@u";
        $parts[] = new nc_search_area_regexp(array('regexp' => $pattern));
    }

    // done
    return new nc_search_area($parts);
}
// Console entry point of the indexer: bootstraps the system, optionally slows
// itself down, then either resumes the current console batch task or lets the
// scheduler start a new one.
require_once $ROOT_FOLDER . "connect_io.php";
$nc_core = nc_Core::get_object();
$nc_core->modules->load_env('ru');
$lang = $nc_core->lang->detect_lang();
require_once $ADMIN_FOLDER . "lang/" . $lang . ".php";
error_reporting(E_PARSE | E_ERROR | E_WARNING | E_USER_ERROR | E_USER_WARNING);

// slow the script down if required
$delay = trim(nc_search::get_setting('IndexerConsoleSlowdownDelay')); // seconds
if ($delay) {
    define('NC_SEARCH_INDEXER_DELAY_VALUE', (int) ($delay * 1000000)); // microseconds

    // tick handler: sleeps for the configured delay
    function nc_search_indexer_delay() {
        usleep(NC_SEARCH_INDEXER_DELAY_VALUE);
    }

    register_tick_function('nc_search_indexer_delay');
    declare (ticks=10000);
}

// flush and close every active output buffer
while (@ob_end_flush()) {
}

nc_search::register_logger(new nc_search_logger_plaintext(nc_search::LOG_CONSOLE));

// hung tasks are removed unless they are to be restarted
$remove_hung_tasks = !nc_search::should('IndexerConsoleRestartHungTasks');
$current_task = nc_search_indexer::get_current_task($remove_hung_tasks);

// Resume the current task only if it is a console batch task that is either
// idle or (when restarting hung tasks is enabled) has shown no activity for
// longer than the 'IndexerRemoveIdleTasksAfter' setting.
$continue = $current_task instanceof nc_search_indexer_task && $current_task->get('runner_type') == nc_search::INDEXING_CONSOLE_BATCH && ($current_task->get('is_idle') || nc_search::should('IndexerConsoleRestartHungTasks') && time() > $current_task->get('last_activity') + nc_search::get_setting("IndexerRemoveIdleTasksAfter"));

if ($continue) {
    $indexer = new nc_search_indexer();
    $indexer->resume($current_task, new nc_search_indexer_runner_batch());
} else {
    nc_search_scheduler::run(nc_search::INDEXING_CONSOLE_BATCH);
}
/**
 * Returns the regular expression used to highlight the query terms.
 *
 * The expression is built on the first call and kept in
 * $this->highlight_regexp; later calls return the stored value regardless of
 * the $language passed.
 *
 * @param string $language
 * @return string
 */
protected function get_highlight_regexp($language) {
    if ($this->highlight_regexp) {
        return $this->highlight_regexp;
    }

    $query_string = $this->get_query_string();
    $context = new nc_search_context(array('language' => $language, 'action' => 'searching'));

    // Get the words from the query.
    // (There is little sense in removing terms prefixed with "-" and "NOT",
    // since as a rule they do not get into the results.)
    $query_string = preg_replace("/[\\^~][\\d\\.]+/", '', $query_string); // operators ^1, ~1
    preg_match_all("/[\\pL\\d\\?\\*]+/u", $query_string, $matches);
    $terms = $matches[0];

    $has_wildcards = strpos($query_string, "*") !== false || strpos($query_string, "?") !== false;
    if ($has_wildcards) {
        // wildcards become regexp fragments when allowed, otherwise they are dropped
        if (nc_search::should('AllowWildcardSearch')) {
            $wildcards_replacement = array("?" => ".", "*" => "[\\S]+");
        } else {
            $wildcards_replacement = array("?" => "", "*" => "");
        }

        foreach (array_keys($terms) as $i) {
            $terms[$i] = strtr($terms[$i], $wildcards_replacement);
        }
    }

    // run the terms through the language filters (stop words are kept)
    $filters = nc_search_extension_manager::get('nc_search_language_filter', $context);
    $terms = $filters->except('nc_search_language_filter_stopwords')->apply('filter', $terms);

    $analyzer = nc_search_extension_manager::get('nc_search_language_analyzer', $context)->first();
    if ($analyzer) {
        $this->highlight_regexp = $analyzer->get_highlight_regexp($terms);
    } else {
        $this->highlight_regexp = nc_search_util::word_regexp("(" . join("|", $terms) . ")", "Si");
    }

    return $this->highlight_regexp;
}
/**
 * Parses a query string into a tree of query expressions.
 *
 * Tokens are taken from the string one at a time; each token either produces
 * an expression (term, phrase, group, interval, wildcard, fuzzy term) or sets
 * a modifier for an upcoming/previous expression (field name, "+", "-", NOT,
 * boolean operator, boost, distance). Expressions are joined with the
 * operator seen before them, or with $this->default_operator otherwise.
 *
 * @param string $query_string
 * @param boolean $is_recursive_call when FALSE, the string is first converted
 *    to UTF-8 from the system charset (NC_CHARSET)
 * @return nc_search_query_expression an nc_search_query_expression_empty
 *    instance when no expression was produced
 */
public function parse($query_string, $is_recursive_call = false) {
    if (!$is_recursive_call) {
        // change string encoding to UTF-8 or ensure it's not broken if it is
        // already UTF-8
        $query_string = mb_convert_encoding($query_string, 'UTF-8', nc_Core::get_object()->NC_CHARSET);
    }

    /*
     * LEXEMES
     *
     * simple/terminal:
     *    term
     *    wildcard*
     *    wildcard?
     *
     * group (inside):
     *    (a b)   -- essentially "a AND b" or "a OR b"
     *    "a b"
     *
     * group (left and right)
     *    AND &&
     *    OR ||
     *    [a TO b]
     *    {a TO b}
     *    (implicit AND or OR)
     *
     * wrap following expression:
     *    NOT !
     *
     * modify next expression:
     *    field_name:
     *    +
     *    -  (must be preceded with a whitespace if not at the beginning of the string)
     *
     * modify previous expression:
     *    ^2
     *    ~0.5  (for term: fuzzy search) --- extracted with the preceding term
     *    ~2    (for phrase: proximity search)
     *
     * special rules:
     *  - terms with both letters and numbers are considered a phrase:
     *       x123y567z → phrase("x 123 y 567 z")
     *       inside quotes: "price usd50" → phrase("price usd 50")
     *  - decimal fractions are considered a phrase:
     *       0.123 → phrase("0 123")
     *       "price 0.12" → phrase("price 0 12")
     */

    $query_remainder = $query_string; // part of the query string that is not parsed yet
    $root = null; // result of the parsing
    $previous = null; // previous expression
    $operator = $this->default_operator; // joining operator ("AND", "OR")
    $previous_was_group = false;
    $next_not = $next_required = $next_excluded = false; // modifiers for the upcoming token
    $next_field_name = null; // field name modifier

    while (true) {
        $expression = null;
        // presumably the remove_*() helpers consume the extracted part from
        // $query_remainder (passed by reference) — TODO confirm
        $token = $this->remove_next_token($query_remainder);
        if ($token === null) {
            break;
        }

        // ----- make sense of the received token:
        if ($token == "(") { // start of the group?
            $expression = $this->remove_group($query_remainder); //may return null if parentheses are not balanced
            if ($expression) {
                $previous_was_group = true;
            }
        } elseif ($token == '"') { // phrase?
            $expression = $this->remove_phrase($query_remainder); // may return null if not a phrase
        } elseif (($token == "[" || $token == "{") && nc_search::should('AllowRangeSearch')) { // can be an interval
            $expression = $this->remove_interval($query_remainder, $token); // may return null if not an interval
        } elseif (substr($token, -1) == ":" && nc_search::should('AllowFieldSearch')) { // field name!
            $next_field_name = substr($token, 0, -1);
        } elseif ($token == "+") { // "required" sign (not same as AND if default operator is OR)
            $next_required = true;
        } elseif ($token == "-" && !$previous || strlen($token) > 1 && trim($token) == "-") {
            // (a) "excluded" sign at the beginning of the query (not same as NOT if default operator is OR)
            // (b) "excluded" sign elsewhere (separated by the space)
            $next_excluded = true;
        } elseif ($token == "!" || $token == "NOT") { // boolean operators are case-sensitive
            $next_not = true; // wrap next item inside NOT
        } elseif ($token == "&&" || $token == "AND") {
            $operator = "AND";
        } elseif ($token == "||" || $token == "OR") {
            $operator = "OR";
        } elseif (strpos($token, "~") > 0 && preg_match("/^[{$this->term_chars}]+~/u", $token)) { // fuzzy search
            list($term, $similarity) = explode("~", $token); // decimal value ("0.5")
            if (nc_search::should('AllowFuzzySearch')) {
                $expression = new nc_search_query_expression_fuzzy($term, $similarity);
            } else {
                // fuzzy search is off: fall back to an ordinary term
                $expression = new nc_search_query_expression_term($term);
            }
        } elseif ($token[0] == "~" && nc_search::should('AllowProximitySearch')) { // phrase word distance option
            $value = substr($token, 1); // integer value
            if ($previous instanceof nc_search_query_expression_phrase) {
                $previous->set_distance($value);
            }
            // no fallback, throw the token out
        } elseif ($token[0] == "^" && nc_search::should('AllowTermBoost')) { // term and phrase boost
            $value = substr($token, 1); // integer or decimal value
            if ($previous instanceof nc_search_query_expression_term || $previous instanceof nc_search_query_expression_phrase) {
                $previous->set_boost($value);
            }
            // no fallback, just discard (complicated: decimal value can result in two terms)
        } elseif ((strpos($token, "*") || strpos($token, "?")) && nc_search::should('AllowWildcardSearch')) { // wildcard; can't be the first symbol
            $expression = new nc_search_query_expression_wildcard($token);
        } elseif ($this->ignore_numbers && preg_match("/\\d/", $token)) {
            // numbers are ignored: the token is dropped, and so is the field
            // modifier that preceded it
            // reset field flag (e.g.: <price:50 term>)
            $next_field_name = null;
        } elseif (ctype_digit($token) && preg_match("/^\\.(\\d+)\\b/", $query_remainder, $match)) { // special case: decimal fractions
            $fraction = $match[1];
            // skip the dot and the fraction digits
            $query_remainder = substr($query_remainder, strlen($fraction) + 1);
            $expression = new nc_search_query_expression_phrase(array($token, $fraction));
            // TODO? such phrases could be marked in order to translate them
            // into FTS phrases rather than into REGEXP expressions
        } elseif (preg_match("/^[{$this->term_chars}]+\$/u", $token)) {
            // special case: treat terms with both letters and numbers as a phrase
            if (preg_match("/\\d/", $token)) {
                // split into digit and non-digit runs, keeping the digits
                $parts = preg_split("/(\\d+)/", $token, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
                $expression = sizeof($parts) == 1 ? new nc_search_query_expression_term($parts[0]) : new nc_search_query_expression_phrase($parts);
            } else {
                $expression = new nc_search_query_expression_term($token);
            }
        } else { // discard unknown tokens
            continue;
        }

        // -----
        // process next token if current token didn't produce an expression
        if (!$expression) {
            continue;
        }

        // -----
        // set expression flags / options
        $expression->set_field($next_field_name)->set_required($next_required)->set_excluded($next_excluded);

        // reset flags
        $next_field_name = null;
        $next_required = $next_excluded = false;

        if ($next_not) { // wrap inside NOT()
            $expression = new nc_search_query_expression_not($expression);
            $next_not = false;
        }

        // store expression in the $root tree
        if ($root == null) { // first item
            $root = $expression;
        } else { // not a first item
            if ($root instanceof nc_search_query_expression_or) {
                if ($operator == "OR") { // OR+OR=OR
                    $root->add_item($expression);
                } elseif ($previous_was_group) { // (one OR two) AND three
                    $root = $this->create_boolean($operator, $root, $expression);
                } else {
                    // replace last item in OR with an AND expression
                    // (t1 OR t2 AND t3) → OR(t1, AND(t2, t3))
                    // (t1 OR t2 AND t3 AND t4) → OR(t1, AND(t2, t3, t4))
                    $root->conjunct_last($expression);
                }
            } elseif ($root instanceof nc_search_query_expression_and && $operator == "AND") {
                $root->add_item($expression); // AND+AND=AND
            } else {
                // (root=AND && operator=OR) --or-- (root is not boolean)
                // (t1 AND t2 OR t3) → OR(AND(t1, t2), t3)
                $root = $this->create_boolean($operator, $root, $expression);
            }

            // reset flag
            $previous_was_group = false;
        }

        // reset $operator:
        $operator = $this->default_operator;
        // remember previous expression:
        $previous = $expression;
    } // of "while tokens are coming"

    return $root ? $root : new nc_search_query_expression_empty();
}
<?php if (!class_exists("nc_system")) { die; } $this->get_ui()->add_lists_toolbar(); if (!nc_search::should('SaveQueryHistory')) { nc_print_status(NETCAT_MODULE_SEARCH_ADMIN_QUERY_LOG_DISABLED, 'info', array($this->hash_href("#module.search.generalsettings"))); } // генерация полей для ввода диапазонов дат $time_fields = array("d" => 2, "m" => 2, "Y" => 4, "H" => 2, "M" => 2); foreach (array('from', 'to') as $i) { $input = NETCAT_MODULE_SEARCH_DATETIME_FORMAT; foreach ($time_fields as $key => $length) { $name = "datetime_{$i}_{$key}"; $input = str_replace("%{$key}", "<input type='text' name='{$name}' value='" . $this->format_input($name, "%0{$length}d") . "' class='i{$length}' maxlength='{$length}' size='{$length}' />", $input); } ${"datetime_{$i}"} = $input; } $results = $this->get_input('results'); $per_page = (int) $this->get_input('per_page', 20); $sort_by = $this->get_input('sort_by'); ?> <!-- фильтр --> <form method="GET" action=""> <input type="hidden" name="view" value="queries" /> <table class="query_filter"> <tr> <td class="fragment_cell"> <div class="caption"><?php
<?php
/**
 * Outputs JSON with page title suggestions for the search input.
 *
 * Incoming (GET) parameters:
 *  - term     - the text typed so far
 *  - language - language of the suggestions; detected automatically if empty
 *
 * An empty JSON array is printed when query suggestions are disabled, when
 * the suggest mode is not 'titles', or when the input is shorter than the
 * 'SuggestionsMinInputLength' setting.
 *
 * @global $catalogue
 */
$NETCAT_FOLDER = realpath("../../../../");
require_once $NETCAT_FOLDER . "/vars.inc.php";
require $INCLUDE_FOLDER . "index.php";

// get the parameters
$input = trim($nc_core->input->fetch_get('term'));

if (!nc_search::should('EnableQuerySuggest')) {
    die("[]");
}
if (nc_search::get_setting('SuggestMode') != 'titles') {
    die("[]");
}
if (mb_strlen($input) < nc_search::get_setting('SuggestionsMinInputLength')) {
    die("[]");
}

$input = $nc_core->utf8->conv($nc_core->NC_CHARSET, 'utf-8', $input);

$language = $nc_core->input->fetch_get('language') ?: $nc_core->lang->detect_lang(1);

// searching for matching titles is provider-dependent
$suggestions = nc_search::get_provider()->suggest_titles($input, $language, $catalogue);

if (!$nc_core->NC_UNICODE) {
    $suggestions = $nc_core->utf8->array_utf2win($suggestions);
}

echo nc_array_json($suggestions);
<?php
// Search module widget (beginning of the script): collects an error message
// about the indexing rules/schedule and starts buffering the output of the
// configuration checks.

// must be run from within the system
if (!class_exists("nc_system")) {
    die;
}

// if (!nc_search::should('EnableSearch')) {
//     nc_print_status(NETCAT_MODULE_SEARCH_ADMIN_SETTING_SEARCH_DISABLED, "error",
//         array($this->hash_href("#module.search.generalsettings"), "_top"));
//     return;
// }

$nc_core = nc_core();
$db = $this->get_db();
$provider = nc_search::get_provider();
$is_history_saved = nc_search::should('SaveQueryHistory');

// -----------------------------------------------------------------------------
// Pending tasks
$rules = nc_search::load('nc_search_rule', "SELECT * FROM `%t%` ORDER BY `LastStartTime` DESC");
if (count($rules)) {
    // a scheduled start time more than 12 hours in the past means the
    // schedule is not being run
    $pending_time = time() - 12 * 60 * 60;
    $pending_tasks = $db->get_var("SELECT `StartTime`\n FROM `Search_Schedule`\n WHERE `StartTime` < {$pending_time}\n LIMIT 1");
    if ($pending_tasks) {
        $error_message = NETCAT_MODULE_SEARCH_WIDGET_CHECK_CRONTAB;
    }
} else {
    $error_message = NETCAT_MODULE_SEARCH_WIDGET_NO_RULES;
}
// NOTE(review): $error_message is not assigned here when rules exist and no
// task is overdue — confirm that the code below checks for that

// Configuration errors
ob_start();
// (1) Indexer
$provider->check_environment(true);
// (2) Parsers
/**
 * Finds document titles to suggest for the text typed into the search field.
 *
 * Two sources are combined:
 *  1. (when the 'SearchTitleBaseformsForSuggestions' setting is on) an index
 *     search in the title field, so that variants are matched after the
 *     filters have been applied (base forms);
 *  2. a database search for titles that start with the input.
 * Database matches are placed before the index matches; the list is cut to
 * the 'NumberOfSuggestions' setting.
 *
 * @param string $input
 * @param string $language
 * @param integer $site_id
 * @return array list of arrays with the keys "label" and "url"
 */
public function suggest_titles($input, $language, $site_id) {
    $limit = nc_search::get_setting('NumberOfSuggestions');

    $suggestions = array(); // the suggestions themselves
    $found_titles = array(); // quoted and escaped titles already found in the index

    // search in the index
    if (nc_search::should('SearchTitleBaseformsForSuggestions')) {
        $last_space = strrpos($input, " ");

        if (nc_search::should('SearchTitleAsPhraseForSuggestions')) {
            $open = '"';
            $close = '"';
        } else {
            $open = '(';
            $close = ')';
        }

        /* @todo check whether the last word is correct/complete? */
        // the input without its last word is searched for as an alternative
        $without_last_word = '';
        if ($last_space) {
            $without_last_word = " OR title:{$open}" . trim(substr($input, 0, $last_space)) . $close;
        }
        $query_string = "(title:{$open}{$input}{$close}" . $without_last_word . ") AND site_id:{$site_id}";

        $index_query = new nc_search_query($query_string);
        $index_query->set('limit', $limit)
                    ->set('options_to_fetch', array('title', 'site_id', 'path'))
                    ->set('language', $language);

        foreach ($this->find($index_query, false) as $doc) {
            $suggestions[] = array("label" => $doc->get('title'), "url" => $doc->get('url'));
            $found_titles[] = '"' . nc_search_util::db_escape($doc->get('title')) . '"';
        }
        $found_titles = array_unique($found_titles);
    }

    // Search for an exact match in the documents table.
    // Ideally the database would be queried before the index, but then it
    // would not be as easy to filter out the titles that have already matched.
    $exclude_found = $found_titles
        ? " AND `Title` NOT IN (" . join(", ", $found_titles) . ") "
        : "";

    $sql = "SELECT `Catalogue_ID`, `Path`, `Title` FROM `%t%` " .
           ' WHERE `Title` LIKE "' . nc_search_util::db_escape($input) . '%" ' .
           $exclude_found .
           " ORDER BY `Title` " .
           " LIMIT {$limit}";

    $db_documents = new nc_search_result();
    $db_documents->select_from_database($sql);

    // database matches go to the beginning of the list
    foreach ($db_documents as $doc) {
        array_unshift($suggestions, array("label" => $doc->get('title'), "url" => $doc->get('url')));
    }

    return array_slice($suggestions, 0, $limit);
}
/**
 * Renders a checkbox for a boolean module setting.
 *
 * A hidden input with the value 0 precedes the checkbox under the same name,
 * so that a value is submitted even when the checkbox is not ticked.
 *
 * @param string $option setting name
 * @param string $caption label text
 * @param mixed $override_value state of the checkbox; when NULL, the current
 *    value of the setting is used
 * @return string HTML
 */
protected function setting_cb($option, $caption, $override_value = null) {
    if ($override_value === null) {
        $is_on = nc_search::should($option);
    } else {
        $is_on = $override_value;
    }

    $checked_attr = $is_on ? " checked" : "";

    $html = "<div class='setting'>\n <input type='hidden' name='s[{$option}]' value='0' />\n <input type='checkbox' name='s[{$option}]' value='1' id='cb_{$option}'";
    $html .= $checked_attr;
    $html .= "/> ";
    $html .= "<label for='cb_{$option}'>{$caption}</label>\n </div>\n";

    return $html;
}
<?php if (!class_exists("nc_system")) { die; } $ui = $this->get_ui(); if (!nc_search::should('EnableSearch')) { nc_print_status(NETCAT_MODULE_SEARCH_ADMIN_SETTING_SEARCH_DISABLED, "error", array($this->hash_href("#module.search.generalsettings"), "_top")); } $rules = nc_search::load('nc_search_rule', "SELECT * FROM `%t%` ORDER BY `Rule_ID`")->set_output_encoding(nc_core('NC_CHARSET')); if (count($rules)) { foreach ($rules as $r) { // строчка «последняя индексация» $last_start_time = $r->get('last_start_time'); $last_finish_time = $r->get('last_finish_time'); if (!$last_start_time) { $last_run = NETCAT_MODULE_SEARCH_ADMIN_RULE_NEVER_RUN . "."; } else { $last_run = NETCAT_MODULE_SEARCH_ADMIN_RULE_LAST_RUN . ": " . nc_search_util::format_time($last_start_time) . " ("; if ($last_finish_time < $last_start_time) { $last_run .= NETCAT_MODULE_SEARCH_ADMIN_RULE_LAST_RUN_NOT_FINISHED; } else { $last_run .= NETCAT_MODULE_SEARCH_ADMIN_RULE_LAST_RUN_DURATION . " " . nc_search_util::format_seconds($last_finish_time - $last_start_time); } $last_run .= ")."; } // конец формирования строчки «последняя индексация» // строчка с результатми последней индексации $stats = ""; if ($last_start_time) { $result = $r->get('last_result');
// SAVE THE SETTINGS
// The submitted settings arrive in the "s" input array.
$new_settings = $this->get_input('s', array());
$nc_core = nc_Core::get_object();
// presumably the form data arrives in UTF-8 and is converted to the system
// charset on non-Unicode installations — TODO confirm
if (!$nc_core->NC_UNICODE) {
    $new_settings = $nc_core->utf8->array_utf2win($new_settings);
}
if ($new_settings) {
    // query suggestions are enabled whenever a suggest mode is chosen
    // NOTE(review): "SuggestMode" is read without an isset() check — confirm
    // the form always submits it
    $new_settings["EnableQuerySuggest"] = strlen($new_settings["SuggestMode"]) > 0;
    foreach ($new_settings as $k => $v) {
        nc_search::save_setting($k, $v);
    }
    nc_print_status(NETCAT_MODULE_SEARCH_ADMIN_SETTINGS_SAVED, 'ok');
}

// SHOW THE FORM
$suggest_mode = nc_search::get_setting('SuggestMode');
$suggest_enabled = nc_search::should('EnableQuerySuggest');
$component = $nc_core->component->get_by_id(nc_search::get_setting('ComponentID'));
$file_mode = $component["File_Mode"];
?>
<form method="POST" class="settings">
<input type="hidden" name="view" value="templates" />
<fieldset>
<legend><?php echo NETCAT_MODULE_SEARCH_ADMIN_INTERFACE_RESULTS; ?> </legend>
<?php echo $this->setting_cb('ShowMatchedFragment', NETCAT_MODULE_SEARCH_ADMIN_INTERFACE_SHOW_MATCHED_FRAGMENT); ?>