/**
 * Returns the collection of enabled extension rules, loading it on first use.
 *
 * The collection is stored in the static self::$all_extensions property; as
 * long as that property holds a truthy value, no further query is issued.
 *
 * @return nc_search_persistent_data_collection
 */
protected static function get_all_extensions()
{
    // Already loaded by an earlier call
    if (self::$all_extensions) {
        return self::$all_extensions;
    }

    // Only checked (enabled) rules, in priority order
    $sql = 'SELECT * FROM `%t%` WHERE `Checked` = 1 ORDER BY `Priority`';
    self::$all_extensions = nc_search::load('nc_search_extension_rule', $sql);

    return self::$all_extensions;
}
/**
 * Converts the case of every term according to the 'FilterStringCase' setting.
 *
 * Terms are treated as UTF-8 strings. Array keys are preserved.
 *
 * @param array $terms
 * @return array the same terms after case conversion
 */
public function filter(array $terms)
{
    if (!$terms) {
        return $terms;
    }

    // The conversion mode is the same for every term: look it up once
    // instead of once per iteration
    $case_mode = nc_search::get_setting('FilterStringCase');

    // Iterate over the actual keys: a 0..n-1 index loop reads undefined
    // offsets and skips trailing items when the keys are not contiguous
    foreach ($terms as $key => $term) {
        $terms[$key] = mb_convert_case($term, $case_mode, 'UTF-8');
    }

    return $terms;
}
/**
 * @param int|null $level bit mask of nc_search::LOG_* values. When omitted
 *   (or falsy), the 'LogLevel' setting is used, combined with LOG_ERROR,
 *   LOG_CRAWLER_REQUEST and LOG_INDEXING_BEGIN_END.
 */
public function __construct($level = null)
{
    $this->level = $level
        ? $level
        : (nc_search::get_setting('LogLevel')
            | nc_search::LOG_ERROR
            | nc_search::LOG_CRAWLER_REQUEST
            | nc_search::LOG_INDEXING_BEGIN_END);
}
/**
 * Runs the indexer until the task is finished.
 *
 * The task is saved every N cycles ('IndexerSaveTaskEveryNthCycle' setting)
 * and the runner sleeps for 'CrawlerDelay' seconds after each completed step.
 *
 * @param nc_search_indexer $indexer
 * @throws nc_search_exception when nc_search_indexer::next() returns an unknown value
 * @return boolean true when task is finished
 */
public function loop(nc_search_indexer $indexer)
{
    $delay = (int) nc_search::get_setting('CrawlerDelay');

    // Read the setting once (it is loop-invariant) and never let it drop
    // below 1: it is used as a modulo divisor, and "% 0" raises
    // DivisionByZeroError on PHP 7+
    $save_every = max(1, (int) nc_search::get_setting('IndexerSaveTaskEveryNthCycle'));

    $cycle_number = 0;
    while (true) {
        // save the task every N cycles
        if ($cycle_number % $save_every == 0) {
            $indexer->save_task();
        }

        switch ($indexer->next()) {
            case nc_search_indexer::TASK_FINISHED:
                return true; // we're done

            case nc_search_indexer::TASK_STEP_FINISHED:
                // sleep() rejects negative values, so only positive delays are honoured
                if ($delay > 0) {
                    sleep($delay);
                }
                break;

            case nc_search_indexer::TASK_STEP_SKIPPED:
                break;

            default:
                throw new nc_search_exception("Incorrect return value from nc_search_indexer::next()");
        }

        $cycle_number++;
    }
}
/**
 * Returns the phpMorphy instance for the language of the current context,
 * creating it on first use (instances are kept in self::$instances).
 *
 * Also stores the phpMorphy language code in $this->language.
 *
 * @throws nc_search_exception when phpMorphy cannot be instantiated
 * @return phpMorphy
 */
protected function get_morphy()
{
    // phpMorphy requires "ru_ru", "en_en"
    $code = $this->context->get('language');
    $language = $code . "_" . $code;
    $this->language = $language;

    if (isset(self::$instances[$language])) {
        return self::$instances[$language];
    }

    // Load the library itself if nobody has done it yet (no autoloading)
    if (!class_exists('phpMorphy', false)) {
        nc_search::load_3rdparty_script("phpmorphy/src/common.php");
    }

    // In-memory storage is used only while indexing, and only if enabled
    $use_memory_storage = nc_search::should('PhpMorphy_LoadDictsDuringIndexing')
        && $this->context->get('action') == 'indexing';

    $options = array(
        'storage' => $use_memory_storage ? PHPMORPHY_STORAGE_MEM : PHPMORPHY_STORAGE_FILE,
        'predict_by_suffix' => true,
        'predict_by_db' => true,
    );

    // Path to directory where dictionaries are located
    $dict_path = nc_search::get_3rdparty_path() . '/phpmorphy/dicts';

    try {
        self::$instances[$language] = new phpMorphy($dict_path, $language, $options);
    } catch (phpMorphy_Exception $e) {
        throw new nc_search_exception("Error occurred while creating phpMorphy instance: {$e->getMessage()}");
    }

    return self::$instances[$language];
}
/**
 * Applies the patterns from $this->escape_patterns to the query.
 *
 * Each pattern is keyed by the name of a setting; the pattern is applied
 * (replacing every match with its first capture group) only when
 * nc_search::should() reports that setting as disabled.
 *
 * @param string $query
 * @return string
 */
protected function escape_special_characters($query)
{
    foreach ($this->escape_patterns as $allow_feature => $pattern) {
        if (nc_search::should($allow_feature)) {
            continue; // feature is allowed: leave its syntax untouched
        }
        $query = preg_replace($pattern, '$1', $query);
    }

    return $query;
}
/**
 * Removes terms that are shorter than the 'MinWordLength' setting.
 *
 * Length is measured in UTF-8 characters. When the setting is below 2 the
 * input is returned unchanged; otherwise a re-indexed list is returned.
 *
 * @param array $terms
 * @return array
 */
public function filter(array $terms)
{
    $min_length = nc_search::get_setting('MinWordLength');
    if ($min_length < 2) {
        return $terms;
    }

    $result = array();
    // foreach visits every element regardless of its key; an index loop over
    // 0..n-1 would read undefined offsets and miss items on non-contiguous keys
    foreach ($terms as $term) {
        if (mb_strlen($term, 'UTF-8') >= $min_length) {
            $result[] = $term;
        }
    }

    return $result;
}
/**
 * Performs an HTTP request through $this->http_client and logs its outcome.
 *
 * @param string $method name of the HTTP client method to call
 * @param string $url
 * @return nc_search_indexer_crawler_response
 */
protected function request($method, $url)
{
    try {
        $this->http_client->{$method}($url);
        $raw_response = $this->http_client->currentResponse();
    } catch (nc_search_indexer_crawler_exception $e) {
        // the listener did not like the headers; use the response it carries
        $raw_response = $e->get_response();
    }

    $raw_response["url"] = $url;
    $response = new nc_search_indexer_crawler_response($raw_response);

    $len = $response->get_content_length();

    $message = strtoupper($method) . " {$url}\n" . "Response: {$response->get_code()}\n";
    // Content details are only reported for non-error responses
    if ($response->get_code() < 400) {
        $message .= "Content-Type: " . $response->get_content_type() . "\n"
            . "Content-Length: " . (is_null($len) ? "no" : $len)
            . ", received: " . $response->get_body_length() . " bytes";
    }
    nc_search::log(nc_search::LOG_CRAWLER_REQUEST, $message);

    return $response;
}
/**
 * Executes the first due task from the scheduler queue.
 *
 * Nothing is started when the provider reports that reindexing is already
 * in progress, or when no intent with `StartTime` <= now exists.
 *
 * @param int $indexer_strategy
 * @return bool|null false when nothing was started; null after indexing was started
 */
public static function run($indexer_strategy = nc_search::INDEXING_NC_CRON)
{
    $provider = nc_search::get_provider();
    if ($provider->is_reindexing()) {
        nc_search::log(nc_search::LOG_SCHEDULER_START, "Scheduler: indexing in progress");
        return false;
    }

    // The earliest intent whose start time has already come
    $intent = nc_search::load(
        'nc_search_scheduler_intent',
        'SELECT * FROM `%t%`' . ' WHERE `StartTime` <= ' . time() . ' ORDER BY `StartTime` ASC LIMIT 1'
    )->first();

    if (!$intent) {
        nc_search::log(nc_search::LOG_SCHEDULER_START, "Scheduler: no scheduler intents to process now");
        return false;
    }

    if (nc_search::will_log(nc_search::LOG_SCHEDULER_START)) {
        // date() produces the same "YYYY-MM-DD HH:MM:SS" text as the former
        // strftime() call, which is deprecated as of PHP 8.1
        nc_search::log(
            nc_search::LOG_SCHEDULER_START,
            "Scheduler started (planned start time: "
                . date("Y-m-d H:i:s", (int) $intent->get('start_time'))
                . "; area: '" . preg_replace("/\\s+/u", " ", $intent->get('area_string')) . "')"
        );
    }

    // the intent has been taken into account and is no longer needed
    $intent->delete();

    // start indexing
    $provider->index_area($intent->get('area_string'), $indexer_strategy);
}
/**
 * Tries to remove quotes from the query.
 *
 * Does nothing unless the 'RemovePhrasesOnEmptyResult' setting is on, the
 * phrase contains a double quote, and it has no quoted fragment that is free
 * of whitespace. For boolean queries (or queries containing "+" / "-") the
 * quotes are replaced with parentheses; otherwise they are simply dropped.
 *
 * @param nc_search_language_corrector_phrase $phrase
 * @return boolean true when the phrase was changed
 */
public function correct(nc_search_language_corrector_phrase $phrase)
{
    if (!nc_search::should('RemovePhrasesOnEmptyResult')) {
        return false;
    }

    $original_text = $phrase->to_string();

    if (strpos($original_text, '"') === false || preg_match('/"\\S+"/u', $original_text)) {
        return false;
    }

    // remove distance search
    $corrected_text = preg_replace('/"~[\\d\\.]+/', '"', $original_text);

    $keep_grouping = nc_search_util::is_boolean_query($corrected_text)
        || preg_match('/[-+]/', $corrected_text);

    if ($keep_grouping) {
        // there is a phrase with several words: turn "a b" into (a b)
        $corrected_text = preg_replace('/"(\\S)/u', "(\$1", $corrected_text);
        $corrected_text = str_replace('"', ")", $corrected_text);
    } else {
        $corrected_text = str_replace('"', "", $corrected_text);
    }

    $message = sprintf(NETCAT_MODULE_SEARCH_CORRECTION_QUOTES, $original_text, $corrected_text);
    $phrase->set_phrase($corrected_text, $message);

    return true;
}
/**
 * Saves the synonym list; the word list is normalized before saving.
 *
 * Empty words are dropped. Unless the 'dont_filter' flag is set, the words
 * are passed through the language filters that precede
 * nc_search_language_filter_synonyms; otherwise only their case is converted
 * according to the 'FilterStringCase' setting.
 *
 * @throws nc_search_data_exception when fewer than two words remain
 */
public function save()
{
    $mb_case = nc_search::get_setting('FilterStringCase');
    $apply_filter = !$this->get('dont_filter');

    $list = array();
    foreach ($this->get('words') as $word) {
        $word = trim($word);
        if ($word === '') {
            continue; // skip empty values
        }
        // Convert the case here only if the filters will not be applied later.
        // The encoding is passed explicitly: the filter chain below (and the
        // case filter itself) treat these words as UTF-8, while the default
        // mb_internal_encoding() is not guaranteed to be UTF-8
        $list[] = $apply_filter ? $word : mb_convert_case($word, $mb_case, 'UTF-8');
    }

    if ($apply_filter) {
        $context = new nc_search_context(array('language' => $this->get('language')));
        $list = nc_search_extension_manager::get('nc_search_language_filter', $context)
            ->until_first('nc_search_language_filter_synonyms')
            ->apply('filter', $list);
    }

    if (sizeof($list) < 2) {
        throw new nc_search_data_exception(NETCAT_MODULE_SEARCH_ADMIN_SYNONYM_LIST_MUST_HAVE_AT_LEAST_TWO_WORDS);
    }

    $this->set('words', $list);
    parent::save();
}
/**
 * Removes stop words of the context language from the term list.
 *
 * A term may be a string or an array of alternative forms. Stop words are
 * removed from such arrays as well; an array with a single surviving form
 * collapses to that form, and an array with no survivors is dropped.
 * Stop-word lists are loaded once per language into self::$lists.
 *
 * @param array $terms
 * @return array
 */
public function filter(array $terms)
{
    if (!nc_search::should('RemoveStopwords')) {
        return $terms;
    }

    $language = $this->context->get('language');
    if (!isset(self::$lists[$language])) {
        $query = "SELECT * FROM `%t%` WHERE `Language`='" . nc_search_util::db_escape($language) . "'";
        self::$lists[$language] = nc_search::load('nc_search_language_stopword', $query, 'word');
    }

    $stop_list = self::$lists[$language];
    if (!count($stop_list)) {
        return $terms;
    }

    $result = array();
    foreach ($terms as $term) {
        if (is_array($term)) { // alternative forms
            // Collect survivors into a fresh list. Unsetting items in place
            // leaves gaps in the keys, so "$term[0]" does not exist when the
            // first form is the stop word and exactly one form survives
            $kept = array();
            foreach ($term as $t) {
                if (!$stop_list->has_key($t)) {
                    $kept[] = $t;
                }
            }

            $terms_left = count($kept);
            if ($terms_left == 1) {
                $result[] = $kept[0];
            } elseif ($terms_left > 1) {
                $result[] = $kept;
            }
        } elseif (!$stop_list->has_key($term)) { // ordinary term
            $result[] = $term;
        }
    }

    return $result;
}
/**
 * Builds an absolute, normalized URL from a link and the page it was found on.
 *
 * Normalization performed here: "./" and "../" segments are collapsed, query
 * parameters are sorted (when the 'IndexerNormalizeLinks' setting is on), the
 * host and path are encoded (IDN, non-latin paths), and the scheme and host
 * are lower-cased.
 *
 * @param string $href link to resolve (absolute, protocol-relative or relative)
 * @param string $referrer URL of the page that contains the link
 * @return string|false absolute URL; false when the link has no host and the
 *   referrer is the empty URL 'http://' or 'http:///'
 * @throws nc_search_exception when the link has no host and the referrer
 *   cannot supply one
 */
protected function resolve_link($href, $referrer = null)
{
    $referrer_parts = $this->parse_utf8_url($referrer);

    // An absolute link without a protocol is processed incorrectly by
    // parse_url() before PHP 5.4.7, so the scheme is added explicitly
    if (substr($href, 0, 2) == "//") {
        $scheme = isset($referrer_parts['scheme']) ? $referrer_parts['scheme'] : 'http';
        $href = "{$scheme}:{$href}";
    }

    $href_parts = $this->parse_utf8_url($href);
    if (!is_array($href_parts)) {
        $href_parts = array(); // $href == "#"
    }

    $result_parts = $href_parts;

    if (!isset($href_parts["host"])) { // path with no host name
        if ($referrer == 'http:///') {
            return false;
        }
        if ($referrer == 'http://') {
            return false;
        }
        if (!$referrer_parts || !isset($referrer_parts["host"])) {
            throw new nc_search_exception("Cannot resolve full URL: '{$href}' (no referrer)");
        }

        // Take everything the link does not specify from the referrer
        foreach (array("scheme", "host", "port", "path") as $p) {
            if (isset($referrer_parts[$p]) && !isset($href_parts[$p])) {
                $result_parts[$p] = $referrer_parts[$p];
            }
        }

        // Relative path. A missing path (neither the link nor the referrer
        // has one) is left for the "/" default below instead of being read
        // as an undefined offset
        if (isset($result_parts["path"]) && substr($result_parts["path"], 0, 1) != "/") {
            $referrer_path = isset($referrer_parts["path"]) ? $referrer_parts["path"] : "/";
            if (substr($referrer_path, -1) == '/') {
                $referrer_dir = $referrer_path;
            } else {
                // dirname("/page") is "/" ("\" on Windows): trim it so that a
                // root-level referrer yields "/", not "//"
                $referrer_dir = rtrim(dirname($referrer_path), "/\\") . "/";
            }
            $result_parts["path"] = $referrer_dir . $result_parts["path"];
        }
    } // end of "path with no host name"

    // "http://mysite.org" → "http://mysite.org/"
    if (!isset($result_parts["path"])) {
        $result_parts["path"] = "/";
    }

    // get rid of "./", "../"
    if (strpos($result_parts["path"], "./") !== false) {
        $path_fragments = array();
        foreach (explode("/", $result_parts["path"]) as $part) {
            if ($part == '.' || $part == '') {
                continue;
            }
            if ($part == '..') {
                array_pop($path_fragments);
            } else {
                $path_fragments[] = $part;
            }
        }

        $path = join("/", $path_fragments);

        // Keep the trailing slash of the path being normalized. The resolved
        // path is checked rather than the link's own path, which is not set
        // when the path was inherited from the referrer (e.g. href "?a=1")
        if (substr($result_parts["path"], -1) == '/') {
            $path .= "/";
        }
        // substr() is safe on an empty string, unlike $path[0]
        if (substr($path, 0, 1) != '/') {
            $path = "/{$path}";
        }

        $result_parts["path"] = $path;
    }

    // Parameters are sorted so that a page is not requested twice when links
    // to it list the parameters in a different order, for example:
    // /sub/?tag=22&curPos=10 and /sub/?curPos=10&tag=22 are treated as one page.
    // Module setting: IndexerNormalizeLinks
    if (isset($result_parts["query"]) && strpos($result_parts["query"], "&") && nc_search::should('IndexerNormalizeLinks')) {
        $params = explode("&", $result_parts["query"]);
        sort($params);
        $result_parts["query"] = join("&", $params);
    }

    // IDN & non-latin paths
    $result_parts["host"] = nc_search_util::encode_host($result_parts["host"]);
    $result_parts["path"] = nc_search_util::encode_path($result_parts["path"]);

    // MySite.ORG == mysite.org
    $result_parts["host"] = strtolower($result_parts["host"]);

    $full_url = strtolower($result_parts["scheme"]) . "://"
        . $result_parts["host"]
        . (isset($result_parts["port"]) ? ":{$result_parts['port']}" : "")
        . $result_parts["path"]
        . (isset($result_parts["query"]) ? "?{$result_parts['query']}" : "");

    return $full_url;
}
// Console "batch" indexing entry point: either resumes the current batch
// indexing task or asks the scheduler to start the next planned one.
require_once $ROOT_FOLDER . "connect_io.php";
$nc_core = nc_Core::get_object();
$nc_core->modules->load_env('ru');
$lang = $nc_core->lang->detect_lang();
require_once $ADMIN_FOLDER . "lang/" . $lang . ".php";
error_reporting(E_PARSE | E_ERROR | E_WARNING | E_USER_ERROR | E_USER_WARNING);

// slow the script down if required
$delay = trim(nc_search::get_setting('IndexerConsoleSlowdownDelay')); // seconds
if ($delay) {
    define('NC_SEARCH_INDEXER_DELAY_VALUE', (int) ($delay * 1000000)); // microseconds
    // Tick handler: pauses the script for the configured delay
    function nc_search_indexer_delay() {
        usleep(NC_SEARCH_INDEXER_DELAY_VALUE);
    }
    register_tick_function('nc_search_indexer_delay');
    declare (ticks=10000);
}

// Flush and close every active output buffer
while (@ob_end_flush()) {
}

nc_search::register_logger(new nc_search_logger_plaintext(nc_search::LOG_CONSOLE));

// Hung tasks are removed only when they are not supposed to be restarted
$remove_hung_tasks = !nc_search::should('IndexerConsoleRestartHungTasks');
$current_task = nc_search_indexer::get_current_task($remove_hung_tasks);

// Resume when there is a console batch task that is either idle, or (when
// restarting hung tasks is enabled) has shown no activity for longer than
// the 'IndexerRemoveIdleTasksAfter' setting
$continue = $current_task instanceof nc_search_indexer_task && $current_task->get('runner_type') == nc_search::INDEXING_CONSOLE_BATCH && ($current_task->get('is_idle') || nc_search::should('IndexerConsoleRestartHungTasks') && time() > $current_task->get('last_activity') + nc_search::get_setting("IndexerRemoveIdleTasksAfter"));

if ($continue) {
    $indexer = new nc_search_indexer();
    $indexer->resume($current_task, new nc_search_indexer_runner_batch());
} else {
    nc_search_scheduler::run(nc_search::INDEXING_CONSOLE_BATCH);
}
// Sitemap generator: prints a <sitemapindex> when the site has more documents
// than fit into one sitemap file, otherwise selects one page of entries.
header("Content-type: text/xml");

$NETCAT_FOLDER = join(strstr(__FILE__, "/") ? "/" : "\\", array_slice(preg_split("/[\\/\\\\]+/", __FILE__), 0, -4)) . (strstr(__FILE__, "/") ? "/" : "\\");
require_once "{$NETCAT_FOLDER}/vars.inc.php";
require $ROOT_FOLDER . "connect_io.php";
$nc_core->modules->load_env();

print '<?xml version="1.0" encoding="UTF-8"?>'; // bark before the cat tries to meow

$scheme = isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] != 'off' ? 'https' : 'http';
$url_prefix = "{$scheme}://{$_SERVER['HTTP_HOST']}";

$site = $nc_core->catalogue->get_by_host_name($_SERVER['HTTP_HOST']); // never trust a cat
// Cast to int: the value is interpolated into SQL below, and an unknown host
// must produce an empty result rather than a broken query
$site_id = isset($site['Catalogue_ID']) ? (int) $site['Catalogue_ID'] : 0;

$start = $nc_core->input->fetch_get("start");

// The value is used as a divisor and as a SQL LIMIT, so it must be a positive
// integer; fall back to the sitemap protocol maximum (50000 URLs per file)
$max_num_urls = (int) nc_search::get_setting('NumberOfEntriesPerSitemap');
if ($max_num_urls < 1) {
    $max_num_urls = 50000;
}

if (!strlen((string) $start)) {
    // if there are too many results, output a sitemapindex
    $num_urls = $db->get_var("SELECT COUNT(*)\n FROM `Search_Document` \n WHERE `Catalogue_ID` = {$site_id} AND `IncludeInSitemap`=1");
    if ($num_urls > $max_num_urls) {
        // Append "start" with the right separator when the request already
        // carries a query string
        $separator = strpos($_SERVER['REQUEST_URI'], '?') === false ? '?' : '&';
        $url = "{$url_prefix}{$_SERVER['REQUEST_URI']}{$separator}start=";

        print '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";
        for ($i = 0, $last = ceil($num_urls / $max_num_urls); $i < $last; $i++) {
            // The host and request URI come from the client: entity-escape
            // them before they are placed into the XML document
            print "<sitemap><loc>" . htmlspecialchars($url . ($i * $max_num_urls), ENT_QUOTES, 'UTF-8') . "</loc></sitemap>\n";
        }
        print "</sitemapindex>\n";
        die;
    }
}

// A negative offset is not valid SQL
$start = max(0, (int) $start);

$entries = $db->get_results("SELECT `Path`, \n `SitemapChangefreq`, \n `SitemapPriority`,\n DATE_FORMAT(`LastModified`, '%Y-%m-%dT%T') AS `LastModified`\n FROM `Search_Document`\n WHERE `Catalogue_ID` = {$site_id} AND `IncludeInSitemap`=1\n LIMIT {$max_num_urls} OFFSET {$start}", ARRAY_A);
/**
 * Parses a query string into a tree of query expressions.
 *
 * Tokens are taken one at a time from the beginning of the string; each
 * token either produces an expression, sets a modifier for the next
 * expression (field name, "+", "-", NOT), selects the operator that joins
 * the next expression to the tree, or adjusts the previous expression
 * (boost, phrase distance).
 *
 * @param string $query_string
 * @param boolean $is_recursive_call when false, the string is first
 *   converted from the site charset (nc_Core NC_CHARSET) to UTF-8
 * @return nc_search_query_expression the root of the expression tree;
 *   nc_search_query_expression_empty when no expression was produced
 */
public function parse($query_string, $is_recursive_call = false) {
    if (!$is_recursive_call) {
        // change string encoding to UTF-8 or ensure it's not broken if it is
        // already UTF-8
        $query_string = mb_convert_encoding($query_string, 'UTF-8', nc_Core::get_object()->NC_CHARSET);
    }

    /*
     * LEXEMES
     *
     * simple/terminal:
     *   term
     *   wildcard*
     *   wildcard?
     *
     * group (inside):
     *   (a b)   -- essentially "a AND b" or "a OR b"
     *   "a b"
     *
     * group (left and right)
     *   AND  &&
     *   OR   ||
     *   [a TO b]
     *   {a TO b}
     *   (implicit AND or OR)
     *
     * wrap following expression:
     *   NOT  !
     *
     * modify next expression:
     *   field_name:
     *   +
     *   -  (must be preceded with a whitespace if not at the beginning of the string)
     *
     * modify previous expression:
     *   ^2
     *   ~0.5 (for term: fuzzy search) --- extracted with the preceding term
     *   ~2   (for phrase: proximity search)
     *
     * special rules:
     *   - terms with both letters and numbers are considered a phrase:
     *       x123y567z → phrase("x 123 y 567 z")
     *       inside quotes: "price usd50" → phrase("price usd 50")
     *   - decimal fractions are considered a phrase:
     *       0.123 → phrase("0 123")
     *       "price 0.12" → phrase("price 0 12")
     */

    $query_remainder = $query_string; // part of the query string that is not parsed yet
    $root = null; // result of the parsing
    $previous = null; // previous expression
    $operator = $this->default_operator; // joining operator ("AND", "OR")
    $previous_was_group = false;
    $next_not = $next_required = $next_excluded = false; // modifiers for the upcoming token
    $next_field_name = null; // field name modifier

    while (true) {
        $expression = null;
        // $query_remainder is updated by the remove_*() helpers as tokens are
        // consumed (presumably passed by reference — confirm in their signatures)
        $token = $this->remove_next_token($query_remainder);
        if ($token === null) {
            break;
        }

        // ----- make sense of the received token:
        if ($token == "(") { // start of the group?
            $expression = $this->remove_group($query_remainder); // may return null if parentheses are not balanced
            if ($expression) {
                $previous_was_group = true;
            }
        } elseif ($token == '"') { // phrase?
            $expression = $this->remove_phrase($query_remainder); // may return null if not a phrase
        } elseif (($token == "[" || $token == "{") && nc_search::should('AllowRangeSearch')) { // can be an interval
            $expression = $this->remove_interval($query_remainder, $token); // may return null if not an interval
        } elseif (substr($token, -1) == ":" && nc_search::should('AllowFieldSearch')) { // field name!
            $next_field_name = substr($token, 0, -1);
        } elseif ($token == "+") { // "required" sign (not same as AND if default operator is OR)
            $next_required = true;
        } elseif ($token == "-" && !$previous || strlen($token) > 1 && trim($token) == "-") {
            // (a) "excluded" sign at the beginning of the query (not same as NOT if default operator is OR)
            // (b) "excluded" sign elsewhere (separated by the space)
            $next_excluded = true;
        } elseif ($token == "!" || $token == "NOT") { // boolean operators are case-sensitive
            $next_not = true; // wrap next item inside NOT
        } elseif ($token == "&&" || $token == "AND") {
            $operator = "AND";
        } elseif ($token == "||" || $token == "OR") {
            $operator = "OR";
        } elseif (strpos($token, "~") > 0 && preg_match("/^[{$this->term_chars}]+~/u", $token)) { // fuzzy search
            list($term, $similarity) = explode("~", $token); // decimal value ("0.5")
            if (nc_search::should('AllowFuzzySearch')) {
                $expression = new nc_search_query_expression_fuzzy($term, $similarity);
            } else {
                // fuzzy search is not allowed: fall back to a plain term
                $expression = new nc_search_query_expression_term($term);
            }
        } elseif ($token[0] == "~" && nc_search::should('AllowProximitySearch')) { // phrase word distance option
            $value = substr($token, 1); // integer value
            if ($previous instanceof nc_search_query_expression_phrase) {
                $previous->set_distance($value);
            } // no fallback, throw the token out
        } elseif ($token[0] == "^" && nc_search::should('AllowTermBoost')) { // term and phrase boost
            $value = substr($token, 1); // integer or decimal value
            if ($previous instanceof nc_search_query_expression_term || $previous instanceof nc_search_query_expression_phrase) {
                $previous->set_boost($value);
            } // no fallback, just discard (complicated: decimal value can result in two terms)
        } elseif ((strpos($token, "*") || strpos($token, "?")) && nc_search::should('AllowWildcardSearch')) {
            // wildcard; can't be the first symbol (strpos() returns 0, which is falsy, in that case)
            $expression = new nc_search_query_expression_wildcard($token);
        } elseif ($this->ignore_numbers && preg_match("/\\d/", $token)) {
            // the token contains a digit and numbers are ignored: no expression
            // is produced; reset field flag (e.g.: <price:50 term>)
            $next_field_name = null;
        } elseif (ctype_digit($token) && preg_match("/^\\.(\\d+)\\b/", $query_remainder, $match)) {
            // special case: decimal fractions
            $fraction = $match[1];
            // consume the "." and the fraction digits from the remainder
            $query_remainder = substr($query_remainder, strlen($fraction) + 1);
            $expression = new nc_search_query_expression_phrase(array($token, $fraction));
            // TODO? such phrases could be flagged so that they are translated
            // into FTS phrases rather than REGEXP expressions
        } elseif (preg_match("/^[{$this->term_chars}]+\$/u", $token)) {
            // special case: treat terms with both letters and numbers as a phrase
            if (preg_match("/\\d/", $token)) {
                // split into alternating digit / non-digit parts
                $parts = preg_split("/(\\d+)/", $token, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
                $expression = sizeof($parts) == 1 ? new nc_search_query_expression_term($parts[0]) : new nc_search_query_expression_phrase($parts);
            } else {
                $expression = new nc_search_query_expression_term($token);
            }
        } else {
            // discard unknown tokens
            continue;
        }

        // -----
        // process next token if current token didn't produce an expression
        if (!$expression) {
            continue;
        }

        // -----
        // set expression flags / options
        $expression->set_field($next_field_name)->set_required($next_required)->set_excluded($next_excluded);

        // reset flags
        $next_field_name = null;
        $next_required = $next_excluded = false;

        if ($next_not) { // wrap inside NOT()
            $expression = new nc_search_query_expression_not($expression);
            $next_not = false;
        }

        // store expression in the $root tree
        if ($root == null) { // first item
            $root = $expression;
        } else { // not a first item
            if ($root instanceof nc_search_query_expression_or) {
                if ($operator == "OR") { // OR+OR=OR
                    $root->add_item($expression);
                } elseif ($previous_was_group) { // (one OR two) AND three
                    $root = $this->create_boolean($operator, $root, $expression);
                } else {
                    // replace last item in OR with an AND expression
                    // (t1 OR t2 AND t3) → OR(t1, AND(t2, t3))
                    // (t1 OR t2 AND t3 AND t4) → OR(t1, AND(t2, t3, t4))
                    $root->conjunct_last($expression);
                }
            } elseif ($root instanceof nc_search_query_expression_and && $operator == "AND") {
                $root->add_item($expression); // AND+AND=AND
            } else {
                // (root=AND && operator=OR) --or-- (root is not boolean)
                // (t1 AND t2 OR t3) → OR(AND(t1, t2), t3)
                $root = $this->create_boolean($operator, $root, $expression);
            }
            // reset flag
            $previous_was_group = false;
        }

        // reset $operator:
        $operator = $this->default_operator;
        // remember previous expression:
        $previous = $expression;
    } // of "while tokens are coming"

    return $root ? $root : new nc_search_query_expression_empty();
}
/**
 * Builds the area that must not be crawled.
 *
 * It consists of the robots.txt parts (only when the 'CrawlerObeyRobotsTxt'
 * setting is on) followed by one regexp area per non-empty line of the
 * 'ExcludeUrlRegexps' setting.
 *
 * @return nc_search_area
 */
protected function get_disallowed_areas()
{
    // (1) robots.txt
    $parts = nc_search::should('CrawlerObeyRobotsTxt')
        ? $this->get_robots_txt_area_parts()
        : array();

    // (2) Settings (ExcludeUrlRegexps), one expression per line
    $setting = nc_search::get_setting('ExcludeUrlRegexps');
    foreach (preg_split("/\\s*\n/u", $setting, -1, PREG_SPLIT_NO_EMPTY) as $line) {
        // "@" is the delimiter, so it has to be escaped inside the expression
        $pattern = "@" . addcslashes($line, "@") . "@u";
        $parts[] = new nc_search_area_regexp(array('regexp' => $pattern));
    }

    // done
    return new nc_search_area($parts);
}
die; } $ui = $this->get_ui(); $ui->add_lists_toolbar(); $nc_core = nc_Core::get_object(); // предупредить, если мы сохранили не то, что ввёл пользователь $crud_record = $this->get_action_record(); if ($crud_record && !$crud_record->get('dont_filter')) { $input = $this->get_input('data'); $saved_value = !$nc_core->NC_UNICODE ? $nc_core->utf8->array_utf2win($crud_record->get('words')) : $crud_record->get('words'); if ($input['words'] != $saved_value) { nc_print_status(NETCAT_MODULE_SEARCH_ADMIN_SYNONYM_SAVE_RESULT, 'info', array(join(' ', $saved_value), $this->hash_href("#module.search.synonyms_edit({$crud_record->get_id()})"))); } } // end of "show a notice" $synonyms = nc_search::load('nc_search_language_synonyms', "SELECT * FROM `%t%` ORDER BY `Language`")->set_output_encoding(nc_core('NC_CHARSET')); if (count($synonyms)) { // фильтр $language_options = array("<option value=''>" . NETCAT_MODULE_SEARCH_ADMIN_LANGUAGE_ANY_LANGUAGE . "</option>"); foreach ($this->get_language_list() as $code => $lang) { if ($synonyms->first('language', $code)) { $language_options[] = "<option value='{$code}'>{$lang}</option>"; } } echo "<div class='live_filter' id='synonym_filter'>", "<span class='icon'>", nc_admin_img("i_field_search_off.gif", NETCAT_MODULE_SEARCH_ADMIN_FILTER), "</span>", "<select id='filter_language'>", join("\n", $language_options), "</select>", "<input type='text' id='filter_words'>", "<span class='reset'>", "<div class='icons icon_delete' title='" . NETCAT_MODULE_SEARCH_ADMIN_FILTER_RESET . "' style='margin-top:5px'></div>", "</span>", "</div>"; ?> <form method="POST" action="?view=synonyms" onsubmit="return ($nc('input:checked').size() > 0)"> <input type="hidden" name="action" value="delete" /> <input type="hidden" name="data_class" value="nc_search_language_synonyms" /> <table id="synonym_table" class="nc-table nc--striped nc--hovered nc--small" width="100%">
} // NOTE(review): closes a construct that starts above this excerpt

// Console indexing entry point: bootstraps the system with a fake web
// environment and asks the scheduler to run the next planned task.
$NETCAT_FOLDER = realpath(dirname(__FILE__) . "/../../../../");
// Provide the environment values that a web request would normally supply
putenv("DOCUMENT_ROOT={$NETCAT_FOLDER}");
putenv("HTTP_HOST=localhost");
putenv("REQUEST_URI=/");
require_once "{$NETCAT_FOLDER}/vars.inc.php";
require_once $ROOT_FOLDER . "connect_io.php";
$nc_core = nc_Core::get_object();
$nc_core->modules->load_env('ru');
$lang = $nc_core->lang->detect_lang();
require_once $ADMIN_FOLDER . "lang/" . $lang . ".php";
error_reporting(E_PARSE | E_ERROR | E_WARNING | E_USER_ERROR | E_USER_WARNING);

// slow the script down if required
$delay = trim(nc_search::get_setting('IndexerConsoleSlowdownDelay')); // seconds
if ($delay) {
    define('NC_SEARCH_INDEXER_DELAY_VALUE', (int) ($delay * 1000000)); // microseconds
    // Tick handler: pauses the script for the configured delay
    function nc_search_indexer_delay() {
        usleep(NC_SEARCH_INDEXER_DELAY_VALUE);
    }
    register_tick_function('nc_search_indexer_delay');
    declare (ticks=10000);
}

// Flush and close every active output buffer
while (@ob_end_flush()) {
}

// Change nc_search::LOG_CONSOLE to another value if you want to receive
// more or less information about the reindexing
nc_search::register_logger(new nc_search_logger_plaintext(nc_search::LOG_CONSOLE));
nc_search_scheduler::run(nc_search::INDEXING_CONSOLE);
/**
 * Returns the 'IndexerConsoleDocumentsPerSession' setting as an integer.
 *
 * @return int
 */
protected function get_max_cycles_number()
{
    $documents_per_session = nc_search::get_setting('IndexerConsoleDocumentsPerSession');

    return (int) $documents_per_session;
}
/**
 * Returns the path to the search section of the site with the given ID.
 *
 * The search section is the first subdivision of the site that contains the
 * component set in the 'ComponentID' setting. Results that include the host
 * are cached in $this->paths.
 *
 * @global nc_db $db
 * @throws Exception @see nc_catalogue::get_by_id()
 * @param integer $site_id
 * @param boolean $with_host prepend "http://<domain>" when the site has a domain
 * @return string|false false when the site has no search section
 */
public function get_search_url($site_id, $with_host = true)
{
    if ($with_host && isset($this->paths[$site_id])) {
        return $this->paths[$site_id];
    }

    global $db, $nc_core;

    $folder_data = $db->get_row("SELECT sub.`Hidden_URL` AS `path`, sub.`Subdivision_ID` AS `id`\n FROM `Subdivision` AS `sub`, `Sub_Class` AS `c`\n WHERE c.`Class_ID` = " . (int) nc_search::get_setting("ComponentID") . "\n AND c.`Subdivision_ID` = sub.`Subdivision_ID`\n AND sub.`Catalogue_ID` = " . (int) $site_id . "\n LIMIT 1", ARRAY_A);

    $path = false;
    if ($folder_data) {
        if (nc_module_check_by_keyword('routing')) {
            $path = nc_routing::get_folder_path($folder_data['id']);
        } else {
            $path = $nc_core->SUB_FOLDER . $folder_data['path'];
        }

        if ($with_host) {
            $host = $nc_core->catalogue->get_by_id($site_id, 'Domain');
            if ($host) {
                $path = "http://{$host}{$path}";
            }
        }
    }

    // The cache is read only for "with host" requests, so only such results
    // may be written to it. Storing a host-less path here would make a later
    // $with_host = true call return a URL without the host
    if ($with_host) {
        $this->paths[$site_id] = $path;
    }

    return $path;
}
/**
 * Splits text into tokens.
 *
 * Words that mix letters and digits are split into separate letter and digit
 * parts first. Digits count as delimiters when the 'IgnoreNumbers' setting is
 * on. At most 'MaxTermsPerField' tokens are returned (no limit when that
 * setting is not a positive number). Empty tokens may be present, e.g. when
 * the text starts or ends with a delimiter.
 *
 * @param string $string Text to tokenize
 * @return array
 */
protected function tokenize_text($string)
{
    // split words containing numbers into number+string parts
    $string = preg_replace("/(\\pL)(\\d)/u", "\$1 \$2", $string);
    $string = preg_replace("/(\\d)(\\pL)/u", "\$1 \$2", $string);

    $delimiter = nc_search::should('IgnoreNumbers') ? '/[^\\pL]+/u' : '/[^\\pL\\d]+/u';
    $max_terms = (int) nc_search::get_setting('MaxTermsPerField');

    if ($max_terms < 1) {
        return preg_split($delimiter, $string, -1); // no limit
    }

    // With a limit of N, preg_split() puts the whole unsplit rest of the
    // string into the N-th item. Ask for one extra item and drop it, so
    // that every returned token is a real token
    $tokens = preg_split($delimiter, $string, $max_terms + 1);
    if (is_array($tokens) && count($tokens) > $max_terms) {
        $tokens = array_slice($tokens, 0, $max_terms);
    }

    return $tokens;
}
/**
 * Runs the indexer until the task is finished, the client disconnects, or
 * the runner decides to interrupt the session.
 *
 * @param nc_search_indexer $indexer
 * @throws nc_search_exception when nc_search_indexer::next() returns an unknown value
 * @return boolean is task finished: true when finished (or cancelled because
 *   the connection was lost), false when the loop was interrupted
 */
public function loop(nc_search_indexer $indexer)
{
    $cycle_number = 0;

    // Used as a modulo divisor below: keep it a positive integer, because
    // "% 0" raises DivisionByZeroError on PHP 7+
    $save_cycles = max(1, (int) nc_search::get_setting('IndexerSaveTaskEveryNthCycle'));

    while (true) {
        // stop prematurely:
        if (!$this->check_connection()) {
            $indexer->cancel();
            return true; // nobody listens anyway
        }
        if ($this->interrupt_if_needed($indexer, $cycle_number)) {
            return false;
        }

        // save the task every N cycles
        if ($cycle_number % $save_cycles == 0) {
            $indexer->save_task();
        }

        switch ($indexer->next()) {
            case nc_search_indexer::TASK_FINISHED:
                return true; // we're done

            case nc_search_indexer::TASK_STEP_FINISHED:
                if ($this->delay) {
                    // check once more before going to sleep
                    if ($this->interrupt_if_needed($indexer, $cycle_number)) {
                        return false;
                    }
                    sleep($this->delay);
                }
                break;

            case nc_search_indexer::TASK_STEP_SKIPPED:
                break;

            default:
                throw new nc_search_exception("Incorrect return value from nc_search_indexer::next()");
        }

        $cycle_number++;
    }
}
<?php
/* $Id: index.php 8366 2012-11-07 16:30:14Z aix $ */

/**
 * Runs reindexing "in real time" (from the browser).
 *
 * Reads the 'rule_id', 'token' and 'continue' GET parameters and passes them
 * to the admin controller as an 'indexing_on_request' view request.
 */
// NOTE: resolved against the current working directory, not this file's directory
$NETCAT_FOLDER = realpath("../../../../");
require_once "{$NETCAT_FOLDER}/vars.inc.php";
$use_gzip_compression = false;
require_once "{$ADMIN_FOLDER}/function.inc.php";
require_once "../function.inc.php";

// slow the script down if required
$delay = trim(nc_search::get_setting('IndexerInBrowserSlowdownDelay')); // seconds
if ($delay) {
    define('NC_SEARCH_INDEXER_DELAY_VALUE', (int) ($delay * 1000000)); // microseconds
    // Tick handler: pauses the script and prints a single space
    // (presumably to keep the browser connection alive — TODO confirm)
    function nc_search_indexer_delay() {
        usleep(NC_SEARCH_INDEXER_DELAY_VALUE);
        print " ";
    }
    register_tick_function('nc_search_indexer_delay');
    declare (ticks=10000);
}

$input = nc_Core::get_object()->input;
$request = array('.page_title' => NETCAT_MODULE_SEARCH_ADMIN_INDEXING_TITLE, 'view' => 'indexing_on_request', 'rule_id' => $input->fetch_get('rule_id'), 'token' => $input->fetch_get('token'), 'continue' => $input->fetch_get('continue'));
nc_search_admin_controller::process_request($request);
<?php if (!class_exists("nc_system")) { die; } $ui = $this->get_ui(); $ui->add_lists_toolbar(); $stopwords = nc_search::load('nc_search_language_stopword', "SELECT * FROM `%t%` ORDER BY `Language`, `Word`")->set_output_encoding(nc_core('NC_CHARSET')); if (count($stopwords)) { // фильтр $language_options = array("<option value=''>" . NETCAT_MODULE_SEARCH_ADMIN_LANGUAGE_ANY_LANGUAGE . "</option>"); foreach ($this->get_language_list() as $code => $lang) { if ($stopwords->first('language', $code)) { $language_options[] = "<option value='{$code}'>{$lang}</option>"; } } echo "<div class='live_filter' id='stopword_filter'>", "<span class='icon'>", nc_admin_img("i_field_search_off.gif", NETCAT_MODULE_SEARCH_ADMIN_FILTER), "</span>", "<select id='filter_language'>", join("\n", $language_options), "</select>", "<input type='text' id='filter_word'>", "<span class='reset'>", "<div class='icons icon_delete' title='" . NETCAT_MODULE_SEARCH_ADMIN_FILTER_RESET . "' style='margin-top:5px'></div>", "</span>", "</div>"; ?> <form method="POST" action="?view=stopwords" onsubmit="return ($nc('input:checked').size() > 0)"> <input type="hidden" name="action" value="delete" /> <input type="hidden" name="data_class" value="nc_search_language_stopword" /> <table id="stopword_table" class="nc-table nc--striped nc--hovered nc--small" width="100%"> <tr align="left"> <th><?php echo NETCAT_MODULE_SEARCH_ADMIN_LANGUAGE; ?> </th> <th width="75%"><?php echo NETCAT_MODULE_SEARCH_ADMIN_STOPWORD; ?>
<?php
/**
 * Title suggestions endpoint: prints a JSON list of suggested titles.
 *
 * Incoming (GET) parameters:
 *  - term
 *  - language
 *
 * @global $catalogue
 */
$NETCAT_FOLDER = realpath("../../../../");
require_once "{$NETCAT_FOLDER}/vars.inc.php";
require $INCLUDE_FOLDER . "index.php";

// read the parameters
$input = trim($nc_core->input->fetch_get('term'));

// An empty list is returned when suggestions are disabled, when the suggest
// mode is not 'titles', or when the input is too short
if (!nc_search::should('EnableQuerySuggest')) {
    die("[]");
}
if (nc_search::get_setting('SuggestMode') != 'titles') {
    die("[]");
}
if (mb_strlen($input) < nc_search::get_setting('SuggestionsMinInputLength')) {
    die("[]");
}

$input = $nc_core->utf8->conv($nc_core->NC_CHARSET, 'utf-8', $input);

$language = $nc_core->input->fetch_get('language');
if (!$language) {
    $language = $nc_core->lang->detect_lang(1);
}

// looking up matching titles is provider-dependent
$suggestions = nc_search::get_provider()->suggest_titles($input, $language, $catalogue);

// Convert the result back for sites that do not run in Unicode
if (!$nc_core->NC_UNICODE) {
    $suggestions = $nc_core->utf8->array_utf2win($suggestions);
}

echo nc_array_json($suggestions);
<?php
// Module bootstrap: load the nc_search class that sits next to this file
// and initialize it.
require_once dirname(__FILE__) . '/nc_search.class.php';

nc_search::init();
/**
 * Returns the regular expression used to highlight the query words.
 *
 * The expression is built on the first call and stored in
 * $this->highlight_regexp; later calls return the stored value.
 *
 * @param string $language language used for the filter / analyzer context
 * @return string
 */
protected function get_highlight_regexp($language)
{
    if ($this->highlight_regexp) {
        return $this->highlight_regexp;
    }

    $query_string = $this->get_query_string();
    $context = new nc_search_context(array('language' => $language, 'action' => 'searching'));

    // Extract the words from the query.
    // (Removing terms prefixed with "-" and "NOT" makes little sense, since
    // as a rule they do not appear in the result anyway.)
    $query_string = preg_replace("/[\\^~][\\d\\.]+/", '', $query_string); // operators ^1, ~1
    preg_match_all("/[\\pL\\d\\?\\*]+/u", $query_string, $matches);
    $terms = $matches[0];

    $has_wildcards = strpos($query_string, "*") !== false || strpos($query_string, "?") !== false;
    if ($has_wildcards) {
        // Wildcards become regexp fragments when allowed, otherwise they are removed
        if (nc_search::should('AllowWildcardSearch')) {
            $wildcards_replacement = array("?" => ".", "*" => "[\\S]+");
        } else {
            $wildcards_replacement = array("?" => "", "*" => "");
        }
        foreach (array_keys($terms) as $i) {
            $terms[$i] = strtr($terms[$i], $wildcards_replacement);
        }
    }

    // All language filters except the stop-word filter
    $terms = nc_search_extension_manager::get('nc_search_language_filter', $context)
        ->except('nc_search_language_filter_stopwords')
        ->apply('filter', $terms);

    // Let the language analyzer build the expression when there is one
    $analyzer = nc_search_extension_manager::get('nc_search_language_analyzer', $context)->first();
    $this->highlight_regexp = $analyzer
        ? $analyzer->get_highlight_regexp($terms)
        : nc_search_util::word_regexp("(" . join("|", $terms) . ")", "Si");

    return $this->highlight_regexp;
}
/**
 * Is reindexing running at the moment?
 *
 * When $remove_hung_tasks is true and the task has shown no activity for
 * longer than the 'IndexerRemoveIdleTasksAfter' setting, the task is deleted,
 * the `Search_Link` and `Search_LinkReferrer` tables are truncated, an error
 * is logged and false is returned.
 *
 * @param bool $remove_hung_tasks
 * @return false|nc_search_indexer_task the current task, or false when there is none
 */
public static function get_current_task($remove_hung_tasks = true)
{
    $tasks = nc_search::load_all('nc_search_indexer_task', true);
    if (!sizeof($tasks)) {
        return false;
    }

    $task = $tasks->first();

    // have we hung?
    $idle_limit = nc_search::get_setting("IndexerRemoveIdleTasksAfter");
    if ($remove_hung_tasks && time() > $task->get('last_activity') + $idle_limit) {
        $task->delete();

        $db = nc_Core::get_object()->db;
        $db->query("TRUNCATE TABLE `Search_Link`");
        $db->query("TRUNCATE TABLE `Search_LinkReferrer`");

        // date() produces the same "YYYY-MM-DD HH:MM:SS" text as the former
        // strftime() call, which is deprecated as of PHP 8.1
        nc_search::log(
            nc_search::LOG_ERROR,
            "Indexer task was last active at "
                . date("Y-m-d H:i:s", (int) $task->get('last_activity'))
                . ". Task removed."
        );

        return false;
    }

    return $task;
}
<?php
/**
 * Schedules reindexing of the given area to run in cron at the earliest
 * possible time.
 *
 * Prints "1" on success, or "0" followed by a comment with the reason.
 */
if (!class_exists("nc_system")) {
    exit;
}

// discard output
while (@ob_end_clean()) {
    continue;
}

$area = $this->get_input('area');

if ($area) {
    try {
        nc_search::index_area($area, "now");
        $reply = "1";
    } catch (Exception $e) {
        $reply = "0; /* {$e->getMessage()} */";
    }
} else {
    $reply = "0; // no area";
}

echo $reply;
exit;