/**
 * Builds the regular expression used to highlight the given terms.
 *
 * Every base form returned by get_base_forms() is followed by a pattern that
 * allows up to $this->max_remainder_length additional letters or digits.
 *
 * @param array $terms
 * @return string
 */
public function get_highlight_regexp(array $terms) {
    $base_forms = $this->get_base_forms($terms);
    $remainder = "[\\pL\\d]{0,{$this->max_remainder_length}}";

    $alternatives = array();
    foreach ($base_forms as $base_form) {
        $alternatives[] = $base_form . $remainder;
    }

    $pattern = "(" . implode("|", $alternatives) . ")";
    return nc_search_util::word_regexp($pattern, "Si");
}
/**
 * Schedules re-indexing of an area or a rule at the given time.
 *
 * @param string $area_string
 * @param integer $timestamp
 */
public static function schedule_indexing($area_string, $timestamp) {
    // If this area is already queued for an earlier or a nearby time,
    // there is no need to add it once more
    $latest_start = $timestamp + nc_search::get_setting('MinScheduleInterval');
    $sql = "SELECT * FROM `%t%`" .
           " WHERE `StartTime` <= {$latest_start}" .
           " AND `AreaString` = '" . nc_search_util::db_escape($area_string) . "'";

    // the intent type is ignored when looking for an existing entry
    $intent = nc_search::load('nc_search_scheduler_intent', $sql)->first();

    if (!$intent) {
        $intent = new nc_search_scheduler_intent(array(
            'start_time' => $timestamp,
            'type' => nc_search_scheduler_intent::ON_REQUEST,
            'area_string' => $area_string,
        ));
    } elseif ($intent->get('start_time') > $timestamp) {
        // such a schedule entry already exists: just make it run sooner
        $intent->set('start_time', $timestamp);
    }

    $intent->save();
}
/**
 * Records the start time and the time/memory/cycle limits, then lifts the
 * PHP execution time limit, enables error logging, keeps the script running
 * after a client disconnect and sets the database session wait_timeout.
 */
public function __construct() {
    $this->start_time = time();
    $this->time_threshold = $this->get_time_threshold();

    // max_execution_time is read once and kept in a static property;
    // this happens before set_time_limit(0) is called below
    if (null === self::$original_time_limit) {
        self::$original_time_limit = ini_get('max_execution_time');
    }
    $this->time_limit = self::$original_time_limit;

    $this->memory_threshold = $this->get_memory_threshold();
    $ini_memory_limit = ini_get('memory_limit');
    $this->memory_limit = nc_search_util::int_from_bytes_string($ini_memory_limit);

    $this->delay = nc_search::get_setting('CrawlerDelay');
    $this->cycle_limit = $this->get_max_cycles_number();

    @set_time_limit(0);
    nc_search::enable_error_logging();
    ignore_user_abort(true);

    // the connection might be lost when running in slow mode
    $core = nc_Core::get_object();
    $core->db->query("SET wait_timeout=900");
}
/**
 * Tries to remove quotes from the query.
 *
 * Does nothing unless the 'RemovePhrasesOnEmptyResult' option is on, the
 * phrase contains a double quote and no quoted run of non-space characters
 * is found in it.
 *
 * @param nc_search_language_corrector_phrase $phrase
 * @return boolean TRUE when the phrase was changed
 */
public function correct(nc_search_language_corrector_phrase $phrase) {
    if (!nc_search::should('RemovePhrasesOnEmptyResult')) {
        return false;
    }

    $source_text = $phrase->to_string();

    if (strpos($source_text, '"') === false || preg_match('/"\\S+"/u', $source_text)) {
        return false;
    }

    // remove distance search
    $corrected_text = preg_replace('/"~[\\d\\.]+/', '"', $source_text);

    $needs_grouping = nc_search_util::is_boolean_query($corrected_text)
        || preg_match('/[-+]/', $corrected_text);

    if ($needs_grouping) {
        // there is a phrase with several words: turn the quotes into parentheses
        $corrected_text = preg_replace('/"(\\S)/u', "(\$1", $corrected_text);
        $corrected_text = str_replace('"', ")", $corrected_text);
    } else {
        $corrected_text = str_replace('"', "", $corrected_text);
    }

    $phrase->set_phrase(
        $corrected_text,
        sprintf(NETCAT_MODULE_SEARCH_CORRECTION_QUOTES, $source_text, $corrected_text)
    );

    return true;
}
/**
 * Removes stop words of the context language from the list of terms.
 *
 * A term is either a string or an array of alternative forms. Alternative
 * forms are filtered individually: when exactly one form is left it is
 * added as an ordinary term, when several are left they are added as a
 * re-indexed array, and when none is left the term is dropped.
 *
 * @param array $terms
 * @return array
 */
public function filter(array $terms) {
    if (!nc_search::should('RemoveStopwords')) {
        return $terms;
    }

    $language = $this->context->get('language');

    // stop word lists are loaded once per language and kept in self::$lists
    if (!isset(self::$lists[$language])) {
        $query = "SELECT * FROM `%t%` WHERE `Language`='" . nc_search_util::db_escape($language) . "'";
        self::$lists[$language] = nc_search::load('nc_search_language_stopword', $query, 'word');
    }

    $stop_list = self::$lists[$language];
    if (!count($stop_list)) {
        return $terms;
    }

    $result = array();
    foreach ($terms as $term) {
        if (is_array($term)) { // alternative forms
            // Collect the surviving forms into a fresh list. Unsetting entries
            // in place left gaps in the keys, so reading index 0 afterwards
            // failed whenever the first form was the stop word.
            $forms_left = array();
            foreach ($term as $form) {
                if (!$stop_list->has_key($form)) {
                    $forms_left[] = $form;
                }
            }

            $number_left = count($forms_left);
            if ($number_left == 1) {
                $result[] = $forms_left[0];
            } elseif ($number_left > 1) {
                $result[] = $forms_left;
            }
        } elseif (!$stop_list->has_key($term)) { // ordinary term
            $result[] = $term;
        }
    }

    return $result;
}
$where = join(" AND ", $where); $offset = (int) $this->get_input('offset'); $query = "SELECT `latest`.`Timestamp`,\n `latest`.`QueryString`,\n `latest`.`ResultsCount`,\n `latest`.`IP`,\n `latest`.`User_ID`,\n `q`.`QueryCount`,\n `u`.`Login`\n FROM (SELECT MAX(`Query_ID`) AS `Query_ID`,\n COUNT(`Query_ID`) AS `QueryCount`\n FROM `Search_Query`\n WHERE {$where}\n GROUP BY `QueryString`\n ORDER BY {$order_by}\n LIMIT {$per_page} OFFSET {$offset}) AS `q`\n JOIN `Search_Query` AS `latest` ON (`q`.`Query_ID` = `latest`.`Query_ID`)\n LEFT JOIN `User` AS `u` ON (`latest`.`User_ID` = `u`.`User_ID`)"; $res = $this->get_db()->get_results($query, ARRAY_A); if ($res) { $found_rows = $this->get_db()->get_var("SELECT COUNT(DISTINCT(`QueryString`))\n FROM `Search_Query`\n WHERE {$where}"); // строка с вариантами сортировки $sort_link = $this->make_page_query(array('sort_by')); echo '<div class="query_sort">', $this->link_if($sort_by != '', $sort_link, NETCAT_MODULE_SEARCH_ADMIN_QUERY_SORT_BY_RESULT_COUNT), " | ", $this->link_if($sort_by != 'time', "{$sort_link}&sort_by=time", NETCAT_MODULE_SEARCH_ADMIN_QUERY_SORT_BY_TIME), " | ", $this->link_if($sort_by != 'query', "{$sort_link}&sort_by=query", NETCAT_MODULE_SEARCH_ADMIN_QUERY_SORT_BY_QUERY), "</div>"; // таблица с результатами echo "<table class='nc-table nc--large nc--hovered nc--striped list'>\n", "<tr>", "<th rowspan='2' width='40%'>", NETCAT_MODULE_SEARCH_ADMIN_QUERY_STRING, "</th>", "<th rowspan='2' width='10%' class='nc-text-center'>", NETCAT_MODULE_SEARCH_ADMIN_QUERY_COUNT, "</th>", "<th colspan='3' class='nc-text-center'>", NETCAT_MODULE_SEARCH_ADMIN_QUERY_LAST_QUERY, "</th>", "</tr>\n", "<tr>", "<th class='nc-text-center'>", NETCAT_MODULE_SEARCH_ADMIN_QUERY_LAST_QUERY_TIME, "</th>", "<th class='nc-text-center'>", NETCAT_MODULE_SEARCH_ADMIN_QUERY_LAST_QUERY_RESULT_COUNT, "</th>", "<th class='nc-text-center'>", NETCAT_MODULE_SEARCH_ADMIN_QUERY_LAST_QUERY_USER, "</th>", "</tr>\n"; $result_link_title = "title='" . 
htmlspecialchars(NETCAT_MODULE_SEARCH_ADMIN_QUERY_OPEN_RESULTS_LINK_HINT) . "'"; foreach ($res as $row) { // ссылка на страницу с результатами поиска $search_link = nc_search::get_object()->get_search_url($row['Catalogue_ID'], true) . "?nologging=1&search_query=" . rawurlencode($row['QueryString']) . "&area=" . rawurlencode($row['Area']); echo "<tr class='nc-text-center'>", "<td align='left'><a href='?view=queries_details&query=", rawurlencode($row['QueryString']), "'>", htmlspecialchars($row['QueryString']), "</a></td>", "<td>", $row['QueryCount'], "</td>", "<td>", nc_search_util::format_time($row['Timestamp']), "</td>", "<td><a href='{$search_link}' target='_blank' {$result_link_title}>{$row['ResultsCount']}</a></td>", "<td>", long2ip($row['IP']), $row['User_ID'] ? " (" . $this->hash_link("#user.edit({$row['User_ID']})", $row["Login"]) . ")" : "", "</td>", "</tr>\n"; } echo "</table>"; echo $this->result_count($offset + 1, $per_page, $found_rows); // листалка по страницам $ui = $this->get_ui(); $page_link = $this->make_page_query(array('offset'), true); if ($offset > 0) { $prev_page = $page_link . "&offset=" . ($offset - $per_page); $ui->actionButtons[] = array("id" => "prev_page", "caption" => NETCAT_MODULE_SEARCH_ADMIN_QUERY_PREV_PAGE, "action" => "mainView.loadIframe('{$prev_page}')", "align" => "left"); } if ($found_rows > $offset + $per_page) { $next_page = $page_link . "&offset=" . ($offset + $per_page); $ui->actionButtons[] = array("id" => "next_page", "caption" => NETCAT_MODULE_SEARCH_ADMIN_QUERY_NEXT_PAGE, "action" => "mainView.loadIframe('{$next_page}')"); } } else {
/**
 * Builds the SQL condition that combines the conditions of all sites
 * (joined with OR, or "1" when there are none) with a comparison of the
 * document `Path` column against this object's path.
 *
 * @param string $operator SQL comparison operator placed between the column and the value
 * @param string $template text appended to the escaped path inside the quotes
 * @return string
 */
protected function get_path_sql_condition($operator = '=', $template = '') {
    $site_conditions = array();
    foreach ($this->get_sites() as $site) {
        $site_conditions[] = $site->get_sql_condition();
    }

    if ($site_conditions) {
        $condition = "(" . implode(" OR ", $site_conditions) . ")";
    } else {
        $condition = "1";
    }

    $path_column = "`{$this->document_table_name}`.`Path`";
    $escaped_path = nc_search_util::db_escape($this->get_path());

    return $condition . " AND " . $path_column . " {$operator} '" . $escaped_path . $template . "'";
}
foreach ($description["included"] as $item) { $hint .= "<div class='item'>" . NETCAT_MODULE_SEARCH_ADMIN_BULLET . " {$item}</div>\n"; } $hint .= "</div>"; } if ($description["excluded"]) { $hint .= "<div class='header'><strong>" . NETCAT_MODULE_SEARCH_ADMIN_QUERY_AREA_EXCLUDED . "</strong>:</div><div class='list'>"; foreach ($description["excluded"] as $item) { $hint .= "<div class='item'>" . NETCAT_MODULE_SEARCH_ADMIN_BULLET . " {$item}</div>\n"; } $hint .= "</div>"; } $area_cell = "<td class='area_hint'><div>" . $area->to_string() . "</div>" . "<div class='inline_help area_description'>{$hint}</div>" . "</td>"; } $search_link = nc_search::get_object()->get_search_url($row['Catalogue_ID'], true) . "?nologging=1&search_query=" . rawurlencode($row['QueryString']) . "&area=" . rawurlencode($row['Area']); echo "<tr class='nc-text-center'>", "<td>", nc_search_util::format_time($row['Timestamp']), "</td>", $area_cell, "<td><a href='{$search_link}' target='_blank' title='", htmlspecialchars(NETCAT_MODULE_SEARCH_ADMIN_QUERY_OPEN_RESULTS_LINK_HINT), "'>", $row['ResultsCount'], "</a></td>", "<td>", $row['User_ID'] ? $this->hash_link("#user.edit({$row['User_ID']})", $row["Login"]) : " ", "</td>", "<td>", long2ip($row['IP']), "</td>", "</tr>\n"; } $ui = $this->get_ui(); $ui->actionButtons[] = array("id" => "prev_page", "caption" => NETCAT_MODULE_SEARCH_ADMIN_QUERY_BACK_TO_LIST, "location" => "#module.search.queries", "align" => "left"); $page_link = $this->make_page_query(array('offset'), true); if ($offset > 0) { $prev_page = $page_link . "&offset=" . ($offset - $per_page); $ui->actionButtons[] = array("id" => "prev_page", "caption" => NETCAT_MODULE_SEARCH_ADMIN_QUERY_PREV_PAGE, "action" => "mainView.loadIframe('{$prev_page}')", "align" => "left"); } if ($found_rows > $offset + $per_page) { $next_page = $page_link . "&offset=" . 
($offset + $per_page); $ui->actionButtons[] = array("id" => "next_page", "caption" => NETCAT_MODULE_SEARCH_ADMIN_QUERY_NEXT_PAGE, "action" => "mainView.loadIframe('{$next_page}')"); } ?> <script type='text/javascript'> $nc('td.area_hint').hover(
/**
 * Deletes `Search_Log` records older than the number of days given by the
 * 'DaysToKeepEventLog' setting.
 */
public static function purge_log() {
    $keep_days = self::get_setting('DaysToKeepEventLog');
    $oldest_kept = nc_search_util::sql_datetime(strtotime("-{$keep_days} days"));

    $db = nc_Core::get_object()->db;
    $db->query("DELETE FROM `Search_Log` WHERE `Timestamp` < '{$oldest_kept}'");
}
/**
 * Returns the regular expression for highlighting the query words.
 * The expression is built on the first call and stored in
 * $this->highlight_regexp.
 *
 * @param string $language
 * @return string
 */
protected function get_highlight_regexp($language) {
    if ($this->highlight_regexp) {
        return $this->highlight_regexp;
    }

    $raw_query = $this->get_query_string();
    $context = new nc_search_context(array('language' => $language, 'action' => 'searching'));

    // Extract the words from the query.
    // (Removing terms prefixed with "-" and "NOT" makes little sense, because
    // as a rule they do not get into the result anyway.)
    $cleaned_query = preg_replace("/[\\^~][\\d\\.]+/", '', $raw_query); // operators ^1, ~1
    preg_match_all("/[\\pL\\d\\?\\*]+/u", $cleaned_query, $found);
    $words = $found[0];

    $has_wildcards = strpos($cleaned_query, "*") !== false || strpos($cleaned_query, "?") !== false;
    if ($has_wildcards) {
        if (nc_search::should('AllowWildcardSearch')) {
            $wildcard_map = array("?" => ".", "*" => "[\\S]+");
        } else {
            $wildcard_map = array("?" => "", "*" => "");
        }

        foreach ($words as $key => $word) {
            $words[$key] = strtr($word, $wildcard_map);
        }
    }

    // apply every language filter except the stop word filter
    $filters = nc_search_extension_manager::get('nc_search_language_filter', $context);
    $words = $filters->except('nc_search_language_filter_stopwords')->apply('filter', $words);

    $analyzer = nc_search_extension_manager::get('nc_search_language_analyzer', $context)->first();

    $this->highlight_regexp = $analyzer
        ? $analyzer->get_highlight_regexp($words)
        : nc_search_util::word_regexp("(" . implode("|", $words) . ")", "Si");

    return $this->highlight_regexp;
}
/**
 * Returns the suggestion message. When no message is set yet and the phrase
 * has been corrected, a generic message is made from the original and the
 * current phrase text.
 *
 * @return string
 */
public function get_suggestion() {
    $should_generate = !$this->suggestion && $this->is_corrected();

    if ($should_generate) {
        $before = nc_search_util::convert($this->original_phrase);
        $after = nc_search_util::convert($this->to_string());
        $this->suggestion = sprintf(NETCAT_MODULE_SEARCH_CORRECTION_GENERIC, $before, $after);
    }

    return $this->suggestion;
}
/**
 * Getting codes for an array of terms lives in a separate method so that it
 * can be called recursively, which is required for alternative word forms
 * (when a value in $terms is itself an array).
 *
 * @param array $terms
 * @param boolean $create_new whether to assign new codes to unknown terms
 * @return array
 */
protected function get_term_codes(array $terms, $create_new) {
    $codes = array();

    foreach ($terms as $term) {
        // skip empty terms (think nc_search_language_filter_stopwords)
        if ($term === null) {
            continue;
        }
        if (is_scalar($term) && strlen($term) == 0) {
            continue;
        }

        if (is_array($term)) { // "alternative forms"
            if (count($term) == 0) {
                continue;
            }
            $joined_codes = implode("|", $this->get_term_codes($term, $create_new));
            if ($joined_codes) {
                $codes[] = $joined_codes;
            }
            continue;
        }

        if (isset($this->terms[$term])) { // this is a known term
            $codes[] = $this->terms[$term];
            continue;
        }

        if (!$create_new) {
            // the term is unknown and must not be created: no entry in $codes
            continue;
        }

        // should create new records in Search_Index_Term
        $new_code = $this->get_next_code();
        $codes[] = $new_code;
        $this->terms[$term] = $new_code;
        $this->new_term_data[] = "('" . nc_search_util::db_escape($term) . "', '{$new_code}', " . mb_strlen($term, 'UTF-8') . ")";
    }

    return $codes;
}
/**
 * Gets a document from the database by its URL path
 * (only the `Document_ID` and `Hash` columns are selected).
 *
 * @static
 * @param $site_id
 * @param $path
 * @return self|FALSE
 */
public static function get_hash_by_path($site_id, $path) {
    $document = new self();
    $table_name = $document->get_table_name();

    $query = "SELECT `Document_ID`, `Hash`\n FROM `{$table_name}`\n WHERE `Catalogue_ID` = " . (int) $site_id
           . "\n AND `Path` = '" . nc_search_util::db_escape($path) . "'\n LIMIT 1";

    return $document->select_from_database($query);
}
/**
 * Makes the SQL expression that computes the SHA1 hash of the URL; the hash
 * is wrapped in UNHEX() when binary columns can be used.
 *
 * @param $url
 * @return string
 */
protected function make_hash_statement($url) {
    $wrapper = "";
    if (nc_search_util::can_use_binary_columns()) {
        $wrapper = "UNHEX";
    }

    $escaped_url = nc_search_util::db_escape($url);

    return $wrapper . "(SHA1('" . $escaped_url . "'))";
}
/**
 * Sets the 'timestamp' option to the value of nc_search_util::sql_datetime()
 * and then saves the record through the parent implementation.
 *
 * @return nc_search_data_persistent
 */
public function save() {
    $now = nc_search_util::sql_datetime();
    $this->set('timestamp', $now);

    $saved = parent::save();
    return $saved;
}
$search_link = nc_search::get_object()->get_search_url($row['Catalogue_ID'], true) . "?nologging=1&search_query=" . rawurlencode($row['QueryString']) . "&area=" . rawurlencode($row['Area']); echo "<tr class='nc-text-center'>", "<td align='left'><a href='?view=queries_details&query=", rawurlencode($row['QueryString']), "' {$query_link_title}>", htmlspecialchars($row['QueryString']), "</a></td>", "<td>", $row["Count"], "</td>", "</tr>\n"; } echo "</table>\n"; } } // ----------------------------------------------------------------------------- // ВРЕМЯ ПОСЛЕДНЕЙ ПЕРЕИНДЕКСАЦИИ, ЗАПУСК ПЕРЕИНДЕКСИРОВАНИЯ $indexing_in_progress = $db->get_var("SELECT COUNT(*) FROM `Search_Task`"); if (count($rules)) { echo "<div class='legend'>", NETCAT_MODULE_SEARCH_ADMIN_STAT_INDEXING, "</div>", "<table class='nc-table nc--large nc--hovered nc--striped list'>", "<tr>", "<th width='30%'>", NETCAT_MODULE_SEARCH_ADMIN_RULE, "</th>", "<th>", NETCAT_MODULE_SEARCH_ADMIN_RULE_SITE, "</th>", "<th class='nc-text-center'>", NETCAT_MODULE_SEARCH_ADMIN_RULE_SCHEDULE, "</th>", "<th class='nc-text-center'>", NETCAT_MODULE_SEARCH_ADMIN_RULE_LAST_RUN, "</th>", "<th class='nc-text-center'>", NETCAT_MODULE_SEARCH_ADMIN_STAT_INDEX, "</th>", "</tr>\n"; foreach ($rules as $r) { $rule_id = $r->get_id(); $last_start_time = $r->get('last_start_time'); $last_finish_time = $r->get('last_finish_time'); $last_run = $last_start_time ? nc_search_util::format_time($last_start_time) : "—"; $rule_name = $this->if_null($r->get('name'), NETCAT_MODULE_SEARCH_ADMIN_UNNAMED_RULE); $action_cell = ""; if ($last_finish_time < $last_start_time) { $action_cell = NETCAT_MODULE_SEARCH_ADMIN_STAT_INDEXING_NOW; } else { $action_cell = "<a class='ajax' href='javascript:search_schedule({$rule_id})'>" . NETCAT_MODULE_SEARCH_ADMIN_STAT_INDEX_IN_BACKGROUND . "</a>"; if (!$indexing_in_progress) { $action_cell .= " | <a href='javascript:search_index_now({$rule_id})'>" . NETCAT_MODULE_SEARCH_ADMIN_STAT_INDEX_IN_BROWSER . 
"</a>"; } } echo "<tr>", "<td>", $this->hash_link("#module.search.rules_edit({$rule_id})", $rule_name), "</td>", "<td>", $r->get_site_name(), "</td>", "<td class='nc-text-center'>", $r->get_schedule_string(), "</td>", "<td class='nc-text-center'>", $last_run, "</td>", "<td class='nc-text-center nc--nowrap'>", $action_cell, "</td>", "</tr>\n"; } echo "</table>\n"; } // -----------------------------------------------------------------------------
/**
 * Returns numeric values untouched; any other value is escaped for the
 * database and wrapped in single quotes.
 *
 * @param $value
 * @return int|string
 */
protected function escape_number($value) {
    if (is_numeric($value)) {
        return $value;
    }

    return "'" . nc_search_util::db_escape($value) . "'";
}
nc_print_status(NETCAT_MODULE_SEARCH_ADMIN_SETTING_SEARCH_DISABLED, "error", array($this->hash_href("#module.search.generalsettings"), "_top")); } $rules = nc_search::load('nc_search_rule', "SELECT * FROM `%t%` ORDER BY `Rule_ID`")->set_output_encoding(nc_core('NC_CHARSET')); if (count($rules)) { foreach ($rules as $r) { // строчка «последняя индексация» $last_start_time = $r->get('last_start_time'); $last_finish_time = $r->get('last_finish_time'); if (!$last_start_time) { $last_run = NETCAT_MODULE_SEARCH_ADMIN_RULE_NEVER_RUN . "."; } else { $last_run = NETCAT_MODULE_SEARCH_ADMIN_RULE_LAST_RUN . ": " . nc_search_util::format_time($last_start_time) . " ("; if ($last_finish_time < $last_start_time) { $last_run .= NETCAT_MODULE_SEARCH_ADMIN_RULE_LAST_RUN_NOT_FINISHED; } else { $last_run .= NETCAT_MODULE_SEARCH_ADMIN_RULE_LAST_RUN_DURATION . " " . nc_search_util::format_seconds($last_finish_time - $last_start_time); } $last_run .= ")."; } // конец формирования строчки «последняя индексация» // строчка с результатми последней индексации $stats = ""; if ($last_start_time) { $result = $r->get('last_result'); $stats = "<div class='stats'>" . sprintf(NETCAT_MODULE_SEARCH_ADMIN_RULE_STATISTICS, $result['processed'], $result['deleted'], $result['checked']) . ".</div>"; } // конец формирования строчки с результатами // «подробнее» $details = "<div class='site'><strong>" . NETCAT_MODULE_SEARCH_ADMIN_RULE_SITE . "</strong>: " . $this->hash_link("#site.map({$r->get('site_id')})", $r->get_site_name()) . "</div>"; if ($r->get('area_string')) { // sic, not get_area_string()
<?php

/**
 * Prints a JSON list of earlier search queries that start with the given text.
 *
 * Incoming parameters:
 *  - term
 *
 * @global $catalogue
 */
$NETCAT_FOLDER = realpath("../../../../");
require_once "{$NETCAT_FOLDER}/vars.inc.php";
require $INCLUDE_FOLDER . "index.php";

// read the parameters
$input = trim($nc_core->input->fetch_get('term'));

// nothing to suggest when query suggestions are off, another suggest mode
// is selected, or the input is shorter than the configured minimum
if (!nc_search::should('EnableQuerySuggest') ||
    nc_search::get_setting('SuggestMode') != 'queries' ||
    mb_strlen($input) < nc_search::get_setting('SuggestionsMinInputLength')) {
    die("[]");
}

$input = $nc_core->utf8->conv($nc_core->NC_CHARSET, 'utf-8', $input);

// The input must be matched as a literal prefix: besides the usual string
// escaping, the LIKE wildcards "%" and "_" are escaped as well, otherwise
// an input such as "%" would match every stored query.
$like_prefix = addcslashes(nc_search_util::db_escape($input), '%_');

// look for the queries that start with the given substring
$db->query("SET NAMES 'utf8'");
$query = "SELECT DISTINCT(`QueryString`) AS `label` FROM `Search_Query` \n WHERE `QueryString` LIKE '" .
         $like_prefix .
         "%'\n AND `ResultsCount` > 0\n ORDER BY `QueryString`\n LIMIT " .
         (int) nc_search::get_setting('NumberOfSuggestions');

$suggestions = (array) $db->get_results($query, ARRAY_A);

if (!$nc_core->NC_UNICODE) {
    $suggestions = $nc_core->utf8->array_utf2win($suggestions);
}

print nc_array_json($suggestions);
/**
 * For logging purposes: returns every `URL` value stored in `Search_Link`,
 * each passed through nc_search_util::decode_url().
 *
 * @return array
 */
public function get_all_urls() {
    $urls = nc_Core::get_object()->db->get_col("SELECT `URL` FROM `Search_Link`");

    foreach ($urls as $position => $stored_url) {
        $urls[$position] = nc_search_util::decode_url($stored_url);
    }

    return $urls;
}
/**
 * Makes a boolean-mode MATCH ... AGAINST expression on the `Content` column
 * of the given table.
 *
 * @param $table
 * @param $fts_qry full-text query; it is escaped here
 * @return string
 */
protected function make_match($table, $fts_qry) {
    $escaped_query = nc_search_util::db_escape($fts_qry);

    return sprintf(
        "MATCH(`%s`.`Content`) AGAINST ('%s' IN BOOLEAN MODE)",
        $table,
        $escaped_query
    );
}
/**
 * Loads the synonym records of the given language and stores them in
 * self::$lists under the language key.
 *
 * @param string $language
 */
protected function load_synonyms($language) {
    $escaped_language = nc_search_util::db_escape($language);
    $sql = "SELECT * FROM `%t%` WHERE `Language`='{$escaped_language}'";

    self::$lists[$language] = nc_search::load('nc_search_language_synonyms', $sql);
}
/**
 * Makes suggestions out of document titles for the text typed so far.
 *
 * When the 'SearchTitleBaseformsForSuggestions' option is on, the index is
 * searched first; after that the documents table is queried for titles
 * starting with $input. Rows from the table are put in front of the index
 * results, and the list is cut to the 'NumberOfSuggestions' setting.
 *
 * @param string $input
 * @param string $language
 * @param integer $site_id
 * @return array list of array("label" => title, "url" => url)
 */
public function suggest_titles($input, $language, $site_id) {
    $suggestions = array(); // the suggestions themselves
    $titles = array();      // quoted titles that were already found in the index

    // The limit ends up in a LIMIT clause and in array_slice(), so make sure
    // it is an integer
    $limit = (int) nc_search::get_setting('NumberOfSuggestions');

    // search in the index (i.e. variants after filter processing: base forms)
    if (nc_search::should('SearchTitleBaseformsForSuggestions')) {
        $last_space = strrpos($input, " ");
        $as_phrase = nc_search::should('SearchTitleAsPhraseForSuggestions');
        $b1 = $as_phrase ? '"' : '(';
        $b2 = $as_phrase ? '"' : ')';

        /* @todo check that the last word is correct/complete? */
        $query_string = "(title:{$b1}{$input}{$b2}" .
                        ($last_space ? " OR title:{$b1}" . trim(substr($input, 0, $last_space)) . $b2 : '') .
                        ") AND site_id:{$site_id}";

        $query = new nc_search_query($query_string);
        $query->set('limit', $limit)
              ->set('options_to_fetch', array('title', 'site_id', 'path'))
              ->set('language', $language);

        $documents = $this->find($query, false);
        foreach ($documents as $doc) {
            $suggestions[] = array("label" => $doc->get('title'), "url" => $doc->get('url'));
            $titles[] = '"' . nc_search_util::db_escape($doc->get('title')) . '"';
        }
        $titles = array_unique($titles);
    }

    // Look for an exact prefix match in the documents table.
    // It would be better to query the database first and the index second,
    // but then filtering out the already matched titles would not be as simple.
    //
    // The input is matched as a literal prefix: the LIKE wildcards "%" and "_"
    // are escaped in addition to the usual string escaping.
    // NOTE(review): unlike the index query above, this query is not restricted
    // to $site_id; confirm whether that is intended.
    $like_prefix = addcslashes(nc_search_util::db_escape($input), '%_');

    $query = "SELECT `Catalogue_ID`, `Path`, `Title` FROM `%t%` " .
             ' WHERE `Title` LIKE "' . $like_prefix . '%" ' .
             ($titles ? " AND `Title` NOT IN (" . join(", ", $titles) . ") " : "") .
             " ORDER BY `Title` " .
             " LIMIT {$limit}";

    $documents = new nc_search_result();
    $documents->select_from_database($query);

    foreach ($documents as $doc) {
        array_unshift($suggestions, array("label" => $doc->get('title'), "url" => $doc->get('url')));
    }

    $suggestions = array_slice($suggestions, 0, $limit);

    return $suggestions;
}
return "<a href='" . htmlspecialchars($edit_link) . "' target='_blank'>" . NETCAT_MODULE_SEARCH_ADMIN_BROKEN_LINK_EDIT . "</a>"; } // --------------------------------------------------------------- // foreach ($res as $row) { echo "<li>"; if ($group_by == 'referrer') { echo "<b><a href='", htmlspecialchars($row['Referrer_URL']), "' target='_blank'>", $row['Title'] ? $row['Title'] : $row['Referrer_URL'], "</a></b> ", _edit_doc_link($row), "\n", "<ul>\n"; $where = $row["Referrer_Document_ID"] ? "`Referrer_Document_ID` = '{$row['Referrer_Document_ID']}'" : "`Referrer_URL` = '" . $db->escape($row["Referrer_URL"]) . "'"; $broken_links = $db->get_col("SELECT `URL`\n FROM `Search_BrokenLink`\n WHERE {$where}"); foreach ($broken_links as $link) { echo "<li>" . nc_search_util::decode_url($link) . "</li>\n"; } echo "</ul>\n"; } else { // group by broken link URL echo "<b>" . nc_search_util::decode_url($row['URL']) . "</b>\n<ul>"; $referrers = $db->get_results("SELECT l.`Referrer_URL`,\n doc.`Title`,\n doc.`Catalogue_ID`,\n doc.`Subdivision_ID`\n FROM `Search_BrokenLink` AS l\n LEFT JOIN `Search_Document` AS doc\n ON (l.`Referrer_Document_ID` = doc.`Document_ID`)\n WHERE l.`URL` = '" . $db->escape($row['URL']) . "'\n LIMIT {$max_referrer_links}", ARRAY_A); foreach ($referrers as $n => $ref) { echo "<li><b><a href='", htmlspecialchars($ref['Referrer_URL']), "' target='_blank'>", $ref['Title'] ? $ref['Title'] : $ref['Referrer_URL'], "</a></b> ", _edit_doc_link($ref), "</li>\n"; } echo "</ul>\n"; if (count($referrers) == $max_referrer_links) { echo "<div>", sprintf(NETCAT_MODULE_SEARCH_ADMIN_BROKEN_LINKS_REFERRER_LIMIT, $max_referrer_links), "</div>"; } } echo "</li>\n"; } echo "</ul></div>"; // листалка по страницам $page_link = $this->make_page_query(array('offset'), true); if ($offset > 0) {
/**
 * Returns the name of the site this object refers to, followed by the
 * decoded domain in parentheses when the site has one. When the catalogue
 * lookup throws an exception, a "nonexistent site" message with the site ID
 * is returned instead.
 *
 * @return string
 */
public function get_site_name() {
    $core = nc_Core::get_object();
    $catalogue = $core->catalogue;
    $site_id = $this->get('site_id');

    try {
        $name = $catalogue->get_by_id($site_id, "Catalogue_Name");
        $domain = $catalogue->get_by_id($site_id, "Domain");

        if (!$domain) {
            return $name;
        }

        $readable_domain = nc_search_util::decode_host($domain);
        if (!$core->NC_UNICODE) {
            // the system does not run in Unicode: convert from UTF-8
            $readable_domain = $core->utf8->utf2win($readable_domain);
        }

        return $name . " (" . $readable_domain . ")";
    } catch (Exception $e) {
        return sprintf(NETCAT_MODULE_SEARCH_ADMIN_RULE_NONEXISTENT_SITE, $site_id);
    }
}
/**
 * Returns the shared Net_IDNA2 instance; the library is loaded and the
 * instance is created on the first call.
 *
 * @return Net_IDNA2
 */
protected static function get_idn_converter() {
    if (self::$idn) {
        return self::$idn;
    }

    require_once 'Net/IDNA2.php'; // netcat/require/lib
    self::$idn = new Net_IDNA2();

    return self::$idn;
}
/**
 * Processes a crawler response: queues the links found on the page, adds
 * the parsed document to the index and remembers who links where.
 *
 * @param nc_search_indexer_crawler_response $response
 * @param nc_search_indexer_link $link
 * @return bool FALSE when the parser says the document must not be indexed
 */
protected function process_response(nc_search_indexer_crawler_response $response, nc_search_indexer_link $link) {
    $type = $response->get_content_type();
    $parser = $this->get_parser($type);
    $parser->load($response);

    // let the parser decide whether this document is going to be processed
    // (for example, it is not when there is a meta robots=noindex)
    if (!$parser->should_index()) {
        return false;
    }

    // collect the links
    $hrefs = $this->filter_links($parser->extract_links());
    $url = $response->get_url();

    // put the links into the queue (making them unique and absolute
    // is taken care of elsewhere)
    $queued_link_ids = $this->task->add_links($hrefs, $url);

    // parse the content
    $document = $parser->get_document();
    $values = array(
        'url' => $url,
        'path' => nc_search_util::get_url_path($url),
        'content_type' => $type,
        'to_delete' => false,
        'last_modified' => $response->get_last_modified(),
    );
    $document->set_values($values);
    $this->apply_hierarchy_options($document);

    if (nc_search::will_log(nc_search::LOG_PARSER_DOCUMENT_BRIEF)) {
        $content_length = strlen($document->get('intact_content'));
        nc_search::log(
            nc_search::LOG_PARSER_DOCUMENT_BRIEF,
            "Parsed document from '{$url}'. Indexed content: " . $content_length . " bytes"
        );
    }

    if (nc_search::will_log(nc_search::LOG_PARSER_DOCUMENT_VERBOSE)) {
        nc_search::log(nc_search::LOG_PARSER_DOCUMENT_VERBOSE, $document->dump());
    }

    // add to the index
    $this->index->process_document($document);

    // Store the information on who refers where.
    // (referrer_link_id is stored because of ambiguities in the specification,
    // which implies that links may be collected on pages that are not stored
    // in the index.)
    $referrer_id = (int) $link->get_id(); // might be null
    $document_id = (int) $document->get_id();

    foreach ($queued_link_ids as $queued_id) {
        if ($queued_id) {
            $this->referrer_cache[] = "({$document_id},{$referrer_id}," . (int) $queued_id . ")";
        }
    }

    return true;
}
/**
 * Stores an index field in the given table (Search_Index or
 * Search_Index_FieldX). When necessary the data is written in several
 * queries: the first one is a REPLACE, the following ones are UPDATEs that
 * append to the columns with CONCAT(), so that a single query stays within
 * mysql.max_allowed_packet.
 *
 * Note: the data is split into chunks before it is escaped, so the 1024-byte
 * overhead has to cover both the SQL text and any growth caused by escaping.
 *
 * @param string $table_name
 * @param int $doc_id
 * @param string $all_content
 * @param string $all_raw_data
 */
protected function store_index_data($table_name, $doc_id, $all_content, $all_raw_data = '') {
    $db = $this->get_db();

    $overhead = 1024; // SQL commands etc.
    $chunk_size = $this->max_allowed_packet - $overhead;

    $content_chunks = str_split($all_content, $chunk_size);
    $raw_chunks = strlen($all_raw_data) ? str_split($all_raw_data, $chunk_size) : array();
    $doc_id = (int) $doc_id;

    // the source strings might be large and are not needed any more
    unset($all_content, $all_raw_data);

    $n_content = $n_raw = 0;
    while (count($content_chunks) || count($raw_chunks)) {
        // once something has been written, the next parts are appended
        $update = $n_content || $n_raw;
        $query = ($update ? "UPDATE" : "REPLACE INTO") .
                 " `{$table_name}` SET `Document_ID` = {$doc_id}";

        // array_shift() gives NULL when no content chunks are left
        $content = (string) array_shift($content_chunks);
        if (strlen($content)) {
            $content = nc_search_util::db_escape($content);
            $query .= ", `Content` = " .
                      ($n_content ? "CONCAT(`Content`, '{$content}')" : "'{$content}'");
            $n_content++;
        }

        // Raw data is added only after the last content chunk, and only when
        // it fits into the same query. The comparison must be inclusive: a
        // full-size raw chunk is exactly $chunk_size bytes long, and with a
        // strict "<" it would never be taken from the queue, so the loop
        // would never end.
        $add_raw = count($content_chunks) == 0 &&
                   isset($raw_chunks[0]) &&
                   strlen($content) + strlen($raw_chunks[0]) <= $chunk_size;

        if ($add_raw) {
            $raw_data = nc_search_util::db_escape(array_shift($raw_chunks));
            $query .= ", `RawData` = " .
                      ($n_raw ? "CONCAT(`RawData`, '{$raw_data}')" : "'{$raw_data}'");
            $n_raw++;
        }

        if ($update) {
            $query .= " WHERE `Document_ID` = {$doc_id}";
        }

        $db->query($query);
    }
}
/**
 * Helper method that makes a query out of several fields
 * (the "exclude pages that contain the words..." field).
 *
 * When $exclude is not empty, it is appended to the query with "AND NOT "
 * (if either part is a boolean query) or with "-". A part that contains a
 * space is put in parentheses.
 *
 * @param string $query
 * @param string|false $exclude words to exclude
 * @return string
 */
public function make_query_string($query, $exclude = false) {
    $query = (string) $query;

    if ($exclude && ($exclude = trim((string) $exclude))) {
        $not_op = nc_search_util::is_boolean_query($query) || nc_search_util::is_boolean_query($exclude)
            ? "AND NOT "
            : "-";

        // strpos() returns 0 for a match at the very beginning of the string,
        // so the result has to be compared with FALSE; otherwise a query
        // starting with a space would not be put in parentheses
        if (strpos($exclude, " ") !== false) {
            $exclude = "({$exclude})";
        }
        if (strpos($query, " ") !== false) {
            $query = "({$query})";
        }

        $query = "{$query} {$not_op}{$exclude}";
    }

    return $query;
}
/**
 * Returns the body passed through nc_search_util::convert() with 1 as the
 * second argument.
 *
 * @return string
 */
public function get_body() {
    $converted_body = nc_search_util::convert($this->body, 1);

    return $converted_body;
}