Beispiel #1
0
 /**
  * Build the regular expression used to highlight the given terms.
  *
  * Every base form returned by get_base_forms() becomes one alternative that
  * may be followed by up to $this->max_remainder_length letters or digits.
  *
  * @param array $terms
  * @return string
  */
 public function get_highlight_regexp(array $terms)
 {
     $base_forms = $this->get_base_forms($terms);
     $alternatives = array();
     foreach ($base_forms as $base_form) {
         $alternatives[] = $base_form . "[\\pL\\d]{0," . $this->max_remainder_length . "}";
     }
     $pattern = "(" . implode("|", $alternatives) . ")";
     return nc_search_util::word_regexp($pattern, "Si");
 }
Beispiel #2
0
 /**
  * Schedule re-indexing of an area or a rule at the given time.
  *
  * An intent for the same area string whose start time is not later than
  * $timestamp plus the 'MinScheduleInterval' setting is reused (and moved to
  * $timestamp if it was planned for a later moment); otherwise a new
  * ON_REQUEST intent is created. The intent is saved in both cases.
  *
  * @param string $area_string
  * @param integer $timestamp
  */
 public static function schedule_indexing($area_string, $timestamp)
 {
     // If this area is already queued for an earlier or a nearby time,
     // it must not be added once more (the intent type is ignored)
     $latest_start_time = $timestamp + nc_search::get_setting('MinScheduleInterval');
     $sql = "SELECT * FROM `%t%`" .
         " WHERE `StartTime` <= {$latest_start_time}" .
         "   AND `AreaString` = '" . nc_search_util::db_escape($area_string) . "'";
     $intent = nc_search::load('nc_search_scheduler_intent', $sql)->first();

     if (!$intent) {
         $intent = new nc_search_scheduler_intent(array(
             'start_time' => $timestamp,
             'type' => nc_search_scheduler_intent::ON_REQUEST,
             'area_string' => $area_string,
         ));
     } elseif ($intent->get('start_time') > $timestamp) {
         // such a schedule already exists: let's run it sooner
         $intent->set('start_time', $timestamp);
     }
     $intent->save();
 }
Beispiel #3
0
 /**
  * Records the start time, reads the time, memory and cycle limits and the
  * 'CrawlerDelay' setting into the instance, then prepares the PHP and
  * database environment: removes the execution time limit, enables search
  * error logging, keeps running after a user abort and sets the MySQL
  * session wait_timeout.
  */
 public function __construct()
 {
     $this->start_time = time();
     $this->time_threshold = $this->get_time_threshold();
     // max_execution_time is read only once and kept in a static property;
     // this happens before the set_time_limit(0) call below
     if (self::$original_time_limit === null) {
         self::$original_time_limit = ini_get('max_execution_time');
     }
     $this->time_limit = self::$original_time_limit;
     $this->memory_threshold = $this->get_memory_threshold();
     $this->memory_limit = nc_search_util::int_from_bytes_string(ini_get('memory_limit'));
     $this->delay = nc_search::get_setting('CrawlerDelay');
     $this->cycle_limit = $this->get_max_cycles_number();
     // errors of set_time_limit() are deliberately suppressed
     @set_time_limit(0);
     nc_search::enable_error_logging();
     ignore_user_abort(true);
     nc_Core::get_object()->db->query("SET wait_timeout=900");
     // the connection might otherwise be lost when running in slow mode
 }
Beispiel #4
0
 /**
  * Tries to remove the quotes from the query.
  *
  * Nothing is done unless the 'RemovePhrasesOnEmptyResult' option is on and
  * the phrase contains a double quote but no quoted run of non-whitespace
  * characters (i.e. no "word" without spaces inside the quotes).
  *
  * @param nc_search_language_corrector_phrase $phrase
  * @return boolean true when the phrase was replaced via set_phrase()
  */
 public function correct(nc_search_language_corrector_phrase $phrase)
 {
     if (!nc_search::should('RemovePhrasesOnEmptyResult')) {
         return false;
     }
     $original_text = $phrase->to_string();
     if (strpos($original_text, '"') === false || preg_match('/"\\S+"/u', $original_text)) {
         return false;
     }
     // remove distance search ("~N after the closing quote)
     $corrected_text = preg_replace('/"~[\\d\\.]+/', '"', $original_text);
     $needs_grouping = nc_search_util::is_boolean_query($corrected_text) || preg_match('/[-+]/', $corrected_text);
     if ($needs_grouping) {
         // there is a phrase with several words: a quote followed by a
         // non-space character becomes "(", every remaining quote becomes ")"
         $corrected_text = preg_replace('/"(\\S)/u', "(\$1", $corrected_text);
         $corrected_text = str_replace('"', ")", $corrected_text);
     } else {
         $corrected_text = str_replace('"', "", $corrected_text);
     }
     $phrase->set_phrase(
         $corrected_text,
         sprintf(NETCAT_MODULE_SEARCH_CORRECTION_QUOTES, $original_text, $corrected_text)
     );
     return true;
 }
Beispiel #5
0
 /**
  * Remove the stop words of the context language from the list of terms.
  *
  * A term is either a plain value or an array of alternative forms. Stop
  * words are removed from the alternative forms as well; when exactly one
  * form is left it is returned as a plain term, when none is left the term
  * is dropped altogether.
  *
  * The terms are returned unchanged when the 'RemoveStopwords' option is off
  * or when the stop word list of the language is empty.
  *
  * @param array $terms
  * @return array
  */
 public function filter(array $terms)
 {
     if (!nc_search::should('RemoveStopwords')) {
         return $terms;
     }
     $language = $this->context->get('language');
     if (!isset(self::$lists[$language])) {
         // the list is loaded once per language and kept in a static property
         $query = "SELECT * FROM `%t%` WHERE `Language`='" . nc_search_util::db_escape($language) . "'";
         self::$lists[$language] = nc_search::load('nc_search_language_stopword', $query, 'word');
     }
     $stop_list = self::$lists[$language];
     if (!count($stop_list)) {
         return $terms;
     }
     $result = array();
     foreach ($terms as $term) {
         if (is_array($term)) {
             // alternative forms: collect the surviving forms into a fresh,
             // zero-based list. (Removing entries in place left gaps in the
             // keys, so reading index 0 produced an undefined value whenever
             // the first form was a stop word.)
             $forms_left = array();
             foreach ($term as $form) {
                 if (!$stop_list->has_key($form)) {
                     $forms_left[] = $form;
                 }
             }
             $number_of_forms_left = count($forms_left);
             if ($number_of_forms_left == 1) {
                 $result[] = $forms_left[0];
             } elseif ($number_of_forms_left > 1) {
                 $result[] = $forms_left;
             }
         } elseif (!$stop_list->has_key($term)) {
             // ordinary term
             $result[] = $term;
         }
     }
     return $result;
 }
Beispiel #6
0
$where = join(" AND ", $where);
$offset = (int) $this->get_input('offset');
$query = "SELECT `latest`.`Timestamp`,\n                 `latest`.`QueryString`,\n                 `latest`.`ResultsCount`,\n                 `latest`.`IP`,\n                 `latest`.`User_ID`,\n                 `q`.`QueryCount`,\n                 `u`.`Login`\n            FROM (SELECT MAX(`Query_ID`) AS `Query_ID`,\n                         COUNT(`Query_ID`) AS `QueryCount`\n                    FROM `Search_Query`\n                   WHERE {$where}\n                   GROUP BY `QueryString`\n                   ORDER BY {$order_by}\n                   LIMIT {$per_page} OFFSET {$offset}) AS `q`\n            JOIN `Search_Query` AS `latest` ON (`q`.`Query_ID` = `latest`.`Query_ID`)\n            LEFT JOIN `User` AS `u` ON (`latest`.`User_ID` = `u`.`User_ID`)";
$res = $this->get_db()->get_results($query, ARRAY_A);
if ($res) {
    $found_rows = $this->get_db()->get_var("SELECT COUNT(DISTINCT(`QueryString`))\n                                              FROM `Search_Query`\n                                             WHERE {$where}");
    // строка с вариантами сортировки
    $sort_link = $this->make_page_query(array('sort_by'));
    echo '<div class="query_sort">', $this->link_if($sort_by != '', $sort_link, NETCAT_MODULE_SEARCH_ADMIN_QUERY_SORT_BY_RESULT_COUNT), " | ", $this->link_if($sort_by != 'time', "{$sort_link}&amp;sort_by=time", NETCAT_MODULE_SEARCH_ADMIN_QUERY_SORT_BY_TIME), " | ", $this->link_if($sort_by != 'query', "{$sort_link}&amp;sort_by=query", NETCAT_MODULE_SEARCH_ADMIN_QUERY_SORT_BY_QUERY), "</div>";
    // таблица с результатами
    echo "<table class='nc-table nc--large nc--hovered nc--striped list'>\n", "<tr>", "<th rowspan='2' width='40%'>", NETCAT_MODULE_SEARCH_ADMIN_QUERY_STRING, "</th>", "<th rowspan='2' width='10%' class='nc-text-center'>", NETCAT_MODULE_SEARCH_ADMIN_QUERY_COUNT, "</th>", "<th colspan='3' class='nc-text-center'>", NETCAT_MODULE_SEARCH_ADMIN_QUERY_LAST_QUERY, "</th>", "</tr>\n", "<tr>", "<th class='nc-text-center'>", NETCAT_MODULE_SEARCH_ADMIN_QUERY_LAST_QUERY_TIME, "</th>", "<th class='nc-text-center'>", NETCAT_MODULE_SEARCH_ADMIN_QUERY_LAST_QUERY_RESULT_COUNT, "</th>", "<th class='nc-text-center'>", NETCAT_MODULE_SEARCH_ADMIN_QUERY_LAST_QUERY_USER, "</th>", "</tr>\n";
    $result_link_title = "title='" . htmlspecialchars(NETCAT_MODULE_SEARCH_ADMIN_QUERY_OPEN_RESULTS_LINK_HINT) . "'";
    foreach ($res as $row) {
        // ссылка на страницу с результатами поиска
        $search_link = nc_search::get_object()->get_search_url($row['Catalogue_ID'], true) . "?nologging=1&amp;search_query=" . rawurlencode($row['QueryString']) . "&amp;area=" . rawurlencode($row['Area']);
        echo "<tr class='nc-text-center'>", "<td align='left'><a href='?view=queries_details&amp;query=", rawurlencode($row['QueryString']), "'>", htmlspecialchars($row['QueryString']), "</a></td>", "<td>", $row['QueryCount'], "</td>", "<td>", nc_search_util::format_time($row['Timestamp']), "</td>", "<td><a href='{$search_link}' target='_blank' {$result_link_title}>{$row['ResultsCount']}</a></td>", "<td>", long2ip($row['IP']), $row['User_ID'] ? " (" . $this->hash_link("#user.edit({$row['User_ID']})", $row["Login"]) . ")" : "", "</td>", "</tr>\n";
    }
    echo "</table>";
    echo $this->result_count($offset + 1, $per_page, $found_rows);
    // листалка по страницам
    $ui = $this->get_ui();
    $page_link = $this->make_page_query(array('offset'), true);
    if ($offset > 0) {
        $prev_page = $page_link . "&amp;offset=" . ($offset - $per_page);
        $ui->actionButtons[] = array("id" => "prev_page", "caption" => NETCAT_MODULE_SEARCH_ADMIN_QUERY_PREV_PAGE, "action" => "mainView.loadIframe('{$prev_page}')", "align" => "left");
    }
    if ($found_rows > $offset + $per_page) {
        $next_page = $page_link . "&amp;offset=" . ($offset + $per_page);
        $ui->actionButtons[] = array("id" => "next_page", "caption" => NETCAT_MODULE_SEARCH_ADMIN_QUERY_NEXT_PAGE, "action" => "mainView.loadIframe('{$next_page}')");
    }
} else {
Beispiel #7
0
 /**
  * Build an SQL condition that combines the conditions of all sites returned
  * by get_sites() (joined with OR, or "1" when there are none) with a
  * comparison of the document table's `Path` column against get_path().
  *
  * @param string $operator operator placed between the `Path` column and the quoted value
  * @param string $template text appended (unescaped) to the escaped path inside the quotes
  * @return string
  */
 protected function get_path_sql_condition($operator = '=', $template = '')
 {
     $site_conditions = array();
     foreach ($this->get_sites() as $site) {
         $site_conditions[] = $site->get_sql_condition();
     }
     if ($site_conditions) {
         $sites_sql = "(" . implode(" OR ", $site_conditions) . ")";
     } else {
         $sites_sql = "1";
     }
     $path_comparison = " AND `{$this->document_table_name}`.`Path` {$operator} '";
     $path_value = nc_search_util::db_escape($this->get_path()) . $template;
     return $sites_sql . $path_comparison . $path_value . "'";
 }
Beispiel #8
0
 /**
  * Process a response received by the crawler: queue the links found on the
  * page, pass the parsed document to the index and remember which links the
  * page refers to.
  *
  * @param nc_search_indexer_crawler_response $response
  * @param nc_search_indexer_link $link
  * @return bool false when the parser reports that the document should not
  *              be indexed, true otherwise
  */
 protected function process_response(nc_search_indexer_crawler_response $response, nc_search_indexer_link $link)
 {
     $content_type = $response->get_content_type();
     $parser = $this->get_parser($content_type);
     $parser->load($response);
     // let the parser tell whether this document is going to be processed
     // (for example, it is not when there is a meta robots=noindex)
     if (!$parser->should_index()) {
         return false;
     }
     // get the links
     $page_hrefs = $this->filter_links($parser->extract_links());
     $page_url = $response->get_url();
     // add the links to the queue (making sure that the links are not
     // repeated and are absolute is taken care of elsewhere):
     $page_link_ids = $this->task->add_links($page_hrefs, $page_url);
     // parse the content
     $document = $parser->get_document();
     $document->set_values(array('url' => $page_url, 'path' => nc_search_util::get_url_path($page_url), 'content_type' => $content_type, 'to_delete' => false, 'last_modified' => $response->get_last_modified()));
     $this->apply_hierarchy_options($document);
     if (nc_search::will_log(nc_search::LOG_PARSER_DOCUMENT_BRIEF)) {
         nc_search::log(nc_search::LOG_PARSER_DOCUMENT_BRIEF, "Parsed document from '{$page_url}'. Indexed content: " . strlen($document->get('intact_content')) . " bytes");
     }
     if (nc_search::will_log(nc_search::LOG_PARSER_DOCUMENT_VERBOSE)) {
         nc_search::log(nc_search::LOG_PARSER_DOCUMENT_VERBOSE, $document->dump());
     }
     // add to the index
     $this->index->process_document($document);
     // store the information about who refers to what
     // (referrer_link_id is stored because of ambiguities/inaccuracies in the
     // requirements specification, which implies that links may be collected
     // on pages that are not stored in the index)
     $referrer_link_id = (int) $link->get_id();
     // might be null
     $doc_id = (int) $document->get_id();
     foreach ($page_link_ids as $page_link_id) {
         // entries without a link id are skipped
         if (!$page_link_id) {
             continue;
         }
         // one "(document id, referrer link id, page link id)" tuple per link
         $this->referrer_cache[] = "(" . $doc_id . "," . $referrer_link_id . "," . (int) $page_link_id . ")";
     }
     return true;
 }
Beispiel #9
0
 /**
  * Delete the `Search_Log` records whose `Timestamp` is older than the
  * number of days given by the 'DaysToKeepEventLog' setting.
  */
 public static function purge_log()
 {
     $retention_days = self::get_setting('DaysToKeepEventLog');
     $cutoff_timestamp = strtotime("-" . $retention_days . " days");
     $cutoff = nc_search_util::sql_datetime($cutoff_timestamp);
     $db = nc_Core::get_object()->db;
     $db->query("DELETE FROM `Search_Log` WHERE `Timestamp` < '" . $cutoff . "'");
 }
Beispiel #10
0
 /**
  * Build (once) and return the regular expression used to highlight the
  * terms of the query string.
  *
  * The result is cached in $this->highlight_regexp.
  * NOTE(review): the cache is not keyed by $language, so the value built for
  * the first language is returned for every later call — confirm that this
  * is intended.
  *
  * @param string $language language put into the search context
  * @return string regular expression produced by the language analyzer or,
  *                when there is none, by nc_search_util::word_regexp()
  */
 protected function get_highlight_regexp($language)
 {
     if (!$this->highlight_regexp) {
         $query_string = $this->get_query_string();
         $context = new nc_search_context(array('language' => $language, 'action' => 'searching'));
         // Get the words from the query.
         // (Removing the terms prefixed with "-" and "NOT" from the query
         // makes little sense because they usually do not get into the result.)
         // Strip the ^1 and ~1 operators first:
         $query_string = preg_replace("/[\\^~][\\d\\.]+/", '', $query_string);
         preg_match_all("/[\\pL\\d\\?\\*]+/u", $query_string, $matches);
         $terms = $matches[0];
         if (strpos($query_string, "*") !== false || strpos($query_string, "?") !== false) {
             $wildcards_replacement = nc_search::should('AllowWildcardSearch') ? array("?" => ".", "*" => "[\\S]+") : array("?" => "", "*" => "");
             $replaced_terms = array();
             foreach ($terms as $term) {
                 $term = strtr($term, $wildcards_replacement);
                 // When wildcards are not allowed, a token that consisted of
                 // "?" / "*" only becomes an empty string; keeping it would
                 // add an empty alternative to the highlight regexp.
                 if ($term !== '') {
                     $replaced_terms[] = $term;
                 }
             }
             $terms = $replaced_terms;
         }
         $terms = nc_search_extension_manager::get('nc_search_language_filter', $context)->except('nc_search_language_filter_stopwords')->apply('filter', $terms);
         $analyzer = nc_search_extension_manager::get('nc_search_language_analyzer', $context)->first();
         if ($analyzer) {
             $regexp = $analyzer->get_highlight_regexp($terms);
         } else {
             $regexp = nc_search_util::word_regexp("(" . join("|", $terms) . ")", "Si");
         }
         $this->highlight_regexp = $regexp;
     }
     // end of "there was no 'highlight_regexp'"
     return $this->highlight_regexp;
 }
Beispiel #11
0
 /**
  * Return the suggestion text. When no suggestion is set yet and the phrase
  * is corrected, a generic one is built from the original and the current
  * phrase and remembered in $this->suggestion.
  *
  * @return string
  */
 public function get_suggestion()
 {
     if ($this->suggestion || !$this->is_corrected()) {
         return $this->suggestion;
     }
     $original = nc_search_util::convert($this->original_phrase);
     $corrected = nc_search_util::convert($this->to_string());
     $this->suggestion = sprintf(NETCAT_MODULE_SEARCH_CORRECTION_GENERIC, $original, $corrected);
     return $this->suggestion;
 }
Beispiel #12
0
 /**
  * Getting the codes for an array of terms lives in a separate method to
  * make the recursive call convenient; the recursion is needed to process
  * alternative word forms (when a value in $terms is itself an array).
  *
  * The codes of alternative forms are joined with "|" into one entry.
  *
  * @param array $terms
  * @param boolean $create_new whether unknown terms get a new code (and are
  *                            queued in $this->new_term_data) or are skipped
  * @return array
  */
 protected function get_term_codes(array $terms, $create_new)
 {
     $codes = array();
     foreach ($terms as $term) {
         // skip empty terms (think nc_search_language_filter_stopwords)
         $is_empty = $term === null
             || (is_scalar($term) && strlen($term) == 0)
             || (is_array($term) && sizeof($term) == 0);
         if ($is_empty) {
             continue;
         }
         if (is_array($term)) {
             // "alternative forms"
             $alternatives = join("|", $this->get_term_codes($term, $create_new));
             if ($alternatives) {
                 $codes[] = $alternatives;
             }
             continue;
         }
         if (isset($this->terms[$term])) {
             // this is a known term
             $codes[] = $this->terms[$term];
             continue;
         }
         if (!$create_new) {
             // the term is unknown and must not be created: no entry in $codes
             continue;
         }
         // should create new records in Search_Index_Term
         $new_code = $this->get_next_code();
         $codes[] = $new_code;
         $this->terms[$term] = $new_code;
         $this->new_term_data[] = "('" . nc_search_util::db_escape($term) . "', '{$new_code}', " . mb_strlen($term, 'UTF-8') . ")";
     }
     return $codes;
 }
Beispiel #13
0
 /**
  * Get a document from the database by its site and path
  * (only the `Document_ID` and `Hash` columns are selected).
  *
  * @static
  * @param $site_id
  * @param $path
  * @return self|FALSE
  */
 public static function get_hash_by_path($site_id, $path)
 {
     $doc = new self();
     $table = $doc->get_table_name();
     $sql = "SELECT `Document_ID`, `Hash`\n                                        FROM `{$table}`\n                                       WHERE `Catalogue_ID` = " . (int) $site_id . "\n                                         AND `Path` = '" . nc_search_util::db_escape($path) . "'\n                                       LIMIT 1";
     return $doc->select_from_database($sql);
 }
Beispiel #14
0
 /**
  * Build the SQL expression that computes the SHA1 hash of the URL; the
  * expression is wrapped in UNHEX() when binary columns can be used.
  *
  * @param $url
  * @return string
  */
 protected function make_hash_statement($url)
 {
     if (nc_search_util::can_use_binary_columns()) {
         $wrapper = "UNHEX";
     } else {
         $wrapper = "";
     }
     $escaped_url = nc_search_util::db_escape($url);
     return $wrapper . "(SHA1('" . $escaped_url . "'))";
 }
Beispiel #15
0
 /**
  * Set 'timestamp' to the value of nc_search_util::sql_datetime() and save
  * the record through the parent implementation.
  *
  * @return nc_search_data_persistent
  */
 public function save()
 {
     $now = nc_search_util::sql_datetime();
     $this->set('timestamp', $now);
     $saved = parent::save();
     return $saved;
 }
Beispiel #16
0
            $search_link = nc_search::get_object()->get_search_url($row['Catalogue_ID'], true) . "?nologging=1&amp;search_query=" . rawurlencode($row['QueryString']) . "&amp;area=" . rawurlencode($row['Area']);
            echo "<tr class='nc-text-center'>", "<td align='left'><a href='?view=queries_details&amp;query=", rawurlencode($row['QueryString']), "' {$query_link_title}>", htmlspecialchars($row['QueryString']), "</a></td>", "<td>", $row["Count"], "</td>", "</tr>\n";
        }
        echo "</table>\n";
    }
}
// -----------------------------------------------------------------------------
// ВРЕМЯ ПОСЛЕДНЕЙ ПЕРЕИНДЕКСАЦИИ, ЗАПУСК ПЕРЕИНДЕКСИРОВАНИЯ
$indexing_in_progress = $db->get_var("SELECT COUNT(*) FROM `Search_Task`");
if (count($rules)) {
    echo "<div class='legend'>", NETCAT_MODULE_SEARCH_ADMIN_STAT_INDEXING, "</div>", "<table class='nc-table nc--large nc--hovered nc--striped list'>", "<tr>", "<th width='30%'>", NETCAT_MODULE_SEARCH_ADMIN_RULE, "</th>", "<th>", NETCAT_MODULE_SEARCH_ADMIN_RULE_SITE, "</th>", "<th class='nc-text-center'>", NETCAT_MODULE_SEARCH_ADMIN_RULE_SCHEDULE, "</th>", "<th class='nc-text-center'>", NETCAT_MODULE_SEARCH_ADMIN_RULE_LAST_RUN, "</th>", "<th class='nc-text-center'>", NETCAT_MODULE_SEARCH_ADMIN_STAT_INDEX, "</th>", "</tr>\n";
    foreach ($rules as $r) {
        $rule_id = $r->get_id();
        $last_start_time = $r->get('last_start_time');
        $last_finish_time = $r->get('last_finish_time');
        $last_run = $last_start_time ? nc_search_util::format_time($last_start_time) : "&mdash;";
        $rule_name = $this->if_null($r->get('name'), NETCAT_MODULE_SEARCH_ADMIN_UNNAMED_RULE);
        $action_cell = "";
        if ($last_finish_time < $last_start_time) {
            $action_cell = NETCAT_MODULE_SEARCH_ADMIN_STAT_INDEXING_NOW;
        } else {
            $action_cell = "<a class='ajax' href='javascript:search_schedule({$rule_id})'>" . NETCAT_MODULE_SEARCH_ADMIN_STAT_INDEX_IN_BACKGROUND . "</a>";
            if (!$indexing_in_progress) {
                $action_cell .= " | <a href='javascript:search_index_now({$rule_id})'>" . NETCAT_MODULE_SEARCH_ADMIN_STAT_INDEX_IN_BROWSER . "</a>";
            }
        }
        echo "<tr>", "<td>", $this->hash_link("#module.search.rules_edit({$rule_id})", $rule_name), "</td>", "<td>", $r->get_site_name(), "</td>", "<td class='nc-text-center'>", $r->get_schedule_string(), "</td>", "<td class='nc-text-center'>", $last_run, "</td>", "<td class='nc-text-center nc--nowrap'>", $action_cell, "</td>", "</tr>\n";
    }
    echo "</table>\n";
}
// -----------------------------------------------------------------------------
Beispiel #17
0
 /**
  * Return a numeric value as is; any other value is escaped and wrapped in
  * single quotes.
  *
  * @param $value
  * @return int|string
  */
 protected function escape_number($value)
 {
     if (is_numeric($value)) {
         return $value;
     }
     return "'" . nc_search_util::db_escape($value) . "'";
 }
Beispiel #18
0
    nc_print_status(NETCAT_MODULE_SEARCH_ADMIN_SETTING_SEARCH_DISABLED, "error", array($this->hash_href("#module.search.generalsettings"), "_top"));
}
$rules = nc_search::load('nc_search_rule', "SELECT * FROM `%t%` ORDER BY `Rule_ID`")->set_output_encoding(nc_core('NC_CHARSET'));
if (count($rules)) {
    foreach ($rules as $r) {
        // строчка «последняя индексация»
        $last_start_time = $r->get('last_start_time');
        $last_finish_time = $r->get('last_finish_time');
        if (!$last_start_time) {
            $last_run = NETCAT_MODULE_SEARCH_ADMIN_RULE_NEVER_RUN . ".";
        } else {
            $last_run = NETCAT_MODULE_SEARCH_ADMIN_RULE_LAST_RUN . ": " . nc_search_util::format_time($last_start_time) . " (";
            if ($last_finish_time < $last_start_time) {
                $last_run .= NETCAT_MODULE_SEARCH_ADMIN_RULE_LAST_RUN_NOT_FINISHED;
            } else {
                $last_run .= NETCAT_MODULE_SEARCH_ADMIN_RULE_LAST_RUN_DURATION . " " . nc_search_util::format_seconds($last_finish_time - $last_start_time);
            }
            $last_run .= ").";
        }
        // конец формирования строчки «последняя индексация»
        // строчка с результатми последней индексации
        $stats = "";
        if ($last_start_time) {
            $result = $r->get('last_result');
            $stats = "<div class='stats'>" . sprintf(NETCAT_MODULE_SEARCH_ADMIN_RULE_STATISTICS, $result['processed'], $result['deleted'], $result['checked']) . ".</div>";
        }
        // конец формирования строчки с результатами
        // «подробнее»
        $details = "<div class='site'><strong>" . NETCAT_MODULE_SEARCH_ADMIN_RULE_SITE . "</strong>: " . $this->hash_link("#site.map({$r->get('site_id')})", $r->get_site_name()) . "</div>";
        if ($r->get('area_string')) {
            // sic, not get_area_string()
Beispiel #19
0
<?php

/**
 * Query suggestions: prints (via nc_array_json()) the logged search queries
 * that start with the given text and had at least one result.
 *
 * Input parameters:
 *  - term
 *
 * @global $catalogue
 */
$NETCAT_FOLDER = realpath("../../../../");
require_once "{$NETCAT_FOLDER}/vars.inc.php";
require $INCLUDE_FOLDER . "index.php";
// read the parameters
$input = trim($nc_core->input->fetch_get('term'));
if (!nc_search::should('EnableQuerySuggest') || nc_search::get_setting('SuggestMode') != 'queries' || mb_strlen($input) < nc_search::get_setting('SuggestionsMinInputLength')) {
    die("[]");
}
$input = $nc_core->utf8->conv($nc_core->NC_CHARSET, 'utf-8', $input);
// look for the queries that start with the given substring
$db->query("SET NAMES 'utf8'");
// "%" and "_" typed by the user must be matched literally and must not act
// as LIKE wildcards, so they get a backslash after the regular escaping.
// NOTE(review): assumes nc_search_util::db_escape() does not escape "%" / "_"
// itself — confirm.
$like_prefix = addcslashes(nc_search_util::db_escape($input), '%_');
$query = "SELECT DISTINCT(`QueryString`) AS `label` FROM `Search_Query` \n           WHERE `QueryString` LIKE '" . $like_prefix . "%'\n             AND `ResultsCount` > 0\n           ORDER BY `QueryString`\n           LIMIT " . (int) nc_search::get_setting('NumberOfSuggestions');
$suggestions = (array) $db->get_results($query, ARRAY_A);
if (!$nc_core->NC_UNICODE) {
    $suggestions = $nc_core->utf8->array_utf2win($suggestions);
}
print nc_array_json($suggestions);
Beispiel #20
0
 /**
  * For logging purposes: every `URL` value of the `Search_Link` table,
  * passed through nc_search_util::decode_url().
  *
  * @return array
  */
 public function get_all_urls()
 {
     $urls = nc_Core::get_object()->db->get_col("SELECT `URL` FROM `Search_Link`");
     foreach ($urls as $key => $encoded_url) {
         $urls[$key] = nc_search_util::decode_url($encoded_url);
     }
     return $urls;
 }
Beispiel #21
0
 /**
  * Build a boolean-mode MATCH ... AGAINST expression for the `Content`
  * column of the given table.
  *
  * @param $table
  * @param $fts_qry
  * @return string
  */
 protected function make_match($table, $fts_qry)
 {
     $against = nc_search_util::db_escape($fts_qry);
     return "MATCH(`" . $table . "`.`Content`) AGAINST ('" . $against . "' IN BOOLEAN MODE)";
 }
Beispiel #22
0
 /**
  * Stores an index field in the given table (Search_Index or
  * Search_Index_FieldX); when necessary the query is split into parts so
  * that a single query is not larger than mysql.max_allowed_packet.
  *
  * The first query is a REPLACE, the following ones are UPDATEs that append
  * the next chunk with CONCAT(). Raw data chunks are written after the
  * content is exhausted.
  *
  * @param string $table_name
  * @param int $doc_id
  * @param string $all_content
  * @param string $all_raw_data
  */
 protected function store_index_data($table_name, $doc_id, $all_content, $all_raw_data = '')
 {
     $db = $this->get_db();
     $overhead = 1024;
     // room for the SQL commands etc.
     $chunk_size = $this->max_allowed_packet - $overhead;
     $content_chunks = str_split($all_content, $chunk_size);
     $raw_chunks = strlen($all_raw_data) ? str_split($all_raw_data, $chunk_size) : array();
     $doc_id = (int) $doc_id;
     unset($all_content, $all_raw_data);
     $n_content = $n_raw = 0;
     while (count($content_chunks) || count($raw_chunks)) {
         $update = $n_content || $n_raw;
         $query = ($update ? "UPDATE" : "REPLACE INTO") . " `{$table_name}` SET `Document_ID` = {$doc_id}";
         // array_shift() gives null once the content is exhausted
         $content = (string) array_shift($content_chunks);
         if (strlen($content)) {
             $content = nc_search_util::db_escape($content);
             $query .= ", `Content` = " . ($n_content ? "CONCAT(`Content`, '{$content}')" : "'{$content}'");
             $n_content++;
         }
         // Raw data is added when it does not cause an overflow. The
         // comparison has to be "<=": raw chunks produced by str_split() are
         // exactly $chunk_size bytes long (except the last one), so with "<"
         // a full-size chunk was never accepted even with no content in the
         // query, and the loop never terminated.
         $add_raw = count($content_chunks) == 0 && isset($raw_chunks[0]) && strlen($content) + strlen($raw_chunks[0]) <= $chunk_size;
         if ($add_raw) {
             $raw_data = nc_search_util::db_escape(array_shift($raw_chunks));
             $query .= ", `RawData` = " . ($n_raw ? "CONCAT(`RawData`, '{$raw_data}')" : "'{$raw_data}'");
             $n_raw++;
         }
         if ($update) {
             $query .= " WHERE `Document_ID` = {$doc_id}";
         }
         $db->query($query);
     }
 }
Beispiel #23
0
 /**
  * Return the body passed through nc_search_util::convert() (called with 1
  * as the second argument).
  *
  * @return string
  */
 public function get_body()
 {
     $converted_body = nc_search_util::convert($this->body, 1);
     return $converted_body;
 }
Beispiel #24
0
     return "<a href='" . htmlspecialchars($edit_link) . "' target='_blank'>" . NETCAT_MODULE_SEARCH_ADMIN_BROKEN_LINK_EDIT . "</a>";
 }
 // --------------------------------------------------------------- //
 foreach ($res as $row) {
     echo "<li>";
     if ($group_by == 'referrer') {
         echo "<b><a href='", htmlspecialchars($row['Referrer_URL']), "' target='_blank'>", $row['Title'] ? $row['Title'] : $row['Referrer_URL'], "</a></b> &nbsp; ", _edit_doc_link($row), "\n", "<ul>\n";
         $where = $row["Referrer_Document_ID"] ? "`Referrer_Document_ID` = '{$row['Referrer_Document_ID']}'" : "`Referrer_URL` = '" . $db->escape($row["Referrer_URL"]) . "'";
         $broken_links = $db->get_col("SELECT `URL`\n                                            FROM `Search_BrokenLink`\n                                           WHERE {$where}");
         foreach ($broken_links as $link) {
             echo "<li>" . nc_search_util::decode_url($link) . "</li>\n";
         }
         echo "</ul>\n";
     } else {
         // group by broken link URL
         echo "<b>" . nc_search_util::decode_url($row['URL']) . "</b>\n<ul>";
         $referrers = $db->get_results("SELECT l.`Referrer_URL`,\n                        doc.`Title`,\n                        doc.`Catalogue_ID`,\n                        doc.`Subdivision_ID`\n                   FROM `Search_BrokenLink` AS l\n                   LEFT JOIN `Search_Document` AS doc\n                        ON (l.`Referrer_Document_ID` = doc.`Document_ID`)\n                  WHERE l.`URL` = '" . $db->escape($row['URL']) . "'\n                  LIMIT {$max_referrer_links}", ARRAY_A);
         foreach ($referrers as $n => $ref) {
             echo "<li><b><a href='", htmlspecialchars($ref['Referrer_URL']), "' target='_blank'>", $ref['Title'] ? $ref['Title'] : $ref['Referrer_URL'], "</a></b> &nbsp; ", _edit_doc_link($ref), "</li>\n";
         }
         echo "</ul>\n";
         if (count($referrers) == $max_referrer_links) {
             echo "<div>", sprintf(NETCAT_MODULE_SEARCH_ADMIN_BROKEN_LINKS_REFERRER_LIMIT, $max_referrer_links), "</div>";
         }
     }
     echo "</li>\n";
 }
 echo "</ul></div>";
 // листалка по страницам
 $page_link = $this->make_page_query(array('offset'), true);
 if ($offset > 0) {
Beispiel #25
0
 /**
  * Load the 'nc_search_language_synonyms' records of the given language and
  * keep them in the static self::$lists under the language key.
  *
  * @param $language
  */
 protected function load_synonyms($language)
 {
     $escaped_language = nc_search_util::db_escape($language);
     $sql = "SELECT * FROM `%t%` WHERE `Language`='" . $escaped_language . "'";
     self::$lists[$language] = nc_search::load('nc_search_language_synonyms', $sql);
 }
Beispiel #26
0
            foreach ($description["included"] as $item) {
                $hint .= "<div class='item'>" . NETCAT_MODULE_SEARCH_ADMIN_BULLET . " {$item}</div>\n";
            }
            $hint .= "</div>";
        }
        if ($description["excluded"]) {
            $hint .= "<div class='header'><strong>" . NETCAT_MODULE_SEARCH_ADMIN_QUERY_AREA_EXCLUDED . "</strong>:</div><div class='list'>";
            foreach ($description["excluded"] as $item) {
                $hint .= "<div class='item'>" . NETCAT_MODULE_SEARCH_ADMIN_BULLET . " {$item}</div>\n";
            }
            $hint .= "</div>";
        }
        $area_cell = "<td class='area_hint'><div>" . $area->to_string() . "</div>" . "<div class='inline_help area_description'>{$hint}</div>" . "</td>";
    }
    $search_link = nc_search::get_object()->get_search_url($row['Catalogue_ID'], true) . "?nologging=1&amp;search_query=" . rawurlencode($row['QueryString']) . "&amp;area=" . rawurlencode($row['Area']);
    echo "<tr class='nc-text-center'>", "<td>", nc_search_util::format_time($row['Timestamp']), "</td>", $area_cell, "<td><a href='{$search_link}' target='_blank' title='", htmlspecialchars(NETCAT_MODULE_SEARCH_ADMIN_QUERY_OPEN_RESULTS_LINK_HINT), "'>", $row['ResultsCount'], "</a></td>", "<td>", $row['User_ID'] ? $this->hash_link("#user.edit({$row['User_ID']})", $row["Login"]) : "&nbsp;", "</td>", "<td>", long2ip($row['IP']), "</td>", "</tr>\n";
}
$ui = $this->get_ui();
$ui->actionButtons[] = array("id" => "prev_page", "caption" => NETCAT_MODULE_SEARCH_ADMIN_QUERY_BACK_TO_LIST, "location" => "#module.search.queries", "align" => "left");
$page_link = $this->make_page_query(array('offset'), true);
if ($offset > 0) {
    $prev_page = $page_link . "&amp;offset=" . ($offset - $per_page);
    $ui->actionButtons[] = array("id" => "prev_page", "caption" => NETCAT_MODULE_SEARCH_ADMIN_QUERY_PREV_PAGE, "action" => "mainView.loadIframe('{$prev_page}')", "align" => "left");
}
if ($found_rows > $offset + $per_page) {
    $next_page = $page_link . "&amp;offset=" . ($offset + $per_page);
    $ui->actionButtons[] = array("id" => "next_page", "caption" => NETCAT_MODULE_SEARCH_ADMIN_QUERY_NEXT_PAGE, "action" => "mainView.loadIframe('{$next_page}')");
}
?>
<script type='text/javascript'>
$nc('td.area_hint').hover(
Beispiel #27
0
 /**
  * Helper method that composes a query out of several fields
  * (the "exclude pages that contain the words..." field).
  *
  * The exclusion is attached with "AND NOT " when either part is a boolean
  * query and with "-" otherwise; a part that contains a space is wrapped in
  * parentheses.
  *
  * @param string $query
  * @param string|false $exclude words to exclude; an empty value (after
  *                              trimming) leaves the query unchanged
  * @return string
  */
 public function make_query_string($query, $exclude = false)
 {
     $query = (string) $query;
     if ($exclude && ($exclude = trim((string) $exclude))) {
         $is_boolean = nc_search_util::is_boolean_query($query) || nc_search_util::is_boolean_query($exclude);
         $not_op = $is_boolean ? "AND NOT " : "-";
         // strpos() returns 0 for a space at the very beginning, so the
         // result must be compared with false rather than used as a boolean
         // ($query is not trimmed and may start with a space)
         if (strpos($exclude, " ") !== false) {
             $exclude = "({$exclude})";
         }
         if (strpos($query, " ") !== false) {
             $query = "({$query})";
         }
         $query = "{$query} {$not_op}{$exclude}";
     }
     return $query;
 }
Beispiel #28
0
 /**
  * Suggest document titles for the given input.
  *
  * Titles that start with the input (found in the document table) come
  * first, in `Title` order; they are followed by the titles found through
  * the index when the 'SearchTitleBaseformsForSuggestions' option is on.
  * At most 'NumberOfSuggestions' entries are returned.
  *
  * @param string $input
  * @param string $language
  * @param integer $site_id
  * @return array list of array("label" => title, "url" => url)
  */
 public function suggest_titles($input, $language, $site_id)
 {
     // suggestions found through the index
     $index_suggestions = array();
     $titles = array();
     // the limit is interpolated into SQL below, so it is forced to an integer
     $limit = (int) nc_search::get_setting('NumberOfSuggestions');
     // search in the index (i.e. the variants are the ones produced by the
     // filters - the base form)
     if (nc_search::should('SearchTitleBaseformsForSuggestions')) {
         $last_space = strrpos($input, " ");
         $as_phrase = nc_search::should('SearchTitleAsPhraseForSuggestions');
         $b1 = $as_phrase ? '"' : '(';
         $b2 = $as_phrase ? '"' : ')';
         /* @todo check that the last word is correct/complete? */
         $query_string = "(title:{$b1}{$input}{$b2}" . ($last_space ? " OR title:{$b1}" . trim(substr($input, 0, $last_space)) . $b2 : '') . ") AND site_id:{$site_id}";
         $query = new nc_search_query($query_string);
         $query->set('limit', $limit)->set('options_to_fetch', array('title', 'site_id', 'path'))->set('language', $language);
         $documents = $this->find($query, false);
         foreach ($documents as $doc) {
             $index_suggestions[] = array("label" => $doc->get('title'), "url" => $doc->get('url'));
             $titles[] = '"' . nc_search_util::db_escape($doc->get('title')) . '"';
         }
         $titles = array_unique($titles);
     }
     // search for an exact match in the document table;
     // ideally the database would be queried first and the index second, but
     // with an index query it would not be as easy to filter out the titles
     // that have already matched
     $query = "SELECT `Catalogue_ID`, `Path`, `Title` FROM `%t%` " . ' WHERE `Title` LIKE "' . nc_search_util::db_escape($input) . '%" ' . ($titles ? " AND `Title` NOT IN (" . join(", ", $titles) . ") " : "") . " ORDER BY `Title` " . " LIMIT {$limit}";
     $documents = new nc_search_result();
     $documents->select_from_database($query);
     // The exact matches go in front of the index matches. They are appended
     // to a separate list so that the ORDER BY `Title` order is kept
     // (unshifting them one by one reversed it).
     $suggestions = array();
     foreach ($documents as $doc) {
         $suggestions[] = array("label" => $doc->get('title'), "url" => $doc->get('url'));
     }
     foreach ($index_suggestions as $suggestion) {
         $suggestions[] = $suggestion;
     }
     return array_slice($suggestions, 0, $limit);
 }
Beispiel #29
0
 /**
  * Return the name of the site given by 'site_id', followed by its decoded
  * domain in parentheses when a domain is set. When an exception is thrown
  * while the data is being fetched, the "nonexistent site" text with the
  * site id is returned instead.
  *
  * @return string
  */
 public function get_site_name()
 {
     $core = nc_Core::get_object();
     $catalogue = $core->catalogue;
     $site_id = $this->get('site_id');
     try {
         $name = $catalogue->get_by_id($site_id, "Catalogue_Name");
         $domain = $catalogue->get_by_id($site_id, "Domain");
         if (!$domain) {
             return $name;
         }
         $host = nc_search_util::decode_host($domain);
         if (!$core->NC_UNICODE) {
             $host = $core->utf8->utf2win($host);
         }
         return $name . " (" . $host . ")";
     } catch (Exception $e) {
         return sprintf(NETCAT_MODULE_SEARCH_ADMIN_RULE_NONEXISTENT_SITE, $site_id);
     }
 }
Beispiel #30
0
 /**
  * Return the Net_IDNA2 instance kept in self::$idn; the library is loaded
  * and the instance is created on the first call.
  *
  * @return Net_IDNA2
  */
 protected static function get_idn_converter()
 {
     if (self::$idn) {
         return self::$idn;
     }
     // netcat/require/lib
     require_once 'Net/IDNA2.php';
     self::$idn = new Net_IDNA2();
     return self::$idn;
 }