예제 #1
0
파일: manager.php 프로젝트: Blu2z/implsk
 /**
  * Returns the extension rules whose `Checked` column is 1, ordered by `Priority`.
  *
  * The collection is loaded on first use and kept in self::$all_extensions.
  *
  * @return nc_search_persistent_data_collection
  */
 protected static function get_all_extensions()
 {
     if (self::$all_extensions) {
         return self::$all_extensions;
     }
     $sql = 'SELECT * FROM `%t%` WHERE `Checked` = 1 ORDER BY `Priority`';
     self::$all_extensions = nc_search::load('nc_search_extension_rule', $sql);
     return self::$all_extensions;
 }
예제 #2
0
파일: case.php 프로젝트: Blu2z/implsk
 /**
  * Converts every term to the case configured in the 'FilterStringCase' setting.
  *
  * @param array $terms terms to convert (handled as UTF-8 strings)
  * @return array the same array with every value case-converted, keys preserved
  */
 public function filter(array $terms)
 {
     // The setting does not depend on the term, so look it up once.
     $case_mode = nc_search::get_setting('FilterStringCase');
     // Iterate by key rather than by 0..n-1 offsets so that arrays with gaps
     // in their keys are converted without touching undefined offsets.
     foreach ($terms as $key => $term) {
         $terms[$key] = mb_convert_case($term, $case_mode, 'UTF-8');
     }
     return $terms;
 }
예제 #3
0
파일: html.php 프로젝트: Blu2z/implsk
 /**
  * @param int|null $level combination of nc_search::LOG_* flags; when empty,
  *   the 'LogLevel' setting OR-ed with LOG_ERROR, LOG_CRAWLER_REQUEST and
  *   LOG_INDEXING_BEGIN_END is used instead
  */
 public function __construct($level = null)
 {
     $this->level = $level
         ? $level
         : (nc_search::get_setting('LogLevel') | nc_search::LOG_ERROR | nc_search::LOG_CRAWLER_REQUEST | nc_search::LOG_INDEXING_BEGIN_END);
 }
예제 #4
0
파일: console.php 프로젝트: Blu2z/implsk
 /**
  * Drives the indexer step by step until it reports that the task is finished.
  *
  * @param nc_search_indexer $indexer
  * @throws nc_search_exception when nc_search_indexer::next() returns an unknown value
  * @return boolean true when task is finished
  */
 public function loop(nc_search_indexer $indexer)
 {
     $cycle_number = 0;
     $delay = (int) nc_search::get_setting('CrawlerDelay');
     // A missing or zero 'IndexerSaveTaskEveryNthCycle' would make the modulo
     // below divide by zero; fall back to saving on every cycle in that case.
     $save_every = max(1, (int) nc_search::get_setting('IndexerSaveTaskEveryNthCycle'));
     while (true) {
         // save the task every $save_every cycles
         if ($cycle_number % $save_every == 0) {
             $indexer->save_task();
         }
         switch ($indexer->next()) {
             case nc_search_indexer::TASK_FINISHED:
                 // we're done
                 return true;
             case nc_search_indexer::TASK_STEP_FINISHED:
                 // pause between steps when a crawler delay is configured
                 if ($delay) {
                     sleep($delay);
                 }
                 break;
             case nc_search_indexer::TASK_STEP_SKIPPED:
                 break;
             default:
                 throw new nc_search_exception("Incorrect return value from nc_search_indexer::next()");
         }
         $cycle_number++;
     }
 }
예제 #5
0
파일: morphy.php 프로젝트: Blu2z/implsk
 /**
  * Returns the phpMorphy instance for the language of the current context,
  * creating it on first use and keeping it in self::$instances.
  *
  * @throws nc_search_exception if the phpMorphy instance cannot be created
  * @return phpMorphy
  */
 protected function get_morphy()
 {
     // phpMorphy requires doubled language codes: "ru_ru", "en_en"
     $code = $this->context->get('language');
     $morphy_language = $code . "_" . $code;
     $this->language = $morphy_language;
     if (isset(self::$instances[$morphy_language])) {
         return self::$instances[$morphy_language];
     }
     if (!class_exists('phpMorphy', false)) {
         nc_search::load_3rdparty_script("phpmorphy/src/common.php");
     }
     // PHPMORPHY_STORAGE_MEM is chosen only while indexing and only if the setting allows it
     $use_mem_storage = nc_search::should('PhpMorphy_LoadDictsDuringIndexing') && $this->context->get('action') == 'indexing';
     $options = array(
         'storage' => $use_mem_storage ? PHPMORPHY_STORAGE_MEM : PHPMORPHY_STORAGE_FILE,
         'predict_by_suffix' => true,
         'predict_by_db' => true,
     );
     // Path to directory where dictionaries are located
     $dict_path = nc_search::get_3rdparty_path() . '/phpmorphy/dicts';
     try {
         self::$instances[$morphy_language] = new phpMorphy($dict_path, $morphy_language, $options);
     } catch (phpMorphy_Exception $e) {
         throw new nc_search_exception("Error occurred while creating phpMorphy instance: {$e->getMessage()}");
     }
     return self::$instances[$morphy_language];
 }
예제 #6
0
파일: query.php 프로젝트: Blu2z/implsk
 /**
  * Applies the escape patterns of every feature that is switched off.
  *
  * $this->escape_patterns maps a setting name to a regular expression; when
  * the setting is not enabled, each match of its expression is replaced with
  * the first capture group.
  *
  * @param string $query
  * @return string
  */
 protected function escape_special_characters($query)
 {
     foreach ($this->escape_patterns as $setting_name => $regexp) {
         if (nc_search::should($setting_name)) {
             continue;
         }
         $query = preg_replace($regexp, '$1', $query);
     }
     return $query;
 }
예제 #7
0
파일: minlength.php 프로젝트: Blu2z/implsk
 /**
  * Drops terms that are shorter than the 'MinWordLength' setting.
  *
  * @param array $terms
  * @return array $terms unchanged when the minimum length is below 2,
  *   otherwise a re-indexed list of the terms that are long enough
  */
 public function filter(array $terms)
 {
     $min_length = nc_search::get_setting('MinWordLength');
     if ($min_length < 2) {
         return $terms;
     }
     $result = array();
     // foreach instead of 0..n-1 indexing: an array with gaps in its keys
     // would otherwise be read at offsets that do not exist.
     foreach ($terms as $term) {
         if (mb_strlen($term, 'UTF-8') >= $min_length) {
             $result[] = $term;
         }
     }
     return $result;
 }
예제 #8
0
파일: crawler.php 프로젝트: Blu2z/implsk
 /**
  * Performs a request through $this->http_client and logs its outcome.
  *
  * @param string $method name of the http client method to call
  * @param string $url
  * @return nc_search_indexer_crawler_response
  */
 protected function request($method, $url)
 {
     try {
         $this->http_client->{$method}($url);
         $raw_response = $this->http_client->currentResponse();
     } catch (nc_search_indexer_crawler_exception $e) {
         // the listener did not like the headers
         $raw_response = $e->get_response();
     }
     $raw_response["url"] = $url;
     $response = new nc_search_indexer_crawler_response($raw_response);
     $len = $response->get_content_length();
     $message = strtoupper($method) . " {$url}\n" . "Response: {$response->get_code()}\n";
     // content details are logged only for response codes below 400
     if ($response->get_code() < 400) {
         $message .= "Content-Type: " . $response->get_content_type() . "\n" . "Content-Length: " . (is_null($len) ? "no" : $len) . ", received: " . $response->get_body_length() . " bytes";
     }
     nc_search::log(nc_search::LOG_CRAWLER_REQUEST, $message);
     return $response;
 }
예제 #9
0
파일: scheduler.php 프로젝트: Blu2z/implsk
 /**
  * Run the first due task from the scheduler queue.
  *
  * @param int $indexer_strategy
  * @return bool|null false when indexing is already in progress or no intent
  *   is due yet; null after an intent has been handed over to the provider
  */
 public static function run($indexer_strategy = nc_search::INDEXING_NC_CRON)
 {
     $provider = nc_search::get_provider();
     if ($provider->is_reindexing()) {
         nc_search::log(nc_search::LOG_SCHEDULER_START, "Scheduler: indexing in progress");
         return false;
     }
     $intent = nc_search::load('nc_search_scheduler_intent', 'SELECT * FROM `%t%`' . ' WHERE `StartTime` <= ' . time() . ' ORDER BY `StartTime` ASC LIMIT 1')->first();
     if (!$intent) {
         nc_search::log(nc_search::LOG_SCHEDULER_START, "Scheduler: no scheduler intents to process now");
         return false;
     }
     // Read the area before the intent is deleted, so that nothing has to be
     // fetched from a record that no longer exists.
     $area_string = $intent->get('area_string');
     if (nc_search::will_log(nc_search::LOG_SCHEDULER_START)) {
         // date() is used because strftime() is deprecated as of PHP 8.1;
         // "Y-m-d H:i:s" yields the same text as "%Y-%m-%d %H:%M:%S"
         nc_search::log(nc_search::LOG_SCHEDULER_START, "Scheduler started (planned start time: " . date("Y-m-d H:i:s", $intent->get('start_time')) . "; area: '" . preg_replace("/\\s+/u", " ", $area_string) . "')");
     }
     // the intent has been taken into account and is not needed any more
     $intent->delete();
     // start indexing
     $provider->index_area($area_string, $indexer_strategy);
 }
예제 #10
0
파일: quotes.php 프로젝트: Blu2z/implsk
 /**
  * Tries to remove the quotes from the query.
  *
  * Does nothing unless the 'RemovePhrasesOnEmptyResult' setting is on and the
  * text contains a double quote but no single word wrapped in quotes.
  *
  * @param nc_search_language_corrector_phrase $phrase
  * @return boolean true if the phrase has been rewritten
  */
 public function correct(nc_search_language_corrector_phrase $phrase)
 {
     if (!nc_search::should('RemovePhrasesOnEmptyResult')) {
         return false;
     }
     $original_text = $phrase->to_string();
     $has_quote = strpos($original_text, '"') !== false;
     if (!$has_quote || preg_match('/"\\S+"/u', $original_text)) {
         return false;
     }
     // remove distance search
     $corrected_text = preg_replace('/"~[\\d\\.]+/', '"', $original_text);
     if (nc_search_util::is_boolean_query($corrected_text) || preg_match('/[-+]/', $corrected_text)) {
         // the phrase is part of a larger query: turn the quotes into parentheses
         $corrected_text = preg_replace('/"(\\S)/u', "(\$1", $corrected_text);
         $corrected_text = str_replace('"', ")", $corrected_text);
     } else {
         $corrected_text = str_replace('"', "", $corrected_text);
     }
     $message = sprintf(NETCAT_MODULE_SEARCH_CORRECTION_QUOTES, $original_text, $corrected_text);
     $phrase->set_phrase($corrected_text, $message);
     return true;
 }
예제 #11
0
파일: synonyms.php 프로젝트: Blu2z/implsk
 /**
  * Runs the word list through the language filters before saving it.
  *
  * @throws nc_search_data_exception when fewer than two words are left
  */
 public function save()
 {
     $mb_case = nc_search::get_setting('FilterStringCase');
     $apply_filter = !$this->get('dont_filter');
     $list = array();
     foreach ($this->get('words') as $word) {
         $word = trim($word);
         // skip empty values
         if (strlen($word)) {
             // Convert the case here only if the filters are not going to be
             // applied below. The encoding is passed explicitly, as the case
             // filter does, instead of relying on mb_internal_encoding().
             $list[] = $apply_filter ? $word : mb_convert_case($word, $mb_case, 'UTF-8');
         }
     }
     if ($apply_filter) {
         $context = new nc_search_context(array('language' => $this->get('language')));
         $list = nc_search_extension_manager::get('nc_search_language_filter', $context)->until_first('nc_search_language_filter_synonyms')->apply('filter', $list);
     }
     if (sizeof($list) < 2) {
         throw new nc_search_data_exception(NETCAT_MODULE_SEARCH_ADMIN_SYNONYM_LIST_MUST_HAVE_AT_LEAST_TWO_WORDS);
     }
     $this->set('words', $list);
     parent::save();
 }
예제 #12
0
파일: stopwords.php 프로젝트: Blu2z/implsk
 /**
  * Removes stop words of the context language from the term list.
  *
  * A term may be a string or an array of alternative forms. Forms that are
  * stop words are removed from such an array; if one form is left it is
  * returned as a plain string, if none is left the term is dropped.
  *
  * @param array $terms
  * @return array
  */
 public function filter(array $terms)
 {
     if (!nc_search::should('RemoveStopwords')) {
         return $terms;
     }
     $language = $this->context->get('language');
     // the stop word list of each language is loaded once and kept in self::$lists
     if (!isset(self::$lists[$language])) {
         $query = "SELECT * FROM `%t%` WHERE `Language`='" . nc_search_util::db_escape($language) . "'";
         self::$lists[$language] = nc_search::load('nc_search_language_stopword', $query, 'word');
     }
     $stop_list = self::$lists[$language];
     if (!count($stop_list)) {
         return $terms;
     }
     $result = array();
     foreach ($terms as $term) {
         if (is_array($term)) {
             // alternative forms
             foreach ($term as $i => $t) {
                 if ($stop_list->has_key($t)) {
                     unset($term[$i]);
                 }
             }
             // unset() leaves gaps in the keys; re-index so that the surviving
             // forms start at offset 0 (otherwise $term[0] may not exist)
             $term = array_values($term);
             $terms_left = count($term);
             if ($terms_left == 1) {
                 $result[] = $term[0];
             } elseif ($terms_left > 1) {
                 $result[] = $term;
             }
         } elseif (!$stop_list->has_key($term)) {
             // ordinary term
             $result[] = $term;
         }
     }
     return $result;
 }
예제 #13
0
파일: manager.php 프로젝트: Blu2z/implsk
 /**
  * Build an absolute URL from a link and the URL of the page it was found on.
  *
  * @param string $href link to resolve
  * @param string $referrer URL the link is relative to
  * @return string|false absolute URL; false when the link has no host and
  *   the referrer is the bare 'http://' or 'http:///'
  * @throws nc_search_exception when the link has no host and the referrer has none either
  */
 protected function resolve_link($href, $referrer = null)
 {
     $referrer_parts = $this->parse_utf8_url($referrer);
     // An absolute link without a protocol ("//host/path") is handled
     // incorrectly by parse_url() before PHP 5.4.7
     if (substr($href, 0, 2) == "//") {
         $scheme = isset($referrer_parts['scheme']) ? $referrer_parts['scheme'] : 'http';
         $href = "{$scheme}:{$href}";
     }
     $href_parts = $this->parse_utf8_url($href);
     if (!is_array($href_parts)) {
         // e.g. $href == "#"
         $href_parts = array();
     }
     $result_parts = $href_parts;
     if (!isset($href_parts["host"])) {
         // path with no host name
         if ($referrer == 'http:///') {
             return false;
         }
         if ($referrer == 'http://') {
             return false;
         }
         if (!$referrer_parts || !isset($referrer_parts["host"])) {
             throw new nc_search_exception("Cannot resolve full URL: '{$href}' (no referrer)");
         }
         foreach (array("scheme", "host", "port", "path") as $p) {
             if (isset($referrer_parts[$p]) && !isset($href_parts[$p])) {
                 $result_parts[$p] = $referrer_parts[$p];
             }
         }
         // The path can still be missing here (neither the link nor the
         // referrer has one); it then defaults to "/" further below.
         if (isset($result_parts["path"]) && substr($result_parts["path"], 0, 1) != "/") {
             // relative path: resolve it against the directory of the referrer
             $referrer_path = isset($referrer_parts["path"]) ? $referrer_parts["path"] : "/";
             if (substr($referrer_path, -1) == '/') {
                 $referrer_dir = $referrer_path;
             } else {
                 // dirname() returns "/" (or "\" on Windows) for a file in the
                 // root directory; trim the separator so that the directory
                 // always ends with exactly one slash instead of "//"
                 $referrer_dir = rtrim(dirname($referrer_path), "/\\") . "/";
             }
             $result_parts["path"] = $referrer_dir . $result_parts["path"];
         }
     }
     // end of "path with no host name"
     // "http://mysite.org" → "http://mysite.org/"
     if (!isset($result_parts["path"])) {
         $result_parts["path"] = "/";
     }
     // get rid of "./", "../"
     if (strpos($result_parts["path"], "./") !== false) {
         $path_fragments = array();
         foreach (explode("/", $result_parts["path"]) as $part) {
             if ($part == '.' || $part == '') {
                 continue;
             }
             if ($part == '..') {
                 array_pop($path_fragments);
             } else {
                 $path_fragments[] = $part;
             }
         }
         $path = join("/", $path_fragments);
         // Keep the trailing slash. The resolved path is inspected rather than
         // the link's own path, which does not exist for links such as "?page=2";
         // whenever the link has a path, both end with the same character.
         if (substr($result_parts["path"], -1) == '/') {
             $path .= "/";
         }
         // substr() instead of $path[0]: $path may be an empty string here
         if (substr($path, 0, 1) != '/') {
             $path = "/{$path}";
         }
         $result_parts["path"] = $path;
     }
     // Query parameters are sorted so that a page is not requested twice when
     // links to it list the parameters in a different order, for example:
     // /sub/?tag=22&curPos=10 and /sub/?curPos=10&tag=22 count as the same page.
     // Module setting: IndexerNormalizeLinks
     if (isset($result_parts["query"]) && strpos($result_parts["query"], "&") && nc_search::should('IndexerNormalizeLinks')) {
         $params = explode("&", $result_parts["query"]);
         sort($params);
         $result_parts["query"] = join("&", $params);
     }
     // IDN & non-latin paths
     $result_parts["host"] = nc_search_util::encode_host($result_parts["host"]);
     $result_parts["path"] = nc_search_util::encode_path($result_parts["path"]);
     // MySite.ORG == mysite.org
     $result_parts["host"] = strtolower($result_parts["host"]);
     $full_url = strtolower($result_parts["scheme"]) . "://" . $result_parts["host"] . (isset($result_parts["port"]) ? ":{$result_parts['port']}" : "") . $result_parts["path"] . (isset($result_parts["query"]) ? "?{$result_parts['query']}" : "");
     return $full_url;
 }
예제 #14
0
// Bootstrap: load the connection script, the module environment and the
// admin language file for the detected language.
require_once $ROOT_FOLDER . "connect_io.php";
$nc_core = nc_Core::get_object();
$nc_core->modules->load_env('ru');
$lang = $nc_core->lang->detect_lang();
require_once $ADMIN_FOLDER . "lang/" . $lang . ".php";
error_reporting(E_PARSE | E_ERROR | E_WARNING | E_USER_ERROR | E_USER_WARNING);
// slow the work down if required
$delay = trim(nc_search::get_setting('IndexerConsoleSlowdownDelay'));
// the setting is in seconds
if ($delay) {
    define('NC_SEARCH_INDEXER_DELAY_VALUE', (int) ($delay * 1000000));
    // the constant is in microseconds, as usleep() expects
    function nc_search_indexer_delay()
    {
        usleep(NC_SEARCH_INDEXER_DELAY_VALUE);
    }
    // the delay function is registered as a tick handler
    register_tick_function('nc_search_indexer_delay');
    declare (ticks=10000);
}
// close every active output buffer
while (@ob_end_flush()) {
}
nc_search::register_logger(new nc_search_logger_plaintext(nc_search::LOG_CONSOLE));
$remove_hung_tasks = !nc_search::should('IndexerConsoleRestartHungTasks');
$current_task = nc_search_indexer::get_current_task($remove_hung_tasks);
// Resume the current task only if it was started by the console batch runner
// and is either idle or (when restarting hung tasks is enabled) has shown no
// activity for longer than 'IndexerRemoveIdleTasksAfter'.
$continue = $current_task instanceof nc_search_indexer_task && $current_task->get('runner_type') == nc_search::INDEXING_CONSOLE_BATCH && ($current_task->get('is_idle') || nc_search::should('IndexerConsoleRestartHungTasks') && time() > $current_task->get('last_activity') + nc_search::get_setting("IndexerRemoveIdleTasksAfter"));
if ($continue) {
    $indexer = new nc_search_indexer();
    $indexer->resume($current_task, new nc_search_indexer_runner_batch());
} else {
    // otherwise let the scheduler pick the next task
    nc_search_scheduler::run(nc_search::INDEXING_CONSOLE_BATCH);
}
예제 #15
0
파일: sitemap.php 프로젝트: Blu2z/implsk
// Sitemap endpoint: prints either a sitemap index (when there are more URLs
// than fit into one sitemap) or loads one page of sitemap entries.
header("Content-type: text/xml");
//$NETCAT_FOLDER = realpath("../../../");
// the installation root is four path segments above this file
$NETCAT_FOLDER = join(strstr(__FILE__, "/") ? "/" : "\\", array_slice(preg_split("/[\\/\\\\]+/", __FILE__), 0, -4)) . (strstr(__FILE__, "/") ? "/" : "\\");
require_once "{$NETCAT_FOLDER}/vars.inc.php";
//require ($INCLUDE_FOLDER."index.php");
require $ROOT_FOLDER . "connect_io.php";
$nc_core->modules->load_env();
// the XML declaration is printed before anything else
print '<?xml version="1.0" encoding="UTF-8"?>';
$scheme = isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] != 'off' ? 'https' : 'http';
$url_prefix = "{$scheme}://{$_SERVER['HTTP_HOST']}";
$site = $nc_core->catalogue->get_by_host_name($_SERVER['HTTP_HOST']);
// The id is interpolated into SQL below, so force it to an integer; an
// unknown host then matches no rows instead of producing invalid SQL.
$site_id = (int) $site['Catalogue_ID'];
$start = $nc_core->input->fetch_get("start");
// also interpolated into SQL (LIMIT) and used as a divisor
$max_num_urls = (int) nc_search::get_setting('NumberOfEntriesPerSitemap');
if (!strlen($start)) {
    // if there are too many results, output a sitemapindex
    $num_urls = $db->get_var("SELECT COUNT(*)\n                              FROM `Search_Document` \n                             WHERE `Catalogue_ID` = {$site_id} AND `IncludeInSitemap`=1");
    // $max_num_urls > 0 keeps the division below from dividing by zero
    if ($max_num_urls > 0 && $num_urls > $max_num_urls) {
        $url = "{$url_prefix}{$_SERVER['REQUEST_URI']}?start=";
        print '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";
        for ($i = 0, $last = ceil($num_urls / $max_num_urls); $i < $last; $i++) {
            // host and request URI come from the client: escape them for XML
            print "<sitemap><loc>" . htmlspecialchars($url . $i * $max_num_urls, ENT_QUOTES, 'UTF-8') . "</loc></sitemap>\n";
        }
        print "</sitemapindex>\n";
        die;
    }
}
// a negative OFFSET is not valid SQL
$start = max(0, (int) $start);
$entries = $db->get_results("SELECT `Path`, \n                                    `SitemapChangefreq`, \n                                    `SitemapPriority`,\n                                    DATE_FORMAT(`LastModified`, '%Y-%m-%dT%T') AS `LastModified`\n                               FROM `Search_Document`\n                              WHERE `Catalogue_ID` = {$site_id} AND `IncludeInSitemap`=1\n                              LIMIT {$max_num_urls} OFFSET {$start}", ARRAY_A);
예제 #16
0
파일: parser.php 프로젝트: Blu2z/implsk
 /**
  * Parses a query string into a tree of query expressions.
  *
  * Tokens are taken from the string one at a time via remove_next_token();
  * each token either produces an expression, sets a modifier for the next
  * expression, adjusts the previous expression, or is discarded.
  *
  * @param string $query_string
  * @param boolean $is_recursive_call when false, the string is first
  *   converted from the site charset (NC_CHARSET) to UTF-8
  * @return nc_search_query_expression root of the tree, or an
  *   nc_search_query_expression_empty if no expression was produced
  */
 public function parse($query_string, $is_recursive_call = false)
 {
     if (!$is_recursive_call) {
         // change string encoding to UTF-8 or ensure it's not broken if it is
         // already UTF-8
         $query_string = mb_convert_encoding($query_string, 'UTF-8', nc_Core::get_object()->NC_CHARSET);
     }
     /*
      * LEXEMES
      *
      * simple/terminal:
      *   term
      *   wildcard*
      *   wildcard?
      *
      * group (inside):
      *   (a b)   -- essentially "a AND b" or "a OR b"
      *   "a b"
      *
      * group (left and right)
      *   AND  &&
      *   OR   ||
      *   [a TO b]
      *   {a TO b}
      *
      * (implicit AND or OR)
      *
      * wrap following expression:
      *   NOT  !
      *
      * modify next expression:
      *   field_name:
      *   +
      *   -    (must be preceded with a whitespace if not at the beginning of the string)
      *
      * modify previous expression:
      *   ^2
      *   ~0.5  (for term: fuzzy search)     --- extracted with the preceding term
      *   ~2    (for phrase: proximity search)
      *
      * special rules:
      *   - terms with both letters and numbers are considered a phrase:
      *       x123y567z → phrase("x 123 y 567 z")
      *       inside quotes: "price usd50" → phrase("price usd 50")
      *   - decimal fractions are considered a phrase:
      *       0.123 → phrase("0 123")
      *       "price 0.12" → phrase("price 0 12")
      */
     $query_remainder = $query_string;
     // part of the query string that is not parsed yet
     $root = null;
     // result of the parsing
     $previous = null;
     // previous expression
     $operator = $this->default_operator;
     // joining operator ("AND", "OR")
     $previous_was_group = false;
     $next_not = $next_required = $next_excluded = false;
     // modifiers for the upcoming token
     $next_field_name = null;
     // field name modifier
     while (true) {
         $expression = null;
         $token = $this->remove_next_token($query_remainder);
         // a null token ends the loop
         if ($token === null) {
             break;
         }
         // ----- make sense of the received token:
         if ($token == "(") {
             // start of the group?
             $expression = $this->remove_group($query_remainder);
             //may return null if parentheses are not balanced
             if ($expression) {
                 $previous_was_group = true;
             }
         } elseif ($token == '"') {
             // phrase?
             $expression = $this->remove_phrase($query_remainder);
             // may return null if not a phrase
         } elseif (($token == "[" || $token == "{") && nc_search::should('AllowRangeSearch')) {
             // can be an interval
             $expression = $this->remove_interval($query_remainder, $token);
             // may return null if not an interval
         } elseif (substr($token, -1) == ":" && nc_search::should('AllowFieldSearch')) {
             // field name!
             $next_field_name = substr($token, 0, -1);
         } elseif ($token == "+") {
             // "required" sign (not same as AND if default operator is OR)
             $next_required = true;
         } elseif ($token == "-" && !$previous || strlen($token) > 1 && trim($token) == "-") {
             // (a) "excluded" sign at the beginning of the query (not same as NOT if default operator is OR)
             // (b) "excluded" sign elsewhere (separated by the space)
             $next_excluded = true;
         } elseif ($token == "!" || $token == "NOT") {
             // boolean operators are case-sensitive
             $next_not = true;
             // wrap next item inside NOT
         } elseif ($token == "&&" || $token == "AND") {
             $operator = "AND";
         } elseif ($token == "||" || $token == "OR") {
             $operator = "OR";
         } elseif (strpos($token, "~") > 0 && preg_match("/^[{$this->term_chars}]+~/u", $token)) {
             // fuzzy search
             list($term, $similarity) = explode("~", $token);
             // decimal value ("0.5")
             if (nc_search::should('AllowFuzzySearch')) {
                 $expression = new nc_search_query_expression_fuzzy($term, $similarity);
             } else {
                 // fuzzy search is off: fall back to a plain term
                 $expression = new nc_search_query_expression_term($term);
             }
         } elseif ($token[0] == "~" && nc_search::should('AllowProximitySearch')) {
             // phrase word distance option
             $value = substr($token, 1);
             // integer value
             if ($previous instanceof nc_search_query_expression_phrase) {
                 $previous->set_distance($value);
             }
             // no fallback, throw the token out
         } elseif ($token[0] == "^" && nc_search::should('AllowTermBoost')) {
             // term and phrase boost
             $value = substr($token, 1);
             // integer or decimal value
             if ($previous instanceof nc_search_query_expression_term || $previous instanceof nc_search_query_expression_phrase) {
                 $previous->set_boost($value);
             }
             // no fallback, just discard (complicated: decimal value can result in two terms)
         } elseif ((strpos($token, "*") || strpos($token, "?")) && nc_search::should('AllowWildcardSearch')) {
             // wildcard; can't be the first symbol
             $expression = new nc_search_query_expression_wildcard($token);
         } elseif ($this->ignore_numbers && preg_match("/\\d/", $token)) {
             // reset field flag (e.g.: <price:50 term>)
             $next_field_name = null;
         } elseif (ctype_digit($token) && preg_match("/^\\.(\\d+)\\b/", $query_remainder, $match)) {
             // special case: decimal fractions
             $fraction = $match[1];
             // drop the "." and the fraction digits from the unparsed remainder
             $query_remainder = substr($query_remainder, strlen($fraction) + 1);
             $expression = new nc_search_query_expression_phrase(array($token, $fraction));
             // TODO? such phrases could be flagged so that they are translated into FTS phrases rather than REGEXP expressions
         } elseif (preg_match("/^[{$this->term_chars}]+\$/u", $token)) {
             // special case: treat terms with both letters and numbers as a phrase
             if (preg_match("/\\d/", $token)) {
                 $parts = preg_split("/(\\d+)/", $token, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
                 $expression = sizeof($parts) == 1 ? new nc_search_query_expression_term($parts[0]) : new nc_search_query_expression_phrase($parts);
             } else {
                 $expression = new nc_search_query_expression_term($token);
             }
         } else {
             // discard unknown tokens
             continue;
         }
         // -----
         // process next token if current token didn't produce an expression
         if (!$expression) {
             continue;
         }
         // -----
         // set expression flags / options
         $expression->set_field($next_field_name)->set_required($next_required)->set_excluded($next_excluded);
         // reset flags
         $next_field_name = null;
         $next_required = $next_excluded = false;
         if ($next_not) {
             // wrap inside NOT()
             $expression = new nc_search_query_expression_not($expression);
             $next_not = false;
         }
         // store expression in the $root tree
         if ($root == null) {
             // first item
             $root = $expression;
         } else {
             // not a first item
             if ($root instanceof nc_search_query_expression_or) {
                 if ($operator == "OR") {
                     // OR+OR=OR
                     $root->add_item($expression);
                 } elseif ($previous_was_group) {
                     // (one OR two) AND three
                     $root = $this->create_boolean($operator, $root, $expression);
                 } else {
                     // replace last item in OR with an AND expression
                     // (t1 OR t2 AND t3) → OR(t1, AND(t2, t3))
                     // (t1 OR t2 AND t3 AND t4) → OR(t1, AND(t2, t3, t4))
                     $root->conjunct_last($expression);
                 }
             } elseif ($root instanceof nc_search_query_expression_and && $operator == "AND") {
                 $root->add_item($expression);
                 // AND+AND=AND
             } else {
                 // (root=AND && operator=OR) --or-- (root is not boolean)
                 // (t1 AND t2 OR t3) → OR(AND(t1, t2), t3)
                 $root = $this->create_boolean($operator, $root, $expression);
             }
             // reset flag
             $previous_was_group = false;
         }
         // reset $operator:
         $operator = $this->default_operator;
         // remember previous expression:
         $previous = $expression;
     }
     // of "while tokens are coming"
     return $root ? $root : new nc_search_query_expression_empty();
 }
예제 #17
0
파일: task.php 프로젝트: Blu2z/implsk
 /**
  * Builds the area made of everything that is excluded from indexing.
  *
  * Combines the robots.txt area parts (when 'CrawlerObeyRobotsTxt' is on)
  * with one regexp area per non-empty line of the 'ExcludeUrlRegexps' setting.
  *
  * @return nc_search_area
  */
 protected function get_disallowed_areas()
 {
     // (1) robots.txt
     $area_parts = nc_search::should('CrawlerObeyRobotsTxt') ? $this->get_robots_txt_area_parts() : array();
     // (2) Settings (ExcludeUrlRegexps), one expression per line
     $lines = preg_split("/\\s*\n/u", nc_search::get_setting('ExcludeUrlRegexps'), -1, PREG_SPLIT_NO_EMPTY);
     foreach ($lines as $line) {
         // "@" serves as the delimiter, so it is escaped inside the expression
         $area_parts[] = new nc_search_area_regexp(array('regexp' => "@" . addcslashes($line, "@") . "@u"));
     }
     // done
     return new nc_search_area($area_parts);
 }
예제 #18
0
파일: synonyms.php 프로젝트: Blu2z/implsk
    die;
}
// set up the toolbar of the list screens
$ui = $this->get_ui();
$ui->add_lists_toolbar();
$nc_core = nc_Core::get_object();
// warn the user if what has been saved differs from what they entered
$crud_record = $this->get_action_record();
// the check is skipped for records flagged 'dont_filter'
if ($crud_record && !$crud_record->get('dont_filter')) {
    $input = $this->get_input('data');
    // on non-unicode installations the saved words are passed through array_utf2win() before comparing
    $saved_value = !$nc_core->NC_UNICODE ? $nc_core->utf8->array_utf2win($crud_record->get('words')) : $crud_record->get('words');
    if ($input['words'] != $saved_value) {
        nc_print_status(NETCAT_MODULE_SEARCH_ADMIN_SYNONYM_SAVE_RESULT, 'info', array(join(' ', $saved_value), $this->hash_href("#module.search.synonyms_edit({$crud_record->get_id()})")));
    }
}
// end of "show a notice"
// load all synonym lists ordered by language, output in the site charset
$synonyms = nc_search::load('nc_search_language_synonyms', "SELECT * FROM `%t%` ORDER BY `Language`")->set_output_encoding(nc_core('NC_CHARSET'));
if (count($synonyms)) {
    // фильтр
    $language_options = array("<option value=''>" . NETCAT_MODULE_SEARCH_ADMIN_LANGUAGE_ANY_LANGUAGE . "</option>");
    foreach ($this->get_language_list() as $code => $lang) {
        if ($synonyms->first('language', $code)) {
            $language_options[] = "<option value='{$code}'>{$lang}</option>";
        }
    }
    echo "<div class='live_filter' id='synonym_filter'>", "<span class='icon'>", nc_admin_img("i_field_search_off.gif", NETCAT_MODULE_SEARCH_ADMIN_FILTER), "</span>", "<select id='filter_language'>", join("\n", $language_options), "</select>", "<input type='text' id='filter_words'>", "<span class='reset'>", "<div class='icons icon_delete' title='" . NETCAT_MODULE_SEARCH_ADMIN_FILTER_RESET . "' style='margin-top:5px'></div>", "</span>", "</div>";
    ?>

    <form method="POST" action="?view=synonyms" onsubmit="return ($nc('input:checked').size() > 0)">
        <input type="hidden" name="action" value="delete" />
        <input type="hidden" name="data_class" value="nc_search_language_synonyms" />
        <table id="synonym_table" class="nc-table nc--striped nc--hovered nc--small" width="100%">
예제 #19
0
파일: console.php 프로젝트: Blu2z/implsk
}
// The installation root is four directories above this file.
$NETCAT_FOLDER = realpath(dirname(__FILE__) . "/../../../../");
// environment variables are set before the project files are loaded
// NOTE(review): presumably vars.inc.php / connect_io.php read them - confirm
putenv("DOCUMENT_ROOT={$NETCAT_FOLDER}");
putenv("HTTP_HOST=localhost");
putenv("REQUEST_URI=/");
require_once "{$NETCAT_FOLDER}/vars.inc.php";
require_once $ROOT_FOLDER . "connect_io.php";
$nc_core = nc_Core::get_object();
$nc_core->modules->load_env('ru');
$lang = $nc_core->lang->detect_lang();
require_once $ADMIN_FOLDER . "lang/" . $lang . ".php";
error_reporting(E_PARSE | E_ERROR | E_WARNING | E_USER_ERROR | E_USER_WARNING);
// slow the work down if required
$delay = trim(nc_search::get_setting('IndexerConsoleSlowdownDelay'));
// the setting is in seconds
if ($delay) {
    define('NC_SEARCH_INDEXER_DELAY_VALUE', (int) ($delay * 1000000));
    // the constant is in microseconds, as usleep() expects
    function nc_search_indexer_delay()
    {
        usleep(NC_SEARCH_INDEXER_DELAY_VALUE);
    }
    // the delay function is registered as a tick handler
    register_tick_function('nc_search_indexer_delay');
    declare (ticks=10000);
}
// close every active output buffer
while (@ob_end_flush()) {
}
// Change nc_search::LOG_CONSOLE to another value if you want to get more or
// less information about the reindexing
nc_search::register_logger(new nc_search_logger_plaintext(nc_search::LOG_CONSOLE));
nc_search_scheduler::run(nc_search::INDEXING_CONSOLE);
예제 #20
0
파일: batch.php 프로젝트: Blu2z/implsk
 /**
  * Maximum number of cycles, as configured by the
  * 'IndexerConsoleDocumentsPerSession' setting.
  *
  * @return int
  */
 protected function get_max_cycles_number()
 {
     $documents_per_session = nc_search::get_setting('IndexerConsoleDocumentsPerSession');

     return (int) $documents_per_session;
 }
예제 #21
0
파일: ui.php 프로젝트: Blu2z/implsk
 /**
  * Get the path to the search section of the site with the given ID.
  *
  * Host-qualified results (and the "not found" result) are memoised in
  * $this->paths. A host-less path is never written to that cache, because
  * the cache is only consulted for $with_host requests and would otherwise
  * hand out a URL without the host.
  *
  * @global nc_db $db
  * @throws Exception @see nc_catalogue::get_by_id()
  * @param integer $site_id
  * @param boolean $with_host prepend "http://<Domain>" when the site has a domain
  * @return string|false the path/URL, or false when no search section was found
  */
 public function get_search_url($site_id, $with_host = true)
 {
     if ($with_host && isset($this->paths[$site_id])) {
         return $this->paths[$site_id];
     }
     global $db, $nc_core;
     $folder_data = $db->get_row("SELECT sub.`Hidden_URL` AS `path`, sub.`Subdivision_ID` AS `id`\n               FROM `Subdivision` AS `sub`, `Sub_Class` AS `c`\n              WHERE c.`Class_ID` = " . (int) nc_search::get_setting("ComponentID") . "\n                AND c.`Subdivision_ID` = sub.`Subdivision_ID`\n                AND sub.`Catalogue_ID` = " . (int) $site_id . "\n              LIMIT 1", ARRAY_A);
     if (!$folder_data) {
         // "not found" does not depend on $with_host, so caching it is safe
         $this->paths[$site_id] = false;
         return false;
     }
     if (nc_module_check_by_keyword('routing')) {
         $path = nc_routing::get_folder_path($folder_data['id']);
     } else {
         $path = $nc_core->SUB_FOLDER . $folder_data['path'];
     }
     if (!$with_host) {
         // do not poison the cache with a host-less value
         return $path;
     }
     $host = $nc_core->catalogue->get_by_id($site_id, 'Domain');
     if ($host) {
         $path = "http://{$host}{$path}";
     }
     $this->paths[$site_id] = $path;
     return $path;
 }
예제 #22
0
파일: index.php 프로젝트: Blu2z/implsk
 /**
  * Split a text into tokens on every run of non-letter characters
  * (digits are kept as token characters unless 'IgnoreNumbers' is on).
  *
  * At most 'MaxTermsPerField' tokens are returned; a value of 0 or less
  * means "no limit". Empty tokens produced by leading/trailing delimiters
  * are kept.
  *
  * @param string $string Text to tokenize
  * @return array|false tokens; false only if preg_split() itself fails
  */
 protected function tokenize_text($string)
 {
     // split words containing numbers into number+string parts
     $string = preg_replace("/(\\pL)(\\d)/u", "\$1 \$2", $string);
     $string = preg_replace("/(\\d)(\\pL)/u", "\$1 \$2", $string);
     $delimiter = nc_search::should('IgnoreNumbers') ? '/[^\\pL]+/u' : '/[^\\pL\\d]+/u';
     $max_terms = (int) nc_search::get_setting('MaxTermsPerField');
     if ($max_terms <= 0) {
         return preg_split($delimiter, $string);
     }
     // When the limit is reached preg_split() stores the whole unsplit rest
     // of the text in the last element. Ask for one element more than
     // needed and drop it, so that only real tokens are returned.
     $tokens = preg_split($delimiter, $string, $max_terms + 1);
     if (!is_array($tokens)) {
         return $tokens;
     }
     return array_slice($tokens, 0, $max_terms);
 }
예제 #23
0
 /**
  * Drive the indexer step by step until the task is finished, interrupted,
  * or the connection check fails.
  *
  * @param nc_search_indexer $indexer
  * @throws nc_search_exception when nc_search_indexer::next() returns an unexpected value
  * @return boolean true when the task is finished (or was cancelled because
  *   check_connection() failed); false when interrupt_if_needed() stopped the loop
  */
 public function loop(nc_search_indexer $indexer)
 {
     $cycle_number = 0;
     // An empty or zero setting would make the modulo below divide by zero
     // (DivisionByZeroError on PHP 7+); fall back to saving on every cycle.
     $save_cycles = max(1, (int) nc_search::get_setting('IndexerSaveTaskEveryNthCycle'));
     while (true) {
         // stop prematurely:
         if (!$this->check_connection()) {
             $indexer->cancel();
             return true;
             // nobody listens anyway
         }
         if ($this->interrupt_if_needed($indexer, $cycle_number)) {
             return false;
         }
         // save the task every X cycles
         if ($cycle_number % $save_cycles == 0) {
             $indexer->save_task();
         }
         switch ($indexer->next()) {
             case nc_search_indexer::TASK_FINISHED:
                 // we're done
                 return true;
             case nc_search_indexer::TASK_STEP_FINISHED:
                 if ($this->delay) {
                     // check for an interrupt once more before sleeping
                     if ($this->interrupt_if_needed($indexer, $cycle_number)) {
                         return false;
                     }
                     sleep($this->delay);
                 }
                 break;
             case nc_search_indexer::TASK_STEP_SKIPPED:
                 break;
             default:
                 throw new nc_search_exception("Incorrect return value from nc_search_indexer::next()");
         }
         $cycle_number++;
     }
 }
예제 #24
0
파일: index.php 프로젝트: Blu2z/implsk
<?php

/* $Id: index.php 8366 2012-11-07 16:30:14Z aix $ */
/**
 * Starts "real-time" reindexing
 */
$NETCAT_FOLDER = realpath("../../../../");
require_once "{$NETCAT_FOLDER}/vars.inc.php";
$use_gzip_compression = false;
require_once "{$ADMIN_FOLDER}/function.inc.php";
require_once "../function.inc.php";
// slow the work down if required
$delay = trim(nc_search::get_setting('IndexerInBrowserSlowdownDelay'));
// seconds
if ($delay) {
    define('NC_SEARCH_INDEXER_DELAY_VALUE', (int) ($delay * 1000000));
    // microseconds
    /**
     * Tick handler: pauses for NC_SEARCH_INDEXER_DELAY_VALUE microseconds and
     * prints a single space (presumably to keep the browser connection
     * alive — confirm).
     */
    function nc_search_indexer_delay()
    {
        usleep(NC_SEARCH_INDEXER_DELAY_VALUE);
        print " ";
    }
    register_tick_function('nc_search_indexer_delay');
    declare (ticks=10000);
}
// Build the request for the admin controller from the GET parameters
$input = nc_Core::get_object()->input;
$request = array('.page_title' => NETCAT_MODULE_SEARCH_ADMIN_INDEXING_TITLE, 'view' => 'indexing_on_request', 'rule_id' => $input->fetch_get('rule_id'), 'token' => $input->fetch_get('token'), 'continue' => $input->fetch_get('continue'));
nc_search_admin_controller::process_request($request);
예제 #25
0
파일: stopwords.php 프로젝트: Blu2z/implsk
<?php

// Abort unless the nc_system class is already loaded
// (presumably a guard against opening this view directly — confirm).
if (!class_exists("nc_system")) {
    die;
}
$ui = $this->get_ui();
$ui->add_lists_toolbar();
// Load every stopword ordered by language and word; the output encoding of
// the collection is set to nc_core('NC_CHARSET')
$stopwords = nc_search::load('nc_search_language_stopword', "SELECT * FROM `%t%` ORDER BY `Language`, `Word`")->set_output_encoding(nc_core('NC_CHARSET'));
if (count($stopwords)) {
    // filter
    $language_options = array("<option value=''>" . NETCAT_MODULE_SEARCH_ADMIN_LANGUAGE_ANY_LANGUAGE . "</option>");
    // Add an option only for languages for which first('language', $code)
    // finds something in the loaded collection
    foreach ($this->get_language_list() as $code => $lang) {
        if ($stopwords->first('language', $code)) {
            $language_options[] = "<option value='{$code}'>{$lang}</option>";
        }
    }
    // Live-filter bar: language selector, text input and reset icon
    echo "<div class='live_filter' id='stopword_filter'>", "<span class='icon'>", nc_admin_img("i_field_search_off.gif", NETCAT_MODULE_SEARCH_ADMIN_FILTER), "</span>", "<select id='filter_language'>", join("\n", $language_options), "</select>", "<input type='text' id='filter_word'>", "<span class='reset'>", "<div class='icons icon_delete' title='" . NETCAT_MODULE_SEARCH_ADMIN_FILTER_RESET . "' style='margin-top:5px'></div>", "</span>", "</div>";
    ?>

    <form method="POST" action="?view=stopwords" onsubmit="return ($nc('input:checked').size() > 0)">
        <input type="hidden" name="action" value="delete" />
        <input type="hidden" name="data_class" value="nc_search_language_stopword" />
        <table id="stopword_table" class="nc-table nc--striped nc--hovered nc--small" width="100%">
            <tr align="left">
                <th><?php 
    echo NETCAT_MODULE_SEARCH_ADMIN_LANGUAGE;
    ?>
</th>
                <th width="75%"><?php 
    echo NETCAT_MODULE_SEARCH_ADMIN_STOPWORD;
    ?>
예제 #26
0
파일: title.php 프로젝트: Blu2z/implsk
<?php

/**
 * Prints title suggestions for the entered text as JSON.
 *
 * Input parameters:
 *  - term
 *  - language
 * 
 * @global $catalogue
 */
$NETCAT_FOLDER = realpath("../../../../");
require_once "{$NETCAT_FOLDER}/vars.inc.php";
require $INCLUDE_FOLDER . "index.php";
// read the parameters
$input = trim($nc_core->input->fetch_get('term'));
// Suggestions disabled, suggest mode is not 'titles', or the input is
// shorter than 'SuggestionsMinInputLength': answer with an empty JSON list
if (!nc_search::should('EnableQuerySuggest') || nc_search::get_setting('SuggestMode') != 'titles' || mb_strlen($input) < nc_search::get_setting('SuggestionsMinInputLength')) {
    die("[]");
}
// convert the term from the site charset (NC_CHARSET) to UTF-8
$input = $nc_core->utf8->conv($nc_core->NC_CHARSET, 'utf-8', $input);
$language = $nc_core->input->fetch_get('language');
if (!$language) {
    // no language passed: fall back to the detected one
    $language = $nc_core->lang->detect_lang(1);
}
// finding the matching titles is provider-dependent
$suggestions = nc_search::get_provider()->suggest_titles($input, $language, $catalogue);
if (!$nc_core->NC_UNICODE) {
    // NC_UNICODE is off: pass the suggestions through array_utf2win()
    $suggestions = $nc_core->utf8->array_utf2win($suggestions);
}
print nc_array_json($suggestions);
예제 #27
0
<?php

// Load the nc_search class from this directory and run its static init()
require_once dirname(__FILE__) . "/nc_search.class.php";
nc_search::init();
예제 #28
0
파일: result.php 프로젝트: Blu2z/implsk
 /**
  * Return the regular expression used to highlight the query terms.
  * The expression is built on the first call and kept in
  * $this->highlight_regexp for subsequent calls.
  *
  * @param mixed $language value stored as 'language' in the search context
  * @return mixed result of the analyzer's get_highlight_regexp() or of
  *   nc_search_util::word_regexp() when no analyzer is available
  */
 protected function get_highlight_regexp($language)
 {
     // already built earlier
     if ($this->highlight_regexp) {
         return $this->highlight_regexp;
     }

     $query = $this->get_query_string();
     $context = new nc_search_context(array('language' => $language, 'action' => 'searching'));

     // Extract the words from the query.
     // (Removing terms prefixed with "-" or "NOT" makes little sense here,
     // because as a rule they do not occur in the result anyway.)
     // First drop the ^1 and ~1 operators:
     $query = preg_replace("/[\\^~][\\d\\.]+/", '', $query);
     preg_match_all("/[\\pL\\d\\?\\*]+/u", $query, $found);
     $terms = $found[0];

     $has_wildcards = strpos($query, "*") !== false || strpos($query, "?") !== false;
     if ($has_wildcards) {
         if (nc_search::should('AllowWildcardSearch')) {
             // translate wildcards into their regexp counterparts
             $wildcard_map = array("?" => ".", "*" => "[\\S]+");
         } else {
             // wildcards are not allowed: strip them
             $wildcard_map = array("?" => "", "*" => "");
         }
         foreach (array_keys($terms) as $key) {
             $terms[$key] = strtr($terms[$key], $wildcard_map);
         }
     }

     // run the terms through every language filter except the stopword filter
     $filters = nc_search_extension_manager::get('nc_search_language_filter', $context);
     $terms = $filters->except('nc_search_language_filter_stopwords')->apply('filter', $terms);

     $analyzer = nc_search_extension_manager::get('nc_search_language_analyzer', $context)->first();
     if ($analyzer) {
         $this->highlight_regexp = $analyzer->get_highlight_regexp($terms);
     } else {
         $this->highlight_regexp = nc_search_util::word_regexp("(" . join("|", $terms) . ")", "Si");
     }

     return $this->highlight_regexp;
 }
예제 #29
0
파일: indexer.php 프로젝트: Blu2z/implsk
 /**
  * Is reindexing currently running?
  *
  * When $remove_hung_tasks is true and the first task has been idle for
  * longer than the 'IndexerRemoveIdleTasksAfter' setting (seconds), the task
  * is deleted, the `Search_Link` and `Search_LinkReferrer` tables are
  * truncated, an error is logged and false is returned.
  *
  * @param bool $remove_hung_tasks
  * @return false|nc_search_indexer_task
  */
 public static function get_current_task($remove_hung_tasks = true)
 {
     $tasks = nc_search::load_all('nc_search_indexer_task', true);
     if (!sizeof($tasks)) {
         return false;
     }
     $task = $tasks->first();
     if (!$remove_hung_tasks) {
         return $task;
     }
     // has the task hung?
     $last_activity = $task->get('last_activity');
     if (time() > $last_activity + nc_search::get_setting("IndexerRemoveIdleTasksAfter")) {
         $task->delete();
         $db = nc_Core::get_object()->db;
         $db->query("TRUNCATE TABLE `Search_Link`");
         $db->query("TRUNCATE TABLE `Search_LinkReferrer`");
         // date() instead of strftime(): strftime() is deprecated since
         // PHP 8.1 and this format produces the same text
         nc_search::log(nc_search::LOG_ERROR, "Indexer task was last active at " . date("Y-m-d H:i:s", (int) $last_activity) . ". Task removed.");
         return false;
     }
     return $task;
 }
예제 #30
0
<?php

/**
 * Schedules reindexing by rule (area) through cron for the nearest possible time.
 * Prints "1" on success, or "0; ..." followed by the reason on failure.
 */
if (!class_exists("nc_system")) {
    exit;
}
// discard everything that is already in the output buffers
while (@ob_end_clean()) {
}
$area = $this->get_input('area');
if ($area) {
    try {
        nc_search::index_area($area, "now");
        $response = "1";
    } catch (Exception $e) {
        $response = "0; /* " . $e->getMessage() . " */";
    }
} else {
    $response = "0; // no area";
}
echo $response;
exit;