예제 #1
0
파일: html.php 프로젝트: Blu2z/implsk
 /**
  * @param int|null $level explicit bit mask of message types to log; when
  *   empty, the mask is derived from the 'LogLevel' setting
  */
 public function __construct($level = null)
 {
     if ($level) {
         $this->level = $level;
         return;
     }
     // No explicit level: take the configured one and always add errors,
     // crawler requests and indexing begin/end messages on top of it.
     $always_logged = nc_search::LOG_ERROR | nc_search::LOG_CRAWLER_REQUEST | nc_search::LOG_INDEXING_BEGIN_END;
     $this->level = nc_search::get_setting('LogLevel') | $always_logged;
 }
예제 #2
0
파일: case.php 프로젝트: Blu2z/implsk
 /**
  * Converts every term to the letter case configured in the
  * 'FilterStringCase' setting (terms are processed as UTF-8).
  *
  * @param array $terms
  * @return array the converted terms, stored under their original keys
  */
 public function filter(array $terms)
 {
     // The setting does not change while we iterate, so read it only once.
     $case_mode = nc_search::get_setting('FilterStringCase');
     // Walk the actual keys: an index-based loop would touch undefined
     // offsets if the array is not a plain 0..n-1 list.
     foreach ($terms as $key => $term) {
         $terms[$key] = mb_convert_case($term, $case_mode, 'UTF-8');
     }
     return $terms;
 }
예제 #3
0
파일: console.php 프로젝트: Blu2z/implsk
 /**
  * Keeps calling $indexer->next() until the indexer reports that the task is
  * finished. The task is saved every 'IndexerSaveTaskEveryNthCycle' cycles and
  * the loop sleeps for 'CrawlerDelay' seconds after each finished step.
  *
  * @param nc_search_indexer $indexer
  * @throws nc_search_exception when next() returns an unknown status
  * @return boolean true when task is finished
  */
 public function loop(nc_search_indexer $indexer)
 {
     $cycle_number = 0;
     $delay = (int) nc_search::get_setting('CrawlerDelay');
     // Read the save interval once and clamp it to at least 1: a zero or empty
     // setting would otherwise make the modulo below divide by zero.
     $save_every = max(1, (int) nc_search::get_setting('IndexerSaveTaskEveryNthCycle'));
     while (true) {
         // save the task every N cycles
         if ($cycle_number % $save_every == 0) {
             $indexer->save_task();
         }
         switch ($indexer->next()) {
             case nc_search_indexer::TASK_FINISHED:
                 // we're done
                 return true;
             case nc_search_indexer::TASK_STEP_FINISHED:
                 if ($delay) {
                     sleep($delay);
                 }
                 break;
             case nc_search_indexer::TASK_STEP_SKIPPED:
                 break;
             default:
                 throw new nc_search_exception("Incorrect return value from nc_search_indexer::next()");
         }
         $cycle_number++;
     }
 }
예제 #4
0
파일: html.php 프로젝트: Blu2z/implsk
 /**
  * If the document has a meta tag named "robots" (or named after the crawler's
  * user agent) whose content contains 'noindex', it must not be indexed.
  * Note: the attribute comparison is case-sensitive.
  * The check is skipped entirely when 'ObeyMetaNoindex' is off.
  *
  * @return boolean
  */
 public function should_index()
 {
     if (nc_search::should('ObeyMetaNoindex')) {
         $bot_name = nc_search::get_setting('CrawlerUserAgent');
         $xpath_query = '//meta[((@name="robots") or (@name="' . $bot_name . '")) and (contains(@content, "noindex"))]';
         $noindex_tags = $this->xpath($xpath_query);
         return $noindex_tags->length == 0;
     }
     return true;
 }
예제 #5
0
파일: analyzer.php 프로젝트: Blu2z/implsk
 /**
  * Configures the tokenizer from the 'IgnoreNumbers' and 'MaxTermsPerField'
  * settings.
  */
 public function __construct()
 {
     // Should numbers be skipped? Pick the matching tokenizer expression.
     $this->ignore_numbers = nc_search::should('IgnoreNumbers');
     if ($this->ignore_numbers) {
         $this->tokenizer_regexp = $this->regexp_alpha;
     } else {
         $this->tokenizer_regexp = $this->regexp_alnum;
     }
     // Term limits are only set when a positive maximum is configured.
     $terms_per_field = nc_search::get_setting('MaxTermsPerField');
     if ($terms_per_field > 0) {
         $this->max_terms = $terms_per_field + 2;
         $this->max_chunks = $terms_per_field * 2 + 2;
     }
 }
예제 #6
0
파일: minlength.php 프로젝트: Blu2z/implsk
 /**
  * Drops the terms that are shorter (in UTF-8 characters) than the
  * 'MinWordLength' setting. A setting below 2 disables the filter.
  *
  * @param array $terms
  * @return array
  */
 public function filter(array $terms)
 {
     $threshold = nc_search::get_setting('MinWordLength');
     if ($threshold < 2) {
         return $terms;
     }
     $kept = array();
     $total = count($terms);
     $idx = 0;
     while ($idx < $total) {
         $term = $terms[$idx];
         if (mb_strlen($term, 'UTF-8') >= $threshold) {
             $kept[] = $term;
         }
         $idx++;
     }
     return $kept;
 }
예제 #7
0
파일: parser.php 프로젝트: Blu2z/implsk
 /**
  * @param string $default_operator  "AND"|"OR" (case-sensitive); any other
  *   value is replaced by the 'DefaultBooleanOperator' setting.
  * @param bool $ignore_numbers    null means "use the 'IgnoreNumbers' setting"
  */
 public function __construct($default_operator = null, $ignore_numbers = null)
 {
     $is_supported = $default_operator == "AND" || $default_operator == "OR";
     if (!$default_operator || !$is_supported) {
         $default_operator = nc_search::get_setting("DefaultBooleanOperator");
     }
     $this->default_operator = $default_operator;

     if ($ignore_numbers === null) {
         $ignore_numbers = nc_search::should('IgnoreNumbers');
     }
     $this->ignore_numbers = $ignore_numbers;
     if ($ignore_numbers) {
         // only letters may form a term when numbers are ignored
         $this->term_chars = "\\pL";
     }
 }
예제 #8
0
파일: scheduler.php 프로젝트: Blu2z/implsk
 /**
  * Schedules re-indexing of an area (or rule) at the given time.
  *
  * If the same area string is already queued for a time not later than
  * $timestamp + 'MinScheduleInterval', no new entry is created; the existing
  * one is only moved earlier when $timestamp precedes its start time.
  *
  * @param string $area_string
  * @param integer $timestamp
  */
 public static function schedule_indexing($area_string, $timestamp)
 {
     // Both numbers end up inside the SQL text below, so force them to
     // integers instead of trusting the caller / the stored setting.
     $timestamp = (int) $timestamp;
     $interval = $timestamp + (int) nc_search::get_setting('MinScheduleInterval');
     $query = "SELECT * FROM `%t%`" . " WHERE `StartTime` <= {$interval}" . "   AND `AreaString` = '" . nc_search_util::db_escape($area_string) . "'";
     // type is ignored
     $intent = nc_search::load('nc_search_scheduler_intent', $query)->first();
     if ($intent) {
         // such a schedule entry already exists
         if ($intent->get('start_time') > $timestamp) {
             // let's run it sooner
             $intent->set('start_time', $timestamp);
         }
     } else {
         $intent = new nc_search_scheduler_intent(array('start_time' => $timestamp, 'type' => nc_search_scheduler_intent::ON_REQUEST, 'area_string' => $area_string));
     }
     $intent->save();
 }
예제 #9
0
파일: crawler.php 프로젝트: Blu2z/implsk
 /**
  * Creates the HTTP client used by the crawler and attaches a listener that
  * receives the maximum document size and the list of accepted MIME types.
  */
 public function __construct()
 {
     // both libraries are expected under /netcat/require/lib
     require_once 'HTTP/Client.php';
     require_once 'HTTP/Request/Listener.php';

     $user_agent = nc_search::get_setting('CrawlerUserAgent');
     $this->http_client = new HTTP_Client(null, array('User-Agent' => $user_agent));
     $client = $this->http_client;
     $client->enableHistory(false);
     $client->setMaxRedirects(nc_search::get_setting('CrawlerMaxRedirects'));

     $size_limit = nc_search::get_setting('CrawlerMaxDocumentSize');
     // Content types for which a document parser extension is registered.
     // (An earlier variant loaded 'nc_search_extension_rule' objects and
     // de-duplicated their content types; the direct query replaces it.)
     $mime_types = nc_Core::get_object()->db->get_col("SELECT DISTINCT `ContentType`\n                                               FROM `Search_Extension`\n                                              WHERE `ExtensionInterface` = 'nc_search_document_parser'");

     $client->attach(new nc_search_indexer_crawler_listener($size_limit, $mime_types), true);
 }
예제 #10
0
파일: synonyms.php 프로젝트: Blu2z/implsk
 /**
  * The word list has to be normalized before the record is saved: either it
  * is run through the language filters, or (when 'dont_filter' is set) only
  * its letter case is converted.
  *
  * @throws nc_search_data_exception when fewer than two words remain
  */
 public function save()
 {
     $mb_case = nc_search::get_setting('FilterStringCase');
     $apply_filter = !$this->get('dont_filter');
     $list = array();
     foreach ($this->get('words') as $word) {
         $word = trim($word);
         if ($word === '') {
             // skip empty values
             continue;
         }
         // Convert the case here only if no filters are going to run later.
         // The encoding is passed explicitly, as the case filter does;
         // without it mb_convert_case() depends on the mbstring internal
         // encoding. NOTE(review): assumes the words are UTF-8 - confirm.
         $list[] = $apply_filter ? $word : mb_convert_case($word, $mb_case, 'UTF-8');
     }
     if ($apply_filter) {
         $context = new nc_search_context(array('language' => $this->get('language')));
         $list = nc_search_extension_manager::get('nc_search_language_filter', $context)->until_first('nc_search_language_filter_synonyms')->apply('filter', $list);
     }
     if (sizeof($list) < 2) {
         throw new nc_search_data_exception(NETCAT_MODULE_SEARCH_ADMIN_SYNONYM_LIST_MUST_HAVE_AT_LEAST_TWO_WORDS);
     }
     $this->set('words', $list);
     parent::save();
 }
예제 #11
0
파일: result.php 프로젝트: Blu2z/implsk
 /**
  * Sets the document's 'context' property (fragments of the text that matched
  * the query) and a highlighted 'title'.
  * (Not related to nc_search_context.)
  *
  * @param nc_search_result_document $doc
  * @return nc_search_result_document the same document
  */
 protected function document_set_context(nc_search_result_document $doc)
 {
     // A search service may, in theory, have set 'context' itself already;
     // in that case, or when highlighting is off, leave the document alone.
     if ($doc->get('context') || !$this->should_highlight()) {
         return $doc;
     }
     $language = $this->get_query()->get('language');
     $context_text = $this->highlight($doc->get('content'), $language, nc_search::get_setting('ResultContextMaxNumberOfWords'));
     $doc->set('context', $context_text);
     $title_text = $this->highlight($doc->get('title'), $language, nc_search::get_setting('ResultTitleMaxNumberOfWords'));
     $doc->set('title', $title_text);
     return $doc;
 }
예제 #12
0
파일: title.php 프로젝트: Blu2z/implsk
<?php

/**
 * Returns title suggestions for the search box as JSON.
 *
 * Incoming parameters:
 *  - term
 *  - language
 *
 * @global $catalogue
 */
$NETCAT_FOLDER = realpath("../../../../");
require_once "{$NETCAT_FOLDER}/vars.inc.php";
require $INCLUDE_FOLDER . "index.php";
// read the parameters
$input = trim((string) $nc_core->input->fetch_get('term'));
if (!nc_search::should('EnableQuerySuggest') || nc_search::get_setting('SuggestMode') != 'titles') {
    die("[]");
}
// Convert to UTF-8 first and only then measure the length: counting
// characters of the unconverted string with the implicit mbstring encoding
// gives a wrong number whenever the two encodings differ.
$input = $nc_core->utf8->conv($nc_core->NC_CHARSET, 'utf-8', $input);
if (mb_strlen($input, 'UTF-8') < nc_search::get_setting('SuggestionsMinInputLength')) {
    die("[]");
}
$language = $nc_core->input->fetch_get('language');
if (!$language) {
    $language = $nc_core->lang->detect_lang(1);
}
// looking up matching titles is provider-dependent
$suggestions = nc_search::get_provider()->suggest_titles($input, $language, $catalogue);
if (!$nc_core->NC_UNICODE) {
    $suggestions = $nc_core->utf8->array_utf2win($suggestions);
}
print nc_array_json($suggestions);
예제 #13
0
파일: index.php 프로젝트: Blu2z/implsk
<?php

/* $Id: index.php 8366 2012-11-07 16:30:14Z aix $ */
/**
 * Starts re-indexing "in real time" (on request, from the admin interface).
 *
 * Reads 'rule_id', 'token' and 'continue' from the GET parameters and hands
 * them to the search admin controller with the 'indexing_on_request' view.
 */
$NETCAT_FOLDER = realpath("../../../../");
require_once "{$NETCAT_FOLDER}/vars.inc.php";
$use_gzip_compression = false;
require_once "{$ADMIN_FOLDER}/function.inc.php";
require_once "../function.inc.php";
// slow the work down if required
$delay = trim(nc_search::get_setting('IndexerInBrowserSlowdownDelay'));
// the setting is in seconds
if ($delay) {
    define('NC_SEARCH_INDEXER_DELAY_VALUE', (int) ($delay * 1000000));
    // the constant is in microseconds
    // Tick handler: sleeps for the configured delay and prints one space.
    // NOTE(review): the space presumably keeps the browser connection alive - confirm.
    function nc_search_indexer_delay()
    {
        usleep(NC_SEARCH_INDEXER_DELAY_VALUE);
        print " ";
    }
    register_tick_function('nc_search_indexer_delay');
    declare (ticks=10000);
}
$input = nc_Core::get_object()->input;
$request = array('.page_title' => NETCAT_MODULE_SEARCH_ADMIN_INDEXING_TITLE, 'view' => 'indexing_on_request', 'rule_id' => $input->fetch_get('rule_id'), 'token' => $input->fetch_get('token'), 'continue' => $input->fetch_get('continue'));
nc_search_admin_controller::process_request($request);
예제 #14
0
파일: index.php 프로젝트: Blu2z/implsk
 /**
  * Splits text into tokens on every run of non-letter characters (digits are
  * kept as token characters unless 'IgnoreNumbers' is on). At most
  * 'MaxTermsPerField' tokens are returned when that setting is positive.
  *
  * @param string $string Text to tokenize
  * @return array
  */
 protected function tokenize_text($string)
 {
     // split words containing numbers into number+string parts
     $string = preg_replace("/(\\pL)(\\d)/u", "\$1 \$2", $string);
     $string = preg_replace("/(\\d)(\\pL)/u", "\$1 \$2", $string);
     $delimiter = nc_search::should('IgnoreNumbers') ? '/[^\\pL]+/u' : '/[^\\pL\\d]+/u';
     $max_terms = (int) nc_search::get_setting('MaxTermsPerField');
     if ($max_terms <= 0) {
         // no positive limit configured: same call as before
         return preg_split($delimiter, $string, $max_terms);
     }
     // With a positive limit preg_split() puts the whole unsplit rest of the
     // text into the last element. Ask for one extra element so that the rest
     // lands there, then drop it: every returned item is a real token.
     $tokens = preg_split($delimiter, $string, $max_terms + 1);
     if (!is_array($tokens)) {
         // preg_split() failed; pass its result through unchanged
         return $tokens;
     }
     return array_slice($tokens, 0, $max_terms);
 }
예제 #15
0
파일: console.php 프로젝트: Blu2z/implsk
// Console-only script: a request that carries REMOTE_ADDR is rejected.
if (isset($_SERVER['REMOTE_ADDR'])) {
    die("Access denied.");
}
$NETCAT_FOLDER = realpath(dirname(__FILE__) . "/../../../../");
// Set the environment variables that are absent when run from the console.
putenv("DOCUMENT_ROOT={$NETCAT_FOLDER}");
putenv("HTTP_HOST=localhost");
putenv("REQUEST_URI=/");
require_once "{$NETCAT_FOLDER}/vars.inc.php";
require_once $ROOT_FOLDER . "connect_io.php";
$nc_core = nc_Core::get_object();
$nc_core->modules->load_env('ru');
$lang = $nc_core->lang->detect_lang();
require_once $ADMIN_FOLDER . "lang/" . $lang . ".php";
error_reporting(E_PARSE | E_ERROR | E_WARNING | E_USER_ERROR | E_USER_WARNING);
// slow the work down if required
$delay = trim(nc_search::get_setting('IndexerConsoleSlowdownDelay'));
// the setting is in seconds
if ($delay) {
    define('NC_SEARCH_INDEXER_DELAY_VALUE', (int) ($delay * 1000000));
    // the constant is in microseconds
    // Tick handler: sleeps for the configured delay.
    function nc_search_indexer_delay()
    {
        usleep(NC_SEARCH_INDEXER_DELAY_VALUE);
    }
    register_tick_function('nc_search_indexer_delay');
    declare (ticks=10000);
}
// flush and close every active output buffer
while (@ob_end_flush()) {
}
// Change nc_search::LOG_CONSOLE to another value if you want to receive
// more or less information about the re-indexing
예제 #16
0
require_once $ROOT_FOLDER . "connect_io.php";
$nc_core = nc_Core::get_object();
$nc_core->modules->load_env('ru');
$lang = $nc_core->lang->detect_lang();
require_once $ADMIN_FOLDER . "lang/" . $lang . ".php";
error_reporting(E_PARSE | E_ERROR | E_WARNING | E_USER_ERROR | E_USER_WARNING);
// slow the work down if required
$delay = trim(nc_search::get_setting('IndexerConsoleSlowdownDelay'));
// the setting is in seconds
if ($delay) {
    define('NC_SEARCH_INDEXER_DELAY_VALUE', (int) ($delay * 1000000));
    // the constant is in microseconds
    // Tick handler: sleeps for the configured delay.
    function nc_search_indexer_delay()
    {
        usleep(NC_SEARCH_INDEXER_DELAY_VALUE);
    }
    register_tick_function('nc_search_indexer_delay');
    declare (ticks=10000);
}
// flush and close every active output buffer
while (@ob_end_flush()) {
}
nc_search::register_logger(new nc_search_logger_plaintext(nc_search::LOG_CONSOLE));
// Hung tasks are removed by get_current_task() only when restarting them is disabled.
$remove_hung_tasks = !nc_search::should('IndexerConsoleRestartHungTasks');
$current_task = nc_search_indexer::get_current_task($remove_hung_tasks);
// Resume the current task only if it was started by the console batch runner
// and is either idle or (with restarting enabled) inactive for longer than
// 'IndexerRemoveIdleTasksAfter'.
$continue = $current_task instanceof nc_search_indexer_task && $current_task->get('runner_type') == nc_search::INDEXING_CONSOLE_BATCH && ($current_task->get('is_idle') || nc_search::should('IndexerConsoleRestartHungTasks') && time() > $current_task->get('last_activity') + nc_search::get_setting("IndexerRemoveIdleTasksAfter"));
if ($continue) {
    $indexer = new nc_search_indexer();
    $indexer->resume($current_task, new nc_search_indexer_runner_batch());
} else {
    // nothing to resume: let the scheduler run
    nc_search_scheduler::run(nc_search::INDEXING_CONSOLE_BATCH);
}
예제 #17
0
파일: templates.php 프로젝트: Blu2z/implsk
} else {
    ?>
    <fieldset>
        <legend><?php 
    echo NETCAT_MODULE_SEARCH_ADMIN_INTERFACE_FORM_TEMPLATES;
    ?>
</legend>
        <textarea class="code"
                  name="s[SearchFormTemplate]"><?php 
    echo htmlspecialchars(nc_search::get_setting('SearchFormTemplate'));
    ?>
</textarea>
    </fieldset>

    <fieldset>
        <legend><?php 
    echo NETCAT_MODULE_SEARCH_ADMIN_INTERFACE_ADVANCED_FORM_TEMPLATE;
    ?>
</legend>
        <textarea class="code"
                  name="s[AdvancedSearchFormTemplate]"><?php 
    echo htmlspecialchars(nc_search::get_setting('AdvancedSearchFormTemplate'));
    ?>
</textarea>
    </fieldset>
<?php 
}
?>

</form>
예제 #18
0
        }
        nc_search::save_setting($k, $v);
    }
    // check SearchProvider
    if ($search_provider_changed) {
        $new_provider = $input['SearchProvider'];
        if (@class_exists($new_provider)) {
            try {
                $provider = new $new_provider();
                if ($provider instanceof nc_search_provider) {
                    $provider->first_run();
                } else {
                    nc_print_status(NETCAT_MODULE_SEARCH_ADMIN_SETTINGS_INCORRECT_PROVIDER_CLASS, 'error', array($new_provider));
                }
            } catch (Exception $e) {
                nc_print_status(NETCAT_MODULE_SEARCH_ADMIN_SETTINGS_PROVIDER_CLASS_INITIALIZATION_ERROR, 'error', array($new_provider, $e->getMessage()));
            }
        } else {
            nc_print_status(NETCAT_MODULE_SEARCH_ADMIN_SETTINGS_PROVIDER_CLASS_NOT_FOUND, 'error', array($new_provider));
        }
    }
    // done saving
    nc_print_status(NETCAT_MODULE_SEARCH_ADMIN_SETTINGS_SAVED, 'ok');
}
// Names of the system settings that are editable on this page.
$settings = array(
    'ComponentID',
    'SearchProvider',
    'IndexerSecretKey',
    'IndexerNormalizeLinks',
    'IndexerSaveTaskEveryNthCycle',
    'IndexerRemoveIdleTasksAfter',
    'IndexerTimeThreshold',
    'IndexerMemoryThreshold',
    'IndexerConsoleMemoryThreshold',
    'IndexerConsoleTimeThreshold',
    'IndexerConsoleDocumentsPerSession',
    'IndexerConsoleSlowdownDelay',
    'IndexerConsoleRestartHungTasks',
    'IndexerInBrowserSlowdownDelay',
    'MinScheduleInterval',
    'CrawlerMaxRedirects',
    'NumberOfEntriesPerSitemap',
    'MaxTermsPerQuery',
    'MaxTermsPerField',
    'ZendSearchLucene_MaxBufferedDocs',
    'ZendSearchLucene_MaxMergeDocs',
    'ZendSearchLucene_MergeFactor',
    'PhpMorphy_LoadDictsDuringIndexing',
    'DatabaseIndex_LoadAllCodesForIndexing',
    'DatabaseIndex_MaxSimilarityCandidates',
    'DatabaseIndex_MaxRewriteTerms',
    'DatabaseIndex_UseUtf8Levenshtein',
    'DatabaseIndex_MaxProximityTerms',
    'DatabaseIndex_MaxProximityDistance',
    'DatabaseIndex_AlwaysGetTotalCount',
    'DatabaseIndex_OptimizationFrequency',
);
// One plain string field per setting; the setting name doubles as caption.
$form_description = array();
foreach ($settings as $s) {
    $form_description[$s] = array(
        'type' => 'string',
        'caption' => $s,
        'value' => nc_search::get_setting($s),
    );
}
$form = new nc_a2f($form_description, "settings");
echo "<form class='settings system_settings' method='POST'>";
echo "<input type='hidden' name='view' value='systemsettings' />";
echo $form->render("<div>", "", "</div>", "");
echo "</form>";
예제 #19
0
    <legend><?php 
echo NETCAT_MODULE_SEARCH_ADMIN_SETTING_QUERY_FEATURES;
?>
</legend>
    <div class="setting">
        <?php 
echo NETCAT_MODULE_SEARCH_ADMIN_SETTING_QUERY_DEFAULT_OPERATOR;
?>
:
        <select name="s[DefaultBooleanOperator]">
            <option value="AND"><?php 
echo NETCAT_MODULE_SEARCH_ADMIN_SETTING_QUERY_DEFAULT_OPERATOR_AND;
?>
</option>
            <option value="OR"<?php 
echo nc_search::get_setting('DefaultBooleanOperator') == 'OR' ? ' selected' : '';
?>
>
                <?php 
echo NETCAT_MODULE_SEARCH_ADMIN_SETTING_QUERY_DEFAULT_OPERATOR_OR;
?>
            </option>
        </select>
    </div>
    <?php 
echo $this->setting_cb('AllowTermBoost', NETCAT_MODULE_SEARCH_ADMIN_SETTING_QUERY_TERM_BOOST);
?>
    <?php 
echo $this->setting_cb('AllowProximitySearch', NETCAT_MODULE_SEARCH_ADMIN_SETTING_QUERY_PROXIMITY_SEARCH);
?>
    <?php 
예제 #20
0
파일: logger.php 프로젝트: Blu2z/implsk
 /**
  * A logger may have its own message level; when none is passed, the
  * 'LogLevel' setting is used.
  * @param int $level
  */
 public function __construct($level = null)
 {
     if (!$level) {
         $level = nc_search::get_setting('LogLevel');
     }
     $this->level = $level;
 }
예제 #21
0
파일: task.php 프로젝트: Blu2z/implsk
 /**
  * Builds the area that must not be crawled, from two sources:
  * robots.txt (if 'CrawlerObeyRobotsTxt' is on) and the newline-separated
  * regular expressions in the 'ExcludeUrlRegexps' setting.
  *
  * @return nc_search_area
  */
 protected function get_disallowed_areas()
 {
     // (1) robots.txt
     $parts = nc_search::should('CrawlerObeyRobotsTxt') ? $this->get_robots_txt_area_parts() : array();
     // (2) Settings (ExcludeUrlRegexps): one expression per line
     $patterns = preg_split("/\\s*\n/u", nc_search::get_setting('ExcludeUrlRegexps'), -1, PREG_SPLIT_NO_EMPTY);
     foreach ($patterns as $pattern) {
         // "@" is the delimiter, so it is escaped inside the expression
         $delimited = "@" . addcslashes($pattern, "@") . "@u";
         $parts[] = new nc_search_area_regexp(array('regexp' => $delimited));
     }
     return new nc_search_area($parts);
 }
예제 #22
0
파일: batch.php 프로젝트: Blu2z/implsk
 /**
  * @return int the 'IndexerConsoleDocumentsPerSession' setting as an integer
  */
 protected function get_max_cycles_number()
 {
     $documents_per_session = nc_search::get_setting('IndexerConsoleDocumentsPerSession');
     return (int) $documents_per_session;
 }
예제 #23
0
파일: sitemap.php 프로젝트: Blu2z/implsk
// Sitemap generator: prints either a <sitemapindex> (when there are more
// documents than fit into one sitemap) or loads one page of entries.
header("Content-type: text/xml");
//$NETCAT_FOLDER = realpath("../../../");
// The NetCat root is four path segments above this file; the directory
// separator is taken from __FILE__ itself.
$NETCAT_FOLDER = join(strstr(__FILE__, "/") ? "/" : "\\", array_slice(preg_split("/[\\/\\\\]+/", __FILE__), 0, -4)) . (strstr(__FILE__, "/") ? "/" : "\\");
require_once "{$NETCAT_FOLDER}/vars.inc.php";
//require ($INCLUDE_FOLDER."index.php");
require $ROOT_FOLDER . "connect_io.php";
$nc_core->modules->load_env();
// the XML declaration must be the very first output
print '<?xml version="1.0" encoding="UTF-8"?>';
$scheme = isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] != 'off' ? 'https' : 'http';
$url_prefix = "{$scheme}://{$_SERVER['HTTP_HOST']}";
$site = $nc_core->catalogue->get_by_host_name($_SERVER['HTTP_HOST']);
// The id and the page size are placed into SQL below: force both to integers.
// An unknown host then selects nothing instead of producing broken SQL.
$site_id = (int) $site['Catalogue_ID'];
$start = $nc_core->input->fetch_get("start");
$max_num_urls = (int) nc_search::get_setting('NumberOfEntriesPerSitemap');
if (!strlen((string) $start)) {
    // if there are too many results, output a sitemapindex
    $num_urls = $db->get_var("SELECT COUNT(*)\n                              FROM `Search_Document` \n                             WHERE `Catalogue_ID` = {$site_id} AND `IncludeInSitemap`=1");
    // the "> 0" guard keeps the page count below from dividing by zero
    if ($max_num_urls > 0 && $num_urls > $max_num_urls) {
        // HTTP_HOST and REQUEST_URI come from the request: escape them
        // before they are written into the XML document
        $url = htmlspecialchars("{$url_prefix}{$_SERVER['REQUEST_URI']}?start=", ENT_QUOTES, 'UTF-8');
        print '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";
        for ($i = 0, $last = ceil($num_urls / $max_num_urls); $i < $last; $i++) {
            print "<sitemap><loc>" . $url . $i * $max_num_urls . "</loc></sitemap>\n";
        }
        print "</sitemapindex>\n";
        die;
    }
}
// a negative OFFSET is not valid SQL
$start = max(0, (int) $start);
$entries = $db->get_results("SELECT `Path`, \n                                    `SitemapChangefreq`, \n                                    `SitemapPriority`,\n                                    DATE_FORMAT(`LastModified`, '%Y-%m-%dT%T') AS `LastModified`\n                               FROM `Search_Document`\n                              WHERE `Catalogue_ID` = {$site_id} AND `IncludeInSitemap`=1\n                              LIMIT {$max_num_urls} OFFSET {$start}", ARRAY_A);
예제 #24
0
<?php

/* $Id: netcat_cron.php 8456 2012-11-23 10:42:55Z aix $ */
/**
 * Entry point for runs started by the NetCat "cron".
 *
 * The request must carry a 'secret_key' GET parameter equal to the
 * 'IndexerSecretKey' setting; otherwise the attempt is logged and refused.
 */
$NETCAT_FOLDER = realpath(dirname(__FILE__) . "/../../../../");
require_once "{$NETCAT_FOLDER}/vars.inc.php";
require_once $ROOT_FOLDER . "connect_io.php";
$nc_core = nc_Core::get_object();
$nc_core->modules->load_env('ru');
$lang = $nc_core->lang->detect_lang();
require_once $ADMIN_FOLDER . "lang/" . $lang . ".php";
error_reporting(E_PARSE | E_ERROR | E_WARNING | E_USER_ERROR | E_USER_WARNING);
// flush and close every active output buffer
while (@ob_end_flush()) {
}
$secret_key = nc_Core::get_object()->input->fetch_get("secret_key");
$expected_key = nc_search::get_setting('IndexerSecretKey');
// Compare as strings, strictly: the loose "!=" treats numeric-looking
// strings as numbers, so e.g. "1e3" would be accepted for the key "1000".
// NOTE(review): an empty configured key still matches a missing parameter,
// exactly as before - consider refusing access when no key is configured.
$key_matches = !is_array($secret_key) && (string) $secret_key === (string) $expected_key;
if (!$key_matches) {
    $file = __FILE__;
    $shown_key = is_array($secret_key) ? 'Array' : $secret_key;
    // REMOTE_ADDR is not set when the script is not run through a web server
    $remote_addr = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';
    nc_search::log(nc_search::LOG_ERROR, "Attempt to access '{$file}' with a wrong secret key '{$shown_key}' from {$remote_addr}");
    die("Access denied.");
}
nc_search::register_logger(new nc_search_logger_plaintext());
nc_search_scheduler::run(nc_search::INDEXING_NC_CRON);
예제 #25
0
 /**
  * Keeps calling $indexer->next() until the task is finished, the connection
  * check fails or interrupt_if_needed() asks to stop. The task is saved every
  * 'IndexerSaveTaskEveryNthCycle' cycles.
  *
  * @param nc_search_indexer $indexer
  * @throws nc_search_exception when next() returns an unknown status
  * @return boolean true if the task is finished (or was cancelled because the
  *   connection check failed), false if the loop was interrupted
  */
 public function loop(nc_search_indexer $indexer)
 {
     $cycle_number = 0;
     // Clamp to at least 1: a zero or empty setting would otherwise make the
     // modulo below divide by zero.
     $save_cycles = max(1, (int) nc_search::get_setting('IndexerSaveTaskEveryNthCycle'));
     while (true) {
         // stop prematurely:
         if (!$this->check_connection()) {
             // nobody listens anyway
             $indexer->cancel();
             return true;
         }
         if ($this->interrupt_if_needed($indexer, $cycle_number)) {
             return false;
         }
         // save the task every N cycles
         if ($cycle_number % $save_cycles == 0) {
             $indexer->save_task();
         }
         switch ($indexer->next()) {
             case nc_search_indexer::TASK_FINISHED:
                 // we're done
                 return true;
             case nc_search_indexer::TASK_STEP_FINISHED:
                 if ($this->delay) {
                     // check for an interrupt once more before going to sleep
                     if ($this->interrupt_if_needed($indexer, $cycle_number)) {
                         return false;
                     }
                     sleep($this->delay);
                 }
                 break;
             case nc_search_indexer::TASK_STEP_SKIPPED:
                 break;
             default:
                 throw new nc_search_exception("Incorrect return value from nc_search_indexer::next()");
         }
         $cycle_number++;
     }
 }
예제 #26
0
파일: translator.php 프로젝트: Blu2z/implsk
 /**
  * Finds the codes of stored terms whose similarity to $term1 is at least
  * $min_similarity. Similarity is 1 - (edit distance / length of the shorter
  * of the two terms).
  *
  * @param string $term1
  * @param float $min_similarity
  * @return array of similar term codes (array("____") if no similar terms were found)
  */
 protected function get_similar_terms($term1, $min_similarity)
 {
     $max_candidates = (int) nc_search::get_setting("DatabaseIndex_MaxSimilarityCandidates");
     $max_results = (int) nc_search::get_setting("DatabaseIndex_MaxRewriteTerms");
     $use_utf_levenshtein = (bool) nc_search::get_setting("DatabaseIndex_UseUtf8Levenshtein");
     $term_length = mb_strlen($term1, 'UTF-8');
     // intval() == floor() here
     $max_distance = intval((1 - $min_similarity) * $term_length);
     $min_length = $term_length - $max_distance;
     $max_length = $term_length + $max_distance;
     // Only candidates are selected by length in SQL: checking the match in
     // PHP is up to 10 times faster than doing it with a stored function in
     // MySQL.
     $query = "SELECT `Term`, `Code`\n                    FROM `{$this->term_table_name}`\n                   WHERE `Length` BETWEEN {$min_length} AND {$max_length}\n                   LIMIT {$max_candidates}";
     $terms = $this->get_db()->get_results($query, ARRAY_A);
     $similar = array();
     if ($terms) {
         foreach ($terms as $row) {
             $shorter_length = min($term_length, mb_strlen($row['Term'], 'UTF-8'));
             // An empty term on either side would make the similarity formula
             // divide by zero; such a pair is never counted as similar.
             if ($shorter_length > 0) {
                 // levenshtein() is not UTF-8-aware and gives wrong results
                 // when a single-byte letter is replaced by a multi-byte one,
                 // e.g. levenshtein("Z", "Я") == 2 instead of 1. It is still
                 // the default because it is more than twice as fast as the
                 // PHP implementation, and this loop may process a large
                 // number of candidate terms.
                 $distance = $use_utf_levenshtein ? $this->levenshtein_utf8($term1, $row['Term']) : levenshtein($term1, $row['Term']);
                 $terms_similarity = 1 - $distance / $shorter_length;
                 if ($terms_similarity >= $min_similarity) {
                     $similar[] = $row['Code'];
                 }
             }
             if (sizeof($similar) >= $max_results) {
                 break;
             }
         }
     }
     if (!sizeof($similar)) {
         // haven't found any similar terms!
         $similar[] = "____";
     }
     return $similar;
 }
예제 #27
0
파일: ui.php 프로젝트: Blu2z/implsk
 /**
  * Returns the path of the search section on the site with the given id,
  * or false when the site has no subdivision with the search component.
  *
  * Results for $with_host = true are cached in $this->paths.
  *
  * @global nc_db $db
  * @throws Exception @see nc_catalogue::get_by_id()
  * @param integer $site_id
  * @param boolean $with_host prefix the path with "http://<site domain>"
  * @return string|false
  */
 public function get_search_url($site_id, $with_host = true)
 {
     if ($with_host && isset($this->paths[$site_id])) {
         return $this->paths[$site_id];
     }
     global $db, $nc_core;
     $folder_data = $db->get_row("SELECT sub.`Hidden_URL` AS `path`, sub.`Subdivision_ID` AS `id`\n               FROM `Subdivision` AS `sub`, `Sub_Class` AS `c`\n              WHERE c.`Class_ID` = " . (int) nc_search::get_setting("ComponentID") . "\n                AND c.`Subdivision_ID` = sub.`Subdivision_ID`\n                AND sub.`Catalogue_ID` = " . (int) $site_id . "\n              LIMIT 1", ARRAY_A);
     if (!$folder_data) {
         // "not found" does not depend on $with_host, so it is safe to cache
         $this->paths[$site_id] = false;
         return false;
     }
     if (nc_module_check_by_keyword('routing')) {
         $path = nc_routing::get_folder_path($folder_data['id']);
     } else {
         $path = $nc_core->SUB_FOLDER . $folder_data['path'];
     }
     if (!$with_host) {
         // The cache is read only for $with_host = true and is keyed by site
         // id alone. Storing a host-less path there would make later
         // $with_host = true calls return it without the host.
         return $path;
     }
     $host = $nc_core->catalogue->get_by_id($site_id, 'Domain');
     if ($host) {
         $path = "http://{$host}{$path}";
     }
     $this->paths[$site_id] = $path;
     return $path;
 }
예제 #28
0
파일: document.php 프로젝트: Blu2z/implsk
 /**
  * Returns a document property. 'saved_content' is special: when the
  * 'MaxDocumentPreviewTextLengthInKbytes' setting is non-empty, the intact
  * content is returned, shortened to at most that many kilobytes and cut at
  * the last space that fits.
  *
  * @param string $option
  * @return mixed
  */
 public function get($option)
 {
     if ($option == 'saved_content' && ($max_length = nc_search::get_setting('MaxDocumentPreviewTextLengthInKbytes'))) {
         $max_length = (int) ($max_length * 1024);
         $content = $this->properties['intact_content'];
         $content_length = strlen($content);
         if ($content_length > $max_length) {
             // the negative offset restricts the search to the allowed prefix
             $cut_at = strrpos($content, " ", $max_length - $content_length);
             if ($cut_at === false) {
                 // No space to cut at: substr(..., 0, false) would return an
                 // empty string, so fall back to a hard cut at the limit.
                 // mb_strcut() avoids splitting a multi-byte character.
                 // NOTE(review): assumes the content is UTF-8 - confirm.
                 $content = mb_strcut($content, 0, $max_length, 'UTF-8');
             } else {
                 $content = substr($content, 0, $cut_at);
             }
         }
         return $content;
     }
     return parent::get($option);
 }
예제 #29
0
파일: indexer.php 프로젝트: Blu2z/implsk
 /**
  * Is a re-indexing task running at the moment?
  *
  * When $remove_hung_tasks is true and the task has shown no activity for
  * longer than 'IndexerRemoveIdleTasksAfter', the task is deleted, the
  * `Search_Link` and `Search_LinkReferrer` tables are truncated, an error is
  * logged and false is returned.
  *
  * @param bool $remove_hung_tasks
  * @return false|nc_search_indexer_task
  */
 public static function get_current_task($remove_hung_tasks = true)
 {
     $tasks = nc_search::load_all('nc_search_indexer_task', true);
     if (!sizeof($tasks)) {
         return false;
     }
     // have we hung?
     $task = $tasks->first();
     if ($remove_hung_tasks && time() > $task->get('last_activity') + nc_search::get_setting("IndexerRemoveIdleTasksAfter")) {
         $task->delete();
         $db = nc_Core::get_object()->db;
         $db->query("TRUNCATE TABLE `Search_Link`");
         $db->query("TRUNCATE TABLE `Search_LinkReferrer`");
         // date() instead of strftime(): strftime() is deprecated since
         // PHP 8.1, and for this purely numeric format the output is the same.
         $last_active = date("Y-m-d H:i:s", (int) $task->get('last_activity'));
         nc_search::log(nc_search::LOG_ERROR, "Indexer task was last active at " . $last_active . ". Task removed.");
         return false;
     }
     return $task;
 }
예제 #30
0
파일: zend.php 프로젝트: Blu2z/implsk
 /**
  * Returns up to 'NumberOfSuggestions' title suggestions for the typed text.
  * Titles that start with the input (looked up in the documents table) come
  * first, followed by matches from the index when
  * 'SearchTitleBaseformsForSuggestions' is on.
  *
  * @param string $input
  * @param string $language
  * @param integer $site_id
  * @return array of array("label" => title, "url" => url)
  */
 public function suggest_titles($input, $language, $site_id)
 {
     // the suggestions themselves
     $suggestions = array();
     $titles = array();
     // both values are placed into query text below: force them to integers
     $limit = (int) nc_search::get_setting('NumberOfSuggestions');
     $site_id = (int) $site_id;
     // search in the index (so the matches are made after filter processing,
     // i.e. on base forms)
     if (nc_search::should('SearchTitleBaseformsForSuggestions')) {
         $last_space = strrpos($input, " ");
         $as_phrase = nc_search::should('SearchTitleAsPhraseForSuggestions');
         $b1 = $as_phrase ? '"' : '(';
         $b2 = $as_phrase ? '"' : ')';
         /* @todo check that the last word is correct/complete? */
         $query_string = "(title:{$b1}{$input}{$b2}" . ($last_space ? " OR title:{$b1}" . trim(substr($input, 0, $last_space)) . $b2 : '') . ") AND site_id:{$site_id}";
         $query = new nc_search_query($query_string);
         $query->set('limit', $limit)->set('options_to_fetch', array('title', 'site_id', 'path'))->set('language', $language);
         $documents = $this->find($query, false);
         foreach ($documents as $doc) {
             $suggestions[] = array("label" => $doc->get('title'), "url" => $doc->get('url'));
             $titles[] = '"' . nc_search_util::db_escape($doc->get('title')) . '"';
         }
         $titles = array_unique($titles);
     }
     // Exact prefix match in the documents table.
     // Ideally the database would be queried first and the index second, but
     // with an index query it is not as easy to exclude the titles that have
     // already matched.
     // "%" and "_" typed by the user must match literally, not act as LIKE
     // wildcards. NOTE(review): presumes db_escape() leaves them untouched.
     $like_prefix = addcslashes(nc_search_util::db_escape($input), '%_');
     $query = "SELECT `Catalogue_ID`, `Path`, `Title` FROM `%t%` " . ' WHERE `Title` LIKE "' . $like_prefix . '%" ' . ($titles ? " AND `Title` NOT IN (" . join(", ", $titles) . ") " : "") . " ORDER BY `Title` " . " LIMIT {$limit}";
     $documents = new nc_search_result();
     $documents->select_from_database($query);
     foreach ($documents as $doc) {
         // database matches go in front of the index matches
         array_unshift($suggestions, array("label" => $doc->get('title'), "url" => $doc->get('url')));
     }
     $suggestions = array_slice($suggestions, 0, $limit);
     return $suggestions;
 }