Esempio n. 1
0
/**
 * Normalise a raw search phrase into the string that is handed to the search backend.
 *
 * The phrase is whitespace-compacted via str_compact(), optionally stripped of leading
 * operator characters and optionally passed through remove_stopwords(). What happens
 * next depends on $bb_cfg['search_engine_type']:
 *  - 'sphinx': the phrase is reduced to an allowed character set and the "-", "*" and
 *    "|" operators are normalised; if the compacted input was entirely wrapped in double
 *    quotes, the non-empty result is wrapped in double quotes again.
 *  - anything else: the trimmed phrase is returned through DB()->escape().
 *
 * @param string $text             Raw search phrase.
 * @param bool   $ltrim_star       When true, leading " ", "*", "-" and "!" are stripped;
 *                                 otherwise only leading spaces.
 * @param bool   $remove_stopwords When true, the phrase is passed through remove_stopwords().
 * @param bool   $die_if_empty     When true and the result is falsy, bb_die() is called
 *                                 with $lang['NO_SEARCH_MATCH'].
 *
 * @return mixed The cleaned phrase (a string in the sphinx branch; whatever
 *               DB()->escape() returns otherwise).
 */
function clean_text_match($text, $ltrim_star = true, $remove_stopwords = false, $die_if_empty = false)
{
    global $bb_cfg, $lang;

    $phrase = str_compact($text);

    // Remember whether the whole phrase was quoted: the sphinx clean-up below
    // strips the quote characters, so they are re-added at the end.
    $is_quoted = preg_match('#^"[^"]+"$#', $phrase);

    $leading_junk = ' ';
    if ($ltrim_star) {
        $leading_junk = ' *-!';
    }
    $phrase = ' ' . str_compact(ltrim($phrase, $leading_junk)) . ' ';

    if ($remove_stopwords) {
        $phrase = remove_stopwords($phrase);
    }

    if ($bb_cfg['search_engine_type'] != 'sphinx') {
        $text_match_sql = DB()->escape(trim($phrase));
    } else {
        // A dash that directly follows a non-space character is a separator: "1-2-3" -> "1 2 3"
        $phrase = preg_replace('#(?<=\\S)\\-#u', ' ', $phrase);
        // Keep only the allowed characters (double quotes are handled separately)
        $phrase = preg_replace('#[^0-9a-zA-Zа-яА-ЯёЁ\\-_*|]#u', ' ', $phrase);
        // "-" may only start a word and "*" may only end one (replacements run in this order)
        $phrase = str_replace(array('-', '*'), array(' -', '* '), $phrase);
        // Drop whitespace around pipes: "| " -> "|"
        $phrase = preg_replace('#\\s*\\|\\s*#u', '|', $phrase);
        // Collapse runs of pipes into one space-padded pipe: "||" -> " | "
        $phrase = preg_replace('#\\|+#u', ' | ', $phrase);
        // Remove lone operators such as " - " and " * "
        $phrase = preg_replace('#(?<=\\s)[\\-*]+\\s#u', ' ', $phrase);
        $phrase = str_compact(trim($phrase, ' -|'));

        if ($is_quoted && $phrase != '') {
            $text_match_sql = '"' . $phrase . '"';
        } else {
            $text_match_sql = $phrase;
        }
    }

    // Loose check: besides '', a result of '0' also counts as "empty" here.
    if ($die_if_empty && !$text_match_sql) {
        bb_die($lang['NO_SEARCH_MATCH']);
    }

    return $text_match_sql;
}
Esempio n. 2
0
/**
 * Turn a block of text into a list of unique, lower-cased words for the search index.
 *
 * Limits are read from $bb_cfg:
 *  - 'max_search_words_per_post': maximum number of words returned;
 *  - 'search_min_word_len' / 'search_max_word_len': a word is kept when its length is
 *    greater than max(2, search_min_word_len - 1) and not greater than search_max_word_len.
 *
 * @param string $text Source text (may contain HTML tags, entities, BBCode and URLs).
 *
 * @return array Sequentially indexed list of words, in order of first appearance.
 */
function extract_search_words($text)
{
    global $bb_cfg;

    $word_limit = $bb_cfg['max_search_words_per_post'];
    $len_floor = max(2, $bb_cfg['search_min_word_len'] - 1);
    $len_ceil = $bb_cfg['search_max_word_len'];

    // Lower-case, strip HTML tags, compact whitespace and pad with a space on each side
    $clean = mb_strtolower($text);
    $clean = strip_tags($clean);
    $clean = ' ' . str_compact($clean) . ' ';

    // Decode the bracket entities first so they are not removed by the entity filter below
    $clean = str_replace(array('&#91;', '&#93;'), array('[', ']'), $clean);
    // Remove the remaining HTML entities such as &nbsp;
    $clean = preg_replace('/(\\w*?)&#?[0-9a-z]+;(\\w*?)/iu', '', $clean);
    // Blank out scheme://... URLs
    $clean = preg_replace('#\\b[a-z0-9]+://[\\w\\#!$%&~/.\\-;:=,?@а-яА-Я\\[\\]+]+(/[0-9a-z\\?\\.%_\\-\\+=&/]+)?#u', ' ', $clean);
    // Leftover URL-tag openers and "?" / "!" become spaces (replaced in this order)
    $clean = str_replace(array('[url=', '?', '!'), ' ', $clean);

    $clean = strip_bbcode($clean);
    // Common punctuation separates words
    $clean = preg_replace('#[.,:;]#u', ' ', $clean);
    $clean = remove_stopwords($clean);

    // Collapse whitespace, split on single spaces and drop duplicate words
    $candidates = array_unique(explode(' ', str_compact($clean)));

    // Discard words that are too short or too long
    $words = array();
    foreach ($candidates as $candidate) {
        $length = mb_strlen($candidate);
        if ($length <= $len_floor || $length > $len_ceil) {
            continue;
        }
        $words[] = $candidate;
    }

    // Keep only the first $word_limit words
    if (count($words) > $word_limit) {
        $words = array_slice($words, 0, $word_limit);
    }

    return $words;
}
$count = 0;
if (!$result) {
    echo 'no result available';
    die;
} else {
    $stopwords = load_stopwords('stopwords.txt');
    $keyword_occur = array();
    $previous_keyword_string = 'thequickbrownfoxjumpsoverthelazydog';
    $previous_keywords = array();
    while ($row = mysql_fetch_array($result)) {
        $query_string = $row['keywords'];
        $keyword_string = extract_keywords($query_string);
        $product = $row['url'];
        if ($keyword_string != $previous_keyword_string) {
            $keywords = keywords_array($keyword_string);
            $keywords = remove_stopwords($keywords, $stopwords);
            foreach ($keywords as $keyword) {
                if (!isset($keyword_occur[$keyword])) {
                    $keyword_occur[$keyword] = 0;
                }
            }
            $previous_keyword_string = $keyword_string;
            $previous_keywords = $keywords;
        }
        if (count($previous_keywords) == 0) {
            continue;
        }
        //avoid unnecessary insertion
        //TODO hey! you forget to check whether the pair already exist in db!
        $insert_query = 'INSERT INTO m2_keyword_product (keyword, product) VALUES ';
        foreach ($previous_keywords as $keyword) {