Esempio n. 1
0
/**
 * Split a piece of text into a de-duplicated list of candidate search words.
 *
 * Stray apostrophes/dashes and every other character that is not a letter or
 * a number are stripped, the remainder is split on single spaces, and each
 * word is passed to validate_search_word(); rejected words are dropped.
 *
 * @param string $text Text to tokenise.
 * @param mixed  $idx  Truthy when the words are being indexed. When falsy, the
 *                     characters % and * are additionally exempted from
 *                     stripping. Passed on unchanged to validate_search_word().
 *
 * @return array Accepted words. Keys are the positions the words had after the
 *               split (they are not re-indexed). An empty array is returned
 *               when one of the regex steps fails.
 */
function split_words($text, $idx)
{
    // Remove any apostrophes or dashes which aren't part of words.
    // The padding gives the lookarounds a neighbouring character at both ends
    // of the text; substr() strips it again below.
    $padded = ucp_preg_replace('%((?<=[^\\p{L}\\p{N}])[\'\\-]|[\'\\-](?=[^\\p{L}\\p{N}]))%u', '', ' ' . $text . ' ');
    // preg_replace() reports a PCRE failure (e.g. malformed UTF-8 under the u
    // modifier) with null. ucp_preg_replace() is defined elsewhere and is
    // assumed to be able to pass such a result through - TODO confirm.
    // Bail out instead of feeding a non-string into substr()/explode().
    if (!is_string($padded)) {
        return array();
    }
    $text = substr($padded, 1, -1);

    // Remove punctuation and symbols (actually anything that isn't a letter or number), allow apostrophes and dashes (and % * if we aren't indexing)
    // NOTE(review): the negative lookahead only guards the first character of
    // each matched run, so a % or * that directly follows another stripped
    // character (a space, for instance) is removed as well - confirm whether
    // that is intended.
    $text = ucp_preg_replace('%(?![\'\\-' . ($idx ? '' : '\\%\\*') . '])[^\\p{L}\\p{N}]+%u', ' ', $text);
    if (!is_string($text)) {
        return array();
    }

    // Collapse every run of two or more whitespace characters into one
    $text = preg_replace('%(\\s){2,}%u', '\\1', $text);
    if (!is_string($text)) {
        return array();
    }

    // Fill an array with all the words
    $words = array_unique(explode(' ', $text));

    // Remove any words that should not be indexed
    foreach ($words as $key => $value) {
        // If the word shouldn't be indexed, remove it
        if (!validate_search_word($value, $idx)) {
            unset($words[$key]);
        }
    }

    return $words;
}
Esempio n. 2
0
/**
 * Split a piece of text into a de-duplicated list of candidate search words.
 *
 * BBCode tags are replaced by spaces first. Then stray apostrophes/dashes and
 * every other character that is not a letter or a number are stripped, the
 * remainder is split on single spaces, and each word is passed to
 * validate_search_word(); rejected words are dropped.
 *
 * @param string $text Text to tokenise.
 * @param mixed  $idx  Truthy when the words are being indexed. When falsy, the
 *                     characters % and * are additionally exempted from
 *                     stripping. Passed on unchanged to validate_search_word().
 *
 * @return array Accepted words. Keys are the positions the words had after the
 *               split (they are not re-indexed). An empty array is returned
 *               when one of the regex steps fails.
 */
function split_words($text, $idx)
{
    // Remove BBCode (opening and closing tags, with an optional "=value" part)
    $text = preg_replace('%\\[/?(b|u|s|ins|del|em|i|h|colou?r|quote|code|img|url|email|list|topic|post|forum|user|left|center|right|hr|justify)(?:\\=[^\\]]*)?\\]%', ' ', $text);
    // preg_replace() returns null on a PCRE error; bail out instead of
    // carrying a non-string through the remaining steps.
    if (!is_string($text)) {
        return array();
    }

    // Remove any apostrophes or dashes which aren't part of words.
    // The padding gives the lookarounds a neighbouring character at both ends
    // of the text; substr() strips it again below.
    $padded = ucp_preg_replace('%((?<=[^\\p{L}\\p{N}])[\'\\-]|[\'\\-](?=[^\\p{L}\\p{N}]))%u', '', ' ' . $text . ' ');
    // ucp_preg_replace() is defined elsewhere and is assumed to be able to
    // pass through preg_replace()'s null failure result (e.g. malformed UTF-8
    // under the u modifier) - TODO confirm.
    if (!is_string($padded)) {
        return array();
    }
    $text = substr($padded, 1, -1);

    // Remove punctuation and symbols (actually anything that isn't a letter or number), allow apostrophes and dashes (and % * if we aren't indexing)
    // NOTE(review): the negative lookahead only guards the first character of
    // each matched run, so a % or * that directly follows another stripped
    // character (a space, for instance) is removed as well - confirm whether
    // that is intended.
    $text = ucp_preg_replace('%(?![\'\\-' . ($idx ? '' : '\\%\\*') . '])[^\\p{L}\\p{N}]+%u', ' ', $text);
    if (!is_string($text)) {
        return array();
    }

    // Collapse every run of two or more whitespace characters into one
    $text = preg_replace('%(\\s){2,}%u', '\\1', $text);
    if (!is_string($text)) {
        return array();
    }

    // Fill an array with all the words
    $words = array_unique(explode(' ', $text));

    // Remove any words that should not be indexed
    foreach ($words as $key => $value) {
        // If the word shouldn't be indexed, remove it
        if (!validate_search_word($value, $idx)) {
            unset($words[$key]);
        }
    }

    return $words;
}