/**
 * Break a piece of text into the distinct words handled by the search code.
 *
 * @param string $text Raw text to split.
 * @param mixed  $idx  Truthy when the words are being indexed; when falsy,
 *                     the characters % and * are additionally left in place.
 *
 * @return array Unique words accepted by validate_search_word(). Keys are
 *               those of the exploded word list, so they may contain gaps.
 */
function split_words($text, $idx)
{
    // Pad with spaces so an apostrophe or dash at the very start or end has a
    // non-word neighbour, drop every apostrophe/dash that touches something
    // other than a letter or number, then strip the padding again.
    $padded = ' ' . $text . ' ';
    $cleaned = ucp_preg_replace('%((?<=[^\\p{L}\\p{N}])[\'\\-]|[\'\\-](?=[^\\p{L}\\p{N}]))%u', '', $padded);
    $text = substr($cleaned, 1, -1);

    // Turn every run of characters that are neither letters nor numbers into
    // a single space. Apostrophes and dashes are exempt, and so are % and *
    // when we are not indexing.
    $exempt = $idx ? '' : '\\%\\*';
    $text = ucp_preg_replace('%(?![\'\\-' . $exempt . '])[^\\p{L}\\p{N}]+%u', ' ', $text);

    // Collapse each run of two or more whitespace characters into one
    $text = preg_replace('%(\\s){2,}%u', '\\1', $text);

    // Split on spaces and discard duplicates (the first occurrence keeps its key)
    $pieces = explode(' ', $text);
    $candidates = array_unique($pieces);

    // Keep only the words that validate_search_word() accepts, under their original keys
    $words = array();
    foreach ($candidates as $position => $word) {
        if (validate_search_word($word, $idx)) {
            $words[$position] = $word;
        }
    }

    return $words;
}
/**
 * Break a piece of text into the distinct words handled by the search code,
 * after blanking out BBCode tags.
 *
 * @param string $text Raw text to split; may contain BBCode tags.
 * @param mixed  $idx  Truthy when the words are being indexed; when falsy,
 *                     the characters % and * are additionally left in place.
 *
 * @return array Unique words accepted by validate_search_word(). Keys are
 *               those of the exploded word list, so they may contain gaps.
 */
function split_words($text, $idx)
{
    // Swap each opening or closing BBCode tag from the list below (with an
    // optional "=value" part) for a space, so the tag names are not treated
    // as words.
    $bbcode_pattern = '%\\[/?(b|u|s|ins|del|em|i|h|colou?r|quote|code|img|url|email|list|topic|post|forum|user|left|center|right|hr|justify)(?:\\=[^\\]]*)?\\]%';
    $text = preg_replace($bbcode_pattern, ' ', $text);

    // Pad with spaces so an apostrophe or dash at the very start or end has a
    // non-word neighbour, drop every apostrophe/dash that touches something
    // other than a letter or number, then strip the padding again.
    $padded = ' ' . $text . ' ';
    $cleaned = ucp_preg_replace('%((?<=[^\\p{L}\\p{N}])[\'\\-]|[\'\\-](?=[^\\p{L}\\p{N}]))%u', '', $padded);
    $text = substr($cleaned, 1, -1);

    // Turn every run of characters that are neither letters nor numbers into
    // a single space. Apostrophes and dashes are exempt, and so are % and *
    // when we are not indexing.
    $exempt = $idx ? '' : '\\%\\*';
    $text = ucp_preg_replace('%(?![\'\\-' . $exempt . '])[^\\p{L}\\p{N}]+%u', ' ', $text);

    // Collapse each run of two or more whitespace characters into one
    $text = preg_replace('%(\\s){2,}%u', '\\1', $text);

    // Split on spaces and discard duplicates (the first occurrence keeps its key)
    $pieces = explode(' ', $text);
    $candidates = array_unique($pieces);

    // Keep only the words that validate_search_word() accepts, under their original keys
    $words = array();
    foreach ($candidates as $position => $word) {
        if (validate_search_word($word, $idx)) {
            $words[$position] = $word;
        }
    }

    return $words;
}