예제 #1
0
 /**
  * Breaks up the text into lowercase words, keeping only indexable ones.
  *
  * HTML tags and BB code are stripped first. Text that contains non-ASCII
  * bytes (or whose byte length differs from its character length) is
  * converted to UTF-8 and split on whitespace/punctuation; pure ASCII text
  * is split on every run of characters other than letters, digits and "_".
  *
  * @param string $text raw text to tokenize
  * @return array the retained words; keys are the positions produced by
  *               preg_split() and may be non-contiguous because rejected
  *               words are unset rather than re-indexed
  */
 protected static function break_words($text)
 {
     $text = strip_tags($text);
     // NOTE(review): the meaning of the boolean flags depends on the project's
     // strip_bbcode() helper, which is not visible here.
     $text = strip_bbcode($text, true, false, false, true);

     // Multibyte detection: any byte outside 7-bit ASCII, or a mismatch
     // between the byte length and the character length.
     $is_mb = preg_match('/[^\\x00-\\x7F]/', $text);
     if (!$is_mb) {
         $is_mb = strlen($text) != vB_String::vbStrlen($text);
     }

     if ($is_mb) {
         $text = vB_String::toUtf8($text, vB_String::getCharSet());
         // The "e" modifier was dropped from this pattern: it only ever had a
         // meaning for preg_replace() and was removed in PHP 7.0, where a
         // pattern carrying it is rejected, so this check could never succeed
         // and numeric entities were left undecoded.
         if (preg_match('/&#([0-9]+);|[^\\x00-\\x7F]/siU', $text) and function_exists('mb_decode_numericentity')) {
             $text = mb_decode_numericentity($text, array(0x0, 0x2ffff, 0, 0xffff), 'UTF-8');
         }
         // Split on an explicit ASCII whitespace/punctuation list so that
         // multibyte sequences are never cut apart.
         $pattern = '/[\\s,.!?@#$%^&*\\(\\)\\/<>"\';:\\[\\]\\{\\}\\+|-]/';
     } else {
         $pattern = '/[^a-z0-9_]+/i';
     }

     $words = preg_split($pattern, vB_String::vBStrToLower($text), -1, PREG_SPLIT_NO_EMPTY);
     foreach ($words as $index => $word) {
         // Drop anything the search API does not consider indexable.
         if (!vB_Api_Search::is_index_word($word, true)) {
             unset($words[$index]);
             continue;
         }
         // empty() also treats the string "0" as empty, so that word is
         // discarded here as well (pre-existing behaviour, kept as is).
         if (empty($words[$index])) {
             unset($words[$index]);
             continue;
         }
     }

     return $words;
 }