Пример #1
0
 /**
  * Tokenizes a string into its individual words.
  * Intended for indexing, but equally usable for extracting the words of a search query.
  *
  * Unless the lexer configuration marks it as case sensitive, the input is lowercased first.
  * Words are then located one after another with get_word() and collected through addWords().
  * When debugging is enabled, $this->debugString receives an HTML rendering of the scan:
  * the text skipped before each word is wrapped in a red span, followed by the word itself
  * (both HTML-escaped).
  *
  * @param	string		String with UTF-8 content to process.
  * @return	array		Array of words in utf-8
  */
 function split2Words($wordString)
 {
     // Begin every run with an empty debug trace.
     $this->debugString = '';

     // Fold the input to lowercase unless case sensitivity was requested.
     if (!$this->lexerConf['casesensitive']) {
         $wordString = $this->csObj->conv_case('utf-8', $wordString, 'toLower');
     }

     $words = array();
     $offset = 0;

     // Walk through the string word by word; a zero/empty length from get_word() ends the scan.
     for (;;) {
         list($wordStart, $wordLength) = $this->get_word($wordString, $offset);
         if (!$wordLength) {
             break;
         }

         $this->addWords($words, $wordString, $wordStart, $wordLength);

         if ($this->debug) {
             // Text between the previous scan position and this word, then the word itself.
             $skippedText = substr($wordString, $offset, $wordStart - $offset);
             $wordText = substr($wordString, $wordStart, $wordLength);
             $this->debugString .= '<span style="color:red">' . htmlspecialchars($skippedText) . '</span>' . htmlspecialchars($wordText);
         }

         // Continue scanning right after the word just handled.
         $offset = $wordStart + $wordLength;
     }

     return $words;
 }