/**
 * Splits a string into words.
 * Used for indexing, can also be used to find words in a query.
 *
 * Unless $this->lexerConf['casesensitive'] is set to a truthy value, the
 * input is lowercased first. The method then repeatedly asks get_word()
 * for the next word at or after the current position and hands every hit
 * to addWords() until no further word is reported.
 *
 * Side effect: $this->debugString is reset on every call. When
 * $this->debug is enabled it is filled with an HTML rendering of the
 * input in which the text skipped between words is wrapped in red
 * <span> tags and the words themselves are emitted plain. Text that
 * follows the last word is not included in the debug output.
 *
 * @param string $wordString String with UTF-8 content to process.
 * @return array Array of words in UTF-8
 */
function split2Words($wordString)
{
    // Reset debug string:
    $this->debugString = '';

    // Convert the string to lowercase unless case sensitivity is requested.
    // empty() is used so that a missing 'casesensitive' option is treated
    // as "not case sensitive" without raising an undefined-index notice.
    if (empty($this->lexerConf['casesensitive'])) {
        $wordString = $this->csObj->conv_case('utf-8', $wordString, 'toLower');
    }

    // Now, splitting words:
    $pos = 0;
    $words = array();

    while (true) {
        // get_word() yields the start offset and length of the next word;
        // presumably byte offsets, since they are fed to substr() below.
        list($start, $len) = $this->get_word($wordString, $pos);
        if (!$len) {
            // No further word found: stop scanning.
            break;
        }

        // NOTE(review): addWords() is expected to append to $words by
        // reference, as $words is never assigned here - confirm.
        $this->addWords($words, $wordString, $start, $len);

        if ($this->debug) {
            // Skipped (non-word) part in red, followed by the word itself.
            $this->debugString .= '<span style="color:red">'
                . htmlspecialchars(substr($wordString, $pos, $start - $pos))
                . '</span>'
                . htmlspecialchars(substr($wordString, $start, $len));
        }

        // Continue right behind the word that was just consumed.
        $pos = $start + $len;
    }

    return $words;
}