/**
 * Normalize a token by replacing its text with the first pseudo-root
 * reported by phpMorphy for the upper-cased term.
 *
 * When phpMorphy returns false for the term, the upper-cased term itself is
 * used. Results shorter than 3 bytes are dropped. Offsets and the position
 * increment are copied from the source token.
 *
 * @param \ZendSearch\Lucene\Analysis\Token $token
 * @return null|\ZendSearch\Lucene\Analysis\Token null when the normalized text is shorter than 3 bytes
 */
public function normalize(Token $token)
{
    // Upper-case once; the value serves both as the lookup key and as the fallback text.
    $upper = $this->string()->toUpper($token->getTermText());

    $pseudo_root = $this->morphy->getPseudoRoot($upper);
    $new_str = ($pseudo_root === false) ? $upper : $pseudo_root[0];

    // NOTE(review): strlen() counts bytes, not characters. If the terms are UTF-8,
    // a two-letter Cyrillic word is 4 bytes and passes this filter - confirm the
    // encoding in use before switching to mb_strlen().
    if (strlen($new_str) < 3) {
        return null;
    }

    $new_token = new Token($new_str, $token->getStartOffset(), $token->getEndOffset());
    $new_token->setPositionIncrement($token->getPositionIncrement());

    return $new_token;
}
/**
 * Return the lower-cased first pseudo-root of a word, or the word unchanged
 * when phpMorphy cannot find or predict one.
 *
 * A phpMorphy instance backed by the 'rus' dictionaries under
 * MODPATH . 'phpMorphy/dicts' is created on every call.
 *
 * @param string $word word to look up (upper-cased before the lookup)
 * @return string lower-cased pseudo-root, or $word as given
 */
public static function forms($word)
{
    $opts = array(
        'storage' => PHPMORPHY_STORAGE_FILE,
        'with_gramtab' => false,
        'predict_by_suffix' => true,
        'predict_by_db' => true,
    );
    $dir = MODPATH . 'phpMorphy/dicts';
    $dict_bundle = new phpMorphy_FilesBundle($dir, 'rus');
    $morphy = new phpMorphy($dict_bundle, $opts);

    $pseudo_roots = $morphy->getPseudoRoot(mb_strtoupper($word));

    // getPseudoRoot() yields false when nothing is found; reset() must only
    // ever be given an array (it raises a TypeError on PHP 8 otherwise).
    if (!is_array($pseudo_roots)) {
        return $word;
    }

    $first_root = reset($pseudo_roots);
    if ($first_root) {
        return mb_strtolower($first_root);
    }

    return $word;
}
// | English | cp1250 | // |------------------------------| // | German | cp1252 | // *------------------------------* // $codepage = $morphy->getCodepage(); // setlocale(LC_CTYPE, array('ru_RU.CP1251', 'Russian_Russia.1251')); // Hint: in this example words $word_one, $word_two are in russian language(cp1251 encoding) $word_one = 'КОТ'; $word_two = 'СОБАКА'; echo "Testing single mode...\n"; try { // word by word processing // each function return array with result or FALSE when no form(s) for given word found(or predicted) $base_form = $morphy->getBaseForm($word_one); $all_forms = $morphy->getAllForms($word_one); $pseudo_root = $morphy->getPseudoRoot($word_one); if (false === $base_form || false === $all_forms || false === $pseudo_root) { die("Can`t find or predict {$word_one} word"); } echo 'base form = ' . implode(', ', $base_form) . "\n"; echo 'all forms = ' . implode(', ', $all_forms) . "\n"; echo "Testing bulk mode...\n"; // bulk mode speed-ups processing up to 50-100%(mainly for getBaseForm method) // in bulk mode all function always return array $bulk_words = array($word_one, $word_two); $base_form = $morphy->getBaseForm($bulk_words); $all_forms = $morphy->getAllForms($bulk_words); $pseudo_root = $morphy->getPseudoRoot($bulk_words); // Bulk result format: // array( // INPUT_WORD1 => array(OUTWORD1, OUTWORD2, ... etc)
/**
 * Break a free-text search string into upper-cased search words, replacing
 * each word with its first phpMorphy pseudo-root where one is available.
 *
 * The input is stripped of HTML tags, upper-cased as UTF-8 and split into
 * runs of Latin letters, Cyrillic letters and digits. Words for which
 * phpMorphy reports no pseudo-root are kept as they are. Entries shorter
 * than 2 characters are discarded.
 *
 * Terminates the script via die() if the phpMorphy instance cannot be created.
 *
 * @param string $string raw search string
 * @return array list of search words; empty when the input yields nothing usable
 */
public function getSearchString($string)
{
    $s_words = array();

    // NOTE(review): this object is never used below; the instantiation is kept
    // only in case the constructor has side effects - confirm and remove.
    $materials = new Model_Material('groups');

    // Load the phpMorphy library.
    require_once DOCROOT . 'application/classes/PHPMorphy/src/common.php';
    $dir = DOCROOT . 'application/classes/PHPMorphy/dicts/';
    $lang = 'ru_RU';
    $opts = array('storage' => PHPMORPHY_STORAGE_FILE);
    try {
        $morphy = new phpMorphy($dir, $lang, $opts);
    } catch (phpMorphy_Exception $e) {
        die('Error occured while creating phpMorphy instance: ' . $e->getMessage());
    }

    if (empty($string)) {
        return $s_words;
    }

    // Strip HTML tags and the like.
    $search = trim(addslashes(strip_tags($string)));

    // Always hand back an array, including for empty or one-character input,
    // so callers can iterate over the result unconditionally.
    if (empty($search) || mb_strlen($search, "UTF-8") <= 1) {
        return $s_words;
    }

    preg_match_all('/([a-zа-яё0-9]+)/ui', mb_strtoupper($search, "UTF-8"), $search_words);
    if (empty($search_words[1])) {
        return $s_words;
    }

    // Bulk lookup: the result is keyed by input word, each value being the
    // list of pseudo-roots or a falsy value when none was found.
    $roots_by_word = $morphy->getPseudoRoot($search_words[1]);

    foreach ($roots_by_word as $word => $roots) {
        // Fall back to the word itself. The cast matters because PHP turns
        // numeric-string array keys such as "2024" into integers.
        $term = $roots ? $roots[0] : (string) $word;

        if (mb_strlen($term, "UTF-8") >= 2) {
            $s_words[] = $term;
        }
    }

    return $s_words;
}
/**
 * Build a SQL condition fragment from the "q" query-string parameter.
 *
 * The query is truncated to 30 characters, split on single spaces, and each
 * piece is upper-cased and replaced by its first phpMorphy pseudo-root (or
 * kept as is when none is found). Every piece becomes one
 * "<column> LIKE '%...%' " term; the terms are joined with "and ".
 *
 * Terminates the script via die() if the phpMorphy instance cannot be created.
 *
 * @param string|null $txt "text" to match against A.text; any other value matches A.title
 * @return string the condition fragment, ending with a single trailing space
 */
protected function getMorphy($txt = null)
{
    require_once $_SERVER['DOCUMENT_ROOT'] . '/plugins/phpmorphy/src/common.php';

    $opts = array(
        'storage' => PHPMORPHY_STORAGE_FILE,
        'with_gramtab' => false,
        'predict_by_suffix' => true,
        'predict_by_db' => true,
    );

    // Path to the directory where the dictionaries are located.
    $dir = $_SERVER['DOCUMENT_ROOT'] . '/plugins/phpmorphy/dicts/';

    // Descriptor for the Russian dictionary located in $dir.
    $dict_bundle = new phpMorphy_FilesBundle($dir, 'rus');

    try {
        $morphy = new phpMorphy($dict_bundle, $opts);
    } catch (phpMorphy_Exception $e) {
        die('Error occured while creating phpMorphy instance: ' . $e->getMessage());
    }

    // A missing or non-string "q" (e.g. ?q[]=x) is treated as an empty query
    // instead of raising an undefined-index notice or a type warning.
    $search = (isset($_GET['q']) && is_string($_GET['q'])) ? $_GET['q'] : '';

    // NOTE(review): mysql_real_escape_string() was removed in PHP 7.0, and it runs
    // here before truncation and before functions::q(). If functions::q() is the
    // project's SQL escaper (presumably - verify), this call double-escapes and
    // should be dropped. Left in place because q() is not visible from here.
    $search = mysql_real_escape_string($search);
    $search = mb_substr($search, 0, 30, 'UTF-8');

    // The target column does not change between words, so pick it once.
    $column = ($txt == "text") ? 'A.text' : 'A.title';

    $conditions = array();
    foreach (explode(" ", $search) as $word) {
        $word = mb_strtoupper($word, 'utf-8');
        $pseudo_root = $morphy->getPseudoRoot($word);
        $rep = (false === $pseudo_root) ? $word : $pseudo_root[0];

        // Each term keeps its trailing space so the joined result is
        // "... LIKE '%A%' and ... LIKE '%B%' ", trailing space included.
        $conditions[] = $column . " LIKE '%" . functions::q($rep) . "%' ";
    }

    return implode('and ', $conditions);
}