/**
 * Inflects a word into the requested case and capitalises the result.
 *
 * The word is cast through phpMorphy with part of speech 'С' and the
 * grammemes [$case, 'ЕД']. When no form comes back, the original word is
 * used instead.
 *
 * @param string $word Word to inflect.
 * @param string $case Case grammeme handed to castFormByGramInfo().
 * @return string Output of mb_ucwords() for the chosen form.
 */
private function getCase($word, $case)
{
    $variants = $this->morphy->castFormByGramInfo($word, 'С', array($case, 'ЕД'), false);
    $chosen = isset($variants[0]) ? $variants[0]['form'] : $word;

    return $this->mb_ucwords($chosen);
}
/**
 * Returns the shared instance of this class for the encoding of $morphy.
 *
 * Instances are kept in self::$instances, keyed by the value of
 * $morphy->getEncoding(); a new one is constructed (with that encoding as
 * its only constructor argument) the first time an encoding is seen.
 *
 * @param phpMorphy $morphy Morphy object whose encoding selects the instance.
 * @return static-like instance of the declaring class.
 */
static function instance(phpMorphy $morphy)
{
    $encoding = $morphy->getEncoding();

    if (isset(self::$instances[$encoding])) {
        return self::$instances[$encoding];
    }

    $className = __CLASS__;
    self::$instances[$encoding] = new $className($encoding);

    return self::$instances[$encoding];
}
/**
 * Returns the lower-cased pseudo-root of a word, or the word itself when
 * phpMorphy cannot produce one.
 *
 * @param string $word Word to reduce.
 * @return string Lower-cased first pseudo-root, or $word unchanged.
 */
public static function forms($word)
{
    // Building the dictionary is expensive, so the instance is created once
    // and reused by later calls.
    static $morphy = null;

    if ($morphy === null) {
        $opts = array(
            'storage' => PHPMORPHY_STORAGE_FILE,
            'with_gramtab' => false,
            'predict_by_suffix' => true,
            'predict_by_db' => true,
        );
        $dir = MODPATH . 'phpMorphy/dicts';
        $dict_bundle = new phpMorphy_FilesBundle($dir, 'rus');
        $morphy = new phpMorphy($dict_bundle, $opts);
    }

    $base_form = $morphy->getPseudoRoot(mb_strtoupper($word));

    // getPseudoRoot() may return false; reset() only accepts arrays and
    // raises a TypeError on PHP 8 when given a boolean.
    if (!is_array($base_form)) {
        return $word;
    }

    $first = reset($base_form);
    if ($first) {
        return mb_strtolower($first);
    }

    return $word;
}
/**
 * Builds a replacement token whose text is the pseudo-root of the input
 * token's upper-cased term text.
 *
 * When getPseudoRoot() returns false, the upper-cased term text itself is
 * used. Texts shorter than 3 bytes (strlen) are dropped by returning null.
 * Offsets and the position increment are copied from the source token.
 *
 * @param \ZendSearch\Lucene\Analysis\Token $token
 * @return null|\ZendSearch\Lucene\Analysis\Token
 */
public function normalize(Token $token)
{
    $roots = $this->morphy->getPseudoRoot($this->string()->toUpper($token->getTermText()));

    $text = ($roots === false)
        ? $this->string()->toUpper($token->getTermText())
        : $roots[0];

    if (strlen($text) < 3) {
        return null;
    }

    $normalized = new Token($text, $token->getStartOffset(), $token->getEndOffset());
    $normalized->setPositionIncrement($token->getPositionIncrement());

    return $normalized;
}
/**
 * Inflects the participle "выбранный" to agree with the given word.
 *
 * The first paradigm returned by getGramInfo() is scanned for a form
 * carrying the 'ИМ' grammeme; its gender ('МР', 'ЖР', 'СР', 'МР-ЖР') and
 * animacy ('ОД', 'НО') grammemes are then used, together with
 * 'ВН', 'ЕД', 'ПРШ', 'СТР', to cast the participle.
 *
 * The result is stored in the Yii cache under a key derived from the class,
 * the function name and the word. Because the cache signals a miss with
 * false, a false result is effectively recomputed on every call.
 *
 * Side effect: sets mb_internal_encoding('UTF-8') on a cache miss.
 *
 * @param string $word
 * @return bool|string Lower-cased participle form, or false when the word is
 *                     unknown or no matching form could be produced.
 */
public static function castChosenWordBy($word)
{
    $cacheKey = __CLASS__ . __FUNCTION__ . $word;
    $res = Yii::$app->cache->get($cacheKey);
    if ($res !== false) {
        return $res;
    }

    $phpMorphy = new \phpMorphy(
        Yii::getAlias('@madmin/phpmorphy-dicts'),
        'ru_RU',
        ['storage' => PHPMORPHY_STORAGE_FILE]
    );
    mb_internal_encoding('UTF-8');

    $rod = null;
    $od = null;

    // getGramInfo() can return false for a word it cannot analyse; indexing
    // that result directly would fail, so fall back to an empty form list.
    $gramInfo = $phpMorphy->getGramInfo(mb_strtoupper($word));
    $forms = (is_array($gramInfo) && isset($gramInfo[0]) && is_array($gramInfo[0]))
        ? $gramInfo[0]
        : [];

    foreach ($forms as $form) {
        if (in_array('ИМ', $form['grammems'], true)) {
            $rod = array_intersect($form['grammems'], ['МР', 'ЖР', 'СР', 'МР-ЖР']);
            $rod = reset($rod);
            $od = array_intersect($form['grammems'], ['ОД', 'НО']);
            $od = reset($od);
            break;
        }
    }

    if (!empty($rod) && !empty($od)) {
        $cast = $phpMorphy->castFormByGramInfo(
            mb_strtoupper('выбранный'),
            'ПРИЧАСТИЕ',
            [$rod, $od, 'ВН', 'ЕД', 'ПРШ', 'СТР'],
            true
        );
        // Only accept the cast when at least one form was actually produced.
        if (is_array($cast) && isset($cast[0])) {
            $res = mb_strtolower($cast[0]);
        }
    }

    Yii::$app->cache->set($cacheKey, $res);

    return $res;
}
/**
 * Returns the inflectional forms of the words found in a piece of content.
 *
 * Results are memoised in static::$content under the md5 of the raw input.
 * The content is stripped of tags, bracketed fragments, non-word characters,
 * guillemets and digits, split into words, and every word longer than two
 * characters is upper-cased and passed to getAllForms() in one bulk call.
 *
 * @param string $content
 * @return array|null Result of getAllForms(), or null for empty() content.
 */
public function inflectionalForms($content)
{
    if (empty($content)) {
        return null;
    }

    // Serve repeated requests for the same content from the static cache.
    $hash = md5($content);
    if (isset(static::$content[$hash])) {
        return static::$content[$hash];
    }

    $plain = trim(strip_tags($content));
    $plain = preg_replace(
        ['/\\[.*\\]/isu', '/[^\\w\\x7F-\\xFF\\s]/isu', '/[\\«\\»\\d]+/iu'],
        "",
        $plain
    );

    // Collapse runs of spaces into a single space.
    $plain = preg_replace('/ +/u', ' ', $plain);

    $tokens = preg_split('/\\s|[,.:;!?"\'()]/', $plain, -1, PREG_SPLIT_NO_EMPTY);

    $bulkWords = [];
    foreach ($tokens as $token) {
        if (mb_strlen($token, 'utf-8') <= 2) {
            continue;
        }
        $bulkWords[] = mb_strtoupper($token, 'utf-8');
    }

    static::$content[$hash] = static::$morphy->getAllForms($bulkWords);

    return static::$content[$hash];
}
/**
 * Replaces each word with its base form where one is available.
 *
 * A word containing at least one character matched by /[а-я]+/ui is looked
 * up with the 'ru' analyser, anything else with 'en'. Words for which
 * getBaseForm() returns a falsy value are left untouched; when an array is
 * returned its current (first) element is used.
 *
 * @param array $words Words to normalise.
 * @return array Same keys as the input, values replaced by base forms.
 */
protected function processing($words)
{
    $morphy = new phpMorphy();

    // Assign by key instead of iterating by reference, so no reference to
    // the last element outlives the loop.
    foreach ($words as $index => $word) {
        $lang = preg_match('/[а-я]+/ui', $word) ? 'ru' : 'en';

        $base = $morphy->get($lang)->getBaseForm($word);
        if (!$base) {
            continue;
        }

        $words[$index] = is_array($base) ? current($base) : $base;
    }

    return $words;
}
/**
 * Returns all word forms for the words of a search query.
 *
 * The text is split on whitespace and basic punctuation; every token longer
 * than 3 bytes is upper-cased, converted from UTF-8 to windows-1251 and sent
 * to phpMorphy::getAllForms() in a single bulk call.
 *
 * @param string $text Search query, expected in UTF-8 (it is passed through
 *                     iconv("UTF-8", "windows-1251", ...)).
 * @return mixed Whatever getAllForms() returns for the collected words.
 */
function Words2AllForms($text)
{
    require_once $GLOBALS['_PATH']['PATH_INC'] . 'phpMorphy/src/common.php';

    $opts = array(
        'storage' => PHPMORPHY_STORAGE_MEM,
        'with_gramtab' => false,
        'predict_by_suffix' => true,
        'predict_by_db' => true,
    );
    $dir = $GLOBALS['_PATH']['PATH_INC'] . 'phpMorphy/dicts';

    // Create the dictionary descriptor and the phpMorphy instance.
    $dict_bundle = new phpMorphy_FilesBundle($dir, 'rus');
    $morphy = new phpMorphy($dict_bundle, $opts);

    // Kept from the original: the locale is still switched in case the
    // library itself relies on it.
    setlocale(LC_CTYPE, array('ru_RU.CP1251', 'Russian_Russia.1251'));

    $words = preg_split('#\\s|[,.:;!?"\'()]#', $text, -1, PREG_SPLIT_NO_EMPTY);

    $bulk_words = array();
    foreach ($words as $v) {
        // NOTE(review): strlen() counts bytes, so a two-letter Cyrillic word
        // in UTF-8 (4 bytes) passes this filter - confirm that is intended.
        if (strlen($v) > 3) {
            // Upper-case while the text is still UTF-8: strtoupper() on
            // cp1251 bytes depends on the process locale and is ASCII-only
            // from PHP 8.2 on, which leaves Cyrillic letters in lower case.
            $converted = iconv("UTF-8", "windows-1251", mb_strtoupper($v, 'UTF-8'));

            // iconv() returns false for characters that have no cp1251
            // representation; skip such tokens instead of queuing an empty word.
            if ($converted !== false) {
                $bulk_words[] = $converted;
            }
        }
    }

    return $morphy->getAllForms($bulk_words);
}
/**
 * Converts free text into a space-separated list of distinct base forms.
 *
 * Bracketed fragments are removed, the text is split on whitespace and
 * punctuation, tokens longer than 3 bytes are upper-cased and resolved with
 * phpMorphy::getBaseForm(); base forms longer than 3 bytes are collected
 * without duplicates. The phpMorphy instance is created once and reused.
 *
 * NOTE(review): strtoupper() is used on the raw text after switching
 * LC_CTYPE to a CP1251 locale, so the input is presumably cp1251. From
 * PHP 8.2 strtoupper() ignores the locale - verify on the target runtime.
 *
 * @param string $text Source text.
 * @return string Unique base forms joined with single spaces.
 */
function Words2BaseForm($text)
{
    static $morphy;

    require_once $GLOBALS['_PATH']['PATH_INC'] . 'phpMorphy/src/common.php';

    if (!$morphy) {
        $dir = $GLOBALS['_PATH']['PATH_INC'] . 'phpMorphy/dicts/';
        $dict_bundle = new phpMorphy_FilesBundle($dir, 'rus');
        $opts = array(
            'storage' => PHPMORPHY_STORAGE_MEM,
            'with_gramtab' => false,
            'predict_by_suffix' => true,
            'predict_by_db' => true,
        );
        $morphy = new phpMorphy($dict_bundle, $opts);
    }

    // Candidates are tried in order until one is accepted.
    setlocale(LC_CTYPE, array('ru_RU.CP1251', 'rus_RUS.CP1251', 'Russian_Russia.1251'));

    $words = preg_replace('#\\[.*\\]#isU', '', $text);
    $words = preg_split('#\\s|[,.:;«»!?"\'()]#', $words, -1, PREG_SPLIT_NO_EMPTY);

    $bulk_words = array();
    foreach ($words as $v) {
        if (strlen($v) > 3) {
            $bulk_words[] = strtoupper($v);
        }
    }

    $base_form = $morphy->getBaseForm($bulk_words);

    // Array keys give de-duplication for free.
    $fullList = array();
    if (is_array($base_form)) {
        foreach ($base_form as $forms) {
            if (!is_array($forms)) {
                continue;
            }
            foreach ($forms as $form) {
                if (strlen($form) > 3) {
                    $fullList[$form] = 1;
                }
            }
        }
    }

    return join(' ', array_keys($fullList));
}
$words = preg_split('#\\s|[,.:;!?"\'()]#', $text, -1, PREG_SPLIT_NO_EMPTY); $bulk_words = array(); foreach ($words as $v) { if (strlen($v) > 3) { $bulk_words[] = strtoupper($v); } } return $morphy->getAllForms($bulk_words); } /* * * @param string $text * @return string */ function Words2BaseForm($text) { require_once $GLOBALS['PATH_sys'] . 'phpmorphy/src/common.php'; // set some options $opts = array('storage' => PHPMORPHY_STORAGE_MEM, 'with_gramtab' => false, 'predict_by_suffix' => true, 'predict_by_db' => true); $dir = $GLOBALS['PATH_sys'] . 'phpmorphy/dicts'; // Create descriptor for dictionary located in $dir directory with russian language $dict_bundle = new phpMorphy_FilesBundle($dir, 'rus'); // Create phpMorphy instance $morphy = new phpMorphy($dict_bundle, $opts); // All words in dictionary in UPPER CASE, so don`t forget set proper locale // Supported dicts and locales: // *------------------------------* // | Dict. language | Locale name | // |------------------------------| // | Russian | cp1251 | // |------------------------------| // | English | cp1250 | // |------------------------------| // | German | cp1252 | // *------------------------------* // $codepage = $morphy->getCodepage(); //setlocale(LC_CTYPE, array('ru_RU.CP1251', 'Russian_Russia.1251')); $words = preg_replace('#\\[.*\\]#isU', '', $text); $words = preg_split('#\\s|[,.:;!?"\'()]#', $words, -1, PREG_SPLIT_NO_EMPTY); $bulk_words = array(); foreach ($words as $v) { if (strlen($v) > 3) { $bulk_words[] = mb_strtoupper($v, "UTF-8");
/**
 * Returns the grammems provider for the locale of the given morphy object.
 *
 * Providers are cached in self::$included by lower-cased locale. On first
 * use the file PHPMORPHY_DIR/langs_stuff/<locale>.php is loaded when it is
 * readable, and the provider is obtained from the static instance() method
 * of phpMorphy_GrammemsProvider_<locale>. When no such file is readable, a
 * phpMorphy_GrammemsProvider_Empty is used instead.
 *
 * @param phpMorphy $morphy
 * @return object Cached or newly created provider.
 * @throws phpMorphy_Exception When the loaded file does not define the expected class.
 */
static function create(phpMorphy $morphy)
{
    $locale = $GLOBALS['__phpmorphy_strtolower']($morphy->getLocale());

    if (isset(self::$included[$locale])) {
        return self::$included[$locale];
    }

    $file_name = PHPMORPHY_DIR . "/langs_stuff/{$locale}.php";
    $class = "phpMorphy_GrammemsProvider_{$locale}";

    if (!is_readable($file_name)) {
        self::$included[$locale] = new phpMorphy_GrammemsProvider_Empty($morphy);

        return self::$included[$locale];
    }

    require $file_name;

    if (!class_exists($class)) {
        throw new phpMorphy_Exception("Class '{$class}' not found in '{$file_name}' file");
    }

    self::$included[$locale] = call_user_func(array($class, 'instance'), $morphy);

    return self::$included[$locale];
}
/**
 * Builds the LIKE part of a search query from the "q" request parameter.
 *
 * The query string is limited to 30 characters, split on spaces, and every
 * term is upper-cased and replaced by its first pseudo-root (or kept as is
 * when phpMorphy returns false). One "<column> LIKE '%term%'" condition is
 * produced per term and the conditions are joined with " and ".
 *
 * NOTE(review): the result is a string-built SQL fragment made from
 * untrusted input. It relies on mysql_real_escape_string() (removed in
 * PHP 7.0) and on functions::q(), whose behaviour is not visible here.
 * Moving the caller to parameterised queries would be the robust fix.
 *
 * @param string|null $txt "text" to search column A.text; anything else
 *                         searches A.title.
 * @return string Conditions joined by " and ", followed by one space.
 */
protected function getMorphy($txt = null)
{
    require_once $_SERVER['DOCUMENT_ROOT'] . '/plugins/phpmorphy/src/common.php';

    $opts = array(
        'storage' => PHPMORPHY_STORAGE_FILE,
        'with_gramtab' => false,
        'predict_by_suffix' => true,
        'predict_by_db' => true,
    );

    // Directory holding the dictionaries.
    $dir = $_SERVER['DOCUMENT_ROOT'] . '/plugins/phpmorphy/dicts/';
    $dict_bundle = new phpMorphy_FilesBundle($dir, 'rus');

    try {
        $morphy = new phpMorphy($dict_bundle, $opts);
    } catch (phpMorphy_Exception $e) {
        die('Error occured while creating phpMorphy instance: ' . $e->getMessage());
    }

    // A missing or non-string (e.g. ?q[]=) parameter is treated as empty.
    $search = (isset($_GET['q']) && is_string($_GET['q'])) ? $_GET['q'] : '';

    // Truncate BEFORE escaping: cutting an already escaped string can split
    // an escape sequence and leave a dangling backslash at the end.
    $search = mb_substr($search, 0, 30, 'UTF-8');
    $search = mysql_real_escape_string($search);

    $column = ($txt == "text") ? 'A.text' : 'A.title';

    $conditions = array();
    foreach (explode(" ", $search) as $term) {
        $term = mb_strtoupper($term, 'utf-8');
        $pseudo_root = $morphy->getPseudoRoot($term);
        $rep = (false === $pseudo_root) ? $term : $pseudo_root[0];

        $conditions[] = $column . " LIKE '%" . functions::q($rep) . "%'";
    }

    // The trailing space is kept for callers that append directly to the fragment.
    return implode(' and ', $conditions) . ' ';
}
/**
 * Reduces free text to a space-separated list of distinct base forms.
 *
 * Bracketed fragments are stripped, the remainder is split on whitespace and
 * punctuation, tokens longer than 2 bytes are upper-cased with strtoupper()
 * and resolved through phpMorphy::getBaseForm(). Every returned base form
 * longer than 2 bytes is kept once.
 *
 * @param string $text Source text.
 * @return string Unique base forms joined with single spaces.
 */
function Words2BaseForm($text)
{
    require_once dirname(__FILE__) . '/../sys/phpmorphy/src/common.php';

    // Dictionary options.
    $opts = array(
        'storage' => PHPMORPHY_STORAGE_MEM,
        'with_gramtab' => false,
        'predict_by_suffix' => true,
        'predict_by_db' => true,
    );

    // Descriptor for the 'rus' dictionary and the analyser built on it.
    $dir = dirname(__FILE__) . '/../sys/phpmorphy/dicts';
    $dict_bundle = new phpMorphy_FilesBundle($dir, 'rus');
    $morphy = new phpMorphy($dict_bundle, $opts);

    $stripped = preg_replace('#\\[.*\\]#isU', '', $text);
    $tokens = preg_split('#\\s|[,.:;!?"\'()]#', $stripped, -1, PREG_SPLIT_NO_EMPTY);

    $upperCased = array();
    foreach ($tokens as $token) {
        if (strlen($token) <= 2) {
            continue;
        }
        $upperCased[] = strtoupper($token);
    }

    $baseForms = $morphy->getBaseForm($upperCased);

    // Keys of $unique act as a set of base forms.
    $unique = array();
    if (is_array($baseForms) && count($baseForms)) {
        foreach ($baseForms as $forms) {
            if (!is_array($forms)) {
                continue;
            }
            foreach ($forms as $form) {
                if (strlen($form) > 2) {
                    $unique[$form] = 1;
                }
            }
        }
    }

    return join(' ', array_keys($unique));
}
/**
 * Benchmarks raw dictionary automaton lookups and prints the timing.
 *
 * A phpMorphy instance is created with prediction disabled, the words are
 * passed to convert_words() with MB_CASE_UPPER, and each word is walked
 * through the FSA obtained from the common morphier's finder.
 *
 * @param array  $words    Words to look up.
 * @param string $encoding Encoding passed to convert_words() and echoed in the label.
 * @param string $dictDir  Dictionary directory.
 * @param string $lang     Dictionary language.
 * @param mixed  $storage  Storage type passed as the 'storage' option.
 * @return void Output is written with echo/printf.
 */
function bench_morphy_dict($words, $encoding, $dictDir, $lang, $storage)
{
    $opts = array(
        'storage' => $storage,
        'predict_by_suffix' => false,
        'predict_by_db' => false,
    );
    $bundle = new phpMorphy_FilesBundle($dictDir, $lang);
    $morphy = new phpMorphy($bundle, $opts);

    echo "Bench phpMorphy - Dict[{$encoding}][{$storage}]: ";

    convert_words($words, $encoding, MB_CASE_UPPER);

    $fsa = $morphy->getCommonMorphier()->getFinder()->getFsa();
    $root = $fsa->getRootTrans();

    // Never incremented here; printed so the output format stays unchanged.
    $predicted = 0;

    $b = microtime(true);
    foreach ($words as $word) {
        $fsa->walk($root, $word, true);
    }
    $e = microtime(true);

    // Two microtime() readings can be equal (e.g. for an empty word list);
    // dividing by zero throws DivisionByZeroError on PHP 8.
    $elapsed = $e - $b;
    $perSecond = $elapsed > 0 ? count($words) / $elapsed : 0.0;

    printf(
        "time = %0.2f sec, words per second = %0.2f, predicted = %d\n",
        $elapsed,
        $perSecond,
        $predicted
    );
}
<?php session_start(); error_reporting(E_ALL | E_STRICT); //ini_set('default_charset','UTF-8'); $path = $_SESSION['path']; require 'stemmer_utf.php'; require 'work_csv.php'; require 'readfile.php'; require_once dirname(__FILE__) . '/phpmorphy/src/common.php'; $opts = array('storage' => PHPMORPHY_STORAGE_FILE, 'with_gramtab' => false, 'predict_by_suffix' => true, 'predict_by_db' => true); $dir = dirname(__FILE__) . '/phpmorphy/dicts'; // Create descriptor for dictionary located in $dir directory with russian language $dict_bundle = new phpMorphy_FilesBundle($dir, 'rus'); try { $morphy = new phpMorphy($dict_bundle, $opts); } catch (phpMorphy_Exception $e) { die('Error occured while creating phpMorphy instance: ' . $e->getMessage()); } //$csv = new CSV($path); // $csv_lines = $csv->getCSV($path); $handle = fopen($path, "r"); //$row = 1; while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) { $insertValues = array(); foreach ($data as $v) { $insertValues[] = addslashes(trim($v)); $values = implode(',', $insertValues); $word = $morphy->lemmatize($values); //print_r($morphy->getBaseForm($value)); //$z=$morphy->getBaseForm($values); }
/**
 * Returns the unique lemmas (base forms) of the given words.
 *
 * Empty words are skipped. A word for which phpMorphy returns no base form
 * is kept as is. The collected list is passed through sanitizeWordsArray()
 * and de-duplicated with array_unique().
 *
 * @param array $words Words to lemmatise.
 * @return array|false Unique lemmas, or false when the phpMorphy instance
 *                     could not be created (the failure is logged).
 */
protected function getLemmas(array $words)
{
    require_once MODX_CORE_PATH . 'components/modsearch/external/phpmorphy/src/common.php';

    $opts = array(
        'storage' => PHPMORPHY_STORAGE_FILE,
        'predict_by_suffix' => true,
        'predict_by_db' => true,
        'graminfo_as_text' => true,
    );

    // Directory holding the dictionaries.
    $dir = MODX_CORE_PATH . 'components/modsearch/external/phpmorphy/dicts';
    $lang = 'ru_RU';

    // `new` never evaluates to a falsy value; a failing constructor throws.
    // Catch the exception so the log-and-return-false path is reachable.
    try {
        $morphy = new phpMorphy($dir, $lang, $opts);
    } catch (phpMorphy_Exception $e) {
        $this->modx->log(
            xPDO::LOG_LEVEL_ERROR,
            "[" . __CLASS__ . "] Не был получен объект phpMorphy" . ': ' . $e->getMessage()
        );

        return false;
    }

    $lemmas = array();

    foreach ($words as $word) {
        if (!$word) {
            continue;
        }

        // By default phpMorphy looks the word up in the dictionary and falls
        // back to prediction when nothing is found.
        $base = $morphy->getBaseForm($word);

        // No base form available: keep the word unchanged.
        if (!is_array($base)) {
            $lemmas[] = $word;
            continue;
        }

        foreach ($base as $lemma) {
            $lemmas[] = $lemma;
        }
    }

    $lemmas = $this->sanitizeWordsArray($lemmas);

    return array_unique($lemmas);
}
<?php error_reporting(E_ALL | E_STRICT); // first we include phpmorphy library require_once dirname(__FILE__) . '/../src/common.php'; // set some options $opts = array('storage' => PHPMORPHY_STORAGE_FILE, 'predict_by_suffix' => true, 'predict_by_db' => true, 'graminfo_as_text' => true); // Path to directory where dictionaries located $dir = dirname(__FILE__) . '/../dicts'; $lang = 'ru_RU'; // Create phpMorphy instance try { $morphy = new phpMorphy($dir, $lang, $opts); } catch (phpMorphy_Exception $e) { die('Error occured while creating phpMorphy instance: ' . PHP_EOL . $e); } // All words in dictionary in UPPER CASE, so don`t forget set proper locale via setlocale(...) call // $morphy->getEncoding() returns dictionary encoding $words = array('КРАКОЗЯБЛИКИ', 'СТАЛИ', 'ВИНА', 'И', 'ДУХИ', 'abc'); /* if(function_exists('iconv')) { foreach($words as &$word) { $word = iconv('windows-1251', $morphy->getEncoding(), $word); } unset($word); } */ try { foreach ($words as $word) { // by default, phpMorphy finds $word in dictionary and when nothig found, try to predict them // you can change this behaviour, via second argument to getXXX or findWord methods
#!/usr/bin/env php <?php set_include_path(__DIR__ . '/../src/' . PATH_SEPARATOR . get_include_path()); require 'phpMorphy.php'; define('WORD_NOT_FOUND', 1); if ($argc < 2) { die("Usage {$argv['0']} WORD [LANG] [ENCODING]" . PHP_EOL); } $word = $argv[1]; $lang = $argc > 2 ? $argv[2] : 'ru_RU'; $dir = __DIR__ . '/../dicts/'; $dir .= $argc > 3 ? "/{$argv[3]}" : 'utf-8'; $opts = array('storage' => PHPMORPHY_STORAGE_FILE, 'predict_by_suffix' => true, 'predict_by_db' => true); $morphy = new phpMorphy($dir, $lang, $opts); $encoding = $morphy->getEncoding(); $formatter = new phpMorphy_Paradigm_Formatter(); $word = iconv('utf-8', $encoding, $word); $word = mb_strtoupper($word, $encoding); $result = $morphy->findWord($word); $predict_text = 'DICT'; if ($morphy->getLastPredictionType() == phpMorphy::PREDICT_BY_DB) { $predict_text = 'PREDICT_BY_DB'; } else { if ($morphy->getLastPredictionType() == phpMorphy::PREDICT_BY_SUFFIX) { $predict_text = 'PREDICT_BY_SUFFIX'; } } echo "Paradigms for {$word}({$predict_text}):" . PHP_EOL; if (false === $result) { echo 'NOT FOUND' . PHP_EOL; exit(WORD_NOT_FOUND);
/**
 * Turns a raw search string into a list of pseudo-roots to search for.
 *
 * The input is stripped of HTML tags, passed through addslashes() and
 * trimmed, upper-cased, and split into runs of latin letters, cyrillic
 * letters and digits. Each word is replaced by its first pseudo-root; a word
 * with no pseudo-root is used unchanged. Entries shorter than two characters
 * are dropped.
 *
 * @param string $string Raw user input.
 * @return array List of search words; empty when the input is empty or too short.
 */
public function getSearchString($string)
{
    $s_words = array();

    // NOTE(review): this object is never used in the method; it is kept only
    // in case constructing it has side effects - confirm and remove.
    $materials = new Model_Material('groups');

    // Load the phpMorphy library.
    require_once DOCROOT . 'application/classes/PHPMorphy/src/common.php';

    if (empty($string)) {
        return $s_words;
    }

    // Strip HTML tags and surrounding whitespace.
    $search = trim(addslashes(strip_tags($string)));
    if (empty($search) || mb_strlen($search, "UTF-8") <= 1) {
        return $s_words;
    }

    $dir = DOCROOT . 'application/classes/PHPMorphy/dicts/';
    $lang = 'ru_RU';
    $opts = array('storage' => PHPMORPHY_STORAGE_FILE);

    try {
        $morphy = new phpMorphy($dir, $lang, $opts);
    } catch (phpMorphy_Exception $e) {
        die('Error occured while creating phpMorphy instance: ' . $e->getMessage());
    }

    preg_match_all('/([a-zа-яё0-9]+)/ui', mb_strtoupper($search, "UTF-8"), $search_words);

    $words = $morphy->getPseudoRoot($search_words[1]);
    if (!is_array($words)) {
        return $s_words;
    }

    foreach ($words as $k => $w) {
        // The array key is the looked-up word; fall back to it when no
        // pseudo-root exists. The cast covers numeric words, which PHP
        // stores as integer keys.
        $root = $w ? $w[0] : (string) $k;

        if (mb_strlen($root, "UTF-8") >= 2) {
            $s_words[] = $root;
        }
    }

    return $s_words;
}
<?php
error_reporting(E_ALL | E_STRICT);

// Load the phpMorphy library through the Composer autoloader.
require_once __DIR__ . '/../vendor/autoload.php';

// Dictionary options.
$opts = [
    'storage' => phpMorphy::STORAGE_FILE,
    'with_gramtab' => false,
    'predict_by_suffix' => true,
    'predict_by_db' => true,
];

// Create the phpMorphy instance; null is passed as the dictionary directory.
// Alternative kept from the original for reference:
//   new phpMorphy(new phpMorphy_FilesBundle(phpMorphy::getDefaultDictsDir(), 'ru_RU'), $opts);
try {
    $morphy = new phpMorphy(null, 'ru_RU', $opts);
} catch (phpMorphy_Exception $e) {
    die('Error occured while creating phpMorphy instance: ' . $e->getMessage());
}

// Dictionary words are stored in UPPER CASE, so input must be upper-cased
// with the proper locale. Locales listed by the original author:
//   Russian - cp1251, English - cp1250, German - cp1252
// e.g. setlocale(LC_CTYPE, array('ru_RU.CP1251', 'Russian_Russia.1251'));
//
// NOTE(review): the original hint states the sample words are cp1251
// encoded - confirm against the encoding of this file and the dictionary.
$word_one = 'КОТ';
$word_two = 'СОБАКА';

echo "Testing single mode...\n";
<?php
// Include common.php from the phpMorphy distribution.
// Adjust the path below if a different version is installed.
require_once 'c:/server/bin/PHP/lib/phpmorphy-0.3.7/src/common.php';

// Directory that contains the dictionaries.
$dir = 'c:/server/bin/PHP/lib/phpmorphy-0.3.7/dicts';

// Dictionary language, written as an ISO 639 language code and an ISO 3166
// country code joined by an underscore (ru_RU, uk_UA, en_EN, de_DE, ...).
$lang = 'ru_RU';

// Options for the phpMorphy instance.
$opts = array(
    'storage' => PHPMORPHY_STORAGE_MEM,
    'predict_by_suffix' => true,
);

// Create the phpMorphy instance. Every phpMorphy call, including the
// constructor, may throw phpMorphy_Exception.
try {
    $morphy = new phpMorphy($dir, $lang, $opts);
    print_r($morphy->lemmatize('яблоко', phpMorphy::NORMAL));
} catch (phpMorphy_Exception $e) {
    die('Error occured while creating phpMorphy instance: ' . $e->getMessage());
}

// From here on, $morphy refers to the phpMorphy instance created above.
#!/usr/bin/env php
<?php
require_once __DIR__ . '/init.php';

// Input user dictionary and the XML file that receives the generated output.
$user_dict_xml = PHPMORPHY_DIR . '/../bin/user_dict.xml';
$outXmlFilePath = PHPMORPHY_DIR . '/../bin/out.xml';

$morphy_dict_dir = phpMorphy::getDefaultDictsDir();
$morphy_dict_lang = 'ru_RU';
$verbose = true;

/**
 * Creates a phpMorphy instance backed by in-memory storage with both
 * prediction modes enabled.
 *
 * @param string $dir  Dictionary directory.
 * @param string $lang Dictionary language.
 * @return phpMorphy_MorphyInterface
 */
function createMorphy($dir, $lang)
{
    return new phpMorphy(
        $dir,
        $lang,
        array(
            'storage' => phpMorphy::STORAGE_MEM,
            'predict_by_suffix' => true,
            'predict_by_db' => true,
        )
    );
}

$morphy = createMorphy($morphy_dict_dir, $morphy_dict_lang);

// The converter is configured with the dictionary's encoding and letter case
// on one side and upper-case utf-8 on the other.
$encoding_converter = new phpMorphy_UserDict_EncodingConverter(
    $morphy->getEncoding(),
    $morphy->isInUpperCase() ? MB_CASE_UPPER : MB_CASE_LOWER,
    'utf-8',
    MB_CASE_UPPER
);

// phpMorphy_UserDict_Log_ErrorsHandlerException is the alternative handler
// left disabled by the original author.
$errors_handler = new phpMorphy_UserDict_Log_ErrorsHandlerPass();
$observer = new phpMorphy_UserDict_Log_CLI($verbose, $errors_handler, $encoding_converter);

// Time the conversion.
$b = microtime(true);

phpMorphy_UserDict_XmlDiff_Generator::convertFromXmlToXml(
    $user_dict_xml,
    $outXmlFilePath,
    $morphy,
    $observer,
    $encoding_converter
);

$e = microtime(true);

printf("Time taken: %0.2f\n", $e - $b);