/**
 * Returns the shared instance associated with the encoding of the given
 * phpMorphy object, constructing and caching it on first request.
 *
 * Instances are cached in self::$instances, keyed by the value returned
 * from $morphy->getEncoding(); that same value is passed to the constructor.
 *
 * @param phpMorphy $morphy Object whose encoding selects the cached instance.
 * @return self The cached (or newly created) instance for that encoding.
 */
static function instance(phpMorphy $morphy) {
    $encoding = $morphy->getEncoding();

    // Fast path: an instance for this encoding has already been built.
    if (isset(self::$instances[$encoding])) {
        return self::$instances[$encoding];
    }

    self::$instances[$encoding] = new self($encoding);

    return self::$instances[$encoding];
}
$opts = array('storage' => PHPMORPHY_STORAGE_FILE, 'predict_by_suffix' => true, 'predict_by_db' => true, 'graminfo_as_text' => true); // Path to directory where dictionaries located $dir = __DIR__ . '/../dicts/utf-8'; $lang = 'ru_RU'; // Create phpMorphy instance try { $morphy = new phpMorphy($dir, $lang, $opts); } catch (phpMorphy_Exception $e) { die('Error occured while creating phpMorphy instance: ' . PHP_EOL . $e); } // All words in dictionary in UPPER CASE, so don`t forget set proper locale via setlocale(...) call // $morphy->getEncoding() returns dictionary encoding $words = array('МАМА', 'МЫЛА', 'РАМУ', 'abc'); if (function_exists('iconv')) { foreach ($words as &$word) { $word = iconv('utf-8', $morphy->getEncoding(), $word); } unset($word); } try { foreach ($words as $word) { // by default, phpMorphy finds $word in dictionary and when nothig found, try to predict them // you can change this behaviour, via second argument to getXXX or findWord methods $base = $morphy->getBaseForm($word); $all = $morphy->getAllForms($word); $part_of_speech = $morphy->getPartOfSpeech($word); // $base = $morphy->getBaseForm($word, phpMorphy::NORMAL); // normal behaviour // $base = $morphy->getBaseForm($word, phpMorphy::IGNORE_PREDICT); // don`t use prediction // $base = $morphy->getBaseForm($word, phpMorphy::ONLY_PREDICT); // always predict word $is_predicted = $morphy->isLastPredicted(); // or $morphy->getLastPredictionType() == phpMorphy::PREDICT_BY_NONE
#!/usr/bin/env php <?php set_include_path(__DIR__ . '/../src/' . PATH_SEPARATOR . get_include_path()); require 'phpMorphy.php'; define('WORD_NOT_FOUND', 1); if ($argc < 2) { die("Usage {$argv['0']} WORD [LANG] [ENCODING]" . PHP_EOL); } $word = $argv[1]; $lang = $argc > 2 ? $argv[2] : 'ru_RU'; $dir = __DIR__ . '/../dicts/'; $dir .= $argc > 3 ? "/{$argv[3]}" : 'utf-8'; $opts = array('storage' => PHPMORPHY_STORAGE_FILE, 'predict_by_suffix' => true, 'predict_by_db' => true); $morphy = new phpMorphy($dir, $lang, $opts); $encoding = $morphy->getEncoding(); $formatter = new phpMorphy_Paradigm_Formatter(); $word = iconv('utf-8', $encoding, $word); $word = mb_strtoupper($word, $encoding); $result = $morphy->findWord($word); $predict_text = 'DICT'; if ($morphy->getLastPredictionType() == phpMorphy::PREDICT_BY_DB) { $predict_text = 'PREDICT_BY_DB'; } else { if ($morphy->getLastPredictionType() == phpMorphy::PREDICT_BY_SUFFIX) { $predict_text = 'PREDICT_BY_SUFFIX'; } } echo "Paradigms for {$word}({$predict_text}):" . PHP_EOL; if (false === $result) { echo 'NOT FOUND' . PHP_EOL; exit(WORD_NOT_FOUND);
$opts = array('storage' => PHPMORPHY_STORAGE_FILE, 'predict_by_suffix' => true, 'predict_by_db' => true, 'graminfo_as_text' => true); // Path to directory where dictionaries located $dir = dirname(__FILE__) . '/../dicts'; $lang = 'ru_RU'; // Create phpMorphy instance try { $morphy = new phpMorphy($dir, $lang, $opts); } catch (phpMorphy_Exception $e) { die('Error occured while creating phpMorphy instance: ' . PHP_EOL . $e); } // All words in dictionary in UPPER CASE, so don`t forget set proper locale via setlocale(...) call // $morphy->getEncoding() returns dictionary encoding $words = array('ÊÐÀÊÎÇßÁËÈÊÈ', 'ÑÒÀËÈ', 'ÂÈÍÀ', 'È', 'ÄÓÕÈ', 'abc'); if (function_exists('iconv')) { foreach ($words as &$word) { $word = iconv('windows-1251', $morphy->getEncoding(), $word); } unset($word); } try { foreach ($words as $word) { // by default, phpMorphy finds $word in dictionary and when nothig found, try to predict them // you can change this behaviour, via second argument to getXXX or findWord methods $base = $morphy->getBaseForm($word); $all = $morphy->getAllForms($word); $part_of_speech = $morphy->getPartOfSpeech($word); // $base = $morphy->getBaseForm($word, phpMorphy::NORMAL); // normal behaviour // $base = $morphy->getBaseForm($word, phpMorphy::IGNORE_PREDICT); // don`t use prediction // $base = $morphy->getBaseForm($word, phpMorphy::ONLY_PREDICT); // always predict word $is_predicted = $morphy->isLastPredicted(); // or $morphy->getLastPredictionType() == phpMorphy::PREDICT_BY_NONE
/**
 * Times phpMorphy::getBaseForm() over a list of words and prints a one-line
 * report (elapsed seconds, words per second, number of predicted words).
 *
 * @param array  $words         Words to look up; handed to convert_words() before timing starts.
 * @param string $encoding      Encoding passed to convert_words() and shown in the report label.
 * @param string $dictDir       Dictionary directory given to phpMorphy_FilesBundle.
 * @param string $lang          Language given to phpMorphy_FilesBundle.
 * @param mixed  $storage       Value for the 'storage' option; also shown in the report label.
 * @param bool   $useBulk       True to pass the whole list to getBaseForm() in one call,
 *                              false to look up the words one at a time.
 * @param bool   $usePrediction Value for the 'predict_by_suffix' option ('predict_by_db' is always off).
 * @return void
 */
function bench_morphy($words, $encoding, $dictDir, $lang, $storage, $useBulk, $usePrediction = true) {
    $opts = array(
        'storage' => $storage,
        'predict_by_suffix' => $usePrediction,
        'predict_by_db' => false,
    );

    $bundle = new phpMorphy_FilesBundle($dictDir, $lang);
    $morphy = new phpMorphy($bundle, $opts);

    // NOTE(review): $unicode is not used by any active code in this function;
    // the lookup is kept because it may fail for unsupported encodings - confirm
    // whether it can be dropped.
    $unicode = phpMorphy_UnicodeHelper_UnicodeHelperAbstract::getHelperForEncoding($morphy->getEncoding());

    echo "Bench phpMorphy[{$encoding}][{$storage}][" . ($useBulk ? 'BULK' : 'SINGLE') . "] : ";

    // Presumably converts $words in place to $encoding in upper case (the
    // return value is not used here) - verify against convert_words().
    convert_words($words, $encoding, MB_CASE_UPPER);

    $predicted = 0;
    $started = microtime(true);

    if ($useBulk) {
        $morphy->getBaseForm($words);
    } else {
        foreach ($words as $word) {
            // NOTE(review): the result is discarded, so this call only adds
            // case-conversion cost to every timed iteration - confirm it is intentional.
            mb_strtoupper($word, 'utf-8');

            $morphy->getBaseForm($word);

            if ($morphy->isLastPredicted()) {
                $predicted++;
            }
        }
    }

    $elapsed = microtime(true) - $started;

    // An empty word list or a coarse timer can yield a zero interval; dividing
    // by it raises a warning (PHP < 8) or DivisionByZeroError (PHP >= 8), so
    // report a rate of 0 when no time was measured.
    $wordsPerSecond = $elapsed > 0 ? count($words) / $elapsed : 0.0;

    printf(
        "time = %0.2f sec, words per second = %0.2f, predicted = %d\n",
        $elapsed,
        $wordsPerSecond,
        $predicted
    );
}