unset($word); } */ try { foreach ($words as $word) { // by default, phpMorphy finds $word in dictionary and when nothig found, try to predict them // you can change this behaviour, via second argument to getXXX or findWord methods $base = $morphy->getBaseForm($word); $all = $morphy->getAllForms($word); $part_of_speech = $morphy->getPartOfSpeech($word); // $base = $morphy->getBaseForm($word, phpMorphy::NORMAL); // normal behaviour // $base = $morphy->getBaseForm($word, phpMorphy::IGNORE_PREDICT); // don`t use prediction // $base = $morphy->getBaseForm($word, phpMorphy::ONLY_PREDICT); // always predict word $is_predicted = $morphy->isLastPredicted(); // or $morphy->getLastPredictionType() == phpMorphy::PREDICT_BY_NONE $is_predicted_by_db = $morphy->getLastPredictionType() == phpMorphy::PREDICT_BY_DB; $is_predicted_by_suffix = $morphy->getLastPredictionType() == phpMorphy::PREDICT_BY_SUFFIX; // this used for deep analysis $collection = $morphy->findWord($word); // or var_dump($morphy->getAllFormsWithGramInfo($word)); for debug if (false === $collection) { echo $word, " NOT FOUND\n"; continue; } else { } echo $is_predicted ? '-' : '+', $word, "\n"; echo 'lemmas: ', implode(', ', $base), "\n"; echo 'all: ', implode(', ', $all), "\n"; echo 'poses: ', implode(', ', $part_of_speech), "\n"; echo "\n"; // $collection collection of paradigm for given word
/**
 * Reduce a list of words to their base forms (lemmas) using phpMorphy.
 *
 * Each non-empty word is looked up with phpMorphy::getBaseForm(), which
 * searches the dictionary first and falls back to prediction (both
 * suffix and DB prediction are enabled in the options below). Every base
 * form returned for a word is collected; a word for which phpMorphy
 * returns no result is kept unchanged so it is not lost.
 *
 * The collected list is passed through $this->sanitizeWordsArray() and
 * then de-duplicated with array_unique(), so the keys of the returned
 * array are not guaranteed to be sequential.
 *
 * @param array $words Words to lemmatize; falsy entries ('', null, '0', ...) are skipped.
 *
 * @return array|false Unique lemmas, or false when the phpMorphy instance
 *                     could not be created (the error is written to the MODX log).
 */
protected function getLemmas(array $words)
{
    require_once MODX_CORE_PATH . 'components/modsearch/external/phpmorphy/src/common.php';

    // phpMorphy options: file-based storage, both prediction modes enabled,
    // grammatical info returned as text.
    $opts = array(
        'storage' => PHPMORPHY_STORAGE_FILE,
        'predict_by_suffix' => true,
        'predict_by_db' => true,
        'graminfo_as_text' => true,
    );

    // Directory where the dictionaries are located, and the dictionary language.
    $dir = MODX_CORE_PATH . 'components/modsearch/external/phpmorphy/dicts';
    $lang = 'ru_RU';

    // `new` never evaluates to a falsy value, so a failed construction can only
    // be detected by catching the exception phpMorphy throws.
    try {
        $morphy = new phpMorphy($dir, $lang, $opts);
    } catch (phpMorphy_Exception $e) {
        $this->modx->log(
            xPDO::LOG_LEVEL_ERROR,
            "[" . __CLASS__ . "] Не был получен объект phpMorphy" . ': ' . $e->getMessage()
        );

        return false;
    }

    $lemmas = array();

    foreach ($words as $word) {
        if (!$word) {
            continue;
        }

        // By default phpMorphy looks the word up in the dictionary and, when
        // nothing is found, tries to predict it.
        $base = $morphy->getBaseForm($word);

        // No array means phpMorphy found nothing (not even by prediction):
        // keep the word as is.
        if (!is_array($base)) {
            $lemmas[] = $word;
            continue;
        }

        foreach ($base as $lemma) {
            $lemmas[] = $lemma;
        }
    }

    $lemmas = $this->sanitizeWordsArray($lemmas);

    return array_unique($lemmas);
}
// Command-line entry point: print every paradigm phpMorphy finds for one word.
//
// Arguments: WORD [LANG] [ENCODING]
//   WORD     - the word to look up, expected in UTF-8
//   LANG     - dictionary language, defaults to ru_RU
//   ENCODING - name of the dictionary sub-directory under ../dicts, defaults to utf-8

if ($argc < 2) {
    // A usage error is a failure: report it on STDERR with a non-zero status
    // (64 follows the sysexits EX_USAGE convention).
    fwrite(STDERR, "Usage {$argv[0]} WORD [LANG] [ENCODING]" . PHP_EOL);
    exit(64);
}

$word = $argv[1];
$lang = $argc > 2 ? $argv[2] : 'ru_RU';

// Build the dictionary directory with exactly one separator before the
// encoding sub-directory.
$dict_subdir = $argc > 3 ? $argv[3] : 'utf-8';
$dir = __DIR__ . '/../dicts/' . $dict_subdir;

$opts = array(
    'storage' => PHPMORPHY_STORAGE_FILE,
    'predict_by_suffix' => true,
    'predict_by_db' => true,
);

$morphy = new phpMorphy($dir, $lang, $opts);
$encoding = $morphy->getEncoding();
$formatter = new phpMorphy_Paradigm_Formatter();

// Convert the word from UTF-8 to the dictionary encoding. iconv() returns
// false when the conversion fails; stop instead of looking up a bogus value
// (65 follows the sysexits EX_DATAERR convention).
$converted = iconv('utf-8', $encoding, $word);
if (false === $converted) {
    fwrite(STDERR, "Cannot convert '{$word}' from utf-8 to {$encoding}" . PHP_EOL);
    exit(65);
}

// The lookup is done on the upper-cased word.
$word = mb_strtoupper($converted, $encoding);

$result = $morphy->findWord($word);

// Describe how the word was resolved: plain dictionary hit or one of the
// two prediction modes.
$prediction_type = $morphy->getLastPredictionType();
$predict_text = 'DICT';
if ($prediction_type == phpMorphy::PREDICT_BY_DB) {
    $predict_text = 'PREDICT_BY_DB';
} elseif ($prediction_type == phpMorphy::PREDICT_BY_SUFFIX) {
    $predict_text = 'PREDICT_BY_SUFFIX';
}

echo "Paradigms for {$word}({$predict_text}):" . PHP_EOL;

if (false === $result) {
    echo 'NOT FOUND' . PHP_EOL;
    // NOTE(review): WORD_NOT_FOUND is not defined in this block; presumably it
    // is defined earlier in the script - confirm.
    exit(WORD_NOT_FOUND);
}

$para_no = 1;
foreach ($result as $paradigm) {
    printf("  Paradigm %2d.\n%s", $para_no++, $formatter->format($paradigm, '  '));
}