Esempio n. 1
0
// Example fragment: for every entry of $words, query phpMorphy and print the word's
// lemmas, all forms and parts of speech.
// NOTE(review): the fragment is cut off inside the inner foreach below; the closing
// braces and the catch clause belonging to this try are not visible here.
try {
    foreach ($words as $word) {
        // By default phpMorphy looks $word up in the dictionary and, when nothing is found, tries to predict it.
        // This behaviour can be changed via the second argument of the getXXX or findWord methods.
        $base = $morphy->getBaseForm($word);
        $all = $morphy->getAllForms($word);
        $part_of_speech = $morphy->getPartOfSpeech($word);
        // $base = $morphy->getBaseForm($word, phpMorphy::NORMAL); // normal behaviour
        // $base = $morphy->getBaseForm($word, phpMorphy::IGNORE_PREDICT); // don't use prediction
        // $base = $morphy->getBaseForm($word, phpMorphy::ONLY_PREDICT); // always predict the word
        $is_predicted = $morphy->isLastPredicted();
        // Alternatively, compare $morphy->getLastPredictionType() with phpMorphy::PREDICT_BY_NONE.
        // NOTE(review): the two flags below are computed but not used in the visible part of the fragment.
        $is_predicted_by_db = $morphy->getLastPredictionType() == phpMorphy::PREDICT_BY_DB;
        $is_predicted_by_suffix = $morphy->getLastPredictionType() == phpMorphy::PREDICT_BY_SUFFIX;
        // findWord() is used for deep analysis: its result is iterated paradigm by paradigm below.
        $collection = $morphy->findWord($word);
        // For debugging: var_dump($morphy->getAllFormsWithGramInfo($word));
        if (false === $collection) {
            // Nothing found for this word: report it and move on to the next one.
            echo $word, " NOT FOUND\n";
            continue;
        } else {
        }
        // A '-' prefix marks a predicted word, '+' marks one that was not predicted.
        echo $is_predicted ? '-' : '+', $word, "\n";
        echo 'lemmas: ', implode(', ', $base), "\n";
        echo 'all: ', implode(', ', $all), "\n";
        echo 'poses: ', implode(', ', $part_of_speech), "\n";
        echo "\n";
        // $collection is the collection of paradigms for the given word.
        // TODO: $collection->getByPartOfSpeech(...);
        foreach ($collection as $paradigm) {
            // TODO: $paradigm->getBaseForm();
 /**
  * Reduce a list of words to their unique base forms (lemmas) using phpMorphy.
  *
  * Empty (falsy) entries are skipped. A word for which phpMorphy yields no base
  * forms is kept unchanged. The collected list is passed through
  * $this->sanitizeWordsArray() and then de-duplicated with array_unique(), so the
  * keys of the returned array are preserved from the sanitized list and may have gaps.
  *
  * The dictionary language is fixed to ru_RU and the dictionaries are read from the
  * modsearch component directory under MODX_CORE_PATH.
  *
  * @param array $words Words to lemmatize.
  *
  * @return array|false Unique lemmas, or false when the phpMorphy instance could not be created.
  */
 protected function getLemmas(array $words)
 {
     require_once MODX_CORE_PATH . 'components/modsearch/external/phpmorphy/src/common.php';

     $opts = array(
         'storage' => PHPMORPHY_STORAGE_FILE,
         'predict_by_suffix' => true,
         'predict_by_db' => true,
         'graminfo_as_text' => true,
     );
     // Directory where the dictionaries are located
     $dir = MODX_CORE_PATH . 'components/modsearch/external/phpmorphy/dicts';
     $lang = 'ru_RU';

     // `new` never evaluates to a falsy value; a failed construction is reported
     // through an exception, so that is what has to be handled here.
     try {
         $morphy = new phpMorphy($dir, $lang, $opts);
     } catch (phpMorphy_Exception $e) {
         $this->modx->log(
             xPDO::LOG_LEVEL_ERROR,
             "[" . __CLASS__ . "] Не был получен объект phpMorphy" . ': ' . $e->getMessage()
         );
         return false;
     }

     $lemmas = array();
     foreach ($words as $word) {
         if (!$word) {
             continue;
         }

         // By default phpMorphy looks the word up in the dictionary and, when
         // nothing is found, tries to predict it.
         $base = $morphy->getBaseForm($word);

         if (!is_array($base)) {
             // The word was not found: add it to the result as is.
             $lemmas[] = $word;
             continue;
         }

         foreach ($base as $lemma) {
             $lemmas[] = $lemma;
         }
     }

     $lemmas = $this->sanitizeWordsArray($lemmas);
     return array_unique($lemmas);
 }
// Command-line helper: looks one word up with phpMorphy and prints the header line
// for its paradigms. Usage: <script> WORD [LANG] [ENCODING]
require 'phpMorphy.php';

// Process exit status used when the word is not found.
define('WORD_NOT_FOUND', 1);

if ($argc < 2) {
    exit("Usage {$argv['0']} WORD [LANG] [ENCODING]" . PHP_EOL);
}

$word = $argv[1];

// The language is optional and falls back to ru_RU.
if ($argc > 2) {
    $lang = $argv[2];
} else {
    $lang = 'ru_RU';
}

// The dictionary directory is chosen by the optional third argument (utf-8 when omitted).
$dir = __DIR__ . '/../dicts/' . ($argc > 3 ? "/{$argv[3]}" : 'utf-8');

$opts = array(
    'storage' => PHPMORPHY_STORAGE_FILE,
    'predict_by_suffix' => true,
    'predict_by_db' => true,
);
$morphy = new phpMorphy($dir, $lang, $opts);
$encoding = $morphy->getEncoding();
$formatter = new phpMorphy_Paradigm_Formatter();

// Convert the word from utf-8 to the encoding reported by phpMorphy, then upper-case it.
$word = mb_strtoupper(iconv('utf-8', $encoding, $word), $encoding);
$result = $morphy->findWord($word);

// Label describing how the last lookup was resolved.
switch ($morphy->getLastPredictionType()) {
    case phpMorphy::PREDICT_BY_DB:
        $predict_text = 'PREDICT_BY_DB';
        break;
    case phpMorphy::PREDICT_BY_SUFFIX:
        $predict_text = 'PREDICT_BY_SUFFIX';
        break;
    default:
        $predict_text = 'DICT';
        break;
}

echo "Paradigms for {$word}({$predict_text}):", PHP_EOL;

if ($result === false) {
    echo 'NOT FOUND', PHP_EOL;
    exit(WORD_NOT_FOUND);
}

$para_no = 1;
foreach ($result as $paradigm) {