Example #1
0
 /**
  * Returns the shared instance associated with the encoding of the given phpMorphy object.
  *
  * Instances are cached in self::$instances, keyed by the encoding string, so
  * repeated calls with the same encoding return the same object.
  *
  * @param phpMorphy $morphy Object whose getEncoding() result selects the instance.
  * @return self Cached (or newly created) instance for that encoding.
  */
 static function instance(phpMorphy $morphy)
 {
     $encoding = $morphy->getEncoding();
     if (isset(self::$instances[$encoding])) {
         return self::$instances[$encoding];
     }

     // First request for this encoding: build the instance and remember it.
     $created = new self($encoding);
     self::$instances[$encoding] = $created;

     return $created;
 }
// Dictionary language and the directory where the dictionaries are located
$lang = 'ru_RU';
$dir = __DIR__ . '/../dicts/utf-8';

// Options handed to the phpMorphy constructor
$opts = array(
    'storage' => PHPMORPHY_STORAGE_FILE,
    'predict_by_suffix' => true,
    'predict_by_db' => true,
    'graminfo_as_text' => true,
);

// Create the phpMorphy instance; abort the script with the exception text on failure
try {
    $morphy = new phpMorphy($dir, $lang, $opts);
} catch (phpMorphy_Exception $e) {
    exit('Error occured while creating phpMorphy instance: ' . PHP_EOL . $e);
}

// All words in the dictionary are in UPPER CASE, so do not forget to set a proper locale via setlocale(...)
// $morphy->getEncoding() returns the dictionary encoding
$words = array('МАМА', 'МЫЛА', 'РАМУ', 'abc');

// When iconv is available, re-encode every word from UTF-8 into the dictionary encoding
if (function_exists('iconv')) {
    $words = array_map(
        function ($utf8Word) use ($morphy) {
            return iconv('utf-8', $morphy->getEncoding(), $utf8Word);
        },
        $words
    );
}
// Query phpMorphy for every word in $words.
// NOTE: the remainder of this loop and the matching catch lie outside the visible excerpt.
try {
    foreach ($words as $word) {
        // By default phpMorphy looks $word up in the dictionary and, when nothing is found, tries to predict it.
        // This behaviour can be changed via the second argument to the getXXX or findWord methods.
        $base = $morphy->getBaseForm($word);
        $all = $morphy->getAllForms($word);
        $part_of_speech = $morphy->getPartOfSpeech($word);
        // $base = $morphy->getBaseForm($word, phpMorphy::NORMAL); // normal behaviour
        // $base = $morphy->getBaseForm($word, phpMorphy::IGNORE_PREDICT); // don't use prediction
        // $base = $morphy->getBaseForm($word, phpMorphy::ONLY_PREDICT); // always predict word
        // Remember whether the most recent lookup had to fall back to prediction.
        $is_predicted = $morphy->isLastPredicted();
        // Alternatively, compare $morphy->getLastPredictionType() with phpMorphy::PREDICT_BY_NONE
#!/usr/bin/env php
<?php
// Command-line helper: looks up a single word with phpMorphy and prints a header
// naming the word and how it was resolved (dictionary hit or kind of prediction).
//
// Usage: SCRIPT WORD [LANG] [ENCODING]
//   WORD     word to look up, expected in UTF-8
//   LANG     dictionary language, defaults to ru_RU
//   ENCODING dictionary sub-directory below ../dicts, defaults to utf-8
set_include_path(__DIR__ . '/../src/' . PATH_SEPARATOR . get_include_path());
require 'phpMorphy.php';

// Exit status reported when the word cannot be found
define('WORD_NOT_FOUND', 1);

if ($argc < 2) {
    die("Usage {$argv[0]} WORD [LANG] [ENCODING]" . PHP_EOL);
}

$word = $argv[1];
$lang = $argc > 2 ? $argv[2] : 'ru_RU';
// Join the dictionary sub-directory with exactly one separator; an explicit
// ENCODING argument used to produce ".../dicts//ENCODING".
$dir = __DIR__ . '/../dicts/' . ($argc > 3 ? $argv[3] : 'utf-8');
$opts = array(
    'storage' => PHPMORPHY_STORAGE_FILE,
    'predict_by_suffix' => true,
    'predict_by_db' => true,
);
$morphy = new phpMorphy($dir, $lang, $opts);
$encoding = $morphy->getEncoding();
$formatter = new phpMorphy_Paradigm_Formatter();

// The word is converted from UTF-8 into the dictionary encoding and upper-cased before the lookup.
// NOTE(review): iconv() returns false on invalid input and that is not checked here.
$word = iconv('utf-8', $encoding, $word);
$word = mb_strtoupper($word, $encoding);
$result = $morphy->findWord($word);

// Translate the prediction type of the lookup above into a label; anything that
// is neither DB nor suffix prediction is reported as a dictionary hit.
$prediction_type = $morphy->getLastPredictionType();
if ($prediction_type == phpMorphy::PREDICT_BY_DB) {
    $predict_text = 'PREDICT_BY_DB';
} elseif ($prediction_type == phpMorphy::PREDICT_BY_SUFFIX) {
    $predict_text = 'PREDICT_BY_SUFFIX';
} else {
    $predict_text = 'DICT';
}
echo "Paradigms for {$word}({$predict_text}):" . PHP_EOL;
// A strict false in $result is treated as "word not found": report it and exit with the WORD_NOT_FOUND status.
if (false === $result) {
    echo 'NOT FOUND' . PHP_EOL;
    exit(WORD_NOT_FOUND);
// Options handed to the phpMorphy constructor
$opts = array(
    'storage' => PHPMORPHY_STORAGE_FILE,
    'predict_by_suffix' => true,
    'predict_by_db' => true,
    'graminfo_as_text' => true,
);
// Path to the directory where the dictionaries are located
$dir = dirname(__FILE__) . '/../dicts';
$lang = 'ru_RU';
// Create the phpMorphy instance; abort the script with the exception text on failure
try {
    $morphy = new phpMorphy($dir, $lang, $opts);
} catch (phpMorphy_Exception $e) {
    die('Error occured while creating phpMorphy instance: ' . PHP_EOL . $e);
}
// All words in the dictionary are in UPPER CASE, so do not forget to set a proper locale via setlocale(...)
// $morphy->getEncoding() returns the dictionary encoding
//
// The literals are stored as UTF-8 text in this file, so they are converted from
// UTF-8. They were previously mojibake (windows-1251 bytes rendered as Latin-1)
// and were converted from 'windows-1251', which produced garbage.
$words = array('КРАКОЗЯБЛИКИ', 'СТАЛИ', 'ВИНА', 'И', 'ДУХИ', 'abc');
if (function_exists('iconv')) {
    foreach ($words as &$word) {
        $word = iconv('utf-8', $morphy->getEncoding(), $word);
    }
    // Drop the reference so later uses of $word cannot overwrite the last array element
    unset($word);
}
// Query phpMorphy for every word in $words.
// NOTE: the remainder of this loop and the matching catch lie outside the visible excerpt.
try {
    foreach ($words as $word) {
        // By default phpMorphy looks $word up in the dictionary and, when nothing is found, tries to predict it.
        // This behaviour can be changed via the second argument to the getXXX or findWord methods.
        $base = $morphy->getBaseForm($word);
        $all = $morphy->getAllForms($word);
        $part_of_speech = $morphy->getPartOfSpeech($word);
        // $base = $morphy->getBaseForm($word, phpMorphy::NORMAL); // normal behaviour
        // $base = $morphy->getBaseForm($word, phpMorphy::IGNORE_PREDICT); // don't use prediction
        // $base = $morphy->getBaseForm($word, phpMorphy::ONLY_PREDICT); // always predict word
        // Remember whether the most recent lookup had to fall back to prediction.
        $is_predicted = $morphy->isLastPredicted();
        // Alternatively, compare $morphy->getLastPredictionType() with phpMorphy::PREDICT_BY_NONE
Example #5
0
/**
 * Times phpMorphy::getBaseForm() over a word list and prints a one-line report.
 *
 * In bulk mode the whole list is passed to a single getBaseForm() call and the
 * "predicted" counter stays at 0; in single mode each word is looked up on its
 * own and the words for which isLastPredicted() is true are counted.
 *
 * @param array  $words         Words to process; passed through convert_words() first.
 * @param string $encoding      Encoding handed to convert_words() and shown in the report.
 * @param string $dictDir       Dictionary directory passed to phpMorphy_FilesBundle.
 * @param string $lang          Dictionary language passed to phpMorphy_FilesBundle.
 * @param mixed  $storage       Value of the 'storage' option; also shown in the report.
 * @param bool   $useBulk       True for one bulk call, false for one call per word.
 * @param bool   $usePrediction Value of the 'predict_by_suffix' option.
 * @return void
 */
function bench_morphy($words, $encoding, $dictDir, $lang, $storage, $useBulk, $usePrediction = true)
{
    $opts = array('storage' => $storage, 'predict_by_suffix' => $usePrediction, 'predict_by_db' => false);
    $bundle = new phpMorphy_FilesBundle($dictDir, $lang);
    $morphy = new phpMorphy($bundle, $opts);
    // NOTE(review): $unicode is only referenced by the disabled experiments below;
    // the lookup is kept in case it has side effects or validates the encoding - TODO confirm.
    $unicode = phpMorphy_UnicodeHelper_UnicodeHelperAbstract::getHelperForEncoding($morphy->getEncoding());
    echo "Bench phpMorphy[{$encoding}][{$storage}][" . ($useBulk ? 'BULK' : 'SINGLE') . "] : ";
    // presumably convert_words() modifies $words in place (its result is not assigned) - verify against its definition
    convert_words($words, $encoding, MB_CASE_UPPER);
    $predicted = 0;
    $b = microtime(true);
    if ($useBulk) {
        $morphy->getBaseForm($words);
    } else {
        foreach ($words as $word) {
            //$unicode->strrev($word); mb_strtoupper($word, 'utf-8');
            //strtr($word, $replace);
            //strrev($word);
            //mb_strtolower($word, 'utf-8');
            $morphy->getBaseForm($word);
            if ($morphy->isLastPredicted()) {
                $predicted++;
            }
        }
    }
    $e = microtime(true);
    $elapsed = $e - $b;
    // Guard the rate against a zero (or negative) timer difference, which would
    // otherwise divide by zero; the rate is then reported as 0.00.
    $wordsPerSecond = $elapsed > 0 ? count($words) / $elapsed : 0.0;
    printf("time = %0.2f sec, words per second = %0.2f, predicted = %d\n", $elapsed, $wordsPerSecond, $predicted);
}