/**
 * Sets up the instance: delegates the FSA and annotation decoder to the parent
 * constructor, then stores the gram info, the minimum postfix match value,
 * a collector and an encoding-specific Unicode helper.
 *
 * @param phpMorphy_Fsa_FsaInterface                   $fsa             Forwarded to the parent constructor.
 * @param phpMorphy_AnnotDecoder_AnnotDecoderInterface $annotDecoder    Forwarded to the parent constructor.
 * @param mixed                                        $encoding        Passed to getHelperForEncoding() to pick the Unicode helper.
 * @param phpMorphy_GramInfo_GramInfoInterface         $graminfo        Stored as-is in $this->graminfo.
 * @param mixed                                        $minPostfixMatch Stored as-is in $this->min_postfix_match (default 2).
 * @param mixed                                        $collectLimit    Passed to createCollector() (default 32).
 */
function __construct(phpMorphy_Fsa_FsaInterface $fsa, phpMorphy_AnnotDecoder_AnnotDecoderInterface $annotDecoder, $encoding, phpMorphy_GramInfo_GramInfoInterface $graminfo, $minPostfixMatch = 2, $collectLimit = 32)
 {
     parent::__construct($fsa, $annotDecoder);

     // Plain state first: the collector factory below runs after these are set.
     $this->min_postfix_match = $minPostfixMatch;
     $this->graminfo = $graminfo;

     // The collector receives the decoder as exposed by the getter, not the raw argument.
     $decoder = $this->getAnnotDecoder();
     $this->collector = $this->createCollector($collectLimit, $decoder);

     $helper = phpMorphy_UnicodeHelper_UnicodeHelperAbstract::getHelperForEncoding($encoding);
     $this->unicode = $helper;
 }
Пример #2
0
 /**
  * Sets up the instance: delegates the FSA and annotation decoder to the parent
  * constructor, then stores the minimal suffix length (cast to int) and an
  * encoding-specific Unicode helper.
  *
  * @param phpMorphy_Fsa_FsaInterface                   $fsa                 Forwarded to the parent constructor.
  * @param phpMorphy_AnnotDecoder_AnnotDecoderInterface $annotDecoder        Forwarded to the parent constructor.
  * @param mixed                                        $encoding            Passed to getHelperForEncoding() to pick the Unicode helper.
  * @param mixed                                        $minimalSuffixLength Cast to int and stored in $this->min_suf_len (default 4).
  */
 function __construct(phpMorphy_Fsa_FsaInterface $fsa, phpMorphy_AnnotDecoder_AnnotDecoderInterface $annotDecoder, $encoding, $minimalSuffixLength = 4)
 {
     parent::__construct($fsa, $annotDecoder);

     // Normalise to an integer before storing.
     $suffixLength = (int) $minimalSuffixLength;
     $this->min_suf_len = $suffixLength;

     $helper = phpMorphy_UnicodeHelper_UnicodeHelperAbstract::getHelperForEncoding($encoding);
     $this->unicode = $helper;
 }
Пример #3
0
/**
 * Times phpMorphy::getBaseForm() over a list of words and prints a one-line
 * summary (elapsed seconds, words per second, number of predicted words).
 *
 * In bulk mode the whole list is passed to getBaseForm() in a single call and
 * the predicted counter is never incremented, so it is always reported as 0.
 * In single mode each word is looked up separately and isLastPredicted() is
 * checked after every lookup.
 *
 * @param array  $words         Words to look up; passed through convert_words() before timing starts.
 * @param string $encoding      Encoding label; passed to convert_words() and echoed in the header.
 * @param mixed  $dictDir       First argument for phpMorphy_FilesBundle.
 * @param mixed  $lang          Second argument for phpMorphy_FilesBundle.
 * @param mixed  $storage       Value for the 'storage' option; also echoed in the header.
 * @param bool   $useBulk       True for one bulk call, false for one call per word.
 * @param bool   $usePrediction Value for the 'predict_by_suffix' option.
 *
 * @return void
 */
function bench_morphy($words, $encoding, $dictDir, $lang, $storage, $useBulk, $usePrediction = true)
{
    $opts = array('storage' => $storage, 'predict_by_suffix' => $usePrediction, 'predict_by_db' => false);
    $bundle = new phpMorphy_FilesBundle($dictDir, $lang);
    $morphy = new phpMorphy($bundle, $opts);

    // NOTE(review): the returned helper is not used anywhere in this function.
    // The call is kept in case it has side effects (e.g. validating the encoding) - confirm and remove.
    phpMorphy_UnicodeHelper_UnicodeHelperAbstract::getHelperForEncoding($morphy->getEncoding());

    echo "Bench phpMorphy[{$encoding}][{$storage}][" . ($useBulk ? 'BULK' : 'SINGLE') . "] : ";

    // Return value is not used; presumably $words is converted in place (by reference) - verify against convert_words().
    convert_words($words, $encoding, MB_CASE_UPPER);

    $predicted = 0;
    $b = microtime(true);
    if ($useBulk) {
        $morphy->getBaseForm($words);
    } else {
        foreach ($words as $word) {
            $morphy->getBaseForm($word);
            if ($morphy->isLastPredicted()) {
                $predicted++;
            }
        }
    }
    $elapsed = microtime(true) - $b;

    // A zero elapsed time (e.g. empty word list) would make the division below
    // fail with DivisionByZeroError on PHP 8 / a warning on PHP 7; report 0 instead.
    $wordsPerSecond = $elapsed > 0 ? count($words) / $elapsed : 0.0;

    printf("time = %0.2f sec, words per second = %0.2f, predicted = %d\n", $elapsed, $wordsPerSecond, $predicted);
}