/**
 * Initialises the object on top of the parent FSA/annotation-decoder setup.
 *
 * After delegating $fsa and $annotDecoder to the parent constructor, this
 * stores the gram info object and the minimum postfix match value, builds
 * the collector through createCollector(), and resolves the unicode helper
 * that matches $encoding.
 *
 * @param phpMorphy_Fsa_FsaInterface                   $fsa             Passed to the parent constructor.
 * @param phpMorphy_AnnotDecoder_AnnotDecoderInterface $annotDecoder    Passed to the parent constructor.
 * @param string                                       $encoding        Encoding name used to pick the unicode helper.
 * @param phpMorphy_GramInfo_GramInfoInterface         $graminfo        Stored in $this->graminfo.
 * @param int                                          $minPostfixMatch Stored as-is in $this->min_postfix_match.
 * @param int                                          $collectLimit    Limit handed to createCollector().
 */
function __construct(
    phpMorphy_Fsa_FsaInterface $fsa,
    phpMorphy_AnnotDecoder_AnnotDecoderInterface $annotDecoder,
    $encoding,
    phpMorphy_GramInfo_GramInfoInterface $graminfo,
    $minPostfixMatch = 2,
    $collectLimit = 32
) {
    parent::__construct($fsa, $annotDecoder);

    $this->graminfo = $graminfo;
    $this->min_postfix_match = $minPostfixMatch;

    // The collector is given the decoder as exposed by getAnnotDecoder(),
    // not the raw constructor argument.
    $decoder = $this->getAnnotDecoder();
    $this->collector = $this->createCollector($collectLimit, $decoder);

    $this->unicode = phpMorphy_UnicodeHelper_UnicodeHelperAbstract::getHelperForEncoding(
        $encoding
    );
}
/**
 * Initialises the object on top of the parent FSA/annotation-decoder setup.
 *
 * Delegates $fsa and $annotDecoder to the parent constructor, records the
 * minimal suffix length (coerced to an integer) and resolves the unicode
 * helper that matches $encoding.
 *
 * @param phpMorphy_Fsa_FsaInterface                   $fsa                 Passed to the parent constructor.
 * @param phpMorphy_AnnotDecoder_AnnotDecoderInterface $annotDecoder        Passed to the parent constructor.
 * @param string                                       $encoding            Encoding name used to pick the unicode helper.
 * @param int                                          $minimalSuffixLength Cast to int and stored in $this->min_suf_len.
 */
function __construct(
    phpMorphy_Fsa_FsaInterface $fsa,
    phpMorphy_AnnotDecoder_AnnotDecoderInterface $annotDecoder,
    $encoding,
    $minimalSuffixLength = 4
) {
    parent::__construct($fsa, $annotDecoder);

    // Explicit cast so a numeric string from configuration is stored as int.
    $suffixLength = (int) $minimalSuffixLength;
    $this->min_suf_len = $suffixLength;

    $this->unicode = phpMorphy_UnicodeHelper_UnicodeHelperAbstract::getHelperForEncoding(
        $encoding
    );
}
/**
 * Times phpMorphy::getBaseForm() over a word list and prints a one-line report.
 *
 * A phpMorphy instance is built from the given dictionary bundle with
 * suffix-based prediction toggled by $usePrediction and database prediction
 * always disabled. The words are passed through convert_words() with
 * MB_CASE_UPPER and then looked up either in one bulk call or one by one.
 * Only the lookup phase is timed.
 *
 * The number of predicted words is counted in single-word mode only; in bulk
 * mode the report always shows "predicted = 0".
 *
 * @param array  $words         Words to look up.
 * @param string $encoding      Encoding passed to convert_words() and shown in the report.
 * @param string $dictDir       Dictionary directory passed to phpMorphy_FilesBundle.
 * @param string $lang          Language passed to phpMorphy_FilesBundle.
 * @param string $storage       Value for the 'storage' option; also shown in the report.
 * @param bool   $useBulk       True for a single getBaseForm($words) call, false for per-word calls.
 * @param bool   $usePrediction Value for the 'predict_by_suffix' option.
 *
 * @return void
 */
function bench_morphy($words, $encoding, $dictDir, $lang, $storage, $useBulk, $usePrediction = true) {
    $opts = array(
        'storage' => $storage,
        'predict_by_suffix' => $usePrediction,
        'predict_by_db' => false
    );

    $bundle = new phpMorphy_FilesBundle($dictDir, $lang);
    $morphy = new phpMorphy($bundle, $opts);

    echo "Bench phpMorphy[{$encoding}][{$storage}][" . ($useBulk ? 'BULK' : 'SINGLE') . "] : ";

    // NOTE(review): presumably convert_words() receives $words by reference and
    // rewrites it in place; confirm against its definition.
    convert_words($words, $encoding, MB_CASE_UPPER);

    $predicted = 0;

    $b = microtime(true);

    if ($useBulk) {
        $morphy->getBaseForm($words);
    } else {
        // Only the lookup itself is inside the timed loop, so SINGLE and BULK
        // measure the same work. A stray mb_strtoupper() call whose result was
        // discarded used to sit here and inflated the SINGLE timing.
        foreach ($words as $word) {
            $morphy->getBaseForm($word);

            if ($morphy->isLastPredicted()) {
                $predicted++;
            }
        }
    }

    $e = microtime(true);

    $elapsed = $e - $b;

    // A zero timer difference would otherwise divide by zero, which throws
    // DivisionByZeroError on PHP 8; report a rate of 0 in that case.
    $wordsPerSecond = $elapsed > 0 ? count($words) / $elapsed : 0.0;

    printf("time = %0.2f sec, words per second = %0.2f, predicted = %d\n", $elapsed, $wordsPerSecond, $predicted);
}