/**
 * Initializes the instance: delegates the FSA and annotation decoder to the
 * parent, then stores the gram-info source, the matching threshold, a
 * collector and the unicode helper for the given encoding.
 *
 * @param phpMorphy_Fsa_FsaInterface $fsa Forwarded to the parent constructor.
 * @param phpMorphy_AnnotDecoder_AnnotDecoderInterface $annotDecoder Forwarded to the parent constructor.
 * @param string $encoding Encoding name used to look up the unicode helper.
 * @param phpMorphy_GramInfo_GramInfoInterface $graminfo Stored on the instance unchanged.
 * @param int $minPostfixMatch Stored as min_postfix_match (default 2).
 * @param int $collectLimit Forwarded to createCollector() (default 32).
 */
function __construct(phpMorphy_Fsa_FsaInterface $fsa, phpMorphy_AnnotDecoder_AnnotDecoderInterface $annotDecoder, $encoding, phpMorphy_GramInfo_GramInfoInterface $graminfo, $minPostfixMatch = 2, $collectLimit = 32)
 {
     parent::__construct($fsa, $annotDecoder);

     $this->graminfo = $graminfo;
     $this->min_postfix_match = $minPostfixMatch;

     // The collector receives the decoder as exposed by getAnnotDecoder(),
     // not the raw constructor argument.
     $decoder = $this->getAnnotDecoder();
     $this->collector = $this->createCollector($collectLimit, $decoder);

     $helper = phpMorphy_UnicodeHelper_UnicodeHelperAbstract::getHelperForEncoding($encoding);
     $this->unicode = $helper;
 }
 /**
  * Stores the encoding name and, the first time it runs, records which
  * extension function should be used for string length.
  *
  * iconv is preferred; mbstring is chosen only when iconv is not loaded.
  * Both availability flags are always assigned, so the isset() guard
  * actually short-circuits on later constructions instead of re-probing
  * the extensions every time iconv happens to be present.
  *
  * @param string $encoding
  */
 protected function __construct($encoding)
 {
     $this->encoding = (string) $encoding;

     // Detection already done by an earlier instance.
     if (isset(self::$HAS_ICONV_EXTENSION, self::$HAS_MULTIBYTE_EXTENSION)) {
         return;
     }

     self::$HAS_ICONV_EXTENSION = extension_loaded('iconv');
     self::$HAS_MULTIBYTE_EXTENSION = extension_loaded('mbstring');

     if (self::$HAS_ICONV_EXTENSION) {
         self::$STRLEN_FUNCTION_NAME = 'iconv_strlen';
     } elseif (self::$HAS_MULTIBYTE_EXTENSION) {
         self::$STRLEN_FUNCTION_NAME = 'mb_strlen';
     }
     // With neither extension loaded, STRLEN_FUNCTION_NAME is left untouched.
 }
예제 #3
0
 /**
  * Delegates the encoding to the parent constructor and picks the integer
  * format code from the requested byte order.
  *
  * NOTE(review): 'n' / 'v' look like pack()/unpack() codes for unsigned
  * 16-bit big-/little-endian values; confirm where int_format_string is used.
  *
  * @param string $encoding Forwarded to the parent constructor.
  * @param mixed $isBigEndian Truthy selects 'n', falsy selects 'v'.
  */
 protected function __construct($encoding, $isBigEndian)
 {
     parent::__construct($encoding);

     if ($isBigEndian) {
         $this->int_format_string = 'n';
     } else {
         $this->int_format_string = 'v';
     }
 }
예제 #4
0
 /**
  * Initializes the instance: delegates the FSA and annotation decoder to the
  * parent, then stores the minimal suffix length (cast to int) and the
  * unicode helper for the given encoding.
  *
  * @param phpMorphy_Fsa_FsaInterface $fsa Forwarded to the parent constructor.
  * @param phpMorphy_AnnotDecoder_AnnotDecoderInterface $annotDecoder Forwarded to the parent constructor.
  * @param string $encoding Encoding name used to look up the unicode helper.
  * @param int $minimalSuffixLength Stored as min_suf_len after an int cast (default 4).
  */
 function __construct(phpMorphy_Fsa_FsaInterface $fsa, phpMorphy_AnnotDecoder_AnnotDecoderInterface $annotDecoder, $encoding, $minimalSuffixLength = 4)
 {
     parent::__construct($fsa, $annotDecoder);

     $suffixLength = (int) $minimalSuffixLength;
     $this->min_suf_len = $suffixLength;

     $helper = phpMorphy_UnicodeHelper_UnicodeHelperAbstract::getHelperForEncoding($encoding);
     $this->unicode = $helper;
 }
예제 #5
0
 /**
  * Delegates the encoding to the parent constructor and records the
  * character size as an integer.
  *
  * @param string $encoding Forwarded to the parent constructor.
  * @param int $charSize Cast to int and stored as char_size.
  */
 protected function __construct($encoding, $charSize)
 {
     parent::__construct($encoding);

     $size = (int) $charSize;
     $this->char_size = $size;
 }
예제 #6
0
/**
 * Times phpMorphy::getBaseForm() over a word list and prints a one-line summary.
 *
 * In BULK mode the whole list is passed in a single call; in SINGLE mode each
 * word is looked up separately and the number of predicted results is counted.
 * The predicted counter therefore stays 0 in BULK mode.
 *
 * @param array  $words         Words to look up.
 * @param string $encoding      Passed to convert_words() and shown in the output label.
 * @param string $dictDir       Dictionary directory given to phpMorphy_FilesBundle.
 * @param string $lang          Language given to phpMorphy_FilesBundle.
 * @param string $storage       Value of the 'storage' option.
 * @param bool   $useBulk       True for one bulk call, false for per-word calls.
 * @param bool   $usePrediction Value of the 'predict_by_suffix' option.
 */
function bench_morphy($words, $encoding, $dictDir, $lang, $storage, $useBulk, $usePrediction = true)
{
    $opts = array(
        'storage' => $storage,
        'predict_by_suffix' => $usePrediction,
        'predict_by_db' => false,
    );
    $bundle = new phpMorphy_FilesBundle($dictDir, $lang);
    $morphy = new phpMorphy($bundle, $opts);

    echo "Bench phpMorphy[{$encoding}][{$storage}][" . ($useBulk ? 'BULK' : 'SINGLE') . "] : ";

    // NOTE(review): presumably upper-cases $words in place (by reference);
    // confirm against convert_words().
    convert_words($words, $encoding, MB_CASE_UPPER);

    $predicted = 0;
    $started = microtime(true);
    if ($useBulk) {
        $morphy->getBaseForm($words);
    } else {
        foreach ($words as $word) {
            $morphy->getBaseForm($word);
            if ($morphy->isLastPredicted()) {
                $predicted++;
            }
        }
    }
    $elapsed = microtime(true) - $started;

    // Guard against a zero interval (e.g. an empty word list), which would
    // otherwise be a division by zero.
    $rate = $elapsed > 0 ? count($words) / $elapsed : 0.0;

    printf("time = %0.2f sec, words per second = %0.2f, predicted = %d\n", $elapsed, $rate, $predicted);
}