/**
 * Puts a word into the requested grammatical case and capitalises it.
 *
 * The word is cast with phpMorphy using the part of speech 'С' and the
 * grammems [$case, 'ЕД'] (noun / singular in phpMorphy's Russian tag set).
 * When no form comes back, the word is returned unchanged apart from the
 * capitalisation applied by mb_ucwords().
 *
 * @param string $word word to inflect
 * @param string $case phpMorphy grammem naming the target case
 * @return string result of $this->mb_ucwords() on the inflected (or original) word
 */
private function getCase($word, $case)
 {
     $variants = $this->morphy->castFormByGramInfo($word, 'С', array($case, 'ЕД'), false);
     // Use the first matching form if there is one, otherwise keep the input word.
     $chosen = isset($variants[0]) ? $variants[0]['form'] : $word;
     return $this->mb_ucwords($chosen);
 }
Exemple #2
0
 /**
  * Returns the shared instance of this class for the encoding of the given
  * phpMorphy object, creating and storing it in self::$instances on first use.
  *
  * @param phpMorphy $morphy source of the encoding used as the registry key
  * @return self
  */
 static function instance(phpMorphy $morphy)
 {
     $encoding = $morphy->getEncoding();
     if (isset(self::$instances[$encoding])) {
         return self::$instances[$encoding];
     }
     // First request for this encoding: build the instance and remember it.
     $created = new self($encoding);
     self::$instances[$encoding] = $created;
     return $created;
 }
Exemple #3
0
 /**
  * Returns the lower-cased pseudo-root of a word, or the word itself when
  * phpMorphy cannot provide one.
  *
  * The phpMorphy instance is created once per request and reused, because
  * opening the dictionary bundle on every call is needlessly expensive.
  *
  * @param string $word word to reduce
  * @return string lower-cased first pseudo-root, or $word unchanged
  */
 public static function forms($word)
 {
     static $morphy = null;
     if ($morphy === null) {
         $opts = array('storage' => PHPMORPHY_STORAGE_FILE, 'with_gramtab' => false, 'predict_by_suffix' => true, 'predict_by_db' => true);
         $dir = MODPATH . 'phpMorphy/dicts';
         $dict_bundle = new phpMorphy_FilesBundle($dir, 'rus');
         $morphy = new phpMorphy($dict_bundle, $opts);
     }
     $roots = $morphy->getPseudoRoot(mb_strtoupper($word));
     // getPseudoRoot() returns false for words it cannot resolve; calling
     // reset() on a non-array is a TypeError on PHP 8, so bail out first.
     if (!is_array($roots)) {
         return $word;
     }
     $first = reset($roots);
     if ($first === false || $first === '') {
         return $word;
     }
     return mb_strtolower($first);
 }
Exemple #4
0
 /**
  * Replaces a token's term with the first pseudo-root of its upper-cased text.
  *
  * When phpMorphy returns false for the term, the upper-cased term itself is
  * used. Terms whose replacement is shorter than 3 bytes (strlen) are dropped.
  * Offsets and position increment are copied from the source token.
  *
  * @param \ZendSearch\Lucene\Analysis\Token $token
  * @return null|\ZendSearch\Lucene\Analysis\Token
  */
 public function normalize(Token $token)
 {
     $roots = $this->morphy->getPseudoRoot($this->string()->toUpper($token->getTermText()));
     $term = ($roots === false)
         ? $this->string()->toUpper($token->getTermText())
         : $roots[0];
     if (strlen($term) < 3) {
         // Too short to be useful: the token is discarded.
         return null;
     }
     $normalized = new Token($term, $token->getStartOffset(), $token->getEndOffset());
     $normalized->setPositionIncrement($token->getPositionIncrement());
     return $normalized;
 }
Exemple #5
0
 /**
  * Inflects the participle "выбранный" so that it agrees with the given word.
  *
  * The gender and animacy grammems are taken from the first description of
  * $word that carries the 'ИМ' grammem; the participle is then cast with
  * those plus 'ВН', 'ЕД', 'ПРШ', 'СТР'. The result is stored in the Yii cache
  * under a key derived from the class, function and word.
  *
  * @param string $word word the participle must agree with
  * @return bool|string lower-cased participle form, or false when the word is
  *                     unknown to the dictionary or no matching form exists
  */
 public static function castChosenWordBy($word)
 {
     $cacheKey = __CLASS__ . __FUNCTION__ . $word;
     $res = Yii::$app->cache->get($cacheKey);
     if ($res !== false) {
         return $res;
     }
     $phpMorphy = new \phpMorphy(Yii::getAlias('@madmin/phpmorphy-dicts'), 'ru_RU', ['storage' => PHPMORPHY_STORAGE_FILE]);
     mb_internal_encoding('UTF-8');
     $gramInfo = $phpMorphy->getGramInfo(mb_strtoupper($word));
     // getGramInfo() returns false for unknown words; indexing it blindly
     // raised warnings and then iterated over a non-array.
     $forms = (is_array($gramInfo) && isset($gramInfo[0]) && is_array($gramInfo[0])) ? $gramInfo[0] : [];
     $rod = null;
     $od = null;
     foreach ($forms as $form) {
         if (in_array('ИМ', $form['grammems'], true)) {
             $rod = array_intersect($form['grammems'], ['МР', 'ЖР', 'СР', 'МР-ЖР']);
             $rod = reset($rod);
             $od = array_intersect($form['grammems'], ['ОД', 'НО']);
             $od = reset($od);
             break;
         }
     }
     if (!empty($rod) && !empty($od)) {
         $cast = $phpMorphy->castFormByGramInfo(mb_strtoupper('выбранный'), 'ПРИЧАСТИЕ', [$rod, $od, 'ВН', 'ЕД', 'ПРШ', 'СТР'], true);
         // The cast may yield nothing for this grammem combination; keep $res === false then.
         if (is_array($cast) && isset($cast[0])) {
             $res = mb_strtolower($cast[0]);
         }
     }
     Yii::$app->cache->set($cacheKey, $res);
     return $res;
 }
Exemple #6
0
 /**
  * Returns the inflectional forms of the words found in a piece of content.
  *
  * Results are memoised in static::$content, keyed by the md5 of the raw input.
  *
  * @param string $content raw content (may contain markup)
  * @return array|null null when $content is empty(), otherwise whatever
  *                    static::$morphy->getAllForms() returns for the word list
  */
 public function inflectionalForms($content)
 {
     if (empty($content)) {
         return null;
     }
     // Reuse an earlier result for identical input.
     $cacheKey = md5($content);
     if (isset(static::$content[$cacheKey])) {
         return static::$content[$cacheKey];
     }
     // Strip tags, then remove bracketed spans, characters outside the
     // word / \x7F-\xFF / whitespace set, and runs of « » and digits.
     $patterns = ['/\\[.*\\]/isu', '/[^\\w\\x7F-\\xFF\\s]/isu', '/[\\«\\»\\d]+/iu'];
     $plain = preg_replace($patterns, "", trim(strip_tags($content)));
     // Collapse repeated spaces into one.
     $plain = preg_replace('/ +/u', ' ', $plain);
     $tokens = preg_split('/\\s|[,.:;!?"\'()]/', $plain, -1, PREG_SPLIT_NO_EMPTY);
     $bulkWords = [];
     foreach ($tokens as $token) {
         // Only words longer than two characters are looked up.
         if (mb_strlen($token, 'utf-8') <= 2) {
             continue;
         }
         $bulkWords[] = mb_strtoupper($token, 'utf-8');
     }
     static::$content[$cacheKey] = static::$morphy->getAllForms($bulkWords);
     return static::$content[$cacheKey];
 }
 /**
  * Replaces every word in the list with its base form where one is available.
  *
  * A word containing at least one Cyrillic letter is looked up with the 'ru'
  * morphology, any other word with 'en'. Words for which getBaseForm() returns
  * a falsy value are left as they are; when several base forms come back, the
  * first one is used.
  *
  * @param array $words list of words
  * @return array the same list with words replaced by base forms; keys are preserved
  */
 protected function processing($words)
 {
     // NOTE(review): phpMorphy here appears to be a project wrapper exposing get($lang) — confirm.
     $morphy = new phpMorphy();
     // Write back by key instead of iterating by reference: the original left
     // a dangling reference to the last element after the loop.
     foreach ($words as $index => $word) {
         // 'ё' lies outside the а-я code point range and must be listed explicitly.
         $lang = preg_match('/[а-яё]+/ui', $word) ? 'ru' : 'en';
         $base = $morphy->get($lang)->getBaseForm($word);
         if (!$base) {
             continue;
         }
         $words[$index] = is_array($base) ? current($base) : $base;
     }
     return $words;
 }
Exemple #8
0
 /**
  * Returns all word forms of the words in a search query.
  *
  * The query is treated as UTF-8 (it is converted to windows-1251 for the
  * dictionary). Only words longer than three characters are looked up.
  *
  * @param string $text UTF-8 search query
  * @return mixed result of phpMorphy::getAllForms() for the collected words
  */
 function Words2AllForms($text)
 {
     require_once $GLOBALS['_PATH']['PATH_INC'] . 'phpMorphy/src/common.php';
     $opts = array('storage' => PHPMORPHY_STORAGE_MEM, 'with_gramtab' => false, 'predict_by_suffix' => true, 'predict_by_db' => true);
     $dir = $GLOBALS['_PATH']['PATH_INC'] . 'phpMorphy/dicts';
     // Create the dictionary descriptor
     $dict_bundle = new phpMorphy_FilesBundle($dir, 'rus');
     $morphy = new phpMorphy($dict_bundle, $opts);
     // Kept for callers that rely on the process locale being switched here.
     setlocale(LC_CTYPE, array('ru_RU.CP1251', 'Russian_Russia.1251'));
     // The text is UTF-8, so split in UTF-8 mode: a byte-mode pattern may treat
     // bytes inside multi-byte characters as whitespace under a cp1251 locale.
     $words = preg_split('#\\s|[,.:;!?"\'()]#u', $text, -1, PREG_SPLIT_NO_EMPTY);
     if ($words === false) {
         // Invalid UTF-8 input: nothing can be looked up.
         $words = array();
     }
     $bulk_words = array();
     foreach ($words as $v) {
         // Count characters, not UTF-8 bytes (a two-letter Cyrillic word is 4 bytes long).
         if (mb_strlen($v, 'UTF-8') <= 3) {
             continue;
         }
         // Upper-case while the word is still UTF-8: strtoupper() depends on the
         // locale being available and is ASCII-only since PHP 8.2.
         $converted = iconv("UTF-8", "windows-1251", mb_strtoupper($v, 'UTF-8'));
         if ($converted === false) {
             // The word contains characters that do not exist in windows-1251.
             continue;
         }
         $bulk_words[] = $converted;
     }
     return $morphy->getAllForms($bulk_words);
 }
Exemple #9
0
 /**
  * Reduces a text to a space-separated list of the unique base forms of its words.
  *
  * Bracketed spans ([...]) are removed first, the rest is split on whitespace
  * and punctuation, and only words (and base forms) longer than three bytes
  * are kept. The dictionary bundle and the phpMorphy instance are created once
  * and reused through static locals.
  *
  * @param string $text source text
  * @return string base forms joined with single spaces, in order of first appearance
  */
 function Words2BaseForm($text)
 {
     static $dict_bundle, $morphy;
     require_once $GLOBALS['_PATH']['PATH_INC'] . 'phpMorphy/src/common.php';
     if (!$dict_bundle) {
         // The former $encoding lookup in $_COMMON_SITE_CONF was never used and
         // only produced notices when the configuration keys were missing.
         $dir = $GLOBALS['_PATH']['PATH_INC'] . 'phpMorphy/dicts/';
         $dict_bundle = new phpMorphy_FilesBundle($dir, 'rus');
     }
     if (!$morphy) {
         $opts = array('storage' => PHPMORPHY_STORAGE_MEM, 'with_gramtab' => false, 'predict_by_suffix' => true, 'predict_by_db' => true);
         $morphy = new phpMorphy($dict_bundle, $opts);
     }
     // strtoupper()/strlen() below operate on single bytes.
     // NOTE(review): this presumably expects cp1251 input — confirm against callers.
     setlocale(LC_CTYPE, array('ru_RU.CP1251', 'rus_RUS.CP1251', 'Russian_Russia.1251'));
     $words = preg_replace('#\\[.*\\]#isU', '', $text);
     $words = preg_split('#\\s|[,.:;«»!?"\'()]#', $words, -1, PREG_SPLIT_NO_EMPTY);
     $bulk_words = array();
     foreach ($words as $v) {
         if (strlen($v) > 3) {
             $bulk_words[] = strtoupper($v);
         }
     }
     $base_form = $morphy->getBaseForm($bulk_words);
     // Array keys give de-duplication while keeping first-seen order.
     $fullList = array();
     if (is_array($base_form) && count($base_form)) {
         foreach ($base_form as $v) {
             if (!is_array($v)) {
                 // No base forms for this word.
                 continue;
             }
             foreach ($v as $v1) {
                 if (strlen($v1) > 3) {
                     $fullList[$v1] = 1;
                 }
             }
         }
     }
     return join(' ', array_keys($fullList));
 }
    $words = preg_split('#\\s|[,.:;!?"\'()]#', $text, -1, PREG_SPLIT_NO_EMPTY);
    $bulk_words = array();
    foreach ($words as $v) {
        if (strlen($v) > 3) {
            $bulk_words[] = strtoupper($v);
        }
    }
    return $morphy->getAllForms($bulk_words);
}
/*

 *

 * @param string $text

 * @return string

 */
function Words2BaseForm($text)
{
    require_once $GLOBALS['PATH_sys'] . 'phpmorphy/src/common.php';
    // set some options
    $opts = array('storage' => PHPMORPHY_STORAGE_MEM, 'with_gramtab' => false, 'predict_by_suffix' => true, 'predict_by_db' => true);
    $dir = $GLOBALS['PATH_sys'] . 'phpmorphy/dicts';
    // Create descriptor for dictionary located in $dir directory with russian language
    $dict_bundle = new phpMorphy_FilesBundle($dir, 'rus');
    // Create phpMorphy instance
    $morphy = new phpMorphy($dict_bundle, $opts);
    // All words in dictionary in UPPER CASE, so don`t forget set proper locale
    // Supported dicts and locales:
    //  *------------------------------*
    //  | Dict. language | Locale name |
    //  |------------------------------|
    //  | Russian        | cp1251      |
    //  |------------------------------|
    //  | English        | cp1250      |
    //  |------------------------------|
    //  | German         | cp1252      |
    //  *------------------------------*
    // $codepage = $morphy->getCodepage();
    //setlocale(LC_CTYPE, array('ru_RU.CP1251', 'Russian_Russia.1251'));
    $words = preg_replace('#\\[.*\\]#isU', '', $text);
    $words = preg_split('#\\s|[,.:;!?"\'()]#', $words, -1, PREG_SPLIT_NO_EMPTY);
    $bulk_words = array();
    foreach ($words as $v) {
        if (strlen($v) > 3) {
            $bulk_words[] = mb_strtoupper($v, "UTF-8");
Exemple #11
0
 /**
  * Returns the grammems provider for the locale of the given phpMorphy object.
  *
  * Providers are kept in self::$included per lower-cased locale. On first use
  * the file PHPMORPHY_DIR/langs_stuff/{locale}.php is required when readable
  * and the class phpMorphy_GrammemsProvider_{locale} supplies the provider via
  * its static instance() method; otherwise an empty provider is used.
  *
  * @param phpMorphy $morphy
  * @return mixed provider stored for the locale
  * @throws phpMorphy_Exception when the language file does not define the expected class
  */
 static function create(phpMorphy $morphy)
 {
     $locale = $GLOBALS['__phpmorphy_strtolower']($morphy->getLocale());
     if (isset(self::$included[$locale])) {
         return self::$included[$locale];
     }
     $file_name = PHPMORPHY_DIR . "/langs_stuff/{$locale}.php";
     $class = "phpMorphy_GrammemsProvider_{$locale}";
     if (!is_readable($file_name)) {
         // No language-specific file: fall back to the empty provider.
         $provider = new phpMorphy_GrammemsProvider_Empty($morphy);
     } else {
         require $file_name;
         if (!class_exists($class)) {
             throw new phpMorphy_Exception("Class '{$class}' not found in '{$file_name}' file");
         }
         $provider = $class::instance($morphy);
     }
     self::$included[$locale] = $provider;
     return $provider;
 }
Exemple #12
0
 /**
  * Builds the SQL LIKE condition for the current search query ($_GET['q']).
  *
  * The query is cut to 30 characters, split on spaces, and every term is
  * upper-cased and replaced by its first pseudo-root when phpMorphy knows one.
  * Each term yields one "<column> LIKE '%term%'" clause; clauses are joined
  * with " and ". The returned string ends with a single trailing space.
  *
  * @param string|null $txt pass "text" to search A.text; anything else searches A.title
  * @return string SQL condition fragment
  */
 protected function getMorphy($txt = null)
 {
     require_once $_SERVER['DOCUMENT_ROOT'] . '/plugins/phpmorphy/src/common.php';
     $opts = array('storage' => PHPMORPHY_STORAGE_FILE, 'with_gramtab' => false, 'predict_by_suffix' => true, 'predict_by_db' => true);
     // Path to directory where dictionaries located
     $dir = $_SERVER['DOCUMENT_ROOT'] . '/plugins/phpmorphy/dicts/';
     // Create descriptor for dictionary located in $dir directory with russian language
     $dict_bundle = new phpMorphy_FilesBundle($dir, 'rus');
     // Create phpMorphy instance
     try {
         $morphy = new phpMorphy($dict_bundle, $opts);
     } catch (phpMorphy_Exception $e) {
         die('Error occured while creating phpMorphy instance: ' . $e->getMessage());
     }
     // A missing or non-string "q" parameter is treated as an empty query
     // instead of raising an undefined-index notice.
     $search = (isset($_GET['q']) && is_string($_GET['q'])) ? $_GET['q'] : '';
     // Truncate BEFORE escaping so the 30-character cut can never split an
     // escape sequence and leave a dangling backslash in the SQL.
     $search = mb_substr($search, 0, 30, 'UTF-8');
     // NOTE(review): mysql_real_escape_string() was removed in PHP 7; each term is
     // also passed through functions::q() below — confirm that it escapes and drop this call.
     $search = mysql_real_escape_string($search);
     $column = $txt == "text" ? 'A.text' : 'A.title';
     $clauses = array();
     foreach (explode(" ", $search) as $term) {
         $term = mb_strtoupper($term, 'utf-8');
         $pseudo_root = $morphy->getPseudoRoot($term);
         // Unknown words are searched as typed (upper-cased).
         $rep = false === $pseudo_root ? $term : $pseudo_root[0];
         $clauses[] = $column . " LIKE '%" . functions::q($rep) . "%'";
     }
     // The trailing space keeps the output identical to the previous implementation.
     return implode(' and ', $clauses) . ' ';
 }
Exemple #13
0
/**
 * Reduces a text to a space-separated list of the unique base forms of its words.
 *
 * Bracketed spans ([...]) are removed, the remainder is split on whitespace
 * and punctuation, and only words and base forms longer than two bytes
 * (strlen) are kept.
 *
 * @param string $text source text
 * @return string base forms joined with single spaces, in order of first appearance
 */
function Words2BaseForm($text)
{
    require_once dirname(__FILE__) . '/../sys/phpmorphy/src/common.php';
    // Dictionary location and lookup options.
    $dictDir = dirname(__FILE__) . '/../sys/phpmorphy/dicts';
    $options = array('storage' => PHPMORPHY_STORAGE_MEM, 'with_gramtab' => false, 'predict_by_suffix' => true, 'predict_by_db' => true);
    $morphy = new phpMorphy(new phpMorphy_FilesBundle($dictDir, 'rus'), $options);

    $stripped = preg_replace('#\\[.*\\]#isU', '', $text);
    $tokens = preg_split('#\\s|[,.:;!?"\'()]#', $stripped, -1, PREG_SPLIT_NO_EMPTY);
    $candidates = array();
    foreach ($tokens as $token) {
        if (strlen($token) <= 2) {
            continue;
        }
        $candidates[] = strtoupper($token);
    }

    $lemmaMap = $morphy->getBaseForm($candidates);
    // Array keys give de-duplication while keeping first-seen order.
    $unique = array();
    if (is_array($lemmaMap) && count($lemmaMap)) {
        foreach ($lemmaMap as $lemmas) {
            if (!is_array($lemmas)) {
                continue;
            }
            foreach ($lemmas as $lemma) {
                if (strlen($lemma) > 2) {
                    $unique[$lemma] = 1;
                }
            }
        }
    }
    return join(' ', array_keys($unique));
}
Exemple #14
0
/**
 * Benchmarks raw dictionary lookups by walking phpMorphy's FSA for every word
 * and prints the elapsed time and throughput.
 *
 * @param array  $words    words to look up
 * @param string $encoding encoding handed to convert_words() and echoed in the header
 * @param string $dictDir  dictionary directory
 * @param string $lang     dictionary language
 * @param mixed  $storage  phpMorphy storage type
 * @return void
 */
function bench_morphy_dict($words, $encoding, $dictDir, $lang, $storage)
{
    $opts = array('storage' => $storage, 'predict_by_suffix' => false, 'predict_by_db' => false);
    $bundle = new phpMorphy_FilesBundle($dictDir, $lang);
    $morphy = new phpMorphy($bundle, $opts);
    echo "Bench phpMorphy - Dict[{$encoding}][{$storage}]: ";
    // NOTE(review): presumably converts $words in place (by reference) — confirm in convert_words().
    convert_words($words, $encoding, MB_CASE_UPPER);
    $fsa = $morphy->getCommonMorphier()->getFinder()->getFsa();
    $root = $fsa->getRootTrans();
    // Prediction is disabled in $opts, so nothing is ever counted here;
    // the field is kept so the output format stays unchanged.
    $predicted = 0;
    $b = microtime(true);
    foreach ($words as $word) {
        $fsa->walk($root, $word, true);
    }
    $elapsed = microtime(true) - $b;
    // An empty or very short word list can finish within timer resolution;
    // dividing by a zero interval throws DivisionByZeroError on PHP 8.
    $rate = $elapsed > 0 ? count($words) / $elapsed : 0.0;
    printf("time = %0.2f sec, words per second = %0.2f, predicted = %d\n", $elapsed, $rate, $predicted);
}
Exemple #15
0
<?php 
session_start();
error_reporting(E_ALL | E_STRICT);
//ini_set('default_charset','UTF-8');
$path = $_SESSION['path'];
require 'stemmer_utf.php';
require 'work_csv.php';
require 'readfile.php';
require_once dirname(__FILE__) . '/phpmorphy/src/common.php';
$opts = array('storage' => PHPMORPHY_STORAGE_FILE, 'with_gramtab' => false, 'predict_by_suffix' => true, 'predict_by_db' => true);
$dir = dirname(__FILE__) . '/phpmorphy/dicts';
// Create descriptor for dictionary located in $dir directory with russian language
$dict_bundle = new phpMorphy_FilesBundle($dir, 'rus');
try {
    $morphy = new phpMorphy($dict_bundle, $opts);
} catch (phpMorphy_Exception $e) {
    die('Error occured while creating phpMorphy instance: ' . $e->getMessage());
}
//$csv = new CSV($path);
// $csv_lines  = $csv->getCSV($path);
$handle = fopen($path, "r");
//$row = 1;
while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) {
    $insertValues = array();
    foreach ($data as $v) {
        $insertValues[] = addslashes(trim($v));
        $values = implode(',', $insertValues);
        $word = $morphy->lemmatize($values);
        //print_r($morphy->getBaseForm($value));
        //$z=$morphy->getBaseForm($values);
    }
 /**
  * Returns the unique base forms (lemmas) of the given words.
  *
  * Empty entries are skipped. A word for which phpMorphy has no base form is
  * kept as it is. The collected list is passed through
  * $this->sanitizeWordsArray() and de-duplicated.
  *
  * @param array $words words to lemmatise
  * @return array|false list of lemmas, or false when phpMorphy could not be created
  */
 protected function getLemmas(array $words)
 {
     require_once MODX_CORE_PATH . 'components/modsearch/external/phpmorphy/src/common.php';
     // set some options
     $opts = array('storage' => PHPMORPHY_STORAGE_FILE, 'predict_by_suffix' => true, 'predict_by_db' => true, 'graminfo_as_text' => true);
     // Path to directory where dictionaries located
     $dir = MODX_CORE_PATH . 'components/modsearch/external/phpmorphy/dicts';
     $lang = 'ru_RU';
     // `new` never evaluates to false, so the former `if (!new phpMorphy(...))`
     // check could not fire: the constructor reports failure by throwing.
     try {
         $morphy = new phpMorphy($dir, $lang, $opts);
     } catch (phpMorphy_Exception $e) {
         $this->modx->log(xPDO::LOG_LEVEL_ERROR, "[" . __CLASS__ . "] Не был получен объект phpMorphy: " . $e->getMessage());
         return false;
     }
     $lemmas = array();
     foreach ($words as $word) {
         if (!$word) {
             continue;
         }
         // By default phpMorphy looks the word up in the dictionary and tries to
         // predict it when nothing is found; false means neither step succeeded.
         $base = $morphy->getBaseForm($word);
         if (!is_array($base)) {
             // Word not found: keep it as it is.
             $lemmas[] = $word;
             continue;
         }
         foreach ($base as $lemma) {
             $lemmas[] = $lemma;
         }
     }
     $lemmas = $this->sanitizeWordsArray($lemmas);
     return array_unique($lemmas);
 }
Exemple #17
0
<?php

error_reporting(E_ALL | E_STRICT);
// first we include phpmorphy library
require_once dirname(__FILE__) . '/../src/common.php';
// set some options
$opts = array('storage' => PHPMORPHY_STORAGE_FILE, 'predict_by_suffix' => true, 'predict_by_db' => true, 'graminfo_as_text' => true);
// Path to directory where dictionaries located
$dir = dirname(__FILE__) . '/../dicts';
$lang = 'ru_RU';
// Create phpMorphy instance
try {
    $morphy = new phpMorphy($dir, $lang, $opts);
} catch (phpMorphy_Exception $e) {
    die('Error occured while creating phpMorphy instance: ' . PHP_EOL . $e);
}
// All words in dictionary in UPPER CASE, so don`t forget set proper locale via setlocale(...) call
// $morphy->getEncoding() returns dictionary encoding
$words = array('КРАКОЗЯБЛИКИ', 'СТАЛИ', 'ВИНА', 'И', 'ДУХИ', 'abc');
/*
if(function_exists('iconv')) {
    foreach($words as &$word) {
        $word = iconv('windows-1251', $morphy->getEncoding(), $word);
    }
    unset($word);
}
*/
try {
    foreach ($words as $word) {
        // by default, phpMorphy finds $word in dictionary and when nothig found, try to predict them
        // you can change this behaviour, via second argument to getXXX or findWord methods
#!/usr/bin/env php
<?php 
set_include_path(__DIR__ . '/../src/' . PATH_SEPARATOR . get_include_path());
require 'phpMorphy.php';
define('WORD_NOT_FOUND', 1);
if ($argc < 2) {
    die("Usage {$argv['0']} WORD [LANG] [ENCODING]" . PHP_EOL);
}
$word = $argv[1];
$lang = $argc > 2 ? $argv[2] : 'ru_RU';
$dir = __DIR__ . '/../dicts/';
$dir .= $argc > 3 ? "/{$argv[3]}" : 'utf-8';
$opts = array('storage' => PHPMORPHY_STORAGE_FILE, 'predict_by_suffix' => true, 'predict_by_db' => true);
$morphy = new phpMorphy($dir, $lang, $opts);
$encoding = $morphy->getEncoding();
$formatter = new phpMorphy_Paradigm_Formatter();
$word = iconv('utf-8', $encoding, $word);
$word = mb_strtoupper($word, $encoding);
$result = $morphy->findWord($word);
$predict_text = 'DICT';
if ($morphy->getLastPredictionType() == phpMorphy::PREDICT_BY_DB) {
    $predict_text = 'PREDICT_BY_DB';
} else {
    if ($morphy->getLastPredictionType() == phpMorphy::PREDICT_BY_SUFFIX) {
        $predict_text = 'PREDICT_BY_SUFFIX';
    }
}
echo "Paradigms for {$word}({$predict_text}):" . PHP_EOL;
if (false === $result) {
    echo 'NOT FOUND' . PHP_EOL;
    exit(WORD_NOT_FOUND);
Exemple #19
0
 /**
  * Turns a raw search query into a list of pseudo-roots to search for.
  *
  * Tags are stripped, the query is upper-cased and split into runs of Latin
  * letters, Cyrillic letters and digits. Each word is replaced by its first
  * pseudo-root; words phpMorphy does not know are kept as they are. Results
  * shorter than two characters are dropped.
  *
  * @param string $string raw search query
  * @return array|null list of search words (may be empty), or null when the
  *                    query is empty after cleaning
  */
 public function getSearchString($string)
 {
     // Load the library
     require_once DOCROOT . 'application/classes/PHPMorphy/src/common.php';
     $dir = DOCROOT . 'application/classes/PHPMorphy/dicts/';
     $lang = 'ru_RU';
     $opts = array('storage' => PHPMORPHY_STORAGE_FILE);
     try {
         $morphy = new phpMorphy($dir, $lang, $opts);
     } catch (phpMorphy_Exception $e) {
         die('Error occured while creating phpMorphy instance: ' . $e->getMessage());
     }
     if (empty($string)) {
         return null;
     }
     // Strip HTML tags and surrounding whitespace
     $search = trim(addslashes(strip_tags($string)));
     if (empty($search)) {
         return null;
     }
     $s_words = array();
     if (mb_strlen($search, "UTF-8") > 1) {
         preg_match_all('/([a-zа-яё0-9]+)/ui', mb_strtoupper($search, "UTF-8"), $search_words);
         $roots = $morphy->getPseudoRoot($search_words[1]);
         if (is_array($roots)) {
             foreach ($roots as $word => $variants) {
                 // Unknown words come back as false; use the word itself rather than
                 // auto-vivifying an array from false (deprecated since PHP 8.1).
                 $root = !empty($variants) ? $variants[0] : (string) $word;
                 if (mb_strlen($root, "UTF-8") >= 2) {
                     $s_words[] = $root;
                 }
             }
         }
     }
     return $s_words;
 }
Exemple #20
0
<?php

// Example script: creates a phpMorphy instance for Russian and defines two sample words.
// Report every error, including strict-standards notices.
error_reporting(E_ALL | E_STRICT);
// Load phpMorphy through the Composer autoloader.
require_once __DIR__ . '/../vendor/autoload.php';
// Lookup options: file-based storage, no gramtab, both prediction strategies enabled.
$opts = array('storage' => phpMorphy::STORAGE_FILE, 'with_gramtab' => false, 'predict_by_suffix' => true, 'predict_by_db' => true);
// Create the phpMorphy instance; the script stops with a message if that fails.
try {
    $morphy = new phpMorphy(null, 'ru_RU', $opts);
    // Alternative construction with an explicit dictionary bundle:
    //$morphy = new phpMorphy(new phpMorphy_FilesBundle(phpMorphy::getDefaultDictsDir(), 'ru_RU'), $opts);
} catch (phpMorphy_Exception $e) {
    die('Error occured while creating phpMorphy instance: ' . $e->getMessage());
}
// All words in the dictionary are in UPPER CASE, so set a proper locale
// when upper-casing input with locale-dependent functions.
// Supported dictionaries and locales:
//  *------------------------------*
//  | Dict. language | Locale name |
//  |------------------------------|
//  | Russian        | cp1251      |
//  |------------------------------|
//  | English        | cp1250      |
//  |------------------------------|
//  | German         | cp1252      |
//  *------------------------------*
// $codepage = $morphy->getCodepage();
// setlocale(LC_CTYPE, array('ru_RU.CP1251', 'Russian_Russia.1251'));
// Sample Russian words, already upper-cased.
// NOTE(review): the original hint says these literals are cp1251-encoded — confirm the file encoding.
$word_one = 'КОТ';
$word_two = 'СОБАКА';
echo "Testing single mode...\n";
Exemple #21
0
<?php

// Include common.php. The path below contains the phpMorphy version;
// adjust it if a different version is installed.
require_once 'c:/server/bin/PHP/lib/phpmorphy-0.3.7/src/common.php';
//global $morphy;
// Path to the directory containing the dictionaries
$dir = 'c:/server/bin/PHP/lib/phpmorphy-0.3.7/dicts';
// Language of the dictionary to use.
// The language is given as an ISO3166 country code and an ISO639 language code
// separated by an underscore (ru_RU, uk_UA, en_EN, de_DE, etc.)
$lang = 'ru_RU';
// Options
// (the original notes refer to a list of supported options further below)
$opts = array('storage' => PHPMORPHY_STORAGE_MEM, 'predict_by_suffix' => TRUE);
// Create the phpMorphy instance.
// Note: all phpMorphy functions may throw phpMorphy_Exception
// (the constructor included).
try {
    $morphy = new phpMorphy($dir, $lang, $opts);
    // Print the lemmas of a sample word.
    print_r($morphy->lemmatize('яблоко', phpMorphy::NORMAL));
    //exit(0);
} catch (phpMorphy_Exception $e) {
    die('Error occured while creating phpMorphy instance: ' . $e->getMessage());
}
// From here on, $morphy refers to the phpMorphy instance
#!/usr/bin/env php
<?php 
// Bootstrap shared by the command-line scripts.
require_once __DIR__ . '/init.php';
// Input user dictionary and the XML file the conversion writes to.
$user_dict_xml = PHPMORPHY_DIR . '/../bin/user_dict.xml';
$outXmlFilePath = PHPMORPHY_DIR . '/../bin/out.xml';
// Dictionary directory and language used to create the phpMorphy instance.
$morphy_dict_dir = phpMorphy::getDefaultDictsDir();
$morphy_dict_lang = 'ru_RU';
// Passed to the CLI log observer.
$verbose = true;
/**
 * Creates a phpMorphy instance that uses in-memory storage with both
 * prediction strategies (by suffix and by database) enabled.
 *
 * @param string $dir  dictionary directory
 * @param string $lang dictionary language, e.g. 'ru_RU'
 * @return phpMorphy_MorphyInterface
 */
function createMorphy($dir, $lang)
{
    return new phpMorphy($dir, $lang, array(
        'storage' => phpMorphy::STORAGE_MEM,
        'predict_by_suffix' => true,
        'predict_by_db' => true,
    ));
}
$morphy = createMorphy($morphy_dict_dir, $morphy_dict_lang);
// Convert between the dictionary's own encoding/case and upper-case UTF-8.
$encoding_converter = new phpMorphy_UserDict_EncodingConverter(
    $morphy->getEncoding(),
    $morphy->isInUpperCase() ? MB_CASE_UPPER : MB_CASE_LOWER,
    'utf-8',
    MB_CASE_UPPER
);
// Alternative handler: phpMorphy_UserDict_Log_ErrorsHandlerException
$errors_handler = new phpMorphy_UserDict_Log_ErrorsHandlerPass();
$observer = new phpMorphy_UserDict_Log_CLI($verbose, $errors_handler, $encoding_converter);
// Time the XML-to-XML conversion of the user dictionary.
$b = microtime(true);
phpMorphy_UserDict_XmlDiff_Generator::convertFromXmlToXml(
    $user_dict_xml,
    $outXmlFilePath,
    $morphy,
    $observer,
    $encoding_converter
);
$e = microtime(true);
printf("Time taken: %0.2f\n", $e - $b);