Beispiel #1
0
 /**
  * Normalize a token by replacing its text with the morphological pseudo-root.
  *
  * The term text is upper-cased and passed to phpMorphy's getPseudoRoot(). When a
  * pseudo-root is found, the first one is used; otherwise the upper-cased term
  * itself is used. Results shorter than 3 (as measured by strlen()) are dropped.
  *
  * @param \ZendSearch\Lucene\Analysis\Token $token
  * @return null|\ZendSearch\Lucene\Analysis\Token New token with the same offsets and
  *         position increment, or null when the normalized text is too short.
  */
 public function normalize(Token $token)
 {
     // Upper-case once and reuse the value for both the lookup and the fallback.
     $upper = $this->string()->toUpper($token->getTermText());
     $pseudo_root = $this->morphy->getPseudoRoot($upper);
     // getPseudoRoot() returns false when the word can be neither found nor predicted.
     $new_str = ($pseudo_root === false) ? $upper : $pseudo_root[0];
     // NOTE(review): strlen() counts bytes, not characters; with a multi-byte encoding
     // this threshold is a byte count. Confirm whether mb_strlen() is intended here.
     if (strlen($new_str) < 3) {
         return null;
     }
     $new_token = new Token($new_str, $token->getStartOffset(), $token->getEndOffset());
     $new_token->setPositionIncrement($token->getPositionIncrement());
     return $new_token;
 }
Beispiel #2
0
 /**
  * Reduce a word to the lower-cased first pseudo-root reported by phpMorphy.
  *
  * The word is upper-cased before lookup. If phpMorphy reports no pseudo-root
  * (or an empty one), the original word is returned unchanged.
  *
  * @param string $word Word to reduce.
  * @return string Lower-cased pseudo-root, or $word when none is available.
  */
 public static function forms($word)
 {
     // Build the dictionary descriptor and phpMorphy instance once and reuse them
     // on later calls instead of re-creating them for every word.
     static $morphy = null;
     if ($morphy === null) {
         $opts = array('storage' => PHPMORPHY_STORAGE_FILE, 'with_gramtab' => false, 'predict_by_suffix' => true, 'predict_by_db' => true);
         $dict_bundle = new phpMorphy_FilesBundle(MODPATH . 'phpMorphy/dicts', 'rus');
         $morphy = new phpMorphy($dict_bundle, $opts);
     }
     $roots = $morphy->getPseudoRoot(mb_strtoupper($word));
     // getPseudoRoot() yields false for unknown words; reset() must only be given an array.
     if (!is_array($roots)) {
         return $word;
     }
     $first = reset($roots);
     if (!$first) {
         return $word;
     }
     return mb_strtolower($first);
 }
Beispiel #3
0
//  | English        | cp1250      |
//  |------------------------------|
//  | German         | cp1252      |
//  *------------------------------*
// $codepage = $morphy->getCodepage();
// setlocale(LC_CTYPE, array('ru_RU.CP1251', 'Russian_Russia.1251'));
// Demonstrates phpMorphy lookups for a single word first, then for a batch of words.
// Hint: in this example the words $word_one and $word_two are Russian (cp1251 encoding).
$word_one = 'КОТ';
$word_two = 'СОБАКА';
echo "Testing single mode...\n";
try {
    // Word-by-word processing.
    // Each function returns an array with the result, or FALSE when no form(s)
    // could be found (or predicted) for the given word.
    $base_form = $morphy->getBaseForm($word_one);
    $all_forms = $morphy->getAllForms($word_one);
    $pseudo_root = $morphy->getPseudoRoot($word_one);
    // Abort if any of the three lookups failed for this word.
    if (false === $base_form || false === $all_forms || false === $pseudo_root) {
        die("Can`t find or predict {$word_one} word");
    }
    echo 'base form = ' . implode(', ', $base_form) . "\n";
    echo 'all forms = ' . implode(', ', $all_forms) . "\n";
    echo "Testing bulk mode...\n";
    // Bulk mode speeds up processing by up to 50-100% (mainly for the getBaseForm method).
    // In bulk mode every function always returns an array.
    $bulk_words = array($word_one, $word_two);
    $base_form = $morphy->getBaseForm($bulk_words);
    $all_forms = $morphy->getAllForms($bulk_words);
    $pseudo_root = $morphy->getPseudoRoot($bulk_words);
    // Bulk result format:
    // array(
    //   INPUT_WORD1 => array(OUTWORD1, OUTWORD2, ... etc)
Beispiel #4
0
 /**
  * Turn a free-text query into a list of search terms (pseudo-roots).
  *
  * The query is stripped of HTML tags, slash-escaped, trimmed and upper-cased, then
  * split into runs of Latin/Cyrillic letters and digits. Each word is replaced by the
  * first pseudo-root phpMorphy reports for it; words phpMorphy cannot resolve are
  * kept as matched. Terms shorter than two characters are discarded.
  *
  * @param string $string Raw query text.
  * @return array List of terms; an empty array when the query yields none.
  */
 public function getSearchString($string)
 {
     $s_words = array();
     // NOTE(review): this instance is never used below; it is kept only in case the
     // constructor has side effects. Remove it if it does not.
     $materials = new Model_Material('groups');
     // Load the phpMorphy library.
     require_once DOCROOT . 'application/classes/PHPMorphy/src/common.php';
     $dir = DOCROOT . 'application/classes/PHPMorphy/dicts/';
     $lang = 'ru_RU';
     $opts = array('storage' => PHPMORPHY_STORAGE_FILE);
     try {
         $morphy = new phpMorphy($dir, $lang, $opts);
     } catch (phpMorphy_Exception $e) {
         die('Error occured while creating phpMorphy instance: ' . $e->getMessage());
     }
     if (empty($string)) {
         return $s_words;
     }
     // Strip HTML tags and the like.
     $search = trim(addslashes(strip_tags($string)));
     // Queries of one character or less carry no usable terms.
     if (empty($search) || mb_strlen($search, "UTF-8") <= 1) {
         return $s_words;
     }
     preg_match_all('/([a-zа-яё0-9]+)/ui', mb_strtoupper($search, "UTF-8"), $search_words);
     $words = $morphy->getPseudoRoot($search_words[1]);
     if (!is_array($words)) {
         return $s_words;
     }
     foreach ($words as $word => $roots) {
         // Unresolved words have no root list: fall back to the word itself. The key is
         // cast because PHP turns numeric-string array keys into integers.
         $term = $roots ? $roots[0] : (string) $word;
         if (mb_strlen($term, "UTF-8") >= 2) {
             $s_words[] = $term;
         }
     }
     return $s_words;
 }
Beispiel #5
0
 /**
  * Build an SQL condition fragment from the "q" request parameter.
  *
  * The query is limited to 30 characters, escaped, split on spaces, and every word is
  * upper-cased and replaced by its first phpMorphy pseudo-root (or kept as-is when
  * none is found). Each word becomes a "LIKE '%...%'" condition on A.text or A.title,
  * and the conditions are joined with " and ".
  *
  * @param string|null $txt Pass "text" to match against A.text; anything else matches A.title.
  * @return string Conditions joined with " and ", followed by a single trailing space.
  */
 protected function getMorphy($txt = null)
 {
     require_once $_SERVER['DOCUMENT_ROOT'] . '/plugins/phpmorphy/src/common.php';
     $opts = array('storage' => PHPMORPHY_STORAGE_FILE, 'with_gramtab' => false, 'predict_by_suffix' => true, 'predict_by_db' => true);
     // Path to directory where dictionaries located
     $dir = $_SERVER['DOCUMENT_ROOT'] . '/plugins/phpmorphy/dicts/';
     // Create descriptor for dictionary located in $dir directory with russian language
     $dict_bundle = new phpMorphy_FilesBundle($dir, 'rus');
     // Create phpMorphy instance
     try {
         $morphy = new phpMorphy($dict_bundle, $opts);
     } catch (phpMorphy_Exception $e) {
         die('Error occured while creating phpMorphy instance: ' . $e->getMessage());
     }
     // A missing or non-string "q" parameter is treated as an empty query.
     $search = (isset($_GET['q']) && is_string($_GET['q'])) ? $_GET['q'] : '';
     // Truncate BEFORE escaping: cutting an already-escaped string can split an escape
     // sequence and leave a dangling backslash at the end.
     $search = mb_substr($search, 0, 30, 'UTF-8');
     // NOTE(review): mysql_real_escape_string() was removed in PHP 7.0; replace it with
     // the escaping of the connection actually in use (or with bound parameters).
     $search = mysql_real_escape_string($search);
     $column = ($txt === "text") ? "A.text" : "A.title";
     $conditions = array();
     foreach (explode(" ", $search) as $word) {
         $upper = mb_strtoupper($word, 'utf-8');
         $pseudo_root = $morphy->getPseudoRoot($upper);
         // getPseudoRoot() returns false when the word is neither found nor predicted.
         $rep = (false === $pseudo_root) ? $upper : $pseudo_root[0];
         $conditions[] = $column . " LIKE '%" . functions::q($rep) . "%'";
     }
     // The trailing space matches what callers have always received.
     return implode(" and ", $conditions) . " ";
 }