Example #1
0
 /**
  * Rebuilds the lemma (vocabulary) table and the related-words table.
  *
  * Steps, in order:
  *  1. Drops and recreates the lemma table, then fills it with one row per
  *     distinct page that has a lang_pos entry for the lemma language. These
  *     rows reuse page.id as the lemma id and are stored with origin = 0.
  *  2. Drops and recreates the related-words table.
  *  3. For every such page, takes the words returned by
  *     PWSemanticDistance::getRelatedWords(). A word that is not yet in the
  *     lemma table is appended with origin = 1 and an auto-incremented id.
  *     Each (page id, word id, coefficient) triple is written to the
  *     related-words table.
  *
  * Rows are written with multi-row INSERT statements of at most 27000 rows.
  * Every buffered row is written: the row that fills a batch is kept, and a
  * remainder of any size (including a single row or a full batch) is flushed.
  *
  * Prints an HTML confirmation paragraph when finished. Takes no parameters
  * and returns nothing.
  */
 public static function createVocabularyRelatedTables()
 {
     // Upper bound on the number of rows sent in one multi-row INSERT.
     $batch_size = 27000;

     $link_db = Piwidict::getDatabaseConnection();
     $lang_id = (int) TLang::getIDByLangCode(PWLemma::getLangCode());
     $l_table = PWLemma::getTableName();
     $rw_table = PWRelatedWords::getTableName();

     // (Re)create the lemma table from scratch.
     $query = "DROP TABLE IF EXISTS `{$l_table}`";
     $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
     $query = "CREATE TABLE `{$l_table}`(" . "`id` int(10) unsigned NOT NULL AUTO_INCREMENT," . "`lemma` varchar(255) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL," . "`origin` tinyint(1) default 0," . "`frequency` int default 0," . "`meaning_id` int default 0," . "PRIMARY KEY (`id`), UNIQUE(`lemma`), KEY `origin` (`origin`))";
     $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

     // writing words from page table
     $query = "SELECT DISTINCT page.id, trim(page_title) as page_title FROM page, lang_pos WHERE lang_pos.page_id=page.id and lang_id={$lang_id} ORDER BY page_id";
     $res_page = $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
     $tmp = array();
     while ($row = $res_page->fetch_object()) {
         // NOTE(review): only single quotes are escaped here; a title containing
         // a backslash would still produce a malformed literal. Consider the
         // connection's own escaping routine once its API is confirmed.
         $tmp[] = "(" . $row->id . ", '" . str_replace("'", "\\'", $row->page_title) . "',0,0,0)";
         // Append first, then flush, so the row that fills the batch is not lost.
         if (sizeof($tmp) >= $batch_size) {
             $link_db->query_e("INSERT INTO `{$l_table}` VALUES " . join(', ', $tmp), "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
             $tmp = array();
         }
     }
     // Flush whatever is left, even a single row.
     if (sizeof($tmp) > 0) {
         $link_db->query_e("INSERT INTO `{$l_table}` VALUES " . join(', ', $tmp), "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
     }

     // (Re)create the related-words table from scratch.
     $query = "DROP TABLE IF EXISTS `{$rw_table}`";
     $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
     $query = "CREATE TABLE `{$rw_table}`(" . "`lemma_id1` int(10) unsigned NOT NULL," . "`lemma_id2` int(10) unsigned NOT NULL," . "`weight` decimal(8,6) unsigned NOT NULL," . "PRIMARY KEY (`lemma_id1`,`lemma_id2`))";
     $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

     // writing related words
     $tmp = array();
     $query = "SELECT DISTINCT page_id FROM lang_pos WHERE lang_id={$lang_id} ORDER BY page_id";
     $res_page = $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
     while ($row_page = $res_page->fetch_object()) {
         $related_words = PWSemanticDistance::getRelatedWords($row_page->page_id);
         foreach ($related_words as $word => $coef) {
             $word_s = str_replace("'", "\\'", $word);

             // Resolve the word to a lemma id, creating the lemma when it is new.
             // New lemmas are inserted immediately (not batched) so that later
             // lookups in this loop can find them.
             // NOTE(review): LIKE treats % and _ inside the word as wildcards;
             // an exact comparison may be what is intended - confirm.
             $res_page_exists = $link_db->query_e("SELECT id FROM {$l_table} where lemma LIKE '{$word_s}'", "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
             if ($link_db->query_count($res_page_exists) == 0) {
                 $link_db->query_e("INSERT INTO `{$l_table}` (`lemma`,`origin`,`frequency`) VALUES ('{$word_s}',1,0)", "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
                 $word_id = $link_db->insert_id;
             } else {
                 $row_page_exists = $res_page_exists->fetch_object();
                 $word_id = $row_page_exists->id;
             }

             // Append first, then flush, so the pair that fills the batch is not lost.
             $tmp[] = "('" . $row_page->page_id . "', '{$word_id}', '{$coef}')";
             if (sizeof($tmp) >= $batch_size) {
                 $link_db->query_e("INSERT INTO `{$rw_table}` VALUES " . join(', ', $tmp), "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
                 $tmp = array();
             }
         }
     }
     // Flush whatever is left, even a single pair.
     if (sizeof($tmp) > 0) {
         $link_db->query_e("INSERT INTO `{$rw_table}` VALUES " . join(', ', $tmp), "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
     }
     //    PWRelatedWords::addReverseRelations();
     print "<p>The table <b>{$l_table}</b> and <b>{$rw_table}</b> are created</p>";
 }
Example #2
0
</form>
<?php 
if (isset($word)) {
    // set some options
    $opts = array('storage' => PHPMORPHY_STORAGE_FILE, 'predict_by_suffix' => true, 'predict_by_db' => true, 'graminfo_as_text' => true);
    // Path to directory where dictionaries located
    $dir = SITE_ROOT . 'phpmorphy/dicts';
    $lang = 'ru_RU';
    // Create phpMorphy instance
    try {
        $morphy = new phpMorphy($dir, $lang, $opts);
    } catch (phpMorphy_Exception $e) {
        die('Error occured while creating phpMorphy instance: ' . PHP_EOL . $e);
    }
    try {
        $words = PWSemanticDistance::meaningsToLemmas($word);
        $lemmas = array();
        if (sizeof($words)) {
            $words = array_count_values($words);
            arsort($words);
            foreach ($words as $word => $count) {
                $lemma = PWLemma::getPhpMorphyLemma($word, $morphy);
                if (isset($lemmas[$lemma])) {
                    $lemmas[$lemma] += $count;
                } else {
                    $lemmas[$lemma] = $count;
                }
            }
            print "<table style='border: 1px solid #000; cellspacing:0; padding: 5px;'>\n";
            foreach ($lemmas as $lemma => $count) {
                print "<tr><td>{$lemma}</td><td>{$count}</td></tr>\n";
Example #3
0
<?php 
//$LINK_DB -> close();
//$LINK_DB = new DB($config['hostname'], $config['admin_login'], $config['admin_password'], $config['dbname']);
$word_arr = file("related_words_in.txt", FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
$fh = fopen("related_words_out.txt", 'w');
foreach ($word_arr as $words) {
    list($word1, $word2) = preg_split("/\\s+/", $words);
    //        list($word1,$word2) = preg_split("/\s+/",$word_arr[0]);
    //print "<P>$word1,$word2";
    //break;
    $start = PWLemma::getIDByLemma($word1);
    $finish = PWLemma::getIDByLemma($word2);
    $word1_url = TPage::getURL($word1);
    $word2_url = TPage::getURL($word2);
    if ($start && $finish) {
        list($dist_len, $path) = PWSemanticDistance::DijkstraAlgorithmByArray($start, $finish);
        if ($path == NULL) {
            print "<p>The words '{$word1_url}' and '{$word2_url}' are not related</p>";
        } else {
            print "<p>" . (int) (sizeof($path) - 1) . " step(s), the length of distance is {$dist_len}</p>";
            print TPage::getURL(PWLemma::getLemmaByID($path[0]));
            for ($i = 1; $i < sizeof($path); $i++) {
                print " -> " . TPage::getURL(PWLemma::getLemmaByID($path[$i]));
            }
        }
    } else {
        $dist_len = 0;
        if (!$start && !$finish) {
            print "<p>The words '{$word1_url}' and '{$word2_url}' have been not found</p>";
        } elseif (!$start) {
            print "<p>The word '{$word1_url}' has been not found</p>";