/**
 * Rebuilds the lemma (vocabulary) table and the related-words table.
 *
 * Both tables are dropped and recreated from scratch:
 *  - the lemma table first receives the trimmed titles of all pages that have
 *    a lang_pos entry for the PWLemma language, with lemma id = page.id
 *    (origin = 0);
 *  - then, for every such page, each word returned by
 *    PWSemanticDistance::getRelatedWords() is stored in the related-words
 *    table as (page id, lemma id, weight). A related word that is not yet in
 *    the lemma table is appended to it with origin = 1 and the next
 *    auto-increment id.
 *
 * Rows are written with multi-row INSERT statements of at most 27000 rows.
 * An HTML confirmation paragraph is printed when the method finishes.
 *
 * @return void
 */
public static function createVocabularyRelatedTables()
{
    $link_db = Piwidict::getDatabaseConnection();
    $lang_id = (int) TLang::getIDByLangCode(PWLemma::getLangCode());
    $l_table = PWLemma::getTableName();
    $rw_table = PWRelatedWords::getTableName();

    // Maximum number of rows sent in one multi-row INSERT.
    $batch_size = 27000;

    // Escaping for SQL string literals. The backslash pair must come first:
    // str_replace() applies the pairs in order, so escaping quotes first would
    // get their freshly added backslashes doubled by the second pass.
    $escape_from = array('\\', "'");
    $escape_to = array('\\\\', "\\'");

    $query = "DROP TABLE IF EXISTS `{$l_table}`";
    $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

    $query = "CREATE TABLE `{$l_table}`("
        . "`id` int(10) unsigned NOT NULL AUTO_INCREMENT,"
        . "`lemma` varchar(255) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,"
        . "`origin` tinyint(1) default 0,"
        . "`frequency` int default 0,"
        . "`meaning_id` int default 0,"
        . "PRIMARY KEY (`id`), UNIQUE(`lemma`), KEY `origin` (`origin`))";
    $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

    // writing words from page table
    $query = "SELECT DISTINCT page.id, trim(page_title) as page_title FROM page, lang_pos WHERE lang_pos.page_id=page.id and lang_id={$lang_id} ORDER BY page_id";
    $res_page = $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

    $tmp = array();
    while ($row = $res_page->fetch_object()) {
        // Always buffer the current row first, then flush when the batch is
        // full; flushing *instead of* buffering would lose this row.
        $tmp[] = "(" . $row->id . ", '" . str_replace($escape_from, $escape_to, $row->page_title) . "',0,0,0)";
        if (sizeof($tmp) >= $batch_size) {
            $link_db->query_e("INSERT INTO `{$l_table}` VALUES " . join(', ', $tmp), "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
            $tmp = array();
        }
    }
    // Flush the remainder, including a remainder of exactly one row.
    if (sizeof($tmp) > 0) {
        $link_db->query_e("INSERT INTO `{$l_table}` VALUES " . join(', ', $tmp), "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
    }

    $query = "DROP TABLE IF EXISTS `{$rw_table}`";
    $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

    $query = "CREATE TABLE `{$rw_table}`("
        . "`lemma_id1` int(10) unsigned NOT NULL,"
        . "`lemma_id2` int(10) unsigned NOT NULL,"
        . "`weight` decimal(8,6) unsigned NOT NULL,"
        . "PRIMARY KEY (`lemma_id1`,`lemma_id2`))";
    $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

    // writing related words
    $tmp = array();
    $query = "SELECT DISTINCT page_id FROM lang_pos WHERE lang_id={$lang_id} ORDER BY page_id";
    $res_page = $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

    while ($row_page = $res_page->fetch_object()) {
        $related_words = PWSemanticDistance::getRelatedWords($row_page->page_id);

        foreach ($related_words as $word => $coef) {
            $word_s = str_replace($escape_from, $escape_to, $word);

            // Exact match on purpose: LIKE would treat '%', '_' and '\' inside
            // the word as pattern syntax and could resolve to another lemma.
            $res_page_exists = $link_db->query_e("SELECT id FROM `{$l_table}` WHERE lemma = '{$word_s}'", "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

            if ($link_db->query_count($res_page_exists) == 0) {
                // Unknown word: append it to the vocabulary (origin = 1).
                $link_db->query_e("INSERT INTO `{$l_table}` (`lemma`,`origin`,`frequency`) VALUES ('{$word_s}',1,0)", "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
                $word_id = $link_db->insert_id;
            } else {
                $row_page_exists = $res_page_exists->fetch_object();
                $word_id = $row_page_exists->id;
            }

            // Buffer first, flush when full (see the lemma loop above in this method).
            $tmp[] = "('" . $row_page->page_id . "', '{$word_id}', '{$coef}')";
            if (sizeof($tmp) >= $batch_size) {
                $link_db->query_e("INSERT INTO `{$rw_table}` VALUES " . join(', ', $tmp), "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
                $tmp = array();
            }
        }
    }
    // Flush the remainder, including a remainder of exactly one row.
    if (sizeof($tmp) > 0) {
        $link_db->query_e("INSERT INTO `{$rw_table}` VALUES " . join(', ', $tmp), "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
    }

    // Reverse relations are intentionally not generated here (call left disabled):
    // PWRelatedWords::addReverseRelations();
    print "<p>The table <b>{$l_table}</b> and <b>{$rw_table}</b> are created</p>";
}
</form>
<?php
// Runs only when $word is set: collects the words produced by
// PWSemanticDistance::meaningsToLemmas($word), reduces each of them to a lemma
// via phpMorphy and prints an HTML table of lemma => total count.
if (isset($word)) {
    // set some options
    $opts = array('storage' => PHPMORPHY_STORAGE_FILE, 'predict_by_suffix' => true, 'predict_by_db' => true, 'graminfo_as_text' => true);
    // Path to directory where dictionaries located
    $dir = SITE_ROOT . 'phpmorphy/dicts';
    $lang = 'ru_RU';
    // Create phpMorphy instance
    try {
        $morphy = new phpMorphy($dir, $lang, $opts);
    } catch (phpMorphy_Exception $e) {
        // Without a morphology engine nothing below can work, so the page stops here.
        die('Error occured while creating phpMorphy instance: ' . PHP_EOL . $e);
    }
    try {
        $words = PWSemanticDistance::meaningsToLemmas($word);
        // Maps lemma => summed number of occurrences.
        $lemmas = array();
        if (sizeof($words)) {
            // Collapse the word list into word => occurrence count,
            // then order it by count, highest first (keys are preserved).
            $words = array_count_values($words);
            arsort($words);
            // NOTE(review): this loop reuses $word as its key variable, so the
            // $word tested by isset() above is overwritten from here on.
            foreach ($words as $word => $count) {
                $lemma = PWLemma::getPhpMorphyLemma($word, $morphy);
                // Different word forms can share a lemma: add their counts together.
                if (isset($lemmas[$lemma])) {
                    $lemmas[$lemma] += $count;
                } else {
                    $lemmas[$lemma] = $count;
                }
            }
            print "<table style='border: 1px solid #000; cellspacing:0; padding: 5px;'>\n";
            // One table row per lemma.
            // NOTE(review): $lemma is written into the HTML without escaping —
            // confirm the values can never contain markup.
            foreach ($lemmas as $lemma => $count) {
                print "<tr><td>{$lemma}</td><td>{$count}</td></tr>\n";
<?php
//$LINK_DB -> close();
//$LINK_DB = new DB($config['hostname'], $config['admin_login'], $config['admin_password'], $config['dbname']);
// Load the input lines: trailing newlines are stripped and empty lines skipped.
$word_arr = file("related_words_in.txt", FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
// Output file, opened for writing (mode 'w' truncates existing content).
$fh = fopen("related_words_out.txt", 'w');
// Each input line is expected to hold a pair of words separated by whitespace.
foreach ($word_arr as $words) {
    // Only the first two whitespace-separated fields are used.
    // NOTE(review): a line with fewer than two fields leaves $word2 without a
    // value here — confirm the input file always contains pairs.
    list($word1, $word2) = preg_split("/\\s+/", $words);
    // list($word1,$word2) = preg_split("/\s+/",$word_arr[0]);
    //print "<P>$word1,$word2";
    //break;
    // Look up the lemma ids used as the start and end points of the path search.
    $start = PWLemma::getIDByLemma($word1);
    $finish = PWLemma::getIDByLemma($word2);
    // Values from TPage::getURL(), used when the words are printed below.
    $word1_url = TPage::getURL($word1);
    $word2_url = TPage::getURL($word2);
    if ($start && $finish) {
        // Both words were found: the call result is unpacked into the distance
        // length and the path (a list of lemma ids, see its use below).
        list($dist_len, $path) = PWSemanticDistance::DijkstraAlgorithmByArray($start, $finish);
        // Loose comparison: an empty array is treated the same as NULL here.
        if ($path == NULL) {
            print "<p>The words '{$word1_url}' and '{$word2_url}' are not related</p>";
        } else {
            // Number of steps = number of path nodes minus one.
            print "<p>" . (int) (sizeof($path) - 1) . " step(s), the length of distance is {$dist_len}</p>";
            // Print the path as "first -> second -> ...", resolving each id to its lemma.
            print TPage::getURL(PWLemma::getLemmaByID($path[0]));
            for ($i = 1; $i < sizeof($path); $i++) {
                print " -> " . TPage::getURL(PWLemma::getLemmaByID($path[$i]));
            }
        }
    } else {
        // At least one lookup returned a falsy id: no path search is done,
        // the distance is set to 0 and the missing word(s) are reported.
        $dist_len = 0;
        if (!$start && !$finish) {
            print "<p>The words '{$word1_url}' and '{$word2_url}' have been not found</p>";
        } elseif (!$start) {
            print "<p>The word '{$word1_url}' has been not found</p>";