/**
 * Counts the semantic relations of one relation type that belong to entries of one language.
 *
 * The language code and the relation type name are first resolved to their numeric IDs
 * (TLang::getIDByLangCode() and TRelationType::getIDByName()); both IDs are cast to int
 * before they are placed into the query.
 *
 * @param string $lang_code          language code
 * @param string $relation_type_name name of the type of semantic relation
 * @return int number of matching rows as reported by query_count()
 */
public static function countRelations($lang_code, $relation_type_name)
{
    $db = Piwidict::getDatabaseConnection();

    $language_id = (int) TLang::getIDByLangCode($lang_code);
    $type_id = (int) TRelationType::getIDByName($relation_type_name);

    $sql = "SELECT meaning_id from relation, lang_pos, meaning where lang_pos.id=meaning.lang_pos_id and meaning.id=relation.meaning_id "
        . "and relation_type_id={$type_id} and lang_pos.lang_id={$language_id}";

    $rows = $db->query_e($sql, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

    return $db->query_count($rows);
}
// Input file structure, one entry per line:
//   word | RNC (Russian National Corpus) number of occurrences | GBN (Google Books Ngram) number of occurrences
require '../../../vendor/autoload.php';

use piwidict\Piwidict;
//use piwidict\sql\{TLang, TPage, TPOS, TRelationType};
//use piwidict\widget\WForm;

require '../config_examples.php';
require '../config_password.php';

include LIB_DIR . "header.php";

// $pw = new Piwidict();
Piwidict::setDatabaseConnection($config['hostname'], $config['user_login'], $config['user_password'], $config['dbname']);
$link_db = Piwidict::getDatabaseConnection();

$wikt_lang = "ru"; // Russian language is the main language in ruwikt (Russian Wiktionary)
Piwidict::setWiktLang($wikt_lang);

$lang_id = TLang::getIDByLangCode("ru");

// Read the word list; stop with a clear message instead of iterating over `false`.
$search_words = file('ru.Wikt_uniq-lemas_with-freq.txt');
if ($search_words === false) {
    die("Cannot read file <b>ru.Wikt_uniq-lemas_with-freq.txt</b>");
}

// $RNC_num / $GBN_num map a word to its second / third "|"-separated field.
$RNC_num = $GBN_num = array();
foreach ($search_words as $i => $word) {
    $word = trim($word);
    if ($word === '') {
        // A blank line carries no word; do not register an empty search word.
        unset($search_words[$i]);
        continue;
    }

    $word_stats = explode('|', $word);
    $search_words[$i] = $word_stats[0];

    // Lines with fewer than three fields keep the word but get NULL counts.
    $RNC_num[$word_stats[0]] = isset($word_stats[1]) ? $word_stats[1] : null;
    $GBN_num[$word_stats[0]] = isset($word_stats[2]) ? $word_stats[2] : null;
}

// After the flip the words are the keys (and the line indexes the values) in both arrays.
$unfound_words = $search_words = array_flip($search_words);
/*
ksort($search_words);
print "<PRE>";
print_r($search_words);
*/
/**
 * Counts the frequency of occurrence of lemmas in meaning texts and accumulates it in the
 * `frequency` field of the lemma table (table name is taken from PWLemma::getTableName()).
 *
 * For every meaning of the language given by PWLemma::getLangCode() the text is split into words
 * and each word is lemmatised via PWLemma::getPhpMorphyLemma():
 *  - if the lemma does not exist in the table, it is added there with origin=2, the number of its
 *    occurrences in this meaning and the ID of the meaning where it has been found;
 *  - otherwise the number of occurrences is added to the stored frequency.
 *
 * The script is terminated with die() when the phpMorphy instance cannot be created or when a
 * phpMorphy_Exception is thrown during processing.
 *
 * @return void
 */
public static function count_frequency_lemma_in_meaning()
{
    $link_db = Piwidict::getDatabaseConnection();

    // set some options
    $opts = array(
        'storage' => PHPMORPHY_STORAGE_FILE,
        'predict_by_suffix' => true,
        'predict_by_db' => true,
        'graminfo_as_text' => true,
    );

    // Path to directory where dictionaries located
    $dir = SITE_ROOT . 'phpmorphy/dicts';
    $lang = 'ru_RU';

    // Create phpMorphy instance
    try {
        $morphy = new phpMorphy($dir, $lang, $opts);
    } catch (phpMorphy_Exception $e) {
        die('Error occured while creating phpMorphy instance: ' . PHP_EOL . $e);
    }

    try {
        $lang_id = (int) TLang::getIDByLangCode(PWLemma::getLangCode());
        $l_table = PWLemma::getTableName();

        $query = "SELECT meaning.id as meaning_id, wiki_text.text as text FROM wiki_text, meaning, lang_pos WHERE "
            . "wiki_text.id=meaning.wiki_text_id and meaning.lang_pos_id=lang_pos.id and lang_pos.lang_id={$lang_id}";
        $res_meaning = $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

        while ($row_meaning = $res_meaning->fetch_object()) {
            // Split on whitespace, dropping punctuation that touches a word boundary.
            $words = preg_split('/((^\\p{P}+)|(\\p{P}*\\s+\\p{P}*)|(\\p{P}+$))/u', $row_meaning->text, -1, PREG_SPLIT_NO_EMPTY);
            if ($words === false) {
                // preg_split() returns false on failure (e.g. text that is not valid UTF-8
                // with the /u modifier); such a meaning cannot be tokenised, skip it.
                continue;
            }

            // word => number of occurrences inside this meaning
            foreach (array_count_values($words) as $word => $count) {
                $lemma = PWLemma::getPhpMorphyLemma($word, $morphy);
                if (!$lemma) {
                    continue;
                }
                $lemma = PWString::restoreCase($lemma, $word);

                // Escape for a MySQL string literal: the backslash must be doubled BEFORE the
                // quote is escaped, otherwise a backslash in the lemma breaks the literal.
                $lemma_sql = str_replace(array("\\", "'"), array("\\\\", "\\'"), $lemma);

                // LIKE additionally treats "%", "_" and "\" as pattern characters; escape them
                // first so that the lookup matches exactly this lemma and nothing else.
                $lemma_like = str_replace(array("\\", "'"), array("\\\\", "\\'"), addcslashes($lemma, '%_\\'));

                $cond = "WHERE lemma like '{$lemma_like}'";
                $res_lemma = $link_db->query_e("SELECT id,frequency FROM {$l_table} {$cond}", "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

                if ($link_db->query_count($res_lemma) == 0) {
                    // Unknown lemma: register it together with the meaning where it was found.
                    $query = "INSERT INTO `{$l_table}` (`lemma`,`origin`,`frequency`,`meaning_id`) VALUES ('{$lemma_sql}',2,{$count}," . (int) $row_meaning->meaning_id . ")";
                    $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
                } else {
                    // Known lemma: add this meaning's occurrences to the stored frequency.
                    $row_lemma = $res_lemma->fetch_object();
                    $query = "UPDATE `{$l_table}` SET `frequency`=" . (int) ($count + $row_lemma->frequency) . " {$cond}";
                    $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
                }
            }
        }
    } catch (phpMorphy_Exception $e) {
        die('Error occured while text processing: ' . $e->getMessage());
    }
}
<?php
/* List of Belarusian words with empty definition */
require '../../../vendor/autoload.php';

use piwidict\Piwidict;
//use piwidict\sql\{TLang, TPage, TPOS, TRelationType};
//use piwidict\widget\WForm;

require '../config_examples.php';
require '../config_password.php';

include LIB_DIR . "header.php";

Piwidict::setDatabaseConnection($config['hostname'], $config['user_login'], $config['user_password'], $config['dbname']);
$link_db = Piwidict::getDatabaseConnection();

$wikt_lang = "ru"; // Russian language is the main language in ruwikt (Russian Wiktionary)
Piwidict::setWiktLang($wikt_lang);

$lang_id = TLang::getIDByLangCode("be");

// Output file; fopen() returns false on failure, so stop before any query is run.
$fh = fopen('be.wiktionary.with.empty.definition.txt', 'w');
if ($fh === false) {
    die("Cannot open file <b>be.wiktionary.with.empty.definition.txt</b> for writing");
}

// All page titles that have an entry in the selected language, in alphabetical order.
$query = "SELECT page_title FROM lang_pos, page WHERE lang_pos.page_id = page.id AND lang_id={$lang_id} order by page_title";
$result = $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

while ($row = $result->fetch_object()) {
    // A page counts as "empty" until one of its meanings has a non-NULL wiki_text_id.
    $is_empty = 1;

    $query = "SELECT wiki_text_id FROM lang_pos, page, meaning WHERE lang_pos.page_id = page.id AND lang_id={$lang_id} and page.page_title='" . PWString::escapeQuotes($row->page_title) . "' and lang_pos.id=meaning.lang_pos_id";
    $result_meaning = $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

    if ($link_db->query_count($result_meaning)) {
        // Stop scanning as soon as the first meaning with a definition text is found.
        while ($is_empty && ($row_meaning = $result_meaning->fetch_object())) {
            if ($row_meaning->wiki_text_id != NULL) {
                $is_empty = 0;
            }
        }
    }

    if ($is_empty) {
$LINK_DB = connectMySQL();

// SECURITY NOTE(review): extract() on $_REQUEST turns request parameters into variables of this
// script (by reference, because of EXTR_REFS). Left as is because the rest of the script is not
// visible here; consider reading the required parameters explicitly instead.
extract($_REQUEST, EXTR_PREFIX_ALL | EXTR_REFS, '');

mb_internal_encoding("UTF-8");
$this_script_URL = "list_hypo.php";

include "../lib/header.php";
?> <h3>Generation of list of hyponyms and hypernyms</h3> <?php
print "Database version: {$NAME_DB}<BR>";

//$labels_all = TLabel::getAllLabels();
$lang_all = TLang::getAllLang();
$relation_type_all = TRelationType::getAllRelations();
$pos_all = TPOS::getAllPOS();

// Resolve and print the IDs used further below.
$lang_id_ru = TLang::getIDByLangCode($lang_all, "ru");
print "lang_id_ru = {$lang_id_ru}<BR>";

$pos_id_noun = TPOS::getIDByName($pos_all, "noun");
$pos_id_noun_class = TPOS::getIDByName($pos_all, "noun class");
print "ID of part of speech \"noun\" = {$pos_id_noun}<BR>";
print "ID of part of speech \"noun class\" = {$pos_id_noun_class}<BR>";

$relation_type_id_hyponyms = TRelationType::getIDByName($relation_type_all, "hyponyms");
$relation_type_id_hypernyms = TRelationType::getIDByName($relation_type_all, "hypernyms");
print "ID of relation type \"hyponyms\" = {$relation_type_id_hyponyms}<BR>";
print "ID of relation type \"hypernyms\" = {$relation_type_id_hypernyms}<BR>";
print "<BR>";

$query_lang_pos = "SELECT id FROM lang_pos";
// mysqli_error() needs the connection it should report on, and the failed statement is held
// in $query_lang_pos (there is no $query variable at this point).
$result_lang_pos = mysqli_query($LINK_DB, $query_lang_pos)
    or die("Query failed (line 39) in list_hypo.php: " . mysqli_error($LINK_DB) . ". Query: " . $query_lang_pos);

$counter = 0;
while ($row = mysqli_fetch_array($result_lang_pos)) {
    $lang_pos_id = $row['id'];
require '../../../vendor/autoload.php';

use piwidict\Piwidict;
//use piwidict\sql\{TLang, TPage, TPOS, TRelationType};
//use piwidict\widget\WForm;

require '../config_examples.php';
require '../config_password.php';

include LIB_DIR . "header.php";

// $pw = new Piwidict();
Piwidict::setDatabaseConnection($config['hostname'], $config['user_login'], $config['user_password'], $config['dbname']);
$link_db = Piwidict::getDatabaseConnection();

$wikt_lang = "ru"; // Russian language is the main language in ruwikt (Russian Wiktionary)
Piwidict::setWiktLang($wikt_lang);

$php_self = "antonym_synsets.php";
$lang_name = "ru";
$lang_id = TLang::getIDByLangCode($lang_name);
$ant_id = TRelationType::getIDByName("antonyms");

// The pattern only matches a subject that starts with "/src/"; with the literal $php_self above
// it does not match, so preg_replace() returns the script name unchanged.
$out_file_name = SITE_ROOT . preg_replace("/^\\/src(\\/.+)\\.php\$/", "data\$1", $php_self);

$pos_name = "noun";
//$pos_name = "verb";
//$pos_name = "adjective";
//$pos_name = "adverb";
$pos_id = TPOS::getIDByName($pos_name);

//$fh = gzopen($out_file_name.'.txt.gz','wb9');
$gz_file_name = $out_file_name . '_' . $lang_name . '_' . $pos_name . '.txt.gz';
$fh = gzopen($gz_file_name, 'wb9');
if ($fh === false) {
    // gzopen() returns false when the file cannot be opened; gzwrite() must not be called then.
    die("Cannot open file <b>" . $gz_file_name . "</b> for writing");
}
gzwrite($fh, '## Database version: ' . NAME_DB . "\n\n");

// Meanings of single-word page titles for the selected language and part of speech.
$query = "SELECT page_title as first_word, meaning.id as meaning_id\n"
    . " FROM lang_pos, meaning, page \n"
    . " WHERE lang_pos.id = meaning.lang_pos_id \n"
    . " AND page.id = lang_pos.page_id\n"
    . " AND page_title NOT LIKE '% %'\n"
    . " AND lang_id = {$lang_id} "
    . " AND pos_id={$pos_id} "
    . "ORDER BY page_title";
$result_meaning = $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

while ($row = $result_meaning->fetch_object()) {
    // Single-word antonyms attached to this meaning.
    $query = "SELECT wiki_text.text as relation_word\n"
        . " FROM wiki_text, relation\n"
        . " WHERE relation.wiki_text_id=wiki_text.id \n"
        . " AND wiki_text.text NOT LIKE '% %'\n"
        . " AND relation_type_id = {$ant_id}\n"
        . " AND relation.meaning_id = " . $row->meaning_id . " ORDER BY wiki_text.text";
    $result_relation = $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
/**
 * Collects the IDs of `lang_pos` rows that belong to the given page and language.
 *
 * The language code is resolved with TLang::getIDByLangCode(); the page ID and the language ID
 * are both cast to int before they are placed into the query. Uses the global $LINK_DB connection.
 *
 * @param int    $page_id   ID of the page
 * @param string $lang_code language code
 * @return array IDs of the matching lang_pos rows; an empty array when there are none
 */
public static function getIDByPageAndLang($page_id, $lang_code)
{
    global $LINK_DB;

    $language_id = (int) TLang::getIDByLangCode($lang_code);
    $ids = array();

    $sql = "SELECT id FROM lang_pos where page_id=" . (int) $page_id . " and lang_id=" . $language_id;
    $rows = $LINK_DB->query_e($sql, "Query failed in " . __METHOD__ . " in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

    if ($LINK_DB->query_count($rows) > 0) {
        while ($record = $rows->fetch_object()) {
            $ids[] = $record->id;
        }
    }

    return $ids;
}