/**
 * Selects a row from the table 'lang_pos' by ID.
 *
 * SELECT page_id,lang_id,pos_id,etymology_n,lemma FROM lang_pos WHERE id=8;
 *
 * @param int   $lang_pos_id value of lang_pos.id to look up (cast to int before it is placed in the query)
 * @param mixed $lang_all    passed through unchanged to TLang::getByID()
 * @param mixed $pos_all     passed through unchanged to TPOS::getByID()
 * @return object|null object with the properties page, lang, pos, etymology_n and lemma;
 *                     NULL if the row is absent or its language / part of speech cannot be resolved.
 */
public static function getByID($lang_pos_id, $lang_all, $pos_all)
{
    global $LINK_DB;

    // The ID is cast to int so that it cannot alter the structure of the query.
    $query = "SELECT page_id,lang_id,pos_id,etymology_n,lemma FROM lang_pos WHERE id=" . (int) $lang_pos_id;

    // mysqli_error() requires the connection; without it the error path itself fails.
    $result = mysqli_query($LINK_DB, $query)
        or die("Query failed (line 31) in TLangPOS::getByID: " . mysqli_error($LINK_DB) . ". Query: " . $query);

    $lang_pos = NULL;
    while ($row = mysqli_fetch_array($result)) {
        $page = TPage::getByID($row['page_id']);
        $lang = TLang::getByID($row['lang_id'], $lang_all);
        $pos = TPOS::getByID($row['pos_id'], $pos_all);

        // A record without a known language or part of speech is treated as absent.
        if (null == $lang || null == $pos) {
            $lang_pos = NULL;
            continue;
        }

        $lang_pos = array(
            'page' => $page,
            'lang' => $lang,
            'pos' => $pos,
            'etymology_n' => $row['etymology_n'],
            'lemma' => $row['lemma'],
        );
    }

    // Casting NULL to object would produce an empty stdClass, which callers
    // could not distinguish from a found record with a null check.
    return NULL === $lang_pos ? NULL : (object) $lang_pos;
}
/**
 * Returns how many semantic relations of one type exist for one language.
 *
 * @param string $lang_code          language code, resolved to an ID via TLang::getIDByLangCode()
 * @param string $relation_type_name relation type name, resolved to an ID via TRelationType::getIDByName()
 * @return int number of rows selected, as reported by query_count()
 */
public static function countRelations($lang_code, $relation_type_name)
{
    $db = Piwidict::getDatabaseConnection();

    // Both IDs are forced to integers before they are placed into the SQL text.
    $language_id = (int) TLang::getIDByLangCode($lang_code);
    $type_id = (int) TRelationType::getIDByName($relation_type_name);

    $sql = "SELECT meaning_id from relation, lang_pos, meaning where lang_pos.id=meaning.lang_pos_id and meaning.id=relation.meaning_id "
        . "and relation_type_id=" . $type_id
        . " and lang_pos.lang_id=" . $language_id;

    $rows = $db->query_e($sql, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

    return $db->query_count($rows);
}
/**
 * Gets TTranslationEntry objects by property $property_name with value $property_value.
 *
 * @param string      $property_name   column of `translation_entry` used as the filter
 * @param mixed       $property_value  value the column is compared with (placed into the query as a quoted string)
 * @param object|null $translation_obj translation attached to every entry; when NULL the translation
 *                                     of each row is loaded via TTranslation::getByID()
 * @return array|null list of TTranslationEntry objects ordered by id, or NULL if no row matches
 */
public static function getTranslationEntry($property_name, $property_value, $translation_obj = NULL)
{
    global $LINK_DB;

    // NOTE(review): $property_name and $property_value are interpolated verbatim;
    // callers appear to be responsible for escaping them - confirm before passing user input.
    $query = "SELECT * FROM translation_entry WHERE `{$property_name}`='{$property_value}' order by id";
    $result = $LINK_DB->query_e($query, "Query failed in " . __METHOD__ . " in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

    if ($LINK_DB->query_count($result) == 0) {
        return NULL;
    }

    $translationEntry_arr = array();
    // Translations already loaded in this call, keyed by translation_id.
    $translation_cache = array();

    while ($row = $result->fetch_object()) {
        if ($translation_obj == NULL) {
            // Resolve the translation of THIS row. Reusing the object loaded for the
            // first row would attach a wrong translation whenever the filter matches
            // rows that belong to different translations.
            $cache_key = (string) $row->translation_id;
            if (!array_key_exists($cache_key, $translation_cache)) {
                $translation_cache[$cache_key] = TTranslation::getByID($row->translation_id);
            }
            $row_translation = $translation_cache[$cache_key];
        } else {
            $row_translation = $translation_obj;
        }

        $translationEntry_arr[] = new TTranslationEntry(
            $row->id,
            $row_translation,
            TLang::getByID($row->lang_id),
            TWikiText::getByID($row->wiki_text_id)
        );
    }

    return $translationEntry_arr;
}
// Input file structure (one entry per line):
//   word | RNC (Russian National Corpus) number of occurrences | GBN (Google Books Ngram) number of occurrences
require '../../../vendor/autoload.php';

use piwidict\Piwidict;
//use piwidict\sql\{TLang, TPage, TPOS, TRelationType};
//use piwidict\widget\WForm;

require '../config_examples.php';
require '../config_password.php';

include LIB_DIR . "header.php";

// $pw = new Piwidict();
Piwidict::setDatabaseConnection($config['hostname'], $config['user_login'], $config['user_password'], $config['dbname']);
$link_db = Piwidict::getDatabaseConnection();

$wikt_lang = "ru"; // Russian language is the main language in ruwikt (Russian Wiktionary)
Piwidict::setWiktLang($wikt_lang);

$lang_id = TLang::getIDByLangCode("ru");

$word_list_file = 'ru.Wikt_uniq-lemas_with-freq.txt';
$input_lines = file($word_list_file);
if ($input_lines === false) {
    // Without the word list nothing below can work; stop with a clear message.
    die("Cannot read input file " . $word_list_file);
}

$search_words = $RNC_num = $GBN_num = array();
foreach ($input_lines as $i => $line) {
    $word_stats = explode('|', trim($line));

    // Skip blank or malformed lines: they would otherwise raise undefined-offset
    // errors and register an empty word.
    if (count($word_stats) < 3 || $word_stats[0] === '') {
        continue;
    }

    // The original line index is kept as the key, so that after array_flip()
    // every word still maps to the number of the line it came from.
    $search_words[$i] = $word_stats[0];
    $RNC_num[$word_stats[0]] = $word_stats[1];
    $GBN_num[$word_stats[0]] = $word_stats[2];
}

// word => line index; $unfound_words starts as a copy of the full word set.
$unfound_words = $search_words = array_flip($search_words);
/*
ksort($search_words);
print "<PRE>";
print_r($search_words);
*/
// Page "Dictionary info": shows a language selection form and, after submission,
// entry statistics for the selected language.
include "../../../config.php";
include LIB_DIR . "header.php";

// Target of the form below (this script).
$php_self = "dict_info.php";

// Default to the language with code "ru" when no language ID is set yet.
// NOTE(review): $lang_id and $view_list are presumably populated from the GET
// parameters by the included files - confirm.
if (!isset($lang_id)) {
    $lang_id = TLang::getIDByLangCode("ru");
}
// The inline HTML below is kept on one line on purpose: its whitespace is part of the page output.
?> <h2>Dictionary info</h2> Database version: <?php echo NAME_DB; ?> <form action="<?php echo $php_self; ?> " method="GET"> <p>Language: <?php echo TLang::getDropDownList($lang_id, "lang_id", 1); ?> </p> <p><input type="submit" name="view_list" value="search"></p> </form> <?php
// Statistics are printed only after the form was submitted ("view_list") and a language is selected.
if (isset($view_list) && $view_list && $lang_id) {
    $lang_name = TLang::getNameByID($lang_id);
    $lang_code = TLang::getCodeByID($lang_id);

    // Fixed relation type and part of speech reported by this page.
    $relation_type_name = 'synonyms';
    $pos_name = 'phrase';

    // All counters are computed first; the report is then emitted by a single print.
    print "<p>Total number of {$lang_name} entries : <b>" . PWStats::countEntries($lang_code) . "</b>, and <b>" . PWStats::countLangPOS($lang_code, $pos_name) . "</b> of them are phrases</p>" . "<p>Total number of {$lang_name} words with definitions: <b>" . PWStats::countLangPOSWithDefinitions($lang_code) . "</b></p>" . "<p>Total number of {$relation_type_name} pairs: <b>" . PWStats::countRelations($lang_code, $relation_type_name) . "</b></p>";
}
include LIB_DIR . "footer.php";
/**
 * Counts how often lemmas occur in the texts of meanings and stores the totals
 * in the field `pw_lemma_LANG_CODE.frequency`.
 *
 * A lemma that is not yet present in the table is inserted with origin=2 and
 * the meaning_id of the meaning in which it was found.
 */
public static function count_frequency_lemma_in_meaning()
{
    $link_db = Piwidict::getDatabaseConnection();

    // phpMorphy configuration
    $morphy_options = array(
        'storage' => PHPMORPHY_STORAGE_FILE,
        'predict_by_suffix' => true,
        'predict_by_db' => true,
        'graminfo_as_text' => true,
    );
    // Directory that holds the phpMorphy dictionaries
    $dict_dir = SITE_ROOT . 'phpmorphy/dicts';
    $dict_lang = 'ru_RU';

    try {
        $morphy = new phpMorphy($dict_dir, $dict_lang, $morphy_options);
    } catch (phpMorphy_Exception $e) {
        die('Error occured while creating phpMorphy instance: ' . PHP_EOL . $e);
    }

    try {
        $lang_id = (int) TLang::getIDByLangCode(PWLemma::getLangCode());
        $l_table = PWLemma::getTableName();

        // Every meaning text that belongs to the lemma table's language
        $query = "SELECT meaning.id as meaning_id, wiki_text.text as text FROM wiki_text, meaning, lang_pos WHERE "
            . "wiki_text.id=meaning.wiki_text_id and meaning.lang_pos_id=lang_pos.id and lang_pos.lang_id={$lang_id}";
        $res_meaning = $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

        while ($row_meaning = $res_meaning->fetch_object()) {
            // Split on whitespace together with adjacent punctuation, and strip
            // punctuation at the very beginning and end of the text.
            $tokens = preg_split('/((^\\p{P}+)|(\\p{P}*\\s+\\p{P}*)|(\\p{P}+$))/u', $row_meaning->text, -1, PREG_SPLIT_NO_EMPTY);

            // word form => number of occurrences inside this meaning
            foreach (array_count_values($tokens) as $word => $count) {
                $base_form = PWLemma::getPhpMorphyLemma($word, $morphy);
                if (!$base_form) {
                    continue;
                }

                // Give the lemma the letter case of the word form, then escape single quotes for SQL.
                $lemma = str_replace("'", "\\'", PWString::restoreCase($base_form, $word));
                $cond = "WHERE lemma like '{$lemma}'";

                $res_lemma = $link_db->query_e("SELECT id,frequency FROM {$l_table} {$cond}", "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

                if ($link_db->query_count($res_lemma) != 0) {
                    // Known lemma: add this meaning's count to the frequency of the first matching row.
                    $row_lemma = $res_lemma->fetch_object();
                    $query = "UPDATE `{$l_table}` SET `frequency`=" . (int) ($count + $row_lemma->frequency) . " {$cond}";
                    $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
                    continue;
                }

                // Unknown lemma: create it with origin=2 and remember the meaning where it was met.
                $query = "INSERT INTO `{$l_table}` (`lemma`,`origin`,`frequency`,`meaning_id`) VALUES ('{$lemma}',2,{$count}," . $row_meaning->meaning_id . ")";
                $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
            }
        }
    } catch (phpMorphy_Exception $e) {
        die('Error occured while text processing: ' . $e->getMessage());
    }
}
<?php /* List of Belarusian words with empty definition */ require '../../../vendor/autoload.php'; use piwidict\Piwidict; //use piwidict\sql\{TLang, TPage, TPOS, TRelationType}; //use piwidict\widget\WForm; require '../config_examples.php'; require '../config_password.php'; include LIB_DIR . "header.php"; Piwidict::setDatabaseConnection($config['hostname'], $config['user_login'], $config['user_password'], $config['dbname']); $link_db = Piwidict::getDatabaseConnection(); $wikt_lang = "ru"; // Russian language is the main language in ruwikt (Russian Wiktionary) Piwidict::setWiktLang($wikt_lang); $lang_id = TLang::getIDByLangCode("be"); $fh = fopen('be.wiktionary.with.empty.definition.txt', 'w'); $query = "SELECT page_title FROM lang_pos, page WHERE lang_pos.page_id = page.id AND lang_id={$lang_id} order by page_title"; $result = $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>"); while ($row = $result->fetch_object()) { $is_empty = 1; $query = "SELECT wiki_text_id FROM lang_pos, page, meaning WHERE lang_pos.page_id = page.id AND lang_id={$lang_id} and page.page_title='" . PWString::escapeQuotes($row->page_title) . "' and lang_pos.id=meaning.lang_pos_id"; $result_meaning = $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>"); if ($link_db->query_count($result_meaning)) { while ($is_empty && ($row_meaning = $result_meaning->fetch_object())) { if ($row_meaning->wiki_text_id != NULL) { $is_empty = 0; } } } if ($is_empty) {
$limit = 100; ?> <h2>Generation of list of relations (LIMIT <?php echo $limit; ?> )</h2> Database version: <?php echo NAME_DB; ?> <form action="<?php echo $php_self; ?> " method="GET"> <p>Language: <?php echo TLang::getDropDownList($lang_id, "lang_id", ''); ?> </p> <p>Part of speech: <?php echo TPOS::getDropDownList($pos_id, "pos_id", ''); ?> </p> <p>Relation type: <?php echo TRelationType::getDropDownList($relation_type_id, "relation_type_id", ''); ?> </p> <p>Word: <input type="text" name="page_title" value="<?php echo $page_title; ?> "></p> <p><input type="submit" name="view_list" value="search"></p>
</p> <p>Part of speech: <?php echo TPOS::getDropDownList($pos, "pos", ''); ?> </p> <p><input type="submit" name="view_dict" value="view"></p> </form> <?php if (isset($view_dict) && $view_dict) { $query = "SELECT pw_reverse_dict.page_id, reverse_page_title FROM pw_reverse_dict"; if ($pos > 0 || $lang > 0) { $query .= ", lang_pos WHERE lang_pos.page_id=pw_reverse_dict.page_id"; if (TPOS::isExist($pos)) { $query .= " and pos_id=" . (int) $pos; } if (TLang::isExist($lang)) { $query .= " and lang_id=" . (int) $lang; } $query .= " group by pw_reverse_dict.page_id"; } $query .= " order by reverse_page_title LIMIT 0,{$limit}"; // $query = "SELECT id, page_title FROM page order by page_title LIMIT 0,100"; //print $query; $res_page = $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>"); print "<div style=\"width:50%; text-align:right\">"; $count = 0; while ($row = $res_page->fetch_object()) { // meaning.wiki_text_id>0 === words with non-empty definitions // relation.meaning_id=meaning.id === words with semantic relations /* $query = "SELECT count(*) as count FROM lang_pos, meaning, relation WHERE lang_pos.page_id='".$row->page_id."' and lang_pos.id=meaning.lang_pos_id and meaning.wiki_text_id>0 ".
$LINK_DB = connectMySQL(); extract($_REQUEST, EXTR_PREFIX_ALL | EXTR_REFS, ''); mb_internal_encoding("UTF-8"); $this_script_URL = "list_hypo.php"; include "../lib/header.php"; ?> <h3>Generation of list of hyponyms and hypernyms</h3> <?php print "Database version: {$NAME_DB}<BR>"; //$labels_all = TLabel::getAllLabels(); $lang_all = TLang::getAllLang(); $relation_type_all = TRelationType::getAllRelations(); $pos_all = TPOS::getAllPOS(); $lang_id_ru = TLang::getIDByLangCode($lang_all, "ru"); print "lang_id_ru = {$lang_id_ru}<BR>"; $pos_id_noun = TPOS::getIDByName($pos_all, "noun"); $pos_id_noun_class = TPOS::getIDByName($pos_all, "noun class"); print "ID of part of speech \"noun\" = {$pos_id_noun}<BR>"; print "ID of part of speech \"noun class\" = {$pos_id_noun_class}<BR>"; $relation_type_id_hyponyms = TRelationType::getIDByName($relation_type_all, "hyponyms"); $relation_type_id_hypernyms = TRelationType::getIDByName($relation_type_all, "hypernyms"); print "ID of relation type \"hyponyms\" = {$relation_type_id_hyponyms}<BR>"; print "ID of relation type \"hypernyms\" = {$relation_type_id_hypernyms}<BR>"; print "<BR>"; $query_lang_pos = "SELECT id FROM lang_pos"; $result_lang_pos = mysqli_query($LINK_DB, $query_lang_pos) or die("Query failed (line 39) in list_hypo.php: " . mysqli_error() . ". Query: " . $query); $counter = 0; while ($row = mysqli_fetch_array($result_lang_pos)) { $lang_pos_id = $row['id'];
require '../../../vendor/autoload.php'; use piwidict\Piwidict; //use piwidict\sql\{TLang, TPage, TPOS, TRelationType}; //use piwidict\widget\WForm; require '../config_examples.php'; require '../config_password.php'; include LIB_DIR . "header.php"; // $pw = new Piwidict(); Piwidict::setDatabaseConnection($config['hostname'], $config['user_login'], $config['user_password'], $config['dbname']); $link_db = Piwidict::getDatabaseConnection(); $wikt_lang = "ru"; // Russian language is the main language in ruwikt (Russian Wiktionary) Piwidict::setWiktLang($wikt_lang); $php_self = "antonym_synsets.php"; $lang_name = "ru"; $lang_id = TLang::getIDByLangCode($lang_name); $ant_id = TRelationType::getIDByName("antonyms"); $out_file_name = SITE_ROOT . preg_replace("/^\\/src(\\/.+)\\.php\$/", "data\$1", $php_self); $pos_name = "noun"; //$pos_name = "verb"; //$pos_name = "adjective"; //$pos_name = "adverb"; $pos_id = TPOS::getIDByName($pos_name); //$fh = gzopen($out_file_name.'.txt.gz','wb9'); $fh = gzopen($out_file_name . '_' . $lang_name . '_' . $pos_name . '.txt.gz', 'wb9'); gzwrite($fh, '## Database version: ' . NAME_DB . "\n\n"); $query = "SELECT page_title as first_word, meaning.id as meaning_id\n FROM lang_pos, meaning, page \n WHERE lang_pos.id = meaning.lang_pos_id \n AND page.id = lang_pos.page_id\n AND page_title NOT LIKE '% %'\n AND lang_id = {$lang_id} " . " AND pos_id={$pos_id} " . "ORDER BY page_title"; $result_meaning = $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>"); while ($row = $result_meaning->fetch_object()) { $query = "SELECT wiki_text.text as relation_word\n FROM wiki_text, relation\n WHERE relation.wiki_text_id=wiki_text.id \n AND wiki_text.text NOT LIKE '% %'\n AND relation_type_id = {$ant_id}\n AND relation.meaning_id = " . $row->meaning_id . " ORDER BY wiki_text.text"; $result_relation = $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
/**
 * Gets TLangPOS objects by property $property_name with value $property_value.
 *
 * Only rows whose lang_id and pos_id are not NULL are considered.
 *
 * @param string      $property_name  column of `lang_pos` used as the filter
 * @param mixed       $property_value value the column is compared with (placed into the query as a quoted string)
 * @param object|null $page_obj       page attached to every result; when NULL the page of each row
 *                                    is loaded via TPage::getByID()
 * @return array|null list of TLangPOS objects ordered by id (each with its `meaning` property filled);
 *                    NULL if no row matches or if the language or part of speech of any row cannot be resolved
 */
public static function getLangPOS($property_name, $property_value, $page_obj = NULL)
{
    global $LINK_DB;

    // NOTE(review): $property_name and $property_value are interpolated verbatim;
    // callers appear to be responsible for escaping them - confirm before passing user input.
    $query = "SELECT * FROM lang_pos WHERE lang_id is not NULL and pos_id is not NULL and `{$property_name}`='{$property_value}' order by id";
    $result = $LINK_DB->query_e($query, "Query failed in " . __METHOD__ . " in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

    if ($LINK_DB->query_count($result) == 0) {
        return NULL;
    }

    $lang_pos_arr = array();
    // Pages already loaded in this call, keyed by page_id.
    $page_cache = array();

    while ($row = $result->fetch_object()) {
        $lang = TLang::getByID($row->lang_id);
        $pos = TPOS::getByID($row->pos_id);
        // One unresolvable row invalidates the whole result.
        if (NULL == $lang || NULL == $pos) {
            return NULL;
        }

        if ($page_obj == NULL) {
            // Resolve the page of THIS row. Reusing the page loaded for the first
            // row would attach a wrong page whenever the filter matches rows that
            // belong to different pages.
            $cache_key = (string) $row->page_id;
            if (!array_key_exists($cache_key, $page_cache)) {
                $page_cache[$cache_key] = TPage::getByID($row->page_id);
            }
            $row_page = $page_cache[$cache_key];
        } else {
            $row_page = $page_obj;
        }

        $lang_pos = new TLangPOS($row->id, $row_page, $lang, $pos, $row->etymology_n, $row->lemma);
        $lang_pos->meaning = TMeaning::getByLangPOS($row->id, $lang_pos);
        $lang_pos_arr[] = $lang_pos;
    }

    return $lang_pos_arr;
}