/**
 * Builds a GEXF (XML) document describing the graph of related words.
 *
 * Nodes are the rows of the lemma table whose id occurs as `lemma_id1` or
 * `lemma_id2` in the related-words table; edges are the rows of the
 * related-words table (source, target, weight).
 *
 * @return string|false The serialized XML, as returned by DOMDocument::saveXML().
 */
public static function getRelatedWords()
{
    global $LINK_DB;

    $node_table = PWLemma::getTableName();
    $edge_table = PWRelatedWords::getTableName();

    // Document root
    $doc = new DomDocument('1.0', 'UTF-8');
    $doc->formatOutput = true; // indent the output for readability
    $root = $doc->appendChild($doc->createElementNS(null, 'gexf'));

    // Namespace declarations (the viz one is only needed for viz:* children)
    $root->setAttribute('xmlns:viz', 'http://www.gexf.net/1.1draft/viz');
    $root->setAttributeNS(
        'http://www.w3.org/2000/xmlns/',
        'xmlns:xsi',
        'http://www.w3.org/2001/XMLSchema-instance'
    );
    $root->setAttributeNS(
        'http://www.w3.org/2001/XMLSchema-instance',
        'schemaLocation',
        'http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd'
    );
    $root->setAttribute('version', '1.2');

    // <meta>
    $meta_el = $doc->createElement('meta');
    $root->appendChild($meta_el);
    $meta_el->setAttribute('lastmodifieddate', date('Y-m-d'));
    foreach (array('creator' => 'PHP GEXF Generator v0.1', 'description' => 'Related words') as $tag => $text) {
        $meta_el->appendChild($doc->createElement($tag, $text));
    }

    // <graph> with its <nodes> and <edges> containers
    $graph_el = $root->appendChild($doc->createElement('graph'));
    $nodes_el = $graph_el->appendChild($doc->createElement('nodes'));
    $edges_el = $graph_el->appendChild($doc->createElement('edges'));

    // Nodes: every lemma that takes part in at least one relation
    $node_result = $LINK_DB->query_e(
        "SELECT * FROM {$node_table} WHERE id in (select lemma_id1 from {$edge_table}) or id in (select lemma_id2 from {$edge_table}) order by id",
        "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>"
    );
    while ($lemma_row = $node_result->fetch_object()) {
        $node_el = $doc->createElement('node');
        $node_el->setAttribute('id', $lemma_row->id);
        $node_el->setAttribute('label', $lemma_row->lemma);
        // No viz:color / viz:position / viz:size / viz:shape children are emitted.
        $nodes_el->appendChild($node_el);
    }

    // Edges: one per row of the related-words table
    $edge_result = $LINK_DB->query_e(
        "SELECT * FROM " . PWRelatedWords::getTableName() . " order by lemma_id1",
        "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>"
    );
    while ($relation_row = $edge_result->fetch_object()) {
        $edge_el = $doc->createElement('edge');
        $edge_attributes = array(
            'source' => $relation_row->lemma_id1,
            'target' => $relation_row->lemma_id2,
            'weight' => $relation_row->weight,
        );
        foreach ($edge_attributes as $attr_name => $attr_value) {
            $edge_el->setAttribute($attr_name, $attr_value);
        }
        $edges_el->appendChild($edge_el);
    }

    return $doc->saveXML();
}
// Directory with the phpMorphy dictionaries and the dictionary language.
// $opts (phpMorphy options) and $word (the word to analyse) are expected to be
// defined before this point.
$dir = SITE_ROOT . 'phpmorphy/dicts';
$lang = 'ru_RU';

// Create phpMorphy instance
try {
    $morphy = new phpMorphy($dir, $lang, $opts);
} catch (phpMorphy_Exception $e) {
    die('Error occured while creating phpMorphy instance: ' . PHP_EOL . $e);
}

try {
    // Words of the meanings of $word, then their frequencies per lemma.
    $words = PWSemanticDistance::meaningsToLemmas($word);
    $lemmas = array();

    if (sizeof($words)) {
        $words = array_count_values($words);
        arsort($words);

        // A dedicated loop variable is used so that the input $word is not overwritten.
        foreach ($words as $token => $count) {
            $lemma = PWLemma::getPhpMorphyLemma($token, $morphy);

            // Skip tokens for which no lemma was produced; otherwise they would
            // all be accumulated under one bogus array key.
            if ($lemma === false || $lemma === null || $lemma === '') {
                continue;
            }

            if (isset($lemmas[$lemma])) {
                $lemmas[$lemma] += $count;
            } else {
                $lemmas[$lemma] = $count;
            }
        }

        print "<table style='border: 1px solid #000; cellspacing:0; padding: 5px;'>\n";
        foreach ($lemmas as $lemma => $count) {
            // Lemmas originate from dictionary text, so escape them for HTML.
            $lemma_html = htmlspecialchars((string) $lemma, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            print "<tr><td>{$lemma_html}</td><td>{$count}</td></tr>\n";
        }
        print "</table>\n";
    }
} catch (phpMorphy_Exception $e) {
    die('Error occured while text processing: ' . $e->getMessage());
}
/**
 * Counts how often lemmas occur in meaning texts and writes the result to the
 * field `pw_lemma_LANG_CODE.frequency`.
 *
 * For every meaning of the current language the text is split into words, each
 * word is lemmatized with phpMorphy, and the per-meaning count is added to the
 * stored frequency of the lemma. If the lemma does not exist in the table yet,
 * it is inserted with origin=2 and the id of the meaning where it was found.
 *
 * The script is terminated with die() when phpMorphy cannot be created or
 * throws during processing.
 */
public static function count_frequency_lemma_in_meaning()
{
    $link_db = Piwidict::getDatabaseConnection();

    // phpMorphy options
    $opts = array(
        'storage' => PHPMORPHY_STORAGE_FILE,
        'predict_by_suffix' => true,
        'predict_by_db' => true,
        'graminfo_as_text' => true,
    );

    // Path to the directory where the dictionaries are located
    $dir = SITE_ROOT . 'phpmorphy/dicts';
    $lang = 'ru_RU';

    // Create phpMorphy instance
    try {
        $morphy = new phpMorphy($dir, $lang, $opts);
    } catch (phpMorphy_Exception $e) {
        die('Error occured while creating phpMorphy instance: ' . PHP_EOL . $e);
    }

    try {
        $lang_id = (int) TLang::getIDByLangCode(PWLemma::getLangCode());
        $l_table = PWLemma::getTableName();

        $query = "SELECT meaning.id as meaning_id, wiki_text.text as text FROM wiki_text, meaning, lang_pos WHERE "
            . "wiki_text.id=meaning.wiki_text_id and meaning.lang_pos_id=lang_pos.id and lang_pos.lang_id={$lang_id}";
        $res_meaning = $link_db->query_e(
            $query,
            "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>"
        );

        while ($row_meaning = $res_meaning->fetch_object()) {
            $meaning_id = (int) $row_meaning->meaning_id;

            // Split on whitespace, dropping punctuation that touches a word boundary.
            $words = preg_split(
                '/((^\\p{P}+)|(\\p{P}*\\s+\\p{P}*)|(\\p{P}+$))/u',
                $row_meaning->text,
                -1,
                PREG_SPLIT_NO_EMPTY
            );
            if ($words === false) {
                // preg_split() fails (e.g. on invalid UTF-8); skip this meaning.
                continue;
            }

            $words = array_count_values($words);
            foreach ($words as $word => $count) {
                // Numeric tokens come back from array_count_values() as integer keys.
                $word = (string) $word;

                $lemma = PWLemma::getPhpMorphyLemma($word, $morphy);
                if (!$lemma) {
                    continue;
                }
                $lemma = PWString::restoreCase($lemma, $word);

                // Value for a plain SQL string literal: escapes quotes AND backslashes.
                $lemma_sql = addslashes($lemma);
                // Value for a LIKE pattern: first neutralize the LIKE metacharacters
                // (\, %, _) so the lemma is matched literally, then escape for the
                // SQL string literal.
                $lemma_like = addslashes(addcslashes($lemma, '\\%_'));

                $cond = "WHERE lemma like '{$lemma_like}'";
                $res_lemma = $link_db->query_e(
                    "SELECT id,frequency FROM {$l_table} {$cond}",
                    "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>"
                );

                if ($link_db->query_count($res_lemma) == 0) {
                    // Unknown lemma: add it with origin=2 and the meaning it was found in.
                    $query = "INSERT INTO `{$l_table}` (`lemma`,`origin`,`frequency`,`meaning_id`) VALUES ('{$lemma_sql}',2,"
                        . (int) $count . "," . $meaning_id . ")";
                    $link_db->query_e(
                        $query,
                        "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>"
                    );
                } else {
                    // Known lemma: add this meaning's count to the stored frequency.
                    $row_lemma = $res_lemma->fetch_object();
                    $query = "UPDATE `{$l_table}` SET `frequency`=" . (int) ($count + $row_lemma->frequency) . " {$cond}";
                    $link_db->query_e(
                        $query,
                        "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>"
                    );
                }
            }
        }
    } catch (phpMorphy_Exception $e) {
        die('Error occured while text processing: ' . $e->getMessage());
    }
}
/**
 * Collects the words that occur in the meaning texts of a Wiktionary entry.
 *
 * Only lemma records with origin 0 are used: for them the lemma id is used as
 * the page id to fetch the meanings in the current language. Each meaning
 * text is split on whitespace, with punctuation adjacent to the split points
 * removed. Words are returned in order of appearance, duplicates included.
 *
 * @param mixed $word Lemma to look up via PWLemma::getByLemma().
 * @return array List of words found in the meanings (empty when none).
 */
public static function meaningsToLemmas($word)
{
    $word_obj_arr = PWLemma::getByLemma($word);
    $words = array();

    foreach ($word_obj_arr as $word_obj) {
        if ($word_obj->getOrigin() > 0) {
            // The page $word does not exist in LANG_CODE.wiktionary.org
            continue;
        }

        // if origin=0 then word is added from wiktionary, and lemma.id = page.id
        $page_id = $word_obj->getID();
        $meaning_arr = TMeaning::getByPageAndLang($page_id, PWLemma::getLangCode());

        foreach ($meaning_arr as $meaning_obj) {
            $meaning_wiki_text = $meaning_obj->getWikiText();
            if (!$meaning_wiki_text) {
                // Meaning without an attached text object: nothing to split.
                continue;
            }

            $parts = preg_split(
                '/((^\\p{P}+)|(\\p{P}*\\s+\\p{P}*)|(\\p{P}+$))/u',
                $meaning_wiki_text->getText(),
                -1,
                PREG_SPLIT_NO_EMPTY
            );
            if ($parts === false) {
                // preg_split() failed (e.g. invalid UTF-8); keep what was
                // collected so far instead of corrupting the result list.
                continue;
            }

            // Append in place rather than re-copying the whole list with
            // array_merge() on every meaning.
            foreach ($parts as $part) {
                $words[] = $part;
            }
        }
    }

    return $words;
}
/**
 * Stores the language code and derives the lemma table name from it.
 *
 * The table name is the prefix "pw_lemma_" followed by the given code.
 *
 * @param mixed $lang_code Language code, e.g. 'ru'.
 */
public static function setLangCode($lang_code)
{
    self::$table_name = "pw_lemma_{$lang_code}";
    self::$lang_code = $lang_code;
}
// PhpMorphy
include "phpmorphy.inc.php";
//include_once(SITE_ROOT."phpmorphy/src/common.php");

// Import request parameters as global variables.
// Names that would clobber engine-reserved variables, superglobals or the
// variables this file sets up itself are ignored: e.g. "?this=1" would abort
// the script ("Cannot re-assign $this") and "?config=x" would turn $config
// into a string and break the assignments below.
foreach ($_REQUEST as $var => $value) {
    /* TODO!!! check vars */
    if (preg_match('/^(?:GLOBALS|this|config|LINK_DB|_(?:GET|POST|COOKIE|REQUEST|SERVER|FILES|ENV|SESSION))$/', (string) $var)) {
        continue;
    }
    ${$var} = $value;
}

/*******************************
 * Init constants and variables
 *******************************/
define('NAME_DB', 'ruwik');

$config['hostname'] = 'localhost';
$config['dbname'] = NAME_DB;
$config['user_login'] = '******';
$config['user_password'] = '******';
$config['admin_login'] = '******';
$config['admin_password'] = '';

## DB connection
## mysql>GRANT SELECT ON %.* TO pw_user@'%' identified by '';
## mysql>GRANT SELECT, INSERT, UPDATE, CREATE, DROP, INDEX ON %.* TO pw_admin@'%' identified by '';
## mysql>FLUSH PRIVILEGES;
##
$LINK_DB = new DB($config['hostname'], $config['user_login'], $config['user_password'], $config['dbname']);

// Language code shared by the lemma, related-words and short-path tables.
define('LangCode', 'ru');
PWLemma::setLangCode(LangCode);
PWRelatedWords::setLangCode(LangCode);
PWShortPath::setLangCode(LangCode);

// including this file breaks the page styles
//include_once(LIB_DIR."multi/".LangCode."/WMeaning.php");
// list($word1,$word2) = preg_split("/\s+/",$word_arr[0]); //print "<P>$word1,$word2"; //break; $start = PWLemma::getIDByLemma($word1); $finish = PWLemma::getIDByLemma($word2); $word1_url = TPage::getURL($word1); $word2_url = TPage::getURL($word2); if ($start && $finish) { list($dist_len, $path) = PWSemanticDistance::DijkstraAlgorithmByArray($start, $finish); if ($path == NULL) { print "<p>The words '{$word1_url}' and '{$word2_url}' are not related</p>"; } else { print "<p>" . (int) (sizeof($path) - 1) . " step(s), the length of distance is {$dist_len}</p>"; print TPage::getURL(PWLemma::getLemmaByID($path[0])); for ($i = 1; $i < sizeof($path); $i++) { print " -> " . TPage::getURL(PWLemma::getLemmaByID($path[$i])); } } } else { $dist_len = 0; if (!$start && !$finish) { print "<p>The words '{$word1_url}' and '{$word2_url}' have been not found</p>"; } elseif (!$start) { print "<p>The word '{$word1_url}' has been not found</p>"; } elseif (!$finish) { print "<p>The word '{$word2_url}' has been not found</p>"; } } print "<hr>"; fwrite($fh, $word1 . "\t" . $word2 . "\t" . $dist_len . "\n"); }
<?php
// Prints an HTML table of all lemmas with frequency > 0, most frequent first,
// together with their origin and the text of the meaning referenced by
// meaning_id (when there is one).

require '../../../vendor/autoload.php';

use piwidict\Piwidict;
//use piwidict\sql\{TLang, TPage, TPOS, TRelationType};
//use piwidict\widget\WForm;

require '../config_examples.php';
require '../config_password.php';

include LIB_DIR . "header.php";

// $pw = new Piwidict();
Piwidict::setDatabaseConnection($config['hostname'], $config['user_login'], $config['user_password'], $config['dbname']);
$link_db = Piwidict::getDatabaseConnection();

$wikt_lang = "ru"; // Russian language is the main language in ruwikt (Russian Wiktionary)
Piwidict::setWiktLang($wikt_lang);

$query = "SELECT * FROM " . PWLemma::getTableName() . " WHERE frequency>0 ORDER BY frequency DESC";
$res = $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

print (int) $link_db->query_count($res) . " records are found"
    . "<table border=1>\n<tr><th>lemma</th><th>frequency</th><th>origin</th><th>meaning</th></tr>\n";

// Flags used for every value written into the HTML table.
$html_flags = ENT_QUOTES | ENT_SUBSTITUTE;

while ($row = $res->fetch_object()) {
    // Cast before interpolating into SQL so only an integer can reach the query.
    $meaning_id = (int) $row->meaning_id;
    $meaning = '';

    if ($meaning_id > 0) {
        $query = "SELECT wiki_text.text as text FROM wiki_text, meaning WHERE wiki_text.id=meaning.wiki_text_id and meaning.id={$meaning_id}";
        $res_meaning = $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
        $row_meaning = $res_meaning->fetch_object();
        // The referenced meaning may have no matching row; leave the cell empty then.
        if ($row_meaning) {
            $meaning = (string) $row_meaning->text;
        }
    }

    // Lemma and meaning are free text from the database: escape them for HTML.
    print "<tr><td align='right'>" . htmlspecialchars((string) $row->lemma, $html_flags, 'UTF-8')
        . "</td><td>" . htmlspecialchars((string) $row->frequency, $html_flags, 'UTF-8')
        . "</td><td>" . htmlspecialchars((string) $row->origin, $html_flags, 'UTF-8')
        . "</td><td>" . htmlspecialchars($meaning, $html_flags, 'UTF-8') . "</td></tr>\n";
}

print "</table>\n";