/**
 * Creates the lemma (vocabulary) table and the related-words table.
 *
 * Step 1: drops and recreates the lemma table, then fills it with the trimmed
 * page titles of every page that has a lang_pos row for the language of
 * PWLemma. For these rows the lemma id is page.id and origin is 0.
 *
 * Step 2: drops and recreates the related-words table, then for every such
 * page stores the (page_id, related lemma id, weight) triples built from
 * PWSemanticDistance::getRelatedWords(). A related word that is not yet in
 * the lemma table is inserted with origin=1 and gets the next
 * auto-increment id.
 *
 * Rows are written with multi-row INSERT statements of at most 27000 rows.
 * Prints an HTML confirmation paragraph when finished.
 */
public static function createVocabularyRelatedTables()
{
    $link_db = Piwidict::getDatabaseConnection();
    $lang_id = (int) TLang::getIDByLangCode(PWLemma::getLangCode());
    $l_table = PWLemma::getTableName();
    $rw_table = PWRelatedWords::getTableName();

    // maximal number of rows sent in one multi-row INSERT
    $batch_size = 27000;

    $query = "DROP TABLE IF EXISTS `{$l_table}`";
    $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

    $query = "CREATE TABLE `{$l_table}`("
        . "`id` int(10) unsigned NOT NULL AUTO_INCREMENT,"
        . "`lemma` varchar(255) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,"
        . "`origin` tinyint(1) default 0,"
        . "`frequency` int default 0,"
        . "`meaning_id` int default 0,"
        . "PRIMARY KEY (`id`), UNIQUE(`lemma`), KEY `origin` (`origin`))";
    $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

    // writing words from page table
    $query = "SELECT DISTINCT page.id, trim(page_title) as page_title FROM page, lang_pos WHERE lang_pos.page_id=page.id and lang_id={$lang_id} ORDER BY page_id";
    $res_page = $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

    $tmp = array();
    while ($row = $res_page->fetch_object()) {
        // Append first, flush afterwards: the row that fills the batch must
        // not be dropped.
        $tmp[] = "(" . $row->id . ", '" . str_replace("'", "\\'", $row->page_title) . "',0,0,0)";
        if (sizeof($tmp) >= $batch_size) {
            $link_db->query_e("INSERT INTO `{$l_table}` VALUES " . join(', ', $tmp), "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
            $tmp = array();
        }
    }
    // flush the remainder, including a remainder of a single row
    if (sizeof($tmp) > 0) {
        $link_db->query_e("INSERT INTO `{$l_table}` VALUES " . join(', ', $tmp), "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
    }

    $query = "DROP TABLE IF EXISTS `{$rw_table}`";
    $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

    $query = "CREATE TABLE `{$rw_table}`("
        . "`lemma_id1` int(10) unsigned NOT NULL,"
        . "`lemma_id2` int(10) unsigned NOT NULL,"
        . "`weight` decimal(8,6) unsigned NOT NULL,"
        . "PRIMARY KEY (`lemma_id1`,`lemma_id2`))";
    $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

    // writing related words
    $tmp = array();
    $query = "SELECT DISTINCT page_id FROM lang_pos WHERE lang_id={$lang_id} ORDER BY page_id";
    $res_page = $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

    while ($row_page = $res_page->fetch_object()) {
        $related_words = PWSemanticDistance::getRelatedWords($row_page->page_id);

        foreach ($related_words as $word => $coef) {
            $word_s = str_replace("'", "\\'", $word);

            // Look the word up in the lemma table; add it when it is absent.
            // NOTE(review): LIKE treats '%' and '_' inside the word as
            // wildcards; an exact '=' comparison may be intended - confirm.
            $res_page_exists = $link_db->query_e("SELECT id FROM {$l_table} where lemma LIKE '{$word_s}'", "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
            if ($link_db->query_count($res_page_exists) == 0) {
                $link_db->query_e("INSERT INTO `{$l_table}` (`lemma`,`origin`,`frequency`) VALUES ('{$word_s}',1,0)", "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
                $word_id = $link_db->insert_id;
            } else {
                $row_page_exists = $res_page_exists->fetch_object();
                $word_id = $row_page_exists->id;
            }

            // Same batching rule as above: append, then flush a full batch.
            $tmp[] = "('" . $row_page->page_id . "', '{$word_id}', '{$coef}')";
            if (sizeof($tmp) >= $batch_size) {
                $link_db->query_e("INSERT INTO `{$rw_table}` VALUES " . join(', ', $tmp), "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
                $tmp = array();
            }
        }
    }
    if (sizeof($tmp) > 0) {
        $link_db->query_e("INSERT INTO `{$rw_table}` VALUES " . join(', ', $tmp), "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
    }

    // PWRelatedWords::addReverseRelations();
    print "<p>The table <b>{$l_table}</b> and <b>{$rw_table}</b> are created</p>";
}
/**
 * Serializes the related-words graph as a GEXF (XML) document.
 *
 * Nodes are the rows of the lemma table whose id occurs in the related-words
 * table (as lemma_id1 or lemma_id2), labelled with the lemma. Edges are all
 * rows of the related-words table, with source, target and weight.
 *
 * @return mixed the result of DOMDocument::saveXML() for the built document
 */
public static function getRelatedWords()
{
    global $LINK_DB;

    $lemma_table = PWLemma::getTableName();
    $relation_table = PWRelatedWords::getTableName();

    // Document skeleton; formatOutput gives indented, readable XML.
    $doc = new DomDocument('1.0', 'UTF-8');
    $doc->formatOutput = true;

    // Root <gexf> element (created without a namespace) and its attributes.
    $root = $doc->appendChild($doc->createElementNS(null, 'gexf'));
    $root->setAttribute('xmlns:viz', 'http://www.gexf.net/1.1draft/viz'); // only needed for viz extensions
    $root->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance');
    $root->setAttributeNS('http://www.w3.org/2001/XMLSchema-instance', 'schemaLocation', 'http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd');
    $root->setAttribute('version', '1.2');

    // <meta>: modification date, creator and description.
    $meta_el = $doc->createElement('meta');
    $root->appendChild($meta_el);
    $meta_el->setAttribute('lastmodifieddate', date('Y-m-d'));
    $meta_el->appendChild($doc->createElement('creator', 'PHP GEXF Generator v0.1'));
    $meta_el->appendChild($doc->createElement('description', 'Related words'));

    // <graph> with its two containers, <nodes> first and <edges> second.
    $graph_el = $doc->createElement('graph');
    $root->appendChild($graph_el);
    $nodes_el = $graph_el->appendChild($doc->createElement('nodes'));
    $edges_el = $graph_el->appendChild($doc->createElement('edges'));

    // One <node> per lemma that takes part in at least one relation.
    // (Per-node viz:color / viz:position / viz:size / viz:shape children
    // were disabled in the original code and are not emitted.)
    $node_rows = $LINK_DB->query_e("SELECT * FROM {$lemma_table} WHERE id in (select lemma_id1 from {$relation_table}) or id in (select lemma_id2 from {$relation_table}) order by id", "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
    while ($lemma = $node_rows->fetch_object()) {
        $node_el = $doc->createElement('node');
        $node_el->setAttribute('id', $lemma->id);
        $node_el->setAttribute('label', $lemma->lemma);
        $nodes_el->appendChild($node_el);
    }

    // One <edge> per related-words row.
    $edge_rows = $LINK_DB->query_e("SELECT * FROM " . $relation_table . " order by lemma_id1", "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
    while ($relation = $edge_rows->fetch_object()) {
        $edge_el = $doc->createElement('edge');
        $edge_el->setAttribute('source', $relation->lemma_id1);
        $edge_el->setAttribute('target', $relation->lemma_id2);
        $edge_el->setAttribute('weight', $relation->weight);
        $edges_el->appendChild($edge_el);
    }

    return $doc->saveXML();
}
// PhpMorphy
include "phpmorphy.inc.php";
//include_once(SITE_ROOT."phpmorphy/src/common.php");

/*
 * Import request parameters as global variables (legacy behaviour the rest
 * of the application presumably relies on - verify against callers).
 * Names that would clobber PHP superglobals, $this, or the configuration
 * variables assigned below are skipped, so a crafted request cannot replace
 * them; every other parameter is imported exactly as before.
 */
foreach ($_REQUEST as $var => $value) {
    /* TODO!!! check vars */
    if (in_array(
        (string) $var,
        array(
            'GLOBALS', '_SERVER', '_GET', '_POST', '_FILES', '_COOKIE',
            '_SESSION', '_REQUEST', '_ENV', 'this', 'config', 'LINK_DB',
        ),
        true
    )) {
        continue;
    }
    ${$var} = $value;
}

/*******************************
 * Init constants and variables
 *******************************/
define('NAME_DB', 'ruwik');

$config['hostname'] = 'localhost';
$config['dbname'] = NAME_DB;
$config['user_login'] = '******';
$config['user_password'] = '******';
$config['admin_login'] = '******';
$config['admin_password'] = '';

## DB connection
## mysql>GRANT SELECT ON %.* TO pw_user@'%' identified by '';
## mysql>GRANT SELECT, INSERT, UPDATE, CREATE, DROP, INDEX ON %.* TO pw_admin@'%' identified by '';
## mysql>FLUSH PRIVILEGES;
##
$LINK_DB = new DB($config['hostname'], $config['user_login'], $config['user_password'], $config['dbname']);

// Language code used to derive the names of the language-specific tables.
define('LangCode', 'ru');
PWLemma::setLangCode(LangCode);
PWRelatedWords::setLangCode(LangCode);
PWShortPath::setLangCode(LangCode);

// this include breaks the styles
//include_once(LIB_DIR."multi/".LangCode."/WMeaning.php");
/**
 * Searches the shortest path between two lemmas with Dijkstra's algorithm,
 * keeping the working state (tentative path lengths, predecessor vertices
 * and visited marks) in the shortest-path table.
 *
 * Prints progress information (HTML paragraphs) while running.
 *
 * @param mixed $first  id of the start lemma; interpolated into SQL as is,
 *                      presumably an integer id - verify against callers
 * @param mixed $finish id of the target lemma; same assumption as $first
 * @return array array(0, array($first)) when $first equals $finish;
 *               array(0, NULL) when $first or $finish has no edge at all;
 *               array(path length, list of lemma ids from $first to $finish)
 *               when a path is found;
 *               array(NULL, NULL) when no path is found
 */
public static function DijkstraAlgorithmByDB($first, $finish)
{
    global $LINK_DB;

    if ($first == $finish) {
        return array(0, array($first));
    }

    $edge_table = PWRelatedWords::getTableName(); // table of related words (words and distance between them)
    $path_table = PWShortPath::getTableName(); // table of shortest paths (first, last, next-to-last vertexes, length of path)
    //print "$first, $finish";

    // check if any edge with $first exists
    $query = "SELECT lemma_id1 FROM {$edge_table} WHERE lemma_id1='{$first}' or lemma_id2='{$first}' LIMIT 1";
    $res_exist = $LINK_DB->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
    if ($LINK_DB->query_count($res_exist) == 0) {
        return array(0, NULL);
    }

    // check if any edge with $finish exists
    $query = "SELECT lemma_id1 FROM {$edge_table} WHERE lemma_id1='{$finish}' or lemma_id2='{$finish}' LIMIT 1";
    $res_exist = $LINK_DB->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
    if ($LINK_DB->query_count($res_exist) == 0) {
        return array(0, NULL);
    }

    $success = 0; // the condition of finding the shortest path in the given vertex ($finish)
    $count_row = 1;

    // mark all vertexes as unvisited (if any paths from $first already exist in DB)
    $query = "UPDATE {$path_table} SET mark=0 where lemma_id_1=" . $first;
    // $query = "DELETE FROM $path_table where lemma_id_1=".$first;
    $LINK_DB->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

    $prev = $first;
    $path_len = 0;
    $count = 0;

    while (!$success && $count_row) { // until every reachable vertex has been visited
        $count++; // && $count<3
        print "<p>" . $count . ": " . $count_row . ".-----------------------------</p>";

        // search nearest vertexes to $prev (NO need to sort, since the order
        // in which the labels are assigned does not matter)
        $query = "SELECT * FROM {$edge_table} WHERE lemma_id1='{$prev}' or lemma_id2='{$prev}'";
        $res_neib = $LINK_DB->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

        while ($row_neib = $res_neib->fetch_object()) {
            // edges are undirected: the neighbour is the other end of the edge
            if ($row_neib->lemma_id1 == $prev) {
                $last = $row_neib->lemma_id2;
            } else {
                $last = $row_neib->lemma_id1;
            }
            // length of the path from $first to $last that goes through $prev
            $new_path_len = $path_len + $row_neib->weight;

            $query = "SELECT path_len,mark FROM {$path_table} WHERE lemma_id_1='{$first}' and lemma_id_n='{$last}'";
            $res_path = $LINK_DB->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

            if ($LINK_DB->query_count($res_path) == 0) {
                // 1. no path from $first to $last is stored yet: insert it
                $query = "INSERT INTO {$path_table} (`lemma_id_1`, `lemma_id_n`, `path_len`, `lemma_id_prev_n`, mark) VALUES ({$first}, {$last}, {$new_path_len}, {$prev}, 0)";
                $LINK_DB->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
            } else {
                // 2. a path from $first to $last already exists: update length
                // and predecessor only for an unvisited vertex and only if
                // the new path is shorter
                $row_path = $res_path->fetch_object();
                if ($row_path->mark == 0 && $new_path_len < $row_path->path_len) {
                    $query = "UPDATE {$path_table} SET path_len={$new_path_len}, lemma_id_prev_n={$prev} WHERE lemma_id_1={$first} and lemma_id_n={$last}";
                    $LINK_DB->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
                }
            }
        }

        // choose the unvisited vertex with the minimal path length from $first
        $query = "SELECT path_len, lemma_id_n FROM {$path_table} WHERE lemma_id_1='{$first}' and mark=0 order by path_len";
        $res_min = $LINK_DB->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
        $count_row = $LINK_DB->query_count($res_min);

        if (!$count_row) {
            // all paths from start are marked as visited
            $path_len = 0;
        } else {
            // only the first row is needed - the minimal path length
            $row_min = $res_min->fetch_object();
            $path_len = $row_min->path_len;
            $prev = $row_min->lemma_id_n;
        }
        //print "<p>prev:$prev, path_len:".$path_len;

        // mark vertex $prev as visited
        $query = "UPDATE {$path_table} SET mark=1 where lemma_id_1={$first} and lemma_id_n={$prev}";
        $LINK_DB->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");

        if ($prev == $finish) {
            // the shortest path to $finish is found
            $success = 1;
        }
    }
    print "<p>{$count} iterations";

    if (!$success) {
        // no path from $first to $finish was found
        return array(NULL, NULL);
    }

    // Rebuild the path by walking the stored predecessors back from $finish
    // until the start vertex is reached.
    $path = array($finish);
    $prev = $finish;
    while ($prev != $first) {
        $query = "SELECT lemma_id_prev_n FROM {$path_table} WHERE lemma_id_1='{$first}' and lemma_id_n='{$prev}' order by path_len LIMIT 1";
        $res = $LINK_DB->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
        $row = $res->fetch_object();
        if (!$row) {
            // broken predecessor chain: report "no path" instead of looping
            // forever or dereferencing null
            return array(NULL, NULL);
        }
        $prev = $row->lemma_id_prev_n;
        array_unshift($path, $prev);
    }

    return array($path_len, $path);
}
/**
 * Remembers the language code and derives the name of the related-words
 * table from it: 'pw_related_words_' followed by the code.
 *
 * @param string $lang_code language code appended to the table name
 */
public static function setLangCode($lang_code)
{
    // a '_small' suffix for the table name was disabled in the original code
    self::$table_name = "pw_related_words_{$lang_code}";
    self::$lang_code = $lang_code;
}