Exemple #1
0
 /**
  * Rebuilds the lemma (vocabulary) table and the related-words table.
  *
  * Step 1: drops and re-creates the lemma table and fills it with the trimmed
  * titles of all pages that have a lang_pos row for the language configured in
  * PWLemma; for these rows the lemma id equals the page id.
  * Step 2: drops and re-creates the related-words table and, for every such page,
  * stores the (page id, related lemma id, weight) triples built from
  * PWSemanticDistance::getRelatedWords(). A related word that is not yet in the
  * lemma table is appended with origin=1 and receives the next auto-increment id.
  *
  * Rows are written with multi-row INSERT statements of at most 27000 rows each.
  * Prints an HTML confirmation paragraph when finished.
  */
 public static function createVocabularyRelatedTables()
 {
     $link_db = Piwidict::getDatabaseConnection();
     $lang_id = (int) TLang::getIDByLangCode(PWLemma::getLangCode());
     $l_table = PWLemma::getTableName();
     $rw_table = PWRelatedWords::getTableName();
     // maximum number of rows sent in one multi-row INSERT
     $batch_size = 27000;
     $query = "DROP TABLE IF EXISTS `{$l_table}`";
     $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
     $query = "CREATE TABLE `{$l_table}`(" . "`id` int(10) unsigned NOT NULL AUTO_INCREMENT," . "`lemma` varchar(255) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL," . "`origin` tinyint(1) default 0," . "`frequency` int default 0," . "`meaning_id` int default 0," . "PRIMARY KEY (`id`), UNIQUE(`lemma`), KEY `origin` (`origin`))";
     $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
     // writing words from page table
     $query = "SELECT DISTINCT page.id, trim(page_title) as page_title FROM page, lang_pos WHERE lang_pos.page_id=page.id and lang_id={$lang_id} ORDER BY page_id";
     $res_page = $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
     $tmp = array();
     while ($row = $res_page->fetch_object()) {
         // always buffer the current row first, so no row is dropped when a batch is flushed
         $tmp[] = "(" . $row->id . ", '" . str_replace("'", "\\'", $row->page_title) . "',0,0,0)";
         if (sizeof($tmp) >= $batch_size) {
             $link_db->query_e("INSERT INTO `{$l_table}` VALUES " . join(', ', $tmp), "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
             $tmp = array();
         }
     }
     // write the remainder, whatever its size (including a single row)
     if (sizeof($tmp) > 0) {
         $link_db->query_e("INSERT INTO `{$l_table}` VALUES " . join(', ', $tmp), "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
     }
     $query = "DROP TABLE IF EXISTS `{$rw_table}`";
     $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
     $query = "CREATE TABLE `{$rw_table}`(" . "`lemma_id1` int(10) unsigned NOT NULL," . "`lemma_id2` int(10) unsigned NOT NULL," . "`weight` decimal(8,6) unsigned NOT NULL," . "PRIMARY KEY (`lemma_id1`,`lemma_id2`))";
     $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
     // writing related words
     $tmp = array();
     $query = "SELECT DISTINCT page_id FROM lang_pos WHERE lang_id={$lang_id} ORDER BY page_id";
     $res_page = $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
     while ($row_page = $res_page->fetch_object()) {
         $related_words = PWSemanticDistance::getRelatedWords($row_page->page_id);
         foreach ($related_words as $word => $coef) {
             $word_s = str_replace("'", "\\'", $word);
             // NOTE(review): LIKE treats % and _ inside the word as wildcards - confirm whether '=' is intended
             $res_page_exists = $link_db->query_e("SELECT id FROM {$l_table} where lemma LIKE '{$word_s}'", "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
             if ($link_db->query_count($res_page_exists) == 0) {
                 // the related word has no page of its own: add it as a lemma with origin=1
                 $link_db->query_e("INSERT INTO `{$l_table}` (`lemma`,`origin`,`frequency`) VALUES ('{$word_s}',1,0)", "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
                 $word_id = $link_db->insert_id;
             } else {
                 $row_page_exists = $res_page_exists->fetch_object();
                 $word_id = $row_page_exists->id;
             }
             // buffer first, then flush when the batch is full (see the lemma loop above)
             $tmp[] = "('" . $row_page->page_id . "', '{$word_id}', '{$coef}')";
             if (sizeof($tmp) >= $batch_size) {
                 $link_db->query_e("INSERT INTO `{$rw_table}` VALUES " . join(', ', $tmp), "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
                 $tmp = array();
             }
         }
     }
     if (sizeof($tmp) > 0) {
         $link_db->query_e("INSERT INTO `{$rw_table}` VALUES " . join(', ', $tmp), "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
     }
     //    PWRelatedWords::addReverseRelations();
     print "<p>The table <b>{$l_table}</b> and <b>{$rw_table}</b> are created</p>";
 }
Exemple #2
0
 /**
  * Serializes the related-words graph as a GEXF XML document.
  *
  * Nodes are the rows of the lemma table whose id occurs as lemma_id1 or lemma_id2
  * in the related-words table (attributes: id, label). Edges are the rows of the
  * related-words table (attributes: source, target, weight).
  *
  * @return string the XML text produced by DOMDocument::saveXML()
  */
 public static function getRelatedWords()
 {
     global $LINK_DB;
     $node_table = PWLemma::getTableName();
     $edge_table = PWRelatedWords::getTableName();

     // Document with indented, human-readable output
     $doc = new DomDocument('1.0', 'UTF-8');
     $doc->formatOutput = true;

     // Root <gexf> element carrying the viz and xsi namespace declarations
     $root = $doc->appendChild($doc->createElementNS(null, 'gexf'));
     $root->setAttribute('xmlns:viz', 'http://www.gexf.net/1.1draft/viz');
     $root->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance');
     $root->setAttributeNS('http://www.w3.org/2001/XMLSchema-instance', 'schemaLocation', 'http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd');
     $root->setAttribute('version', '1.2');

     // <meta> block: modification date, creator and description
     $meta_el = $doc->createElement('meta');
     $meta_el = $root->appendChild($meta_el);
     $meta_el->setAttribute('lastmodifieddate', date('Y-m-d'));
     $meta_el->appendChild($doc->createElement('creator', 'PHP GEXF Generator v0.1'));
     $meta_el->appendChild($doc->createElement('description', 'Related words'));

     // <graph> with its two containers
     $graph_el = $root->appendChild($doc->createElement('graph'));
     $node_list = $graph_el->appendChild($doc->createElement('nodes'));
     $edge_list = $graph_el->appendChild($doc->createElement('edges'));

     // One <node> per lemma that takes part in at least one relation
     $lemma_rows = $LINK_DB->query_e("SELECT * FROM {$node_table} WHERE id in (select lemma_id1 from {$edge_table}) or id in (select lemma_id2 from {$edge_table}) order by id", "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
     while ($lemma = $lemma_rows->fetch_object()) {
         $node_el = $doc->createElement('node');
         $node_el->setAttribute('id', $lemma->id);
         $node_el->setAttribute('label', $lemma->lemma);
         // Optional viz:color / viz:position / viz:size / viz:shape children
         // could be attached to the node here; they are currently not emitted.
         $node_list->appendChild($node_el);
     }

     // One <edge> per row of the related-words table
     $relation_rows = $LINK_DB->query_e("SELECT * FROM " . $edge_table . " order by lemma_id1", "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
     while ($relation = $relation_rows->fetch_object()) {
         $edge_el = $doc->createElement('edge');
         $edge_el->setAttribute('source', $relation->lemma_id1);
         $edge_el->setAttribute('target', $relation->lemma_id2);
         $edge_el->setAttribute('weight', $relation->weight);
         $edge_list->appendChild($edge_el);
     }

     return $doc->saveXML();
 }
Exemple #3
0
// PhpMorphy
include "phpmorphy.inc.php";
//include_once(SITE_ROOT."phpmorphy/src/common.php");
// Copies every request parameter into a variable of the same name
// (register_globals emulation). Names that would clobber PHP superglobals,
// $this (assigning it raises "Cannot re-assign $this") or the configuration
// and DB handle set up below are skipped.
foreach ($_REQUEST as $var => $value) {
    /*
    TODO!!! check vars (validate the individual request values)
    */
    if (in_array((string) $var, array('GLOBALS', '_GET', '_POST', '_COOKIE', '_REQUEST', '_SERVER', '_ENV', '_FILES', '_SESSION', 'this', 'config', 'LINK_DB'), true)) {
        continue;
    }
    ${$var} = $value;
}
/*******************************
 * Init constants and variables
 *******************************/
define('NAME_DB', 'ruwik');
$config['hostname'] = 'localhost';
$config['dbname'] = NAME_DB;
$config['user_login'] = '******';
$config['user_password'] = '******';
$config['admin_login'] = '******';
$config['admin_password'] = '';
## DB connection
## mysql>GRANT SELECT ON %.* TO pw_user@'%' identified by '';
## mysql>GRANT SELECT, INSERT, UPDATE, CREATE, DROP, INDEX ON %.* TO pw_admin@'%' identified by '';
## mysql>FLUSH PRIVILEGES;
##
// The connection is opened with the read-only user credentials from $config
$LINK_DB = new DB($config['hostname'], $config['user_login'], $config['user_password'], $config['dbname']);
define('LangCode', 'ru');
// Propagate the language code to the classes that derive their table names from it
PWLemma::setLangCode(LangCode);
PWRelatedWords::setLangCode(LangCode);
PWShortPath::setLangCode(LangCode);
// including this file breaks the page styles
//include_once(LIB_DIR."multi/".LangCode."/WMeaning.php");
 /**
  * Searches the shortest path between two lemmas with Dijkstra's algorithm,
  * keeping the working state (tentative distances, predecessors, visited marks)
  * in the shortest-path table instead of in memory.
  *
  * Edges are read from the related-words table and are treated as undirected:
  * a row matches a vertex when it appears as lemma_id1 or as lemma_id2.
  * Progress information is printed as HTML paragraphs.
  *
  * @param int $first  id of the start lemma
  * @param int $finish id of the target lemma
  * @return array array(path length, list of lemma ids from $first to $finish);
  *               array(0, array($first)) when $first equals $finish;
  *               array(0, NULL) when one of the lemmas has no edge at all;
  *               array(NULL, NULL) when no path was found.
  */
 public static function DijkstraAlgorithmByDB($first, $finish)
 {
     global $LINK_DB;
     if ($first == $finish) {
         return array(0, array($first));
     }
     // the ids are interpolated into SQL (partly unquoted), so force them to integers
     $first_id = (int) $first;
     $finish_id = (int) $finish;
     $edge_table = PWRelatedWords::getTableName();
     // table of related words (words and distance between them)
     $path_table = PWShortPath::getTableName();
     // table of shortest paths (first, last, next-to-last vertexes, length of path)
     $query = "SELECT lemma_id1 FROM {$edge_table} WHERE lemma_id1='{$first_id}' or lemma_id2='{$first_id}' LIMIT 1";
     // check if any edge with $first exists
     $res_exist = $LINK_DB->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
     if ($LINK_DB->query_count($res_exist) == 0) {
         return array(0, NULL);
     }
     $query = "SELECT lemma_id1 FROM {$edge_table} WHERE lemma_id1='{$finish_id}' or lemma_id2='{$finish_id}' LIMIT 1";
     // check if any edge with $finish exists
     $res_exist = $LINK_DB->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
     if ($LINK_DB->query_count($res_exist) == 0) {
         return array(0, NULL);
     }
     $success = 0;
     // the condition of finding the shortest path in the given vertex ($finish)
     $count_row = 1;
     $query = "UPDATE {$path_table} SET mark=0 where lemma_id_1=" . $first_id;
     // mark all vertexes as unvisited (if already any paths in DB exists)
     $LINK_DB->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
     $prev = $first_id;
     $path_len = 0;
     $count = 0;
     while (!$success && $count_row) {
         // until the target is reached or no unvisited vertex is left
         $count++;
         print "<p>" . $count . ": " . $count_row . ".-----------------------------</p>";
         $query = "SELECT * FROM {$edge_table} WHERE lemma_id1='{$prev}' or lemma_id2='{$prev}'";
         // search nearest vertexes to $prev (there is NO need to sort, because the order in which the labels are set does not matter)
         $res_neib = $LINK_DB->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
         while ($row_neib = $res_neib->fetch_object()) {
             // the neighbour is whichever end of the edge is not $prev
             if ($row_neib->lemma_id1 == $prev) {
                 $last = $row_neib->lemma_id2;
             } else {
                 $last = $row_neib->lemma_id1;
             }
             $new_path_len = $path_len + $row_neib->weight;
             // path length from $first to $last going through $prev
             $query = "SELECT path_len,mark FROM {$path_table} WHERE lemma_id_1='{$first_id}' and lemma_id_n='{$last}'";
             $res_path = $LINK_DB->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
             if ($LINK_DB->query_count($res_path) == 0) {
                 // 1. no path from $first to $last is stored yet: insert it as unvisited
                 $query = "INSERT INTO {$path_table} (`lemma_id_1`, `lemma_id_n`, `path_len`, `lemma_id_prev_n`, mark) VALUES ({$first_id}, {$last}, {$new_path_len}, {$prev}, 0)";
                 $LINK_DB->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
             } else {
                 // 2. a path from $first to $last already exists: update length and predecessor
                 //    only if the vertex is still unvisited and the new path is shorter
                 $row_path = $res_path->fetch_object();
                 if ($row_path->mark == 0 && $new_path_len < $row_path->path_len) {
                     $query = "UPDATE {$path_table} SET path_len={$new_path_len}, lemma_id_prev_n={$prev} WHERE lemma_id_1={$first_id} and lemma_id_n={$last}";
                     $LINK_DB->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
                 }
             }
         }
         $query = "SELECT path_len, lemma_id_n FROM {$path_table} WHERE lemma_id_1='{$first_id}' and mark=0 order by path_len";
         // unvisited vertexes ordered by tentative distance; the row count doubles as the loop condition
         $res_min = $LINK_DB->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
         $count_row = $LINK_DB->query_count($res_min);
         if (!$count_row) {
             // all paths from start are marked as visited
             $path_len = 0;
         } else {
             // only the first row is used: the unvisited vertex with minimal distance
             $row_min = $res_min->fetch_object();
             $path_len = $row_min->path_len;
             $prev = $row_min->lemma_id_n;
         }
         $query = "UPDATE {$path_table} SET mark=1 where lemma_id_1={$first_id} and lemma_id_n={$prev}";
         // mark vertex $prev as visited
         $LINK_DB->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
         if ($prev == $finish_id) {
             // the shortest path to $finish is found
             $success = 1;
         }
     }
     print "<p>{$count} iterations";
     if (!$success) {
         // no path from $first to $finish was found
         return array(NULL, NULL);
     }
     // walk the stored predecessors back from $finish until the start vertex is reached
     $path = array($finish);
     $prev = $finish_id;
     while ($prev != $first_id) {
         $query = "SELECT lemma_id_prev_n FROM {$path_table} WHERE lemma_id_1='{$first_id}' and lemma_id_n='{$prev}' order by path_len LIMIT 1";
         $res = $LINK_DB->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
         $row = $res->fetch_object();
         if (!$row) {
             // broken predecessor chain: report "no path" instead of looping forever
             return array(NULL, NULL);
         }
         $prev = $row->lemma_id_prev_n;
         array_unshift($path, $prev);
     }
     return array($path_len, $path);
 }
 /**
  * Stores the language code and derives the table name from it
  * ('pw_related_words_' followed by the code).
  *
  * @param string $lang_code language code, e.g. 'ru'
  */
 public static function setLangCode($lang_code)
 {
     // a '_small' suffix for the table name is disabled in the original code
     self::$table_name = "pw_related_words_{$lang_code}";
     self::$lang_code = $lang_code;
 }