Ejemplo n.º 1
0
 /**
  * Exports the related-words graph as a GEXF XML document.
  *
  * Every lemma referenced by the related-words table becomes a <node>
  * (id = lemma id, label = lemma text); every row of the related-words
  * table becomes an <edge> from lemma_id1 to lemma_id2 carrying its weight.
  *
  * @return string|false whatever DOMDocument::saveXML() returns for the built document
  */
 public static function getRelatedWords()
 {
     global $LINK_DB;
     $lemma_table = PWLemma::getTableName();
     $relation_table = PWRelatedWords::getTableName();

     // Document skeleton; formatOutput asks for indented, human-readable XML.
     $doc = new DomDocument('1.0', 'UTF-8');
     $doc->formatOutput = true;

     // Root <gexf> element (created without a namespace of its own).
     $root = $doc->appendChild($doc->createElementNS(null, 'gexf'));
     // VIZ namespace declaration; only needed when viz:* children are emitted.
     $root->setAttribute('xmlns:viz', 'http://www.gexf.net/1.1draft/viz');
     $root->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance');
     $root->setAttributeNS('http://www.w3.org/2001/XMLSchema-instance', 'schemaLocation', 'http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd');
     $root->setAttribute('version', '1.2');

     // <meta>: modification date, generator name and a short description.
     $meta = $root->appendChild($doc->createElement('meta'));
     $meta->setAttribute('lastmodifieddate', date('Y-m-d'));
     $meta->appendChild($doc->createElement('creator', 'PHP GEXF Generator v0.1'));
     $meta->appendChild($doc->createElement('description', 'Related words'));

     // <graph> with its two containers.
     $graph = $root->appendChild($doc->createElement('graph'));
     $node_list = $graph->appendChild($doc->createElement('nodes'));
     $edge_list = $graph->appendChild($doc->createElement('edges'));

     // Nodes: only lemmas that occur on either side of a relation.
     $lemma_rows = $LINK_DB->query_e("SELECT * FROM {$lemma_table} WHERE id in (select lemma_id1 from {$relation_table}) or id in (select lemma_id2 from {$relation_table}) order by id", "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
     for ($lemma_row = $lemma_rows->fetch_object(); $lemma_row; $lemma_row = $lemma_rows->fetch_object()) {
         $node_el = $doc->createElement('node');
         $node_el->setAttribute('id', $lemma_row->id);
         $node_el->setAttribute('label', $lemma_row->lemma);
         // Optional viz:color (r,g,b), viz:position (x,y,z), viz:size (value)
         // and viz:shape (value) children could be created and appended to
         // $node_el here; none are emitted at the moment.
         $node_list->appendChild($node_el);
     }

     // Edges: one per relation row.
     $relation_rows = $LINK_DB->query_e("SELECT * FROM {$relation_table} order by lemma_id1", "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
     for ($relation = $relation_rows->fetch_object(); $relation; $relation = $relation_rows->fetch_object()) {
         $edge_el = $doc->createElement('edge');
         $edge_el->setAttribute('source', $relation->lemma_id1);
         $edge_el->setAttribute('target', $relation->lemma_id2);
         $edge_el->setAttribute('weight', $relation->weight);
         $edge_list->appendChild($edge_el);
     }

     return $doc->saveXML();
 }
Ejemplo n.º 2
0
 // Directory with the phpMorphy dictionaries and the dictionary language.
 $dir = SITE_ROOT . 'phpmorphy/dicts';
 $lang = 'ru_RU';
 // Create phpMorphy instance.
 // NOTE(review): $opts is not defined in this fragment; presumably it is set
 // up earlier in the script - confirm.
 try {
     $morphy = new phpMorphy($dir, $lang, $opts);
 } catch (phpMorphy_Exception $e) {
     die('Error occured while creating phpMorphy instance: ' . PHP_EOL . $e);
 }
 try {
     // Tokens taken from the meanings of $word.
     $words = PWSemanticDistance::meaningsToLemmas($word);
     $lemmas = array();
     if (sizeof($words)) {
         // token => number of occurrences, most frequent first
         $words = array_count_values($words);
         arsort($words);
         // Fold the token counts into per-lemma counts. The loop variable is
         // deliberately not called $word so the input word is not overwritten.
         foreach ($words as $token => $count) {
             $lemma = PWLemma::getPhpMorphyLemma($token, $morphy);
             if (!$lemma) {
                 // No lemma for this token: skip it instead of counting it
                 // under an empty/false array key.
                 continue;
             }
             if (isset($lemmas[$lemma])) {
                 $lemmas[$lemma] += $count;
             } else {
                 $lemmas[$lemma] = $count;
             }
         }
         print "<table style='border: 1px solid #000; cellspacing:0; padding: 5px;'>\n";
         foreach ($lemmas as $lemma => $count) {
             // Lemmas originate from dictionary text, so escape them for HTML.
             $lemma_html = htmlspecialchars((string) $lemma, ENT_QUOTES, 'UTF-8');
             print "<tr><td>{$lemma_html}</td><td>{$count}</td></tr>\n";
         }
         print "</table>\n";
     }
 } catch (phpMorphy_Exception $e) {
     die('Error occured while text processing: ' . $e->getMessage());
 }
Ejemplo n.º 3
0
 /**
  * Counts how often each lemma occurs in the meaning texts and accumulates the
  * result in the field `pw_lemma_LANG_CODE.frequency`.
  *
  * If a lemma does not exist in the table yet, it is inserted with origin=2 and
  * the meaning_id of the meaning in which it was found.
  *
  * Terminates the script via die() when phpMorphy cannot be created or throws
  * while the texts are processed.
  */
 public static function count_frequency_lemma_in_meaning()
 {
     $link_db = Piwidict::getDatabaseConnection();
     // phpMorphy options
     $opts = array('storage' => PHPMORPHY_STORAGE_FILE, 'predict_by_suffix' => true, 'predict_by_db' => true, 'graminfo_as_text' => true);
     // Path to the directory where the dictionaries are located
     $dir = SITE_ROOT . 'phpmorphy/dicts';
     $lang = 'ru_RU';
     // Create phpMorphy instance
     try {
         $morphy = new phpMorphy($dir, $lang, $opts);
     } catch (phpMorphy_Exception $e) {
         die('Error occured while creating phpMorphy instance: ' . PHP_EOL . $e);
     }
     try {
         $lang_id = (int) TLang::getIDByLangCode(PWLemma::getLangCode());
         $l_table = PWLemma::getTableName();
         // All meaning texts that belong to the configured language.
         $query = "SELECT meaning.id as meaning_id, wiki_text.text as text FROM wiki_text, meaning, lang_pos WHERE  " . "wiki_text.id=meaning.wiki_text_id and meaning.lang_pos_id=lang_pos.id and lang_pos.lang_id={$lang_id}";
         $res_meaning = $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
         while ($row_meaning = $res_meaning->fetch_object()) {
             // Split on whitespace, dropping punctuation that touches it or
             // sits at the very start/end of the text.
             $words = preg_split('/((^\\p{P}+)|(\\p{P}*\\s+\\p{P}*)|(\\p{P}+$))/u', $row_meaning->text, -1, PREG_SPLIT_NO_EMPTY);
             if ($words === false) {
                 // preg_split() failed (e.g. malformed UTF-8 under /u): nothing
                 // to count for this meaning.
                 continue;
             }
             $meaning_id = (int) $row_meaning->meaning_id;
             // token => number of occurrences inside this meaning
             $words = array_count_values($words);
             foreach ($words as $word => $count) {
                 $lemma = PWLemma::getPhpMorphyLemma($word, $morphy);
                 if (!$lemma) {
                     continue;
                 }
                 $lemma = PWString::restoreCase($lemma, $word);
                 // Two escaped forms are needed:
                 //  - $sql_lemma: safe inside a quoted SQL string literal
                 //    (quotes AND backslashes escaped, unlike a bare quote replace);
                 //  - $like_lemma: additionally neutralises the LIKE wildcards
                 //    % and _ so only the exact lemma row is matched/updated.
                 // NOTE(review): assumes MySQL backslash escapes are enabled and a
                 // UTF-8 connection; prefer the DB wrapper's native escaping or
                 // prepared statements if it offers them.
                 $sql_lemma = addslashes($lemma);
                 $like_lemma = addslashes(addcslashes($lemma, '%_\\'));
                 $cond = "WHERE lemma like '{$like_lemma}'";
                 $res_lemma = $link_db->query_e("SELECT id,frequency FROM {$l_table} {$cond}", "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
                 if ($link_db->query_count($res_lemma) == 0) {
                     // Unknown lemma: add it, remembering where it was first seen.
                     $query = "INSERT INTO `{$l_table}` (`lemma`,`origin`,`frequency`,`meaning_id`) VALUES ('{$sql_lemma}',2,{$count}," . $meaning_id . ")";
                     $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
                 } else {
                     // Known lemma: add this meaning's count to the stored total.
                     $row_lemma = $res_lemma->fetch_object();
                     $query = "UPDATE `{$l_table}` SET `frequency`=" . (int) ($count + $row_lemma->frequency) . " {$cond}";
                     $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
                 }
             }
         }
     } catch (phpMorphy_Exception $e) {
         die('Error occured while text processing: ' . $e->getMessage());
     }
 }
Ejemplo n.º 4
0
 /**
  * Collects the tokens of all meaning texts of a word.
  *
  * Only entries with origin 0 are used: for those the lemma id is also the
  * Wiktionary page id, so their meanings can be looked up by page.
  * Despite the method name, the returned items are the raw tokens of the
  * meaning texts; no lemmatisation happens here.
  *
  * @param string $word word whose meanings are tokenised
  * @return array list of tokens (duplicates kept, in order of appearance)
  */
 public static function meaningsToLemmas($word)
 {
     $word_obj_arr = PWLemma::getByLemma($word);
     $words = array();
     foreach ($word_obj_arr as $word_obj) {
         if ($word_obj->getOrigin() > 0) {
             // The page $word does not exist in LANG_CODE.wiktionary.org
             continue;
         }
         // if origin=0 then the word was added from wiktionary, and lemma.id = page.id
         $page_id = $word_obj->getID();
         $meaning_arr = TMeaning::getByPageAndLang($page_id, PWLemma::getLangCode());
         foreach ($meaning_arr as $meaning_obj) {
             $meaning_text = $meaning_obj->getWikiText()->getText();
             // Split on whitespace, dropping punctuation that touches it or
             // sits at the very start/end of the text.
             $tokens = preg_split('/((^\\p{P}+)|(\\p{P}*\\s+\\p{P}*)|(\\p{P}+$))/u', $meaning_text, -1, PREG_SPLIT_NO_EMPTY);
             if ($tokens === false) {
                 // preg_split() failed (e.g. malformed UTF-8 under /u); merging
                 // false would discard everything collected so far.
                 continue;
             }
             // Append in place instead of re-merging the whole list each time.
             foreach ($tokens as $token) {
                 $words[] = $token;
             }
         }
     }
     return $words;
 }
Ejemplo n.º 5
0
 /**
  * Stores the language code and derives the table name from it
  * ('pw_lemma_' followed by the code).
  *
  * @param string $lang_code language code used as the table-name suffix
  */
 public static function setLangCode($lang_code)
 {
     self::$table_name = "pw_lemma_{$lang_code}";
     self::$lang_code = $lang_code;
 }
Ejemplo n.º 6
0
// PhpMorphy
include "phpmorphy.inc.php";
//include_once(SITE_ROOT."phpmorphy/src/common.php");
// Legacy behaviour: every request parameter becomes a global variable of the
// same name.
// TODO!!! check vars - replace this with explicit, validated reads of $_REQUEST.
// Until then, refuse the names that would let a request overwrite superglobals
// ($_SERVER, $_SESSION, ...), $GLOBALS, or the variables this file sets up.
// No helper variables are introduced here on purpose: any extra global could
// itself collide with a legitimate request parameter name.
foreach ($_REQUEST as $var => $value) {
    if (strncmp((string) $var, '_', 1) === 0
        || in_array((string) $var, array('GLOBALS', 'this', 'config', 'LINK_DB'), true)
    ) {
        continue;
    }
    ${$var} = $value;
}
/*******************************
 * Init constants and variables
 *******************************/
define('NAME_DB', 'ruwik');
// Make sure $config is an array before keys are assigned to it; keys set by a
// previously included file are kept.
if (!isset($config) || !is_array($config)) {
    $config = array();
}
$config['hostname'] = 'localhost';
$config['dbname'] = NAME_DB;
$config['user_login'] = '******';
$config['user_password'] = '******';
$config['admin_login'] = '******';
$config['admin_password'] = '';
## DB connection
## mysql>GRANT SELECT ON %.* TO pw_user@'%' identified by '';
## mysql>GRANT SELECT, INSERT, UPDATE, CREATE, DROP, INDEX ON %.* TO pw_admin@'%' identified by '';
## mysql>FLUSH PRIVILEGES;
##
// Global connection opened with the read-only user credentials.
$LINK_DB = new DB($config['hostname'], $config['user_login'], $config['user_password'], $config['dbname']);
// Language code shared by the lemma, related-words and short-path tables.
define('LangCode', 'ru');
PWLemma::setLangCode(LangCode);
PWRelatedWords::setLangCode(LangCode);
PWShortPath::setLangCode(LangCode);
// including this file breaks the page styles
//include_once(LIB_DIR."multi/".LangCode."/WMeaning.php");
Ejemplo n.º 7
0
    //        list($word1,$word2) = preg_split("/\s+/",$word_arr[0]);
    //print "<P>$word1,$word2";
    //break;
    $start = PWLemma::getIDByLemma($word1);
    $finish = PWLemma::getIDByLemma($word2);
    $word1_url = TPage::getURL($word1);
    $word2_url = TPage::getURL($word2);
    if ($start && $finish) {
        list($dist_len, $path) = PWSemanticDistance::DijkstraAlgorithmByArray($start, $finish);
        if ($path == NULL) {
            print "<p>The words '{$word1_url}' and '{$word2_url}' are not related</p>";
        } else {
            print "<p>" . (int) (sizeof($path) - 1) . " step(s), the length of distance is {$dist_len}</p>";
            print TPage::getURL(PWLemma::getLemmaByID($path[0]));
            for ($i = 1; $i < sizeof($path); $i++) {
                print " -> " . TPage::getURL(PWLemma::getLemmaByID($path[$i]));
            }
        }
    } else {
        $dist_len = 0;
        if (!$start && !$finish) {
            print "<p>The words '{$word1_url}' and '{$word2_url}' have been not found</p>";
        } elseif (!$start) {
            print "<p>The word '{$word1_url}' has been not found</p>";
        } elseif (!$finish) {
            print "<p>The word '{$word2_url}' has been not found</p>";
        }
    }
    print "<hr>";
    fwrite($fh, $word1 . "\t" . $word2 . "\t" . $dist_len . "\n");
}
Ejemplo n.º 8
0
<?php

require '../../../vendor/autoload.php';
use piwidict\Piwidict;
//use piwidict\sql\{TLang, TPage, TPOS, TRelationType};
//use piwidict\widget\WForm;
require '../config_examples.php';
require '../config_password.php';
include LIB_DIR . "header.php";
// $pw = new Piwidict();
// Open the database connection described by $config and fetch its handle.
Piwidict::setDatabaseConnection($config['hostname'], $config['user_login'], $config['user_password'], $config['dbname']);
$link_db = Piwidict::getDatabaseConnection();
$wikt_lang = "ru";
// Russian language is the main language in ruwikt (Russian Wiktionary)
Piwidict::setWiktLang($wikt_lang);
// All lemmas that have a frequency, most frequent first.
$query = "SELECT * FROM " . PWLemma::getTableName() . " WHERE frequency>0 ORDER BY frequency DESC";
$res = $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
print (int) $link_db->query_count($res) . " records are found" . "<table border=1>\n<tr><th>lemma</th><th>frequency</th><th>origin</th><th>meaning</th></tr>\n";
while ($row = $res->fetch_object()) {
    // Text of the meaning the lemma was found in; stays empty when the lemma
    // has no meaning_id or the referenced meaning row no longer exists.
    $meaning = '';
    $meaning_id = (int) $row->meaning_id;
    if ($meaning_id > 0) {
        $query = "SELECT wiki_text.text as text FROM wiki_text, meaning WHERE wiki_text.id=meaning.wiki_text_id and meaning.id={$meaning_id}";
        $res_meaning = $link_db->query_e($query, "Query failed in file <b>" . __FILE__ . "</b>, string <b>" . __LINE__ . "</b>");
        $row_meaning = $res_meaning->fetch_object();
        if ($row_meaning) {
            $meaning = $row_meaning->text;
        }
    }
    // Lemma and meaning are free text from the database: escape them for HTML.
    $lemma_html = htmlspecialchars((string) $row->lemma, ENT_QUOTES, 'UTF-8');
    $meaning_html = htmlspecialchars((string) $meaning, ENT_QUOTES, 'UTF-8');
    print "<tr><td align='right'>" . $lemma_html . "</td><td>" . $row->frequency . "</td><td>" . $row->origin . "</td><td>{$meaning_html}</td></tr>\n";
}
print "</table>\n";