Пример #1
0
 /**
  * Loads every WikiArticle title, ordered by title, together with the value
  * WikiArticle::wikiTitleToUrlTitle() produces for it.
  *
  * @return array List of two-element arrays: array(title, URL title).
  */
 public static function loadAllTitlesOld()
 {
     $pairs = array();
     $sql = "select title from WikiArticle order by title";
     foreach (db_getArray($sql) as $wikiTitle) {
         $urlTitle = WikiArticle::wikiTitleToUrlTitle($wikiTitle);
         $pairs[] = array($wikiTitle, $urlTitle);
     }
     return $pairs;
 }
Пример #2
0
 /**
  * Returns the distinct definition IDs found in FullTextIndex for any of the
  * given lexem model IDs, optionally restricted to one source.
  *
  * @param array $lexemModelIds IDs joined with commas into the SQL "in (...)" list.
  * @param mixed $sourceId      When truthy, only definitions with this sourceId are kept.
  * @return array Whatever db_getArray() yields for the query; an empty array when no IDs are given.
  */
 public static function loadDefinitionIdsForLexemModels($lexemModelIds, $sourceId)
 {
     if (empty($lexemModelIds)) {
         return array();
     }
     $idList = implode(',', $lexemModelIds);
     if ($sourceId) {
         // The source lives on Definition, so the index has to be joined to it.
         $from = 'from FullTextIndex F join Definition D on D.id = F.definitionId';
         $where = "where lexemModelId in ({$idList}) and D.sourceId = {$sourceId}";
     } else {
         $from = 'from FullTextIndex';
         $where = "where lexemModelId in ({$idList})";
     }
     return db_getArray("select distinct definitionId {$from} {$where} order by definitionId");
 }
Пример #3
0
 /**
  * Picks a Diverta book to advertise for the given search results.
  *
  * With no lexems and no definitions, serves a random one of the ten books
  * with the highest click-through rate (titles matching a few mature keywords
  * are excluded). Otherwise it looks up the books indexed under the lexems
  * (or, when there are none, under the lexems mapped to the definitions) and
  * serves either the book with the fewest impressions (20% of the time) or
  * the one with the best click-through rate.
  *
  * @param array $lexems      Objects exposing an ->id property; may be empty.
  * @param array $definitions Objects exposing an ->id property; may be empty.
  * @return array|null array('bookId' => id), or null when there is nothing to serve.
  */
 public function run($lexems, $definitions)
 {
     if (empty($lexems) && empty($definitions)) {
         // If we are called from a page with no keywords, display one of the top 10 highest CTR books.
         $random = rand(0, 9);
         // TODO: Label books as mature
         $book = Model::factory('DivertaBook')->raw_query(
             "select * from diverta_Book where impressions "
             . "and title not like '%sex%' "
             . "and title not like '%erotic%' "
             . "and title not like '%bordel%' "
             . "and title not like '%glamour%' "
             . "order by clicks/impressions desc limit {$random}, 1",
             null
         )->find_one();
         // Fewer than $random + 1 books may qualify, in which case no row comes back.
         return $book ? array('bookId' => $book->id) : null;
     }

     $lexemIds = array();
     if (!empty($lexems)) {
         foreach ($lexems as $l) {
             $lexemIds[] = $l->id;
         }
     }
     if (count($lexemIds) == 0 && !empty($definitions)) {
         // Fall back to the lexems associated with the definitions. The leading -1
         // keeps the "in (...)" list syntactically valid.
         $defIdString = '-1';
         foreach ($definitions as $def) {
             $defIdString .= ",{$def->id}";
         }
         $lexemIds = db_getArray("select distinct lexemId from LexemDefinitionMap where DefinitionId in ({$defIdString})");
     }
     if (count($lexemIds) == 0 || count($lexemIds) >= 100) {
         // No keywords or too many keywords (indicating a regexp search)
         return null;
     }

     // Ordered by impressions so that $books[0] is the least-shown candidate.
     $books = Model::factory('DivertaBook')
         ->table_alias('b')
         ->select('b.*')
         ->join(DivertaIndex::$_table, 'b.id = i.bookId', 'i')
         ->where_in('i.lexemId', $lexemIds)
         ->order_by_asc('impressions')
         ->find_many();
     if (!count($books)) {
         return null;
     }

     // 20% chance to serve the book with the fewest impressions / 80% chance to serve the book with the highest CTR
     if (rand(0, 99) < 20) {
         return array('bookId' => $books[0]->id);
     }
     $best = 0;
     $bestCtr = 0.0;
     foreach ($books as $i => $book) {
         // Books never shown count as CTR 0 instead of dividing by zero.
         $ctr = $book->impressions ? $book->clicks / $book->impressions : 0.0;
         if ($ctr > $bestCtr) {
             $bestCtr = $ctr;
             $best = $i;
         }
     }
     return array('bookId' => $books[$best]->id);
 }
Пример #4
0
 /**
  * Finds lexems whose n-grams approximately match those of the given word.
  *
  * Each n-gram produced by self::split() is looked up in the NGram table at
  * positions within self::$MAX_MOVE of its own index. A lexem is counted at
  * most once per n-gram, and only lexems counted at least mb_strlen($cuv) / 2
  * times are kept.
  *
  * @param string $cuv The word to search for.
  * @return array Map of lexemId => number of n-grams of $cuv that matched it.
  */
 public static function searchLexemIds($cuv)
 {
     $ngramList = self::split($cuv);
     $hash = array();
     foreach ($ngramList as $i => $ngram) {
         // The n-gram comes from the searched word, so escape it before putting it in a
         // quoted SQL literal (same approach as the other hand-built queries here).
         $query = sprintf(
             "select lexemId from NGram where ngram = '%s' and pos between %d and %d",
             addslashes($ngram),
             $i - self::$MAX_MOVE,
             $i + self::$MAX_MOVE
         );
         // array_unique: a lexem matching this n-gram at several positions counts once.
         foreach (array_unique(db_getArray($query)) as $lexemId) {
             $hash[$lexemId] = isset($hash[$lexemId]) ? $hash[$lexemId] + 1 : 1;
         }
     }
     $minLength = mb_strlen($cuv) / 2;
     return array_filter($hash, function ($val) use ($minLength) {
         return $val >= $minLength;
     });
 }
Пример #5
0
 /**
  * Wraps occurrences of the given words, in any of their inflected forms, in
  * highlight spans inside each definition's htmlRep.
  *
  * For every distinct word, the query collects the formNoAccent values of all
  * forms of the non-stop-word lexems that have a form whose formUtf8General
  * equals the word. Words with no forms are dropped. Each remaining word gets
  * a CSS class "fth fthN", with N cycling through 0-4 in word order.
  *
  * @param array $words       Search words.
  * @param array $definitions Objects with a string htmlRep property, which is rewritten in place.
  * @return void
  */
 public static function highlight($words, &$definitions)
 {
     // Map each distinct word to the forms to highlight for it. array_fill_keys()
     // collapses duplicate words while keeping first-occurrence order.
     $res = array();
     foreach (array_keys(array_fill_keys($words, true)) as $key) {
         // The word is spliced into a quoted SQL literal, so escape it first.
         $var = sprintf("select distinct i2.formNoAccent  \n        from InflectedForm i1, LexemModel lm1, Lexem l, LexemModel lm2, InflectedForm i2\n        where i1.lexemModelId = lm1.id and\n        lm1.lexemId = l.id and\n        l.id = lm2.lexemId and\n        lm2.id = i2.lexemModelId and\n        not l.stopWord and\n        i1.formUtf8General = '%s'", addslashes((string) $key));
         $forms = array_unique(db_getArray($var));
         if (!empty($forms)) {
             $res[$key] = $forms;
         }
     }

     foreach ($definitions as $def) {
         $classIndex = 0;
         foreach ($res as $forms) {
             // Forms are data, not regex syntax: quote them before building the alternation.
             $quoted = array_map(function ($form) {
                 return preg_quote($form, '/');
             }, $forms);
             // The "u" modifier makes the multibyte letters in the character classes
             // (and case folding) work on characters instead of bytes. Offsets reported
             // by PREG_OFFSET_CAPTURE remain byte offsets, matching substr_replace()/strlen().
             $pattern = '/[^a-zăâîșț<\\/](' . implode('|', $quoted) . ')[^a-zăâîșț>]/iSu';
             // preg_match_all() returns false on error (e.g. malformed UTF-8) and 0 on
             // no match; in both cases there is nothing to wrap.
             if (preg_match_all($pattern, $def->htmlRep, $match, PREG_OFFSET_CAPTURE)) {
                 // Replace from the last match backwards so earlier offsets stay valid.
                 foreach (array_reverse($match[1]) as $m) {
                     $def->htmlRep = substr_replace($def->htmlRep, "<span class=\"fth fth{$classIndex}\">{$m[0]}</span>", $m[1], strlen($m[0]));
                 }
             }
             $classIndex = ($classIndex + 1) % 5;
             // keep the number of colors in sync with common.css
         }
     }
 }
    foreach ($matches as $i => $m) {
        if ($choices[count($choices) - 1 - $i] == 'abbrev') {
            $orig = substr($s, $m['position'], $m['length']);
            $replacement = StringUtil::isUppercase(StringUtil::getCharAt($orig, 0)) ? AdminStringUtil::capitalize($m['abbrev']) : $m['abbrev'];
            $s = substr_replace($s, "#{$replacement}#", $m['position'], $m['length']);
        }
    }
    $def->internalRep = $s;
    $def->htmlRep = AdminStringUtil::htmlize($def->internalRep, $def->sourceId);
    $def->abbrevReview = ABBREV_REVIEW_COMPLETE;
    $def->save();
}
// Sentinel text that the code below injects around each ambiguous abbreviation.
$MARKER = 'DEADBEEF';
// any string that won't occur naturally in a definition
// Remains null unless a candidate definition is picked below.
$def = null;
// IDs of the definitions whose status is not ST_DELETED and whose abbrevReview is ABBREV_AMBIGUOUS.
$ids = db_getArray(sprintf('select id from Definition where status != %d and abbrevReview = %d', ST_DELETED, ABBREV_AMBIGUOUS));
if (count($ids)) {
    $defId = $ids[array_rand($ids, 1)];
    $def = Definition::get_by_id($defId);
    // Collect the positions of ambiguous abbreviations
    $matches = array();
    AdminStringUtil::markAbbreviations($def->internalRep, $def->sourceId, $matches);
    usort($matches, 'positionCmp');
    // Inject our marker around each ambiguity and htmlize the definition
    $s = $def->internalRep;
    foreach ($matches as $m) {
        $s = substr($s, 0, $m['position']) . " {$MARKER} " . substr($s, $m['position'], $m['length']) . " {$MARKER} " . substr($s, $m['position'] + $m['length']);
    }
    $s = AdminStringUtil::htmlize($s, $def->sourceId);
    // Split the definition into n ambiguities and n+1 bits of text between the ambiguities
    $text = array();
Пример #7
0
require_once 'phplib/ads/adsModule.php';
require_once 'phplib/ads/diverta/divertaAdsModule.php';
// Rebuild diverta_Index: for every book, map the words of its title to lexem IDs.
$allBooks = Model::factory('DivertaBook')->order_by_asc('id')->find_many();
$total = count($allBooks);
print "Reindexing {$total} book titles.\n";
foreach ($allBooks as $pos => $book) {
    // Drop this book's existing index rows before re-adding them.
    db_execute("delete from diverta_Index where bookId = {$book->id}");

    // Diacritics are detected on the original title; the result selects the column to match against.
    $withDiacritics = StringUtil::hasDiacritics($book->title);
    $column = $withDiacritics ? 'formNoAccent' : 'formUtf8General';

    // Lowercase the title and strip commas and periods before splitting it on whitespace.
    $normalized = str_replace(array(',', '.'), '', mb_strtolower($book->title));

    // Lexem IDs are collected as array keys so that duplicates collapse.
    $seenLexemIds = array();
    foreach (preg_split("/\\s+/", $normalized) as $word) {
        if (StringUtil::isStopWord($word, $withDiacritics)) {
            continue;
        }
        $sql = "select distinct lexemId from InflectedForm where {$column} = '" . addslashes($word) . "'";
        foreach (db_getArray(db_execute($sql)) as $id) {
            $seenLexemIds[$id] = true;
        }
    }

    foreach (array_keys($seenLexemIds) as $id) {
        $entry = new DivertaIndex();
        $entry->lexemId = $id;
        $entry->bookId = $book->id;
        $entry->save();
    }

    // Progress report after every 100th title.
    if ($pos % 100 == 99) {
        $done = $pos + 1;
        print "{$done} titles indexed.\n";
    }
}
Пример #8
0
// On submit: persist the new ordering (and optionally a new inflection), then redirect.
if ($submitButton) {
    // Re-rank the inflections according to the order in $inflectionIds
    // $modelTypeMap tracks, per model type, the highest rank handed out so far.
    $modelTypeMap = array();
    foreach ($inflectionIds as $inflId) {
        $infl = Inflection::get_by_id($inflId);
        $rank = array_key_exists($infl->modelType, $modelTypeMap) ? $modelTypeMap[$infl->modelType] + 1 : 1;
        $modelTypeMap[$infl->modelType] = $rank;
        $infl->rank = $rank;
        $infl->save();
    }
    // Add a new inflection if one is given
    if ($newDescription) {
        $infl = Model::factory('Inflection')->create();
        $infl->description = $newDescription;
        $infl->modelType = $newModelType;
        // The model type may have no inflections yet, in which case ranking starts at 1
        // (reading the missing map entry directly would be an undefined-index access).
        $lastRank = array_key_exists($newModelType, $modelTypeMap) ? $modelTypeMap[$newModelType] : 0;
        $infl->rank = $lastRank + 1;
        $infl->save();
    }
    util_redirect('flexiuni');
}
// Tag inflections which can be safely deleted (only those that aren't being used by any model)
$inflections = Model::factory('Inflection')->order_by_asc('modelType')->order_by_asc('rank')->find_many();
$usedInflectionIds = db_getArray('select distinct inflectionId from ModelDescription');
foreach ($inflections as $infl) {
    $infl->canDelete = !in_array($infl->id, $usedInflectionIds);
}
// Hand everything to the template and render the page.
SmartyWrap::assign('suggestHiddenSearchForm', true);
SmartyWrap::assign('inflections', $inflections);
SmartyWrap::assign('modelTypes', ModelType::loadCanonical());
SmartyWrap::addJs('jqTableDnd');
SmartyWrap::display('flexiuni.tpl');
Пример #9
0
<?php

require_once __DIR__ . '/../phplib/util.php';
// Raise the execution-time and memory limits for this script.
ini_set('max_execution_time', '3600');
ini_set('memory_limit', '256M');
// Make a failed assert() terminate the script.
assert_options(ASSERT_BAIL, 1);
log_scriptLog('Running rebuildFullTextIndex.php.');
// Refuse to run if the full-text-index lock cannot be acquired.
if (!Lock::acquire(LOCK_FULL_TEXT_INDEX)) {
    OS::errorAndExit('Lock already exists!');
    // NOTE(review): presumably unreachable if errorAndExit() terminates the script — confirm.
    exit;
}
log_scriptLog("Clearing table FullTextIndex.");
db_execute('truncate table FullTextIndex');
// Forms of stop-word lexems, flipped so that each form is an array key (tested with isset() below).
$stopWordForms = array_flip(db_getArray('select distinct i.formNoAccent ' . 'from Lexem l, LexemModel lm, InflectedForm i ' . 'where l.id = lm.lexemId ' . 'and lm.id = i.lexemModelId ' . 'and l.stopWord'));
// Per-word cache, checked with array_key_exists() in the loop below; presumably filled by cacheWordForm() — TODO confirm.
$ifMap = array();
// Only definitions with status = 0 are indexed.
$dbResult = db_execute('select id, internalRep from Definition where status = 0');
$numDefs = $dbResult->rowCount();
$defsSeen = 0;
$indexSize = 0;
// Temporary file, created in the configured temp directory, that the index is written to.
$fileName = tempnam(Config::get('global.tempDir'), 'index_');
$handle = fopen($fileName, 'w');
log_scriptLog("Writing index to file {$fileName}.");
DebugInfo::disable();
foreach ($dbResult as $dbRow) {
    $words = extractWords($dbRow[1]);
    foreach ($words as $position => $word) {
        if (!isset($stopWordForms[$word])) {
            if (!array_key_exists($word, $ifMap)) {
                cacheWordForm($word);
            }
            if (array_key_exists($word, $ifMap)) {
Пример #10
0
/**
 * Suggests a replacement spelling for a word.
 *
 * Looks up the distinct formNoAccent values of the InflectedForm rows whose
 * formUtf8General equals $word, ordered by formNoAccent:
 *  - no match: the word is returned unchanged;
 *  - one match: that form is returned;
 *  - several matches: the $PREFERRED_FORMS entry keyed by the first form is
 *    returned if there is one, otherwise the decision goes to choice().
 *
 * @param string $word The word to look up.
 * @return mixed The suggested form (from the database, $PREFERRED_FORMS or choice()), or $word itself.
 */
function suggest($word)
{
    global $PREFERRED_FORMS;

    // Escape the word before embedding it in a quoted SQL literal.
    $query = "select distinct formNoAccent from InflectedForm where formUtf8General = '"
        . addslashes($word)
        . "' order by formNoAccent";
    $forms = db_getArray(db_execute($query));

    if (!count($forms)) {
        return $word;
    }
    if (count($forms) == 1) {
        return $forms[0];
    }
    if (array_key_exists($forms[0], $PREFERRED_FORMS)) {
        return $PREFERRED_FORMS[$forms[0]];
    }
    return choice($word, $forms);
}
Пример #11
0
    usage();
}
// Resolve the command-line options into a user, a source and a timestamp.
$user = User::get_by_nick($opts['user']);
$source = Source::get_by_urlName($opts['source']);
$timestamp = strtotime($opts['date']);
// Any option that fails to resolve sends us to usage().
// NOTE(review): the code below dereferences $user and $source, so usage() presumably terminates the script — confirm.
if (!$user || !$source || !$timestamp) {
    usage();
}
$similarSource = SimilarSource::getSimilarSource($source->id);
if (!$similarSource) {
    usage();
}
// Active definitions by this user, in this source, created after the given date, ordered by lexicon.
$defs = Model::factory('Definition')->where('userId', $user->id)->where('sourceId', $source->id)->where_gt('createDate', $timestamp)->where('status', Definition::ST_ACTIVE)->order_by_asc('lexicon')->find_many();
// Counters updated by the loop below.
$truePositives = $falsePositives = $trueNegatives = 0;
foreach ($defs as $def) {
    $lexemIds = db_getArray("select distinct lexemId from LexemDefinitionMap where definitionId = {$def->id}");
    $similar = $def->loadSimilar($lexemIds, $diffSize);
    if ($similar) {
        $correct = ($def->similarSource == 1) == ($diffSize == 0);
        if ($correct) {
            if ($def->similarSource) {
                $truePositives++;
            } else {
                $trueNegatives++;
            }
        } else {
            if ($def->similarSource) {
                $falsePositives++;
            } else {
                // Do not report false negatives; just fix them
                $correct = true;
Пример #12
0
<?php

require_once "../phplib/util.php";
// Lists every distinct Lexem form starting with the requested letter.
$letter = util_getRequestParameter('letter');
// Accept exactly one character (counted by mb_strlen, so multibyte letters qualify);
// any other request ends with no output.
if (mb_strlen($letter) !== 1) {
    exit;
}
// The letter is user input that ends up inside a quoted LIKE pattern. First neutralize
// the LIKE metacharacters (backslash, % and _), then escape the result as a string literal.
// NOTE(review): assumes backslash is the LIKE escape character (the MySQL default) — confirm.
$likePrefix = addslashes(str_replace(array('\\', '%', '_'), array('\\\\', '\\%', '\\_'), $letter));
$forms = db_getArray("select distinct formNoAccent from Lexem where formNoAccent like '{$likePrefix}%' order by formNoAccent");
smarty_assign('forms', $forms);
smarty_assign('letter', $letter);
smarty_assign('page_title', "Cuvinte care încep cu " . mb_strtoupper($letter));
smarty_displayCommonPageWithSkin('wordList.ihtml');
Пример #13
0
        $curPage->save();
        WikiKeyword::deleteByWikiArticleId($curPage->id);
        $keywords = $curPage->extractKeywords();
        foreach ($keywords as $keyword) {
            $wk = Model::factory('WikiKeyword')->create();
            $wk->wikiArticleId = $curPage->id;
            $wk->keyword = $keyword;
            $wk->save();
        }
        log_scriptLog("Saved page #{$pageId} \"{$title}\"");
    }
}
// Now delete all the pages on our side that aren't category members because
//   (a) they have been deleted or
//   (b) they have been removed from the category
// Page IDs of every article stored in WikiArticle.
$ourIds = db_getArray('select pageId from WikiArticle');
foreach ($ourIds as $ourId) {
    // $pageIdHash is built earlier in the script (not visible here); presumably it is
    // keyed by the page IDs that are still category members — TODO confirm.
    if (!array_key_exists($ourId, $pageIdHash)) {
        $curPage = WikiArticle::get_by_pageId($ourId);
        log_scriptLog("Deleting page #{$curPage->pageId} \"{$curPage->title}\"");
        $curPage->delete();
    }
}
log_scriptLog('syncWikiArticles.php finished');
/*************************************************************************/
function parse($text)
{
    // Preprocessing
    $text = "__NOEDITSECTION__\n" . $text;
    // Otherwise the returned HTML will contain section edit links
    $text = str_replace(array('ş', 'Ş', 'ţ', 'Ţ'), array('ș', 'Ș', 'ț', 'Ț'), $text);
Пример #14
0
 /**
  * Generates the inflected forms for every inflection of this object's
  * canonical model (looked up by modelType and modelNumber).
  *
  * @return mixed The merged arrays returned by generateInflectedFormWithModel()
  *               for all inflections, or the ID of the first inflection for
  *               which that method returned null.
  */
 public function generateParadigm()
 {
     $canonical = FlexModel::loadCanonicalByTypeNumber($this->modelType, $this->modelNumber);
     // Select inflection IDs for this model
     $inflectionIds = db_getArray("select distinct inflectionId from ModelDescription where modelId = {$canonical->id} order by inflectionId");
     $paradigm = array();
     foreach ($inflectionIds as $inflectionId) {
         $forms = $this->generateInflectedFormWithModel($inflectionId, $canonical->id);
         if ($forms === null) {
             // A null result stops the run; the caller gets the offending inflection ID.
             return $inflectionId;
         }
         $paradigm = array_merge($paradigm, $forms);
     }
     return $paradigm;
 }
Пример #15
0
<?php

// Renumber the meaning tree of every lexem that has at least one meaning.
foreach (db_getArray('select distinct lexemId from Meaning') as $id) {
    renumber(Meaning::loadTree($id), '');
}
/**************************************************************************/
/**
 * Recursively assigns dotted breadcrumbs to a tree of meanings and saves each one.
 *
 * A node at array index i gets the breadcrumb "<prefix>.<i + 1>" (or just
 * "<i + 1>" when the prefix is empty); its children are then numbered with
 * that breadcrumb as their prefix.
 *
 * @param array  $t      List of nodes, each an array with 'meaning' (an object
 *                       with a breadcrumb property and a save() method) and
 *                       'children' (a list of the same shape).
 * @param string $prefix Breadcrumb of the parent node; '' at the root.
 * @return void
 */
function renumber($t, $prefix)
{
    if (empty($t)) {
        return;
    }
    // Only non-empty prefixes get the separating dot.
    $base = $prefix ? $prefix . '.' : $prefix;
    foreach ($t as $index => $node) {
        $meaning = $node['meaning'];
        $meaning->breadcrumb = $base . ($index + 1);
        $meaning->save();
        renumber($node['children'], $meaning->breadcrumb);
    }
}
Пример #16
0
 /**
  * Lazily generates and caches this object's inflected forms.
  *
  * On the first call, generates the forms for every inflection of the
  * canonical model and stores the merged result in $this->inflectedForms.
  * If generation throws an Exception, the ID of the inflection being
  * processed is stored instead. Later calls return the stored value.
  *
  * @return mixed The merged forms, or the ID of the inflection that failed.
  */
 function generateInflectedForms()
 {
     if ($this->inflectedForms !== null) {
         return $this->inflectedForms;
     }
     $base = $this->getLexem();
     $canonical = FlexModel::loadCanonicalByTypeNumber($this->modelType, $this->modelNumber);
     $inflectionIds = db_getArray("select distinct inflectionId from ModelDescription where modelId = {$canonical->id} order by inflectionId");
     try {
         // The property is filled in incrementally, one inflection at a time.
         $this->inflectedForms = array();
         foreach ($inflectionIds as $inflectionId) {
             $forms = $this->generateInflectedFormWithModel($base->form, $inflectionId, $canonical->id);
             $this->inflectedForms = array_merge($this->inflectedForms, $forms);
         }
     } catch (Exception $ignored) {
         // Make a note of the inflection we cannot generate
         $this->inflectedForms = $inflectionId;
     }
     return $this->inflectedForms;
 }
Пример #17
0
 /**
  * Returns the distinct positions at which any of the given lexems occurs in
  * one definition, according to FullTextIndex, in ascending order.
  *
  * @param array $lexemIds Lexem IDs, joined with commas into the SQL "in (...)" list.
  * @param mixed $defId    ID of the definition to look in.
  * @return array Whatever db_getArray() yields for the query; an empty array when no lexem IDs are given.
  */
 public static function loadPositionsByLexemIdsDefinitionId($lexemIds, $defId)
 {
     if (empty($lexemIds)) {
         // "in ()" is not valid SQL; with no lexems there is nothing to look up.
         return array();
     }
     return db_getArray('select distinct position from FullTextIndex where lexemId in (' . join(',', $lexemIds) . ") and definitionId = {$defId} order by position");
 }
Пример #18
0
}
// A full migration first deletes the transforms rows that have a non-empty transf_from or transf_to.
if ($migrateAll) {
    // NOTE(review): the two literals are concatenated with no space between them
    // ("... != ''or transf_to ..."); confirm the server parses this as intended.
    mysql_query("delete from transforms where transf_from != ''" . "or transf_to != ''");
}
// Full migration: every model whose type is not 'I' or 'T'.
// Otherwise: only the models that have no model_description rows.
$query = $migrateAll ? "select * from models where model_type not in ('I', 'T')" : "select models.* from models left outer join model_description " . "on model_id = md_model where md_model is null";
$dbResult = logged_query($query);
$numModels = 0;
while ($dbRow = mysql_fetch_assoc($dbResult)) {
    $model = new Model();
    $model->populateFromDbRow($dbRow);
    //print "{$model->modelType} {$model->number}\n";
    // Load all the DMLR model records
    mysql_query("delete from model_description where md_model = {$model->id}");
    $query = "select form, infl_id, variant, is_baseform from dmlr_models " . "where model_type = '{$model->modelType}' " . "and model_no = '" . addslashes($model->number) . "' order by infl_id";
    $dmlrDbResult = logged_query($query);
    $results = db_getArray($dmlrDbResult);
    $baseForm = null;
    foreach ($results as $row) {
        $form = $row['form'];
        $variant = $row['variant'];
        $inflId = $row['infl_id'];
        $isBaseForm = $row['is_baseform'];
        if ($baseForm && $isBaseForm) {
            die("Incorrect baseform for {$model->modelType}{$model->number}\n");
        }
        if (!$baseForm) {
            $baseForm = $form;
        }
        if (text_contains($baseForm, "'") ^ text_contains($form, "'")) {
            print "Incomplete accents for {$baseForm} => {$form}\n";
        }