/**
 * Loads every WikiArticle title, ordered by title, paired with its URL form.
 *
 * @return array list of array(title, urlTitle) pairs
 */
public static function loadAllTitlesOld() {
  $pairs = array();
  $allTitles = db_getArray("select title from WikiArticle order by title");
  foreach ($allTitles as $wikiTitle) {
    $urlTitle = WikiArticle::wikiTitleToUrlTitle($wikiTitle);
    $pairs[] = array($wikiTitle, $urlTitle);
  }
  return $pairs;
}
/**
 * Loads the distinct definition IDs indexed under any of the given lexem models.
 *
 * @param array $lexemModelIds IDs to look up in FullTextIndex.lexemModelId
 * @param mixed $sourceId      when truthy, only definitions from this source are returned
 * @return array definition IDs ordered ascending; empty when no lexem model IDs are given
 */
public static function loadDefinitionIdsForLexemModels($lexemModelIds, $sourceId) {
  // An empty ID list would produce an invalid "in ()" clause, so bail out early.
  if (empty($lexemModelIds)) {
    return array();
  }

  $idList = implode(',', $lexemModelIds);

  if (!$sourceId) {
    // No source filter: the index table alone is enough.
    return db_getArray("select distinct definitionId from FullTextIndex where lexemModelId in ({$idList}) order by definitionId");
  }

  // Source filter: join against Definition to restrict by sourceId.
  return db_getArray("select distinct definitionId from FullTextIndex F join Definition D on D.id = F.definitionId where lexemModelId in ({$idList}) and D.sourceId = {$sourceId} order by definitionId");
}
/**
 * Chooses a Diverta book to serve for the given search context.
 *
 * With no lexems and no definitions, a random book among the ten best
 * clicks/impressions ratios is chosen. Otherwise books indexed under the
 * lexems (or under the lexems mapped to the definitions) are considered.
 *
 * @param array $lexems      lexem objects; only their id property is read
 * @param array $definitions definition objects; only their id property is read
 * @return array|null array('bookId' => id), or null when no book can be chosen
 */
public function run($lexems, $definitions) {
  if (empty($lexems) && empty($definitions)) {
    // If we are called from a page with no keywords, display one of the top 10 highest CTR books.
    $random = rand(0, 9);
    // TODO: Label books as mature
    $book = Model::factory('DivertaBook')->raw_query("select * from diverta_Book where impressions " .
                                                     "and title not like '%sex%' " .
                                                     "and title not like '%erotic%' " .
                                                     "and title not like '%bordel%' " .
                                                     "and title not like '%glamour%' " .
                                                     "order by clicks/impressions desc limit {$random}, 1", null)->find_one();
    // Fewer than $random + 1 books may qualify, in which case there is no row at that offset.
    return $book ? array('bookId' => $book->id) : null;
  }

  $lexemIds = array();
  if (!empty($lexems)) {
    foreach ($lexems as $l) {
      $lexemIds[] = $l->id;
    }
  }

  // Fall back to the lexems mapped to the definitions when no lexems were passed in.
  if (count($lexemIds) == 0 && !empty($definitions)) {
    // Seeding the list with -1 lets every real ID be appended with a leading comma.
    $defIdString = '-1';
    foreach ($definitions as $def) {
      $defIdString .= ",{$def->id}";
    }
    $lexemIds = db_getArray("select distinct lexemId from LexemDefinitionMap where DefinitionId in ({$defIdString})");
  }

  if (count($lexemIds) == 0 || count($lexemIds) >= 100) {
    return null; // No keywords or too many keywords (indicating a regexp search)
  }

  $books = Model::factory('DivertaBook')
    ->table_alias('b')
    ->select('b.*')
    ->join(DivertaIndex::$_table, 'b.id = i.bookId', 'i')
    ->where_in('i.lexemId', $lexemIds)
    ->order_by_asc('impressions')
    ->find_many();

  if (count($books)) {
    // 20% chance to serve the book with the fewest impressions / 80% chance to serve the book with the highest CTR
    if (rand(0, 99) < 20) {
      // Books are sorted by ascending impressions, so the first one has the fewest.
      return array('bookId' => $books[0]->id);
    } else {
      // When every CTR is zero, $best stays 0 and the first book is served.
      $best = 0;
      $bestCtr = 0.0;
      foreach ($books as $i => $book) {
        $ctr = $book->impressions ? $book->clicks / $book->impressions : 0.0;
        if ($ctr > $bestCtr) {
          $bestCtr = $ctr;
          $best = $i;
        }
      }
      return array('bookId' => $books[$best]->id);
    }
  }

  return null;
}
/**
 * Finds lexems sharing enough n-grams with the given word.
 *
 * Each n-gram of the word is looked up in the NGram table at a position within
 * self::$MAX_MOVE of its position in the word. A lexem is kept when the number
 * of n-grams that matched it is at least half the word's length in characters.
 *
 * @param string $cuv the word to search for
 * @return array map of lexemId => number of matching n-grams
 */
public static function searchLexemIds($cuv) {
  $ngramList = self::split($cuv);
  $hash = array();

  foreach ($ngramList as $i => $ngram) {
    // Escape the n-gram: it comes straight from the searched word and may
    // contain quotes or backslashes that would otherwise break the SQL literal.
    $query = sprintf("select lexemId from NGram where ngram = '%s' and pos between %d and %d",
                     addslashes($ngram), $i - self::$MAX_MOVE, $i + self::$MAX_MOVE);

    // Count each lexem at most once per n-gram, even if it matched at several positions.
    $lexemIdList = array_unique(db_getArray($query));
    foreach ($lexemIdList as $lexemId) {
      if (!isset($hash[$lexemId])) {
        $hash[$lexemId] = 1;
      } else {
        $hash[$lexemId]++;
      }
    }
  }

  $minLength = mb_strlen($cuv) / 2;
  return array_filter($hash, function ($val) use ($minLength) {
    return $val >= $minLength;
  });
}
/**
 * Wraps occurrences of the given words (in any inflected form) in highlight spans.
 *
 * For every word, the accent-free forms of all non-stop-word lexems having that
 * word as a form are collected. Each definition's htmlRep is then rewritten so
 * that every occurrence of those forms is wrapped in
 * <span class="fth fthN">, where N cycles through 0..4 per search word.
 *
 * @param array $words        the search words
 * @param array $definitions  definition objects; their htmlRep property is modified in place
 * @return void
 */
public static function highlight($words, &$definitions) {
  // Map each search word to the list of forms to highlight for it.
  $res = array();
  foreach ($words as $word) {
    // Escape the word so quotes/backslashes cannot break the SQL literal.
    $query = sprintf("select distinct i2.formNoAccent \n from InflectedForm i1, LexemModel lm1, Lexem l, LexemModel lm2, InflectedForm i2\n where i1.lexemModelId = lm1.id and\n lm1.lexemId = l.id and\n l.id = lm2.lexemId and\n lm2.id = i2.lexemModelId and\n not l.stopWord and\n i1.formUtf8General = '%s'", addslashes($word));
    $forms = array_unique(db_getArray($query));

    // Words with no known forms are dropped so they do not consume a color.
    if (!empty($forms)) {
      $res[$word] = $forms;
    }
  }

  foreach ($definitions as $def) {
    $classIndex = 0;
    foreach ($res as $forms) {
      // Quote each form so regex metacharacters in it are matched literally.
      $quotedForms = array();
      foreach ($forms as $form) {
        $quotedForms[] = preg_quote($form, '/');
      }
      $wordsString = implode("|", $quotedForms);

      $match = array();
      preg_match_all('/[^a-zăâîșț<\\/](' . $wordsString . ')[^a-zăâîșț>]/iS', $def->htmlRep, $match, PREG_OFFSET_CAPTURE);

      if (!empty($match[1])) {
        // Replace from the last match backwards so earlier byte offsets stay valid.
        foreach (array_reverse($match[1]) as $m) {
          $def->htmlRep = substr_replace($def->htmlRep, "<span class=\"fth fth{$classIndex}\">{$m[0]}</span>", $m[1], strlen($m[0]));
        }
      }

      $classIndex = ($classIndex + 1) % 5; // keep the number of colors in sync with common.css
    }
  }
}
foreach ($matches as $i => $m) { if ($choices[count($choices) - 1 - $i] == 'abbrev') { $orig = substr($s, $m['position'], $m['length']); $replacement = StringUtil::isUppercase(StringUtil::getCharAt($orig, 0)) ? AdminStringUtil::capitalize($m['abbrev']) : $m['abbrev']; $s = substr_replace($s, "#{$replacement}#", $m['position'], $m['length']); } } $def->internalRep = $s; $def->htmlRep = AdminStringUtil::htmlize($def->internalRep, $def->sourceId); $def->abbrevReview = ABBREV_REVIEW_COMPLETE; $def->save(); } $MARKER = 'DEADBEEF'; // any string that won't occur naturally in a definition $def = null; $ids = db_getArray(sprintf('select id from Definition where status != %d and abbrevReview = %d', ST_DELETED, ABBREV_AMBIGUOUS)); if (count($ids)) { $defId = $ids[array_rand($ids, 1)]; $def = Definition::get_by_id($defId); // Collect the positions of ambiguous abbreviations $matches = array(); AdminStringUtil::markAbbreviations($def->internalRep, $def->sourceId, $matches); usort($matches, 'positionCmp'); // Inject our marker around each ambiguity and htmlize the definition $s = $def->internalRep; foreach ($matches as $m) { $s = substr($s, 0, $m['position']) . " {$MARKER} " . substr($s, $m['position'], $m['length']) . " {$MARKER} " . substr($s, $m['position'] + $m['length']); } $s = AdminStringUtil::htmlize($s, $def->sourceId); // Split the definition into n ambiguities and n+1 bits of text between the ambiguities $text = array();
require_once 'phplib/ads/adsModule.php';
require_once 'phplib/ads/diverta/divertaAdsModule.php';

// Rebuilds diverta_Index: for every book, maps each non-stop-word of its title
// to the lexems having that word as an inflected form.
$books = Model::factory('DivertaBook')->order_by_asc('id')->find_many();
$numBooks = count($books);
print "Reindexing {$numBooks} book titles.\n";

foreach ($books as $i => $book) {
  // Start from a clean slate for this book.
  db_execute("delete from diverta_Index where bookId = {$book->id}");

  // Titles written with diacritics are matched against formNoAccent,
  // the others against formUtf8General.
  $hasDiacritics = StringUtil::hasDiacritics($book->title);
  $field = $hasDiacritics ? 'formNoAccent' : 'formUtf8General';

  // Normalize the title: lowercase, drop commas and periods, split on whitespace.
  $title = str_replace(array(',', '.'), '', mb_strtolower($book->title));
  $titleWords = preg_split("/\\s+/", $title);

  // Collect lexem IDs as array keys so that duplicates collapse.
  $lexemIds = array();
  foreach ($titleWords as $word) {
    if (StringUtil::isStopWord($word, $hasDiacritics)) {
      continue;
    }
    $wordLexemIds = db_getArray(db_execute("select distinct lexemId from InflectedForm where {$field} = '" . addslashes($word) . "'"));
    foreach ($wordLexemIds as $lexemId) {
      $lexemIds[$lexemId] = true;
    }
  }

  // Write one index row per (lexem, book) pair.
  foreach (array_keys($lexemIds) as $lexemId) {
    $index = new DivertaIndex();
    $index->lexemId = $lexemId;
    $index->bookId = $book->id;
    $index->save();
  }

  // Progress report after every 100 books.
  if ($i % 100 == 99) {
    $done = $i + 1;
    print "{$done} titles indexed.\n";
  }
}
if ($submitButton) {
  // Re-rank the inflections according to the order in $inflectionIds.
  // $modelTypeMap tracks the last rank handed out for each model type.
  $modelTypeMap = array();
  foreach ($inflectionIds as $inflId) {
    $infl = Inflection::get_by_id($inflId);
    $rank = array_key_exists($infl->modelType, $modelTypeMap) ? $modelTypeMap[$infl->modelType] + 1 : 1;
    $modelTypeMap[$infl->modelType] = $rank;
    $infl->rank = $rank;
    $infl->save();
  }

  // Add a new inflection if one is given
  if ($newDescription) {
    $infl = Model::factory('Inflection')->create();
    $infl->description = $newDescription;
    $infl->modelType = $newModelType;
    // The new inflection may be the first of its model type, in which case
    // the map has no entry yet and the rank starts at 1.
    $lastRank = isset($modelTypeMap[$newModelType]) ? $modelTypeMap[$newModelType] : 0;
    $infl->rank = $lastRank + 1;
    $infl->save();
  }

  util_redirect('flexiuni');
}

// Tag inflections which can be safely deleted (only those that aren't being used by any model)
$inflections = Model::factory('Inflection')->order_by_asc('modelType')->order_by_asc('rank')->find_many();
$usedInflectionIds = db_getArray('select distinct inflectionId from ModelDescription');
foreach ($inflections as $infl) {
  $infl->canDelete = !in_array($infl->id, $usedInflectionIds);
}

SmartyWrap::assign('suggestHiddenSearchForm', true);
SmartyWrap::assign('inflections', $inflections);
SmartyWrap::assign('modelTypes', ModelType::loadCanonical());
SmartyWrap::addJs('jqTableDnd');
SmartyWrap::display('flexiuni.tpl');
<?php require_once __DIR__ . '/../phplib/util.php'; ini_set('max_execution_time', '3600'); ini_set('memory_limit', '256M'); assert_options(ASSERT_BAIL, 1); log_scriptLog('Running rebuildFullTextIndex.php.'); if (!Lock::acquire(LOCK_FULL_TEXT_INDEX)) { OS::errorAndExit('Lock already exists!'); exit; } log_scriptLog("Clearing table FullTextIndex."); db_execute('truncate table FullTextIndex'); $stopWordForms = array_flip(db_getArray('select distinct i.formNoAccent ' . 'from Lexem l, LexemModel lm, InflectedForm i ' . 'where l.id = lm.lexemId ' . 'and lm.id = i.lexemModelId ' . 'and l.stopWord')); $ifMap = array(); $dbResult = db_execute('select id, internalRep from Definition where status = 0'); $numDefs = $dbResult->rowCount(); $defsSeen = 0; $indexSize = 0; $fileName = tempnam(Config::get('global.tempDir'), 'index_'); $handle = fopen($fileName, 'w'); log_scriptLog("Writing index to file {$fileName}."); DebugInfo::disable(); foreach ($dbResult as $dbRow) { $words = extractWords($dbRow[1]); foreach ($words as $position => $word) { if (!isset($stopWordForms[$word])) { if (!array_key_exists($word, $ifMap)) { cacheWordForm($word); } if (array_key_exists($word, $ifMap)) {
/**
 * Suggests an accent-free form for a word, based on the InflectedForm table.
 *
 * @param string $word the word to look up by its formUtf8General value
 * @return mixed $word itself when no form matches; the single matching form when
 *               there is exactly one; otherwise the $PREFERRED_FORMS entry keyed
 *               by the first matching form, or the result of choice($word, $forms)
 */
function suggest($word) {
  global $PREFERRED_FORMS;

  // Escape the word so quotes/backslashes in it cannot break the SQL literal.
  $escapedWord = addslashes($word);
  $forms = db_getArray(db_execute("select distinct formNoAccent from InflectedForm where formUtf8General = '{$escapedWord}' order by formNoAccent"));

  if (!count($forms)) {
    return $word;
  }
  if (count($forms) == 1) {
    return $forms[0];
  }
  // Several candidates: use the preferred form when one is registered for the
  // first candidate, otherwise delegate to choice().
  if (array_key_exists($forms[0], $PREFERRED_FORMS)) {
    return $PREFERRED_FORMS[$forms[0]];
  }
  return choice($word, $forms);
}
usage(); } $user = User::get_by_nick($opts['user']); $source = Source::get_by_urlName($opts['source']); $timestamp = strtotime($opts['date']); if (!$user || !$source || !$timestamp) { usage(); } $similarSource = SimilarSource::getSimilarSource($source->id); if (!$similarSource) { usage(); } $defs = Model::factory('Definition')->where('userId', $user->id)->where('sourceId', $source->id)->where_gt('createDate', $timestamp)->where('status', Definition::ST_ACTIVE)->order_by_asc('lexicon')->find_many(); $truePositives = $falsePositives = $trueNegatives = 0; foreach ($defs as $def) { $lexemIds = db_getArray("select distinct lexemId from LexemDefinitionMap where definitionId = {$def->id}"); $similar = $def->loadSimilar($lexemIds, $diffSize); if ($similar) { $correct = ($def->similarSource == 1) == ($diffSize == 0); if ($correct) { if ($def->similarSource) { $truePositives++; } else { $trueNegatives++; } } else { if ($def->similarSource) { $falsePositives++; } else { // Do not report false negatives; just fix them $correct = true;
<?php
require_once "../phplib/util.php";

// Lists every distinct accent-free lexem form beginning with the requested letter.
$letter = util_getRequestParameter('letter');

// Only single-character requests are served.
if (mb_strlen($letter) != 1) {
  exit;
}

// The letter comes from the request, so neutralize it before building the query:
// first escape the LIKE wildcards (and the escape character itself), then
// escape the result for use inside a quoted SQL literal.
$likePrefix = addslashes(addcslashes($letter, '%_\\'));
$forms = db_getArray("select distinct formNoAccent from Lexem where formNoAccent like '{$likePrefix}%' order by formNoAccent");

smarty_assign('forms', $forms);
smarty_assign('letter', $letter);
smarty_assign('page_title', "Cuvinte care încep cu " . mb_strtoupper($letter));
smarty_displayCommonPageWithSkin('wordList.ihtml');
$curPage->save(); WikiKeyword::deleteByWikiArticleId($curPage->id); $keywords = $curPage->extractKeywords(); foreach ($keywords as $keyword) { $wk = Model::factory('WikiKeyword')->create(); $wk->wikiArticleId = $curPage->id; $wk->keyword = $keyword; $wk->save(); } log_scriptLog("Saved page #{$pageId} \"{$title}\""); } } // Now delete all the pages on our side that aren't category members because // (a) they have been deleted or // (b) they have been removed from the category $ourIds = db_getArray('select pageId from WikiArticle'); foreach ($ourIds as $ourId) { if (!array_key_exists($ourId, $pageIdHash)) { $curPage = WikiArticle::get_by_pageId($ourId); log_scriptLog("Deleting page #{$curPage->pageId} \"{$curPage->title}\""); $curPage->delete(); } } log_scriptLog('syncWikiArticles.php finished'); /*************************************************************************/ function parse($text) { // Preprocessing $text = "__NOEDITSECTION__\n" . $text; // Otherwise the returned HTML will contain section edit links $text = str_replace(array('ş', 'Ş', 'ţ', 'Ţ'), array('ș', 'Ș', 'ț', 'Ț'), $text);
/**
 * Generates the inflected forms for every inflection of this object's canonical model.
 *
 * @return mixed the merged array of generated forms, or the ID of the first
 *               inflection for which generateInflectedFormWithModel() returned null
 */
public function generateParadigm() {
  $canonical = FlexModel::loadCanonicalByTypeNumber($this->modelType, $this->modelNumber);
  $modelId = $canonical->id;

  // Select inflection IDs for this model
  $inflectionIds = db_getArray("select distinct inflectionId from ModelDescription where modelId = {$modelId} order by inflectionId");

  $allForms = array();
  foreach ($inflectionIds as $inflectionId) {
    $generated = $this->generateInflectedFormWithModel($inflectionId, $canonical->id);
    if (is_null($generated)) {
      // Report the inflection that could not be generated instead of a partial paradigm.
      return $inflectionId;
    }
    $allForms = array_merge($allForms, $generated);
  }

  return $allForms;
}
<?php
// Recomputes the breadcrumb of every meaning, one lexem tree at a time.
foreach (db_getArray('select distinct lexemId from Meaning') as $lexemId) {
  renumber(Meaning::loadTree($lexemId), '');
}

/**************************************************************************/

/**
 * Assigns dotted breadcrumbs to a meaning tree and saves each meaning.
 *
 * Every node receives the parent's breadcrumb followed by its key in $t plus one,
 * separated by a dot; top-level nodes (empty $prefix) get just the number.
 *
 * @param array  $t      list of tuples with keys 'meaning' and 'children'
 * @param string $prefix breadcrumb of the parent node, or '' at the top level
 * @return void
 */
function renumber($t, $prefix) {
  if (empty($t)) {
    return;
  }

  // A non-empty parent breadcrumb is joined to the child number with a dot.
  $base = $prefix ? $prefix . '.' : $prefix;

  foreach ($t as $index => $node) {
    $meaning = $node['meaning'];
    $meaning->breadcrumb = $base . ($index + 1);
    $meaning->save();
    renumber($node['children'], $meaning->breadcrumb);
  }
}
/**
 * Returns this object's inflected forms, generating them on first use.
 *
 * The result is stored in $this->inflectedForms. When generation throws an
 * Exception, the ID of the inflection being processed is stored and returned
 * instead of the array of forms.
 *
 * @return mixed array of inflected forms, or an inflection ID on failure
 */
function generateInflectedForms() {
  // Already computed (or already failed): reuse the stored value.
  if ($this->inflectedForms !== null) {
    return $this->inflectedForms;
  }

  $lexem = $this->getLexem();
  $canonical = FlexModel::loadCanonicalByTypeNumber($this->modelType, $this->modelNumber);
  $inflectionIds = db_getArray("select distinct inflectionId from ModelDescription where modelId = {$canonical->id} order by inflectionId");

  try {
    $this->inflectedForms = array();
    foreach ($inflectionIds as $inflId) {
      $generated = $this->generateInflectedFormWithModel($lexem->form, $inflId, $canonical->id);
      $this->inflectedForms = array_merge($this->inflectedForms, $generated);
    }
  } catch (Exception $ignored) {
    // Make a note of the inflection we cannot generate
    $this->inflectedForms = $inflId;
  }

  return $this->inflectedForms;
}
/**
 * Loads the distinct positions at which any of the given lexems occur in a definition.
 *
 * @param array $lexemIds IDs to look up in FullTextIndex.lexemId
 * @param mixed $defId    the definition ID to restrict the lookup to
 * @return array positions ordered ascending; empty when no lexem IDs are given
 */
public static function loadPositionsByLexemIdsDefinitionId($lexemIds, $defId) {
  // An empty ID list would produce "in ()", which is a SQL syntax error.
  if (empty($lexemIds)) {
    return array();
  }

  $idList = implode(',', $lexemIds);
  return db_getArray("select distinct position from FullTextIndex where lexemId in ({$idList}) and definitionId = {$defId} order by position");
}
} if ($migrateAll) { mysql_query("delete from transforms where transf_from != ''" . "or transf_to != ''"); } $query = $migrateAll ? "select * from models where model_type not in ('I', 'T')" : "select models.* from models left outer join model_description " . "on model_id = md_model where md_model is null"; $dbResult = logged_query($query); $numModels = 0; while ($dbRow = mysql_fetch_assoc($dbResult)) { $model = new Model(); $model->populateFromDbRow($dbRow); //print "{$model->modelType} {$model->number}\n"; // Load all the DMLR model records mysql_query("delete from model_description where md_model = {$model->id}"); $query = "select form, infl_id, variant, is_baseform from dmlr_models " . "where model_type = '{$model->modelType}' " . "and model_no = '" . addslashes($model->number) . "' order by infl_id"; $dmlrDbResult = logged_query($query); $results = db_getArray($dmlrDbResult); $baseForm = null; foreach ($results as $row) { $form = $row['form']; $variant = $row['variant']; $inflId = $row['infl_id']; $isBaseForm = $row['is_baseform']; if ($baseForm && $isBaseForm) { die("Incorrect baseform for {$model->modelType}{$model->number}\n"); } if (!$baseForm) { $baseForm = $form; } if (text_contains($baseForm, "'") ^ text_contains($form, "'")) { print "Incomplete accents for {$baseForm} => {$form}\n"; }