/**
 * Print the categories, taken at one index of the category tree, that a query scores on.
 *
 * Registers and matches the query, fetches its categories, and adds each category's
 * score to the tree entry found at index $level of that category's tree. Trees that
 * are too short, or whose first entry is 'Category:WikiHow', are ignored. The totals
 * are printed tab-separated, highest score first.
 *
 * @param string   $query      Query to categorize
 * @param int      $level      Index into the category tree whose entry receives the score
 * @param int|bool $max        Maximum number of categories to print; false (or 0) prints all
 * @param bool     $printScore Whether to print the summed score after each category name
 */
public static function printQueryLevelCat($query, $level = 2, $max = false, $printScore = true) {
    DedupQuery::addQuery($query);
    DedupQuery::matchQueries(array($query));
    $cats = DedupQuery::getCategories($query);

    // Determine the score of the secondary level cats from the scores of the bottom level
    $sLevelCats = array();
    foreach ($cats as $cat) {
        $cTree = self::getCategoryTree('Category:' . $cat['cat']);
        if (count($cTree) < $level + 1 || $cTree[0] == 'Category:WikiHow') {
            continue;
        }

        $levelCat = $cTree[$level];
        // Seed the accumulator so the first += does not read an undefined key
        if (!isset($sLevelCats[$levelCat])) {
            $sLevelCats[$levelCat] = 0;
        }
        $sLevelCats[$levelCat] += $cat['score'];
    }

    // Highest total first, keeping the category names as keys
    arsort($sLevelCats);

    $numPrinted = 0;
    foreach ($sLevelCats as $cat => $score) {
        print str_replace('Category:', '', $cat) . "\t";
        if ($printScore) {
            print $score . "\t";
        }
        $numPrinted++;
        if ($max && $numPrinted >= $max) {
            break;
        }
    }
}
/**
 * Bring the dedup title table in line with the wiki's page table.
 *
 * First pass: every main-namespace, non-redirect page that has no row in
 * dedup.title_query for the current language is added via DedupQuery::addTitle().
 * Second pass: every title returned by the "deleted or redirected" query is removed
 * via DedupQuery::removeTitle(). Progress is printed for each title.
 */
public static function reconcile() {
    global $wgLanguageCode;

    $dbr = wfGetDB(DB_SLAVE);

    // Add titles missing from our system with associated keywords
    $sql = "select page.* from page left join dedup.title_query on tq_page_id=page_id AND tq_lang=" . $dbr->addQuotes($wgLanguageCode) . " where page_namespace=0 and page_is_redirect=0 and tq_title is NULL group by page.page_id";
    $res = $dbr->query($sql, __METHOD__);
    // Read the whole result set before calling addTitle() on any row
    $missingTitles = array();
    foreach ($res as $row) {
        $missingTitles[] = $row;
    }
    foreach ($missingTitles as $title) {
        print "Adding title to system " . $title->page_title . "\n";
        $t = Title::newFromRow($title);
        DedupQuery::addTitle($t, $wgLanguageCode);
    }

    // Deal with titles turned into deletes or redirects
    // NOTE(review): with a LEFT JOIN, "page_title is NULL" selects unmatched rows, for
    // which page_namespace and page_is_redirect are NULL as well, so this WHERE clause
    // looks like it can never match. The namespace/redirect conditions probably belong
    // in the ON clause, and a tq_lang filter may be needed. Confirm before changing,
    // because a corrected query will start removing titles.
    $sql = "select tq_title from dedup.title_query left join page on tq_page_id=page_id where page_namespace=0 and page_is_redirect=0 and page_title is NULL";
    $res = $dbr->query($sql, __METHOD__);
    $deletedTitles = array();
    foreach ($res as $row) {
        // The query selects tq_title only; page_title is not part of the result row
        $deletedTitles[] = $row->tq_title;
    }
    foreach ($deletedTitles as $title) {
        print "Removing title from system " . $title . "\n";
        DedupQuery::removeTitle($title, $wgLanguageCode);
    }
}
/**
 * Register the queries listed in the keyword master Google spreadsheet.
 *
 * Logs in with WH_TITUS_GOOGLE_LOGIN / WH_TITUS_GOOGLE_PW, reads the
 * WH_KEYWORD_MASTER_GOOGLE_DOC document through GoogleSpreadsheet::getCols() and
 * passes the first value of every returned entry to DedupQuery::addQuery().
 */
public static function addSpreadsheet() {
    $sheet = new GoogleSpreadsheet();
    $sheet->login(WH_TITUS_GOOGLE_LOGIN, WH_TITUS_GOOGLE_PW);

    $entries = $sheet->getCols(WH_KEYWORD_MASTER_GOOGLE_DOC, 1, 1, 2);
    foreach ($entries as $entry) {
        $keyword = $entry[0];
        DedupQuery::addQuery($keyword);
    }
}
/**
 * Determines the queries that match a pipe-separated list of queries.
 *
 * Every non-empty query is registered with DedupQuery::addQuery(), the list is matched
 * with DedupQuery::matchQueries(), and the matches are read from dedup.query_match.
 *
 * @param string $queries      Queries to look up, separated by "|"
 * @param bool   $internal     Only match against queries specified instead of the entire database
 * @param int    $clusterScore Cluster all queries above a certain score; a falsy value disables clustering
 * @return array Without clustering: query1 => list of arrays with keys 'query', 'score',
 *               'title' and 'aid', ordered by descending score. With clustering: a list
 *               of clusters, each a list of arrays with keys 'query', 'title' and 'aid'
 *               (null for a cluster member that had no row in the result).
 */
function getQueries($queries, $internal, $clusterScore) {
    $queries = explode('|', $queries);
    $dbw = wfGetDB(DB_MASTER);
    $queryE = array();
    foreach ($queries as $query) {
        if ($query) {
            DedupQuery::addQuery($query);
            $queryE[] = $dbw->addQuotes($query);
        }
    }
    if (!$queryE) {
        // Nothing usable was passed in; "in ()" below would be a SQL syntax error
        return array();
    }
    DedupQuery::matchQueries($queries, $internal);

    $dbr = wfGetDB(DB_SLAVE);
    $inList = implode(",", $queryE);
    $sql = "select query1, query2, ct, tq_title, tq_page_id from dedup.query_match left join dedup.title_query on tq_query=query2 where query1 in (" . $inList . ")";
    if ($internal) {
        $sql .= " and query2 in (" . $inList . ")";
    }
    $sql .= " order by query1, ct desc";
    $res = $dbr->query($sql, __METHOD__);

    $ret = array();
    if (!$clusterScore) {
        foreach ($res as $row) {
            $ret[$row->query1][] = array(
                'query' => $row->query2,
                'score' => $row->ct,
                'title' => $row->tq_title,
                'aid' => $row->tq_page_id
            );
        }
        return $ret;
    }

    // $clusters maps a query to the name of its cluster; $clusterLookup maps a cluster
    // name to its member queries. Every input query starts as its own cluster.
    $clusters = array();
    $clusterLookup = array();
    foreach ($queries as $query) {
        $clusters[$query] = $query;
        $clusterLookup[$query] = array($query);
    }
    foreach ($res as $row) {
        $ret[$row->query2] = array(
            'query' => $row->query2,
            'title' => $row->tq_title,
            'aid' => $row->tq_page_id
        );
        if ($row->ct >= $clusterScore) {
            if (!isset($clusters[$row->query2])) {
                $clusters[$row->query2] = $row->query2;
                $clusterLookup[$row->query2] = array($row->query2);
            }
            $cl1 = $clusters[$row->query1];
            $cl2 = $clusters[$row->query2];
            if ($cl2 && $cl1 != $cl2) {
                // Fold query2's cluster into query1's and repoint all of its members
                $clusterLookup[$cl1] = array_merge($clusterLookup[$cl1], $clusterLookup[$cl2]);
                foreach ($clusterLookup[$cl2] as $q) {
                    $clusters[$q] = $cl1;
                }
                unset($clusterLookup[$cl2]);
            }
        }
    }

    $ret2 = array();
    foreach ($clusterLookup as $cl) {
        $fullCl = array();
        foreach ($cl as $q) {
            // A member that never appeared as query2 has no details; keep it as null
            $fullCl[] = isset($ret[$q]) ? $ret[$q] : null;
        }
        $ret2[] = $fullCl;
    }
    return $ret2;
}
/**
 * Get a list of users to whom we may want to suggest editing this article.
 *
 * Looks up the titles related to $title (DedupQuery::getRelated() with a minimum score
 * of 3), skips the article itself and anything listed in $this->_relatedExcludes, and
 * sums what each user added to every remaining related article. Users above the
 * threshold are scored and recorded in $this->_userArticles and
 * $this->_userArticleRelated.
 *
 * @param $title Title object for the article
 * @param minUserScore The minimum bytes a user needs to have added to an article for us to consider them as having contributed to that article
 * @return array username => (bytes added * match score) for the last related article
 *               on which the user exceeded $minUserScore; empty when $title is unusable
 */
function getSuggestedUsers($title, $minUserScore = 200) {
    if (!$title || !$title->getText()) {
        return array();
    }

    $articleId = $title->getArticleId();
    $relatedTitles = DedupQuery::getRelated($title, 3);
    $userScore = array();
    foreach ($relatedTitles as $t) {
        $relatedId = $t['title']->getArticleId();
        if ($relatedId == $articleId) {
            continue;
        }
        if (in_array($relatedId, $this->_relatedExcludes)) {
            continue;
        }

        // Total added by each user on this related article
        $se = SuccessfulEdit::getEdits($relatedId);
        $userScore2 = array();
        foreach ($se as $e) {
            if (!isset($userScore2[$e['username']])) {
                $userScore2[$e['username']] = 0;
            }
            $userScore2[$e['username']] += $e['added'];
        }

        foreach ($userScore2 as $username => $score) {
            if ($score > $minUserScore) {
                $weighted = $score * $t['ct'];
                // NOTE(review): this overwrites rather than accumulates, unlike
                // _userArticles below. Confirm whether += was intended.
                $userScore[$username] = $weighted;
                // Seed the accumulator so the first += does not read an undefined key
                if (!isset($this->_userArticles[$username][$articleId])) {
                    $this->_userArticles[$username][$articleId] = 0;
                }
                $this->_userArticles[$username][$articleId] += $weighted;
                $this->_userArticleRelated[$username][$articleId][$relatedId] = 1;
            }
        }
    }
    return $userScore;
}
/**
 * Look up the titles whose query matches the query of the given title.
 *
 * The title is passed to DedupQuery::addTitle() for the current language; the query it
 * returns is then looked up in dedup.query_match, excluding the match of the query with
 * itself. Rows come back ordered by descending ct.
 *
 * @param Title $title    Title to find related titles for
 * @param int   $minScore Minimum ct a match needs in order to be returned
 * @return array List of arrays with keys 'title' (Title object) and 'ct' (match score);
 *               empty when addTitle() yields no query
 */
public static function getRelated($title, $minScore = 1) {
    global $wgLanguageCode;

    $db = wfGetDB(DB_SLAVE);
    $titleQuery = DedupQuery::addTitle($title, $wgLanguageCode);
    if (!$titleQuery) {
        return array();
    }

    $sql = "select query1, query2, ct, tq_page_id from dedup.query_match join dedup.title_query on tq_query=query2 where query1 =" . $db->addQuotes($titleQuery) . " and query1<> query2 order by query1, ct desc";
    $matches = $db->query($sql, __METHOD__);

    $related = array();
    foreach ($matches as $match) {
        if (!($match->ct >= $minScore)) {
            continue;
        }
        // Skip matches whose page id no longer resolves to a title
        $relatedTitle = Title::newFromId($match->tq_page_id);
        if (!$relatedTitle) {
            continue;
        }
        $related[] = array('title' => $relatedTitle, 'ct' => $match->ct);
    }
    return $related;
}