Example #1
0
 /**
  * Prints the categories found at a given depth of the category tree for a query,
  * ordered by their accumulated score (highest first).
  *
  * The query is registered and matched through DedupQuery, then the score of each
  * category returned for it is added to the entry found at index $level of that
  * category's tree. Output is a single tab-separated line with no trailing newline.
  *
  * @param $query The query to look up categories for
  * @param $level Index into the category tree to aggregate on
  * @param $max Maximum number of categories to print, or false for no limit
  * @param $printScore Whether to print the accumulated score after each category name
  */
 public static function printQueryLevelCat($query, $level = 2, $max = false, $printScore = true)
 {
     DedupQuery::addQuery($query);
     DedupQuery::matchQueries(array($query));
     $cats = DedupQuery::getCategories($query);
     // Determine the score of the secondary level cats from the scores of the bottom level
     $sLevelCats = array();
     foreach ($cats as $cat) {
         $cTree = self::getCategoryTree('Category:' . $cat['cat']);
         // Trees too shallow to have an entry at index $level are skipped
         if (count($cTree) < $level + 1) {
             continue;
         }
         // Trees whose first entry is Category:WikiHow are left out of the tally
         if ($cTree[0] == 'Category:WikiHow') {
             continue;
         }
         $levelCat = $cTree[$level];
         // Start each bucket at zero so the first += does not read an undefined key
         if (!isset($sLevelCats[$levelCat])) {
             $sLevelCats[$levelCat] = 0;
         }
         $sLevelCats[$levelCat] += $cat['score'];
     }
     arsort($sLevelCats);
     $numPrinted = 0;
     foreach ($sLevelCats as $cat => $score) {
         print str_replace('Category:', '', $cat) . "\t";
         if ($printScore) {
             print $score . "\t";
         }
         $numPrinted++;
         if ($max && $numPrinted >= $max) {
             break;
         }
     }
 }
Example #2
0
 /**
  * Reconciles the dedup.title_query table with the wiki's page table.
  *
  * First adds every main-namespace, non-redirect page that has no title_query row
  * for the current language; then removes the titles returned by the second query,
  * which is meant to find entries whose page was deleted or turned into a redirect.
  * Progress is printed to standard output.
  */
 public static function reconcile()
 {
     global $wgLanguageCode;
     $dbr = wfGetDB(DB_SLAVE);

     // Add titles missing from our system with associated keywords
     $sql = "select page.* from page left join dedup.title_query on tq_page_id=page_id AND tq_lang=" . $dbr->addQuotes($wgLanguageCode) . " where page_namespace=0 and page_is_redirect=0 and tq_title is NULL group by page.page_id";
     $res = $dbr->query($sql, __METHOD__);
     // Collect all rows before calling addTitle on any of them
     $missingTitles = array();
     foreach ($res as $row) {
         $missingTitles[] = $row;
     }
     foreach ($missingTitles as $title) {
         print "Adding title to system " . $title->page_title . "\n";
         $t = Title::newFromRow($title);
         DedupQuery::addTitle($t, $wgLanguageCode);
     }

     //Deal with titles turned into deletes or redirects
     // NOTE(review): the WHERE conditions on page_namespace / page_is_redirect are applied
     // after the LEFT JOIN, so rows without a matching page (NULL columns) look like they
     // are filtered out and this query may never return anything - confirm, and consider
     // moving those conditions into the ON clause.
     $sql = "select tq_title from dedup.title_query left join page on tq_page_id=page_id where page_namespace=0 and page_is_redirect=0 and page_title is NULL";
     $res = $dbr->query($sql, __METHOD__);
     $deletedTitles = array();
     foreach ($res as $row) {
         // tq_title is the only column selected; page_title is not part of the result
         $deletedTitles[] = $row->tq_title;
     }
     foreach ($deletedTitles as $title) {
         // Use the loop variable, not the stale $row left over from the previous loop
         print "Removing title from system " . $title . "\n";
         DedupQuery::removeTitle($title, $wgLanguageCode);
     }
 }
 /**
  * Registers every query listed in the keyword master Google spreadsheet.
  *
  * Logs in with the WH_TITUS_GOOGLE_* credentials, fetches the entries from the
  * WH_KEYWORD_MASTER_GOOGLE_DOC document and passes the first cell of each entry
  * to DedupQuery::addQuery().
  */
 public static function addSpreadsheet()
 {
     $sheet = new GoogleSpreadsheet();
     $sheet->login(WH_TITUS_GOOGLE_LOGIN, WH_TITUS_GOOGLE_PW);
     // NOTE(review): the (1, 1, 2) arguments presumably pick the column/row range to
     // read - confirm against GoogleSpreadsheet::getCols
     $entries = $sheet->getCols(WH_KEYWORD_MASTER_GOOGLE_DOC, 1, 1, 2);
     foreach ($entries as $cells) {
         // Only the first cell of each entry holds the query
         $keyword = $cells[0];
         DedupQuery::addQuery($keyword);
     }
 }
Example #4
0
 /**
  * Determines the queries that match the given queries.
  *
  * Every non-empty query is registered with DedupQuery, matching is run, and the
  * resulting rows of dedup.query_match (joined with dedup.title_query) are returned.
  *
  * @param $queries Pipe-separated ("|") list of queries
  * @param $internal Only match against queries specified instead of the entire database
  * @param $clusterScore Cluster all queries above a certain score; falsy to disable clustering
  * @return array Without clustering: map of query1 => list of
  *   array('query', 'score', 'title', 'aid') matches, best score first.
  *   With clustering: list of clusters, each a list of
  *   array('query', 'title', 'aid') entries (null for a query that had no row).
  *   Empty array when no non-empty query was supplied.
  */
 function getQueries($queries, $internal, $clusterScore)
 {
     $queries = preg_split("@\\|@", $queries);
     $dbw = wfGetDB(DB_MASTER);
     $queryE = array();
     foreach ($queries as $query) {
         if ($query) {
             DedupQuery::addQuery($query);
             $queryE[] = $dbw->addQuotes($query);
         }
     }
     DedupQuery::matchQueries($queries, $internal);
     // Without any usable query the IN () list below would be invalid SQL
     if (!$queryE) {
         return array();
     }
     $dbr = wfGetDB(DB_SLAVE);
     // implode() takes the separator first; the reversed order is no longer accepted by PHP 8
     $inList = implode(",", $queryE);
     $sql = "select query1, query2, ct, tq_title, tq_page_id from dedup.query_match left join dedup.title_query on tq_query=query2 where query1 in (" . $inList . ")";
     if ($internal) {
         $sql .= " and query2 in (" . $inList . ")";
     }
     $sql .= " order by query1, ct desc";
     $res = $dbr->query($sql, __METHOD__);
     $ret = array();
     if (!$clusterScore) {
         foreach ($res as $row) {
             $ret[$row->query1][] = array('query' => $row->query2, 'score' => $row->ct, 'title' => $row->tq_title, 'aid' => $row->tq_page_id);
         }
         return $ret;
     }

     // $clusters maps each query to the name of the cluster it currently belongs to;
     // $clusterLookup maps each cluster name to the list of its member queries.
     $clusters = array();
     $clusterLookup = array();
     foreach ($queries as $query) {
         $clusters[$query] = $query;
         $clusterLookup[$query] = array($query);
     }
     foreach ($res as $row) {
         $ret[$row->query2] = array('query' => $row->query2, 'title' => $row->tq_title, 'aid' => $row->tq_page_id);
         if ($row->ct >= $clusterScore) {
             // A matched query seen for the first time starts out as its own cluster
             if (!isset($clusters[$row->query2])) {
                 $clusters[$row->query2] = $row->query2;
                 $clusterLookup[$row->query2] = array($row->query2);
             }
             $cl1 = $clusters[$row->query1];
             $cl2 = $clusters[$row->query2];
             if ($cl2 && $cl1 != $cl2) {
                 // Merge cluster 2 into cluster 1 and repoint all of its members
                 $clusterLookup[$cl1] = array_merge($clusterLookup[$cl1], $clusterLookup[$cl2]);
                 foreach ($clusterLookup[$cl2] as $q) {
                     $clusters[$q] = $cl1;
                 }
                 unset($clusterLookup[$cl2]);
             }
         }
     }
     $ret2 = array();
     foreach ($clusterLookup as $name => $cl) {
         $fullCl = array();
         foreach ($cl as $q) {
             // A query that never showed up as query2 has no detail row; keep a null placeholder
             $fullCl[] = isset($ret[$q]) ? $ret[$q] : null;
         }
         $ret2[] = $fullCl;
     }
     return $ret2;
 }
 /**
  * Gets the users we may want to suggest as editors for this article.
  *
  * Looks at the titles related to $title (minimum match score 3), sums the bytes
  * each user added to every related article, and scores users who exceed
  * $minUserScore on an article. As a side effect the per-user totals are recorded in
  * $this->_userArticles and the contributing related articles in
  * $this->_userArticleRelated, both keyed by the id of $title.
  *
  * @param $title Title object for the article
  * @param $minUserScore The minimum bytes a user needs to have added to an article for us to consider them as having contributed to that article
  * @return array Map of username => score (bytes added times match score of the related title)
  */
 function getSuggestedUsers($title, $minUserScore = 200)
 {
     if (!$title || !$title->getText()) {
         return array();
     }
     $relatedTitles = DedupQuery::getRelated($title, 3);
     $articleId = $title->getArticleId();
     $userScore = array();
     foreach ($relatedTitles as $t) {
         $relatedId = $t['title']->getArticleId();
         // The article itself is not a related article
         if ($relatedId == $articleId) {
             continue;
         }
         if (in_array($relatedId, $this->_relatedExcludes)) {
             continue;
         }
         // Total bytes added per user on this related article
         $se = SuccessfulEdit::getEdits($relatedId);
         $userScore2 = array();
         foreach ($se as $e) {
             if (!isset($userScore2[$e['username']])) {
                 $userScore2[$e['username']] = 0;
             }
             $userScore2[$e['username']] += $e['added'];
         }
         foreach ($userScore2 as $username => $score) {
             if ($score > $minUserScore) {
                 $weighted = $score * $t['ct'];
                 // NOTE(review): this overwrites any score from an earlier related title
                 // while _userArticles below accumulates - confirm which is intended.
                 $userScore[$username] = $weighted;
                 // Initialise the running total so += does not read an undefined key
                 if (!isset($this->_userArticles[$username][$articleId])) {
                     $this->_userArticles[$username][$articleId] = 0;
                 }
                 $this->_userArticles[$username][$articleId] += $weighted;
                 $this->_userArticleRelated[$username][$articleId][$relatedId] = 1;
             }
         }
     }
     return $userScore;
 }
Example #6
0
 /**
  * Finds the titles whose query matches the query of the given title.
  *
  * The title is registered through DedupQuery::addTitle() for the current language;
  * its query is then looked up in dedup.query_match and every matching query with a
  * score of at least $minScore that resolves to an existing page is returned.
  *
  * @param $title Title object to find related titles for
  * @param $minScore Minimum match score (ct) a related query needs to be included
  * @return array List of array('title' => Title, 'ct' => match score), best score first;
  *   empty when addTitle() yields no query
  */
 public static function getRelated($title, $minScore = 1)
 {
     global $wgLanguageCode;
     $dbr = wfGetDB(DB_SLAVE);
     $query = DedupQuery::addTitle($title, $wgLanguageCode);
     if (!$query) {
         return array();
     }
     $sql = "select query1, query2, ct, tq_page_id from dedup.query_match join dedup.title_query on tq_query=query2 where query1 ="
         . $dbr->addQuotes($query)
         . " and query1<> query2 order by query1, ct desc";
     $related = array();
     foreach ($dbr->query($sql, __METHOD__) as $match) {
         // Matches below the threshold are ignored
         if ($match->ct < $minScore) {
             continue;
         }
         // Entries whose page id no longer resolves to a title are dropped
         $relatedTitle = Title::newFromId($match->tq_page_id);
         if (!$relatedTitle) {
             continue;
         }
         $related[] = array('title' => $relatedTitle, 'ct' => $match->ct);
     }
     return $related;
 }