Exemplo n.º 1
0
/**
 * Re-runs the spell checker over every article currently flagged as dirty.
 *
 * Selects sc_page from the `spellchecker` table for rows with sc_dirty = 1 and
 * sc_exempt = 0, then passes each page id to spellCheckArticle() together with
 * the master database handle, the dictionary library, the caps string and the
 * whitelist array. Progress is echoed at the start, after the fetch, every
 * 1000 articles, and at the end.
 *
 * @return void
 */
function checkDirtyArticles()
{
    echo "Checking dirty articles for spelling mistakes at " . microtime(true) . "\n";

    // Only the master handle is handed to spellCheckArticle(); the previously
    // fetched DB_SLAVE handle was never read, so it is no longer requested here.
    $dbw = wfGetDB(DB_MASTER);

    $articles = DatabaseHelper::batchSelect('spellchecker', array('sc_page'), array('sc_dirty' => 1, 'sc_exempt' => 0));
    echo "Done grabbing articles. There are " . count($articles) . " dirty articles.\n";

    // Load the dictionary resources once and reuse them for every article.
    $pspell = wikiHowDictionary::getLibrary();
    $capsString = wikiHowDictionary::getCaps();
    $whitelistArray = wikiHowDictionary::getWhitelistArray();

    $processed = 0;
    foreach ($articles as $article) {
        spellCheckArticle($dbw, $article->sc_page, $pspell, $capsString, $whitelistArray);
        $processed++;
        if ($processed % 1000 == 0) {
            echo $processed . " articles processed at " . microtime(true) . "\n";
        }
    }

    echo "Done checking dirty articles at " . microtime(true) . "\n";
}
Exemplo n.º 2
0
 /**
  * Picks an article with recorded spelling errors and builds the data needed to show it.
  *
  * If $articleName resolves to an existing page, that page is used. Otherwise one
  * row is drawn in random order from `spellchecker` where sc_exempt = 0,
  * sc_errors = 1, sc_dirty = 0, sc_checkout is older than the expiry cutoff and
  * the page is not among the ids returned by the skip tool.
  *
  * @param string $articleName Optional title text of a specific article to load.
  * @return array On success an array with the keys 'html', 'title', 'articleId',
  *               'words' and 'exclusions'; otherwise array('error' => true).
  */
 function getNextArticle($articleName = '')
 {
     global $wgOut;
     $dbr = wfGetDB(DB_SLAVE);
     $skippedIds = $this->skipTool->getSkipped();
     $expired = wfTimestamp(TS_MW, time() - Spellchecker::SPELLCHECKER_EXPIRED);

     $title = Title::newFromText($articleName);
     if ($title && $title->getArticleID() > 0) {
         $articleId = $title->getArticleID();
     } else {
         $conds = array('sc_exempt' => 0, 'sc_errors' => 1, 'sc_dirty' => 0, "sc_checkout < '{$expired}'");
         if ($skippedIds) {
             // Let the database layer quote each id instead of gluing raw values into the SQL.
             $conds[] = 'sc_page NOT IN (' . $dbr->makeList($skippedIds) . ')';
         }
         $articleId = $dbr->selectField('spellchecker', 'sc_page', $conds, __METHOD__, array("limit" => 1, "ORDER BY" => "RAND()"));
     }

     if (!$articleId) {
         return array('error' => true);
     }

     // Cast to int so nothing but a number can reach the hand-written query.
     $sql = "SELECT * from `spellchecker_page` JOIN `spellchecker_word` ON sp_word = sw_id WHERE sp_page = " . (int)$articleId;
     $res = $dbr->query($sql, __METHOD__);
     $words = array();
     while ($row = $dbr->fetchObject($res)) {
         $words[] = $row->sw_word;
     }

     // A word that appears whitespace-delimited in the caps string is reported
     // in upper case as an exclusion.
     $caps = wikiHowDictionary::getCaps();
     $exclusions = array();
     foreach ($words as $word) {
         // preg_quote keeps regex metacharacters (and the '@' delimiter) inside
         // the word from being interpreted as pattern syntax.
         if (preg_match('@\\s' . preg_quote($word, '@') . '\\s@', $caps)) {
             $exclusions[] = strtoupper($word);
         }
     }

     $title = Title::newFromID($articleId);
     if (!$title) {
         return array('error' => true);
     }

     $revision = Revision::newFromTitle($title);
     if (!$revision) {
         return array('error' => true);
     }

     $content = array();

     // NOTE(review): this raw-text rendering is replaced by the parsed HTML
     // further down; it is kept so the key order of the result and any effects
     // of the two helpers stay exactly as before.
     $text = $revision->getRawText();
     $text = self::markBreaks($text);
     $text = self::replaceNewlines($text);
     $content['html'] = "<p>{$text}</p>";

     $content['title'] = "<a href='{$title->getFullURL()}' target='new'>" . wfMsg('howto', $title->getText()) . "</a>";
     $content['articleId'] = $title->getArticleID();
     $content['words'] = $words;
     $content['exclusions'] = $exclusions;

     // Parse the revision text with tidy enabled and run it through the article HTML post-processing.
     $popts = $wgOut->parserOptions();
     $popts->setTidy(true);
     $parserOutput = $wgOut->parse($revision->getText(), $title, $popts);
     $magic = WikihowArticleHTML::grabTheMagic($revision->getText());
     $html = WikihowArticleHTML::processArticleHTML($parserOutput, array('no-ads' => true, 'ns' => NS_MAIN, 'magic-word' => $magic));
     $content['html'] = $html;

     $this->skipTool->useItem($articleId);
     return $content;
 }