/**
 * Spell-check every article currently flagged as dirty.
 *
 * Selects all rows from `spellchecker` with sc_dirty = 1 and sc_exempt = 0,
 * runs spellCheckArticle() on each page id, and echoes progress (with
 * microtime stamps) at the start, after the fetch, every 1000 articles, and
 * at the end.
 */
function checkDirtyArticles() {
    echo "Checking dirty articles for spelling mistakes at " . microtime(true) . "\n";

    // Only the master handle is needed here; it is handed to spellCheckArticle().
    $dbw = wfGetDB(DB_MASTER);

    $articles = DatabaseHelper::batchSelect(
        'spellchecker',
        array('sc_page'),
        array('sc_dirty' => 1, 'sc_exempt' => 0)
    );
    echo "Done grabbing articles. There are " . count($articles) . " dirty articles.\n";

    // Dictionary resources are loaded once and shared across all articles.
    $pspell = wikiHowDictionary::getLibrary();
    $capsString = wikiHowDictionary::getCaps();
    $whitelistArray = wikiHowDictionary::getWhitelistArray();

    $i = 0;
    foreach ($articles as $article) {
        spellCheckArticle($dbw, $article->sc_page, $pspell, $capsString, $whitelistArray);
        $i++;
        if ($i % 1000 == 0) {
            echo $i . " articles processed at " . microtime(true) . "\n";
        }
    }

    echo "Done checking dirty articles at " . microtime(true) . "\n";
}
/**
 * Pick an article for the spellchecker tool and build the data needed to show it.
 *
 * If $articleName resolves to an existing page, that page is used. Otherwise a
 * random page id is taken from `spellchecker` where sc_exempt = 0,
 * sc_errors = 1, sc_dirty = 0 and sc_checkout is older than
 * time() - Spellchecker::SPELLCHECKER_EXPIRED, excluding any ids returned by
 * $this->skipTool->getSkipped().
 *
 * @param string $articleName Optional title text of a specific article to load.
 * @return array On success: keys 'html', 'title', 'articleId', 'words' and
 *               'exclusions'. When no article/title/revision could be found:
 *               array('error' => true).
 */
function getNextArticle($articleName = '') {
    global $wgOut;

    $dbr = wfGetDB(DB_SLAVE);
    $content = array();

    $skippedIds = $this->skipTool->getSkipped();
    $expired = wfTimestamp(TS_MW, time() - Spellchecker::SPELLCHECKER_EXPIRED);

    $title = Title::newFromText($articleName);
    if ($title && $title->getArticleID() > 0) {
        $articleId = $title->getArticleID();
    } else {
        $conds = array(
            'sc_exempt' => 0,
            'sc_errors' => 1,
            'sc_dirty' => 0,
            'sc_checkout < ' . $dbr->addQuotes($expired),
        );
        if ($skippedIds) {
            // makeList() quotes and escapes each id instead of trusting raw values.
            $conds[] = 'sc_page NOT IN (' . $dbr->makeList($skippedIds) . ')';
        }
        $articleId = $dbr->selectField(
            'spellchecker',
            'sc_page',
            $conds,
            __METHOD__,
            array('LIMIT' => 1, 'ORDER BY' => 'RAND()')
        );
    }

    if ($articleId) {
        // Force an integer before interpolating into the hand-written query.
        $pageId = (int)$articleId;
        $sql = "SELECT * from `spellchecker_page` JOIN `spellchecker_word` ON sp_word = sw_id WHERE sp_page = {$pageId}";
        $res = $dbr->query($sql, __METHOD__);

        $words = array();
        while ($row = $dbr->fetchObject($res)) {
            $words[] = $row->sw_word;
        }

        // Words that appear (whitespace-delimited) in the caps string are
        // reported upper-cased in 'exclusions'.
        $caps = wikiHowDictionary::getCaps();
        $exclusions = array();
        foreach ($words as $word) {
            // preg_quote keeps regex metacharacters (and the '@' delimiter) in
            // a word from corrupting the pattern.
            if (preg_match('@\\s' . preg_quote($word, '@') . '\\s@', $caps)) {
                $exclusions[] = strtoupper($word);
            }
        }

        $title = Title::newFromID($articleId);
        if ($title) {
            $revision = Revision::newFromTitle($title);
            if ($revision) {
                // NOTE(review): this preliminary 'html' value is overwritten by the
                // parsed HTML further down. markBreaks()/replaceNewlines() are not
                // visible from here, so the calls are kept as-is — confirm whether
                // this block can be dropped.
                $text = $revision->getRawText();
                $text = self::markBreaks($text);
                $text = self::replaceNewlines($text);
                $content['html'] = "<p>{$text}</p>";

                $content['title'] = "<a href='{$title->getFullURL()}' target='new'>" . wfMsg('howto', $title->getText()) . "</a>";
                $content['articleId'] = $title->getArticleID();
                $content['words'] = $words;
                $content['exclusions'] = $exclusions;

                // Render the article wikitext to HTML for display.
                $popts = $wgOut->parserOptions();
                $popts->setTidy(true);
                $parserOutput = $wgOut->parse($revision->getText(), $title, $popts);
                $magic = WikihowArticleHTML::grabTheMagic($revision->getText());
                $html = WikihowArticleHTML::processArticleHTML(
                    $parserOutput,
                    array('no-ads' => true, 'ns' => NS_MAIN, 'magic-word' => $magic)
                );
                $content['html'] = $html;

                $this->skipTool->useItem($articleId);

                return $content;
            }
        }
    }

    // No article id, title or revision could be resolved: return error message
    $content['error'] = true;
    return $content;
}