/**
 * Compare cached parser output against a fresh parse for randomly chosen pages.
 *
 * Runs up to 'maxpages' iterations. Each iteration selects one non-redirect
 * page from the namespace given by the 'namespace' option, ordered by
 * page_random starting from a random offset. When the page has a parser cache
 * entry for canonical parser options, its current revision is parsed again,
 * the parse is timed, and the cached and fresh HTML are diffed after HTML
 * comments have been stripped from both. A summary is written after the loop.
 */
public function execute() {
    $pages = $this->getOption('maxpages');
    $dbr = $this->getDB(DB_REPLICA);

    $totalsec = 0.0;
    $scanned = 0;
    $withcache = 0;
    $withdiff = 0;

    while ($pages-- > 0) {
        $row = $dbr->selectRow(
            'page',
            '*',
            [
                'page_namespace' => $this->getOption('namespace'),
                'page_is_redirect' => 0,
                'page_random >= ' . wfRandom(),
            ],
            __METHOD__,
            ['ORDER BY' => 'page_random']
        );
        if (!$row) {
            // Nothing at or above the random offset; this iteration is spent.
            continue;
        }
        ++$scanned;

        $title = Title::newFromRow($row);
        $page = WikiPage::factory($title);
        $revision = $page->getRevision();
        $content = $revision ? $revision->getContent(Revision::RAW) : null;
        if (!$content) {
            // The page row was found but its current revision or content could
            // not be loaded; skip it instead of failing on a null dereference.
            $this->output("No revision content available for '{$title->getPrefixedText()}'.\n");
            continue;
        }

        $parserOptions = $page->makeParserOptions('canonical');
        $parserOutputOld = ParserCache::singleton()->get($page, $parserOptions);
        if (!$parserOutputOld) {
            $this->output("No parser cache entry found for '{$title->getPrefixedText()}'.\n");
            continue;
        }

        $t1 = microtime(true);
        $parserOutputNew = $content->getParserOutput($title, $revision->getId(), $parserOptions, false);
        $sec = microtime(true) - $t1;
        $totalsec += $sec;

        $this->output("Parsed '{$title->getPrefixedText()}' in {$sec} seconds.\n");
        $this->output("Found cache entry for '{$title->getPrefixedText()}'...");

        // Strip HTML comments from both sides before diffing.
        $oldHtml = trim(preg_replace('#<!-- .+-->#Us', '', $parserOutputOld->getText()));
        $newHtml = trim(preg_replace('#<!-- .+-->#Us', '', $parserOutputNew->getText()));
        $diff = wfDiff($oldHtml, $newHtml);
        if (strlen($diff)) {
            $this->output("differences found:\n\n{$diff}\n\n");
            ++$withdiff;
        } else {
            $this->output("No differences found.\n");
        }
        ++$withcache;
    }

    // Parse time is only accumulated for pages that had a cache entry, so the
    // average has to be taken over those pages, not over every scanned page.
    $ave = $withcache ? $totalsec / $withcache : 0;

    $this->output("Checked {$scanned} pages; {$withcache} had prior cache entries.\n");
    $this->output("Pages with differences found: {$withdiff}\n");
    $this->output("Average parse time: {$ave} sec\n");
}
/**
 * Work out which add, subtract and change operations turn $leftText into
 * $rightText.
 *
 * Both texts are trimmed and given a single trailing newline, diffed with
 * wfDiff() using the '-u -w' options, and the diff is handed to parse().
 *
 * @param string $leftText The left, or old, revision of the text
 * @param string $rightText The right, or new, revision of the text
 * @return array Array of arrays containing changes to individual groups of
 *  lines within the text. Each change consists of:
 *   - 'action': one of add, subtract, change
 *   - 'content' that was added or removed, or in the case of a change,
 *     'old_content' and 'new_content'
 *   - 'left_pos' and 'right_pos' (in 1-indexed lines) of the change
 */
public function getChangeSet($leftText, $rightText) {
    // The internal diff utility, used when GNU diff is not available, prefixes
    // lines with 2 characters instead of 1 (see bug 41689).
    $this->prefixLength = self::usingInternalDiff() ? 2 : 1;

    $oldText = trim($leftText) . "\n";
    $newText = trim($rightText) . "\n";

    return $this->parse(wfDiff($oldText, $newText, '-u -w'), $oldText, $newText);
}
/**
 * Forward a method call to every configured parser and compare the results.
 *
 * Each parser's return value is compared with the one before it: objects with
 * loose inequality, everything else with strict inequality. If any adjacent
 * pair differs, an MWException describing the mismatch is thrown. Otherwise
 * the last parser's result is returned.
 *
 * @param string $name Method to invoke on each parser
 * @param array $args Arguments passed to the method
 * @return mixed Result of the last parser (null when there are no parsers)
 * @throws MWException When the parsers' results do not agree
 */
function __call($name, $args) {
    $this->init();

    $results = array();
    $mismatch = false;
    $previous = null;
    $havePrevious = false;

    foreach ($this->parsers as $i => $parser) {
        $current = call_user_func_array(array(&$this->parsers[$i], $name), $args);

        if ($havePrevious) {
            $differs = is_object($previous)
                ? $previous != $current
                : $previous !== $current;
            if ($differs) {
                $mismatch = true;
            }
        }

        $havePrevious = true;
        $results[$i] = $current;
        $previous = $current;
    }

    if (!$mismatch) {
        return $previous;
    }

    // A textual diff is only produced for exactly two results.
    if (count($results) == 2) {
        $exported = array();
        foreach ($results as $value) {
            $exported[] = var_export($value, true);
        }
        $diff = wfDiff($exported[0], $exported[1]);
    } else {
        $diff = '[too many parsers]';
    }

    $msg = "Parser_DiffTest: results mismatch on call to {$name}\n";
    if (!$this->shortOutput) {
        $msg .= 'Arguments: ' . $this->formatArray($args) . "\n";
    }
    $msg .= 'Results: ' . $this->formatArray($results) . "\n" . "Diff: {$diff}\n";

    throw new MWException($msg);
}
/**
 * Build the diff covering a run of consecutive edits by one user.
 *
 * The "low" revision is the latest revision before $r made by a different
 * user. The "high" revision is the last revision before the next edit by a
 * different user, or $r itself when no such later edit exists.
 *
 * @param object $r Revision-like object; mTitle, mUserText and mId are read
 * @return array With keys:
 *   - 'nextcomment', 'nextuser': comment and user of the next edit by another
 *     user (only set when such an edit exists)
 *   - 'newpage': 1 when the low text is empty, 0 otherwise
 *   - 'numedits': count of revisions after the low revision up to and
 *     including the high revision
 *   - 'diff': wfDiff() of the low text against the high text
 *   - 'revhi': the high Revision
 *   - 'revlo': the low Revision, or null when there is none
 */
function getDiffToMeasure($r) {
    $dbr = wfGetDB(DB_SLAVE);
    $result = array();
    $pageId = $r->mTitle->getArticleID();

    // get the low, we compare this against the last edit
    // which was made by a different user
    $revlo = $dbr->selectField(
        'revision',
        'rev_id',
        array(
            'rev_page' => $pageId,
            'rev_user_text != ' . $dbr->addQuotes($r->mUserText),
            'rev_id < ' . $r->mId,
        ),
        "RandomEdit::getDiffToMeasure",
        array("ORDER BY" => "rev_id desc", "LIMIT" => 1)
    );

    // get the highest edit in this sequence of edits by this user: first find
    // the earliest later edit by someone else. The explicit ascending order
    // ensures we get that earliest row and not an arbitrary match.
    $not_hi_row = $dbr->selectRow(
        'revision',
        array('rev_id', 'rev_comment', 'rev_user_text'),
        array(
            'rev_page' => $pageId,
            'rev_user_text != ' . $dbr->addQuotes($r->mUserText),
            'rev_id > ' . $r->mId,
        ),
        "RandomEdit::getDiffToMeasure",
        array("ORDER BY" => "rev_id asc")
    );

    $revhi = null;
    if (!$not_hi_row) {
        $revhi = $r->mId;
    } else {
        // The revision immediately before the other user's edit ends the run.
        $revhi = $dbr->selectField(
            'revision',
            'rev_id',
            array(
                'rev_page' => $pageId,
                'rev_id < ' . $not_hi_row->rev_id,
            ),
            "RandomEdit::getDiffToMeasure",
            array("ORDER BY" => "rev_id desc", "LIMIT" => 1)
        );
        $result['nextcomment'] = $not_hi_row->rev_comment;
        $result['nextuser'] = $not_hi_row->rev_user_text;
    }

    $hi = Revision::newFromID($revhi);
    $hitext = $hi->getText();

    // $lo stays null when there is no earlier revision by another user, so the
    // 'revlo' entry below is always defined.
    $lo = null;
    $lotext = "";
    if ($revlo) {
        $lo = Revision::newFromID($revlo);
        if ($lo) {
            $lotext = $lo->getText();
        }
    }

    $result['newpage'] = ($lotext == "") ? 1 : 0;

    $opts = array('rev_page' => $pageId, 'rev_id <= ' . $revhi);
    if ($revlo) {
        $opts[] = 'rev_id > ' . $revlo;
    }
    $result['numedits'] = $dbr->selectField('revision', 'count(*)', $opts);

    $result['diff'] = wfDiff($lotext, $hitext);
    $result['revhi'] = $hi;
    $result['revlo'] = $lo;

    return $result;
}
/**
 * Compute the value of this lazily-loaded variable.
 *
 * The 'AbuseFilter-interceptVariable' hook runs first; if it returns false,
 * whatever it stored in $result is returned without further work. Otherwise
 * the value is produced by the switch on $this->mMethod, reading its inputs
 * from $this->mParameters and from other variables in $vars. Methods not
 * handled here are offered to the 'AbuseFilter-computeVariable' hook.
 *
 * Note that several cases deliberately fall through to the next case when
 * their "shared parse" precondition is not met.
 *
 * @param $vars AbuseFilterVariableHolder
 * @return AFPData|array|int|mixed|null|string
 * @throws MWException
 * @throws AFPException
 */
function compute($vars) {
    $parameters = $this->mParameters;
    $result = null;

    // Let extensions supply the value themselves; a false return means "handled".
    if (!wfRunHooks('AbuseFilter-interceptVariable', array($this->mMethod, $vars, $parameters, &$result))) {
        return $result instanceof AFPData ? $result : AFPData::newFromPHPVar($result);
    }

    switch ($this->mMethod) {
        case 'diff':
            // Diff of two text variables; a newline is appended to each side first.
            $text1Var = $parameters['oldtext-var'];
            $text2Var = $parameters['newtext-var'];
            $text1 = $vars->getVar($text1Var)->toString() . "\n";
            $text2 = $vars->getVar($text2Var)->toString() . "\n";
            $result = wfDiff($text1, $text2);
            break;
        case 'diff-split':
            // Collect the diff lines whose first character equals the prefix,
            // with the prefix removed.
            $diff = $vars->getVar($parameters['diff-var'])->toString();
            $line_prefix = $parameters['line-prefix'];
            $diff_lines = explode("\n", $diff);
            $interest_lines = array();
            foreach ($diff_lines as $line) {
                if (substr($line, 0, 1) === $line_prefix) {
                    $interest_lines[] = substr($line, strlen($line_prefix));
                }
            }
            $result = $interest_lines;
            break;
        case 'links-from-wikitext':
            // This should ONLY be used when sharing a parse operation with the edit.
            /* @var WikiPage $article */
            $article = $parameters['article'];
            if ($article !== null && (!defined('MW_SUPPORTS_CONTENTHANDLER') || $article->getContentModel() === CONTENT_MODEL_WIKITEXT)) {
                $textVar = $parameters['text-var'];
                // Build a Content object from the new text so the edit preparation can be used.
                $new_text = $vars->getVar($textVar)->toString();
                $content = ContentHandler::makeContent($new_text, $article->getTitle());
                $editInfo = $article->prepareContentForEdit($content);
                $links = array_keys($editInfo->output->getExternalLinks());
                $result = $links;
                break;
            }
            // Otherwise fall back to database (intentional fall-through)
        case 'links-from-wikitext-nonedit':
        case 'links-from-wikitext-or-database':
            // TODO: use Content object instead, if available! In any case, use WikiPage, not Article.
            $article = self::articleFromTitle($parameters['namespace'], $parameters['title']);
            if ($vars->getVar('context')->toString() == 'filter') {
                $links = $this->getLinksFromDB($article);
                wfDebug("AbuseFilter: loading old links from DB\n");
            } elseif (!defined('MW_SUPPORTS_CONTENTHANDLER') || $article->getContentModel() === CONTENT_MODEL_WIKITEXT) {
                wfDebug("AbuseFilter: loading old links from Parser\n");
                $textVar = $parameters['text-var'];
                $wikitext = $vars->getVar($textVar)->toString();
                $editInfo = $this->parseNonEditWikitext($wikitext, $article);
                $links = array_keys($editInfo->output->getExternalLinks());
            } else {
                // TODO: Get links from Content object. But we don't have the content object.
                // And for non-text content, $wikitext is usually not going to be a valid
                // serialization, but rather some dummy text for filtering.
                $links = array();
            }
            $result = $links;
            break;
        case 'link-diff-added':
        case 'link-diff-removed':
            // Both link variables are newline-separated lists; the result is the
            // set difference in the direction named by the method.
            $oldLinkVar = $parameters['oldlink-var'];
            $newLinkVar = $parameters['newlink-var'];
            $oldLinks = $vars->getVar($oldLinkVar)->toString();
            $newLinks = $vars->getVar($newLinkVar)->toString();
            $oldLinks = explode("\n", $oldLinks);
            $newLinks = explode("\n", $newLinks);
            if ($this->mMethod == 'link-diff-added') {
                $result = array_diff($newLinks, $oldLinks);
            }
            if ($this->mMethod == 'link-diff-removed') {
                $result = array_diff($oldLinks, $newLinks);
            }
            break;
        case 'parse-wikitext':
            // Should ONLY be used when sharing a parse operation with the edit.
            $article = $parameters['article'];
            if ($article !== null && (!defined('MW_SUPPORTS_CONTENTHANDLER') || $article->getContentModel() === CONTENT_MODEL_WIKITEXT)) {
                $textVar = $parameters['wikitext-var'];
                // XXX: Use prepareContentForEdit. But we need a Content object for that.
                $new_text = $vars->getVar($textVar)->toString();
                $editInfo = $article->prepareTextForEdit($new_text);
                if (isset($parameters['pst']) && $parameters['pst']) {
                    // 'pst' set: return the serialized pstContent instead of HTML.
                    $result = $editInfo->pstContent->serialize($editInfo->format);
                } else {
                    $newHTML = $editInfo->output->getText();
                    // Kill the PP limit comments. Ideally we'd just remove these by not setting the
                    // parser option, but then we can't share a parse operation with the edit, which is bad.
                    $result = preg_replace('/<!--\\s*NewPP limit report[^>]*-->\\s*$/si', '', $newHTML);
                }
                break;
            }
            // Otherwise fall back to database (intentional fall-through)
        case 'parse-wikitext-nonedit':
            // TODO: use Content object instead, if available! In any case, use WikiPage, not Article.
            $article = self::articleFromTitle($parameters['namespace'], $parameters['title']);
            $textVar = $parameters['wikitext-var'];
            if (!defined('MW_SUPPORTS_CONTENTHANDLER') || $article->getContentModel() === CONTENT_MODEL_WIKITEXT) {
                if (isset($parameters['pst']) && $parameters['pst']) {
                    // $textVar is already PSTed when it's not loaded from an ongoing edit.
                    $result = $vars->getVar($textVar)->toString();
                } else {
                    $text = $vars->getVar($textVar)->toString();
                    $editInfo = $this->parseNonEditWikitext($text, $article);
                    $result = $editInfo->output->getText();
                }
            } else {
                // TODO: Parser Output from Content object. But we don't have the content object.
                // And for non-text content, $wikitext is usually not going to be a valid
                // serialization, but rather some dummy text for filtering.
                $result = '';
            }
            break;
        case 'strip-html':
            // Remove everything between '<' and '>' delimiters.
            $htmlVar = $parameters['html-var'];
            $html = $vars->getVar($htmlVar)->toString();
            $result = StringUtils::delimiterReplace('<', '>', '', $html);
            break;
        case 'load-recent-authors':
            // Up to 10 distinct user names from revisions older than the cutoff;
            // an empty string is returned when the page does not exist.
            $cutOff = $parameters['cutoff'];
            $title = Title::makeTitle($parameters['namespace'], $parameters['title']);
            if (!$title->exists()) {
                $result = '';
                break;
            }
            $dbr = wfGetDB(DB_SLAVE);
            $res = $dbr->select('revision', 'DISTINCT rev_user_text', array('rev_page' => $title->getArticleID(), 'rev_timestamp<' . $dbr->addQuotes($dbr->timestamp($cutOff))), __METHOD__, array('ORDER BY' => 'rev_timestamp DESC', 'LIMIT' => 10));
            $users = array();
            foreach ($res as $row) {
                $users[] = $row->rev_user_text;
            }
            $result = $users;
            break;
        case 'get-page-restrictions':
            $action = $parameters['action'];
            $title = Title::makeTitle($parameters['namespace'], $parameters['title']);
            $rights = $title->getRestrictions($action);
            // Normalise an empty restriction list to an empty array.
            $rights = count($rights) ? $rights : array();
            $result = $rights;
            break;
        case 'simple-user-accessor':
            // Call the named zero-argument method on the user object.
            $user = $parameters['user'];
            $method = $parameters['method'];
            if (!$user) {
                throw new MWException('No user parameter given.');
            }
            $obj = self::getUserObject($user);
            if (!$obj) {
                throw new MWException("Invalid username {$user}");
            }
            $result = call_user_func(array($obj, $method));
            break;
        case 'user-age':
            // Difference between the 'asof' time and the registration time, both
            // converted with TS_UNIX; 0 when the user ID is 0.
            $user = $parameters['user'];
            $asOf = $parameters['asof'];
            $obj = self::getUserObject($user);
            if ($obj->getId() == 0) {
                $result = 0;
                break;
            }
            $registration = $obj->getRegistration();
            $result = wfTimestamp(TS_UNIX, $asOf) - wfTimestampOrNull(TS_UNIX, $registration);
            break;
        case 'user-groups':
            // Deprecated but needed by old log entries
            $user = $parameters['user'];
            $obj = self::getUserObject($user);
            $result = $obj->getEffectiveGroups();
            break;
        case 'length':
            // Length of the variable's string form as reported by strlen().
            $s = $vars->getVar($parameters['length-var'])->toString();
            $result = strlen($s);
            break;
        case 'subtract':
            $v1 = $vars->getVar($parameters['val1-var'])->toFloat();
            $v2 = $vars->getVar($parameters['val2-var'])->toFloat();
            $result = $v1 - $v2;
            break;
        case 'revision-text-by-id':
            $rev = Revision::newFromId($parameters['revid']);
            $result = AbuseFilter::revisionToString($rev);
            break;
        case 'revision-text-by-timestamp':
            $timestamp = $parameters['timestamp'];
            $title = Title::makeTitle($parameters['namespace'], $parameters['title']);
            $dbr = wfGetDB(DB_SLAVE);
            $rev = Revision::loadFromTimestamp($dbr, $title, $timestamp);
            $result = AbuseFilter::revisionToString($rev);
            break;
        default:
            // Unknown method: give hooks a chance; if the hook run returns true
            // (nothing claimed it), the method is reported as unknown.
            if (wfRunHooks('AbuseFilter-computeVariable', array($this->mMethod, $vars, $parameters, &$result))) {
                throw new AFPException('Unknown variable compute type ' . $this->mMethod);
            }
    }

    // Wrap plain PHP values so callers always receive AFPData.
    return $result instanceof AFPData ? $result : AFPData::newFromPHPVar($result);
}
/**
 * Callback function for each revision, parse with both parsers and compare.
 *
 * The parser classes named by the 'parser1' and 'parser2' options are
 * instantiated and both parse the revision's wikitext. When their HTML
 * differs, the failure counter is incremented, an error is reported, and
 * depending on the saveFailed / showDiff settings the source text is written
 * to a file and a diff of the two outputs is printed. Revisions whose content
 * is unavailable or is not wikitext are reported and skipped.
 *
 * @param $rev Revision
 */
public function processRevision($rev) {
    $title = $rev->getTitle();

    $parser1Name = $this->getOption('parser1');
    $parser2Name = $this->getOption('parser2');

    self::checkParserLocally($parser1Name);
    self::checkParserLocally($parser2Name);

    $parser1 = new $parser1Name();
    $parser2 = new $parser2Name();

    $content = $rev->getContent();
    if ($content === null) {
        // getContent() can yield null when the content cannot be loaded or is
        // not accessible; report it instead of failing on a null dereference.
        $this->error("Page {$title->getPrefixedText()} has no accessible content\n");
        return;
    }

    if ($content->getModel() !== CONTENT_MODEL_WIKITEXT) {
        $this->error("Page {$title->getPrefixedText()} does not contain wikitext but {$content->getModel()}\n");
        return;
    }

    $text = strval($content->getNativeData());

    $output1 = $parser1->parse($text, $title, $this->options);
    $output2 = $parser2->parse($text, $title, $this->options);

    if ($output1->getText() != $output2->getText()) {
        $this->failed++;
        $this->error("Parsing for {$title->getPrefixedText()} differs\n");

        if ($this->saveFailed) {
            // Keep the source text so the failure can be reproduced later.
            file_put_contents($this->saveFailed . '/' . rawurlencode($title->getPrefixedText()) . ".txt", $text);
        }
        if ($this->showDiff) {
            $this->output(wfDiff($this->stripParameters($output1->getText()), $this->stripParameters($output2->getText()), ''));
        }
    } else {
        $this->output($title->getPrefixedText() . "\tOK\n");

        if ($this->showParsedOutput) {
            $this->output($this->stripParameters($output1->getText()));
        }
    }
}
/**
 * Per-revision callback: run the revision text through both configured
 * parsers and report whether their HTML output agrees.
 *
 * On agreement the title is printed with "OK" (plus the parsed output when
 * showParsedOutput is set). On disagreement the failure counter is bumped, an
 * error is reported, the source text is saved when saveFailed is set, and a
 * diff of both outputs is printed when showDiff is set.
 *
 * @param $rev Revision
 */
public function processRevision($rev) {
    $title = $rev->getTitle();

    $firstClass = $this->getOption('parser1');
    $secondClass = $this->getOption('parser2');

    self::checkParserLocally($firstClass);
    self::checkParserLocally($secondClass);

    $firstParser = new $firstClass();
    $secondParser = new $secondClass();

    $firstOutput = $firstParser->parse($rev->getText(), $title, $this->options);
    $secondOutput = $secondParser->parse($rev->getText(), $title, $this->options);

    // Matching output: report success and leave early.
    if ($firstOutput->getText() == $secondOutput->getText()) {
        $this->output($title->getPrefixedText() . "\tOK\n");
        if ($this->showParsedOutput) {
            $this->output($this->stripParameters($firstOutput->getText()));
        }
        return;
    }

    $this->failed++;
    $this->error("Parsing for {$title->getPrefixedText()} differs\n");

    if ($this->saveFailed) {
        $target = $this->saveFailed . '/' . rawurlencode($title->getPrefixedText()) . ".txt";
        file_put_contents($target, $rev->getText());
    }

    if ($this->showDiff) {
        $left = $this->stripParameters($firstOutput->getText());
        $right = $this->stripParameters($secondOutput->getText());
        $this->output(wfDiff($left, $right, ''));
    }
}
/**
 * Compute the value of this lazily-loaded variable.
 *
 * The value is produced by the switch on $this->mMethod, reading its inputs
 * from $this->mParameters and from other variables in $vars. Methods not
 * handled here are offered to the 'AbuseFilter-computeVariable' hook. Two
 * cases ('links-from-wikitext', 'parse-wikitext') deliberately fall through
 * to their non-edit counterparts when no article was supplied.
 *
 * @param $vars AbuseFilterVariableHolder
 * @return AFPData
 * @throws MWException
 * @throws AFPException
 */
function compute($vars) {
    $parameters = $this->mParameters;
    $result = null;

    switch ($this->mMethod) {
        case 'diff':
            $text1Var = $parameters['oldtext-var'];
            $text2Var = $parameters['newtext-var'];
            $text1 = $vars->getVar($text1Var)->toString();
            $text2 = $vars->getVar($text2Var)->toString();
            $result = wfDiff($text1, $text2);
            // Drop the "\ No newline at end of file" marker lines from the diff.
            $result = trim(preg_replace("/^\\\\ No newline at end of file\n/m", '', $result));
            break;
        case 'diff-split':
            // Collect the diff lines whose first character equals the prefix,
            // with the prefix removed.
            $diff = $vars->getVar($parameters['diff-var'])->toString();
            $line_prefix = $parameters['line-prefix'];
            $diff_lines = explode("\n", $diff);
            $interest_lines = array();
            foreach ($diff_lines as $line) {
                if (substr($line, 0, 1) === $line_prefix) {
                    $interest_lines[] = substr($line, strlen($line_prefix));
                }
            }
            $result = $interest_lines;
            break;
        case 'links-from-wikitext':
            // This should ONLY be used when sharing a parse operation with the edit.
            $article = $parameters['article'];
            if ($article) {
                $textVar = $parameters['text-var'];
                $new_text = $vars->getVar($textVar)->toString();
                $editInfo = $article->prepareTextForEdit($new_text);
                $links = array_keys($editInfo->output->getExternalLinks());
                $result = $links;
                break;
            }
            // Otherwise fall back to database (intentional fall-through)
        case 'links-from-wikitext-nonedit':
        case 'links-from-wikitext-or-database':
            $article = self::articleFromTitle($parameters['namespace'], $parameters['title']);
            if ($vars->getVar('context')->toString() == 'filter') {
                $links = $this->getLinksFromDB($article);
                wfDebug("AbuseFilter: loading old links from DB\n");
            } else {
                wfDebug("AbuseFilter: loading old links from Parser\n");
                $textVar = $parameters['text-var'];
                $wikitext = $vars->getVar($textVar)->toString();
                $editInfo = $this->parseNonEditWikitext($wikitext, $article);
                $links = array_keys($editInfo->output->getExternalLinks());
            }
            $result = $links;
            break;
        case 'link-diff-added':
        case 'link-diff-removed':
            // Both link variables are newline-separated lists; the result is the
            // set difference in the direction named by the method.
            $oldLinkVar = $parameters['oldlink-var'];
            $newLinkVar = $parameters['newlink-var'];
            $oldLinks = $vars->getVar($oldLinkVar)->toString();
            $newLinks = $vars->getVar($newLinkVar)->toString();
            $oldLinks = explode("\n", $oldLinks);
            $newLinks = explode("\n", $newLinks);
            if ($this->mMethod == 'link-diff-added') {
                $result = array_diff($newLinks, $oldLinks);
            }
            if ($this->mMethod == 'link-diff-removed') {
                $result = array_diff($oldLinks, $newLinks);
            }
            break;
        case 'parse-wikitext':
            // Should ONLY be used when sharing a parse operation with the edit.
            $article = $parameters['article'];
            if ($article) {
                $textVar = $parameters['wikitext-var'];
                $new_text = $vars->getVar($textVar)->toString();
                $editInfo = $article->prepareTextForEdit($new_text);
                $newHTML = $editInfo->output->getText();
                // Kill the PP limit comments. Ideally we'd just remove these by not setting the
                // parser option, but then we can't share a parse operation with the edit, which is bad.
                $result = preg_replace('/<!--\\s*NewPP limit report[^>]*-->\\s*$/si', '', $newHTML);
                break;
            }
            // Otherwise fall back to database (intentional fall-through)
        case 'parse-wikitext-nonedit':
            $article = self::articleFromTitle($parameters['namespace'], $parameters['title']);
            $textVar = $parameters['wikitext-var'];
            $text = $vars->getVar($textVar)->toString();
            $editInfo = $this->parseNonEditWikitext($text, $article);
            $result = $editInfo->output->getText();
            break;
        case 'strip-html':
            // Remove everything between '<' and '>' delimiters.
            $htmlVar = $parameters['html-var'];
            $html = $vars->getVar($htmlVar)->toString();
            $result = StringUtils::delimiterReplace('<', '>', '', $html);
            break;
        case 'load-recent-authors':
            // Up to 10 distinct user names from revisions older than the cutoff;
            // an empty string is returned when the page does not exist.
            $cutOff = $parameters['cutoff'];
            $title = Title::makeTitle($parameters['namespace'], $parameters['title']);
            if (!$title->exists()) {
                $result = '';
                break;
            }
            $dbr = wfGetDB(DB_SLAVE);
            $res = $dbr->select(
                'revision',
                'DISTINCT rev_user_text',
                array(
                    'rev_page' => $title->getArticleId(),
                    'rev_timestamp<' . $dbr->addQuotes($dbr->timestamp($cutOff)),
                ),
                __METHOD__,
                array('ORDER BY' => 'rev_timestamp DESC', 'LIMIT' => 10)
            );
            $users = array();
            foreach ($res as $row) {
                $users[] = $row->rev_user_text;
            }
            $result = $users;
            break;
        case 'get-page-restrictions':
            $action = $parameters['action'];
            $title = Title::makeTitle($parameters['namespace'], $parameters['title']);
            $rights = $title->getRestrictions($action);
            // Normalise an empty restriction list to an empty array.
            $rights = count($rights) ? $rights : array();
            $result = $rights;
            break;
        case 'simple-user-accessor':
            // Call the named zero-argument method on the user object.
            $user = $parameters['user'];
            $method = $parameters['method'];
            if (!$user) {
                throw new MWException('No user parameter given.');
            }
            $obj = self::userObjectFromName($user);
            if (!$obj) {
                throw new MWException("Invalid username {$user}");
            }
            $result = call_user_func(array($obj, $method));
            break;
        case 'user-age':
            // Difference between the 'asof' time and the registration time, both
            // converted with TS_UNIX; 0 when the user ID is 0.
            $user = $parameters['user'];
            $asOf = $parameters['asof'];
            $obj = self::userObjectFromName($user);
            if ($obj->getId() == 0) {
                $result = 0;
                break;
            }
            $registration = $obj->getRegistration();
            $result = wfTimestamp(TS_UNIX, $asOf) - wfTimestampOrNull(TS_UNIX, $registration);
            break;
        case 'user-groups':
            $user = $parameters['user'];
            $obj = self::userObjectFromName($user);
            $result = $obj->getEffectiveGroups();
            break;
        case 'length':
            // Length of the variable's string form as reported by strlen().
            $s = $vars->getVar($parameters['length-var'])->toString();
            $result = strlen($s);
            break;
        case 'subtract':
            $v1 = $vars->getVar($parameters['val1-var'])->toFloat();
            $v2 = $vars->getVar($parameters['val2-var'])->toFloat();
            $result = $v1 - $v2;
            break;
        case 'revision-text-by-id':
            $rev = Revision::newFromId($parameters['revid']);
            // A missing revision yields an empty string, matching the
            // 'revision-text-by-timestamp' case, instead of a fatal error.
            if ($rev) {
                $result = $rev->getText();
            } else {
                $result = '';
            }
            break;
        case 'revision-text-by-timestamp':
            $timestamp = $parameters['timestamp'];
            $title = Title::makeTitle($parameters['namespace'], $parameters['title']);
            $dbr = wfGetDB(DB_SLAVE);
            $rev = Revision::loadFromTimestamp($dbr, $title, $timestamp);
            if ($rev) {
                $result = $rev->getText();
            } else {
                $result = '';
            }
            break;
        default:
            // Unknown method: give hooks a chance; if the hook run returns true
            // (nothing claimed it), the method is reported as unknown.
            if (wfRunHooks('AbuseFilter-computeVariable', array($this->mMethod, $vars))) {
                throw new AFPException('Unknown variable compute type ' . $this->mMethod);
            }
    }

    // Wrap plain PHP values so callers always receive AFPData.
    return $result instanceof AFPData ? $result : AFPData::newFromPHPVar($result);
}