/**
 * Combine regex fragments into one or more batched regular expressions.
 *
 * Fragments are joined with '|' into batches. Each finished batch is wrapped
 * in the prefix and suffix supplied by the blacklist object. When no fragment
 * is kept (empty input, or every fragment skipped), an empty array is returned.
 *
 * @param array $lines list of fragments which will match in URLs
 * @param BaseBlacklist $blacklist provides getRegexStart() and getRegexEnd()
 * @param int $batchSize threshold for the combined byte length of the current
 *                       batch plus the next fragment ('|' separators and the
 *                       prefix/suffix are not counted); with 0, every
 *                       non-empty fragment ends up in its own regex
 * @return array list of regex strings
 */
static function buildRegexes($lines, BaseBlacklist $blacklist, $batchSize = 4096) {
    $prefix = $blacklist->getRegexStart();
    $suffix = $blacklist->getRegexEnd($batchSize);

    // Turns one batch of '|'-joined fragments into a complete regex:
    // any run of backslashes directly before a slash is collapsed to a bare
    // slash first, then every slash is escaped as "\/".
    $wrapBatch = function ($alternatives) use ($prefix, $suffix) {
        $normalised = preg_replace('|\\\\*/|u', '/', $alternatives);
        return $prefix . str_replace('/', '\\/', $normalised) . $suffix;
    };

    $compiled = array();
    // false means "no batch started yet"; a string is the batch being built.
    $pending = false;

    foreach ($lines as $fragment) {
        // A fragment ending in a backslash would silently break the batched
        // regex (it would escape the following separator), so it is dropped.
        // Per the original author, getBadLines() still warns about such
        // lines on edit.
        if (substr($fragment, -1, 1) == "\\") {
            continue;
        }

        if ($pending === false) {
            // First kept fragment opens the first batch.
            $pending = $fragment;
            continue;
        }

        // Rough size check only: compares raw byte lengths against the limit.
        if (strlen($pending) + strlen($fragment) > $batchSize) {
            $compiled[] = $wrapBatch($pending);
            $pending = $fragment;
            continue;
        }

        $pending .= '|' . $fragment;
    }

    // Flush whatever is left in the last, possibly partial, batch.
    if ($pending !== false) {
        $compiled[] = $wrapBatch($pending);
    }

    return $compiled;
}
/**
 * Run the requested URL through the 'spam' blacklist and report the outcome.
 *
 * Writes 'result' => 'ok' under the 'spamblacklist' key when filter() returns
 * false; otherwise writes 'result' => 'blacklisted' together with the returned
 * matches.
 */
public function execute() {
    $request = $this->extractRequestParams();
    $hits = BaseBlacklist::getInstance('spam')->filter($request['url'], null, true);
    $result = $this->getResult();

    if ($hits === false) {
        // filter() returned false: the URL is not blacklisted.
        $result->addValue('spamblacklist', 'result', 'ok');
        return;
    }

    // The URL is blacklisted: expose the verdict and the matches.
    $result->addValue('spamblacklist', 'result', 'blacklisted');
    // Tag the list elements as 'match' before adding them to the result.
    $result->setIndexedTagName($hits, 'match');
    $result->addValue('spamblacklist', 'matches', $hits);
}
/**
 * Build the closing part of the match regex.
 *
 * Prepends a closing parenthesis to whatever the parent implementation
 * returns for the same batch size.
 * NOTE(review): presumably this closes a group opened by the matching
 * getRegexStart() override; confirm against that method.
 *
 * @param $batchSize passed through unchanged to the parent implementation
 * @return string
 */
public function getRegexEnd($batchSize) {
    $inheritedEnd = parent::getRegexEnd($batchSize);

    return ')' . $inheritedEnd;
}
/**
 * Hook function for ArticleSaveComplete
 * Clear local spam blacklist caches on page save.
 *
 * Only saves of pages that BaseBlacklist::isLocalSource() recognises trigger
 * a purge; every other save is left untouched.
 *
 * @param $article Article
 * @param $user User
 * @param $text string
 * @param $summary string
 * @param $isminor
 * @param $iswatch
 * @param $section
 * @return bool always true, so that the remaining hook handlers keep running
 */
static function articleSave( &$article, &$user, $text, $summary, $isminor, $iswatch, $section ) {
    if ( BaseBlacklist::isLocalSource( $article->getTitle() ) ) {
        global $wgMemc, $wgDBname;

        // This sucks because every Blacklist needs to be cleared
        foreach ( BaseBlacklist::getBlacklistTypes() as $type => $class ) {
            $wgMemc->delete( "$wgDBname:{$type}_blacklist_regexes" );
        }
    }

    // Saving an unrelated page must not abort other handlers of this hook,
    // and a handler must not end without an explicit return value, so
    // both paths return true.
    return true;
}
/**
 * Hook function for PageContentSaveComplete
 * Drops the cached blacklist regexes from $wgMemc when a page recognised by
 * BaseBlacklist::isLocalSource() is saved.
 *
 * @param Page $wikiPage
 * @param User $user
 * @param Content $content
 * @param string $summary
 * @param bool $isMinor
 * @param bool $isWatch
 * @param string $section
 * @param int $flags
 * @param int $revision
 * @param Status $status
 * @param int $baseRevId
 *
 * @return bool always true
 */
static function pageSaveContent(Page $wikiPage, User $user, Content $content, $summary, $isMinor, $isWatch, $section, $flags, $revision, Status $status, $baseRevId) {
    global $wgMemc, $wgDBname;

    if (BaseBlacklist::isLocalSource($wikiPage->getTitle())) {
        // There is one cache entry per blacklist type, so each registered
        // type has to be purged individually.
        foreach (BaseBlacklist::getBlacklistTypes() as $type => $unusedClass) {
            $cacheKey = "{$wgDBname}:{$type}_blacklist_regexes";
            $wgMemc->delete($cacheKey);
        }
    }

    return true;
}
/**
 * Hook function for PageContentSaveComplete
 * Asks every registered blacklist to clear its cache when a page recognised
 * by BaseBlacklist::isLocalSource() is saved.
 *
 * @param Page $wikiPage
 * @param User $user
 * @param Content $content
 * @param string $summary
 * @param bool $isMinor
 * @param bool $isWatch
 * @param string $section
 * @param int $flags
 * @param int $revision
 * @param Status $status
 * @param int $baseRevId
 *
 * @return bool always true
 */
static function pageSaveContent(Page $wikiPage, User $user, Content $content, $summary, $isMinor, $isWatch, $section, $flags, $revision, Status $status, $baseRevId) {
    if (BaseBlacklist::isLocalSource($wikiPage->getTitle())) {
        // Every blacklist type keeps its own cache, so each registered
        // type is cleared in turn.
        foreach (BaseBlacklist::getBlacklistTypes() as $type => $unusedClass) {
            BaseBlacklist::getInstance($type)->clearCache();
        }
    }

    return true;
}
/**
 * Check for abusive or spammy content
 *
 * Check the following in sequence (cheapest processing to most expensive,
 * returning if we get a hit):
 *   1) Respect $wgSpamRegex
 *   2) Check SpamBlacklist
 *   3) Check AbuseFilter
 *
 * @param $value  string the text to check (taken by reference, but not
 *                modified by this method)
 * @param $pageId int    the page ID (currently not used by this method)
 * @return bool true if one of the checks flagged the text, false otherwise
 */
private function findAbuse(&$value, $pageId) {
    // Respect $wgSpamRegex
    global $wgSpamRegex;
    $hasRegexList = is_array($wgSpamRegex) && count($wgSpamRegex) > 0;
    $hasRegexString = is_string($wgSpamRegex) && strlen($wgSpamRegex) > 0;
    if ($hasRegexList || $hasRegexString) {
        // In older versions, $wgSpamRegex may be a single string rather than
        // an array of regexes, so make it compatible.
        foreach ((array) $wgSpamRegex as $regex) {
            if (preg_match($regex, $value)) {
                return true;
            }
        }
    }

    // Create a fake title so we can pretend this is an article edit
    $title = Title::newFromText('__article_feedback_5__');

    // Check SpamBlacklist, if installed. $spam starts out as null so that
    // the check below does not read an undefined variable when neither the
    // legacy accessor function nor the BaseBlacklist class is available.
    $spam = null;
    if (function_exists('wfSpamBlacklistObject')) {
        $spam = wfSpamBlacklistObject();
    } elseif (class_exists('BaseBlacklist')) {
        $spam = BaseBlacklist::getInstance('spam');
    }
    if ($spam) {
        // Any return value other than false counts as a hit.
        $ret = $spam->filter($title, $value, '');
        if ($ret !== false) {
            return true;
        }
    }

    // Check AbuseFilter, if installed
    if (class_exists('AbuseFilter')) {
        global $wgUser;

        // Describe the feedback as an edit that goes from empty text to $value.
        $vars = new AbuseFilterVariableHolder();
        $vars->addHolder(AbuseFilter::generateUserVars($wgUser));
        $vars->addHolder(AbuseFilter::generateTitleVars($title, 'FEEDBACK'));
        $vars->setVar('SUMMARY', 'Article Feedback 5');
        $vars->setVar('ACTION', 'feedback');
        $vars->setVar('old_wikitext', '');
        $vars->setVar('new_wikitext', $value);
        $vars->addHolder(AbuseFilter::getEditVars($title));

        $filter_result = AbuseFilter::filterAction($vars, $title);

        // A hit is any result that is neither loosely equal to the empty
        // string nor exactly true.
        return $filter_result != '' && $filter_result !== true;
    }

    return false;
}