/**
  * Combine regex fragments into one or more batched URL-matching regexes.
  *
  * Fragments are joined with '|' into an alternation. Each finished
  * alternation is wrapped between $blacklist->getRegexStart() and
  * $blacklist->getRegexEnd($batchSize). An empty input list yields an
  * empty result.
  *
  * @param array $lines list of fragments which will match in URLs
  * @param BaseBlacklist $blacklist supplies the start and end of every regex
  * @param int $batchSize size threshold for a batch: a new batch is started
  *                       once the byte length of the pending alternation plus
  *                       the next fragment exceeds it (separators and the
  *                       start/end wrappers are not counted); with 0, each
  *                       non-empty fragment ends up in its own regex
  * @return array list of regex strings
  */
 static function buildRegexes($lines, BaseBlacklist $blacklist, $batchSize = 4096)
 {
     $batches = array();
     $prefix = $blacklist->getRegexStart();
     $suffix = $blacklist->getRegexEnd($batchSize);

     // Turns a finished alternation into a full regex: any run of
     // backslashes directly before a slash is dropped first, then every
     // slash is escaped exactly once.
     $finish = function ($alternation) use ($prefix, $suffix) {
         $unescaped = preg_replace('|\\\\*/|u', '/', $alternation);
         return $prefix . str_replace('/', '\\/', $unescaped) . $suffix;
     };

     // false means "no batch started yet".
     $pending = false;
     foreach ($lines as $fragment) {
         // A fragment ending in a backslash would escape the '|' that joins
         // it to the next fragment, so such fragments are left out entirely.
         if (substr($fragment, -1, 1) == "\\") {
             continue;
         }

         if ($pending === false) {
             $pending = $fragment;
             continue;
         }

         // Rough size check only: compares raw byte lengths against the limit.
         if (strlen($pending) + strlen($fragment) > $batchSize) {
             $batches[] = $finish($pending);
             $pending = $fragment;
             continue;
         }

         $pending .= '|' . $fragment;
     }

     // Flush whatever is left over after the last fragment.
     if ($pending !== false) {
         $batches[] = $finish($pending);
     }

     return $batches;
 }
 /**
  * Run the 'url' request parameter through the 'spam' blacklist and write
  * the outcome to the result object under 'spamblacklist'.
  *
  * 'result' is set to 'ok' when the filter returns false; otherwise it is
  * set to 'blacklisted' and the filter's return value is added as 'matches'.
  */
 public function execute()
 {
     $params = $this->extractRequestParams();
     $hits = BaseBlacklist::getInstance('spam')->filter($params['url'], null, true);
     $result = $this->getResult();

     if ($hits === false) {
         // not blacklisted
         $result->addValue('spamblacklist', 'result', 'ok');
         return;
     }

     // this url is blacklisted.
     $result->addValue('spamblacklist', 'result', 'blacklisted');
     $result->setIndexedTagName($hits, 'match');
     $result->addValue('spamblacklist', 'matches', $hits);
 }
Example #3
0
 /**
  * Returns the end of the regex for matches.
  *
  * This is the parent's regex end with a ')' placed in front of it
  * (presumably closing a group opened by the matching regex start; verify
  * against getRegexStart()).
  *
  * @param $batchSize batch size, handed on unchanged to the parent
  * @return string
  */
 public function getRegexEnd($batchSize)
 {
     $inheritedEnd = parent::getRegexEnd($batchSize);

     return ')' . $inheritedEnd;
 }
	/**
	 * Hook function for ArticleSaveComplete
	 * Clear local spam blacklist caches on page save.
	 *
	 * Nothing is cleared unless the saved page's title is a local blacklist
	 * source according to BaseBlacklist::isLocalSource().
	 *
	 * @param $article Article
	 * @param $user User
	 * @param $text string
	 * @param $summary string
	 * @param $isminor
	 * @param $iswatch
	 * @param $section
	 * @return bool always true, so that hook processing continues
	 */
	static function articleSave( &$article, &$user, $text, $summary, $isminor, $iswatch, $section ) {
		if ( !BaseBlacklist::isLocalSource( $article->getTitle() ) ) {
			// Not a blacklist page: nothing to clear. Return true rather than
			// false, because false would abort the remaining hook handlers
			// for every ordinary page save.
			return true;
		}
		global $wgMemc, $wgDBname;

		// This sucks because every Blacklist needs to be cleared
		foreach ( BaseBlacklist::getBlacklistTypes() as $type => $class ) {
			$wgMemc->delete( "$wgDBname:{$type}_blacklist_regexes" );
		}

		// Explicit boolean result; previously the function fell off the end
		// here and returned null despite the documented bool return.
		return true;
	}
Example #5
0
 /**
  * Hook function for PageContentSaveComplete
  *
  * When the saved page is a local blacklist source, delete the cached
  * regexes of every blacklist type from $wgMemc. Only $wikiPage is
  * consulted; the remaining hook arguments are accepted but not used.
  *
  * @param Page $wikiPage
  * @param User     $user
  * @param Content  $content
  * @param string   $summary
  * @param bool     $isMinor
  * @param bool     $isWatch
  * @param string   $section
  * @param int      $flags
  * @param int      $revision
  * @param Status   $status
  * @param int      $baseRevId
  *
  * @return bool always true
  */
 static function pageSaveContent(Page $wikiPage, User $user, Content $content, $summary, $isMinor, $isWatch, $section, $flags, $revision, Status $status, $baseRevId)
 {
     global $wgMemc, $wgDBname;

     if (BaseBlacklist::isLocalSource($wikiPage->getTitle())) {
         // Every blacklist type has its own cache entry, so each one has
         // to be deleted separately.
         foreach (BaseBlacklist::getBlacklistTypes() as $type => $handlerClass) {
             $cacheKey = "{$wgDBname}:{$type}_blacklist_regexes";
             $wgMemc->delete($cacheKey);
         }
     }

     return true;
 }
 /**
  * Hook function for PageContentSaveComplete
  *
  * When the saved page is a local blacklist source, call clearCache() on
  * the instance of every blacklist type. Only $wikiPage is consulted; the
  * remaining hook arguments are accepted but not used.
  *
  * @param Page $wikiPage
  * @param User     $user
  * @param Content  $content
  * @param string   $summary
  * @param bool     $isMinor
  * @param bool     $isWatch
  * @param string   $section
  * @param int      $flags
  * @param int      $revision
  * @param Status   $status
  * @param int      $baseRevId
  *
  * @return bool always true
  */
 static function pageSaveContent(Page $wikiPage, User $user, Content $content, $summary, $isMinor, $isWatch, $section, $flags, $revision, Status $status, $baseRevId)
 {
     if (BaseBlacklist::isLocalSource($wikiPage->getTitle())) {
         // Every blacklist type keeps its own cache, so each instance
         // has to be cleared in turn.
         foreach (BaseBlacklist::getBlacklistTypes() as $type => $handlerClass) {
             BaseBlacklist::getInstance($type)->clearCache();
         }
     }

     return true;
 }
 /**
  * Check for abusive or spammy content
  *
  * Check the following in sequence (cheapest processing to most expensive,
  * returning if we get a hit):
  *  1) Respect $wgSpamRegex
  *  2) Check SpamBlacklist, if installed
  *  3) Check AbuseFilter, if installed
  *
  * @param $value  string the text to check (taken by reference, but not
  *                       reassigned in this method)
  * @param $pageId int    the page ID (not used by this method)
  * @return bool true if one of the checks flags the text, false otherwise
  */
 private function findAbuse(&$value, $pageId)
 {
     // Respect $wgSpamRegex
     global $wgSpamRegex;
     if ((is_array($wgSpamRegex) && count($wgSpamRegex) > 0) || (is_string($wgSpamRegex) && strlen($wgSpamRegex) > 0)) {
         // In older versions, $wgSpamRegex may be a single string rather than
         // an array of regexes, so make it compatible.
         $regexes = (array) $wgSpamRegex;
         foreach ($regexes as $regex) {
             if (preg_match($regex, $value)) {
                 return true;
             }
         }
     }
     // Create a fake title so we can pretend this is an article edit
     $title = Title::newFromText('__article_feedback_5__');
     // Check SpamBlacklist, if installed. $spam starts out as null so that
     // the check below does not read an undefined variable when neither
     // flavour of the extension is available.
     $spam = null;
     if (function_exists('wfSpamBlacklistObject')) {
         $spam = wfSpamBlacklistObject();
     } elseif (class_exists('BaseBlacklist')) {
         $spam = BaseBlacklist::getInstance('spam');
     }
     if ($spam) {
         // Anything other than false from the filter counts as a hit.
         $ret = $spam->filter($title, $value, '');
         if ($ret !== false) {
             return true;
         }
     }
     // Check AbuseFilter, if installed
     if (class_exists('AbuseFilter')) {
         global $wgUser;
         $vars = new AbuseFilterVariableHolder();
         $vars->addHolder(AbuseFilter::generateUserVars($wgUser));
         $vars->addHolder(AbuseFilter::generateTitleVars($title, 'FEEDBACK'));
         $vars->setVar('SUMMARY', 'Article Feedback 5');
         $vars->setVar('ACTION', 'feedback');
         $vars->setVar('old_wikitext', '');
         $vars->setVar('new_wikitext', $value);
         $vars->addHolder(AbuseFilter::getEditVars($title));
         $filter_result = AbuseFilter::filterAction($vars, $title);
         // A hit is any result that is neither loosely equal to '' nor
         // exactly true.
         return $filter_result != '' && $filter_result !== true;
     }
     return false;
 }