/**
 * Spamcheck against wordlist
 *
 * Checks the wikitext against a list of blocked expressions and reports
 * whether the text contains any of them.
 *
 * Triggers COMMON_WORDBLOCK_BLOCKED
 *
 *  Action Plugins can use this event to inspect the blocked data
 *  and gain information about the user who was blocked.
 *
 *  Event data:
 *    data['matches']  - array of matches
 *    data['userinfo'] - information about the blocked user
 *      [ip]           - ip address (null when the server does not provide one)
 *      [user]         - username (if logged in)
 *      [mail]         - mail address (if logged in)
 *      [name]         - real name (if logged in)
 *
 * When the feature is disabled via $conf['usewordblock'] or no expression
 * matches, false is returned. On a match the result of trigger_event() is
 * returned; the default action handed to the event returns true.
 *
 * @author Andreas Gohr <*****@*****.**>
 * @author Michael Klier <*****@*****.**>
 * @param  string $text - optional text to check, if not given the globals are used
 * @return bool         - true if a spam word was found
 */
function checkwordblock($text = '')
{
    global $TEXT;
    global $PRE;
    global $SUF;
    global $conf;
    global $INFO;

    // empty() keeps the original truthiness test but tolerates a missing key
    if (empty($conf['usewordblock'])) {
        return false;
    }

    if (!$text) {
        $text = "{$PRE} {$TEXT} {$SUF}";
    }

    // we prepare the text a tiny bit to prevent spammers circumventing URL checks:
    // bare "www." addresses get an additional "http://" prefixed copy
    $text = preg_replace(
        '!(\\b)(www\\.[\\w.:?\\-;,]+?\\.[\\w.:?\\-;,]+?[\\w/\\#~:.?+=&%@\\!\\-.:?\\-;,]+?)([.:?\\-;,]*[^\\w/\\#~:.?+=&%@\\!\\-.:?\\-;,])!i',
        '\\1http://\\2 \\2\\3',
        $text
    );

    $wordblocks = getWordblocks();

    // read the list in chunks of 200 - this should work around the
    // MAX_PATTERN_SIZE in modern PCRE
    $chunksize = 200;

    while ($blocks = array_splice($wordblocks, 0, $chunksize)) {
        $re = array();

        // build regexp from blocks: strip "#" comments, skip empty lines
        foreach ($blocks as $block) {
            $block = preg_replace('/#.*$/', '', $block);
            $block = trim($block);
            if (empty($block)) {
                continue;
            }
            $re[] = $block;
        }

        // a chunk without any expression would build "#()#si", which matches everything
        if (!count($re)) {
            continue;
        }

        if (preg_match('#(' . join('|', $re) . ')#si', $text, $matches)) {
            // prepare event data
            $data = array();
            $data['matches'] = $matches;
            $data['userinfo']['ip'] = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : null;
            if (!empty($_SERVER['REMOTE_USER'])) {
                $data['userinfo']['user'] = $_SERVER['REMOTE_USER'];
                $data['userinfo']['name'] = isset($INFO['userinfo']['name']) ? $INFO['userinfo']['name'] : null;
                $data['userinfo']['mail'] = isset($INFO['userinfo']['mail']) ? $INFO['userinfo']['mail'] : null;
            }

            // create_function() no longer exists in PHP 8; a closure is the drop-in replacement
            $callback = function () {
                return true;
            };

            return trigger_event('COMMON_WORDBLOCK_BLOCKED', $data, $callback, true);
        }
    }

    return false;
}
/**
 * Spamcheck against wordlist
 *
 * Checks the wikitext in the global $TEXT against a list of blocked
 * expressions and returns true if the text contains any bad words.
 * Returns false when $conf['usewordblock'] is not enabled.
 *
 * @author Andreas Gohr <*****@*****.**>
 * @return bool true if a blocked expression was found
 */
function checkwordblock()
{
    global $TEXT;
    global $conf;

    // empty() keeps the original truthiness test but tolerates a missing key
    if (empty($conf['usewordblock'])) {
        return false;
    }

    // we prepare the text a tiny bit to prevent spammers circumventing URL checks:
    // bare "www." addresses get an additional "http://" prefixed copy
    $text = preg_replace(
        '!(\\b)(www\\.[\\w.:?\\-;,]+?\\.[\\w.:?\\-;,]+?[\\w/\\#~:.?+=&%@\\!\\-.:?\\-;,]+?)([.:?\\-;,]*[^\\w/\\#~:.?+=&%@\\!\\-.:?\\-;,])!i',
        '\\1http://\\2 \\2\\3',
        $TEXT
    );

    $wordblocks = getWordblocks();

    // how many lines to read at once (to work around some PCRE limits)
    if (version_compare(phpversion(), '4.3.0', '<')) {
        // old versions of PCRE define a maximum of parenthesises even if no
        // backreferences are used - the maximum is 99
        // this is very bad performancewise and may even be too high still
        $chunksize = 40;
    } else {
        // read file in chunks of 200 - this should work around the
        // MAX_PATTERN_SIZE in modern PCRE
        $chunksize = 200;
    }

    while ($blocks = array_splice($wordblocks, 0, $chunksize)) {
        $re = array();

        // build regexp from blocks: strip "#" comments, skip empty lines
        foreach ($blocks as $block) {
            $block = preg_replace('/#.*$/', '', $block);
            $block = trim($block);
            if (empty($block)) {
                continue;
            }
            $re[] = $block;
        }

        // a chunk made up only of comments/blank lines would build "#()#si",
        // which matches any text and would flag harmless input as spam
        if (!count($re)) {
            continue;
        }

        // the matches are not needed here, so no by-reference argument is passed
        if (preg_match('#(' . join('|', $re) . ')#si', $text)) {
            return true;
        }
    }

    return false;
}