Beispiel #1
0
/**
 * Spamcheck against wordlist
 *
 * Checks the wikitext against a list of blocked expressions (as returned by
 * getWordblocks()) and reports whether the text contains any bad words.
 *
 * Triggers COMMON_WORDBLOCK_BLOCKED
 *
 *  Action Plugins can use this event to inspect the blocked data
 *  and gain information about the user who was blocked.
 *
 *  Event data:
 *    data['matches']  - array of matches
 *    data['userinfo'] - information about the blocked user
 *      [ip]           - ip address
 *      [user]         - username (if logged in)
 *      [mail]         - mail address (if logged in)
 *      [name]         - real name (if logged in)
 *
 * @author Andreas Gohr <*****@*****.**>
 * @author Michael Klier <*****@*****.**>
 * @param  string $text - optional text to check, if not given the globals
 *                        $PRE, $TEXT and $SUF are joined and checked instead
 * @return bool         - false if word blocking is disabled or nothing matched;
 *                        otherwise whatever trigger_event() returns for the
 *                        COMMON_WORDBLOCK_BLOCKED event (the default action
 *                        passed to it returns true)
 */
function checkwordblock($text = '')
{
    global $TEXT;
    global $PRE;
    global $SUF;
    global $conf;
    global $INFO;

    if (!$conf['usewordblock']) {
        return false;
    }
    if (!$text) {
        $text = "{$PRE} {$TEXT} {$SUF}";
    }

    // we prepare the text a tiny bit to prevent spammers circumventing URL checks:
    // every bare "www.…" address is duplicated with an "http://" prefix
    $text = preg_replace('!(\\b)(www\\.[\\w.:?\\-;,]+?\\.[\\w.:?\\-;,]+?[\\w/\\#~:.?+=&%@\\!\\-.:?\\-;,]+?)([.:?\\-;,]*[^\\w/\\#~:.?+=&%@\\!\\-.:?\\-;,])!i', '\\1http://\\2 \\2\\3', $text);

    $wordblocks = getWordblocks();

    // how many lines to read at once (to work around some PCRE limits)
    if (version_compare(phpversion(), '4.3.0', '<')) {
        // old versions of PCRE define a maximum of parenthesises even if no
        // backreferences are used - the maximum is 99
        // this is very bad performancewise and may even be too high still
        $chunksize = 40;
    } else {
        // read file in chunks of 200 - this should work around the
        // MAX_PATTERN_SIZE in modern PCRE
        $chunksize = 200;
    }

    // array_splice() removes the chunk from $wordblocks, so the loop ends
    // once the list has been consumed
    while ($blocks = array_splice($wordblocks, 0, $chunksize)) {
        $re = array();
        // build regexp from blocks: strip trailing "# comments" and whitespace,
        // skip lines that end up empty
        foreach ($blocks as $block) {
            $block = preg_replace('/#.*$/', '', $block);
            $block = trim($block);
            if (empty($block)) {
                continue;
            }
            $re[] = $block;
        }

        // an empty alternation would match everything, so skip empty chunks
        if (!count($re)) {
            continue;
        }

        $matches = array();
        if (preg_match('#(' . join('|', $re) . ')#si', $text, $matches)) {
            // prepare event data
            $data = array();
            $data['matches'] = $matches;
            // REMOTE_ADDR may be missing (e.g. on the command line); keep null then
            $data['userinfo']['ip'] = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : null;
            if (!empty($_SERVER['REMOTE_USER'])) {
                $data['userinfo']['user'] = $_SERVER['REMOTE_USER'];
                $data['userinfo']['name'] = $INFO['userinfo']['name'];
                $data['userinfo']['mail'] = $INFO['userinfo']['mail'];
            }

            // default action: report the text as blocked.
            // An anonymous function replaces create_function(), which was
            // deprecated in PHP 7.2 and removed in PHP 8.0.
            $callback = function () {
                return true;
            };
            return trigger_event('COMMON_WORDBLOCK_BLOCKED', $data, $callback, true);
        }
    }
    return false;
}
Beispiel #2
0
/**
 * Spamcheck against wordlist
 *
 * Checks the global wikitext ($TEXT) against the list of blocked expressions
 * returned by getWordblocks().
 *
 * @author Andreas Gohr <*****@*****.**>
 * @return bool true if the text contains any bad words, false if nothing
 *              matched or word blocking is disabled in $conf['usewordblock']
 */
function checkwordblock()
{
    global $TEXT;
    global $conf;

    if (!$conf['usewordblock']) {
        return false;
    }

    // we prepare the text a tiny bit to prevent spammers circumventing URL checks:
    // every bare "www.…" address is duplicated with an "http://" prefix
    $text = preg_replace('!(\\b)(www\\.[\\w.:?\\-;,]+?\\.[\\w.:?\\-;,]+?[\\w/\\#~:.?+=&%@\\!\\-.:?\\-;,]+?)([.:?\\-;,]*[^\\w/\\#~:.?+=&%@\\!\\-.:?\\-;,])!i', '\\1http://\\2 \\2\\3', $TEXT);

    $wordblocks = getWordblocks();

    // how many lines to read at once (to work around some PCRE limits)
    if (version_compare(phpversion(), '4.3.0', '<')) {
        // old versions of PCRE define a maximum of parenthesises even if no
        // backreferences are used - the maximum is 99
        // this is very bad performancewise and may even be too high still
        $chunksize = 40;
    } else {
        // read file in chunks of 200 - this should work around the
        // MAX_PATTERN_SIZE in modern PCRE
        $chunksize = 200;
    }

    // array_splice() removes the chunk from $wordblocks, so the loop ends
    // once the list has been consumed
    while ($blocks = array_splice($wordblocks, 0, $chunksize)) {
        $re = array();
        // build regexp from blocks: strip trailing "# comments" and whitespace,
        // skip lines that end up empty
        foreach ($blocks as $block) {
            $block = preg_replace('/#.*$/', '', $block);
            $block = trim($block);
            if (empty($block)) {
                continue;
            }
            $re[] = $block;
        }

        // a chunk made only of comments/blank lines would yield the pattern
        // "#()#si", which matches every text - skip it instead
        if (!count($re)) {
            continue;
        }

        // the matches are not needed here, so no by-reference argument is passed
        if (preg_match('#(' . join('|', $re) . ')#si', $text)) {
            return true;
        }
    }
    return false;
}