Esempio n. 1
0
function rs_wpss_anchortxt_blacklist_chk($haystack = NULL, $get_list_arr = FALSE, $haystack_type = 'author', $url = NULL)
{
    /***
     * Author Keyword Blacklist Check
     * Use for testing Comment Author, New User Registrations, and anywhere else you need to test an author name.
     * This list assembled based on statistical analysis of common anchor text spam keyphrases.
     * Script creates all the necessary alphanumeric and linguistic variations to effectively test.
     *
     * @param string|null $haystack       Author name (type 'author') or content containing links (type 'content').
     * @param bool        $get_list_arr   When non-empty, skip testing and return the keyphrase list that would be used.
     * @param string      $haystack_type  'author' (default) or 'content'. Any other value performs no tests.
     * @param string|null $url            Author URL; when given, its domain is also tested against the author name.
     * @return mixed  The keyphrase list (as returned by rs_wpss_rbkmd()) when $get_list_arr is non-empty;
     *                otherwise TRUE if the haystack is considered blacklisted, FALSE if not (or if it is empty).
     ***/
    global $wpss_cl_active, $spamshield_options;
    if (empty($wpss_cl_active)) {
        /* Check if active for compatibility with CommentLuv */
        $wpss_cl_active = rs_wpss_is_plugin_active('commentluv/commentluv.php');
    }
    if (empty($spamshield_options)) {
        $spamshield_options = get_option('spamshield_options');
    }
    /* Check if Comment Author Name Keywords are allowed - treated the same as CommentLuv being active */
    $wpss_cak_active = !empty($spamshield_options['allow_comment_author_keywords']) ? 1 : 0;
    /* Relaxed mode: CommentLuv or "allow author keywords" is on, so author names may legitimately hold keywords */
    $relaxed_mode = (!empty($wpss_cl_active) || !empty($wpss_cak_active));

    /* NOTE(review): 'de' presumably tells rs_wpss_rbkmd() to decode the stored lists - confirm against that helper */
    $blacklisted_keyphrases = rs_wpss_rbkmd(rs_wpss_get_anchortxt_blacklist(), 'de', TRUE);
    $blacklisted_keyphrases_lite = rs_wpss_rbkmd(rs_wpss_get_anchortxt_blacklist_lite(), 'de', TRUE);
    /* Author names fall back to the lite list in relaxed mode, or when no URL accompanies the name */
    if ($haystack_type === 'author' && ($relaxed_mode || empty($url))) {
        $blacklisted_keyphrases = $blacklisted_keyphrases_lite;
    }
    if (!empty($get_list_arr)) {
        /* Caller only wants the list; content checks always use the lite list */
        return ($haystack_type === 'content') ? $blacklisted_keyphrases_lite : $blacklisted_keyphrases;
    }
    /* Goes after array: the list request must be served even when there is no haystack */
    if (empty($haystack)) {
        return FALSE;
    }
    if ($haystack_type === 'author') {
        /* Check 1: Testing for URLs and author domain in author name */
        if (preg_match("~^https?~i", $haystack)) {
            return TRUE;
        }
        if (!empty($url)) {
            $author_url_domain = rs_wpss_get_domain($url, TRUE);
            $author_url_domain_rgx = rs_wpss_get_regex_phrase($author_url_domain, '', 'N');
            if (preg_match($author_url_domain_rgx, $haystack)) {
                return TRUE;
            }
        }
        /* Check 2: Testing for max # words in author name, more than 7 is fail (10 when CL or CAK active) */
        $word_max = $relaxed_mode ? 10 : 7;
        if (rs_wpss_count_words($haystack) > $word_max) {
            return TRUE;
        }
        /***
         * Check 3: Testing for Odd Characters in author name
         * Default: any '@' or '*'. CL or CAK active: only a run of two or more '@', or any '*'.
         * (This also covers Check 4: *author name* surrounded by asterisks.)
         ***/
        $odd_char_regex = $relaxed_mode ? "~(\\@{2,}|\\*)+~" : "~[\\@\\*]+~";
        if (preg_match($odd_char_regex, $haystack)) {
            return TRUE;
        }
        /***
         * Check 5: Testing for numbers and cash references ('1000','$5000', etc) in author name
         * The pattern is single-quoted on purpose: inside a double-quoted PHP string "\$" collapses to a bare "$",
         * which PCRE reads as an end-of-subject anchor, so the dollar-amount alternative could never match.
         * Here "\$" reaches PCRE as a literal dollar sign, while the final "$" remains the end anchor.
         ***/
        if (!$relaxed_mode && preg_match('~(^|[\s\.])(\$([0-9]+)([0-9,\.]+)?|([0-9]+)([0-9,\.]{3,})|([0-9]{3,}))($|[\s])~', $haystack)) {
            return TRUE;
        }
        /* Final Check: The Blacklist */
        foreach ($blacklisted_keyphrases as $blacklisted_keyphrase) {
            $blacklisted_keyphrase_rgx = rs_wpss_regexify($blacklisted_keyphrase);
            $regex_check_phrase = rs_wpss_get_regex_phrase($blacklisted_keyphrase_rgx, '', 'authorkw');
            if (preg_match($regex_check_phrase, $haystack)) {
                return TRUE;
            }
        }
    } elseif ($haystack_type === 'content') {
        /***
         * Parse content for links with Anchor Text
         * Test 1: Coming Soon
         * For possible use later - from old filter: ((payday|students?|title|onli?ne|short([\s\.\-_]*)term)([\s\.\-_]*)loan|cash([\s\.\-_]*)advance)
         * Final Check: The Blacklist
         ***/
        $anchor_text_phrases = rs_wpss_parse_links($haystack, 'anchor_text');
        /* Each keyphrase regex is built at most once and reused for every anchor text (built lazily, on first use) */
        $regex_cache = array();
        foreach ($anchor_text_phrases as $anchor_text_phrase) {
            foreach ($blacklisted_keyphrases_lite as $i => $blacklisted_keyphrase) {
                if (!isset($regex_cache[$i])) {
                    $regex_cache[$i] = rs_wpss_get_regex_phrase(rs_wpss_regexify($blacklisted_keyphrase), '', 'authorkw');
                }
                if (preg_match($regex_cache[$i], $anchor_text_phrase)) {
                    return TRUE;
                }
            }
        }
    }
    return FALSE;
}
function rs_wpss_magic_parser($keyphrase_needles = array(), $haystack = NULL)
{
    /**
     * The Magic Parser
     * Scans a string of text for any one of a set of keyphrases.
     * Each keyphrase is passed through rs_wpss_regexify() and rs_wpss_get_regex_phrase() (mode 'authorkw'),
     * the same pair of helpers the final blacklist check in rs_wpss_anchortxt_blacklist_chk() uses,
     * and the resulting pattern is matched against the haystack.
     * @since 1.9.7.8
     * @param array $keyphrase_needles	Keyphrases to look for; must be a non-empty array
     * @param string $haystack			Text to search; must be a non-empty string
     * @return bool TRUE as soon as one keyphrase matches, FALSE if none match or if either argument is unusable
     */
    $needles_usable = is_array($keyphrase_needles) && !empty($keyphrase_needles);
    $haystack_usable = is_string($haystack) && !empty($haystack);
    if (!$needles_usable || !$haystack_usable) {
        return FALSE;
    }
    $found = FALSE;
    foreach ($keyphrase_needles as $needle) {
        $pattern = rs_wpss_get_regex_phrase(rs_wpss_regexify($needle), '', 'authorkw');
        if (preg_match($pattern, $haystack)) {
            /* First hit is enough - stop scanning */
            $found = TRUE;
            break;
        }
    }
    return $found;
}