function rs_wpss_anchortxt_blacklist_chk($haystack = NULL, $get_list_arr = FALSE, $haystack_type = 'author', $url = NULL) {
	/***
	* Author Keyword Blacklist Check
	* Use for testing Comment Author, New User Registrations, and anywhere else you need to test an author name.
	* The keyphrase lists come from rs_wpss_get_anchortxt_blacklist() and rs_wpss_get_anchortxt_blacklist_lite(),
	* decoded through rs_wpss_rbkmd(). Each keyphrase is expanded into a regex by rs_wpss_regexify() and
	* rs_wpss_get_regex_phrase() before being tested against the haystack.
	*
	* @param string|null $haystack      Text to test: an author name, or comment content when $haystack_type is 'content'.
	* @param bool        $get_list_arr  When truthy, skip all testing and return the keyphrase list that would be used.
	* @param string      $haystack_type 'author' (default) or 'content'. Any other value results in FALSE.
	* @param string|null $url           Author URL (author mode only). Used to detect the author's own domain in the name
	*                                   and to choose between the full and the lite keyphrase list.
	* @return bool|array TRUE if the haystack is blacklisted, FALSE if not (or if the haystack is empty);
	*                    the keyphrase list when $get_list_arr is truthy.
	***/

	/* Check if CommentLuv is active, for compatibility. The result is cached in the global. */
	global $wpss_cl_active;
	if (empty($wpss_cl_active)) {
		$wpss_cl_active = rs_wpss_is_plugin_active('commentluv/commentluv.php');
	}

	global $spamshield_options;
	if (empty($spamshield_options)) {
		$spamshield_options = get_option('spamshield_options');
	}

	/* Check if Comment Author Name Keywords are allowed - treated as equivalent to CommentLuv being active */
	$wpss_cak_active = !empty($spamshield_options['allow_comment_author_keywords']) ? 1 : 0;

	/* Relaxed mode: keywords in author names are tolerated, so several author checks are loosened or skipped */
	$relaxed_mode = (!empty($wpss_cl_active) || !empty($wpss_cak_active));

	$blacklisted_keyphrases      = rs_wpss_rbkmd(rs_wpss_get_anchortxt_blacklist(), 'de', TRUE);
	$blacklisted_keyphrases_lite = rs_wpss_rbkmd(rs_wpss_get_anchortxt_blacklist_lite(), 'de', TRUE);

	/* Author names are only tested against the full list when running in strict mode AND a URL was supplied */
	if ($haystack_type === 'author' && ($relaxed_mode || empty($url))) {
		$blacklisted_keyphrases = $blacklisted_keyphrases_lite;
	}

	/* List request: must come after the list selection above and before the empty-haystack guard below */
	if (!empty($get_list_arr)) {
		if ($haystack_type === 'content') {
			return $blacklisted_keyphrases_lite;
		}
		return $blacklisted_keyphrases;
	}

	if (empty($haystack)) {
		return FALSE;
	}

	if ($haystack_type === 'author') {

		/* Check 1: Testing for URLs and author domain in author name */
		if (preg_match("~^https?~i", $haystack)) {
			return TRUE;
		}
		if (!empty($url)) {
			$url_domain     = rs_wpss_get_domain($url, TRUE);
			$url_domain_rgx = rs_wpss_get_regex_phrase($url_domain, '', 'N');
			if (preg_match($url_domain_rgx, $haystack)) {
				return TRUE;
			}
		}

		/* Check 2: Testing for max # words in author name - more than 7 is fail (more than 10 in relaxed mode) */
		$word_max = $relaxed_mode ? 10 : 7;
		if (rs_wpss_count_words($haystack) > $word_max) {
			return TRUE;
		}

		/* Check 3: Testing for Odd Characters in author name. */
		/* Strict mode: any '@' or '*'. Relaxed mode: a single '@' is allowed; '@@' or any '*' still fails. */
		$odd_char_regex = $relaxed_mode ? "~(\\@{2,}|\\*)+~" : "~[\\@\\*]+~";
		if (preg_match($odd_char_regex, $haystack)) {
			return TRUE;
		}

		/***
		* Check 4: Testing for *author name* surrounded by asterisks (covered by the asterisk test in Check 3)
		* Check 5: Testing for numbers and cash references ('1000','$5000', etc) in author name - strict mode only
		* The pattern is single-quoted on purpose: inside a double-quoted PHP string a backslash-dollar collapses
		* to a bare dollar sign, which PCRE reads as an end-of-subject anchor instead of a literal dollar sign,
		* so '$5000' style cash references were never matched. The trailing bare dollar sign IS meant as an anchor.
		***/
		if (!$relaxed_mode && preg_match('~(^|[\s\.])(\$([0-9]+)([0-9,\.]+)?|([0-9]+)([0-9,\.]{3,})|([0-9]{3,}))($|[\s])~', $haystack)) {
			return TRUE;
		}

		/* Final Check: The Blacklist */
		foreach ($blacklisted_keyphrases as $blacklisted_keyphrase) {
			$blacklisted_keyphrase_rgx = rs_wpss_regexify($blacklisted_keyphrase);
			$regex_check_phrase        = rs_wpss_get_regex_phrase($blacklisted_keyphrase_rgx, '', 'authorkw');
			if (preg_match($regex_check_phrase, $haystack)) {
				return TRUE;
			}
		}

	} elseif ($haystack_type === 'content') {

		/***
		* Parse content for links with Anchor Text
		* Test 1: Coming Soon
		* For possible use later - from old filter: ((payday|students?|title|onli?ne|short([\s\.\-_]*)term)([\s\.\-_]*)loan|cash([\s\.\-_]*)advance)
		* Final Check: The Blacklist - content is always tested against the lite list
		***/
		$anchor_text_phrases = rs_wpss_parse_links($haystack, 'anchor_text');
		foreach ($anchor_text_phrases as $anchor_text_phrase) {
			foreach ($blacklisted_keyphrases_lite as $blacklisted_keyphrase) {
				$blacklisted_keyphrase_rgx = rs_wpss_regexify($blacklisted_keyphrase);
				$regex_check_phrase        = rs_wpss_get_regex_phrase($blacklisted_keyphrase_rgx, '', 'authorkw');
				if (preg_match($regex_check_phrase, $anchor_text_phrase)) {
					return TRUE;
				}
			}
		}

	}

	/* Nothing matched, or $haystack_type was not recognized */
	return FALSE;
}
function rs_wpss_magic_parser($keyphrase_needles = array(), $haystack = NULL) {
	/**
	 * The Magic Parser
	 * Scans a (possibly large) string of text for any one of a list of keyphrases.
	 * Each keyphrase is turned into a regex via rs_wpss_regexify() and rs_wpss_get_regex_phrase()
	 * using the 'authorkw' mode - the same mechanism rs_wpss_anchortxt_blacklist_chk() uses - so the
	 * text variations that get matched (accents, plurals, 1337/LEET, etc.) are whatever those helpers generate.
	 * @since 1.9.7.8
	 * @param array  $keyphrase_needles The array containing keyphrases to search haystack for
	 * @param string $haystack          The string of text to search, e.g. a contact form submission
	 * @return bool TRUE as soon as any keyphrase matches the haystack; FALSE if none match or if the arguments are empty or of the wrong type
	 */

	/* Guard: wrong argument types */
	if (!is_array($keyphrase_needles) || !is_string($haystack)) {
		return FALSE;
	}

	/* Guard: nothing to search for, or nothing to search in */
	if (empty($keyphrase_needles) || empty($haystack)) {
		return FALSE;
	}

	foreach ($keyphrase_needles as $needle) {
		$pattern = rs_wpss_get_regex_phrase(rs_wpss_regexify($needle), '', 'authorkw');
		if (preg_match($pattern, $haystack)) {
			return TRUE; /* First hit wins - no need to test the remaining keyphrases */
		}
	}

	return FALSE;
}