/**
 * Feeds the final verdict for a post back into the plonker.
 *
 * Does nothing unless $this->add is truthy. A confidently spammy verdict
 * passes the author IPs (with the score) to the plonker's addIPs(); a
 * confidently hammy verdict passes them to removeIPs(). Anything in
 * between leaves the plonker untouched.
 *
 * @param ISblamPost $post  post whose author IPs are reported
 * @param float      $score final score of the post
 * @param float      $cert  certainty of that score
 * @return void
 */
function reportResult(ISblamPost $post, $score, $cert) {
    if (!$this->add) {
        return;
    }

    $confidentSpam = ($score > 0.66) && ($cert > 0.75);
    $confidentHam  = ($score < -0.6) && ($cert > 0.7);

    if ($confidentSpam) {
        $this->plonker->addIPs($post->getAuthorIPs(), $score);
        return;
    }

    if ($confidentHam) {
        $this->plonker->removeIPs($post->getAuthorIPs());
    }
}
/**
 * Queues every IP and host related to the post for checking.
 *
 * Resets the per-post state ($this->addedhosts, $this->ips), hands each
 * author IP to checkIP(), hands the hostname and domain of every link to
 * checkHost(), and finally calls checkHostList().
 *
 * @param ISblamPost $p post about to be tested
 * @return void
 */
function preTestPost(ISblamPost $p) {
    $this->addedhosts = array();
    $this->ips = array();

    // Sender's own addresses.
    foreach ($p->getAuthorIPs() as $senderIP) {
        $this->checkIP($senderIP, "sender");
    }

    // Hosts of all linked websites.
    $links = $p->getLinks();
    if (!$links) {
        $links = array();
    }

    foreach ($links as $link) {
        $hostname = $link->getHostname();
        $domain   = $link->getDomain();
        $names    = array($hostname, $domain);

        // Hostname and domain are both known and differ: each one is
        // checked with the extra 0.75 argument.
        if ($hostname && $domain && $hostname !== $domain) {
            foreach ($names as $name) {
                $this->checkHost($name, "link", 0.75);
            }
            continue;
        }

        // Otherwise check whichever of the two is available, without the
        // extra argument.
        foreach ($names as $name) {
            if ($name) {
                $this->checkHost($name, "link");
            }
        }
    }

    $this->checkHostList();
}
/**
 * Gives a strong "not spam" result when the post comes from whitelisted IPs.
 *
 * The post is treated as whitelisted only when it reports at least one
 * author IP and EVERY reported IP is present in $this->whitelist.
 * Previously the flag was overwritten on each iteration, so only the last
 * IP in the list decided the outcome and a single whitelisted address
 * could mask any number of unknown ones.
 *
 * @param ISblamPost $p post being tested
 * @return array list of array(score, certainty, reason) results; empty
 *               when the post is not whitelisted
 */
function testPost(ISblamPost $p) {
    $out = array();

    $sawAnyIP = false;
    $allWhitelisted = true;

    foreach ($p->getAuthorIPs() as $ip) {
        $sawAnyIP = true;

        // Strict comparison: an IP must match a whitelist entry exactly,
        // without PHP's loose type juggling.
        if (!in_array($ip, $this->whitelist, true)) {
            $allWhitelisted = false;
            break;
        }
    }

    if ($sawAnyIP && $allWhitelisted) {
        $out[] = array(-1.0, self::CERTAINITY_HIGH, "Sent from whitelisted IP");
    }

    return $out;
}
/**
 * Scores the post by the reverse-DNS names of its author IPs.
 *
 * Each IP that resolves is matched against three regex fragments held on
 * the object: $this->isps (blacklisted ISPs), $this->whitelist and
 * $this->blacklist (TLD-style suffixes).
 *
 * Only the direct connection may be whitelisted, because the other
 * addresses can be forged, and the whitelist only counts when no
 * objectionable host was found at all.
 * NOTE(review): this assumes the first entry of getAuthorIPs() is the
 * direct connection - confirm against the ISblamPost implementation.
 *
 * Fixes over the previous version:
 *  - the "first IP" flag is now cleared even when the first IP has no
 *    reverse DNS, so a later address can no longer be mistaken for the
 *    direct connection and earn the whitelist;
 *  - the whitelist message names the host that was actually whitelisted,
 *    not whatever $rev held after the last loop iteration;
 *  - an empty value taken from an array result is skipped instead of
 *    being fed to preg_match().
 *
 * @param ISblamPost $p post being tested
 * @return array|null a single array(score, certainty, reason) when
 *                    whitelisted, a list of such arrays when blacklisted
 *                    hosts were found, null when there is nothing to report
 */
function testPost(ISblamPost $p) {
    $out = array();
    $isDirect = true;
    $whitelistedHost = null;

    foreach ($p->getAuthorIPs() as $ip) {
        // Remember whether this entry is the first one, then clear the
        // flag immediately so that `continue` below cannot leave it set.
        $direct = $isDirect;
        $isDirect = false;

        $rev = SblamURI::gethostbyaddr($ip);
        if (!$rev) {
            continue;
        }

        if (is_array($rev)) {
            // WTF? - not expected, but seen; log it and use the first entry.
            warn($rev, 'gethostbyaddr returned array');
            $rev = reset($rev);
            if (!$rev) {
                continue;
            }
        }

        if (preg_match('!(?:\\.|^)(?:' . $this->isps . ')$!', $rev)) {
            $out[] = array(0.5, self::CERTAINITY_LOW, "Sent from blacklisted ISP ({$rev})");
        } elseif ($direct && preg_match('!\\.(?:' . $this->whitelist . ')$!', $rev)) {
            $whitelistedHost = $rev;
        } elseif (preg_match('!\\.(?:' . $this->blacklist . ')$!', $rev)) {
            $out[] = array(0.35, self::CERTAINITY_LOW, "Sent from blacklisted TLD ({$rev})");
        }
    }

    if (!count($out) && $whitelistedHost !== null) {
        return array(-0.25, self::CERTAINITY_LOW, "Sent from whitelisted TLD ({$whitelistedHost})");
    }

    if (count($out)) {
        return $out;
    }

    return null;
}
/**
 * Scores the post by heuristics over its HTTP request headers.
 *
 * Each rule that fires appends one array(score, certainty, reason) entry.
 * Positive scores point towards spam, negative ones away from it. Rules
 * are evaluated in a fixed order and are independent of each other.
 *
 * @param ISblamPost $p post being tested
 * @return array|null list of results, or null when no headers are
 *                    available or no rule fired
 */
function testPost(ISblamPost $p) {
    $headers = $p->getHeaders();

    // Original author's note: HTTP_HOST is hardcoded, so fewer than two
    // headers is treated as "no header information".
    if (!$headers || count($headers) < 2) {
        return NULL;
    }

    // Frequently used headers; '' stands for "missing or empty".
    $hasUA   = !empty($headers['HTTP_USER_AGENT']);
    $ua      = $hasUA ? $headers['HTTP_USER_AGENT'] : '';
    $hasRef  = !empty($headers['HTTP_REFERER']);
    $referer = $hasRef ? $headers['HTTP_REFERER'] : '';
    $noVia   = empty($headers['HTTP_VIA']);

    $out = array();

    if (!empty($headers['HTTP_MOD_SECURITY_MESSAGE'])) {
        $out[] = array(1, self::CERTAINITY_HIGH, "mod_security warning");
    }

    // Buggy .Net always adds a header that is only needed for large forms
    // (and browsers tend not to use it).
    if (!empty($headers['HTTP_EXPECT'])
        && false !== strpos($headers['HTTP_EXPECT'], '100-')
        && strlen($p->getRawContent()) < 5000
    ) {
        $out[] = array(0.3, self::CERTAINITY_NORMAL, "100-expect .Net header");
    }

    // Bots tend to send these.
    if (!empty($headers['HTTP_PRAGMA'])) {
        $pragmaScore = $noVia ? 0.3 : 0.1;
        $out[] = array($pragmaScore, self::CERTAINITY_LOW, "Pragma header");
    }
    if (!empty($headers['HTTP_RANGE'])) {
        $out[] = array(0.5, self::CERTAINITY_HIGH, "Range header");
    }
    if (!empty($headers['HTTP_PROXY_CONNECTION'])) {
        $out[] = array(0.2, self::CERTAINITY_LOW, "Proxy-Connection header");
    }
    if ($hasRef) {
        $linkCount = substr_count($referer, "http://");
        if ($linkCount > 1) {
            $out[] = array(min(1.5, 0.5 + $linkCount / 6), self::CERTAINITY_HIGH, "Multiple links in referrer");
        }
    }

    $cnt = count($p->getAuthorIPs());
    if ($cnt > 4) {
        $relayCertainty = $cnt > 7 ? self::CERTAINITY_HIGH : self::CERTAINITY_NORMAL;
        $out[] = array(($cnt - 2) / 10, $relayCertainty, "Insane number of relays ({$cnt})");
    }

    // Unpatched IE!?
    if ($hasUA && preg_match('/MSIE [456]\\.[0-9]; Windows (?:9|NT 5)/', $ua)) {
        $out[] = array(0.3, self::CERTAINITY_NORMAL, "Unpatched IE");
    }

    // Browsers almost always send these.
    if (empty($headers['HTTP_ACCEPT'])) {
        $out[] = array(0.7, self::CERTAINITY_NORMAL, "Missing Accept header");
    }
    if (!$hasUA) {
        $out[] = array(1, self::CERTAINITY_NORMAL, "Missing UA header");
    }
    if (empty($headers['HTTP_ACCEPT_LANGUAGE'])) {
        $out[] = array(0.5, self::CERTAINITY_NORMAL, "Missing Accept-Language header");
    }
    if (empty($headers['HTTP_ACCEPT_ENCODING']) && $noVia) {
        // Skipped for user agents starting the classic MSIE signature.
        if (!$hasUA || false === strpos($ua, 'Mozilla/4.0 (compatible; MSIE ')) {
            $out[] = array(0.4, self::CERTAINITY_LOW, "Missing Accept-Encoding header");
        }
    }
    if (!empty($headers['HTTP_ACCEPT_CHARSET'])) {
        $out[] = array(-0.2, self::CERTAINITY_LOW, "Has Accept-Charset header");
    }

    // Non-transparent proxy must add Via header.
    $looksProxied = !empty($headers['HTTP_X_FORWARDED_FOR']) || !empty($headers['HTTP_MAX_FORWARDS']);
    if ($noVia && $looksProxied) {
        $out[] = array(0.2, self::CERTAINITY_LOW, "Lame proxy");
    }

    // TE: requires Connection:TE
    if (!empty($headers['HTTP_TE'])) {
        $connectionHasTE = !empty($headers['HTTP_CONNECTION'])
            && preg_match('!\\bTE\\b!', $headers['HTTP_CONNECTION']);
        if (!$connectionHasTE) {
            $out[] = array(0.2, self::CERTAINITY_NORMAL, "Invalid TE header");
        }
    }

    // Googlebot doesn't post comments!
    if ($hasUA && preg_match('!Googlebot[/ -]|Slurp|Wget/|W3C_Validator|Advertise\\.com|nicebot|MMCrawler/|MSIECrawler|ia_archiver|WebaltBot/|nutbot\\.com|\\+http://search\\.!', $ua)) {
        $out[] = array(1, self::CERTAINITY_NORMAL, "Bots don't post comments");
    }

    // Headless browsers no thanks.
    if ($hasUA && preg_match('!PhantomJS|CasperJS!', $ua)) {
        $out[] = array(1, self::CERTAINITY_HIGH, "Nice try, PhantomJS");
    }

    // A misspelled header name, or the header name repeated inside its value.
    $uaNameInValue = $hasUA && preg_match('!^User-Agent!i', $ua);
    if (!empty($headers['HTTP_USERAGENT']) || $uaNameInValue) {
        $out[] = array(1, self::CERTAINITY_NORMAL, "Really badly written bot");
    }

    // I assume multipart forms are too tricky for most bots.
    if (!empty($headers['HTTP_CONTENT_LENGTH'])
        && !empty($headers['HTTP_CONTENT_TYPE'])
        && preg_match('!^\\s*multipart/form-data\\s*;\\s*boundary\\s*=!i', $headers['HTTP_CONTENT_TYPE'])
    ) {
        $out[] = array(-0.2, self::CERTAINITY_LOW, "Multipart form");
    }

    // Browsers nicely decode and normalize paths, and remove the fragment part.
    $path = $p->getPath();
    if ($path && preg_match('!&|^https?://|^//|/%7e|#|\\.\\./!i', $path)) {
        $out[] = array(0.3, self::CERTAINITY_NORMAL, "Improperly encoded path");
    }
    if ($hasRef && preg_match('!&|/%7e|\\.\\./!i', $referer)) {
        $out[] = array(0.25, self::CERTAINITY_LOW, "Improperly encoded referer");
    }

    return count($out) ? $out : null;
}
/**
 * Puts a short ban on the author IPs of a post judged to be near-certain spam.
 *
 * For a score above 1.2 with certainty above 0.95, each author IP gets an
 * 'ip-ban:<ip>' entry in APC holding the expiry timestamp, stored with a
 * 5 second TTL.
 *
 * @param ISblamPost $p     post whose author IPs may be banned
 * @param float      $score final score of the post
 * @param float      $cert  certainty of that score
 * @return void
 * @throws Exception when apc_store() is unavailable (checked on every
 *                   call, regardless of the score)
 */
function reportResult(ISblamPost $p, $score, $cert) {
    if (!function_exists('apc_store')) {
        throw new Exception("NO APC");
    }

    $nearCertainSpam = ($score > 1.2) && ($cert > 0.95);
    if (!$nearCertainSpam) {
        return;
    }

    $banSeconds = 5; // block for 5 sec
    foreach ($p->getAuthorIPs() as $ip) {
        apc_store('ip-ban:' . $ip, time() + $banSeconds, $banSeconds);
    }
}