/**
 * Scores a link against the blacklist, discounting a threshold that grows
 * with the number of dot-separated parts in the link's domain.
 *
 * The threshold is the number of parts in the domain plus one, but never
 * lower than 2. The hostname is looked up in $this->blacklist; when the
 * lookup result reaches the threshold, the amount by which it exceeds
 * (threshold - 1) is returned, otherwise 0.
 *
 * NOTE(review): the meaning of the value returned by blacklist->check() is
 * not visible here; it is only compared and subtracted numerically.
 *
 * @param SblamURI $link link whose hostname is looked up
 * @return int|mixed 0 when below the threshold, otherwise result + 1 - threshold
 */
protected function check(SblamURI $link)
{
    $domainParts = explode('.', $link->getDomain());
    $threshold = max(2, count($domainParts) + 1);

    $hits = $this->blacklist->check($link->getHostname());

    return ($hits >= $threshold) ? ($hits + 1 - $threshold) : 0;
}
/**
 * Registers lookup keys derived from a link into $urls (as key => true).
 *
 * Nothing is added when the link reports itself as a TLD. Otherwise, when
 * the link has a hostname, up to three keys are added, each prepended with
 * $prefix:
 *   - the normalised hostname (leading "www." stripped, digit runs collapsed),
 *   - the domain, if the link has one (not normalised),
 *   - the normalised hostname followed by a shortened path, unless that
 *     shortened path is just "/".
 *
 * Finally, the URL-decoded path is scanned for an embedded "site:host" or
 * "http(s)://host" reference; if one is found, the method recurses on
 * "http://host" with the same prefix.
 *
 * @param array    $urls   set of keys, modified in place
 * @param SblamURI $link   link to extract keys from
 * @param string   $prefix string prepended to every key added
 * @return void
 */
function addURI(array &$urls, SblamURI $link, $prefix = '')
{
    if ($link->isTLD()) {
        return;
    }

    $host = $link->getHostname();
    if ($host) {
        // Drop a leading "www." and replace every run of two or more digits
        // with "D" (original author's note: normalising digits is meant to
        // block bulk registrations).
        $host = preg_replace(array('!^www\\.!', '!\\d\\d+!'), array('', 'D'), $host);
        $urls[$prefix . $host] = true;

        $domain = $link->getDomain();
        if ($domain) {
            $urls[$prefix . $domain] = true;
        }

        $path = $link->getPath();
        if ($path) {
            // Keep only the start of the path: the leading slash, 1-7
            // characters other than "#", then up to 5 more characters that
            // are none of "#", "/" or "?". Original author's note: this is
            // mainly for capturing real tinyurl-style addresses rather than
            // every spammy subpage.
            $shortPath = preg_replace('!^(/[^#]{1,7}[^#/\\?]{0,5}).*$!', '\\1', $path);
            if ($shortPath !== '/') {
                $urls[$prefix . $host . $shortPath] = true;
            }
        }
    }

    // Look for another host referenced inside the (decoded) path and, if
    // present, process it as a link of its own.
    $decodedPath = urldecode($link->getPath());
    if (preg_match('!\\b(?:site:|https?://)([a-zA-Z0-9.-]+)!', $decodedPath, $embedded)) {
        $this->addURI($urls, new SblamURI('http://' . $embedded[1]), $prefix);
    }
}