Beispiel #1
0
 /**
  * Collects the sender's IPs and the hosts of all linked websites for checking.
  *
  * Resets the per-post host and IP lists, hands every author IP to checkIP()
  * tagged "sender", hands every link's hostname and domain to checkHost()
  * tagged "link", and finishes by calling checkHostList().
  *
  * @param ISblamPost $p post being examined
  */
 function preTestPost(ISblamPost $p)
 {
     $this->addedhosts = array();
     $this->ips = array();

     // Sender's own addresses
     foreach ($p->getAuthorIPs() as $senderip) {
         $this->checkIP($senderip, "sender");
     }

     // Hosts of every linked website; a falsy link list means nothing to walk
     $postlinks = $p->getLinks();
     if (!$postlinks) {
         $postlinks = array();
     }

     foreach ($postlinks as $link) {
         $hostname = $link->getHostname();
         $basedomain = $link->getDomain();

         if ($hostname && $basedomain && $hostname !== $basedomain) {
             // Both names are present and differ: each is passed with 0.75 as third argument
             $this->checkHost($hostname, "link", 0.75);
             $this->checkHost($basedomain, "link", 0.75);
             continue;
         }

         // Otherwise pass whichever names are present, without the third argument
         if ($hostname) {
             $this->checkHost($hostname, "link");
         }
         if ($basedomain) {
             $this->checkHost($basedomain, "link");
         }
     }

     $this->checkHostList();
 }
Beispiel #2
0
 /**
  * Scores a post by looking up its author URI and links in the chongqed.org blacklist.
  *
  * The blacklist is imported on first use from $this->blacklistfile.
  *
  * @param ISblamPost $p post being examined
  * @return array|null array(score, certainty, reason) when at least one URI
  *                    produced a non-zero check() result, NULL otherwise
  * @throws Exception when the blacklist file cannot be imported
  */
 function testPost(ISblamPost $p)
 {
     if ($this->blacklist === NULL) {
         $this->blacklist = new DomainMatch();
         if (!$this->importChongqed($this->blacklistfile)) {
             // Drop the empty matcher again; otherwise the next call would see a
             // non-NULL blacklist, skip the import and silently match nothing.
             $this->blacklist = NULL;
             throw new Exception("Unable to import chongqed.org blacklist from {$this->blacklistfile}");
         }
     }

     $res4 = 0;
     $domains = array();

     // Author's website, if given
     if ($uri = $p->getAuthorURI()) {
         $uri = new SblamURI($uri);
         if ($tmp = $this->check($uri)) {
             $domains[$uri->getHostname()] = true;
             $res4 += $tmp;
         }
     }

     // Links in the post; skip the loop when there is no link list to iterate
     $links = $p->getLinks();
     if ($links) {
         foreach ($links as $link) {
             if ($tmp = $this->check($link)) {
                 $domains[$link->getHostname()] = true;
                 $res4 += $tmp;
             }
         }
     }

     if ($res4) {
         return array(0.8, self::CERTAINITY_NORMAL, "Blacklisted domains (" . implode(', ', array_keys($domains)) . ")");
     }
     return NULL;
 }
Beispiel #3
0
    /**
     * Starts an asynchronous XML-RPC "slv" request containing the post's links.
     *
     * Links are skipped when their domain equals their hostname, when the
     * domain contains two or more dots, or when the domain is listed in
     * $this->ignore. Each remaining link is sent as "URI ; label", with the
     * label reduced to the characters a-z, 0-9, '.' and '-' and cut to 50 bytes.
     *
     * @param ISblamPost $p post being examined
     * @return bool|null NULL when no link qualifies (no request is made);
     *                   otherwise whether requestAsync() returned a non-NULL handle
     */
    function startTest(ISblamPost $p)
    {
        $this->fp = NULL;
        $count = 0;
        $alllinks = "# only links extracted from post;\n";

        // Skip the loop when there is no link list to iterate
        $links = $p->getLinks();
        if ($links) {
            foreach ($links as $link) {
                // linksleeve doesn't support 2-level domains
                $domain = $link->getDomain();
                if ($domain === $link->getHostname() || substr_count($domain, '.') >= 2) {
                    d($link->getURI(), "Skipping because of linksleeve bug");
                    continue;
                }
                if (isset($this->ignore[$domain])) {
                    d($domain, 'skipped linksleeve');
                    continue;
                }
                $alllinks .= $link->getURI() . " ; " . substr(preg_replace('/[^a-z0-9.-]+/i', '', $link->getLabel()), 0, 50) . "\n";
                $count++;
            }
        }

        // Nothing worth sending
        if (!$count) {
            return NULL;
        }

        $query = '<?xml version="1.0"?><methodCall>
			<methodName>slv</methodName>
			<params>
				<param>
				<value><string>' . htmlspecialchars($alllinks) . '</string></value>
				</param>
			</params>
		</methodCall>';
        $this->fp = $this->services->getHTTP()->setHost(self::API_HOST)->setPath(self::API_PATH)->setPost($query, 'text/xml')->setTimeout($this->timeout)->requestAsync();
        return $this->fp != NULL;
    }
Beispiel #4
0
 /**
  * Passes the hostname and domain of every link in the post to checkHost().
  *
  * Resets $this->addedhosts first. Empty hostnames and domains are skipped.
  *
  * @param ISblamPost $p post being examined
  */
 function preTestPost(ISblamPost $p)
 {
     $this->addedhosts = array();

     $postlinks = $p->getLinks();
     if (!$postlinks) {
         return;
     }

     foreach ($postlinks as $link) {
         $hostname = $link->getHostname();
         if ($hostname) {
             $this->checkHost($hostname);
         }

         $basedomain = $link->getDomain();
         if ($basedomain) {
             $this->checkHost($basedomain);
         }
     }
 }
Beispiel #5
0
 /**
  * Scores a post by the number of blocklisted keywords it contains.
  *
  * Three counts are taken: testText() over the post text plus author name,
  * keywords found in link labels, and keyword occurrences in the lower-cased
  * author URI, author e-mail and link URIs.
  *
  * @param ISblamPost $p post being examined
  * @return array|null list of array(score, certainty, reason) entries, one per
  *                    non-zero count; NULL when there are no keywords to test
  *                    against or nothing matched
  */
 function testPost(ISblamPost $p)
 {
     if ($this->keywords === NULL) {
         $this->importBlocklist2($this->blocklist);
     }
     // Truthiness check rather than count(): still bails out cleanly if the
     // import left $this->keywords NULL, where count() would warn or throw.
     if (!$this->keywords) {
         return NULL;
     }

     $res1 = $this->testText($p->getText() . ' ' . $p->getAuthorName());
     $res2 = 0;
     $res3 = 0;

     // All URI-like strings, lower-cased and space-separated
     $alluris = '';
     if ($uri = $p->getAuthorURI()) {
         $alluris .= strtolower($uri);
     }
     if ($uri = $p->getAuthorEmail()) {
         $alluris .= ' ' . strtolower($uri);
     }

     // Skip the loop when there is no link list to iterate
     $links = $p->getLinks();
     if ($links) {
         foreach ($links as $link) {
             if ($label = $link->getLabel()) {
                 $res2 += count(array_intersect($this->getKeywordsFromText($label), $this->keywords));
             }
             if ($uri = $link->getURI()) {
                 $alluris .= ' ' . strtolower($uri);
             }
         }
     }

     // Replacing each keyword with itself leaves the text unchanged; the call is
     // made only for its 4th argument, which receives the number of replacements.
     str_replace($this->keywords, $this->keywords, $alluris, $res3);

     $sum = $res1 + $res2 + $res3;
     if (!$sum) {
         // A score for "No banned keywords" (-0.1, low certainty) exists only as
         // commented-out code in the original; no result is reported here.
         return NULL;
     }

     $out = array();
     if ($res1) {
         $out[] = array(1.2 - 1 / $res1, $sum > 2 ? self::CERTAINITY_HIGH : self::CERTAINITY_NORMAL, "Banned keywords in text ({$res1})");
     }
     if ($res2) {
         $out[] = array(1.2 - 1 / ($res2 + 1), self::CERTAINITY_HIGH, "Banned keywords in link labels ({$res2})");
     }
     if ($res3) {
         $out[] = array(1.2 - 1 / $res3, $sum > 2 ? self::CERTAINITY_HIGH : self::CERTAINITY_NORMAL, "Banned keywords in URLs ({$res3})");
     }

     return count($out) ? $out : NULL;
 }
Beispiel #6
0
 /**
  * Scores a post by how many links it carries relative to its word count.
  *
  * @param ISblamPost $p post being examined
  * @return array|null array(score, certainty, reason); NULL when the link list
  *                    is NULL, or when a post without links has 20 bytes of text or fewer
  */
 function testPost(ISblamPost $p)
 {
     $links = $p->getLinks();
     if ($links === NULL) {
         return NULL;
     }

     $linkcount = count($links);
     // The author's link is counted separately: it may be unrelated to the post's
     // contents, so it must not skew the link/words ratio computed further down.
     $totallinks = $linkcount + ($p->getAuthorURI() ? 1 : 0);

     switch ($totallinks) {
         case 0:
             // No "no links" bonus for posts with no content: no content is
             // abnormal and may be another way to spam.
             return strlen($p->getText()) > 20
                 ? array(-0.5, self::CERTAINITY_NORMAL, "No links")
                 : NULL;
         case 1:
             return array(0.1, self::CERTAINITY_LOW, "Single link");
         case 2:
             return array(0.2, self::CERTAINITY_LOW, "Two links");
     }

     // Split on non-word runs and on URLs; at most 500 pieces are produced.
     $words = preg_split('![^a-z0-9\\x7F-\\xFF-]+|https?://[^\\]\\[\\s\'"<>]+!i', $p->getText(), 500, PREG_SPLIT_NO_EMPTY);
     $numwords = count($words);

     // Long posts may legitimately have more links. No hard limit can be set,
     // because wiki pages may contain lots of links.
     $ratio = round($linkcount * 100 / (10 + $numwords));

     if ($ratio > 22) {
         $verdict = array(0.45, self::CERTAINITY_NORMAL, "Flooded with links (A{$ratio}: {$linkcount} per {$numwords} words)");
     } elseif ($ratio > 17) {
         $verdict = array(0.35, self::CERTAINITY_NORMAL, "Flooded with links (B{$ratio}: {$linkcount} per {$numwords} words)");
     } elseif ($ratio > 12) {
         $verdict = array(0.25, self::CERTAINITY_NORMAL, "Flooded with links (C{$ratio}: {$linkcount} per {$numwords} words)");
     } elseif ($ratio > 6) {
         $verdict = array(0.25, self::CERTAINITY_NORMAL, "Lots of links (D{$ratio}: {$linkcount} per {$numwords} words)");
     } else {
         $verdict = array(0.25, self::CERTAINITY_LOW, "Some links (E{$ratio}: {$linkcount} per {$numwords} words)");
     }

     return $verdict;
 }
Beispiel #7
0
 /**
  * Gathers the URIs associated with a post: author URI, post links and author e-mail.
  *
  * Entries are accumulated through addURI() / addEmail() into an array whose
  * keys form the result.
  *
  * @param ISblamPost $p post being examined
  * @return array keys of the accumulated URI map
  */
 protected function extractURIsFromPost(ISblamPost $p)
 {
     $uris = array();

     if ($uri = $p->getAuthorURI()) {
         $this->addURI($uris, new SblamURI($uri));
     }

     // Skip the loop when there is no link list to iterate
     $links = $p->getLinks();
     if ($links) {
         foreach ($links as $link) {
             $this->addURI($uris, $link);
         }
     }

     $this->addEmail($uris, $p->getAuthorEmail());

     return array_keys($uris);
 }
Beispiel #8
0
 /**
  * Extracts words from the labels of all links in a post.
  *
  * Labels are joined with spaces and passed to self::extractWords() together
  * with $this->db->ignore.
  *
  * @param ISblamPost $p post being examined
  * @return mixed whatever self::extractWords() returns for the joined labels
  */
 protected function extractWordsFromLinks(ISblamPost $p)
 {
     // test link labels specifically
     $labels = '';

     // Skip the loop when there is no link list to iterate
     $links = $p->getLinks();
     if ($links) {
         foreach ($links as $link) {
             $labels .= ' ' . $link->getLabel();
         }
     }

     return self::extractWords($labels, $this->db->ignore);
 }