コード例 #1
0
 /**
  * Ask the generic categorizer for the category of a website and, when one
  * is found, record the association and log it.
  *
  * @param string $www Website name handed to generic_categorize::GetCategories().
  * @return mixed The category reported by the categorizer, or null when it
  *               reported none.
  */
 private function CategoriesFamily($www)
 {
     include_once dirname(__FILE__) . "/class.squid.categorize.generic.inc";

     $categorizer = new generic_categorize();
     $category = $categorizer->GetCategories($www);

     // Nothing found: leave without recording or logging anything.
     if ($category == null) {
         return;
     }

     $this->ADD_CATEGORYZED_WEBSITE($www, $category);
     writelogs("Generic Category {$category} for {$www} done", __CLASS__ . "/" . __FUNCTION__, __FILE__, __LINE__);

     return $category;
 }
コード例 #2
0
/**
 * Categorize every host listed in a text file (one host name or IP address
 * per line).
 *
 * Progress is kept across runs in two serialized maps:
 *   - /root/translated_parse : host => category, for hosts that got a category
 *   - /root/nodetect_parse   : host => true, for hosts no source could categorize
 * A host present in either map is skipped. The relevant map is rewritten to
 * disk after every new entry, so an interrupted run keeps what it has done.
 *
 * The input file is opened before anything else; when it cannot be opened the
 * function prints an error and returns without loading the maps.
 *
 * @param string $filename Path of the host list to process.
 * @return void
 */
function parse_txt($filename)
{
    // Fail fast: no point loading the caches when there is nothing to read.
    $handle = @fopen($filename, "r");
    if (!$handle) {
        echo "Failed to open file {$filename}\n";
        return;
    }

    // A missing or corrupt cache file makes unserialize() return false;
    // start from an empty map in that case instead of indexing into false.
    echo "Loading translated_parse\n";
    $MAINZ = @unserialize((string) @file_get_contents("/root/translated_parse"));
    if (!is_array($MAINZ)) {
        $MAINZ = array();
    }
    echo "Loading nodetect_parse\n";
    $NODETECT = @unserialize((string) @file_get_contents("/root/nodetect_parse"));
    if (!is_array($NODETECT)) {
        $NODETECT = array();
    }

    $gg = new generic_categorize();
    $ipClass = new IP();

    // Line counter, only used in the log prefix.
    $c = 0;

    // Loop on fgets() itself: it returns false at end of file and on read
    // errors, so the loop can neither spin forever nor process a bogus
    // trailing "line" the way a feof()-driven loop can.
    while (($line = fgets($handle, 4096)) !== false) {
        $c++;

        // Strip quoting artefacts around the host name.
        $www = trim($line);
        $www = str_replace('"', "", $www);
        $www = stripslashes($www);
        $www = str_replace("'", "", $www);
        if ($www === "") {
            continue;
        }

        $date = date("Y-m-d H:i:s");
        $logprefix = "[{$date}]: {$c} {$www} ";
        echo "{$logprefix} ";

        if (isset($MAINZ[$www])) {
            echo "already done\n";
            continue;
        }
        if (isset($NODETECT[$www])) {
            echo "already done\n";
            continue;
        }

        if ($ipClass->isIPAddress($www)) {
            $ipaddr = $www;
            $resolved = gethostbyaddr($ipaddr);
            // gethostbyaddr() returns false on malformed input; keep the
            // address itself as the key rather than replacing it with false.
            if ($resolved !== false) {
                $www = $resolved;
            }
        } else {
            $ipaddr = gethostbyname($www);
        }
        echo " -{$ipaddr}- ";

        // The reverse lookup may have changed $www, so check the caches again.
        if (isset($MAINZ[$www])) {
            echo "already done\n";
            continue;
        }
        if (isset($NODETECT[$www])) {
            echo "already done\n";
            continue;
        }

        // First source: the local generic categorizer.
        $category_artica = $gg->GetCategories($www);
        if ($category_artica != null) {
            echo "{$logprefix} {$www} -> ARTICA {$category_artica}\n";
            $MAINZ[$www] = $category_artica;
            @file_put_contents("/root/translated_parse", serialize($MAINZ));
            continue;
        }

        // Second source: GetCategory().
        $cat = GetCategory("http://{$www}");
        if (is_numeric($cat)) {
            // NOTE(review): a numeric result is skipped without being cached,
            // presumably because it is a status/error code - confirm against
            // GetCategory().
            echo "{$www} -> continue;\n";
            continue;
        }
        if ($cat != null) {
            echo "{$www} -> {$cat};\n";
            $MAINZ[$www] = $cat;
            @file_put_contents("/root/translated_parse", serialize($MAINZ));
            continue;
        }

        // Nobody knows this host: remember that so it is not retried.
        echo "{$www} -> NOPE;\n";
        $NODETECT[$www] = true;
        @file_put_contents("/root/nodetect_parse", serialize($NODETECT));
    }

    fclose($handle);
}
コード例 #3
0
/**
 * Build the HTML list of category proposals for a website.
 *
 * The host name is matched against a fixed, ordered table of keyword/domain
 * patterns; every matching rule contributes its categories. The category
 * returned by generic_categorize::GetCategories(), if any, is added last.
 * Each distinct category is rendered as one link calling the JavaScript
 * function PerformProposal(category, site, md5), in first-match order.
 *
 * @param string $www Website name.
 * @return string HTML fragment (one <div> per proposed category, joined by a
 *                space); an empty string when nothing is proposed.
 */
function proposal($www)
{
    // Ordered rule table: PCRE pattern => categories proposed on a match.
    // The order is significant: it fixes the order of the generated links.
    static $rules = array(
        '#music#'               => array('music'),
        '#movie#'               => array('movies', 'audio-video'),
        '#radio#'               => array('webradio'),
        '#skyrock#'             => array('webradio'),
        '#journal#'             => array('blog'),
        '#shop#'                => array('shopping'),
        '#vintage#'             => array('shopping'),
        '#xxx#'                 => array('porn'),
        '#career#'              => array('jobsearch'),
        '#[-_]fm#'              => array('webradio'),
        '#about\.com$#'         => array('dictionaries'),
        '#politic#'             => array('politic'),
        '#soiree#'              => array('recreation/nightout'),
        '#tv\.#'                => array('webtv'),
        '#school#'              => array('recreation/schools'),
        '#mobile#'              => array('mobile-phone'),
        '#tvprogram#'           => array('webtv'),
        '#\.musiwave\.com$#'    => array('ringtones'),
        '#\.2o7\.net#'          => array('tracker'),
        '#warcraft#'            => array('games'),
        '#\.fm$#'               => array('webradio'),
        '#soft#'                => array('science/computing'),
        '#tvideos#'             => array('webtv'),
        '#sex#'                 => array('porn'),
        '#blip\.tv$#'           => array('webtv'),
        '#car.*insurance#'      => array('finance/insurance'),
        '#health.*insurance#'   => array('finance/insurance'),
        '#home.*insurance#'     => array('finance/insurance'),
        '#\.disqus\.com$#'      => array('socialnet'),
        '#twenga\.[a-z]+$#'     => array('shopping'),
        '#\.maases\.com$#'      => array('music'),
        '#\.zankyou\.com$#'     => array('socialnet'),
        '#\.wikipedia\.org$#'   => array('dictionaries'),
        '#\.wikia\.com$#'       => array('dictionaries'),
        '#\.gameleads\.ru$#'    => array('publicite'),
        '#immobilier#'          => array('finance/realestate'),
        '#\.icplatform\.com$#'  => array('reaffected'),
        '#mailing#'             => array('mailing'),
        // A doubled "*" quantifier does not compile in PCRE, so this rule
        // has to be spelled out literally to ever match.
        '#porn#'                => array('porn'),
        '#sport#'               => array('recreation/sports'),
        '#news#'                => array('news'),
        '#amicale#'             => array('associations'),
        '#video#'               => array('movies', 'audio-video'),
        '#game#'                => array('games'),
    );

    // The fingerprint is computed on the argument exactly as received,
    // i.e. before trimming.
    // NOTE(review): presumably callers key on the md5 of the raw value - confirm.
    $md5 = md5($www);
    $www = trim($www);

    // Category => true; used as an ordered set so duplicates collapse.
    $f = array();
    foreach ($rules as $pattern => $categories) {
        if (preg_match($pattern, $www)) {
            foreach ($categories as $category) {
                $f[$category] = true;
            }
        }
    }

    $p = new generic_categorize();
    $ccc = $p->GetCategories($www);
    if ($ccc != null) {
        $f[$ccc] = true;
    }

    // Values end up inside a single-quoted JavaScript string that itself sits
    // in an HTML attribute: escape for JavaScript first, then for HTML.
    // Plain host and category names come out unchanged.
    $jsSite = htmlspecialchars(addslashes($www), ENT_QUOTES);

    $s = array();
    foreach (array_keys($f) as $category) {
        $category = (string) $category;
        $jsCategory = htmlspecialchars(addslashes($category), ENT_QUOTES);
        $label = htmlspecialchars($category, ENT_QUOTES);
        $s[] = "<div>\n\t\t\t<a href=\"javascript:blur();\" Onclick=\"javascript:PerformProposal('{$jsCategory}','{$jsSite}','{$md5}')\" \n\t\t\tstyle='font-size:11px;text-decoration:underline'>{$label} ?</a>\n\t\t\t</div>";
    }

    return implode(" ", $s);
}
コード例 #4
0
/**
 * Walk the unchecked rows of the `webtests` table and try to categorize each
 * site.
 *
 * For every row:
 *   - names containing ",", " ", ":" or "%" are deleted;
 *   - names that gethostbyname() cannot resolve (it returns its input
 *     unchanged) are passed to categorize_reaffected() and deleted;
 *   - a leading "www." is stripped: the original row is deleted and the bare
 *     name inserted in its place;
 *   - the row is marked checked, then the generic categorizer and, failing
 *     that, external_categorize::K9() are asked for a category. On success
 *     the row is deleted and the site recorded with ADD_CATEGORYZED_WEBSITE().
 *
 * @return void
 */
function bright()
{
    $q = new mysql_squid_builder();
    $sql = "SELECT sitename FROM webtests WHERE checked=0 ORDER BY sitename";
    $results = $q->QUERY_SQL("{$sql}");
    // NOTE(review): assumes QUERY_SQL() returns a falsy value when the query
    // fails - confirm against mysql_squid_builder.
    if (!$results) {
        writelogs("Query failed: {$sql}", __FUNCTION__, __FILE__, __LINE__);
        return;
    }
    writelogs(mysql_num_rows($results) . " items for {$sql}", __FUNCTION__, __FILE__, __LINE__);

    $heristic = new generic_categorize();

    while ($ligne = mysql_fetch_assoc($results)) {
        // Raw value as stored, used to address the original row.
        $www = $ligne["sitename"];
        // QUERY_SQL() only takes a raw SQL string, so quote-escape the values
        // interpolated into it (best effort; not charset-aware).
        $wwwSql = addslashes($www);

        // strpbrk() reports a forbidden character at any position,
        // including the very first one.
        if (strpbrk($www, ", :%") !== false) {
            $q->QUERY_SQL("DELETE FROM webtests WHERE sitename='{$wwwSql}'");
            continue;
        }

        $site = trim(strtolower($www));

        // gethostbyname() returns its argument unchanged when resolution fails.
        if (gethostbyname($site) === $site) {
            $q->categorize_reaffected($site);
            $q->QUERY_SQL("DELETE FROM webtests WHERE sitename='{$wwwSql}'");
            continue;
        }

        // Work on the bare domain: replace "www.example.com" by "example.com".
        if (preg_match("#^www\\.(.+)#", $site, $re)) {
            $q->QUERY_SQL("DELETE FROM webtests WHERE sitename='{$wwwSql}'");
            $site = $re[1];
            $bareSql = addslashes($site);
            $q->QUERY_SQL("INSERT IGNORE INTO webtests (sitename) VALUES ('{$bareSql}')");
        }
        $siteSql = addslashes($site);

        writelogs("CHECK: {$site}", __FUNCTION__, __FILE__, __LINE__);
        $q->QUERY_SQL("UPDATE webtests SET checked=1 WHERE sitename='{$siteSql}'");

        // Local heuristics first, external lookup only when they find nothing.
        $category = $heristic->GetCategories($site);
        if ($category == null) {
            $f = new external_categorize($site);
            $category = $f->K9();
        }
        if ($category == null) {
            // Still unknown: the row stays in the table, marked as checked.
            continue;
        }

        echo "{$site} -> {$category}\n";
        writelogs("SUCCESS: {$site} `{$category}` parse next", __FUNCTION__, __FILE__, __LINE__);
        $q->QUERY_SQL("DELETE FROM webtests WHERE sitename='{$siteSql}'");
        $q->ADD_CATEGORYZED_WEBSITE($site, $category);
    }
}