/**
 * Asks generic_categorize for the category of a website and, when one is
 * found, records it through ADD_CATEGORYZED_WEBSITE() and logs the result.
 *
 * @param mixed $www Website name handed to generic_categorize::GetCategories().
 * @return mixed The category that was found, or null when the lookup yielded
 *               nothing (nothing is recorded or logged in that case).
 */
private function CategoriesFamily($www)
{
    include_once dirname(__FILE__) . "/class.squid.categorize.generic.inc";

    $categorizer = new generic_categorize();
    $category = $categorizer->GetCategories($www);

    // Loose comparison on purpose: any "empty" answer counts as no category.
    if ($category == null) {
        return null;
    }

    $this->ADD_CATEGORYZED_WEBSITE($www, $category);
    writelogs("Generic Category {$category} for {$www} done", __CLASS__ . "/" . __FUNCTION__, __FILE__, __LINE__);

    return $category;
}
/**
 * Categorizes every host listed, one per line, in a text file.
 *
 * Two serialized arrays are used as on-disk caches and rewritten after every
 * new result, so an interrupted run can resume where it stopped:
 *  - /root/translated_parse : host => category that was found
 *  - /root/nodetect_parse   : host => true when no category could be found
 *
 * For each host the generic_categorize lookup is tried first, then
 * GetCategory("http://<host>"). A numeric answer from GetCategory() is skipped
 * without being cached. Progress is echoed to standard output.
 *
 * @param string $filename Path of the text file to read.
 * @return void
 */
function parse_txt($filename)
{
    echo "Loading translated_parse\n";

    // A missing or corrupt cache file must yield an empty array: assigning an
    // array offset on `false` is deprecated since PHP 8.1.
    $MAINZ = array();
    $rawTranslated = @file_get_contents("/root/translated_parse");
    if ($rawTranslated !== false) {
        $decoded = unserialize($rawTranslated);
        if (is_array($decoded)) {
            $MAINZ = $decoded;
        }
    }

    $NODETECT = array();
    $rawNoDetect = @file_get_contents("/root/nodetect_parse");
    if ($rawNoDetect !== false) {
        $decoded = unserialize($rawNoDetect);
        if (is_array($decoded)) {
            $NODETECT = $decoded;
        }
    }
    echo "Loading nodetect_parse\n";

    $gg = new generic_categorize();
    $ipClass = new IP();

    $handle = @fopen($filename, "r");
    if (!$handle) {
        echo "Failed to open file {$filename}\n";
        return;
    }

    // Line counter shown in the log prefix; blank lines are counted too.
    $c = 0;

    while (!feof($handle)) {
        $c++;

        // fgets() returns false at end of file; trim() turns that into "".
        $www = trim(fgets($handle, 4096));
        if ($www === "") {
            continue;
        }

        $www = str_replace('"', "", $www);
        $www = stripslashes($www);
        $www = str_replace("'", "", $www);

        $date = date("Y-m-d H:i:s");
        $logprefix = "[{$date}]: {$c} {$www} ";
        echo "{$logprefix} ";

        if (isset($MAINZ[$www]) || isset($NODETECT[$www])) {
            echo "already done\n";
            continue;
        }

        if ($ipClass->isIPAddress($www)) {
            $ipaddr = $www;
            // gethostbyaddr() returns false on malformed input; keep the
            // address as the name instead of replacing it with false.
            $resolved = gethostbyaddr($ipaddr);
            if ($resolved !== false) {
                $www = $resolved;
            }
        } else {
            $ipaddr = gethostbyname($www);
        }
        echo " -{$ipaddr}- ";

        // Checked again because reverse resolution may have changed the name.
        if (isset($MAINZ[$www]) || isset($NODETECT[$www])) {
            echo "already done\n";
            continue;
        }

        $category_artica = $gg->GetCategories($www);
        if ($category_artica != null) {
            echo "{$logprefix} {$www} -> ARTICA {$category_artica}\n";
            $MAINZ[$www] = $category_artica;
            @file_put_contents("/root/translated_parse", serialize($MAINZ));
            continue;
        }

        $cat = GetCategory("http://{$www}");
        if (is_numeric($cat)) {
            // Numeric answers are not cached, so the host is retried next run.
            echo "{$www} -> continue;\n";
            continue;
        }

        if ($cat != null) {
            echo "{$www} -> {$cat};\n";
            $MAINZ[$www] = $cat;
            @file_put_contents("/root/translated_parse", serialize($MAINZ));
            continue;
        }

        echo "{$www} -> NOPE;\n";
        $NODETECT[$www] = true;
        @file_put_contents("/root/nodetect_parse", serialize($NODETECT));
    }

    fclose($handle);
}
/**
 * Builds HTML "proposal" links suggesting categories for a website name.
 *
 * Every pattern of the rule table that matches $www contributes its category
 * names; the category returned by generic_categorize::GetCategories() is added
 * too when it is not empty. Each distinct category is rendered as a <div>
 * holding a link whose Onclick calls the JavaScript function
 * PerformProposal(category, www, md5).
 *
 * @param string $www Website name to analyse.
 * @return string|null The <div> blocks joined by one space, or null when no
 *                     category could be proposed.
 */
function proposal($www)
{
    // The hash is computed on the value as received, before trimming.
    $md5 = md5($www);
    $www = trim($www);

    // Ordered list of (pattern, categories). The order is significant: it
    // decides the order in which the proposals are displayed.
    // NOTE(review): "#p**n#" appears to contain a nested quantifier, which PCRE
    // refuses to compile (preg_match() then warns and matches nothing). Confirm
    // the intended pattern; it is left untouched here.
    $rules = array(
        array("#music#", array("music")),
        array("#movie#", array("movies", "audio-video")),
        array("#radio#", array("webradio")),
        array("#skyrock#", array("webradio")),
        array("#journal#", array("blog")),
        array("#shop#", array("shopping")),
        array("#vintage#", array("shopping")),
        array("#xxx#", array("p**n")),
        array("#career#", array("jobsearch")),
        array("#[-\\_]fm#", array("webradio")),
        array("#about\\.com\$#", array("dictionaries")),
        array("#politic#", array("politic")),
        array("#soiree#", array("recreation/nightout")),
        array("#tv\\.#", array("webtv")),
        array("#school#", array("recreation/schools")),
        array("#mobile#", array("mobile-phone")),
        array("#tvprogram#", array("webtv")),
        array("#.musiwave.com\$#", array("ringtones")),
        array("#\\.2o7\\.net#", array("tracker")),
        array("#warcraft#", array("games")),
        array("#\\.fm\$#", array("webradio")),
        array("#soft#", array("science/computing")),
        array("#tvideos#", array("webtv")),
        array("#sex#", array("p**n")),
        array("#blip\\.tv\$#", array("webtv")),
        array("#car.*insurance#", array("finance/insurance")),
        array("#health.*insurance#", array("finance/insurance")),
        array("#home.*insurance#", array("finance/insurance")),
        array("#\\.disqus\\.com\$#", array("socialnet")),
        array("#twenga\\.[a-z]+\$#", array("shopping")),
        array("#\\.maases\\.com\$#", array("music")),
        array("#\\.zankyou\\.com\$#", array("socialnet")),
        array("#\\.wikipedia\\.org\$#", array("dictionaries")),
        array("#\\.wikia.com\$#", array("dictionaries")),
        array("#\\.gameleads.ru\$#", array("publicite")),
        array("#immobilier#", array("finance/realestate")),
        array("#\\.icplatform.com\$#", array("reaffected")),
        array("#mailing#", array("mailing")),
        array("#p**n#", array("p**n")),
        array("#sport#", array("recreation/sports")),
        array("#news#", array("news")),
        array("#amicale#", array("associations")),
        array("#video#", array("movies", "audio-video")),
        array("#game#", array("games")),
    );

    // Categories are stored as keys so that each one is proposed only once.
    $f = array();
    foreach ($rules as $rule) {
        if (preg_match($rule[0], $www)) {
            foreach ($rule[1] as $matched) {
                $f[$matched] = true;
            }
        }
    }

    $p = new generic_categorize();
    $ccc = $p->GetCategories($www);
    if ($ccc != null) {
        $f[$ccc] = true;
    }

    // Nothing to propose: return null explicitly. The previous code reached
    // implode() with an undefined variable, which yields null on PHP 7 and
    // throws a TypeError on PHP 8.
    if (count($f) === 0) {
        return null;
    }

    // The values end up inside a single-quoted JavaScript string that itself
    // sits in a double-quoted HTML attribute: escape for JavaScript first, then
    // for HTML. Ordinary host and category names are left unchanged by this.
    $jsWww = htmlspecialchars(addslashes($www), ENT_QUOTES);

    $s = array();
    // foreach replaces the each() loop; each() was removed in PHP 8.0.
    foreach (array_keys($f) as $category) {
        $jsCategory = htmlspecialchars(addslashes($category), ENT_QUOTES);
        $htmlCategory = htmlspecialchars($category, ENT_QUOTES);
        $s[] = "<div>\n\t\t\t<a href=\"javascript:blur();\" Onclick=\"javascript:PerformProposal('{$jsCategory}','{$jsWww}','{$md5}')\" \n\t\t\tstyle='font-size:11px;text-decoration:underline'>{$htmlCategory} ?</a>\n\t\t\t</div>";
    }

    return implode(" ", $s);
}
/**
 * Walks the unchecked rows of the `webtests` table and tries to categorize
 * each site.
 *
 * For every row with checked=0:
 *  - names containing ",", " ", ":" or "%" are deleted from the table;
 *  - names that gethostbyname() cannot resolve are passed to
 *    categorize_reaffected() and deleted;
 *  - a leading "www." is stripped (the old row is deleted and the short name
 *    is inserted instead);
 *  - the row is flagged checked=1, then generic_categorize::GetCategories() is
 *    tried and, if it finds nothing, external_categorize::K9();
 *  - when a category is found the row is deleted and the site is recorded
 *    through ADD_CATEGORYZED_WEBSITE().
 *
 * NOTE(review): the mysql_* functions used here belong to the ext/mysql
 * extension, removed in PHP 7. They are kept because the result type returned
 * by mysql_squid_builder::QUERY_SQL() is not visible from this function.
 *
 * @return void
 */
function bright()
{
    $q = new mysql_squid_builder();
    $sql = "SELECT sitename FROM webtests WHERE checked=0 ORDER BY sitename";
    $results = $q->QUERY_SQL("{$sql}");

    // Without a result set there is nothing to count or iterate.
    if (!$results) {
        writelogs("Query failed: {$sql}", __FUNCTION__, __FILE__, __LINE__);
        return;
    }

    writelogs(mysql_num_rows($results) . " items for {$sql}", __FUNCTION__, __FILE__, __LINE__);
    $heristic = new generic_categorize();

    while ($ligne = mysql_fetch_assoc($results)) {
        // Name exactly as stored, used to address the existing row.
        $rawName = $ligne["sitename"];
        // Minimal escaping for the string-built SQL below.
        // NOTE(review): switch to the query builder's own escaping or bound
        // parameters if it offers them.
        $rawSql = addslashes($rawName);

        // strpos() returns 0 for a match on the first character, so the test
        // has to be "!== false"; "> 0" let such names through.
        $forcedelete = false;
        foreach (array(",", " ", ":", "%") as $forbidden) {
            if (strpos($rawName, $forbidden) !== false) {
                $forcedelete = true;
                break;
            }
        }
        if ($forcedelete) {
            $q->QUERY_SQL("DELETE FROM webtests WHERE sitename='{$rawSql}'");
            continue;
        }

        $sitename = trim(strtolower($rawName));

        // gethostbyname() gives the name back unchanged when it cannot resolve it.
        $IPADDR = gethostbyname($sitename);
        if ($IPADDR === $sitename) {
            $q->categorize_reaffected($sitename);
            $q->QUERY_SQL("DELETE FROM webtests WHERE sitename='{$rawSql}'");
            continue;
        }

        // Strip a leading "www." from the normalised name so that the
        // lower-casing and trimming done above are not thrown away.
        if (preg_match("#^www\\.(.+)#", $sitename, $re)) {
            $q->QUERY_SQL("DELETE FROM webtests WHERE sitename='{$rawSql}'");
            $sitename = $re[1];
            $shortSql = addslashes($sitename);
            // The VALUES keyword was missing, making the statement invalid SQL.
            $q->QUERY_SQL("INSERT IGNORE INTO webtests (sitename) VALUES ('{$shortSql}')");
        }

        $siteSql = addslashes($sitename);

        writelogs("CHECK: {$sitename}", __FUNCTION__, __FILE__, __LINE__);
        $q->QUERY_SQL("UPDATE webtests SET checked=1 WHERE sitename='{$siteSql}'");

        // Local lookup first; the external one is only built when needed.
        $category = $heristic->GetCategories($sitename);
        if ($category == null) {
            $f = new external_categorize($sitename);
            $category = $f->K9();
        }

        if ($category == null) {
            // Still unknown: the row stays in the table, flagged as checked.
            continue;
        }

        echo "{$sitename} -> {$category}\n";
        writelogs("SUCCESS: {$sitename} `{$category}` parse next", __FUNCTION__, __FILE__, __LINE__);
        $q->QUERY_SQL("DELETE FROM webtests WHERE sitename='{$siteSql}'");
        $q->ADD_CATEGORYZED_WEBSITE($sitename, $category);
    }
}