/**
 * Builds the categorisation reports for an uploaded access log.
 *
 * The file is looked up in ressources/conf/upload/ next to this script, copied to a
 * temporary file (the uploaded original is deleted whether or not the copy succeeds)
 * and read line by line through parseAccessLine(). Hosts that IP::isIPAddress()
 * recognises are accumulated separately from named sites. Three CSV files and one
 * serialized summary array are then written under
 * /usr/share/artica-postfix/ressources/logs/.
 *
 * Progress is reported through build_progress(); failures are reported with the
 * value 110.
 *
 * NOTE(review): $filename is concatenated into the path as-is; confirm callers only
 * ever pass a bare file name.
 *
 * @param string $filename Name of the uploaded file inside ressources/conf/upload/.
 * @return false|null false when the temporary copy cannot be opened, null in every other case.
 */
function parse($filename) {
    $unix = new unix();
    $GLOBALS["MYSQL_CATZ"] = new mysql_catz();
    $GLOBALS["SQUID_FAMILY_CLASS"] = new squid_familysite();
    if (!isset($GLOBALS["MYHOSTNAME"])) {
        $GLOBALS["MYHOSTNAME"] = $unix->hostname_g();
    }

    $filesource = dirname(__FILE__) . "/ressources/conf/upload/{$filename}";
    if (!is_file($filesource)) {
        build_progress(110, "{$filename} no such file");
        return;
    }

    // Work on a private copy; the uploaded file is removed in both outcomes.
    $tmpfile = $unix->FILE_TEMP();
    if (!@copy($filesource, $tmpfile)) {
        @unlink($filesource);
        build_progress(110, "{$filename} -> {$tmpfile} {failed}");
        return;
    }
    @unlink($filesource);

    $SumOflines = $unix->COUNT_LINES_OF_FILE($tmpfile);
    echo "{$tmpfile} {$SumOflines} lines\n";

    $handle = @fopen($tmpfile, "r");
    if (!$handle) {
        echo "Fopen failed on {$tmpfile}\n";
        build_progress(110, "{$tmpfile} {failed}");
        @unlink($tmpfile);
        return false;
    }

    // Numeric line total used only for the percentage; guards against an empty file.
    $totalLines = intval($SumOflines);

    $c = 0;
    $SITES = array();            // family site => cumulated size
    $RQS = array();              // family site => number of requests
    $CATEGORIZED = array();      // family site => category
    $NOT_CATEGORIZED = array();  // family site => true
    $IPADDRESSES = array();      // IP host => array(RQS, SIZE, CATEGORY)
    $IPClass = new IP();
    $FIRSTTIME = 0;
    $LASTTIME = 0;
    $TIME1 = time();

    while (!feof($handle)) {
        $c++;
        // An empty file still enters the loop once, so never divide by zero here.
        $prc = ($totalLines > 0) ? round(($c / $totalLines) * 100) : 0;

        $buffer = trim(fgets($handle));
        if ($buffer == null) {
            continue;
        }

        if (!isset($GLOBALS["LAST_PRC"])) {
            build_progress($prc, "{$c}/{$SumOflines} {please_wait}");
            $GLOBALS["LAST_PRC"] = $prc;
        }

        // Between 5% and 95%, print statistics each time the integer percentage changes.
        if ($prc > 5 && $prc < 95 && $GLOBALS["LAST_PRC"] != $prc) {
            $array_load = sys_getloadavg();
            $internal_load = $array_load[0];
            $mem = round(memory_get_usage() / 1024 / 1000, 2);
            echo "Load: {$internal_load}, Memory {$mem}MB\n";
            echo "Categorized: " . FormatNumber(count($CATEGORIZED)) . "\n";
            echo "Unknown....: " . FormatNumber(count($NOT_CATEGORIZED)) . "\n";
            echo "Web sites..: " . FormatNumber(count($SITES)) . "\n";
            build_progress($prc, FormatNumber($c) . "/" . FormatNumber($SumOflines) . " {please_wait} - {$mem}MB {memory}");
            $GLOBALS["LAST_PRC"] = $prc;
        }

        $array = parseAccessLine($buffer);
        if (!is_array($array) || count($array) == 0) {
            continue;
        }

        $TIME = $array["TIME"];
        $LASTTIME = $TIME;
        if ($FIRSTTIME == 0) {
            $FIRSTTIME = $TIME;
        }
        $CATEGORY = $array["CATEGORY"];
        $FAMILYSITE = $array["FAMILYSITE"];
        $SIZE = intval($array["SIZE"]);

        if ($IPClass->isIPAddress($FAMILYSITE)) {
            if (!isset($IPADDRESSES[$FAMILYSITE]["RQS"])) {
                // The category of the first line seen for this IP is the one kept.
                $IPADDRESSES[$FAMILYSITE] = array("RQS" => 0, "SIZE" => 0, "CATEGORY" => $CATEGORY);
            }
            // Count every request, including the first one, in both totals.
            $IPADDRESSES[$FAMILYSITE]["RQS"] = $IPADDRESSES[$FAMILYSITE]["RQS"] + 1;
            $IPADDRESSES[$FAMILYSITE]["SIZE"] = $IPADDRESSES[$FAMILYSITE]["SIZE"] + $SIZE;
            continue;
        }

        if (!isset($SITES[$FAMILYSITE])) {
            $SITES[$FAMILYSITE] = 0;
        }
        if (!isset($RQS[$FAMILYSITE])) {
            $RQS[$FAMILYSITE] = 0;
        }
        $SITES[$FAMILYSITE] = $SITES[$FAMILYSITE] + $SIZE;
        $RQS[$FAMILYSITE] = $RQS[$FAMILYSITE] + 1;

        if ($CATEGORY != null) {
            $CATEGORIZED[$FAMILYSITE] = $CATEGORY;
            continue;
        }
        $NOT_CATEGORIZED[$FAMILYSITE] = true;
    }
    fclose($handle);
    @unlink($tmpfile);

    build_progress(91, "{building_report}");
    $TIME2 = time();

    $ARRAY = array();
    $ARRAY["DURATION"] = $unix->distanceOfTimeInWords($TIME1, $TIME2);
    $ARRAY["SumOflines"] = $SumOflines;
    $ARRAY["stats_sites"] = count($SITES);
    $ARRAY["stats_ip"] = count($IPADDRESSES);
    $ARRAY["firsttime"] = $FIRSTTIME;
    $ARRAY["lasttime"] = $LASTTIME;
    $ARRAY["stats_categorized"] = count($CATEGORIZED);
    $ARRAY["stats_not_categorized"] = count($NOT_CATEGORIZED);

    build_progress(92, "{building_report}");
    $CSV1 = array(array("website", "size", "requests"));
    foreach ($NOT_CATEGORIZED as $familysite => $unused) {
        $CSV1[] = array($familysite, $SITES[$familysite], $RQS[$familysite]);
    }

    build_progress(95, "{building_report}");
    $CSV2 = array(array("website", "category", "size", "requests"));
    foreach ($CATEGORIZED as $familysite => $category) {
        $CSV2[] = array($familysite, $category, $SITES[$familysite], $RQS[$familysite]);
    }

    build_progress(97, "{building_report}");
    $CSV3 = array(array("Public IP addresses", "category", "size", "requests"));
    foreach ($IPADDRESSES as $ip => $ipStats) {
        $CSV3[] = array($ip, $ipStats["CATEGORY"], $ipStats["SIZE"], $ipStats["RQS"]);
    }

    build_progress(99, "{saving_reports}");
    outputCSV($CSV1, "/usr/share/artica-postfix/ressources/logs/notcategorized.csv");
    outputCSV($CSV2, "/usr/share/artica-postfix/ressources/logs/categorized.csv");
    outputCSV($CSV3, "/usr/share/artica-postfix/ressources/logs/ipcategorized.csv");
    @file_put_contents("/usr/share/artica-postfix/ressources/logs/categorized.array", serialize($ARRAY));
    @chmod("/usr/share/artica-postfix/ressources/logs/notcategorized.csv", 0755);
    @chmod("/usr/share/artica-postfix/ressources/logs/ipcategorized.csv", 0755);
    @chmod("/usr/share/artica-postfix/ressources/logs/categorized.csv", 0755);
    @chmod("/usr/share/artica-postfix/ressources/logs/categorized.array", 0755);
    build_progress(100, "{done}");
}
/**
 * Parses an access log and injects the resulting working log through influx::files_inject().
 *
 * Each non-empty line goes through parseAccessLine() and is handed to CachedSizeMem()
 * and CachedUserMem(); both caches are dumped once the file is exhausted. The working
 * log is /home/artica/import-temp/<md5 of the source file>.working.log.
 *
 * Resume support:
 *  - every 50 loop iterations the current byte offset is saved in "{$filepath}.last",
 *    and a later call seeks to that offset before reading;
 *  - when no ".last" file exists but a working log for the same md5 is already present,
 *    only the injection is retried.
 *
 * NOTE(review): after a resume the line counter restarts at 0 while the total still
 * covers the whole file, so the reported percentage looks understated — confirm
 * whether this matters to the consumers of mysql_progress().
 *
 * @param string      $filepath Path of the log file to import.
 * @param string|null $md5file  Identifier passed to mysql_progress(), mysql_first_time() and
 *                              mysql_last_time(); when null no progress row is updated and
 *                              lines are not counted.
 * @return bool true when the working log was injected, false when the file cannot be
 *              opened or the injection fails.
 */
function ExplodeFile($filepath, $md5file = null) {
    $unix = new unix();
    $GLOBALS["MYSQL_CATZ"] = new mysql_catz();
    $GLOBALS["SQUID_FAMILY_CLASS"] = new squid_familysite();
    if (!isset($GLOBALS["MYHOSTNAME"])) {
        $GLOBALS["MYHOSTNAME"] = $unix->hostname_g();
    }

    // Open first so an unreadable file is reported before it is hashed.
    $handle = @fopen($filepath, "r");
    if (!$handle) {
        echo "Fopen failed on {$filepath}\n";
        if ($md5file != null) {
            mysql_progress($md5file, 100, 3, "Fopen {failed} on {$filepath}");
        }
        return false;
    }

    $GLOBALS["SEQUENCE"] = md5_file($filepath);

    // A leftover working log with no checkpoint means parsing already completed:
    // retry only the injection.
    if (!is_file("{$filepath}.last")) {
        $pendingLog = "/home/artica/import-temp/{$GLOBALS["SEQUENCE"]}.working.log";
        if (is_file($pendingLog)) {
            $influx = new influx();
            if ($influx->files_inject($pendingLog)) {
                @unlink($pendingLog);
                fclose($handle);
                return true;
            }
        }
    }

    $countlines = 0;
    if ($md5file != null) {
        $countlines = $unix->COUNT_LINES_OF_FILE($filepath);
        mysql_progress($md5file, 10, 0, "Parsing {$countlines}");
    }

    // Resume from the byte offset saved by a previous, interrupted run.
    $LastScannLine = 0;
    if (is_file("{$filepath}.last")) {
        $LastScannLine = intval(@file_get_contents("{$filepath}.last"));
    }
    if ($LastScannLine > 0) {
        fseek($handle, $LastScannLine, SEEK_SET);
    }

    $showProgress = !empty($GLOBALS["PROGRESS"]);
    $verbose = !empty($GLOBALS["VERBOSE"]);

    $c = 0;                 // iterations since the start of this call
    $d = 0;                 // iterations since the last checkpoint
    $e = 0;                 // iterations since the last periodic progress update
    $prc = 0;
    $prc_text = 0;          // last percentage sent to mysql_progress()
    $mysql_first_time = 0;
    $lastTime = null;       // TIME of the last successfully parsed line

    while (!feof($handle)) {
        $c++;
        $d++;
        $e++;

        if ($countlines > 0) {
            $prc = round(($c / $countlines) * 100);

            if (!isset($GLOBALS["LAST_PRC"]) || $GLOBALS["LAST_PRC"] != $prc) {
                if ($showProgress) {
                    echo "{$prc}%\n";
                }
                $GLOBALS["LAST_PRC"] = $prc;
            }

            if ($prc > 10 && $prc < 99 && $prc > $prc_text) {
                if ($md5file != null) {
                    $array_load = sys_getloadavg();
                    $internal_load = $array_load[0];
                    $mem = round(memory_get_usage() / 1024 / 1000, 2);
                    $prc_design = FormatNumber($c) . "/" . FormatNumber($countlines);
                    mysql_progress($md5file, $prc, 1, "{parsing} {$prc_design} {load}:{$internal_load} {memory}:{$mem}MB");
                }
                $prc_text = $prc;
            }
        }

        // Checkpoint: the offset is taken before the current line is read.
        if ($d > 50) {
            $iSeek = ftell($handle);
            @file_put_contents("{$filepath}.last", $iSeek);
            if ($verbose) {
                $prc_design = FormatNumber($c) . "/" . FormatNumber($countlines);
                echo "{$prc}% {$prc_design}\n";
            }
            $d = 0;
        }

        if ($e > 500) {
            if ($md5file != null) {
                // The load is sampled here because the percentage branch above may not have run yet.
                $array_load = sys_getloadavg();
                $internal_load = $array_load[0];
                $mem = round(memory_get_usage() / 1024 / 1000, 2);
                $prc_design = FormatNumber($c) . "/" . FormatNumber($countlines);
                mysql_progress($md5file, $prc, 1, "{parsing} {$prc_design} {load}:{$internal_load} {memory}:{$mem}MB");
            }
            $e = 0;
        }

        $buffer = trim(fgets($handle));
        if ($buffer == null) {
            continue;
        }

        $array = parseAccessLine($buffer);
        if (!is_array($array) || count($array) == 0) {
            continue;
        }
        $lastTime = $array["TIME"];

        // The first line whose year is after 2001 sets the start time of the import.
        if ($mysql_first_time == 0) {
            if (date("Y", $array["TIME"]) > 2001) {
                $mysql_first_time = $array["TIME"];
                mysql_first_time($md5file, $mysql_first_time);
            }
        }

        CachedSizeMem($array["TIME"], $array["CACHED"], $array["SIZE"]);
        if (intval($array["SIZE"]) == 0) {
            if ($verbose) {
                echo "Size = 0 " . __LINE__ . "\n";
            }
        }
        CachedUserMem($array["TIME"], $array["SITENAME"], $array["SIZE"], null, $array["UID"], $array["IPADDR"], $array["CATEGORY"], $array["FAMILYSITE"]);
    }
    fclose($handle);

    @unlink("{$filepath}.last");
    // $lastTime stays null when no line could be parsed.
    mysql_last_time($md5file, $lastTime);
    CachedUserMem_dump();
    CachedSizeMem_dump();

    $workingLog = "/home/artica/import-temp/{$GLOBALS["SEQUENCE"]}.working.log";
    $influx = new influx();
    $size = is_file($workingLog) ? filesize($workingLog) : 0;
    $size = $size / 1024;
    $size = $size / 1024;
    echo "Importing {$size}MB of data....\n";

    // The working log is removed whether or not the injection succeeds.
    $injected = $influx->files_inject($workingLog);
    @unlink($workingLog);
    return $injected ? true : false;
}