/**
 * Refresh the local copies of the Toulouse University blacklist archives.
 *
 * Steps, as performed below:
 *  1. Download MD5SUM.LST from the mirror and parse it into a filename => md5 map.
 *  2. Keep only archives whose category (filename without ".tar.gz") is a key of
 *     TLSE_CONVERTION() and is not in the fixed exclusion list.
 *  3. For each archive whose cached copy is missing or has a different MD5:
 *     download, verify, extract, compile `domains` with ufdbGenTable, compress the
 *     resulting domains.ufdb to uploads/{category}.gz and record it in uploads/index.txt.
 *  4. If at least one category was rebuilt, call PushToRepo_alls() and send a summary e-mail.
 *
 * Progress and failures are reported with echo; a failing category is skipped, not fatal.
 *
 * @return void
 */
function GetIndex()
{
    $WORKING_DIR = "/home/working_toulouse_databases";
    // NOTE(review): "dowloads" looks like a typo, but it is the existing on-disk cache
    // location; renaming it would orphan already-downloaded archives, so it is left as is.
    $WORKING_DOWNLOAD = "{$WORKING_DIR}/dowloads";
    $WORKING_UPLOAD = "{$WORKING_DIR}/uploads";
    @mkdir($WORKING_DOWNLOAD, 0755, true);
    @mkdir($WORKING_UPLOAD, 0755, true);

    $unix = new unix();
    $tar = $unix->find_program("tar");
    // NOTE(review): $catz, $tmpdir and $q are not used below. They are kept because the
    // constructors/calls may have side effects that are not visible here — confirm and remove.
    $catz = new mysql_catz();
    $tmpfile = $unix->FILE_TEMP();
    $tmpdir = $unix->TEMP_DIR();
    $rm = $unix->find_program("rm");
    $mainuri = "ftp://ftp.univ-tlse1.fr/pub/reseau/cache/squidguard_contrib";
    $EXEC_NICE = $unix->EXEC_NICE();
    $ufdbGenTable = $unix->find_program("ufdbGenTable");

    $curl = new ccurl("{$mainuri}/MD5SUM.LST");
    if (!$curl->GetFile($tmpfile)) {
        echo "Failed to download MD5SUM.LST\n";
        @unlink($tmpfile);
        return;
    }

    // Each useful line is "<md5><whitespace><filename>"; build filename => md5.
    $array = array();
    $lines = explode("\n", (string) @file_get_contents($tmpfile));
    foreach ($lines as $line) {
        if (preg_match("#(.+?)\\s+(.+)#", $line, $re)) {
            $listedMd5 = trim($re[1]);
            $listedFile = trim($re[2]);
            $array[$listedFile] = $listedMd5;
        }
    }
    @unlink($tmpfile);

    $q = new mysql_squid_builder();
    $TLSE_CONVERTION = TLSE_CONVERTION();

    // Only archives whose category is known to the conversion table are processed.
    $FINAL_ARRAY = array();
    foreach ($array as $filename => $md5) {
        $category = str_replace(".tar.gz", "", $filename);
        if (isset($TLSE_CONVERTION[$category])) {
            $FINAL_ARRAY[$filename] = $md5;
        }
    }

    // Categories that are never imported from this source.
    $SKIPPED_CATEGORIES = array("adult", "aggressive", "agressif", "redirector", "ads", "drogue");

    $UPDATED = 0;
    $NOTIFICATIONS = array();

    // index.txt may not exist yet (first run) or be unreadable: start from an empty index.
    $MAIN_ARRAY = unserialize(base64_decode((string) @file_get_contents("{$WORKING_UPLOAD}/index.txt")));
    if (!is_array($MAIN_ARRAY)) {
        $MAIN_ARRAY = array();
    }

    foreach ($FINAL_ARRAY as $filename => $md5) {
        $TargetFile = "{$WORKING_DOWNLOAD}/{$filename}";
        $categoryname = str_replace(".tar.gz", "", $filename);

        if (in_array($categoryname, $SKIPPED_CATEGORIES, true)) {
            continue;
        }

        // Strict comparison: with ==, two all-digit "0e..." hashes compare equal as numbers.
        $MyStoreMd5 = is_file($TargetFile) ? md5_file($TargetFile) : false;
        if ($MyStoreMd5 === $md5) {
            echo "Skipping {$filename}\n";
            continue;
        }

        echo "Downloading {$filename}\n";
        $curl = new ccurl("{$mainuri}/{$filename}");
        $tmpfile = $unix->FILE_TEMP();
        if (!$curl->GetFile($tmpfile)) {
            echo "Failed {$curl->error}\n";
            @unlink($tmpfile);
            continue;
        }

        $md5_tmp = is_file($tmpfile) ? md5_file($tmpfile) : false;
        if ($md5_tmp !== $md5) {
            echo "Failed Corrupted file\n";
            @unlink($tmpfile);
            continue;
        }

        if (is_file($TargetFile)) {
            @unlink($TargetFile);
        }
        if (!@copy($tmpfile, $TargetFile)) {
            echo "Failed Copy file\n";
            @unlink($tmpfile);
            @unlink($TargetFile);
            continue;
        }
        @unlink($tmpfile);

        // Re-verify the cached copy so a truncated copy is never treated as up to date.
        $MyStoreMd5 = md5_file($TargetFile);
        if ($MyStoreMd5 !== $md5) {
            echo "Failed MD5 file\n";
            @unlink($TargetFile);
            continue;
        }

        $categoryDir = "{$WORKING_DIR}/{$categoryname}";
        @mkdir($categoryDir, 0755, true);
        echo "Extracting {$TargetFile}\n";
        // File names come from the remote list, so every path is shell-escaped.
        $cmd = "{$tar} xvf " . escapeshellarg($TargetFile) . " -C " . escapeshellarg("{$categoryDir}/");
        echo $cmd . "\n";
        system($cmd);

        $SOURCE_DIR = find_sources($categoryDir);
        if (!is_file("{$SOURCE_DIR}/domains")) {
            echo "Failed {$SOURCE_DIR}/domains no such file\n";
            // Dropping the cached archive forces a fresh download on the next run.
            @unlink($TargetFile);
            continue;
        }

        $COUNT_OF_DOMAINS = $unix->COUNT_LINES_OF_FILE("{$SOURCE_DIR}/domains");
        echo "{$categoryname} {$COUNT_OF_DOMAINS} domains\n";
        if ($COUNT_OF_DOMAINS == 0) {
            shell_exec("{$rm} -rf " . escapeshellarg($categoryDir));
            @unlink($TargetFile);
            continue;
        }

        // Remove any previous compiled table so its presence afterwards proves success.
        if (is_file("{$SOURCE_DIR}/domains.ufdb")) {
            @unlink("{$SOURCE_DIR}/domains.ufdb");
        }
        if (!is_file("{$SOURCE_DIR}/urls")) {
            @touch("{$SOURCE_DIR}/urls");
        }

        $cmd = "{$EXEC_NICE}{$ufdbGenTable} -n -q -W -t " . escapeshellarg($categoryname)
            . " -d " . escapeshellarg("{$SOURCE_DIR}/domains")
            . " -u " . escapeshellarg("{$SOURCE_DIR}/urls");
        echo $cmd . "\n";
        shell_exec($cmd);

        if (!is_file("{$SOURCE_DIR}/domains.ufdb")) {
            echo "Failed to compile {$categoryname}\n";
            @unlink($TargetFile);
            continue;
        }

        $MD5SRC = md5_file("{$SOURCE_DIR}/domains.ufdb");
        $CompressedFile = "{$WORKING_UPLOAD}/{$categoryname}.gz";
        if (is_file($CompressedFile)) {
            @unlink($CompressedFile);
        }
        $unix->compress("{$SOURCE_DIR}/domains.ufdb", $CompressedFile);
        if (!is_file($CompressedFile)) {
            // Do not publish an index entry for an archive that was never written.
            echo "Failed to compress {$categoryname}\n";
            @unlink($TargetFile);
            continue;
        }
        $MD5GZ = md5_file($CompressedFile);

        $UPDATED++;
        $NOTIFICATIONS[] = "{$categoryname} updated with {$COUNT_OF_DOMAINS} domains";
        $MAIN_ARRAY[$categoryname]["ROWS"] = $COUNT_OF_DOMAINS;
        $MAIN_ARRAY[$categoryname]["MD5SRC"] = $MD5SRC;
        $MAIN_ARRAY[$categoryname]["MD5GZ"] = $MD5GZ;
        $MAIN_ARRAY[$categoryname]["TIME"] = time();
        $MAIN_ARRAY[$categoryname]["SIZE"] = @filesize($CompressedFile);
        // Persist after every category so an interrupted run keeps what it finished.
        @file_put_contents("{$WORKING_UPLOAD}/index.txt", base64_encode(serialize($MAIN_ARRAY)));
    }

    if ($UPDATED > 0) {
        PushToRepo_alls();
        sendEmail("{$UPDATED} Toulouse Unversity databases uploaded.", implode("\n", $NOTIFICATIONS));
    }
}
/**
 * Compile every category table listed by TransArray() into a ufdb database.
 *
 * For each table: clean it, skip it when its row count equals the cached count,
 * otherwise export the `pattern` column to a `domains` file, run ufdbGenTable,
 * back up the exported domains, compress domains.ufdb into the export directory
 * and record row count, timestamp and MD5 sums in the cache.
 *
 * The cache is written to /root/UFDB_COMPILE_DATABASES and, base64-encoded, to the
 * export directory's index.txt. When at least one table was rebuilt, PushToRepo_alls()
 * is called and a summary e-mail is sent.
 *
 * A MySQL export error or a compression failure terminates the script with die.
 *
 * @return void
 */
function compile()
{
    $unix = new unix();

    // The cache file may be missing or unreadable: start from an empty cache.
    $MAIN_CACHE = unserialize((string) @file_get_contents("/root/UFDB_COMPILE_DATABASES"));
    if (!is_array($MAIN_CACHE)) {
        $MAIN_CACHE = array();
    }

    $q = new mysql_squid_builder();
    $DB_LISTES = TransArray();
    $ufdbGenTable = $unix->find_program("ufdbGenTable");
    $WORKDIR = "/home/artica/ufdbv10";
    $OUTPUTDIR = "/home/artica/ufdbv10Export";
    @mkdir($OUTPUTDIR, 0755, true);

    $UPDATED = 0;
    $UPDATED_DBS = array();

    foreach ($DB_LISTES as $category_table => $category) {
        echo "Starting Cleaning {$category_table}\n";
        Clean_table($category_table);

        $CountCategoryTableRows = $q->COUNT_ROWS("{$category_table}");
        echo "{$category_table}: {$CountCategoryTableRows} rows\n";
        if ($CountCategoryTableRows == 0) {
            sendEmail("ALERT! {$category_table} NO ROW!");
            continue;
        }

        // An unchanged row count is taken to mean the table has not changed since last build.
        $cachedRows = isset($MAIN_CACHE[$category_table]["ROWS"])
            ? intval($MAIN_CACHE[$category_table]["ROWS"])
            : 0;
        if ($cachedRows == $CountCategoryTableRows) {
            ToSyslog($category_table . " [SKIPPED] {$MAIN_CACHE[$category_table]["ROWS"]} == {$CountCategoryTableRows}");
            echo "{$category_table}: SKIPPED\n";
            continue;
        }

        $workingtempdir = "{$WORKDIR}/{$category_table}";
        $workingtempFile = "{$workingtempdir}/domains";
        // NOTE(review): 0777 presumably lets the MySQL server write the OUTFILE here — confirm.
        @mkdir($workingtempdir, 0777, true);
        $unix->chmod_func(0777, $workingtempdir);
        // The export target is removed first so a stale file cannot be in the way.
        if (is_file($workingtempFile)) {
            @unlink($workingtempFile);
        }

        $sql = "SELECT pattern FROM {$category_table} ORDER BY pattern INTO OUTFILE '{$workingtempFile}' LINES TERMINATED BY '\n';";
        $q = new mysql_squid_builder();
        $q->QUERY_SQL($sql);
        if (!$q->ok) {
            sendEmail("ALERT! {$category_table} MySQL error", $q->mysql_error);
            die;
        }

        @file_put_contents("{$workingtempdir}/urls", "\n");
        @file_put_contents("{$workingtempdir}/expressions", "\n");
        // Remove any previous compiled table so its presence afterwards proves success.
        @unlink("{$workingtempdir}/domains.ufdb");

        $categoryKey = compile_databases_categoryKey($category);
        $u = " -u {$workingtempdir}/urls";
        $d = " -d {$workingtempdir}/domains";
        $cmd = "{$ufdbGenTable} -n -q -W -t {$categoryKey}{$d}{$u} >/dev/null 2>&1";
        echo "[{$category_table}]::{$category} {$cmd}\n";
        ToSyslog("[FINISH]:: Compiling {$category_table}...");
        exec($cmd);

        if (!is_file("{$workingtempdir}/domains.ufdb")) {
            sendEmail("ALERT! {$category_table} domains.ufdb no such file!");
            continue;
        }

        @mkdir("/home/artica/backuped_categories", 0755);
        $unix->compress($workingtempFile, "/home/artica/backuped_categories/{$category_table}.gz");

        echo "[{$category_table}]::{$category} Compressing to {$OUTPUTDIR}/{$category_table}.gz\n";
        if (!$unix->compress("{$workingtempdir}/domains.ufdb", "{$OUTPUTDIR}/{$category_table}.gz")) {
            sendEmail("ALERT! unable to compress {$workingtempdir}/domains.ufdb");
            die;
        }

        // NOTE(review): the unlink targets /home/ufdbcat while the copy writes to
        // /var/lib/ufdbartica — confirm whether both paths are intended.
        @unlink("/home/ufdbcat/{$category_table}/domains.ufdb");
        @copy("{$workingtempdir}/domains.ufdb", "/var/lib/ufdbartica/{$category_table}/domains.ufdb");

        echo "[{$category_table}]::{$category} Indexing....\n";
        $md5file = md5_file("{$workingtempdir}/domains.ufdb");
        $md5zip = md5_file("{$OUTPUTDIR}/{$category_table}.gz");
        ToSyslog("{$OUTPUTDIR}/{$category_table}.gz [UPDATED]");

        // Count the table only once it is compiled and compressed, so failed
        // compilations neither inflate the total nor trigger a push on their own.
        $UPDATED++;
        $UPDATED_DBS[] = "{$category_table} ({$CountCategoryTableRows})";
        $MAIN_CACHE[$category_table]["ROWS"] = $CountCategoryTableRows;
        $MAIN_CACHE[$category_table]["TIME"] = time();
        $MAIN_CACHE[$category_table]["MD5SRC"] = $md5file;
        $MAIN_CACHE[$category_table]["MD5GZ"] = $md5zip;
    }

    ToSyslog("[FINISH]:: Building indexes {$UPDATED} updated...");
    @file_put_contents("/root/UFDB_COMPILE_DATABASES", serialize($MAIN_CACHE));
    @file_put_contents("{$OUTPUTDIR}/index.txt", base64_encode(serialize($MAIN_CACHE)));

    if ($UPDATED > 0) {
        ToSyslog("[FINISH]:: PushToRepo_alls()");
        PushToRepo_alls();
        sendEmail("{$UPDATED} Official Webfiltering databases updated", implode("\n", $UPDATED_DBS));
    }
}