/**
 * Function that sorts input log file (according to 2 comparator indexes)
 * by reading one chunk of this file at a time, then applying merge sort
 * algorithm on each chunk, and finally merging them into a sorted file.
 *
 * Each line is split on '|'. A line is kept only if it has exactly
 * count($logParamPatterns) fields and every field matches its pattern;
 * any other line is treated as corrupted and silently dropped.
 * Every non-empty chunk is sorted with mergeSort() and written to
 * "chunk_<n>.log" in the current working directory; the list of chunk files
 * is then handed to mergeFiles() together with $sortedFilename.
 *
 * In our example, there are only 3 possible values for comparators:
 *   0: song_id
 *   1: usr_id
 *   2: country_code
 *
 * However, this function has been designed as a generic function,
 * and it accepts more than 3 log parameters per line.
 *
 * Any I/O failure (input cannot be opened, chunk file cannot be created or
 * written) and an invalid chunk length terminate the script through die().
 *
 * @param $filename, name of the log file that will be sorted
 * @param $sortedFilename, name of the sorted file that will be generated
 * @param $logParamPatterns, array containing the patterns of each log parameter (to detect data corruption)
 * @param $comparator1, the first comparator index (0: song_id, 1: usr_id, 2: country_code)
 * @param $comparator2, the second comparator index (0: song_id, 1: usr_id, 2: country_code)
 * @param $chunkLength, maximum number of lines read per chunk (optional, defaults to 10000, must be >= 1)
 */
function sortLogFile($filename, $sortedFilename, $logParamPatterns, $comparator1, $comparator2, $chunkLength = 10000)
{
    global $red, $green, $blue, $noColor, $OK, $INDEXES;

    // A chunk length below 1 would never consume any line and loop forever
    $chunkLength = (int) $chunkLength;
    if ($chunkLength < 1) {
        die($red . "Oops, the chunk length must be at least 1!" . $noColor . "\n\n");
    }

    echo $blue . "*** Sorting " . $filename . " according to " . $INDEXES[$comparator1] . " ***" . $noColor . "\n";

    $fh = fopen($filename, 'r') or die($red . "Oops, couldn't open " . $filename . "!" . $noColor . "\n\n");

    $nbTmpFiles = 0;        // Number of temporary files created so far
    $chunkNames = array();  // Names of the temporary files, in creation order
    $nbLogParam = count($logParamPatterns);
    $endReached = false;    // Set as soon as fgets() cannot deliver another line

    while (!$endReached) {
        $chunk = array(); // Valid rows of the current chunk
        $nbLinesRead = 0;

        // Reading a chunk of desired length.
        // The loop stops on the return value of fgets() instead of feof():
        // feof() stays false when a read fails before the end of the file,
        // which would otherwise make this loop spin forever.
        while ($nbLinesRead < $chunkLength) {
            $line = fgets($fh);
            if ($line === false) {
                $endReached = true;
                break;
            }
            $nbLinesRead++;

            $row = explode('|', trim($line));

            // Check if data is corrupted (wrong number of fields)
            if (count($row) != $nbLogParam) {
                continue;
            }

            // Advanced detection of data corruption with pattern matching
            $isCorrupted = false;
            for ($j = 0; $j < $nbLogParam; $j++) {
                if (preg_match($logParamPatterns[$j], $row[$j]) != 1) {
                    $isCorrupted = true;
                    break;
                }
            }

            if (!$isCorrupted) {
                $chunk[] = $row; // If data is not corrupted, it is added to the chunk
            }
        }

        // Empty chunks (e.g. the read attempt that only discovers the end of the
        // file) are not written. The single exception: when the whole input
        // yields no valid row, one empty chunk file is still produced so that
        // mergeFiles() receives at least one file, as it always did.
        $isOnlyChunk = $endReached && $nbTmpFiles == 0;
        if (count($chunk) == 0 && !$isOnlyChunk) {
            continue;
        }

        echo "Creating and sorting chunk file n°" . $nbTmpFiles . "... \n";

        // Then the chunk is sorted
        $chunk = mergeSort($chunk, $comparator1, $comparator2);

        // Store it into a chunk file
        $chunkName = "chunk_" . $nbTmpFiles . ".log";
        $fp = fopen($chunkName, 'w') or die($red . "Oops, couldn't create a new file!" . $noColor . "\n\n");
        foreach ($chunk as $chunkLine) {
            if (fwrite($fp, implode('|', $chunkLine) . "\n") === false) {
                die($red . "Oops, couldn't write to " . $chunkName . "!" . $noColor . "\n\n");
            }
        }
        fclose($fp);

        $chunkNames[] = $chunkName;
        $nbTmpFiles++;
        unset($chunk); // Release the rows before reading the next chunk

        echo $OK;
    }

    fclose($fh);

    echo $green . "Chunk files sorted!" . $noColor . "\n";

    mergeFiles($sortedFilename, $chunkNames, $nbLogParam, $comparator1, $comparator2);

    echo $blue . "*** DONE! ***" . $noColor . "\n";
}
// only a segment $saveName = $uploadDate . $fileName; move_uploaded_file($_FILES["segment"]["tmp_name"], $saveName); echo $saveName; } else { if ($pos < $total) { // file segments greater than 1 if (!file_exists($uploadDate)) { mkdir($uploadDate); } $saveName = $uploadDate . "/" . $pos; move_uploaded_file($_FILES["segment"]["tmp_name"], $saveName); echo 1; } else { if ($pos == $total) { mergeFiles($uploadDate, $fileName); echo $uploadDate, $fileName; } else { echo 0; } } } // the function just effect this file, not common one function mergeFiles($dir, $fileName) { if (touch($dir . $fileName)) { // 'wb' mode can use in Windows OS $fileOut = fopen($dir . $fileName, "wb"); } else { echo 0; die("Can't create file");
// Parse the command line; $shortopts / $longopts are defined before this point.
$options = getopt($shortopts, $longopts);

// Map options to variables.
// With "-a yes" the search directory gets a "Y-m-d/" suffix derived from the start file name.
// The option is tested with isset() first so that omitting -a does not raise an
// "undefined index" warning; it then simply selects the else branch as before.
if (isset($options["a"]) && $options["a"] === "yes") {
    // NOTE(review): presumably sfile looks like "<name>.<unix timestamp>"; the part
    // after the first dot is used as the timestamp — confirm against the callers.
    $unixTime = explode(".", $options["sfile"]);
    $date = date("Y-m-d", $unixTime[1]);
    $dir = $options["dir"] . $date . "/";
    $direc = $dir;
} else {
    $dir = $options["dir"];
    // $date is only assigned in the branch above. Fall back to an empty string so the
    // resulting path is the same as before, without reading an undefined variable.
    // NOTE(review): whether $direc should carry a date suffix at all here is unclear — confirm.
    $direc = $options["dir"] . (isset($date) ? $date : "") . "/";
}

$start = $options["sfile"];
$stop = $options["efile"];
$pre = $options["pre"];

// Get files to search
$files2search = carve($start, $stop, $dir, $pre);

// Get sizes of the files you want to search
$file_sizes = get_sizes($files2search, $dir, $pre);

// Construct and call cxt2pcap searches, return generated outfiles
$outputfiles = cxt2pcap($files2search, $file_sizes, $options, $direc);

// Take generated outfiles and merge them into one pcap
$fileHandle = mergeFiles($outputfiles);

// Resolve the path of the merged pcap from its stream handle, print it and return it
$meta_data = stream_get_meta_data($fileHandle);
$filename = $meta_data["uri"];
print $filename;
return $filename;
**********************************/
echo $blue . "=== STEP 3 ===" . $noColor . "\n";

// Parameter of mergeFiles function: intermediate files shouldn't be deleted
// as they will be reused the next day
$DELETE_FILES = false;

// 3a) Merge User files
// Merging only happens when more than one previously sorted file is available.
if (count($previousUserSortedFiles) > 1) {
    // The merged file name is derived from $filename by replacing ".log"
    $userWeeklySortedFile = str_replace(".log", "_userWeeklySorted.log", $filename);
    mergeFiles($userWeeklySortedFile, $previousUserSortedFiles, $NB_LOG_PARAM, $USER_ID_INDEX, $SONG_ID_INDEX, $DELETE_FILES);
} else {
    // If files are not merged, the Top file will be generated from the daily sorted file
    $userWeeklySortedFile = $userSortedFile;
}

// 3b) Merge Country files
// Same logic as 3a, using the country index as first comparator.
if (count($previousCountrySortedFiles) > 1) {
    $countryWeeklySortedFile = str_replace(".log", "_countryWeeklySorted.log", $filename);
    mergeFiles($countryWeeklySortedFile, $previousCountrySortedFiles, $NB_LOG_PARAM, $COUNTRY_INDEX, $SONG_ID_INDEX, $DELETE_FILES);
} else {
    // If files are not merged, the Top file will be generated from the daily sorted file
    $countryWeeklySortedFile = $countrySortedFile;
}

echo "\n***\n\n";

/*****************************
 * STEP 4: Generate Top files
 *****************************/
echo $blue . "=== STEP 4 ===" . $noColor . "\n";

// 4a) User Top
// Output name pattern: userTop<TOP_NUMBER>-<current date as Ymd>.txt
$userTopFilename = 'userTop' . $TOP_NUMBER . '-' . date('Ymd') . '.txt';
generateTopFile($userWeeklySortedFile, $userTopFilename, $TOP_NUMBER, $NB_LOG_PARAM, $USER_ID_INDEX, $SONG_ID_INDEX);

echo "\n***\n\n";

// 4b) Country Top
// Output name pattern: countryTop<TOP_NUMBER>-<current date as Ymd>.txt
$countryTopFilename = 'countryTop' . $TOP_NUMBER . '-' . date('Ymd') . '.txt';