Ejemplo n.º 1
0
/**
 * Generates a 'Top' file from a sorted log file.
 *
 * The log is read line by line; each line is split on '|'. Because the log is
 * sorted, identical (criterion, value) pairs sit on consecutive lines, so the
 * function only has to count runs of equal values. For every criterion
 * (column $criterionIndex) it counts the occurrences of each value
 * (column $valueIndex) and writes one line of the form:
 *
 *     criterion|value1:n1,value2:n2,...,valueK:nK
 *
 * At most $topNumber entries are kept per criterion. They are held in a
 * MyHeap (defined elsewhere; per the original comments it is a min-heap
 * ordered by counter, then by value ID on ties) and written in the reverse of
 * the heap's extraction order.
 *
 * A line that does not contain exactly $nbLogParam fields is treated as the
 * end of the data. The last line of the Top file is written without a
 * trailing newline. The script terminates (die/exit) when a file cannot be
 * opened or when the log contains no usable first line.
 *
 * @param $filename, name of the sorted log file
 * @param $topFilename, name of the top file that will be generated
 * @param $topNumber, number of the Top such as 'Top ($topNumber)' is generated
 * @param $nbLogParam, number of log parameters per line
 * @param $criterionIndex, the index of the criterion for the top (0: song_id, 1: usr_id, 2: country_code)
 * @param $valueIndex, the index of the value that will be counted (0: song_id, 1: usr_id, 2: country_code)
 */
function generateTopFile($filename, $topFilename, $topNumber, $nbLogParam, $criterionIndex, $valueIndex)
{
    global $red, $green, $blue, $noColor, $OK;

    echo $blue . "*** Generating " . $topFilename . " ***" . $noColor . "\n";

    // The sorted log file is opened, then the top file is created
    $fh = fopen($filename, 'r') or die($red . "Oops, couldn't open " . $filename . "!" . $noColor . "\n\n");
    $fp = fopen($topFilename, 'w') or die($red . "Oops, couldn't create a new file!" . $noColor . "\n\n");

    // Heap entries are array($valueID, $counter)
    $VALUE_INDEX = 0;
    $COUNTER_INDEX = 1;
    $heap = new MyHeap($COUNTER_INDEX, $VALUE_INDEX);

    // Stores a finished (value, count) pair; once the heap holds more than
    // $topNumber entries, its top entry is dropped again
    $pushCount = function ($valueID, $count) use ($heap, $topNumber) {
        $heap->insert(array($valueID, $count));
        if ($heap->count() > $topNumber) {
            $heap->extract();
        }
    };

    // Empties the heap and writes the Top line of one criterion (without the
    // line terminator). Entries are written in reverse extraction order.
    $writeTop = function ($criterionID) use ($heap, $fp, $VALUE_INDEX, $COUNTER_INDEX) {
        $entries = array();
        while (!$heap->isEmpty()) {
            $data = $heap->extract();
            $entries[] = $data[$VALUE_INDEX] . ":" . $data[$COUNTER_INDEX];
        }
        fwrite($fp, $criterionID . "|" . implode(",", array_reverse($entries)));
    };

    echo "Reading and counting data... ";

    // The first line seeds the run that is currently being counted.
    // feof() is still false on a freshly opened empty file, so the result of
    // fgets() itself is what tells us whether there was anything to read.
    $firstLine = fgets($fh);
    $row = ($firstLine === false) ? array() : explode('|', trim($firstLine));
    if (count($row) != $nbLogParam) {
        // File shouldn't be empty at this point; if it is, something wrong must have happened meanwhile
        exit($red . "Unexpected EOF reached... Script aborted!" . $noColor . "\n");
    }
    $currentCriterionID = $row[$criterionIndex];
    $currentValueID = $row[$valueIndex];
    $counter = 1;

    while (($line = fgets($fh)) !== false) {
        $row = explode('|', trim($line));
        if (count($row) != $nbLogParam) {
            // Data is expected to have ($nbLogParam) values;
            // if not, it means that the end of this file has been reached
            break;
        }

        // IDs are compared strictly: both sides are strings coming from
        // explode(), and a loose comparison would merge distinct numeric-looking
        // IDs such as "0e1" and "0e2" into a single run.
        if ($row[$criterionIndex] !== $currentCriterionID) {
            // Next criterion detected: close the current run and flush its Top
            $pushCount($currentValueID, $counter);
            $writeTop($currentCriterionID);
            fwrite($fp, "\n");
            $currentCriterionID = $row[$criterionIndex];
            $currentValueID = $row[$valueIndex];
            $counter = 1;
        } elseif ($row[$valueIndex] !== $currentValueID) {
            // Same criterion, next value: close the current run only
            $pushCount($currentValueID, $counter);
            $currentValueID = $row[$valueIndex];
            $counter = 1;
        } else {
            // Same criterion and value: keep counting
            $counter++;
        }
    }

    // Flush the last criterion (no trailing newline, as before)
    $pushCount($currentValueID, $counter);
    $writeTop($currentCriterionID);

    echo $OK . $blue . "*** DONE! ***" . $noColor . "\n";

    fclose($fh);
    fclose($fp);
}
Ejemplo n.º 2
0
<?php

// Minimal script: a SplMaxHeap subclass exposes a public method that calls the
// inherited comparator with a single argument.
// NOTE(review): SplMaxHeap::compare() is documented as taking two values, so
// this looks like a deliberate wrong-argument-count probe — confirm intent.
// The outcome (warning vs. thrown error) depends on the PHP version.
class MyHeap extends SplMaxHeap
{
    /**
     * Forwards to the parent comparator, passing only the literal 1.
     *
     * @return mixed whatever parent::compare(1) yields
     */
    public function testCompare()
    {
        return parent::compare(1);
    }
}
// Create the heap and trigger the call; the return value is discarded
$heap = new MyHeap();
$heap->testCompare();
Ejemplo n.º 3
0
/**
 * Merges sorted files into one (big) sorted file, according to 2 comparator
 * indexes (k-way merge through a heap).
 *
 * Every input line is split on '|' and is expected to hold $nbLogParam
 * fields. One pending line per input file is kept in a MyHeap (defined
 * elsewhere, built from the two comparator indexes). The line extracted from
 * the heap is written to the output and replaced by the next line of the file
 * it came from, so only one line per file is held at any time.
 *
 * Inputs that do not exist are reported and skipped. When fewer than two
 * inputs could be opened, nothing is merged, $filename is not created and no
 * file is deleted.
 *
 * After the first line of an input, a missing, blank or malformed line is
 * treated as the end of that input: the remainder of that file is not merged.
 *
 * The script terminates (die/exit) when a file cannot be opened or created,
 * or when the first line of an input is missing or malformed.
 *
 * @param $filename, name of the file that will be generated
 * @param $filesToMerge, array containing the names of the files to be merged
 * @param $nbLogParam, number of log parameters per line
 * @param $comparator1, the first comparator index (0: song_id, 1: usr_id, 2: country_code)
 * @param $comparator2, the second comparator index (0: song_id, 1: usr_id, 2: country_code)
 * @param $deleteFiles, boolean that indicates if the files must be deleted after merging them
 * @return $nbFiles, number of input files that were opened (they are merged only when there are at least 2)
 */
function mergeFiles($filename, $filesToMerge, $nbLogParam, $comparator1, $comparator2, $deleteFiles = true)
{
    global $red, $green, $blue, $noColor, $OK;

    echo "Now merging files... ";

    // Files are opened (if existing)
    $fh = array();
    foreach ($filesToMerge as $file) {
        if (file_exists($file)) {
            $fh[] = fopen($file, 'r') or die($red . "Oops, couldn't open " . $file . "!" . $noColor . "\n\n");
        } else {
            echo $file . " not found, it will not be merged...\n";
        }
    }
    $nbFiles = count($fh);

    if ($nbFiles < 2) {
        echo "(No need to merge, operation canceled)\n";
        foreach ($fh as $fhToClose) {
            fclose($fhToClose);
        }
        return $nbFiles;
    }

    // The sorted file is created
    $fp = fopen($filename, 'w') or die($red . "Oops, couldn't create a new file!" . $noColor . "\n\n");

    // The heap stores the current line of each file and keeps them sorted
    $heap = new MyHeap($comparator1, $comparator2);

    // Prime the heap with the first line of each file.
    // feof() is still false on a freshly opened empty file, so the result of
    // fgets() is what distinguishes "nothing to read" from "bad data".
    foreach ($fh as $i => $handle) {
        $line = fgets($handle);
        if ($line === false) {
            // File shouldn't be empty at this point; if it is, something wrong must have happened meanwhile
            exit($red . "Unexpected EOF reached... Script aborted!" . $noColor . "\n");
        }
        $row = explode('|', trim($line));
        if (count($row) != $nbLogParam) {
            // Corruption has already been detected before;
            // if $row doesn't contain the whole data, something wrong must have happened meanwhile
            exit($red . "Unexpected data corruption occurred... Script aborted!" . $noColor . "\n");
        }
        // Trick: the index of the source file travels with the data as the last element
        $row[] = $i;
        $heap->insert($row);
    }

    if ($heap->isEmpty()) {
        // At this point, the heap shouldn't be empty; if it is, something wrong must have happened
        exit($red . "Heap is unexpectedly empty... Script aborted!" . $noColor . "\n");
    }

    // Merge one line at a time: write the heap's top line, then refill the
    // heap from the file that line came from. A file that has nothing more to
    // give simply stops contributing, so the loop ends once the heap is drained.
    while (!$heap->isEmpty()) {
        $chunkLine = $heap->extract();
        $i = array_pop($chunkLine); // index of the file this line was read from
        fwrite($fp, implode('|', $chunkLine) . "\n");

        $line = fgets($fh[$i]);
        if ($line === false) {
            continue; // this file is exhausted
        }
        $row = explode('|', trim($line));
        if (count($row) == $nbLogParam) {
            $row[] = $i;
            $heap->insert($row);
        }
        // Otherwise: blank/malformed line, treated as the end of this file
    }

    // Closing all the files and deleting files if needed
    echo $OK . "Cleaning up... ";
    foreach ($fh as $fhToClose) {
        fclose($fhToClose);
    }
    if ($deleteFiles) {
        foreach ($filesToMerge as $file) {
            if (file_exists($file)) {
                unlink($file);
            }
        }
    }
    fclose($fp);

    echo $OK . $green . "The " . $nbFiles . " files have been merged successfully!" . $noColor . "\n";
    return $nbFiles;
}
Ejemplo n.º 4
0
<?php

// Exercises SplHeap::isEmpty() on a minimal concrete subclass: before any
// insert, after one insert, and after that element has been extracted again.
class MyHeap extends SplHeap
{
    /**
     * Comparator required by the abstract SplHeap.
     *
     * Returns the boolean result of ($a < $b).
     * NOTE(review): SplHeap::compare() is documented as returning an integer
     * (positive / zero / negative) — confirm a boolean is intended here.
     */
    public function compare($a, $b)
    {
        return $a < $b;
    }
}
$heap = new MyHeap();
// Nothing has been inserted yet
var_dump($heap->isEmpty());
$heap->insert(1);
// The heap now holds one element
var_dump($heap->isEmpty());
$heap->extract();
// The only element has been removed again
var_dump($heap->isEmpty());
// NOTE(review): isEmpty() is documented without parameters, so the extra
// argument looks like a deliberate invalid-call probe — confirm intent.
// The outcome depends on the PHP version.
$heap->isEmpty('var');
Ejemplo n.º 5
0
<?php

// Shows that the global count() function, applied to a heap object, is routed
// to the subclass's own count() method — which here always throws.
class MyHeap extends SplHeap
{
    /**
     * Comparator required by the abstract SplHeap.
     *
     * Returns the boolean result of ($a < $b).
     */
    public function compare($a, $b)
    {
        return $a < $b;
    }

    /**
     * Overrides SplHeap::count() and unconditionally throws.
     *
     * @throws Exception always
     */
    public function count()
    {
        throw new Exception('Cause count to fail');
        // Never reached: the throw above always leaves the method first
        return parent::count();
    }
}
$heap = new MyHeap();
$heap->insert(1);
// count() on the object dispatches to MyHeap->count(); the exception it throws
// is not caught anywhere in this script
count($heap);
// refers to MyHeap->count() method
Ejemplo n.º 6
0
<?php

// Minimal script that calls SplHeap::insert() with two arguments.
class MyHeap extends SplHeap
{
    /**
     * Comparator required by the abstract SplHeap.
     *
     * Returns the boolean result of ($a < $b).
     */
    public function compare($a, $b)
    {
        return $a < $b;
    }
}
$heap = new MyHeap();
// NOTE(review): insert() is documented with a single value parameter, so the
// second argument looks like a deliberate invalid-call probe — confirm intent.
// The outcome depends on the PHP version.
$heap->insert(1, 2);