Exemplo n.º 1
0
/**
 * Runs the dataset preparation pipeline.
 *
 * Invokes the loaders (file names, tweet IDs, keywords) in that order,
 * then hands every entry of the global $FileNames list to
 * collectTweetsFrom(), and finally calls storeTweetIDs().
 *
 * All state is exchanged through globals maintained by the helper
 * functions, so the call order below is significant and must be kept.
 *
 * @return void
 */
function prepareDataset()
{
    global $FileNames;

    // Populate the shared state the collection step relies on.
    loadFileNames();
    loadTweetIDs();
    loadKeywords();

    // Process each listed source file in turn.
    foreach ($FileNames as $source_file) {
        collectTweetsFrom($source_file);
    }

    // Persist the tweet IDs once every file has been processed.
    storeTweetIDs();
}
Exemplo n.º 2
0
/**
 * Draws a random test set out of a dataset CSV file.
 *
 * The dataset is read sequentially: before each candidate row a random
 * number of rows (0 .. dataset size / NUMBER_OF_SLICE, plus one) is skipped,
 * wrapping around to the start of the file when the end is reached.
 * A candidate row is accepted when
 *   - its column 2 value (the uniqueness key, presumably the tweet ID)
 *     has not been accepted before, and
 *   - the remaining quota in $KeyWords_left for its column 0 value is > 0.
 * Accepted rows are appended to
 * TESTSET_LOCATION . "test.<dataset_name>.<mdHi>.csv" with column 0 replaced
 * by the matching $KeyWords entry and column 2 removed. The running count is
 * printed after every accepted row.
 *
 * NOTE: the loop only ends once $testset_size rows were accepted; if the
 * dataset or the keyword quotas cannot supply that many rows it will not
 * terminate.
 *
 * @param string $dataset_dir  Directory prefix of the dataset; it is
 *                             concatenated as-is, so it must end with a
 *                             directory separator.
 * @param string $dataset_name Dataset base name without the ".csv" suffix.
 *
 * @return void
 */
function prepareTestset($dataset_dir, $dataset_name)
{
    global $KeyWords, $KeyWords_left, $testset_size;
    loadKeywords();

    $dataset_path = $dataset_dir . $dataset_name . ".csv";
    $dataset_size = countDataset($dataset_dir . $dataset_name);

    // mt_rand() takes integers; truncate explicitly instead of relying on
    // the implicit (and since PHP 8.1 deprecated) float-to-int conversion.
    $max_random = max(0, (int) floor($dataset_size / constant("NUMBER_OF_SLICE")));

    $testset_name = "test." . $dataset_name . "." . date("mdHi");

    $fp_dataset = fopen($dataset_path, "r");
    if ($fp_dataset === false) {
        trigger_error("prepareTestset: cannot open dataset file " . $dataset_path, E_USER_WARNING);
        return;
    }

    $testset_path = constant("TESTSET_LOCATION") . $testset_name . ".csv";
    $fp_testset = fopen($testset_path, "a");
    if ($fp_testset === false) {
        fclose($fp_dataset);
        trigger_error("prepareTestset: cannot open test set file " . $testset_path, E_USER_WARNING);
        return;
    }

    $no_in_ts = 0;
    // Keys are the already accepted column 2 values: O(1) duplicate lookup
    // with exact key matching instead of a loose-comparison in_array() scan.
    $selected = array();

    while ($no_in_ts < $testset_size) {
        // Skip a random number of rows, wrapping around at end of file.
        // rewind() reuses the open handle; reopening the file here would
        // leak one handle per pass over the dataset.
        $jump = mt_rand(0, $max_random);
        for (; $jump >= 0; $jump--) {
            if (feof($fp_dataset)) {
                rewind($fp_dataset);
            }
            fgetcsv($fp_dataset, 256);
        }
        if (feof($fp_dataset)) {
            rewind($fp_dataset);
        }

        $new_tweet = fgetcsv($fp_dataset, 256);

        // fgetcsv() yields false at end of file and a short array for blank
        // or malformed lines; neither can be a candidate row.
        if (!is_array($new_tweet) || !isset($new_tweet[0], $new_tweet[2])) {
            continue;
        }

        $keyword_id = $new_tweet[0];
        $tweet_id = $new_tweet[2];

        if (isset($selected[$tweet_id])) {
            continue; // already part of the test set
        }
        if (!isset($KeyWords_left[$keyword_id]) || !($KeyWords_left[$keyword_id] > 0)) {
            continue; // unknown keyword or quota for this keyword used up
        }

        $selected[$tweet_id] = true;
        $KeyWords_left[$keyword_id]--;

        // Replace the keyword index by the keyword itself and drop the
        // uniqueness column before writing the row out.
        $new_tweet[0] = $KeyWords[$keyword_id];
        unset($new_tweet[2]);
        fputcsv($fp_testset, $new_tweet);

        $no_in_ts++;
        print $no_in_ts . ", ";
    }

    fclose($fp_dataset);
    fclose($fp_testset);
}