Ejemplo n.º 1
0
/**
 * Runs the feature-selection pass over a stored word bank.
 *
 * Loads the word bank, calls sortPositiveNegative(), publishes the maxima of the
 * $positive, $negative and $total globals, runs the pruning/normalising helpers
 * and writes the word bank back. The maxima and the word-bank size before and
 * after the pass are printed along the way.
 *
 * @param string $wordbank_file Identifier handed to readWordBankFromJSON() and
 *                              writeWordBankToJSON().
 */
function featureSelector($wordbank_file)
{
    global $wordBank, $positive, $negative, $total,
           $max_positive, $max_negative, $max_total;

    readWordBankFromJSON($wordbank_file);
    sortPositiveNegative();

    // The maxima are stored in globals before the helpers below are called;
    // presumably those helpers read them - verify against their definitions.
    $max_positive = max($positive);
    $max_negative = max($negative);
    $max_total = max($total);

    echo "\n{$max_positive}\t{$max_negative}\t{$max_total}\n";

    // Size before the pass; the matching "after" size is printed at the end.
    echo count($wordBank), " => ";

    removeLowFreqWords();
    normalizePositiveLowFreqWords();
    normalizeNegativeLowFreqWords();
    writeWordBankToJSON($wordbank_file);

    echo count($wordBank), "\n";
}
Ejemplo n.º 2
0
/**
 * Feeds every row of a CSV dataset into the word bank.
 *
 * Loads the word bank with readWordBankFromJSON(), reads "<dataset_file>.csv"
 * row by row, passes column 1 of each row to featureExtractor() and, when that
 * returns a truthy value, hands the result together with column 0 to
 * updatewordBank(). The word bank is then written back with
 * writeWordBankToJSON().
 *
 * @param string      $dataset_file  Dataset path without the ".csv" extension.
 * @param string|null $wordbank_file Word bank identifier for the read/write
 *                                   helpers; defaults to "w." . $dataset_file
 *                                   when empty.
 *
 * @throws \RuntimeException If the dataset CSV cannot be opened.
 */
function countWordFreq($dataset_file, $wordbank_file = null)
{
    if (!$wordbank_file) {
        $wordbank_file = "w." . $dataset_file;
    }
    readWordBankFromJSON($wordbank_file);

    $csv_path = $dataset_file . ".csv";
    $fp = fopen($csv_path, "r");
    if ($fp === false) {
        // Without this guard feof(false) either throws a TypeError (PHP 8)
        // or never becomes true (PHP 7), so the loop would not terminate.
        throw new \RuntimeException("Unable to open dataset file: " . $csv_path);
    }

    try {
        // Loop on fgetcsv()'s own return value: it yields false at EOF and on
        // read errors, so the loop always terminates. No length limit is
        // passed, so rows longer than 256 bytes are no longer split in two.
        while (($tweet = fgetcsv($fp)) !== false) {
            // Blank lines come back as [null]; skip rows lacking both columns.
            if (!isset($tweet[0], $tweet[1])) {
                continue;
            }
            $tweet_words = featureExtractor($tweet[1]);
            if ($tweet_words) {
                updatewordBank($tweet_words, $tweet[0]);
            }
        }
    } finally {
        // Release the handle even if a helper throws.
        fclose($fp);
    }

    writeWordBankToJSON($wordbank_file);
}
Ejemplo n.º 3
0
/**
 * Runs the feature-selection pass (proportional-difference variant) over a
 * stored word bank.
 *
 * Loads the word bank, calls findProportionalDiff(), publishes the extrema of
 * the $pd, $total, $positive and $negative globals, runs the pruning and
 * normalising helpers and writes the word bank back. The $pd range, the
 * maximum total and the word-bank size before and after the pass are printed.
 *
 * @param string $wordbank_file Identifier handed to readWordBankFromJSON() and
 *                              writeWordBankToJSON().
 */
function featureSelector($wordbank_file)
{
    global $wordBank, $pd, $total, $positive, $negative,
           $max_pd, $min_pd, $max_total, $max_positive, $max_negative;

    readWordBankFromJSON($wordbank_file);
    findProportionalDiff();

    // The extrema are stored in globals before the helpers below are called;
    // presumably those helpers read them - verify against their definitions.
    $max_pd = max($pd);
    $min_pd = min($pd);
    $max_total = max($total);
    $max_positive = max($positive);
    $max_negative = max($negative);

    echo "{$max_pd} / {$min_pd}\t{$max_total}\n";

    // Size before the pass; the matching "after" size is printed at the end.
    echo count($wordBank), " => ";

    removeLowFreqWords();
    removeLowCPDWords();
    normalizePositiveLowFreqWords();
    normalizeNegativeLowFreqWords();
    writeWordBankToJSON($wordbank_file);

    echo count($wordBank), "\n";
}