/**
 * Runs the positive/negative feature-selection pass on a stored word bank.
 *
 * Steps, in order: load the word bank with readWordBankFromJSON(), call
 * sortPositiveNegative(), store the maxima of the global $positive, $negative
 * and $total arrays in $max_positive, $max_negative and $max_total, then call
 * removeLowFreqWords(), normalizePositiveLowFreqWords() and
 * normalizeNegativeLowFreqWords(), and finally save the result with
 * writeWordBankToJSON(). The three maxima and the size of $wordBank before and
 * after those steps are written to standard output.
 *
 * NOTE(review): this source contains a second function that is also named
 * featureSelector(). PHP refuses to declare the same function twice in one
 * script, so confirm the two definitions are never loaded together.
 *
 * @param string $wordbank_file Value handed unchanged to readWordBankFromJSON()
 *                              and writeWordBankToJSON().
 * @return void
 */
function featureSelector($wordbank_file)
{
    global $wordBank, $positive, $negative, $total,
           $max_positive, $max_negative, $max_total;

    readWordBankFromJSON($wordbank_file);
    sortPositiveNegative();

    // The maxima are published as globals; presumably the helpers called
    // further down read them - verify against their definitions.
    $max_positive = max($positive);
    $max_negative = max($negative);
    $max_total = max($total);

    echo "\n" . $max_positive . "\t" . $max_negative . "\t" . $max_total . "\n";

    $sizeBefore = count($wordBank);
    echo $sizeBefore . " => ";

    removeLowFreqWords();
    normalizePositiveLowFreqWords();
    normalizeNegativeLowFreqWords();

    writeWordBankToJSON($wordbank_file);

    $sizeAfter = count($wordBank);
    echo $sizeAfter . "\n";
}
/**
 * Feeds every row of a CSV dataset into the word bank and saves the result.
 *
 * The word bank is loaded with readWordBankFromJSON(), each CSV row is passed
 * through featureExtractor() (second column) and updatewordBank() (extracted
 * words plus first column), and the word bank is then saved with
 * writeWordBankToJSON().
 *
 * @param string      $dataset_file  Dataset name without extension; ".csv" is
 *                                   appended to build the path that is read.
 * @param string|null $wordbank_file Word bank identifier passed to the
 *                                   read/write helpers. Any falsy value selects
 *                                   the default "w." . $dataset_file.
 * @return void
 * @throws \RuntimeException When the CSV file cannot be opened.
 */
function countWordFreq($dataset_file, $wordbank_file = null)
{
    if (!$wordbank_file) {
        $wordbank_file = "w." . $dataset_file;
    }

    readWordBankFromJSON($wordbank_file);

    $csv_path = $dataset_file . ".csv";
    $fp = fopen($csv_path, "r");
    if ($fp === false) {
        // Fail loudly instead of handing `false` to the read loop below.
        throw new \RuntimeException("Unable to open dataset file: " . $csv_path);
    }

    try {
        // Length 0 means "no line-length limit". A fixed limit makes fgetcsv()
        // split longer records into chunks, and the tail of such a record would
        // then be parsed as a separate, bogus row.
        while (($tweet = fgetcsv($fp, 0)) !== false) {
            // Blank lines come back as [null]; rows without both columns
            // cannot be processed, so skip them.
            if (!isset($tweet[0], $tweet[1])) {
                continue;
            }

            $tweet_words = featureExtractor($tweet[1]);
            if ($tweet_words) {
                updatewordBank($tweet_words, $tweet[0]);
            }
        }
    } finally {
        // Release the handle even if a helper throws while processing a row.
        fclose($fp);
    }

    writeWordBankToJSON($wordbank_file);
}
/**
 * Runs the proportional-difference feature-selection pass on a stored word bank.
 *
 * Steps, in order: load the word bank with readWordBankFromJSON(), call
 * findProportionalDiff(), store the extremes of the global $pd array in
 * $max_pd / $min_pd and the maxima of $total, $positive and $negative in
 * $max_total, $max_positive and $max_negative, then call removeLowFreqWords(),
 * removeLowCPDWords(), normalizePositiveLowFreqWords() and
 * normalizeNegativeLowFreqWords(), and finally save the result with
 * writeWordBankToJSON(). The $pd extremes, $max_total and the size of
 * $wordBank before and after those steps are written to standard output.
 *
 * NOTE(review): this source contains another function that is also named
 * featureSelector(). PHP refuses to declare the same function twice in one
 * script, so confirm the two definitions are never loaded together.
 *
 * @param string $wordbank_file Value handed unchanged to readWordBankFromJSON()
 *                              and writeWordBankToJSON().
 * @return void
 */
function featureSelector($wordbank_file)
{
    global $wordBank, $pd, $total, $positive, $negative,
           $max_pd, $min_pd, $max_total, $max_positive, $max_negative;

    readWordBankFromJSON($wordbank_file);
    findProportionalDiff();

    // The extremes are published as globals; presumably the helpers called
    // further down read them - verify against their definitions.
    $max_pd = max($pd);
    $min_pd = min($pd);
    $max_total = max($total);
    $max_positive = max($positive);
    $max_negative = max($negative);

    echo $max_pd . " / " . $min_pd . "\t" . $max_total . "\n";

    $sizeBefore = count($wordBank);
    echo $sizeBefore . " => ";

    removeLowFreqWords();
    removeLowCPDWords();
    normalizePositiveLowFreqWords();
    normalizeNegativeLowFreqWords();

    writeWordBankToJSON($wordbank_file);

    $sizeAfter = count($wordBank);
    echo $sizeAfter . "\n";
}