/**
 * Run the feature-selection pass over the word bank stored in $wordbank_file.
 *
 * Steps, in order:
 *   1. load the word bank with readWordBankFromJSON();
 *   2. call sortPositiveNegative() (presumably fills the $positive, $negative
 *      and $total globals -- verify against its definition);
 *   3. publish the maxima of those three globals in $max_positive,
 *      $max_negative and $max_total and print them tab-separated;
 *   4. print the word bank size, run removeLowFreqWords(),
 *      normalizePositiveLowFreqWords() and normalizeNegativeLowFreqWords(),
 *      write the word bank back to the same file, then print the new size.
 *
 * NOTE(review): max() does not accept an empty array, so this appears to
 * assume the loaded word bank is non-empty -- confirm.
 *
 * @param string $wordbank_file Word bank file, used for both reading and writing.
 */
function featureSelector($wordbank_file)
{
    global $wordBank;
    global $positive, $negative, $total;
    global $max_positive, $max_negative, $max_total;

    readWordBankFromJSON($wordbank_file);
    sortPositiveNegative();

    // These maxima are globals; the pruning/normalising helpers below may read them.
    $max_positive = max($positive);
    $max_negative = max($negative);
    $max_total = max($total);

    echo "\n", $max_positive, "\t", $max_negative, "\t", $max_total, "\n";

    // Size before the pruning/normalising steps ...
    echo count($wordBank), " => ";

    removeLowFreqWords();
    normalizePositiveLowFreqWords();
    normalizeNegativeLowFreqWords();

    writeWordBankToJSON($wordbank_file);

    // ... and the size afterwards, completing the "before => after" line.
    echo count($wordBank), "\n";
}
/**
 * Tally word frequencies from a labelled tweet CSV into the word bank.
 *
 * Loads the word bank from $wordbank_file, reads "<dataset_file>.csv" row by
 * row, passes column 1 of each row through featureExtractor() and, when that
 * returns a truthy value, hands the result together with column 0 to
 * updatewordBank(). Once the whole file has been read, the word bank is
 * written back to $wordbank_file.
 *
 * @param string      $dataset_file  Dataset path without the ".csv" extension.
 * @param string|null $wordbank_file Word bank file; any falsy value selects
 *                                   "w." . $dataset_file.
 *
 * @throws \RuntimeException If the dataset CSV cannot be opened.
 */
function countWordFreq($dataset_file, $wordbank_file = null)
{
    if (!$wordbank_file) {
        $wordbank_file = "w." . $dataset_file;
    }

    readWordBankFromJSON($wordbank_file);

    $csv_path = $dataset_file . ".csv";
    $fp = fopen($csv_path, "r");
    if ($fp === false) {
        // Without this guard the read loop would run on an invalid handle.
        throw new \RuntimeException("Unable to open dataset file: " . $csv_path);
    }

    try {
        // fgetcsv() returns false at end of file or on a read error, so this
        // loop terminates in both cases.
        // NOTE(review): the 256 length makes fgetcsv() split longer lines into
        // several rows -- confirm no dataset line exceeds it.
        while (($tweet = fgetcsv($fp, 256)) !== false) {
            // A blank line is returned as [null]; skip rows with no text column.
            if (!isset($tweet[1])) {
                continue;
            }

            $tweet_words = featureExtractor($tweet[1]);
            if ($tweet_words) {
                updatewordBank($tweet_words, $tweet[0]);
            }
        }
    } finally {
        // Release the handle even if a helper throws mid-file.
        fclose($fp);
    }

    writeWordBankToJSON($wordbank_file);
}
/**
 * Run the proportional-difference feature-selection pass over a word bank.
 *
 * Steps, in order:
 *   1. load the word bank with readWordBankFromJSON();
 *   2. call findProportionalDiff() (presumably fills the $pd global --
 *      verify against its definition);
 *   3. publish the extremes of $pd and the maxima of $total, $positive and
 *      $negative in the corresponding $max_* and $min_* globals, then print
 *      "<max_pd> / <min_pd>", a tab and <max_total>;
 *   4. print the word bank size, run removeLowFreqWords(),
 *      removeLowCPDWords(), normalizePositiveLowFreqWords() and
 *      normalizeNegativeLowFreqWords(), write the word bank back to the same
 *      file, then print the new size.
 *
 * NOTE(review): max()/min() do not accept an empty array, so this appears to
 * assume the loaded word bank is non-empty -- confirm.
 *
 * @param string $wordbank_file Word bank file, used for both reading and writing.
 */
function featureSelector($wordbank_file)
{
    global $wordBank;
    global $pd, $total, $positive, $negative;
    global $max_pd, $min_pd;
    global $max_total, $max_positive, $max_negative;

    readWordBankFromJSON($wordbank_file);
    findProportionalDiff();

    // These extremes are globals; the pruning/normalising helpers below may read them.
    $max_pd = max($pd);
    $min_pd = min($pd);
    $max_total = max($total);
    $max_positive = max($positive);
    $max_negative = max($negative);

    echo $max_pd, " / ", $min_pd, "\t", $max_total, "\n";

    // Size before the pruning/normalising steps ...
    echo count($wordBank), " => ";

    removeLowFreqWords();
    removeLowCPDWords();
    normalizePositiveLowFreqWords();
    normalizeNegativeLowFreqWords();

    writeWordBankToJSON($wordbank_file);

    // ... and the size afterwards, completing the "before => after" line.
    echo count($wordBank), "\n";
}
#!/usr/bin/env php <?php require_once "naive_bayes_classifier.php"; readWordBankFromJSON(); findEffeciency("testdata"); //sleep(18000); function findEffeciency($file_name) { $correct = 0; $incorrect = 0; $fp = fopen($file_name . ".csv", "r"); while (!feof($fp)) { $new_tweet = fgetcsv($fp); if ($new_tweet) { //print(findCatagory($new_tweet[1])."\n"); if (findCatagory($new_tweet[1]) == $new_tweet[0]) { $correct++; } else { $incorrect++; } } } fclose($fp); print "Correct:\t" . $correct . "\n"; print "Incorrect:\t" . $incorrect . "\n"; print "Effeciency:\t" . $correct / ($correct + $incorrect) . "\n"; } function findCatagory($tweet) { $result = classify($tweet); print_r($result);