Esempio n. 1
0
/**
 * Runs the feature-selection pass over the word bank stored in $wordbank_file.
 *
 * Loads the bank with readWordBankFromJSON(), calls sortPositiveNegative(),
 * stores the maxima of the global $positive, $negative and $total arrays in
 * $max_positive, $max_negative and $max_total, invokes the low-frequency
 * removal/normalisation helpers and finally writes the bank back to the same
 * file. The three maxima and the word-bank size before and after the helper
 * calls are echoed as progress output.
 *
 * @param string $wordbank_file Word-bank identifier, passed unchanged to
 *                              readWordBankFromJSON() and writeWordBankToJSON().
 * @return void
 */
function featureSelector($wordbank_file)
{
    global $wordBank, $positive, $negative, $total,
           $max_positive, $max_negative, $max_total;

    readWordBankFromJSON($wordbank_file);
    sortPositiveNegative();

    // The maxima are published as globals; presumably the helpers called
    // below consume them (TODO confirm against their definitions).
    $max_positive = max($positive);
    $max_negative = max($negative);
    $max_total = max($total);

    $summary = "\n" . $max_positive . "\t" . $max_negative . "\t" . $max_total . "\n";
    echo $summary;

    // Size before the helper calls ...
    echo count($wordBank) . " => ";

    removeLowFreqWords();
    normalizePositiveLowFreqWords();
    normalizeNegativeLowFreqWords();
    writeWordBankToJSON($wordbank_file);

    // ... and size after them.
    echo count($wordBank) . "\n";
}
Esempio n. 2
0
/**
 * Feeds every row of a labelled CSV dataset into the word bank.
 *
 * The word bank is loaded with readWordBankFromJSON(), each CSV row is run
 * through featureExtractor() (column 1) and, when that yields something
 * truthy, handed to updatewordBank() together with column 0. The bank is
 * written back with writeWordBankToJSON() once the whole file has been read.
 *
 * @param string      $dataset_file  Dataset name without extension; ".csv" is
 *                                   appended to build the path that is opened.
 * @param string|null $wordbank_file Word-bank identifier; when falsy it
 *                                   defaults to "w." followed by $dataset_file.
 * @return void
 * @throws \RuntimeException When the dataset CSV cannot be opened.
 */
function countWordFreq($dataset_file, $wordbank_file = null)
{
    if (!$wordbank_file) {
        $wordbank_file = "w." . $dataset_file;
    }
    readWordBankFromJSON($wordbank_file);

    $csv_path = $dataset_file . ".csv";
    $fp = fopen($csv_path, "r");
    if ($fp === false) {
        // Previously a failed fopen() fell through to feof(false), which
        // loops forever on PHP 7 and raises a TypeError on PHP 8.
        throw new \RuntimeException("Unable to open dataset file: " . $csv_path);
    }

    try {
        // No length limit: a fixed limit makes fgetcsv() split longer rows
        // into chunks, which would then be processed as bogus extra rows.
        while (($tweet = fgetcsv($fp)) !== false) {
            // Blank lines come back as [null]; rows without both columns
            // cannot be processed, so skip them.
            if (!is_array($tweet) || count($tweet) < 2) {
                continue;
            }
            $tweet_words = featureExtractor($tweet[1]);
            if ($tweet_words) {
                updatewordBank($tweet_words, $tweet[0]);
            }
        }
    } finally {
        // Release the handle even if a helper throws.
        fclose($fp);
    }

    writeWordBankToJSON($wordbank_file);
}
Esempio n. 3
0
/**
 * Runs the proportional-difference feature-selection pass over a word bank.
 *
 * Loads the bank with readWordBankFromJSON(), calls findProportionalDiff(),
 * stores the extrema of the global $pd array and the maxima of $total,
 * $positive and $negative in the corresponding $max_* / $min_* globals,
 * invokes the removal/normalisation helpers and writes the bank back to the
 * same file. The $pd extrema, the $total maximum and the word-bank size
 * before and after the helper calls are echoed as progress output.
 *
 * @param string $wordbank_file Word-bank identifier, passed unchanged to
 *                              readWordBankFromJSON() and writeWordBankToJSON().
 * @return void
 */
function featureSelector($wordbank_file)
{
    global $wordBank, $pd, $total, $positive, $negative,
           $max_pd, $min_pd, $max_total, $max_positive, $max_negative;

    readWordBankFromJSON($wordbank_file);
    findProportionalDiff();

    // The extrema are published as globals; presumably the helpers called
    // below consume them (TODO confirm against their definitions).
    $max_pd = max($pd);
    $min_pd = min($pd);
    $max_total = max($total);
    $max_positive = max($positive);
    $max_negative = max($negative);

    $summary = $max_pd . " / " . $min_pd . "\t" . $max_total . "\n";
    echo $summary;

    // Size before the helper calls ...
    echo count($wordBank) . " => ";

    removeLowFreqWords();
    removeLowCPDWords();
    normalizePositiveLowFreqWords();
    normalizeNegativeLowFreqWords();
    writeWordBankToJSON($wordbank_file);

    // ... and size after them.
    echo count($wordBank) . "\n";
}
Esempio n. 4
0
#!/usr/bin/env php
<?php 
// Command-line entry point: loads the word bank and reports how many rows of
// the test dataset are classified with the label stored in the CSV.

// NOTE(review): presumably provides classify() and readWordBankFromJSON(),
// neither of which is defined in this script -- confirm.
require_once "naive_bayes_classifier.php";

// Called without arguments here, so whatever default source the function
// defines is used.
readWordBankFromJSON();

// findEffeciency() appends ".csv", so this evaluates "testdata.csv".
findEffeciency("testdata");
//sleep(18000);
/**
 * Classifies every row of a labelled CSV file and prints the hit rate.
 *
 * Column 1 of each row is passed to findCatagory() and the result is compared
 * (loosely, since CSV fields are always strings) with the label in column 0.
 * The number of correct and incorrect classifications and their ratio are
 * printed to standard output.
 *
 * @param string $file_name Dataset name without extension; ".csv" is appended
 *                          to build the path that is opened.
 * @return void
 * @throws \RuntimeException When the CSV file cannot be opened.
 */
function findEffeciency($file_name)
{
    $csv_path = $file_name . ".csv";
    $fp = fopen($csv_path, "r");
    if ($fp === false) {
        // Previously a failed fopen() fell through to feof(false), which
        // loops forever on PHP 7 and raises a TypeError on PHP 8.
        throw new \RuntimeException("Unable to open dataset file: " . $csv_path);
    }

    $correct = 0;
    $incorrect = 0;
    try {
        while (($new_tweet = fgetcsv($fp)) !== false) {
            // Blank lines come back as [null]; rows without both a label and
            // a text column must not be scored.
            if (!is_array($new_tweet) || count($new_tweet) < 2) {
                continue;
            }
            // Loose comparison kept on purpose: the label is read as a string
            // while the classifier's return type is not visible here.
            if (findCatagory($new_tweet[1]) == $new_tweet[0]) {
                $correct++;
            } else {
                $incorrect++;
            }
        }
    } finally {
        // Release the handle even if classification throws.
        fclose($fp);
    }

    // Guard against an empty dataset, which used to divide by zero.
    $evaluated = $correct + $incorrect;
    $efficiency = $evaluated > 0 ? $correct / $evaluated : 0;

    print "Correct:\t" . $correct . "\n";
    print "Incorrect:\t" . $incorrect . "\n";
    print "Effeciency:\t" . $efficiency . "\n";
}
function findCatagory($tweet)
{
    $result = classify($tweet);
    print_r($result);