Beispiel #1
0
predicting whether a given text shows a 
<font style='background-color: #90EE90'>positive</font>, 
<font style='background-color: #FFA07A'>negative</font> or
<font style='background-color: #DCDCDC'>neutral</font>
sentiment/feeling.</p>

<p>To build this system, a text classification technique
has to be adapted to a given application domain. In this demo, the
<a href='http://nlp.cs.swarthmore.edu/semeval/'>SemEval-2007 dataset</a>
is used to train the classifier, and the learnt model is then 
applied to similar world news headlines from 
The Washington Post:</p>

<?php 
// Sentiment demo: classify each headline of the Washington Post world feed
// and print it inside a paragraph whose CSS class reflects the prediction.

// Prepare classifier: point a Multinomial Naive Bayes model at the
// "semeval07" database.
$classifier = new MultinomialNaiveBayes();
$classifier->setDatabase("semeval07");
// Prepare data: each entry returned by the feeder is read below through
// its "title" and "link" keys.
$feeder = new FeedRSS();
$aFeeds = $feeder->getFood("http://feeds.washingtonpost.com/rss/world");
foreach ($aFeeds as $feed) {
    $lab = $classifier->classify($feed["title"]);
    // Map the predicted label to a CSS class. Any label other than
    // "NEG" or "NEU" is rendered as positive, as before.
    if ($lab == "NEG") {
        $cssClass = "neg";
    } elseif ($lab == "NEU") {
        $cssClass = "neu";
    } else {
        $cssClass = "pos";
    }
    // The feed is remote, untrusted content: escape it before placing it in
    // HTML text and in a single-quoted attribute. double_encode is disabled
    // so entities already present in the feed are not escaped twice.
    $title = htmlspecialchars((string) $feed["title"], ENT_QUOTES, "UTF-8", false);
    $link = htmlspecialchars((string) $feed["link"], ENT_QUOTES, "UTF-8", false);
    echo "<p class='" . $cssClass . "'>";
    echo "&#8226;&nbsp;&nbsp;" . $title . " - <a href='" . $link . "'>Read more</a></p>";
}
            </font>
        </p>
        <hr>
        <?php 
// Training form handler: when the form was submitted with gotrain=true,
// load the uploaded dataset, train a new classifier on it and save the
// model under the database name supplied in the form.
if (isset($_POST["gotrain"]) && $_POST["gotrain"] == "true") {
    // A request without a file part has no "datafile" entry at all, so test
    // for its presence before reading the upload error code.
    $uploadMissing = !isset($_FILES["datafile"]) || $_FILES["datafile"]["error"] > 0;
    if ($uploadMissing || empty($_POST["newdbname"])) {
        echo "Error!!! Missing parameters!\n";
    } else {
        try {
            echo "<p>Loading dataset... ";
            $dataset = new Dataset();
            // getFood() yields the texts and their labels as a pair.
            list($text, $labs) = $dataset->getFood($_FILES["datafile"]["tmp_name"]);
            echo "OK</p>";
            echo "<p>Training model... ";
            $classifier = new MultinomialNaiveBayes();
            $classifier->train($text, $labs);
            echo "OK</p>";
            echo "<p>Saving parameters... ";
            // NOTE(review): the database name comes straight from the form;
            // confirm that setDatabase() validates or sanitises it.
            $classifier->setDatabase($_POST["newdbname"]);
            $classifier->save();
            echo "OK</p>";
            echo "<p><b>Training process completed " . "successfully!</b></p>";
        } catch (Exception $e) {
            // The message may echo back user-supplied values, so escape it
            // before writing it into the page.
            echo "Caught exception: " . htmlspecialchars($e->getMessage(), ENT_QUOTES, "UTF-8") . "\n";
        }
    }
}
?>
    </body>
Beispiel #3
0
    } else {
        // create new user
        $db->query("INSERT INTO Users VALUES ( '{$callerIP}', 1 );");
    }
}
// Check length of query
if ($granted) {
    $data = RestUtils::processRequest();
    if ($premium || strlen($data->getTextToProc()) > 0 && strlen($data->getTextToProc()) < 2000) {
        switch ($data->getMethod()) {
            case 'post':
                // right method
                $serv = $data->getService();
                if ($serv == 'sentiment_news') {
                    $tex = $data->getTextToProc();
                    $classifier = new MultinomialNaiveBayes();
                    $classifier->setDatabase("semeval07");
                    $pNEG = $classifier->likelihood($tex, "NEG");
                    $pNEU = $classifier->likelihood($tex, "NEU");
                    $pPOS = $classifier->likelihood($tex, "POS");
                    // Too long, force to neutral, sentiment wash
                    if ($pNEG == false) {
                        $pNEU = 1;
                    } else {
                        $pTotal = $pNEG + $pNEU + $pPOS;
                        $pNEG = $pNEG / $pTotal;
                        $pNEU = $pNEU / $pTotal;
                        $pPOS = $pPOS / $pTotal;
                    }
                    // No need to hit the DB again
                    $lab = "NEG";
Beispiel #4
0
// Load the classifier, the RSS feeder and pagemaker.php, each resolved
// relative to the directory that contains this script.
include __DIR__ . "/../core/classification/MultinomialNaiveBayes.php";
include __DIR__ . "/../core/util/feeding/FeedRSS.php";
include __DIR__ . "/pagemaker.php";
// Start the page with its title (putHeader() is presumably provided by
// pagemaker.php -- confirm).
putHeader("Text Categorisation and Topic/Domain Identification");
?>

<p>Identifies the semantic field of a given text and relates it
to its corresponding topic or domain.</p>

<p>To build this system, a text classification technique
has to be adapted to a given set of application domains. In this demo, 
the <a href='http://kdd.ics.uci.edu/databases/reuters_transcribed/reuters_transcribed.html'>Reuters Transcribed Subset</a>
is used to train the classifier, and the learnt model is then 
applied to predicting the topic of the most read articles from Reuters:</p>

<?php 
// Topic demo: predict a topic for each of the most-read Reuters articles
// and print the topic, the title, a link and the article description.

// Prepare classifier: point a Multinomial Naive Bayes model at the
// "ReutersTranscribedSubset" database.
$classifier = new MultinomialNaiveBayes();
$classifier->setDatabase("ReutersTranscribedSubset");
// Prepare data: each entry returned by the feeder is read below through
// its "title", "link" and "desc" keys.
$feeder = new FeedRSS();
$aFeeds = $feeder->getFood("http://feeds.reuters.com/reuters/MostRead?format=xml");
foreach ($aFeeds as $feed) {
    $lab = $classifier->classify($feed["title"]);
    // Everything printed here comes from a remote feed or from stored model
    // data, so escape it for HTML. double_encode is disabled so entities
    // already present in the feed are not escaped twice.
    $topic = htmlspecialchars((string) $lab, ENT_QUOTES, "UTF-8", false);
    $title = htmlspecialchars((string) $feed["title"], ENT_QUOTES, "UTF-8", false);
    $link = htmlspecialchars((string) $feed["link"], ENT_QUOTES, "UTF-8", false);
    // strip_tags() removes each tag individually. The previous greedy
    // pattern /<.+>/ deleted everything between the first "<" and the last
    // ">", which also discarded the text lying between two tags.
    $desc = htmlspecialchars(strip_tags((string) $feed["desc"]), ENT_QUOTES, "UTF-8", false);
    echo "<p><font color='#808080'>Topic: " . $topic . "</font><br />";
    echo "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<b>" . $title . "</b>" . " - <a href='" . $link . "'>Read more</a><br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;" . $desc . "</p>";
}
?>

<?php 
// Finish the page. putFooter() is not defined in this view; presumably it
// comes from pagemaker.php and emits the closing markup -- confirm.
putFooter();