predicting whether a given text shows a <font style='background-color: #90EE90'>positive</font>, <font style='background-color: #FFA07A'>negative</font> or <font style='background-color: #DCDCDC'>neutral</font> sentiment/feeling.</p>

<p>In order to produce this system, a Text Classification technique has to be adapted to the application domain at hand. In this demo, the <a href='http://nlp.cs.swarthmore.edu/semeval/'>SemEval-2007 dataset</a> is used to train the classifier, and the learnt model is then applied to world news headlines from The Washington Post:</p>

<?php
// Prepare the classifier: load the model parameters learnt from SemEval-2007
$classifier = new MultinomialNaiveBayes();
$classifier->setDatabase("semeval07");

// Prepare the data: fetch the latest world-news headlines
$feeder = new FeedRSS();
$aFeeds = $feeder->getFood("http://feeds.washingtonpost.com/rss/world");

// Label each headline and colour it according to the predicted sentiment
foreach ($aFeeds as $feed) {
    $lab = $classifier->classify($feed["title"]);
    if ($lab == "NEG") {
        echo "<p class='neg'>";
    } elseif ($lab == "NEU") {
        echo "<p class='neu'>";
    } else {
        echo "<p class='pos'>";
    }
    echo "• " . $feed["title"] .
         " - <a href='" . $feed["link"] . "'>Read more</a></p>";
}
?>
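<?php
// ---------------------------------------------------------------------------
// Illustrative sketch only (everything below is commented out and never runs).
// From the way the demo uses them, FeedRSS::getFood() is assumed to return an
// array of items shaped like the literal below, and classify() is assumed to
// return one of the labels seen at training time ("NEG", "NEU" or "POS" for
// the semeval07 model). The item itself is made up for illustration.
// ---------------------------------------------------------------------------
// $exampleItem = array(
//     "title" => "Aid reaches flooded villages after three days",
//     "link"  => "http://www.washingtonpost.com/world/",
//     "desc"  => "Plain-text summary of the article.",
// );
// $exampleLabel = $classifier->classify($exampleItem["title"]); // e.g. "POS"
?>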
</font> </p>

<hr>

<?php
// Train a new model from an uploaded dataset and store it under the database
// name supplied in the form
if (isset($_POST["gotrain"]) && $_POST["gotrain"] == "true") {
    if ($_FILES["datafile"]["error"] > 0 || empty($_POST["newdbname"])) {
        echo "Error: missing parameters (a dataset file and a database name are required).\n";
    } else {
        try {
            echo "<p>Loading dataset... ";
            $dataset = new Dataset();
            list($text, $labs) = $dataset->getFood($_FILES["datafile"]["tmp_name"]);
            echo "OK</p>";

            echo "<p>Training model... ";
            $classifier = new MultinomialNaiveBayes();
            $classifier->train($text, $labs);
            echo "OK</p>";

            echo "<p>Saving parameters... ";
            $classifier->setDatabase($_POST["newdbname"]);
            $classifier->save();
            echo "OK</p>";

            echo "<p><b>Training process completed successfully!</b></p>";
        } catch (Exception $e) {
            echo "Caught exception: " . $e->getMessage() . "\n";
        }
    }
}
?>
</body>
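<?php
/*
 * Minimal training sketch (illustration only; it is not wired to the form
 * handler above and never runs). It assumes, as the handler suggests, that
 * train() takes two parallel arrays (the raw texts and their labels, in that
 * order) and that save() persists the learnt parameters under the name set
 * with setDatabase(). The toy corpus and database name are made up.
 */
// $toyTexts  = array(
//     "Team clinches historic championship win",
//     "Dozens injured as storm batters coast",
//     "Parliament reconvenes after summer recess",
// );
// $toyLabels = array("POS", "NEG", "NEU");
//
// $toyModel = new MultinomialNaiveBayes();
// $toyModel->train($toyTexts, $toyLabels);
// $toyModel->setDatabase("toy_sentiment");
// $toyModel->save();
?>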
} else {
    // Unknown caller: create a new user record for this IP
    $db->query("INSERT INTO Users VALUES ( '{$callerIP}', 1 );");
}
}

// Check the length of the query (premium users are exempt from the limit)
if ($granted) {
    $data = RestUtils::processRequest();
    if ($premium || (strlen($data->getTextToProc()) > 0 &&
                     strlen($data->getTextToProc()) < 2000)) {
        switch ($data->getMethod()) {
            case 'post': // POST is the expected method
                $serv = $data->getService();
                if ($serv == 'sentiment_news') {
                    $tex = $data->getTextToProc();

                    // Score the text against the model trained on SemEval-2007
                    $classifier = new MultinomialNaiveBayes();
                    $classifier->setDatabase("semeval07");
                    $pNEG = $classifier->likelihood($tex, "NEG");
                    $pNEU = $classifier->likelihood($tex, "NEU");
                    $pPOS = $classifier->likelihood($tex, "POS");

                    if ($pNEG == false) {
                        // likelihood() failed (e.g. the text was too long):
                        // fall back to a neutral verdict
                        $pNEU = 1;
                    } else {
                        // Normalise the three scores into a probability
                        // distribution that sums to 1
                        $pTotal = $pNEG + $pNEU + $pPOS;
                        $pNEG = $pNEG / $pTotal;
                        $pNEU = $pNEU / $pTotal;
                        $pPOS = $pPOS / $pTotal;
                    }

                    // Pick the label from the scores already computed, so
                    // there is no need to hit the DB again
                    $lab = "NEG";
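                    // Worked example of the normalisation above (figures made
                    // up for illustration): if likelihood() returned
                    // pNEG = 0.02, pNEU = 0.05 and pPOS = 0.03, then
                    // pTotal = 0.10 and the normalised values become 0.2, 0.5
                    // and 0.3, a proper distribution whose largest entry
                    // (NEU in this example) gives the label to report.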
<?php
include dirname(__FILE__) . "/../core/classification/MultinomialNaiveBayes.php";
include dirname(__FILE__) . "/../core/util/feeding/FeedRSS.php";
include dirname(__FILE__) . "/pagemaker.php";

putHeader("Text Categorisation and Topic/Domain Identification");
?>

<p>Identifies the semantic field of a given text and relates it to its corresponding topic or domain.</p>

<p>In order to produce this system, a Text Classification technique has to be adapted to a given set of application domains. In this demo, the <a href='http://kdd.ics.uci.edu/databases/reuters_transcribed/reuters_transcribed.html'>Reuters Transcribed Subset</a> is used to train the classifier, and the learnt model is then applied to predict the topic of the most-read articles from Reuters:</p>

<?php
// Prepare the classifier: load the model trained on the Reuters Transcribed Subset
$classifier = new MultinomialNaiveBayes();
$classifier->setDatabase("ReutersTranscribedSubset");

// Prepare the data: fetch the most-read Reuters articles
$feeder = new FeedRSS();
$aFeeds = $feeder->getFood("http://feeds.reuters.com/reuters/MostRead?format=xml");

// Predict and display the topic of each article
foreach ($aFeeds as $feed) {
    $lab = $classifier->classify($feed["title"]);
    echo "<p><font color='#808080'>Topic: " . $lab . "</font><br />";
    // Strip any HTML tags from the description before displaying it
    echo " <b>" . $feed["title"] . "</b>" .
         " - <a href='" . $feed["link"] . "'>Read more</a><br /> " .
         preg_replace("/<[^>]+>/", "", $feed["desc"]) . "</p>";
}
?>

<?php
putFooter();
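
// ---------------------------------------------------------------------------
// Illustrative sketch only: a self-contained multinomial Naive Bayes decision
// rule, included here to show the idea behind the classify() calls used in
// these demos. The real, database-backed implementation lives in
// core/classification/MultinomialNaiveBayes.php and may differ; the function
// and parameter names below are made up and nothing in the demos calls them.
// ---------------------------------------------------------------------------

/**
 * Returns the label with the highest log posterior:
 * argmax over labels of log P(label) + sum over words of log P(word | label).
 *
 * @param string $text      text to label
 * @param array  $logPriors label => log P(label)
 * @param array  $logLikes  label => array(word => log P(word | label))
 * @return string|null
 */
function mnbClassifySketch($text, array $logPriors, array $logLikes)
{
    $words = preg_split('/\W+/', strtolower($text), -1, PREG_SPLIT_NO_EMPTY);
    $bestLabel = null;
    $bestScore = -INF;
    foreach ($logPriors as $label => $logPrior) {
        $score = $logPrior;
        foreach ($words as $word) {
            // Words never seen with this label are simply skipped in this
            // sketch; a full implementation would apply smoothing instead.
            if (isset($logLikes[$label][$word])) {
                $score += $logLikes[$label][$word];
            }
        }
        if ($score > $bestScore) {
            $bestScore = $score;
            $bestLabel = $label;
        }
    }
    return $bestLabel;
}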