// Fill in the training set. Every entry of $train is used as the path of a
// document; its label is "pos" when the path contains "pos", otherwise "neg".
foreach ($train as $f) {
    // Strip only a trailing line terminator instead of blindly dropping the
    // last byte: this keeps a final entry that has no newline intact and
    // also removes the "\r" of CRLF input.
    // NOTE(review): presumably the entries were read line by line and still
    // carry their newline -- confirm against the code that builds $train.
    $f = rtrim($f, "\r\n");
    if ($f === '') {
        // blank entry, nothing to load
        continue;
    }

    $class = "neg";
    if (strpos($f, "pos") !== false) {
        $class = "pos";
    }

    $tset->addDocument(
        $class,
        new TokensDocument($tok->tokenize(file_get_contents($f)))
    );
}

// train the model
$model->train($ff, $tset, $optimizer);

// to use the model we need a classifier
$cls = new FeatureBasedLinearClassifier($ff, $model);

// Evaluate the model: $correct counts matching predictions, $total counts
// evaluated documents (both are updated further down in the loop body).
$correct = 0;
$total = 0;
foreach ($test as $f) {
    // Same path clean-up and labelling rule as for the training set.
    $f = rtrim($f, "\r\n");
    if ($f === '') {
        continue;
    }

    $class = "neg";
    if (strpos($f, "pos") !== false) {
        $class = "pos";
    }

    $doc = new TokensDocument($tok->tokenize(file_get_contents($f)));
    $predicted = $cls->classify(array("pos", "neg"), $doc);
    if ($predicted == $class) {
$w = current($d->getDocumentData());
if (ctype_upper($w[0])) {
    return "isCapitalized";
}
});

// Build the training set: one WordDocument per token position, labelled with
// the class stored at the same index of $classes.
$s = new TrainingSet();
foreach ($tokens as $index => $token) {
    $s->addDocument(
        $classes[$index],
        new WordDocument($tokens, $index, 5)
    );
}

// Train a Maxent model on that set and dump the learned weights.
$maxent = new Maxent(array());
$gradient_descent = new MaxentGradientDescent(0.01, 1, 100000);
$maxent->train($feats, $s, $gradient_descent);
$maxent->dumpWeights();

// Evaluate the classifier on the very same set it was trained on.
$true_positives = $false_neg = $false_pos = 0;
$classifier = new FeatureBasedLinearClassifier($feats, $maxent);
$candidate_classes = array('O', 'START_SENTENCE');

// Iterate the set as class => document pairs.
$s->setAsKey(TrainingSet::CLASS_AS_KEY);
foreach ($s as $class => $doc) {
    $predicted_class = $classifier->classify($candidate_classes, $doc);

    if ($class == $predicted_class) {
        // NOTE(review): every correct prediction lands here, including a
        // correctly predicted 'O' -- confirm that this is the intended
        // meaning of "true positive" for the metrics computed below.
        $true_positives++;
        continue;
    }

    // Misclassified: a wrong 'O' is a miss, anything else a false alarm.
    if ($predicted_class == 'O') {
        $false_neg++;
    } else {
        $false_pos++;
    }
}

$precision = function () use($true_positives, $false_pos) {
    return $true_positives / ($true_positives + $false_pos);