Esempio n. 1
0
// Fill in the training set: every entry of $train is treated as the path of
// a document file, and the class label is derived from that path.
foreach ($train as $f) {
    // Strip only the trailing line terminator. Cutting the last character
    // unconditionally (substr($f, 0, -1)) truncates a path that has no
    // trailing newline and leaves a stray "\r" behind on CRLF input.
    $f = rtrim($f, "\r\n");
    if (strlen($f) == 0) {
        continue;
    }
    // A path containing "pos" is labelled positive, anything else negative.
    // NOTE(review): this matches "pos" anywhere in the path — confirm that no
    // negative sample lives under a directory whose name contains "pos".
    $class = "neg";
    if (strpos($f, "pos") !== false) {
        $class = "pos";
    }
    $contents = file_get_contents($f);
    if ($contents === false) {
        // file_get_contents() has already raised a warning; skip the file
        // instead of passing false on to the tokenizer.
        continue;
    }
    $tset->addDocument($class, new TokensDocument($tok->tokenize($contents)));
}
// Fit the model on the collected training set, using $ff and $optimizer.
$model->train($ff, $tset, $optimizer);
// The trained model is queried through a classifier built from the same $ff.
$cls = new FeatureBasedLinearClassifier($ff, $model);
// Evaluate the model: both counters start at zero before the test loop.
$correct = $total = 0;
foreach ($test as $f) {
    $f = substr($f, 0, -1);
    if (strlen($f) == 0) {
        continue;
    }
    $class = "neg";
    if (strpos($f, "pos") !== false) {
        $class = "pos";
    }
    $doc = new TokensDocument($tok->tokenize(file_get_contents($f)));
    $predicted = $cls->classify(array("pos", "neg"), $doc);
    if ($predicted == $class) {
Esempio n. 2
0
    $w = current($d->getDocumentData());
    if (ctype_upper($w[0])) {
        return "isCapitalized";
    }
});
// Build the training set: one WordDocument per token position, labelled with
// the class stored at the same index of $classes.
$s = new TrainingSet();
foreach ($tokens as $index => $token) {
    $label = $classes[$index];
    $s->addDocument(
        $label,
        new WordDocument($tokens, $index, 5)
    );
}

// Start from a Maxent model built from an empty array and train it on $s
// with a MaxentGradientDescent optimizer.
$maxent = new Maxent(array());
$gradientDescent = new MaxentGradientDescent(0.01, 1, 100000);
$maxent->train($feats, $s, $gradientDescent);

// Dump the model weights after training.
$maxent->dumpWeights();
// Evaluate the trained model on the documents of $s, treating
// 'START_SENTENCE' as the positive class and 'O' as the negative class.
$true_positives = 0;
$false_neg = 0;
$false_pos = 0;
$classifier = new FeatureBasedLinearClassifier($feats, $maxent);
// Iterate with the class label as the key so the loop below receives
// ($class => $doc) pairs.
$s->setAsKey(TrainingSet::CLASS_AS_KEY);
foreach ($s as $class => $doc) {
    $predicted_class = $classifier->classify(array('O', 'START_SENTENCE'), $doc);
    if ($class != $predicted_class) {
        if ($predicted_class == 'O') {
            // Predicted negative, but the label was positive.
            $false_neg++;
        } else {
            // Predicted positive, but the label was negative.
            $false_pos++;
        }
    } elseif ($predicted_class == 'START_SENTENCE') {
        // Only a correct START_SENTENCE prediction is a true positive.
        // A correct 'O' prediction is a true negative and must not be
        // counted here, otherwise precision and recall are inflated.
        $true_positives++;
    }
}
$precision = function () use($true_positives, $false_pos) {
    return $true_positives / ($true_positives + $false_pos);