Esempio n. 1
0
// Register a single feature function: for a candidate class and a document
// it emits one feature per element of the document data, each formed by
// prefixing that element with the class.
$ff = new FunctionFeatures();
$ff->add(function ($class, DocumentInterface $d) {
    $features = array();
    foreach ($d->getDocumentData() as $token) {
        array_push($features, $class . $token);
    }

    return $features;
});
// Create the three pieces the training run needs:
//   1. an empty training set
//   2. an optimizer
//   3. an empty model
$tset = new TrainingSet();

// Name/path of the external optimizer, overridable through the GD_OPTIMIZER
// environment variable. $_ENV is only populated when php.ini's
// variables_order contains "E", so getenv() is consulted as well before
// falling back to the default.
if (isset($_ENV["GD_OPTIMIZER"])) {
    $OPTIMIZER_PATH = $_ENV["GD_OPTIMIZER"];
} elseif (getenv("GD_OPTIMIZER") !== false) {
    $OPTIMIZER_PATH = getenv("GD_OPTIMIZER");
} else {
    $OPTIMIZER_PATH = 'gradient-descent';
}
$optimizer = new ExternalMaxentOptimizer($OPTIMIZER_PATH);
$model = new Maxent(array());

// argv[1] and argv[2] are paths to files that contain the paths
// to the actual documents. Fail early with a usage message instead of
// handing an undefined value to SplFileObject.
if (!isset($argv[1], $argv[2])) {
    fwrite(STDERR, "Usage: php <script> <train-list-file> <test-list-file>\n");
    exit(1);
}
$train = new SplFileObject($argv[1]);
$test = new SplFileObject($argv[2]);
// fill in the training set
// Every line read from $train is treated as the path of one document.
foreach ($train as $f) {
    // Drop the last character of the line (presumably the trailing newline).
    // NOTE(review): a final line without a newline would lose a real
    // character here, and a "\r\n" ending would leave the "\r" in the path —
    // confirm the input format.
    $f = substr($f, 0, -1);
    // Skip lines that are empty once the last character is removed.
    if (strlen($f) == 0) {
        continue;
    }
    // The label is "pos" when the path contains the substring "pos",
    // otherwise it stays "neg".
    $class = "neg";
    if (strpos($f, "pos") !== false) {
        $class = "pos";
    }
    // Read the file, tokenize its contents and add it under the chosen label.
    // NOTE(review): $tok is not defined in the visible part of this script —
    // presumably a tokenizer created earlier; confirm.
    $tset->addDocument($class, new TokensDocument($tok->tokenize(file_get_contents($f))));
Esempio n. 2
0
    return 'prev=' . current($prev);
});
// Feature function: for the 'START_SENTENCE' class only, emits
// "isCapitalized" when the first element of the document data starts with
// an upper-case character (as decided by ctype_upper()). In every other
// case it returns nothing (null), i.e. the feature does not fire.
$feats->add(function ($class, Document $d) {
    if ($class !== 'START_SENTENCE') {
        return;
    }
    $w = current($d->getDocumentData());
    // current() yields false for empty document data and the token itself
    // may be an empty string; indexing [0] on either would raise an
    // uninitialized-offset warning, so treat both as "not capitalized".
    if (!is_string($w) || $w === '') {
        return;
    }
    if (ctype_upper($w[0])) {
        return "isCapitalized";
    }
});
// Build the training set: one document per position in $tokens, labelled
// with the class stored at the same index of $classes.
$s = new TrainingSet();
foreach ($tokens as $index => $token) {
    // Each document receives the whole token list plus the current position.
    // The trailing 5 is presumably a context-window size — TODO confirm
    // against WordDocument's constructor.
    $s->addDocument($classes[$index], new WordDocument($tokens, $index, 5));
}
// Create a model from an empty array and train it on the set above using
// the feature functions in $feats.
$maxent = new Maxent(array());
// NOTE(review): the meaning of the optimizer arguments (0.01, 1, 100000) is
// not visible here — verify against MaxentGradientDescent's constructor.
$maxent->train($feats, $s, new MaxentGradientDescent(0.01, 1, 100000));
// Output the learned weights.
$maxent->dumpWeights();
// Counters for the evaluation below.
// NOTE(review): $true_positives is initialised here but not updated in the
// visible part of the script.
$true_positives = 0;
$false_neg = 0;
$false_pos = 0;
// Classifier that combines the feature functions with the trained model.
$classifier = new FeatureBasedLinearClassifier($feats, $maxent);
// Iterate the training set with the class label as the foreach key.
$s->setAsKey(TrainingSet::CLASS_AS_KEY);
foreach ($s as $class => $doc) {
    // Pick the best of the two candidate classes for this document.
    $predicted_class = $classifier->classify(array('O', 'START_SENTENCE'), $doc);
    if ($class != $predicted_class) {
        // A wrong 'O' prediction counts as a false negative; any other
        // wrong prediction counts as a false positive.
        if ($predicted_class == 'O') {
            $false_neg++;
        } else {
            $false_pos++;
        }