コード例 #1
0
ファイル: feature-test.php プロジェクト: Tjorriemorrie/app
<?php

include '../autoloader.php';
use NlpTools\FeatureFactories\FunctionFeatures;
use NlpTools\Tokenizers\WhitespaceTokenizer;
use NlpTools\Documents\Document;
use NlpTools\Documents\WordDocument;
// Load the sample text and split it into whitespace-separated tokens.
$text = file_get_contents('token-test');
$tokenizer = new WhitespaceTokenizer();
$tokens = $tokenizer->tokenize($text);

// Feature factory made of two closures; each receives the candidate class
// and the document being described.
$feats = new FunctionFeatures();

// Feature 1: the first element of the document data
// (presumably the word itself -- confirm against WordDocument).
$feats->add(function ($class, Document $d) {
    $data = $d->getDocumentData();

    return current($data);
});

// Feature 2: "isCapitalized" when the first byte of that element is an
// upper-case letter; otherwise nothing (null) is returned.
$feats->add(function ($class, Document $d) {
    $data = $d->getDocumentData();
    $word = current($data);
    if (!ctype_upper($word[0])) {
        return;
    }

    return "isCapitalized";
});

// One WordDocument per token position; the third argument (5) is passed
// through unchanged -- presumably the context window size, TODO confirm.
$documents = array();
foreach (array_keys($tokens) as $index) {
    $documents[$index] = new WordDocument($tokens, $index, 5);
}

// Print the feature list of every document as "[a,b,...]", one per line.
foreach ($documents as $document) {
    $features = $feats->getFeatureArray('0', $document);
    echo '[', implode(',', $features), ']', PHP_EOL;
}
コード例 #2
0
 *    php -d memory_limit=300M sentiment_maxent.php train test
 *
 */
// include the autoloader
include '../autoloader.php';
use NlpTools\Tokenizers\WhitespaceTokenizer;
use NlpTools\FeatureFactories\FunctionFeatures;
use NlpTools\Documents\Document;
use NlpTools\Documents\TokensDocument;
use NlpTools\Documents\TrainingSet;
use NlpTools\Optimizers\ExternalMaxentOptimizer;
use NlpTools\Models\Maxent;
use NlpTools\Classifiers\FeatureBasedLinearClassifier;
// Create the reusable helpers: a tokenizer and a feature factory.
$tok = new WhitespaceTokenizer();
$ff = new FunctionFeatures();
// Emit one feature per token of the document, each prefixed with the
// candidate class name.
// The parameter is hinted with `Document`, the name imported at the top of
// this script; a name that is not imported would resolve to the global
// namespace and make the closure fail its type check when it is invoked.
$ff->add(function ($class, Document $d) {
    $r = array();
    foreach ($d->getDocumentData() as $token) {
        $r[] = $class . $token;
    }
    return $r;
});
// create
// 1. an empty training set
// 2. an optimizer
// 3. an empty model
$tset = new TrainingSet();
// Path of the external optimizer binary, overridable through the
// GD_OPTIMIZER environment variable. $_ENV is only populated when
// variables_order contains "E", so getenv() is consulted as a fallback
// before settling on the default executable name.
if (isset($_ENV["GD_OPTIMIZER"])) {
    $OPTIMIZER_PATH = $_ENV["GD_OPTIMIZER"];
} elseif (getenv("GD_OPTIMIZER") !== false) {
    $OPTIMIZER_PATH = getenv("GD_OPTIMIZER");
} else {
    $OPTIMIZER_PATH = 'gradient-descent';
}
$optimizer = new ExternalMaxentOptimizer($OPTIMIZER_PATH);
$model = new Maxent(array());
コード例 #3
0
ファイル: maxent-test.php プロジェクト: Tjorriemorrie/app
include '../autoloader.php';
use NlpTools\Documents\Document;
use NlpTools\Documents\TrainingSet;
use NlpTools\Documents\WordDocument;
use NlpTools\FeatureFactories\FunctionFeatures;
use NlpTools\Models\Maxent;
use NlpTools\Optimizers\MaxentGradientDescent;
use NlpTools\Classifiers\FeatureBasedLinearClassifier;
// Parallel lists: $tokens[i] is a word and $classes[i] its class label.
$tokens = array();
$classes = array();
// Each line of 'dev-doc' is read as "<token> <class>", separated by one space.
foreach (file('dev-doc') as $line) {
    // Strip only the line terminator. Cutting the last character blindly
    // would truncate the class on a final line without a trailing newline
    // and would leave a stray "\r" on CRLF files.
    $tmp = explode(' ', rtrim($line, "\r\n"));
    if (count($tmp) < 2) {
        // Blank or malformed line: there is no class to pair with the token.
        continue;
    }
    $tokens[] = $tmp[0];
    $classes[] = $tmp[1];
}
$feats = new FunctionFeatures();
//$feats->add(function ($class,Document $d) {
//	return $class.current($d->getDocumentData());
//});
// Feature fired only for the START_SENTENCE class: "prev=" followed by the
// last element of the document data's second entry (presumably the words
// preceding the current one -- confirm against WordDocument).
$feats->add(function ($class, Document $d) {
    if ($class != 'START_SENTENCE') {
        return;
    }
    $dat = $d->getDocumentData();
    $prev = $dat[1];
    // end() moves to the last element and returns it (false when the list
    // is empty), which is the same value current() would yield afterwards.
    return 'prev=' . end($prev);
});
$feats->add(function ($class, Document $d) {
    if ($class != 'START_SENTENCE') {
        return;