コード例 #1
0
ファイル: PostController.php プロジェクト: frenzelgmbh/sblog
 /**
  * Displays a single Post model in online view.
  *
  * Before rendering, registers two meta tags built from the post: a
  * description (title plus the start of the first sentence of the
  * tag-stripped content) and the post's tags as keywords.
  *
  * @param integer $id identifier handed to findModel() to load the post
  * @return mixed the result of rendering the 'onlineview' view
  */
 public function actionOnlineview($id)
 {
     // Load the model first: its title, content and tags feed the meta tags.
     $model = $this->findModel($id);

     // Split the tag-stripped content into sentences (done once; the result is reused below).
     $tok = new ClassifierBasedTokenizer(new EndOfSentence(), new WhitespaceTokenizer());
     $text = strip_tags($model->content);
     $sentences = $tok->tokenize($text);

     if (is_array($sentences) && array_key_exists(0, $sentences)) {
         // Truncate by characters rather than bytes so a multibyte character is never cut in half.
         $summary = mb_substr($sentences[0], 0, 100);
     } else {
         // No sentence was detected: fall back to the whole stripped text.
         $summary = $text;
     }

     // Each tag gets its own key. Registering both under the same key made the
     // keywords tag overwrite the description tag, so the description was never output.
     // NOTE(review): 'metadescription' / 'metakeywords' are not the standard HTML meta
     // names ('description' / 'keywords') - confirm whether the names are intentional.
     $this->view->registerMetaTag(['name' => 'metadescription', 'content' => $model->title . ' ' . $summary], 'description');
     $this->view->registerMetaTag(['name' => 'metakeywords', 'content' => $model->tags], 'keywords');

     return $this->render('onlineview', ['model' => $model]);
 }
コード例 #2
0
 /**
  * Verifies that a text made of two sentences, separated by a newline and
  * indentation, is tokenized into exactly those two sentences.
  */
 public function testTokenizer()
 {
     // Input: two sentences with a line break and leading spaces between them.
     $input = "We are what we repeatedly do.\n                Excellence, then, is not an act, but a habit.";

     // Expected: each sentence as its own token, without the separating whitespace.
     $expected = array(
         "We are what we repeatedly do.",
         "Excellence, then, is not an act, but a habit.",
     );

     $tokenizer = new ClassifierBasedTokenizer(new EndOfSentenceRules(), new WhitespaceTokenizer());

     $this->assertEquals($expected, $tokenizer->tokenize($input));
 }
コード例 #3
0
include '../../testing.php';
use NlpTools\Tokenizers\ClassifierBasedTokenizer;
use NlpTools\Tokenizers\WhitespaceTokenizer;
use NlpTools\Classifiers\Classifier;
use NlpTools\Documents\Document;
/**
 * Rule-based classifier that labels a token either 'EOW' (it closes a
 * sentence) or 'O' (it does not).
 */
class EndOfSentence implements Classifier
{
    /**
     * Labels the token carried by the given document.
     *
     * The token is labelled 'EOW' only when it ends with a full stop and that
     * full stop is the only one it contains; every other token is labelled 'O'.
     *
     * @param array    $classes candidate labels; not consulted by these rules
     * @param Document $d       document whose data starts with the token to label
     * @return string 'EOW' or 'O'
     */
    public function classify(array $classes, Document $d)
    {
        // Only the first element is used; the other two are presumably the
        // surrounding context tokens and are ignored here.
        list($word, $preceding, $following) = $d->getDocumentData();

        // Sentences are assumed to always end in a full stop.
        $endsWithDot = substr($word, -1) === '.';

        // More than one dot is treated as a naive abbreviation such as "U.S.A.".
        $hasSingleDot = substr_count($word, '.') === 1;

        return ($endsWithDot && $hasSingleDot) ? 'EOW' : 'O';
    }
}
// Demo: combine the rule-based end-of-sentence classifier with a whitespace
// tokenizer to split a text into sentences, then print the result.
$classifier = new EndOfSentence();
$wordTokenizer = new WhitespaceTokenizer();
$tok = new ClassifierBasedTokenizer($classifier, $wordTokenizer);

$text = "We are what we repeatedly do.\n\t\tExcellence, then, is not an act, but a habit.";
$sentences = $tok->tokenize($text);
print_r($sentences);

// Output:
// Array
// (
//    [0] => We are what we repeatedly do.
//    [1] => Excellence, then, is not an act, but a habit.
// )