/**
 * Rule-based decision on whether a token closes a sentence.
 *
 * Only the first element of the document data (the token itself) is
 * inspected. The token is labelled 'EOW' when it ends with a full stop
 * and contains exactly one '.'; every other token is labelled 'O'.
 *
 * @param array $classes Candidate classes; not consulted by these rules.
 * @param Document $d The document whose first data element is the token.
 * @return string Either 'EOW' or 'O'.
 */
public function classify(array $classes, Document $d)
 {
     // Any further elements of the document data are not needed by
     // these rules, so only the token is destructured.
     list($token) = $d->getDocumentData();

     // assume that all sentences end in full stops
     if (substr($token, -1) !== '.') {
         return 'O';
     }

     // to catch some naive abbreviations such as U.S.A.
     if (substr_count($token, '.') > 1) {
         return 'O';
     }

     return 'EOW';
 }
Beispiel #2
0
 /**
  * For use with TokensDocument mostly. Simply return the data as
  * features. Could contain duplicates (a feature firing twice
  * for a single document).
  * 
  * @param string $class The class for which we are calculating features
  * @param Document $d The document to calculate features for.
  * @return array
  */
 public function getFeatureArray($class, Document $d)
 {
     // The document's data is handed back unchanged as the feature
     // list; $class plays no part in the result.
     $features = $d->getDocumentData();

     return $features;
 }