示例#1
0
 /**
  * Replaces lexically inflected words with their pseudo-dictionary form,
  * reducing the number of variants of a word in the feature set.
  *
  * A word for which the lemmatizer returns false is kept unchanged;
  * otherwise the returned forms are merged into the result.
  *
  * @param array $comment Words to analyse.
  * @return array|false Resulting words, or false when there are none.
  */
 protected function lematize(array $comment)
 {
     $lemmas = array();
     foreach ($comment as $word) {
         $forms = self::$fsal->lematize($word);
         if ($forms === false) {
             // Nothing returned for this word: pass it through as given.
             $lemmas[] = $word;
             continue;
         }
         $lemmas = array_merge($lemmas, $forms);
     }

     // Callers receive false rather than an empty array.
     return count($lemmas) == 0 ? false : $lemmas;
 }
示例#2
0
     } elseif ($temp = $fsaa->accent_word($w)) {
         $tok_comment = array_merge($tok_comment, $temp);
     } elseif (strlen($w) > 3 && ($temp = $fsas->spell_word($w))) {
         $tok_comment = array_merge($tok_comment, array_slice($temp, 0, 5));
         // $tokenizer->checkOnStoplist($temp));
     } else {
         //$tok_comment[] = $w;
     }
 }
 // Clear $tok_comment1 before it is rebuilt for the lemmatization stage below.
 unset($tok_comment1);
 // NOTE(review): $stoper presumably records a checkpoint under the given
 // label (here: end of the accent/spelling stage) - confirm against its class.
 $stoper->set('accents&spell');
 // Lemmatization: replace $tok_comment with the forms returned for each token.
 $tok_comment1 = array();
 if (count($tok_comment) > 0) {
     foreach ($tok_comment as $w) {
         $temp = $fsal->lematize($w);
         // Tokens for which lematize() returns false are dropped here, not kept.
         if ($temp !== false) {
             $tok_comment1 = array_merge($tok_comment1, $temp);
         }
     }
     // Remove duplicate forms; array_unique preserves keys, so the result
     // may have gaps in its numeric indexes.
     $tok_comment = array_unique($tok_comment1);
 }
 unset($tok_comment1);
 $stoper->set('lems');
 // Check for vulgarisms: the entry is reported and skipped when
 // findVulgarism() returns a truthy value or $prop_vulg is positive.
 if ($validation->findVulgarism($tok_comment) || $prop_vulg > 0) {
     // Print a 'V'-prefixed line with the id, $prop_vulg, the comment and the token list.
     echo 'V' . $id . "-{$prop_vulg}-" . $comment . '===' . implode(', ', $tok_comment) . "\n";
     // set('vulg') is called on both paths, before skipping and before falling through.
     $stoper->set('vulg');
     // Move on to the next iteration of the enclosing loop (outside this excerpt).
     continue;
 }
 $stoper->set('vulg');