Пример #1
0
 /**
  * Processes the current stem with the 1st step
  * of the Porter algorithm.
  *
  * Rewrites $this->currentStem in place in three phases:
  *  - 1a: plural-like endings (sses, ied, ies, s); "us" and "ss" are kept;
  *  - 1b: eed/eedly and ed/edly/ing/ingly endings;
  *  - 1c: a final y/Y preceded by a consonant that is not the first
  *        letter of the word becomes i (cry -> cri, by -> by, say -> say).
  *
  * A stem that equals one of the exceptional forms (inning, outing, ...)
  * after phase 1a skips phases 1b and 1c and is left as it is.
  *
  * NOTE(review): performOnLongestSuffix() is assumed to invoke the handler
  * registered for the longest suffix matching the word and to return the
  * handler's result - confirm against its definition.
  *
  * @return void
  * @author John Anderson
  * @see http://snowball.tartarus.org/algorithms/english/stemmer.html
  */
 protected function step1()
 {
     // Handler for suffixes that must be recognised but left untouched.
     $keepWord = function ($word) {
         return $word;
     };

     // Handler factory for "ied"/"ies": replaced by "i" when preceded by more
     // than one letter, otherwise by "ie" (ties -> tie, cries -> cri).
     $replaceIeSuffix = function ($suffix) {
         return function ($word) use ($suffix) {
             $replacement = strlen($word) > 4 ? 'i' : 'ie';

             return preg_replace('/' . $suffix . '$/', $replacement, $word);
         };
     };

     // Handler factory for "eed"/"eedly": replaced by "ee" only when the
     // suffix is found in R1; otherwise the word is returned unchanged
     // (never null, which would wipe out the current stem).
     $replaceEedSuffix = function ($suffix) {
         return function ($word) use ($suffix) {
             if (!strstr(Stemmer::getR1($word), $suffix)) {
                 return $word;
             }

             return preg_replace('/' . $suffix . '$/', 'ee', $word);
         };
     };

     // Handler factory for "ed"/"edly"/"ing"/"ingly": the suffix (given by
     // its length) is deleted when the preceding part contains a vowel, and
     // the remainder is then tidied up.
     $stripVerbSuffix = function ($suffixLength) {
         return function ($word) use ($suffixLength) {
             $rest = substr($word, 0, strlen($word) - $suffixLength);
             if (preg_match('/[aeiouy]/', $rest) < 1) {
                 // No vowel before the suffix: leave the word alone.
                 return $word;
             }

             $tail = substr($rest, -2, 2);
             if (in_array($tail, array('at', 'bl', 'iz'), true)) {
                 // Remainder ends in at/bl/iz: append "e".
                 return $rest . 'e';
             }

             $doubles = array('bb', 'dd', 'ff', 'gg', 'mm', 'nn', 'pp', 'rr', 'tt');
             if (in_array($tail, $doubles, true)) {
                 // Remainder ends in a double consonant: drop the last letter.
                 return substr($rest, 0, strlen($rest) - 1);
             }

             if (Stemmer::isShort($rest)) {
                 // Stemmer::isShort() reports the remainder as short: append "e".
                 return $rest . 'e';
             }

             return $rest;
         };
     };

     // a:
     $this->currentStem = $this->performOnLongestSuffix($this->currentStem, array(
         "sses" => function ($word) {
             return preg_replace("/sses\$/", 'ss', $word);
         },
         "ied" => $replaceIeSuffix('ied'),
         "ies" => $replaceIeSuffix('ies'),
         "s" => function ($word) {
             // Delete the "s" only when the preceding part contains a vowel
             // that is not immediately before it: "gaps" and "kiwis" lose
             // the "s", "gas" and "this" keep it.
             if (preg_match('/[aeiouy].+s$/', $word) === 1) {
                 return preg_replace("/s\$/", '', $word);
             }

             return $word;
         },
         "us" => $keepWord,
         "ss" => $keepWord,
     ));

     //Exceptional forms
     $exceptions = array('inning', 'outing', 'canning', 'herring', 'earring', 'proceed', 'exceed', 'succeed');
     // The list holds the words as values, so membership must be tested with
     // in_array(); isset() would only look at the numeric keys.
     if (in_array($this->currentStem, $exceptions, true)) {
         return;
     }

     // b:
     $this->currentStem = $this->performOnLongestSuffix($this->currentStem, array(
         "eed" => $replaceEedSuffix('eed'),
         "eedly" => $replaceEedSuffix('eedly'),
         "ed" => $stripVerbSuffix(2),
         "edly" => $stripVerbSuffix(4),
         "ing" => $stripVerbSuffix(3),
         "ingly" => $stripVerbSuffix(5),
     ));

     // c:
     // The leading "." makes sure the consonant before y/Y is not the first
     // letter of the word, so two-letter words such as "by" stay intact.
     $this->currentStem = preg_replace('/(.[bcdfghjklmnpqrstvwxz])[yY]$/', '$1i', $this->currentStem);
 }