/**
 * Processes the current stem with the 1st step
 * of the Porter algorithm.
 *
 * Applies sub-steps 1a, 1b and 1c in order to $this->currentStem and
 * writes the result back to it:
 *  - 1a rewrites plural-like endings (sses, ied, ies, s);
 *  - 1b rewrites eed/eedly and strips ed/edly/ing/ingly;
 *  - 1c turns a terminal y/Y into i after a consonant.
 *
 * If the stem is one of the exceptional forms after 1a, the method returns
 * immediately and leaves the stem untouched by 1b and 1c.
 *
 * @return void
 * @author John Anderson
 * @see http://snowball.tartarus.org/algorithms/english/stemmer.html
 */
protected function step1()
{
    // a:
    // Each callback receives the whole word and returns the rewritten word.
    // NOTE(review): performOnLongestSuffix() is not visible here; it is assumed
    // to run only the callback registered for the longest matching suffix.
    $this->currentStem = $this->performOnLongestSuffix($this->currentStem, array(
        "sses" => function ($word) {
            return preg_replace("/sses\$/", 'ss', $word);
        },
        "ied" => function ($word) {
            // More than one letter before the suffix -> "i", otherwise "ie".
            $replacement = strlen($word) > 4 ? 'i' : 'ie';

            return preg_replace("/ied\$/", $replacement, $word);
        },
        "ies" => function ($word) {
            // Same rule as "ied" (ties -> tie, cries -> cri).
            $replacement = strlen($word) > 4 ? 'i' : 'ie';

            return preg_replace("/ies\$/", $replacement, $word);
        },
        "s" => function ($word) {
            // Drop the s only when a vowel occurs before the letter that
            // immediately precedes it (gaps -> gap, but gas and this are kept).
            // The pattern must be tested against the whole word: once the s
            // is cut off, "s$" can no longer match.
            if (preg_match('/[aeiouy].+s$/', $word) === 1) {
                return preg_replace("/s\$/", '', $word);
            }

            return $word;
        },
        // "us" and "ss" are listed only so that they win over the plain "s"
        // rule as longer suffixes; such words are left alone.
        "us" => function ($word) {
            return $word;
        },
        "ss" => function ($word) {
            return $word;
        },
    ));

    //Exceptional forms
    // These words must not be touched by 1b/1c. The list holds values, not
    // keys, so membership is tested with in_array() rather than isset().
    $exceptions = array('inning', 'outing', 'canning', 'herring', 'earring', 'proceed', 'exceed', 'succeed');
    if (in_array($this->currentStem, $exceptions, true)) {
        return;
    }

    // Shared implementation for ed / edly / ing / ingly: remove the last
    // $suffixLength characters if what remains contains a vowel, then repair
    // the remaining stem.
    $stripVerbSuffix = function ($word, $suffixLength) {
        $rest = substr($word, 0, strlen($word) - $suffixLength);
        if (preg_match('/[aeiouy]/', $rest) < 1) {
            // No vowel before the suffix: leave the word as it is.
            return $word;
        }

        $tail = substr($rest, -2, 2);
        if (in_array($tail, array('at', 'bl', 'iz'), true)) {
            // e.g. "luxuriat" -> "luxuriate"
            return $rest . 'e';
        }

        $doubles = array('bb', 'dd', 'ff', 'gg', 'mm', 'nn', 'pp', 'rr', 'tt');
        if (in_array($tail, $doubles, true)) {
            // e.g. "hopp" -> "hop"
            return substr($rest, 0, strlen($rest) - 1);
        }

        if (Stemmer::isShort($rest)) {
            // e.g. "hop" -> "hope"
            return $rest . 'e';
        }

        return $rest;
    };

    // b:
    $this->currentStem = $this->performOnLongestSuffix($this->currentStem, array(
        "eed" => function ($word) {
            if (strpos(Stemmer::getR1($word), 'eed') !== false) {
                return preg_replace("/eed\$/", 'ee', $word);
            }

            // Suffix is not in R1: keep the word (previously fell through and
            // returned null).
            return $word;
        },
        "eedly" => function ($word) {
            if (strpos(Stemmer::getR1($word), 'eedly') !== false) {
                return preg_replace("/eedly\$/", 'ee', $word);
            }

            // Suffix is not in R1: keep the word (previously fell through and
            // returned null).
            return $word;
        },
        "ed" => function ($word) use ($stripVerbSuffix) {
            return $stripVerbSuffix($word, 2);
        },
        "edly" => function ($word) use ($stripVerbSuffix) {
            return $stripVerbSuffix($word, 4);
        },
        "ing" => function ($word) use ($stripVerbSuffix) {
            return $stripVerbSuffix($word, 3);
        },
        "ingly" => function ($word) use ($stripVerbSuffix) {
            return $stripVerbSuffix($word, 5);
        },
    ));

    // c:
    // Replace a final y/Y by i when it follows a consonant that is not the
    // first letter of the word (cry -> cri, by -> by, say -> say). The leading
    // "." in the captured group enforces the "not the first letter" part.
    $this->currentStem = preg_replace('/(.[bcdfghjklmnpqrstvwxz])[yY]$/', '$1i', $this->currentStem);
}