/**
 * Returns the stem of a word.
 *
 * The word is lower-cased and 'ё' is folded to 'е' before stemming. When
 * $this->use_cache is truthy, results are memoised in $this->cache keyed by
 * the normalised word.
 *
 * @param string $word
 *
 * @return string the stem, or the normalised word itself when self::RVRE
 *                yields no (non-empty) third result
 */
public function stem_word($word)
{
    $word = strtr(mb_strtolower($word), array('ё' => 'е'));

    if ($this->use_cache && isset($this->cache[$word])) {
        return $this->cache[$word];
    }

    // NOTE(review): presumably $start is the untouched prefix and $rv the
    // region the suffix rules operate on -- confirm against self::RVRE.
    list(, $start, $rv) = Core_Regexps::match_with_results(self::RVRE, $word);
    if (!$rv) {
        return $word;
    }

    // step 1
    // replace_ref() edits $rv in place; its return value is used here as
    // "did anything get replaced?".
    if (!Core_Regexps::replace_ref(self::PERFECTIVEGROUND, '', $rv)) {
        $rv = preg_replace(self::REFLEXIVE, '', $rv);

        if (Core_Regexps::replace_ref(self::ADJECTIVE, '', $rv)) {
            $rv = preg_replace(self::PARTICIPLE, '', $rv);
        } elseif (!Core_Regexps::replace_ref(self::VERB, '', $rv)) {
            $rv = preg_replace(self::NOUN, '', $rv);
        }
    }

    // step 2
    $rv = preg_replace('{и$}', '', $rv);

    // step 3
    if (preg_match(self::DERIVATIONAL, $rv)) {
        // The optional letter is wrapped in a group so that `?` covers the
        // whole character. These inline patterns carry no `u` modifier, so a
        // bare `ь?` would make only the last byte of a multi-byte 'ь'
        // optional and a plain "ост" ending would never match.
        $rv = preg_replace('{ост(?:ь)?$}', '', $rv);
    }

    // step 4
    if (!Core_Regexps::replace_ref('{ь$}', '', $rv)) {
        // Same grouping as in step 3: a bare `е?` could match "ейш" plus the
        // first byte of the following letter and leave a broken character.
        // NOTE(review): this pattern is not anchored to the end of $rv,
        // exactly as in the previous code -- confirm that is intended.
        $rv = preg_replace(array('{ейш(?:е)?}', '{нн$}'), array('', 'н'), $rv);
    }

    $stem = $start . $rv;

    if ($this->use_cache) {
        $this->cache[$word] = $stem;
    }

    return $stem;
}
/**
 * Checks that Core_Regexps::replace_ref() returns the expected replacement
 * count and leaves the expected text in the subject variable it was given.
 *
 * @dataProvider provider_replace_ref
 */
public function test_replace_ref_count($search_str, $replace_str, $subject, $count_replaced, $expected)
{
    // Call first: the second assertion inspects $subject after this call.
    $actual_count = Core_Regexps::replace_ref($search_str, $replace_str, $subject);

    // PHPUnit's argument order is (expected, actual); keeping to it makes
    // failure messages label the two values correctly.
    $this->assertEquals($count_replaced, $actual_count);
    $this->assertEquals($expected, $subject);
}